{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 4878,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0002050020500205002,
      "grad_norm": 37.64109909379259,
      "learning_rate": 1.360544217687075e-06,
      "loss": 4.3738,
      "step": 1
    },
    {
      "epoch": 0.0004100041000410004,
      "grad_norm": 39.87283600719748,
      "learning_rate": 2.72108843537415e-06,
      "loss": 4.512,
      "step": 2
    },
    {
      "epoch": 0.0006150061500615006,
      "grad_norm": 39.472896772216814,
      "learning_rate": 4.081632653061224e-06,
      "loss": 4.4425,
      "step": 3
    },
    {
      "epoch": 0.0008200082000820008,
      "grad_norm": 33.38905207047185,
      "learning_rate": 5.4421768707483e-06,
      "loss": 3.9203,
      "step": 4
    },
    {
      "epoch": 0.001025010250102501,
      "grad_norm": 23.08525793257891,
      "learning_rate": 6.802721088435375e-06,
      "loss": 3.4012,
      "step": 5
    },
    {
      "epoch": 0.0012300123001230013,
      "grad_norm": 19.088184046352346,
      "learning_rate": 8.163265306122448e-06,
      "loss": 2.9879,
      "step": 6
    },
    {
      "epoch": 0.0014350143501435015,
      "grad_norm": 20.338037765865536,
      "learning_rate": 9.523809523809523e-06,
      "loss": 2.8328,
      "step": 7
    },
    {
      "epoch": 0.0016400164001640015,
      "grad_norm": 14.542801875019975,
      "learning_rate": 1.08843537414966e-05,
      "loss": 2.5144,
      "step": 8
    },
    {
      "epoch": 0.0018450184501845018,
      "grad_norm": 12.089573241966134,
      "learning_rate": 1.2244897959183674e-05,
      "loss": 2.3532,
      "step": 9
    },
    {
      "epoch": 0.002050020500205002,
      "grad_norm": 8.053574726057994,
      "learning_rate": 1.360544217687075e-05,
      "loss": 2.1302,
      "step": 10
    },
    {
      "epoch": 0.002255022550225502,
      "grad_norm": 7.013914761875336,
      "learning_rate": 1.4965986394557824e-05,
      "loss": 2.1662,
      "step": 11
    },
    {
      "epoch": 0.0024600246002460025,
      "grad_norm": 5.418822437305117,
      "learning_rate": 1.6326530612244897e-05,
      "loss": 1.9452,
      "step": 12
    },
    {
      "epoch": 0.0026650266502665026,
      "grad_norm": 5.940146989570587,
      "learning_rate": 1.7687074829931973e-05,
      "loss": 2.0287,
      "step": 13
    },
    {
      "epoch": 0.002870028700287003,
      "grad_norm": 4.234885410824282,
      "learning_rate": 1.9047619047619046e-05,
      "loss": 1.8651,
      "step": 14
    },
    {
      "epoch": 0.003075030750307503,
      "grad_norm": 3.4291547117840477,
      "learning_rate": 2.0408163265306123e-05,
      "loss": 1.8889,
      "step": 15
    },
    {
      "epoch": 0.003280032800328003,
      "grad_norm": 3.078114123353822,
      "learning_rate": 2.17687074829932e-05,
      "loss": 1.8215,
      "step": 16
    },
    {
      "epoch": 0.0034850348503485036,
      "grad_norm": 3.1064321131215844,
      "learning_rate": 2.3129251700680275e-05,
      "loss": 1.8451,
      "step": 17
    },
    {
      "epoch": 0.0036900369003690036,
      "grad_norm": 2.71846922315322,
      "learning_rate": 2.448979591836735e-05,
      "loss": 1.797,
      "step": 18
    },
    {
      "epoch": 0.003895038950389504,
      "grad_norm": 2.266772231433325,
      "learning_rate": 2.5850340136054425e-05,
      "loss": 1.7298,
      "step": 19
    },
    {
      "epoch": 0.004100041000410004,
      "grad_norm": 2.456088314124704,
      "learning_rate": 2.72108843537415e-05,
      "loss": 1.7609,
      "step": 20
    },
    {
      "epoch": 0.004305043050430504,
      "grad_norm": 2.0487428566103847,
      "learning_rate": 2.857142857142857e-05,
      "loss": 1.6864,
      "step": 21
    },
    {
      "epoch": 0.004510045100451004,
      "grad_norm": 2.267998391844328,
      "learning_rate": 2.9931972789115647e-05,
      "loss": 1.6707,
      "step": 22
    },
    {
      "epoch": 0.004715047150471505,
      "grad_norm": 2.0069255800287094,
      "learning_rate": 3.1292517006802724e-05,
      "loss": 1.6673,
      "step": 23
    },
    {
      "epoch": 0.004920049200492005,
      "grad_norm": 2.1781103798419172,
      "learning_rate": 3.265306122448979e-05,
      "loss": 1.666,
      "step": 24
    },
    {
      "epoch": 0.005125051250512505,
      "grad_norm": 1.6937443021011496,
      "learning_rate": 3.401360544217687e-05,
      "loss": 1.5955,
      "step": 25
    },
    {
      "epoch": 0.005330053300533005,
      "grad_norm": 1.8065755233050094,
      "learning_rate": 3.5374149659863946e-05,
      "loss": 1.658,
      "step": 26
    },
    {
      "epoch": 0.005535055350553505,
      "grad_norm": 1.9182580321381462,
      "learning_rate": 3.673469387755102e-05,
      "loss": 1.6824,
      "step": 27
    },
    {
      "epoch": 0.005740057400574006,
      "grad_norm": 1.4236834414305823,
      "learning_rate": 3.809523809523809e-05,
      "loss": 1.6489,
      "step": 28
    },
    {
      "epoch": 0.005945059450594506,
      "grad_norm": 1.8731003710510907,
      "learning_rate": 3.945578231292517e-05,
      "loss": 1.6246,
      "step": 29
    },
    {
      "epoch": 0.006150061500615006,
      "grad_norm": 1.6001336131350723,
      "learning_rate": 4.0816326530612245e-05,
      "loss": 1.5944,
      "step": 30
    },
    {
      "epoch": 0.006355063550635506,
      "grad_norm": 1.6408864666073666,
      "learning_rate": 4.217687074829932e-05,
      "loss": 1.6628,
      "step": 31
    },
    {
      "epoch": 0.006560065600656006,
      "grad_norm": 1.393804100860193,
      "learning_rate": 4.35374149659864e-05,
      "loss": 1.5598,
      "step": 32
    },
    {
      "epoch": 0.006765067650676507,
      "grad_norm": 1.4316132023407333,
      "learning_rate": 4.4897959183673474e-05,
      "loss": 1.6082,
      "step": 33
    },
    {
      "epoch": 0.006970069700697007,
      "grad_norm": 1.3589272465171653,
      "learning_rate": 4.625850340136055e-05,
      "loss": 1.5753,
      "step": 34
    },
    {
      "epoch": 0.007175071750717507,
      "grad_norm": 1.223106309031031,
      "learning_rate": 4.761904761904762e-05,
      "loss": 1.6179,
      "step": 35
    },
    {
      "epoch": 0.007380073800738007,
      "grad_norm": 1.3729279883180139,
      "learning_rate": 4.89795918367347e-05,
      "loss": 1.6669,
      "step": 36
    },
    {
      "epoch": 0.007585075850758507,
      "grad_norm": 1.2255963668981733,
      "learning_rate": 5.034013605442177e-05,
      "loss": 1.6251,
      "step": 37
    },
    {
      "epoch": 0.007790077900779008,
      "grad_norm": 1.3998796560107785,
      "learning_rate": 5.170068027210885e-05,
      "loss": 1.5948,
      "step": 38
    },
    {
      "epoch": 0.007995079950799507,
      "grad_norm": 1.3886588280175667,
      "learning_rate": 5.3061224489795926e-05,
      "loss": 1.5789,
      "step": 39
    },
    {
      "epoch": 0.008200082000820008,
      "grad_norm": 1.3232325802476532,
      "learning_rate": 5.4421768707483e-05,
      "loss": 1.6008,
      "step": 40
    },
    {
      "epoch": 0.008405084050840509,
      "grad_norm": 1.0695923303603694,
      "learning_rate": 5.5782312925170065e-05,
      "loss": 1.5568,
      "step": 41
    },
    {
      "epoch": 0.008610086100861008,
      "grad_norm": 1.4754731221757968,
      "learning_rate": 5.714285714285714e-05,
      "loss": 1.6707,
      "step": 42
    },
    {
      "epoch": 0.008815088150881509,
      "grad_norm": 1.1993902293465908,
      "learning_rate": 5.850340136054422e-05,
      "loss": 1.5834,
      "step": 43
    },
    {
      "epoch": 0.009020090200902008,
      "grad_norm": 1.333962688219652,
      "learning_rate": 5.9863945578231295e-05,
      "loss": 1.6117,
      "step": 44
    },
    {
      "epoch": 0.00922509225092251,
      "grad_norm": 1.177419286591825,
      "learning_rate": 6.122448979591838e-05,
      "loss": 1.5804,
      "step": 45
    },
    {
      "epoch": 0.00943009430094301,
      "grad_norm": 1.1413837752740432,
      "learning_rate": 6.258503401360545e-05,
      "loss": 1.5583,
      "step": 46
    },
    {
      "epoch": 0.00963509635096351,
      "grad_norm": 1.1203710530412794,
      "learning_rate": 6.394557823129253e-05,
      "loss": 1.5605,
      "step": 47
    },
    {
      "epoch": 0.00984009840098401,
      "grad_norm": 1.1063122050829752,
      "learning_rate": 6.530612244897959e-05,
      "loss": 1.5567,
      "step": 48
    },
    {
      "epoch": 0.01004510045100451,
      "grad_norm": 1.184928744026521,
      "learning_rate": 6.666666666666667e-05,
      "loss": 1.5321,
      "step": 49
    },
    {
      "epoch": 0.01025010250102501,
      "grad_norm": 1.188774308015982,
      "learning_rate": 6.802721088435374e-05,
      "loss": 1.6235,
      "step": 50
    },
    {
      "epoch": 0.010455104551045511,
      "grad_norm": 1.122080209760507,
      "learning_rate": 6.938775510204082e-05,
      "loss": 1.577,
      "step": 51
    },
    {
      "epoch": 0.01066010660106601,
      "grad_norm": 1.1881047432968916,
      "learning_rate": 7.074829931972789e-05,
      "loss": 1.5724,
      "step": 52
    },
    {
      "epoch": 0.010865108651086511,
      "grad_norm": 1.094093048422763,
      "learning_rate": 7.210884353741498e-05,
      "loss": 1.5447,
      "step": 53
    },
    {
      "epoch": 0.01107011070110701,
      "grad_norm": 1.307737210298449,
      "learning_rate": 7.346938775510205e-05,
      "loss": 1.5103,
      "step": 54
    },
    {
      "epoch": 0.011275112751127511,
      "grad_norm": 1.0517542954361705,
      "learning_rate": 7.482993197278913e-05,
      "loss": 1.6141,
      "step": 55
    },
    {
      "epoch": 0.011480114801148012,
      "grad_norm": 1.0023397214832885,
      "learning_rate": 7.619047619047618e-05,
      "loss": 1.571,
      "step": 56
    },
    {
      "epoch": 0.011685116851168511,
      "grad_norm": 1.045679620732055,
      "learning_rate": 7.755102040816327e-05,
      "loss": 1.5603,
      "step": 57
    },
    {
      "epoch": 0.011890118901189012,
      "grad_norm": 0.9846175647305268,
      "learning_rate": 7.891156462585034e-05,
      "loss": 1.5654,
      "step": 58
    },
    {
      "epoch": 0.012095120951209511,
      "grad_norm": 1.0438531186227216,
      "learning_rate": 8.027210884353742e-05,
      "loss": 1.5724,
      "step": 59
    },
    {
      "epoch": 0.012300123001230012,
      "grad_norm": 1.018743767783559,
      "learning_rate": 8.163265306122449e-05,
      "loss": 1.5669,
      "step": 60
    },
    {
      "epoch": 0.012505125051250513,
      "grad_norm": 1.0821685280571838,
      "learning_rate": 8.299319727891157e-05,
      "loss": 1.5832,
      "step": 61
    },
    {
      "epoch": 0.012710127101271012,
      "grad_norm": 1.2877899437969236,
      "learning_rate": 8.435374149659864e-05,
      "loss": 1.616,
      "step": 62
    },
    {
      "epoch": 0.012915129151291513,
      "grad_norm": 1.0758679535113083,
      "learning_rate": 8.571428571428571e-05,
      "loss": 1.5676,
      "step": 63
    },
    {
      "epoch": 0.013120131201312012,
      "grad_norm": 1.0735525982150025,
      "learning_rate": 8.70748299319728e-05,
      "loss": 1.6061,
      "step": 64
    },
    {
      "epoch": 0.013325133251332513,
      "grad_norm": 1.2104333826852989,
      "learning_rate": 8.843537414965987e-05,
      "loss": 1.6127,
      "step": 65
    },
    {
      "epoch": 0.013530135301353014,
      "grad_norm": 1.3035430704098177,
      "learning_rate": 8.979591836734695e-05,
      "loss": 1.6724,
      "step": 66
    },
    {
      "epoch": 0.013735137351373513,
      "grad_norm": 0.984580171641528,
      "learning_rate": 9.115646258503402e-05,
      "loss": 1.5247,
      "step": 67
    },
    {
      "epoch": 0.013940139401394014,
      "grad_norm": 1.0972435438699233,
      "learning_rate": 9.25170068027211e-05,
      "loss": 1.5786,
      "step": 68
    },
    {
      "epoch": 0.014145141451414513,
      "grad_norm": 1.2511354478770902,
      "learning_rate": 9.387755102040817e-05,
      "loss": 1.6113,
      "step": 69
    },
    {
      "epoch": 0.014350143501435014,
      "grad_norm": 1.0936337767540483,
      "learning_rate": 9.523809523809524e-05,
      "loss": 1.5919,
      "step": 70
    },
    {
      "epoch": 0.014555145551455515,
      "grad_norm": 1.1467496227035352,
      "learning_rate": 9.659863945578231e-05,
      "loss": 1.5329,
      "step": 71
    },
    {
      "epoch": 0.014760147601476014,
      "grad_norm": 1.0713353060352995,
      "learning_rate": 9.79591836734694e-05,
      "loss": 1.5832,
      "step": 72
    },
    {
      "epoch": 0.014965149651496515,
      "grad_norm": 1.1166945816101541,
      "learning_rate": 9.931972789115646e-05,
      "loss": 1.6824,
      "step": 73
    },
    {
      "epoch": 0.015170151701517014,
      "grad_norm": 1.0960393805979314,
      "learning_rate": 0.00010068027210884355,
      "loss": 1.6052,
      "step": 74
    },
    {
      "epoch": 0.015375153751537515,
      "grad_norm": 1.048126877391856,
      "learning_rate": 0.00010204081632653062,
      "loss": 1.5975,
      "step": 75
    },
    {
      "epoch": 0.015580155801558016,
      "grad_norm": 1.055404329447733,
      "learning_rate": 0.0001034013605442177,
      "loss": 1.6247,
      "step": 76
    },
    {
      "epoch": 0.015785157851578517,
      "grad_norm": 1.088923103794291,
      "learning_rate": 0.00010476190476190477,
      "loss": 1.565,
      "step": 77
    },
    {
      "epoch": 0.015990159901599015,
      "grad_norm": 1.1753599911471933,
      "learning_rate": 0.00010612244897959185,
      "loss": 1.5744,
      "step": 78
    },
    {
      "epoch": 0.016195161951619515,
      "grad_norm": 1.0794471964691739,
      "learning_rate": 0.00010748299319727892,
      "loss": 1.6342,
      "step": 79
    },
    {
      "epoch": 0.016400164001640016,
      "grad_norm": 1.2035928559538578,
      "learning_rate": 0.000108843537414966,
      "loss": 1.5847,
      "step": 80
    },
    {
      "epoch": 0.016605166051660517,
      "grad_norm": 1.08114649790515,
      "learning_rate": 0.00011020408163265306,
      "loss": 1.615,
      "step": 81
    },
    {
      "epoch": 0.016810168101681018,
      "grad_norm": 1.2086894815182474,
      "learning_rate": 0.00011156462585034013,
      "loss": 1.6343,
      "step": 82
    },
    {
      "epoch": 0.017015170151701516,
      "grad_norm": 1.0410041600988618,
      "learning_rate": 0.00011292517006802721,
      "loss": 1.629,
      "step": 83
    },
    {
      "epoch": 0.017220172201722016,
      "grad_norm": 1.0784682769702671,
      "learning_rate": 0.00011428571428571428,
      "loss": 1.5933,
      "step": 84
    },
    {
      "epoch": 0.017425174251742517,
      "grad_norm": 1.0787477943305062,
      "learning_rate": 0.00011564625850340137,
      "loss": 1.6087,
      "step": 85
    },
    {
      "epoch": 0.017630176301763018,
      "grad_norm": 0.9608305364472287,
      "learning_rate": 0.00011700680272108844,
      "loss": 1.5138,
      "step": 86
    },
    {
      "epoch": 0.01783517835178352,
      "grad_norm": 0.9577791495814476,
      "learning_rate": 0.00011836734693877552,
      "loss": 1.575,
      "step": 87
    },
    {
      "epoch": 0.018040180401804017,
      "grad_norm": 0.9781754947236491,
      "learning_rate": 0.00011972789115646259,
      "loss": 1.5526,
      "step": 88
    },
    {
      "epoch": 0.018245182451824517,
      "grad_norm": 1.1099549338633823,
      "learning_rate": 0.00012108843537414967,
      "loss": 1.6376,
      "step": 89
    },
    {
      "epoch": 0.01845018450184502,
      "grad_norm": 1.0700630463054988,
      "learning_rate": 0.00012244897959183676,
      "loss": 1.6277,
      "step": 90
    },
    {
      "epoch": 0.01865518655186552,
      "grad_norm": 1.069946434823473,
      "learning_rate": 0.0001238095238095238,
      "loss": 1.6369,
      "step": 91
    },
    {
      "epoch": 0.01886018860188602,
      "grad_norm": 0.977616253015782,
      "learning_rate": 0.0001251700680272109,
      "loss": 1.6097,
      "step": 92
    },
    {
      "epoch": 0.019065190651906518,
      "grad_norm": 1.2977078345338526,
      "learning_rate": 0.00012653061224489798,
      "loss": 1.5614,
      "step": 93
    },
    {
      "epoch": 0.01927019270192702,
      "grad_norm": 1.1905228370313738,
      "learning_rate": 0.00012789115646258506,
      "loss": 1.5899,
      "step": 94
    },
    {
      "epoch": 0.01947519475194752,
      "grad_norm": 0.9076860345777181,
      "learning_rate": 0.00012925170068027212,
      "loss": 1.5657,
      "step": 95
    },
    {
      "epoch": 0.01968019680196802,
      "grad_norm": 1.0262019407725596,
      "learning_rate": 0.00013061224489795917,
      "loss": 1.6335,
      "step": 96
    },
    {
      "epoch": 0.01988519885198852,
      "grad_norm": 1.2489537278474532,
      "learning_rate": 0.00013197278911564626,
      "loss": 1.6117,
      "step": 97
    },
    {
      "epoch": 0.02009020090200902,
      "grad_norm": 1.1470594814333361,
      "learning_rate": 0.00013333333333333334,
      "loss": 1.6462,
      "step": 98
    },
    {
      "epoch": 0.02029520295202952,
      "grad_norm": 1.1140813099111917,
      "learning_rate": 0.0001346938775510204,
      "loss": 1.6675,
      "step": 99
    },
    {
      "epoch": 0.02050020500205002,
      "grad_norm": 1.0347428297472658,
      "learning_rate": 0.00013605442176870748,
      "loss": 1.6536,
      "step": 100
    },
    {
      "epoch": 0.02070520705207052,
      "grad_norm": 1.0859214215544615,
      "learning_rate": 0.00013741496598639456,
      "loss": 1.6088,
      "step": 101
    },
    {
      "epoch": 0.020910209102091022,
      "grad_norm": 1.049032445550708,
      "learning_rate": 0.00013877551020408165,
      "loss": 1.636,
      "step": 102
    },
    {
      "epoch": 0.02111521115211152,
      "grad_norm": 1.0986846920925164,
      "learning_rate": 0.0001401360544217687,
      "loss": 1.6125,
      "step": 103
    },
    {
      "epoch": 0.02132021320213202,
      "grad_norm": 1.1108375489491855,
      "learning_rate": 0.00014149659863945578,
      "loss": 1.6426,
      "step": 104
    },
    {
      "epoch": 0.02152521525215252,
      "grad_norm": 1.177355850105982,
      "learning_rate": 0.00014285714285714287,
      "loss": 1.5887,
      "step": 105
    },
    {
      "epoch": 0.021730217302173022,
      "grad_norm": 1.1216542459849195,
      "learning_rate": 0.00014421768707482995,
      "loss": 1.6183,
      "step": 106
    },
    {
      "epoch": 0.021935219352193523,
      "grad_norm": 1.1743373962795476,
      "learning_rate": 0.000145578231292517,
      "loss": 1.6305,
      "step": 107
    },
    {
      "epoch": 0.02214022140221402,
      "grad_norm": 1.23704891266505,
      "learning_rate": 0.0001469387755102041,
      "loss": 1.6428,
      "step": 108
    },
    {
      "epoch": 0.02234522345223452,
      "grad_norm": 1.23714819830845,
      "learning_rate": 0.00014829931972789117,
      "loss": 1.7324,
      "step": 109
    },
    {
      "epoch": 0.022550225502255022,
      "grad_norm": 1.0262073791537545,
      "learning_rate": 0.00014965986394557826,
      "loss": 1.5315,
      "step": 110
    },
    {
      "epoch": 0.022755227552275523,
      "grad_norm": 1.1553886431113178,
      "learning_rate": 0.0001510204081632653,
      "loss": 1.6211,
      "step": 111
    },
    {
      "epoch": 0.022960229602296024,
      "grad_norm": 1.1861448771352119,
      "learning_rate": 0.00015238095238095237,
      "loss": 1.631,
      "step": 112
    },
    {
      "epoch": 0.02316523165231652,
      "grad_norm": 1.314362679837142,
      "learning_rate": 0.00015374149659863945,
      "loss": 1.6005,
      "step": 113
    },
    {
      "epoch": 0.023370233702337023,
      "grad_norm": 1.263537873458596,
      "learning_rate": 0.00015510204081632654,
      "loss": 1.6394,
      "step": 114
    },
    {
      "epoch": 0.023575235752357523,
      "grad_norm": 1.1888656577304768,
      "learning_rate": 0.00015646258503401362,
      "loss": 1.6573,
      "step": 115
    },
    {
      "epoch": 0.023780237802378024,
      "grad_norm": 1.1256448238432635,
      "learning_rate": 0.00015782312925170067,
      "loss": 1.6569,
      "step": 116
    },
    {
      "epoch": 0.023985239852398525,
      "grad_norm": 1.215308548571746,
      "learning_rate": 0.00015918367346938776,
      "loss": 1.6432,
      "step": 117
    },
    {
      "epoch": 0.024190241902419023,
      "grad_norm": 1.235957365691106,
      "learning_rate": 0.00016054421768707484,
      "loss": 1.632,
      "step": 118
    },
    {
      "epoch": 0.024395243952439524,
      "grad_norm": 1.3100431872941194,
      "learning_rate": 0.00016190476190476192,
      "loss": 1.6065,
      "step": 119
    },
    {
      "epoch": 0.024600246002460024,
      "grad_norm": 1.2873096863935316,
      "learning_rate": 0.00016326530612244898,
      "loss": 1.4878,
      "step": 120
    },
    {
      "epoch": 0.024805248052480525,
      "grad_norm": 1.2096947815483081,
      "learning_rate": 0.00016462585034013606,
      "loss": 1.6348,
      "step": 121
    },
    {
      "epoch": 0.025010250102501026,
      "grad_norm": 1.263927652256806,
      "learning_rate": 0.00016598639455782315,
      "loss": 1.632,
      "step": 122
    },
    {
      "epoch": 0.025215252152521524,
      "grad_norm": 1.3027019149962984,
      "learning_rate": 0.00016734693877551023,
      "loss": 1.6461,
      "step": 123
    },
    {
      "epoch": 0.025420254202542025,
      "grad_norm": 1.4804585808697446,
      "learning_rate": 0.00016870748299319729,
      "loss": 1.6369,
      "step": 124
    },
    {
      "epoch": 0.025625256252562525,
      "grad_norm": 1.03261017329102,
      "learning_rate": 0.00017006802721088434,
      "loss": 1.6201,
      "step": 125
    },
    {
      "epoch": 0.025830258302583026,
      "grad_norm": 1.1685157501083643,
      "learning_rate": 0.00017142857142857143,
      "loss": 1.6189,
      "step": 126
    },
    {
      "epoch": 0.026035260352603527,
      "grad_norm": 1.2129101850548476,
      "learning_rate": 0.0001727891156462585,
      "loss": 1.5909,
      "step": 127
    },
    {
      "epoch": 0.026240262402624025,
      "grad_norm": 1.2418719064099584,
      "learning_rate": 0.0001741496598639456,
      "loss": 1.6522,
      "step": 128
    },
    {
      "epoch": 0.026445264452644526,
      "grad_norm": 1.2136958419140977,
      "learning_rate": 0.00017551020408163265,
      "loss": 1.609,
      "step": 129
    },
    {
      "epoch": 0.026650266502665027,
      "grad_norm": 1.4799462684969362,
      "learning_rate": 0.00017687074829931973,
      "loss": 1.7233,
      "step": 130
    },
    {
      "epoch": 0.026855268552685527,
      "grad_norm": 1.1277518979638743,
      "learning_rate": 0.00017823129251700681,
      "loss": 1.6089,
      "step": 131
    },
    {
      "epoch": 0.02706027060270603,
      "grad_norm": 2.624680936006109,
      "learning_rate": 0.0001795918367346939,
      "loss": 1.6087,
      "step": 132
    },
    {
      "epoch": 0.027265272652726526,
      "grad_norm": 1.1282762242532551,
      "learning_rate": 0.00018095238095238095,
      "loss": 1.621,
      "step": 133
    },
    {
      "epoch": 0.027470274702747027,
      "grad_norm": 1.4754640362528888,
      "learning_rate": 0.00018231292517006804,
      "loss": 1.6509,
      "step": 134
    },
    {
      "epoch": 0.027675276752767528,
      "grad_norm": 1.163950069566443,
      "learning_rate": 0.00018367346938775512,
      "loss": 1.6341,
      "step": 135
    },
    {
      "epoch": 0.02788027880278803,
      "grad_norm": 1.0401044592245412,
      "learning_rate": 0.0001850340136054422,
      "loss": 1.7007,
      "step": 136
    },
    {
      "epoch": 0.02808528085280853,
      "grad_norm": 1.3591068252321274,
      "learning_rate": 0.00018639455782312926,
      "loss": 1.6996,
      "step": 137
    },
    {
      "epoch": 0.028290282902829027,
      "grad_norm": 1.1397043865244645,
      "learning_rate": 0.00018775510204081634,
      "loss": 1.5753,
      "step": 138
    },
    {
      "epoch": 0.028495284952849528,
      "grad_norm": 1.1540356283967317,
      "learning_rate": 0.00018911564625850343,
      "loss": 1.6087,
      "step": 139
    },
    {
      "epoch": 0.02870028700287003,
      "grad_norm": 1.230927536460245,
      "learning_rate": 0.00019047619047619048,
      "loss": 1.5921,
      "step": 140
    },
    {
      "epoch": 0.02890528905289053,
      "grad_norm": 1.127823608621212,
      "learning_rate": 0.00019183673469387756,
      "loss": 1.5908,
      "step": 141
    },
    {
      "epoch": 0.02911029110291103,
      "grad_norm": 1.1887684798697176,
      "learning_rate": 0.00019319727891156462,
      "loss": 1.6862,
      "step": 142
    },
    {
      "epoch": 0.029315293152931528,
      "grad_norm": 1.2863024046618226,
      "learning_rate": 0.0001945578231292517,
      "loss": 1.6094,
      "step": 143
    },
    {
      "epoch": 0.02952029520295203,
      "grad_norm": 1.1885397366217607,
      "learning_rate": 0.0001959183673469388,
      "loss": 1.6782,
      "step": 144
    },
    {
      "epoch": 0.02972529725297253,
      "grad_norm": 1.184431128873404,
      "learning_rate": 0.00019727891156462587,
      "loss": 1.71,
      "step": 145
    },
    {
      "epoch": 0.02993029930299303,
      "grad_norm": 1.43125176648162,
      "learning_rate": 0.00019863945578231293,
      "loss": 1.7075,
      "step": 146
    },
    {
      "epoch": 0.03013530135301353,
      "grad_norm": 1.2742070457194576,
      "learning_rate": 0.0002,
      "loss": 1.6954,
      "step": 147
    },
    {
      "epoch": 0.03034030340303403,
      "grad_norm": 1.6480154811113514,
      "learning_rate": 0.00019999997795227224,
      "loss": 1.6228,
      "step": 148
    },
    {
      "epoch": 0.03054530545305453,
      "grad_norm": 1.0680986570634565,
      "learning_rate": 0.0001999999118090987,
      "loss": 1.7318,
      "step": 149
    },
    {
      "epoch": 0.03075030750307503,
      "grad_norm": 1.2622281390071541,
      "learning_rate": 0.0001999998015705085,
      "loss": 1.7252,
      "step": 150
    },
    {
      "epoch": 0.03095530955309553,
      "grad_norm": 1.1040247667751586,
      "learning_rate": 0.00019999964723655032,
      "loss": 1.6719,
      "step": 151
    },
    {
      "epoch": 0.031160311603116032,
      "grad_norm": 1.165309851848606,
      "learning_rate": 0.00019999944880729213,
      "loss": 1.6872,
      "step": 152
    },
    {
      "epoch": 0.03136531365313653,
      "grad_norm": 1.1139968864900518,
      "learning_rate": 0.00019999920628282152,
      "loss": 1.7099,
      "step": 153
    },
    {
      "epoch": 0.031570315703157034,
      "grad_norm": 1.1039470575016952,
      "learning_rate": 0.00019999891966324538,
      "loss": 1.6291,
      "step": 154
    },
    {
      "epoch": 0.03177531775317753,
      "grad_norm": 1.1360166943272085,
      "learning_rate": 0.0001999985889486901,
      "loss": 1.6737,
      "step": 155
    },
    {
      "epoch": 0.03198031980319803,
      "grad_norm": 0.958675839167699,
      "learning_rate": 0.00019999821413930146,
      "loss": 1.6348,
      "step": 156
    },
    {
      "epoch": 0.03218532185321853,
      "grad_norm": 1.1670953989857722,
      "learning_rate": 0.00019999779523524483,
      "loss": 1.7131,
      "step": 157
    },
    {
      "epoch": 0.03239032390323903,
      "grad_norm": 1.082095257350303,
      "learning_rate": 0.00019999733223670488,
      "loss": 1.648,
      "step": 158
    },
    {
      "epoch": 0.032595325953259535,
      "grad_norm": 1.1171693717316644,
      "learning_rate": 0.00019999682514388578,
      "loss": 1.7464,
      "step": 159
    },
    {
      "epoch": 0.03280032800328003,
      "grad_norm": 1.1401801511264775,
      "learning_rate": 0.00019999627395701113,
      "loss": 1.7225,
      "step": 160
    },
    {
      "epoch": 0.03300533005330053,
      "grad_norm": 0.9789030820958351,
      "learning_rate": 0.00019999567867632397,
      "loss": 1.7047,
      "step": 161
    },
    {
      "epoch": 0.033210332103321034,
      "grad_norm": 1.1084676506755051,
      "learning_rate": 0.0001999950393020868,
      "loss": 1.7534,
      "step": 162
    },
    {
      "epoch": 0.03341533415334153,
      "grad_norm": 1.0600543936710014,
      "learning_rate": 0.00019999435583458155,
      "loss": 1.672,
      "step": 163
    },
    {
      "epoch": 0.033620336203362036,
      "grad_norm": 1.113180803512727,
      "learning_rate": 0.00019999362827410964,
      "loss": 1.7815,
      "step": 164
    },
    {
      "epoch": 0.033825338253382534,
      "grad_norm": 0.9440610859866324,
      "learning_rate": 0.0001999928566209918,
      "loss": 1.7094,
      "step": 165
    },
    {
      "epoch": 0.03403034030340303,
      "grad_norm": 1.059843009867599,
      "learning_rate": 0.0001999920408755684,
      "loss": 1.7048,
      "step": 166
    },
    {
      "epoch": 0.034235342353423535,
      "grad_norm": 0.94896147910256,
      "learning_rate": 0.00019999118103819906,
      "loss": 1.691,
      "step": 167
    },
    {
      "epoch": 0.03444034440344403,
      "grad_norm": 1.0680880654418765,
      "learning_rate": 0.000199990277109263,
      "loss": 1.6689,
      "step": 168
    },
    {
      "epoch": 0.03464534645346454,
      "grad_norm": 0.992212593994851,
      "learning_rate": 0.00019998932908915873,
      "loss": 1.6764,
      "step": 169
    },
    {
      "epoch": 0.034850348503485035,
      "grad_norm": 0.879423198436075,
      "learning_rate": 0.00019998833697830435,
      "loss": 1.6399,
      "step": 170
    },
    {
      "epoch": 0.03505535055350553,
      "grad_norm": 1.2176693077785534,
      "learning_rate": 0.0001999873007771373,
      "loss": 1.7688,
      "step": 171
    },
    {
      "epoch": 0.035260352603526036,
      "grad_norm": 1.013637605210522,
      "learning_rate": 0.00019998622048611453,
      "loss": 1.6707,
      "step": 172
    },
    {
      "epoch": 0.035465354653546534,
      "grad_norm": 0.9420062932703167,
      "learning_rate": 0.0001999850961057124,
      "loss": 1.6589,
      "step": 173
    },
    {
      "epoch": 0.03567035670356704,
      "grad_norm": 0.9351476084159396,
      "learning_rate": 0.00019998392763642667,
      "loss": 1.7395,
      "step": 174
    },
    {
      "epoch": 0.035875358753587536,
      "grad_norm": 1.0370641703483614,
      "learning_rate": 0.00019998271507877261,
      "loss": 1.7044,
      "step": 175
    },
    {
      "epoch": 0.03608036080360803,
      "grad_norm": 0.920277981500977,
      "learning_rate": 0.0001999814584332849,
      "loss": 1.7103,
      "step": 176
    },
    {
      "epoch": 0.03628536285362854,
      "grad_norm": 1.0533965924810595,
      "learning_rate": 0.00019998015770051765,
      "loss": 1.691,
      "step": 177
    },
    {
      "epoch": 0.036490364903649035,
      "grad_norm": 1.0051367563688869,
      "learning_rate": 0.00019997881288104445,
      "loss": 1.7118,
      "step": 178
    },
    {
      "epoch": 0.03669536695366954,
      "grad_norm": 0.9093082446698368,
      "learning_rate": 0.00019997742397545828,
      "loss": 1.6682,
      "step": 179
    },
    {
      "epoch": 0.03690036900369004,
      "grad_norm": 1.0155664884370854,
      "learning_rate": 0.0001999759909843716,
      "loss": 1.7184,
      "step": 180
    },
    {
      "epoch": 0.037105371053710534,
      "grad_norm": 0.9544214348599572,
      "learning_rate": 0.0001999745139084163,
      "loss": 1.6963,
      "step": 181
    },
    {
      "epoch": 0.03731037310373104,
      "grad_norm": 0.9560958102567276,
      "learning_rate": 0.00019997299274824367,
      "loss": 1.695,
      "step": 182
    },
    {
      "epoch": 0.037515375153751536,
      "grad_norm": 0.988594578922743,
      "learning_rate": 0.0001999714275045245,
      "loss": 1.6722,
      "step": 183
    },
    {
      "epoch": 0.03772037720377204,
      "grad_norm": 1.0361367366457916,
      "learning_rate": 0.00019996981817794898,
      "loss": 1.7246,
      "step": 184
    },
    {
      "epoch": 0.03792537925379254,
      "grad_norm": 0.9208549225862263,
      "learning_rate": 0.00019996816476922677,
      "loss": 1.6338,
      "step": 185
    },
    {
      "epoch": 0.038130381303813035,
      "grad_norm": 1.0410273936492669,
      "learning_rate": 0.0001999664672790869,
      "loss": 1.6747,
      "step": 186
    },
    {
      "epoch": 0.03833538335383354,
      "grad_norm": 0.9382463828768859,
      "learning_rate": 0.00019996472570827796,
      "loss": 1.6842,
      "step": 187
    },
    {
      "epoch": 0.03854038540385404,
      "grad_norm": 0.853513525121089,
      "learning_rate": 0.00019996294005756785,
      "loss": 1.6848,
      "step": 188
    },
    {
      "epoch": 0.03874538745387454,
      "grad_norm": 0.9463063527448772,
      "learning_rate": 0.00019996111032774395,
      "loss": 1.6699,
      "step": 189
    },
    {
      "epoch": 0.03895038950389504,
      "grad_norm": 0.9675038054864104,
      "learning_rate": 0.00019995923651961315,
      "loss": 1.7363,
      "step": 190
    },
    {
      "epoch": 0.039155391553915536,
      "grad_norm": 1.2525486011718874,
      "learning_rate": 0.00019995731863400165,
      "loss": 1.6369,
      "step": 191
    },
    {
      "epoch": 0.03936039360393604,
      "grad_norm": 1.0253006271832148,
      "learning_rate": 0.00019995535667175518,
      "loss": 1.6919,
      "step": 192
    },
    {
      "epoch": 0.03956539565395654,
      "grad_norm": 0.962317240667466,
      "learning_rate": 0.00019995335063373887,
      "loss": 1.7173,
      "step": 193
    },
    {
      "epoch": 0.03977039770397704,
      "grad_norm": 0.9703756024479744,
      "learning_rate": 0.00019995130052083727,
      "loss": 1.7198,
      "step": 194
    },
    {
      "epoch": 0.03997539975399754,
      "grad_norm": 0.870466862148783,
      "learning_rate": 0.00019994920633395445,
      "loss": 1.6581,
      "step": 195
    },
    {
      "epoch": 0.04018040180401804,
      "grad_norm": 0.926969236942892,
      "learning_rate": 0.0001999470680740138,
      "loss": 1.6509,
      "step": 196
    },
    {
      "epoch": 0.04038540385403854,
      "grad_norm": 0.9586452572040121,
      "learning_rate": 0.0001999448857419582,
      "loss": 1.6437,
      "step": 197
    },
    {
      "epoch": 0.04059040590405904,
      "grad_norm": 1.2221310341386278,
      "learning_rate": 0.00019994265933874998,
      "loss": 1.6887,
      "step": 198
    },
    {
      "epoch": 0.04079540795407954,
      "grad_norm": 0.9575123751994977,
      "learning_rate": 0.00019994038886537085,
      "loss": 1.6664,
      "step": 199
    },
    {
      "epoch": 0.04100041000410004,
      "grad_norm": 0.9562693613955552,
      "learning_rate": 0.00019993807432282202,
      "loss": 1.7664,
      "step": 200
    },
    {
      "epoch": 0.04120541205412054,
      "grad_norm": 0.9364057810167934,
      "learning_rate": 0.00019993571571212408,
      "loss": 1.6861,
      "step": 201
    },
    {
      "epoch": 0.04141041410414104,
      "grad_norm": 0.9607321302901767,
      "learning_rate": 0.00019993331303431707,
      "loss": 1.7155,
      "step": 202
    },
    {
      "epoch": 0.04161541615416154,
      "grad_norm": 0.8967762993782126,
      "learning_rate": 0.00019993086629046045,
      "loss": 1.6838,
      "step": 203
    },
    {
      "epoch": 0.041820418204182044,
      "grad_norm": 0.9405470283620619,
      "learning_rate": 0.00019992837548163316,
      "loss": 1.7276,
      "step": 204
    },
    {
      "epoch": 0.04202542025420254,
      "grad_norm": 0.9271912358418682,
      "learning_rate": 0.0001999258406089335,
      "loss": 1.6796,
      "step": 205
    },
    {
      "epoch": 0.04223042230422304,
      "grad_norm": 0.966775595641443,
      "learning_rate": 0.00019992326167347923,
      "loss": 1.6944,
      "step": 206
    },
    {
      "epoch": 0.042435424354243544,
      "grad_norm": 0.9040603671143699,
      "learning_rate": 0.00019992063867640757,
      "loss": 1.6618,
      "step": 207
    },
    {
      "epoch": 0.04264042640426404,
      "grad_norm": 0.9709079011282012,
      "learning_rate": 0.00019991797161887512,
      "loss": 1.7549,
      "step": 208
    },
    {
      "epoch": 0.042845428454284545,
      "grad_norm": 1.6669592548994134,
      "learning_rate": 0.00019991526050205797,
      "loss": 1.6914,
      "step": 209
    },
    {
      "epoch": 0.04305043050430504,
      "grad_norm": 0.8671344062949641,
      "learning_rate": 0.00019991250532715155,
      "loss": 1.6139,
      "step": 210
    },
    {
      "epoch": 0.04325543255432554,
      "grad_norm": 0.908591899397052,
      "learning_rate": 0.0001999097060953708,
      "loss": 1.6494,
      "step": 211
    },
    {
      "epoch": 0.043460434604346045,
      "grad_norm": 0.929737140473697,
      "learning_rate": 0.00019990686280795003,
      "loss": 1.7305,
      "step": 212
    },
    {
      "epoch": 0.04366543665436654,
      "grad_norm": 1.0175328138250153,
      "learning_rate": 0.000199903975466143,
      "loss": 1.6901,
      "step": 213
    },
    {
      "epoch": 0.043870438704387046,
      "grad_norm": 0.945359063406858,
      "learning_rate": 0.00019990104407122293,
      "loss": 1.6753,
      "step": 214
    },
    {
      "epoch": 0.044075440754407544,
      "grad_norm": 0.8947645059347635,
      "learning_rate": 0.00019989806862448243,
      "loss": 1.761,
      "step": 215
    },
    {
      "epoch": 0.04428044280442804,
      "grad_norm": 0.7859015453280959,
      "learning_rate": 0.00019989504912723346,
      "loss": 1.6966,
      "step": 216
    },
    {
      "epoch": 0.044485444854448546,
      "grad_norm": 0.9367836223750419,
      "learning_rate": 0.0001998919855808076,
      "loss": 1.7264,
      "step": 217
    },
    {
      "epoch": 0.04469044690446904,
      "grad_norm": 1.1755111563714267,
      "learning_rate": 0.00019988887798655562,
      "loss": 1.7141,
      "step": 218
    },
    {
      "epoch": 0.04489544895448955,
      "grad_norm": 0.9180246013524452,
      "learning_rate": 0.00019988572634584793,
      "loss": 1.7313,
      "step": 219
    },
    {
      "epoch": 0.045100451004510045,
      "grad_norm": 0.8905061476312363,
      "learning_rate": 0.00019988253066007417,
      "loss": 1.6064,
      "step": 220
    },
    {
      "epoch": 0.04530545305453054,
      "grad_norm": 0.9164868611871474,
      "learning_rate": 0.00019987929093064354,
      "loss": 1.6908,
      "step": 221
    },
    {
      "epoch": 0.04551045510455105,
      "grad_norm": 0.8809112992446848,
      "learning_rate": 0.00019987600715898462,
      "loss": 1.7134,
      "step": 222
    },
    {
      "epoch": 0.045715457154571544,
      "grad_norm": 0.8130758075460126,
      "learning_rate": 0.00019987267934654538,
      "loss": 1.6908,
      "step": 223
    },
    {
      "epoch": 0.04592045920459205,
      "grad_norm": 0.959569750555629,
      "learning_rate": 0.00019986930749479328,
      "loss": 1.7207,
      "step": 224
    },
    {
      "epoch": 0.046125461254612546,
      "grad_norm": 0.9399740349053441,
      "learning_rate": 0.00019986589160521509,
      "loss": 1.7304,
      "step": 225
    },
    {
      "epoch": 0.04633046330463304,
      "grad_norm": 0.9749818376521407,
      "learning_rate": 0.00019986243167931705,
      "loss": 1.7212,
      "step": 226
    },
    {
      "epoch": 0.04653546535465355,
      "grad_norm": 0.9480976624242922,
      "learning_rate": 0.00019985892771862493,
      "loss": 1.6559,
      "step": 227
    },
    {
      "epoch": 0.046740467404674045,
      "grad_norm": 0.8757724926796171,
      "learning_rate": 0.00019985537972468372,
      "loss": 1.6985,
      "step": 228
    },
    {
      "epoch": 0.04694546945469455,
      "grad_norm": 1.0392770845993302,
      "learning_rate": 0.000199851787699058,
      "loss": 1.6915,
      "step": 229
    },
    {
      "epoch": 0.04715047150471505,
      "grad_norm": 0.825488039548811,
      "learning_rate": 0.00019984815164333163,
      "loss": 1.6358,
      "step": 230
    },
    {
      "epoch": 0.047355473554735544,
      "grad_norm": 0.8603098218807111,
      "learning_rate": 0.00019984447155910797,
      "loss": 1.6625,
      "step": 231
    },
    {
      "epoch": 0.04756047560475605,
      "grad_norm": 1.0881930338910415,
      "learning_rate": 0.00019984074744800977,
      "loss": 1.744,
      "step": 232
    },
    {
      "epoch": 0.047765477654776546,
      "grad_norm": 0.9004790977146169,
      "learning_rate": 0.0001998369793116792,
      "loss": 1.6156,
      "step": 233
    },
    {
      "epoch": 0.04797047970479705,
      "grad_norm": 0.843192453484823,
      "learning_rate": 0.00019983316715177783,
      "loss": 1.7418,
      "step": 234
    },
    {
      "epoch": 0.04817548175481755,
      "grad_norm": 0.8986673302180079,
      "learning_rate": 0.0001998293109699866,
      "loss": 1.6954,
      "step": 235
    },
    {
      "epoch": 0.048380483804838045,
      "grad_norm": 0.9264618698411912,
      "learning_rate": 0.000199825410768006,
      "loss": 1.7137,
      "step": 236
    },
    {
      "epoch": 0.04858548585485855,
      "grad_norm": 0.8689597161934539,
      "learning_rate": 0.0001998214665475558,
      "loss": 1.6582,
      "step": 237
    },
    {
      "epoch": 0.04879048790487905,
      "grad_norm": 0.8772130277417363,
      "learning_rate": 0.00019981747831037522,
      "loss": 1.6941,
      "step": 238
    },
    {
      "epoch": 0.04899548995489955,
      "grad_norm": 0.8340591687106946,
      "learning_rate": 0.00019981344605822288,
      "loss": 1.6634,
      "step": 239
    },
    {
      "epoch": 0.04920049200492005,
      "grad_norm": 0.8401965767330165,
      "learning_rate": 0.00019980936979287686,
      "loss": 1.678,
      "step": 240
    },
    {
      "epoch": 0.049405494054940546,
      "grad_norm": 0.8245301241762782,
      "learning_rate": 0.00019980524951613456,
      "loss": 1.669,
      "step": 241
    },
    {
      "epoch": 0.04961049610496105,
      "grad_norm": 0.9437689960366863,
      "learning_rate": 0.00019980108522981284,
      "loss": 1.738,
      "step": 242
    },
    {
      "epoch": 0.04981549815498155,
      "grad_norm": 0.9763198363780384,
      "learning_rate": 0.000199796876935748,
      "loss": 1.668,
      "step": 243
    },
    {
      "epoch": 0.05002050020500205,
      "grad_norm": 0.8737629260795708,
      "learning_rate": 0.00019979262463579568,
      "loss": 1.6792,
      "step": 244
    },
    {
      "epoch": 0.05022550225502255,
      "grad_norm": 0.8069493184489265,
      "learning_rate": 0.00019978832833183097,
      "loss": 1.7035,
      "step": 245
    },
    {
      "epoch": 0.05043050430504305,
      "grad_norm": 0.8681544137254046,
      "learning_rate": 0.0001997839880257483,
      "loss": 1.6365,
      "step": 246
    },
    {
      "epoch": 0.05063550635506355,
      "grad_norm": 0.8722461307414754,
      "learning_rate": 0.0001997796037194616,
      "loss": 1.6715,
      "step": 247
    },
    {
      "epoch": 0.05084050840508405,
      "grad_norm": 0.7593083712776789,
      "learning_rate": 0.0001997751754149041,
      "loss": 1.6472,
      "step": 248
    },
    {
      "epoch": 0.051045510455104554,
      "grad_norm": 0.8123117432522353,
      "learning_rate": 0.00019977070311402853,
      "loss": 1.7593,
      "step": 249
    },
    {
      "epoch": 0.05125051250512505,
      "grad_norm": 0.7658545177246406,
      "learning_rate": 0.00019976618681880694,
      "loss": 1.6805,
      "step": 250
    },
    {
      "epoch": 0.05145551455514555,
      "grad_norm": 0.8504380870043369,
      "learning_rate": 0.00019976162653123083,
      "loss": 1.7382,
      "step": 251
    },
    {
      "epoch": 0.05166051660516605,
      "grad_norm": 0.8765622953723591,
      "learning_rate": 0.00019975702225331107,
      "loss": 1.7509,
      "step": 252
    },
    {
      "epoch": 0.05186551865518655,
      "grad_norm": 0.906069518948904,
      "learning_rate": 0.0001997523739870779,
      "loss": 1.6519,
      "step": 253
    },
    {
      "epoch": 0.052070520705207055,
      "grad_norm": 0.8265904007741197,
      "learning_rate": 0.0001997476817345811,
      "loss": 1.6882,
      "step": 254
    },
    {
      "epoch": 0.05227552275522755,
      "grad_norm": 0.8681084240084533,
      "learning_rate": 0.00019974294549788963,
      "loss": 1.6868,
      "step": 255
    },
    {
      "epoch": 0.05248052480524805,
      "grad_norm": 0.7676002631197196,
      "learning_rate": 0.00019973816527909198,
      "loss": 1.6594,
      "step": 256
    },
    {
      "epoch": 0.052685526855268554,
      "grad_norm": 0.9402313473097638,
      "learning_rate": 0.00019973334108029607,
      "loss": 1.6828,
      "step": 257
    },
    {
      "epoch": 0.05289052890528905,
      "grad_norm": 0.9680655375503149,
      "learning_rate": 0.00019972847290362905,
      "loss": 1.7108,
      "step": 258
    },
    {
      "epoch": 0.053095530955309556,
      "grad_norm": 0.8961177011725427,
      "learning_rate": 0.0001997235607512377,
      "loss": 1.669,
      "step": 259
    },
    {
      "epoch": 0.05330053300533005,
      "grad_norm": 0.8980329048421593,
      "learning_rate": 0.00019971860462528792,
      "loss": 1.6589,
      "step": 260
    },
    {
      "epoch": 0.05350553505535055,
      "grad_norm": 0.8786908981440502,
      "learning_rate": 0.00019971360452796522,
      "loss": 1.7079,
      "step": 261
    },
    {
      "epoch": 0.053710537105371055,
      "grad_norm": 0.8939300913741398,
      "learning_rate": 0.0001997085604614744,
      "loss": 1.6911,
      "step": 262
    },
    {
      "epoch": 0.05391553915539155,
      "grad_norm": 0.9058662042202257,
      "learning_rate": 0.0001997034724280396,
      "loss": 1.6606,
      "step": 263
    },
    {
      "epoch": 0.05412054120541206,
      "grad_norm": 0.916508823598797,
      "learning_rate": 0.0001996983404299045,
      "loss": 1.7122,
      "step": 264
    },
    {
      "epoch": 0.054325543255432554,
      "grad_norm": 0.8464400647917526,
      "learning_rate": 0.00019969316446933206,
      "loss": 1.8475,
      "step": 265
    },
    {
      "epoch": 0.05453054530545305,
      "grad_norm": 0.8758257718918342,
      "learning_rate": 0.00019968794454860463,
      "loss": 1.6787,
      "step": 266
    },
    {
      "epoch": 0.054735547355473556,
      "grad_norm": 0.7925659885251065,
      "learning_rate": 0.00019968268067002394,
      "loss": 1.6086,
      "step": 267
    },
    {
      "epoch": 0.05494054940549405,
      "grad_norm": 0.8450354500328823,
      "learning_rate": 0.00019967737283591114,
      "loss": 1.6648,
      "step": 268
    },
    {
      "epoch": 0.05514555145551456,
      "grad_norm": 0.9036819266280411,
      "learning_rate": 0.00019967202104860673,
      "loss": 1.6666,
      "step": 269
    },
    {
      "epoch": 0.055350553505535055,
      "grad_norm": 0.8565219317533235,
      "learning_rate": 0.00019966662531047065,
      "loss": 1.7605,
      "step": 270
    },
    {
      "epoch": 0.05555555555555555,
      "grad_norm": 0.9283608775604035,
      "learning_rate": 0.0001996611856238821,
      "loss": 1.6799,
      "step": 271
    },
    {
      "epoch": 0.05576055760557606,
      "grad_norm": 0.8540262917400903,
      "learning_rate": 0.0001996557019912398,
      "loss": 1.6752,
      "step": 272
    },
    {
      "epoch": 0.055965559655596554,
      "grad_norm": 0.797370579432595,
      "learning_rate": 0.00019965017441496175,
      "loss": 1.6608,
      "step": 273
    },
    {
      "epoch": 0.05617056170561706,
      "grad_norm": 0.8447672234496054,
      "learning_rate": 0.00019964460289748534,
      "loss": 1.6921,
      "step": 274
    },
    {
      "epoch": 0.056375563755637556,
      "grad_norm": 0.7526738281793146,
      "learning_rate": 0.00019963898744126743,
      "loss": 1.6387,
      "step": 275
    },
    {
      "epoch": 0.056580565805658053,
      "grad_norm": 0.8521597244164438,
      "learning_rate": 0.0001996333280487841,
      "loss": 1.7606,
      "step": 276
    },
    {
      "epoch": 0.05678556785567856,
      "grad_norm": 0.8451918881710495,
      "learning_rate": 0.00019962762472253097,
      "loss": 1.6698,
      "step": 277
    },
    {
      "epoch": 0.056990569905699055,
      "grad_norm": 0.7823593836795394,
      "learning_rate": 0.00019962187746502285,
      "loss": 1.6631,
      "step": 278
    },
    {
      "epoch": 0.05719557195571956,
      "grad_norm": 0.8325494714864385,
      "learning_rate": 0.0001996160862787941,
      "loss": 1.646,
      "step": 279
    },
    {
      "epoch": 0.05740057400574006,
      "grad_norm": 0.9395873033868068,
      "learning_rate": 0.0001996102511663983,
      "loss": 1.7329,
      "step": 280
    },
    {
      "epoch": 0.057605576055760555,
      "grad_norm": 0.90380560375249,
      "learning_rate": 0.00019960437213040853,
      "loss": 1.6352,
      "step": 281
    },
    {
      "epoch": 0.05781057810578106,
      "grad_norm": 0.7899481343839726,
      "learning_rate": 0.00019959844917341718,
      "loss": 1.6621,
      "step": 282
    },
    {
      "epoch": 0.058015580155801556,
      "grad_norm": 0.9016359282524641,
      "learning_rate": 0.0001995924822980359,
      "loss": 1.6813,
      "step": 283
    },
    {
      "epoch": 0.05822058220582206,
      "grad_norm": 0.7609504593299513,
      "learning_rate": 0.00019958647150689597,
      "loss": 1.6573,
      "step": 284
    },
    {
      "epoch": 0.05842558425584256,
      "grad_norm": 0.8335768095218871,
      "learning_rate": 0.00019958041680264777,
      "loss": 1.6838,
      "step": 285
    },
    {
      "epoch": 0.058630586305863056,
      "grad_norm": 0.8152716191306715,
      "learning_rate": 0.00019957431818796114,
      "loss": 1.7209,
      "step": 286
    },
    {
      "epoch": 0.05883558835588356,
      "grad_norm": 0.8147120853791615,
      "learning_rate": 0.0001995681756655254,
      "loss": 1.7223,
      "step": 287
    },
    {
      "epoch": 0.05904059040590406,
      "grad_norm": 0.9314119965471911,
      "learning_rate": 0.000199561989238049,
      "loss": 1.7462,
      "step": 288
    },
    {
      "epoch": 0.05924559245592456,
      "grad_norm": 0.848695964389857,
      "learning_rate": 0.0001995557589082599,
      "loss": 1.6657,
      "step": 289
    },
    {
      "epoch": 0.05945059450594506,
      "grad_norm": 0.8647246480101162,
      "learning_rate": 0.00019954948467890547,
      "loss": 1.7354,
      "step": 290
    },
    {
      "epoch": 0.05965559655596556,
      "grad_norm": 0.785338177840371,
      "learning_rate": 0.0001995431665527523,
      "loss": 1.6301,
      "step": 291
    },
    {
      "epoch": 0.05986059860598606,
      "grad_norm": 0.9148906004988794,
      "learning_rate": 0.00019953680453258635,
      "loss": 1.7205,
      "step": 292
    },
    {
      "epoch": 0.06006560065600656,
      "grad_norm": 0.8427839862925017,
      "learning_rate": 0.0001995303986212131,
      "loss": 1.6924,
      "step": 293
    },
    {
      "epoch": 0.06027060270602706,
      "grad_norm": 0.8366619935581524,
      "learning_rate": 0.00019952394882145717,
      "loss": 1.7019,
      "step": 294
    },
    {
      "epoch": 0.06047560475604756,
      "grad_norm": 0.9462889093224218,
      "learning_rate": 0.00019951745513616264,
      "loss": 1.6461,
      "step": 295
    },
    {
      "epoch": 0.06068060680606806,
      "grad_norm": 0.8181337956924931,
      "learning_rate": 0.00019951091756819297,
      "loss": 1.7102,
      "step": 296
    },
    {
      "epoch": 0.06088560885608856,
      "grad_norm": 0.8745010970962003,
      "learning_rate": 0.00019950433612043092,
      "loss": 1.6653,
      "step": 297
    },
    {
      "epoch": 0.06109061090610906,
      "grad_norm": 0.8691592008417229,
      "learning_rate": 0.0001994977107957786,
      "loss": 1.7435,
      "step": 298
    },
    {
      "epoch": 0.061295612956129564,
      "grad_norm": 0.8262244757756005,
      "learning_rate": 0.00019949104159715743,
      "loss": 1.7613,
      "step": 299
    },
    {
      "epoch": 0.06150061500615006,
      "grad_norm": 0.8767746516904414,
      "learning_rate": 0.0001994843285275083,
      "loss": 1.6388,
      "step": 300
    },
    {
      "epoch": 0.06170561705617056,
      "grad_norm": 0.735136392661012,
      "learning_rate": 0.00019947757158979136,
      "loss": 1.7056,
      "step": 301
    },
    {
      "epoch": 0.06191061910619106,
      "grad_norm": 0.8063600969530592,
      "learning_rate": 0.00019947077078698606,
      "loss": 1.6949,
      "step": 302
    },
    {
      "epoch": 0.06211562115621156,
      "grad_norm": 0.7916804685850515,
      "learning_rate": 0.00019946392612209127,
      "loss": 1.735,
      "step": 303
    },
    {
      "epoch": 0.062320623206232065,
      "grad_norm": 0.7717510489712706,
      "learning_rate": 0.00019945703759812519,
      "loss": 1.704,
      "step": 304
    },
    {
      "epoch": 0.06252562525625256,
      "grad_norm": 0.8214275311030498,
      "learning_rate": 0.00019945010521812536,
      "loss": 1.7546,
      "step": 305
    },
    {
      "epoch": 0.06273062730627306,
      "grad_norm": 0.9410459345695534,
      "learning_rate": 0.00019944312898514862,
      "loss": 1.6898,
      "step": 306
    },
    {
      "epoch": 0.06293562935629356,
      "grad_norm": 0.7562640868994822,
      "learning_rate": 0.00019943610890227115,
      "loss": 1.6685,
      "step": 307
    },
    {
      "epoch": 0.06314063140631407,
      "grad_norm": 0.8090080988717784,
      "learning_rate": 0.0001994290449725885,
      "loss": 1.7533,
      "step": 308
    },
    {
      "epoch": 0.06334563345633457,
      "grad_norm": 0.834008023305076,
      "learning_rate": 0.00019942193719921556,
      "loss": 1.7246,
      "step": 309
    },
    {
      "epoch": 0.06355063550635506,
      "grad_norm": 0.7497143199335253,
      "learning_rate": 0.00019941478558528655,
      "loss": 1.7394,
      "step": 310
    },
    {
      "epoch": 0.06375563755637556,
      "grad_norm": 0.7627945575098661,
      "learning_rate": 0.000199407590133955,
      "loss": 1.6969,
      "step": 311
    },
    {
      "epoch": 0.06396063960639606,
      "grad_norm": 0.9400529224551543,
      "learning_rate": 0.0001994003508483937,
      "loss": 1.751,
      "step": 312
    },
    {
      "epoch": 0.06416564165641657,
      "grad_norm": 0.8363970953361202,
      "learning_rate": 0.00019939306773179497,
      "loss": 1.664,
      "step": 313
    },
    {
      "epoch": 0.06437064370643707,
      "grad_norm": 0.7992189104603543,
      "learning_rate": 0.00019938574078737022,
      "loss": 1.7102,
      "step": 314
    },
    {
      "epoch": 0.06457564575645756,
      "grad_norm": 0.7383565024737827,
      "learning_rate": 0.00019937837001835038,
      "loss": 1.6802,
      "step": 315
    },
    {
      "epoch": 0.06478064780647806,
      "grad_norm": 0.8490497834561951,
      "learning_rate": 0.0001993709554279856,
      "loss": 1.6754,
      "step": 316
    },
    {
      "epoch": 0.06498564985649856,
      "grad_norm": 0.824252780929852,
      "learning_rate": 0.00019936349701954535,
      "loss": 1.6617,
      "step": 317
    },
    {
      "epoch": 0.06519065190651907,
      "grad_norm": 0.8708350937462481,
      "learning_rate": 0.0001993559947963185,
      "loss": 1.6481,
      "step": 318
    },
    {
      "epoch": 0.06539565395653957,
      "grad_norm": 0.9448886748422978,
      "learning_rate": 0.00019934844876161317,
      "loss": 1.6939,
      "step": 319
    },
    {
      "epoch": 0.06560065600656007,
      "grad_norm": 0.8886030172642768,
      "learning_rate": 0.00019934085891875678,
      "loss": 1.6466,
      "step": 320
    },
    {
      "epoch": 0.06580565805658056,
      "grad_norm": 0.8468417149960004,
      "learning_rate": 0.00019933322527109613,
      "loss": 1.7076,
      "step": 321
    },
    {
      "epoch": 0.06601066010660106,
      "grad_norm": 0.8356183208795587,
      "learning_rate": 0.0001993255478219973,
      "loss": 1.6777,
      "step": 322
    },
    {
      "epoch": 0.06621566215662157,
      "grad_norm": 0.8176615780960169,
      "learning_rate": 0.00019931782657484578,
      "loss": 1.6329,
      "step": 323
    },
    {
      "epoch": 0.06642066420664207,
      "grad_norm": 0.9234714415387821,
      "learning_rate": 0.00019931006153304617,
      "loss": 1.7228,
      "step": 324
    },
    {
      "epoch": 0.06662566625666257,
      "grad_norm": 0.8750201930968287,
      "learning_rate": 0.00019930225270002255,
      "loss": 1.7187,
      "step": 325
    },
    {
      "epoch": 0.06683066830668306,
      "grad_norm": 0.8043754773013777,
      "learning_rate": 0.0001992944000792183,
      "loss": 1.6983,
      "step": 326
    },
    {
      "epoch": 0.06703567035670356,
      "grad_norm": 1.176385094398134,
      "learning_rate": 0.000199286503674096,
      "loss": 1.6879,
      "step": 327
    },
    {
      "epoch": 0.06724067240672407,
      "grad_norm": 0.8257921560671576,
      "learning_rate": 0.00019927856348813766,
      "loss": 1.7166,
      "step": 328
    },
    {
      "epoch": 0.06744567445674457,
      "grad_norm": 0.7395939677267889,
      "learning_rate": 0.0001992705795248445,
      "loss": 1.7063,
      "step": 329
    },
    {
      "epoch": 0.06765067650676507,
      "grad_norm": 0.8505478671569628,
      "learning_rate": 0.00019926255178773713,
      "loss": 1.6846,
      "step": 330
    },
    {
      "epoch": 0.06785567855678556,
      "grad_norm": 1.0025970595269902,
      "learning_rate": 0.00019925448028035536,
      "loss": 1.7244,
      "step": 331
    },
    {
      "epoch": 0.06806068060680606,
      "grad_norm": 0.8071768060875504,
      "learning_rate": 0.00019924636500625838,
      "loss": 1.6797,
      "step": 332
    },
    {
      "epoch": 0.06826568265682657,
      "grad_norm": 0.8500723882929231,
      "learning_rate": 0.0001992382059690247,
      "loss": 1.7133,
      "step": 333
    },
    {
      "epoch": 0.06847068470684707,
      "grad_norm": 0.8353911832574507,
      "learning_rate": 0.00019923000317225204,
      "loss": 1.6779,
      "step": 334
    },
    {
      "epoch": 0.06867568675686757,
      "grad_norm": 0.7469721061109655,
      "learning_rate": 0.00019922175661955748,
      "loss": 1.5978,
      "step": 335
    },
    {
      "epoch": 0.06888068880688807,
      "grad_norm": 0.8370216466399697,
      "learning_rate": 0.00019921346631457737,
      "loss": 1.6644,
      "step": 336
    },
    {
      "epoch": 0.06908569085690856,
      "grad_norm": 0.7795077165915985,
      "learning_rate": 0.00019920513226096733,
      "loss": 1.6899,
      "step": 337
    },
    {
      "epoch": 0.06929069290692907,
      "grad_norm": 0.7989381240543669,
      "learning_rate": 0.00019919675446240236,
      "loss": 1.6641,
      "step": 338
    },
    {
      "epoch": 0.06949569495694957,
      "grad_norm": 0.8202459565119227,
      "learning_rate": 0.00019918833292257662,
      "loss": 1.764,
      "step": 339
    },
    {
      "epoch": 0.06970069700697007,
      "grad_norm": 0.9642826930192796,
      "learning_rate": 0.00019917986764520363,
      "loss": 1.7256,
      "step": 340
    },
    {
      "epoch": 0.06990569905699057,
      "grad_norm": 0.7751249580268084,
      "learning_rate": 0.00019917135863401628,
      "loss": 1.724,
      "step": 341
    },
    {
      "epoch": 0.07011070110701106,
      "grad_norm": 0.8240115578533698,
      "learning_rate": 0.0001991628058927666,
      "loss": 1.7282,
      "step": 342
    },
    {
      "epoch": 0.07031570315703158,
      "grad_norm": 0.8694534341023328,
      "learning_rate": 0.0001991542094252259,
      "loss": 1.7179,
      "step": 343
    },
    {
      "epoch": 0.07052070520705207,
      "grad_norm": 0.7949885187461482,
      "learning_rate": 0.00019914556923518494,
      "loss": 1.6659,
      "step": 344
    },
    {
      "epoch": 0.07072570725707257,
      "grad_norm": 0.7884711254464456,
      "learning_rate": 0.00019913688532645357,
      "loss": 1.6789,
      "step": 345
    },
    {
      "epoch": 0.07093070930709307,
      "grad_norm": 0.7936316579720082,
      "learning_rate": 0.00019912815770286107,
      "loss": 1.6737,
      "step": 346
    },
    {
      "epoch": 0.07113571135711357,
      "grad_norm": 0.7975937243843574,
      "learning_rate": 0.00019911938636825585,
      "loss": 1.7021,
      "step": 347
    },
    {
      "epoch": 0.07134071340713408,
      "grad_norm": 0.768223286707478,
      "learning_rate": 0.0001991105713265057,
      "loss": 1.7509,
      "step": 348
    },
    {
      "epoch": 0.07154571545715457,
      "grad_norm": 0.7908301258100003,
      "learning_rate": 0.0001991017125814977,
      "loss": 1.6639,
      "step": 349
    },
    {
      "epoch": 0.07175071750717507,
      "grad_norm": 0.7863374328870854,
      "learning_rate": 0.00019909281013713806,
      "loss": 1.6864,
      "step": 350
    },
    {
      "epoch": 0.07195571955719557,
      "grad_norm": 0.7432698938040034,
      "learning_rate": 0.00019908386399735241,
      "loss": 1.7412,
      "step": 351
    },
    {
      "epoch": 0.07216072160721607,
      "grad_norm": 0.816977470055735,
      "learning_rate": 0.00019907487416608564,
      "loss": 1.7051,
      "step": 352
    },
    {
      "epoch": 0.07236572365723658,
      "grad_norm": 0.7736641813354557,
      "learning_rate": 0.00019906584064730175,
      "loss": 1.7509,
      "step": 353
    },
    {
      "epoch": 0.07257072570725707,
      "grad_norm": 0.7890465259589068,
      "learning_rate": 0.00019905676344498418,
      "loss": 1.5788,
      "step": 354
    },
    {
      "epoch": 0.07277572775727757,
      "grad_norm": 0.8288921793208962,
      "learning_rate": 0.00019904764256313556,
      "loss": 1.7062,
      "step": 355
    },
    {
      "epoch": 0.07298072980729807,
      "grad_norm": 0.8620745516366974,
      "learning_rate": 0.00019903847800577777,
      "loss": 1.7138,
      "step": 356
    },
    {
      "epoch": 0.07318573185731857,
      "grad_norm": 0.7864264586400553,
      "learning_rate": 0.00019902926977695195,
      "loss": 1.6925,
      "step": 357
    },
    {
      "epoch": 0.07339073390733908,
      "grad_norm": 0.7068687312949009,
      "learning_rate": 0.00019902001788071853,
      "loss": 1.7022,
      "step": 358
    },
    {
      "epoch": 0.07359573595735958,
      "grad_norm": 0.8337630012991832,
      "learning_rate": 0.0001990107223211572,
      "loss": 1.6438,
      "step": 359
    },
    {
      "epoch": 0.07380073800738007,
      "grad_norm": 0.8748687360789178,
      "learning_rate": 0.00019900138310236683,
      "loss": 1.7435,
      "step": 360
    },
    {
      "epoch": 0.07400574005740057,
      "grad_norm": 0.8398150383516015,
      "learning_rate": 0.00019899200022846562,
      "loss": 1.7629,
      "step": 361
    },
    {
      "epoch": 0.07421074210742107,
      "grad_norm": 0.7886652412451903,
      "learning_rate": 0.00019898257370359098,
      "loss": 1.7215,
      "step": 362
    },
    {
      "epoch": 0.07441574415744158,
      "grad_norm": 0.833456831173047,
      "learning_rate": 0.00019897310353189957,
      "loss": 1.7028,
      "step": 363
    },
    {
      "epoch": 0.07462074620746208,
      "grad_norm": 0.7372738914896392,
      "learning_rate": 0.0001989635897175673,
      "loss": 1.698,
      "step": 364
    },
    {
      "epoch": 0.07482574825748257,
      "grad_norm": 0.8129197926773489,
      "learning_rate": 0.0001989540322647894,
      "loss": 1.7716,
      "step": 365
    },
    {
      "epoch": 0.07503075030750307,
      "grad_norm": 0.7706484417008176,
      "learning_rate": 0.00019894443117778022,
      "loss": 1.632,
      "step": 366
    },
    {
      "epoch": 0.07523575235752357,
      "grad_norm": 0.7547385265793601,
      "learning_rate": 0.00019893478646077338,
      "loss": 1.6862,
      "step": 367
    },
    {
      "epoch": 0.07544075440754408,
      "grad_norm": 0.7988981756827476,
      "learning_rate": 0.0001989250981180218,
      "loss": 1.7207,
      "step": 368
    },
    {
      "epoch": 0.07564575645756458,
      "grad_norm": 0.8369643222520285,
      "learning_rate": 0.00019891536615379755,
      "loss": 1.6916,
      "step": 369
    },
    {
      "epoch": 0.07585075850758508,
      "grad_norm": 0.7968054048841254,
      "learning_rate": 0.00019890559057239205,
      "loss": 1.6586,
      "step": 370
    },
    {
      "epoch": 0.07605576055760557,
      "grad_norm": 0.7636168310737993,
      "learning_rate": 0.00019889577137811583,
      "loss": 1.6611,
      "step": 371
    },
    {
      "epoch": 0.07626076260762607,
      "grad_norm": 0.7681478549081471,
      "learning_rate": 0.00019888590857529875,
      "loss": 1.6902,
      "step": 372
    },
    {
      "epoch": 0.07646576465764658,
      "grad_norm": 0.7249456750008301,
      "learning_rate": 0.00019887600216828984,
      "loss": 1.6903,
      "step": 373
    },
    {
      "epoch": 0.07667076670766708,
      "grad_norm": 0.7452739123590649,
      "learning_rate": 0.00019886605216145738,
      "loss": 1.6806,
      "step": 374
    },
    {
      "epoch": 0.07687576875768758,
      "grad_norm": 0.868138692805327,
      "learning_rate": 0.00019885605855918885,
      "loss": 1.7422,
      "step": 375
    },
    {
      "epoch": 0.07708077080770807,
      "grad_norm": 0.8676648084050012,
      "learning_rate": 0.00019884602136589102,
      "loss": 1.6858,
      "step": 376
    },
    {
      "epoch": 0.07728577285772857,
      "grad_norm": 0.816884434773197,
      "learning_rate": 0.0001988359405859898,
      "loss": 1.7284,
      "step": 377
    },
    {
      "epoch": 0.07749077490774908,
      "grad_norm": 0.8149320377988114,
      "learning_rate": 0.00019882581622393034,
      "loss": 1.6996,
      "step": 378
    },
    {
      "epoch": 0.07769577695776958,
      "grad_norm": 0.7684164517583669,
      "learning_rate": 0.00019881564828417707,
      "loss": 1.6851,
      "step": 379
    },
    {
      "epoch": 0.07790077900779008,
      "grad_norm": 0.8709540920827276,
      "learning_rate": 0.00019880543677121358,
      "loss": 1.6891,
      "step": 380
    },
    {
      "epoch": 0.07810578105781057,
      "grad_norm": 0.7534280894079938,
      "learning_rate": 0.00019879518168954265,
      "loss": 1.6665,
      "step": 381
    },
    {
      "epoch": 0.07831078310783107,
      "grad_norm": 0.8437338486777195,
      "learning_rate": 0.00019878488304368635,
      "loss": 1.7022,
      "step": 382
    },
    {
      "epoch": 0.07851578515785158,
      "grad_norm": 0.8125361927980784,
      "learning_rate": 0.00019877454083818585,
      "loss": 1.7219,
      "step": 383
    },
    {
      "epoch": 0.07872078720787208,
      "grad_norm": 0.75216898844302,
      "learning_rate": 0.00019876415507760165,
      "loss": 1.6029,
      "step": 384
    },
    {
      "epoch": 0.07892578925789258,
      "grad_norm": 0.8817636028790754,
      "learning_rate": 0.00019875372576651337,
      "loss": 1.7671,
      "step": 385
    },
    {
      "epoch": 0.07913079130791308,
      "grad_norm": 0.8658881974299575,
      "learning_rate": 0.00019874325290951988,
      "loss": 1.7312,
      "step": 386
    },
    {
      "epoch": 0.07933579335793357,
      "grad_norm": 0.7908139405025414,
      "learning_rate": 0.00019873273651123925,
      "loss": 1.6827,
      "step": 387
    },
    {
      "epoch": 0.07954079540795408,
      "grad_norm": 0.8606154137481271,
      "learning_rate": 0.0001987221765763087,
      "loss": 1.7425,
      "step": 388
    },
    {
      "epoch": 0.07974579745797458,
      "grad_norm": 0.7930288347953397,
      "learning_rate": 0.00019871157310938467,
      "loss": 1.6605,
      "step": 389
    },
    {
      "epoch": 0.07995079950799508,
      "grad_norm": 0.8315190946529247,
      "learning_rate": 0.00019870092611514286,
      "loss": 1.697,
      "step": 390
    },
    {
      "epoch": 0.08015580155801558,
      "grad_norm": 0.7642307128697646,
      "learning_rate": 0.00019869023559827807,
      "loss": 1.667,
      "step": 391
    },
    {
      "epoch": 0.08036080360803607,
      "grad_norm": 0.8535484452935961,
      "learning_rate": 0.00019867950156350435,
      "loss": 1.6685,
      "step": 392
    },
    {
      "epoch": 0.08056580565805659,
      "grad_norm": 0.8219053835318966,
      "learning_rate": 0.00019866872401555488,
      "loss": 1.6292,
      "step": 393
    },
    {
      "epoch": 0.08077080770807708,
      "grad_norm": 0.7499777228628962,
      "learning_rate": 0.00019865790295918212,
      "loss": 1.7191,
      "step": 394
    },
    {
      "epoch": 0.08097580975809758,
      "grad_norm": 0.8495663893303287,
      "learning_rate": 0.00019864703839915767,
      "loss": 1.6794,
      "step": 395
    },
    {
      "epoch": 0.08118081180811808,
      "grad_norm": 0.7346984863887028,
      "learning_rate": 0.00019863613034027224,
      "loss": 1.6282,
      "step": 396
    },
    {
      "epoch": 0.08138581385813858,
      "grad_norm": 0.8050009341689788,
      "learning_rate": 0.00019862517878733586,
      "loss": 1.7637,
      "step": 397
    },
    {
      "epoch": 0.08159081590815909,
      "grad_norm": 0.7988611683570271,
      "learning_rate": 0.00019861418374517764,
      "loss": 1.7244,
      "step": 398
    },
    {
      "epoch": 0.08179581795817958,
      "grad_norm": 0.7577691801900096,
      "learning_rate": 0.0001986031452186459,
      "loss": 1.7389,
      "step": 399
    },
    {
      "epoch": 0.08200082000820008,
      "grad_norm": 0.7556795415346234,
      "learning_rate": 0.0001985920632126081,
      "loss": 1.6342,
      "step": 400
    },
    {
      "epoch": 0.08220582205822058,
      "grad_norm": 0.8482017309815681,
      "learning_rate": 0.0001985809377319509,
      "loss": 1.7032,
      "step": 401
    },
    {
      "epoch": 0.08241082410824108,
      "grad_norm": 0.787147249291876,
      "learning_rate": 0.00019856976878158023,
      "loss": 1.7228,
      "step": 402
    },
    {
      "epoch": 0.08261582615826159,
      "grad_norm": 0.8036672897169851,
      "learning_rate": 0.00019855855636642094,
      "loss": 1.6603,
      "step": 403
    },
    {
      "epoch": 0.08282082820828209,
      "grad_norm": 0.7641728093193555,
      "learning_rate": 0.00019854730049141732,
      "loss": 1.6314,
      "step": 404
    },
    {
      "epoch": 0.08302583025830258,
      "grad_norm": 0.7653906874680237,
      "learning_rate": 0.00019853600116153262,
      "loss": 1.72,
      "step": 405
    },
    {
      "epoch": 0.08323083230832308,
      "grad_norm": 0.7857851929745933,
      "learning_rate": 0.00019852465838174937,
      "loss": 1.6812,
      "step": 406
    },
    {
      "epoch": 0.08343583435834358,
      "grad_norm": 0.8000632649586302,
      "learning_rate": 0.0001985132721570692,
      "loss": 1.6784,
      "step": 407
    },
    {
      "epoch": 0.08364083640836409,
      "grad_norm": 0.7996010748331727,
      "learning_rate": 0.00019850184249251294,
      "loss": 1.7292,
      "step": 408
    },
    {
      "epoch": 0.08384583845838459,
      "grad_norm": 0.8035675536154948,
      "learning_rate": 0.00019849036939312056,
      "loss": 1.7375,
      "step": 409
    },
    {
      "epoch": 0.08405084050840508,
      "grad_norm": 0.8122035791807035,
      "learning_rate": 0.00019847885286395113,
      "loss": 1.6656,
      "step": 410
    },
    {
      "epoch": 0.08425584255842558,
      "grad_norm": 0.7621885251273879,
      "learning_rate": 0.00019846729291008293,
      "loss": 1.7645,
      "step": 411
    },
    {
      "epoch": 0.08446084460844608,
      "grad_norm": 0.7011620695678992,
      "learning_rate": 0.0001984556895366134,
      "loss": 1.6803,
      "step": 412
    },
    {
      "epoch": 0.08466584665846659,
      "grad_norm": 0.8582839410110316,
      "learning_rate": 0.0001984440427486591,
      "loss": 1.7871,
      "step": 413
    },
    {
      "epoch": 0.08487084870848709,
      "grad_norm": 0.8347710297475587,
      "learning_rate": 0.00019843235255135572,
      "loss": 1.7437,
      "step": 414
    },
    {
      "epoch": 0.08507585075850758,
      "grad_norm": 0.8325109206375484,
      "learning_rate": 0.00019842061894985807,
      "loss": 1.7213,
      "step": 415
    },
    {
      "epoch": 0.08528085280852808,
      "grad_norm": 0.8158999680605145,
      "learning_rate": 0.00019840884194934018,
      "loss": 1.7098,
      "step": 416
    },
    {
      "epoch": 0.08548585485854858,
      "grad_norm": 0.7540846343454949,
      "learning_rate": 0.0001983970215549952,
      "loss": 1.6895,
      "step": 417
    },
    {
      "epoch": 0.08569085690856909,
      "grad_norm": 0.7971443040047358,
      "learning_rate": 0.0001983851577720353,
      "loss": 1.6831,
      "step": 418
    },
    {
      "epoch": 0.08589585895858959,
      "grad_norm": 0.8281239117924155,
      "learning_rate": 0.00019837325060569197,
      "loss": 1.6004,
      "step": 419
    },
    {
      "epoch": 0.08610086100861009,
      "grad_norm": 0.7655996559675265,
      "learning_rate": 0.00019836130006121563,
      "loss": 1.6425,
      "step": 420
    },
    {
      "epoch": 0.08630586305863058,
      "grad_norm": 0.781090999746419,
      "learning_rate": 0.00019834930614387602,
      "loss": 1.6565,
      "step": 421
    },
    {
      "epoch": 0.08651086510865108,
      "grad_norm": 0.7906233152409404,
      "learning_rate": 0.00019833726885896185,
      "loss": 1.6829,
      "step": 422
    },
    {
      "epoch": 0.08671586715867159,
      "grad_norm": 0.8806550648859682,
      "learning_rate": 0.00019832518821178102,
      "loss": 1.6959,
      "step": 423
    },
    {
      "epoch": 0.08692086920869209,
      "grad_norm": 0.8347538964383344,
      "learning_rate": 0.00019831306420766057,
      "loss": 1.7088,
      "step": 424
    },
    {
      "epoch": 0.08712587125871259,
      "grad_norm": 0.7325629458284244,
      "learning_rate": 0.00019830089685194663,
      "loss": 1.6846,
      "step": 425
    },
    {
      "epoch": 0.08733087330873308,
      "grad_norm": 0.765571450743036,
      "learning_rate": 0.00019828868615000443,
      "loss": 1.6897,
      "step": 426
    },
    {
      "epoch": 0.08753587535875358,
      "grad_norm": 0.8480287539216526,
      "learning_rate": 0.00019827643210721838,
      "loss": 1.7078,
      "step": 427
    },
    {
      "epoch": 0.08774087740877409,
      "grad_norm": 0.7425628970025092,
      "learning_rate": 0.00019826413472899193,
      "loss": 1.633,
      "step": 428
    },
    {
      "epoch": 0.08794587945879459,
      "grad_norm": 0.7986674418566917,
      "learning_rate": 0.0001982517940207476,
      "loss": 1.7174,
      "step": 429
    },
    {
      "epoch": 0.08815088150881509,
      "grad_norm": 0.8721087815441965,
      "learning_rate": 0.00019823940998792722,
      "loss": 1.7366,
      "step": 430
    },
    {
      "epoch": 0.08835588355883559,
      "grad_norm": 0.7875399357031263,
      "learning_rate": 0.00019822698263599145,
      "loss": 1.5826,
      "step": 431
    },
    {
      "epoch": 0.08856088560885608,
      "grad_norm": 0.8162070818499303,
      "learning_rate": 0.00019821451197042026,
      "loss": 1.709,
      "step": 432
    },
    {
      "epoch": 0.0887658876588766,
      "grad_norm": 0.7635777512510616,
      "learning_rate": 0.00019820199799671265,
      "loss": 1.6264,
      "step": 433
    },
    {
      "epoch": 0.08897088970889709,
      "grad_norm": 0.8896320078667912,
      "learning_rate": 0.0001981894407203867,
      "loss": 1.6593,
      "step": 434
    },
    {
      "epoch": 0.08917589175891759,
      "grad_norm": 0.8711397890377418,
      "learning_rate": 0.0001981768401469796,
      "loss": 1.7198,
      "step": 435
    },
    {
      "epoch": 0.08938089380893809,
      "grad_norm": 0.8172766441468787,
      "learning_rate": 0.00019816419628204758,
      "loss": 1.7353,
      "step": 436
    },
    {
      "epoch": 0.08958589585895858,
      "grad_norm": 0.7827133814345119,
      "learning_rate": 0.00019815150913116608,
      "loss": 1.7884,
      "step": 437
    },
    {
      "epoch": 0.0897908979089791,
      "grad_norm": 0.8487907274850991,
      "learning_rate": 0.00019813877869992954,
      "loss": 1.7279,
      "step": 438
    },
    {
      "epoch": 0.08999589995899959,
      "grad_norm": 0.8387400254977331,
      "learning_rate": 0.0001981260049939515,
      "loss": 1.6778,
      "step": 439
    },
    {
      "epoch": 0.09020090200902009,
      "grad_norm": 0.8368595835848883,
      "learning_rate": 0.00019811318801886456,
      "loss": 1.714,
      "step": 440
    },
    {
      "epoch": 0.09040590405904059,
      "grad_norm": 0.8700540729481459,
      "learning_rate": 0.00019810032778032043,
      "loss": 1.7357,
      "step": 441
    },
    {
      "epoch": 0.09061090610906108,
      "grad_norm": 0.8994143848786682,
      "learning_rate": 0.00019808742428398994,
      "loss": 1.7405,
      "step": 442
    },
    {
      "epoch": 0.0908159081590816,
      "grad_norm": 0.7740404614686632,
      "learning_rate": 0.00019807447753556287,
      "loss": 1.7154,
      "step": 443
    },
    {
      "epoch": 0.0910209102091021,
      "grad_norm": 0.7928740791403559,
      "learning_rate": 0.0001980614875407482,
      "loss": 1.6624,
      "step": 444
    },
    {
      "epoch": 0.09122591225912259,
      "grad_norm": 0.7870107002717357,
      "learning_rate": 0.00019804845430527391,
      "loss": 1.708,
      "step": 445
    },
    {
      "epoch": 0.09143091430914309,
      "grad_norm": 0.7895707063600054,
      "learning_rate": 0.00019803537783488707,
      "loss": 1.7503,
      "step": 446
    },
    {
      "epoch": 0.09163591635916359,
      "grad_norm": 0.8489490388680045,
      "learning_rate": 0.0001980222581353538,
      "loss": 1.7634,
      "step": 447
    },
    {
      "epoch": 0.0918409184091841,
      "grad_norm": 0.8287261838512044,
      "learning_rate": 0.00019800909521245933,
      "loss": 1.6905,
      "step": 448
    },
    {
      "epoch": 0.0920459204592046,
      "grad_norm": 0.7462095172109476,
      "learning_rate": 0.00019799588907200782,
      "loss": 1.8165,
      "step": 449
    },
    {
      "epoch": 0.09225092250922509,
      "grad_norm": 0.7309335585458496,
      "learning_rate": 0.00019798263971982266,
      "loss": 1.6134,
      "step": 450
    },
    {
      "epoch": 0.09245592455924559,
      "grad_norm": 0.8290559074120037,
      "learning_rate": 0.0001979693471617462,
      "loss": 1.7024,
      "step": 451
    },
    {
      "epoch": 0.09266092660926609,
      "grad_norm": 0.7986537368313037,
      "learning_rate": 0.00019795601140363983,
      "loss": 1.7512,
      "step": 452
    },
    {
      "epoch": 0.0928659286592866,
      "grad_norm": 0.7763318066313323,
      "learning_rate": 0.00019794263245138404,
      "loss": 1.7358,
      "step": 453
    },
    {
      "epoch": 0.0930709307093071,
      "grad_norm": 0.8431382846534751,
      "learning_rate": 0.00019792921031087829,
      "loss": 1.6584,
      "step": 454
    },
    {
      "epoch": 0.09327593275932759,
      "grad_norm": 0.8732411541366695,
      "learning_rate": 0.0001979157449880412,
      "loss": 1.6535,
      "step": 455
    },
    {
      "epoch": 0.09348093480934809,
      "grad_norm": 0.7938674820974778,
      "learning_rate": 0.00019790223648881035,
      "loss": 1.6841,
      "step": 456
    },
    {
      "epoch": 0.09368593685936859,
      "grad_norm": 0.7838316116341935,
      "learning_rate": 0.00019788868481914233,
      "loss": 1.6705,
      "step": 457
    },
    {
      "epoch": 0.0938909389093891,
      "grad_norm": 0.8464944297811771,
      "learning_rate": 0.00019787508998501285,
      "loss": 1.6512,
      "step": 458
    },
    {
      "epoch": 0.0940959409594096,
      "grad_norm": 0.7274538896666675,
      "learning_rate": 0.00019786145199241658,
      "loss": 1.6973,
      "step": 459
    },
    {
      "epoch": 0.0943009430094301,
      "grad_norm": 0.8452629491526242,
      "learning_rate": 0.00019784777084736732,
      "loss": 1.7159,
      "step": 460
    },
    {
      "epoch": 0.09450594505945059,
      "grad_norm": 0.8259490676322622,
      "learning_rate": 0.00019783404655589776,
      "loss": 1.6919,
      "step": 461
    },
    {
      "epoch": 0.09471094710947109,
      "grad_norm": 0.7959521989923912,
      "learning_rate": 0.00019782027912405975,
      "loss": 1.7484,
      "step": 462
    },
    {
      "epoch": 0.0949159491594916,
      "grad_norm": 0.8355678732640808,
      "learning_rate": 0.00019780646855792404,
      "loss": 1.6646,
      "step": 463
    },
    {
      "epoch": 0.0951209512095121,
      "grad_norm": 0.8871398787753433,
      "learning_rate": 0.0001977926148635805,
      "loss": 1.7233,
      "step": 464
    },
    {
      "epoch": 0.0953259532595326,
      "grad_norm": 0.8427949928791173,
      "learning_rate": 0.000197778718047138,
      "loss": 1.7198,
      "step": 465
    },
    {
      "epoch": 0.09553095530955309,
      "grad_norm": 0.8112619766607961,
      "learning_rate": 0.00019776477811472436,
      "loss": 1.6703,
      "step": 466
    },
    {
      "epoch": 0.09573595735957359,
      "grad_norm": 0.7727413832903378,
      "learning_rate": 0.00019775079507248645,
      "loss": 1.6463,
      "step": 467
    },
    {
      "epoch": 0.0959409594095941,
      "grad_norm": 0.8525189308487066,
      "learning_rate": 0.00019773676892659024,
      "loss": 1.6884,
      "step": 468
    },
    {
      "epoch": 0.0961459614596146,
      "grad_norm": 0.7999931325392722,
      "learning_rate": 0.0001977226996832205,
      "loss": 1.6569,
      "step": 469
    },
    {
      "epoch": 0.0963509635096351,
      "grad_norm": 0.8190908180073811,
      "learning_rate": 0.00019770858734858126,
      "loss": 1.7679,
      "step": 470
    },
    {
      "epoch": 0.0965559655596556,
      "grad_norm": 0.7800499019875347,
      "learning_rate": 0.0001976944319288953,
      "loss": 1.624,
      "step": 471
    },
    {
      "epoch": 0.09676096760967609,
      "grad_norm": 0.7463117231647134,
      "learning_rate": 0.00019768023343040455,
      "loss": 1.6632,
      "step": 472
    },
    {
      "epoch": 0.0969659696596966,
      "grad_norm": 0.7659423912829482,
      "learning_rate": 0.00019766599185936997,
      "loss": 1.6501,
      "step": 473
    },
    {
      "epoch": 0.0971709717097171,
      "grad_norm": 0.8057912288118586,
      "learning_rate": 0.00019765170722207135,
      "loss": 1.7021,
      "step": 474
    },
    {
      "epoch": 0.0973759737597376,
      "grad_norm": 0.7579472265489474,
      "learning_rate": 0.00019763737952480762,
      "loss": 1.6855,
      "step": 475
    },
    {
      "epoch": 0.0975809758097581,
      "grad_norm": 0.795453799926076,
      "learning_rate": 0.00019762300877389666,
      "loss": 1.6781,
      "step": 476
    },
    {
      "epoch": 0.09778597785977859,
      "grad_norm": 0.7803483098398056,
      "learning_rate": 0.00019760859497567528,
      "loss": 1.6827,
      "step": 477
    },
    {
      "epoch": 0.0979909799097991,
      "grad_norm": 0.7606664546018259,
      "learning_rate": 0.00019759413813649933,
      "loss": 1.7018,
      "step": 478
    },
    {
      "epoch": 0.0981959819598196,
      "grad_norm": 0.8243364804331386,
      "learning_rate": 0.00019757963826274357,
      "loss": 1.6747,
      "step": 479
    },
    {
      "epoch": 0.0984009840098401,
      "grad_norm": 0.8193616865124249,
      "learning_rate": 0.00019756509536080185,
      "loss": 1.6041,
      "step": 480
    },
    {
      "epoch": 0.0986059860598606,
      "grad_norm": 0.7639841242141624,
      "learning_rate": 0.0001975505094370869,
      "loss": 1.694,
      "step": 481
    },
    {
      "epoch": 0.09881098810988109,
      "grad_norm": 0.8576414677090153,
      "learning_rate": 0.00019753588049803046,
      "loss": 1.734,
      "step": 482
    },
    {
      "epoch": 0.0990159901599016,
      "grad_norm": 0.8721771050311198,
      "learning_rate": 0.00019752120855008324,
      "loss": 1.647,
      "step": 483
    },
    {
      "epoch": 0.0992209922099221,
      "grad_norm": 0.8216809030017547,
      "learning_rate": 0.00019750649359971488,
      "loss": 1.676,
      "step": 484
    },
    {
      "epoch": 0.0994259942599426,
      "grad_norm": 0.741288134401714,
      "learning_rate": 0.000197491735653414,
      "loss": 1.6838,
      "step": 485
    },
    {
      "epoch": 0.0996309963099631,
      "grad_norm": 0.7470529216950845,
      "learning_rate": 0.00019747693471768818,
      "loss": 1.7296,
      "step": 486
    },
    {
      "epoch": 0.0998359983599836,
      "grad_norm": 0.7504402483351444,
      "learning_rate": 0.000197462090799064,
      "loss": 1.7273,
      "step": 487
    },
    {
      "epoch": 0.1000410004100041,
      "grad_norm": 0.7446405182211473,
      "learning_rate": 0.00019744720390408687,
      "loss": 1.668,
      "step": 488
    },
    {
      "epoch": 0.1002460024600246,
      "grad_norm": 0.7099938307259624,
      "learning_rate": 0.00019743227403932134,
      "loss": 1.7785,
      "step": 489
    },
    {
      "epoch": 0.1004510045100451,
      "grad_norm": 0.7746770926232007,
      "learning_rate": 0.00019741730121135075,
      "loss": 1.7409,
      "step": 490
    },
    {
      "epoch": 0.1006560065600656,
      "grad_norm": 0.8024043188757939,
      "learning_rate": 0.0001974022854267774,
      "loss": 1.6947,
      "step": 491
    },
    {
      "epoch": 0.1008610086100861,
      "grad_norm": 0.7006027936532472,
      "learning_rate": 0.00019738722669222268,
      "loss": 1.6709,
      "step": 492
    },
    {
      "epoch": 0.1010660106601066,
      "grad_norm": 0.7513617348789843,
      "learning_rate": 0.00019737212501432666,
      "loss": 1.742,
      "step": 493
    },
    {
      "epoch": 0.1012710127101271,
      "grad_norm": 0.7470340242865157,
      "learning_rate": 0.0001973569803997486,
      "loss": 1.6897,
      "step": 494
    },
    {
      "epoch": 0.1014760147601476,
      "grad_norm": 0.8020519910234862,
      "learning_rate": 0.00019734179285516655,
      "loss": 1.7027,
      "step": 495
    },
    {
      "epoch": 0.1016810168101681,
      "grad_norm": 0.7248567489397829,
      "learning_rate": 0.00019732656238727754,
      "loss": 1.6378,
      "step": 496
    },
    {
      "epoch": 0.1018860188601886,
      "grad_norm": 0.763222840367488,
      "learning_rate": 0.0001973112890027975,
      "loss": 1.6748,
      "step": 497
    },
    {
      "epoch": 0.10209102091020911,
      "grad_norm": 0.7729051614464107,
      "learning_rate": 0.00019729597270846133,
      "loss": 1.7187,
      "step": 498
    },
    {
      "epoch": 0.1022960229602296,
      "grad_norm": 0.7125702383653151,
      "learning_rate": 0.00019728061351102273,
      "loss": 1.6378,
      "step": 499
    },
    {
      "epoch": 0.1025010250102501,
      "grad_norm": 0.7155720204867984,
      "learning_rate": 0.00019726521141725454,
      "loss": 1.6643,
      "step": 500
    },
    {
      "epoch": 0.1027060270602706,
      "grad_norm": 0.7089598253301342,
      "learning_rate": 0.00019724976643394827,
      "loss": 1.6806,
      "step": 501
    },
    {
      "epoch": 0.1029110291102911,
      "grad_norm": 0.7082371126527723,
      "learning_rate": 0.00019723427856791452,
      "loss": 1.6904,
      "step": 502
    },
    {
      "epoch": 0.10311603116031161,
      "grad_norm": 0.7447287455120091,
      "learning_rate": 0.00019721874782598273,
      "loss": 1.6732,
      "step": 503
    },
    {
      "epoch": 0.1033210332103321,
      "grad_norm": 0.7286536803119806,
      "learning_rate": 0.00019720317421500122,
      "loss": 1.645,
      "step": 504
    },
    {
      "epoch": 0.1035260352603526,
      "grad_norm": 2.770519946109433,
      "learning_rate": 0.0001971875577418373,
      "loss": 1.7472,
      "step": 505
    },
    {
      "epoch": 0.1037310373103731,
      "grad_norm": 0.783805035999857,
      "learning_rate": 0.00019717189841337703,
      "loss": 1.7358,
      "step": 506
    },
    {
      "epoch": 0.1039360393603936,
      "grad_norm": 0.7637995545329411,
      "learning_rate": 0.00019715619623652554,
      "loss": 1.6203,
      "step": 507
    },
    {
      "epoch": 0.10414104141041411,
      "grad_norm": 0.6924701868056873,
      "learning_rate": 0.00019714045121820676,
      "loss": 1.6707,
      "step": 508
    },
    {
      "epoch": 0.1043460434604346,
      "grad_norm": 0.8118324325378106,
      "learning_rate": 0.00019712466336536353,
      "loss": 1.737,
      "step": 509
    },
    {
      "epoch": 0.1045510455104551,
      "grad_norm": 0.7902623460751316,
      "learning_rate": 0.0001971088326849576,
      "loss": 1.7185,
      "step": 510
    },
    {
      "epoch": 0.1047560475604756,
      "grad_norm": 0.7316929483305475,
      "learning_rate": 0.0001970929591839695,
      "loss": 1.6915,
      "step": 511
    },
    {
      "epoch": 0.1049610496104961,
      "grad_norm": 0.8508758741225556,
      "learning_rate": 0.0001970770428693988,
      "loss": 1.669,
      "step": 512
    },
    {
      "epoch": 0.10516605166051661,
      "grad_norm": 0.737216755724445,
      "learning_rate": 0.0001970610837482638,
      "loss": 1.6925,
      "step": 513
    },
    {
      "epoch": 0.10537105371053711,
      "grad_norm": 0.7490703230727511,
      "learning_rate": 0.00019704508182760185,
      "loss": 1.7485,
      "step": 514
    },
    {
      "epoch": 0.1055760557605576,
      "grad_norm": 0.7208283831084737,
      "learning_rate": 0.00019702903711446898,
      "loss": 1.6914,
      "step": 515
    },
    {
      "epoch": 0.1057810578105781,
      "grad_norm": 0.7705109170182252,
      "learning_rate": 0.0001970129496159402,
      "loss": 1.7231,
      "step": 516
    },
    {
      "epoch": 0.1059860598605986,
      "grad_norm": 0.7913632313341308,
      "learning_rate": 0.0001969968193391094,
      "loss": 1.6956,
      "step": 517
    },
    {
      "epoch": 0.10619106191061911,
      "grad_norm": 0.7524549480699058,
      "learning_rate": 0.00019698064629108928,
      "loss": 1.6691,
      "step": 518
    },
    {
      "epoch": 0.10639606396063961,
      "grad_norm": 0.8414835918561137,
      "learning_rate": 0.0001969644304790114,
      "loss": 1.6683,
      "step": 519
    },
    {
      "epoch": 0.1066010660106601,
      "grad_norm": 0.8179155243555126,
      "learning_rate": 0.0001969481719100262,
      "loss": 1.7008,
      "step": 520
    },
    {
      "epoch": 0.1068060680606806,
      "grad_norm": 0.7488520165577811,
      "learning_rate": 0.00019693187059130303,
      "loss": 1.7221,
      "step": 521
    },
    {
      "epoch": 0.1070110701107011,
      "grad_norm": 0.7098979253267371,
      "learning_rate": 0.00019691552653002992,
      "loss": 1.6796,
      "step": 522
    },
    {
      "epoch": 0.10721607216072161,
      "grad_norm": 0.7276703610950348,
      "learning_rate": 0.00019689913973341397,
      "loss": 1.6978,
      "step": 523
    },
    {
      "epoch": 0.10742107421074211,
      "grad_norm": 1.4401586354659643,
      "learning_rate": 0.00019688271020868093,
      "loss": 1.7208,
      "step": 524
    },
    {
      "epoch": 0.10762607626076261,
      "grad_norm": 0.7654467696451249,
      "learning_rate": 0.0001968662379630755,
      "loss": 1.7143,
      "step": 525
    },
    {
      "epoch": 0.1078310783107831,
      "grad_norm": 0.7932918174797919,
      "learning_rate": 0.0001968497230038612,
      "loss": 1.7658,
      "step": 526
    },
    {
      "epoch": 0.1080360803608036,
      "grad_norm": 0.7214865764464613,
      "learning_rate": 0.00019683316533832042,
      "loss": 1.7363,
      "step": 527
    },
    {
      "epoch": 0.10824108241082411,
      "grad_norm": 0.7710833669927666,
      "learning_rate": 0.00019681656497375424,
      "loss": 1.6911,
      "step": 528
    },
    {
      "epoch": 0.10844608446084461,
      "grad_norm": 2.082117028514879,
      "learning_rate": 0.00019679992191748275,
      "loss": 1.7145,
      "step": 529
    },
    {
      "epoch": 0.10865108651086511,
      "grad_norm": 1.5991309954490074,
      "learning_rate": 0.00019678323617684473,
      "loss": 1.7231,
      "step": 530
    },
    {
      "epoch": 0.1088560885608856,
      "grad_norm": 2.3948981536411247,
      "learning_rate": 0.00019676650775919788,
      "loss": 1.6851,
      "step": 531
    },
    {
      "epoch": 0.1090610906109061,
      "grad_norm": 3.4599222886447807,
      "learning_rate": 0.0001967497366719186,
      "loss": 1.8015,
      "step": 532
    },
    {
      "epoch": 0.10926609266092661,
      "grad_norm": 1.178961869403467,
      "learning_rate": 0.0001967329229224023,
      "loss": 1.6913,
      "step": 533
    },
    {
      "epoch": 0.10947109471094711,
      "grad_norm": 1.0419999568710827,
      "learning_rate": 0.0001967160665180629,
      "loss": 1.6393,
      "step": 534
    },
    {
      "epoch": 0.10967609676096761,
      "grad_norm": 0.8422855168488247,
      "learning_rate": 0.00019669916746633347,
      "loss": 1.7423,
      "step": 535
    },
    {
      "epoch": 0.1098810988109881,
      "grad_norm": 4.052938668367732,
      "learning_rate": 0.00019668222577466567,
      "loss": 1.7677,
      "step": 536
    },
    {
      "epoch": 0.1100861008610086,
      "grad_norm": 0.924623638056083,
      "learning_rate": 0.00019666524145053004,
      "loss": 1.6939,
      "step": 537
    },
    {
      "epoch": 0.11029110291102912,
      "grad_norm": 0.8238440764742335,
      "learning_rate": 0.0001966482145014158,
      "loss": 1.6461,
      "step": 538
    },
    {
      "epoch": 0.11049610496104961,
      "grad_norm": 0.8970963953123221,
      "learning_rate": 0.00019663114493483115,
      "loss": 1.6731,
      "step": 539
    },
    {
      "epoch": 0.11070110701107011,
      "grad_norm": 0.7865007821667908,
      "learning_rate": 0.00019661403275830297,
      "loss": 1.6787,
      "step": 540
    },
    {
      "epoch": 0.11090610906109061,
      "grad_norm": 0.8696028850679443,
      "learning_rate": 0.00019659687797937697,
      "loss": 1.7659,
      "step": 541
    },
    {
      "epoch": 0.1111111111111111,
      "grad_norm": 0.6906295733072493,
      "learning_rate": 0.00019657968060561758,
      "loss": 1.6525,
      "step": 542
    },
    {
      "epoch": 0.11131611316113162,
      "grad_norm": 0.7930376310943481,
      "learning_rate": 0.0001965624406446081,
      "loss": 1.6842,
      "step": 543
    },
    {
      "epoch": 0.11152111521115211,
      "grad_norm": 0.7544524600342143,
      "learning_rate": 0.00019654515810395057,
      "loss": 1.6905,
      "step": 544
    },
    {
      "epoch": 0.11172611726117261,
      "grad_norm": 0.7725631779594652,
      "learning_rate": 0.00019652783299126578,
      "loss": 1.632,
      "step": 545
    },
    {
      "epoch": 0.11193111931119311,
      "grad_norm": 0.7642907196358375,
      "learning_rate": 0.00019651046531419332,
      "loss": 1.8001,
      "step": 546
    },
    {
      "epoch": 0.1121361213612136,
      "grad_norm": 0.7457051675697578,
      "learning_rate": 0.00019649305508039159,
      "loss": 1.6379,
      "step": 547
    },
    {
      "epoch": 0.11234112341123412,
      "grad_norm": 0.7815886279365971,
      "learning_rate": 0.00019647560229753768,
      "loss": 1.752,
      "step": 548
    },
    {
      "epoch": 0.11254612546125461,
      "grad_norm": 0.7001482181012725,
      "learning_rate": 0.00019645810697332746,
      "loss": 1.6749,
      "step": 549
    },
    {
      "epoch": 0.11275112751127511,
      "grad_norm": 0.7021756980401541,
      "learning_rate": 0.0001964405691154756,
      "loss": 1.7534,
      "step": 550
    },
    {
      "epoch": 0.11295612956129561,
      "grad_norm": 0.7041752086506201,
      "learning_rate": 0.00019642298873171545,
      "loss": 1.7196,
      "step": 551
    },
    {
      "epoch": 0.11316113161131611,
      "grad_norm": 0.6641202881414029,
      "learning_rate": 0.00019640536582979923,
      "loss": 1.7212,
      "step": 552
    },
    {
      "epoch": 0.11336613366133662,
      "grad_norm": 0.8219760989551604,
      "learning_rate": 0.00019638770041749778,
      "loss": 1.7443,
      "step": 553
    },
    {
      "epoch": 0.11357113571135712,
      "grad_norm": 0.7446323188545191,
      "learning_rate": 0.0001963699925026008,
      "loss": 1.6992,
      "step": 554
    },
    {
      "epoch": 0.11377613776137761,
      "grad_norm": 0.7283067844851286,
      "learning_rate": 0.0001963522420929166,
      "loss": 1.6881,
      "step": 555
    },
    {
      "epoch": 0.11398113981139811,
      "grad_norm": 0.7547374240703352,
      "learning_rate": 0.0001963344491962724,
      "loss": 1.7041,
      "step": 556
    },
    {
      "epoch": 0.11418614186141861,
      "grad_norm": 0.8045748005183614,
      "learning_rate": 0.00019631661382051396,
      "loss": 1.6051,
      "step": 557
    },
    {
      "epoch": 0.11439114391143912,
      "grad_norm": 0.7879592335697111,
      "learning_rate": 0.00019629873597350596,
      "loss": 1.6769,
      "step": 558
    },
    {
      "epoch": 0.11459614596145962,
      "grad_norm": 0.8023384806228847,
      "learning_rate": 0.00019628081566313164,
      "loss": 1.6205,
      "step": 559
    },
    {
      "epoch": 0.11480114801148011,
      "grad_norm": 0.757443234258085,
      "learning_rate": 0.0001962628528972931,
      "loss": 1.6705,
      "step": 560
    },
    {
      "epoch": 0.11500615006150061,
      "grad_norm": 0.8172873659873222,
      "learning_rate": 0.00019624484768391106,
      "loss": 1.7583,
      "step": 561
    },
    {
      "epoch": 0.11521115211152111,
      "grad_norm": 0.7089928380076084,
      "learning_rate": 0.00019622680003092503,
      "loss": 1.7084,
      "step": 562
    },
    {
      "epoch": 0.11541615416154162,
      "grad_norm": 0.7292017391593942,
      "learning_rate": 0.0001962087099462932,
      "loss": 1.737,
      "step": 563
    },
    {
      "epoch": 0.11562115621156212,
      "grad_norm": 0.7595599987912868,
      "learning_rate": 0.0001961905774379925,
      "loss": 1.6998,
      "step": 564
    },
    {
      "epoch": 0.11582615826158262,
      "grad_norm": 0.7094092757884572,
      "learning_rate": 0.0001961724025140185,
      "loss": 1.688,
      "step": 565
    },
    {
      "epoch": 0.11603116031160311,
      "grad_norm": 0.762874147705992,
      "learning_rate": 0.00019615418518238552,
      "loss": 1.7624,
      "step": 566
    },
    {
      "epoch": 0.11623616236162361,
      "grad_norm": 0.8326950889290972,
      "learning_rate": 0.00019613592545112657,
      "loss": 1.7572,
      "step": 567
    },
    {
      "epoch": 0.11644116441164412,
      "grad_norm": 0.7155392474035641,
      "learning_rate": 0.0001961176233282934,
      "loss": 1.6575,
      "step": 568
    },
    {
      "epoch": 0.11664616646166462,
      "grad_norm": 0.7732444983449245,
      "learning_rate": 0.00019609927882195636,
      "loss": 1.722,
      "step": 569
    },
    {
      "epoch": 0.11685116851168512,
      "grad_norm": 0.8822083366753746,
      "learning_rate": 0.0001960808919402046,
      "loss": 1.7837,
      "step": 570
    },
    {
      "epoch": 0.11705617056170561,
      "grad_norm": 0.7530112763599104,
      "learning_rate": 0.0001960624626911459,
      "loss": 1.7197,
      "step": 571
    },
    {
      "epoch": 0.11726117261172611,
      "grad_norm": 0.7286752953774666,
      "learning_rate": 0.00019604399108290665,
      "loss": 1.6545,
      "step": 572
    },
    {
      "epoch": 0.11746617466174662,
      "grad_norm": 0.7897632469994833,
      "learning_rate": 0.00019602547712363203,
      "loss": 1.7346,
      "step": 573
    },
    {
      "epoch": 0.11767117671176712,
      "grad_norm": 0.7938108321618835,
      "learning_rate": 0.0001960069208214859,
      "loss": 1.7572,
      "step": 574
    },
    {
      "epoch": 0.11787617876178762,
      "grad_norm": 0.8020492204411614,
      "learning_rate": 0.0001959883221846507,
      "loss": 1.6684,
      "step": 575
    },
    {
      "epoch": 0.11808118081180811,
      "grad_norm": 0.8291826844922005,
      "learning_rate": 0.00019596968122132755,
      "loss": 1.7271,
      "step": 576
    },
    {
      "epoch": 0.11828618286182861,
      "grad_norm": 0.7087079948977139,
      "learning_rate": 0.00019595099793973635,
      "loss": 1.6655,
      "step": 577
    },
    {
      "epoch": 0.11849118491184912,
      "grad_norm": 0.819297167312269,
      "learning_rate": 0.0001959322723481155,
      "loss": 1.6903,
      "step": 578
    },
    {
      "epoch": 0.11869618696186962,
      "grad_norm": 0.8177083385090965,
      "learning_rate": 0.0001959135044547222,
      "loss": 1.7223,
      "step": 579
    },
    {
      "epoch": 0.11890118901189012,
      "grad_norm": 0.7209537141489044,
      "learning_rate": 0.0001958946942678322,
      "loss": 1.6286,
      "step": 580
    },
    {
      "epoch": 0.11910619106191062,
      "grad_norm": 0.7611356839436053,
      "learning_rate": 0.00019587584179573994,
      "loss": 1.7327,
      "step": 581
    },
    {
      "epoch": 0.11931119311193111,
      "grad_norm": 0.8254627704961581,
      "learning_rate": 0.0001958569470467585,
      "loss": 1.6685,
      "step": 582
    },
    {
      "epoch": 0.11951619516195162,
      "grad_norm": 0.7622126317435031,
      "learning_rate": 0.00019583801002921963,
      "loss": 1.6962,
      "step": 583
    },
    {
      "epoch": 0.11972119721197212,
      "grad_norm": 0.817331535155199,
      "learning_rate": 0.0001958190307514737,
      "loss": 1.7762,
      "step": 584
    },
    {
      "epoch": 0.11992619926199262,
      "grad_norm": 0.7629200159897975,
      "learning_rate": 0.00019580000922188965,
      "loss": 1.6366,
      "step": 585
    },
    {
      "epoch": 0.12013120131201312,
      "grad_norm": 0.7079526051805777,
      "learning_rate": 0.00019578094544885516,
      "loss": 1.634,
      "step": 586
    },
    {
      "epoch": 0.12033620336203361,
      "grad_norm": 0.7599249140746483,
      "learning_rate": 0.0001957618394407765,
      "loss": 1.7381,
      "step": 587
    },
    {
      "epoch": 0.12054120541205413,
      "grad_norm": 0.82356941117184,
      "learning_rate": 0.0001957426912060785,
      "loss": 1.765,
      "step": 588
    },
    {
      "epoch": 0.12074620746207462,
      "grad_norm": 0.6894290046436214,
      "learning_rate": 0.00019572350075320469,
      "loss": 1.6965,
      "step": 589
    },
    {
      "epoch": 0.12095120951209512,
      "grad_norm": 0.6762523177558277,
      "learning_rate": 0.0001957042680906172,
      "loss": 1.6276,
      "step": 590
    },
    {
      "epoch": 0.12115621156211562,
      "grad_norm": 0.8909921375408455,
      "learning_rate": 0.00019568499322679674,
      "loss": 1.7439,
      "step": 591
    },
    {
      "epoch": 0.12136121361213612,
      "grad_norm": 0.7602235580506228,
      "learning_rate": 0.00019566567617024263,
      "loss": 1.674,
      "step": 592
    },
    {
      "epoch": 0.12156621566215663,
      "grad_norm": 0.7242445798680968,
      "learning_rate": 0.00019564631692947288,
      "loss": 1.7531,
      "step": 593
    },
    {
      "epoch": 0.12177121771217712,
      "grad_norm": 0.8480253501470826,
      "learning_rate": 0.00019562691551302397,
      "loss": 1.7475,
      "step": 594
    },
    {
      "epoch": 0.12197621976219762,
      "grad_norm": 0.7729121548165052,
      "learning_rate": 0.00019560747192945107,
      "loss": 1.7089,
      "step": 595
    },
    {
      "epoch": 0.12218122181221812,
      "grad_norm": 0.7076690388262546,
      "learning_rate": 0.00019558798618732792,
      "loss": 1.6762,
      "step": 596
    },
    {
      "epoch": 0.12238622386223862,
      "grad_norm": 0.7922373701126969,
      "learning_rate": 0.00019556845829524683,
      "loss": 1.7165,
      "step": 597
    },
    {
      "epoch": 0.12259122591225913,
      "grad_norm": 0.7028133998075322,
      "learning_rate": 0.00019554888826181873,
      "loss": 1.6457,
      "step": 598
    },
    {
      "epoch": 0.12279622796227962,
      "grad_norm": 0.6977790685235766,
      "learning_rate": 0.0001955292760956731,
      "loss": 1.6802,
      "step": 599
    },
    {
      "epoch": 0.12300123001230012,
      "grad_norm": 0.7493226703610272,
      "learning_rate": 0.00019550962180545808,
      "loss": 1.6978,
      "step": 600
    },
    {
      "epoch": 0.12320623206232062,
      "grad_norm": 0.8600862131180389,
      "learning_rate": 0.00019548992539984022,
      "loss": 1.7207,
      "step": 601
    },
    {
      "epoch": 0.12341123411234112,
      "grad_norm": 0.7623787496255919,
      "learning_rate": 0.00019547018688750476,
      "loss": 1.6811,
      "step": 602
    },
    {
      "epoch": 0.12361623616236163,
      "grad_norm": 0.7632695969304624,
      "learning_rate": 0.0001954504062771555,
      "loss": 1.671,
      "step": 603
    },
    {
      "epoch": 0.12382123821238213,
      "grad_norm": 0.6927444238734469,
      "learning_rate": 0.00019543058357751483,
      "loss": 1.6437,
      "step": 604
    },
    {
      "epoch": 0.12402624026240262,
      "grad_norm": 0.7282969189338665,
      "learning_rate": 0.00019541071879732367,
      "loss": 1.6515,
      "step": 605
    },
    {
      "epoch": 0.12423124231242312,
      "grad_norm": 0.7525430459845069,
      "learning_rate": 0.0001953908119453414,
      "loss": 1.6612,
      "step": 606
    },
    {
      "epoch": 0.12443624436244362,
      "grad_norm": 0.7143221822535928,
      "learning_rate": 0.00019537086303034608,
      "loss": 1.7161,
      "step": 607
    },
    {
      "epoch": 0.12464124641246413,
      "grad_norm": 0.7008301427591075,
      "learning_rate": 0.00019535087206113427,
      "loss": 1.6549,
      "step": 608
    },
    {
      "epoch": 0.12484624846248463,
      "grad_norm": 0.771609790409738,
      "learning_rate": 0.0001953308390465211,
      "loss": 1.671,
      "step": 609
    },
    {
      "epoch": 0.12505125051250512,
      "grad_norm": 0.7008284466642101,
      "learning_rate": 0.00019531076399534022,
      "loss": 1.7197,
      "step": 610
    },
    {
      "epoch": 0.12525625256252562,
      "grad_norm": 0.7175186999596657,
      "learning_rate": 0.00019529064691644376,
      "loss": 1.6728,
      "step": 611
    },
    {
      "epoch": 0.12546125461254612,
      "grad_norm": 0.8027127988740639,
      "learning_rate": 0.00019527048781870247,
      "loss": 1.7976,
      "step": 612
    },
    {
      "epoch": 0.12566625666256662,
      "grad_norm": 0.6970642714660732,
      "learning_rate": 0.00019525028671100566,
      "loss": 1.73,
      "step": 613
    },
    {
      "epoch": 0.12587125871258711,
      "grad_norm": 0.7145094642434252,
      "learning_rate": 0.000195230043602261,
      "loss": 1.7257,
      "step": 614
    },
    {
      "epoch": 0.12607626076260764,
      "grad_norm": 0.6747921321796597,
      "learning_rate": 0.0001952097585013948,
      "loss": 1.7103,
      "step": 615
    },
    {
      "epoch": 0.12628126281262814,
      "grad_norm": 0.6894592468090761,
      "learning_rate": 0.00019518943141735195,
      "loss": 1.6761,
      "step": 616
    },
    {
      "epoch": 0.12648626486264863,
      "grad_norm": 0.7051256329334021,
      "learning_rate": 0.0001951690623590957,
      "loss": 1.7204,
      "step": 617
    },
    {
      "epoch": 0.12669126691266913,
      "grad_norm": 0.6235098069211322,
      "learning_rate": 0.0001951486513356079,
      "loss": 1.7023,
      "step": 618
    },
    {
      "epoch": 0.12689626896268963,
      "grad_norm": 0.7556495063175042,
      "learning_rate": 0.00019512819835588885,
      "loss": 1.7459,
      "step": 619
    },
    {
      "epoch": 0.12710127101271013,
      "grad_norm": 0.7032330727817956,
      "learning_rate": 0.00019510770342895742,
      "loss": 1.7094,
      "step": 620
    },
    {
      "epoch": 0.12730627306273062,
      "grad_norm": 0.6963801753782002,
      "learning_rate": 0.0001950871665638509,
      "loss": 1.6947,
      "step": 621
    },
    {
      "epoch": 0.12751127511275112,
      "grad_norm": 0.7427477546158143,
      "learning_rate": 0.0001950665877696252,
      "loss": 1.6702,
      "step": 622
    },
    {
      "epoch": 0.12771627716277162,
      "grad_norm": 0.7175560457683797,
      "learning_rate": 0.00019504596705535455,
      "loss": 1.733,
      "step": 623
    },
    {
      "epoch": 0.12792127921279212,
      "grad_norm": 0.6382014645533433,
      "learning_rate": 0.00019502530443013178,
      "loss": 1.6044,
      "step": 624
    },
    {
      "epoch": 0.12812628126281264,
      "grad_norm": 0.6931547392933766,
      "learning_rate": 0.00019500459990306817,
      "loss": 1.6547,
      "step": 625
    },
    {
      "epoch": 0.12833128331283314,
      "grad_norm": 0.8131052747319877,
      "learning_rate": 0.00019498385348329348,
      "loss": 1.7869,
      "step": 626
    },
    {
      "epoch": 0.12853628536285364,
      "grad_norm": 0.7495456567167106,
      "learning_rate": 0.00019496306517995587,
      "loss": 1.7483,
      "step": 627
    },
    {
      "epoch": 0.12874128741287413,
      "grad_norm": 0.7062520390299838,
      "learning_rate": 0.00019494223500222217,
      "loss": 1.6604,
      "step": 628
    },
    {
      "epoch": 0.12894628946289463,
      "grad_norm": 0.7931400753068338,
      "learning_rate": 0.00019492136295927743,
      "loss": 1.7362,
      "step": 629
    },
    {
      "epoch": 0.12915129151291513,
      "grad_norm": 0.6875574892712922,
      "learning_rate": 0.00019490044906032532,
      "loss": 1.6809,
      "step": 630
    },
    {
      "epoch": 0.12935629356293563,
      "grad_norm": 0.7111801502584136,
      "learning_rate": 0.0001948794933145879,
      "loss": 1.6546,
      "step": 631
    },
    {
      "epoch": 0.12956129561295612,
      "grad_norm": 0.7613366380444706,
      "learning_rate": 0.00019485849573130573,
      "loss": 1.7999,
      "step": 632
    },
    {
      "epoch": 0.12976629766297662,
      "grad_norm": 0.662224975718407,
      "learning_rate": 0.00019483745631973775,
      "loss": 1.6292,
      "step": 633
    },
    {
      "epoch": 0.12997129971299712,
      "grad_norm": 0.7246334231070312,
      "learning_rate": 0.0001948163750891614,
      "loss": 1.6925,
      "step": 634
    },
    {
      "epoch": 0.13017630176301764,
      "grad_norm": 0.6538882243932387,
      "learning_rate": 0.0001947952520488726,
      "loss": 1.659,
      "step": 635
    },
    {
      "epoch": 0.13038130381303814,
      "grad_norm": 0.6964780144077147,
      "learning_rate": 0.00019477408720818554,
      "loss": 1.7207,
      "step": 636
    },
    {
      "epoch": 0.13058630586305864,
      "grad_norm": 0.9548657397064115,
      "learning_rate": 0.00019475288057643303,
      "loss": 1.678,
      "step": 637
    },
    {
      "epoch": 0.13079130791307914,
      "grad_norm": 0.6384891194382379,
      "learning_rate": 0.00019473163216296625,
      "loss": 1.6983,
      "step": 638
    },
    {
      "epoch": 0.13099630996309963,
      "grad_norm": 0.7488985765381595,
      "learning_rate": 0.0001947103419771547,
      "loss": 1.6761,
      "step": 639
    },
    {
      "epoch": 0.13120131201312013,
      "grad_norm": 0.6866424805132988,
      "learning_rate": 0.00019468901002838644,
      "loss": 1.7165,
      "step": 640
    },
    {
      "epoch": 0.13140631406314063,
      "grad_norm": 0.763162900878345,
      "learning_rate": 0.0001946676363260679,
      "loss": 1.7092,
      "step": 641
    },
    {
      "epoch": 0.13161131611316113,
      "grad_norm": 0.6962827620119562,
      "learning_rate": 0.0001946462208796239,
      "loss": 1.6458,
      "step": 642
    },
    {
      "epoch": 0.13181631816318162,
      "grad_norm": 0.8119141383482595,
      "learning_rate": 0.00019462476369849766,
      "loss": 1.689,
      "step": 643
    },
    {
      "epoch": 0.13202132021320212,
      "grad_norm": 0.6672873571751815,
      "learning_rate": 0.00019460326479215083,
      "loss": 1.6127,
      "step": 644
    },
    {
      "epoch": 0.13222632226322265,
      "grad_norm": 0.7079483424952373,
      "learning_rate": 0.00019458172417006347,
      "loss": 1.7251,
      "step": 645
    },
    {
      "epoch": 0.13243132431324314,
      "grad_norm": 0.7584391354266112,
      "learning_rate": 0.00019456014184173398,
      "loss": 1.6707,
      "step": 646
    },
    {
      "epoch": 0.13263632636326364,
      "grad_norm": 0.716327893186426,
      "learning_rate": 0.00019453851781667925,
      "loss": 1.6356,
      "step": 647
    },
    {
      "epoch": 0.13284132841328414,
      "grad_norm": 0.6541907454010327,
      "learning_rate": 0.00019451685210443442,
      "loss": 1.7062,
      "step": 648
    },
    {
      "epoch": 0.13304633046330464,
      "grad_norm": 0.7137558788176999,
      "learning_rate": 0.00019449514471455313,
      "loss": 1.6513,
      "step": 649
    },
    {
      "epoch": 0.13325133251332513,
      "grad_norm": 0.7223891571495126,
      "learning_rate": 0.00019447339565660732,
      "loss": 1.7102,
      "step": 650
    },
    {
      "epoch": 0.13345633456334563,
      "grad_norm": 0.735645043490125,
      "learning_rate": 0.00019445160494018735,
      "loss": 1.7034,
      "step": 651
    },
    {
      "epoch": 0.13366133661336613,
      "grad_norm": 0.7096162370659085,
      "learning_rate": 0.00019442977257490193,
      "loss": 1.7026,
      "step": 652
    },
    {
      "epoch": 0.13386633866338662,
      "grad_norm": 0.7283081633487957,
      "learning_rate": 0.0001944078985703782,
      "loss": 1.7265,
      "step": 653
    },
    {
      "epoch": 0.13407134071340712,
      "grad_norm": 0.7076346579111047,
      "learning_rate": 0.00019438598293626148,
      "loss": 1.6433,
      "step": 654
    },
    {
      "epoch": 0.13427634276342765,
      "grad_norm": 0.8176454103089839,
      "learning_rate": 0.0001943640256822157,
      "loss": 1.7193,
      "step": 655
    },
    {
      "epoch": 0.13448134481344814,
      "grad_norm": 0.7026108842601033,
      "learning_rate": 0.00019434202681792293,
      "loss": 1.6104,
      "step": 656
    },
    {
      "epoch": 0.13468634686346864,
      "grad_norm": 0.685796468425738,
      "learning_rate": 0.00019431998635308372,
      "loss": 1.5332,
      "step": 657
    },
    {
      "epoch": 0.13489134891348914,
      "grad_norm": 0.7127802289338453,
      "learning_rate": 0.0001942979042974168,
      "loss": 1.6274,
      "step": 658
    },
    {
      "epoch": 0.13509635096350964,
      "grad_norm": 0.717598671490414,
      "learning_rate": 0.00019427578066065954,
      "loss": 1.7171,
      "step": 659
    },
    {
      "epoch": 0.13530135301353013,
      "grad_norm": 0.6912432544716882,
      "learning_rate": 0.00019425361545256727,
      "loss": 1.764,
      "step": 660
    },
    {
      "epoch": 0.13550635506355063,
      "grad_norm": 0.8142900011304077,
      "learning_rate": 0.00019423140868291396,
      "loss": 1.6733,
      "step": 661
    },
    {
      "epoch": 0.13571135711357113,
      "grad_norm": 0.6982401576159545,
      "learning_rate": 0.00019420916036149178,
      "loss": 1.7152,
      "step": 662
    },
    {
      "epoch": 0.13591635916359163,
      "grad_norm": 0.6730554983695302,
      "learning_rate": 0.00019418687049811115,
      "loss": 1.6932,
      "step": 663
    },
    {
      "epoch": 0.13612136121361212,
      "grad_norm": 0.7400434973007438,
      "learning_rate": 0.00019416453910260097,
      "loss": 1.7901,
      "step": 664
    },
    {
      "epoch": 0.13632636326363265,
      "grad_norm": 0.7113339420294928,
      "learning_rate": 0.0001941421661848083,
      "loss": 1.6646,
      "step": 665
    },
    {
      "epoch": 0.13653136531365315,
      "grad_norm": 0.6665150137351554,
      "learning_rate": 0.00019411975175459865,
      "loss": 1.6461,
      "step": 666
    },
    {
      "epoch": 0.13673636736367364,
      "grad_norm": 0.7453441081986116,
      "learning_rate": 0.00019409729582185574,
      "loss": 1.6918,
      "step": 667
    },
    {
      "epoch": 0.13694136941369414,
      "grad_norm": 0.7024419398270235,
      "learning_rate": 0.0001940747983964816,
      "loss": 1.6069,
      "step": 668
    },
    {
      "epoch": 0.13714637146371464,
      "grad_norm": 0.6826807669155491,
      "learning_rate": 0.00019405225948839657,
      "loss": 1.694,
      "step": 669
    },
    {
      "epoch": 0.13735137351373514,
      "grad_norm": 0.6680462216322685,
      "learning_rate": 0.0001940296791075393,
      "loss": 1.6868,
      "step": 670
    },
    {
      "epoch": 0.13755637556375563,
      "grad_norm": 0.8221542322696126,
      "learning_rate": 0.0001940070572638667,
      "loss": 1.6666,
      "step": 671
    },
    {
      "epoch": 0.13776137761377613,
      "grad_norm": 0.6739792335836811,
      "learning_rate": 0.00019398439396735398,
      "loss": 1.7057,
      "step": 672
    },
    {
      "epoch": 0.13796637966379663,
      "grad_norm": 0.6915752955334832,
      "learning_rate": 0.00019396168922799462,
      "loss": 1.6484,
      "step": 673
    },
    {
      "epoch": 0.13817138171381713,
      "grad_norm": 0.7598970632238722,
      "learning_rate": 0.00019393894305580041,
      "loss": 1.6509,
      "step": 674
    },
    {
      "epoch": 0.13837638376383765,
      "grad_norm": 0.6784006628222828,
      "learning_rate": 0.00019391615546080133,
      "loss": 1.6495,
      "step": 675
    },
    {
      "epoch": 0.13858138581385815,
      "grad_norm": 0.6894897436652209,
      "learning_rate": 0.0001938933264530457,
      "loss": 1.6764,
      "step": 676
    },
    {
      "epoch": 0.13878638786387865,
      "grad_norm": 0.7857036837790523,
      "learning_rate": 0.00019387045604260007,
      "loss": 1.6664,
      "step": 677
    },
    {
      "epoch": 0.13899138991389914,
      "grad_norm": 0.6818001039808111,
      "learning_rate": 0.00019384754423954926,
      "loss": 1.6526,
      "step": 678
    },
    {
      "epoch": 0.13919639196391964,
      "grad_norm": 0.7081724548685108,
      "learning_rate": 0.00019382459105399632,
      "loss": 1.6996,
      "step": 679
    },
    {
      "epoch": 0.13940139401394014,
      "grad_norm": 0.7140795694108882,
      "learning_rate": 0.00019380159649606257,
      "loss": 1.7172,
      "step": 680
    },
    {
      "epoch": 0.13960639606396064,
      "grad_norm": 0.7080091135319612,
      "learning_rate": 0.00019377856057588755,
      "loss": 1.6917,
      "step": 681
    },
    {
      "epoch": 0.13981139811398113,
      "grad_norm": 0.6761647836996737,
      "learning_rate": 0.00019375548330362907,
      "loss": 1.7723,
      "step": 682
    },
    {
      "epoch": 0.14001640016400163,
      "grad_norm": 0.7143847128372665,
      "learning_rate": 0.00019373236468946318,
      "loss": 1.6908,
      "step": 683
    },
    {
      "epoch": 0.14022140221402213,
      "grad_norm": 0.7447363936876037,
      "learning_rate": 0.0001937092047435841,
      "loss": 1.6374,
      "step": 684
    },
    {
      "epoch": 0.14042640426404265,
      "grad_norm": 0.6796516121340412,
      "learning_rate": 0.00019368600347620428,
      "loss": 1.6395,
      "step": 685
    },
    {
      "epoch": 0.14063140631406315,
      "grad_norm": 0.6497045167602054,
      "learning_rate": 0.00019366276089755453,
      "loss": 1.6955,
      "step": 686
    },
    {
      "epoch": 0.14083640836408365,
      "grad_norm": 0.730287449803201,
      "learning_rate": 0.00019363947701788372,
      "loss": 1.8079,
      "step": 687
    },
    {
      "epoch": 0.14104141041410415,
      "grad_norm": 0.8091140477761806,
      "learning_rate": 0.00019361615184745895,
      "loss": 1.6728,
      "step": 688
    },
    {
      "epoch": 0.14124641246412464,
      "grad_norm": 0.6323788011111765,
      "learning_rate": 0.00019359278539656557,
      "loss": 1.646,
      "step": 689
    },
    {
      "epoch": 0.14145141451414514,
      "grad_norm": 0.7258790043908935,
      "learning_rate": 0.00019356937767550715,
      "loss": 1.7093,
      "step": 690
    },
    {
      "epoch": 0.14165641656416564,
      "grad_norm": 0.6949124425464823,
      "learning_rate": 0.00019354592869460545,
      "loss": 1.6549,
      "step": 691
    },
    {
      "epoch": 0.14186141861418614,
      "grad_norm": 0.7000728118326219,
      "learning_rate": 0.00019352243846420034,
      "loss": 1.7468,
      "step": 692
    },
    {
      "epoch": 0.14206642066420663,
      "grad_norm": 0.7011285591169198,
      "learning_rate": 0.00019349890699464997,
      "loss": 1.6919,
      "step": 693
    },
    {
      "epoch": 0.14227142271422713,
      "grad_norm": 0.668775875766485,
      "learning_rate": 0.0001934753342963307,
      "loss": 1.69,
      "step": 694
    },
    {
      "epoch": 0.14247642476424766,
      "grad_norm": 0.7273460222566939,
      "learning_rate": 0.0001934517203796369,
      "loss": 1.6517,
      "step": 695
    },
    {
      "epoch": 0.14268142681426815,
      "grad_norm": 0.7469698129504518,
      "learning_rate": 0.0001934280652549814,
      "loss": 1.7333,
      "step": 696
    },
    {
      "epoch": 0.14288642886428865,
      "grad_norm": 0.5847816123055689,
      "learning_rate": 0.0001934043689327949,
      "loss": 1.7141,
      "step": 697
    },
    {
      "epoch": 0.14309143091430915,
      "grad_norm": 0.7884207755015578,
      "learning_rate": 0.00019338063142352644,
      "loss": 1.7298,
      "step": 698
    },
    {
      "epoch": 0.14329643296432965,
      "grad_norm": 0.8044078385037076,
      "learning_rate": 0.00019335685273764322,
      "loss": 1.7462,
      "step": 699
    },
    {
      "epoch": 0.14350143501435014,
      "grad_norm": 0.6380879417186065,
      "learning_rate": 0.0001933330328856305,
      "loss": 1.6637,
      "step": 700
    },
    {
      "epoch": 0.14370643706437064,
      "grad_norm": 0.7217261357612341,
      "learning_rate": 0.0001933091718779918,
      "loss": 1.7333,
      "step": 701
    },
    {
      "epoch": 0.14391143911439114,
      "grad_norm": 0.8240663565497209,
      "learning_rate": 0.0001932852697252487,
      "loss": 1.608,
      "step": 702
    },
    {
      "epoch": 0.14411644116441163,
      "grad_norm": 0.6671549962916544,
      "learning_rate": 0.000193261326437941,
      "loss": 1.7626,
      "step": 703
    },
    {
      "epoch": 0.14432144321443213,
      "grad_norm": 0.7294815007273447,
      "learning_rate": 0.0001932373420266266,
      "loss": 1.6816,
      "step": 704
    },
    {
      "epoch": 0.14452644526445266,
      "grad_norm": 0.7136490706611349,
      "learning_rate": 0.0001932133165018815,
      "loss": 1.7224,
      "step": 705
    },
    {
      "epoch": 0.14473144731447316,
      "grad_norm": 0.6768368867330093,
      "learning_rate": 0.0001931892498742999,
      "loss": 1.7045,
      "step": 706
    },
    {
      "epoch": 0.14493644936449365,
      "grad_norm": 0.6901595801919572,
      "learning_rate": 0.00019316514215449404,
      "loss": 1.69,
      "step": 707
    },
    {
      "epoch": 0.14514145141451415,
      "grad_norm": 0.7379399680837766,
      "learning_rate": 0.0001931409933530944,
      "loss": 1.7071,
      "step": 708
    },
    {
      "epoch": 0.14534645346453465,
      "grad_norm": 0.6950229857578474,
      "learning_rate": 0.00019311680348074945,
      "loss": 1.6706,
      "step": 709
    },
    {
      "epoch": 0.14555145551455514,
      "grad_norm": 1.4239867677236318,
      "learning_rate": 0.00019309257254812584,
      "loss": 1.6475,
      "step": 710
    },
    {
      "epoch": 0.14575645756457564,
      "grad_norm": 0.7853253803550803,
      "learning_rate": 0.00019306830056590833,
      "loss": 1.7422,
      "step": 711
    },
    {
      "epoch": 0.14596145961459614,
      "grad_norm": 0.7912920302745574,
      "learning_rate": 0.00019304398754479976,
      "loss": 1.6676,
      "step": 712
    },
    {
      "epoch": 0.14616646166461664,
      "grad_norm": 0.7559316805604154,
      "learning_rate": 0.000193019633495521,
      "loss": 1.705,
      "step": 713
    },
    {
      "epoch": 0.14637146371463713,
      "grad_norm": 0.7515552055905841,
      "learning_rate": 0.00019299523842881118,
      "loss": 1.6541,
      "step": 714
    },
    {
      "epoch": 0.14657646576465766,
      "grad_norm": 0.7718033361975094,
      "learning_rate": 0.00019297080235542731,
      "loss": 1.6959,
      "step": 715
    },
    {
      "epoch": 0.14678146781467816,
      "grad_norm": 0.7365498016658,
      "learning_rate": 0.0001929463252861447,
      "loss": 1.6972,
      "step": 716
    },
    {
      "epoch": 0.14698646986469865,
      "grad_norm": 0.6738686695891294,
      "learning_rate": 0.00019292180723175654,
      "loss": 1.675,
      "step": 717
    },
    {
      "epoch": 0.14719147191471915,
      "grad_norm": 0.7561118156387349,
      "learning_rate": 0.0001928972482030742,
      "loss": 1.7018,
      "step": 718
    },
    {
      "epoch": 0.14739647396473965,
      "grad_norm": 0.7795391791753485,
      "learning_rate": 0.0001928726482109271,
      "loss": 1.7041,
      "step": 719
    },
    {
      "epoch": 0.14760147601476015,
      "grad_norm": 0.6935708735515262,
      "learning_rate": 0.00019284800726616274,
      "loss": 1.6432,
      "step": 720
    },
    {
      "epoch": 0.14780647806478064,
      "grad_norm": 0.7664408672918853,
      "learning_rate": 0.00019282332537964663,
      "loss": 1.7302,
      "step": 721
    },
    {
      "epoch": 0.14801148011480114,
      "grad_norm": 0.7477278209068755,
      "learning_rate": 0.0001927986025622624,
      "loss": 1.6377,
      "step": 722
    },
    {
      "epoch": 0.14821648216482164,
      "grad_norm": 0.7276436261402113,
      "learning_rate": 0.0001927738388249116,
      "loss": 1.6981,
      "step": 723
    },
    {
      "epoch": 0.14842148421484214,
      "grad_norm": 0.8276994419711222,
      "learning_rate": 0.000192749034178514,
      "loss": 1.7433,
      "step": 724
    },
    {
      "epoch": 0.14862648626486266,
      "grad_norm": 0.749452837493542,
      "learning_rate": 0.00019272418863400728,
      "loss": 1.6612,
      "step": 725
    },
    {
      "epoch": 0.14883148831488316,
      "grad_norm": 0.7116894702731099,
      "learning_rate": 0.0001926993022023472,
      "loss": 1.7382,
      "step": 726
    },
    {
      "epoch": 0.14903649036490366,
      "grad_norm": 0.8298799172038066,
      "learning_rate": 0.0001926743748945076,
      "loss": 1.6468,
      "step": 727
    },
    {
      "epoch": 0.14924149241492415,
      "grad_norm": 0.7418561397260032,
      "learning_rate": 0.00019264940672148018,
      "loss": 1.6234,
      "step": 728
    },
    {
      "epoch": 0.14944649446494465,
      "grad_norm": 0.691296953981057,
      "learning_rate": 0.00019262439769427488,
      "loss": 1.6744,
      "step": 729
    },
    {
      "epoch": 0.14965149651496515,
      "grad_norm": 0.7064986352931485,
      "learning_rate": 0.00019259934782391946,
      "loss": 1.7109,
      "step": 730
    },
    {
      "epoch": 0.14985649856498565,
      "grad_norm": 0.7862650639141375,
      "learning_rate": 0.00019257425712145986,
      "loss": 1.6965,
      "step": 731
    },
    {
      "epoch": 0.15006150061500614,
      "grad_norm": 0.7759906572416613,
      "learning_rate": 0.00019254912559795982,
      "loss": 1.7832,
      "step": 732
    },
    {
      "epoch": 0.15026650266502664,
      "grad_norm": 0.689959023978982,
      "learning_rate": 0.00019252395326450132,
      "loss": 1.7431,
      "step": 733
    },
    {
      "epoch": 0.15047150471504714,
      "grad_norm": 0.7773747442610998,
      "learning_rate": 0.00019249874013218415,
      "loss": 1.674,
      "step": 734
    },
    {
      "epoch": 0.15067650676506766,
      "grad_norm": 0.7046889912343489,
      "learning_rate": 0.0001924734862121262,
      "loss": 1.6346,
      "step": 735
    },
    {
      "epoch": 0.15088150881508816,
      "grad_norm": 0.7343048491563514,
      "learning_rate": 0.00019244819151546322,
      "loss": 1.7504,
      "step": 736
    },
    {
      "epoch": 0.15108651086510866,
      "grad_norm": 0.7836332111854394,
      "learning_rate": 0.00019242285605334912,
      "loss": 1.7218,
      "step": 737
    },
    {
      "epoch": 0.15129151291512916,
      "grad_norm": 0.719339195666881,
      "learning_rate": 0.00019239747983695562,
      "loss": 1.7019,
      "step": 738
    },
    {
      "epoch": 0.15149651496514965,
      "grad_norm": 0.6783349731793807,
      "learning_rate": 0.00019237206287747252,
      "loss": 1.6142,
      "step": 739
    },
    {
      "epoch": 0.15170151701517015,
      "grad_norm": 0.7290436025627312,
      "learning_rate": 0.0001923466051861075,
      "loss": 1.6786,
      "step": 740
    },
    {
      "epoch": 0.15190651906519065,
      "grad_norm": 0.7727689220580422,
      "learning_rate": 0.00019232110677408625,
      "loss": 1.7007,
      "step": 741
    },
    {
      "epoch": 0.15211152111521115,
      "grad_norm": 0.7224680246507847,
      "learning_rate": 0.00019229556765265246,
      "loss": 1.6901,
      "step": 742
    },
    {
      "epoch": 0.15231652316523164,
      "grad_norm": 0.7535700570650502,
      "learning_rate": 0.00019226998783306772,
      "loss": 1.6844,
      "step": 743
    },
    {
      "epoch": 0.15252152521525214,
      "grad_norm": 0.8030946274710645,
      "learning_rate": 0.00019224436732661148,
      "loss": 1.7006,
      "step": 744
    },
    {
      "epoch": 0.15272652726527267,
      "grad_norm": 0.7401564290099959,
      "learning_rate": 0.0001922187061445813,
      "loss": 1.6807,
      "step": 745
    },
    {
      "epoch": 0.15293152931529316,
      "grad_norm": 0.8159603172747175,
      "learning_rate": 0.00019219300429829258,
      "loss": 1.7427,
      "step": 746
    },
    {
      "epoch": 0.15313653136531366,
      "grad_norm": 0.7196371365597042,
      "learning_rate": 0.00019216726179907868,
      "loss": 1.6721,
      "step": 747
    },
    {
      "epoch": 0.15334153341533416,
      "grad_norm": 0.7042282056199732,
      "learning_rate": 0.00019214147865829082,
      "loss": 1.7774,
      "step": 748
    },
    {
      "epoch": 0.15354653546535466,
      "grad_norm": 0.7038330647815587,
      "learning_rate": 0.00019211565488729825,
      "loss": 1.6748,
      "step": 749
    },
    {
      "epoch": 0.15375153751537515,
      "grad_norm": 0.7215543246141798,
      "learning_rate": 0.00019208979049748805,
      "loss": 1.6464,
      "step": 750
    },
    {
      "epoch": 0.15395653956539565,
      "grad_norm": 0.650350189502158,
      "learning_rate": 0.00019206388550026523,
      "loss": 1.805,
      "step": 751
    },
    {
      "epoch": 0.15416154161541615,
      "grad_norm": 0.7248020516056501,
      "learning_rate": 0.00019203793990705273,
      "loss": 1.6476,
      "step": 752
    },
    {
      "epoch": 0.15436654366543665,
      "grad_norm": 0.6850130547924245,
      "learning_rate": 0.00019201195372929139,
      "loss": 1.6508,
      "step": 753
    },
    {
      "epoch": 0.15457154571545714,
      "grad_norm": 0.6584037461711865,
      "learning_rate": 0.0001919859269784399,
      "loss": 1.6975,
      "step": 754
    },
    {
      "epoch": 0.15477654776547767,
      "grad_norm": 0.7042533368717044,
      "learning_rate": 0.00019195985966597494,
      "loss": 1.7084,
      "step": 755
    },
    {
      "epoch": 0.15498154981549817,
      "grad_norm": 0.6916924771265472,
      "learning_rate": 0.00019193375180339095,
      "loss": 1.6243,
      "step": 756
    },
    {
      "epoch": 0.15518655186551866,
      "grad_norm": 0.789790297021731,
      "learning_rate": 0.0001919076034022003,
      "loss": 1.7046,
      "step": 757
    },
    {
      "epoch": 0.15539155391553916,
      "grad_norm": 0.6875331912721294,
      "learning_rate": 0.00019188141447393334,
      "loss": 1.6407,
      "step": 758
    },
    {
      "epoch": 0.15559655596555966,
      "grad_norm": 0.7752788318017302,
      "learning_rate": 0.00019185518503013807,
      "loss": 1.7957,
      "step": 759
    },
    {
      "epoch": 0.15580155801558015,
      "grad_norm": 0.8078684289774277,
      "learning_rate": 0.00019182891508238056,
      "loss": 1.6765,
      "step": 760
    },
    {
      "epoch": 0.15600656006560065,
      "grad_norm": 0.6386796570512407,
      "learning_rate": 0.00019180260464224467,
      "loss": 1.6398,
      "step": 761
    },
    {
      "epoch": 0.15621156211562115,
      "grad_norm": 0.6455166552222789,
      "learning_rate": 0.00019177625372133209,
      "loss": 1.6925,
      "step": 762
    },
    {
      "epoch": 0.15641656416564165,
      "grad_norm": 0.810862189886265,
      "learning_rate": 0.00019174986233126234,
      "loss": 1.7278,
      "step": 763
    },
    {
      "epoch": 0.15662156621566214,
      "grad_norm": 0.6163496250678044,
      "learning_rate": 0.00019172343048367289,
      "loss": 1.6303,
      "step": 764
    },
    {
      "epoch": 0.15682656826568267,
      "grad_norm": 0.696217271270026,
      "learning_rate": 0.00019169695819021892,
      "loss": 1.7329,
      "step": 765
    },
    {
      "epoch": 0.15703157031570317,
      "grad_norm": 0.7349137675712252,
      "learning_rate": 0.00019167044546257355,
      "loss": 1.673,
      "step": 766
    },
    {
      "epoch": 0.15723657236572366,
      "grad_norm": 0.7847045456766413,
      "learning_rate": 0.0001916438923124277,
      "loss": 1.6088,
      "step": 767
    },
    {
      "epoch": 0.15744157441574416,
      "grad_norm": 0.7747429199903232,
      "learning_rate": 0.00019161729875149006,
      "loss": 1.7312,
      "step": 768
    },
    {
      "epoch": 0.15764657646576466,
      "grad_norm": 0.7437304522919976,
      "learning_rate": 0.0001915906647914872,
      "loss": 1.7226,
      "step": 769
    },
    {
      "epoch": 0.15785157851578516,
      "grad_norm": 0.7334915002288332,
      "learning_rate": 0.00019156399044416352,
      "loss": 1.6969,
      "step": 770
    },
    {
      "epoch": 0.15805658056580565,
      "grad_norm": 0.6953103054492031,
      "learning_rate": 0.00019153727572128113,
      "loss": 1.6887,
      "step": 771
    },
    {
      "epoch": 0.15826158261582615,
      "grad_norm": 0.687499632492689,
      "learning_rate": 0.00019151052063462007,
      "loss": 1.7419,
      "step": 772
    },
    {
      "epoch": 0.15846658466584665,
      "grad_norm": 0.7805948008431428,
      "learning_rate": 0.00019148372519597808,
      "loss": 1.749,
      "step": 773
    },
    {
      "epoch": 0.15867158671586715,
      "grad_norm": 0.7818982203947401,
      "learning_rate": 0.00019145688941717075,
      "loss": 1.6652,
      "step": 774
    },
    {
      "epoch": 0.15887658876588767,
      "grad_norm": 0.7320495222334545,
      "learning_rate": 0.0001914300133100314,
      "loss": 1.6915,
      "step": 775
    },
    {
      "epoch": 0.15908159081590817,
      "grad_norm": 0.7804260936083108,
      "learning_rate": 0.00019140309688641123,
      "loss": 1.6963,
      "step": 776
    },
    {
      "epoch": 0.15928659286592867,
      "grad_norm": 0.7604663864321175,
      "learning_rate": 0.0001913761401581791,
      "loss": 1.6859,
      "step": 777
    },
    {
      "epoch": 0.15949159491594916,
      "grad_norm": 0.6727302874956032,
      "learning_rate": 0.00019134914313722178,
      "loss": 1.6377,
      "step": 778
    },
    {
      "epoch": 0.15969659696596966,
      "grad_norm": 0.7101389386956383,
      "learning_rate": 0.00019132210583544366,
      "loss": 1.7482,
      "step": 779
    },
    {
      "epoch": 0.15990159901599016,
      "grad_norm": 0.6984364332599773,
      "learning_rate": 0.00019129502826476698,
      "loss": 1.7494,
      "step": 780
    },
    {
      "epoch": 0.16010660106601066,
      "grad_norm": 0.7207480762012807,
      "learning_rate": 0.00019126791043713173,
      "loss": 1.674,
      "step": 781
    },
    {
      "epoch": 0.16031160311603115,
      "grad_norm": 0.6498071125057682,
      "learning_rate": 0.0001912407523644956,
      "loss": 1.716,
      "step": 782
    },
    {
      "epoch": 0.16051660516605165,
      "grad_norm": 0.7053619225609432,
      "learning_rate": 0.00019121355405883413,
      "loss": 1.579,
      "step": 783
    },
    {
      "epoch": 0.16072160721607215,
      "grad_norm": 0.674479155987102,
      "learning_rate": 0.0001911863155321405,
      "loss": 1.7246,
      "step": 784
    },
    {
      "epoch": 0.16092660926609267,
      "grad_norm": 0.7343009952595465,
      "learning_rate": 0.00019115903679642565,
      "loss": 1.7329,
      "step": 785
    },
    {
      "epoch": 0.16113161131611317,
      "grad_norm": 0.7132288209781151,
      "learning_rate": 0.0001911317178637183,
      "loss": 1.7128,
      "step": 786
    },
    {
      "epoch": 0.16133661336613367,
      "grad_norm": 0.6988174737208028,
      "learning_rate": 0.00019110435874606486,
      "loss": 1.7224,
      "step": 787
    },
    {
      "epoch": 0.16154161541615417,
      "grad_norm": 0.688544636732044,
      "learning_rate": 0.0001910769594555294,
      "loss": 1.7219,
      "step": 788
    },
    {
      "epoch": 0.16174661746617466,
      "grad_norm": 0.6968777239911965,
      "learning_rate": 0.00019104952000419378,
      "loss": 1.7181,
      "step": 789
    },
    {
      "epoch": 0.16195161951619516,
      "grad_norm": 0.6527150151185663,
      "learning_rate": 0.00019102204040415761,
      "loss": 1.6811,
      "step": 790
    },
    {
      "epoch": 0.16215662156621566,
      "grad_norm": 0.6982138890248064,
      "learning_rate": 0.00019099452066753808,
      "loss": 1.6319,
      "step": 791
    },
    {
      "epoch": 0.16236162361623616,
      "grad_norm": 0.7445818886153832,
      "learning_rate": 0.00019096696080647018,
      "loss": 1.66,
      "step": 792
    },
    {
      "epoch": 0.16256662566625665,
      "grad_norm": 0.6518191707772367,
      "learning_rate": 0.00019093936083310653,
      "loss": 1.7224,
      "step": 793
    },
    {
      "epoch": 0.16277162771627715,
      "grad_norm": 0.7155219264389436,
      "learning_rate": 0.00019091172075961748,
      "loss": 1.7605,
      "step": 794
    },
    {
      "epoch": 0.16297662976629768,
      "grad_norm": 0.7073441135239922,
      "learning_rate": 0.00019088404059819104,
      "loss": 1.7127,
      "step": 795
    },
    {
      "epoch": 0.16318163181631817,
      "grad_norm": 0.7002062904148844,
      "learning_rate": 0.0001908563203610329,
      "loss": 1.701,
      "step": 796
    },
    {
      "epoch": 0.16338663386633867,
      "grad_norm": 0.6533771171765951,
      "learning_rate": 0.00019082856006036645,
      "loss": 1.635,
      "step": 797
    },
    {
      "epoch": 0.16359163591635917,
      "grad_norm": 0.7396803765413693,
      "learning_rate": 0.0001908007597084327,
      "loss": 1.7317,
      "step": 798
    },
    {
      "epoch": 0.16379663796637967,
      "grad_norm": 0.6843103151239934,
      "learning_rate": 0.0001907729193174903,
      "loss": 1.6857,
      "step": 799
    },
    {
      "epoch": 0.16400164001640016,
      "grad_norm": 0.6891857006068521,
      "learning_rate": 0.00019074503889981566,
      "loss": 1.6891,
      "step": 800
    },
    {
      "epoch": 0.16420664206642066,
      "grad_norm": 0.7208825918374903,
      "learning_rate": 0.0001907171184677028,
      "loss": 1.7782,
      "step": 801
    },
    {
      "epoch": 0.16441164411644116,
      "grad_norm": 0.7424150106814328,
      "learning_rate": 0.0001906891580334633,
      "loss": 1.719,
      "step": 802
    },
    {
      "epoch": 0.16461664616646166,
      "grad_norm": 0.6813977567751245,
      "learning_rate": 0.00019066115760942647,
      "loss": 1.7918,
      "step": 803
    },
    {
      "epoch": 0.16482164821648215,
      "grad_norm": 0.6815153922978643,
      "learning_rate": 0.00019063311720793918,
      "loss": 1.7102,
      "step": 804
    },
    {
      "epoch": 0.16502665026650268,
      "grad_norm": 0.7434835896386757,
      "learning_rate": 0.00019060503684136603,
      "loss": 1.6669,
      "step": 805
    },
    {
      "epoch": 0.16523165231652318,
      "grad_norm": 0.7176791513974383,
      "learning_rate": 0.00019057691652208915,
      "loss": 1.6751,
      "step": 806
    },
    {
      "epoch": 0.16543665436654367,
      "grad_norm": 0.6404635546952481,
      "learning_rate": 0.00019054875626250834,
      "loss": 1.6802,
      "step": 807
    },
    {
      "epoch": 0.16564165641656417,
      "grad_norm": 0.7862699666116406,
      "learning_rate": 0.00019052055607504103,
      "loss": 1.7165,
      "step": 808
    },
    {
      "epoch": 0.16584665846658467,
      "grad_norm": 0.6748901758704504,
      "learning_rate": 0.00019049231597212214,
      "loss": 1.6411,
      "step": 809
    },
    {
      "epoch": 0.16605166051660517,
      "grad_norm": 0.6334195790465038,
      "learning_rate": 0.0001904640359662043,
      "loss": 1.666,
      "step": 810
    },
    {
      "epoch": 0.16625666256662566,
      "grad_norm": 0.7032533821534248,
      "learning_rate": 0.00019043571606975777,
      "loss": 1.7189,
      "step": 811
    },
    {
      "epoch": 0.16646166461664616,
      "grad_norm": 0.6736035565240697,
      "learning_rate": 0.00019040735629527027,
      "loss": 1.7225,
      "step": 812
    },
    {
      "epoch": 0.16666666666666666,
      "grad_norm": 0.6990622565895293,
      "learning_rate": 0.00019037895665524715,
      "loss": 1.6483,
      "step": 813
    },
    {
      "epoch": 0.16687166871668715,
      "grad_norm": 0.7308111216648203,
      "learning_rate": 0.00019035051716221143,
      "loss": 1.6895,
      "step": 814
    },
    {
      "epoch": 0.16707667076670768,
      "grad_norm": 0.7051146623961817,
      "learning_rate": 0.00019032203782870364,
      "loss": 1.6705,
      "step": 815
    },
    {
      "epoch": 0.16728167281672818,
      "grad_norm": 0.7347551716579923,
      "learning_rate": 0.0001902935186672818,
      "loss": 1.7335,
      "step": 816
    },
    {
      "epoch": 0.16748667486674867,
      "grad_norm": 0.7324400798011572,
      "learning_rate": 0.00019026495969052162,
      "loss": 1.695,
      "step": 817
    },
    {
      "epoch": 0.16769167691676917,
      "grad_norm": 0.74982494417203,
      "learning_rate": 0.00019023636091101626,
      "loss": 1.652,
      "step": 818
    },
    {
      "epoch": 0.16789667896678967,
      "grad_norm": 0.7942630371480432,
      "learning_rate": 0.00019020772234137656,
      "loss": 1.7479,
      "step": 819
    },
    {
      "epoch": 0.16810168101681017,
      "grad_norm": 0.7354769200197937,
      "learning_rate": 0.00019017904399423077,
      "loss": 1.7174,
      "step": 820
    },
    {
      "epoch": 0.16830668306683066,
      "grad_norm": 0.7562489211350594,
      "learning_rate": 0.00019015032588222473,
      "loss": 1.6347,
      "step": 821
    },
    {
      "epoch": 0.16851168511685116,
      "grad_norm": 0.7595626084321155,
      "learning_rate": 0.00019012156801802182,
      "loss": 1.6914,
      "step": 822
    },
    {
      "epoch": 0.16871668716687166,
      "grad_norm": 0.7214950578904973,
      "learning_rate": 0.000190092770414303,
      "loss": 1.6705,
      "step": 823
    },
    {
      "epoch": 0.16892168921689216,
      "grad_norm": 0.7447618573876167,
      "learning_rate": 0.00019006393308376672,
      "loss": 1.6672,
      "step": 824
    },
    {
      "epoch": 0.16912669126691268,
      "grad_norm": 0.7445575734469274,
      "learning_rate": 0.00019003505603912884,
      "loss": 1.7343,
      "step": 825
    },
    {
      "epoch": 0.16933169331693318,
      "grad_norm": 0.6733678011951022,
      "learning_rate": 0.0001900061392931229,
      "loss": 1.7441,
      "step": 826
    },
    {
      "epoch": 0.16953669536695368,
      "grad_norm": 0.762440487524822,
      "learning_rate": 0.00018997718285849983,
      "loss": 1.6341,
      "step": 827
    },
    {
      "epoch": 0.16974169741697417,
      "grad_norm": 0.701991498412628,
      "learning_rate": 0.0001899481867480281,
      "loss": 1.6574,
      "step": 828
    },
    {
      "epoch": 0.16994669946699467,
      "grad_norm": 0.6739744404871206,
      "learning_rate": 0.0001899191509744937,
      "loss": 1.6725,
      "step": 829
    },
    {
      "epoch": 0.17015170151701517,
      "grad_norm": 0.6718533792642212,
      "learning_rate": 0.0001898900755507001,
      "loss": 1.6701,
      "step": 830
    },
    {
      "epoch": 0.17035670356703567,
      "grad_norm": 0.6811662941338024,
      "learning_rate": 0.00018986096048946824,
      "loss": 1.7758,
      "step": 831
    },
    {
      "epoch": 0.17056170561705616,
      "grad_norm": 0.6874609101501581,
      "learning_rate": 0.0001898318058036365,
      "loss": 1.7216,
      "step": 832
    },
    {
      "epoch": 0.17076670766707666,
      "grad_norm": 0.6533975558087971,
      "learning_rate": 0.00018980261150606075,
      "loss": 1.7028,
      "step": 833
    },
    {
      "epoch": 0.17097170971709716,
      "grad_norm": 0.7098335837225809,
      "learning_rate": 0.00018977337760961444,
      "loss": 1.7797,
      "step": 834
    },
    {
      "epoch": 0.17117671176711768,
      "grad_norm": 0.6179015872900394,
      "learning_rate": 0.00018974410412718836,
      "loss": 1.688,
      "step": 835
    },
    {
      "epoch": 0.17138171381713818,
      "grad_norm": 0.580510879130731,
      "learning_rate": 0.0001897147910716907,
      "loss": 1.7012,
      "step": 836
    },
    {
      "epoch": 0.17158671586715868,
      "grad_norm": 0.6795642698500958,
      "learning_rate": 0.0001896854384560473,
      "loss": 1.7463,
      "step": 837
    },
    {
      "epoch": 0.17179171791717918,
      "grad_norm": 0.5666775996494776,
      "learning_rate": 0.00018965604629320125,
      "loss": 1.6507,
      "step": 838
    },
    {
      "epoch": 0.17199671996719967,
      "grad_norm": 0.6045763823421743,
      "learning_rate": 0.00018962661459611318,
      "loss": 1.7533,
      "step": 839
    },
    {
      "epoch": 0.17220172201722017,
      "grad_norm": 0.6508738877098017,
      "learning_rate": 0.0001895971433777612,
      "loss": 1.6705,
      "step": 840
    },
    {
      "epoch": 0.17240672406724067,
      "grad_norm": 0.7193160516803674,
      "learning_rate": 0.00018956763265114065,
      "loss": 1.5835,
      "step": 841
    },
    {
      "epoch": 0.17261172611726117,
      "grad_norm": 0.6129779610520512,
      "learning_rate": 0.00018953808242926453,
      "loss": 1.6987,
      "step": 842
    },
    {
      "epoch": 0.17281672816728166,
      "grad_norm": 0.7416867858127081,
      "learning_rate": 0.0001895084927251631,
      "loss": 1.7034,
      "step": 843
    },
    {
      "epoch": 0.17302173021730216,
      "grad_norm": 0.6270794650525814,
      "learning_rate": 0.00018947886355188406,
      "loss": 1.6292,
      "step": 844
    },
    {
      "epoch": 0.1732267322673227,
      "grad_norm": 0.6314331647234465,
      "learning_rate": 0.00018944919492249254,
      "loss": 1.726,
      "step": 845
    },
    {
      "epoch": 0.17343173431734318,
      "grad_norm": 0.6365832806860144,
      "learning_rate": 0.00018941948685007108,
      "loss": 1.7083,
      "step": 846
    },
    {
      "epoch": 0.17363673636736368,
      "grad_norm": 0.6234270131402238,
      "learning_rate": 0.00018938973934771956,
      "loss": 1.6867,
      "step": 847
    },
    {
      "epoch": 0.17384173841738418,
      "grad_norm": 0.6091048529498941,
      "learning_rate": 0.0001893599524285553,
      "loss": 1.6649,
      "step": 848
    },
    {
      "epoch": 0.17404674046740468,
      "grad_norm": 0.6382079176903626,
      "learning_rate": 0.00018933012610571295,
      "loss": 1.6588,
      "step": 849
    },
    {
      "epoch": 0.17425174251742517,
      "grad_norm": 0.7037016853945891,
      "learning_rate": 0.0001893002603923446,
      "loss": 1.7812,
      "step": 850
    },
    {
      "epoch": 0.17445674456744567,
      "grad_norm": 0.6517788002418122,
      "learning_rate": 0.00018927035530161962,
      "loss": 1.6209,
      "step": 851
    },
    {
      "epoch": 0.17466174661746617,
      "grad_norm": 0.595878747391538,
      "learning_rate": 0.00018924041084672486,
      "loss": 1.6769,
      "step": 852
    },
    {
      "epoch": 0.17486674866748667,
      "grad_norm": 0.6481032387337692,
      "learning_rate": 0.00018921042704086443,
      "loss": 1.6431,
      "step": 853
    },
    {
      "epoch": 0.17507175071750716,
      "grad_norm": 0.6672757210081901,
      "learning_rate": 0.00018918040389725982,
      "loss": 1.7228,
      "step": 854
    },
    {
      "epoch": 0.1752767527675277,
      "grad_norm": 0.6868433443528669,
      "learning_rate": 0.00018915034142914986,
      "loss": 1.7128,
      "step": 855
    },
    {
      "epoch": 0.17548175481754819,
      "grad_norm": 0.6167996729418488,
      "learning_rate": 0.00018912023964979077,
      "loss": 1.6251,
      "step": 856
    },
    {
      "epoch": 0.17568675686756868,
      "grad_norm": 0.7049193924498683,
      "learning_rate": 0.00018909009857245605,
      "loss": 1.7385,
      "step": 857
    },
    {
      "epoch": 0.17589175891758918,
      "grad_norm": 0.754097457272695,
      "learning_rate": 0.00018905991821043652,
      "loss": 1.6502,
      "step": 858
    },
    {
      "epoch": 0.17609676096760968,
      "grad_norm": 0.738043389759603,
      "learning_rate": 0.00018902969857704043,
      "loss": 1.6915,
      "step": 859
    },
    {
      "epoch": 0.17630176301763018,
      "grad_norm": 0.6476270549640606,
      "learning_rate": 0.00018899943968559316,
      "loss": 1.6814,
      "step": 860
    },
    {
      "epoch": 0.17650676506765067,
      "grad_norm": 0.7189972551247747,
      "learning_rate": 0.00018896914154943758,
      "loss": 1.5685,
      "step": 861
    },
    {
      "epoch": 0.17671176711767117,
      "grad_norm": 0.7085270944540953,
      "learning_rate": 0.00018893880418193376,
      "loss": 1.7272,
      "step": 862
    },
    {
      "epoch": 0.17691676916769167,
      "grad_norm": 0.6505751176170754,
      "learning_rate": 0.00018890842759645908,
      "loss": 1.7331,
      "step": 863
    },
    {
      "epoch": 0.17712177121771217,
      "grad_norm": 0.7960554282230355,
      "learning_rate": 0.00018887801180640827,
      "loss": 1.7332,
      "step": 864
    },
    {
      "epoch": 0.1773267732677327,
      "grad_norm": 0.6346770840549366,
      "learning_rate": 0.00018884755682519328,
      "loss": 1.6385,
      "step": 865
    },
    {
      "epoch": 0.1775317753177532,
      "grad_norm": 0.6484925128459751,
      "learning_rate": 0.00018881706266624343,
      "loss": 1.6236,
      "step": 866
    },
    {
      "epoch": 0.17773677736777369,
      "grad_norm": 0.6834546493061789,
      "learning_rate": 0.0001887865293430052,
      "loss": 1.6875,
      "step": 867
    },
    {
      "epoch": 0.17794177941779418,
      "grad_norm": 0.625820435126923,
      "learning_rate": 0.00018875595686894243,
      "loss": 1.6337,
      "step": 868
    },
    {
      "epoch": 0.17814678146781468,
      "grad_norm": 0.6976994235085078,
      "learning_rate": 0.00018872534525753615,
      "loss": 1.6852,
      "step": 869
    },
    {
      "epoch": 0.17835178351783518,
      "grad_norm": 0.663386461429031,
      "learning_rate": 0.00018869469452228476,
      "loss": 1.6919,
      "step": 870
    },
    {
      "epoch": 0.17855678556785567,
      "grad_norm": 0.6523917836752499,
      "learning_rate": 0.00018866400467670378,
      "loss": 1.7506,
      "step": 871
    },
    {
      "epoch": 0.17876178761787617,
      "grad_norm": 0.642552322851459,
      "learning_rate": 0.00018863327573432606,
      "loss": 1.7302,
      "step": 872
    },
    {
      "epoch": 0.17896678966789667,
      "grad_norm": 0.696356842279368,
      "learning_rate": 0.00018860250770870167,
      "loss": 1.6714,
      "step": 873
    },
    {
      "epoch": 0.17917179171791717,
      "grad_norm": 0.6254447508296375,
      "learning_rate": 0.0001885717006133979,
      "loss": 1.5611,
      "step": 874
    },
    {
      "epoch": 0.1793767937679377,
      "grad_norm": 0.6867999913543839,
      "learning_rate": 0.0001885408544619993,
      "loss": 1.7095,
      "step": 875
    },
    {
      "epoch": 0.1795817958179582,
      "grad_norm": 0.6469042868572288,
      "learning_rate": 0.0001885099692681076,
      "loss": 1.6229,
      "step": 876
    },
    {
      "epoch": 0.1797867978679787,
      "grad_norm": 0.6901766814219229,
      "learning_rate": 0.00018847904504534175,
      "loss": 1.7213,
      "step": 877
    },
    {
      "epoch": 0.17999179991799918,
      "grad_norm": 0.6621394493706978,
      "learning_rate": 0.00018844808180733797,
      "loss": 1.6127,
      "step": 878
    },
    {
      "epoch": 0.18019680196801968,
      "grad_norm": 0.6281790144588328,
      "learning_rate": 0.00018841707956774963,
      "loss": 1.6385,
      "step": 879
    },
    {
      "epoch": 0.18040180401804018,
      "grad_norm": 0.6411798812730471,
      "learning_rate": 0.00018838603834024729,
      "loss": 1.7314,
      "step": 880
    },
    {
      "epoch": 0.18060680606806068,
      "grad_norm": 0.6230387534082905,
      "learning_rate": 0.0001883549581385187,
      "loss": 1.6461,
      "step": 881
    },
    {
      "epoch": 0.18081180811808117,
      "grad_norm": 0.6955757939286624,
      "learning_rate": 0.00018832383897626892,
      "loss": 1.7007,
      "step": 882
    },
    {
      "epoch": 0.18101681016810167,
      "grad_norm": 0.6160689276828045,
      "learning_rate": 0.00018829268086721995,
      "loss": 1.6704,
      "step": 883
    },
    {
      "epoch": 0.18122181221812217,
      "grad_norm": 0.6294389998455219,
      "learning_rate": 0.0001882614838251112,
      "loss": 1.6471,
      "step": 884
    },
    {
      "epoch": 0.1814268142681427,
      "grad_norm": 0.6831616359839734,
      "learning_rate": 0.00018823024786369908,
      "loss": 1.6823,
      "step": 885
    },
    {
      "epoch": 0.1816318163181632,
      "grad_norm": 0.6948313729204673,
      "learning_rate": 0.00018819897299675726,
      "loss": 1.6644,
      "step": 886
    },
    {
      "epoch": 0.1818368183681837,
      "grad_norm": 0.5933553903879749,
      "learning_rate": 0.00018816765923807655,
      "loss": 1.6355,
      "step": 887
    },
    {
      "epoch": 0.1820418204182042,
      "grad_norm": 0.6718657080059676,
      "learning_rate": 0.00018813630660146488,
      "loss": 1.7208,
      "step": 888
    },
    {
      "epoch": 0.18224682246822468,
      "grad_norm": 0.6431331703803308,
      "learning_rate": 0.00018810491510074735,
      "loss": 1.6422,
      "step": 889
    },
    {
      "epoch": 0.18245182451824518,
      "grad_norm": 0.6522599073402241,
      "learning_rate": 0.00018807348474976618,
      "loss": 1.7161,
      "step": 890
    },
    {
      "epoch": 0.18265682656826568,
      "grad_norm": 0.6525287583387367,
      "learning_rate": 0.00018804201556238068,
      "loss": 1.6337,
      "step": 891
    },
    {
      "epoch": 0.18286182861828618,
      "grad_norm": 0.7264408965183254,
      "learning_rate": 0.0001880105075524674,
      "loss": 1.6296,
      "step": 892
    },
    {
      "epoch": 0.18306683066830667,
      "grad_norm": 0.7192954633367987,
      "learning_rate": 0.0001879789607339199,
      "loss": 1.6373,
      "step": 893
    },
    {
      "epoch": 0.18327183271832717,
      "grad_norm": 0.6692160236756574,
      "learning_rate": 0.0001879473751206489,
      "loss": 1.6973,
      "step": 894
    },
    {
      "epoch": 0.1834768347683477,
      "grad_norm": 0.665613575409276,
      "learning_rate": 0.00018791575072658225,
      "loss": 1.6229,
      "step": 895
    },
    {
      "epoch": 0.1836818368183682,
      "grad_norm": 0.7646965856440251,
      "learning_rate": 0.00018788408756566485,
      "loss": 1.7205,
      "step": 896
    },
    {
      "epoch": 0.1838868388683887,
      "grad_norm": 0.6247188692917274,
      "learning_rate": 0.00018785238565185866,
      "loss": 1.5969,
      "step": 897
    },
    {
      "epoch": 0.1840918409184092,
      "grad_norm": 0.6727511736404558,
      "learning_rate": 0.00018782064499914288,
      "loss": 1.6933,
      "step": 898
    },
    {
      "epoch": 0.18429684296842969,
      "grad_norm": 0.6824196078723912,
      "learning_rate": 0.00018778886562151366,
      "loss": 1.6754,
      "step": 899
    },
    {
      "epoch": 0.18450184501845018,
      "grad_norm": 0.7009488925557734,
      "learning_rate": 0.00018775704753298423,
      "loss": 1.6492,
      "step": 900
    },
    {
      "epoch": 0.18470684706847068,
      "grad_norm": 0.5620775171970334,
      "learning_rate": 0.0001877251907475849,
      "loss": 1.5734,
      "step": 901
    },
    {
      "epoch": 0.18491184911849118,
      "grad_norm": 0.662085497525188,
      "learning_rate": 0.00018769329527936317,
      "loss": 1.6812,
      "step": 902
    },
    {
      "epoch": 0.18511685116851168,
      "grad_norm": 0.7093135331815954,
      "learning_rate": 0.0001876613611423834,
      "loss": 1.6756,
      "step": 903
    },
    {
      "epoch": 0.18532185321853217,
      "grad_norm": 0.626843350279162,
      "learning_rate": 0.00018762938835072712,
      "loss": 1.6753,
      "step": 904
    },
    {
      "epoch": 0.1855268552685527,
      "grad_norm": 0.6209614968378987,
      "learning_rate": 0.00018759737691849288,
      "loss": 1.6498,
      "step": 905
    },
    {
      "epoch": 0.1857318573185732,
      "grad_norm": 0.7326164217270169,
      "learning_rate": 0.00018756532685979628,
      "loss": 1.711,
      "step": 906
    },
    {
      "epoch": 0.1859368593685937,
      "grad_norm": 0.6614405610821985,
      "learning_rate": 0.0001875332381887699,
      "loss": 1.6676,
      "step": 907
    },
    {
      "epoch": 0.1861418614186142,
      "grad_norm": 0.6623463439047697,
      "learning_rate": 0.00018750111091956346,
      "loss": 1.7013,
      "step": 908
    },
    {
      "epoch": 0.1863468634686347,
      "grad_norm": 0.6543491722596658,
      "learning_rate": 0.00018746894506634353,
      "loss": 1.7193,
      "step": 909
    },
    {
      "epoch": 0.18655186551865519,
      "grad_norm": 0.6192123062294089,
      "learning_rate": 0.00018743674064329387,
      "loss": 1.6958,
      "step": 910
    },
    {
      "epoch": 0.18675686756867568,
      "grad_norm": 0.559686087942116,
      "learning_rate": 0.00018740449766461512,
      "loss": 1.6373,
      "step": 911
    },
    {
      "epoch": 0.18696186961869618,
      "grad_norm": 0.6982378232968304,
      "learning_rate": 0.000187372216144525,
      "loss": 1.7026,
      "step": 912
    },
    {
      "epoch": 0.18716687166871668,
      "grad_norm": 0.710073928723775,
      "learning_rate": 0.00018733989609725815,
      "loss": 1.6801,
      "step": 913
    },
    {
      "epoch": 0.18737187371873718,
      "grad_norm": 0.6245826793678664,
      "learning_rate": 0.00018730753753706626,
      "loss": 1.6767,
      "step": 914
    },
    {
      "epoch": 0.1875768757687577,
      "grad_norm": 0.6103701115701096,
      "learning_rate": 0.00018727514047821802,
      "loss": 1.6452,
      "step": 915
    },
    {
      "epoch": 0.1877818778187782,
      "grad_norm": 0.686021057243264,
      "learning_rate": 0.00018724270493499903,
      "loss": 1.6239,
      "step": 916
    },
    {
      "epoch": 0.1879868798687987,
      "grad_norm": 0.6986842278961767,
      "learning_rate": 0.00018721023092171187,
      "loss": 1.6669,
      "step": 917
    },
    {
      "epoch": 0.1881918819188192,
      "grad_norm": 0.6354306436352476,
      "learning_rate": 0.00018717771845267614,
      "loss": 1.6364,
      "step": 918
    },
    {
      "epoch": 0.1883968839688397,
      "grad_norm": 0.7265544328486528,
      "learning_rate": 0.00018714516754222835,
      "loss": 1.6864,
      "step": 919
    },
    {
      "epoch": 0.1886018860188602,
      "grad_norm": 0.6790080191377459,
      "learning_rate": 0.00018711257820472197,
      "loss": 1.7714,
      "step": 920
    },
    {
      "epoch": 0.18880688806888068,
      "grad_norm": 0.6033377256121235,
      "learning_rate": 0.00018707995045452744,
      "loss": 1.7136,
      "step": 921
    },
    {
      "epoch": 0.18901189011890118,
      "grad_norm": 0.6661358074015494,
      "learning_rate": 0.00018704728430603206,
      "loss": 1.715,
      "step": 922
    },
    {
      "epoch": 0.18921689216892168,
      "grad_norm": 0.6739728687339908,
      "learning_rate": 0.00018701457977364017,
      "loss": 1.7122,
      "step": 923
    },
    {
      "epoch": 0.18942189421894218,
      "grad_norm": 0.708908289034046,
      "learning_rate": 0.00018698183687177296,
      "loss": 1.7211,
      "step": 924
    },
    {
      "epoch": 0.1896268962689627,
      "grad_norm": 0.6053915906514942,
      "learning_rate": 0.00018694905561486856,
      "loss": 1.6773,
      "step": 925
    },
    {
      "epoch": 0.1898318983189832,
      "grad_norm": 0.7007139325779282,
      "learning_rate": 0.00018691623601738199,
      "loss": 1.6532,
      "step": 926
    },
    {
      "epoch": 0.1900369003690037,
      "grad_norm": 0.7269221330685796,
      "learning_rate": 0.00018688337809378528,
      "loss": 1.6779,
      "step": 927
    },
    {
      "epoch": 0.1902419024190242,
      "grad_norm": 0.6831541499910486,
      "learning_rate": 0.00018685048185856722,
      "loss": 1.6481,
      "step": 928
    },
    {
      "epoch": 0.1904469044690447,
      "grad_norm": 0.6311514207678236,
      "learning_rate": 0.00018681754732623355,
      "loss": 1.6201,
      "step": 929
    },
    {
      "epoch": 0.1906519065190652,
      "grad_norm": 0.7593897817772871,
      "learning_rate": 0.00018678457451130691,
      "loss": 1.7087,
      "step": 930
    },
    {
      "epoch": 0.1908569085690857,
      "grad_norm": 0.7043794145600288,
      "learning_rate": 0.00018675156342832684,
      "loss": 1.6666,
      "step": 931
    },
    {
      "epoch": 0.19106191061910618,
      "grad_norm": 0.6435374384674419,
      "learning_rate": 0.0001867185140918497,
      "loss": 1.6296,
      "step": 932
    },
    {
      "epoch": 0.19126691266912668,
      "grad_norm": 0.6863965252841963,
      "learning_rate": 0.00018668542651644878,
      "loss": 1.6131,
      "step": 933
    },
    {
      "epoch": 0.19147191471914718,
      "grad_norm": 0.6623138147392169,
      "learning_rate": 0.00018665230071671413,
      "loss": 1.7351,
      "step": 934
    },
    {
      "epoch": 0.1916769167691677,
      "grad_norm": 0.5431701316896971,
      "learning_rate": 0.00018661913670725276,
      "loss": 1.6592,
      "step": 935
    },
    {
      "epoch": 0.1918819188191882,
      "grad_norm": 0.6212699878028247,
      "learning_rate": 0.00018658593450268852,
      "loss": 1.7047,
      "step": 936
    },
    {
      "epoch": 0.1920869208692087,
      "grad_norm": 0.6761152604779588,
      "learning_rate": 0.00018655269411766207,
      "loss": 1.6299,
      "step": 937
    },
    {
      "epoch": 0.1922919229192292,
      "grad_norm": 0.5955755276333651,
      "learning_rate": 0.00018651941556683085,
      "loss": 1.7577,
      "step": 938
    },
    {
      "epoch": 0.1924969249692497,
      "grad_norm": 0.6937537649155028,
      "learning_rate": 0.00018648609886486923,
      "loss": 1.6609,
      "step": 939
    },
    {
      "epoch": 0.1927019270192702,
      "grad_norm": 0.62982357910632,
      "learning_rate": 0.00018645274402646835,
      "loss": 1.6935,
      "step": 940
    },
    {
      "epoch": 0.1929069290692907,
      "grad_norm": 0.6297491935607556,
      "learning_rate": 0.00018641935106633622,
      "loss": 1.6599,
      "step": 941
    },
    {
      "epoch": 0.1931119311193112,
      "grad_norm": 0.6171966103138852,
      "learning_rate": 0.00018638591999919755,
      "loss": 1.6043,
      "step": 942
    },
    {
      "epoch": 0.19331693316933168,
      "grad_norm": 0.6342148064043517,
      "learning_rate": 0.00018635245083979394,
      "loss": 1.699,
      "step": 943
    },
    {
      "epoch": 0.19352193521935218,
      "grad_norm": 0.6778005036022733,
      "learning_rate": 0.00018631894360288383,
      "loss": 1.7222,
      "step": 944
    },
    {
      "epoch": 0.1937269372693727,
      "grad_norm": 0.7039712152490014,
      "learning_rate": 0.00018628539830324229,
      "loss": 1.6682,
      "step": 945
    },
    {
      "epoch": 0.1939319393193932,
      "grad_norm": 0.6359100004005492,
      "learning_rate": 0.00018625181495566135,
      "loss": 1.7002,
      "step": 946
    },
    {
      "epoch": 0.1941369413694137,
      "grad_norm": 0.6493205974074798,
      "learning_rate": 0.00018621819357494973,
      "loss": 1.6289,
      "step": 947
    },
    {
      "epoch": 0.1943419434194342,
      "grad_norm": 0.7134232018326085,
      "learning_rate": 0.00018618453417593286,
      "loss": 1.6818,
      "step": 948
    },
    {
      "epoch": 0.1945469454694547,
      "grad_norm": 0.6617690737765433,
      "learning_rate": 0.0001861508367734531,
      "loss": 1.6865,
      "step": 949
    },
    {
      "epoch": 0.1947519475194752,
      "grad_norm": 0.635854748112604,
      "learning_rate": 0.00018611710138236945,
      "loss": 1.6846,
      "step": 950
    },
    {
      "epoch": 0.1949569495694957,
      "grad_norm": 0.6819488168694777,
      "learning_rate": 0.00018608332801755764,
      "loss": 1.6825,
      "step": 951
    },
    {
      "epoch": 0.1951619516195162,
      "grad_norm": 0.6442455936564754,
      "learning_rate": 0.00018604951669391019,
      "loss": 1.6546,
      "step": 952
    },
    {
      "epoch": 0.19536695366953669,
      "grad_norm": 0.6260761239670712,
      "learning_rate": 0.00018601566742633643,
      "loss": 1.616,
      "step": 953
    },
    {
      "epoch": 0.19557195571955718,
      "grad_norm": 0.6311195148986711,
      "learning_rate": 0.0001859817802297623,
      "loss": 1.615,
      "step": 954
    },
    {
      "epoch": 0.1957769577695777,
      "grad_norm": 0.7400128305595713,
      "learning_rate": 0.00018594785511913048,
      "loss": 1.7351,
      "step": 955
    },
    {
      "epoch": 0.1959819598195982,
      "grad_norm": 0.7062002100164632,
      "learning_rate": 0.0001859138921094005,
      "loss": 1.6898,
      "step": 956
    },
    {
      "epoch": 0.1961869618696187,
      "grad_norm": 0.7025409544089614,
      "learning_rate": 0.0001858798912155484,
      "loss": 1.7283,
      "step": 957
    },
    {
      "epoch": 0.1963919639196392,
      "grad_norm": 0.7267700125822197,
      "learning_rate": 0.00018584585245256708,
      "loss": 1.6672,
      "step": 958
    },
    {
      "epoch": 0.1965969659696597,
      "grad_norm": 0.6454570816655464,
      "learning_rate": 0.00018581177583546605,
      "loss": 1.6633,
      "step": 959
    },
    {
      "epoch": 0.1968019680196802,
      "grad_norm": 0.681269214064641,
      "learning_rate": 0.00018577766137927161,
      "loss": 1.6807,
      "step": 960
    },
    {
      "epoch": 0.1970069700697007,
      "grad_norm": 0.6016372216397006,
      "learning_rate": 0.00018574350909902662,
      "loss": 1.6529,
      "step": 961
    },
    {
      "epoch": 0.1972119721197212,
      "grad_norm": 0.7040037068472542,
      "learning_rate": 0.00018570931900979077,
      "loss": 1.6621,
      "step": 962
    },
    {
      "epoch": 0.1974169741697417,
      "grad_norm": 0.7182140442483879,
      "learning_rate": 0.00018567509112664022,
      "loss": 1.6996,
      "step": 963
    },
    {
      "epoch": 0.19762197621976219,
      "grad_norm": 0.6923275670242,
      "learning_rate": 0.00018564082546466805,
      "loss": 1.7104,
      "step": 964
    },
    {
      "epoch": 0.1978269782697827,
      "grad_norm": 0.7235893988059019,
      "learning_rate": 0.0001856065220389837,
      "loss": 1.7492,
      "step": 965
    },
    {
      "epoch": 0.1980319803198032,
      "grad_norm": 0.6825868110391046,
      "learning_rate": 0.00018557218086471356,
      "loss": 1.7028,
      "step": 966
    },
    {
      "epoch": 0.1982369823698237,
      "grad_norm": 0.6804354496100514,
      "learning_rate": 0.00018553780195700042,
      "loss": 1.6618,
      "step": 967
    },
    {
      "epoch": 0.1984419844198442,
      "grad_norm": 0.6574339754924009,
      "learning_rate": 0.00018550338533100392,
      "loss": 1.63,
      "step": 968
    },
    {
      "epoch": 0.1986469864698647,
      "grad_norm": 0.7600921012603404,
      "learning_rate": 0.00018546893100190016,
      "loss": 1.6948,
      "step": 969
    },
    {
      "epoch": 0.1988519885198852,
      "grad_norm": 0.6543012879369916,
      "learning_rate": 0.00018543443898488197,
      "loss": 1.6899,
      "step": 970
    },
    {
      "epoch": 0.1990569905699057,
      "grad_norm": 0.7578265469318429,
      "learning_rate": 0.0001853999092951587,
      "loss": 1.7701,
      "step": 971
    },
    {
      "epoch": 0.1992619926199262,
      "grad_norm": 0.8386036267324629,
      "learning_rate": 0.00018536534194795644,
      "loss": 1.5844,
      "step": 972
    },
    {
      "epoch": 0.1994669946699467,
      "grad_norm": 0.7164929122871192,
      "learning_rate": 0.0001853307369585178,
      "loss": 1.6906,
      "step": 973
    },
    {
      "epoch": 0.1996719967199672,
      "grad_norm": 0.7492342299544166,
      "learning_rate": 0.00018529609434210197,
      "loss": 1.6171,
      "step": 974
    },
    {
      "epoch": 0.1998769987699877,
      "grad_norm": 0.6558700088496812,
      "learning_rate": 0.00018526141411398484,
      "loss": 1.6507,
      "step": 975
    },
    {
      "epoch": 0.2000820008200082,
      "grad_norm": 0.6426340152263696,
      "learning_rate": 0.00018522669628945874,
      "loss": 1.5916,
      "step": 976
    },
    {
      "epoch": 0.2002870028700287,
      "grad_norm": 0.6934145519717209,
      "learning_rate": 0.00018519194088383273,
      "loss": 1.651,
      "step": 977
    },
    {
      "epoch": 0.2004920049200492,
      "grad_norm": 0.7040967656789024,
      "learning_rate": 0.00018515714791243228,
      "loss": 1.6111,
      "step": 978
    },
    {
      "epoch": 0.2006970069700697,
      "grad_norm": 0.6901121663671723,
      "learning_rate": 0.00018512231739059958,
      "loss": 1.679,
      "step": 979
    },
    {
      "epoch": 0.2009020090200902,
      "grad_norm": 0.6868686799005908,
      "learning_rate": 0.00018508744933369328,
      "loss": 1.6811,
      "step": 980
    },
    {
      "epoch": 0.2011070110701107,
      "grad_norm": 0.746174738997187,
      "learning_rate": 0.0001850525437570886,
      "loss": 1.7036,
      "step": 981
    },
    {
      "epoch": 0.2013120131201312,
      "grad_norm": 0.7323903706351386,
      "learning_rate": 0.00018501760067617733,
      "loss": 1.6839,
      "step": 982
    },
    {
      "epoch": 0.2015170151701517,
      "grad_norm": 0.6249017402280476,
      "learning_rate": 0.00018498262010636774,
      "loss": 1.6363,
      "step": 983
    },
    {
      "epoch": 0.2017220172201722,
      "grad_norm": 0.6545683006113007,
      "learning_rate": 0.00018494760206308475,
      "loss": 1.7319,
      "step": 984
    },
    {
      "epoch": 0.20192701927019271,
      "grad_norm": 0.6704929354264713,
      "learning_rate": 0.00018491254656176966,
      "loss": 1.6923,
      "step": 985
    },
    {
      "epoch": 0.2021320213202132,
      "grad_norm": 0.6023514081810556,
      "learning_rate": 0.00018487745361788039,
      "loss": 1.6631,
      "step": 986
    },
    {
      "epoch": 0.2023370233702337,
      "grad_norm": 0.7083014769759648,
      "learning_rate": 0.00018484232324689125,
      "loss": 1.7633,
      "step": 987
    },
    {
      "epoch": 0.2025420254202542,
      "grad_norm": 0.6047125819281758,
      "learning_rate": 0.00018480715546429326,
      "loss": 1.6393,
      "step": 988
    },
    {
      "epoch": 0.2027470274702747,
      "grad_norm": 0.64552835821049,
      "learning_rate": 0.00018477195028559376,
      "loss": 1.7111,
      "step": 989
    },
    {
      "epoch": 0.2029520295202952,
      "grad_norm": 0.5766011316751815,
      "learning_rate": 0.0001847367077263166,
      "loss": 1.7569,
      "step": 990
    },
    {
      "epoch": 0.2031570315703157,
      "grad_norm": 0.6472249738364082,
      "learning_rate": 0.00018470142780200222,
      "loss": 1.6724,
      "step": 991
    },
    {
      "epoch": 0.2033620336203362,
      "grad_norm": 0.6631421149372152,
      "learning_rate": 0.00018466611052820738,
      "loss": 1.7017,
      "step": 992
    },
    {
      "epoch": 0.2035670356703567,
      "grad_norm": 0.67922295872642,
      "learning_rate": 0.00018463075592050547,
      "loss": 1.6453,
      "step": 993
    },
    {
      "epoch": 0.2037720377203772,
      "grad_norm": 0.5695165455860778,
      "learning_rate": 0.0001845953639944862,
      "loss": 1.693,
      "step": 994
    },
    {
      "epoch": 0.20397703977039772,
      "grad_norm": 0.6029213831421523,
      "learning_rate": 0.00018455993476575584,
      "loss": 1.7288,
      "step": 995
    },
    {
      "epoch": 0.20418204182041821,
      "grad_norm": 0.7098523384647641,
      "learning_rate": 0.00018452446824993704,
      "loss": 1.6359,
      "step": 996
    },
    {
      "epoch": 0.2043870438704387,
      "grad_norm": 0.6320691249332998,
      "learning_rate": 0.000184488964462669,
      "loss": 1.6041,
      "step": 997
    },
    {
      "epoch": 0.2045920459204592,
      "grad_norm": 0.6523600893387305,
      "learning_rate": 0.00018445342341960716,
      "loss": 1.7956,
      "step": 998
    },
    {
      "epoch": 0.2047970479704797,
      "grad_norm": 0.6267989345857562,
      "learning_rate": 0.0001844178451364236,
      "loss": 1.6364,
      "step": 999
    },
    {
      "epoch": 0.2050020500205002,
      "grad_norm": 0.6630669793804784,
      "learning_rate": 0.00018438222962880666,
      "loss": 1.6816,
      "step": 1000
    },
    {
      "epoch": 0.2052070520705207,
      "grad_norm": 0.6527480820089179,
      "learning_rate": 0.00018434657691246122,
      "loss": 1.7495,
      "step": 1001
    },
    {
      "epoch": 0.2054120541205412,
      "grad_norm": 0.6253097938893096,
      "learning_rate": 0.00018431088700310844,
      "loss": 1.6822,
      "step": 1002
    },
    {
      "epoch": 0.2056170561705617,
      "grad_norm": 0.6066983766470995,
      "learning_rate": 0.00018427515991648603,
      "loss": 1.6449,
      "step": 1003
    },
    {
      "epoch": 0.2058220582205822,
      "grad_norm": 0.6705916229756751,
      "learning_rate": 0.00018423939566834793,
      "loss": 1.7344,
      "step": 1004
    },
    {
      "epoch": 0.20602706027060272,
      "grad_norm": 0.5808359319763892,
      "learning_rate": 0.0001842035942744646,
      "loss": 1.6742,
      "step": 1005
    },
    {
      "epoch": 0.20623206232062322,
      "grad_norm": 0.6844397083927419,
      "learning_rate": 0.0001841677557506228,
      "loss": 1.6905,
      "step": 1006
    },
    {
      "epoch": 0.2064370643706437,
      "grad_norm": 0.6498127791345648,
      "learning_rate": 0.0001841318801126257,
      "loss": 1.6887,
      "step": 1007
    },
    {
      "epoch": 0.2066420664206642,
      "grad_norm": 0.5929726972211439,
      "learning_rate": 0.0001840959673762929,
      "loss": 1.6322,
      "step": 1008
    },
    {
      "epoch": 0.2068470684706847,
      "grad_norm": 0.6521214221380964,
      "learning_rate": 0.00018406001755746015,
      "loss": 1.646,
      "step": 1009
    },
    {
      "epoch": 0.2070520705207052,
      "grad_norm": 0.6553444612125416,
      "learning_rate": 0.00018402403067197974,
      "loss": 1.7022,
      "step": 1010
    },
    {
      "epoch": 0.2072570725707257,
      "grad_norm": 0.6878399963942156,
      "learning_rate": 0.0001839880067357203,
      "loss": 1.7154,
      "step": 1011
    },
    {
      "epoch": 0.2074620746207462,
      "grad_norm": 0.6195305531040249,
      "learning_rate": 0.00018395194576456667,
      "loss": 1.6426,
      "step": 1012
    },
    {
      "epoch": 0.2076670766707667,
      "grad_norm": 0.6540177502815521,
      "learning_rate": 0.00018391584777442015,
      "loss": 1.651,
      "step": 1013
    },
    {
      "epoch": 0.2078720787207872,
      "grad_norm": 0.6667688860618947,
      "learning_rate": 0.00018387971278119834,
      "loss": 1.6757,
      "step": 1014
    },
    {
      "epoch": 0.20807708077080772,
      "grad_norm": 0.6866339014112675,
      "learning_rate": 0.00018384354080083504,
      "loss": 1.66,
      "step": 1015
    },
    {
      "epoch": 0.20828208282082822,
      "grad_norm": 0.5714898507843518,
      "learning_rate": 0.00018380733184928053,
      "loss": 1.6507,
      "step": 1016
    },
    {
      "epoch": 0.20848708487084872,
      "grad_norm": 0.7040472841317994,
      "learning_rate": 0.0001837710859425013,
      "loss": 1.6743,
      "step": 1017
    },
    {
      "epoch": 0.2086920869208692,
      "grad_norm": 0.7176484615257038,
      "learning_rate": 0.0001837348030964801,
      "loss": 1.7361,
      "step": 1018
    },
    {
      "epoch": 0.2088970889708897,
      "grad_norm": 0.6394810283324819,
      "learning_rate": 0.00018369848332721607,
      "loss": 1.6316,
      "step": 1019
    },
    {
      "epoch": 0.2091020910209102,
      "grad_norm": 0.7614099541056943,
      "learning_rate": 0.00018366212665072454,
      "loss": 1.7467,
      "step": 1020
    },
    {
      "epoch": 0.2093070930709307,
      "grad_norm": 0.6270839442565453,
      "learning_rate": 0.00018362573308303718,
      "loss": 1.6569,
      "step": 1021
    },
    {
      "epoch": 0.2095120951209512,
      "grad_norm": 0.609601236813474,
      "learning_rate": 0.0001835893026402019,
      "loss": 1.5895,
      "step": 1022
    },
    {
      "epoch": 0.2097170971709717,
      "grad_norm": 0.6098604247930536,
      "learning_rate": 0.00018355283533828286,
      "loss": 1.6569,
      "step": 1023
    },
    {
      "epoch": 0.2099220992209922,
      "grad_norm": 0.6809722972139036,
      "learning_rate": 0.00018351633119336044,
      "loss": 1.6978,
      "step": 1024
    },
    {
      "epoch": 0.21012710127101272,
      "grad_norm": 0.6063103095680642,
      "learning_rate": 0.00018347979022153137,
      "loss": 1.7016,
      "step": 1025
    },
    {
      "epoch": 0.21033210332103322,
      "grad_norm": 0.6132134343622095,
      "learning_rate": 0.00018344321243890854,
      "loss": 1.6838,
      "step": 1026
    },
    {
      "epoch": 0.21053710537105372,
      "grad_norm": 0.6482611541562687,
      "learning_rate": 0.00018340659786162108,
      "loss": 1.5998,
      "step": 1027
    },
    {
      "epoch": 0.21074210742107422,
      "grad_norm": 0.5718708862079491,
      "learning_rate": 0.00018336994650581438,
      "loss": 1.7523,
      "step": 1028
    },
    {
      "epoch": 0.2109471094710947,
      "grad_norm": 0.5876548902833577,
      "learning_rate": 0.00018333325838765,
      "loss": 1.7048,
      "step": 1029
    },
    {
      "epoch": 0.2111521115211152,
      "grad_norm": 0.6689386890967747,
      "learning_rate": 0.00018329653352330572,
      "loss": 1.6088,
      "step": 1030
    },
    {
      "epoch": 0.2113571135711357,
      "grad_norm": 0.6344376859708848,
      "learning_rate": 0.00018325977192897554,
      "loss": 1.7159,
      "step": 1031
    },
    {
      "epoch": 0.2115621156211562,
      "grad_norm": 0.5688368287200323,
      "learning_rate": 0.00018322297362086972,
      "loss": 1.7146,
      "step": 1032
    },
    {
      "epoch": 0.2117671176711767,
      "grad_norm": 0.6200379089749919,
      "learning_rate": 0.00018318613861521455,
      "loss": 1.6564,
      "step": 1033
    },
    {
      "epoch": 0.2119721197211972,
      "grad_norm": 0.738264631673555,
      "learning_rate": 0.00018314926692825263,
      "loss": 1.6543,
      "step": 1034
    },
    {
      "epoch": 0.21217712177121772,
      "grad_norm": 0.5996884334912127,
      "learning_rate": 0.0001831123585762427,
      "loss": 1.6602,
      "step": 1035
    },
    {
      "epoch": 0.21238212382123822,
      "grad_norm": 0.665617761833811,
      "learning_rate": 0.00018307541357545965,
      "loss": 1.7042,
      "step": 1036
    },
    {
      "epoch": 0.21258712587125872,
      "grad_norm": 0.7386404176333486,
      "learning_rate": 0.00018303843194219458,
      "loss": 1.7546,
      "step": 1037
    },
    {
      "epoch": 0.21279212792127922,
      "grad_norm": 0.7153636445810029,
      "learning_rate": 0.00018300141369275469,
      "loss": 1.7095,
      "step": 1038
    },
    {
      "epoch": 0.21299712997129971,
      "grad_norm": 0.6267714193921318,
      "learning_rate": 0.00018296435884346336,
      "loss": 1.6147,
      "step": 1039
    },
    {
      "epoch": 0.2132021320213202,
      "grad_norm": 0.5966612011210995,
      "learning_rate": 0.00018292726741066007,
      "loss": 1.6725,
      "step": 1040
    },
    {
      "epoch": 0.2134071340713407,
      "grad_norm": 0.6435584621675465,
      "learning_rate": 0.00018289013941070046,
      "loss": 1.666,
      "step": 1041
    },
    {
      "epoch": 0.2136121361213612,
      "grad_norm": 0.696637634061216,
      "learning_rate": 0.0001828529748599563,
      "loss": 1.7,
      "step": 1042
    },
    {
      "epoch": 0.2138171381713817,
      "grad_norm": 0.6752273655174961,
      "learning_rate": 0.00018281577377481548,
      "loss": 1.6443,
      "step": 1043
    },
    {
      "epoch": 0.2140221402214022,
      "grad_norm": 0.7460891430531494,
      "learning_rate": 0.00018277853617168195,
      "loss": 1.7339,
      "step": 1044
    },
    {
      "epoch": 0.21422714227142273,
      "grad_norm": 0.7186007723286648,
      "learning_rate": 0.00018274126206697583,
      "loss": 1.7144,
      "step": 1045
    },
    {
      "epoch": 0.21443214432144322,
      "grad_norm": 0.6265002973175462,
      "learning_rate": 0.00018270395147713332,
      "loss": 1.6081,
      "step": 1046
    },
    {
      "epoch": 0.21463714637146372,
      "grad_norm": 0.6161241021074347,
      "learning_rate": 0.00018266660441860667,
      "loss": 1.6279,
      "step": 1047
    },
    {
      "epoch": 0.21484214842148422,
      "grad_norm": 0.6506433832876856,
      "learning_rate": 0.00018262922090786423,
      "loss": 1.6392,
      "step": 1048
    },
    {
      "epoch": 0.21504715047150472,
      "grad_norm": 0.6137088349058902,
      "learning_rate": 0.00018259180096139046,
      "loss": 1.5933,
      "step": 1049
    },
    {
      "epoch": 0.21525215252152521,
      "grad_norm": 0.6235691472210967,
      "learning_rate": 0.00018255434459568578,
      "loss": 1.6799,
      "step": 1050
    },
    {
      "epoch": 0.2154571545715457,
      "grad_norm": 0.6335587073147397,
      "learning_rate": 0.00018251685182726684,
      "loss": 1.7331,
      "step": 1051
    },
    {
      "epoch": 0.2156621566215662,
      "grad_norm": 0.6247519602576221,
      "learning_rate": 0.00018247932267266624,
      "loss": 1.7287,
      "step": 1052
    },
    {
      "epoch": 0.2158671586715867,
      "grad_norm": 0.6612000305498553,
      "learning_rate": 0.00018244175714843256,
      "loss": 1.7095,
      "step": 1053
    },
    {
      "epoch": 0.2160721607216072,
      "grad_norm": 0.698803591480772,
      "learning_rate": 0.00018240415527113056,
      "loss": 1.7888,
      "step": 1054
    },
    {
      "epoch": 0.21627716277162773,
      "grad_norm": 0.6422452000784236,
      "learning_rate": 0.0001823665170573409,
      "loss": 1.6688,
      "step": 1055
    },
    {
      "epoch": 0.21648216482164823,
      "grad_norm": 0.6150914827269612,
      "learning_rate": 0.00018232884252366037,
      "loss": 1.6808,
      "step": 1056
    },
    {
      "epoch": 0.21668716687166872,
      "grad_norm": 0.6327455073208014,
      "learning_rate": 0.0001822911316867017,
      "loss": 1.7,
      "step": 1057
    },
    {
      "epoch": 0.21689216892168922,
      "grad_norm": 0.5797578498575513,
      "learning_rate": 0.00018225338456309364,
      "loss": 1.6743,
      "step": 1058
    },
    {
      "epoch": 0.21709717097170972,
      "grad_norm": 0.6689974232616634,
      "learning_rate": 0.00018221560116948103,
      "loss": 1.6638,
      "step": 1059
    },
    {
      "epoch": 0.21730217302173022,
      "grad_norm": 0.6262350475559852,
      "learning_rate": 0.0001821777815225245,
      "loss": 1.6817,
      "step": 1060
    },
    {
      "epoch": 0.2175071750717507,
      "grad_norm": 0.6647200127526969,
      "learning_rate": 0.00018213992563890095,
      "loss": 1.6059,
      "step": 1061
    },
    {
      "epoch": 0.2177121771217712,
      "grad_norm": 0.6901827437214324,
      "learning_rate": 0.000182102033535303,
      "loss": 1.6737,
      "step": 1062
    },
    {
      "epoch": 0.2179171791717917,
      "grad_norm": 0.6446134041565922,
      "learning_rate": 0.00018206410522843936,
      "loss": 1.6196,
      "step": 1063
    },
    {
      "epoch": 0.2181221812218122,
      "grad_norm": 0.6626325366198685,
      "learning_rate": 0.00018202614073503472,
      "loss": 1.7149,
      "step": 1064
    },
    {
      "epoch": 0.21832718327183273,
      "grad_norm": 0.6738404387198728,
      "learning_rate": 0.00018198814007182968,
      "loss": 1.7126,
      "step": 1065
    },
    {
      "epoch": 0.21853218532185323,
      "grad_norm": 0.6412262519983919,
      "learning_rate": 0.0001819501032555808,
      "loss": 1.6831,
      "step": 1066
    },
    {
      "epoch": 0.21873718737187373,
      "grad_norm": 0.6720492541608051,
      "learning_rate": 0.0001819120303030606,
      "loss": 1.6247,
      "step": 1067
    },
    {
      "epoch": 0.21894218942189422,
      "grad_norm": 0.6070616714045508,
      "learning_rate": 0.00018187392123105751,
      "loss": 1.6523,
      "step": 1068
    },
    {
      "epoch": 0.21914719147191472,
      "grad_norm": 0.6303014408998721,
      "learning_rate": 0.00018183577605637588,
      "loss": 1.6995,
      "step": 1069
    },
    {
      "epoch": 0.21935219352193522,
      "grad_norm": 0.610438410446903,
      "learning_rate": 0.00018179759479583605,
      "loss": 1.7238,
      "step": 1070
    },
    {
      "epoch": 0.21955719557195572,
      "grad_norm": 0.5703559673359521,
      "learning_rate": 0.0001817593774662742,
      "loss": 1.6505,
      "step": 1071
    },
    {
      "epoch": 0.2197621976219762,
      "grad_norm": 0.6254315676741946,
      "learning_rate": 0.0001817211240845424,
      "loss": 1.7664,
      "step": 1072
    },
    {
      "epoch": 0.2199671996719967,
      "grad_norm": 0.6509528677848835,
      "learning_rate": 0.0001816828346675087,
      "loss": 1.618,
      "step": 1073
    },
    {
      "epoch": 0.2201722017220172,
      "grad_norm": 0.6047442337801903,
      "learning_rate": 0.00018164450923205698,
      "loss": 1.6593,
      "step": 1074
    },
    {
      "epoch": 0.22037720377203773,
      "grad_norm": 0.5747572431805756,
      "learning_rate": 0.000181606147795087,
      "loss": 1.6534,
      "step": 1075
    },
    {
      "epoch": 0.22058220582205823,
      "grad_norm": 0.692640993465633,
      "learning_rate": 0.00018156775037351443,
      "loss": 1.7792,
      "step": 1076
    },
    {
      "epoch": 0.22078720787207873,
      "grad_norm": 0.5732946235655187,
      "learning_rate": 0.00018152931698427077,
      "loss": 1.6232,
      "step": 1077
    },
    {
      "epoch": 0.22099220992209923,
      "grad_norm": 0.6856102460653584,
      "learning_rate": 0.0001814908476443034,
      "loss": 1.7241,
      "step": 1078
    },
    {
      "epoch": 0.22119721197211972,
      "grad_norm": 0.6430265909429804,
      "learning_rate": 0.00018145234237057554,
      "loss": 1.6716,
      "step": 1079
    },
    {
      "epoch": 0.22140221402214022,
      "grad_norm": 0.5978237061298097,
      "learning_rate": 0.00018141380118006633,
      "loss": 1.5965,
      "step": 1080
    },
    {
      "epoch": 0.22160721607216072,
      "grad_norm": 0.6535802650069463,
      "learning_rate": 0.0001813752240897706,
      "loss": 1.6572,
      "step": 1081
    },
    {
      "epoch": 0.22181221812218122,
      "grad_norm": 0.6183980048551903,
      "learning_rate": 0.00018133661111669914,
      "loss": 1.681,
      "step": 1082
    },
    {
      "epoch": 0.2220172201722017,
      "grad_norm": 0.5553454745738735,
      "learning_rate": 0.0001812979622778785,
      "loss": 1.651,
      "step": 1083
    },
    {
      "epoch": 0.2222222222222222,
      "grad_norm": 0.6686772813481492,
      "learning_rate": 0.0001812592775903511,
      "loss": 1.754,
      "step": 1084
    },
    {
      "epoch": 0.22242722427224274,
      "grad_norm": 0.6317618463239347,
      "learning_rate": 0.00018122055707117505,
      "loss": 1.6074,
      "step": 1085
    },
    {
      "epoch": 0.22263222632226323,
      "grad_norm": 0.6715124896586417,
      "learning_rate": 0.0001811818007374244,
      "loss": 1.6941,
      "step": 1086
    },
    {
      "epoch": 0.22283722837228373,
      "grad_norm": 0.6871089219892469,
      "learning_rate": 0.0001811430086061889,
      "loss": 1.6937,
      "step": 1087
    },
    {
      "epoch": 0.22304223042230423,
      "grad_norm": 0.6184197465342502,
      "learning_rate": 0.00018110418069457418,
      "loss": 1.7004,
      "step": 1088
    },
    {
      "epoch": 0.22324723247232472,
      "grad_norm": 0.676386424973752,
      "learning_rate": 0.0001810653170197015,
      "loss": 1.6206,
      "step": 1089
    },
    {
      "epoch": 0.22345223452234522,
      "grad_norm": 0.6523990157189616,
      "learning_rate": 0.00018102641759870804,
      "loss": 1.6688,
      "step": 1090
    },
    {
      "epoch": 0.22365723657236572,
      "grad_norm": 0.6129149497419799,
      "learning_rate": 0.00018098748244874666,
      "loss": 1.6234,
      "step": 1091
    },
    {
      "epoch": 0.22386223862238622,
      "grad_norm": 0.6246462746819592,
      "learning_rate": 0.00018094851158698597,
      "loss": 1.6615,
      "step": 1092
    },
    {
      "epoch": 0.22406724067240671,
      "grad_norm": 0.6510923258120017,
      "learning_rate": 0.00018090950503061036,
      "loss": 1.6218,
      "step": 1093
    },
    {
      "epoch": 0.2242722427224272,
      "grad_norm": 0.6329807738737093,
      "learning_rate": 0.0001808704627968199,
      "loss": 1.65,
      "step": 1094
    },
    {
      "epoch": 0.22447724477244774,
      "grad_norm": 0.6415947415364737,
      "learning_rate": 0.00018083138490283056,
      "loss": 1.7035,
      "step": 1095
    },
    {
      "epoch": 0.22468224682246823,
      "grad_norm": 0.6309501001858635,
      "learning_rate": 0.00018079227136587382,
      "loss": 1.714,
      "step": 1096
    },
    {
      "epoch": 0.22488724887248873,
      "grad_norm": 0.656375003661927,
      "learning_rate": 0.000180753122203197,
      "loss": 1.716,
      "step": 1097
    },
    {
      "epoch": 0.22509225092250923,
      "grad_norm": 0.5832571059442074,
      "learning_rate": 0.0001807139374320631,
      "loss": 1.5351,
      "step": 1098
    },
    {
      "epoch": 0.22529725297252973,
      "grad_norm": 0.6963227362958574,
      "learning_rate": 0.00018067471706975083,
      "loss": 1.7598,
      "step": 1099
    },
    {
      "epoch": 0.22550225502255022,
      "grad_norm": 0.6855640626310727,
      "learning_rate": 0.00018063546113355455,
      "loss": 1.6721,
      "step": 1100
    },
    {
      "epoch": 0.22570725707257072,
      "grad_norm": 0.6485668313776874,
      "learning_rate": 0.00018059616964078443,
      "loss": 1.7035,
      "step": 1101
    },
    {
      "epoch": 0.22591225912259122,
      "grad_norm": 0.62947514920102,
      "learning_rate": 0.00018055684260876614,
      "loss": 1.6161,
      "step": 1102
    },
    {
      "epoch": 0.22611726117261172,
      "grad_norm": 0.6629950604358232,
      "learning_rate": 0.00018051748005484117,
      "loss": 1.6534,
      "step": 1103
    },
    {
      "epoch": 0.22632226322263221,
      "grad_norm": 0.6555220358174337,
      "learning_rate": 0.00018047808199636657,
      "loss": 1.72,
      "step": 1104
    },
    {
      "epoch": 0.22652726527265274,
      "grad_norm": 0.657585363257024,
      "learning_rate": 0.00018043864845071513,
      "loss": 1.6073,
      "step": 1105
    },
    {
      "epoch": 0.22673226732267324,
      "grad_norm": 0.7428113892379641,
      "learning_rate": 0.0001803991794352753,
      "loss": 1.7752,
      "step": 1106
    },
    {
      "epoch": 0.22693726937269373,
      "grad_norm": 0.5902889006678823,
      "learning_rate": 0.000180359674967451,
      "loss": 1.6372,
      "step": 1107
    },
    {
      "epoch": 0.22714227142271423,
      "grad_norm": 0.6880548646796353,
      "learning_rate": 0.000180320135064662,
      "loss": 1.7503,
      "step": 1108
    },
    {
      "epoch": 0.22734727347273473,
      "grad_norm": 0.6351447864709977,
      "learning_rate": 0.00018028055974434354,
      "loss": 1.6692,
      "step": 1109
    },
    {
      "epoch": 0.22755227552275523,
      "grad_norm": 0.6463876645966141,
      "learning_rate": 0.00018024094902394658,
      "loss": 1.7113,
      "step": 1110
    },
    {
      "epoch": 0.22775727757277572,
      "grad_norm": 0.5800066938406596,
      "learning_rate": 0.0001802013029209377,
      "loss": 1.7219,
      "step": 1111
    },
    {
      "epoch": 0.22796227962279622,
      "grad_norm": 0.6459432254651825,
      "learning_rate": 0.0001801616214527989,
      "loss": 1.6143,
      "step": 1112
    },
    {
      "epoch": 0.22816728167281672,
      "grad_norm": 0.6808409468168838,
      "learning_rate": 0.00018012190463702799,
      "loss": 1.6921,
      "step": 1113
    },
    {
      "epoch": 0.22837228372283722,
      "grad_norm": 0.6271128415064747,
      "learning_rate": 0.00018008215249113823,
      "loss": 1.6554,
      "step": 1114
    },
    {
      "epoch": 0.22857728577285774,
      "grad_norm": 0.5682211232353036,
      "learning_rate": 0.00018004236503265858,
      "loss": 1.5751,
      "step": 1115
    },
    {
      "epoch": 0.22878228782287824,
      "grad_norm": 0.6811284162153839,
      "learning_rate": 0.00018000254227913348,
      "loss": 1.675,
      "step": 1116
    },
    {
      "epoch": 0.22898728987289874,
      "grad_norm": 0.6729865719086112,
      "learning_rate": 0.00017996268424812288,
      "loss": 1.7078,
      "step": 1117
    },
    {
      "epoch": 0.22919229192291923,
      "grad_norm": 0.5559498998406727,
      "learning_rate": 0.00017992279095720246,
      "loss": 1.5794,
      "step": 1118
    },
    {
      "epoch": 0.22939729397293973,
      "grad_norm": 0.6010677513988688,
      "learning_rate": 0.0001798828624239633,
      "loss": 1.6065,
      "step": 1119
    },
    {
      "epoch": 0.22960229602296023,
      "grad_norm": 0.6766942591577791,
      "learning_rate": 0.00017984289866601204,
      "loss": 1.7126,
      "step": 1120
    },
    {
      "epoch": 0.22980729807298073,
      "grad_norm": 0.6355849074308124,
      "learning_rate": 0.00017980289970097096,
      "loss": 1.7291,
      "step": 1121
    },
    {
      "epoch": 0.23001230012300122,
      "grad_norm": 0.6013376258637811,
      "learning_rate": 0.00017976286554647773,
      "loss": 1.6295,
      "step": 1122
    },
    {
      "epoch": 0.23021730217302172,
      "grad_norm": 0.6281327708449368,
      "learning_rate": 0.00017972279622018557,
      "loss": 1.7222,
      "step": 1123
    },
    {
      "epoch": 0.23042230422304222,
      "grad_norm": 0.6114395285130246,
      "learning_rate": 0.00017968269173976328,
      "loss": 1.6986,
      "step": 1124
    },
    {
      "epoch": 0.23062730627306274,
      "grad_norm": 0.5898172463075345,
      "learning_rate": 0.00017964255212289513,
      "loss": 1.6767,
      "step": 1125
    },
    {
      "epoch": 0.23083230832308324,
      "grad_norm": 0.5356245153773754,
      "learning_rate": 0.0001796023773872808,
      "loss": 1.6343,
      "step": 1126
    },
    {
      "epoch": 0.23103731037310374,
      "grad_norm": 0.6818053088312895,
      "learning_rate": 0.00017956216755063558,
      "loss": 1.693,
      "step": 1127
    },
    {
      "epoch": 0.23124231242312424,
      "grad_norm": 0.5810994751825839,
      "learning_rate": 0.00017952192263069017,
      "loss": 1.7253,
      "step": 1128
    },
    {
      "epoch": 0.23144731447314473,
      "grad_norm": 0.5725178214330862,
      "learning_rate": 0.00017948164264519072,
      "loss": 1.659,
      "step": 1129
    },
    {
      "epoch": 0.23165231652316523,
      "grad_norm": 0.623038594680975,
      "learning_rate": 0.0001794413276118989,
      "loss": 1.7561,
      "step": 1130
    },
    {
      "epoch": 0.23185731857318573,
      "grad_norm": 0.6555934366154492,
      "learning_rate": 0.00017940097754859177,
      "loss": 1.6605,
      "step": 1131
    },
    {
      "epoch": 0.23206232062320623,
      "grad_norm": 0.5497217183103144,
      "learning_rate": 0.00017936059247306195,
      "loss": 1.5735,
      "step": 1132
    },
    {
      "epoch": 0.23226732267322672,
      "grad_norm": 0.633888958656351,
      "learning_rate": 0.00017932017240311735,
      "loss": 1.6126,
      "step": 1133
    },
    {
      "epoch": 0.23247232472324722,
      "grad_norm": 0.6734381308538583,
      "learning_rate": 0.00017927971735658143,
      "loss": 1.7163,
      "step": 1134
    },
    {
      "epoch": 0.23267732677326775,
      "grad_norm": 0.6729603556436241,
      "learning_rate": 0.00017923922735129302,
      "loss": 1.6758,
      "step": 1135
    },
    {
      "epoch": 0.23288232882328824,
      "grad_norm": 0.6939613987632494,
      "learning_rate": 0.00017919870240510632,
      "loss": 1.7194,
      "step": 1136
    },
    {
      "epoch": 0.23308733087330874,
      "grad_norm": 0.677342108238906,
      "learning_rate": 0.00017915814253589108,
      "loss": 1.6796,
      "step": 1137
    },
    {
      "epoch": 0.23329233292332924,
      "grad_norm": 0.7101418129420499,
      "learning_rate": 0.00017911754776153224,
      "loss": 1.6604,
      "step": 1138
    },
    {
      "epoch": 0.23349733497334973,
      "grad_norm": 0.684114516013831,
      "learning_rate": 0.00017907691809993038,
      "loss": 1.722,
      "step": 1139
    },
    {
      "epoch": 0.23370233702337023,
      "grad_norm": 0.6880244159063225,
      "learning_rate": 0.00017903625356900128,
      "loss": 1.7303,
      "step": 1140
    },
    {
      "epoch": 0.23390733907339073,
      "grad_norm": 0.6773122681633772,
      "learning_rate": 0.00017899555418667614,
      "loss": 1.6886,
      "step": 1141
    },
    {
      "epoch": 0.23411234112341123,
      "grad_norm": 0.6800676743574444,
      "learning_rate": 0.0001789548199709015,
      "loss": 1.7234,
      "step": 1142
    },
    {
      "epoch": 0.23431734317343172,
      "grad_norm": 0.6115666392352647,
      "learning_rate": 0.00017891405093963938,
      "loss": 1.6425,
      "step": 1143
    },
    {
      "epoch": 0.23452234522345222,
      "grad_norm": 0.6441014373889458,
      "learning_rate": 0.000178873247110867,
      "loss": 1.7114,
      "step": 1144
    },
    {
      "epoch": 0.23472734727347275,
      "grad_norm": 0.6619614500831901,
      "learning_rate": 0.00017883240850257706,
      "loss": 1.6928,
      "step": 1145
    },
    {
      "epoch": 0.23493234932349324,
      "grad_norm": 0.5846167586860179,
      "learning_rate": 0.00017879153513277748,
      "loss": 1.6468,
      "step": 1146
    },
    {
      "epoch": 0.23513735137351374,
      "grad_norm": 0.6275117122748974,
      "learning_rate": 0.0001787506270194916,
      "loss": 1.7417,
      "step": 1147
    },
    {
      "epoch": 0.23534235342353424,
      "grad_norm": 0.7217133165951389,
      "learning_rate": 0.000178709684180758,
      "loss": 1.666,
      "step": 1148
    },
    {
      "epoch": 0.23554735547355474,
      "grad_norm": 0.6605461085192059,
      "learning_rate": 0.00017866870663463057,
      "loss": 1.7194,
      "step": 1149
    },
    {
      "epoch": 0.23575235752357523,
      "grad_norm": 0.6334386119819172,
      "learning_rate": 0.00017862769439917867,
      "loss": 1.6781,
      "step": 1150
    },
    {
      "epoch": 0.23595735957359573,
      "grad_norm": 0.6583847305168595,
      "learning_rate": 0.0001785866474924867,
      "loss": 1.6675,
      "step": 1151
    },
    {
      "epoch": 0.23616236162361623,
      "grad_norm": 0.7187168348467076,
      "learning_rate": 0.00017854556593265459,
      "loss": 1.6713,
      "step": 1152
    },
    {
      "epoch": 0.23636736367363673,
      "grad_norm": 0.543285806477839,
      "learning_rate": 0.0001785044497377974,
      "loss": 1.5351,
      "step": 1153
    },
    {
      "epoch": 0.23657236572365722,
      "grad_norm": 0.5968396343728228,
      "learning_rate": 0.00017846329892604547,
      "loss": 1.5917,
      "step": 1154
    },
    {
      "epoch": 0.23677736777367775,
      "grad_norm": 0.595296242453684,
      "learning_rate": 0.00017842211351554448,
      "loss": 1.6154,
      "step": 1155
    },
    {
      "epoch": 0.23698236982369825,
      "grad_norm": 0.655695938326144,
      "learning_rate": 0.0001783808935244553,
      "loss": 1.6976,
      "step": 1156
    },
    {
      "epoch": 0.23718737187371874,
      "grad_norm": 0.6603466475083676,
      "learning_rate": 0.00017833963897095407,
      "loss": 1.7848,
      "step": 1157
    },
    {
      "epoch": 0.23739237392373924,
      "grad_norm": 0.5965591505612715,
      "learning_rate": 0.00017829834987323219,
      "loss": 1.6585,
      "step": 1158
    },
    {
      "epoch": 0.23759737597375974,
      "grad_norm": 0.6846445124693009,
      "learning_rate": 0.0001782570262494963,
      "loss": 1.638,
      "step": 1159
    },
    {
      "epoch": 0.23780237802378024,
      "grad_norm": 0.6081023084140853,
      "learning_rate": 0.0001782156681179682,
      "loss": 1.6193,
      "step": 1160
    },
    {
      "epoch": 0.23800738007380073,
      "grad_norm": 0.6142293079588311,
      "learning_rate": 0.00017817427549688493,
      "loss": 1.6415,
      "step": 1161
    },
    {
      "epoch": 0.23821238212382123,
      "grad_norm": 0.5689166948788361,
      "learning_rate": 0.0001781328484044988,
      "loss": 1.6667,
      "step": 1162
    },
    {
      "epoch": 0.23841738417384173,
      "grad_norm": 0.5966731578529272,
      "learning_rate": 0.00017809138685907726,
      "loss": 1.6608,
      "step": 1163
    },
    {
      "epoch": 0.23862238622386223,
      "grad_norm": 0.5574581986963836,
      "learning_rate": 0.00017804989087890299,
      "loss": 1.6127,
      "step": 1164
    },
    {
      "epoch": 0.23882738827388275,
      "grad_norm": 0.5552595351059207,
      "learning_rate": 0.00017800836048227376,
      "loss": 1.6528,
      "step": 1165
    },
    {
      "epoch": 0.23903239032390325,
      "grad_norm": 0.5962411346751958,
      "learning_rate": 0.00017796679568750267,
      "loss": 1.6704,
      "step": 1166
    },
    {
      "epoch": 0.23923739237392375,
      "grad_norm": 0.7255158685221458,
      "learning_rate": 0.00017792519651291784,
      "loss": 1.7162,
      "step": 1167
    },
    {
      "epoch": 0.23944239442394424,
      "grad_norm": 0.6535718068137458,
      "learning_rate": 0.00017788356297686266,
      "loss": 1.7305,
      "step": 1168
    },
    {
      "epoch": 0.23964739647396474,
      "grad_norm": 0.6141326957283628,
      "learning_rate": 0.00017784189509769562,
      "loss": 1.7464,
      "step": 1169
    },
    {
      "epoch": 0.23985239852398524,
      "grad_norm": 0.6225349922606345,
      "learning_rate": 0.00017780019289379034,
      "loss": 1.6247,
      "step": 1170
    },
    {
      "epoch": 0.24005740057400574,
      "grad_norm": 0.6527944754509596,
      "learning_rate": 0.00017775845638353562,
      "loss": 1.7579,
      "step": 1171
    },
    {
      "epoch": 0.24026240262402623,
      "grad_norm": 0.545256922886162,
      "learning_rate": 0.00017771668558533535,
      "loss": 1.6288,
      "step": 1172
    },
    {
      "epoch": 0.24046740467404673,
      "grad_norm": 0.6322860390515759,
      "learning_rate": 0.00017767488051760857,
      "loss": 1.6815,
      "step": 1173
    },
    {
      "epoch": 0.24067240672406723,
      "grad_norm": 0.558527531232273,
      "learning_rate": 0.00017763304119878937,
      "loss": 1.679,
      "step": 1174
    },
    {
      "epoch": 0.24087740877408775,
      "grad_norm": 0.6015816114871074,
      "learning_rate": 0.00017759116764732707,
      "loss": 1.6893,
      "step": 1175
    },
    {
      "epoch": 0.24108241082410825,
      "grad_norm": 0.6444916956192004,
      "learning_rate": 0.0001775492598816859,
      "loss": 1.63,
      "step": 1176
    },
    {
      "epoch": 0.24128741287412875,
      "grad_norm": 0.6275065932751949,
      "learning_rate": 0.00017750731792034538,
      "loss": 1.6183,
      "step": 1177
    },
    {
      "epoch": 0.24149241492414925,
      "grad_norm": 0.6675104860296741,
      "learning_rate": 0.00017746534178179996,
      "loss": 1.7199,
      "step": 1178
    },
    {
      "epoch": 0.24169741697416974,
      "grad_norm": 0.6991590756154631,
      "learning_rate": 0.00017742333148455921,
      "loss": 1.7149,
      "step": 1179
    },
    {
      "epoch": 0.24190241902419024,
      "grad_norm": 0.5910431795162895,
      "learning_rate": 0.00017738128704714777,
      "loss": 1.72,
      "step": 1180
    },
    {
      "epoch": 0.24210742107421074,
      "grad_norm": 0.5400219386867433,
      "learning_rate": 0.00017733920848810527,
      "loss": 1.6114,
      "step": 1181
    },
    {
      "epoch": 0.24231242312423124,
      "grad_norm": 0.6883171673615529,
      "learning_rate": 0.00017729709582598656,
      "loss": 1.63,
      "step": 1182
    },
    {
      "epoch": 0.24251742517425173,
      "grad_norm": 0.6973382336237606,
      "learning_rate": 0.0001772549490793613,
      "loss": 1.7232,
      "step": 1183
    },
    {
      "epoch": 0.24272242722427223,
      "grad_norm": 0.5317517508919957,
      "learning_rate": 0.00017721276826681437,
      "loss": 1.5532,
      "step": 1184
    },
    {
      "epoch": 0.24292742927429276,
      "grad_norm": 0.6139132606059314,
      "learning_rate": 0.00017717055340694553,
      "loss": 1.6358,
      "step": 1185
    },
    {
      "epoch": 0.24313243132431325,
      "grad_norm": 0.6646427306471898,
      "learning_rate": 0.00017712830451836964,
      "loss": 1.663,
      "step": 1186
    },
    {
      "epoch": 0.24333743337433375,
      "grad_norm": 0.6247243553899692,
      "learning_rate": 0.00017708602161971653,
      "loss": 1.6915,
      "step": 1187
    },
    {
      "epoch": 0.24354243542435425,
      "grad_norm": 0.6688362626005366,
      "learning_rate": 0.0001770437047296311,
      "loss": 1.6821,
      "step": 1188
    },
    {
      "epoch": 0.24374743747437475,
      "grad_norm": 0.6187248183297874,
      "learning_rate": 0.00017700135386677305,
      "loss": 1.6545,
      "step": 1189
    },
    {
      "epoch": 0.24395243952439524,
      "grad_norm": 0.6568357054279115,
      "learning_rate": 0.00017695896904981731,
      "loss": 1.673,
      "step": 1190
    },
    {
      "epoch": 0.24415744157441574,
      "grad_norm": 0.5936537232963011,
      "learning_rate": 0.0001769165502974536,
      "loss": 1.6379,
      "step": 1191
    },
    {
      "epoch": 0.24436244362443624,
      "grad_norm": 0.596265149943637,
      "learning_rate": 0.00017687409762838664,
      "loss": 1.7245,
      "step": 1192
    },
    {
      "epoch": 0.24456744567445673,
      "grad_norm": 0.668349935288758,
      "learning_rate": 0.00017683161106133618,
      "loss": 1.6877,
      "step": 1193
    },
    {
      "epoch": 0.24477244772447723,
      "grad_norm": 0.6397956848249847,
      "learning_rate": 0.00017678909061503683,
      "loss": 1.7403,
      "step": 1194
    },
    {
      "epoch": 0.24497744977449776,
      "grad_norm": 0.615062511481778,
      "learning_rate": 0.00017674653630823822,
      "loss": 1.641,
      "step": 1195
    },
    {
      "epoch": 0.24518245182451825,
      "grad_norm": 0.729722158941603,
      "learning_rate": 0.00017670394815970478,
      "loss": 1.69,
      "step": 1196
    },
    {
      "epoch": 0.24538745387453875,
      "grad_norm": 0.5890849203624392,
      "learning_rate": 0.00017666132618821603,
      "loss": 1.7275,
      "step": 1197
    },
    {
      "epoch": 0.24559245592455925,
      "grad_norm": 0.5945395303668463,
      "learning_rate": 0.00017661867041256628,
      "loss": 1.6575,
      "step": 1198
    },
    {
      "epoch": 0.24579745797457975,
      "grad_norm": 0.656394857559786,
      "learning_rate": 0.00017657598085156481,
      "loss": 1.6021,
      "step": 1199
    },
    {
      "epoch": 0.24600246002460024,
      "grad_norm": 0.5612491650367858,
      "learning_rate": 0.00017653325752403575,
      "loss": 1.6334,
      "step": 1200
    },
    {
      "epoch": 0.24620746207462074,
      "grad_norm": 0.5995355142372665,
      "learning_rate": 0.0001764905004488182,
      "loss": 1.6672,
      "step": 1201
    },
    {
      "epoch": 0.24641246412464124,
      "grad_norm": 0.9692712141583573,
      "learning_rate": 0.000176447709644766,
      "loss": 1.6688,
      "step": 1202
    },
    {
      "epoch": 0.24661746617466174,
      "grad_norm": 0.6451967023076832,
      "learning_rate": 0.00017640488513074803,
      "loss": 1.6387,
      "step": 1203
    },
    {
      "epoch": 0.24682246822468223,
      "grad_norm": 0.6329026205423912,
      "learning_rate": 0.00017636202692564794,
      "loss": 1.5987,
      "step": 1204
    },
    {
      "epoch": 0.24702747027470276,
      "grad_norm": 0.5864552343557909,
      "learning_rate": 0.0001763191350483642,
      "loss": 1.6265,
      "step": 1205
    },
    {
      "epoch": 0.24723247232472326,
      "grad_norm": 0.6180913141017705,
      "learning_rate": 0.00017627620951781022,
      "loss": 1.6636,
      "step": 1206
    },
    {
      "epoch": 0.24743747437474375,
      "grad_norm": 0.5974270129407625,
      "learning_rate": 0.0001762332503529142,
      "loss": 1.6617,
      "step": 1207
    },
    {
      "epoch": 0.24764247642476425,
      "grad_norm": 0.5795015068206213,
      "learning_rate": 0.00017619025757261922,
      "loss": 1.6589,
      "step": 1208
    },
    {
      "epoch": 0.24784747847478475,
      "grad_norm": 0.6240424455080713,
      "learning_rate": 0.00017614723119588306,
      "loss": 1.6332,
      "step": 1209
    },
    {
      "epoch": 0.24805248052480525,
      "grad_norm": 0.5506171723169756,
      "learning_rate": 0.00017610417124167845,
      "loss": 1.641,
      "step": 1210
    },
    {
      "epoch": 0.24825748257482574,
      "grad_norm": 0.5821762097512913,
      "learning_rate": 0.00017606107772899287,
      "loss": 1.6975,
      "step": 1211
    },
    {
      "epoch": 0.24846248462484624,
      "grad_norm": 0.6268004380209853,
      "learning_rate": 0.0001760179506768286,
      "loss": 1.7219,
      "step": 1212
    },
    {
      "epoch": 0.24866748667486674,
      "grad_norm": 0.6683423084019315,
      "learning_rate": 0.00017597479010420268,
      "loss": 1.6747,
      "step": 1213
    },
    {
      "epoch": 0.24887248872488724,
      "grad_norm": 0.5853556399217581,
      "learning_rate": 0.00017593159603014705,
      "loss": 1.6004,
      "step": 1214
    },
    {
      "epoch": 0.24907749077490776,
      "grad_norm": 0.5836242010044278,
      "learning_rate": 0.00017588836847370816,
      "loss": 1.6399,
      "step": 1215
    },
    {
      "epoch": 0.24928249282492826,
      "grad_norm": 0.6125128198338414,
      "learning_rate": 0.0001758451074539476,
      "loss": 1.6455,
      "step": 1216
    },
    {
      "epoch": 0.24948749487494876,
      "grad_norm": 0.5888477564814605,
      "learning_rate": 0.00017580181298994138,
      "loss": 1.6125,
      "step": 1217
    },
    {
      "epoch": 0.24969249692496925,
      "grad_norm": 0.6456215977499503,
      "learning_rate": 0.00017575848510078046,
      "loss": 1.6527,
      "step": 1218
    },
    {
      "epoch": 0.24989749897498975,
      "grad_norm": 0.6259218740613081,
      "learning_rate": 0.0001757151238055704,
      "loss": 1.7374,
      "step": 1219
    },
    {
      "epoch": 0.25010250102501025,
      "grad_norm": 0.6256511247046743,
      "learning_rate": 0.00017567172912343163,
      "loss": 1.7331,
      "step": 1220
    },
    {
      "epoch": 0.25030750307503075,
      "grad_norm": 0.6469612239408504,
      "learning_rate": 0.00017562830107349921,
      "loss": 1.7232,
      "step": 1221
    },
    {
      "epoch": 0.25051250512505124,
      "grad_norm": 0.5977854832938785,
      "learning_rate": 0.00017558483967492294,
      "loss": 1.6107,
      "step": 1222
    },
    {
      "epoch": 0.25071750717507174,
      "grad_norm": 0.6316968216081972,
      "learning_rate": 0.0001755413449468673,
      "loss": 1.7046,
      "step": 1223
    },
    {
      "epoch": 0.25092250922509224,
      "grad_norm": 0.5939743168294715,
      "learning_rate": 0.00017549781690851148,
      "loss": 1.6284,
      "step": 1224
    },
    {
      "epoch": 0.25112751127511274,
      "grad_norm": 0.5745089711040662,
      "learning_rate": 0.0001754542555790494,
      "loss": 1.6245,
      "step": 1225
    },
    {
      "epoch": 0.25133251332513323,
      "grad_norm": 0.61712776627725,
      "learning_rate": 0.00017541066097768963,
      "loss": 1.6657,
      "step": 1226
    },
    {
      "epoch": 0.25153751537515373,
      "grad_norm": 0.5732872885962008,
      "learning_rate": 0.00017536703312365538,
      "loss": 1.695,
      "step": 1227
    },
    {
      "epoch": 0.25174251742517423,
      "grad_norm": 0.5875974180912894,
      "learning_rate": 0.0001753233720361846,
      "loss": 1.6971,
      "step": 1228
    },
    {
      "epoch": 0.2519475194751947,
      "grad_norm": 0.6930756963760735,
      "learning_rate": 0.00017527967773452977,
      "loss": 1.6736,
      "step": 1229
    },
    {
      "epoch": 0.2521525215252153,
      "grad_norm": 0.5669508245232758,
      "learning_rate": 0.00017523595023795813,
      "loss": 1.6108,
      "step": 1230
    },
    {
      "epoch": 0.2523575235752358,
      "grad_norm": 0.65153166234203,
      "learning_rate": 0.00017519218956575154,
      "loss": 1.701,
      "step": 1231
    },
    {
      "epoch": 0.2525625256252563,
      "grad_norm": 0.6157448600343771,
      "learning_rate": 0.00017514839573720643,
      "loss": 1.6984,
      "step": 1232
    },
    {
      "epoch": 0.25276752767527677,
      "grad_norm": 0.5775774658675717,
      "learning_rate": 0.0001751045687716339,
      "loss": 1.6983,
      "step": 1233
    },
    {
      "epoch": 0.25297252972529727,
      "grad_norm": 0.6765353133342762,
      "learning_rate": 0.0001750607086883597,
      "loss": 1.6635,
      "step": 1234
    },
    {
      "epoch": 0.25317753177531777,
      "grad_norm": 0.5587045697539772,
      "learning_rate": 0.00017501681550672406,
      "loss": 1.7145,
      "step": 1235
    },
    {
      "epoch": 0.25338253382533826,
      "grad_norm": 0.6258177026130144,
      "learning_rate": 0.0001749728892460819,
      "loss": 1.6848,
      "step": 1236
    },
    {
      "epoch": 0.25358753587535876,
      "grad_norm": 0.6721455377933682,
      "learning_rate": 0.00017492892992580273,
      "loss": 1.7098,
      "step": 1237
    },
    {
      "epoch": 0.25379253792537926,
      "grad_norm": 0.596785572111564,
      "learning_rate": 0.00017488493756527058,
      "loss": 1.5643,
      "step": 1238
    },
    {
      "epoch": 0.25399753997539976,
      "grad_norm": 0.5282985964042504,
      "learning_rate": 0.00017484091218388412,
      "loss": 1.6862,
      "step": 1239
    },
    {
      "epoch": 0.25420254202542025,
      "grad_norm": 0.6465647003155989,
      "learning_rate": 0.00017479685380105648,
      "loss": 1.7735,
      "step": 1240
    },
    {
      "epoch": 0.25440754407544075,
      "grad_norm": 0.7092872012738344,
      "learning_rate": 0.00017475276243621548,
      "loss": 1.7216,
      "step": 1241
    },
    {
      "epoch": 0.25461254612546125,
      "grad_norm": 0.6179172722131054,
      "learning_rate": 0.00017470863810880335,
      "loss": 1.6967,
      "step": 1242
    },
    {
      "epoch": 0.25481754817548175,
      "grad_norm": 0.5870969934010827,
      "learning_rate": 0.00017466448083827696,
      "loss": 1.6133,
      "step": 1243
    },
    {
      "epoch": 0.25502255022550224,
      "grad_norm": 0.626709306491909,
      "learning_rate": 0.00017462029064410764,
      "loss": 1.6671,
      "step": 1244
    },
    {
      "epoch": 0.25522755227552274,
      "grad_norm": 0.6497905454914743,
      "learning_rate": 0.00017457606754578121,
      "loss": 1.6596,
      "step": 1245
    },
    {
      "epoch": 0.25543255432554324,
      "grad_norm": 0.5855519005678879,
      "learning_rate": 0.00017453181156279812,
      "loss": 1.5915,
      "step": 1246
    },
    {
      "epoch": 0.25563755637556373,
      "grad_norm": 0.6294201366641414,
      "learning_rate": 0.00017448752271467322,
      "loss": 1.6856,
      "step": 1247
    },
    {
      "epoch": 0.25584255842558423,
      "grad_norm": 0.6313807726517735,
      "learning_rate": 0.00017444320102093586,
      "loss": 1.6572,
      "step": 1248
    },
    {
      "epoch": 0.25604756047560473,
      "grad_norm": 0.589907047746136,
      "learning_rate": 0.00017439884650112989,
      "loss": 1.7081,
      "step": 1249
    },
    {
      "epoch": 0.2562525625256253,
      "grad_norm": 0.5695463687038377,
      "learning_rate": 0.00017435445917481367,
      "loss": 1.6886,
      "step": 1250
    },
    {
      "epoch": 0.2564575645756458,
      "grad_norm": 0.6706794470638209,
      "learning_rate": 0.00017431003906156,
      "loss": 1.6505,
      "step": 1251
    },
    {
      "epoch": 0.2566625666256663,
      "grad_norm": 0.6487571707875301,
      "learning_rate": 0.0001742655861809561,
      "loss": 1.7099,
      "step": 1252
    },
    {
      "epoch": 0.2568675686756868,
      "grad_norm": 0.5843031658919652,
      "learning_rate": 0.0001742211005526037,
      "loss": 1.6746,
      "step": 1253
    },
    {
      "epoch": 0.2570725707257073,
      "grad_norm": 0.6033662248436049,
      "learning_rate": 0.0001741765821961189,
      "loss": 1.6349,
      "step": 1254
    },
    {
      "epoch": 0.25727757277572777,
      "grad_norm": 0.6140630362597016,
      "learning_rate": 0.00017413203113113228,
      "loss": 1.7308,
      "step": 1255
    },
    {
      "epoch": 0.25748257482574827,
      "grad_norm": 0.5967057128088326,
      "learning_rate": 0.00017408744737728885,
      "loss": 1.6947,
      "step": 1256
    },
    {
      "epoch": 0.25768757687576876,
      "grad_norm": 0.5534018256707625,
      "learning_rate": 0.00017404283095424802,
      "loss": 1.5626,
      "step": 1257
    },
    {
      "epoch": 0.25789257892578926,
      "grad_norm": 0.5719676468948501,
      "learning_rate": 0.00017399818188168365,
      "loss": 1.6869,
      "step": 1258
    },
    {
      "epoch": 0.25809758097580976,
      "grad_norm": 0.5886964261328439,
      "learning_rate": 0.00017395350017928383,
      "loss": 1.6181,
      "step": 1259
    },
    {
      "epoch": 0.25830258302583026,
      "grad_norm": 0.6594976215291675,
      "learning_rate": 0.00017390878586675127,
      "loss": 1.7694,
      "step": 1260
    },
    {
      "epoch": 0.25850758507585075,
      "grad_norm": 0.6469873592257641,
      "learning_rate": 0.00017386403896380294,
      "loss": 1.6883,
      "step": 1261
    },
    {
      "epoch": 0.25871258712587125,
      "grad_norm": 0.6166959802811376,
      "learning_rate": 0.00017381925949017015,
      "loss": 1.6766,
      "step": 1262
    },
    {
      "epoch": 0.25891758917589175,
      "grad_norm": 0.6189575888663595,
      "learning_rate": 0.00017377444746559864,
      "loss": 1.7222,
      "step": 1263
    },
    {
      "epoch": 0.25912259122591225,
      "grad_norm": 0.6133730278154782,
      "learning_rate": 0.00017372960290984842,
      "loss": 1.7051,
      "step": 1264
    },
    {
      "epoch": 0.25932759327593274,
      "grad_norm": 0.5794436823907965,
      "learning_rate": 0.00017368472584269399,
      "loss": 1.6266,
      "step": 1265
    },
    {
      "epoch": 0.25953259532595324,
      "grad_norm": 0.5921412182656112,
      "learning_rate": 0.00017363981628392404,
      "loss": 1.6364,
      "step": 1266
    },
    {
      "epoch": 0.25973759737597374,
      "grad_norm": 0.6056471099876192,
      "learning_rate": 0.00017359487425334166,
      "loss": 1.618,
      "step": 1267
    },
    {
      "epoch": 0.25994259942599424,
      "grad_norm": 0.7019773106193898,
      "learning_rate": 0.00017354989977076422,
      "loss": 1.7432,
      "step": 1268
    },
    {
      "epoch": 0.26014760147601473,
      "grad_norm": 0.6064269477414022,
      "learning_rate": 0.00017350489285602346,
      "loss": 1.6912,
      "step": 1269
    },
    {
      "epoch": 0.2603526035260353,
      "grad_norm": 0.5943643011595683,
      "learning_rate": 0.00017345985352896535,
      "loss": 1.6425,
      "step": 1270
    },
    {
      "epoch": 0.2605576055760558,
      "grad_norm": 0.6071028301014882,
      "learning_rate": 0.00017341478180945026,
      "loss": 1.6788,
      "step": 1271
    },
    {
      "epoch": 0.2607626076260763,
      "grad_norm": 0.6102675017067584,
      "learning_rate": 0.00017336967771735266,
      "loss": 1.7146,
      "step": 1272
    },
    {
      "epoch": 0.2609676096760968,
      "grad_norm": 0.5191364633878737,
      "learning_rate": 0.00017332454127256148,
      "loss": 1.6282,
      "step": 1273
    },
    {
      "epoch": 0.2611726117261173,
      "grad_norm": 0.605045078944176,
      "learning_rate": 0.0001732793724949798,
      "loss": 1.7719,
      "step": 1274
    },
    {
      "epoch": 0.2613776137761378,
      "grad_norm": 0.6608931979706598,
      "learning_rate": 0.00017323417140452504,
      "loss": 1.6727,
      "step": 1275
    },
    {
      "epoch": 0.26158261582615827,
      "grad_norm": 0.6501551072589721,
      "learning_rate": 0.0001731889380211288,
      "loss": 1.6668,
      "step": 1276
    },
    {
      "epoch": 0.26178761787617877,
      "grad_norm": 0.612729731177046,
      "learning_rate": 0.00017314367236473697,
      "loss": 1.6437,
      "step": 1277
    },
    {
      "epoch": 0.26199261992619927,
      "grad_norm": 0.6255821252613866,
      "learning_rate": 0.0001730983744553096,
      "loss": 1.6706,
      "step": 1278
    },
    {
      "epoch": 0.26219762197621976,
      "grad_norm": 0.6686075379709553,
      "learning_rate": 0.00017305304431282104,
      "loss": 1.6272,
      "step": 1279
    },
    {
      "epoch": 0.26240262402624026,
      "grad_norm": 0.5678836681217919,
      "learning_rate": 0.00017300768195725982,
      "loss": 1.666,
      "step": 1280
    },
    {
      "epoch": 0.26260762607626076,
      "grad_norm": 0.5876097229058804,
      "learning_rate": 0.0001729622874086287,
      "loss": 1.7364,
      "step": 1281
    },
    {
      "epoch": 0.26281262812628126,
      "grad_norm": 0.6272330518141753,
      "learning_rate": 0.0001729168606869446,
      "loss": 1.6717,
      "step": 1282
    },
    {
      "epoch": 0.26301763017630175,
      "grad_norm": 0.5425694659985298,
      "learning_rate": 0.0001728714018122386,
      "loss": 1.6753,
      "step": 1283
    },
    {
      "epoch": 0.26322263222632225,
      "grad_norm": 0.5444006285868122,
      "learning_rate": 0.000172825910804556,
      "loss": 1.6493,
      "step": 1284
    },
    {
      "epoch": 0.26342763427634275,
      "grad_norm": 0.5581509043738077,
      "learning_rate": 0.00017278038768395634,
      "loss": 1.6092,
      "step": 1285
    },
    {
      "epoch": 0.26363263632636325,
      "grad_norm": 0.6006936411981415,
      "learning_rate": 0.00017273483247051322,
      "loss": 1.6916,
      "step": 1286
    },
    {
      "epoch": 0.26383763837638374,
      "grad_norm": 0.6017265100904956,
      "learning_rate": 0.00017268924518431438,
      "loss": 1.6164,
      "step": 1287
    },
    {
      "epoch": 0.26404264042640424,
      "grad_norm": 0.6061915792148266,
      "learning_rate": 0.00017264362584546177,
      "loss": 1.6671,
      "step": 1288
    },
    {
      "epoch": 0.26424764247642474,
      "grad_norm": 0.5556108252651032,
      "learning_rate": 0.00017259797447407142,
      "loss": 1.6239,
      "step": 1289
    },
    {
      "epoch": 0.2644526445264453,
      "grad_norm": 0.661357957597831,
      "learning_rate": 0.00017255229109027355,
      "loss": 1.6603,
      "step": 1290
    },
    {
      "epoch": 0.2646576465764658,
      "grad_norm": 0.6175750504161309,
      "learning_rate": 0.00017250657571421245,
      "loss": 1.6146,
      "step": 1291
    },
    {
      "epoch": 0.2648626486264863,
      "grad_norm": 0.600257010147089,
      "learning_rate": 0.00017246082836604648,
      "loss": 1.6829,
      "step": 1292
    },
    {
      "epoch": 0.2650676506765068,
      "grad_norm": 0.5775177622266917,
      "learning_rate": 0.0001724150490659482,
      "loss": 1.7015,
      "step": 1293
    },
    {
      "epoch": 0.2652726527265273,
      "grad_norm": 0.6838693345849056,
      "learning_rate": 0.00017236923783410413,
      "loss": 1.7381,
      "step": 1294
    },
    {
      "epoch": 0.2654776547765478,
      "grad_norm": 0.6305676657796984,
      "learning_rate": 0.000172323394690715,
      "loss": 1.5973,
      "step": 1295
    },
    {
      "epoch": 0.2656826568265683,
      "grad_norm": 0.5119521897728015,
      "learning_rate": 0.00017227751965599554,
      "loss": 1.6255,
      "step": 1296
    },
    {
      "epoch": 0.2658876588765888,
      "grad_norm": 0.6786487420519335,
      "learning_rate": 0.00017223161275017453,
      "loss": 1.6456,
      "step": 1297
    },
    {
      "epoch": 0.26609266092660927,
      "grad_norm": 0.6986941688961231,
      "learning_rate": 0.0001721856739934949,
      "loss": 1.7205,
      "step": 1298
    },
    {
      "epoch": 0.26629766297662977,
      "grad_norm": 0.6502912351351416,
      "learning_rate": 0.00017213970340621346,
      "loss": 1.6901,
      "step": 1299
    },
    {
      "epoch": 0.26650266502665027,
      "grad_norm": 0.6331665213576733,
      "learning_rate": 0.00017209370100860122,
      "loss": 1.6167,
      "step": 1300
    },
    {
      "epoch": 0.26670766707667076,
      "grad_norm": 0.6334026362816706,
      "learning_rate": 0.00017204766682094312,
      "loss": 1.7432,
      "step": 1301
    },
    {
      "epoch": 0.26691266912669126,
      "grad_norm": 0.6276459470257831,
      "learning_rate": 0.00017200160086353815,
      "loss": 1.6979,
      "step": 1302
    },
    {
      "epoch": 0.26711767117671176,
      "grad_norm": 0.5802282455752659,
      "learning_rate": 0.00017195550315669928,
      "loss": 1.695,
      "step": 1303
    },
    {
      "epoch": 0.26732267322673225,
      "grad_norm": 0.6581337896490818,
      "learning_rate": 0.00017190937372075352,
      "loss": 1.6429,
      "step": 1304
    },
    {
      "epoch": 0.26752767527675275,
      "grad_norm": 0.6623965176565422,
      "learning_rate": 0.00017186321257604186,
      "loss": 1.6979,
      "step": 1305
    },
    {
      "epoch": 0.26773267732677325,
      "grad_norm": 0.6159210747516142,
      "learning_rate": 0.0001718170197429193,
      "loss": 1.6429,
      "step": 1306
    },
    {
      "epoch": 0.26793767937679375,
      "grad_norm": 0.5850424705820864,
      "learning_rate": 0.0001717707952417547,
      "loss": 1.6739,
      "step": 1307
    },
    {
      "epoch": 0.26814268142681424,
      "grad_norm": 0.6487051129394048,
      "learning_rate": 0.00017172453909293105,
      "loss": 1.6742,
      "step": 1308
    },
    {
      "epoch": 0.26834768347683474,
      "grad_norm": 0.6208189819600345,
      "learning_rate": 0.00017167825131684513,
      "loss": 1.6212,
      "step": 1309
    },
    {
      "epoch": 0.2685526855268553,
      "grad_norm": 0.594602984726979,
      "learning_rate": 0.00017163193193390784,
      "loss": 1.7176,
      "step": 1310
    },
    {
      "epoch": 0.2687576875768758,
      "grad_norm": 0.6677213097723341,
      "learning_rate": 0.0001715855809645438,
      "loss": 1.6581,
      "step": 1311
    },
    {
      "epoch": 0.2689626896268963,
      "grad_norm": 0.6438882694035374,
      "learning_rate": 0.00017153919842919176,
      "loss": 1.6564,
      "step": 1312
    },
    {
      "epoch": 0.2691676916769168,
      "grad_norm": 0.5995075789368377,
      "learning_rate": 0.00017149278434830433,
      "loss": 1.7549,
      "step": 1313
    },
    {
      "epoch": 0.2693726937269373,
      "grad_norm": 0.5750266252366152,
      "learning_rate": 0.00017144633874234799,
      "loss": 1.6935,
      "step": 1314
    },
    {
      "epoch": 0.2695776957769578,
      "grad_norm": 0.6608867219851539,
      "learning_rate": 0.0001713998616318031,
      "loss": 1.6916,
      "step": 1315
    },
    {
      "epoch": 0.2697826978269783,
      "grad_norm": 0.6304124991165608,
      "learning_rate": 0.000171353353037164,
      "loss": 1.6271,
      "step": 1316
    },
    {
      "epoch": 0.2699876998769988,
      "grad_norm": 0.5345250031806231,
      "learning_rate": 0.00017130681297893884,
      "loss": 1.6428,
      "step": 1317
    },
    {
      "epoch": 0.2701927019270193,
      "grad_norm": 0.6341024047899217,
      "learning_rate": 0.0001712602414776497,
      "loss": 1.752,
      "step": 1318
    },
    {
      "epoch": 0.27039770397703977,
      "grad_norm": 0.7167032559485453,
      "learning_rate": 0.00017121363855383248,
      "loss": 1.666,
      "step": 1319
    },
    {
      "epoch": 0.27060270602706027,
      "grad_norm": 0.599522774387851,
      "learning_rate": 0.00017116700422803693,
      "loss": 1.6305,
      "step": 1320
    },
    {
      "epoch": 0.27080770807708077,
      "grad_norm": 0.5570009859079458,
      "learning_rate": 0.0001711203385208267,
      "loss": 1.5971,
      "step": 1321
    },
    {
      "epoch": 0.27101271012710126,
      "grad_norm": 0.5968462144206931,
      "learning_rate": 0.00017107364145277923,
      "loss": 1.6723,
      "step": 1322
    },
    {
      "epoch": 0.27121771217712176,
      "grad_norm": 0.6055178925964502,
      "learning_rate": 0.00017102691304448581,
      "loss": 1.6337,
      "step": 1323
    },
    {
      "epoch": 0.27142271422714226,
      "grad_norm": 0.5554698383764707,
      "learning_rate": 0.00017098015331655156,
      "loss": 1.6817,
      "step": 1324
    },
    {
      "epoch": 0.27162771627716276,
      "grad_norm": 0.5744501417185245,
      "learning_rate": 0.00017093336228959536,
      "loss": 1.6714,
      "step": 1325
    },
    {
      "epoch": 0.27183271832718325,
      "grad_norm": 0.5497029266074408,
      "learning_rate": 0.00017088653998424994,
      "loss": 1.6724,
      "step": 1326
    },
    {
      "epoch": 0.27203772037720375,
      "grad_norm": 0.626441371749172,
      "learning_rate": 0.00017083968642116182,
      "loss": 1.6562,
      "step": 1327
    },
    {
      "epoch": 0.27224272242722425,
      "grad_norm": 0.566767549313504,
      "learning_rate": 0.00017079280162099128,
      "loss": 1.7045,
      "step": 1328
    },
    {
      "epoch": 0.27244772447724475,
      "grad_norm": 0.6688022993264132,
      "learning_rate": 0.0001707458856044124,
      "loss": 1.6831,
      "step": 1329
    },
    {
      "epoch": 0.2726527265272653,
      "grad_norm": 0.6527138355403727,
      "learning_rate": 0.000170698938392113,
      "loss": 1.6803,
      "step": 1330
    },
    {
      "epoch": 0.2728577285772858,
      "grad_norm": 0.5890437952057781,
      "learning_rate": 0.00017065196000479467,
      "loss": 1.643,
      "step": 1331
    },
    {
      "epoch": 0.2730627306273063,
      "grad_norm": 0.5615301477669931,
      "learning_rate": 0.00017060495046317272,
      "loss": 1.6531,
      "step": 1332
    },
    {
      "epoch": 0.2732677326773268,
      "grad_norm": 0.6048771172107056,
      "learning_rate": 0.00017055790978797627,
      "loss": 1.7048,
      "step": 1333
    },
    {
      "epoch": 0.2734727347273473,
      "grad_norm": 0.611478334204796,
      "learning_rate": 0.00017051083799994813,
      "loss": 1.6763,
      "step": 1334
    },
    {
      "epoch": 0.2736777367773678,
      "grad_norm": 0.6393942037768471,
      "learning_rate": 0.00017046373511984476,
      "loss": 1.7222,
      "step": 1335
    },
    {
      "epoch": 0.2738827388273883,
      "grad_norm": 0.6955381000036535,
      "learning_rate": 0.0001704166011684364,
      "loss": 1.6932,
      "step": 1336
    },
    {
      "epoch": 0.2740877408774088,
      "grad_norm": 0.5767841297419991,
      "learning_rate": 0.00017036943616650699,
      "loss": 1.6721,
      "step": 1337
    },
    {
      "epoch": 0.2742927429274293,
      "grad_norm": 0.5969083859379442,
      "learning_rate": 0.00017032224013485415,
      "loss": 1.619,
      "step": 1338
    },
    {
      "epoch": 0.2744977449774498,
      "grad_norm": 0.5438130904033252,
      "learning_rate": 0.00017027501309428922,
      "loss": 1.5779,
      "step": 1339
    },
    {
      "epoch": 0.2747027470274703,
      "grad_norm": 0.6023741598939625,
      "learning_rate": 0.00017022775506563714,
      "loss": 1.5599,
      "step": 1340
    },
    {
      "epoch": 0.27490774907749077,
      "grad_norm": 0.5376175874356415,
      "learning_rate": 0.0001701804660697366,
      "loss": 1.6476,
      "step": 1341
    },
    {
      "epoch": 0.27511275112751127,
      "grad_norm": 0.5870472264603122,
      "learning_rate": 0.0001701331461274398,
      "loss": 1.6252,
      "step": 1342
    },
    {
      "epoch": 0.27531775317753177,
      "grad_norm": 0.6088578288023003,
      "learning_rate": 0.0001700857952596128,
      "loss": 1.6766,
      "step": 1343
    },
    {
      "epoch": 0.27552275522755226,
      "grad_norm": 0.5608785246945501,
      "learning_rate": 0.0001700384134871351,
      "loss": 1.6357,
      "step": 1344
    },
    {
      "epoch": 0.27572775727757276,
      "grad_norm": 0.5514302257527606,
      "learning_rate": 0.00016999100083089994,
      "loss": 1.6706,
      "step": 1345
    },
    {
      "epoch": 0.27593275932759326,
      "grad_norm": 0.6264056553330709,
      "learning_rate": 0.00016994355731181414,
      "loss": 1.6735,
      "step": 1346
    },
    {
      "epoch": 0.27613776137761376,
      "grad_norm": 0.6268090004738055,
      "learning_rate": 0.00016989608295079814,
      "loss": 1.666,
      "step": 1347
    },
    {
      "epoch": 0.27634276342763425,
      "grad_norm": 0.6191028198003796,
      "learning_rate": 0.00016984857776878597,
      "loss": 1.6315,
      "step": 1348
    },
    {
      "epoch": 0.27654776547765475,
      "grad_norm": 0.596446209853012,
      "learning_rate": 0.00016980104178672528,
      "loss": 1.6243,
      "step": 1349
    },
    {
      "epoch": 0.2767527675276753,
      "grad_norm": 0.6415935522089514,
      "learning_rate": 0.00016975347502557722,
      "loss": 1.6834,
      "step": 1350
    },
    {
      "epoch": 0.2769577695776958,
      "grad_norm": 0.6927149512261636,
      "learning_rate": 0.00016970587750631664,
      "loss": 1.6837,
      "step": 1351
    },
    {
      "epoch": 0.2771627716277163,
      "grad_norm": 0.6023580006661222,
      "learning_rate": 0.00016965824924993186,
      "loss": 1.7345,
      "step": 1352
    },
    {
      "epoch": 0.2773677736777368,
      "grad_norm": 0.5884586926153903,
      "learning_rate": 0.00016961059027742473,
      "loss": 1.6748,
      "step": 1353
    },
    {
      "epoch": 0.2775727757277573,
      "grad_norm": 0.5902395958526696,
      "learning_rate": 0.00016956290060981077,
      "loss": 1.589,
      "step": 1354
    },
    {
      "epoch": 0.2777777777777778,
      "grad_norm": 0.6005857241077599,
      "learning_rate": 0.0001695151802681189,
      "loss": 1.6168,
      "step": 1355
    },
    {
      "epoch": 0.2779827798277983,
      "grad_norm": 0.6140206599296282,
      "learning_rate": 0.00016946742927339162,
      "loss": 1.655,
      "step": 1356
    },
    {
      "epoch": 0.2781877818778188,
      "grad_norm": 0.543111149759142,
      "learning_rate": 0.00016941964764668498,
      "loss": 1.6522,
      "step": 1357
    },
    {
      "epoch": 0.2783927839278393,
      "grad_norm": 0.5320484042760413,
      "learning_rate": 0.0001693718354090685,
      "loss": 1.597,
      "step": 1358
    },
    {
      "epoch": 0.2785977859778598,
      "grad_norm": 0.5717413309730874,
      "learning_rate": 0.0001693239925816252,
      "loss": 1.6199,
      "step": 1359
    },
    {
      "epoch": 0.2788027880278803,
      "grad_norm": 0.6147641595117893,
      "learning_rate": 0.00016927611918545157,
      "loss": 1.6841,
      "step": 1360
    },
    {
      "epoch": 0.2790077900779008,
      "grad_norm": 0.5576693273730527,
      "learning_rate": 0.0001692282152416576,
      "loss": 1.7787,
      "step": 1361
    },
    {
      "epoch": 0.27921279212792127,
      "grad_norm": 0.5728239367377934,
      "learning_rate": 0.0001691802807713668,
      "loss": 1.6289,
      "step": 1362
    },
    {
      "epoch": 0.27941779417794177,
      "grad_norm": 0.6177858903277267,
      "learning_rate": 0.00016913231579571608,
      "loss": 1.7331,
      "step": 1363
    },
    {
      "epoch": 0.27962279622796227,
      "grad_norm": 0.547970777812909,
      "learning_rate": 0.00016908432033585578,
      "loss": 1.6687,
      "step": 1364
    },
    {
      "epoch": 0.27982779827798276,
      "grad_norm": 0.5619612331214883,
      "learning_rate": 0.00016903629441294976,
      "loss": 1.7112,
      "step": 1365
    },
    {
      "epoch": 0.28003280032800326,
      "grad_norm": 0.6197396709335629,
      "learning_rate": 0.00016898823804817524,
      "loss": 1.7214,
      "step": 1366
    },
    {
      "epoch": 0.28023780237802376,
      "grad_norm": 0.6096890992042201,
      "learning_rate": 0.00016894015126272288,
      "loss": 1.756,
      "step": 1367
    },
    {
      "epoch": 0.28044280442804426,
      "grad_norm": 0.5746803751581485,
      "learning_rate": 0.00016889203407779679,
      "loss": 1.6358,
      "step": 1368
    },
    {
      "epoch": 0.28064780647806475,
      "grad_norm": 0.559456353139058,
      "learning_rate": 0.00016884388651461447,
      "loss": 1.6158,
      "step": 1369
    },
    {
      "epoch": 0.2808528085280853,
      "grad_norm": 0.6279177200550988,
      "learning_rate": 0.0001687957085944068,
      "loss": 1.7557,
      "step": 1370
    },
    {
      "epoch": 0.2810578105781058,
      "grad_norm": 0.5954126669108744,
      "learning_rate": 0.000168747500338418,
      "loss": 1.6827,
      "step": 1371
    },
    {
      "epoch": 0.2812628126281263,
      "grad_norm": 0.7091191735393801,
      "learning_rate": 0.00016869926176790582,
      "loss": 1.6465,
      "step": 1372
    },
    {
      "epoch": 0.2814678146781468,
      "grad_norm": 0.6094884965036207,
      "learning_rate": 0.00016865099290414124,
      "loss": 1.6946,
      "step": 1373
    },
    {
      "epoch": 0.2816728167281673,
      "grad_norm": 0.6288416307609498,
      "learning_rate": 0.00016860269376840856,
      "loss": 1.6302,
      "step": 1374
    },
    {
      "epoch": 0.2818778187781878,
      "grad_norm": 0.6149902924427048,
      "learning_rate": 0.00016855436438200562,
      "loss": 1.6473,
      "step": 1375
    },
    {
      "epoch": 0.2820828208282083,
      "grad_norm": 0.6553959592743003,
      "learning_rate": 0.0001685060047662434,
      "loss": 1.6361,
      "step": 1376
    },
    {
      "epoch": 0.2822878228782288,
      "grad_norm": 0.5531393188586536,
      "learning_rate": 0.00016845761494244633,
      "loss": 1.5902,
      "step": 1377
    },
    {
      "epoch": 0.2824928249282493,
      "grad_norm": 0.6114938874288971,
      "learning_rate": 0.00016840919493195212,
      "loss": 1.6819,
      "step": 1378
    },
    {
      "epoch": 0.2826978269782698,
      "grad_norm": 0.644743919034271,
      "learning_rate": 0.0001683607447561118,
      "loss": 1.7007,
      "step": 1379
    },
    {
      "epoch": 0.2829028290282903,
      "grad_norm": 0.5728313072151706,
      "learning_rate": 0.00016831226443628968,
      "loss": 1.5309,
      "step": 1380
    },
    {
      "epoch": 0.2831078310783108,
      "grad_norm": 0.5683682080497117,
      "learning_rate": 0.00016826375399386337,
      "loss": 1.67,
      "step": 1381
    },
    {
      "epoch": 0.2833128331283313,
      "grad_norm": 0.6075185871427463,
      "learning_rate": 0.00016821521345022377,
      "loss": 1.6961,
      "step": 1382
    },
    {
      "epoch": 0.2835178351783518,
      "grad_norm": 0.5778006045886844,
      "learning_rate": 0.0001681666428267751,
      "loss": 1.6771,
      "step": 1383
    },
    {
      "epoch": 0.28372283722837227,
      "grad_norm": 0.5696748002171197,
      "learning_rate": 0.00016811804214493476,
      "loss": 1.6813,
      "step": 1384
    },
    {
      "epoch": 0.28392783927839277,
      "grad_norm": 0.6336855026210187,
      "learning_rate": 0.00016806941142613342,
      "loss": 1.6587,
      "step": 1385
    },
    {
      "epoch": 0.28413284132841327,
      "grad_norm": 0.5908344627382948,
      "learning_rate": 0.0001680207506918151,
      "loss": 1.6944,
      "step": 1386
    },
    {
      "epoch": 0.28433784337843376,
      "grad_norm": 0.6040203227523477,
      "learning_rate": 0.00016797205996343687,
      "loss": 1.6738,
      "step": 1387
    },
    {
      "epoch": 0.28454284542845426,
      "grad_norm": 0.616407029349558,
      "learning_rate": 0.00016792333926246916,
      "loss": 1.7029,
      "step": 1388
    },
    {
      "epoch": 0.28474784747847476,
      "grad_norm": 0.6329408167169194,
      "learning_rate": 0.00016787458861039563,
      "loss": 1.6862,
      "step": 1389
    },
    {
      "epoch": 0.2849528495284953,
      "grad_norm": 0.7838731139616325,
      "learning_rate": 0.00016782580802871306,
      "loss": 1.6748,
      "step": 1390
    },
    {
      "epoch": 0.2851578515785158,
      "grad_norm": 0.5882036060554755,
      "learning_rate": 0.0001677769975389315,
      "loss": 1.6451,
      "step": 1391
    },
    {
      "epoch": 0.2853628536285363,
      "grad_norm": 0.5693591084489307,
      "learning_rate": 0.00016772815716257412,
      "loss": 1.7635,
      "step": 1392
    },
    {
      "epoch": 0.2855678556785568,
      "grad_norm": 0.5711515693203755,
      "learning_rate": 0.0001676792869211773,
      "loss": 1.6348,
      "step": 1393
    },
    {
      "epoch": 0.2857728577285773,
      "grad_norm": 0.5610445863235145,
      "learning_rate": 0.0001676303868362907,
      "loss": 1.6005,
      "step": 1394
    },
    {
      "epoch": 0.2859778597785978,
      "grad_norm": 0.5839332863795611,
      "learning_rate": 0.0001675814569294769,
      "loss": 1.6338,
      "step": 1395
    },
    {
      "epoch": 0.2861828618286183,
      "grad_norm": 0.5828673270389891,
      "learning_rate": 0.00016753249722231185,
      "loss": 1.7122,
      "step": 1396
    },
    {
      "epoch": 0.2863878638786388,
      "grad_norm": 0.6216480342251545,
      "learning_rate": 0.0001674835077363845,
      "loss": 1.6316,
      "step": 1397
    },
    {
      "epoch": 0.2865928659286593,
      "grad_norm": 0.5722777389771095,
      "learning_rate": 0.00016743448849329702,
      "loss": 1.6823,
      "step": 1398
    },
    {
      "epoch": 0.2867978679786798,
      "grad_norm": 0.6225080484074912,
      "learning_rate": 0.00016738543951466466,
      "loss": 1.6717,
      "step": 1399
    },
    {
      "epoch": 0.2870028700287003,
      "grad_norm": 0.5675434638445968,
      "learning_rate": 0.0001673363608221158,
      "loss": 1.5686,
      "step": 1400
    },
    {
      "epoch": 0.2872078720787208,
      "grad_norm": 0.6487391589510901,
      "learning_rate": 0.0001672872524372919,
      "loss": 1.7171,
      "step": 1401
    },
    {
      "epoch": 0.2874128741287413,
      "grad_norm": 0.5931338987262978,
      "learning_rate": 0.00016723811438184755,
      "loss": 1.7028,
      "step": 1402
    },
    {
      "epoch": 0.2876178761787618,
      "grad_norm": 0.5939444403769348,
      "learning_rate": 0.00016718894667745038,
      "loss": 1.5771,
      "step": 1403
    },
    {
      "epoch": 0.2878228782287823,
      "grad_norm": 0.6119809914439992,
      "learning_rate": 0.0001671397493457811,
      "loss": 1.7637,
      "step": 1404
    },
    {
      "epoch": 0.2880278802788028,
      "grad_norm": 0.5781933512143425,
      "learning_rate": 0.0001670905224085335,
      "loss": 1.6768,
      "step": 1405
    },
    {
      "epoch": 0.28823288232882327,
      "grad_norm": 0.5499145869768924,
      "learning_rate": 0.00016704126588741448,
      "loss": 1.7184,
      "step": 1406
    },
    {
      "epoch": 0.28843788437884377,
      "grad_norm": 0.608443581122789,
      "learning_rate": 0.00016699197980414384,
      "loss": 1.6659,
      "step": 1407
    },
    {
      "epoch": 0.28864288642886426,
      "grad_norm": 0.6494720628224756,
      "learning_rate": 0.00016694266418045456,
      "loss": 1.7825,
      "step": 1408
    },
    {
      "epoch": 0.28884788847888476,
      "grad_norm": 0.6269297302941028,
      "learning_rate": 0.00016689331903809256,
      "loss": 1.6731,
      "step": 1409
    },
    {
      "epoch": 0.2890528905289053,
      "grad_norm": 0.5830461601018152,
      "learning_rate": 0.00016684394439881687,
      "loss": 1.6496,
      "step": 1410
    },
    {
      "epoch": 0.2892578925789258,
      "grad_norm": 0.5944504329483641,
      "learning_rate": 0.0001667945402843994,
      "loss": 1.6719,
      "step": 1411
    },
    {
      "epoch": 0.2894628946289463,
      "grad_norm": 0.5614784103184588,
      "learning_rate": 0.0001667451067166251,
      "loss": 1.6328,
      "step": 1412
    },
    {
      "epoch": 0.2896678966789668,
      "grad_norm": 0.5938633061809798,
      "learning_rate": 0.00016669564371729197,
      "loss": 1.6245,
      "step": 1413
    },
    {
      "epoch": 0.2898728987289873,
      "grad_norm": 0.5712304151533564,
      "learning_rate": 0.00016664615130821092,
      "loss": 1.6089,
      "step": 1414
    },
    {
      "epoch": 0.2900779007790078,
      "grad_norm": 0.5981680777222429,
      "learning_rate": 0.0001665966295112059,
      "loss": 1.7199,
      "step": 1415
    },
    {
      "epoch": 0.2902829028290283,
      "grad_norm": 0.5856478174301754,
      "learning_rate": 0.00016654707834811369,
      "loss": 1.722,
      "step": 1416
    },
    {
      "epoch": 0.2904879048790488,
      "grad_norm": 0.6225004333262452,
      "learning_rate": 0.00016649749784078418,
      "loss": 1.6306,
      "step": 1417
    },
    {
      "epoch": 0.2906929069290693,
      "grad_norm": 0.5685468712927579,
      "learning_rate": 0.0001664478880110801,
      "loss": 1.7041,
      "step": 1418
    },
    {
      "epoch": 0.2908979089790898,
      "grad_norm": 0.6079538986000567,
      "learning_rate": 0.00016639824888087712,
      "loss": 1.7059,
      "step": 1419
    },
    {
      "epoch": 0.2911029110291103,
      "grad_norm": 0.6414222790850636,
      "learning_rate": 0.00016634858047206378,
      "loss": 1.6947,
      "step": 1420
    },
    {
      "epoch": 0.2913079130791308,
      "grad_norm": 0.6249494675095583,
      "learning_rate": 0.00016629888280654174,
      "loss": 1.6235,
      "step": 1421
    },
    {
      "epoch": 0.2915129151291513,
      "grad_norm": 0.6491004754604272,
      "learning_rate": 0.00016624915590622528,
      "loss": 1.7217,
      "step": 1422
    },
    {
      "epoch": 0.2917179171791718,
      "grad_norm": 0.606198026146697,
      "learning_rate": 0.00016619939979304173,
      "loss": 1.6218,
      "step": 1423
    },
    {
      "epoch": 0.2919229192291923,
      "grad_norm": 0.5877569466767635,
      "learning_rate": 0.00016614961448893132,
      "loss": 1.6471,
      "step": 1424
    },
    {
      "epoch": 0.2921279212792128,
      "grad_norm": 0.5656702517456443,
      "learning_rate": 0.00016609980001584706,
      "loss": 1.6128,
      "step": 1425
    },
    {
      "epoch": 0.2923329233292333,
      "grad_norm": 0.6154393901226662,
      "learning_rate": 0.0001660499563957549,
      "loss": 1.7274,
      "step": 1426
    },
    {
      "epoch": 0.29253792537925377,
      "grad_norm": 0.5835405308587939,
      "learning_rate": 0.0001660000836506336,
      "loss": 1.7261,
      "step": 1427
    },
    {
      "epoch": 0.29274292742927427,
      "grad_norm": 0.5706537439759921,
      "learning_rate": 0.00016595018180247476,
      "loss": 1.7385,
      "step": 1428
    },
    {
      "epoch": 0.29294792947929477,
      "grad_norm": 0.6479207054037849,
      "learning_rate": 0.00016590025087328283,
      "loss": 1.7044,
      "step": 1429
    },
    {
      "epoch": 0.2931529315293153,
      "grad_norm": 0.6301267350576135,
      "learning_rate": 0.00016585029088507513,
      "loss": 1.7019,
      "step": 1430
    },
    {
      "epoch": 0.2933579335793358,
      "grad_norm": 0.5958598241177567,
      "learning_rate": 0.00016580030185988167,
      "loss": 1.6502,
      "step": 1431
    },
    {
      "epoch": 0.2935629356293563,
      "grad_norm": 0.5585997523185458,
      "learning_rate": 0.0001657502838197454,
      "loss": 1.6493,
      "step": 1432
    },
    {
      "epoch": 0.2937679376793768,
      "grad_norm": 0.6396998664630253,
      "learning_rate": 0.00016570023678672195,
      "loss": 1.6624,
      "step": 1433
    },
    {
      "epoch": 0.2939729397293973,
      "grad_norm": 0.5452526554629904,
      "learning_rate": 0.00016565016078287984,
      "loss": 1.6854,
      "step": 1434
    },
    {
      "epoch": 0.2941779417794178,
      "grad_norm": 0.5566176716853116,
      "learning_rate": 0.00016560005583030028,
      "loss": 1.7564,
      "step": 1435
    },
    {
      "epoch": 0.2943829438294383,
      "grad_norm": 0.6130102024107688,
      "learning_rate": 0.00016554992195107725,
      "loss": 1.7088,
      "step": 1436
    },
    {
      "epoch": 0.2945879458794588,
      "grad_norm": 0.5785907747932231,
      "learning_rate": 0.00016549975916731757,
      "loss": 1.7137,
      "step": 1437
    },
    {
      "epoch": 0.2947929479294793,
      "grad_norm": 0.5004139534862897,
      "learning_rate": 0.00016544956750114072,
      "loss": 1.6113,
      "step": 1438
    },
    {
      "epoch": 0.2949979499794998,
      "grad_norm": 0.5775971071235202,
      "learning_rate": 0.00016539934697467894,
      "loss": 1.6734,
      "step": 1439
    },
    {
      "epoch": 0.2952029520295203,
      "grad_norm": 0.6062372497504022,
      "learning_rate": 0.0001653490976100772,
      "loss": 1.6311,
      "step": 1440
    },
    {
      "epoch": 0.2954079540795408,
      "grad_norm": 0.5859856426173122,
      "learning_rate": 0.0001652988194294932,
      "loss": 1.5166,
      "step": 1441
    },
    {
      "epoch": 0.2956129561295613,
      "grad_norm": 0.5967393507878219,
      "learning_rate": 0.00016524851245509735,
      "loss": 1.6911,
      "step": 1442
    },
    {
      "epoch": 0.2958179581795818,
      "grad_norm": 0.5767887239009115,
      "learning_rate": 0.0001651981767090727,
      "loss": 1.6983,
      "step": 1443
    },
    {
      "epoch": 0.2960229602296023,
      "grad_norm": 0.5523697251522889,
      "learning_rate": 0.000165147812213615,
      "loss": 1.6263,
      "step": 1444
    },
    {
      "epoch": 0.2962279622796228,
      "grad_norm": 0.6067968583365366,
      "learning_rate": 0.0001650974189909328,
      "loss": 1.7411,
      "step": 1445
    },
    {
      "epoch": 0.2964329643296433,
      "grad_norm": 0.6107939553536341,
      "learning_rate": 0.00016504699706324714,
      "loss": 1.6371,
      "step": 1446
    },
    {
      "epoch": 0.2966379663796638,
      "grad_norm": 0.607001639158129,
      "learning_rate": 0.00016499654645279183,
      "loss": 1.6997,
      "step": 1447
    },
    {
      "epoch": 0.2968429684296843,
      "grad_norm": 0.6500444406045572,
      "learning_rate": 0.00016494606718181332,
      "loss": 1.6451,
      "step": 1448
    },
    {
      "epoch": 0.29704797047970477,
      "grad_norm": 0.6063097225408537,
      "learning_rate": 0.0001648955592725706,
      "loss": 1.6879,
      "step": 1449
    },
    {
      "epoch": 0.2972529725297253,
      "grad_norm": 0.6001187813604127,
      "learning_rate": 0.00016484502274733545,
      "loss": 1.575,
      "step": 1450
    },
    {
      "epoch": 0.2974579745797458,
      "grad_norm": 0.5833045505796,
      "learning_rate": 0.0001647944576283921,
      "loss": 1.6644,
      "step": 1451
    },
    {
      "epoch": 0.2976629766297663,
      "grad_norm": 0.6376388274561628,
      "learning_rate": 0.0001647438639380375,
      "loss": 1.5922,
      "step": 1452
    },
    {
      "epoch": 0.2978679786797868,
      "grad_norm": 0.5577993845281046,
      "learning_rate": 0.00016469324169858122,
      "loss": 1.6857,
      "step": 1453
    },
    {
      "epoch": 0.2980729807298073,
      "grad_norm": 0.6069534255387015,
      "learning_rate": 0.00016464259093234532,
      "loss": 1.5942,
      "step": 1454
    },
    {
      "epoch": 0.2982779827798278,
      "grad_norm": 0.5878791282239109,
      "learning_rate": 0.00016459191166166446,
      "loss": 1.676,
      "step": 1455
    },
    {
      "epoch": 0.2984829848298483,
      "grad_norm": 0.5401765415796359,
      "learning_rate": 0.00016454120390888596,
      "loss": 1.5596,
      "step": 1456
    },
    {
      "epoch": 0.2986879868798688,
      "grad_norm": 0.5706706407852579,
      "learning_rate": 0.00016449046769636958,
      "loss": 1.634,
      "step": 1457
    },
    {
      "epoch": 0.2988929889298893,
      "grad_norm": 0.6488679600213937,
      "learning_rate": 0.0001644397030464877,
      "loss": 1.6726,
      "step": 1458
    },
    {
      "epoch": 0.2990979909799098,
      "grad_norm": 0.6207910747118691,
      "learning_rate": 0.00016438890998162525,
      "loss": 1.652,
      "step": 1459
    },
    {
      "epoch": 0.2993029930299303,
      "grad_norm": 0.48123702384748535,
      "learning_rate": 0.00016433808852417962,
      "loss": 1.6781,
      "step": 1460
    },
    {
      "epoch": 0.2995079950799508,
      "grad_norm": 0.6176812532996043,
      "learning_rate": 0.0001642872386965608,
      "loss": 1.6483,
      "step": 1461
    },
    {
      "epoch": 0.2997129971299713,
      "grad_norm": 0.5966631733091627,
      "learning_rate": 0.00016423636052119122,
      "loss": 1.6711,
      "step": 1462
    },
    {
      "epoch": 0.2999179991799918,
      "grad_norm": 0.54138892575733,
      "learning_rate": 0.00016418545402050586,
      "loss": 1.6333,
      "step": 1463
    },
    {
      "epoch": 0.3001230012300123,
      "grad_norm": 0.5082718777872532,
      "learning_rate": 0.0001641345192169522,
      "loss": 1.6375,
      "step": 1464
    },
    {
      "epoch": 0.3003280032800328,
      "grad_norm": 0.5653767507826191,
      "learning_rate": 0.00016408355613299014,
      "loss": 1.6154,
      "step": 1465
    },
    {
      "epoch": 0.3005330053300533,
      "grad_norm": 0.5577558551273679,
      "learning_rate": 0.00016403256479109209,
      "loss": 1.6291,
      "step": 1466
    },
    {
      "epoch": 0.3007380073800738,
      "grad_norm": 0.5751186685437938,
      "learning_rate": 0.0001639815452137429,
      "loss": 1.5786,
      "step": 1467
    },
    {
      "epoch": 0.3009430094300943,
      "grad_norm": 0.5339315480495862,
      "learning_rate": 0.00016393049742343988,
      "loss": 1.6837,
      "step": 1468
    },
    {
      "epoch": 0.3011480114801148,
      "grad_norm": 0.592096439017495,
      "learning_rate": 0.00016387942144269286,
      "loss": 1.6602,
      "step": 1469
    },
    {
      "epoch": 0.3013530135301353,
      "grad_norm": 0.6350021083036947,
      "learning_rate": 0.00016382831729402396,
      "loss": 1.7384,
      "step": 1470
    },
    {
      "epoch": 0.3015580155801558,
      "grad_norm": 0.5456170004296444,
      "learning_rate": 0.00016377718499996778,
      "loss": 1.6979,
      "step": 1471
    },
    {
      "epoch": 0.3017630176301763,
      "grad_norm": 0.5625163540139964,
      "learning_rate": 0.00016372602458307136,
      "loss": 1.65,
      "step": 1472
    },
    {
      "epoch": 0.3019680196801968,
      "grad_norm": 0.6275864662064967,
      "learning_rate": 0.00016367483606589413,
      "loss": 1.6612,
      "step": 1473
    },
    {
      "epoch": 0.3021730217302173,
      "grad_norm": 0.6654054205446531,
      "learning_rate": 0.00016362361947100788,
      "loss": 1.6183,
      "step": 1474
    },
    {
      "epoch": 0.3023780237802378,
      "grad_norm": 0.534824858068565,
      "learning_rate": 0.00016357237482099684,
      "loss": 1.6559,
      "step": 1475
    },
    {
      "epoch": 0.3025830258302583,
      "grad_norm": 0.6049666407438539,
      "learning_rate": 0.00016352110213845746,
      "loss": 1.6219,
      "step": 1476
    },
    {
      "epoch": 0.3027880278802788,
      "grad_norm": 0.6556382273694806,
      "learning_rate": 0.0001634698014459988,
      "loss": 1.6815,
      "step": 1477
    },
    {
      "epoch": 0.3029930299302993,
      "grad_norm": 0.6183860894338956,
      "learning_rate": 0.0001634184727662421,
      "loss": 1.6166,
      "step": 1478
    },
    {
      "epoch": 0.3031980319803198,
      "grad_norm": 0.6035312388612828,
      "learning_rate": 0.0001633671161218209,
      "loss": 1.666,
      "step": 1479
    },
    {
      "epoch": 0.3034030340303403,
      "grad_norm": 0.6224830450563352,
      "learning_rate": 0.0001633157315353812,
      "loss": 1.6779,
      "step": 1480
    },
    {
      "epoch": 0.3036080360803608,
      "grad_norm": 0.6158059189888997,
      "learning_rate": 0.0001632643190295813,
      "loss": 1.6379,
      "step": 1481
    },
    {
      "epoch": 0.3038130381303813,
      "grad_norm": 0.5943417474762814,
      "learning_rate": 0.00016321287862709175,
      "loss": 1.7575,
      "step": 1482
    },
    {
      "epoch": 0.3040180401804018,
      "grad_norm": 0.5940620362442668,
      "learning_rate": 0.0001631614103505954,
      "loss": 1.6821,
      "step": 1483
    },
    {
      "epoch": 0.3042230422304223,
      "grad_norm": 0.6384725486547611,
      "learning_rate": 0.00016310991422278744,
      "loss": 1.6872,
      "step": 1484
    },
    {
      "epoch": 0.3044280442804428,
      "grad_norm": 0.5853932926543159,
      "learning_rate": 0.00016305839026637534,
      "loss": 1.6891,
      "step": 1485
    },
    {
      "epoch": 0.3046330463304633,
      "grad_norm": 0.6556969925421489,
      "learning_rate": 0.0001630068385040788,
      "loss": 1.6724,
      "step": 1486
    },
    {
      "epoch": 0.3048380483804838,
      "grad_norm": 0.5740200304718273,
      "learning_rate": 0.0001629552589586298,
      "loss": 1.7759,
      "step": 1487
    },
    {
      "epoch": 0.3050430504305043,
      "grad_norm": 0.6272507977664349,
      "learning_rate": 0.00016290365165277262,
      "loss": 1.6311,
      "step": 1488
    },
    {
      "epoch": 0.3052480524805248,
      "grad_norm": 0.6192117676394726,
      "learning_rate": 0.0001628520166092637,
      "loss": 1.6603,
      "step": 1489
    },
    {
      "epoch": 0.30545305453054533,
      "grad_norm": 0.6666399759615597,
      "learning_rate": 0.00016280035385087175,
      "loss": 1.6987,
      "step": 1490
    },
    {
      "epoch": 0.30565805658056583,
      "grad_norm": 0.5537870439952756,
      "learning_rate": 0.0001627486634003777,
      "loss": 1.6197,
      "step": 1491
    },
    {
      "epoch": 0.3058630586305863,
      "grad_norm": 0.6271339691764065,
      "learning_rate": 0.0001626969452805747,
      "loss": 1.6324,
      "step": 1492
    },
    {
      "epoch": 0.3060680606806068,
      "grad_norm": 0.5900091056753516,
      "learning_rate": 0.00016264519951426806,
      "loss": 1.6604,
      "step": 1493
    },
    {
      "epoch": 0.3062730627306273,
      "grad_norm": 0.55510744309782,
      "learning_rate": 0.00016259342612427537,
      "loss": 1.7371,
      "step": 1494
    },
    {
      "epoch": 0.3064780647806478,
      "grad_norm": 0.5225734408907443,
      "learning_rate": 0.0001625416251334263,
      "loss": 1.6408,
      "step": 1495
    },
    {
      "epoch": 0.3066830668306683,
      "grad_norm": 0.633984094717057,
      "learning_rate": 0.00016248979656456275,
      "loss": 1.722,
      "step": 1496
    },
    {
      "epoch": 0.3068880688806888,
      "grad_norm": 0.5940183625252852,
      "learning_rate": 0.0001624379404405387,
      "loss": 1.669,
      "step": 1497
    },
    {
      "epoch": 0.3070930709307093,
      "grad_norm": 0.5117203968935347,
      "learning_rate": 0.00016238605678422046,
      "loss": 1.6204,
      "step": 1498
    },
    {
      "epoch": 0.3072980729807298,
      "grad_norm": 0.6375268570205453,
      "learning_rate": 0.00016233414561848627,
      "loss": 1.6866,
      "step": 1499
    },
    {
      "epoch": 0.3075030750307503,
      "grad_norm": 0.5880112788446207,
      "learning_rate": 0.0001622822069662266,
      "loss": 1.6545,
      "step": 1500
    },
    {
      "epoch": 0.3077080770807708,
      "grad_norm": 0.5735574502024673,
      "learning_rate": 0.00016223024085034414,
      "loss": 1.7094,
      "step": 1501
    },
    {
      "epoch": 0.3079130791307913,
      "grad_norm": 0.5758395382762499,
      "learning_rate": 0.00016217824729375345,
      "loss": 1.5721,
      "step": 1502
    },
    {
      "epoch": 0.3081180811808118,
      "grad_norm": 0.5523134159331391,
      "learning_rate": 0.00016212622631938138,
      "loss": 1.6624,
      "step": 1503
    },
    {
      "epoch": 0.3083230832308323,
      "grad_norm": 0.5646346101504085,
      "learning_rate": 0.00016207417795016684,
      "loss": 1.7276,
      "step": 1504
    },
    {
      "epoch": 0.3085280852808528,
      "grad_norm": 0.5749170166885552,
      "learning_rate": 0.00016202210220906074,
      "loss": 1.7067,
      "step": 1505
    },
    {
      "epoch": 0.3087330873308733,
      "grad_norm": 0.4782237386560857,
      "learning_rate": 0.00016196999911902618,
      "loss": 1.6002,
      "step": 1506
    },
    {
      "epoch": 0.3089380893808938,
      "grad_norm": 0.5679524765129618,
      "learning_rate": 0.00016191786870303822,
      "loss": 1.7205,
      "step": 1507
    },
    {
      "epoch": 0.3091430914309143,
      "grad_norm": 0.5053090688123828,
      "learning_rate": 0.00016186571098408402,
      "loss": 1.6143,
      "step": 1508
    },
    {
      "epoch": 0.3093480934809348,
      "grad_norm": 0.5542317603451554,
      "learning_rate": 0.00016181352598516275,
      "loss": 1.6587,
      "step": 1509
    },
    {
      "epoch": 0.30955309553095534,
      "grad_norm": 0.5658256166476654,
      "learning_rate": 0.00016176131372928562,
      "loss": 1.6991,
      "step": 1510
    },
    {
      "epoch": 0.30975809758097583,
      "grad_norm": 0.550839221938382,
      "learning_rate": 0.00016170907423947585,
      "loss": 1.5961,
      "step": 1511
    },
    {
      "epoch": 0.30996309963099633,
      "grad_norm": 0.5130365852102582,
      "learning_rate": 0.00016165680753876872,
      "loss": 1.6286,
      "step": 1512
    },
    {
      "epoch": 0.31016810168101683,
      "grad_norm": 0.5201506371904974,
      "learning_rate": 0.00016160451365021146,
      "loss": 1.6852,
      "step": 1513
    },
    {
      "epoch": 0.3103731037310373,
      "grad_norm": 0.4659832141335574,
      "learning_rate": 0.00016155219259686326,
      "loss": 1.6011,
      "step": 1514
    },
    {
      "epoch": 0.3105781057810578,
      "grad_norm": 0.6116647912874431,
      "learning_rate": 0.00016149984440179537,
      "loss": 1.6497,
      "step": 1515
    },
    {
      "epoch": 0.3107831078310783,
      "grad_norm": 0.5308969478843171,
      "learning_rate": 0.00016144746908809094,
      "loss": 1.7091,
      "step": 1516
    },
    {
      "epoch": 0.3109881098810988,
      "grad_norm": 0.5203820445129897,
      "learning_rate": 0.0001613950666788451,
      "loss": 1.643,
      "step": 1517
    },
    {
      "epoch": 0.3111931119311193,
      "grad_norm": 0.6183169755345523,
      "learning_rate": 0.000161342637197165,
      "loss": 1.6003,
      "step": 1518
    },
    {
      "epoch": 0.3113981139811398,
      "grad_norm": 0.6136747127270937,
      "learning_rate": 0.00016129018066616952,
      "loss": 1.8026,
      "step": 1519
    },
    {
      "epoch": 0.3116031160311603,
      "grad_norm": 0.5817723945364702,
      "learning_rate": 0.00016123769710898976,
      "loss": 1.6406,
      "step": 1520
    },
    {
      "epoch": 0.3118081180811808,
      "grad_norm": 0.5735548512583484,
      "learning_rate": 0.00016118518654876848,
      "loss": 1.6114,
      "step": 1521
    },
    {
      "epoch": 0.3120131201312013,
      "grad_norm": 0.6138611699549323,
      "learning_rate": 0.00016113264900866048,
      "loss": 1.6961,
      "step": 1522
    },
    {
      "epoch": 0.3122181221812218,
      "grad_norm": 0.5379988913748793,
      "learning_rate": 0.0001610800845118324,
      "loss": 1.6559,
      "step": 1523
    },
    {
      "epoch": 0.3124231242312423,
      "grad_norm": 0.6183425338983181,
      "learning_rate": 0.00016102749308146284,
      "loss": 1.7289,
      "step": 1524
    },
    {
      "epoch": 0.3126281262812628,
      "grad_norm": 0.57905816432174,
      "learning_rate": 0.00016097487474074228,
      "loss": 1.7162,
      "step": 1525
    },
    {
      "epoch": 0.3128331283312833,
      "grad_norm": 0.5750642849271494,
      "learning_rate": 0.0001609222295128729,
      "loss": 1.6312,
      "step": 1526
    },
    {
      "epoch": 0.3130381303813038,
      "grad_norm": 0.5534029028893083,
      "learning_rate": 0.0001608695574210689,
      "loss": 1.6657,
      "step": 1527
    },
    {
      "epoch": 0.3132431324313243,
      "grad_norm": 0.6227328178019984,
      "learning_rate": 0.00016081685848855627,
      "loss": 1.657,
      "step": 1528
    },
    {
      "epoch": 0.3134481344813448,
      "grad_norm": 0.646428670960226,
      "learning_rate": 0.00016076413273857288,
      "loss": 1.6409,
      "step": 1529
    },
    {
      "epoch": 0.31365313653136534,
      "grad_norm": 0.5703451530633927,
      "learning_rate": 0.0001607113801943684,
      "loss": 1.6329,
      "step": 1530
    },
    {
      "epoch": 0.31385813858138584,
      "grad_norm": 0.6117652726218614,
      "learning_rate": 0.00016065860087920424,
      "loss": 1.6421,
      "step": 1531
    },
    {
      "epoch": 0.31406314063140633,
      "grad_norm": 0.6227058046968832,
      "learning_rate": 0.00016060579481635368,
      "loss": 1.6384,
      "step": 1532
    },
    {
      "epoch": 0.31426814268142683,
      "grad_norm": 0.5327861257160884,
      "learning_rate": 0.0001605529620291019,
      "loss": 1.6584,
      "step": 1533
    },
    {
      "epoch": 0.31447314473144733,
      "grad_norm": 0.5321587551317813,
      "learning_rate": 0.00016050010254074564,
      "loss": 1.5922,
      "step": 1534
    },
    {
      "epoch": 0.3146781467814678,
      "grad_norm": 0.5705952632728047,
      "learning_rate": 0.00016044721637459354,
      "loss": 1.7038,
      "step": 1535
    },
    {
      "epoch": 0.3148831488314883,
      "grad_norm": 0.5914355141960834,
      "learning_rate": 0.0001603943035539661,
      "loss": 1.7166,
      "step": 1536
    },
    {
      "epoch": 0.3150881508815088,
      "grad_norm": 0.5264366172331062,
      "learning_rate": 0.00016034136410219538,
      "loss": 1.5967,
      "step": 1537
    },
    {
      "epoch": 0.3152931529315293,
      "grad_norm": 0.5339024874637761,
      "learning_rate": 0.00016028839804262528,
      "loss": 1.6049,
      "step": 1538
    },
    {
      "epoch": 0.3154981549815498,
      "grad_norm": 0.5836726609422397,
      "learning_rate": 0.00016023540539861144,
      "loss": 1.7084,
      "step": 1539
    },
    {
      "epoch": 0.3157031570315703,
      "grad_norm": 0.5805632490811234,
      "learning_rate": 0.0001601823861935212,
      "loss": 1.6971,
      "step": 1540
    },
    {
      "epoch": 0.3159081590815908,
      "grad_norm": 0.6403114053296829,
      "learning_rate": 0.00016012934045073367,
      "loss": 1.7083,
      "step": 1541
    },
    {
      "epoch": 0.3161131611316113,
      "grad_norm": 0.5258402890664735,
      "learning_rate": 0.00016007626819363954,
      "loss": 1.6404,
      "step": 1542
    },
    {
      "epoch": 0.3163181631816318,
      "grad_norm": 0.5970121779241416,
      "learning_rate": 0.0001600231694456413,
      "loss": 1.7279,
      "step": 1543
    },
    {
      "epoch": 0.3165231652316523,
      "grad_norm": 0.6343125666489673,
      "learning_rate": 0.00015997004423015304,
      "loss": 1.7945,
      "step": 1544
    },
    {
      "epoch": 0.3167281672816728,
      "grad_norm": 0.5153636063707583,
      "learning_rate": 0.00015991689257060065,
      "loss": 1.6076,
      "step": 1545
    },
    {
      "epoch": 0.3169331693316933,
      "grad_norm": 0.5160392265344995,
      "learning_rate": 0.0001598637144904215,
      "loss": 1.7185,
      "step": 1546
    },
    {
      "epoch": 0.3171381713817138,
      "grad_norm": 0.5967646540854691,
      "learning_rate": 0.00015981051001306482,
      "loss": 1.682,
      "step": 1547
    },
    {
      "epoch": 0.3173431734317343,
      "grad_norm": 0.5775798643998891,
      "learning_rate": 0.00015975727916199125,
      "loss": 1.7016,
      "step": 1548
    },
    {
      "epoch": 0.3175481754817548,
      "grad_norm": 0.5264697985756588,
      "learning_rate": 0.00015970402196067326,
      "loss": 1.6893,
      "step": 1549
    },
    {
      "epoch": 0.31775317753177534,
      "grad_norm": 0.6081792600824154,
      "learning_rate": 0.0001596507384325948,
      "loss": 1.5229,
      "step": 1550
    },
    {
      "epoch": 0.31795817958179584,
      "grad_norm": 0.6325072580282736,
      "learning_rate": 0.00015959742860125153,
      "loss": 1.7011,
      "step": 1551
    },
    {
      "epoch": 0.31816318163181634,
      "grad_norm": 0.6172374078055283,
      "learning_rate": 0.00015954409249015065,
      "loss": 1.6627,
      "step": 1552
    },
    {
      "epoch": 0.31836818368183684,
      "grad_norm": 0.5210368811839309,
      "learning_rate": 0.00015949073012281093,
      "loss": 1.602,
      "step": 1553
    },
    {
      "epoch": 0.31857318573185733,
      "grad_norm": 0.5824052977175983,
      "learning_rate": 0.00015943734152276277,
      "loss": 1.6087,
      "step": 1554
    },
    {
      "epoch": 0.31877818778187783,
      "grad_norm": 0.5799045635179078,
      "learning_rate": 0.00015938392671354813,
      "loss": 1.6054,
      "step": 1555
    },
    {
      "epoch": 0.31898318983189833,
      "grad_norm": 0.6101232325561027,
      "learning_rate": 0.00015933048571872051,
      "loss": 1.6746,
      "step": 1556
    },
    {
      "epoch": 0.3191881918819188,
      "grad_norm": 0.62031143873723,
      "learning_rate": 0.00015927701856184495,
      "loss": 1.6361,
      "step": 1557
    },
    {
      "epoch": 0.3193931939319393,
      "grad_norm": 0.5340591908608283,
      "learning_rate": 0.00015922352526649803,
      "loss": 1.6567,
      "step": 1558
    },
    {
      "epoch": 0.3195981959819598,
      "grad_norm": 0.5921128155516477,
      "learning_rate": 0.00015917000585626785,
      "loss": 1.611,
      "step": 1559
    },
    {
      "epoch": 0.3198031980319803,
      "grad_norm": 0.6005879551200329,
      "learning_rate": 0.0001591164603547541,
      "loss": 1.6542,
      "step": 1560
    },
    {
      "epoch": 0.3200082000820008,
      "grad_norm": 0.5478549313862134,
      "learning_rate": 0.00015906288878556784,
      "loss": 1.594,
      "step": 1561
    },
    {
      "epoch": 0.3202132021320213,
      "grad_norm": 0.6228574999405933,
      "learning_rate": 0.0001590092911723317,
      "loss": 1.7298,
      "step": 1562
    },
    {
      "epoch": 0.3204182041820418,
      "grad_norm": 0.5571955643386896,
      "learning_rate": 0.0001589556675386799,
      "loss": 1.6305,
      "step": 1563
    },
    {
      "epoch": 0.3206232062320623,
      "grad_norm": 0.6699193358734897,
      "learning_rate": 0.00015890201790825788,
      "loss": 1.7347,
      "step": 1564
    },
    {
      "epoch": 0.3208282082820828,
      "grad_norm": 0.6115176380293189,
      "learning_rate": 0.0001588483423047228,
      "loss": 1.6538,
      "step": 1565
    },
    {
      "epoch": 0.3210332103321033,
      "grad_norm": 0.5855253007786319,
      "learning_rate": 0.00015879464075174308,
      "loss": 1.6841,
      "step": 1566
    },
    {
      "epoch": 0.3212382123821238,
      "grad_norm": 0.709009552707181,
      "learning_rate": 0.00015874091327299872,
      "loss": 1.7253,
      "step": 1567
    },
    {
      "epoch": 0.3214432144321443,
      "grad_norm": 0.5932666319209314,
      "learning_rate": 0.00015868715989218109,
      "loss": 1.6043,
      "step": 1568
    },
    {
      "epoch": 0.3216482164821648,
      "grad_norm": 0.5963803147610166,
      "learning_rate": 0.00015863338063299294,
      "loss": 1.6697,
      "step": 1569
    },
    {
      "epoch": 0.32185321853218535,
      "grad_norm": 0.5729312161917175,
      "learning_rate": 0.00015857957551914853,
      "loss": 1.6554,
      "step": 1570
    },
    {
      "epoch": 0.32205822058220585,
      "grad_norm": 0.5969413652818404,
      "learning_rate": 0.00015852574457437345,
      "loss": 1.614,
      "step": 1571
    },
    {
      "epoch": 0.32226322263222634,
      "grad_norm": 0.5692049023093891,
      "learning_rate": 0.0001584718878224047,
      "loss": 1.6431,
      "step": 1572
    },
    {
      "epoch": 0.32246822468224684,
      "grad_norm": 0.622346988005485,
      "learning_rate": 0.00015841800528699072,
      "loss": 1.6848,
      "step": 1573
    },
    {
      "epoch": 0.32267322673226734,
      "grad_norm": 0.5616108191195761,
      "learning_rate": 0.00015836409699189114,
      "loss": 1.6516,
      "step": 1574
    },
    {
      "epoch": 0.32287822878228783,
      "grad_norm": 0.5671225321927746,
      "learning_rate": 0.00015831016296087715,
      "loss": 1.6504,
      "step": 1575
    },
    {
      "epoch": 0.32308323083230833,
      "grad_norm": 0.5191009492610145,
      "learning_rate": 0.0001582562032177312,
      "loss": 1.5916,
      "step": 1576
    },
    {
      "epoch": 0.32328823288232883,
      "grad_norm": 0.6255391702873446,
      "learning_rate": 0.0001582022177862471,
      "loss": 1.6749,
      "step": 1577
    },
    {
      "epoch": 0.3234932349323493,
      "grad_norm": 0.5342099977843426,
      "learning_rate": 0.00015814820669022986,
      "loss": 1.6707,
      "step": 1578
    },
    {
      "epoch": 0.3236982369823698,
      "grad_norm": 0.5699650352474503,
      "learning_rate": 0.00015809416995349608,
      "loss": 1.676,
      "step": 1579
    },
    {
      "epoch": 0.3239032390323903,
      "grad_norm": 0.5466164647150458,
      "learning_rate": 0.00015804010759987343,
      "loss": 1.6765,
      "step": 1580
    },
    {
      "epoch": 0.3241082410824108,
      "grad_norm": 0.5409749893919559,
      "learning_rate": 0.00015798601965320096,
      "loss": 1.6276,
      "step": 1581
    },
    {
      "epoch": 0.3243132431324313,
      "grad_norm": 0.5854379903493903,
      "learning_rate": 0.00015793190613732892,
      "loss": 1.6655,
      "step": 1582
    },
    {
      "epoch": 0.3245182451824518,
      "grad_norm": 0.6131935964876746,
      "learning_rate": 0.00015787776707611902,
      "loss": 1.7083,
      "step": 1583
    },
    {
      "epoch": 0.3247232472324723,
      "grad_norm": 0.657230494200856,
      "learning_rate": 0.00015782360249344407,
      "loss": 1.6877,
      "step": 1584
    },
    {
      "epoch": 0.3249282492824928,
      "grad_norm": 0.5313191465744274,
      "learning_rate": 0.00015776941241318822,
      "loss": 1.631,
      "step": 1585
    },
    {
      "epoch": 0.3251332513325133,
      "grad_norm": 0.5697330884026001,
      "learning_rate": 0.00015771519685924682,
      "loss": 1.7189,
      "step": 1586
    },
    {
      "epoch": 0.3253382533825338,
      "grad_norm": 0.5956595244876132,
      "learning_rate": 0.00015766095585552646,
      "loss": 1.6337,
      "step": 1587
    },
    {
      "epoch": 0.3255432554325543,
      "grad_norm": 0.593133601810476,
      "learning_rate": 0.00015760668942594496,
      "loss": 1.6399,
      "step": 1588
    },
    {
      "epoch": 0.3257482574825748,
      "grad_norm": 0.5761780065409972,
      "learning_rate": 0.00015755239759443135,
      "loss": 1.6087,
      "step": 1589
    },
    {
      "epoch": 0.32595325953259535,
      "grad_norm": 0.5925984526938471,
      "learning_rate": 0.00015749808038492585,
      "loss": 1.6524,
      "step": 1590
    },
    {
      "epoch": 0.32615826158261585,
      "grad_norm": 0.5850255560121983,
      "learning_rate": 0.00015744373782137992,
      "loss": 1.6432,
      "step": 1591
    },
    {
      "epoch": 0.32636326363263635,
      "grad_norm": 0.5748735732717458,
      "learning_rate": 0.0001573893699277561,
      "loss": 1.5956,
      "step": 1592
    },
    {
      "epoch": 0.32656826568265684,
      "grad_norm": 0.6065485410032901,
      "learning_rate": 0.0001573349767280282,
      "loss": 1.7322,
      "step": 1593
    },
    {
      "epoch": 0.32677326773267734,
      "grad_norm": 0.6207846211881545,
      "learning_rate": 0.00015728055824618112,
      "loss": 1.6414,
      "step": 1594
    },
    {
      "epoch": 0.32697826978269784,
      "grad_norm": 0.60966945180391,
      "learning_rate": 0.00015722611450621102,
      "loss": 1.6396,
      "step": 1595
    },
    {
      "epoch": 0.32718327183271834,
      "grad_norm": 0.5487266395738928,
      "learning_rate": 0.000157171645532125,
      "loss": 1.6069,
      "step": 1596
    },
    {
      "epoch": 0.32738827388273883,
      "grad_norm": 0.5600983596625937,
      "learning_rate": 0.00015711715134794147,
      "loss": 1.6288,
      "step": 1597
    },
    {
      "epoch": 0.32759327593275933,
      "grad_norm": 0.5725634986973878,
      "learning_rate": 0.00015706263197768987,
      "loss": 1.5998,
      "step": 1598
    },
    {
      "epoch": 0.32779827798277983,
      "grad_norm": 0.5586418168855353,
      "learning_rate": 0.0001570080874454108,
      "loss": 1.589,
      "step": 1599
    },
    {
      "epoch": 0.3280032800328003,
      "grad_norm": 0.5485713679637177,
      "learning_rate": 0.00015695351777515583,
      "loss": 1.503,
      "step": 1600
    },
    {
      "epoch": 0.3282082820828208,
      "grad_norm": 0.5823726402188383,
      "learning_rate": 0.0001568989229909878,
      "loss": 1.6207,
      "step": 1601
    },
    {
      "epoch": 0.3284132841328413,
      "grad_norm": 0.5557323266145563,
      "learning_rate": 0.0001568443031169805,
      "loss": 1.6109,
      "step": 1602
    },
    {
      "epoch": 0.3286182861828618,
      "grad_norm": 0.6657897515465551,
      "learning_rate": 0.00015678965817721881,
      "loss": 1.7639,
      "step": 1603
    },
    {
      "epoch": 0.3288232882328823,
      "grad_norm": 0.5733910269341344,
      "learning_rate": 0.00015673498819579864,
      "loss": 1.6422,
      "step": 1604
    },
    {
      "epoch": 0.3290282902829028,
      "grad_norm": 0.585377799360969,
      "learning_rate": 0.00015668029319682698,
      "loss": 1.6102,
      "step": 1605
    },
    {
      "epoch": 0.3292332923329233,
      "grad_norm": 0.5546926420462501,
      "learning_rate": 0.00015662557320442186,
      "loss": 1.6394,
      "step": 1606
    },
    {
      "epoch": 0.3294382943829438,
      "grad_norm": 0.6253775735216174,
      "learning_rate": 0.0001565708282427123,
      "loss": 1.6266,
      "step": 1607
    },
    {
      "epoch": 0.3296432964329643,
      "grad_norm": 0.618952334138591,
      "learning_rate": 0.00015651605833583832,
      "loss": 1.7086,
      "step": 1608
    },
    {
      "epoch": 0.3298482984829848,
      "grad_norm": 0.5609377010825004,
      "learning_rate": 0.00015646126350795102,
      "loss": 1.6711,
      "step": 1609
    },
    {
      "epoch": 0.33005330053300536,
      "grad_norm": 0.5116265674264238,
      "learning_rate": 0.00015640644378321235,
      "loss": 1.6449,
      "step": 1610
    },
    {
      "epoch": 0.33025830258302585,
      "grad_norm": 0.555263215164451,
      "learning_rate": 0.00015635159918579535,
      "loss": 1.5872,
      "step": 1611
    },
    {
      "epoch": 0.33046330463304635,
      "grad_norm": 0.6432144082072414,
      "learning_rate": 0.00015629672973988402,
      "loss": 1.6744,
      "step": 1612
    },
    {
      "epoch": 0.33066830668306685,
      "grad_norm": 0.5855426576651509,
      "learning_rate": 0.00015624183546967323,
      "loss": 1.6365,
      "step": 1613
    },
    {
      "epoch": 0.33087330873308735,
      "grad_norm": 0.6499437957468279,
      "learning_rate": 0.00015618691639936896,
      "loss": 1.6525,
      "step": 1614
    },
    {
      "epoch": 0.33107831078310784,
      "grad_norm": 0.5002458007657454,
      "learning_rate": 0.0001561319725531879,
      "loss": 1.5731,
      "step": 1615
    },
    {
      "epoch": 0.33128331283312834,
      "grad_norm": 0.6286863989121065,
      "learning_rate": 0.0001560770039553579,
      "loss": 1.599,
      "step": 1616
    },
    {
      "epoch": 0.33148831488314884,
      "grad_norm": 0.6046846610346077,
      "learning_rate": 0.00015602201063011752,
      "loss": 1.6664,
      "step": 1617
    },
    {
      "epoch": 0.33169331693316934,
      "grad_norm": 0.5189878297018823,
      "learning_rate": 0.0001559669926017164,
      "loss": 1.6235,
      "step": 1618
    },
    {
      "epoch": 0.33189831898318983,
      "grad_norm": 0.6094463477253609,
      "learning_rate": 0.00015591194989441492,
      "loss": 1.7501,
      "step": 1619
    },
    {
      "epoch": 0.33210332103321033,
      "grad_norm": 0.647981605618956,
      "learning_rate": 0.0001558568825324845,
      "loss": 1.6398,
      "step": 1620
    },
    {
      "epoch": 0.3323083230832308,
      "grad_norm": 0.55411513030306,
      "learning_rate": 0.00015580179054020725,
      "loss": 1.6643,
      "step": 1621
    },
    {
      "epoch": 0.3325133251332513,
      "grad_norm": 0.5647748332443943,
      "learning_rate": 0.00015574667394187627,
      "loss": 1.7195,
      "step": 1622
    },
    {
      "epoch": 0.3327183271832718,
      "grad_norm": 0.6149250387843381,
      "learning_rate": 0.00015569153276179547,
      "loss": 1.6611,
      "step": 1623
    },
    {
      "epoch": 0.3329233292332923,
      "grad_norm": 0.5870375631238135,
      "learning_rate": 0.00015563636702427966,
      "loss": 1.7075,
      "step": 1624
    },
    {
      "epoch": 0.3331283312833128,
      "grad_norm": 0.5428384129030388,
      "learning_rate": 0.00015558117675365437,
      "loss": 1.6616,
      "step": 1625
    },
    {
      "epoch": 0.3333333333333333,
      "grad_norm": 0.6197037259504439,
      "learning_rate": 0.00015552596197425595,
      "loss": 1.6958,
      "step": 1626
    },
    {
      "epoch": 0.3335383353833538,
      "grad_norm": 0.5624511768510775,
      "learning_rate": 0.00015547072271043173,
      "loss": 1.5972,
      "step": 1627
    },
    {
      "epoch": 0.3337433374333743,
      "grad_norm": 0.5187645316511276,
      "learning_rate": 0.00015541545898653961,
      "loss": 1.6799,
      "step": 1628
    },
    {
      "epoch": 0.3339483394833948,
      "grad_norm": 0.6312735507952534,
      "learning_rate": 0.00015536017082694846,
      "loss": 1.6578,
      "step": 1629
    },
    {
      "epoch": 0.33415334153341536,
      "grad_norm": 0.6156743275382334,
      "learning_rate": 0.0001553048582560378,
      "loss": 1.716,
      "step": 1630
    },
    {
      "epoch": 0.33435834358343586,
      "grad_norm": 0.5900175590897995,
      "learning_rate": 0.00015524952129819796,
      "loss": 1.634,
      "step": 1631
    },
    {
      "epoch": 0.33456334563345635,
      "grad_norm": 0.568716879683154,
      "learning_rate": 0.00015519415997783002,
      "loss": 1.6427,
      "step": 1632
    },
    {
      "epoch": 0.33476834768347685,
      "grad_norm": 0.5502966428997514,
      "learning_rate": 0.00015513877431934584,
      "loss": 1.6725,
      "step": 1633
    },
    {
      "epoch": 0.33497334973349735,
      "grad_norm": 0.6151753382081785,
      "learning_rate": 0.00015508336434716795,
      "loss": 1.6782,
      "step": 1634
    },
    {
      "epoch": 0.33517835178351785,
      "grad_norm": 0.5829160585638562,
      "learning_rate": 0.00015502793008572964,
      "loss": 1.6899,
      "step": 1635
    },
    {
      "epoch": 0.33538335383353834,
      "grad_norm": 0.5931408186265753,
      "learning_rate": 0.00015497247155947492,
      "loss": 1.6837,
      "step": 1636
    },
    {
      "epoch": 0.33558835588355884,
      "grad_norm": 0.6026650377425974,
      "learning_rate": 0.00015491698879285842,
      "loss": 1.6257,
      "step": 1637
    },
    {
      "epoch": 0.33579335793357934,
      "grad_norm": 0.532461746324293,
      "learning_rate": 0.00015486148181034553,
      "loss": 1.6284,
      "step": 1638
    },
    {
      "epoch": 0.33599835998359984,
      "grad_norm": 0.6386340893470719,
      "learning_rate": 0.00015480595063641238,
      "loss": 1.6796,
      "step": 1639
    },
    {
      "epoch": 0.33620336203362033,
      "grad_norm": 0.6037468828026835,
      "learning_rate": 0.00015475039529554564,
      "loss": 1.7667,
      "step": 1640
    },
    {
      "epoch": 0.33640836408364083,
      "grad_norm": 0.5635709449103589,
      "learning_rate": 0.00015469481581224272,
      "loss": 1.648,
      "step": 1641
    },
    {
      "epoch": 0.33661336613366133,
      "grad_norm": 0.5084373853514397,
      "learning_rate": 0.00015463921221101158,
      "loss": 1.5599,
      "step": 1642
    },
    {
      "epoch": 0.3368183681836818,
      "grad_norm": 0.6391914419141972,
      "learning_rate": 0.00015458358451637093,
      "loss": 1.6874,
      "step": 1643
    },
    {
      "epoch": 0.3370233702337023,
      "grad_norm": 0.5950859759723373,
      "learning_rate": 0.00015452793275285006,
      "loss": 1.6331,
      "step": 1644
    },
    {
      "epoch": 0.3372283722837228,
      "grad_norm": 0.6211635976751108,
      "learning_rate": 0.00015447225694498887,
      "loss": 1.7091,
      "step": 1645
    },
    {
      "epoch": 0.3374333743337433,
      "grad_norm": 0.5343127736045816,
      "learning_rate": 0.00015441655711733785,
      "loss": 1.7163,
      "step": 1646
    },
    {
      "epoch": 0.3376383763837638,
      "grad_norm": 0.565977935539105,
      "learning_rate": 0.00015436083329445805,
      "loss": 1.7085,
      "step": 1647
    },
    {
      "epoch": 0.3378433784337843,
      "grad_norm": 0.5406296483162867,
      "learning_rate": 0.00015430508550092124,
      "loss": 1.6425,
      "step": 1648
    },
    {
      "epoch": 0.3380483804838048,
      "grad_norm": 0.5231616714524926,
      "learning_rate": 0.00015424931376130957,
      "loss": 1.7039,
      "step": 1649
    },
    {
      "epoch": 0.33825338253382536,
      "grad_norm": 0.5323725080074326,
      "learning_rate": 0.00015419351810021592,
      "loss": 1.6244,
      "step": 1650
    },
    {
      "epoch": 0.33845838458384586,
      "grad_norm": 0.5129775750096033,
      "learning_rate": 0.00015413769854224357,
      "loss": 1.6414,
      "step": 1651
    },
    {
      "epoch": 0.33866338663386636,
      "grad_norm": 0.5444353015609091,
      "learning_rate": 0.00015408185511200646,
      "loss": 1.6455,
      "step": 1652
    },
    {
      "epoch": 0.33886838868388686,
      "grad_norm": 0.551735368139582,
      "learning_rate": 0.00015402598783412897,
      "loss": 1.6714,
      "step": 1653
    },
    {
      "epoch": 0.33907339073390735,
      "grad_norm": 0.5638749902872925,
      "learning_rate": 0.00015397009673324608,
      "loss": 1.685,
      "step": 1654
    },
    {
      "epoch": 0.33927839278392785,
      "grad_norm": 0.5810194079182676,
      "learning_rate": 0.00015391418183400313,
      "loss": 1.6446,
      "step": 1655
    },
    {
      "epoch": 0.33948339483394835,
      "grad_norm": 0.6126328711012577,
      "learning_rate": 0.00015385824316105614,
      "loss": 1.596,
      "step": 1656
    },
    {
      "epoch": 0.33968839688396885,
      "grad_norm": 0.5403590423760342,
      "learning_rate": 0.00015380228073907156,
      "loss": 1.6428,
      "step": 1657
    },
    {
      "epoch": 0.33989339893398934,
      "grad_norm": 0.602602566251628,
      "learning_rate": 0.00015374629459272612,
      "loss": 1.6756,
      "step": 1658
    },
    {
      "epoch": 0.34009840098400984,
      "grad_norm": 0.5880330031291576,
      "learning_rate": 0.0001536902847467073,
      "loss": 1.6478,
      "step": 1659
    },
    {
      "epoch": 0.34030340303403034,
      "grad_norm": 0.5668568450745457,
      "learning_rate": 0.00015363425122571285,
      "loss": 1.5807,
      "step": 1660
    },
    {
      "epoch": 0.34050840508405084,
      "grad_norm": 0.5932088461639127,
      "learning_rate": 0.000153578194054451,
      "loss": 1.6765,
      "step": 1661
    },
    {
      "epoch": 0.34071340713407133,
      "grad_norm": 0.5600105702542542,
      "learning_rate": 0.00015352211325764042,
      "loss": 1.6032,
      "step": 1662
    },
    {
      "epoch": 0.34091840918409183,
      "grad_norm": 0.521513431101829,
      "learning_rate": 0.0001534660088600102,
      "loss": 1.6388,
      "step": 1663
    },
    {
      "epoch": 0.34112341123411233,
      "grad_norm": 0.5789111646759075,
      "learning_rate": 0.00015340988088629982,
      "loss": 1.694,
      "step": 1664
    },
    {
      "epoch": 0.3413284132841328,
      "grad_norm": 0.5369205610119117,
      "learning_rate": 0.0001533537293612592,
      "loss": 1.6439,
      "step": 1665
    },
    {
      "epoch": 0.3415334153341533,
      "grad_norm": 0.5287426687052339,
      "learning_rate": 0.00015329755430964855,
      "loss": 1.605,
      "step": 1666
    },
    {
      "epoch": 0.3417384173841738,
      "grad_norm": 0.4963182573882921,
      "learning_rate": 0.00015324135575623857,
      "loss": 1.6616,
      "step": 1667
    },
    {
      "epoch": 0.3419434194341943,
      "grad_norm": 0.5323323111210633,
      "learning_rate": 0.00015318513372581026,
      "loss": 1.681,
      "step": 1668
    },
    {
      "epoch": 0.3421484214842148,
      "grad_norm": 0.546287224160045,
      "learning_rate": 0.00015312888824315493,
      "loss": 1.6302,
      "step": 1669
    },
    {
      "epoch": 0.34235342353423537,
      "grad_norm": 0.5344070123931473,
      "learning_rate": 0.0001530726193330743,
      "loss": 1.6694,
      "step": 1670
    },
    {
      "epoch": 0.34255842558425587,
      "grad_norm": 0.6280400876524551,
      "learning_rate": 0.00015301632702038046,
      "loss": 1.6097,
      "step": 1671
    },
    {
      "epoch": 0.34276342763427636,
      "grad_norm": 0.5232553469435289,
      "learning_rate": 0.00015296001132989573,
      "loss": 1.6069,
      "step": 1672
    },
    {
      "epoch": 0.34296842968429686,
      "grad_norm": 0.5873367814016014,
      "learning_rate": 0.00015290367228645274,
      "loss": 1.6551,
      "step": 1673
    },
    {
      "epoch": 0.34317343173431736,
      "grad_norm": 0.6222353004332912,
      "learning_rate": 0.00015284730991489446,
      "loss": 1.6964,
      "step": 1674
    },
    {
      "epoch": 0.34337843378433786,
      "grad_norm": 0.544747566749662,
      "learning_rate": 0.00015279092424007418,
      "loss": 1.6713,
      "step": 1675
    },
    {
      "epoch": 0.34358343583435835,
      "grad_norm": 0.5902207579849043,
      "learning_rate": 0.00015273451528685539,
      "loss": 1.5981,
      "step": 1676
    },
    {
      "epoch": 0.34378843788437885,
      "grad_norm": 0.6093540192670327,
      "learning_rate": 0.00015267808308011183,
      "loss": 1.6595,
      "step": 1677
    },
    {
      "epoch": 0.34399343993439935,
      "grad_norm": 0.5849310757415744,
      "learning_rate": 0.0001526216276447276,
      "loss": 1.6207,
      "step": 1678
    },
    {
      "epoch": 0.34419844198441985,
      "grad_norm": 0.6392302966849692,
      "learning_rate": 0.00015256514900559694,
      "loss": 1.6301,
      "step": 1679
    },
    {
      "epoch": 0.34440344403444034,
      "grad_norm": 0.5962228685231873,
      "learning_rate": 0.00015250864718762438,
      "loss": 1.6631,
      "step": 1680
    },
    {
      "epoch": 0.34460844608446084,
      "grad_norm": 0.5853347135318259,
      "learning_rate": 0.00015245212221572468,
      "loss": 1.6164,
      "step": 1681
    },
    {
      "epoch": 0.34481344813448134,
      "grad_norm": 0.6602196760535952,
      "learning_rate": 0.00015239557411482276,
      "loss": 1.7026,
      "step": 1682
    },
    {
      "epoch": 0.34501845018450183,
      "grad_norm": 0.6030016779364876,
      "learning_rate": 0.00015233900290985373,
      "loss": 1.6896,
      "step": 1683
    },
    {
      "epoch": 0.34522345223452233,
      "grad_norm": 0.526912577459109,
      "learning_rate": 0.00015228240862576303,
      "loss": 1.6117,
      "step": 1684
    },
    {
      "epoch": 0.34542845428454283,
      "grad_norm": 0.5955254016227488,
      "learning_rate": 0.00015222579128750603,
      "loss": 1.6221,
      "step": 1685
    },
    {
      "epoch": 0.3456334563345633,
      "grad_norm": 0.5871478192634015,
      "learning_rate": 0.00015216915092004847,
      "loss": 1.6877,
      "step": 1686
    },
    {
      "epoch": 0.3458384583845838,
      "grad_norm": 0.5482498742214277,
      "learning_rate": 0.00015211248754836616,
      "loss": 1.6323,
      "step": 1687
    },
    {
      "epoch": 0.3460434604346043,
      "grad_norm": 0.5450644757691485,
      "learning_rate": 0.00015205580119744512,
      "loss": 1.6376,
      "step": 1688
    },
    {
      "epoch": 0.3462484624846248,
      "grad_norm": 0.5960625000649314,
      "learning_rate": 0.00015199909189228137,
      "loss": 1.6232,
      "step": 1689
    },
    {
      "epoch": 0.3464534645346454,
      "grad_norm": 0.6607425068589716,
      "learning_rate": 0.00015194235965788124,
      "loss": 1.6867,
      "step": 1690
    },
    {
      "epoch": 0.34665846658466587,
      "grad_norm": 0.5775691030515819,
      "learning_rate": 0.000151885604519261,
      "loss": 1.6173,
      "step": 1691
    },
    {
      "epoch": 0.34686346863468637,
      "grad_norm": 0.5702122112404413,
      "learning_rate": 0.0001518288265014471,
      "loss": 1.6367,
      "step": 1692
    },
    {
      "epoch": 0.34706847068470686,
      "grad_norm": 0.6564208205979628,
      "learning_rate": 0.00015177202562947602,
      "loss": 1.6145,
      "step": 1693
    },
    {
      "epoch": 0.34727347273472736,
      "grad_norm": 0.65692230602941,
      "learning_rate": 0.00015171520192839446,
      "loss": 1.692,
      "step": 1694
    },
    {
      "epoch": 0.34747847478474786,
      "grad_norm": 0.5449801153723827,
      "learning_rate": 0.000151658355423259,
      "loss": 1.6202,
      "step": 1695
    },
    {
      "epoch": 0.34768347683476836,
      "grad_norm": 0.6291062430582015,
      "learning_rate": 0.00015160148613913642,
      "loss": 1.6894,
      "step": 1696
    },
    {
      "epoch": 0.34788847888478885,
      "grad_norm": 0.5761359293304741,
      "learning_rate": 0.0001515445941011035,
      "loss": 1.5725,
      "step": 1697
    },
    {
      "epoch": 0.34809348093480935,
      "grad_norm": 0.6176068847705807,
      "learning_rate": 0.00015148767933424696,
      "loss": 1.6317,
      "step": 1698
    },
    {
      "epoch": 0.34829848298482985,
      "grad_norm": 0.6028913335303212,
      "learning_rate": 0.00015143074186366374,
      "loss": 1.604,
      "step": 1699
    },
    {
      "epoch": 0.34850348503485035,
      "grad_norm": 0.5862828710950136,
      "learning_rate": 0.0001513737817144606,
      "loss": 1.6069,
      "step": 1700
    },
    {
      "epoch": 0.34870848708487084,
      "grad_norm": 0.5465012014851335,
      "learning_rate": 0.00015131679891175438,
      "loss": 1.6571,
      "step": 1701
    },
    {
      "epoch": 0.34891348913489134,
      "grad_norm": 0.6720528422400531,
      "learning_rate": 0.00015125979348067195,
      "loss": 1.698,
      "step": 1702
    },
    {
      "epoch": 0.34911849118491184,
      "grad_norm": 0.620343406904573,
      "learning_rate": 0.00015120276544635007,
      "loss": 1.6841,
      "step": 1703
    },
    {
      "epoch": 0.34932349323493234,
      "grad_norm": 0.591826032855559,
      "learning_rate": 0.00015114571483393552,
      "loss": 1.674,
      "step": 1704
    },
    {
      "epoch": 0.34952849528495283,
      "grad_norm": 0.550277802412764,
      "learning_rate": 0.00015108864166858506,
      "loss": 1.6216,
      "step": 1705
    },
    {
      "epoch": 0.34973349733497333,
      "grad_norm": 0.6161607732083954,
      "learning_rate": 0.00015103154597546532,
      "loss": 1.6167,
      "step": 1706
    },
    {
      "epoch": 0.34993849938499383,
      "grad_norm": 0.6225743336682613,
      "learning_rate": 0.00015097442777975295,
      "loss": 1.6702,
      "step": 1707
    },
    {
      "epoch": 0.3501435014350143,
      "grad_norm": 0.5317790917490676,
      "learning_rate": 0.00015091728710663445,
      "loss": 1.6735,
      "step": 1708
    },
    {
      "epoch": 0.3503485034850348,
      "grad_norm": 0.5742252874836843,
      "learning_rate": 0.00015086012398130624,
      "loss": 1.6381,
      "step": 1709
    },
    {
      "epoch": 0.3505535055350554,
      "grad_norm": 0.5961741041096219,
      "learning_rate": 0.00015080293842897468,
      "loss": 1.6527,
      "step": 1710
    },
    {
      "epoch": 0.3507585075850759,
      "grad_norm": 0.5224316752967613,
      "learning_rate": 0.00015074573047485604,
      "loss": 1.6825,
      "step": 1711
    },
    {
      "epoch": 0.35096350963509637,
      "grad_norm": 0.4834365629521206,
      "learning_rate": 0.00015068850014417635,
      "loss": 1.6302,
      "step": 1712
    },
    {
      "epoch": 0.35116851168511687,
      "grad_norm": 0.6025243605637712,
      "learning_rate": 0.00015063124746217166,
      "loss": 1.6865,
      "step": 1713
    },
    {
      "epoch": 0.35137351373513737,
      "grad_norm": 0.609654396206412,
      "learning_rate": 0.00015057397245408772,
      "loss": 1.5896,
      "step": 1714
    },
    {
      "epoch": 0.35157851578515786,
      "grad_norm": 0.5094118161082266,
      "learning_rate": 0.0001505166751451803,
      "loss": 1.6228,
      "step": 1715
    },
    {
      "epoch": 0.35178351783517836,
      "grad_norm": 0.5719282652035563,
      "learning_rate": 0.00015045935556071485,
      "loss": 1.7014,
      "step": 1716
    },
    {
      "epoch": 0.35198851988519886,
      "grad_norm": 0.5765880868026615,
      "learning_rate": 0.0001504020137259667,
      "loss": 1.5913,
      "step": 1717
    },
    {
      "epoch": 0.35219352193521936,
      "grad_norm": 0.6040820788939356,
      "learning_rate": 0.00015034464966622103,
      "loss": 1.6797,
      "step": 1718
    },
    {
      "epoch": 0.35239852398523985,
      "grad_norm": 0.6038907066362674,
      "learning_rate": 0.00015028726340677277,
      "loss": 1.6659,
      "step": 1719
    },
    {
      "epoch": 0.35260352603526035,
      "grad_norm": 0.5331098424480955,
      "learning_rate": 0.00015022985497292662,
      "loss": 1.6802,
      "step": 1720
    },
    {
      "epoch": 0.35280852808528085,
      "grad_norm": 0.555754030532374,
      "learning_rate": 0.00015017242438999711,
      "loss": 1.7191,
      "step": 1721
    },
    {
      "epoch": 0.35301353013530135,
      "grad_norm": 0.556487224859208,
      "learning_rate": 0.00015011497168330851,
      "loss": 1.5971,
      "step": 1722
    },
    {
      "epoch": 0.35321853218532184,
      "grad_norm": 0.5401759358557732,
      "learning_rate": 0.00015005749687819488,
      "loss": 1.6382,
      "step": 1723
    },
    {
      "epoch": 0.35342353423534234,
      "grad_norm": 0.49642833456506713,
      "learning_rate": 0.00015000000000000001,
      "loss": 1.5671,
      "step": 1724
    },
    {
      "epoch": 0.35362853628536284,
      "grad_norm": 0.5617076341377524,
      "learning_rate": 0.00014994248107407735,
      "loss": 1.6429,
      "step": 1725
    },
    {
      "epoch": 0.35383353833538334,
      "grad_norm": 0.6082603658900633,
      "learning_rate": 0.00014988494012579018,
      "loss": 1.6493,
      "step": 1726
    },
    {
      "epoch": 0.35403854038540383,
      "grad_norm": 0.5330591965008474,
      "learning_rate": 0.00014982737718051143,
      "loss": 1.5926,
      "step": 1727
    },
    {
      "epoch": 0.35424354243542433,
      "grad_norm": 0.5419773966322774,
      "learning_rate": 0.00014976979226362372,
      "loss": 1.6846,
      "step": 1728
    },
    {
      "epoch": 0.3544485444854448,
      "grad_norm": 0.6110764865851752,
      "learning_rate": 0.0001497121854005194,
      "loss": 1.5879,
      "step": 1729
    },
    {
      "epoch": 0.3546535465354654,
      "grad_norm": 0.5978549863044518,
      "learning_rate": 0.0001496545566166005,
      "loss": 1.5818,
      "step": 1730
    },
    {
      "epoch": 0.3548585485854859,
      "grad_norm": 0.5631804875064101,
      "learning_rate": 0.00014959690593727867,
      "loss": 1.6403,
      "step": 1731
    },
    {
      "epoch": 0.3550635506355064,
      "grad_norm": 0.6052697837634529,
      "learning_rate": 0.00014953923338797525,
      "loss": 1.6593,
      "step": 1732
    },
    {
      "epoch": 0.3552685526855269,
      "grad_norm": 0.6019382886706088,
      "learning_rate": 0.00014948153899412117,
      "loss": 1.6565,
      "step": 1733
    },
    {
      "epoch": 0.35547355473554737,
      "grad_norm": 0.5938042790425807,
      "learning_rate": 0.00014942382278115713,
      "loss": 1.6956,
      "step": 1734
    },
    {
      "epoch": 0.35567855678556787,
      "grad_norm": 0.5434100540156978,
      "learning_rate": 0.00014936608477453327,
      "loss": 1.6359,
      "step": 1735
    },
    {
      "epoch": 0.35588355883558837,
      "grad_norm": 0.5709215426087542,
      "learning_rate": 0.00014930832499970942,
      "loss": 1.6779,
      "step": 1736
    },
    {
      "epoch": 0.35608856088560886,
      "grad_norm": 0.6170412210009857,
      "learning_rate": 0.00014925054348215514,
      "loss": 1.6539,
      "step": 1737
    },
    {
      "epoch": 0.35629356293562936,
      "grad_norm": 0.6001765732036815,
      "learning_rate": 0.00014919274024734932,
      "loss": 1.6624,
      "step": 1738
    },
    {
      "epoch": 0.35649856498564986,
      "grad_norm": 0.5466509317661203,
      "learning_rate": 0.00014913491532078058,
      "loss": 1.7094,
      "step": 1739
    },
    {
      "epoch": 0.35670356703567035,
      "grad_norm": 0.5779924064284437,
      "learning_rate": 0.00014907706872794714,
      "loss": 1.6246,
      "step": 1740
    },
    {
      "epoch": 0.35690856908569085,
      "grad_norm": 0.5697276052761565,
      "learning_rate": 0.0001490192004943567,
      "loss": 1.6454,
      "step": 1741
    },
    {
      "epoch": 0.35711357113571135,
      "grad_norm": 0.6025969182717065,
      "learning_rate": 0.0001489613106455265,
      "loss": 1.6029,
      "step": 1742
    },
    {
      "epoch": 0.35731857318573185,
      "grad_norm": 0.6160056016225646,
      "learning_rate": 0.00014890339920698334,
      "loss": 1.6772,
      "step": 1743
    },
    {
      "epoch": 0.35752357523575234,
      "grad_norm": 0.5591041568403124,
      "learning_rate": 0.00014884546620426355,
      "loss": 1.6816,
      "step": 1744
    },
    {
      "epoch": 0.35772857728577284,
      "grad_norm": 0.5766152081156878,
      "learning_rate": 0.00014878751166291294,
      "loss": 1.6527,
      "step": 1745
    },
    {
      "epoch": 0.35793357933579334,
      "grad_norm": 0.5234920585905708,
      "learning_rate": 0.00014872953560848677,
      "loss": 1.6888,
      "step": 1746
    },
    {
      "epoch": 0.35813858138581384,
      "grad_norm": 0.5272317927038603,
      "learning_rate": 0.00014867153806654996,
      "loss": 1.661,
      "step": 1747
    },
    {
      "epoch": 0.35834358343583433,
      "grad_norm": 0.575154394340279,
      "learning_rate": 0.00014861351906267673,
      "loss": 1.6422,
      "step": 1748
    },
    {
      "epoch": 0.35854858548585483,
      "grad_norm": 0.5360553303951028,
      "learning_rate": 0.0001485554786224508,
      "loss": 1.5351,
      "step": 1749
    },
    {
      "epoch": 0.3587535875358754,
      "grad_norm": 0.6010369925056703,
      "learning_rate": 0.00014849741677146541,
      "loss": 1.6325,
      "step": 1750
    },
    {
      "epoch": 0.3589585895858959,
      "grad_norm": 0.5568039112696883,
      "learning_rate": 0.0001484393335353232,
      "loss": 1.6794,
      "step": 1751
    },
    {
      "epoch": 0.3591635916359164,
      "grad_norm": 0.559377587463221,
      "learning_rate": 0.00014838122893963618,
      "loss": 1.6792,
      "step": 1752
    },
    {
      "epoch": 0.3593685936859369,
      "grad_norm": 0.5909510959520973,
      "learning_rate": 0.00014832310301002587,
      "loss": 1.6569,
      "step": 1753
    },
    {
      "epoch": 0.3595735957359574,
      "grad_norm": 0.591970821601008,
      "learning_rate": 0.0001482649557721232,
      "loss": 1.644,
      "step": 1754
    },
    {
      "epoch": 0.35977859778597787,
      "grad_norm": 0.5003211660043793,
      "learning_rate": 0.00014820678725156844,
      "loss": 1.6719,
      "step": 1755
    },
    {
      "epoch": 0.35998359983599837,
      "grad_norm": 0.5433120973749493,
      "learning_rate": 0.00014814859747401123,
      "loss": 1.629,
      "step": 1756
    },
    {
      "epoch": 0.36018860188601887,
      "grad_norm": 0.5958620106137993,
      "learning_rate": 0.00014809038646511062,
      "loss": 1.6826,
      "step": 1757
    },
    {
      "epoch": 0.36039360393603936,
      "grad_norm": 0.5122995884938817,
      "learning_rate": 0.00014803215425053504,
      "loss": 1.5742,
      "step": 1758
    },
    {
      "epoch": 0.36059860598605986,
      "grad_norm": 0.47327092439572627,
      "learning_rate": 0.00014797390085596228,
      "loss": 1.6212,
      "step": 1759
    },
    {
      "epoch": 0.36080360803608036,
      "grad_norm": 0.544121867106752,
      "learning_rate": 0.0001479156263070794,
      "loss": 1.5911,
      "step": 1760
    },
    {
      "epoch": 0.36100861008610086,
      "grad_norm": 0.6177706205829336,
      "learning_rate": 0.0001478573306295828,
      "loss": 1.7169,
      "step": 1761
    },
    {
      "epoch": 0.36121361213612135,
      "grad_norm": 0.5279672432137881,
      "learning_rate": 0.0001477990138491783,
      "loss": 1.6308,
      "step": 1762
    },
    {
      "epoch": 0.36141861418614185,
      "grad_norm": 0.5108946200192143,
      "learning_rate": 0.00014774067599158093,
      "loss": 1.5732,
      "step": 1763
    },
    {
      "epoch": 0.36162361623616235,
      "grad_norm": 0.6326442389629837,
      "learning_rate": 0.00014768231708251498,
      "loss": 1.6418,
      "step": 1764
    },
    {
      "epoch": 0.36182861828618285,
      "grad_norm": 0.6183147647223144,
      "learning_rate": 0.0001476239371477141,
      "loss": 1.5786,
      "step": 1765
    },
    {
      "epoch": 0.36203362033620334,
      "grad_norm": 0.550509136570209,
      "learning_rate": 0.0001475655362129212,
      "loss": 1.6161,
      "step": 1766
    },
    {
      "epoch": 0.36223862238622384,
      "grad_norm": 0.4983729819066215,
      "learning_rate": 0.00014750711430388847,
      "loss": 1.5951,
      "step": 1767
    },
    {
      "epoch": 0.36244362443624434,
      "grad_norm": 0.5600708867513791,
      "learning_rate": 0.00014744867144637726,
      "loss": 1.6295,
      "step": 1768
    },
    {
      "epoch": 0.36264862648626484,
      "grad_norm": 0.6329404127859258,
      "learning_rate": 0.00014739020766615826,
      "loss": 1.6863,
      "step": 1769
    },
    {
      "epoch": 0.3628536285362854,
      "grad_norm": 0.5341822075269688,
      "learning_rate": 0.0001473317229890113,
      "loss": 1.6154,
      "step": 1770
    },
    {
      "epoch": 0.3630586305863059,
      "grad_norm": 0.5684149684186564,
      "learning_rate": 0.00014727321744072546,
      "loss": 1.6974,
      "step": 1771
    },
    {
      "epoch": 0.3632636326363264,
      "grad_norm": 0.586534189304384,
      "learning_rate": 0.0001472146910470991,
      "loss": 1.6452,
      "step": 1772
    },
    {
      "epoch": 0.3634686346863469,
      "grad_norm": 0.607537100747522,
      "learning_rate": 0.00014715614383393964,
      "loss": 1.6763,
      "step": 1773
    },
    {
      "epoch": 0.3636736367363674,
      "grad_norm": 0.5842371244180671,
      "learning_rate": 0.00014709757582706374,
      "loss": 1.7317,
      "step": 1774
    },
    {
      "epoch": 0.3638786387863879,
      "grad_norm": 0.5684426519415506,
      "learning_rate": 0.00014703898705229726,
      "loss": 1.692,
      "step": 1775
    },
    {
      "epoch": 0.3640836408364084,
      "grad_norm": 0.572071732472923,
      "learning_rate": 0.00014698037753547514,
      "loss": 1.668,
      "step": 1776
    },
    {
      "epoch": 0.36428864288642887,
      "grad_norm": 0.5922093196382778,
      "learning_rate": 0.00014692174730244158,
      "loss": 1.6961,
      "step": 1777
    },
    {
      "epoch": 0.36449364493644937,
      "grad_norm": 0.5753554886135926,
      "learning_rate": 0.00014686309637904977,
      "loss": 1.6365,
      "step": 1778
    },
    {
      "epoch": 0.36469864698646987,
      "grad_norm": 0.5485439284483765,
      "learning_rate": 0.00014680442479116215,
      "loss": 1.6975,
      "step": 1779
    },
    {
      "epoch": 0.36490364903649036,
      "grad_norm": 0.5224373889447186,
      "learning_rate": 0.00014674573256465024,
      "loss": 1.6442,
      "step": 1780
    },
    {
      "epoch": 0.36510865108651086,
      "grad_norm": 0.5813866042653603,
      "learning_rate": 0.00014668701972539458,
      "loss": 1.6447,
      "step": 1781
    },
    {
      "epoch": 0.36531365313653136,
      "grad_norm": 0.6148780578774461,
      "learning_rate": 0.00014662828629928494,
      "loss": 1.6979,
      "step": 1782
    },
    {
      "epoch": 0.36551865518655186,
      "grad_norm": 0.5992253572346069,
      "learning_rate": 0.00014656953231222006,
      "loss": 1.7374,
      "step": 1783
    },
    {
      "epoch": 0.36572365723657235,
      "grad_norm": 0.5861449043198859,
      "learning_rate": 0.00014651075779010774,
      "loss": 1.6908,
      "step": 1784
    },
    {
      "epoch": 0.36592865928659285,
      "grad_norm": 0.6133331719326407,
      "learning_rate": 0.00014645196275886498,
      "loss": 1.6957,
      "step": 1785
    },
    {
      "epoch": 0.36613366133661335,
      "grad_norm": 0.6204151873011282,
      "learning_rate": 0.00014639314724441754,
      "loss": 1.6192,
      "step": 1786
    },
    {
      "epoch": 0.36633866338663384,
      "grad_norm": 0.5432801305625949,
      "learning_rate": 0.00014633431127270057,
      "loss": 1.5985,
      "step": 1787
    },
    {
      "epoch": 0.36654366543665434,
      "grad_norm": 0.6008319852011252,
      "learning_rate": 0.000146275454869658,
      "loss": 1.6924,
      "step": 1788
    },
    {
      "epoch": 0.36674866748667484,
      "grad_norm": 0.6011855401980891,
      "learning_rate": 0.00014621657806124274,
      "loss": 1.6985,
      "step": 1789
    },
    {
      "epoch": 0.3669536695366954,
      "grad_norm": 0.6085108987066442,
      "learning_rate": 0.0001461576808734169,
      "loss": 1.6171,
      "step": 1790
    },
    {
      "epoch": 0.3671586715867159,
      "grad_norm": 0.6231654246481261,
      "learning_rate": 0.00014609876333215142,
      "loss": 1.6552,
      "step": 1791
    },
    {
      "epoch": 0.3673636736367364,
      "grad_norm": 0.5804498492413966,
      "learning_rate": 0.00014603982546342625,
      "loss": 1.6389,
      "step": 1792
    },
    {
      "epoch": 0.3675686756867569,
      "grad_norm": 0.5492002912186064,
      "learning_rate": 0.00014598086729323035,
      "loss": 1.5882,
      "step": 1793
    },
    {
      "epoch": 0.3677736777367774,
      "grad_norm": 0.5950241329323683,
      "learning_rate": 0.00014592188884756155,
      "loss": 1.6166,
      "step": 1794
    },
    {
      "epoch": 0.3679786797867979,
      "grad_norm": 0.5332968393927306,
      "learning_rate": 0.00014586289015242667,
      "loss": 1.6097,
      "step": 1795
    },
    {
      "epoch": 0.3681836818368184,
      "grad_norm": 0.6291082116455485,
      "learning_rate": 0.00014580387123384146,
      "loss": 1.7253,
      "step": 1796
    },
    {
      "epoch": 0.3683886838868389,
      "grad_norm": 0.5459247424660225,
      "learning_rate": 0.00014574483211783062,
      "loss": 1.6186,
      "step": 1797
    },
    {
      "epoch": 0.36859368593685937,
      "grad_norm": 0.5359738738014396,
      "learning_rate": 0.00014568577283042766,
      "loss": 1.7112,
      "step": 1798
    },
    {
      "epoch": 0.36879868798687987,
      "grad_norm": 0.5912518102722361,
      "learning_rate": 0.00014562669339767504,
      "loss": 1.6439,
      "step": 1799
    },
    {
      "epoch": 0.36900369003690037,
      "grad_norm": 0.5282021873923861,
      "learning_rate": 0.00014556759384562416,
      "loss": 1.5995,
      "step": 1800
    },
    {
      "epoch": 0.36920869208692086,
      "grad_norm": 0.5600266130812795,
      "learning_rate": 0.0001455084742003352,
      "loss": 1.69,
      "step": 1801
    },
    {
      "epoch": 0.36941369413694136,
      "grad_norm": 0.5464969202308386,
      "learning_rate": 0.00014544933448787725,
      "loss": 1.7284,
      "step": 1802
    },
    {
      "epoch": 0.36961869618696186,
      "grad_norm": 0.48886262438418515,
      "learning_rate": 0.0001453901747343282,
      "loss": 1.6371,
      "step": 1803
    },
    {
      "epoch": 0.36982369823698236,
      "grad_norm": 0.5025122945208039,
      "learning_rate": 0.00014533099496577488,
      "loss": 1.6813,
      "step": 1804
    },
    {
      "epoch": 0.37002870028700285,
      "grad_norm": 0.5624916134960064,
      "learning_rate": 0.0001452717952083128,
      "loss": 1.611,
      "step": 1805
    },
    {
      "epoch": 0.37023370233702335,
      "grad_norm": 0.5870215069472015,
      "learning_rate": 0.00014521257548804644,
      "loss": 1.6393,
      "step": 1806
    },
    {
      "epoch": 0.37043870438704385,
      "grad_norm": 0.549446872033983,
      "learning_rate": 0.00014515333583108896,
      "loss": 1.6776,
      "step": 1807
    },
    {
      "epoch": 0.37064370643706435,
      "grad_norm": 0.5300151703158199,
      "learning_rate": 0.00014509407626356232,
      "loss": 1.6533,
      "step": 1808
    },
    {
      "epoch": 0.37084870848708484,
      "grad_norm": 0.5989410462999809,
      "learning_rate": 0.00014503479681159738,
      "loss": 1.6537,
      "step": 1809
    },
    {
      "epoch": 0.3710537105371054,
      "grad_norm": 0.5368992858540935,
      "learning_rate": 0.00014497549750133365,
      "loss": 1.6781,
      "step": 1810
    },
    {
      "epoch": 0.3712587125871259,
      "grad_norm": 0.5533226806844579,
      "learning_rate": 0.0001449161783589194,
      "loss": 1.6581,
      "step": 1811
    },
    {
      "epoch": 0.3714637146371464,
      "grad_norm": 0.5241552700758864,
      "learning_rate": 0.00014485683941051173,
      "loss": 1.5942,
      "step": 1812
    },
    {
      "epoch": 0.3716687166871669,
      "grad_norm": 0.6058610852237856,
      "learning_rate": 0.00014479748068227637,
      "loss": 1.5724,
      "step": 1813
    },
    {
      "epoch": 0.3718737187371874,
      "grad_norm": 0.5110595905909968,
      "learning_rate": 0.00014473810220038785,
      "loss": 1.5825,
      "step": 1814
    },
    {
      "epoch": 0.3720787207872079,
      "grad_norm": 0.5783753480313738,
      "learning_rate": 0.0001446787039910294,
      "loss": 1.6987,
      "step": 1815
    },
    {
      "epoch": 0.3722837228372284,
      "grad_norm": 0.5400468144955473,
      "learning_rate": 0.00014461928608039285,
      "loss": 1.7266,
      "step": 1816
    },
    {
      "epoch": 0.3724887248872489,
      "grad_norm": 0.5860991322357123,
      "learning_rate": 0.0001445598484946789,
      "loss": 1.636,
      "step": 1817
    },
    {
      "epoch": 0.3726937269372694,
      "grad_norm": 0.5023613601584973,
      "learning_rate": 0.00014450039126009677,
      "loss": 1.647,
      "step": 1818
    },
    {
      "epoch": 0.3728987289872899,
      "grad_norm": 0.5650956274875231,
      "learning_rate": 0.0001444409144028644,
      "loss": 1.6734,
      "step": 1819
    },
    {
      "epoch": 0.37310373103731037,
      "grad_norm": 0.46927417237734576,
      "learning_rate": 0.00014438141794920838,
      "loss": 1.6833,
      "step": 1820
    },
    {
      "epoch": 0.37330873308733087,
      "grad_norm": 0.5283852968314336,
      "learning_rate": 0.00014432190192536397,
      "loss": 1.6737,
      "step": 1821
    },
    {
      "epoch": 0.37351373513735137,
      "grad_norm": 0.5085186939183504,
      "learning_rate": 0.000144262366357575,
      "loss": 1.6372,
      "step": 1822
    },
    {
      "epoch": 0.37371873718737186,
      "grad_norm": 0.5205053198340397,
      "learning_rate": 0.00014420281127209398,
      "loss": 1.5857,
      "step": 1823
    },
    {
      "epoch": 0.37392373923739236,
      "grad_norm": 0.5229325072501138,
      "learning_rate": 0.00014414323669518193,
      "loss": 1.6506,
      "step": 1824
    },
    {
      "epoch": 0.37412874128741286,
      "grad_norm": 0.5629653343937632,
      "learning_rate": 0.00014408364265310864,
      "loss": 1.691,
      "step": 1825
    },
    {
      "epoch": 0.37433374333743336,
      "grad_norm": 0.5134321816295735,
      "learning_rate": 0.00014402402917215227,
      "loss": 1.5935,
      "step": 1826
    },
    {
      "epoch": 0.37453874538745385,
      "grad_norm": 0.5466101442618813,
      "learning_rate": 0.0001439643962785997,
      "loss": 1.738,
      "step": 1827
    },
    {
      "epoch": 0.37474374743747435,
      "grad_norm": 0.5238125414218799,
      "learning_rate": 0.00014390474399874636,
      "loss": 1.5999,
      "step": 1828
    },
    {
      "epoch": 0.37494874948749485,
      "grad_norm": 0.5218377363580129,
      "learning_rate": 0.00014384507235889614,
      "loss": 1.6034,
      "step": 1829
    },
    {
      "epoch": 0.3751537515375154,
      "grad_norm": 0.4976050208989313,
      "learning_rate": 0.00014378538138536153,
      "loss": 1.5986,
      "step": 1830
    },
    {
      "epoch": 0.3753587535875359,
      "grad_norm": 0.5137796969843305,
      "learning_rate": 0.00014372567110446357,
      "loss": 1.5881,
      "step": 1831
    },
    {
      "epoch": 0.3755637556375564,
      "grad_norm": 0.5736930190020063,
      "learning_rate": 0.00014366594154253175,
      "loss": 1.6483,
      "step": 1832
    },
    {
      "epoch": 0.3757687576875769,
      "grad_norm": 0.5116032297411255,
      "learning_rate": 0.00014360619272590412,
      "loss": 1.5853,
      "step": 1833
    },
    {
      "epoch": 0.3759737597375974,
      "grad_norm": 0.4980688711218655,
      "learning_rate": 0.00014354642468092713,
      "loss": 1.6644,
      "step": 1834
    },
    {
      "epoch": 0.3761787617876179,
      "grad_norm": 0.5377789744253809,
      "learning_rate": 0.00014348663743395584,
      "loss": 1.6568,
      "step": 1835
    },
    {
      "epoch": 0.3763837638376384,
      "grad_norm": 0.5230764135862036,
      "learning_rate": 0.0001434268310113537,
      "loss": 1.6075,
      "step": 1836
    },
    {
      "epoch": 0.3765887658876589,
      "grad_norm": 0.504561126454411,
      "learning_rate": 0.00014336700543949256,
      "loss": 1.6486,
      "step": 1837
    },
    {
      "epoch": 0.3767937679376794,
      "grad_norm": 0.4701493381619338,
      "learning_rate": 0.00014330716074475286,
      "loss": 1.6083,
      "step": 1838
    },
    {
      "epoch": 0.3769987699876999,
      "grad_norm": 0.5138330976926028,
      "learning_rate": 0.00014324729695352337,
      "loss": 1.6443,
      "step": 1839
    },
    {
      "epoch": 0.3772037720377204,
      "grad_norm": 0.5286145108714454,
      "learning_rate": 0.00014318741409220128,
      "loss": 1.5607,
      "step": 1840
    },
    {
      "epoch": 0.3774087740877409,
      "grad_norm": 0.5723966036621873,
      "learning_rate": 0.00014312751218719224,
      "loss": 1.6693,
      "step": 1841
    },
    {
      "epoch": 0.37761377613776137,
      "grad_norm": 0.49228538180092907,
      "learning_rate": 0.00014306759126491022,
      "loss": 1.68,
      "step": 1842
    },
    {
      "epoch": 0.37781877818778187,
      "grad_norm": 0.5511010808124289,
      "learning_rate": 0.00014300765135177764,
      "loss": 1.6871,
      "step": 1843
    },
    {
      "epoch": 0.37802378023780236,
      "grad_norm": 0.5467359429891819,
      "learning_rate": 0.0001429476924742253,
      "loss": 1.6168,
      "step": 1844
    },
    {
      "epoch": 0.37822878228782286,
      "grad_norm": 0.5543801792450568,
      "learning_rate": 0.00014288771465869235,
      "loss": 1.6691,
      "step": 1845
    },
    {
      "epoch": 0.37843378433784336,
      "grad_norm": 0.49778237935472697,
      "learning_rate": 0.00014282771793162625,
      "loss": 1.6203,
      "step": 1846
    },
    {
      "epoch": 0.37863878638786386,
      "grad_norm": 0.5977987421200576,
      "learning_rate": 0.00014276770231948284,
      "loss": 1.668,
      "step": 1847
    },
    {
      "epoch": 0.37884378843788435,
      "grad_norm": 0.5387975611512261,
      "learning_rate": 0.00014270766784872627,
      "loss": 1.6272,
      "step": 1848
    },
    {
      "epoch": 0.37904879048790485,
      "grad_norm": 0.501506501292781,
      "learning_rate": 0.00014264761454582903,
      "loss": 1.5806,
      "step": 1849
    },
    {
      "epoch": 0.3792537925379254,
      "grad_norm": 0.4617814254616416,
      "learning_rate": 0.0001425875424372719,
      "loss": 1.5993,
      "step": 1850
    },
    {
      "epoch": 0.3794587945879459,
      "grad_norm": 0.5358879751000583,
      "learning_rate": 0.00014252745154954392,
      "loss": 1.55,
      "step": 1851
    },
    {
      "epoch": 0.3796637966379664,
      "grad_norm": 0.4923664734760799,
      "learning_rate": 0.00014246734190914245,
      "loss": 1.8,
      "step": 1852
    },
    {
      "epoch": 0.3798687986879869,
      "grad_norm": 0.481058797874573,
      "learning_rate": 0.00014240721354257313,
      "loss": 1.5733,
      "step": 1853
    },
    {
      "epoch": 0.3800738007380074,
      "grad_norm": 0.4933341040676817,
      "learning_rate": 0.0001423470664763498,
      "loss": 1.6176,
      "step": 1854
    },
    {
      "epoch": 0.3802788027880279,
      "grad_norm": 0.502656617402608,
      "learning_rate": 0.00014228690073699466,
      "loss": 1.5912,
      "step": 1855
    },
    {
      "epoch": 0.3804838048380484,
      "grad_norm": 0.5286207683211155,
      "learning_rate": 0.00014222671635103802,
      "loss": 1.6505,
      "step": 1856
    },
    {
      "epoch": 0.3806888068880689,
      "grad_norm": 0.5127352122792401,
      "learning_rate": 0.0001421665133450184,
      "loss": 1.6597,
      "step": 1857
    },
    {
      "epoch": 0.3808938089380894,
      "grad_norm": 0.4977091356272721,
      "learning_rate": 0.0001421062917454827,
      "loss": 1.6198,
      "step": 1858
    },
    {
      "epoch": 0.3810988109881099,
      "grad_norm": 0.5021022640916121,
      "learning_rate": 0.0001420460515789858,
      "loss": 1.639,
      "step": 1859
    },
    {
      "epoch": 0.3813038130381304,
      "grad_norm": 0.5326355391478207,
      "learning_rate": 0.00014198579287209097,
      "loss": 1.6867,
      "step": 1860
    },
    {
      "epoch": 0.3815088150881509,
      "grad_norm": 0.5146240562095568,
      "learning_rate": 0.00014192551565136953,
      "loss": 1.6063,
      "step": 1861
    },
    {
      "epoch": 0.3817138171381714,
      "grad_norm": 0.4966864123885708,
      "learning_rate": 0.00014186521994340095,
      "loss": 1.6287,
      "step": 1862
    },
    {
      "epoch": 0.38191881918819187,
      "grad_norm": 0.5473012060097575,
      "learning_rate": 0.00014180490577477293,
      "loss": 1.614,
      "step": 1863
    },
    {
      "epoch": 0.38212382123821237,
      "grad_norm": 0.5408838808758386,
      "learning_rate": 0.00014174457317208132,
      "loss": 1.6388,
      "step": 1864
    },
    {
      "epoch": 0.38232882328823287,
      "grad_norm": 0.49167247744856524,
      "learning_rate": 0.00014168422216193,
      "loss": 1.6015,
      "step": 1865
    },
    {
      "epoch": 0.38253382533825336,
      "grad_norm": 0.5568825321692835,
      "learning_rate": 0.00014162385277093103,
      "loss": 1.6386,
      "step": 1866
    },
    {
      "epoch": 0.38273882738827386,
      "grad_norm": 0.603523886052413,
      "learning_rate": 0.00014156346502570453,
      "loss": 1.7053,
      "step": 1867
    },
    {
      "epoch": 0.38294382943829436,
      "grad_norm": 0.5085498766412403,
      "learning_rate": 0.00014150305895287886,
      "loss": 1.5173,
      "step": 1868
    },
    {
      "epoch": 0.38314883148831486,
      "grad_norm": 0.5122342458911501,
      "learning_rate": 0.0001414426345790903,
      "loss": 1.6655,
      "step": 1869
    },
    {
      "epoch": 0.3833538335383354,
      "grad_norm": 0.5193474716248473,
      "learning_rate": 0.00014138219193098321,
      "loss": 1.608,
      "step": 1870
    },
    {
      "epoch": 0.3835588355883559,
      "grad_norm": 0.5403398184208354,
      "learning_rate": 0.00014132173103521012,
      "loss": 1.6687,
      "step": 1871
    },
    {
      "epoch": 0.3837638376383764,
      "grad_norm": 0.4666837183754349,
      "learning_rate": 0.00014126125191843146,
      "loss": 1.5998,
      "step": 1872
    },
    {
      "epoch": 0.3839688396883969,
      "grad_norm": 0.4917514933571927,
      "learning_rate": 0.00014120075460731583,
      "loss": 1.7046,
      "step": 1873
    },
    {
      "epoch": 0.3841738417384174,
      "grad_norm": 0.4792509097853011,
      "learning_rate": 0.00014114023912853977,
      "loss": 1.6512,
      "step": 1874
    },
    {
      "epoch": 0.3843788437884379,
      "grad_norm": 0.5248543484097465,
      "learning_rate": 0.00014107970550878787,
      "loss": 1.5534,
      "step": 1875
    },
    {
      "epoch": 0.3845838458384584,
      "grad_norm": 0.4330975430003677,
      "learning_rate": 0.00014101915377475274,
      "loss": 1.5329,
      "step": 1876
    },
    {
      "epoch": 0.3847888478884789,
      "grad_norm": 0.5271389145154657,
      "learning_rate": 0.00014095858395313484,
      "loss": 1.6893,
      "step": 1877
    },
    {
      "epoch": 0.3849938499384994,
      "grad_norm": 0.5275935385395334,
      "learning_rate": 0.0001408979960706428,
      "loss": 1.6249,
      "step": 1878
    },
    {
      "epoch": 0.3851988519885199,
      "grad_norm": 0.5251051425457449,
      "learning_rate": 0.00014083739015399314,
      "loss": 1.6248,
      "step": 1879
    },
    {
      "epoch": 0.3854038540385404,
      "grad_norm": 0.5472960361944075,
      "learning_rate": 0.0001407767662299102,
      "loss": 1.7137,
      "step": 1880
    },
    {
      "epoch": 0.3856088560885609,
      "grad_norm": 0.4842862703840487,
      "learning_rate": 0.00014071612432512651,
      "loss": 1.6399,
      "step": 1881
    },
    {
      "epoch": 0.3858138581385814,
      "grad_norm": 0.514186897854033,
      "learning_rate": 0.0001406554644663823,
      "loss": 1.7584,
      "step": 1882
    },
    {
      "epoch": 0.3860188601886019,
      "grad_norm": 0.5256250504230642,
      "learning_rate": 0.00014059478668042581,
      "loss": 1.6759,
      "step": 1883
    },
    {
      "epoch": 0.3862238622386224,
      "grad_norm": 0.47708847797483583,
      "learning_rate": 0.00014053409099401323,
      "loss": 1.5653,
      "step": 1884
    },
    {
      "epoch": 0.38642886428864287,
      "grad_norm": 0.4808074567168712,
      "learning_rate": 0.00014047337743390865,
      "loss": 1.6085,
      "step": 1885
    },
    {
      "epoch": 0.38663386633866337,
      "grad_norm": 0.5379097049586811,
      "learning_rate": 0.00014041264602688387,
      "loss": 1.671,
      "step": 1886
    },
    {
      "epoch": 0.38683886838868387,
      "grad_norm": 0.5590844068440689,
      "learning_rate": 0.00014035189679971875,
      "loss": 1.6947,
      "step": 1887
    },
    {
      "epoch": 0.38704387043870436,
      "grad_norm": 0.46778803409473096,
      "learning_rate": 0.00014029112977920088,
      "loss": 1.6003,
      "step": 1888
    },
    {
      "epoch": 0.38724887248872486,
      "grad_norm": 0.5235748950788491,
      "learning_rate": 0.00014023034499212588,
      "loss": 1.6214,
      "step": 1889
    },
    {
      "epoch": 0.3874538745387454,
      "grad_norm": 0.5084301034829668,
      "learning_rate": 0.00014016954246529696,
      "loss": 1.6589,
      "step": 1890
    },
    {
      "epoch": 0.3876588765887659,
      "grad_norm": 0.5150116757044244,
      "learning_rate": 0.00014010872222552532,
      "loss": 1.6186,
      "step": 1891
    },
    {
      "epoch": 0.3878638786387864,
      "grad_norm": 0.46063976559991104,
      "learning_rate": 0.00014004788429962988,
      "loss": 1.5996,
      "step": 1892
    },
    {
      "epoch": 0.3880688806888069,
      "grad_norm": 0.5333616264893559,
      "learning_rate": 0.00013998702871443748,
      "loss": 1.7368,
      "step": 1893
    },
    {
      "epoch": 0.3882738827388274,
      "grad_norm": 0.5231380511987593,
      "learning_rate": 0.00013992615549678262,
      "loss": 1.5673,
      "step": 1894
    },
    {
      "epoch": 0.3884788847888479,
      "grad_norm": 0.46415379036185195,
      "learning_rate": 0.0001398652646735076,
      "loss": 1.681,
      "step": 1895
    },
    {
      "epoch": 0.3886838868388684,
      "grad_norm": 0.5021717862089831,
      "learning_rate": 0.00013980435627146252,
      "loss": 1.6704,
      "step": 1896
    },
    {
      "epoch": 0.3888888888888889,
      "grad_norm": 0.5488608334361356,
      "learning_rate": 0.00013974343031750524,
      "loss": 1.5421,
      "step": 1897
    },
    {
      "epoch": 0.3890938909389094,
      "grad_norm": 0.514548825932426,
      "learning_rate": 0.00013968248683850134,
      "loss": 1.6595,
      "step": 1898
    },
    {
      "epoch": 0.3892988929889299,
      "grad_norm": 0.6140825978519333,
      "learning_rate": 0.0001396215258613241,
      "loss": 1.6863,
      "step": 1899
    },
    {
      "epoch": 0.3895038950389504,
      "grad_norm": 0.5235700004370976,
      "learning_rate": 0.00013956054741285452,
      "loss": 1.6869,
      "step": 1900
    },
    {
      "epoch": 0.3897088970889709,
      "grad_norm": 0.5156685806462629,
      "learning_rate": 0.00013949955151998136,
      "loss": 1.6603,
      "step": 1901
    },
    {
      "epoch": 0.3899138991389914,
      "grad_norm": 0.5547814540033799,
      "learning_rate": 0.00013943853820960105,
      "loss": 1.5975,
      "step": 1902
    },
    {
      "epoch": 0.3901189011890119,
      "grad_norm": 0.4763428662757902,
      "learning_rate": 0.00013937750750861767,
      "loss": 1.592,
      "step": 1903
    },
    {
      "epoch": 0.3903239032390324,
      "grad_norm": 0.47301906468085203,
      "learning_rate": 0.00013931645944394297,
      "loss": 1.6213,
      "step": 1904
    },
    {
      "epoch": 0.3905289052890529,
      "grad_norm": 0.5168194189397619,
      "learning_rate": 0.00013925539404249638,
      "loss": 1.6669,
      "step": 1905
    },
    {
      "epoch": 0.39073390733907337,
      "grad_norm": 0.517618601215397,
      "learning_rate": 0.000139194311331205,
      "loss": 1.6276,
      "step": 1906
    },
    {
      "epoch": 0.39093890938909387,
      "grad_norm": 0.5740641387947559,
      "learning_rate": 0.00013913321133700345,
      "loss": 1.6484,
      "step": 1907
    },
    {
      "epoch": 0.39114391143911437,
      "grad_norm": 0.5220024956764855,
      "learning_rate": 0.00013907209408683415,
      "loss": 1.6518,
      "step": 1908
    },
    {
      "epoch": 0.39134891348913486,
      "grad_norm": 0.5069357463688486,
      "learning_rate": 0.00013901095960764696,
      "loss": 1.6506,
      "step": 1909
    },
    {
      "epoch": 0.3915539155391554,
      "grad_norm": 0.5162482614187639,
      "learning_rate": 0.00013894980792639945,
      "loss": 1.594,
      "step": 1910
    },
    {
      "epoch": 0.3917589175891759,
      "grad_norm": 0.5475161774302121,
      "learning_rate": 0.00013888863907005668,
      "loss": 1.6714,
      "step": 1911
    },
    {
      "epoch": 0.3919639196391964,
      "grad_norm": 0.639273711472952,
      "learning_rate": 0.0001388274530655914,
      "loss": 1.7005,
      "step": 1912
    },
    {
      "epoch": 0.3921689216892169,
      "grad_norm": 0.5459673609382728,
      "learning_rate": 0.00013876624993998382,
      "loss": 1.6095,
      "step": 1913
    },
    {
      "epoch": 0.3923739237392374,
      "grad_norm": 0.572023981264735,
      "learning_rate": 0.00013870502972022173,
      "loss": 1.5871,
      "step": 1914
    },
    {
      "epoch": 0.3925789257892579,
      "grad_norm": 0.5467813608844581,
      "learning_rate": 0.00013864379243330046,
      "loss": 1.6066,
      "step": 1915
    },
    {
      "epoch": 0.3927839278392784,
      "grad_norm": 0.5896010275575714,
      "learning_rate": 0.00013858253810622293,
      "loss": 1.7256,
      "step": 1916
    },
    {
      "epoch": 0.3929889298892989,
      "grad_norm": 0.5430639565544005,
      "learning_rate": 0.00013852126676599944,
      "loss": 1.7053,
      "step": 1917
    },
    {
      "epoch": 0.3931939319393194,
      "grad_norm": 0.5367496954101093,
      "learning_rate": 0.00013845997843964792,
      "loss": 1.6492,
      "step": 1918
    },
    {
      "epoch": 0.3933989339893399,
      "grad_norm": 0.5096222945458827,
      "learning_rate": 0.0001383986731541937,
      "loss": 1.6632,
      "step": 1919
    },
    {
      "epoch": 0.3936039360393604,
      "grad_norm": 0.5328538003146586,
      "learning_rate": 0.00013833735093666963,
      "loss": 1.6541,
      "step": 1920
    },
    {
      "epoch": 0.3938089380893809,
      "grad_norm": 0.5460442800553386,
      "learning_rate": 0.00013827601181411604,
      "loss": 1.6917,
      "step": 1921
    },
    {
      "epoch": 0.3940139401394014,
      "grad_norm": 0.5199979660322286,
      "learning_rate": 0.00013821465581358072,
      "loss": 1.565,
      "step": 1922
    },
    {
      "epoch": 0.3942189421894219,
      "grad_norm": 0.5130904636502194,
      "learning_rate": 0.0001381532829621188,
      "loss": 1.6758,
      "step": 1923
    },
    {
      "epoch": 0.3944239442394424,
      "grad_norm": 0.5419366826007934,
      "learning_rate": 0.000138091893286793,
      "loss": 1.7313,
      "step": 1924
    },
    {
      "epoch": 0.3946289462894629,
      "grad_norm": 0.456210471109287,
      "learning_rate": 0.0001380304868146733,
      "loss": 1.54,
      "step": 1925
    },
    {
      "epoch": 0.3948339483394834,
      "grad_norm": 0.5375926738816738,
      "learning_rate": 0.00013796906357283723,
      "loss": 1.6607,
      "step": 1926
    },
    {
      "epoch": 0.3950389503895039,
      "grad_norm": 0.5489172672263147,
      "learning_rate": 0.0001379076235883696,
      "loss": 1.6175,
      "step": 1927
    },
    {
      "epoch": 0.39524395243952437,
      "grad_norm": 0.5168211728003793,
      "learning_rate": 0.0001378461668883627,
      "loss": 1.6294,
      "step": 1928
    },
    {
      "epoch": 0.39544895448954487,
      "grad_norm": 0.5087723008887542,
      "learning_rate": 0.0001377846934999161,
      "loss": 1.6459,
      "step": 1929
    },
    {
      "epoch": 0.3956539565395654,
      "grad_norm": 0.5186032448868557,
      "learning_rate": 0.00013772320345013678,
      "loss": 1.6256,
      "step": 1930
    },
    {
      "epoch": 0.3958589585895859,
      "grad_norm": 0.5702188115361192,
      "learning_rate": 0.00013766169676613906,
      "loss": 1.62,
      "step": 1931
    },
    {
      "epoch": 0.3960639606396064,
      "grad_norm": 0.5450350941952552,
      "learning_rate": 0.00013760017347504462,
      "loss": 1.5974,
      "step": 1932
    },
    {
      "epoch": 0.3962689626896269,
      "grad_norm": 0.52807741403906,
      "learning_rate": 0.00013753863360398241,
      "loss": 1.5844,
      "step": 1933
    },
    {
      "epoch": 0.3964739647396474,
      "grad_norm": 0.5277606054926492,
      "learning_rate": 0.0001374770771800887,
      "loss": 1.6242,
      "step": 1934
    },
    {
      "epoch": 0.3966789667896679,
      "grad_norm": 0.5345399675824376,
      "learning_rate": 0.00013741550423050712,
      "loss": 1.6462,
      "step": 1935
    },
    {
      "epoch": 0.3968839688396884,
      "grad_norm": 0.4757854703010009,
      "learning_rate": 0.00013735391478238848,
      "loss": 1.6136,
      "step": 1936
    },
    {
      "epoch": 0.3970889708897089,
      "grad_norm": 0.4883807837040311,
      "learning_rate": 0.00013729230886289098,
      "loss": 1.6681,
      "step": 1937
    },
    {
      "epoch": 0.3972939729397294,
      "grad_norm": 0.5201768366633468,
      "learning_rate": 0.00013723068649918,
      "loss": 1.6626,
      "step": 1938
    },
    {
      "epoch": 0.3974989749897499,
      "grad_norm": 0.5931031713663376,
      "learning_rate": 0.00013716904771842825,
      "loss": 1.7142,
      "step": 1939
    },
    {
      "epoch": 0.3977039770397704,
      "grad_norm": 0.5531884250131377,
      "learning_rate": 0.00013710739254781554,
      "loss": 1.6061,
      "step": 1940
    },
    {
      "epoch": 0.3979089790897909,
      "grad_norm": 0.5256474386396165,
      "learning_rate": 0.00013704572101452911,
      "loss": 1.721,
      "step": 1941
    },
    {
      "epoch": 0.3981139811398114,
      "grad_norm": 0.545159683838341,
      "learning_rate": 0.00013698403314576325,
      "loss": 1.6526,
      "step": 1942
    },
    {
      "epoch": 0.3983189831898319,
      "grad_norm": 0.4998367611503313,
      "learning_rate": 0.00013692232896871947,
      "loss": 1.6041,
      "step": 1943
    },
    {
      "epoch": 0.3985239852398524,
      "grad_norm": 0.5440881416443756,
      "learning_rate": 0.00013686060851060656,
      "loss": 1.6655,
      "step": 1944
    },
    {
      "epoch": 0.3987289872898729,
      "grad_norm": 0.49882710323321633,
      "learning_rate": 0.00013679887179864043,
      "loss": 1.6669,
      "step": 1945
    },
    {
      "epoch": 0.3989339893398934,
      "grad_norm": 0.5022680898309423,
      "learning_rate": 0.00013673711886004415,
      "loss": 1.6235,
      "step": 1946
    },
    {
      "epoch": 0.3991389913899139,
      "grad_norm": 0.6028555679768342,
      "learning_rate": 0.00013667534972204795,
      "loss": 1.6772,
      "step": 1947
    },
    {
      "epoch": 0.3993439934399344,
      "grad_norm": 0.5403072958120235,
      "learning_rate": 0.00013661356441188922,
      "loss": 1.6852,
      "step": 1948
    },
    {
      "epoch": 0.3995489954899549,
      "grad_norm": 0.5067037244078386,
      "learning_rate": 0.0001365517629568125,
      "loss": 1.6407,
      "step": 1949
    },
    {
      "epoch": 0.3997539975399754,
      "grad_norm": 0.5480829305600373,
      "learning_rate": 0.0001364899453840694,
      "loss": 1.6229,
      "step": 1950
    },
    {
      "epoch": 0.3999589995899959,
      "grad_norm": 0.5392124978523058,
      "learning_rate": 0.0001364281117209187,
      "loss": 1.6596,
      "step": 1951
    },
    {
      "epoch": 0.4001640016400164,
      "grad_norm": 0.5411960766962421,
      "learning_rate": 0.00013636626199462615,
      "loss": 1.5984,
      "step": 1952
    },
    {
      "epoch": 0.4003690036900369,
      "grad_norm": 0.464630641605827,
      "learning_rate": 0.00013630439623246474,
      "loss": 1.6089,
      "step": 1953
    },
    {
      "epoch": 0.4005740057400574,
      "grad_norm": 0.5363660824215917,
      "learning_rate": 0.00013624251446171445,
      "loss": 1.6299,
      "step": 1954
    },
    {
      "epoch": 0.4007790077900779,
      "grad_norm": 0.5249787036699148,
      "learning_rate": 0.00013618061670966227,
      "loss": 1.5612,
      "step": 1955
    },
    {
      "epoch": 0.4009840098400984,
      "grad_norm": 0.524575800254269,
      "learning_rate": 0.0001361187030036024,
      "loss": 1.6192,
      "step": 1956
    },
    {
      "epoch": 0.4011890118901189,
      "grad_norm": 0.49667668786480623,
      "learning_rate": 0.00013605677337083586,
      "loss": 1.6444,
      "step": 1957
    },
    {
      "epoch": 0.4013940139401394,
      "grad_norm": 0.5029661817388345,
      "learning_rate": 0.0001359948278386709,
      "loss": 1.6532,
      "step": 1958
    },
    {
      "epoch": 0.4015990159901599,
      "grad_norm": 0.5761850896172349,
      "learning_rate": 0.00013593286643442265,
      "loss": 1.6693,
      "step": 1959
    },
    {
      "epoch": 0.4018040180401804,
      "grad_norm": 0.5288200268116514,
      "learning_rate": 0.00013587088918541322,
      "loss": 1.6374,
      "step": 1960
    },
    {
      "epoch": 0.4020090200902009,
      "grad_norm": 0.5156510963772725,
      "learning_rate": 0.00013580889611897184,
      "loss": 1.6024,
      "step": 1961
    },
    {
      "epoch": 0.4022140221402214,
      "grad_norm": 0.5272588040940779,
      "learning_rate": 0.0001357468872624346,
      "loss": 1.6536,
      "step": 1962
    },
    {
      "epoch": 0.4024190241902419,
      "grad_norm": 0.5901458443834909,
      "learning_rate": 0.00013568486264314456,
      "loss": 1.7249,
      "step": 1963
    },
    {
      "epoch": 0.4026240262402624,
      "grad_norm": 0.5200718559261266,
      "learning_rate": 0.00013562282228845183,
      "loss": 1.6553,
      "step": 1964
    },
    {
      "epoch": 0.4028290282902829,
      "grad_norm": 0.4973690946582959,
      "learning_rate": 0.0001355607662257133,
      "loss": 1.6603,
      "step": 1965
    },
    {
      "epoch": 0.4030340303403034,
      "grad_norm": 0.5626474630298155,
      "learning_rate": 0.00013549869448229294,
      "loss": 1.6599,
      "step": 1966
    },
    {
      "epoch": 0.4032390323903239,
      "grad_norm": 0.5581489194311449,
      "learning_rate": 0.00013543660708556157,
      "loss": 1.6086,
      "step": 1967
    },
    {
      "epoch": 0.4034440344403444,
      "grad_norm": 0.4949076014961063,
      "learning_rate": 0.00013537450406289685,
      "loss": 1.5967,
      "step": 1968
    },
    {
      "epoch": 0.4036490364903649,
      "grad_norm": 0.5435251256139924,
      "learning_rate": 0.00013531238544168343,
      "loss": 1.6389,
      "step": 1969
    },
    {
      "epoch": 0.40385403854038543,
      "grad_norm": 0.5145533922449875,
      "learning_rate": 0.0001352502512493128,
      "loss": 1.5798,
      "step": 1970
    },
    {
      "epoch": 0.4040590405904059,
      "grad_norm": 0.5072794838453296,
      "learning_rate": 0.0001351881015131833,
      "loss": 1.667,
      "step": 1971
    },
    {
      "epoch": 0.4042640426404264,
      "grad_norm": 0.5029798339796385,
      "learning_rate": 0.0001351259362607002,
      "loss": 1.5781,
      "step": 1972
    },
    {
      "epoch": 0.4044690446904469,
      "grad_norm": 0.5736183300894925,
      "learning_rate": 0.00013506375551927547,
      "loss": 1.7497,
      "step": 1973
    },
    {
      "epoch": 0.4046740467404674,
      "grad_norm": 0.526322658976693,
      "learning_rate": 0.000135001559316328,
      "loss": 1.6223,
      "step": 1974
    },
    {
      "epoch": 0.4048790487904879,
      "grad_norm": 0.5781298647896926,
      "learning_rate": 0.00013493934767928352,
      "loss": 1.6686,
      "step": 1975
    },
    {
      "epoch": 0.4050840508405084,
      "grad_norm": 0.641387973875099,
      "learning_rate": 0.00013487712063557452,
      "loss": 1.6772,
      "step": 1976
    },
    {
      "epoch": 0.4052890528905289,
      "grad_norm": 0.5555644827933304,
      "learning_rate": 0.00013481487821264033,
      "loss": 1.6931,
      "step": 1977
    },
    {
      "epoch": 0.4054940549405494,
      "grad_norm": 0.5601542479062248,
      "learning_rate": 0.000134752620437927,
      "loss": 1.6774,
      "step": 1978
    },
    {
      "epoch": 0.4056990569905699,
      "grad_norm": 0.5273240386483041,
      "learning_rate": 0.00013469034733888736,
      "loss": 1.5764,
      "step": 1979
    },
    {
      "epoch": 0.4059040590405904,
      "grad_norm": 0.47925132004844073,
      "learning_rate": 0.00013462805894298106,
      "loss": 1.6306,
      "step": 1980
    },
    {
      "epoch": 0.4061090610906109,
      "grad_norm": 0.5273996375854236,
      "learning_rate": 0.00013456575527767445,
      "loss": 1.6449,
      "step": 1981
    },
    {
      "epoch": 0.4063140631406314,
      "grad_norm": 0.562906886797999,
      "learning_rate": 0.00013450343637044058,
      "loss": 1.5631,
      "step": 1982
    },
    {
      "epoch": 0.4065190651906519,
      "grad_norm": 0.5425512826454009,
      "learning_rate": 0.00013444110224875925,
      "loss": 1.59,
      "step": 1983
    },
    {
      "epoch": 0.4067240672406724,
      "grad_norm": 0.5032492279285105,
      "learning_rate": 0.00013437875294011704,
      "loss": 1.6719,
      "step": 1984
    },
    {
      "epoch": 0.4069290692906929,
      "grad_norm": 0.5717038458224182,
      "learning_rate": 0.00013431638847200708,
      "loss": 1.6815,
      "step": 1985
    },
    {
      "epoch": 0.4071340713407134,
      "grad_norm": 0.5357452489607015,
      "learning_rate": 0.00013425400887192933,
      "loss": 1.6323,
      "step": 1986
    },
    {
      "epoch": 0.4073390733907339,
      "grad_norm": 0.5835632135711694,
      "learning_rate": 0.00013419161416739032,
      "loss": 1.6831,
      "step": 1987
    },
    {
      "epoch": 0.4075440754407544,
      "grad_norm": 0.503318136097742,
      "learning_rate": 0.0001341292043859033,
      "loss": 1.6387,
      "step": 1988
    },
    {
      "epoch": 0.4077490774907749,
      "grad_norm": 0.5136942658239361,
      "learning_rate": 0.00013406677955498818,
      "loss": 1.699,
      "step": 1989
    },
    {
      "epoch": 0.40795407954079543,
      "grad_norm": 0.5437296027247385,
      "learning_rate": 0.00013400433970217135,
      "loss": 1.6169,
      "step": 1990
    },
    {
      "epoch": 0.40815908159081593,
      "grad_norm": 0.5467446983910647,
      "learning_rate": 0.0001339418848549861,
      "loss": 1.6595,
      "step": 1991
    },
    {
      "epoch": 0.40836408364083643,
      "grad_norm": 0.5219812157735233,
      "learning_rate": 0.00013387941504097213,
      "loss": 1.6329,
      "step": 1992
    },
    {
      "epoch": 0.4085690856908569,
      "grad_norm": 0.5435807763490486,
      "learning_rate": 0.00013381693028767573,
      "loss": 1.6553,
      "step": 1993
    },
    {
      "epoch": 0.4087740877408774,
      "grad_norm": 0.5081232456586999,
      "learning_rate": 0.00013375443062264988,
      "loss": 1.6199,
      "step": 1994
    },
    {
      "epoch": 0.4089790897908979,
      "grad_norm": 0.5085429537660069,
      "learning_rate": 0.0001336919160734541,
      "loss": 1.6778,
      "step": 1995
    },
    {
      "epoch": 0.4091840918409184,
      "grad_norm": 0.48189414321970997,
      "learning_rate": 0.00013362938666765443,
      "loss": 1.6444,
      "step": 1996
    },
    {
      "epoch": 0.4093890938909389,
      "grad_norm": 0.4986801106157369,
      "learning_rate": 0.00013356684243282356,
      "loss": 1.6475,
      "step": 1997
    },
    {
      "epoch": 0.4095940959409594,
      "grad_norm": 0.5597030678284673,
      "learning_rate": 0.00013350428339654058,
      "loss": 1.7235,
      "step": 1998
    },
    {
      "epoch": 0.4097990979909799,
      "grad_norm": 0.5187518865617184,
      "learning_rate": 0.00013344170958639123,
      "loss": 1.598,
      "step": 1999
    },
    {
      "epoch": 0.4100041000410004,
      "grad_norm": 0.45624808165714237,
      "learning_rate": 0.00013337912102996772,
      "loss": 1.6117,
      "step": 2000
    },
    {
      "epoch": 0.4102091020910209,
      "grad_norm": 0.5382054962947498,
      "learning_rate": 0.00013331651775486873,
      "loss": 1.6095,
      "step": 2001
    },
    {
      "epoch": 0.4104141041410414,
      "grad_norm": 0.49445168708768333,
      "learning_rate": 0.00013325389978869947,
      "loss": 1.596,
      "step": 2002
    },
    {
      "epoch": 0.4106191061910619,
      "grad_norm": 0.521899673559171,
      "learning_rate": 0.00013319126715907165,
      "loss": 1.6692,
      "step": 2003
    },
    {
      "epoch": 0.4108241082410824,
      "grad_norm": 0.485367847507791,
      "learning_rate": 0.00013312861989360337,
      "loss": 1.6457,
      "step": 2004
    },
    {
      "epoch": 0.4110291102911029,
      "grad_norm": 0.5371500642272338,
      "learning_rate": 0.0001330659580199192,
      "loss": 1.7048,
      "step": 2005
    },
    {
      "epoch": 0.4112341123411234,
      "grad_norm": 0.49676254692300204,
      "learning_rate": 0.00013300328156565027,
      "loss": 1.582,
      "step": 2006
    },
    {
      "epoch": 0.4114391143911439,
      "grad_norm": 0.5460817293620392,
      "learning_rate": 0.000132940590558434,
      "loss": 1.6407,
      "step": 2007
    },
    {
      "epoch": 0.4116441164411644,
      "grad_norm": 0.5394735353452728,
      "learning_rate": 0.00013287788502591426,
      "loss": 1.6469,
      "step": 2008
    },
    {
      "epoch": 0.4118491184911849,
      "grad_norm": 0.5590812181570827,
      "learning_rate": 0.00013281516499574135,
      "loss": 1.6113,
      "step": 2009
    },
    {
      "epoch": 0.41205412054120544,
      "grad_norm": 0.5234343425293484,
      "learning_rate": 0.00013275243049557192,
      "loss": 1.684,
      "step": 2010
    },
    {
      "epoch": 0.41225912259122593,
      "grad_norm": 0.5291689075493777,
      "learning_rate": 0.00013268968155306913,
      "loss": 1.5735,
      "step": 2011
    },
    {
      "epoch": 0.41246412464124643,
      "grad_norm": 0.4888944096601653,
      "learning_rate": 0.00013262691819590234,
      "loss": 1.6448,
      "step": 2012
    },
    {
      "epoch": 0.41266912669126693,
      "grad_norm": 0.520106352497343,
      "learning_rate": 0.00013256414045174735,
      "loss": 1.6199,
      "step": 2013
    },
    {
      "epoch": 0.4128741287412874,
      "grad_norm": 0.5488147932232325,
      "learning_rate": 0.00013250134834828626,
      "loss": 1.6238,
      "step": 2014
    },
    {
      "epoch": 0.4130791307913079,
      "grad_norm": 0.4875942848981133,
      "learning_rate": 0.00013243854191320758,
      "loss": 1.6183,
      "step": 2015
    },
    {
      "epoch": 0.4132841328413284,
      "grad_norm": 0.5087531930253176,
      "learning_rate": 0.0001323757211742061,
      "loss": 1.668,
      "step": 2016
    },
    {
      "epoch": 0.4134891348913489,
      "grad_norm": 0.6033249451828697,
      "learning_rate": 0.0001323128861589829,
      "loss": 1.6017,
      "step": 2017
    },
    {
      "epoch": 0.4136941369413694,
      "grad_norm": 0.47327438166821995,
      "learning_rate": 0.00013225003689524534,
      "loss": 1.6135,
      "step": 2018
    },
    {
      "epoch": 0.4138991389913899,
      "grad_norm": 0.5351667404128417,
      "learning_rate": 0.00013218717341070707,
      "loss": 1.6695,
      "step": 2019
    },
    {
      "epoch": 0.4141041410414104,
      "grad_norm": 0.5132694973313963,
      "learning_rate": 0.00013212429573308812,
      "loss": 1.6484,
      "step": 2020
    },
    {
      "epoch": 0.4143091430914309,
      "grad_norm": 0.5228646219304549,
      "learning_rate": 0.00013206140389011463,
      "loss": 1.6422,
      "step": 2021
    },
    {
      "epoch": 0.4145141451414514,
      "grad_norm": 0.6261495414317594,
      "learning_rate": 0.000131998497909519,
      "loss": 1.6358,
      "step": 2022
    },
    {
      "epoch": 0.4147191471914719,
      "grad_norm": 0.48951632321683874,
      "learning_rate": 0.00013193557781904,
      "loss": 1.7117,
      "step": 2023
    },
    {
      "epoch": 0.4149241492414924,
      "grad_norm": 0.5509823018811135,
      "learning_rate": 0.0001318726436464225,
      "loss": 1.6334,
      "step": 2024
    },
    {
      "epoch": 0.4151291512915129,
      "grad_norm": 0.49880550857827843,
      "learning_rate": 0.0001318096954194176,
      "loss": 1.62,
      "step": 2025
    },
    {
      "epoch": 0.4153341533415334,
      "grad_norm": 0.5283465471770175,
      "learning_rate": 0.00013174673316578256,
      "loss": 1.5566,
      "step": 2026
    },
    {
      "epoch": 0.4155391553915539,
      "grad_norm": 0.5231478124824578,
      "learning_rate": 0.00013168375691328095,
      "loss": 1.608,
      "step": 2027
    },
    {
      "epoch": 0.4157441574415744,
      "grad_norm": 0.450931378405646,
      "learning_rate": 0.0001316207666896824,
      "loss": 1.6248,
      "step": 2028
    },
    {
      "epoch": 0.4159491594915949,
      "grad_norm": 0.5493052752380496,
      "learning_rate": 0.00013155776252276276,
      "loss": 1.6463,
      "step": 2029
    },
    {
      "epoch": 0.41615416154161544,
      "grad_norm": 0.5969219675262446,
      "learning_rate": 0.00013149474444030393,
      "loss": 1.6508,
      "step": 2030
    },
    {
      "epoch": 0.41635916359163594,
      "grad_norm": 0.5039481102809289,
      "learning_rate": 0.00013143171247009415,
      "loss": 1.6625,
      "step": 2031
    },
    {
      "epoch": 0.41656416564165644,
      "grad_norm": 0.5525457894141088,
      "learning_rate": 0.00013136866663992754,
      "loss": 1.6552,
      "step": 2032
    },
    {
      "epoch": 0.41676916769167693,
      "grad_norm": 0.4631889378508682,
      "learning_rate": 0.00013130560697760445,
      "loss": 1.5278,
      "step": 2033
    },
    {
      "epoch": 0.41697416974169743,
      "grad_norm": 0.539112757024435,
      "learning_rate": 0.0001312425335109314,
      "loss": 1.6333,
      "step": 2034
    },
    {
      "epoch": 0.41717917179171793,
      "grad_norm": 0.5262788373972446,
      "learning_rate": 0.0001311794462677209,
      "loss": 1.6967,
      "step": 2035
    },
    {
      "epoch": 0.4173841738417384,
      "grad_norm": 0.5160680580463141,
      "learning_rate": 0.00013111634527579152,
      "loss": 1.5911,
      "step": 2036
    },
    {
      "epoch": 0.4175891758917589,
      "grad_norm": 0.4818656997471046,
      "learning_rate": 0.00013105323056296798,
      "loss": 1.6154,
      "step": 2037
    },
    {
      "epoch": 0.4177941779417794,
      "grad_norm": 0.5487802832339057,
      "learning_rate": 0.00013099010215708088,
      "loss": 1.6189,
      "step": 2038
    },
    {
      "epoch": 0.4179991799917999,
      "grad_norm": 0.5101340652052012,
      "learning_rate": 0.00013092696008596715,
      "loss": 1.5977,
      "step": 2039
    },
    {
      "epoch": 0.4182041820418204,
      "grad_norm": 0.49215672965071877,
      "learning_rate": 0.00013086380437746947,
      "loss": 1.5728,
      "step": 2040
    },
    {
      "epoch": 0.4184091840918409,
      "grad_norm": 0.5350264579579491,
      "learning_rate": 0.00013080063505943666,
      "loss": 1.6559,
      "step": 2041
    },
    {
      "epoch": 0.4186141861418614,
      "grad_norm": 0.5426506186064547,
      "learning_rate": 0.00013073745215972353,
      "loss": 1.6676,
      "step": 2042
    },
    {
      "epoch": 0.4188191881918819,
      "grad_norm": 0.5272612696715079,
      "learning_rate": 0.00013067425570619082,
      "loss": 1.5666,
      "step": 2043
    },
    {
      "epoch": 0.4190241902419024,
      "grad_norm": 0.5741411108582464,
      "learning_rate": 0.00013061104572670537,
      "loss": 1.6532,
      "step": 2044
    },
    {
      "epoch": 0.4192291922919229,
      "grad_norm": 0.5280309590888705,
      "learning_rate": 0.00013054782224913988,
      "loss": 1.5775,
      "step": 2045
    },
    {
      "epoch": 0.4194341943419434,
      "grad_norm": 0.5375310097761675,
      "learning_rate": 0.00013048458530137298,
      "loss": 1.6103,
      "step": 2046
    },
    {
      "epoch": 0.4196391963919639,
      "grad_norm": 0.49084963477795623,
      "learning_rate": 0.00013042133491128935,
      "loss": 1.6106,
      "step": 2047
    },
    {
      "epoch": 0.4198441984419844,
      "grad_norm": 0.5639861533825508,
      "learning_rate": 0.0001303580711067795,
      "loss": 1.6796,
      "step": 2048
    },
    {
      "epoch": 0.4200492004920049,
      "grad_norm": 0.4982352636643307,
      "learning_rate": 0.0001302947939157399,
      "loss": 1.5828,
      "step": 2049
    },
    {
      "epoch": 0.42025420254202545,
      "grad_norm": 0.5384516572175626,
      "learning_rate": 0.00013023150336607297,
      "loss": 1.5727,
      "step": 2050
    },
    {
      "epoch": 0.42045920459204594,
      "grad_norm": 0.5117938323017456,
      "learning_rate": 0.00013016819948568687,
      "loss": 1.6528,
      "step": 2051
    },
    {
      "epoch": 0.42066420664206644,
      "grad_norm": 0.5079816934398997,
      "learning_rate": 0.00013010488230249582,
      "loss": 1.6354,
      "step": 2052
    },
    {
      "epoch": 0.42086920869208694,
      "grad_norm": 0.5288042480301147,
      "learning_rate": 0.00013004155184441978,
      "loss": 1.6638,
      "step": 2053
    },
    {
      "epoch": 0.42107421074210744,
      "grad_norm": 0.5075058718298264,
      "learning_rate": 0.0001299782081393846,
      "loss": 1.6204,
      "step": 2054
    },
    {
      "epoch": 0.42127921279212793,
      "grad_norm": 0.5159503492853633,
      "learning_rate": 0.00012991485121532201,
      "loss": 1.6047,
      "step": 2055
    },
    {
      "epoch": 0.42148421484214843,
      "grad_norm": 0.4858756341486414,
      "learning_rate": 0.00012985148110016947,
      "loss": 1.6458,
      "step": 2056
    },
    {
      "epoch": 0.4216892168921689,
      "grad_norm": 0.5130547018084762,
      "learning_rate": 0.00012978809782187038,
      "loss": 1.6129,
      "step": 2057
    },
    {
      "epoch": 0.4218942189421894,
      "grad_norm": 0.5477055075489994,
      "learning_rate": 0.00012972470140837385,
      "loss": 1.6163,
      "step": 2058
    },
    {
      "epoch": 0.4220992209922099,
      "grad_norm": 0.4999949520517846,
      "learning_rate": 0.00012966129188763485,
      "loss": 1.623,
      "step": 2059
    },
    {
      "epoch": 0.4223042230422304,
      "grad_norm": 0.5172887157516003,
      "learning_rate": 0.00012959786928761407,
      "loss": 1.6731,
      "step": 2060
    },
    {
      "epoch": 0.4225092250922509,
      "grad_norm": 0.5066172544344552,
      "learning_rate": 0.00012953443363627803,
      "loss": 1.6093,
      "step": 2061
    },
    {
      "epoch": 0.4227142271422714,
      "grad_norm": 0.5445742369368239,
      "learning_rate": 0.00012947098496159893,
      "loss": 1.7007,
      "step": 2062
    },
    {
      "epoch": 0.4229192291922919,
      "grad_norm": 0.5746502222729577,
      "learning_rate": 0.00012940752329155473,
      "loss": 1.6079,
      "step": 2063
    },
    {
      "epoch": 0.4231242312423124,
      "grad_norm": 0.5676578296876167,
      "learning_rate": 0.00012934404865412924,
      "loss": 1.6303,
      "step": 2064
    },
    {
      "epoch": 0.4233292332923329,
      "grad_norm": 0.524013077402647,
      "learning_rate": 0.0001292805610773118,
      "loss": 1.5972,
      "step": 2065
    },
    {
      "epoch": 0.4235342353423534,
      "grad_norm": 0.5191795081846384,
      "learning_rate": 0.00012921706058909756,
      "loss": 1.6343,
      "step": 2066
    },
    {
      "epoch": 0.4237392373923739,
      "grad_norm": 0.8243378793273545,
      "learning_rate": 0.00012915354721748738,
      "loss": 1.6146,
      "step": 2067
    },
    {
      "epoch": 0.4239442394423944,
      "grad_norm": 0.5216625499768733,
      "learning_rate": 0.00012909002099048775,
      "loss": 1.6531,
      "step": 2068
    },
    {
      "epoch": 0.4241492414924149,
      "grad_norm": 0.534217068140855,
      "learning_rate": 0.00012902648193611086,
      "loss": 1.6983,
      "step": 2069
    },
    {
      "epoch": 0.42435424354243545,
      "grad_norm": 0.5537807518013668,
      "learning_rate": 0.00012896293008237455,
      "loss": 1.6889,
      "step": 2070
    },
    {
      "epoch": 0.42455924559245595,
      "grad_norm": 0.5000376429203801,
      "learning_rate": 0.00012889936545730225,
      "loss": 1.6549,
      "step": 2071
    },
    {
      "epoch": 0.42476424764247644,
      "grad_norm": 0.5222234108418661,
      "learning_rate": 0.0001288357880889232,
      "loss": 1.5681,
      "step": 2072
    },
    {
      "epoch": 0.42496924969249694,
      "grad_norm": 0.5040226674591807,
      "learning_rate": 0.00012877219800527193,
      "loss": 1.5993,
      "step": 2073
    },
    {
      "epoch": 0.42517425174251744,
      "grad_norm": 0.49077622400216847,
      "learning_rate": 0.00012870859523438893,
      "loss": 1.5894,
      "step": 2074
    },
    {
      "epoch": 0.42537925379253794,
      "grad_norm": 0.5056206302040633,
      "learning_rate": 0.0001286449798043201,
      "loss": 1.6337,
      "step": 2075
    },
    {
      "epoch": 0.42558425584255843,
      "grad_norm": 0.5542238499148647,
      "learning_rate": 0.00012858135174311693,
      "loss": 1.625,
      "step": 2076
    },
    {
      "epoch": 0.42578925789257893,
      "grad_norm": 0.5501214964368766,
      "learning_rate": 0.00012851771107883655,
      "loss": 1.5964,
      "step": 2077
    },
    {
      "epoch": 0.42599425994259943,
      "grad_norm": 0.49752788455468033,
      "learning_rate": 0.00012845405783954152,
      "loss": 1.71,
      "step": 2078
    },
    {
      "epoch": 0.4261992619926199,
      "grad_norm": 0.47643060629196343,
      "learning_rate": 0.00012839039205330007,
      "loss": 1.6201,
      "step": 2079
    },
    {
      "epoch": 0.4264042640426404,
      "grad_norm": 0.5343716011444284,
      "learning_rate": 0.00012832671374818597,
      "loss": 1.648,
      "step": 2080
    },
    {
      "epoch": 0.4266092660926609,
      "grad_norm": 0.6646075197067107,
      "learning_rate": 0.00012826302295227836,
      "loss": 1.6491,
      "step": 2081
    },
    {
      "epoch": 0.4268142681426814,
      "grad_norm": 0.5388197627268985,
      "learning_rate": 0.00012819931969366207,
      "loss": 1.6477,
      "step": 2082
    },
    {
      "epoch": 0.4270192701927019,
      "grad_norm": 0.5198449165623379,
      "learning_rate": 0.0001281356040004273,
      "loss": 1.581,
      "step": 2083
    },
    {
      "epoch": 0.4272242722427224,
      "grad_norm": 0.5877476016414378,
      "learning_rate": 0.00012807187590066979,
      "loss": 1.6241,
      "step": 2084
    },
    {
      "epoch": 0.4274292742927429,
      "grad_norm": 0.4992588613805797,
      "learning_rate": 0.00012800813542249072,
      "loss": 1.5306,
      "step": 2085
    },
    {
      "epoch": 0.4276342763427634,
      "grad_norm": 0.540205030159357,
      "learning_rate": 0.00012794438259399672,
      "loss": 1.6179,
      "step": 2086
    },
    {
      "epoch": 0.4278392783927839,
      "grad_norm": 0.5133839360614646,
      "learning_rate": 0.00012788061744329997,
      "loss": 1.5674,
      "step": 2087
    },
    {
      "epoch": 0.4280442804428044,
      "grad_norm": 0.5306368694523946,
      "learning_rate": 0.00012781683999851795,
      "loss": 1.686,
      "step": 2088
    },
    {
      "epoch": 0.4282492824928249,
      "grad_norm": 0.5421592198170199,
      "learning_rate": 0.0001277530502877736,
      "loss": 1.6195,
      "step": 2089
    },
    {
      "epoch": 0.42845428454284545,
      "grad_norm": 0.5069745819988721,
      "learning_rate": 0.00012768924833919532,
      "loss": 1.6021,
      "step": 2090
    },
    {
      "epoch": 0.42865928659286595,
      "grad_norm": 0.5322301428680724,
      "learning_rate": 0.00012762543418091689,
      "loss": 1.5605,
      "step": 2091
    },
    {
      "epoch": 0.42886428864288645,
      "grad_norm": 0.6321245928069295,
      "learning_rate": 0.00012756160784107738,
      "loss": 1.6462,
      "step": 2092
    },
    {
      "epoch": 0.42906929069290695,
      "grad_norm": 0.5389888107131428,
      "learning_rate": 0.00012749776934782133,
      "loss": 1.6231,
      "step": 2093
    },
    {
      "epoch": 0.42927429274292744,
      "grad_norm": 0.5934225226676983,
      "learning_rate": 0.00012743391872929865,
      "loss": 1.582,
      "step": 2094
    },
    {
      "epoch": 0.42947929479294794,
      "grad_norm": 0.5620097205105296,
      "learning_rate": 0.00012737005601366457,
      "loss": 1.6466,
      "step": 2095
    },
    {
      "epoch": 0.42968429684296844,
      "grad_norm": 0.5022648535916745,
      "learning_rate": 0.00012730618122907959,
      "loss": 1.6392,
      "step": 2096
    },
    {
      "epoch": 0.42988929889298894,
      "grad_norm": 0.5248833817638882,
      "learning_rate": 0.0001272422944037096,
      "loss": 1.6297,
      "step": 2097
    },
    {
      "epoch": 0.43009430094300943,
      "grad_norm": 0.5733973485514842,
      "learning_rate": 0.0001271783955657258,
      "loss": 1.6626,
      "step": 2098
    },
    {
      "epoch": 0.43029930299302993,
      "grad_norm": 0.6084702847018025,
      "learning_rate": 0.0001271144847433047,
      "loss": 1.6381,
      "step": 2099
    },
    {
      "epoch": 0.43050430504305043,
      "grad_norm": 0.54498369637951,
      "learning_rate": 0.00012705056196462801,
      "loss": 1.6562,
      "step": 2100
    },
    {
      "epoch": 0.4307093070930709,
      "grad_norm": 0.5633386996255672,
      "learning_rate": 0.0001269866272578828,
      "loss": 1.6587,
      "step": 2101
    },
    {
      "epoch": 0.4309143091430914,
      "grad_norm": 0.5619127811316713,
      "learning_rate": 0.0001269226806512614,
      "loss": 1.5643,
      "step": 2102
    },
    {
      "epoch": 0.4311193111931119,
      "grad_norm": 0.598425179783864,
      "learning_rate": 0.0001268587221729613,
      "loss": 1.6259,
      "step": 2103
    },
    {
      "epoch": 0.4313243132431324,
      "grad_norm": 0.5719151954946781,
      "learning_rate": 0.00012679475185118535,
      "loss": 1.6203,
      "step": 2104
    },
    {
      "epoch": 0.4315293152931529,
      "grad_norm": 0.5356397109583548,
      "learning_rate": 0.0001267307697141415,
      "loss": 1.5678,
      "step": 2105
    },
    {
      "epoch": 0.4317343173431734,
      "grad_norm": 0.6203681803670734,
      "learning_rate": 0.00012666677579004296,
      "loss": 1.6861,
      "step": 2106
    },
    {
      "epoch": 0.4319393193931939,
      "grad_norm": 0.5969382161746908,
      "learning_rate": 0.0001266027701071082,
      "loss": 1.6506,
      "step": 2107
    },
    {
      "epoch": 0.4321443214432144,
      "grad_norm": 0.5877935423111643,
      "learning_rate": 0.00012653875269356076,
      "loss": 1.6532,
      "step": 2108
    },
    {
      "epoch": 0.4323493234932349,
      "grad_norm": 0.5052465647166653,
      "learning_rate": 0.00012647472357762938,
      "loss": 1.6828,
      "step": 2109
    },
    {
      "epoch": 0.43255432554325546,
      "grad_norm": 0.530408932339115,
      "learning_rate": 0.0001264106827875481,
      "loss": 1.569,
      "step": 2110
    },
    {
      "epoch": 0.43275932759327596,
      "grad_norm": 0.6346565978648429,
      "learning_rate": 0.00012634663035155595,
      "loss": 1.6071,
      "step": 2111
    },
    {
      "epoch": 0.43296432964329645,
      "grad_norm": 0.6032169894221104,
      "learning_rate": 0.00012628256629789713,
      "loss": 1.6688,
      "step": 2112
    },
    {
      "epoch": 0.43316933169331695,
      "grad_norm": 0.5281126936510784,
      "learning_rate": 0.00012621849065482093,
      "loss": 1.5989,
      "step": 2113
    },
    {
      "epoch": 0.43337433374333745,
      "grad_norm": 0.5435861810416938,
      "learning_rate": 0.0001261544034505819,
      "loss": 1.6458,
      "step": 2114
    },
    {
      "epoch": 0.43357933579335795,
      "grad_norm": 0.5993840394271078,
      "learning_rate": 0.00012609030471343952,
      "loss": 1.6393,
      "step": 2115
    },
    {
      "epoch": 0.43378433784337844,
      "grad_norm": 0.6159968377473376,
      "learning_rate": 0.0001260261944716584,
      "loss": 1.6695,
      "step": 2116
    },
    {
      "epoch": 0.43398933989339894,
      "grad_norm": 0.5408960905874177,
      "learning_rate": 0.00012596207275350832,
      "loss": 1.6045,
      "step": 2117
    },
    {
      "epoch": 0.43419434194341944,
      "grad_norm": 0.55375972011674,
      "learning_rate": 0.00012589793958726398,
      "loss": 1.6004,
      "step": 2118
    },
    {
      "epoch": 0.43439934399343993,
      "grad_norm": 0.552016902209634,
      "learning_rate": 0.00012583379500120517,
      "loss": 1.6282,
      "step": 2119
    },
    {
      "epoch": 0.43460434604346043,
      "grad_norm": 0.5532532312459124,
      "learning_rate": 0.00012576963902361684,
      "loss": 1.6509,
      "step": 2120
    },
    {
      "epoch": 0.43480934809348093,
      "grad_norm": 0.5572167228417764,
      "learning_rate": 0.00012570547168278874,
      "loss": 1.605,
      "step": 2121
    },
    {
      "epoch": 0.4350143501435014,
      "grad_norm": 0.5864773233007516,
      "learning_rate": 0.00012564129300701585,
      "loss": 1.6421,
      "step": 2122
    },
    {
      "epoch": 0.4352193521935219,
      "grad_norm": 0.5040098130329621,
      "learning_rate": 0.00012557710302459803,
      "loss": 1.6982,
      "step": 2123
    },
    {
      "epoch": 0.4354243542435424,
      "grad_norm": 0.49952204858797405,
      "learning_rate": 0.00012551290176384005,
      "loss": 1.6008,
      "step": 2124
    },
    {
      "epoch": 0.4356293562935629,
      "grad_norm": 0.5224613033959592,
      "learning_rate": 0.00012544868925305189,
      "loss": 1.5299,
      "step": 2125
    },
    {
      "epoch": 0.4358343583435834,
      "grad_norm": 0.5036024977194129,
      "learning_rate": 0.00012538446552054822,
      "loss": 1.6216,
      "step": 2126
    },
    {
      "epoch": 0.4360393603936039,
      "grad_norm": 0.5065015134324423,
      "learning_rate": 0.0001253202305946489,
      "loss": 1.6242,
      "step": 2127
    },
    {
      "epoch": 0.4362443624436244,
      "grad_norm": 0.4865923876353277,
      "learning_rate": 0.00012525598450367854,
      "loss": 1.6299,
      "step": 2128
    },
    {
      "epoch": 0.4364493644936449,
      "grad_norm": 0.5001369984663168,
      "learning_rate": 0.00012519172727596675,
      "loss": 1.6086,
      "step": 2129
    },
    {
      "epoch": 0.43665436654366546,
      "grad_norm": 0.5336689201633212,
      "learning_rate": 0.0001251274589398481,
      "loss": 1.5928,
      "step": 2130
    },
    {
      "epoch": 0.43685936859368596,
      "grad_norm": 0.458484602444854,
      "learning_rate": 0.00012506317952366196,
      "loss": 1.567,
      "step": 2131
    },
    {
      "epoch": 0.43706437064370646,
      "grad_norm": 0.45472408648008505,
      "learning_rate": 0.0001249988890557526,
      "loss": 1.6658,
      "step": 2132
    },
    {
      "epoch": 0.43726937269372695,
      "grad_norm": 0.48451902869427593,
      "learning_rate": 0.0001249345875644693,
      "loss": 1.5793,
      "step": 2133
    },
    {
      "epoch": 0.43747437474374745,
      "grad_norm": 0.54992603870482,
      "learning_rate": 0.000124870275078166,
      "loss": 1.661,
      "step": 2134
    },
    {
      "epoch": 0.43767937679376795,
      "grad_norm": 0.5082912037596098,
      "learning_rate": 0.00012480595162520162,
      "loss": 1.6473,
      "step": 2135
    },
    {
      "epoch": 0.43788437884378845,
      "grad_norm": 0.4724840655210908,
      "learning_rate": 0.00012474161723393987,
      "loss": 1.5916,
      "step": 2136
    },
    {
      "epoch": 0.43808938089380894,
      "grad_norm": 0.5049927808634362,
      "learning_rate": 0.0001246772719327493,
      "loss": 1.6583,
      "step": 2137
    },
    {
      "epoch": 0.43829438294382944,
      "grad_norm": 0.530508083576109,
      "learning_rate": 0.0001246129157500033,
      "loss": 1.6635,
      "step": 2138
    },
    {
      "epoch": 0.43849938499384994,
      "grad_norm": 0.5122346630969103,
      "learning_rate": 0.00012454854871407994,
      "loss": 1.6209,
      "step": 2139
    },
    {
      "epoch": 0.43870438704387044,
      "grad_norm": 0.5120420117474423,
      "learning_rate": 0.0001244841708533622,
      "loss": 1.5957,
      "step": 2140
    },
    {
      "epoch": 0.43890938909389093,
      "grad_norm": 0.4866815909875715,
      "learning_rate": 0.0001244197821962378,
      "loss": 1.6452,
      "step": 2141
    },
    {
      "epoch": 0.43911439114391143,
      "grad_norm": 0.5279550794563505,
      "learning_rate": 0.0001243553827710992,
      "loss": 1.5392,
      "step": 2142
    },
    {
      "epoch": 0.43931939319393193,
      "grad_norm": 0.44556466936063477,
      "learning_rate": 0.00012429097260634365,
      "loss": 1.5182,
      "step": 2143
    },
    {
      "epoch": 0.4395243952439524,
      "grad_norm": 0.5552509582306528,
      "learning_rate": 0.00012422655173037304,
      "loss": 1.6877,
      "step": 2144
    },
    {
      "epoch": 0.4397293972939729,
      "grad_norm": 0.5215355483501207,
      "learning_rate": 0.00012416212017159412,
      "loss": 1.6233,
      "step": 2145
    },
    {
      "epoch": 0.4399343993439934,
      "grad_norm": 0.5189189052206612,
      "learning_rate": 0.00012409767795841823,
      "loss": 1.6281,
      "step": 2146
    },
    {
      "epoch": 0.4401394013940139,
      "grad_norm": 0.4843645387729046,
      "learning_rate": 0.0001240332251192615,
      "loss": 1.618,
      "step": 2147
    },
    {
      "epoch": 0.4403444034440344,
      "grad_norm": 0.48282550360680987,
      "learning_rate": 0.00012396876168254466,
      "loss": 1.592,
      "step": 2148
    },
    {
      "epoch": 0.4405494054940549,
      "grad_norm": 0.6274710133982132,
      "learning_rate": 0.0001239042876766932,
      "loss": 1.6735,
      "step": 2149
    },
    {
      "epoch": 0.44075440754407547,
      "grad_norm": 0.48471373703752735,
      "learning_rate": 0.00012383980313013715,
      "loss": 1.5537,
      "step": 2150
    },
    {
      "epoch": 0.44095940959409596,
      "grad_norm": 0.5384884989516128,
      "learning_rate": 0.00012377530807131137,
      "loss": 1.646,
      "step": 2151
    },
    {
      "epoch": 0.44116441164411646,
      "grad_norm": 0.49710256286569837,
      "learning_rate": 0.00012371080252865515,
      "loss": 1.5203,
      "step": 2152
    },
    {
      "epoch": 0.44136941369413696,
      "grad_norm": 0.5523918815485819,
      "learning_rate": 0.00012364628653061257,
      "loss": 1.6173,
      "step": 2153
    },
    {
      "epoch": 0.44157441574415746,
      "grad_norm": 0.4714398565088359,
      "learning_rate": 0.00012358176010563224,
      "loss": 1.5593,
      "step": 2154
    },
    {
      "epoch": 0.44177941779417795,
      "grad_norm": 0.4778642717551116,
      "learning_rate": 0.00012351722328216735,
      "loss": 1.6605,
      "step": 2155
    },
    {
      "epoch": 0.44198441984419845,
      "grad_norm": 0.5217475080141332,
      "learning_rate": 0.00012345267608867574,
      "loss": 1.6409,
      "step": 2156
    },
    {
      "epoch": 0.44218942189421895,
      "grad_norm": 0.4879055418506378,
      "learning_rate": 0.0001233881185536198,
      "loss": 1.6221,
      "step": 2157
    },
    {
      "epoch": 0.44239442394423945,
      "grad_norm": 0.5236382595855923,
      "learning_rate": 0.0001233235507054664,
      "loss": 1.5952,
      "step": 2158
    },
    {
      "epoch": 0.44259942599425994,
      "grad_norm": 0.5052041265063858,
      "learning_rate": 0.00012325897257268708,
      "loss": 1.6354,
      "step": 2159
    },
    {
      "epoch": 0.44280442804428044,
      "grad_norm": 0.5391553084710383,
      "learning_rate": 0.0001231943841837579,
      "loss": 1.6056,
      "step": 2160
    },
    {
      "epoch": 0.44300943009430094,
      "grad_norm": 0.5317024944837467,
      "learning_rate": 0.00012312978556715932,
      "loss": 1.6136,
      "step": 2161
    },
    {
      "epoch": 0.44321443214432144,
      "grad_norm": 0.45039348648677635,
      "learning_rate": 0.00012306517675137645,
      "loss": 1.5624,
      "step": 2162
    },
    {
      "epoch": 0.44341943419434193,
      "grad_norm": 0.5589837897894556,
      "learning_rate": 0.00012300055776489884,
      "loss": 1.6923,
      "step": 2163
    },
    {
      "epoch": 0.44362443624436243,
      "grad_norm": 0.5731646696151985,
      "learning_rate": 0.00012293592863622045,
      "loss": 1.6387,
      "step": 2164
    },
    {
      "epoch": 0.4438294382943829,
      "grad_norm": 0.5298811885026682,
      "learning_rate": 0.00012287128939383993,
      "loss": 1.6285,
      "step": 2165
    },
    {
      "epoch": 0.4440344403444034,
      "grad_norm": 0.5608041756463177,
      "learning_rate": 0.00012280664006626013,
      "loss": 1.6672,
      "step": 2166
    },
    {
      "epoch": 0.4442394423944239,
      "grad_norm": 0.4611347879059605,
      "learning_rate": 0.0001227419806819885,
      "loss": 1.5744,
      "step": 2167
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 0.5225137696362192,
      "learning_rate": 0.0001226773112695369,
      "loss": 1.5742,
      "step": 2168
    },
    {
      "epoch": 0.4446494464944649,
      "grad_norm": 0.5269321270877484,
      "learning_rate": 0.0001226126318574216,
      "loss": 1.6185,
      "step": 2169
    },
    {
      "epoch": 0.44485444854448547,
      "grad_norm": 0.5102159914914142,
      "learning_rate": 0.0001225479424741633,
      "loss": 1.6252,
      "step": 2170
    },
    {
      "epoch": 0.44505945059450597,
      "grad_norm": 0.5620848269228764,
      "learning_rate": 0.000122483243148287,
      "loss": 1.6388,
      "step": 2171
    },
    {
      "epoch": 0.44526445264452646,
      "grad_norm": 0.5909929809325606,
      "learning_rate": 0.00012241853390832226,
      "loss": 1.6388,
      "step": 2172
    },
    {
      "epoch": 0.44546945469454696,
      "grad_norm": 0.5645828183647269,
      "learning_rate": 0.0001223538147828029,
      "loss": 1.658,
      "step": 2173
    },
    {
      "epoch": 0.44567445674456746,
      "grad_norm": 0.5378105266035673,
      "learning_rate": 0.00012228908580026702,
      "loss": 1.6329,
      "step": 2174
    },
    {
      "epoch": 0.44587945879458796,
      "grad_norm": 0.5018137468578521,
      "learning_rate": 0.00012222434698925727,
      "loss": 1.6595,
      "step": 2175
    },
    {
      "epoch": 0.44608446084460845,
      "grad_norm": 0.48769179572466265,
      "learning_rate": 0.0001221595983783205,
      "loss": 1.5996,
      "step": 2176
    },
    {
      "epoch": 0.44628946289462895,
      "grad_norm": 0.5453521504564052,
      "learning_rate": 0.0001220948399960078,
      "loss": 1.6718,
      "step": 2177
    },
    {
      "epoch": 0.44649446494464945,
      "grad_norm": 0.5356855434269039,
      "learning_rate": 0.00012203007187087485,
      "loss": 1.581,
      "step": 2178
    },
    {
      "epoch": 0.44669946699466995,
      "grad_norm": 0.44856708369186865,
      "learning_rate": 0.00012196529403148132,
      "loss": 1.5513,
      "step": 2179
    },
    {
      "epoch": 0.44690446904469044,
      "grad_norm": 0.5143991891610954,
      "learning_rate": 0.00012190050650639131,
      "loss": 1.6605,
      "step": 2180
    },
    {
      "epoch": 0.44710947109471094,
      "grad_norm": 0.5569965352253903,
      "learning_rate": 0.00012183570932417323,
      "loss": 1.6337,
      "step": 2181
    },
    {
      "epoch": 0.44731447314473144,
      "grad_norm": 0.4846297856392185,
      "learning_rate": 0.00012177090251339965,
      "loss": 1.6488,
      "step": 2182
    },
    {
      "epoch": 0.44751947519475194,
      "grad_norm": 0.5676152994023149,
      "learning_rate": 0.00012170608610264742,
      "loss": 1.6705,
      "step": 2183
    },
    {
      "epoch": 0.44772447724477243,
      "grad_norm": 0.46163168541793304,
      "learning_rate": 0.00012164126012049766,
      "loss": 1.6689,
      "step": 2184
    },
    {
      "epoch": 0.44792947929479293,
      "grad_norm": 0.5139110587076279,
      "learning_rate": 0.00012157642459553564,
      "loss": 1.6773,
      "step": 2185
    },
    {
      "epoch": 0.44813448134481343,
      "grad_norm": 0.46633175126525284,
      "learning_rate": 0.00012151157955635097,
      "loss": 1.5892,
      "step": 2186
    },
    {
      "epoch": 0.4483394833948339,
      "grad_norm": 0.49448660065559547,
      "learning_rate": 0.00012144672503153726,
      "loss": 1.5627,
      "step": 2187
    },
    {
      "epoch": 0.4485444854448544,
      "grad_norm": 0.5250935178167044,
      "learning_rate": 0.00012138186104969247,
      "loss": 1.5623,
      "step": 2188
    },
    {
      "epoch": 0.4487494874948749,
      "grad_norm": 0.5407192805562584,
      "learning_rate": 0.00012131698763941863,
      "loss": 1.5759,
      "step": 2189
    },
    {
      "epoch": 0.4489544895448955,
      "grad_norm": 0.5106637399093619,
      "learning_rate": 0.00012125210482932203,
      "loss": 1.6665,
      "step": 2190
    },
    {
      "epoch": 0.44915949159491597,
      "grad_norm": 0.5293031553006489,
      "learning_rate": 0.00012118721264801299,
      "loss": 1.6175,
      "step": 2191
    },
    {
      "epoch": 0.44936449364493647,
      "grad_norm": 0.5468323908564092,
      "learning_rate": 0.000121122311124106,
      "loss": 1.6102,
      "step": 2192
    },
    {
      "epoch": 0.44956949569495697,
      "grad_norm": 0.523231532303066,
      "learning_rate": 0.0001210574002862197,
      "loss": 1.7186,
      "step": 2193
    },
    {
      "epoch": 0.44977449774497746,
      "grad_norm": 0.5222157286598067,
      "learning_rate": 0.00012099248016297681,
      "loss": 1.6269,
      "step": 2194
    },
    {
      "epoch": 0.44997949979499796,
      "grad_norm": 0.5543478262983981,
      "learning_rate": 0.00012092755078300422,
      "loss": 1.6321,
      "step": 2195
    },
    {
      "epoch": 0.45018450184501846,
      "grad_norm": 0.5250057809043659,
      "learning_rate": 0.00012086261217493276,
      "loss": 1.6546,
      "step": 2196
    },
    {
      "epoch": 0.45038950389503896,
      "grad_norm": 0.4766727878388731,
      "learning_rate": 0.00012079766436739742,
      "loss": 1.6267,
      "step": 2197
    },
    {
      "epoch": 0.45059450594505945,
      "grad_norm": 0.535968447646884,
      "learning_rate": 0.00012073270738903726,
      "loss": 1.6451,
      "step": 2198
    },
    {
      "epoch": 0.45079950799507995,
      "grad_norm": 0.5460785149856947,
      "learning_rate": 0.00012066774126849529,
      "loss": 1.7289,
      "step": 2199
    },
    {
      "epoch": 0.45100451004510045,
      "grad_norm": 0.5448945869127142,
      "learning_rate": 0.00012060276603441871,
      "loss": 1.6506,
      "step": 2200
    },
    {
      "epoch": 0.45120951209512095,
      "grad_norm": 0.49201681757479115,
      "learning_rate": 0.00012053778171545857,
      "loss": 1.6035,
      "step": 2201
    },
    {
      "epoch": 0.45141451414514144,
      "grad_norm": 0.47186569033147796,
      "learning_rate": 0.00012047278834027005,
      "loss": 1.4887,
      "step": 2202
    },
    {
      "epoch": 0.45161951619516194,
      "grad_norm": 0.47916073409169957,
      "learning_rate": 0.00012040778593751227,
      "loss": 1.6477,
      "step": 2203
    },
    {
      "epoch": 0.45182451824518244,
      "grad_norm": 0.4987732650660998,
      "learning_rate": 0.00012034277453584828,
      "loss": 1.6708,
      "step": 2204
    },
    {
      "epoch": 0.45202952029520294,
      "grad_norm": 0.496588246317768,
      "learning_rate": 0.00012027775416394522,
      "loss": 1.5524,
      "step": 2205
    },
    {
      "epoch": 0.45223452234522343,
      "grad_norm": 0.4828001014108145,
      "learning_rate": 0.0001202127248504741,
      "loss": 1.5849,
      "step": 2206
    },
    {
      "epoch": 0.45243952439524393,
      "grad_norm": 0.482606435953857,
      "learning_rate": 0.00012014768662410985,
      "loss": 1.595,
      "step": 2207
    },
    {
      "epoch": 0.45264452644526443,
      "grad_norm": 0.5584448365120266,
      "learning_rate": 0.00012008263951353143,
      "loss": 1.6303,
      "step": 2208
    },
    {
      "epoch": 0.4528495284952849,
      "grad_norm": 0.5141864628725594,
      "learning_rate": 0.00012001758354742163,
      "loss": 1.652,
      "step": 2209
    },
    {
      "epoch": 0.4530545305453055,
      "grad_norm": 0.470178592338048,
      "learning_rate": 0.00011995251875446718,
      "loss": 1.5825,
      "step": 2210
    },
    {
      "epoch": 0.453259532595326,
      "grad_norm": 0.48622644202780685,
      "learning_rate": 0.0001198874451633587,
      "loss": 1.6344,
      "step": 2211
    },
    {
      "epoch": 0.4534645346453465,
      "grad_norm": 0.5137268092341475,
      "learning_rate": 0.00011982236280279066,
      "loss": 1.6607,
      "step": 2212
    },
    {
      "epoch": 0.45366953669536697,
      "grad_norm": 0.508893577800389,
      "learning_rate": 0.0001197572717014615,
      "loss": 1.674,
      "step": 2213
    },
    {
      "epoch": 0.45387453874538747,
      "grad_norm": 0.48372042066740545,
      "learning_rate": 0.00011969217188807333,
      "loss": 1.6283,
      "step": 2214
    },
    {
      "epoch": 0.45407954079540797,
      "grad_norm": 0.4944984056659895,
      "learning_rate": 0.00011962706339133229,
      "loss": 1.6082,
      "step": 2215
    },
    {
      "epoch": 0.45428454284542846,
      "grad_norm": 0.5325089566366165,
      "learning_rate": 0.00011956194623994827,
      "loss": 1.569,
      "step": 2216
    },
    {
      "epoch": 0.45448954489544896,
      "grad_norm": 0.5011500795056895,
      "learning_rate": 0.00011949682046263491,
      "loss": 1.6693,
      "step": 2217
    },
    {
      "epoch": 0.45469454694546946,
      "grad_norm": 0.46754227705442525,
      "learning_rate": 0.00011943168608810978,
      "loss": 1.7038,
      "step": 2218
    },
    {
      "epoch": 0.45489954899548996,
      "grad_norm": 0.5483068720363576,
      "learning_rate": 0.00011936654314509415,
      "loss": 1.6316,
      "step": 2219
    },
    {
      "epoch": 0.45510455104551045,
      "grad_norm": 0.5096240811125679,
      "learning_rate": 0.00011930139166231308,
      "loss": 1.5714,
      "step": 2220
    },
    {
      "epoch": 0.45530955309553095,
      "grad_norm": 0.4813416419550769,
      "learning_rate": 0.00011923623166849547,
      "loss": 1.5927,
      "step": 2221
    },
    {
      "epoch": 0.45551455514555145,
      "grad_norm": 0.5170365330513925,
      "learning_rate": 0.00011917106319237386,
      "loss": 1.5518,
      "step": 2222
    },
    {
      "epoch": 0.45571955719557194,
      "grad_norm": 0.4884764179572433,
      "learning_rate": 0.0001191058862626846,
      "loss": 1.5164,
      "step": 2223
    },
    {
      "epoch": 0.45592455924559244,
      "grad_norm": 0.4954391531247397,
      "learning_rate": 0.00011904070090816777,
      "loss": 1.6353,
      "step": 2224
    },
    {
      "epoch": 0.45612956129561294,
      "grad_norm": 0.4960297156689326,
      "learning_rate": 0.00011897550715756713,
      "loss": 1.5118,
      "step": 2225
    },
    {
      "epoch": 0.45633456334563344,
      "grad_norm": 0.5390779402823423,
      "learning_rate": 0.0001189103050396302,
      "loss": 1.6791,
      "step": 2226
    },
    {
      "epoch": 0.45653956539565393,
      "grad_norm": 0.499269726115277,
      "learning_rate": 0.0001188450945831081,
      "loss": 1.5772,
      "step": 2227
    },
    {
      "epoch": 0.45674456744567443,
      "grad_norm": 0.46350476901104354,
      "learning_rate": 0.00011877987581675572,
      "loss": 1.6041,
      "step": 2228
    },
    {
      "epoch": 0.45694956949569493,
      "grad_norm": 0.47674721816803073,
      "learning_rate": 0.00011871464876933155,
      "loss": 1.5654,
      "step": 2229
    },
    {
      "epoch": 0.4571545715457155,
      "grad_norm": 0.4576385907402217,
      "learning_rate": 0.00011864941346959775,
      "loss": 1.642,
      "step": 2230
    },
    {
      "epoch": 0.457359573595736,
      "grad_norm": 0.4837016287745565,
      "learning_rate": 0.00011858416994632013,
      "loss": 1.6331,
      "step": 2231
    },
    {
      "epoch": 0.4575645756457565,
      "grad_norm": 0.5604741232557158,
      "learning_rate": 0.0001185189182282681,
      "loss": 1.699,
      "step": 2232
    },
    {
      "epoch": 0.457769577695777,
      "grad_norm": 0.4315380507020846,
      "learning_rate": 0.00011845365834421474,
      "loss": 1.5785,
      "step": 2233
    },
    {
      "epoch": 0.45797457974579747,
      "grad_norm": 0.5565812745284081,
      "learning_rate": 0.0001183883903229367,
      "loss": 1.6175,
      "step": 2234
    },
    {
      "epoch": 0.45817958179581797,
      "grad_norm": 0.5204126419888842,
      "learning_rate": 0.00011832311419321414,
      "loss": 1.5621,
      "step": 2235
    },
    {
      "epoch": 0.45838458384583847,
      "grad_norm": 0.43601270242126144,
      "learning_rate": 0.00011825782998383092,
      "loss": 1.5952,
      "step": 2236
    },
    {
      "epoch": 0.45858958589585896,
      "grad_norm": 0.4497791685004938,
      "learning_rate": 0.00011819253772357442,
      "loss": 1.6251,
      "step": 2237
    },
    {
      "epoch": 0.45879458794587946,
      "grad_norm": 0.4717757418463207,
      "learning_rate": 0.00011812723744123553,
      "loss": 1.5702,
      "step": 2238
    },
    {
      "epoch": 0.45899958999589996,
      "grad_norm": 0.49821996077055847,
      "learning_rate": 0.00011806192916560872,
      "loss": 1.6545,
      "step": 2239
    },
    {
      "epoch": 0.45920459204592046,
      "grad_norm": 0.46129632190603165,
      "learning_rate": 0.00011799661292549195,
      "loss": 1.5715,
      "step": 2240
    },
    {
      "epoch": 0.45940959409594095,
      "grad_norm": 0.49406280764032334,
      "learning_rate": 0.00011793128874968675,
      "loss": 1.632,
      "step": 2241
    },
    {
      "epoch": 0.45961459614596145,
      "grad_norm": 0.49000898358974676,
      "learning_rate": 0.00011786595666699809,
      "loss": 1.6326,
      "step": 2242
    },
    {
      "epoch": 0.45981959819598195,
      "grad_norm": 0.4356473279545904,
      "learning_rate": 0.00011780061670623448,
      "loss": 1.5789,
      "step": 2243
    },
    {
      "epoch": 0.46002460024600245,
      "grad_norm": 0.4892664839892892,
      "learning_rate": 0.00011773526889620783,
      "loss": 1.6733,
      "step": 2244
    },
    {
      "epoch": 0.46022960229602294,
      "grad_norm": 0.5556881400551208,
      "learning_rate": 0.00011766991326573356,
      "loss": 1.6861,
      "step": 2245
    },
    {
      "epoch": 0.46043460434604344,
      "grad_norm": 0.4486446257422821,
      "learning_rate": 0.00011760454984363058,
      "loss": 1.5268,
      "step": 2246
    },
    {
      "epoch": 0.46063960639606394,
      "grad_norm": 0.49179939605265005,
      "learning_rate": 0.0001175391786587211,
      "loss": 1.6432,
      "step": 2247
    },
    {
      "epoch": 0.46084460844608444,
      "grad_norm": 0.527555706448139,
      "learning_rate": 0.00011747379973983095,
      "loss": 1.5776,
      "step": 2248
    },
    {
      "epoch": 0.46104961049610493,
      "grad_norm": 0.4944724729281564,
      "learning_rate": 0.00011740841311578919,
      "loss": 1.6567,
      "step": 2249
    },
    {
      "epoch": 0.4612546125461255,
      "grad_norm": 0.5034188954175542,
      "learning_rate": 0.00011734301881542835,
      "loss": 1.6355,
      "step": 2250
    },
    {
      "epoch": 0.461459614596146,
      "grad_norm": 0.4692162254222976,
      "learning_rate": 0.00011727761686758438,
      "loss": 1.5915,
      "step": 2251
    },
    {
      "epoch": 0.4616646166461665,
      "grad_norm": 0.49030093156236876,
      "learning_rate": 0.00011721220730109654,
      "loss": 1.5702,
      "step": 2252
    },
    {
      "epoch": 0.461869618696187,
      "grad_norm": 0.5293979426920176,
      "learning_rate": 0.00011714679014480751,
      "loss": 1.6379,
      "step": 2253
    },
    {
      "epoch": 0.4620746207462075,
      "grad_norm": 0.5064174921529848,
      "learning_rate": 0.00011708136542756325,
      "loss": 1.5728,
      "step": 2254
    },
    {
      "epoch": 0.462279622796228,
      "grad_norm": 0.4937639548400506,
      "learning_rate": 0.00011701593317821306,
      "loss": 1.5463,
      "step": 2255
    },
    {
      "epoch": 0.46248462484624847,
      "grad_norm": 0.4848169917908273,
      "learning_rate": 0.00011695049342560968,
      "loss": 1.6266,
      "step": 2256
    },
    {
      "epoch": 0.46268962689626897,
      "grad_norm": 0.505922621676798,
      "learning_rate": 0.00011688504619860899,
      "loss": 1.6066,
      "step": 2257
    },
    {
      "epoch": 0.46289462894628947,
      "grad_norm": 0.5175374609665985,
      "learning_rate": 0.00011681959152607025,
      "loss": 1.6234,
      "step": 2258
    },
    {
      "epoch": 0.46309963099630996,
      "grad_norm": 0.48630453982276306,
      "learning_rate": 0.00011675412943685604,
      "loss": 1.5987,
      "step": 2259
    },
    {
      "epoch": 0.46330463304633046,
      "grad_norm": 0.5137283850061084,
      "learning_rate": 0.0001166886599598321,
      "loss": 1.562,
      "step": 2260
    },
    {
      "epoch": 0.46350963509635096,
      "grad_norm": 0.4843909314091931,
      "learning_rate": 0.00011662318312386755,
      "loss": 1.5956,
      "step": 2261
    },
    {
      "epoch": 0.46371463714637146,
      "grad_norm": 0.4937067253155184,
      "learning_rate": 0.00011655769895783469,
      "loss": 1.6087,
      "step": 2262
    },
    {
      "epoch": 0.46391963919639195,
      "grad_norm": 0.49294092335364975,
      "learning_rate": 0.00011649220749060903,
      "loss": 1.647,
      "step": 2263
    },
    {
      "epoch": 0.46412464124641245,
      "grad_norm": 0.5069275258145326,
      "learning_rate": 0.00011642670875106938,
      "loss": 1.5944,
      "step": 2264
    },
    {
      "epoch": 0.46432964329643295,
      "grad_norm": 0.5247022335682496,
      "learning_rate": 0.00011636120276809763,
      "loss": 1.6094,
      "step": 2265
    },
    {
      "epoch": 0.46453464534645345,
      "grad_norm": 0.5302073199734748,
      "learning_rate": 0.00011629568957057903,
      "loss": 1.6542,
      "step": 2266
    },
    {
      "epoch": 0.46473964739647394,
      "grad_norm": 0.4614223163381095,
      "learning_rate": 0.00011623016918740188,
      "loss": 1.5973,
      "step": 2267
    },
    {
      "epoch": 0.46494464944649444,
      "grad_norm": 0.5359077012270838,
      "learning_rate": 0.00011616464164745768,
      "loss": 1.7003,
      "step": 2268
    },
    {
      "epoch": 0.46514965149651494,
      "grad_norm": 0.5368926793827554,
      "learning_rate": 0.00011609910697964114,
      "loss": 1.6342,
      "step": 2269
    },
    {
      "epoch": 0.4653546535465355,
      "grad_norm": 0.469705446395763,
      "learning_rate": 0.00011603356521285005,
      "loss": 1.6145,
      "step": 2270
    },
    {
      "epoch": 0.465559655596556,
      "grad_norm": 0.5097166245134442,
      "learning_rate": 0.00011596801637598531,
      "loss": 1.6119,
      "step": 2271
    },
    {
      "epoch": 0.4657646576465765,
      "grad_norm": 0.5139429972702196,
      "learning_rate": 0.00011590246049795101,
      "loss": 1.6085,
      "step": 2272
    },
    {
      "epoch": 0.465969659696597,
      "grad_norm": 0.5458882595022456,
      "learning_rate": 0.00011583689760765435,
      "loss": 1.6548,
      "step": 2273
    },
    {
      "epoch": 0.4661746617466175,
      "grad_norm": 0.49188711206373154,
      "learning_rate": 0.00011577132773400552,
      "loss": 1.627,
      "step": 2274
    },
    {
      "epoch": 0.466379663796638,
      "grad_norm": 0.5369889449614228,
      "learning_rate": 0.00011570575090591791,
      "loss": 1.6578,
      "step": 2275
    },
    {
      "epoch": 0.4665846658466585,
      "grad_norm": 0.5823023557728096,
      "learning_rate": 0.00011564016715230788,
      "loss": 1.6762,
      "step": 2276
    },
    {
      "epoch": 0.466789667896679,
      "grad_norm": 0.4705088760551964,
      "learning_rate": 0.00011557457650209488,
      "loss": 1.6055,
      "step": 2277
    },
    {
      "epoch": 0.46699466994669947,
      "grad_norm": 0.4726782938818058,
      "learning_rate": 0.00011550897898420148,
      "loss": 1.5717,
      "step": 2278
    },
    {
      "epoch": 0.46719967199671997,
      "grad_norm": 0.4821132831803551,
      "learning_rate": 0.0001154433746275531,
      "loss": 1.5156,
      "step": 2279
    },
    {
      "epoch": 0.46740467404674046,
      "grad_norm": 0.4802441590712986,
      "learning_rate": 0.00011537776346107834,
      "loss": 1.5545,
      "step": 2280
    },
    {
      "epoch": 0.46760967609676096,
      "grad_norm": 0.5087575727183979,
      "learning_rate": 0.00011531214551370877,
      "loss": 1.6351,
      "step": 2281
    },
    {
      "epoch": 0.46781467814678146,
      "grad_norm": 0.5143936428207051,
      "learning_rate": 0.00011524652081437886,
      "loss": 1.6002,
      "step": 2282
    },
    {
      "epoch": 0.46801968019680196,
      "grad_norm": 0.48573209051853344,
      "learning_rate": 0.00011518088939202614,
      "loss": 1.6696,
      "step": 2283
    },
    {
      "epoch": 0.46822468224682245,
      "grad_norm": 0.4568152073951472,
      "learning_rate": 0.00011511525127559109,
      "loss": 1.5745,
      "step": 2284
    },
    {
      "epoch": 0.46842968429684295,
      "grad_norm": 0.5141425459674389,
      "learning_rate": 0.00011504960649401712,
      "loss": 1.6114,
      "step": 2285
    },
    {
      "epoch": 0.46863468634686345,
      "grad_norm": 0.5425349486881768,
      "learning_rate": 0.00011498395507625066,
      "loss": 1.5927,
      "step": 2286
    },
    {
      "epoch": 0.46883968839688395,
      "grad_norm": 0.5401999411034183,
      "learning_rate": 0.00011491829705124093,
      "loss": 1.657,
      "step": 2287
    },
    {
      "epoch": 0.46904469044690444,
      "grad_norm": 0.4930794446289379,
      "learning_rate": 0.00011485263244794016,
      "loss": 1.5945,
      "step": 2288
    },
    {
      "epoch": 0.46924969249692494,
      "grad_norm": 0.48302905761849846,
      "learning_rate": 0.00011478696129530346,
      "loss": 1.5507,
      "step": 2289
    },
    {
      "epoch": 0.4694546945469455,
      "grad_norm": 0.5529478748509801,
      "learning_rate": 0.0001147212836222888,
      "loss": 1.5967,
      "step": 2290
    },
    {
      "epoch": 0.469659696596966,
      "grad_norm": 0.45504910634552553,
      "learning_rate": 0.00011465559945785711,
      "loss": 1.6189,
      "step": 2291
    },
    {
      "epoch": 0.4698646986469865,
      "grad_norm": 0.5223227317092374,
      "learning_rate": 0.00011458990883097205,
      "loss": 1.6803,
      "step": 2292
    },
    {
      "epoch": 0.470069700697007,
      "grad_norm": 0.5004932491352585,
      "learning_rate": 0.00011452421177060022,
      "loss": 1.6869,
      "step": 2293
    },
    {
      "epoch": 0.4702747027470275,
      "grad_norm": 0.5147278474125473,
      "learning_rate": 0.0001144585083057111,
      "loss": 1.5897,
      "step": 2294
    },
    {
      "epoch": 0.470479704797048,
      "grad_norm": 0.48215347737227754,
      "learning_rate": 0.00011439279846527682,
      "loss": 1.5188,
      "step": 2295
    },
    {
      "epoch": 0.4706847068470685,
      "grad_norm": 0.5580465090243764,
      "learning_rate": 0.00011432708227827254,
      "loss": 1.6698,
      "step": 2296
    },
    {
      "epoch": 0.470889708897089,
      "grad_norm": 0.5304498002445579,
      "learning_rate": 0.00011426135977367604,
      "loss": 1.5778,
      "step": 2297
    },
    {
      "epoch": 0.4710947109471095,
      "grad_norm": 0.48671076063650104,
      "learning_rate": 0.00011419563098046799,
      "loss": 1.5694,
      "step": 2298
    },
    {
      "epoch": 0.47129971299712997,
      "grad_norm": 0.5422502064405047,
      "learning_rate": 0.00011412989592763181,
      "loss": 1.6876,
      "step": 2299
    },
    {
      "epoch": 0.47150471504715047,
      "grad_norm": 0.534484966022562,
      "learning_rate": 0.00011406415464415363,
      "loss": 1.6413,
      "step": 2300
    },
    {
      "epoch": 0.47170971709717097,
      "grad_norm": 0.4825865458285362,
      "learning_rate": 0.00011399840715902244,
      "loss": 1.6293,
      "step": 2301
    },
    {
      "epoch": 0.47191471914719146,
      "grad_norm": 0.4854239660113782,
      "learning_rate": 0.00011393265350122981,
      "loss": 1.5754,
      "step": 2302
    },
    {
      "epoch": 0.47211972119721196,
      "grad_norm": 0.5029086619176066,
      "learning_rate": 0.00011386689369977015,
      "loss": 1.6192,
      "step": 2303
    },
    {
      "epoch": 0.47232472324723246,
      "grad_norm": 0.5394439778676559,
      "learning_rate": 0.00011380112778364058,
      "loss": 1.5888,
      "step": 2304
    },
    {
      "epoch": 0.47252972529725296,
      "grad_norm": 0.5421504249480066,
      "learning_rate": 0.00011373535578184082,
      "loss": 1.5933,
      "step": 2305
    },
    {
      "epoch": 0.47273472734727345,
      "grad_norm": 0.4329304297479539,
      "learning_rate": 0.00011366957772337337,
      "loss": 1.6202,
      "step": 2306
    },
    {
      "epoch": 0.47293972939729395,
      "grad_norm": 0.43476931795384094,
      "learning_rate": 0.00011360379363724338,
      "loss": 1.5888,
      "step": 2307
    },
    {
      "epoch": 0.47314473144731445,
      "grad_norm": 0.531836936820513,
      "learning_rate": 0.00011353800355245856,
      "loss": 1.6634,
      "step": 2308
    },
    {
      "epoch": 0.47334973349733495,
      "grad_norm": 0.5147397557535468,
      "learning_rate": 0.00011347220749802945,
      "loss": 1.6116,
      "step": 2309
    },
    {
      "epoch": 0.4735547355473555,
      "grad_norm": 0.5518973499148925,
      "learning_rate": 0.00011340640550296906,
      "loss": 1.6636,
      "step": 2310
    },
    {
      "epoch": 0.473759737597376,
      "grad_norm": 0.4909563420940473,
      "learning_rate": 0.0001133405975962931,
      "loss": 1.6831,
      "step": 2311
    },
    {
      "epoch": 0.4739647396473965,
      "grad_norm": 0.5198956369396392,
      "learning_rate": 0.00011327478380701989,
      "loss": 1.6843,
      "step": 2312
    },
    {
      "epoch": 0.474169741697417,
      "grad_norm": 0.49830301694805923,
      "learning_rate": 0.00011320896416417026,
      "loss": 1.514,
      "step": 2313
    },
    {
      "epoch": 0.4743747437474375,
      "grad_norm": 0.5187545398255399,
      "learning_rate": 0.00011314313869676769,
      "loss": 1.5757,
      "step": 2314
    },
    {
      "epoch": 0.474579745797458,
      "grad_norm": 0.5384269747925001,
      "learning_rate": 0.00011307730743383826,
      "loss": 1.5965,
      "step": 2315
    },
    {
      "epoch": 0.4747847478474785,
      "grad_norm": 0.5378151236519214,
      "learning_rate": 0.00011301147040441055,
      "loss": 1.6185,
      "step": 2316
    },
    {
      "epoch": 0.474989749897499,
      "grad_norm": 0.521754909949357,
      "learning_rate": 0.00011294562763751573,
      "loss": 1.5829,
      "step": 2317
    },
    {
      "epoch": 0.4751947519475195,
      "grad_norm": 0.4697886691531133,
      "learning_rate": 0.0001128797791621874,
      "loss": 1.6663,
      "step": 2318
    },
    {
      "epoch": 0.47539975399754,
      "grad_norm": 0.5309026896781432,
      "learning_rate": 0.00011281392500746177,
      "loss": 1.6035,
      "step": 2319
    },
    {
      "epoch": 0.4756047560475605,
      "grad_norm": 0.46459856486107975,
      "learning_rate": 0.00011274806520237755,
      "loss": 1.5916,
      "step": 2320
    },
    {
      "epoch": 0.47580975809758097,
      "grad_norm": 0.48680313444577894,
      "learning_rate": 0.00011268219977597594,
      "loss": 1.6259,
      "step": 2321
    },
    {
      "epoch": 0.47601476014760147,
      "grad_norm": 0.5067966113169905,
      "learning_rate": 0.00011261632875730052,
      "loss": 1.6574,
      "step": 2322
    },
    {
      "epoch": 0.47621976219762197,
      "grad_norm": 0.5131417472436789,
      "learning_rate": 0.00011255045217539748,
      "loss": 1.6761,
      "step": 2323
    },
    {
      "epoch": 0.47642476424764246,
      "grad_norm": 0.4841404237792854,
      "learning_rate": 0.00011248457005931539,
      "loss": 1.5836,
      "step": 2324
    },
    {
      "epoch": 0.47662976629766296,
      "grad_norm": 0.5071375986370167,
      "learning_rate": 0.00011241868243810525,
      "loss": 1.6052,
      "step": 2325
    },
    {
      "epoch": 0.47683476834768346,
      "grad_norm": 0.4528402009254952,
      "learning_rate": 0.00011235278934082057,
      "loss": 1.6842,
      "step": 2326
    },
    {
      "epoch": 0.47703977039770395,
      "grad_norm": 0.49836550778390243,
      "learning_rate": 0.0001122868907965171,
      "loss": 1.6185,
      "step": 2327
    },
    {
      "epoch": 0.47724477244772445,
      "grad_norm": 0.5145929849826683,
      "learning_rate": 0.0001122209868342532,
      "loss": 1.6534,
      "step": 2328
    },
    {
      "epoch": 0.47744977449774495,
      "grad_norm": 0.45886796707133826,
      "learning_rate": 0.00011215507748308948,
      "loss": 1.5393,
      "step": 2329
    },
    {
      "epoch": 0.4776547765477655,
      "grad_norm": 0.4694812409190323,
      "learning_rate": 0.00011208916277208894,
      "loss": 1.5387,
      "step": 2330
    },
    {
      "epoch": 0.477859778597786,
      "grad_norm": 0.4637910427532401,
      "learning_rate": 0.00011202324273031706,
      "loss": 1.5485,
      "step": 2331
    },
    {
      "epoch": 0.4780647806478065,
      "grad_norm": 0.5353326062311138,
      "learning_rate": 0.0001119573173868415,
      "loss": 1.6811,
      "step": 2332
    },
    {
      "epoch": 0.478269782697827,
      "grad_norm": 0.4982408592050999,
      "learning_rate": 0.00011189138677073236,
      "loss": 1.6201,
      "step": 2333
    },
    {
      "epoch": 0.4784747847478475,
      "grad_norm": 0.4953129986805206,
      "learning_rate": 0.00011182545091106209,
      "loss": 1.6302,
      "step": 2334
    },
    {
      "epoch": 0.478679786797868,
      "grad_norm": 0.49332846201190206,
      "learning_rate": 0.00011175950983690536,
      "loss": 1.5992,
      "step": 2335
    },
    {
      "epoch": 0.4788847888478885,
      "grad_norm": 0.45243072914605553,
      "learning_rate": 0.0001116935635773392,
      "loss": 1.6021,
      "step": 2336
    },
    {
      "epoch": 0.479089790897909,
      "grad_norm": 0.48561084414706707,
      "learning_rate": 0.00011162761216144294,
      "loss": 1.6843,
      "step": 2337
    },
    {
      "epoch": 0.4792947929479295,
      "grad_norm": 0.47927300604352957,
      "learning_rate": 0.00011156165561829805,
      "loss": 1.6517,
      "step": 2338
    },
    {
      "epoch": 0.47949979499795,
      "grad_norm": 0.4442454083340828,
      "learning_rate": 0.00011149569397698853,
      "loss": 1.5565,
      "step": 2339
    },
    {
      "epoch": 0.4797047970479705,
      "grad_norm": 0.5381660359712772,
      "learning_rate": 0.00011142972726660037,
      "loss": 1.6523,
      "step": 2340
    },
    {
      "epoch": 0.479909799097991,
      "grad_norm": 0.45053015629064147,
      "learning_rate": 0.00011136375551622189,
      "loss": 1.5825,
      "step": 2341
    },
    {
      "epoch": 0.48011480114801147,
      "grad_norm": 0.46369572366146095,
      "learning_rate": 0.00011129777875494367,
      "loss": 1.5612,
      "step": 2342
    },
    {
      "epoch": 0.48031980319803197,
      "grad_norm": 0.48810575669816464,
      "learning_rate": 0.0001112317970118584,
      "loss": 1.6376,
      "step": 2343
    },
    {
      "epoch": 0.48052480524805247,
      "grad_norm": 0.5254086860127589,
      "learning_rate": 0.00011116581031606113,
      "loss": 1.6935,
      "step": 2344
    },
    {
      "epoch": 0.48072980729807296,
      "grad_norm": 0.5124948665863931,
      "learning_rate": 0.00011109981869664891,
      "loss": 1.6198,
      "step": 2345
    },
    {
      "epoch": 0.48093480934809346,
      "grad_norm": 0.5461771124928989,
      "learning_rate": 0.00011103382218272107,
      "loss": 1.6161,
      "step": 2346
    },
    {
      "epoch": 0.48113981139811396,
      "grad_norm": 0.4863473469789765,
      "learning_rate": 0.0001109678208033791,
      "loss": 1.592,
      "step": 2347
    },
    {
      "epoch": 0.48134481344813446,
      "grad_norm": 0.5046390734295799,
      "learning_rate": 0.00011090181458772658,
      "loss": 1.6116,
      "step": 2348
    },
    {
      "epoch": 0.48154981549815495,
      "grad_norm": 0.4737685706404132,
      "learning_rate": 0.00011083580356486925,
      "loss": 1.5607,
      "step": 2349
    },
    {
      "epoch": 0.4817548175481755,
      "grad_norm": 0.5236025428121257,
      "learning_rate": 0.00011076978776391498,
      "loss": 1.6434,
      "step": 2350
    },
    {
      "epoch": 0.481959819598196,
      "grad_norm": 0.5230584848789296,
      "learning_rate": 0.00011070376721397373,
      "loss": 1.6571,
      "step": 2351
    },
    {
      "epoch": 0.4821648216482165,
      "grad_norm": 0.4918779821776992,
      "learning_rate": 0.00011063774194415762,
      "loss": 1.5898,
      "step": 2352
    },
    {
      "epoch": 0.482369823698237,
      "grad_norm": 0.4748366711117525,
      "learning_rate": 0.00011057171198358069,
      "loss": 1.566,
      "step": 2353
    },
    {
      "epoch": 0.4825748257482575,
      "grad_norm": 0.4954311061768946,
      "learning_rate": 0.00011050567736135922,
      "loss": 1.5944,
      "step": 2354
    },
    {
      "epoch": 0.482779827798278,
      "grad_norm": 0.47698109405814104,
      "learning_rate": 0.00011043963810661145,
      "loss": 1.4945,
      "step": 2355
    },
    {
      "epoch": 0.4829848298482985,
      "grad_norm": 0.4504676414625187,
      "learning_rate": 0.0001103735942484577,
      "loss": 1.5752,
      "step": 2356
    },
    {
      "epoch": 0.483189831898319,
      "grad_norm": 0.48840594738611287,
      "learning_rate": 0.00011030754581602034,
      "loss": 1.5283,
      "step": 2357
    },
    {
      "epoch": 0.4833948339483395,
      "grad_norm": 0.5245347530391755,
      "learning_rate": 0.0001102414928384237,
      "loss": 1.6494,
      "step": 2358
    },
    {
      "epoch": 0.48359983599836,
      "grad_norm": 0.4850473636673112,
      "learning_rate": 0.0001101754353447941,
      "loss": 1.6233,
      "step": 2359
    },
    {
      "epoch": 0.4838048380483805,
      "grad_norm": 0.5221234980748952,
      "learning_rate": 0.00011010937336425997,
      "loss": 1.6097,
      "step": 2360
    },
    {
      "epoch": 0.484009840098401,
      "grad_norm": 0.5243373889101719,
      "learning_rate": 0.00011004330692595159,
      "loss": 1.6357,
      "step": 2361
    },
    {
      "epoch": 0.4842148421484215,
      "grad_norm": 0.4331562470316017,
      "learning_rate": 0.00010997723605900128,
      "loss": 1.609,
      "step": 2362
    },
    {
      "epoch": 0.484419844198442,
      "grad_norm": 0.43944438677907005,
      "learning_rate": 0.00010991116079254326,
      "loss": 1.5822,
      "step": 2363
    },
    {
      "epoch": 0.48462484624846247,
      "grad_norm": 0.4669528875625807,
      "learning_rate": 0.00010984508115571377,
      "loss": 1.6129,
      "step": 2364
    },
    {
      "epoch": 0.48482984829848297,
      "grad_norm": 0.5302574842280688,
      "learning_rate": 0.0001097789971776509,
      "loss": 1.5272,
      "step": 2365
    },
    {
      "epoch": 0.48503485034850347,
      "grad_norm": 0.48915275804868724,
      "learning_rate": 0.00010971290888749465,
      "loss": 1.6127,
      "step": 2366
    },
    {
      "epoch": 0.48523985239852396,
      "grad_norm": 0.46774279092863985,
      "learning_rate": 0.00010964681631438702,
      "loss": 1.61,
      "step": 2367
    },
    {
      "epoch": 0.48544485444854446,
      "grad_norm": 0.48916150402670916,
      "learning_rate": 0.00010958071948747175,
      "loss": 1.5986,
      "step": 2368
    },
    {
      "epoch": 0.48564985649856496,
      "grad_norm": 0.5381627483880119,
      "learning_rate": 0.00010951461843589464,
      "loss": 1.6588,
      "step": 2369
    },
    {
      "epoch": 0.4858548585485855,
      "grad_norm": 0.49255876080831035,
      "learning_rate": 0.00010944851318880314,
      "loss": 1.6333,
      "step": 2370
    },
    {
      "epoch": 0.486059860598606,
      "grad_norm": 0.4530107037969976,
      "learning_rate": 0.00010938240377534673,
      "loss": 1.6044,
      "step": 2371
    },
    {
      "epoch": 0.4862648626486265,
      "grad_norm": 0.4679867863257569,
      "learning_rate": 0.00010931629022467664,
      "loss": 1.5914,
      "step": 2372
    },
    {
      "epoch": 0.486469864698647,
      "grad_norm": 0.4609246427105402,
      "learning_rate": 0.0001092501725659459,
      "loss": 1.6302,
      "step": 2373
    },
    {
      "epoch": 0.4866748667486675,
      "grad_norm": 0.5634552571191087,
      "learning_rate": 0.00010918405082830947,
      "loss": 1.6898,
      "step": 2374
    },
    {
      "epoch": 0.486879868798688,
      "grad_norm": 0.4347474955091194,
      "learning_rate": 0.00010911792504092398,
      "loss": 1.6411,
      "step": 2375
    },
    {
      "epoch": 0.4870848708487085,
      "grad_norm": 0.46431622202420253,
      "learning_rate": 0.0001090517952329479,
      "loss": 1.6204,
      "step": 2376
    },
    {
      "epoch": 0.487289872898729,
      "grad_norm": 0.47692010798204787,
      "learning_rate": 0.00010898566143354152,
      "loss": 1.5208,
      "step": 2377
    },
    {
      "epoch": 0.4874948749487495,
      "grad_norm": 0.5378789929228168,
      "learning_rate": 0.00010891952367186673,
      "loss": 1.5894,
      "step": 2378
    },
    {
      "epoch": 0.48769987699877,
      "grad_norm": 0.5123897483953619,
      "learning_rate": 0.00010885338197708741,
      "loss": 1.5772,
      "step": 2379
    },
    {
      "epoch": 0.4879048790487905,
      "grad_norm": 0.4584599283163656,
      "learning_rate": 0.00010878723637836896,
      "loss": 1.5962,
      "step": 2380
    },
    {
      "epoch": 0.488109881098811,
      "grad_norm": 0.4762694224577129,
      "learning_rate": 0.00010872108690487859,
      "loss": 1.6019,
      "step": 2381
    },
    {
      "epoch": 0.4883148831488315,
      "grad_norm": 0.5050385453733731,
      "learning_rate": 0.00010865493358578525,
      "loss": 1.5604,
      "step": 2382
    },
    {
      "epoch": 0.488519885198852,
      "grad_norm": 0.5524601479762588,
      "learning_rate": 0.00010858877645025947,
      "loss": 1.7108,
      "step": 2383
    },
    {
      "epoch": 0.4887248872488725,
      "grad_norm": 0.46286418508676025,
      "learning_rate": 0.00010852261552747365,
      "loss": 1.6257,
      "step": 2384
    },
    {
      "epoch": 0.488929889298893,
      "grad_norm": 0.5073850683660276,
      "learning_rate": 0.00010845645084660168,
      "loss": 1.6136,
      "step": 2385
    },
    {
      "epoch": 0.48913489134891347,
      "grad_norm": 0.5496010084900519,
      "learning_rate": 0.00010839028243681913,
      "loss": 1.6215,
      "step": 2386
    },
    {
      "epoch": 0.48933989339893397,
      "grad_norm": 0.45297160435671774,
      "learning_rate": 0.00010832411032730338,
      "loss": 1.6088,
      "step": 2387
    },
    {
      "epoch": 0.48954489544895446,
      "grad_norm": 0.49003040788274843,
      "learning_rate": 0.00010825793454723325,
      "loss": 1.6281,
      "step": 2388
    },
    {
      "epoch": 0.48974989749897496,
      "grad_norm": 0.4549031176294867,
      "learning_rate": 0.00010819175512578926,
      "loss": 1.5464,
      "step": 2389
    },
    {
      "epoch": 0.4899548995489955,
      "grad_norm": 0.49100533756003345,
      "learning_rate": 0.00010812557209215354,
      "loss": 1.5634,
      "step": 2390
    },
    {
      "epoch": 0.490159901599016,
      "grad_norm": 0.5769889879117706,
      "learning_rate": 0.00010805938547550975,
      "loss": 1.5911,
      "step": 2391
    },
    {
      "epoch": 0.4903649036490365,
      "grad_norm": 0.4287139312395359,
      "learning_rate": 0.00010799319530504328,
      "loss": 1.5459,
      "step": 2392
    },
    {
      "epoch": 0.490569905699057,
      "grad_norm": 0.43376555126658445,
      "learning_rate": 0.00010792700160994091,
      "loss": 1.5647,
      "step": 2393
    },
    {
      "epoch": 0.4907749077490775,
      "grad_norm": 0.5233643726619,
      "learning_rate": 0.00010786080441939106,
      "loss": 1.6711,
      "step": 2394
    },
    {
      "epoch": 0.490979909799098,
      "grad_norm": 0.5633323902721322,
      "learning_rate": 0.00010779460376258373,
      "loss": 1.5535,
      "step": 2395
    },
    {
      "epoch": 0.4911849118491185,
      "grad_norm": 0.47732599941857085,
      "learning_rate": 0.00010772839966871033,
      "loss": 1.5987,
      "step": 2396
    },
    {
      "epoch": 0.491389913899139,
      "grad_norm": 0.46067596959487644,
      "learning_rate": 0.00010766219216696389,
      "loss": 1.5897,
      "step": 2397
    },
    {
      "epoch": 0.4915949159491595,
      "grad_norm": 0.43869446298812403,
      "learning_rate": 0.00010759598128653891,
      "loss": 1.5501,
      "step": 2398
    },
    {
      "epoch": 0.49179991799918,
      "grad_norm": 0.512174786123945,
      "learning_rate": 0.00010752976705663141,
      "loss": 1.6311,
      "step": 2399
    },
    {
      "epoch": 0.4920049200492005,
      "grad_norm": 0.5075030594159609,
      "learning_rate": 0.00010746354950643882,
      "loss": 1.5888,
      "step": 2400
    },
    {
      "epoch": 0.492209922099221,
      "grad_norm": 0.4652475912328771,
      "learning_rate": 0.00010739732866516006,
      "loss": 1.5667,
      "step": 2401
    },
    {
      "epoch": 0.4924149241492415,
      "grad_norm": 0.5109471502775814,
      "learning_rate": 0.00010733110456199553,
      "loss": 1.5664,
      "step": 2402
    },
    {
      "epoch": 0.492619926199262,
      "grad_norm": 0.41415626725373245,
      "learning_rate": 0.00010726487722614704,
      "loss": 1.5234,
      "step": 2403
    },
    {
      "epoch": 0.4928249282492825,
      "grad_norm": 0.453900556364923,
      "learning_rate": 0.00010719864668681789,
      "loss": 1.5408,
      "step": 2404
    },
    {
      "epoch": 0.493029930299303,
      "grad_norm": 0.5159658103391713,
      "learning_rate": 0.00010713241297321266,
      "loss": 1.6307,
      "step": 2405
    },
    {
      "epoch": 0.4932349323493235,
      "grad_norm": 0.4495477608661638,
      "learning_rate": 0.00010706617611453744,
      "loss": 1.6134,
      "step": 2406
    },
    {
      "epoch": 0.49343993439934397,
      "grad_norm": 0.46861893482379374,
      "learning_rate": 0.00010699993613999966,
      "loss": 1.6052,
      "step": 2407
    },
    {
      "epoch": 0.49364493644936447,
      "grad_norm": 0.4572047369011586,
      "learning_rate": 0.00010693369307880816,
      "loss": 1.5282,
      "step": 2408
    },
    {
      "epoch": 0.49384993849938497,
      "grad_norm": 0.5203231874347147,
      "learning_rate": 0.00010686744696017314,
      "loss": 1.7314,
      "step": 2409
    },
    {
      "epoch": 0.4940549405494055,
      "grad_norm": 0.47779029984765703,
      "learning_rate": 0.00010680119781330608,
      "loss": 1.6489,
      "step": 2410
    },
    {
      "epoch": 0.494259942599426,
      "grad_norm": 0.47885386449454903,
      "learning_rate": 0.00010673494566741986,
      "loss": 1.6085,
      "step": 2411
    },
    {
      "epoch": 0.4944649446494465,
      "grad_norm": 0.5108249036297599,
      "learning_rate": 0.0001066686905517287,
      "loss": 1.5901,
      "step": 2412
    },
    {
      "epoch": 0.494669946699467,
      "grad_norm": 0.5196745731885813,
      "learning_rate": 0.00010660243249544803,
      "loss": 1.7071,
      "step": 2413
    },
    {
      "epoch": 0.4948749487494875,
      "grad_norm": 0.5023310126977671,
      "learning_rate": 0.00010653617152779469,
      "loss": 1.5968,
      "step": 2414
    },
    {
      "epoch": 0.495079950799508,
      "grad_norm": 0.48776548190121755,
      "learning_rate": 0.00010646990767798673,
      "loss": 1.6243,
      "step": 2415
    },
    {
      "epoch": 0.4952849528495285,
      "grad_norm": 0.4604749085603614,
      "learning_rate": 0.00010640364097524351,
      "loss": 1.5148,
      "step": 2416
    },
    {
      "epoch": 0.495489954899549,
      "grad_norm": 0.5058738074819576,
      "learning_rate": 0.00010633737144878567,
      "loss": 1.6582,
      "step": 2417
    },
    {
      "epoch": 0.4956949569495695,
      "grad_norm": 0.4705812627629428,
      "learning_rate": 0.00010627109912783497,
      "loss": 1.5764,
      "step": 2418
    },
    {
      "epoch": 0.49589995899959,
      "grad_norm": 0.4207336641892229,
      "learning_rate": 0.00010620482404161455,
      "loss": 1.5807,
      "step": 2419
    },
    {
      "epoch": 0.4961049610496105,
      "grad_norm": 0.49470056794660433,
      "learning_rate": 0.00010613854621934876,
      "loss": 1.6516,
      "step": 2420
    },
    {
      "epoch": 0.496309963099631,
      "grad_norm": 0.5011553114874971,
      "learning_rate": 0.00010607226569026296,
      "loss": 1.5954,
      "step": 2421
    },
    {
      "epoch": 0.4965149651496515,
      "grad_norm": 0.4715568061798656,
      "learning_rate": 0.00010600598248358402,
      "loss": 1.5588,
      "step": 2422
    },
    {
      "epoch": 0.496719967199672,
      "grad_norm": 0.4465718327712777,
      "learning_rate": 0.00010593969662853971,
      "loss": 1.6177,
      "step": 2423
    },
    {
      "epoch": 0.4969249692496925,
      "grad_norm": 0.46102051514978815,
      "learning_rate": 0.00010587340815435913,
      "loss": 1.5527,
      "step": 2424
    },
    {
      "epoch": 0.497129971299713,
      "grad_norm": 0.4847758311808002,
      "learning_rate": 0.00010580711709027247,
      "loss": 1.6046,
      "step": 2425
    },
    {
      "epoch": 0.4973349733497335,
      "grad_norm": 0.48483871309018783,
      "learning_rate": 0.00010574082346551106,
      "loss": 1.5434,
      "step": 2426
    },
    {
      "epoch": 0.497539975399754,
      "grad_norm": 0.4632410477156264,
      "learning_rate": 0.00010567452730930743,
      "loss": 1.5945,
      "step": 2427
    },
    {
      "epoch": 0.4977449774497745,
      "grad_norm": 0.4528789393023613,
      "learning_rate": 0.00010560822865089507,
      "loss": 1.5985,
      "step": 2428
    },
    {
      "epoch": 0.49794997949979497,
      "grad_norm": 0.4999662475226222,
      "learning_rate": 0.0001055419275195088,
      "loss": 1.6006,
      "step": 2429
    },
    {
      "epoch": 0.4981549815498155,
      "grad_norm": 0.4824404460357333,
      "learning_rate": 0.00010547562394438432,
      "loss": 1.6129,
      "step": 2430
    },
    {
      "epoch": 0.498359983599836,
      "grad_norm": 0.51597552922563,
      "learning_rate": 0.0001054093179547585,
      "loss": 1.6651,
      "step": 2431
    },
    {
      "epoch": 0.4985649856498565,
      "grad_norm": 0.47077000695063503,
      "learning_rate": 0.00010534300957986934,
      "loss": 1.5855,
      "step": 2432
    },
    {
      "epoch": 0.498769987699877,
      "grad_norm": 0.5511054571725165,
      "learning_rate": 0.00010527669884895573,
      "loss": 1.6265,
      "step": 2433
    },
    {
      "epoch": 0.4989749897498975,
      "grad_norm": 0.4873107508785988,
      "learning_rate": 0.00010521038579125772,
      "loss": 1.6211,
      "step": 2434
    },
    {
      "epoch": 0.499179991799918,
      "grad_norm": 0.48425913975104357,
      "learning_rate": 0.00010514407043601639,
      "loss": 1.5269,
      "step": 2435
    },
    {
      "epoch": 0.4993849938499385,
      "grad_norm": 0.4448744832694071,
      "learning_rate": 0.00010507775281247376,
      "loss": 1.6123,
      "step": 2436
    },
    {
      "epoch": 0.499589995899959,
      "grad_norm": 0.5021480905739342,
      "learning_rate": 0.00010501143294987291,
      "loss": 1.5658,
      "step": 2437
    },
    {
      "epoch": 0.4997949979499795,
      "grad_norm": 0.47015251130786984,
      "learning_rate": 0.00010494511087745787,
      "loss": 1.5841,
      "step": 2438
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.4515258663005253,
      "learning_rate": 0.00010487878662447362,
      "loss": 1.4943,
      "step": 2439
    },
    {
      "epoch": 0.5002050020500205,
      "grad_norm": 0.47194872939643995,
      "learning_rate": 0.00010481246022016621,
      "loss": 1.574,
      "step": 2440
    },
    {
      "epoch": 0.500410004100041,
      "grad_norm": 0.5223748220714067,
      "learning_rate": 0.00010474613169378255,
      "loss": 1.6495,
      "step": 2441
    },
    {
      "epoch": 0.5006150061500615,
      "grad_norm": 0.45534207125689324,
      "learning_rate": 0.0001046798010745705,
      "loss": 1.5767,
      "step": 2442
    },
    {
      "epoch": 0.500820008200082,
      "grad_norm": 0.48462075831370743,
      "learning_rate": 0.00010461346839177886,
      "loss": 1.6031,
      "step": 2443
    },
    {
      "epoch": 0.5010250102501025,
      "grad_norm": 0.520479204301412,
      "learning_rate": 0.0001045471336746573,
      "loss": 1.61,
      "step": 2444
    },
    {
      "epoch": 0.501230012300123,
      "grad_norm": 0.5010954605082324,
      "learning_rate": 0.00010448079695245642,
      "loss": 1.6324,
      "step": 2445
    },
    {
      "epoch": 0.5014350143501435,
      "grad_norm": 0.47110560381163424,
      "learning_rate": 0.00010441445825442772,
      "loss": 1.5896,
      "step": 2446
    },
    {
      "epoch": 0.501640016400164,
      "grad_norm": 0.4666942644677452,
      "learning_rate": 0.00010434811760982354,
      "loss": 1.6386,
      "step": 2447
    },
    {
      "epoch": 0.5018450184501845,
      "grad_norm": 0.5054809604278697,
      "learning_rate": 0.00010428177504789713,
      "loss": 1.6291,
      "step": 2448
    },
    {
      "epoch": 0.502050020500205,
      "grad_norm": 0.45789489860046845,
      "learning_rate": 0.00010421543059790249,
      "loss": 1.5651,
      "step": 2449
    },
    {
      "epoch": 0.5022550225502255,
      "grad_norm": 0.5122351289085794,
      "learning_rate": 0.00010414908428909451,
      "loss": 1.622,
      "step": 2450
    },
    {
      "epoch": 0.502460024600246,
      "grad_norm": 0.46720146443111,
      "learning_rate": 0.00010408273615072893,
      "loss": 1.6171,
      "step": 2451
    },
    {
      "epoch": 0.5026650266502665,
      "grad_norm": 0.4929517202147766,
      "learning_rate": 0.00010401638621206225,
      "loss": 1.5988,
      "step": 2452
    },
    {
      "epoch": 0.502870028700287,
      "grad_norm": 0.5219468133294127,
      "learning_rate": 0.00010395003450235177,
      "loss": 1.6357,
      "step": 2453
    },
    {
      "epoch": 0.5030750307503075,
      "grad_norm": 0.5131930453579404,
      "learning_rate": 0.00010388368105085557,
      "loss": 1.6377,
      "step": 2454
    },
    {
      "epoch": 0.503280032800328,
      "grad_norm": 0.48978927651683835,
      "learning_rate": 0.00010381732588683253,
      "loss": 1.559,
      "step": 2455
    },
    {
      "epoch": 0.5034850348503485,
      "grad_norm": 0.464008509874065,
      "learning_rate": 0.00010375096903954224,
      "loss": 1.4978,
      "step": 2456
    },
    {
      "epoch": 0.503690036900369,
      "grad_norm": 0.47675478064905874,
      "learning_rate": 0.0001036846105382451,
      "loss": 1.5878,
      "step": 2457
    },
    {
      "epoch": 0.5038950389503895,
      "grad_norm": 0.4882697008509031,
      "learning_rate": 0.00010361825041220212,
      "loss": 1.5783,
      "step": 2458
    },
    {
      "epoch": 0.5041000410004101,
      "grad_norm": 0.4925912638521127,
      "learning_rate": 0.00010355188869067515,
      "loss": 1.6102,
      "step": 2459
    },
    {
      "epoch": 0.5043050430504306,
      "grad_norm": 0.5024673878362161,
      "learning_rate": 0.00010348552540292671,
      "loss": 1.6067,
      "step": 2460
    },
    {
      "epoch": 0.504510045100451,
      "grad_norm": 0.5238039575010716,
      "learning_rate": 0.0001034191605782199,
      "loss": 1.632,
      "step": 2461
    },
    {
      "epoch": 0.5047150471504716,
      "grad_norm": 0.5558373580576796,
      "learning_rate": 0.00010335279424581871,
      "loss": 1.4658,
      "step": 2462
    },
    {
      "epoch": 0.504920049200492,
      "grad_norm": 0.5475872001937412,
      "learning_rate": 0.00010328642643498762,
      "loss": 1.5872,
      "step": 2463
    },
    {
      "epoch": 0.5051250512505125,
      "grad_norm": 0.5025179634215936,
      "learning_rate": 0.00010322005717499181,
      "loss": 1.5393,
      "step": 2464
    },
    {
      "epoch": 0.505330053300533,
      "grad_norm": 0.5029049452595459,
      "learning_rate": 0.00010315368649509716,
      "loss": 1.5776,
      "step": 2465
    },
    {
      "epoch": 0.5055350553505535,
      "grad_norm": 0.48131739466367407,
      "learning_rate": 0.00010308731442457005,
      "loss": 1.553,
      "step": 2466
    },
    {
      "epoch": 0.505740057400574,
      "grad_norm": 0.5111585139078691,
      "learning_rate": 0.00010302094099267759,
      "loss": 1.6123,
      "step": 2467
    },
    {
      "epoch": 0.5059450594505945,
      "grad_norm": 0.5173421848686773,
      "learning_rate": 0.00010295456622868745,
      "loss": 1.6605,
      "step": 2468
    },
    {
      "epoch": 0.506150061500615,
      "grad_norm": 0.5308233016317108,
      "learning_rate": 0.00010288819016186781,
      "loss": 1.6493,
      "step": 2469
    },
    {
      "epoch": 0.5063550635506355,
      "grad_norm": 0.4715321020694935,
      "learning_rate": 0.00010282181282148764,
      "loss": 1.6209,
      "step": 2470
    },
    {
      "epoch": 0.506560065600656,
      "grad_norm": 0.5830310639222391,
      "learning_rate": 0.00010275543423681621,
      "loss": 1.5886,
      "step": 2471
    },
    {
      "epoch": 0.5067650676506765,
      "grad_norm": 0.503817186060206,
      "learning_rate": 0.00010268905443712352,
      "loss": 1.6209,
      "step": 2472
    },
    {
      "epoch": 0.506970069700697,
      "grad_norm": 0.46038191349913693,
      "learning_rate": 0.00010262267345168002,
      "loss": 1.5915,
      "step": 2473
    },
    {
      "epoch": 0.5071750717507175,
      "grad_norm": 0.4758399941440051,
      "learning_rate": 0.0001025562913097567,
      "loss": 1.6005,
      "step": 2474
    },
    {
      "epoch": 0.507380073800738,
      "grad_norm": 0.4290919832977104,
      "learning_rate": 0.0001024899080406251,
      "loss": 1.6065,
      "step": 2475
    },
    {
      "epoch": 0.5075850758507585,
      "grad_norm": 0.4875553974177856,
      "learning_rate": 0.00010242352367355721,
      "loss": 1.6644,
      "step": 2476
    },
    {
      "epoch": 0.507790077900779,
      "grad_norm": 0.5208232481313042,
      "learning_rate": 0.0001023571382378255,
      "loss": 1.5928,
      "step": 2477
    },
    {
      "epoch": 0.5079950799507995,
      "grad_norm": 0.4855824646514368,
      "learning_rate": 0.00010229075176270298,
      "loss": 1.5313,
      "step": 2478
    },
    {
      "epoch": 0.50820008200082,
      "grad_norm": 0.5354337933570282,
      "learning_rate": 0.000102224364277463,
      "loss": 1.5763,
      "step": 2479
    },
    {
      "epoch": 0.5084050840508405,
      "grad_norm": 0.5246556148006474,
      "learning_rate": 0.00010215797581137947,
      "loss": 1.626,
      "step": 2480
    },
    {
      "epoch": 0.508610086100861,
      "grad_norm": 0.5248200890079275,
      "learning_rate": 0.00010209158639372669,
      "loss": 1.6198,
      "step": 2481
    },
    {
      "epoch": 0.5088150881508815,
      "grad_norm": 0.512113162776252,
      "learning_rate": 0.00010202519605377933,
      "loss": 1.6075,
      "step": 2482
    },
    {
      "epoch": 0.509020090200902,
      "grad_norm": 0.47168642330826854,
      "learning_rate": 0.00010195880482081259,
      "loss": 1.5638,
      "step": 2483
    },
    {
      "epoch": 0.5092250922509225,
      "grad_norm": 0.5586806365233687,
      "learning_rate": 0.0001018924127241019,
      "loss": 1.6259,
      "step": 2484
    },
    {
      "epoch": 0.509430094300943,
      "grad_norm": 0.46263332699102744,
      "learning_rate": 0.00010182601979292324,
      "loss": 1.5757,
      "step": 2485
    },
    {
      "epoch": 0.5096350963509635,
      "grad_norm": 0.5169451668995334,
      "learning_rate": 0.00010175962605655278,
      "loss": 1.6019,
      "step": 2486
    },
    {
      "epoch": 0.509840098400984,
      "grad_norm": 0.5106679612665614,
      "learning_rate": 0.00010169323154426727,
      "loss": 1.5749,
      "step": 2487
    },
    {
      "epoch": 0.5100451004510045,
      "grad_norm": 0.47639820709974806,
      "learning_rate": 0.00010162683628534353,
      "loss": 1.5993,
      "step": 2488
    },
    {
      "epoch": 0.510250102501025,
      "grad_norm": 0.4797874742922257,
      "learning_rate": 0.00010156044030905893,
      "loss": 1.5926,
      "step": 2489
    },
    {
      "epoch": 0.5104551045510455,
      "grad_norm": 0.5110094133179703,
      "learning_rate": 0.00010149404364469108,
      "loss": 1.6174,
      "step": 2490
    },
    {
      "epoch": 0.510660106601066,
      "grad_norm": 0.45701149123203805,
      "learning_rate": 0.00010142764632151791,
      "loss": 1.5263,
      "step": 2491
    },
    {
      "epoch": 0.5108651086510865,
      "grad_norm": 0.4606429909667692,
      "learning_rate": 0.00010136124836881756,
      "loss": 1.6431,
      "step": 2492
    },
    {
      "epoch": 0.511070110701107,
      "grad_norm": 0.5528467574561731,
      "learning_rate": 0.00010129484981586852,
      "loss": 1.6401,
      "step": 2493
    },
    {
      "epoch": 0.5112751127511275,
      "grad_norm": 0.4853128402441001,
      "learning_rate": 0.00010122845069194957,
      "loss": 1.6239,
      "step": 2494
    },
    {
      "epoch": 0.511480114801148,
      "grad_norm": 0.5058352624526249,
      "learning_rate": 0.00010116205102633973,
      "loss": 1.5764,
      "step": 2495
    },
    {
      "epoch": 0.5116851168511685,
      "grad_norm": 0.5215394579725485,
      "learning_rate": 0.00010109565084831816,
      "loss": 1.5929,
      "step": 2496
    },
    {
      "epoch": 0.511890118901189,
      "grad_norm": 0.4582735441208923,
      "learning_rate": 0.00010102925018716436,
      "loss": 1.5617,
      "step": 2497
    },
    {
      "epoch": 0.5120951209512095,
      "grad_norm": 0.5491773649701894,
      "learning_rate": 0.000100962849072158,
      "loss": 1.6699,
      "step": 2498
    },
    {
      "epoch": 0.5123001230012301,
      "grad_norm": 0.4811743553150433,
      "learning_rate": 0.00010089644753257897,
      "loss": 1.5442,
      "step": 2499
    },
    {
      "epoch": 0.5125051250512506,
      "grad_norm": 0.49544065018608047,
      "learning_rate": 0.0001008300455977073,
      "loss": 1.6331,
      "step": 2500
    },
    {
      "epoch": 0.5127101271012711,
      "grad_norm": 0.4564115367370197,
      "learning_rate": 0.00010076364329682327,
      "loss": 1.5818,
      "step": 2501
    },
    {
      "epoch": 0.5129151291512916,
      "grad_norm": 0.49393267098549415,
      "learning_rate": 0.0001006972406592072,
      "loss": 1.6238,
      "step": 2502
    },
    {
      "epoch": 0.5131201312013121,
      "grad_norm": 0.4919227152974293,
      "learning_rate": 0.00010063083771413975,
      "loss": 1.6198,
      "step": 2503
    },
    {
      "epoch": 0.5133251332513326,
      "grad_norm": 0.4983782900020245,
      "learning_rate": 0.00010056443449090148,
      "loss": 1.6591,
      "step": 2504
    },
    {
      "epoch": 0.513530135301353,
      "grad_norm": 0.4379587385398632,
      "learning_rate": 0.00010049803101877328,
      "loss": 1.5746,
      "step": 2505
    },
    {
      "epoch": 0.5137351373513735,
      "grad_norm": 0.459744160917418,
      "learning_rate": 0.00010043162732703601,
      "loss": 1.6218,
      "step": 2506
    },
    {
      "epoch": 0.513940139401394,
      "grad_norm": 0.5324166525407472,
      "learning_rate": 0.00010036522344497073,
      "loss": 1.6129,
      "step": 2507
    },
    {
      "epoch": 0.5141451414514145,
      "grad_norm": 0.4835041887056759,
      "learning_rate": 0.0001002988194018585,
      "loss": 1.5706,
      "step": 2508
    },
    {
      "epoch": 0.514350143501435,
      "grad_norm": 0.4737438506372438,
      "learning_rate": 0.00010023241522698048,
      "loss": 1.5815,
      "step": 2509
    },
    {
      "epoch": 0.5145551455514555,
      "grad_norm": 0.45369081143961243,
      "learning_rate": 0.00010016601094961792,
      "loss": 1.5817,
      "step": 2510
    },
    {
      "epoch": 0.514760147601476,
      "grad_norm": 0.431106950670304,
      "learning_rate": 0.00010009960659905211,
      "loss": 1.5066,
      "step": 2511
    },
    {
      "epoch": 0.5149651496514965,
      "grad_norm": 0.4955192895411924,
      "learning_rate": 0.00010003320220456425,
      "loss": 1.6076,
      "step": 2512
    },
    {
      "epoch": 0.515170151701517,
      "grad_norm": 0.540572428181615,
      "learning_rate": 9.996679779543578e-05,
      "loss": 1.5916,
      "step": 2513
    },
    {
      "epoch": 0.5153751537515375,
      "grad_norm": 0.47234105520451763,
      "learning_rate": 9.990039340094793e-05,
      "loss": 1.558,
      "step": 2514
    },
    {
      "epoch": 0.515580155801558,
      "grad_norm": 0.47920848127457555,
      "learning_rate": 9.983398905038211e-05,
      "loss": 1.6319,
      "step": 2515
    },
    {
      "epoch": 0.5157851578515785,
      "grad_norm": 0.49213997433948753,
      "learning_rate": 9.976758477301951e-05,
      "loss": 1.6662,
      "step": 2516
    },
    {
      "epoch": 0.515990159901599,
      "grad_norm": 0.45054389918426824,
      "learning_rate": 9.97011805981415e-05,
      "loss": 1.5518,
      "step": 2517
    },
    {
      "epoch": 0.5161951619516195,
      "grad_norm": 0.5029764414406267,
      "learning_rate": 9.96347765550293e-05,
      "loss": 1.6486,
      "step": 2518
    },
    {
      "epoch": 0.51640016400164,
      "grad_norm": 0.5052305700272539,
      "learning_rate": 9.9568372672964e-05,
      "loss": 1.5791,
      "step": 2519
    },
    {
      "epoch": 0.5166051660516605,
      "grad_norm": 0.5420813213312199,
      "learning_rate": 9.950196898122677e-05,
      "loss": 1.5652,
      "step": 2520
    },
    {
      "epoch": 0.516810168101681,
      "grad_norm": 0.4344333235712946,
      "learning_rate": 9.943556550909853e-05,
      "loss": 1.5429,
      "step": 2521
    },
    {
      "epoch": 0.5170151701517015,
      "grad_norm": 0.4864341815769745,
      "learning_rate": 9.936916228586028e-05,
      "loss": 1.6041,
      "step": 2522
    },
    {
      "epoch": 0.517220172201722,
      "grad_norm": 0.5164224728911496,
      "learning_rate": 9.93027593407928e-05,
      "loss": 1.656,
      "step": 2523
    },
    {
      "epoch": 0.5174251742517425,
      "grad_norm": 0.48558373722109194,
      "learning_rate": 9.923635670317677e-05,
      "loss": 1.5759,
      "step": 2524
    },
    {
      "epoch": 0.517630176301763,
      "grad_norm": 0.48590969028469266,
      "learning_rate": 9.916995440229274e-05,
      "loss": 1.5563,
      "step": 2525
    },
    {
      "epoch": 0.5178351783517835,
      "grad_norm": 0.4967037758031681,
      "learning_rate": 9.910355246742104e-05,
      "loss": 1.6096,
      "step": 2526
    },
    {
      "epoch": 0.518040180401804,
      "grad_norm": 0.49675997747506484,
      "learning_rate": 9.903715092784201e-05,
      "loss": 1.5492,
      "step": 2527
    },
    {
      "epoch": 0.5182451824518245,
      "grad_norm": 0.45251477918910415,
      "learning_rate": 9.897074981283566e-05,
      "loss": 1.5897,
      "step": 2528
    },
    {
      "epoch": 0.518450184501845,
      "grad_norm": 0.5039776535215352,
      "learning_rate": 9.890434915168186e-05,
      "loss": 1.6139,
      "step": 2529
    },
    {
      "epoch": 0.5186551865518655,
      "grad_norm": 0.5034529955913656,
      "learning_rate": 9.883794897366032e-05,
      "loss": 1.5903,
      "step": 2530
    },
    {
      "epoch": 0.518860188601886,
      "grad_norm": 0.5014220844700772,
      "learning_rate": 9.877154930805044e-05,
      "loss": 1.6361,
      "step": 2531
    },
    {
      "epoch": 0.5190651906519065,
      "grad_norm": 0.4733108419139549,
      "learning_rate": 9.870515018413147e-05,
      "loss": 1.6271,
      "step": 2532
    },
    {
      "epoch": 0.519270192701927,
      "grad_norm": 0.47555218542064365,
      "learning_rate": 9.863875163118246e-05,
      "loss": 1.5793,
      "step": 2533
    },
    {
      "epoch": 0.5194751947519475,
      "grad_norm": 0.512479108337581,
      "learning_rate": 9.857235367848212e-05,
      "loss": 1.5722,
      "step": 2534
    },
    {
      "epoch": 0.519680196801968,
      "grad_norm": 0.4411341485803346,
      "learning_rate": 9.850595635530894e-05,
      "loss": 1.5596,
      "step": 2535
    },
    {
      "epoch": 0.5198851988519885,
      "grad_norm": 0.4894139127533064,
      "learning_rate": 9.84395596909411e-05,
      "loss": 1.5474,
      "step": 2536
    },
    {
      "epoch": 0.520090200902009,
      "grad_norm": 0.5169028198689807,
      "learning_rate": 9.837316371465647e-05,
      "loss": 1.6659,
      "step": 2537
    },
    {
      "epoch": 0.5202952029520295,
      "grad_norm": 0.4271890419775494,
      "learning_rate": 9.830676845573277e-05,
      "loss": 1.5635,
      "step": 2538
    },
    {
      "epoch": 0.5205002050020501,
      "grad_norm": 0.49725971368584343,
      "learning_rate": 9.824037394344723e-05,
      "loss": 1.5679,
      "step": 2539
    },
    {
      "epoch": 0.5207052070520706,
      "grad_norm": 0.49802112891904055,
      "learning_rate": 9.81739802070768e-05,
      "loss": 1.5815,
      "step": 2540
    },
    {
      "epoch": 0.5209102091020911,
      "grad_norm": 0.48247149935574224,
      "learning_rate": 9.810758727589813e-05,
      "loss": 1.6449,
      "step": 2541
    },
    {
      "epoch": 0.5211152111521116,
      "grad_norm": 0.42232875110504203,
      "learning_rate": 9.80411951791874e-05,
      "loss": 1.5436,
      "step": 2542
    },
    {
      "epoch": 0.5213202132021321,
      "grad_norm": 0.4608539619915591,
      "learning_rate": 9.797480394622067e-05,
      "loss": 1.5227,
      "step": 2543
    },
    {
      "epoch": 0.5215252152521526,
      "grad_norm": 0.46229272710756314,
      "learning_rate": 9.790841360627335e-05,
      "loss": 1.6085,
      "step": 2544
    },
    {
      "epoch": 0.5217302173021731,
      "grad_norm": 0.5008077375281823,
      "learning_rate": 9.784202418862055e-05,
      "loss": 1.5789,
      "step": 2545
    },
    {
      "epoch": 0.5219352193521936,
      "grad_norm": 0.4803787861926077,
      "learning_rate": 9.777563572253704e-05,
      "loss": 1.6053,
      "step": 2546
    },
    {
      "epoch": 0.522140221402214,
      "grad_norm": 0.46517282070826577,
      "learning_rate": 9.770924823729707e-05,
      "loss": 1.5822,
      "step": 2547
    },
    {
      "epoch": 0.5223452234522346,
      "grad_norm": 0.5128084079117348,
      "learning_rate": 9.76428617621745e-05,
      "loss": 1.614,
      "step": 2548
    },
    {
      "epoch": 0.522550225502255,
      "grad_norm": 0.4634951299647063,
      "learning_rate": 9.757647632644281e-05,
      "loss": 1.6387,
      "step": 2549
    },
    {
      "epoch": 0.5227552275522755,
      "grad_norm": 0.4738270248530166,
      "learning_rate": 9.751009195937492e-05,
      "loss": 1.571,
      "step": 2550
    },
    {
      "epoch": 0.522960229602296,
      "grad_norm": 0.500545655030884,
      "learning_rate": 9.744370869024333e-05,
      "loss": 1.6107,
      "step": 2551
    },
    {
      "epoch": 0.5231652316523165,
      "grad_norm": 0.5018393676972772,
      "learning_rate": 9.737732654832001e-05,
      "loss": 1.6351,
      "step": 2552
    },
    {
      "epoch": 0.523370233702337,
      "grad_norm": 0.48920127556465665,
      "learning_rate": 9.73109455628765e-05,
      "loss": 1.5841,
      "step": 2553
    },
    {
      "epoch": 0.5235752357523575,
      "grad_norm": 0.4893372441157476,
      "learning_rate": 9.724456576318381e-05,
      "loss": 1.6178,
      "step": 2554
    },
    {
      "epoch": 0.523780237802378,
      "grad_norm": 0.5170917820909494,
      "learning_rate": 9.717818717851239e-05,
      "loss": 1.619,
      "step": 2555
    },
    {
      "epoch": 0.5239852398523985,
      "grad_norm": 0.51058600115176,
      "learning_rate": 9.711180983813221e-05,
      "loss": 1.6348,
      "step": 2556
    },
    {
      "epoch": 0.524190241902419,
      "grad_norm": 0.49067760840444696,
      "learning_rate": 9.70454337713126e-05,
      "loss": 1.5937,
      "step": 2557
    },
    {
      "epoch": 0.5243952439524395,
      "grad_norm": 0.5159856838872472,
      "learning_rate": 9.697905900732242e-05,
      "loss": 1.6992,
      "step": 2558
    },
    {
      "epoch": 0.52460024600246,
      "grad_norm": 0.4923969095974549,
      "learning_rate": 9.691268557542997e-05,
      "loss": 1.6061,
      "step": 2559
    },
    {
      "epoch": 0.5248052480524805,
      "grad_norm": 0.45800071463730624,
      "learning_rate": 9.684631350490287e-05,
      "loss": 1.536,
      "step": 2560
    },
    {
      "epoch": 0.525010250102501,
      "grad_norm": 0.5360345920950236,
      "learning_rate": 9.67799428250082e-05,
      "loss": 1.6033,
      "step": 2561
    },
    {
      "epoch": 0.5252152521525215,
      "grad_norm": 0.5180160431365916,
      "learning_rate": 9.67135735650124e-05,
      "loss": 1.6462,
      "step": 2562
    },
    {
      "epoch": 0.525420254202542,
      "grad_norm": 0.4812536913627767,
      "learning_rate": 9.664720575418131e-05,
      "loss": 1.6137,
      "step": 2563
    },
    {
      "epoch": 0.5256252562525625,
      "grad_norm": 0.4809066363559525,
      "learning_rate": 9.658083942178009e-05,
      "loss": 1.5367,
      "step": 2564
    },
    {
      "epoch": 0.525830258302583,
      "grad_norm": 0.48273280601119806,
      "learning_rate": 9.651447459707333e-05,
      "loss": 1.5507,
      "step": 2565
    },
    {
      "epoch": 0.5260352603526035,
      "grad_norm": 0.45050581541491375,
      "learning_rate": 9.644811130932487e-05,
      "loss": 1.4679,
      "step": 2566
    },
    {
      "epoch": 0.526240262402624,
      "grad_norm": 0.500768133112126,
      "learning_rate": 9.63817495877979e-05,
      "loss": 1.5782,
      "step": 2567
    },
    {
      "epoch": 0.5264452644526445,
      "grad_norm": 0.5284710322872105,
      "learning_rate": 9.631538946175496e-05,
      "loss": 1.6695,
      "step": 2568
    },
    {
      "epoch": 0.526650266502665,
      "grad_norm": 0.5109318990102362,
      "learning_rate": 9.624903096045777e-05,
      "loss": 1.564,
      "step": 2569
    },
    {
      "epoch": 0.5268552685526855,
      "grad_norm": 0.48838063559915174,
      "learning_rate": 9.618267411316748e-05,
      "loss": 1.6029,
      "step": 2570
    },
    {
      "epoch": 0.527060270602706,
      "grad_norm": 0.5276404347631407,
      "learning_rate": 9.611631894914445e-05,
      "loss": 1.581,
      "step": 2571
    },
    {
      "epoch": 0.5272652726527265,
      "grad_norm": 0.5322367855685195,
      "learning_rate": 9.604996549764825e-05,
      "loss": 1.6502,
      "step": 2572
    },
    {
      "epoch": 0.527470274702747,
      "grad_norm": 0.4821398347525406,
      "learning_rate": 9.598361378793779e-05,
      "loss": 1.5425,
      "step": 2573
    },
    {
      "epoch": 0.5276752767527675,
      "grad_norm": 0.5156658004918746,
      "learning_rate": 9.59172638492711e-05,
      "loss": 1.6408,
      "step": 2574
    },
    {
      "epoch": 0.527880278802788,
      "grad_norm": 0.5282466669936305,
      "learning_rate": 9.585091571090548e-05,
      "loss": 1.6889,
      "step": 2575
    },
    {
      "epoch": 0.5280852808528085,
      "grad_norm": 0.4733323569592494,
      "learning_rate": 9.578456940209754e-05,
      "loss": 1.6095,
      "step": 2576
    },
    {
      "epoch": 0.528290282902829,
      "grad_norm": 0.4811658428753099,
      "learning_rate": 9.571822495210289e-05,
      "loss": 1.6596,
      "step": 2577
    },
    {
      "epoch": 0.5284952849528495,
      "grad_norm": 0.5167827792861637,
      "learning_rate": 9.565188239017647e-05,
      "loss": 1.6315,
      "step": 2578
    },
    {
      "epoch": 0.5287002870028701,
      "grad_norm": 0.5012575618177271,
      "learning_rate": 9.55855417455723e-05,
      "loss": 1.5874,
      "step": 2579
    },
    {
      "epoch": 0.5289052890528906,
      "grad_norm": 0.5413913504440435,
      "learning_rate": 9.551920304754359e-05,
      "loss": 1.6183,
      "step": 2580
    },
    {
      "epoch": 0.5291102911029111,
      "grad_norm": 0.46071158692215214,
      "learning_rate": 9.545286632534273e-05,
      "loss": 1.541,
      "step": 2581
    },
    {
      "epoch": 0.5293152931529316,
      "grad_norm": 0.4477291072986434,
      "learning_rate": 9.538653160822117e-05,
      "loss": 1.5624,
      "step": 2582
    },
    {
      "epoch": 0.5295202952029521,
      "grad_norm": 0.5111728290860247,
      "learning_rate": 9.532019892542954e-05,
      "loss": 1.5701,
      "step": 2583
    },
    {
      "epoch": 0.5297252972529726,
      "grad_norm": 0.44421690248757384,
      "learning_rate": 9.525386830621747e-05,
      "loss": 1.5369,
      "step": 2584
    },
    {
      "epoch": 0.5299302993029931,
      "grad_norm": 0.4383958783572774,
      "learning_rate": 9.518753977983378e-05,
      "loss": 1.5485,
      "step": 2585
    },
    {
      "epoch": 0.5301353013530136,
      "grad_norm": 0.472800731554737,
      "learning_rate": 9.51212133755264e-05,
      "loss": 1.5338,
      "step": 2586
    },
    {
      "epoch": 0.5303403034030341,
      "grad_norm": 0.5423207566409595,
      "learning_rate": 9.505488912254217e-05,
      "loss": 1.6267,
      "step": 2587
    },
    {
      "epoch": 0.5305453054530546,
      "grad_norm": 0.45905269694253686,
      "learning_rate": 9.498856705012713e-05,
      "loss": 1.5588,
      "step": 2588
    },
    {
      "epoch": 0.5307503075030751,
      "grad_norm": 0.4937161742446509,
      "learning_rate": 9.492224718752628e-05,
      "loss": 1.5922,
      "step": 2589
    },
    {
      "epoch": 0.5309553095530956,
      "grad_norm": 0.4766148408996389,
      "learning_rate": 9.485592956398363e-05,
      "loss": 1.6414,
      "step": 2590
    },
    {
      "epoch": 0.531160311603116,
      "grad_norm": 0.49227759639704805,
      "learning_rate": 9.478961420874227e-05,
      "loss": 1.5908,
      "step": 2591
    },
    {
      "epoch": 0.5313653136531366,
      "grad_norm": 0.4478427512634655,
      "learning_rate": 9.472330115104428e-05,
      "loss": 1.5812,
      "step": 2592
    },
    {
      "epoch": 0.531570315703157,
      "grad_norm": 0.4427938910898017,
      "learning_rate": 9.465699042013068e-05,
      "loss": 1.5835,
      "step": 2593
    },
    {
      "epoch": 0.5317753177531775,
      "grad_norm": 0.4865116701435559,
      "learning_rate": 9.45906820452415e-05,
      "loss": 1.6174,
      "step": 2594
    },
    {
      "epoch": 0.531980319803198,
      "grad_norm": 0.4578424745835675,
      "learning_rate": 9.452437605561572e-05,
      "loss": 1.5859,
      "step": 2595
    },
    {
      "epoch": 0.5321853218532185,
      "grad_norm": 0.46196737986894526,
      "learning_rate": 9.445807248049121e-05,
      "loss": 1.5602,
      "step": 2596
    },
    {
      "epoch": 0.532390323903239,
      "grad_norm": 0.4447995981027458,
      "learning_rate": 9.439177134910493e-05,
      "loss": 1.557,
      "step": 2597
    },
    {
      "epoch": 0.5325953259532595,
      "grad_norm": 0.44419772585666994,
      "learning_rate": 9.432547269069261e-05,
      "loss": 1.6468,
      "step": 2598
    },
    {
      "epoch": 0.53280032800328,
      "grad_norm": 0.507291563731087,
      "learning_rate": 9.425917653448897e-05,
      "loss": 1.5493,
      "step": 2599
    },
    {
      "epoch": 0.5330053300533005,
      "grad_norm": 0.452533551154994,
      "learning_rate": 9.419288290972757e-05,
      "loss": 1.5742,
      "step": 2600
    },
    {
      "epoch": 0.533210332103321,
      "grad_norm": 0.43701229369428923,
      "learning_rate": 9.412659184564088e-05,
      "loss": 1.6192,
      "step": 2601
    },
    {
      "epoch": 0.5334153341533415,
      "grad_norm": 0.48292773924668453,
      "learning_rate": 9.40603033714603e-05,
      "loss": 1.6176,
      "step": 2602
    },
    {
      "epoch": 0.533620336203362,
      "grad_norm": 0.467810898431718,
      "learning_rate": 9.3994017516416e-05,
      "loss": 1.5941,
      "step": 2603
    },
    {
      "epoch": 0.5338253382533825,
      "grad_norm": 0.4384255556257593,
      "learning_rate": 9.392773430973705e-05,
      "loss": 1.5881,
      "step": 2604
    },
    {
      "epoch": 0.534030340303403,
      "grad_norm": 0.45708780221103745,
      "learning_rate": 9.38614537806513e-05,
      "loss": 1.5534,
      "step": 2605
    },
    {
      "epoch": 0.5342353423534235,
      "grad_norm": 0.41758424556981916,
      "learning_rate": 9.379517595838548e-05,
      "loss": 1.5603,
      "step": 2606
    },
    {
      "epoch": 0.534440344403444,
      "grad_norm": 0.48014776181606317,
      "learning_rate": 9.372890087216505e-05,
      "loss": 1.5584,
      "step": 2607
    },
    {
      "epoch": 0.5346453464534645,
      "grad_norm": 0.435177373212569,
      "learning_rate": 9.366262855121436e-05,
      "loss": 1.5959,
      "step": 2608
    },
    {
      "epoch": 0.534850348503485,
      "grad_norm": 0.46847034893299017,
      "learning_rate": 9.35963590247565e-05,
      "loss": 1.5881,
      "step": 2609
    },
    {
      "epoch": 0.5350553505535055,
      "grad_norm": 0.4550717051165404,
      "learning_rate": 9.353009232201328e-05,
      "loss": 1.5379,
      "step": 2610
    },
    {
      "epoch": 0.535260352603526,
      "grad_norm": 0.4200978439413184,
      "learning_rate": 9.346382847220534e-05,
      "loss": 1.5902,
      "step": 2611
    },
    {
      "epoch": 0.5354653546535465,
      "grad_norm": 0.48924871328365954,
      "learning_rate": 9.339756750455199e-05,
      "loss": 1.5238,
      "step": 2612
    },
    {
      "epoch": 0.535670356703567,
      "grad_norm": 0.46890992504076023,
      "learning_rate": 9.333130944827132e-05,
      "loss": 1.5627,
      "step": 2613
    },
    {
      "epoch": 0.5358753587535875,
      "grad_norm": 0.5351852825224304,
      "learning_rate": 9.326505433258015e-05,
      "loss": 1.6476,
      "step": 2614
    },
    {
      "epoch": 0.536080360803608,
      "grad_norm": 0.5287386691839768,
      "learning_rate": 9.319880218669394e-05,
      "loss": 1.6258,
      "step": 2615
    },
    {
      "epoch": 0.5362853628536285,
      "grad_norm": 0.45695847329910527,
      "learning_rate": 9.31325530398269e-05,
      "loss": 1.711,
      "step": 2616
    },
    {
      "epoch": 0.536490364903649,
      "grad_norm": 0.4694246732168661,
      "learning_rate": 9.306630692119182e-05,
      "loss": 1.4855,
      "step": 2617
    },
    {
      "epoch": 0.5366953669536695,
      "grad_norm": 0.47905575407870277,
      "learning_rate": 9.300006386000033e-05,
      "loss": 1.5275,
      "step": 2618
    },
    {
      "epoch": 0.5369003690036901,
      "grad_norm": 0.44527898617751693,
      "learning_rate": 9.293382388546259e-05,
      "loss": 1.5784,
      "step": 2619
    },
    {
      "epoch": 0.5371053710537106,
      "grad_norm": 0.5034381066746775,
      "learning_rate": 9.286758702678736e-05,
      "loss": 1.5708,
      "step": 2620
    },
    {
      "epoch": 0.5373103731037311,
      "grad_norm": 0.5198418243115236,
      "learning_rate": 9.280135331318216e-05,
      "loss": 1.6699,
      "step": 2621
    },
    {
      "epoch": 0.5375153751537516,
      "grad_norm": 0.4610284443386145,
      "learning_rate": 9.273512277385297e-05,
      "loss": 1.6123,
      "step": 2622
    },
    {
      "epoch": 0.5377203772037721,
      "grad_norm": 0.47474226913694023,
      "learning_rate": 9.266889543800447e-05,
      "loss": 1.564,
      "step": 2623
    },
    {
      "epoch": 0.5379253792537926,
      "grad_norm": 0.4726079947220515,
      "learning_rate": 9.260267133483997e-05,
      "loss": 1.5539,
      "step": 2624
    },
    {
      "epoch": 0.5381303813038131,
      "grad_norm": 0.5144171408522766,
      "learning_rate": 9.253645049356119e-05,
      "loss": 1.64,
      "step": 2625
    },
    {
      "epoch": 0.5383353833538336,
      "grad_norm": 0.5433169961057769,
      "learning_rate": 9.247023294336862e-05,
      "loss": 1.6346,
      "step": 2626
    },
    {
      "epoch": 0.5385403854038541,
      "grad_norm": 0.5338001873146371,
      "learning_rate": 9.24040187134611e-05,
      "loss": 1.6284,
      "step": 2627
    },
    {
      "epoch": 0.5387453874538746,
      "grad_norm": 0.48403605698163754,
      "learning_rate": 9.233780783303611e-05,
      "loss": 1.5404,
      "step": 2628
    },
    {
      "epoch": 0.5389503895038951,
      "grad_norm": 0.47396167503238784,
      "learning_rate": 9.22716003312897e-05,
      "loss": 1.5824,
      "step": 2629
    },
    {
      "epoch": 0.5391553915539156,
      "grad_norm": 0.4890984894825298,
      "learning_rate": 9.22053962374163e-05,
      "loss": 1.5415,
      "step": 2630
    },
    {
      "epoch": 0.5393603936039361,
      "grad_norm": 0.5012797003146663,
      "learning_rate": 9.213919558060897e-05,
      "loss": 1.6447,
      "step": 2631
    },
    {
      "epoch": 0.5395653956539566,
      "grad_norm": 0.501901005795332,
      "learning_rate": 9.207299839005911e-05,
      "loss": 1.6074,
      "step": 2632
    },
    {
      "epoch": 0.5397703977039771,
      "grad_norm": 0.43227741500071715,
      "learning_rate": 9.200680469495672e-05,
      "loss": 1.569,
      "step": 2633
    },
    {
      "epoch": 0.5399753997539976,
      "grad_norm": 0.4836658396704612,
      "learning_rate": 9.194061452449024e-05,
      "loss": 1.5177,
      "step": 2634
    },
    {
      "epoch": 0.540180401804018,
      "grad_norm": 0.5198709557637989,
      "learning_rate": 9.187442790784648e-05,
      "loss": 1.6129,
      "step": 2635
    },
    {
      "epoch": 0.5403854038540385,
      "grad_norm": 0.48987872009318717,
      "learning_rate": 9.180824487421077e-05,
      "loss": 1.6051,
      "step": 2636
    },
    {
      "epoch": 0.540590405904059,
      "grad_norm": 0.49629354038737566,
      "learning_rate": 9.174206545276677e-05,
      "loss": 1.6167,
      "step": 2637
    },
    {
      "epoch": 0.5407954079540795,
      "grad_norm": 0.45153789191171245,
      "learning_rate": 9.167588967269666e-05,
      "loss": 1.593,
      "step": 2638
    },
    {
      "epoch": 0.5410004100041,
      "grad_norm": 0.47009419797315166,
      "learning_rate": 9.160971756318087e-05,
      "loss": 1.58,
      "step": 2639
    },
    {
      "epoch": 0.5412054120541205,
      "grad_norm": 0.4676357691060261,
      "learning_rate": 9.154354915339836e-05,
      "loss": 1.594,
      "step": 2640
    },
    {
      "epoch": 0.541410414104141,
      "grad_norm": 0.49308540622553537,
      "learning_rate": 9.147738447252639e-05,
      "loss": 1.5343,
      "step": 2641
    },
    {
      "epoch": 0.5416154161541615,
      "grad_norm": 0.4432118342796589,
      "learning_rate": 9.141122354974055e-05,
      "loss": 1.5569,
      "step": 2642
    },
    {
      "epoch": 0.541820418204182,
      "grad_norm": 0.4464565682562934,
      "learning_rate": 9.13450664142148e-05,
      "loss": 1.6124,
      "step": 2643
    },
    {
      "epoch": 0.5420254202542025,
      "grad_norm": 0.4722069223751032,
      "learning_rate": 9.127891309512141e-05,
      "loss": 1.5676,
      "step": 2644
    },
    {
      "epoch": 0.542230422304223,
      "grad_norm": 0.4208935145134012,
      "learning_rate": 9.121276362163106e-05,
      "loss": 1.5593,
      "step": 2645
    },
    {
      "epoch": 0.5424354243542435,
      "grad_norm": 0.49514854171077044,
      "learning_rate": 9.114661802291262e-05,
      "loss": 1.6249,
      "step": 2646
    },
    {
      "epoch": 0.542640426404264,
      "grad_norm": 0.4806808338103088,
      "learning_rate": 9.108047632813328e-05,
      "loss": 1.5678,
      "step": 2647
    },
    {
      "epoch": 0.5428454284542845,
      "grad_norm": 0.4687343613699098,
      "learning_rate": 9.101433856645854e-05,
      "loss": 1.5483,
      "step": 2648
    },
    {
      "epoch": 0.543050430504305,
      "grad_norm": 0.5117833042240184,
      "learning_rate": 9.09482047670521e-05,
      "loss": 1.6888,
      "step": 2649
    },
    {
      "epoch": 0.5432554325543255,
      "grad_norm": 0.47515329692938435,
      "learning_rate": 9.088207495907603e-05,
      "loss": 1.642,
      "step": 2650
    },
    {
      "epoch": 0.543460434604346,
      "grad_norm": 0.5008671300898089,
      "learning_rate": 9.081594917169055e-05,
      "loss": 1.5892,
      "step": 2651
    },
    {
      "epoch": 0.5436654366543665,
      "grad_norm": 0.4941233205944891,
      "learning_rate": 9.074982743405413e-05,
      "loss": 1.6168,
      "step": 2652
    },
    {
      "epoch": 0.543870438704387,
      "grad_norm": 0.47430444833549046,
      "learning_rate": 9.068370977532341e-05,
      "loss": 1.6107,
      "step": 2653
    },
    {
      "epoch": 0.5440754407544075,
      "grad_norm": 0.4441716597389282,
      "learning_rate": 9.061759622465332e-05,
      "loss": 1.5605,
      "step": 2654
    },
    {
      "epoch": 0.544280442804428,
      "grad_norm": 0.482702388115561,
      "learning_rate": 9.055148681119688e-05,
      "loss": 1.6001,
      "step": 2655
    },
    {
      "epoch": 0.5444854448544485,
      "grad_norm": 0.49679740261205135,
      "learning_rate": 9.048538156410538e-05,
      "loss": 1.5632,
      "step": 2656
    },
    {
      "epoch": 0.544690446904469,
      "grad_norm": 0.4989006409344413,
      "learning_rate": 9.041928051252826e-05,
      "loss": 1.6114,
      "step": 2657
    },
    {
      "epoch": 0.5448954489544895,
      "grad_norm": 0.45274305074276694,
      "learning_rate": 9.0353183685613e-05,
      "loss": 1.5803,
      "step": 2658
    },
    {
      "epoch": 0.5451004510045101,
      "grad_norm": 0.4769916990922205,
      "learning_rate": 9.028709111250537e-05,
      "loss": 1.6207,
      "step": 2659
    },
    {
      "epoch": 0.5453054530545306,
      "grad_norm": 0.4555478084149483,
      "learning_rate": 9.022100282234913e-05,
      "loss": 1.5347,
      "step": 2660
    },
    {
      "epoch": 0.5455104551045511,
      "grad_norm": 0.46890901886584824,
      "learning_rate": 9.015491884428623e-05,
      "loss": 1.6012,
      "step": 2661
    },
    {
      "epoch": 0.5457154571545716,
      "grad_norm": 0.5110587419253257,
      "learning_rate": 9.008883920745675e-05,
      "loss": 1.6358,
      "step": 2662
    },
    {
      "epoch": 0.5459204592045921,
      "grad_norm": 0.4971322165718797,
      "learning_rate": 9.002276394099874e-05,
      "loss": 1.6863,
      "step": 2663
    },
    {
      "epoch": 0.5461254612546126,
      "grad_norm": 0.4301596112623638,
      "learning_rate": 8.995669307404845e-05,
      "loss": 1.5963,
      "step": 2664
    },
    {
      "epoch": 0.5463304633046331,
      "grad_norm": 0.46577192480695356,
      "learning_rate": 8.989062663574006e-05,
      "loss": 1.5492,
      "step": 2665
    },
    {
      "epoch": 0.5465354653546536,
      "grad_norm": 0.49235849065317666,
      "learning_rate": 8.98245646552059e-05,
      "loss": 1.6296,
      "step": 2666
    },
    {
      "epoch": 0.5467404674046741,
      "grad_norm": 0.4929929729188508,
      "learning_rate": 8.975850716157634e-05,
      "loss": 1.6624,
      "step": 2667
    },
    {
      "epoch": 0.5469454694546946,
      "grad_norm": 0.5125100632119998,
      "learning_rate": 8.969245418397969e-05,
      "loss": 1.6665,
      "step": 2668
    },
    {
      "epoch": 0.5471504715047151,
      "grad_norm": 0.4588157206541979,
      "learning_rate": 8.962640575154232e-05,
      "loss": 1.5865,
      "step": 2669
    },
    {
      "epoch": 0.5473554735547356,
      "grad_norm": 0.4657542785898101,
      "learning_rate": 8.956036189338858e-05,
      "loss": 1.571,
      "step": 2670
    },
    {
      "epoch": 0.5475604756047561,
      "grad_norm": 0.525947865233076,
      "learning_rate": 8.949432263864079e-05,
      "loss": 1.5987,
      "step": 2671
    },
    {
      "epoch": 0.5477654776547766,
      "grad_norm": 0.4968635537358625,
      "learning_rate": 8.942828801641933e-05,
      "loss": 1.5802,
      "step": 2672
    },
    {
      "epoch": 0.5479704797047971,
      "grad_norm": 0.4541055800603526,
      "learning_rate": 8.936225805584242e-05,
      "loss": 1.5398,
      "step": 2673
    },
    {
      "epoch": 0.5481754817548176,
      "grad_norm": 0.44786853103890034,
      "learning_rate": 8.929623278602627e-05,
      "loss": 1.5615,
      "step": 2674
    },
    {
      "epoch": 0.5483804838048381,
      "grad_norm": 0.515978158095013,
      "learning_rate": 8.923021223608504e-05,
      "loss": 1.623,
      "step": 2675
    },
    {
      "epoch": 0.5485854858548586,
      "grad_norm": 0.4758521821455385,
      "learning_rate": 8.916419643513074e-05,
      "loss": 1.5767,
      "step": 2676
    },
    {
      "epoch": 0.548790487904879,
      "grad_norm": 0.42388582225051624,
      "learning_rate": 8.909818541227343e-05,
      "loss": 1.54,
      "step": 2677
    },
    {
      "epoch": 0.5489954899548996,
      "grad_norm": 0.46264576269985275,
      "learning_rate": 8.90321791966209e-05,
      "loss": 1.5786,
      "step": 2678
    },
    {
      "epoch": 0.54920049200492,
      "grad_norm": 0.47999391205809916,
      "learning_rate": 8.896617781727894e-05,
      "loss": 1.5743,
      "step": 2679
    },
    {
      "epoch": 0.5494054940549405,
      "grad_norm": 0.46458200088613294,
      "learning_rate": 8.890018130335111e-05,
      "loss": 1.5876,
      "step": 2680
    },
    {
      "epoch": 0.549610496104961,
      "grad_norm": 0.48492234649104576,
      "learning_rate": 8.883418968393892e-05,
      "loss": 1.591,
      "step": 2681
    },
    {
      "epoch": 0.5498154981549815,
      "grad_norm": 0.4871763740885184,
      "learning_rate": 8.87682029881416e-05,
      "loss": 1.6211,
      "step": 2682
    },
    {
      "epoch": 0.550020500205002,
      "grad_norm": 0.4922734701906859,
      "learning_rate": 8.870222124505635e-05,
      "loss": 1.6959,
      "step": 2683
    },
    {
      "epoch": 0.5502255022550225,
      "grad_norm": 0.4631013858494112,
      "learning_rate": 8.863624448377814e-05,
      "loss": 1.6305,
      "step": 2684
    },
    {
      "epoch": 0.550430504305043,
      "grad_norm": 0.4516747226217731,
      "learning_rate": 8.857027273339967e-05,
      "loss": 1.5598,
      "step": 2685
    },
    {
      "epoch": 0.5506355063550635,
      "grad_norm": 0.49884978796348173,
      "learning_rate": 8.85043060230115e-05,
      "loss": 1.5981,
      "step": 2686
    },
    {
      "epoch": 0.550840508405084,
      "grad_norm": 0.4440033032555108,
      "learning_rate": 8.843834438170193e-05,
      "loss": 1.6103,
      "step": 2687
    },
    {
      "epoch": 0.5510455104551045,
      "grad_norm": 0.46783955392220905,
      "learning_rate": 8.837238783855709e-05,
      "loss": 1.6407,
      "step": 2688
    },
    {
      "epoch": 0.551250512505125,
      "grad_norm": 0.4601370042279884,
      "learning_rate": 8.830643642266082e-05,
      "loss": 1.56,
      "step": 2689
    },
    {
      "epoch": 0.5514555145551455,
      "grad_norm": 0.5260210732647638,
      "learning_rate": 8.824049016309465e-05,
      "loss": 1.642,
      "step": 2690
    },
    {
      "epoch": 0.551660516605166,
      "grad_norm": 0.4295839060842287,
      "learning_rate": 8.817454908893795e-05,
      "loss": 1.6299,
      "step": 2691
    },
    {
      "epoch": 0.5518655186551865,
      "grad_norm": 0.4605964301695693,
      "learning_rate": 8.810861322926764e-05,
      "loss": 1.5491,
      "step": 2692
    },
    {
      "epoch": 0.552070520705207,
      "grad_norm": 0.5081232754819388,
      "learning_rate": 8.80426826131585e-05,
      "loss": 1.5578,
      "step": 2693
    },
    {
      "epoch": 0.5522755227552275,
      "grad_norm": 0.46776415319094106,
      "learning_rate": 8.797675726968297e-05,
      "loss": 1.5834,
      "step": 2694
    },
    {
      "epoch": 0.552480524805248,
      "grad_norm": 0.4762230514657226,
      "learning_rate": 8.791083722791108e-05,
      "loss": 1.5943,
      "step": 2695
    },
    {
      "epoch": 0.5526855268552685,
      "grad_norm": 0.4621670020844566,
      "learning_rate": 8.784492251691057e-05,
      "loss": 1.5191,
      "step": 2696
    },
    {
      "epoch": 0.552890528905289,
      "grad_norm": 0.43661422338656364,
      "learning_rate": 8.777901316574685e-05,
      "loss": 1.579,
      "step": 2697
    },
    {
      "epoch": 0.5530955309553095,
      "grad_norm": 0.47112068475656355,
      "learning_rate": 8.771310920348292e-05,
      "loss": 1.6154,
      "step": 2698
    },
    {
      "epoch": 0.5533005330053301,
      "grad_norm": 0.4726113720612676,
      "learning_rate": 8.764721065917947e-05,
      "loss": 1.5752,
      "step": 2699
    },
    {
      "epoch": 0.5535055350553506,
      "grad_norm": 0.4655246624426711,
      "learning_rate": 8.758131756189476e-05,
      "loss": 1.5813,
      "step": 2700
    },
    {
      "epoch": 0.5537105371053711,
      "grad_norm": 0.5219761208482683,
      "learning_rate": 8.751542994068464e-05,
      "loss": 1.5848,
      "step": 2701
    },
    {
      "epoch": 0.5539155391553916,
      "grad_norm": 0.4323111530410708,
      "learning_rate": 8.744954782460254e-05,
      "loss": 1.5232,
      "step": 2702
    },
    {
      "epoch": 0.5541205412054121,
      "grad_norm": 0.4983349944847731,
      "learning_rate": 8.73836712426995e-05,
      "loss": 1.5749,
      "step": 2703
    },
    {
      "epoch": 0.5543255432554326,
      "grad_norm": 0.4645968573838534,
      "learning_rate": 8.731780022402409e-05,
      "loss": 1.6401,
      "step": 2704
    },
    {
      "epoch": 0.5545305453054531,
      "grad_norm": 0.4544524966052656,
      "learning_rate": 8.725193479762247e-05,
      "loss": 1.5581,
      "step": 2705
    },
    {
      "epoch": 0.5547355473554736,
      "grad_norm": 0.47219152650339763,
      "learning_rate": 8.718607499253825e-05,
      "loss": 1.5865,
      "step": 2706
    },
    {
      "epoch": 0.5549405494054941,
      "grad_norm": 0.4931596434888753,
      "learning_rate": 8.712022083781264e-05,
      "loss": 1.6059,
      "step": 2707
    },
    {
      "epoch": 0.5551455514555146,
      "grad_norm": 0.46198286930221866,
      "learning_rate": 8.70543723624843e-05,
      "loss": 1.6138,
      "step": 2708
    },
    {
      "epoch": 0.5553505535055351,
      "grad_norm": 0.4008861442313229,
      "learning_rate": 8.698852959558944e-05,
      "loss": 1.5455,
      "step": 2709
    },
    {
      "epoch": 0.5555555555555556,
      "grad_norm": 0.48589720803198055,
      "learning_rate": 8.692269256616175e-05,
      "loss": 1.6954,
      "step": 2710
    },
    {
      "epoch": 0.5557605576055761,
      "grad_norm": 0.4590082932518373,
      "learning_rate": 8.685686130323232e-05,
      "loss": 1.5966,
      "step": 2711
    },
    {
      "epoch": 0.5559655596555966,
      "grad_norm": 0.47823333668560974,
      "learning_rate": 8.679103583582979e-05,
      "loss": 1.5673,
      "step": 2712
    },
    {
      "epoch": 0.5561705617056171,
      "grad_norm": 0.49813589210415066,
      "learning_rate": 8.672521619298016e-05,
      "loss": 1.6617,
      "step": 2713
    },
    {
      "epoch": 0.5563755637556376,
      "grad_norm": 0.44496314213167276,
      "learning_rate": 8.665940240370688e-05,
      "loss": 1.5597,
      "step": 2714
    },
    {
      "epoch": 0.5565805658056581,
      "grad_norm": 0.45661253320674594,
      "learning_rate": 8.659359449703095e-05,
      "loss": 1.6677,
      "step": 2715
    },
    {
      "epoch": 0.5567855678556786,
      "grad_norm": 0.408860296952707,
      "learning_rate": 8.652779250197056e-05,
      "loss": 1.5866,
      "step": 2716
    },
    {
      "epoch": 0.5569905699056991,
      "grad_norm": 0.4379583156761515,
      "learning_rate": 8.646199644754146e-05,
      "loss": 1.5629,
      "step": 2717
    },
    {
      "epoch": 0.5571955719557196,
      "grad_norm": 0.4435669081957282,
      "learning_rate": 8.639620636275667e-05,
      "loss": 1.5433,
      "step": 2718
    },
    {
      "epoch": 0.5574005740057401,
      "grad_norm": 0.5128765961711079,
      "learning_rate": 8.633042227662662e-05,
      "loss": 1.6223,
      "step": 2719
    },
    {
      "epoch": 0.5576055760557606,
      "grad_norm": 0.4859469667685511,
      "learning_rate": 8.626464421815919e-05,
      "loss": 1.6443,
      "step": 2720
    },
    {
      "epoch": 0.557810578105781,
      "grad_norm": 0.454298878880921,
      "learning_rate": 8.619887221635944e-05,
      "loss": 1.5565,
      "step": 2721
    },
    {
      "epoch": 0.5580155801558015,
      "grad_norm": 0.5197011290371658,
      "learning_rate": 8.613310630022986e-05,
      "loss": 1.5898,
      "step": 2722
    },
    {
      "epoch": 0.558220582205822,
      "grad_norm": 0.5013274459520718,
      "learning_rate": 8.606734649877022e-05,
      "loss": 1.6439,
      "step": 2723
    },
    {
      "epoch": 0.5584255842558425,
      "grad_norm": 0.47436418576967115,
      "learning_rate": 8.600159284097757e-05,
      "loss": 1.5735,
      "step": 2724
    },
    {
      "epoch": 0.558630586305863,
      "grad_norm": 0.47253807238704093,
      "learning_rate": 8.593584535584637e-05,
      "loss": 1.5755,
      "step": 2725
    },
    {
      "epoch": 0.5588355883558835,
      "grad_norm": 0.4871736311217662,
      "learning_rate": 8.58701040723682e-05,
      "loss": 1.5583,
      "step": 2726
    },
    {
      "epoch": 0.559040590405904,
      "grad_norm": 0.48761585720030787,
      "learning_rate": 8.580436901953202e-05,
      "loss": 1.6417,
      "step": 2727
    },
    {
      "epoch": 0.5592455924559245,
      "grad_norm": 0.4513739946761981,
      "learning_rate": 8.573864022632398e-05,
      "loss": 1.5474,
      "step": 2728
    },
    {
      "epoch": 0.559450594505945,
      "grad_norm": 0.4399086954887682,
      "learning_rate": 8.567291772172751e-05,
      "loss": 1.5521,
      "step": 2729
    },
    {
      "epoch": 0.5596555965559655,
      "grad_norm": 0.47760950875664,
      "learning_rate": 8.560720153472319e-05,
      "loss": 1.6213,
      "step": 2730
    },
    {
      "epoch": 0.559860598605986,
      "grad_norm": 0.47082550165766734,
      "learning_rate": 8.554149169428894e-05,
      "loss": 1.6389,
      "step": 2731
    },
    {
      "epoch": 0.5600656006560065,
      "grad_norm": 0.4552501017617182,
      "learning_rate": 8.547578822939979e-05,
      "loss": 1.6262,
      "step": 2732
    },
    {
      "epoch": 0.560270602706027,
      "grad_norm": 0.41940772156927014,
      "learning_rate": 8.541009116902797e-05,
      "loss": 1.5014,
      "step": 2733
    },
    {
      "epoch": 0.5604756047560475,
      "grad_norm": 0.5082813672622422,
      "learning_rate": 8.534440054214294e-05,
      "loss": 1.6688,
      "step": 2734
    },
    {
      "epoch": 0.560680606806068,
      "grad_norm": 0.4204459505094942,
      "learning_rate": 8.52787163777112e-05,
      "loss": 1.5351,
      "step": 2735
    },
    {
      "epoch": 0.5608856088560885,
      "grad_norm": 0.4802801163655674,
      "learning_rate": 8.521303870469655e-05,
      "loss": 1.5699,
      "step": 2736
    },
    {
      "epoch": 0.561090610906109,
      "grad_norm": 0.44683758676501706,
      "learning_rate": 8.514736755205986e-05,
      "loss": 1.5248,
      "step": 2737
    },
    {
      "epoch": 0.5612956129561295,
      "grad_norm": 0.47643246461329575,
      "learning_rate": 8.508170294875909e-05,
      "loss": 1.6018,
      "step": 2738
    },
    {
      "epoch": 0.5615006150061501,
      "grad_norm": 0.5081701375502914,
      "learning_rate": 8.501604492374939e-05,
      "loss": 1.5642,
      "step": 2739
    },
    {
      "epoch": 0.5617056170561706,
      "grad_norm": 0.4349787653384339,
      "learning_rate": 8.495039350598288e-05,
      "loss": 1.5287,
      "step": 2740
    },
    {
      "epoch": 0.5619106191061911,
      "grad_norm": 0.48786456697125097,
      "learning_rate": 8.488474872440892e-05,
      "loss": 1.5895,
      "step": 2741
    },
    {
      "epoch": 0.5621156211562116,
      "grad_norm": 0.4311788620603534,
      "learning_rate": 8.48191106079739e-05,
      "loss": 1.5655,
      "step": 2742
    },
    {
      "epoch": 0.5623206232062321,
      "grad_norm": 0.4794441517134273,
      "learning_rate": 8.475347918562118e-05,
      "loss": 1.6106,
      "step": 2743
    },
    {
      "epoch": 0.5625256252562526,
      "grad_norm": 0.45767858156296126,
      "learning_rate": 8.468785448629128e-05,
      "loss": 1.5493,
      "step": 2744
    },
    {
      "epoch": 0.5627306273062731,
      "grad_norm": 0.4974461426461692,
      "learning_rate": 8.46222365389217e-05,
      "loss": 1.6653,
      "step": 2745
    },
    {
      "epoch": 0.5629356293562936,
      "grad_norm": 0.4744386568612818,
      "learning_rate": 8.45566253724469e-05,
      "loss": 1.6077,
      "step": 2746
    },
    {
      "epoch": 0.5631406314063141,
      "grad_norm": 0.4515951826847733,
      "learning_rate": 8.449102101579856e-05,
      "loss": 1.6145,
      "step": 2747
    },
    {
      "epoch": 0.5633456334563346,
      "grad_norm": 0.4662380834371251,
      "learning_rate": 8.442542349790514e-05,
      "loss": 1.5551,
      "step": 2748
    },
    {
      "epoch": 0.5635506355063551,
      "grad_norm": 0.48473926644768106,
      "learning_rate": 8.435983284769216e-05,
      "loss": 1.5523,
      "step": 2749
    },
    {
      "epoch": 0.5637556375563756,
      "grad_norm": 0.44151878917462695,
      "learning_rate": 8.429424909408214e-05,
      "loss": 1.5974,
      "step": 2750
    },
    {
      "epoch": 0.5639606396063961,
      "grad_norm": 0.43500873928167183,
      "learning_rate": 8.42286722659945e-05,
      "loss": 1.5042,
      "step": 2751
    },
    {
      "epoch": 0.5641656416564166,
      "grad_norm": 0.46636021809359474,
      "learning_rate": 8.416310239234566e-05,
      "loss": 1.5779,
      "step": 2752
    },
    {
      "epoch": 0.5643706437064371,
      "grad_norm": 0.47471533160762547,
      "learning_rate": 8.409753950204901e-05,
      "loss": 1.5122,
      "step": 2753
    },
    {
      "epoch": 0.5645756457564576,
      "grad_norm": 0.45922278719459037,
      "learning_rate": 8.403198362401473e-05,
      "loss": 1.5727,
      "step": 2754
    },
    {
      "epoch": 0.5647806478064781,
      "grad_norm": 0.41621380912394257,
      "learning_rate": 8.396643478715001e-05,
      "loss": 1.5294,
      "step": 2755
    },
    {
      "epoch": 0.5649856498564986,
      "grad_norm": 0.48339088637946764,
      "learning_rate": 8.390089302035888e-05,
      "loss": 1.5857,
      "step": 2756
    },
    {
      "epoch": 0.5651906519065191,
      "grad_norm": 0.45192567408837614,
      "learning_rate": 8.38353583525423e-05,
      "loss": 1.542,
      "step": 2757
    },
    {
      "epoch": 0.5653956539565396,
      "grad_norm": 0.4719122920371752,
      "learning_rate": 8.376983081259814e-05,
      "loss": 1.6055,
      "step": 2758
    },
    {
      "epoch": 0.5656006560065601,
      "grad_norm": 0.4495282078593574,
      "learning_rate": 8.370431042942099e-05,
      "loss": 1.6092,
      "step": 2759
    },
    {
      "epoch": 0.5658056580565806,
      "grad_norm": 0.4911656806600491,
      "learning_rate": 8.36387972319024e-05,
      "loss": 1.5596,
      "step": 2760
    },
    {
      "epoch": 0.5660106601066011,
      "grad_norm": 0.44589803030364894,
      "learning_rate": 8.357329124893067e-05,
      "loss": 1.5874,
      "step": 2761
    },
    {
      "epoch": 0.5662156621566216,
      "grad_norm": 0.44043619793201755,
      "learning_rate": 8.350779250939098e-05,
      "loss": 1.5944,
      "step": 2762
    },
    {
      "epoch": 0.566420664206642,
      "grad_norm": 0.490881176486932,
      "learning_rate": 8.344230104216535e-05,
      "loss": 1.5374,
      "step": 2763
    },
    {
      "epoch": 0.5666256662566626,
      "grad_norm": 0.4473742243308702,
      "learning_rate": 8.337681687613247e-05,
      "loss": 1.5579,
      "step": 2764
    },
    {
      "epoch": 0.566830668306683,
      "grad_norm": 0.4336860241181803,
      "learning_rate": 8.331134004016794e-05,
      "loss": 1.5435,
      "step": 2765
    },
    {
      "epoch": 0.5670356703567035,
      "grad_norm": 0.5058022455119351,
      "learning_rate": 8.324587056314401e-05,
      "loss": 1.6264,
      "step": 2766
    },
    {
      "epoch": 0.567240672406724,
      "grad_norm": 0.5185403358228308,
      "learning_rate": 8.318040847392976e-05,
      "loss": 1.638,
      "step": 2767
    },
    {
      "epoch": 0.5674456744567445,
      "grad_norm": 0.44435853900434275,
      "learning_rate": 8.311495380139104e-05,
      "loss": 1.6164,
      "step": 2768
    },
    {
      "epoch": 0.567650676506765,
      "grad_norm": 0.48967934204395624,
      "learning_rate": 8.304950657439033e-05,
      "loss": 1.6625,
      "step": 2769
    },
    {
      "epoch": 0.5678556785567855,
      "grad_norm": 0.5194040898941595,
      "learning_rate": 8.298406682178694e-05,
      "loss": 1.5344,
      "step": 2770
    },
    {
      "epoch": 0.568060680606806,
      "grad_norm": 0.45524359340987325,
      "learning_rate": 8.291863457243679e-05,
      "loss": 1.5712,
      "step": 2771
    },
    {
      "epoch": 0.5682656826568265,
      "grad_norm": 0.45186248322350536,
      "learning_rate": 8.285320985519254e-05,
      "loss": 1.6038,
      "step": 2772
    },
    {
      "epoch": 0.568470684706847,
      "grad_norm": 0.49936259381381526,
      "learning_rate": 8.278779269890347e-05,
      "loss": 1.5869,
      "step": 2773
    },
    {
      "epoch": 0.5686756867568675,
      "grad_norm": 0.48542247967144936,
      "learning_rate": 8.272238313241563e-05,
      "loss": 1.5745,
      "step": 2774
    },
    {
      "epoch": 0.568880688806888,
      "grad_norm": 0.46993731641929926,
      "learning_rate": 8.265698118457166e-05,
      "loss": 1.5529,
      "step": 2775
    },
    {
      "epoch": 0.5690856908569085,
      "grad_norm": 0.5098162873204044,
      "learning_rate": 8.259158688421085e-05,
      "loss": 1.5573,
      "step": 2776
    },
    {
      "epoch": 0.569290692906929,
      "grad_norm": 0.4711388927942418,
      "learning_rate": 8.25262002601691e-05,
      "loss": 1.5464,
      "step": 2777
    },
    {
      "epoch": 0.5694956949569495,
      "grad_norm": 0.46566844609010144,
      "learning_rate": 8.24608213412789e-05,
      "loss": 1.5588,
      "step": 2778
    },
    {
      "epoch": 0.5697006970069701,
      "grad_norm": 0.4433806059014826,
      "learning_rate": 8.239545015636944e-05,
      "loss": 1.5686,
      "step": 2779
    },
    {
      "epoch": 0.5699056990569906,
      "grad_norm": 0.42027399491719564,
      "learning_rate": 8.233008673426646e-05,
      "loss": 1.5258,
      "step": 2780
    },
    {
      "epoch": 0.5701107011070111,
      "grad_norm": 0.4664372984386003,
      "learning_rate": 8.226473110379221e-05,
      "loss": 1.6087,
      "step": 2781
    },
    {
      "epoch": 0.5703157031570316,
      "grad_norm": 0.4909885728039717,
      "learning_rate": 8.219938329376556e-05,
      "loss": 1.5991,
      "step": 2782
    },
    {
      "epoch": 0.5705207052070521,
      "grad_norm": 0.43739714020024856,
      "learning_rate": 8.213404333300191e-05,
      "loss": 1.4763,
      "step": 2783
    },
    {
      "epoch": 0.5707257072570726,
      "grad_norm": 0.44265461264361633,
      "learning_rate": 8.206871125031324e-05,
      "loss": 1.4952,
      "step": 2784
    },
    {
      "epoch": 0.5709307093070931,
      "grad_norm": 0.4751427399786054,
      "learning_rate": 8.200338707450806e-05,
      "loss": 1.531,
      "step": 2785
    },
    {
      "epoch": 0.5711357113571136,
      "grad_norm": 0.47996445696949575,
      "learning_rate": 8.19380708343913e-05,
      "loss": 1.5095,
      "step": 2786
    },
    {
      "epoch": 0.5713407134071341,
      "grad_norm": 0.4642615540618763,
      "learning_rate": 8.187276255876451e-05,
      "loss": 1.5285,
      "step": 2787
    },
    {
      "epoch": 0.5715457154571546,
      "grad_norm": 0.46879634584675467,
      "learning_rate": 8.180746227642562e-05,
      "loss": 1.5427,
      "step": 2788
    },
    {
      "epoch": 0.5717507175071751,
      "grad_norm": 0.4327553497951608,
      "learning_rate": 8.174217001616908e-05,
      "loss": 1.5685,
      "step": 2789
    },
    {
      "epoch": 0.5719557195571956,
      "grad_norm": 0.48044823109466844,
      "learning_rate": 8.167688580678587e-05,
      "loss": 1.5816,
      "step": 2790
    },
    {
      "epoch": 0.5721607216072161,
      "grad_norm": 0.43271691731053397,
      "learning_rate": 8.161160967706333e-05,
      "loss": 1.5133,
      "step": 2791
    },
    {
      "epoch": 0.5723657236572366,
      "grad_norm": 0.43865516134348287,
      "learning_rate": 8.154634165578527e-05,
      "loss": 1.5587,
      "step": 2792
    },
    {
      "epoch": 0.5725707257072571,
      "grad_norm": 0.5041463736308865,
      "learning_rate": 8.148108177173191e-05,
      "loss": 1.6045,
      "step": 2793
    },
    {
      "epoch": 0.5727757277572776,
      "grad_norm": 0.5002933100013144,
      "learning_rate": 8.141583005367988e-05,
      "loss": 1.6006,
      "step": 2794
    },
    {
      "epoch": 0.5729807298072981,
      "grad_norm": 0.48374712959962823,
      "learning_rate": 8.135058653040226e-05,
      "loss": 1.581,
      "step": 2795
    },
    {
      "epoch": 0.5731857318573186,
      "grad_norm": 0.4894348464778049,
      "learning_rate": 8.128535123066846e-05,
      "loss": 1.5838,
      "step": 2796
    },
    {
      "epoch": 0.5733907339073391,
      "grad_norm": 0.4848920999677346,
      "learning_rate": 8.122012418324429e-05,
      "loss": 1.5297,
      "step": 2797
    },
    {
      "epoch": 0.5735957359573596,
      "grad_norm": 0.5055371614414685,
      "learning_rate": 8.115490541689192e-05,
      "loss": 1.6007,
      "step": 2798
    },
    {
      "epoch": 0.5738007380073801,
      "grad_norm": 0.4945907352973538,
      "learning_rate": 8.108969496036979e-05,
      "loss": 1.5596,
      "step": 2799
    },
    {
      "epoch": 0.5740057400574006,
      "grad_norm": 0.4670049217763332,
      "learning_rate": 8.102449284243287e-05,
      "loss": 1.5708,
      "step": 2800
    },
    {
      "epoch": 0.5742107421074211,
      "grad_norm": 0.4608502951675274,
      "learning_rate": 8.095929909183226e-05,
      "loss": 1.5495,
      "step": 2801
    },
    {
      "epoch": 0.5744157441574416,
      "grad_norm": 0.5198947623278084,
      "learning_rate": 8.089411373731541e-05,
      "loss": 1.6064,
      "step": 2802
    },
    {
      "epoch": 0.5746207462074621,
      "grad_norm": 0.48376635257919165,
      "learning_rate": 8.082893680762619e-05,
      "loss": 1.5722,
      "step": 2803
    },
    {
      "epoch": 0.5748257482574826,
      "grad_norm": 0.49674901594952897,
      "learning_rate": 8.076376833150458e-05,
      "loss": 1.5457,
      "step": 2804
    },
    {
      "epoch": 0.5750307503075031,
      "grad_norm": 0.4955390689538443,
      "learning_rate": 8.069860833768693e-05,
      "loss": 1.5985,
      "step": 2805
    },
    {
      "epoch": 0.5752357523575236,
      "grad_norm": 0.4699349188279682,
      "learning_rate": 8.063345685490589e-05,
      "loss": 1.5834,
      "step": 2806
    },
    {
      "epoch": 0.575440754407544,
      "grad_norm": 0.4704767130444989,
      "learning_rate": 8.056831391189023e-05,
      "loss": 1.5617,
      "step": 2807
    },
    {
      "epoch": 0.5756457564575646,
      "grad_norm": 0.48457633227697255,
      "learning_rate": 8.050317953736512e-05,
      "loss": 1.5693,
      "step": 2808
    },
    {
      "epoch": 0.575850758507585,
      "grad_norm": 0.4292273459988384,
      "learning_rate": 8.043805376005177e-05,
      "loss": 1.59,
      "step": 2809
    },
    {
      "epoch": 0.5760557605576055,
      "grad_norm": 0.4577162302998883,
      "learning_rate": 8.03729366086677e-05,
      "loss": 1.5494,
      "step": 2810
    },
    {
      "epoch": 0.576260762607626,
      "grad_norm": 0.4954568906574125,
      "learning_rate": 8.030782811192668e-05,
      "loss": 1.6075,
      "step": 2811
    },
    {
      "epoch": 0.5764657646576465,
      "grad_norm": 0.4744112646384498,
      "learning_rate": 8.024272829853852e-05,
      "loss": 1.5645,
      "step": 2812
    },
    {
      "epoch": 0.576670766707667,
      "grad_norm": 0.5179082114337228,
      "learning_rate": 8.017763719720936e-05,
      "loss": 1.5323,
      "step": 2813
    },
    {
      "epoch": 0.5768757687576875,
      "grad_norm": 0.4702611550792192,
      "learning_rate": 8.011255483664133e-05,
      "loss": 1.5291,
      "step": 2814
    },
    {
      "epoch": 0.577080770807708,
      "grad_norm": 0.47217938018014816,
      "learning_rate": 8.004748124553283e-05,
      "loss": 1.6742,
      "step": 2815
    },
    {
      "epoch": 0.5772857728577285,
      "grad_norm": 0.5022788038282318,
      "learning_rate": 7.99824164525784e-05,
      "loss": 1.6052,
      "step": 2816
    },
    {
      "epoch": 0.577490774907749,
      "grad_norm": 0.4297384242177964,
      "learning_rate": 7.99173604864686e-05,
      "loss": 1.5042,
      "step": 2817
    },
    {
      "epoch": 0.5776957769577695,
      "grad_norm": 0.4577017852336908,
      "learning_rate": 7.985231337589019e-05,
      "loss": 1.5592,
      "step": 2818
    },
    {
      "epoch": 0.5779007790077901,
      "grad_norm": 0.46778619294343626,
      "learning_rate": 7.978727514952595e-05,
      "loss": 1.6089,
      "step": 2819
    },
    {
      "epoch": 0.5781057810578106,
      "grad_norm": 0.474161425400192,
      "learning_rate": 7.972224583605483e-05,
      "loss": 1.5598,
      "step": 2820
    },
    {
      "epoch": 0.5783107831078311,
      "grad_norm": 0.46254774541116084,
      "learning_rate": 7.965722546415173e-05,
      "loss": 1.5729,
      "step": 2821
    },
    {
      "epoch": 0.5785157851578516,
      "grad_norm": 0.4528521920224102,
      "learning_rate": 7.959221406248775e-05,
      "loss": 1.6784,
      "step": 2822
    },
    {
      "epoch": 0.5787207872078721,
      "grad_norm": 0.42230356506918815,
      "learning_rate": 7.952721165972996e-05,
      "loss": 1.5009,
      "step": 2823
    },
    {
      "epoch": 0.5789257892578926,
      "grad_norm": 0.4981664901815227,
      "learning_rate": 7.946221828454144e-05,
      "loss": 1.6676,
      "step": 2824
    },
    {
      "epoch": 0.5791307913079131,
      "grad_norm": 0.44921689065838266,
      "learning_rate": 7.939723396558132e-05,
      "loss": 1.5823,
      "step": 2825
    },
    {
      "epoch": 0.5793357933579336,
      "grad_norm": 0.4472783342030656,
      "learning_rate": 7.93322587315047e-05,
      "loss": 1.6223,
      "step": 2826
    },
    {
      "epoch": 0.5795407954079541,
      "grad_norm": 0.45704025689837224,
      "learning_rate": 7.926729261096276e-05,
      "loss": 1.5228,
      "step": 2827
    },
    {
      "epoch": 0.5797457974579746,
      "grad_norm": 0.48113985417457633,
      "learning_rate": 7.92023356326026e-05,
      "loss": 1.6116,
      "step": 2828
    },
    {
      "epoch": 0.5799507995079951,
      "grad_norm": 0.4810649777128658,
      "learning_rate": 7.913738782506727e-05,
      "loss": 1.5653,
      "step": 2829
    },
    {
      "epoch": 0.5801558015580156,
      "grad_norm": 0.45546034023084525,
      "learning_rate": 7.907244921699581e-05,
      "loss": 1.5618,
      "step": 2830
    },
    {
      "epoch": 0.5803608036080361,
      "grad_norm": 0.42028535047155097,
      "learning_rate": 7.900751983702317e-05,
      "loss": 1.5681,
      "step": 2831
    },
    {
      "epoch": 0.5805658056580566,
      "grad_norm": 0.4554780118570541,
      "learning_rate": 7.894259971378031e-05,
      "loss": 1.5828,
      "step": 2832
    },
    {
      "epoch": 0.5807708077080771,
      "grad_norm": 0.43349384022146037,
      "learning_rate": 7.887768887589403e-05,
      "loss": 1.5456,
      "step": 2833
    },
    {
      "epoch": 0.5809758097580976,
      "grad_norm": 0.4589527668069594,
      "learning_rate": 7.881278735198705e-05,
      "loss": 1.594,
      "step": 2834
    },
    {
      "epoch": 0.5811808118081181,
      "grad_norm": 0.43096675233898796,
      "learning_rate": 7.8747895170678e-05,
      "loss": 1.5244,
      "step": 2835
    },
    {
      "epoch": 0.5813858138581386,
      "grad_norm": 0.4302936709112231,
      "learning_rate": 7.868301236058138e-05,
      "loss": 1.5938,
      "step": 2836
    },
    {
      "epoch": 0.5815908159081591,
      "grad_norm": 0.4454121200108281,
      "learning_rate": 7.861813895030754e-05,
      "loss": 1.5697,
      "step": 2837
    },
    {
      "epoch": 0.5817958179581796,
      "grad_norm": 0.41208843086439834,
      "learning_rate": 7.855327496846276e-05,
      "loss": 1.5315,
      "step": 2838
    },
    {
      "epoch": 0.5820008200082001,
      "grad_norm": 0.46822681477448247,
      "learning_rate": 7.848842044364905e-05,
      "loss": 1.6061,
      "step": 2839
    },
    {
      "epoch": 0.5822058220582206,
      "grad_norm": 0.4172105089296651,
      "learning_rate": 7.842357540446437e-05,
      "loss": 1.5323,
      "step": 2840
    },
    {
      "epoch": 0.5824108241082411,
      "grad_norm": 0.4881793993515432,
      "learning_rate": 7.835873987950238e-05,
      "loss": 1.5293,
      "step": 2841
    },
    {
      "epoch": 0.5826158261582616,
      "grad_norm": 0.46743189424655723,
      "learning_rate": 7.829391389735259e-05,
      "loss": 1.5823,
      "step": 2842
    },
    {
      "epoch": 0.5828208282082821,
      "grad_norm": 0.46323901263219186,
      "learning_rate": 7.822909748660039e-05,
      "loss": 1.5939,
      "step": 2843
    },
    {
      "epoch": 0.5830258302583026,
      "grad_norm": 0.4603240255662383,
      "learning_rate": 7.816429067582678e-05,
      "loss": 1.6043,
      "step": 2844
    },
    {
      "epoch": 0.5832308323083231,
      "grad_norm": 0.45580500878688224,
      "learning_rate": 7.809949349360872e-05,
      "loss": 1.5585,
      "step": 2845
    },
    {
      "epoch": 0.5834358343583436,
      "grad_norm": 0.42889856701096885,
      "learning_rate": 7.803470596851872e-05,
      "loss": 1.5065,
      "step": 2846
    },
    {
      "epoch": 0.5836408364083641,
      "grad_norm": 0.4981112831154715,
      "learning_rate": 7.796992812912516e-05,
      "loss": 1.6294,
      "step": 2847
    },
    {
      "epoch": 0.5838458384583846,
      "grad_norm": 0.4798442243225811,
      "learning_rate": 7.790516000399219e-05,
      "loss": 1.5568,
      "step": 2848
    },
    {
      "epoch": 0.5840508405084051,
      "grad_norm": 0.4622015940788796,
      "learning_rate": 7.784040162167954e-05,
      "loss": 1.5457,
      "step": 2849
    },
    {
      "epoch": 0.5842558425584256,
      "grad_norm": 0.4353184257811204,
      "learning_rate": 7.777565301074275e-05,
      "loss": 1.5658,
      "step": 2850
    },
    {
      "epoch": 0.584460844608446,
      "grad_norm": 0.41631359808026636,
      "learning_rate": 7.7710914199733e-05,
      "loss": 1.5555,
      "step": 2851
    },
    {
      "epoch": 0.5846658466584665,
      "grad_norm": 0.469145393073749,
      "learning_rate": 7.764618521719715e-05,
      "loss": 1.5743,
      "step": 2852
    },
    {
      "epoch": 0.584870848708487,
      "grad_norm": 0.47258341502657575,
      "learning_rate": 7.758146609167773e-05,
      "loss": 1.5345,
      "step": 2853
    },
    {
      "epoch": 0.5850758507585075,
      "grad_norm": 0.4346090686240297,
      "learning_rate": 7.7516756851713e-05,
      "loss": 1.5774,
      "step": 2854
    },
    {
      "epoch": 0.585280852808528,
      "grad_norm": 0.48693640936995536,
      "learning_rate": 7.745205752583673e-05,
      "loss": 1.6069,
      "step": 2855
    },
    {
      "epoch": 0.5854858548585485,
      "grad_norm": 0.4448892411221868,
      "learning_rate": 7.738736814257843e-05,
      "loss": 1.5614,
      "step": 2856
    },
    {
      "epoch": 0.585690856908569,
      "grad_norm": 0.5150479486429531,
      "learning_rate": 7.732268873046313e-05,
      "loss": 1.5747,
      "step": 2857
    },
    {
      "epoch": 0.5858958589585895,
      "grad_norm": 0.4709818682182925,
      "learning_rate": 7.72580193180115e-05,
      "loss": 1.5686,
      "step": 2858
    },
    {
      "epoch": 0.5861008610086101,
      "grad_norm": 0.43790425227806434,
      "learning_rate": 7.71933599337399e-05,
      "loss": 1.52,
      "step": 2859
    },
    {
      "epoch": 0.5863058630586306,
      "grad_norm": 0.4413626622421896,
      "learning_rate": 7.71287106061601e-05,
      "loss": 1.5015,
      "step": 2860
    },
    {
      "epoch": 0.5865108651086511,
      "grad_norm": 0.4310407818275572,
      "learning_rate": 7.706407136377956e-05,
      "loss": 1.55,
      "step": 2861
    },
    {
      "epoch": 0.5867158671586716,
      "grad_norm": 0.4731523562710143,
      "learning_rate": 7.69994422351012e-05,
      "loss": 1.5154,
      "step": 2862
    },
    {
      "epoch": 0.5869208692086921,
      "grad_norm": 0.4871955782599282,
      "learning_rate": 7.69348232486236e-05,
      "loss": 1.6519,
      "step": 2863
    },
    {
      "epoch": 0.5871258712587126,
      "grad_norm": 0.5002230614648069,
      "learning_rate": 7.687021443284071e-05,
      "loss": 1.6242,
      "step": 2864
    },
    {
      "epoch": 0.5873308733087331,
      "grad_norm": 0.4649034147981404,
      "learning_rate": 7.680561581624212e-05,
      "loss": 1.6344,
      "step": 2865
    },
    {
      "epoch": 0.5875358753587536,
      "grad_norm": 0.5040437033568057,
      "learning_rate": 7.674102742731293e-05,
      "loss": 1.6426,
      "step": 2866
    },
    {
      "epoch": 0.5877408774087741,
      "grad_norm": 0.4806300106642581,
      "learning_rate": 7.667644929453362e-05,
      "loss": 1.5363,
      "step": 2867
    },
    {
      "epoch": 0.5879458794587946,
      "grad_norm": 0.43324569620733566,
      "learning_rate": 7.661188144638027e-05,
      "loss": 1.5575,
      "step": 2868
    },
    {
      "epoch": 0.5881508815088151,
      "grad_norm": 0.45123913290457685,
      "learning_rate": 7.654732391132429e-05,
      "loss": 1.5476,
      "step": 2869
    },
    {
      "epoch": 0.5883558835588356,
      "grad_norm": 0.4234195302813191,
      "learning_rate": 7.648277671783266e-05,
      "loss": 1.5205,
      "step": 2870
    },
    {
      "epoch": 0.5885608856088561,
      "grad_norm": 0.40813996975126404,
      "learning_rate": 7.641823989436781e-05,
      "loss": 1.52,
      "step": 2871
    },
    {
      "epoch": 0.5887658876588766,
      "grad_norm": 0.4788725993372061,
      "learning_rate": 7.635371346938746e-05,
      "loss": 1.5362,
      "step": 2872
    },
    {
      "epoch": 0.5889708897088971,
      "grad_norm": 0.42564232016355286,
      "learning_rate": 7.628919747134489e-05,
      "loss": 1.5739,
      "step": 2873
    },
    {
      "epoch": 0.5891758917589176,
      "grad_norm": 0.465141720475574,
      "learning_rate": 7.622469192868867e-05,
      "loss": 1.5213,
      "step": 2874
    },
    {
      "epoch": 0.5893808938089381,
      "grad_norm": 0.45931080212450043,
      "learning_rate": 7.616019686986285e-05,
      "loss": 1.6028,
      "step": 2875
    },
    {
      "epoch": 0.5895858958589586,
      "grad_norm": 0.4633635984104622,
      "learning_rate": 7.609571232330685e-05,
      "loss": 1.5936,
      "step": 2876
    },
    {
      "epoch": 0.5897908979089791,
      "grad_norm": 0.4664872502605939,
      "learning_rate": 7.603123831745536e-05,
      "loss": 1.5708,
      "step": 2877
    },
    {
      "epoch": 0.5899958999589996,
      "grad_norm": 0.40842845857718346,
      "learning_rate": 7.596677488073854e-05,
      "loss": 1.541,
      "step": 2878
    },
    {
      "epoch": 0.5902009020090201,
      "grad_norm": 0.46007633693727107,
      "learning_rate": 7.590232204158179e-05,
      "loss": 1.5702,
      "step": 2879
    },
    {
      "epoch": 0.5904059040590406,
      "grad_norm": 0.5008518912866813,
      "learning_rate": 7.583787982840588e-05,
      "loss": 1.5923,
      "step": 2880
    },
    {
      "epoch": 0.5906109061090611,
      "grad_norm": 0.4334617236720966,
      "learning_rate": 7.577344826962697e-05,
      "loss": 1.4979,
      "step": 2881
    },
    {
      "epoch": 0.5908159081590816,
      "grad_norm": 0.4256248857790475,
      "learning_rate": 7.570902739365637e-05,
      "loss": 1.5362,
      "step": 2882
    },
    {
      "epoch": 0.5910209102091021,
      "grad_norm": 0.4478164990192499,
      "learning_rate": 7.564461722890081e-05,
      "loss": 1.5848,
      "step": 2883
    },
    {
      "epoch": 0.5912259122591226,
      "grad_norm": 0.46074969260142695,
      "learning_rate": 7.558021780376223e-05,
      "loss": 1.5268,
      "step": 2884
    },
    {
      "epoch": 0.5914309143091431,
      "grad_norm": 0.4867293073168773,
      "learning_rate": 7.551582914663781e-05,
      "loss": 1.5988,
      "step": 2885
    },
    {
      "epoch": 0.5916359163591636,
      "grad_norm": 0.3882639942277087,
      "learning_rate": 7.54514512859201e-05,
      "loss": 1.4282,
      "step": 2886
    },
    {
      "epoch": 0.5918409184091841,
      "grad_norm": 0.3998976149246561,
      "learning_rate": 7.538708424999674e-05,
      "loss": 1.5167,
      "step": 2887
    },
    {
      "epoch": 0.5920459204592046,
      "grad_norm": 0.5057311694400677,
      "learning_rate": 7.532272806725072e-05,
      "loss": 1.4919,
      "step": 2888
    },
    {
      "epoch": 0.5922509225092251,
      "grad_norm": 0.4794172732968755,
      "learning_rate": 7.525838276606016e-05,
      "loss": 1.6711,
      "step": 2889
    },
    {
      "epoch": 0.5924559245592456,
      "grad_norm": 0.4422451479514918,
      "learning_rate": 7.519404837479837e-05,
      "loss": 1.5541,
      "step": 2890
    },
    {
      "epoch": 0.5926609266092661,
      "grad_norm": 0.5146714235555389,
      "learning_rate": 7.5129724921834e-05,
      "loss": 1.5923,
      "step": 2891
    },
    {
      "epoch": 0.5928659286592866,
      "grad_norm": 0.45403884245738774,
      "learning_rate": 7.506541243553072e-05,
      "loss": 1.6134,
      "step": 2892
    },
    {
      "epoch": 0.593070930709307,
      "grad_norm": 0.4374507581359449,
      "learning_rate": 7.50011109442474e-05,
      "loss": 1.5332,
      "step": 2893
    },
    {
      "epoch": 0.5932759327593276,
      "grad_norm": 0.4406527983008887,
      "learning_rate": 7.493682047633808e-05,
      "loss": 1.5565,
      "step": 2894
    },
    {
      "epoch": 0.593480934809348,
      "grad_norm": 0.4726042059440526,
      "learning_rate": 7.487254106015195e-05,
      "loss": 1.596,
      "step": 2895
    },
    {
      "epoch": 0.5936859368593685,
      "grad_norm": 0.448435253083987,
      "learning_rate": 7.480827272403326e-05,
      "loss": 1.6385,
      "step": 2896
    },
    {
      "epoch": 0.593890938909389,
      "grad_norm": 0.4494791698334838,
      "learning_rate": 7.474401549632147e-05,
      "loss": 1.4956,
      "step": 2897
    },
    {
      "epoch": 0.5940959409594095,
      "grad_norm": 0.4699834326011467,
      "learning_rate": 7.467976940535112e-05,
      "loss": 1.5578,
      "step": 2898
    },
    {
      "epoch": 0.5943009430094301,
      "grad_norm": 0.5087872607468341,
      "learning_rate": 7.461553447945179e-05,
      "loss": 1.6666,
      "step": 2899
    },
    {
      "epoch": 0.5945059450594506,
      "grad_norm": 0.44851189394034247,
      "learning_rate": 7.455131074694816e-05,
      "loss": 1.5925,
      "step": 2900
    },
    {
      "epoch": 0.5947109471094711,
      "grad_norm": 0.4175534470770163,
      "learning_rate": 7.448709823615995e-05,
      "loss": 1.5458,
      "step": 2901
    },
    {
      "epoch": 0.5949159491594916,
      "grad_norm": 0.47400132301893866,
      "learning_rate": 7.442289697540201e-05,
      "loss": 1.5097,
      "step": 2902
    },
    {
      "epoch": 0.5951209512095121,
      "grad_norm": 0.46887493732744584,
      "learning_rate": 7.435870699298416e-05,
      "loss": 1.5297,
      "step": 2903
    },
    {
      "epoch": 0.5953259532595326,
      "grad_norm": 0.48584437331528135,
      "learning_rate": 7.429452831721127e-05,
      "loss": 1.6051,
      "step": 2904
    },
    {
      "epoch": 0.5955309553095531,
      "grad_norm": 0.5025077618232665,
      "learning_rate": 7.42303609763832e-05,
      "loss": 1.6578,
      "step": 2905
    },
    {
      "epoch": 0.5957359573595736,
      "grad_norm": 0.449442576433012,
      "learning_rate": 7.41662049987948e-05,
      "loss": 1.5585,
      "step": 2906
    },
    {
      "epoch": 0.5959409594095941,
      "grad_norm": 0.42670517925271795,
      "learning_rate": 7.410206041273606e-05,
      "loss": 1.5461,
      "step": 2907
    },
    {
      "epoch": 0.5961459614596146,
      "grad_norm": 0.4680815828320162,
      "learning_rate": 7.40379272464917e-05,
      "loss": 1.6232,
      "step": 2908
    },
    {
      "epoch": 0.5963509635096351,
      "grad_norm": 0.4538627587316065,
      "learning_rate": 7.397380552834161e-05,
      "loss": 1.553,
      "step": 2909
    },
    {
      "epoch": 0.5965559655596556,
      "grad_norm": 0.4526768698468136,
      "learning_rate": 7.39096952865605e-05,
      "loss": 1.6067,
      "step": 2910
    },
    {
      "epoch": 0.5967609676096761,
      "grad_norm": 0.4253044617278166,
      "learning_rate": 7.384559654941814e-05,
      "loss": 1.5509,
      "step": 2911
    },
    {
      "epoch": 0.5969659696596966,
      "grad_norm": 0.4803130212739422,
      "learning_rate": 7.378150934517906e-05,
      "loss": 1.6028,
      "step": 2912
    },
    {
      "epoch": 0.5971709717097171,
      "grad_norm": 0.4688828197748933,
      "learning_rate": 7.371743370210289e-05,
      "loss": 1.5739,
      "step": 2913
    },
    {
      "epoch": 0.5973759737597376,
      "grad_norm": 0.4647481537688531,
      "learning_rate": 7.365336964844408e-05,
      "loss": 1.5398,
      "step": 2914
    },
    {
      "epoch": 0.5975809758097581,
      "grad_norm": 0.4291945269309361,
      "learning_rate": 7.35893172124519e-05,
      "loss": 1.5232,
      "step": 2915
    },
    {
      "epoch": 0.5977859778597786,
      "grad_norm": 0.43486850161559637,
      "learning_rate": 7.352527642237064e-05,
      "loss": 1.5937,
      "step": 2916
    },
    {
      "epoch": 0.5979909799097991,
      "grad_norm": 0.47206834237971973,
      "learning_rate": 7.346124730643929e-05,
      "loss": 1.5586,
      "step": 2917
    },
    {
      "epoch": 0.5981959819598196,
      "grad_norm": 0.4498992161060695,
      "learning_rate": 7.339722989289183e-05,
      "loss": 1.5829,
      "step": 2918
    },
    {
      "epoch": 0.5984009840098401,
      "grad_norm": 0.46517941039745553,
      "learning_rate": 7.333322420995708e-05,
      "loss": 1.5481,
      "step": 2919
    },
    {
      "epoch": 0.5986059860598606,
      "grad_norm": 0.5022982277225991,
      "learning_rate": 7.326923028585854e-05,
      "loss": 1.6315,
      "step": 2920
    },
    {
      "epoch": 0.5988109881098811,
      "grad_norm": 0.46503977065256796,
      "learning_rate": 7.32052481488147e-05,
      "loss": 1.5343,
      "step": 2921
    },
    {
      "epoch": 0.5990159901599016,
      "grad_norm": 0.44620462033036284,
      "learning_rate": 7.31412778270387e-05,
      "loss": 1.5502,
      "step": 2922
    },
    {
      "epoch": 0.5992209922099221,
      "grad_norm": 0.4496059349665212,
      "learning_rate": 7.307731934873862e-05,
      "loss": 1.6046,
      "step": 2923
    },
    {
      "epoch": 0.5994259942599426,
      "grad_norm": 0.4321324175035921,
      "learning_rate": 7.301337274211722e-05,
      "loss": 1.5535,
      "step": 2924
    },
    {
      "epoch": 0.5996309963099631,
      "grad_norm": 0.5175574511849769,
      "learning_rate": 7.294943803537202e-05,
      "loss": 1.6659,
      "step": 2925
    },
    {
      "epoch": 0.5998359983599836,
      "grad_norm": 0.3977693749582312,
      "learning_rate": 7.288551525669536e-05,
      "loss": 1.5706,
      "step": 2926
    },
    {
      "epoch": 0.6000410004100041,
      "grad_norm": 0.43619380730220897,
      "learning_rate": 7.282160443427424e-05,
      "loss": 1.6211,
      "step": 2927
    },
    {
      "epoch": 0.6002460024600246,
      "grad_norm": 0.45241328781708706,
      "learning_rate": 7.275770559629042e-05,
      "loss": 1.5221,
      "step": 2928
    },
    {
      "epoch": 0.6004510045100451,
      "grad_norm": 0.483219973559941,
      "learning_rate": 7.269381877092045e-05,
      "loss": 1.5598,
      "step": 2929
    },
    {
      "epoch": 0.6006560065600656,
      "grad_norm": 0.47601924043064653,
      "learning_rate": 7.262994398633547e-05,
      "loss": 1.592,
      "step": 2930
    },
    {
      "epoch": 0.6008610086100861,
      "grad_norm": 0.46329133087345176,
      "learning_rate": 7.256608127070137e-05,
      "loss": 1.6068,
      "step": 2931
    },
    {
      "epoch": 0.6010660106601066,
      "grad_norm": 0.45790279170063364,
      "learning_rate": 7.250223065217869e-05,
      "loss": 1.5248,
      "step": 2932
    },
    {
      "epoch": 0.6012710127101271,
      "grad_norm": 0.45302107601922315,
      "learning_rate": 7.243839215892263e-05,
      "loss": 1.5888,
      "step": 2933
    },
    {
      "epoch": 0.6014760147601476,
      "grad_norm": 0.41069801676078876,
      "learning_rate": 7.237456581908315e-05,
      "loss": 1.4927,
      "step": 2934
    },
    {
      "epoch": 0.6016810168101681,
      "grad_norm": 0.41391511299416306,
      "learning_rate": 7.231075166080467e-05,
      "loss": 1.5581,
      "step": 2935
    },
    {
      "epoch": 0.6018860188601886,
      "grad_norm": 0.4244492935350169,
      "learning_rate": 7.224694971222641e-05,
      "loss": 1.5608,
      "step": 2936
    },
    {
      "epoch": 0.602091020910209,
      "grad_norm": 0.49046412759116587,
      "learning_rate": 7.218316000148207e-05,
      "loss": 1.607,
      "step": 2937
    },
    {
      "epoch": 0.6022960229602295,
      "grad_norm": 0.4678840773151214,
      "learning_rate": 7.211938255670003e-05,
      "loss": 1.5151,
      "step": 2938
    },
    {
      "epoch": 0.6025010250102502,
      "grad_norm": 0.46787063802713913,
      "learning_rate": 7.205561740600329e-05,
      "loss": 1.5338,
      "step": 2939
    },
    {
      "epoch": 0.6027060270602707,
      "grad_norm": 0.4345510102151128,
      "learning_rate": 7.19918645775093e-05,
      "loss": 1.5101,
      "step": 2940
    },
    {
      "epoch": 0.6029110291102912,
      "grad_norm": 0.518363603649018,
      "learning_rate": 7.192812409933025e-05,
      "loss": 1.5883,
      "step": 2941
    },
    {
      "epoch": 0.6031160311603116,
      "grad_norm": 0.44056034890151863,
      "learning_rate": 7.186439599957273e-05,
      "loss": 1.5402,
      "step": 2942
    },
    {
      "epoch": 0.6033210332103321,
      "grad_norm": 0.4791752040908096,
      "learning_rate": 7.180068030633798e-05,
      "loss": 1.5683,
      "step": 2943
    },
    {
      "epoch": 0.6035260352603526,
      "grad_norm": 0.4828693826400533,
      "learning_rate": 7.173697704772164e-05,
      "loss": 1.5392,
      "step": 2944
    },
    {
      "epoch": 0.6037310373103731,
      "grad_norm": 0.4514751248424061,
      "learning_rate": 7.167328625181404e-05,
      "loss": 1.5748,
      "step": 2945
    },
    {
      "epoch": 0.6039360393603936,
      "grad_norm": 0.4711325186459684,
      "learning_rate": 7.160960794669992e-05,
      "loss": 1.4962,
      "step": 2946
    },
    {
      "epoch": 0.6041410414104141,
      "grad_norm": 0.4502351071834052,
      "learning_rate": 7.15459421604585e-05,
      "loss": 1.5814,
      "step": 2947
    },
    {
      "epoch": 0.6043460434604346,
      "grad_norm": 0.4587706708821412,
      "learning_rate": 7.148228892116351e-05,
      "loss": 1.502,
      "step": 2948
    },
    {
      "epoch": 0.6045510455104551,
      "grad_norm": 0.4155835311109425,
      "learning_rate": 7.141864825688307e-05,
      "loss": 1.5445,
      "step": 2949
    },
    {
      "epoch": 0.6047560475604756,
      "grad_norm": 0.40179931469561625,
      "learning_rate": 7.13550201956799e-05,
      "loss": 1.512,
      "step": 2950
    },
    {
      "epoch": 0.6049610496104961,
      "grad_norm": 0.4925202936110988,
      "learning_rate": 7.129140476561108e-05,
      "loss": 1.6364,
      "step": 2951
    },
    {
      "epoch": 0.6051660516605166,
      "grad_norm": 0.44635459860991183,
      "learning_rate": 7.122780199472809e-05,
      "loss": 1.534,
      "step": 2952
    },
    {
      "epoch": 0.6053710537105371,
      "grad_norm": 0.3957148211831439,
      "learning_rate": 7.116421191107687e-05,
      "loss": 1.577,
      "step": 2953
    },
    {
      "epoch": 0.6055760557605576,
      "grad_norm": 0.4461287587990288,
      "learning_rate": 7.110063454269777e-05,
      "loss": 1.5355,
      "step": 2954
    },
    {
      "epoch": 0.6057810578105781,
      "grad_norm": 0.42016913288286717,
      "learning_rate": 7.103706991762546e-05,
      "loss": 1.5818,
      "step": 2955
    },
    {
      "epoch": 0.6059860598605986,
      "grad_norm": 0.4681872730089598,
      "learning_rate": 7.097351806388915e-05,
      "loss": 1.5473,
      "step": 2956
    },
    {
      "epoch": 0.6061910619106191,
      "grad_norm": 0.4467933961188191,
      "learning_rate": 7.090997900951227e-05,
      "loss": 1.5756,
      "step": 2957
    },
    {
      "epoch": 0.6063960639606396,
      "grad_norm": 0.45454303945035157,
      "learning_rate": 7.084645278251263e-05,
      "loss": 1.5476,
      "step": 2958
    },
    {
      "epoch": 0.6066010660106601,
      "grad_norm": 0.4242354162291587,
      "learning_rate": 7.078293941090249e-05,
      "loss": 1.5584,
      "step": 2959
    },
    {
      "epoch": 0.6068060680606806,
      "grad_norm": 0.43653224264582896,
      "learning_rate": 7.071943892268822e-05,
      "loss": 1.5486,
      "step": 2960
    },
    {
      "epoch": 0.6070110701107011,
      "grad_norm": 0.43139034505263063,
      "learning_rate": 7.065595134587078e-05,
      "loss": 1.5366,
      "step": 2961
    },
    {
      "epoch": 0.6072160721607216,
      "grad_norm": 0.4514582820547564,
      "learning_rate": 7.059247670844528e-05,
      "loss": 1.4895,
      "step": 2962
    },
    {
      "epoch": 0.6074210742107421,
      "grad_norm": 0.47433438538277,
      "learning_rate": 7.052901503840111e-05,
      "loss": 1.5186,
      "step": 2963
    },
    {
      "epoch": 0.6076260762607626,
      "grad_norm": 0.4941721400249152,
      "learning_rate": 7.046556636372202e-05,
      "loss": 1.5917,
      "step": 2964
    },
    {
      "epoch": 0.6078310783107831,
      "grad_norm": 0.49800041869459405,
      "learning_rate": 7.040213071238592e-05,
      "loss": 1.4808,
      "step": 2965
    },
    {
      "epoch": 0.6080360803608036,
      "grad_norm": 0.452958066822433,
      "learning_rate": 7.033870811236516e-05,
      "loss": 1.5035,
      "step": 2966
    },
    {
      "epoch": 0.6082410824108241,
      "grad_norm": 0.47066943263641875,
      "learning_rate": 7.027529859162616e-05,
      "loss": 1.5362,
      "step": 2967
    },
    {
      "epoch": 0.6084460844608446,
      "grad_norm": 0.4459782854808815,
      "learning_rate": 7.021190217812966e-05,
      "loss": 1.5301,
      "step": 2968
    },
    {
      "epoch": 0.6086510865108651,
      "grad_norm": 0.46638297805865353,
      "learning_rate": 7.014851889983057e-05,
      "loss": 1.5454,
      "step": 2969
    },
    {
      "epoch": 0.6088560885608856,
      "grad_norm": 0.46390237728714767,
      "learning_rate": 7.008514878467805e-05,
      "loss": 1.532,
      "step": 2970
    },
    {
      "epoch": 0.6090610906109061,
      "grad_norm": 0.44795921069907085,
      "learning_rate": 7.002179186061542e-05,
      "loss": 1.5688,
      "step": 2971
    },
    {
      "epoch": 0.6092660926609266,
      "grad_norm": 0.4690095808970144,
      "learning_rate": 6.995844815558026e-05,
      "loss": 1.5604,
      "step": 2972
    },
    {
      "epoch": 0.6094710947109471,
      "grad_norm": 0.45825983851594926,
      "learning_rate": 6.98951176975042e-05,
      "loss": 1.5627,
      "step": 2973
    },
    {
      "epoch": 0.6096760967609676,
      "grad_norm": 0.5150166896673117,
      "learning_rate": 6.983180051431315e-05,
      "loss": 1.5798,
      "step": 2974
    },
    {
      "epoch": 0.6098810988109881,
      "grad_norm": 0.455109123650538,
      "learning_rate": 6.976849663392708e-05,
      "loss": 1.5797,
      "step": 2975
    },
    {
      "epoch": 0.6100861008610086,
      "grad_norm": 0.4702531173078538,
      "learning_rate": 6.97052060842601e-05,
      "loss": 1.5718,
      "step": 2976
    },
    {
      "epoch": 0.6102911029110291,
      "grad_norm": 0.4495664668884594,
      "learning_rate": 6.964192889322053e-05,
      "loss": 1.5585,
      "step": 2977
    },
    {
      "epoch": 0.6104961049610496,
      "grad_norm": 0.46226082522956324,
      "learning_rate": 6.957866508871068e-05,
      "loss": 1.5363,
      "step": 2978
    },
    {
      "epoch": 0.6107011070110702,
      "grad_norm": 0.4707280057172589,
      "learning_rate": 6.951541469862706e-05,
      "loss": 1.6113,
      "step": 2979
    },
    {
      "epoch": 0.6109061090610907,
      "grad_norm": 0.48183454794360314,
      "learning_rate": 6.945217775086017e-05,
      "loss": 1.5371,
      "step": 2980
    },
    {
      "epoch": 0.6111111111111112,
      "grad_norm": 0.4589876764504603,
      "learning_rate": 6.938895427329463e-05,
      "loss": 1.6195,
      "step": 2981
    },
    {
      "epoch": 0.6113161131611317,
      "grad_norm": 0.45775643294615465,
      "learning_rate": 6.932574429380918e-05,
      "loss": 1.5202,
      "step": 2982
    },
    {
      "epoch": 0.6115211152111522,
      "grad_norm": 0.4798856422208839,
      "learning_rate": 6.926254784027648e-05,
      "loss": 1.5764,
      "step": 2983
    },
    {
      "epoch": 0.6117261172611727,
      "grad_norm": 0.4403281723941011,
      "learning_rate": 6.919936494056336e-05,
      "loss": 1.5124,
      "step": 2984
    },
    {
      "epoch": 0.6119311193111932,
      "grad_norm": 0.43176902468149503,
      "learning_rate": 6.913619562253055e-05,
      "loss": 1.6334,
      "step": 2985
    },
    {
      "epoch": 0.6121361213612136,
      "grad_norm": 0.4597713414597451,
      "learning_rate": 6.907303991403289e-05,
      "loss": 1.6131,
      "step": 2986
    },
    {
      "epoch": 0.6123411234112341,
      "grad_norm": 0.45445690196581606,
      "learning_rate": 6.900989784291911e-05,
      "loss": 1.5836,
      "step": 2987
    },
    {
      "epoch": 0.6125461254612546,
      "grad_norm": 0.43526419128306093,
      "learning_rate": 6.894676943703206e-05,
      "loss": 1.4932,
      "step": 2988
    },
    {
      "epoch": 0.6127511275112751,
      "grad_norm": 0.4779583093014876,
      "learning_rate": 6.888365472420851e-05,
      "loss": 1.53,
      "step": 2989
    },
    {
      "epoch": 0.6129561295612956,
      "grad_norm": 0.4576740543678265,
      "learning_rate": 6.882055373227914e-05,
      "loss": 1.5736,
      "step": 2990
    },
    {
      "epoch": 0.6131611316113161,
      "grad_norm": 0.4481675687238009,
      "learning_rate": 6.875746648906863e-05,
      "loss": 1.5595,
      "step": 2991
    },
    {
      "epoch": 0.6133661336613366,
      "grad_norm": 0.4761378801904838,
      "learning_rate": 6.869439302239556e-05,
      "loss": 1.5474,
      "step": 2992
    },
    {
      "epoch": 0.6135711357113571,
      "grad_norm": 0.45375696383716857,
      "learning_rate": 6.863133336007248e-05,
      "loss": 1.5704,
      "step": 2993
    },
    {
      "epoch": 0.6137761377613776,
      "grad_norm": 0.4662242508913138,
      "learning_rate": 6.856828752990589e-05,
      "loss": 1.5479,
      "step": 2994
    },
    {
      "epoch": 0.6139811398113981,
      "grad_norm": 0.4469917888154684,
      "learning_rate": 6.850525555969607e-05,
      "loss": 1.5636,
      "step": 2995
    },
    {
      "epoch": 0.6141861418614186,
      "grad_norm": 0.44574423297664095,
      "learning_rate": 6.844223747723728e-05,
      "loss": 1.4877,
      "step": 2996
    },
    {
      "epoch": 0.6143911439114391,
      "grad_norm": 0.421475813096146,
      "learning_rate": 6.83792333103176e-05,
      "loss": 1.5472,
      "step": 2997
    },
    {
      "epoch": 0.6145961459614596,
      "grad_norm": 0.4376875543122658,
      "learning_rate": 6.831624308671905e-05,
      "loss": 1.641,
      "step": 2998
    },
    {
      "epoch": 0.6148011480114801,
      "grad_norm": 0.47070626573915497,
      "learning_rate": 6.825326683421744e-05,
      "loss": 1.5454,
      "step": 2999
    },
    {
      "epoch": 0.6150061500615006,
      "grad_norm": 0.4923536137605395,
      "learning_rate": 6.819030458058243e-05,
      "loss": 1.5655,
      "step": 3000
    },
    {
      "epoch": 0.6152111521115211,
      "grad_norm": 0.4589559562216384,
      "learning_rate": 6.812735635357753e-05,
      "loss": 1.5332,
      "step": 3001
    },
    {
      "epoch": 0.6154161541615416,
      "grad_norm": 0.41406116254882996,
      "learning_rate": 6.806442218096001e-05,
      "loss": 1.5288,
      "step": 3002
    },
    {
      "epoch": 0.6156211562115621,
      "grad_norm": 0.47434444684201854,
      "learning_rate": 6.800150209048097e-05,
      "loss": 1.506,
      "step": 3003
    },
    {
      "epoch": 0.6158261582615826,
      "grad_norm": 0.42832017576009973,
      "learning_rate": 6.79385961098854e-05,
      "loss": 1.5521,
      "step": 3004
    },
    {
      "epoch": 0.6160311603116031,
      "grad_norm": 0.4675808043623191,
      "learning_rate": 6.787570426691189e-05,
      "loss": 1.5702,
      "step": 3005
    },
    {
      "epoch": 0.6162361623616236,
      "grad_norm": 0.465647503517337,
      "learning_rate": 6.781282658929294e-05,
      "loss": 1.5811,
      "step": 3006
    },
    {
      "epoch": 0.6164411644116441,
      "grad_norm": 0.4424898207483636,
      "learning_rate": 6.774996310475473e-05,
      "loss": 1.5018,
      "step": 3007
    },
    {
      "epoch": 0.6166461664616646,
      "grad_norm": 0.44481342522395206,
      "learning_rate": 6.768711384101712e-05,
      "loss": 1.5328,
      "step": 3008
    },
    {
      "epoch": 0.6168511685116851,
      "grad_norm": 0.4698399725371324,
      "learning_rate": 6.762427882579389e-05,
      "loss": 1.4813,
      "step": 3009
    },
    {
      "epoch": 0.6170561705617056,
      "grad_norm": 0.49707658871041654,
      "learning_rate": 6.756145808679243e-05,
      "loss": 1.546,
      "step": 3010
    },
    {
      "epoch": 0.6172611726117261,
      "grad_norm": 0.4426897382888866,
      "learning_rate": 6.749865165171375e-05,
      "loss": 1.5276,
      "step": 3011
    },
    {
      "epoch": 0.6174661746617466,
      "grad_norm": 0.38791195809148604,
      "learning_rate": 6.74358595482527e-05,
      "loss": 1.5666,
      "step": 3012
    },
    {
      "epoch": 0.6176711767117671,
      "grad_norm": 0.4221514508427358,
      "learning_rate": 6.737308180409767e-05,
      "loss": 1.538,
      "step": 3013
    },
    {
      "epoch": 0.6178761787617876,
      "grad_norm": 0.4572047201575024,
      "learning_rate": 6.731031844693087e-05,
      "loss": 1.5332,
      "step": 3014
    },
    {
      "epoch": 0.6180811808118081,
      "grad_norm": 0.43861767506070165,
      "learning_rate": 6.724756950442807e-05,
      "loss": 1.544,
      "step": 3015
    },
    {
      "epoch": 0.6182861828618286,
      "grad_norm": 0.4323427771692878,
      "learning_rate": 6.718483500425867e-05,
      "loss": 1.5908,
      "step": 3016
    },
    {
      "epoch": 0.6184911849118491,
      "grad_norm": 0.4746355695421071,
      "learning_rate": 6.712211497408578e-05,
      "loss": 1.6562,
      "step": 3017
    },
    {
      "epoch": 0.6186961869618696,
      "grad_norm": 0.44815873191171124,
      "learning_rate": 6.705940944156603e-05,
      "loss": 1.5668,
      "step": 3018
    },
    {
      "epoch": 0.6189011890118902,
      "grad_norm": 0.43563679181631687,
      "learning_rate": 6.699671843434972e-05,
      "loss": 1.5005,
      "step": 3019
    },
    {
      "epoch": 0.6191061910619107,
      "grad_norm": 0.4571595922823676,
      "learning_rate": 6.69340419800808e-05,
      "loss": 1.5652,
      "step": 3020
    },
    {
      "epoch": 0.6193111931119312,
      "grad_norm": 0.45875034758171485,
      "learning_rate": 6.687138010639667e-05,
      "loss": 1.578,
      "step": 3021
    },
    {
      "epoch": 0.6195161951619517,
      "grad_norm": 0.4253248575499902,
      "learning_rate": 6.680873284092839e-05,
      "loss": 1.4906,
      "step": 3022
    },
    {
      "epoch": 0.6197211972119722,
      "grad_norm": 0.4531013198035524,
      "learning_rate": 6.674610021130055e-05,
      "loss": 1.6082,
      "step": 3023
    },
    {
      "epoch": 0.6199261992619927,
      "grad_norm": 0.4152722403328041,
      "learning_rate": 6.668348224513126e-05,
      "loss": 1.5313,
      "step": 3024
    },
    {
      "epoch": 0.6201312013120132,
      "grad_norm": 0.4393915941181943,
      "learning_rate": 6.662087897003229e-05,
      "loss": 1.5132,
      "step": 3025
    },
    {
      "epoch": 0.6203362033620337,
      "grad_norm": 0.47087162990327114,
      "learning_rate": 6.655829041360877e-05,
      "loss": 1.5645,
      "step": 3026
    },
    {
      "epoch": 0.6205412054120542,
      "grad_norm": 0.46168923411024254,
      "learning_rate": 6.649571660345944e-05,
      "loss": 1.583,
      "step": 3027
    },
    {
      "epoch": 0.6207462074620747,
      "grad_norm": 0.43839687490525603,
      "learning_rate": 6.643315756717648e-05,
      "loss": 1.5532,
      "step": 3028
    },
    {
      "epoch": 0.6209512095120951,
      "grad_norm": 0.5046946912544114,
      "learning_rate": 6.637061333234557e-05,
      "loss": 1.6101,
      "step": 3029
    },
    {
      "epoch": 0.6211562115621156,
      "grad_norm": 0.4703506483765253,
      "learning_rate": 6.630808392654593e-05,
      "loss": 1.6161,
      "step": 3030
    },
    {
      "epoch": 0.6213612136121361,
      "grad_norm": 0.4410805848457068,
      "learning_rate": 6.624556937735013e-05,
      "loss": 1.5445,
      "step": 3031
    },
    {
      "epoch": 0.6215662156621566,
      "grad_norm": 0.5680585085077579,
      "learning_rate": 6.61830697123243e-05,
      "loss": 1.5846,
      "step": 3032
    },
    {
      "epoch": 0.6217712177121771,
      "grad_norm": 0.42924480457354586,
      "learning_rate": 6.612058495902791e-05,
      "loss": 1.6052,
      "step": 3033
    },
    {
      "epoch": 0.6219762197621976,
      "grad_norm": 0.42982813747209114,
      "learning_rate": 6.605811514501392e-05,
      "loss": 1.5308,
      "step": 3034
    },
    {
      "epoch": 0.6221812218122181,
      "grad_norm": 0.4441246106502223,
      "learning_rate": 6.599566029782863e-05,
      "loss": 1.5295,
      "step": 3035
    },
    {
      "epoch": 0.6223862238622386,
      "grad_norm": 0.4326730877644339,
      "learning_rate": 6.593322044501185e-05,
      "loss": 1.5984,
      "step": 3036
    },
    {
      "epoch": 0.6225912259122591,
      "grad_norm": 0.4358961385466027,
      "learning_rate": 6.587079561409672e-05,
      "loss": 1.6145,
      "step": 3037
    },
    {
      "epoch": 0.6227962279622796,
      "grad_norm": 0.4578112174036775,
      "learning_rate": 6.580838583260968e-05,
      "loss": 1.5642,
      "step": 3038
    },
    {
      "epoch": 0.6230012300123001,
      "grad_norm": 0.48652487147283446,
      "learning_rate": 6.57459911280707e-05,
      "loss": 1.6143,
      "step": 3039
    },
    {
      "epoch": 0.6232062320623206,
      "grad_norm": 0.5141630065593029,
      "learning_rate": 6.568361152799293e-05,
      "loss": 1.5935,
      "step": 3040
    },
    {
      "epoch": 0.6234112341123411,
      "grad_norm": 0.4431000907807186,
      "learning_rate": 6.562124705988297e-05,
      "loss": 1.5108,
      "step": 3041
    },
    {
      "epoch": 0.6236162361623616,
      "grad_norm": 0.4801120183870235,
      "learning_rate": 6.555889775124076e-05,
      "loss": 1.5576,
      "step": 3042
    },
    {
      "epoch": 0.6238212382123821,
      "grad_norm": 0.46990654971245066,
      "learning_rate": 6.549656362955944e-05,
      "loss": 1.5396,
      "step": 3043
    },
    {
      "epoch": 0.6240262402624026,
      "grad_norm": 0.5185763461791878,
      "learning_rate": 6.54342447223256e-05,
      "loss": 1.4894,
      "step": 3044
    },
    {
      "epoch": 0.6242312423124231,
      "grad_norm": 0.4982719135297392,
      "learning_rate": 6.537194105701895e-05,
      "loss": 1.5106,
      "step": 3045
    },
    {
      "epoch": 0.6244362443624436,
      "grad_norm": 0.46857862001505124,
      "learning_rate": 6.530965266111264e-05,
      "loss": 1.5691,
      "step": 3046
    },
    {
      "epoch": 0.6246412464124641,
      "grad_norm": 0.47676696594678053,
      "learning_rate": 6.524737956207304e-05,
      "loss": 1.5025,
      "step": 3047
    },
    {
      "epoch": 0.6248462484624846,
      "grad_norm": 0.494878130134965,
      "learning_rate": 6.518512178735968e-05,
      "loss": 1.659,
      "step": 3048
    },
    {
      "epoch": 0.6250512505125051,
      "grad_norm": 0.4555208676122304,
      "learning_rate": 6.512287936442549e-05,
      "loss": 1.5844,
      "step": 3049
    },
    {
      "epoch": 0.6252562525625256,
      "grad_norm": 0.4666617153415811,
      "learning_rate": 6.50606523207165e-05,
      "loss": 1.5279,
      "step": 3050
    },
    {
      "epoch": 0.6254612546125461,
      "grad_norm": 0.5084049715324417,
      "learning_rate": 6.4998440683672e-05,
      "loss": 1.5811,
      "step": 3051
    },
    {
      "epoch": 0.6256662566625666,
      "grad_norm": 0.4538076629443115,
      "learning_rate": 6.493624448072457e-05,
      "loss": 1.5516,
      "step": 3052
    },
    {
      "epoch": 0.6258712587125871,
      "grad_norm": 0.43887531584056944,
      "learning_rate": 6.487406373929982e-05,
      "loss": 1.6212,
      "step": 3053
    },
    {
      "epoch": 0.6260762607626076,
      "grad_norm": 0.40300359557326754,
      "learning_rate": 6.48118984868167e-05,
      "loss": 1.5393,
      "step": 3054
    },
    {
      "epoch": 0.6262812628126281,
      "grad_norm": 0.46929896086456635,
      "learning_rate": 6.474974875068721e-05,
      "loss": 1.5414,
      "step": 3055
    },
    {
      "epoch": 0.6264862648626486,
      "grad_norm": 0.4600063367614203,
      "learning_rate": 6.468761455831656e-05,
      "loss": 1.5997,
      "step": 3056
    },
    {
      "epoch": 0.6266912669126691,
      "grad_norm": 0.43472745047654127,
      "learning_rate": 6.462549593710316e-05,
      "loss": 1.5287,
      "step": 3057
    },
    {
      "epoch": 0.6268962689626896,
      "grad_norm": 0.47110124986521373,
      "learning_rate": 6.456339291443845e-05,
      "loss": 1.5291,
      "step": 3058
    },
    {
      "epoch": 0.6271012710127102,
      "grad_norm": 0.4340888098912825,
      "learning_rate": 6.450130551770706e-05,
      "loss": 1.5416,
      "step": 3059
    },
    {
      "epoch": 0.6273062730627307,
      "grad_norm": 0.4541200900506299,
      "learning_rate": 6.443923377428672e-05,
      "loss": 1.5921,
      "step": 3060
    },
    {
      "epoch": 0.6275112751127512,
      "grad_norm": 0.4133637532071334,
      "learning_rate": 6.43771777115482e-05,
      "loss": 1.5532,
      "step": 3061
    },
    {
      "epoch": 0.6277162771627717,
      "grad_norm": 0.4112910106629686,
      "learning_rate": 6.431513735685543e-05,
      "loss": 1.5015,
      "step": 3062
    },
    {
      "epoch": 0.6279212792127922,
      "grad_norm": 0.4435137536551326,
      "learning_rate": 6.425311273756543e-05,
      "loss": 1.5479,
      "step": 3063
    },
    {
      "epoch": 0.6281262812628127,
      "grad_norm": 0.4787810811384867,
      "learning_rate": 6.419110388102818e-05,
      "loss": 1.5751,
      "step": 3064
    },
    {
      "epoch": 0.6283312833128332,
      "grad_norm": 0.4617157378231925,
      "learning_rate": 6.41291108145868e-05,
      "loss": 1.5023,
      "step": 3065
    },
    {
      "epoch": 0.6285362853628537,
      "grad_norm": 0.4203353332527203,
      "learning_rate": 6.406713356557739e-05,
      "loss": 1.4986,
      "step": 3066
    },
    {
      "epoch": 0.6287412874128742,
      "grad_norm": 0.43506829203387276,
      "learning_rate": 6.400517216132909e-05,
      "loss": 1.5543,
      "step": 3067
    },
    {
      "epoch": 0.6289462894628947,
      "grad_norm": 0.412736930042528,
      "learning_rate": 6.394322662916415e-05,
      "loss": 1.5562,
      "step": 3068
    },
    {
      "epoch": 0.6291512915129152,
      "grad_norm": 0.44163430888022454,
      "learning_rate": 6.388129699639762e-05,
      "loss": 1.606,
      "step": 3069
    },
    {
      "epoch": 0.6293562935629357,
      "grad_norm": 0.48693563184484756,
      "learning_rate": 6.381938329033775e-05,
      "loss": 1.5168,
      "step": 3070
    },
    {
      "epoch": 0.6295612956129562,
      "grad_norm": 0.4685244259782071,
      "learning_rate": 6.37574855382856e-05,
      "loss": 1.572,
      "step": 3071
    },
    {
      "epoch": 0.6297662976629766,
      "grad_norm": 0.43973339157362423,
      "learning_rate": 6.369560376753527e-05,
      "loss": 1.5996,
      "step": 3072
    },
    {
      "epoch": 0.6299712997129971,
      "grad_norm": 0.4196159961602663,
      "learning_rate": 6.363373800537387e-05,
      "loss": 1.5748,
      "step": 3073
    },
    {
      "epoch": 0.6301763017630176,
      "grad_norm": 0.5040263111844968,
      "learning_rate": 6.357188827908133e-05,
      "loss": 1.6471,
      "step": 3074
    },
    {
      "epoch": 0.6303813038130381,
      "grad_norm": 0.4638902429714762,
      "learning_rate": 6.351005461593063e-05,
      "loss": 1.5586,
      "step": 3075
    },
    {
      "epoch": 0.6305863058630586,
      "grad_norm": 0.4529245288933407,
      "learning_rate": 6.344823704318752e-05,
      "loss": 1.5208,
      "step": 3076
    },
    {
      "epoch": 0.6307913079130791,
      "grad_norm": 0.46428297219333026,
      "learning_rate": 6.338643558811082e-05,
      "loss": 1.5182,
      "step": 3077
    },
    {
      "epoch": 0.6309963099630996,
      "grad_norm": 0.4213468150289308,
      "learning_rate": 6.332465027795208e-05,
      "loss": 1.5517,
      "step": 3078
    },
    {
      "epoch": 0.6312013120131201,
      "grad_norm": 0.4111582335445511,
      "learning_rate": 6.326288113995589e-05,
      "loss": 1.4768,
      "step": 3079
    },
    {
      "epoch": 0.6314063140631406,
      "grad_norm": 0.4050386293384675,
      "learning_rate": 6.320112820135961e-05,
      "loss": 1.5074,
      "step": 3080
    },
    {
      "epoch": 0.6316113161131611,
      "grad_norm": 0.45694246954957957,
      "learning_rate": 6.313939148939347e-05,
      "loss": 1.6012,
      "step": 3081
    },
    {
      "epoch": 0.6318163181631816,
      "grad_norm": 0.476477422781459,
      "learning_rate": 6.307767103128057e-05,
      "loss": 1.632,
      "step": 3082
    },
    {
      "epoch": 0.6320213202132021,
      "grad_norm": 0.42829792876761863,
      "learning_rate": 6.301596685423679e-05,
      "loss": 1.5791,
      "step": 3083
    },
    {
      "epoch": 0.6322263222632226,
      "grad_norm": 0.4468566056534431,
      "learning_rate": 6.295427898547091e-05,
      "loss": 1.5671,
      "step": 3084
    },
    {
      "epoch": 0.6324313243132431,
      "grad_norm": 0.4608232611540185,
      "learning_rate": 6.289260745218447e-05,
      "loss": 1.6174,
      "step": 3085
    },
    {
      "epoch": 0.6326363263632636,
      "grad_norm": 0.4296584707129513,
      "learning_rate": 6.283095228157179e-05,
      "loss": 1.511,
      "step": 3086
    },
    {
      "epoch": 0.6328413284132841,
      "grad_norm": 0.37268380637691445,
      "learning_rate": 6.276931350082003e-05,
      "loss": 1.4874,
      "step": 3087
    },
    {
      "epoch": 0.6330463304633046,
      "grad_norm": 0.523482712003226,
      "learning_rate": 6.270769113710903e-05,
      "loss": 1.6072,
      "step": 3088
    },
    {
      "epoch": 0.6332513325133251,
      "grad_norm": 0.472150025473708,
      "learning_rate": 6.264608521761153e-05,
      "loss": 1.5999,
      "step": 3089
    },
    {
      "epoch": 0.6334563345633456,
      "grad_norm": 0.4744383769578823,
      "learning_rate": 6.258449576949292e-05,
      "loss": 1.52,
      "step": 3090
    },
    {
      "epoch": 0.6336613366133661,
      "grad_norm": 0.45943524836929384,
      "learning_rate": 6.252292281991133e-05,
      "loss": 1.5093,
      "step": 3091
    },
    {
      "epoch": 0.6338663386633866,
      "grad_norm": 0.463725695131211,
      "learning_rate": 6.246136639601764e-05,
      "loss": 1.5473,
      "step": 3092
    },
    {
      "epoch": 0.6340713407134071,
      "grad_norm": 0.46094504465199243,
      "learning_rate": 6.23998265249554e-05,
      "loss": 1.5483,
      "step": 3093
    },
    {
      "epoch": 0.6342763427634276,
      "grad_norm": 0.48481260909743107,
      "learning_rate": 6.233830323386091e-05,
      "loss": 1.5721,
      "step": 3094
    },
    {
      "epoch": 0.6344813448134481,
      "grad_norm": 0.43536682799752646,
      "learning_rate": 6.227679654986323e-05,
      "loss": 1.5682,
      "step": 3095
    },
    {
      "epoch": 0.6346863468634686,
      "grad_norm": 0.4807136300208779,
      "learning_rate": 6.221530650008391e-05,
      "loss": 1.5898,
      "step": 3096
    },
    {
      "epoch": 0.6348913489134891,
      "grad_norm": 0.45622830239550205,
      "learning_rate": 6.215383311163733e-05,
      "loss": 1.578,
      "step": 3097
    },
    {
      "epoch": 0.6350963509635096,
      "grad_norm": 0.4196356993103045,
      "learning_rate": 6.209237641163041e-05,
      "loss": 1.5267,
      "step": 3098
    },
    {
      "epoch": 0.6353013530135302,
      "grad_norm": 0.42762962872414567,
      "learning_rate": 6.203093642716278e-05,
      "loss": 1.4724,
      "step": 3099
    },
    {
      "epoch": 0.6355063550635507,
      "grad_norm": 0.48169889092815393,
      "learning_rate": 6.196951318532672e-05,
      "loss": 1.6326,
      "step": 3100
    },
    {
      "epoch": 0.6357113571135712,
      "grad_norm": 0.3938422042991795,
      "learning_rate": 6.190810671320704e-05,
      "loss": 1.5175,
      "step": 3101
    },
    {
      "epoch": 0.6359163591635917,
      "grad_norm": 0.40589681316749804,
      "learning_rate": 6.184671703788124e-05,
      "loss": 1.5179,
      "step": 3102
    },
    {
      "epoch": 0.6361213612136122,
      "grad_norm": 0.5312270393941052,
      "learning_rate": 6.178534418641932e-05,
      "loss": 1.5355,
      "step": 3103
    },
    {
      "epoch": 0.6363263632636327,
      "grad_norm": 0.40999673496993094,
      "learning_rate": 6.172398818588394e-05,
      "loss": 1.5355,
      "step": 3104
    },
    {
      "epoch": 0.6365313653136532,
      "grad_norm": 0.45408557733919874,
      "learning_rate": 6.166264906333038e-05,
      "loss": 1.5046,
      "step": 3105
    },
    {
      "epoch": 0.6367363673636737,
      "grad_norm": 0.40139532139574635,
      "learning_rate": 6.160132684580632e-05,
      "loss": 1.4976,
      "step": 3106
    },
    {
      "epoch": 0.6369413694136942,
      "grad_norm": 0.4391190505685638,
      "learning_rate": 6.154002156035212e-05,
      "loss": 1.5184,
      "step": 3107
    },
    {
      "epoch": 0.6371463714637147,
      "grad_norm": 0.42102805174258645,
      "learning_rate": 6.147873323400057e-05,
      "loss": 1.5964,
      "step": 3108
    },
    {
      "epoch": 0.6373513735137352,
      "grad_norm": 0.47359380008561136,
      "learning_rate": 6.14174618937771e-05,
      "loss": 1.5208,
      "step": 3109
    },
    {
      "epoch": 0.6375563755637557,
      "grad_norm": 0.4229507042072085,
      "learning_rate": 6.135620756669953e-05,
      "loss": 1.5073,
      "step": 3110
    },
    {
      "epoch": 0.6377613776137762,
      "grad_norm": 0.41421593544702223,
      "learning_rate": 6.129497027977829e-05,
      "loss": 1.5434,
      "step": 3111
    },
    {
      "epoch": 0.6379663796637967,
      "grad_norm": 0.4652491878822899,
      "learning_rate": 6.123375006001621e-05,
      "loss": 1.5935,
      "step": 3112
    },
    {
      "epoch": 0.6381713817138172,
      "grad_norm": 0.4805623615374848,
      "learning_rate": 6.117254693440864e-05,
      "loss": 1.6429,
      "step": 3113
    },
    {
      "epoch": 0.6383763837638377,
      "grad_norm": 0.44277759439079867,
      "learning_rate": 6.111136092994334e-05,
      "loss": 1.5765,
      "step": 3114
    },
    {
      "epoch": 0.6385813858138581,
      "grad_norm": 0.44702645540004915,
      "learning_rate": 6.105019207360056e-05,
      "loss": 1.5363,
      "step": 3115
    },
    {
      "epoch": 0.6387863878638786,
      "grad_norm": 0.4859048689662486,
      "learning_rate": 6.0989040392353045e-05,
      "loss": 1.5809,
      "step": 3116
    },
    {
      "epoch": 0.6389913899138991,
      "grad_norm": 0.45622043939948925,
      "learning_rate": 6.092790591316586e-05,
      "loss": 1.55,
      "step": 3117
    },
    {
      "epoch": 0.6391963919639196,
      "grad_norm": 0.45598645945246613,
      "learning_rate": 6.0866788662996566e-05,
      "loss": 1.5495,
      "step": 3118
    },
    {
      "epoch": 0.6394013940139401,
      "grad_norm": 0.48336684790519213,
      "learning_rate": 6.080568866879504e-05,
      "loss": 1.502,
      "step": 3119
    },
    {
      "epoch": 0.6396063960639606,
      "grad_norm": 0.4475737666285707,
      "learning_rate": 6.074460595750362e-05,
      "loss": 1.5202,
      "step": 3120
    },
    {
      "epoch": 0.6398113981139811,
      "grad_norm": 0.439372112039752,
      "learning_rate": 6.068354055605705e-05,
      "loss": 1.5591,
      "step": 3121
    },
    {
      "epoch": 0.6400164001640016,
      "grad_norm": 0.4896458467327331,
      "learning_rate": 6.0622492491382355e-05,
      "loss": 1.5066,
      "step": 3122
    },
    {
      "epoch": 0.6402214022140221,
      "grad_norm": 0.4758426880877212,
      "learning_rate": 6.056146179039899e-05,
      "loss": 1.5882,
      "step": 3123
    },
    {
      "epoch": 0.6404264042640426,
      "grad_norm": 0.42875447883323625,
      "learning_rate": 6.050044848001866e-05,
      "loss": 1.5841,
      "step": 3124
    },
    {
      "epoch": 0.6406314063140631,
      "grad_norm": 0.44377805630181605,
      "learning_rate": 6.043945258714553e-05,
      "loss": 1.5293,
      "step": 3125
    },
    {
      "epoch": 0.6408364083640836,
      "grad_norm": 0.4687476067771096,
      "learning_rate": 6.037847413867594e-05,
      "loss": 1.5161,
      "step": 3126
    },
    {
      "epoch": 0.6410414104141041,
      "grad_norm": 0.48434801119612425,
      "learning_rate": 6.03175131614987e-05,
      "loss": 1.5752,
      "step": 3127
    },
    {
      "epoch": 0.6412464124641246,
      "grad_norm": 0.44642566908429804,
      "learning_rate": 6.025656968249479e-05,
      "loss": 1.5015,
      "step": 3128
    },
    {
      "epoch": 0.6414514145141451,
      "grad_norm": 0.4948960693915527,
      "learning_rate": 6.01956437285375e-05,
      "loss": 1.5151,
      "step": 3129
    },
    {
      "epoch": 0.6416564165641656,
      "grad_norm": 0.44075415990186484,
      "learning_rate": 6.0134735326492456e-05,
      "loss": 1.4731,
      "step": 3130
    },
    {
      "epoch": 0.6418614186141861,
      "grad_norm": 0.43010664451961794,
      "learning_rate": 6.0073844503217416e-05,
      "loss": 1.4747,
      "step": 3131
    },
    {
      "epoch": 0.6420664206642066,
      "grad_norm": 0.4980942805847485,
      "learning_rate": 6.001297128556254e-05,
      "loss": 1.6157,
      "step": 3132
    },
    {
      "epoch": 0.6422714227142271,
      "grad_norm": 0.4839110453278517,
      "learning_rate": 5.995211570037013e-05,
      "loss": 1.5369,
      "step": 3133
    },
    {
      "epoch": 0.6424764247642476,
      "grad_norm": 0.48502879785065045,
      "learning_rate": 5.9891277774474706e-05,
      "loss": 1.5872,
      "step": 3134
    },
    {
      "epoch": 0.6426814268142681,
      "grad_norm": 0.46185359359460143,
      "learning_rate": 5.983045753470308e-05,
      "loss": 1.5725,
      "step": 3135
    },
    {
      "epoch": 0.6428864288642886,
      "grad_norm": 0.431272990971999,
      "learning_rate": 5.9769655007874135e-05,
      "loss": 1.5558,
      "step": 3136
    },
    {
      "epoch": 0.6430914309143091,
      "grad_norm": 0.45513265309122675,
      "learning_rate": 5.97088702207991e-05,
      "loss": 1.6232,
      "step": 3137
    },
    {
      "epoch": 0.6432964329643296,
      "grad_norm": 0.46927313281518557,
      "learning_rate": 5.964810320028129e-05,
      "loss": 1.5666,
      "step": 3138
    },
    {
      "epoch": 0.6435014350143502,
      "grad_norm": 0.4662299114881269,
      "learning_rate": 5.958735397311617e-05,
      "loss": 1.5095,
      "step": 3139
    },
    {
      "epoch": 0.6437064370643707,
      "grad_norm": 0.47693210481377235,
      "learning_rate": 5.9526622566091404e-05,
      "loss": 1.5459,
      "step": 3140
    },
    {
      "epoch": 0.6439114391143912,
      "grad_norm": 0.4575681722182784,
      "learning_rate": 5.946590900598676e-05,
      "loss": 1.5084,
      "step": 3141
    },
    {
      "epoch": 0.6441164411644117,
      "grad_norm": 0.42513599949011427,
      "learning_rate": 5.940521331957418e-05,
      "loss": 1.5618,
      "step": 3142
    },
    {
      "epoch": 0.6443214432144322,
      "grad_norm": 0.4498039731205022,
      "learning_rate": 5.934453553361774e-05,
      "loss": 1.552,
      "step": 3143
    },
    {
      "epoch": 0.6445264452644527,
      "grad_norm": 0.4746241405188141,
      "learning_rate": 5.928387567487352e-05,
      "loss": 1.5377,
      "step": 3144
    },
    {
      "epoch": 0.6447314473144732,
      "grad_norm": 0.46130790069385874,
      "learning_rate": 5.9223233770089805e-05,
      "loss": 1.5467,
      "step": 3145
    },
    {
      "epoch": 0.6449364493644937,
      "grad_norm": 0.4683276467710355,
      "learning_rate": 5.91626098460069e-05,
      "loss": 1.5173,
      "step": 3146
    },
    {
      "epoch": 0.6451414514145142,
      "grad_norm": 0.4583855578057574,
      "learning_rate": 5.9102003929357176e-05,
      "loss": 1.5856,
      "step": 3147
    },
    {
      "epoch": 0.6453464534645347,
      "grad_norm": 0.4573042182018581,
      "learning_rate": 5.904141604686515e-05,
      "loss": 1.4664,
      "step": 3148
    },
    {
      "epoch": 0.6455514555145552,
      "grad_norm": 0.417714253556272,
      "learning_rate": 5.8980846225247286e-05,
      "loss": 1.4652,
      "step": 3149
    },
    {
      "epoch": 0.6457564575645757,
      "grad_norm": 0.4687947482213487,
      "learning_rate": 5.8920294491212135e-05,
      "loss": 1.5532,
      "step": 3150
    },
    {
      "epoch": 0.6459614596145962,
      "grad_norm": 0.47899408800382387,
      "learning_rate": 5.885976087146023e-05,
      "loss": 1.6239,
      "step": 3151
    },
    {
      "epoch": 0.6461664616646167,
      "grad_norm": 0.47064112213968223,
      "learning_rate": 5.879924539268421e-05,
      "loss": 1.513,
      "step": 3152
    },
    {
      "epoch": 0.6463714637146372,
      "grad_norm": 0.4678637247192265,
      "learning_rate": 5.873874808156856e-05,
      "loss": 1.5423,
      "step": 3153
    },
    {
      "epoch": 0.6465764657646577,
      "grad_norm": 0.4546401482607076,
      "learning_rate": 5.8678268964789917e-05,
      "loss": 1.5566,
      "step": 3154
    },
    {
      "epoch": 0.6467814678146782,
      "grad_norm": 0.46931630190362994,
      "learning_rate": 5.861780806901682e-05,
      "loss": 1.5479,
      "step": 3155
    },
    {
      "epoch": 0.6469864698646987,
      "grad_norm": 0.4940517346671692,
      "learning_rate": 5.855736542090973e-05,
      "loss": 1.5689,
      "step": 3156
    },
    {
      "epoch": 0.6471914719147192,
      "grad_norm": 0.4275033835012508,
      "learning_rate": 5.8496941047121166e-05,
      "loss": 1.5947,
      "step": 3157
    },
    {
      "epoch": 0.6473964739647396,
      "grad_norm": 0.4676661954109531,
      "learning_rate": 5.843653497429546e-05,
      "loss": 1.5834,
      "step": 3158
    },
    {
      "epoch": 0.6476014760147601,
      "grad_norm": 0.4585392099430612,
      "learning_rate": 5.8376147229069e-05,
      "loss": 1.5086,
      "step": 3159
    },
    {
      "epoch": 0.6478064780647806,
      "grad_norm": 0.4370536235935077,
      "learning_rate": 5.831577783807005e-05,
      "loss": 1.6054,
      "step": 3160
    },
    {
      "epoch": 0.6480114801148011,
      "grad_norm": 0.4869207263266161,
      "learning_rate": 5.8255426827918736e-05,
      "loss": 1.6038,
      "step": 3161
    },
    {
      "epoch": 0.6482164821648216,
      "grad_norm": 0.44697840626255725,
      "learning_rate": 5.819509422522711e-05,
      "loss": 1.5771,
      "step": 3162
    },
    {
      "epoch": 0.6484214842148421,
      "grad_norm": 0.4372204434607352,
      "learning_rate": 5.813478005659905e-05,
      "loss": 1.5428,
      "step": 3163
    },
    {
      "epoch": 0.6486264862648626,
      "grad_norm": 0.44608074032163564,
      "learning_rate": 5.80744843486305e-05,
      "loss": 1.5268,
      "step": 3164
    },
    {
      "epoch": 0.6488314883148831,
      "grad_norm": 0.482804218009497,
      "learning_rate": 5.8014207127909046e-05,
      "loss": 1.5367,
      "step": 3165
    },
    {
      "epoch": 0.6490364903649036,
      "grad_norm": 0.44096838777295444,
      "learning_rate": 5.795394842101423e-05,
      "loss": 1.5316,
      "step": 3166
    },
    {
      "epoch": 0.6492414924149241,
      "grad_norm": 0.4031261525943413,
      "learning_rate": 5.789370825451737e-05,
      "loss": 1.474,
      "step": 3167
    },
    {
      "epoch": 0.6494464944649446,
      "grad_norm": 0.4685714831295918,
      "learning_rate": 5.7833486654981606e-05,
      "loss": 1.5835,
      "step": 3168
    },
    {
      "epoch": 0.6496514965149651,
      "grad_norm": 0.4502305900481912,
      "learning_rate": 5.7773283648961995e-05,
      "loss": 1.5674,
      "step": 3169
    },
    {
      "epoch": 0.6498564985649856,
      "grad_norm": 0.47007172799125435,
      "learning_rate": 5.771309926300534e-05,
      "loss": 1.5817,
      "step": 3170
    },
    {
      "epoch": 0.6500615006150061,
      "grad_norm": 0.4636312018459254,
      "learning_rate": 5.7652933523650197e-05,
      "loss": 1.5721,
      "step": 3171
    },
    {
      "epoch": 0.6502665026650266,
      "grad_norm": 0.46894903589726933,
      "learning_rate": 5.759278645742692e-05,
      "loss": 1.5265,
      "step": 3172
    },
    {
      "epoch": 0.6504715047150471,
      "grad_norm": 0.39333860872717535,
      "learning_rate": 5.753265809085757e-05,
      "loss": 1.5062,
      "step": 3173
    },
    {
      "epoch": 0.6506765067650676,
      "grad_norm": 0.4329914069624682,
      "learning_rate": 5.7472548450456086e-05,
      "loss": 1.5056,
      "step": 3174
    },
    {
      "epoch": 0.6508815088150881,
      "grad_norm": 0.4356872910419979,
      "learning_rate": 5.741245756272813e-05,
      "loss": 1.5391,
      "step": 3175
    },
    {
      "epoch": 0.6510865108651086,
      "grad_norm": 0.4015684419558019,
      "learning_rate": 5.735238545417101e-05,
      "loss": 1.519,
      "step": 3176
    },
    {
      "epoch": 0.6512915129151291,
      "grad_norm": 0.5035392397900486,
      "learning_rate": 5.729233215127378e-05,
      "loss": 1.6376,
      "step": 3177
    },
    {
      "epoch": 0.6514965149651496,
      "grad_norm": 0.45269166269271116,
      "learning_rate": 5.723229768051719e-05,
      "loss": 1.6163,
      "step": 3178
    },
    {
      "epoch": 0.6517015170151702,
      "grad_norm": 0.4479504049392582,
      "learning_rate": 5.717228206837375e-05,
      "loss": 1.4901,
      "step": 3179
    },
    {
      "epoch": 0.6519065190651907,
      "grad_norm": 0.47281687678925094,
      "learning_rate": 5.711228534130766e-05,
      "loss": 1.5553,
      "step": 3180
    },
    {
      "epoch": 0.6521115211152112,
      "grad_norm": 0.44998228518805594,
      "learning_rate": 5.7052307525774704e-05,
      "loss": 1.6461,
      "step": 3181
    },
    {
      "epoch": 0.6523165231652317,
      "grad_norm": 0.43728922449546276,
      "learning_rate": 5.699234864822239e-05,
      "loss": 1.5376,
      "step": 3182
    },
    {
      "epoch": 0.6525215252152522,
      "grad_norm": 0.4618517835386227,
      "learning_rate": 5.6932408735089804e-05,
      "loss": 1.5749,
      "step": 3183
    },
    {
      "epoch": 0.6527265272652727,
      "grad_norm": 0.47972418886107165,
      "learning_rate": 5.687248781280781e-05,
      "loss": 1.5501,
      "step": 3184
    },
    {
      "epoch": 0.6529315293152932,
      "grad_norm": 0.49097487992129674,
      "learning_rate": 5.681258590779872e-05,
      "loss": 1.5995,
      "step": 3185
    },
    {
      "epoch": 0.6531365313653137,
      "grad_norm": 0.4101905104609192,
      "learning_rate": 5.675270304647664e-05,
      "loss": 1.5156,
      "step": 3186
    },
    {
      "epoch": 0.6533415334153342,
      "grad_norm": 0.4578466938795278,
      "learning_rate": 5.669283925524715e-05,
      "loss": 1.5198,
      "step": 3187
    },
    {
      "epoch": 0.6535465354653547,
      "grad_norm": 0.4252736251730851,
      "learning_rate": 5.663299456050743e-05,
      "loss": 1.525,
      "step": 3188
    },
    {
      "epoch": 0.6537515375153752,
      "grad_norm": 0.4693685063599757,
      "learning_rate": 5.657316898864634e-05,
      "loss": 1.543,
      "step": 3189
    },
    {
      "epoch": 0.6539565395653957,
      "grad_norm": 0.4534799718928922,
      "learning_rate": 5.6513362566044156e-05,
      "loss": 1.5692,
      "step": 3190
    },
    {
      "epoch": 0.6541615416154162,
      "grad_norm": 0.5014795228701405,
      "learning_rate": 5.645357531907288e-05,
      "loss": 1.5632,
      "step": 3191
    },
    {
      "epoch": 0.6543665436654367,
      "grad_norm": 0.47029548478402916,
      "learning_rate": 5.639380727409593e-05,
      "loss": 1.5434,
      "step": 3192
    },
    {
      "epoch": 0.6545715457154572,
      "grad_norm": 0.4814289997456462,
      "learning_rate": 5.633405845746826e-05,
      "loss": 1.5905,
      "step": 3193
    },
    {
      "epoch": 0.6547765477654777,
      "grad_norm": 0.44070980057757314,
      "learning_rate": 5.6274328895536453e-05,
      "loss": 1.4297,
      "step": 3194
    },
    {
      "epoch": 0.6549815498154982,
      "grad_norm": 0.45404925459138346,
      "learning_rate": 5.621461861463846e-05,
      "loss": 1.5642,
      "step": 3195
    },
    {
      "epoch": 0.6551865518655187,
      "grad_norm": 0.4681611431930925,
      "learning_rate": 5.615492764110388e-05,
      "loss": 1.5062,
      "step": 3196
    },
    {
      "epoch": 0.6553915539155392,
      "grad_norm": 0.4803046238257875,
      "learning_rate": 5.6095256001253674e-05,
      "loss": 1.5625,
      "step": 3197
    },
    {
      "epoch": 0.6555965559655597,
      "grad_norm": 0.43861031958124674,
      "learning_rate": 5.6035603721400286e-05,
      "loss": 1.5785,
      "step": 3198
    },
    {
      "epoch": 0.6558015580155802,
      "grad_norm": 0.4450815930691942,
      "learning_rate": 5.597597082784776e-05,
      "loss": 1.5822,
      "step": 3199
    },
    {
      "epoch": 0.6560065600656007,
      "grad_norm": 0.42804738567236916,
      "learning_rate": 5.59163573468914e-05,
      "loss": 1.576,
      "step": 3200
    },
    {
      "epoch": 0.6562115621156211,
      "grad_norm": 0.4095625581118568,
      "learning_rate": 5.585676330481806e-05,
      "loss": 1.5105,
      "step": 3201
    },
    {
      "epoch": 0.6564165641656416,
      "grad_norm": 0.4052058646869245,
      "learning_rate": 5.5797188727906066e-05,
      "loss": 1.5147,
      "step": 3202
    },
    {
      "epoch": 0.6566215662156621,
      "grad_norm": 0.44528867818643275,
      "learning_rate": 5.5737633642425e-05,
      "loss": 1.5739,
      "step": 3203
    },
    {
      "epoch": 0.6568265682656826,
      "grad_norm": 0.45313991909875634,
      "learning_rate": 5.567809807463606e-05,
      "loss": 1.583,
      "step": 3204
    },
    {
      "epoch": 0.6570315703157031,
      "grad_norm": 0.4707462858715875,
      "learning_rate": 5.561858205079165e-05,
      "loss": 1.5937,
      "step": 3205
    },
    {
      "epoch": 0.6572365723657236,
      "grad_norm": 0.47426278351042955,
      "learning_rate": 5.555908559713561e-05,
      "loss": 1.6182,
      "step": 3206
    },
    {
      "epoch": 0.6574415744157441,
      "grad_norm": 0.4475531963539952,
      "learning_rate": 5.549960873990325e-05,
      "loss": 1.5851,
      "step": 3207
    },
    {
      "epoch": 0.6576465764657646,
      "grad_norm": 0.40005703959252115,
      "learning_rate": 5.544015150532109e-05,
      "loss": 1.5213,
      "step": 3208
    },
    {
      "epoch": 0.6578515785157851,
      "grad_norm": 0.43718927586236406,
      "learning_rate": 5.538071391960715e-05,
      "loss": 1.5161,
      "step": 3209
    },
    {
      "epoch": 0.6580565805658056,
      "grad_norm": 0.4504802482071545,
      "learning_rate": 5.5321296008970646e-05,
      "loss": 1.5618,
      "step": 3210
    },
    {
      "epoch": 0.6582615826158261,
      "grad_norm": 0.43689191329818405,
      "learning_rate": 5.526189779961215e-05,
      "loss": 1.5378,
      "step": 3211
    },
    {
      "epoch": 0.6584665846658466,
      "grad_norm": 0.4447015078021641,
      "learning_rate": 5.520251931772364e-05,
      "loss": 1.5398,
      "step": 3212
    },
    {
      "epoch": 0.6586715867158671,
      "grad_norm": 0.4398048376276036,
      "learning_rate": 5.514316058948827e-05,
      "loss": 1.5153,
      "step": 3213
    },
    {
      "epoch": 0.6588765887658876,
      "grad_norm": 0.4308897243836838,
      "learning_rate": 5.508382164108059e-05,
      "loss": 1.5565,
      "step": 3214
    },
    {
      "epoch": 0.6590815908159081,
      "grad_norm": 0.4270836856207494,
      "learning_rate": 5.5024502498666375e-05,
      "loss": 1.5176,
      "step": 3215
    },
    {
      "epoch": 0.6592865928659286,
      "grad_norm": 0.45852784704097116,
      "learning_rate": 5.496520318840266e-05,
      "loss": 1.594,
      "step": 3216
    },
    {
      "epoch": 0.6594915949159491,
      "grad_norm": 0.48635835007725897,
      "learning_rate": 5.490592373643768e-05,
      "loss": 1.5736,
      "step": 3217
    },
    {
      "epoch": 0.6596965969659696,
      "grad_norm": 0.4684679165980292,
      "learning_rate": 5.484666416891109e-05,
      "loss": 1.5488,
      "step": 3218
    },
    {
      "epoch": 0.6599015990159902,
      "grad_norm": 0.4344646996812627,
      "learning_rate": 5.478742451195358e-05,
      "loss": 1.5833,
      "step": 3219
    },
    {
      "epoch": 0.6601066010660107,
      "grad_norm": 0.44405561678189354,
      "learning_rate": 5.472820479168721e-05,
      "loss": 1.484,
      "step": 3220
    },
    {
      "epoch": 0.6603116031160312,
      "grad_norm": 0.4440539122831839,
      "learning_rate": 5.466900503422516e-05,
      "loss": 1.4991,
      "step": 3221
    },
    {
      "epoch": 0.6605166051660517,
      "grad_norm": 0.44579948169081185,
      "learning_rate": 5.46098252656718e-05,
      "loss": 1.5371,
      "step": 3222
    },
    {
      "epoch": 0.6607216072160722,
      "grad_norm": 0.4639735878052296,
      "learning_rate": 5.455066551212278e-05,
      "loss": 1.5072,
      "step": 3223
    },
    {
      "epoch": 0.6609266092660927,
      "grad_norm": 0.4371141112010233,
      "learning_rate": 5.44915257996648e-05,
      "loss": 1.5633,
      "step": 3224
    },
    {
      "epoch": 0.6611316113161132,
      "grad_norm": 0.4671515981857899,
      "learning_rate": 5.443240615437586e-05,
      "loss": 1.6421,
      "step": 3225
    },
    {
      "epoch": 0.6613366133661337,
      "grad_norm": 0.45830368772484725,
      "learning_rate": 5.437330660232498e-05,
      "loss": 1.493,
      "step": 3226
    },
    {
      "epoch": 0.6615416154161542,
      "grad_norm": 0.423427476354327,
      "learning_rate": 5.431422716957236e-05,
      "loss": 1.4763,
      "step": 3227
    },
    {
      "epoch": 0.6617466174661747,
      "grad_norm": 0.43795866049945825,
      "learning_rate": 5.4255167882169424e-05,
      "loss": 1.5489,
      "step": 3228
    },
    {
      "epoch": 0.6619516195161952,
      "grad_norm": 0.45181197556742075,
      "learning_rate": 5.419612876615854e-05,
      "loss": 1.5402,
      "step": 3229
    },
    {
      "epoch": 0.6621566215662157,
      "grad_norm": 0.4701909936533608,
      "learning_rate": 5.413710984757335e-05,
      "loss": 1.5774,
      "step": 3230
    },
    {
      "epoch": 0.6623616236162362,
      "grad_norm": 0.44082413536912224,
      "learning_rate": 5.407811115243849e-05,
      "loss": 1.5174,
      "step": 3231
    },
    {
      "epoch": 0.6625666256662567,
      "grad_norm": 0.5038142283139778,
      "learning_rate": 5.4019132706769706e-05,
      "loss": 1.5697,
      "step": 3232
    },
    {
      "epoch": 0.6627716277162772,
      "grad_norm": 0.4519124230988949,
      "learning_rate": 5.396017453657376e-05,
      "loss": 1.6061,
      "step": 3233
    },
    {
      "epoch": 0.6629766297662977,
      "grad_norm": 0.4543715581578266,
      "learning_rate": 5.3901236667848586e-05,
      "loss": 1.4984,
      "step": 3234
    },
    {
      "epoch": 0.6631816318163182,
      "grad_norm": 0.5382084445769868,
      "learning_rate": 5.384231912658311e-05,
      "loss": 1.6021,
      "step": 3235
    },
    {
      "epoch": 0.6633866338663387,
      "grad_norm": 0.4543702242725902,
      "learning_rate": 5.3783421938757286e-05,
      "loss": 1.5822,
      "step": 3236
    },
    {
      "epoch": 0.6635916359163592,
      "grad_norm": 0.43404116282001404,
      "learning_rate": 5.3724545130342074e-05,
      "loss": 1.5109,
      "step": 3237
    },
    {
      "epoch": 0.6637966379663797,
      "grad_norm": 0.49184910316176467,
      "learning_rate": 5.3665688727299444e-05,
      "loss": 1.4924,
      "step": 3238
    },
    {
      "epoch": 0.6640016400164002,
      "grad_norm": 0.45457346271665605,
      "learning_rate": 5.360685275558244e-05,
      "loss": 1.5243,
      "step": 3239
    },
    {
      "epoch": 0.6642066420664207,
      "grad_norm": 0.45648174844956557,
      "learning_rate": 5.3548037241135065e-05,
      "loss": 1.5364,
      "step": 3240
    },
    {
      "epoch": 0.6644116441164412,
      "grad_norm": 0.4304824196228037,
      "learning_rate": 5.348924220989227e-05,
      "loss": 1.5507,
      "step": 3241
    },
    {
      "epoch": 0.6646166461664617,
      "grad_norm": 0.4132082078195509,
      "learning_rate": 5.3430467687779985e-05,
      "loss": 1.518,
      "step": 3242
    },
    {
      "epoch": 0.6648216482164822,
      "grad_norm": 0.44485408074420546,
      "learning_rate": 5.337171370071508e-05,
      "loss": 1.5063,
      "step": 3243
    },
    {
      "epoch": 0.6650266502665027,
      "grad_norm": 0.4610816590170921,
      "learning_rate": 5.331298027460539e-05,
      "loss": 1.49,
      "step": 3244
    },
    {
      "epoch": 0.6652316523165231,
      "grad_norm": 0.45830666473374365,
      "learning_rate": 5.325426743534978e-05,
      "loss": 1.6203,
      "step": 3245
    },
    {
      "epoch": 0.6654366543665436,
      "grad_norm": 0.4364399927710269,
      "learning_rate": 5.3195575208837865e-05,
      "loss": 1.5124,
      "step": 3246
    },
    {
      "epoch": 0.6656416564165641,
      "grad_norm": 0.44074298374785986,
      "learning_rate": 5.3136903620950276e-05,
      "loss": 1.5157,
      "step": 3247
    },
    {
      "epoch": 0.6658466584665846,
      "grad_norm": 0.48338441506509955,
      "learning_rate": 5.3078252697558464e-05,
      "loss": 1.5949,
      "step": 3248
    },
    {
      "epoch": 0.6660516605166051,
      "grad_norm": 0.42034387716725086,
      "learning_rate": 5.301962246452485e-05,
      "loss": 1.5054,
      "step": 3249
    },
    {
      "epoch": 0.6662566625666256,
      "grad_norm": 0.45845655682944,
      "learning_rate": 5.296101294770276e-05,
      "loss": 1.4709,
      "step": 3250
    },
    {
      "epoch": 0.6664616646166461,
      "grad_norm": 0.48613464354798214,
      "learning_rate": 5.290242417293628e-05,
      "loss": 1.5544,
      "step": 3251
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 0.46059748913873183,
      "learning_rate": 5.28438561660604e-05,
      "loss": 1.5271,
      "step": 3252
    },
    {
      "epoch": 0.6668716687166871,
      "grad_norm": 0.4780978020855852,
      "learning_rate": 5.278530895290091e-05,
      "loss": 1.5691,
      "step": 3253
    },
    {
      "epoch": 0.6670766707667076,
      "grad_norm": 0.4893768677333458,
      "learning_rate": 5.27267825592745e-05,
      "loss": 1.5178,
      "step": 3254
    },
    {
      "epoch": 0.6672816728167281,
      "grad_norm": 0.46629887929368835,
      "learning_rate": 5.266827701098871e-05,
      "loss": 1.5838,
      "step": 3255
    },
    {
      "epoch": 0.6674866748667486,
      "grad_norm": 0.4871967092805627,
      "learning_rate": 5.260979233384178e-05,
      "loss": 1.5383,
      "step": 3256
    },
    {
      "epoch": 0.6676916769167691,
      "grad_norm": 0.41135390415159157,
      "learning_rate": 5.255132855362277e-05,
      "loss": 1.5044,
      "step": 3257
    },
    {
      "epoch": 0.6678966789667896,
      "grad_norm": 0.40528359300517297,
      "learning_rate": 5.249288569611155e-05,
      "loss": 1.4979,
      "step": 3258
    },
    {
      "epoch": 0.6681016810168102,
      "grad_norm": 0.4689360016431024,
      "learning_rate": 5.2434463787078816e-05,
      "loss": 1.5467,
      "step": 3259
    },
    {
      "epoch": 0.6683066830668307,
      "grad_norm": 0.46261930828341913,
      "learning_rate": 5.237606285228591e-05,
      "loss": 1.5157,
      "step": 3260
    },
    {
      "epoch": 0.6685116851168512,
      "grad_norm": 0.4159058853386433,
      "learning_rate": 5.2317682917485055e-05,
      "loss": 1.544,
      "step": 3261
    },
    {
      "epoch": 0.6687166871668717,
      "grad_norm": 0.4564289006751174,
      "learning_rate": 5.2259324008419116e-05,
      "loss": 1.5863,
      "step": 3262
    },
    {
      "epoch": 0.6689216892168922,
      "grad_norm": 0.389663625442729,
      "learning_rate": 5.2200986150821696e-05,
      "loss": 1.5405,
      "step": 3263
    },
    {
      "epoch": 0.6691266912669127,
      "grad_norm": 0.4177727704252784,
      "learning_rate": 5.2142669370417205e-05,
      "loss": 1.5592,
      "step": 3264
    },
    {
      "epoch": 0.6693316933169332,
      "grad_norm": 0.41924533000753,
      "learning_rate": 5.208437369292061e-05,
      "loss": 1.5364,
      "step": 3265
    },
    {
      "epoch": 0.6695366953669537,
      "grad_norm": 0.41375648844573704,
      "learning_rate": 5.202609914403773e-05,
      "loss": 1.5578,
      "step": 3266
    },
    {
      "epoch": 0.6697416974169742,
      "grad_norm": 0.4499001075657649,
      "learning_rate": 5.196784574946496e-05,
      "loss": 1.5027,
      "step": 3267
    },
    {
      "epoch": 0.6699466994669947,
      "grad_norm": 0.44061258187447844,
      "learning_rate": 5.190961353488941e-05,
      "loss": 1.4833,
      "step": 3268
    },
    {
      "epoch": 0.6701517015170152,
      "grad_norm": 0.4564597456267822,
      "learning_rate": 5.18514025259888e-05,
      "loss": 1.5635,
      "step": 3269
    },
    {
      "epoch": 0.6703567035670357,
      "grad_norm": 0.4876368927800723,
      "learning_rate": 5.179321274843156e-05,
      "loss": 1.6083,
      "step": 3270
    },
    {
      "epoch": 0.6705617056170562,
      "grad_norm": 0.41844214114797085,
      "learning_rate": 5.173504422787679e-05,
      "loss": 1.5222,
      "step": 3271
    },
    {
      "epoch": 0.6707667076670767,
      "grad_norm": 0.39595399126139963,
      "learning_rate": 5.167689698997413e-05,
      "loss": 1.4209,
      "step": 3272
    },
    {
      "epoch": 0.6709717097170972,
      "grad_norm": 0.4353923004952022,
      "learning_rate": 5.161877106036386e-05,
      "loss": 1.5611,
      "step": 3273
    },
    {
      "epoch": 0.6711767117671177,
      "grad_norm": 0.48766291706185577,
      "learning_rate": 5.156066646467683e-05,
      "loss": 1.5934,
      "step": 3274
    },
    {
      "epoch": 0.6713817138171382,
      "grad_norm": 0.41107440161040765,
      "learning_rate": 5.150258322853461e-05,
      "loss": 1.5048,
      "step": 3275
    },
    {
      "epoch": 0.6715867158671587,
      "grad_norm": 0.4089277441236762,
      "learning_rate": 5.1444521377549204e-05,
      "loss": 1.4841,
      "step": 3276
    },
    {
      "epoch": 0.6717917179171792,
      "grad_norm": 0.4200621045097158,
      "learning_rate": 5.13864809373233e-05,
      "loss": 1.5143,
      "step": 3277
    },
    {
      "epoch": 0.6719967199671997,
      "grad_norm": 0.4524101612533242,
      "learning_rate": 5.132846193345007e-05,
      "loss": 1.5806,
      "step": 3278
    },
    {
      "epoch": 0.6722017220172202,
      "grad_norm": 0.4868826051583284,
      "learning_rate": 5.1270464391513215e-05,
      "loss": 1.5266,
      "step": 3279
    },
    {
      "epoch": 0.6724067240672407,
      "grad_norm": 0.4352576675761379,
      "learning_rate": 5.1212488337087114e-05,
      "loss": 1.5244,
      "step": 3280
    },
    {
      "epoch": 0.6726117261172612,
      "grad_norm": 0.47956965952267,
      "learning_rate": 5.115453379573647e-05,
      "loss": 1.5969,
      "step": 3281
    },
    {
      "epoch": 0.6728167281672817,
      "grad_norm": 0.4109248912388438,
      "learning_rate": 5.109660079301668e-05,
      "loss": 1.5469,
      "step": 3282
    },
    {
      "epoch": 0.6730217302173022,
      "grad_norm": 0.42311964836028715,
      "learning_rate": 5.103868935447354e-05,
      "loss": 1.553,
      "step": 3283
    },
    {
      "epoch": 0.6732267322673227,
      "grad_norm": 0.4251106940803215,
      "learning_rate": 5.098079950564332e-05,
      "loss": 1.5754,
      "step": 3284
    },
    {
      "epoch": 0.6734317343173432,
      "grad_norm": 0.4142761889856098,
      "learning_rate": 5.092293127205288e-05,
      "loss": 1.5349,
      "step": 3285
    },
    {
      "epoch": 0.6736367363673637,
      "grad_norm": 0.4786030878073973,
      "learning_rate": 5.086508467921942e-05,
      "loss": 1.6177,
      "step": 3286
    },
    {
      "epoch": 0.6738417384173842,
      "grad_norm": 0.4448873820802366,
      "learning_rate": 5.080725975265073e-05,
      "loss": 1.564,
      "step": 3287
    },
    {
      "epoch": 0.6740467404674046,
      "grad_norm": 0.42762099261673503,
      "learning_rate": 5.074945651784491e-05,
      "loss": 1.4838,
      "step": 3288
    },
    {
      "epoch": 0.6742517425174251,
      "grad_norm": 0.43942819574509145,
      "learning_rate": 5.069167500029056e-05,
      "loss": 1.4998,
      "step": 3289
    },
    {
      "epoch": 0.6744567445674456,
      "grad_norm": 0.4593525714765017,
      "learning_rate": 5.0633915225466765e-05,
      "loss": 1.5516,
      "step": 3290
    },
    {
      "epoch": 0.6746617466174661,
      "grad_norm": 0.46519959158845237,
      "learning_rate": 5.0576177218842925e-05,
      "loss": 1.5615,
      "step": 3291
    },
    {
      "epoch": 0.6748667486674866,
      "grad_norm": 0.44795557772312,
      "learning_rate": 5.051846100587882e-05,
      "loss": 1.5078,
      "step": 3292
    },
    {
      "epoch": 0.6750717507175071,
      "grad_norm": 0.4374852977059392,
      "learning_rate": 5.046076661202479e-05,
      "loss": 1.5441,
      "step": 3293
    },
    {
      "epoch": 0.6752767527675276,
      "grad_norm": 0.4518116152808659,
      "learning_rate": 5.040309406272135e-05,
      "loss": 1.5561,
      "step": 3294
    },
    {
      "epoch": 0.6754817548175481,
      "grad_norm": 0.45109543035592686,
      "learning_rate": 5.034544338339953e-05,
      "loss": 1.4883,
      "step": 3295
    },
    {
      "epoch": 0.6756867568675686,
      "grad_norm": 0.4595021682880069,
      "learning_rate": 5.028781459948062e-05,
      "loss": 1.5424,
      "step": 3296
    },
    {
      "epoch": 0.6758917589175891,
      "grad_norm": 0.4580091531377571,
      "learning_rate": 5.02302077363763e-05,
      "loss": 1.5855,
      "step": 3297
    },
    {
      "epoch": 0.6760967609676096,
      "grad_norm": 0.5037125658735325,
      "learning_rate": 5.01726228194886e-05,
      "loss": 1.6201,
      "step": 3298
    },
    {
      "epoch": 0.6763017630176302,
      "grad_norm": 0.4596821987836919,
      "learning_rate": 5.011505987420982e-05,
      "loss": 1.5818,
      "step": 3299
    },
    {
      "epoch": 0.6765067650676507,
      "grad_norm": 0.43253371192535567,
      "learning_rate": 5.005751892592265e-05,
      "loss": 1.5097,
      "step": 3300
    },
    {
      "epoch": 0.6767117671176712,
      "grad_norm": 0.4520097086298899,
      "learning_rate": 5.000000000000002e-05,
      "loss": 1.5738,
      "step": 3301
    },
    {
      "epoch": 0.6769167691676917,
      "grad_norm": 0.45520370211679917,
      "learning_rate": 4.9942503121805106e-05,
      "loss": 1.5482,
      "step": 3302
    },
    {
      "epoch": 0.6771217712177122,
      "grad_norm": 0.4190783952091041,
      "learning_rate": 4.9885028316691495e-05,
      "loss": 1.5249,
      "step": 3303
    },
    {
      "epoch": 0.6773267732677327,
      "grad_norm": 0.4167142270934749,
      "learning_rate": 4.9827575610002895e-05,
      "loss": 1.5026,
      "step": 3304
    },
    {
      "epoch": 0.6775317753177532,
      "grad_norm": 0.4794154988242696,
      "learning_rate": 4.977014502707341e-05,
      "loss": 1.478,
      "step": 3305
    },
    {
      "epoch": 0.6777367773677737,
      "grad_norm": 0.5122653574706574,
      "learning_rate": 4.9712736593227285e-05,
      "loss": 1.5957,
      "step": 3306
    },
    {
      "epoch": 0.6779417794177942,
      "grad_norm": 0.43751156107248174,
      "learning_rate": 4.9655350333779014e-05,
      "loss": 1.5047,
      "step": 3307
    },
    {
      "epoch": 0.6781467814678147,
      "grad_norm": 0.47794919233487926,
      "learning_rate": 4.9597986274033316e-05,
      "loss": 1.5505,
      "step": 3308
    },
    {
      "epoch": 0.6783517835178352,
      "grad_norm": 0.44200923561022265,
      "learning_rate": 4.9540644439285156e-05,
      "loss": 1.4832,
      "step": 3309
    },
    {
      "epoch": 0.6785567855678557,
      "grad_norm": 0.49638739841323437,
      "learning_rate": 4.9483324854819714e-05,
      "loss": 1.5028,
      "step": 3310
    },
    {
      "epoch": 0.6787617876178762,
      "grad_norm": 0.42288256790014744,
      "learning_rate": 4.942602754591229e-05,
      "loss": 1.4888,
      "step": 3311
    },
    {
      "epoch": 0.6789667896678967,
      "grad_norm": 0.5217752470278747,
      "learning_rate": 4.93687525378284e-05,
      "loss": 1.5406,
      "step": 3312
    },
    {
      "epoch": 0.6791717917179172,
      "grad_norm": 0.44987998197624973,
      "learning_rate": 4.931149985582367e-05,
      "loss": 1.5479,
      "step": 3313
    },
    {
      "epoch": 0.6793767937679377,
      "grad_norm": 0.45820887889094164,
      "learning_rate": 4.9254269525143984e-05,
      "loss": 1.5327,
      "step": 3314
    },
    {
      "epoch": 0.6795817958179582,
      "grad_norm": 0.4531900413901263,
      "learning_rate": 4.919706157102533e-05,
      "loss": 1.5592,
      "step": 3315
    },
    {
      "epoch": 0.6797867978679787,
      "grad_norm": 0.48091473278906766,
      "learning_rate": 4.9139876018693795e-05,
      "loss": 1.6223,
      "step": 3316
    },
    {
      "epoch": 0.6799917999179992,
      "grad_norm": 0.4563396264811171,
      "learning_rate": 4.908271289336561e-05,
      "loss": 1.5207,
      "step": 3317
    },
    {
      "epoch": 0.6801968019680197,
      "grad_norm": 0.4540193442254131,
      "learning_rate": 4.9025572220247076e-05,
      "loss": 1.53,
      "step": 3318
    },
    {
      "epoch": 0.6804018040180402,
      "grad_norm": 0.4633787206835466,
      "learning_rate": 4.896845402453466e-05,
      "loss": 1.5992,
      "step": 3319
    },
    {
      "epoch": 0.6806068060680607,
      "grad_norm": 0.4472115524336386,
      "learning_rate": 4.891135833141495e-05,
      "loss": 1.5669,
      "step": 3320
    },
    {
      "epoch": 0.6808118081180812,
      "grad_norm": 0.38442693827120805,
      "learning_rate": 4.8854285166064485e-05,
      "loss": 1.5442,
      "step": 3321
    },
    {
      "epoch": 0.6810168101681017,
      "grad_norm": 0.4214897296888067,
      "learning_rate": 4.879723455364996e-05,
      "loss": 1.5529,
      "step": 3322
    },
    {
      "epoch": 0.6812218122181222,
      "grad_norm": 0.45359008292760117,
      "learning_rate": 4.8740206519328105e-05,
      "loss": 1.4759,
      "step": 3323
    },
    {
      "epoch": 0.6814268142681427,
      "grad_norm": 0.46672478571553133,
      "learning_rate": 4.868320108824563e-05,
      "loss": 1.5533,
      "step": 3324
    },
    {
      "epoch": 0.6816318163181632,
      "grad_norm": 0.4712752016035746,
      "learning_rate": 4.86262182855394e-05,
      "loss": 1.5446,
      "step": 3325
    },
    {
      "epoch": 0.6818368183681837,
      "grad_norm": 0.47240359661998654,
      "learning_rate": 4.856925813633627e-05,
      "loss": 1.6226,
      "step": 3326
    },
    {
      "epoch": 0.6820418204182042,
      "grad_norm": 0.46320753602402087,
      "learning_rate": 4.8512320665753044e-05,
      "loss": 1.5242,
      "step": 3327
    },
    {
      "epoch": 0.6822468224682247,
      "grad_norm": 0.4199712064906492,
      "learning_rate": 4.8455405898896555e-05,
      "loss": 1.5466,
      "step": 3328
    },
    {
      "epoch": 0.6824518245182452,
      "grad_norm": 0.4323237118538542,
      "learning_rate": 4.839851386086358e-05,
      "loss": 1.5201,
      "step": 3329
    },
    {
      "epoch": 0.6826568265682657,
      "grad_norm": 0.481619147844327,
      "learning_rate": 4.8341644576740985e-05,
      "loss": 1.5663,
      "step": 3330
    },
    {
      "epoch": 0.6828618286182861,
      "grad_norm": 0.45568849997630817,
      "learning_rate": 4.828479807160557e-05,
      "loss": 1.5195,
      "step": 3331
    },
    {
      "epoch": 0.6830668306683066,
      "grad_norm": 0.85738628176556,
      "learning_rate": 4.8227974370524e-05,
      "loss": 1.5824,
      "step": 3332
    },
    {
      "epoch": 0.6832718327183271,
      "grad_norm": 0.4437049961280124,
      "learning_rate": 4.817117349855297e-05,
      "loss": 1.534,
      "step": 3333
    },
    {
      "epoch": 0.6834768347683476,
      "grad_norm": 0.42864014184004096,
      "learning_rate": 4.8114395480739025e-05,
      "loss": 1.4808,
      "step": 3334
    },
    {
      "epoch": 0.6836818368183681,
      "grad_norm": 0.477092822492682,
      "learning_rate": 4.805764034211876e-05,
      "loss": 1.5487,
      "step": 3335
    },
    {
      "epoch": 0.6838868388683886,
      "grad_norm": 0.48019371943636857,
      "learning_rate": 4.800090810771862e-05,
      "loss": 1.5015,
      "step": 3336
    },
    {
      "epoch": 0.6840918409184091,
      "grad_norm": 0.49980573444650966,
      "learning_rate": 4.794419880255492e-05,
      "loss": 1.5497,
      "step": 3337
    },
    {
      "epoch": 0.6842968429684296,
      "grad_norm": 0.4334981434938236,
      "learning_rate": 4.788751245163387e-05,
      "loss": 1.5272,
      "step": 3338
    },
    {
      "epoch": 0.6845018450184502,
      "grad_norm": 0.46447989784280636,
      "learning_rate": 4.783084907995156e-05,
      "loss": 1.4925,
      "step": 3339
    },
    {
      "epoch": 0.6847068470684707,
      "grad_norm": 0.41757449203661123,
      "learning_rate": 4.7774208712493984e-05,
      "loss": 1.4792,
      "step": 3340
    },
    {
      "epoch": 0.6849118491184912,
      "grad_norm": 0.5092435407892048,
      "learning_rate": 4.7717591374237005e-05,
      "loss": 1.546,
      "step": 3341
    },
    {
      "epoch": 0.6851168511685117,
      "grad_norm": 0.4427989129199794,
      "learning_rate": 4.7660997090146276e-05,
      "loss": 1.5872,
      "step": 3342
    },
    {
      "epoch": 0.6853218532185322,
      "grad_norm": 0.47937769179808537,
      "learning_rate": 4.760442588517728e-05,
      "loss": 1.5642,
      "step": 3343
    },
    {
      "epoch": 0.6855268552685527,
      "grad_norm": 0.41708844290352937,
      "learning_rate": 4.754787778427533e-05,
      "loss": 1.5376,
      "step": 3344
    },
    {
      "epoch": 0.6857318573185732,
      "grad_norm": 0.4586409613086554,
      "learning_rate": 4.74913528123756e-05,
      "loss": 1.5336,
      "step": 3345
    },
    {
      "epoch": 0.6859368593685937,
      "grad_norm": 0.45281816391435126,
      "learning_rate": 4.7434850994403065e-05,
      "loss": 1.558,
      "step": 3346
    },
    {
      "epoch": 0.6861418614186142,
      "grad_norm": 0.4163603490325916,
      "learning_rate": 4.7378372355272435e-05,
      "loss": 1.5023,
      "step": 3347
    },
    {
      "epoch": 0.6863468634686347,
      "grad_norm": 0.4062199185217212,
      "learning_rate": 4.732191691988822e-05,
      "loss": 1.4881,
      "step": 3348
    },
    {
      "epoch": 0.6865518655186552,
      "grad_norm": 0.4241142312480205,
      "learning_rate": 4.7265484713144644e-05,
      "loss": 1.5349,
      "step": 3349
    },
    {
      "epoch": 0.6867568675686757,
      "grad_norm": 0.46004240101197413,
      "learning_rate": 4.720907575992585e-05,
      "loss": 1.5553,
      "step": 3350
    },
    {
      "epoch": 0.6869618696186962,
      "grad_norm": 0.45672962079221424,
      "learning_rate": 4.715269008510552e-05,
      "loss": 1.4798,
      "step": 3351
    },
    {
      "epoch": 0.6871668716687167,
      "grad_norm": 0.42500130650678186,
      "learning_rate": 4.7096327713547276e-05,
      "loss": 1.508,
      "step": 3352
    },
    {
      "epoch": 0.6873718737187372,
      "grad_norm": 0.4330170385822167,
      "learning_rate": 4.703998867010431e-05,
      "loss": 1.5096,
      "step": 3353
    },
    {
      "epoch": 0.6875768757687577,
      "grad_norm": 0.4568956383472607,
      "learning_rate": 4.698367297961954e-05,
      "loss": 1.4984,
      "step": 3354
    },
    {
      "epoch": 0.6877818778187782,
      "grad_norm": 0.42969367527050895,
      "learning_rate": 4.69273806669257e-05,
      "loss": 1.5486,
      "step": 3355
    },
    {
      "epoch": 0.6879868798687987,
      "grad_norm": 0.4492036849163193,
      "learning_rate": 4.687111175684509e-05,
      "loss": 1.5559,
      "step": 3356
    },
    {
      "epoch": 0.6881918819188192,
      "grad_norm": 0.45026674633707314,
      "learning_rate": 4.681486627418978e-05,
      "loss": 1.4977,
      "step": 3357
    },
    {
      "epoch": 0.6883968839688397,
      "grad_norm": 0.446967550670305,
      "learning_rate": 4.675864424376146e-05,
      "loss": 1.51,
      "step": 3358
    },
    {
      "epoch": 0.6886018860188602,
      "grad_norm": 0.43557138512954835,
      "learning_rate": 4.670244569035145e-05,
      "loss": 1.5446,
      "step": 3359
    },
    {
      "epoch": 0.6888068880688807,
      "grad_norm": 0.41776030770884587,
      "learning_rate": 4.664627063874083e-05,
      "loss": 1.5121,
      "step": 3360
    },
    {
      "epoch": 0.6890118901189012,
      "grad_norm": 0.47047269463768904,
      "learning_rate": 4.659011911370017e-05,
      "loss": 1.5202,
      "step": 3361
    },
    {
      "epoch": 0.6892168921689217,
      "grad_norm": 0.4487549917119925,
      "learning_rate": 4.653399113998981e-05,
      "loss": 1.5904,
      "step": 3362
    },
    {
      "epoch": 0.6894218942189422,
      "grad_norm": 0.42411164246405403,
      "learning_rate": 4.647788674235961e-05,
      "loss": 1.5071,
      "step": 3363
    },
    {
      "epoch": 0.6896268962689627,
      "grad_norm": 0.4603334336763966,
      "learning_rate": 4.6421805945549015e-05,
      "loss": 1.5399,
      "step": 3364
    },
    {
      "epoch": 0.6898318983189832,
      "grad_norm": 0.48212398952038743,
      "learning_rate": 4.6365748774287176e-05,
      "loss": 1.4934,
      "step": 3365
    },
    {
      "epoch": 0.6900369003690037,
      "grad_norm": 0.3761357649447258,
      "learning_rate": 4.630971525329274e-05,
      "loss": 1.4323,
      "step": 3366
    },
    {
      "epoch": 0.6902419024190242,
      "grad_norm": 0.4414865511782292,
      "learning_rate": 4.6253705407273886e-05,
      "loss": 1.5461,
      "step": 3367
    },
    {
      "epoch": 0.6904469044690447,
      "grad_norm": 0.4455325320566932,
      "learning_rate": 4.61977192609285e-05,
      "loss": 1.5684,
      "step": 3368
    },
    {
      "epoch": 0.6906519065190652,
      "grad_norm": 0.43676154900615155,
      "learning_rate": 4.614175683894384e-05,
      "loss": 1.4868,
      "step": 3369
    },
    {
      "epoch": 0.6908569085690857,
      "grad_norm": 0.4256562964118114,
      "learning_rate": 4.6085818165996876e-05,
      "loss": 1.4887,
      "step": 3370
    },
    {
      "epoch": 0.6910619106191062,
      "grad_norm": 0.4254030274358486,
      "learning_rate": 4.602990326675397e-05,
      "loss": 1.5416,
      "step": 3371
    },
    {
      "epoch": 0.6912669126691267,
      "grad_norm": 0.4643846977970754,
      "learning_rate": 4.597401216587104e-05,
      "loss": 1.6195,
      "step": 3372
    },
    {
      "epoch": 0.6914719147191472,
      "grad_norm": 0.4546663920223562,
      "learning_rate": 4.5918144887993574e-05,
      "loss": 1.5839,
      "step": 3373
    },
    {
      "epoch": 0.6916769167691676,
      "grad_norm": 0.4286593978294174,
      "learning_rate": 4.586230145775647e-05,
      "loss": 1.5487,
      "step": 3374
    },
    {
      "epoch": 0.6918819188191881,
      "grad_norm": 0.430485559822579,
      "learning_rate": 4.58064818997841e-05,
      "loss": 1.5554,
      "step": 3375
    },
    {
      "epoch": 0.6920869208692086,
      "grad_norm": 0.42554754188571264,
      "learning_rate": 4.575068623869045e-05,
      "loss": 1.5089,
      "step": 3376
    },
    {
      "epoch": 0.6922919229192291,
      "grad_norm": 0.42310839889276436,
      "learning_rate": 4.569491449907878e-05,
      "loss": 1.4908,
      "step": 3377
    },
    {
      "epoch": 0.6924969249692496,
      "grad_norm": 0.4358770635002334,
      "learning_rate": 4.563916670554196e-05,
      "loss": 1.5499,
      "step": 3378
    },
    {
      "epoch": 0.6927019270192702,
      "grad_norm": 0.4076771917822465,
      "learning_rate": 4.55834428826622e-05,
      "loss": 1.4985,
      "step": 3379
    },
    {
      "epoch": 0.6929069290692907,
      "grad_norm": 0.4705652288905525,
      "learning_rate": 4.552774305501115e-05,
      "loss": 1.5249,
      "step": 3380
    },
    {
      "epoch": 0.6931119311193112,
      "grad_norm": 0.427408820052831,
      "learning_rate": 4.547206724714996e-05,
      "loss": 1.5432,
      "step": 3381
    },
    {
      "epoch": 0.6933169331693317,
      "grad_norm": 0.39254391299183433,
      "learning_rate": 4.54164154836291e-05,
      "loss": 1.4498,
      "step": 3382
    },
    {
      "epoch": 0.6935219352193522,
      "grad_norm": 0.4132924271080056,
      "learning_rate": 4.536078778898845e-05,
      "loss": 1.5393,
      "step": 3383
    },
    {
      "epoch": 0.6937269372693727,
      "grad_norm": 0.4287777964160761,
      "learning_rate": 4.530518418775733e-05,
      "loss": 1.5807,
      "step": 3384
    },
    {
      "epoch": 0.6939319393193932,
      "grad_norm": 0.43879863236690086,
      "learning_rate": 4.5249604704454363e-05,
      "loss": 1.5951,
      "step": 3385
    },
    {
      "epoch": 0.6941369413694137,
      "grad_norm": 0.4132170990575669,
      "learning_rate": 4.5194049363587634e-05,
      "loss": 1.5102,
      "step": 3386
    },
    {
      "epoch": 0.6943419434194342,
      "grad_norm": 0.4036200593937283,
      "learning_rate": 4.513851818965449e-05,
      "loss": 1.5177,
      "step": 3387
    },
    {
      "epoch": 0.6945469454694547,
      "grad_norm": 0.41810042999117114,
      "learning_rate": 4.5083011207141614e-05,
      "loss": 1.5447,
      "step": 3388
    },
    {
      "epoch": 0.6947519475194752,
      "grad_norm": 0.409223608940766,
      "learning_rate": 4.502752844052514e-05,
      "loss": 1.5719,
      "step": 3389
    },
    {
      "epoch": 0.6949569495694957,
      "grad_norm": 0.44314024370613014,
      "learning_rate": 4.4972069914270366e-05,
      "loss": 1.5703,
      "step": 3390
    },
    {
      "epoch": 0.6951619516195162,
      "grad_norm": 0.4475381624454827,
      "learning_rate": 4.4916635652832076e-05,
      "loss": 1.5426,
      "step": 3391
    },
    {
      "epoch": 0.6953669536695367,
      "grad_norm": 0.4408687213248625,
      "learning_rate": 4.4861225680654194e-05,
      "loss": 1.4948,
      "step": 3392
    },
    {
      "epoch": 0.6955719557195572,
      "grad_norm": 0.4010041155055108,
      "learning_rate": 4.480584002216999e-05,
      "loss": 1.4558,
      "step": 3393
    },
    {
      "epoch": 0.6957769577695777,
      "grad_norm": 0.4457869383136856,
      "learning_rate": 4.4750478701802065e-05,
      "loss": 1.5658,
      "step": 3394
    },
    {
      "epoch": 0.6959819598195982,
      "grad_norm": 0.4400401036204604,
      "learning_rate": 4.469514174396221e-05,
      "loss": 1.566,
      "step": 3395
    },
    {
      "epoch": 0.6961869618696187,
      "grad_norm": 0.47764958247925376,
      "learning_rate": 4.4639829173051554e-05,
      "loss": 1.5338,
      "step": 3396
    },
    {
      "epoch": 0.6963919639196392,
      "grad_norm": 0.40103978052738076,
      "learning_rate": 4.45845410134604e-05,
      "loss": 1.4861,
      "step": 3397
    },
    {
      "epoch": 0.6965969659696597,
      "grad_norm": 0.4470805029496455,
      "learning_rate": 4.4529277289568314e-05,
      "loss": 1.4873,
      "step": 3398
    },
    {
      "epoch": 0.6968019680196802,
      "grad_norm": 0.42769937686655585,
      "learning_rate": 4.447403802574406e-05,
      "loss": 1.5501,
      "step": 3399
    },
    {
      "epoch": 0.6970069700697007,
      "grad_norm": 0.42509408103612056,
      "learning_rate": 4.4418823246345653e-05,
      "loss": 1.5076,
      "step": 3400
    },
    {
      "epoch": 0.6972119721197212,
      "grad_norm": 0.4392847101699351,
      "learning_rate": 4.4363632975720356e-05,
      "loss": 1.5428,
      "step": 3401
    },
    {
      "epoch": 0.6974169741697417,
      "grad_norm": 0.41661042923817515,
      "learning_rate": 4.430846723820453e-05,
      "loss": 1.5129,
      "step": 3402
    },
    {
      "epoch": 0.6976219762197622,
      "grad_norm": 0.4166677899972221,
      "learning_rate": 4.425332605812377e-05,
      "loss": 1.5491,
      "step": 3403
    },
    {
      "epoch": 0.6978269782697827,
      "grad_norm": 0.4173682753715901,
      "learning_rate": 4.4198209459792785e-05,
      "loss": 1.5259,
      "step": 3404
    },
    {
      "epoch": 0.6980319803198032,
      "grad_norm": 0.43395100736859943,
      "learning_rate": 4.414311746751551e-05,
      "loss": 1.5057,
      "step": 3405
    },
    {
      "epoch": 0.6982369823698237,
      "grad_norm": 0.4854154804817381,
      "learning_rate": 4.4088050105585075e-05,
      "loss": 1.5854,
      "step": 3406
    },
    {
      "epoch": 0.6984419844198442,
      "grad_norm": 0.4217382420864878,
      "learning_rate": 4.403300739828363e-05,
      "loss": 1.4926,
      "step": 3407
    },
    {
      "epoch": 0.6986469864698647,
      "grad_norm": 0.4175918016545473,
      "learning_rate": 4.397798936988251e-05,
      "loss": 1.5669,
      "step": 3408
    },
    {
      "epoch": 0.6988519885198852,
      "grad_norm": 0.4418920860537964,
      "learning_rate": 4.392299604464213e-05,
      "loss": 1.4934,
      "step": 3409
    },
    {
      "epoch": 0.6990569905699057,
      "grad_norm": 0.4412124670566063,
      "learning_rate": 4.386802744681209e-05,
      "loss": 1.5221,
      "step": 3410
    },
    {
      "epoch": 0.6992619926199262,
      "grad_norm": 0.5069530891898816,
      "learning_rate": 4.3813083600631065e-05,
      "loss": 1.5709,
      "step": 3411
    },
    {
      "epoch": 0.6994669946699467,
      "grad_norm": 0.4596403264248444,
      "learning_rate": 4.3758164530326785e-05,
      "loss": 1.5157,
      "step": 3412
    },
    {
      "epoch": 0.6996719967199672,
      "grad_norm": 0.45137278859473307,
      "learning_rate": 4.370327026011602e-05,
      "loss": 1.5145,
      "step": 3413
    },
    {
      "epoch": 0.6998769987699877,
      "grad_norm": 0.41587400036515626,
      "learning_rate": 4.364840081420466e-05,
      "loss": 1.5342,
      "step": 3414
    },
    {
      "epoch": 0.7000820008200082,
      "grad_norm": 0.4743037472631479,
      "learning_rate": 4.359355621678764e-05,
      "loss": 1.5002,
      "step": 3415
    },
    {
      "epoch": 0.7002870028700287,
      "grad_norm": 0.42520225732492695,
      "learning_rate": 4.353873649204899e-05,
      "loss": 1.5329,
      "step": 3416
    },
    {
      "epoch": 0.7004920049200491,
      "grad_norm": 0.45840189764321404,
      "learning_rate": 4.348394166416169e-05,
      "loss": 1.4784,
      "step": 3417
    },
    {
      "epoch": 0.7006970069700696,
      "grad_norm": 0.42071088490187714,
      "learning_rate": 4.3429171757287735e-05,
      "loss": 1.5151,
      "step": 3418
    },
    {
      "epoch": 0.7009020090200903,
      "grad_norm": 0.48064593064725697,
      "learning_rate": 4.337442679557815e-05,
      "loss": 1.5222,
      "step": 3419
    },
    {
      "epoch": 0.7011070110701108,
      "grad_norm": 0.419227455622307,
      "learning_rate": 4.3319706803173e-05,
      "loss": 1.5203,
      "step": 3420
    },
    {
      "epoch": 0.7013120131201313,
      "grad_norm": 0.4478070461742405,
      "learning_rate": 4.3265011804201374e-05,
      "loss": 1.5226,
      "step": 3421
    },
    {
      "epoch": 0.7015170151701517,
      "grad_norm": 0.38939450798439873,
      "learning_rate": 4.321034182278122e-05,
      "loss": 1.4921,
      "step": 3422
    },
    {
      "epoch": 0.7017220172201722,
      "grad_norm": 0.432943091577237,
      "learning_rate": 4.315569688301953e-05,
      "loss": 1.5492,
      "step": 3423
    },
    {
      "epoch": 0.7019270192701927,
      "grad_norm": 0.4522085470669943,
      "learning_rate": 4.310107700901224e-05,
      "loss": 1.5218,
      "step": 3424
    },
    {
      "epoch": 0.7021320213202132,
      "grad_norm": 0.4389529856307205,
      "learning_rate": 4.304648222484414e-05,
      "loss": 1.5248,
      "step": 3425
    },
    {
      "epoch": 0.7023370233702337,
      "grad_norm": 0.43361807006065684,
      "learning_rate": 4.299191255458922e-05,
      "loss": 1.4982,
      "step": 3426
    },
    {
      "epoch": 0.7025420254202542,
      "grad_norm": 0.4428330728296285,
      "learning_rate": 4.293736802231014e-05,
      "loss": 1.5575,
      "step": 3427
    },
    {
      "epoch": 0.7027470274702747,
      "grad_norm": 0.42304663484500776,
      "learning_rate": 4.288284865205856e-05,
      "loss": 1.5525,
      "step": 3428
    },
    {
      "epoch": 0.7029520295202952,
      "grad_norm": 0.5375403529385177,
      "learning_rate": 4.2828354467875046e-05,
      "loss": 1.5681,
      "step": 3429
    },
    {
      "epoch": 0.7031570315703157,
      "grad_norm": 0.4469907031456321,
      "learning_rate": 4.277388549378902e-05,
      "loss": 1.5519,
      "step": 3430
    },
    {
      "epoch": 0.7033620336203362,
      "grad_norm": 0.4434889157923991,
      "learning_rate": 4.271944175381886e-05,
      "loss": 1.5557,
      "step": 3431
    },
    {
      "epoch": 0.7035670356703567,
      "grad_norm": 0.4573709635194942,
      "learning_rate": 4.266502327197182e-05,
      "loss": 1.4891,
      "step": 3432
    },
    {
      "epoch": 0.7037720377203772,
      "grad_norm": 0.40044529720496036,
      "learning_rate": 4.261063007224393e-05,
      "loss": 1.481,
      "step": 3433
    },
    {
      "epoch": 0.7039770397703977,
      "grad_norm": 0.4065579144032816,
      "learning_rate": 4.255626217862013e-05,
      "loss": 1.509,
      "step": 3434
    },
    {
      "epoch": 0.7041820418204182,
      "grad_norm": 0.4417422837865661,
      "learning_rate": 4.250191961507416e-05,
      "loss": 1.5363,
      "step": 3435
    },
    {
      "epoch": 0.7043870438704387,
      "grad_norm": 0.45869242898495816,
      "learning_rate": 4.244760240556864e-05,
      "loss": 1.5098,
      "step": 3436
    },
    {
      "epoch": 0.7045920459204592,
      "grad_norm": 0.440833702806443,
      "learning_rate": 4.2393310574055045e-05,
      "loss": 1.5766,
      "step": 3437
    },
    {
      "epoch": 0.7047970479704797,
      "grad_norm": 0.44301830700045536,
      "learning_rate": 4.233904414447355e-05,
      "loss": 1.601,
      "step": 3438
    },
    {
      "epoch": 0.7050020500205002,
      "grad_norm": 0.41138186532763177,
      "learning_rate": 4.228480314075321e-05,
      "loss": 1.5381,
      "step": 3439
    },
    {
      "epoch": 0.7052070520705207,
      "grad_norm": 0.4181858285284662,
      "learning_rate": 4.2230587586811774e-05,
      "loss": 1.4558,
      "step": 3440
    },
    {
      "epoch": 0.7054120541205412,
      "grad_norm": 0.45318187511742036,
      "learning_rate": 4.217639750655594e-05,
      "loss": 1.5367,
      "step": 3441
    },
    {
      "epoch": 0.7056170561705617,
      "grad_norm": 0.461080252842011,
      "learning_rate": 4.2122232923880976e-05,
      "loss": 1.5358,
      "step": 3442
    },
    {
      "epoch": 0.7058220582205822,
      "grad_norm": 0.4282880729133403,
      "learning_rate": 4.20680938626711e-05,
      "loss": 1.5008,
      "step": 3443
    },
    {
      "epoch": 0.7060270602706027,
      "grad_norm": 0.4420501366242678,
      "learning_rate": 4.201398034679911e-05,
      "loss": 1.5725,
      "step": 3444
    },
    {
      "epoch": 0.7062320623206232,
      "grad_norm": 0.4230491945615498,
      "learning_rate": 4.195989240012659e-05,
      "loss": 1.4954,
      "step": 3445
    },
    {
      "epoch": 0.7064370643706437,
      "grad_norm": 0.4564392948570072,
      "learning_rate": 4.1905830046503935e-05,
      "loss": 1.5392,
      "step": 3446
    },
    {
      "epoch": 0.7066420664206642,
      "grad_norm": 0.44668778153424543,
      "learning_rate": 4.185179330977011e-05,
      "loss": 1.5944,
      "step": 3447
    },
    {
      "epoch": 0.7068470684706847,
      "grad_norm": 0.45313738343263005,
      "learning_rate": 4.1797782213752944e-05,
      "loss": 1.546,
      "step": 3448
    },
    {
      "epoch": 0.7070520705207052,
      "grad_norm": 0.44923932056304106,
      "learning_rate": 4.174379678226883e-05,
      "loss": 1.5248,
      "step": 3449
    },
    {
      "epoch": 0.7072570725707257,
      "grad_norm": 0.4315630861222875,
      "learning_rate": 4.168983703912285e-05,
      "loss": 1.5486,
      "step": 3450
    },
    {
      "epoch": 0.7074620746207462,
      "grad_norm": 0.41190529391375,
      "learning_rate": 4.163590300810888e-05,
      "loss": 1.5493,
      "step": 3451
    },
    {
      "epoch": 0.7076670766707667,
      "grad_norm": 0.4392506153198524,
      "learning_rate": 4.1581994713009295e-05,
      "loss": 1.5037,
      "step": 3452
    },
    {
      "epoch": 0.7078720787207872,
      "grad_norm": 0.4400255584273175,
      "learning_rate": 4.152811217759529e-05,
      "loss": 1.5175,
      "step": 3453
    },
    {
      "epoch": 0.7080770807708077,
      "grad_norm": 0.4448843962161563,
      "learning_rate": 4.1474255425626576e-05,
      "loss": 1.5294,
      "step": 3454
    },
    {
      "epoch": 0.7082820828208282,
      "grad_norm": 0.4293907220316177,
      "learning_rate": 4.142042448085148e-05,
      "loss": 1.5179,
      "step": 3455
    },
    {
      "epoch": 0.7084870848708487,
      "grad_norm": 0.4604403949206365,
      "learning_rate": 4.136661936700709e-05,
      "loss": 1.5544,
      "step": 3456
    },
    {
      "epoch": 0.7086920869208692,
      "grad_norm": 0.4221718115519598,
      "learning_rate": 4.1312840107818964e-05,
      "loss": 1.5202,
      "step": 3457
    },
    {
      "epoch": 0.7088970889708897,
      "grad_norm": 0.4438852978714222,
      "learning_rate": 4.125908672700129e-05,
      "loss": 1.5205,
      "step": 3458
    },
    {
      "epoch": 0.7091020910209103,
      "grad_norm": 0.42779241322623174,
      "learning_rate": 4.1205359248256946e-05,
      "loss": 1.5026,
      "step": 3459
    },
    {
      "epoch": 0.7093070930709308,
      "grad_norm": 0.44449370777953856,
      "learning_rate": 4.115165769527723e-05,
      "loss": 1.5675,
      "step": 3460
    },
    {
      "epoch": 0.7095120951209513,
      "grad_norm": 0.4115172080529009,
      "learning_rate": 4.109798209174213e-05,
      "loss": 1.5289,
      "step": 3461
    },
    {
      "epoch": 0.7097170971709718,
      "grad_norm": 0.42032460048347703,
      "learning_rate": 4.104433246132015e-05,
      "loss": 1.4504,
      "step": 3462
    },
    {
      "epoch": 0.7099220992209923,
      "grad_norm": 0.458318128519686,
      "learning_rate": 4.099070882766829e-05,
      "loss": 1.5374,
      "step": 3463
    },
    {
      "epoch": 0.7101271012710128,
      "grad_norm": 0.4239279152205268,
      "learning_rate": 4.09371112144322e-05,
      "loss": 1.503,
      "step": 3464
    },
    {
      "epoch": 0.7103321033210332,
      "grad_norm": 0.4339864582306654,
      "learning_rate": 4.088353964524593e-05,
      "loss": 1.5237,
      "step": 3465
    },
    {
      "epoch": 0.7105371053710537,
      "grad_norm": 0.3952308073103569,
      "learning_rate": 4.0829994143732164e-05,
      "loss": 1.4593,
      "step": 3466
    },
    {
      "epoch": 0.7107421074210742,
      "grad_norm": 0.408357519264169,
      "learning_rate": 4.077647473350201e-05,
      "loss": 1.4765,
      "step": 3467
    },
    {
      "epoch": 0.7109471094710947,
      "grad_norm": 0.44430598642048014,
      "learning_rate": 4.072298143815507e-05,
      "loss": 1.5329,
      "step": 3468
    },
    {
      "epoch": 0.7111521115211152,
      "grad_norm": 0.4325669955567285,
      "learning_rate": 4.066951428127952e-05,
      "loss": 1.4894,
      "step": 3469
    },
    {
      "epoch": 0.7113571135711357,
      "grad_norm": 0.41909485491285814,
      "learning_rate": 4.0616073286451864e-05,
      "loss": 1.5386,
      "step": 3470
    },
    {
      "epoch": 0.7115621156211562,
      "grad_norm": 0.39404165115905665,
      "learning_rate": 4.056265847723724e-05,
      "loss": 1.535,
      "step": 3471
    },
    {
      "epoch": 0.7117671176711767,
      "grad_norm": 0.5034144390339675,
      "learning_rate": 4.0509269877189106e-05,
      "loss": 1.5463,
      "step": 3472
    },
    {
      "epoch": 0.7119721197211972,
      "grad_norm": 0.4680054728828605,
      "learning_rate": 4.04559075098494e-05,
      "loss": 1.5466,
      "step": 3473
    },
    {
      "epoch": 0.7121771217712177,
      "grad_norm": 0.43312439318892254,
      "learning_rate": 4.040257139874848e-05,
      "loss": 1.5269,
      "step": 3474
    },
    {
      "epoch": 0.7123821238212382,
      "grad_norm": 0.44383045655181763,
      "learning_rate": 4.034926156740518e-05,
      "loss": 1.4704,
      "step": 3475
    },
    {
      "epoch": 0.7125871258712587,
      "grad_norm": 0.44043217741475005,
      "learning_rate": 4.029597803932675e-05,
      "loss": 1.5555,
      "step": 3476
    },
    {
      "epoch": 0.7127921279212792,
      "grad_norm": 0.4128799344301832,
      "learning_rate": 4.024272083800876e-05,
      "loss": 1.5333,
      "step": 3477
    },
    {
      "epoch": 0.7129971299712997,
      "grad_norm": 0.4234364538758268,
      "learning_rate": 4.0189489986935226e-05,
      "loss": 1.4906,
      "step": 3478
    },
    {
      "epoch": 0.7132021320213202,
      "grad_norm": 0.4189761608780559,
      "learning_rate": 4.01362855095785e-05,
      "loss": 1.6038,
      "step": 3479
    },
    {
      "epoch": 0.7134071340713407,
      "grad_norm": 0.4434163998455193,
      "learning_rate": 4.008310742939939e-05,
      "loss": 1.519,
      "step": 3480
    },
    {
      "epoch": 0.7136121361213612,
      "grad_norm": 0.4747082498066494,
      "learning_rate": 4.002995576984696e-05,
      "loss": 1.5524,
      "step": 3481
    },
    {
      "epoch": 0.7138171381713817,
      "grad_norm": 0.424749214420041,
      "learning_rate": 3.9976830554358746e-05,
      "loss": 1.4631,
      "step": 3482
    },
    {
      "epoch": 0.7140221402214022,
      "grad_norm": 0.4474781010886821,
      "learning_rate": 3.992373180636051e-05,
      "loss": 1.5556,
      "step": 3483
    },
    {
      "epoch": 0.7142271422714227,
      "grad_norm": 0.4272012681059497,
      "learning_rate": 3.9870659549266354e-05,
      "loss": 1.4721,
      "step": 3484
    },
    {
      "epoch": 0.7144321443214432,
      "grad_norm": 0.45784549195994245,
      "learning_rate": 3.9817613806478804e-05,
      "loss": 1.587,
      "step": 3485
    },
    {
      "epoch": 0.7146371463714637,
      "grad_norm": 0.39280557750909423,
      "learning_rate": 3.976459460138856e-05,
      "loss": 1.4343,
      "step": 3486
    },
    {
      "epoch": 0.7148421484214842,
      "grad_norm": 0.4702743874616334,
      "learning_rate": 3.971160195737475e-05,
      "loss": 1.5719,
      "step": 3487
    },
    {
      "epoch": 0.7150471504715047,
      "grad_norm": 0.46514364183722834,
      "learning_rate": 3.965863589780466e-05,
      "loss": 1.5651,
      "step": 3488
    },
    {
      "epoch": 0.7152521525215252,
      "grad_norm": 0.43992963173696936,
      "learning_rate": 3.9605696446033945e-05,
      "loss": 1.5687,
      "step": 3489
    },
    {
      "epoch": 0.7154571545715457,
      "grad_norm": 0.4117199720267242,
      "learning_rate": 3.9552783625406464e-05,
      "loss": 1.518,
      "step": 3490
    },
    {
      "epoch": 0.7156621566215662,
      "grad_norm": 0.4355313104177029,
      "learning_rate": 3.9499897459254375e-05,
      "loss": 1.5807,
      "step": 3491
    },
    {
      "epoch": 0.7158671586715867,
      "grad_norm": 0.42866631057671767,
      "learning_rate": 3.944703797089814e-05,
      "loss": 1.5364,
      "step": 3492
    },
    {
      "epoch": 0.7160721607216072,
      "grad_norm": 0.43935661240317686,
      "learning_rate": 3.939420518364633e-05,
      "loss": 1.4922,
      "step": 3493
    },
    {
      "epoch": 0.7162771627716277,
      "grad_norm": 0.47896483328035433,
      "learning_rate": 3.9341399120795816e-05,
      "loss": 1.5388,
      "step": 3494
    },
    {
      "epoch": 0.7164821648216482,
      "grad_norm": 0.43778865996397376,
      "learning_rate": 3.928861980563163e-05,
      "loss": 1.5275,
      "step": 3495
    },
    {
      "epoch": 0.7166871668716687,
      "grad_norm": 0.48464463006108066,
      "learning_rate": 3.923586726142711e-05,
      "loss": 1.5944,
      "step": 3496
    },
    {
      "epoch": 0.7168921689216892,
      "grad_norm": 0.4465115615789122,
      "learning_rate": 3.9183141511443725e-05,
      "loss": 1.5275,
      "step": 3497
    },
    {
      "epoch": 0.7170971709717097,
      "grad_norm": 0.4524076301786442,
      "learning_rate": 3.913044257893114e-05,
      "loss": 1.56,
      "step": 3498
    },
    {
      "epoch": 0.7173021730217303,
      "grad_norm": 0.4717705849785628,
      "learning_rate": 3.907777048712715e-05,
      "loss": 1.5876,
      "step": 3499
    },
    {
      "epoch": 0.7175071750717508,
      "grad_norm": 0.461517906057566,
      "learning_rate": 3.902512525925775e-05,
      "loss": 1.5465,
      "step": 3500
    },
    {
      "epoch": 0.7177121771217713,
      "grad_norm": 0.4207830208243561,
      "learning_rate": 3.897250691853712e-05,
      "loss": 1.5323,
      "step": 3501
    },
    {
      "epoch": 0.7179171791717918,
      "grad_norm": 0.4655345510157027,
      "learning_rate": 3.891991548816759e-05,
      "loss": 1.504,
      "step": 3502
    },
    {
      "epoch": 0.7181221812218123,
      "grad_norm": 0.44410473596029776,
      "learning_rate": 3.8867350991339555e-05,
      "loss": 1.5581,
      "step": 3503
    },
    {
      "epoch": 0.7183271832718328,
      "grad_norm": 0.4554069522872964,
      "learning_rate": 3.881481345123158e-05,
      "loss": 1.529,
      "step": 3504
    },
    {
      "epoch": 0.7185321853218533,
      "grad_norm": 0.40766495934876845,
      "learning_rate": 3.876230289101027e-05,
      "loss": 1.4977,
      "step": 3505
    },
    {
      "epoch": 0.7187371873718738,
      "grad_norm": 0.43066675659376075,
      "learning_rate": 3.8709819333830455e-05,
      "loss": 1.5454,
      "step": 3506
    },
    {
      "epoch": 0.7189421894218943,
      "grad_norm": 0.41372498726595935,
      "learning_rate": 3.865736280283503e-05,
      "loss": 1.5124,
      "step": 3507
    },
    {
      "epoch": 0.7191471914719147,
      "grad_norm": 0.4623352781739944,
      "learning_rate": 3.8604933321154904e-05,
      "loss": 1.5,
      "step": 3508
    },
    {
      "epoch": 0.7193521935219352,
      "grad_norm": 0.45849104121148104,
      "learning_rate": 3.855253091190909e-05,
      "loss": 1.4792,
      "step": 3509
    },
    {
      "epoch": 0.7195571955719557,
      "grad_norm": 0.41535432797406957,
      "learning_rate": 3.8500155598204644e-05,
      "loss": 1.4615,
      "step": 3510
    },
    {
      "epoch": 0.7197621976219762,
      "grad_norm": 0.4321507939070184,
      "learning_rate": 3.8447807403136726e-05,
      "loss": 1.5123,
      "step": 3511
    },
    {
      "epoch": 0.7199671996719967,
      "grad_norm": 0.4480024118598738,
      "learning_rate": 3.8395486349788554e-05,
      "loss": 1.529,
      "step": 3512
    },
    {
      "epoch": 0.7201722017220172,
      "grad_norm": 0.44537613264683973,
      "learning_rate": 3.8343192461231294e-05,
      "loss": 1.4935,
      "step": 3513
    },
    {
      "epoch": 0.7203772037720377,
      "grad_norm": 0.4178029213699762,
      "learning_rate": 3.829092576052416e-05,
      "loss": 1.4947,
      "step": 3514
    },
    {
      "epoch": 0.7205822058220582,
      "grad_norm": 0.4751344059662765,
      "learning_rate": 3.82386862707144e-05,
      "loss": 1.5313,
      "step": 3515
    },
    {
      "epoch": 0.7207872078720787,
      "grad_norm": 0.4409610574528483,
      "learning_rate": 3.818647401483724e-05,
      "loss": 1.5332,
      "step": 3516
    },
    {
      "epoch": 0.7209922099220992,
      "grad_norm": 0.4039428968645369,
      "learning_rate": 3.813428901591598e-05,
      "loss": 1.5064,
      "step": 3517
    },
    {
      "epoch": 0.7211972119721197,
      "grad_norm": 0.42390271115736167,
      "learning_rate": 3.808213129696177e-05,
      "loss": 1.4504,
      "step": 3518
    },
    {
      "epoch": 0.7214022140221402,
      "grad_norm": 0.425234606125503,
      "learning_rate": 3.8030000880973835e-05,
      "loss": 1.5143,
      "step": 3519
    },
    {
      "epoch": 0.7216072160721607,
      "grad_norm": 0.48586531785319365,
      "learning_rate": 3.7977897790939254e-05,
      "loss": 1.5753,
      "step": 3520
    },
    {
      "epoch": 0.7218122181221812,
      "grad_norm": 0.42897234277321256,
      "learning_rate": 3.7925822049833193e-05,
      "loss": 1.5014,
      "step": 3521
    },
    {
      "epoch": 0.7220172201722017,
      "grad_norm": 0.43760456001409564,
      "learning_rate": 3.7873773680618616e-05,
      "loss": 1.505,
      "step": 3522
    },
    {
      "epoch": 0.7222222222222222,
      "grad_norm": 0.47664240088393667,
      "learning_rate": 3.7821752706246584e-05,
      "loss": 1.5474,
      "step": 3523
    },
    {
      "epoch": 0.7224272242722427,
      "grad_norm": 0.43316928152618966,
      "learning_rate": 3.7769759149655916e-05,
      "loss": 1.522,
      "step": 3524
    },
    {
      "epoch": 0.7226322263222632,
      "grad_norm": 0.4664159211339371,
      "learning_rate": 3.771779303377342e-05,
      "loss": 1.536,
      "step": 3525
    },
    {
      "epoch": 0.7228372283722837,
      "grad_norm": 0.4638344468560447,
      "learning_rate": 3.766585438151375e-05,
      "loss": 1.5076,
      "step": 3526
    },
    {
      "epoch": 0.7230422304223042,
      "grad_norm": 0.4158251119345487,
      "learning_rate": 3.7613943215779556e-05,
      "loss": 1.4753,
      "step": 3527
    },
    {
      "epoch": 0.7232472324723247,
      "grad_norm": 0.412504154057362,
      "learning_rate": 3.7562059559461296e-05,
      "loss": 1.5147,
      "step": 3528
    },
    {
      "epoch": 0.7234522345223452,
      "grad_norm": 0.44755225470966187,
      "learning_rate": 3.75102034354373e-05,
      "loss": 1.554,
      "step": 3529
    },
    {
      "epoch": 0.7236572365723657,
      "grad_norm": 0.4360438766040676,
      "learning_rate": 3.745837486657374e-05,
      "loss": 1.4688,
      "step": 3530
    },
    {
      "epoch": 0.7238622386223862,
      "grad_norm": 0.4928569866627821,
      "learning_rate": 3.740657387572464e-05,
      "loss": 1.5451,
      "step": 3531
    },
    {
      "epoch": 0.7240672406724067,
      "grad_norm": 0.4256922044416373,
      "learning_rate": 3.735480048573194e-05,
      "loss": 1.4341,
      "step": 3532
    },
    {
      "epoch": 0.7242722427224272,
      "grad_norm": 0.4230515874731703,
      "learning_rate": 3.730305471942531e-05,
      "loss": 1.4926,
      "step": 3533
    },
    {
      "epoch": 0.7244772447724477,
      "grad_norm": 0.4475356759214047,
      "learning_rate": 3.725133659962232e-05,
      "loss": 1.5184,
      "step": 3534
    },
    {
      "epoch": 0.7246822468224682,
      "grad_norm": 0.4718617660111662,
      "learning_rate": 3.719964614912829e-05,
      "loss": 1.5251,
      "step": 3535
    },
    {
      "epoch": 0.7248872488724887,
      "grad_norm": 0.47987041064474306,
      "learning_rate": 3.7147983390736316e-05,
      "loss": 1.5386,
      "step": 3536
    },
    {
      "epoch": 0.7250922509225092,
      "grad_norm": 0.44374350064458823,
      "learning_rate": 3.7096348347227405e-05,
      "loss": 1.5154,
      "step": 3537
    },
    {
      "epoch": 0.7252972529725297,
      "grad_norm": 0.4701861993828469,
      "learning_rate": 3.704474104137019e-05,
      "loss": 1.4923,
      "step": 3538
    },
    {
      "epoch": 0.7255022550225503,
      "grad_norm": 0.4374661785625125,
      "learning_rate": 3.6993161495921226e-05,
      "loss": 1.4979,
      "step": 3539
    },
    {
      "epoch": 0.7257072570725708,
      "grad_norm": 0.4554914814027369,
      "learning_rate": 3.6941609733624706e-05,
      "loss": 1.5469,
      "step": 3540
    },
    {
      "epoch": 0.7259122591225913,
      "grad_norm": 0.45357691624776486,
      "learning_rate": 3.689008577721258e-05,
      "loss": 1.4659,
      "step": 3541
    },
    {
      "epoch": 0.7261172611726118,
      "grad_norm": 0.41709035606040246,
      "learning_rate": 3.683858964940464e-05,
      "loss": 1.5781,
      "step": 3542
    },
    {
      "epoch": 0.7263222632226323,
      "grad_norm": 0.5011181446991144,
      "learning_rate": 3.678712137290827e-05,
      "loss": 1.5754,
      "step": 3543
    },
    {
      "epoch": 0.7265272652726528,
      "grad_norm": 0.4208079192141814,
      "learning_rate": 3.6735680970418705e-05,
      "loss": 1.4918,
      "step": 3544
    },
    {
      "epoch": 0.7267322673226733,
      "grad_norm": 0.4477359199473054,
      "learning_rate": 3.668426846461881e-05,
      "loss": 1.5365,
      "step": 3545
    },
    {
      "epoch": 0.7269372693726938,
      "grad_norm": 0.42703503289716865,
      "learning_rate": 3.66328838781791e-05,
      "loss": 1.4746,
      "step": 3546
    },
    {
      "epoch": 0.7271422714227143,
      "grad_norm": 0.48872266369857126,
      "learning_rate": 3.658152723375794e-05,
      "loss": 1.5011,
      "step": 3547
    },
    {
      "epoch": 0.7273472734727348,
      "grad_norm": 0.42437991818125514,
      "learning_rate": 3.653019855400123e-05,
      "loss": 1.4812,
      "step": 3548
    },
    {
      "epoch": 0.7275522755227553,
      "grad_norm": 0.44980773572727784,
      "learning_rate": 3.647889786154254e-05,
      "loss": 1.453,
      "step": 3549
    },
    {
      "epoch": 0.7277572775727758,
      "grad_norm": 0.40202533560929643,
      "learning_rate": 3.642762517900322e-05,
      "loss": 1.4669,
      "step": 3550
    },
    {
      "epoch": 0.7279622796227962,
      "grad_norm": 0.3943361566093643,
      "learning_rate": 3.6376380528992125e-05,
      "loss": 1.4786,
      "step": 3551
    },
    {
      "epoch": 0.7281672816728167,
      "grad_norm": 0.4725434254209094,
      "learning_rate": 3.632516393410589e-05,
      "loss": 1.4986,
      "step": 3552
    },
    {
      "epoch": 0.7283722837228372,
      "grad_norm": 0.46756230394120024,
      "learning_rate": 3.6273975416928675e-05,
      "loss": 1.5168,
      "step": 3553
    },
    {
      "epoch": 0.7285772857728577,
      "grad_norm": 0.5025578152294731,
      "learning_rate": 3.622281500003224e-05,
      "loss": 1.6007,
      "step": 3554
    },
    {
      "epoch": 0.7287822878228782,
      "grad_norm": 0.4668190146540949,
      "learning_rate": 3.6171682705976085e-05,
      "loss": 1.5661,
      "step": 3555
    },
    {
      "epoch": 0.7289872898728987,
      "grad_norm": 0.4563147554874595,
      "learning_rate": 3.612057855730715e-05,
      "loss": 1.5565,
      "step": 3556
    },
    {
      "epoch": 0.7291922919229192,
      "grad_norm": 0.4917818577673773,
      "learning_rate": 3.6069502576560124e-05,
      "loss": 1.5121,
      "step": 3557
    },
    {
      "epoch": 0.7293972939729397,
      "grad_norm": 0.44992302787004207,
      "learning_rate": 3.6018454786257136e-05,
      "loss": 1.5631,
      "step": 3558
    },
    {
      "epoch": 0.7296022960229602,
      "grad_norm": 0.4195053738853119,
      "learning_rate": 3.5967435208907943e-05,
      "loss": 1.5098,
      "step": 3559
    },
    {
      "epoch": 0.7298072980729807,
      "grad_norm": 0.4229655260905307,
      "learning_rate": 3.59164438670099e-05,
      "loss": 1.4849,
      "step": 3560
    },
    {
      "epoch": 0.7300123001230012,
      "grad_norm": 0.4777103833690583,
      "learning_rate": 3.58654807830478e-05,
      "loss": 1.5169,
      "step": 3561
    },
    {
      "epoch": 0.7302173021730217,
      "grad_norm": 0.4190670776897408,
      "learning_rate": 3.581454597949414e-05,
      "loss": 1.4915,
      "step": 3562
    },
    {
      "epoch": 0.7304223042230422,
      "grad_norm": 0.42907633890684843,
      "learning_rate": 3.576363947880881e-05,
      "loss": 1.4984,
      "step": 3563
    },
    {
      "epoch": 0.7306273062730627,
      "grad_norm": 0.4634321069214964,
      "learning_rate": 3.571276130343925e-05,
      "loss": 1.5566,
      "step": 3564
    },
    {
      "epoch": 0.7308323083230832,
      "grad_norm": 0.48664513689765004,
      "learning_rate": 3.56619114758204e-05,
      "loss": 1.5414,
      "step": 3565
    },
    {
      "epoch": 0.7310373103731037,
      "grad_norm": 0.4603575252245323,
      "learning_rate": 3.561109001837475e-05,
      "loss": 1.494,
      "step": 3566
    },
    {
      "epoch": 0.7312423124231242,
      "grad_norm": 0.41840996358847155,
      "learning_rate": 3.5560296953512295e-05,
      "loss": 1.5179,
      "step": 3567
    },
    {
      "epoch": 0.7314473144731447,
      "grad_norm": 0.4373800476355417,
      "learning_rate": 3.550953230363044e-05,
      "loss": 1.5279,
      "step": 3568
    },
    {
      "epoch": 0.7316523165231652,
      "grad_norm": 0.4318747201927023,
      "learning_rate": 3.5458796091114076e-05,
      "loss": 1.4996,
      "step": 3569
    },
    {
      "epoch": 0.7318573185731857,
      "grad_norm": 0.4345557389320695,
      "learning_rate": 3.5408088338335545e-05,
      "loss": 1.5158,
      "step": 3570
    },
    {
      "epoch": 0.7320623206232062,
      "grad_norm": 0.4022959055144593,
      "learning_rate": 3.5357409067654676e-05,
      "loss": 1.466,
      "step": 3571
    },
    {
      "epoch": 0.7322673226732267,
      "grad_norm": 0.42845587270388763,
      "learning_rate": 3.530675830141877e-05,
      "loss": 1.6099,
      "step": 3572
    },
    {
      "epoch": 0.7324723247232472,
      "grad_norm": 0.4221213743064392,
      "learning_rate": 3.525613606196249e-05,
      "loss": 1.5098,
      "step": 3573
    },
    {
      "epoch": 0.7326773267732677,
      "grad_norm": 0.47495821094180796,
      "learning_rate": 3.5205542371607925e-05,
      "loss": 1.4911,
      "step": 3574
    },
    {
      "epoch": 0.7328823288232882,
      "grad_norm": 0.4469084513198053,
      "learning_rate": 3.515497725266458e-05,
      "loss": 1.5727,
      "step": 3575
    },
    {
      "epoch": 0.7330873308733087,
      "grad_norm": 0.4260853954451166,
      "learning_rate": 3.510444072742938e-05,
      "loss": 1.5017,
      "step": 3576
    },
    {
      "epoch": 0.7332923329233292,
      "grad_norm": 0.45678735882094973,
      "learning_rate": 3.50539328181867e-05,
      "loss": 1.4577,
      "step": 3577
    },
    {
      "epoch": 0.7334973349733497,
      "grad_norm": 0.39521063279384955,
      "learning_rate": 3.5003453547208176e-05,
      "loss": 1.524,
      "step": 3578
    },
    {
      "epoch": 0.7337023370233703,
      "grad_norm": 0.48379438386037976,
      "learning_rate": 3.4953002936752874e-05,
      "loss": 1.5225,
      "step": 3579
    },
    {
      "epoch": 0.7339073390733908,
      "grad_norm": 0.45083580542993734,
      "learning_rate": 3.490258100906724e-05,
      "loss": 1.4978,
      "step": 3580
    },
    {
      "epoch": 0.7341123411234113,
      "grad_norm": 0.4326580781488519,
      "learning_rate": 3.485218778638499e-05,
      "loss": 1.4629,
      "step": 3581
    },
    {
      "epoch": 0.7343173431734318,
      "grad_norm": 0.45949642165557864,
      "learning_rate": 3.4801823290927315e-05,
      "loss": 1.5132,
      "step": 3582
    },
    {
      "epoch": 0.7345223452234523,
      "grad_norm": 0.4970509096604444,
      "learning_rate": 3.475148754490267e-05,
      "loss": 1.527,
      "step": 3583
    },
    {
      "epoch": 0.7347273472734728,
      "grad_norm": 0.43460755566178566,
      "learning_rate": 3.470118057050681e-05,
      "loss": 1.4843,
      "step": 3584
    },
    {
      "epoch": 0.7349323493234933,
      "grad_norm": 0.4798452017293138,
      "learning_rate": 3.4650902389922824e-05,
      "loss": 1.5169,
      "step": 3585
    },
    {
      "epoch": 0.7351373513735138,
      "grad_norm": 0.45479290964325825,
      "learning_rate": 3.460065302532108e-05,
      "loss": 1.5349,
      "step": 3586
    },
    {
      "epoch": 0.7353423534235343,
      "grad_norm": 0.43749084533346677,
      "learning_rate": 3.455043249885928e-05,
      "loss": 1.5355,
      "step": 3587
    },
    {
      "epoch": 0.7355473554735548,
      "grad_norm": 0.39210104845206367,
      "learning_rate": 3.450024083268245e-05,
      "loss": 1.5071,
      "step": 3588
    },
    {
      "epoch": 0.7357523575235753,
      "grad_norm": 0.43219956287284417,
      "learning_rate": 3.445007804892278e-05,
      "loss": 1.4867,
      "step": 3589
    },
    {
      "epoch": 0.7359573595735958,
      "grad_norm": 0.4508015978885218,
      "learning_rate": 3.439994416969978e-05,
      "loss": 1.4995,
      "step": 3590
    },
    {
      "epoch": 0.7361623616236163,
      "grad_norm": 0.4155368053450593,
      "learning_rate": 3.4349839217120194e-05,
      "loss": 1.5017,
      "step": 3591
    },
    {
      "epoch": 0.7363673636736368,
      "grad_norm": 0.4678831400591447,
      "learning_rate": 3.429976321327805e-05,
      "loss": 1.4535,
      "step": 3592
    },
    {
      "epoch": 0.7365723657236573,
      "grad_norm": 0.48997655861328476,
      "learning_rate": 3.4249716180254624e-05,
      "loss": 1.6354,
      "step": 3593
    },
    {
      "epoch": 0.7367773677736777,
      "grad_norm": 0.4403336749040627,
      "learning_rate": 3.419969814011835e-05,
      "loss": 1.5564,
      "step": 3594
    },
    {
      "epoch": 0.7369823698236982,
      "grad_norm": 0.4238528814210788,
      "learning_rate": 3.414970911492491e-05,
      "loss": 1.4502,
      "step": 3595
    },
    {
      "epoch": 0.7371873718737187,
      "grad_norm": 0.4035819565287794,
      "learning_rate": 3.4099749126717175e-05,
      "loss": 1.4993,
      "step": 3596
    },
    {
      "epoch": 0.7373923739237392,
      "grad_norm": 0.4428776576547031,
      "learning_rate": 3.404981819752524e-05,
      "loss": 1.5182,
      "step": 3597
    },
    {
      "epoch": 0.7375973759737597,
      "grad_norm": 0.46546257726278006,
      "learning_rate": 3.399991634936641e-05,
      "loss": 1.5818,
      "step": 3598
    },
    {
      "epoch": 0.7378023780237802,
      "grad_norm": 0.42164558427264753,
      "learning_rate": 3.395004360424512e-05,
      "loss": 1.4588,
      "step": 3599
    },
    {
      "epoch": 0.7380073800738007,
      "grad_norm": 0.4169000668927247,
      "learning_rate": 3.390019998415297e-05,
      "loss": 1.4747,
      "step": 3600
    },
    {
      "epoch": 0.7382123821238212,
      "grad_norm": 0.44287746933564504,
      "learning_rate": 3.3850385511068695e-05,
      "loss": 1.497,
      "step": 3601
    },
    {
      "epoch": 0.7384173841738417,
      "grad_norm": 0.44780791429689304,
      "learning_rate": 3.380060020695825e-05,
      "loss": 1.5425,
      "step": 3602
    },
    {
      "epoch": 0.7386223862238622,
      "grad_norm": 0.4459467111624526,
      "learning_rate": 3.3750844093774736e-05,
      "loss": 1.587,
      "step": 3603
    },
    {
      "epoch": 0.7388273882738827,
      "grad_norm": 0.43272062877269973,
      "learning_rate": 3.3701117193458295e-05,
      "loss": 1.5392,
      "step": 3604
    },
    {
      "epoch": 0.7390323903239032,
      "grad_norm": 0.4560160315304973,
      "learning_rate": 3.365141952793622e-05,
      "loss": 1.5285,
      "step": 3605
    },
    {
      "epoch": 0.7392373923739237,
      "grad_norm": 0.44449981896343366,
      "learning_rate": 3.360175111912291e-05,
      "loss": 1.545,
      "step": 3606
    },
    {
      "epoch": 0.7394423944239442,
      "grad_norm": 0.4727726552862745,
      "learning_rate": 3.35521119889199e-05,
      "loss": 1.4878,
      "step": 3607
    },
    {
      "epoch": 0.7396473964739647,
      "grad_norm": 0.4701232802966624,
      "learning_rate": 3.350250215921581e-05,
      "loss": 1.5061,
      "step": 3608
    },
    {
      "epoch": 0.7398523985239852,
      "grad_norm": 0.44168940116918903,
      "learning_rate": 3.345292165188632e-05,
      "loss": 1.4634,
      "step": 3609
    },
    {
      "epoch": 0.7400574005740057,
      "grad_norm": 0.4429834658934189,
      "learning_rate": 3.3403370488794136e-05,
      "loss": 1.5374,
      "step": 3610
    },
    {
      "epoch": 0.7402624026240262,
      "grad_norm": 0.4390432074897609,
      "learning_rate": 3.335384869178908e-05,
      "loss": 1.5059,
      "step": 3611
    },
    {
      "epoch": 0.7404674046740467,
      "grad_norm": 0.46439204073258106,
      "learning_rate": 3.330435628270806e-05,
      "loss": 1.5173,
      "step": 3612
    },
    {
      "epoch": 0.7406724067240672,
      "grad_norm": 0.4687132771064403,
      "learning_rate": 3.325489328337491e-05,
      "loss": 1.5584,
      "step": 3613
    },
    {
      "epoch": 0.7408774087740877,
      "grad_norm": 0.41477307140111364,
      "learning_rate": 3.320545971560063e-05,
      "loss": 1.523,
      "step": 3614
    },
    {
      "epoch": 0.7410824108241082,
      "grad_norm": 0.4449021863987743,
      "learning_rate": 3.3156055601183155e-05,
      "loss": 1.523,
      "step": 3615
    },
    {
      "epoch": 0.7412874128741287,
      "grad_norm": 0.4166686958201675,
      "learning_rate": 3.310668096190741e-05,
      "loss": 1.4762,
      "step": 3616
    },
    {
      "epoch": 0.7414924149241492,
      "grad_norm": 0.4375628871819494,
      "learning_rate": 3.305733581954544e-05,
      "loss": 1.5376,
      "step": 3617
    },
    {
      "epoch": 0.7416974169741697,
      "grad_norm": 0.4449591766063655,
      "learning_rate": 3.300802019585615e-05,
      "loss": 1.4523,
      "step": 3618
    },
    {
      "epoch": 0.7419024190241903,
      "grad_norm": 0.4170242072564302,
      "learning_rate": 3.2958734112585546e-05,
      "loss": 1.4179,
      "step": 3619
    },
    {
      "epoch": 0.7421074210742108,
      "grad_norm": 0.4479486715367858,
      "learning_rate": 3.290947759146651e-05,
      "loss": 1.4986,
      "step": 3620
    },
    {
      "epoch": 0.7423124231242313,
      "grad_norm": 0.4577372804717908,
      "learning_rate": 3.286025065421892e-05,
      "loss": 1.4656,
      "step": 3621
    },
    {
      "epoch": 0.7425174251742518,
      "grad_norm": 0.4285175199785407,
      "learning_rate": 3.281105332254966e-05,
      "loss": 1.5027,
      "step": 3622
    },
    {
      "epoch": 0.7427224272242723,
      "grad_norm": 0.447407183446742,
      "learning_rate": 3.276188561815249e-05,
      "loss": 1.5739,
      "step": 3623
    },
    {
      "epoch": 0.7429274292742928,
      "grad_norm": 0.413495389841362,
      "learning_rate": 3.2712747562708115e-05,
      "loss": 1.5002,
      "step": 3624
    },
    {
      "epoch": 0.7431324313243133,
      "grad_norm": 0.46420309863004994,
      "learning_rate": 3.2663639177884234e-05,
      "loss": 1.5632,
      "step": 3625
    },
    {
      "epoch": 0.7433374333743338,
      "grad_norm": 0.455108248395503,
      "learning_rate": 3.261456048533535e-05,
      "loss": 1.5226,
      "step": 3626
    },
    {
      "epoch": 0.7435424354243543,
      "grad_norm": 0.4374069729309448,
      "learning_rate": 3.2565511506703005e-05,
      "loss": 1.4733,
      "step": 3627
    },
    {
      "epoch": 0.7437474374743748,
      "grad_norm": 0.4354544465623817,
      "learning_rate": 3.251649226361555e-05,
      "loss": 1.4362,
      "step": 3628
    },
    {
      "epoch": 0.7439524395243953,
      "grad_norm": 0.49078041968050845,
      "learning_rate": 3.246750277768819e-05,
      "loss": 1.5765,
      "step": 3629
    },
    {
      "epoch": 0.7441574415744158,
      "grad_norm": 0.41825159367516007,
      "learning_rate": 3.2418543070523135e-05,
      "loss": 1.4936,
      "step": 3630
    },
    {
      "epoch": 0.7443624436244363,
      "grad_norm": 0.478845903588315,
      "learning_rate": 3.2369613163709356e-05,
      "loss": 1.5562,
      "step": 3631
    },
    {
      "epoch": 0.7445674456744568,
      "grad_norm": 0.4737596458641103,
      "learning_rate": 3.232071307882268e-05,
      "loss": 1.4573,
      "step": 3632
    },
    {
      "epoch": 0.7447724477244773,
      "grad_norm": 0.42698103983244834,
      "learning_rate": 3.227184283742591e-05,
      "loss": 1.5207,
      "step": 3633
    },
    {
      "epoch": 0.7449774497744978,
      "grad_norm": 0.4209080963414414,
      "learning_rate": 3.222300246106852e-05,
      "loss": 1.4845,
      "step": 3634
    },
    {
      "epoch": 0.7451824518245183,
      "grad_norm": 0.47586484877735274,
      "learning_rate": 3.217419197128695e-05,
      "loss": 1.5105,
      "step": 3635
    },
    {
      "epoch": 0.7453874538745388,
      "grad_norm": 0.4039924718246903,
      "learning_rate": 3.21254113896044e-05,
      "loss": 1.4905,
      "step": 3636
    },
    {
      "epoch": 0.7455924559245592,
      "grad_norm": 0.46500934435606234,
      "learning_rate": 3.207666073753084e-05,
      "loss": 1.5188,
      "step": 3637
    },
    {
      "epoch": 0.7457974579745797,
      "grad_norm": 0.44496747035294837,
      "learning_rate": 3.202794003656316e-05,
      "loss": 1.5542,
      "step": 3638
    },
    {
      "epoch": 0.7460024600246002,
      "grad_norm": 0.4130377147171396,
      "learning_rate": 3.1979249308184957e-05,
      "loss": 1.5801,
      "step": 3639
    },
    {
      "epoch": 0.7462074620746207,
      "grad_norm": 0.4387364316566754,
      "learning_rate": 3.193058857386658e-05,
      "loss": 1.5146,
      "step": 3640
    },
    {
      "epoch": 0.7464124641246412,
      "grad_norm": 0.4655716114982046,
      "learning_rate": 3.188195785506527e-05,
      "loss": 1.5578,
      "step": 3641
    },
    {
      "epoch": 0.7466174661746617,
      "grad_norm": 0.44108467006116464,
      "learning_rate": 3.18333571732249e-05,
      "loss": 1.5426,
      "step": 3642
    },
    {
      "epoch": 0.7468224682246822,
      "grad_norm": 0.42221178090823297,
      "learning_rate": 3.178478654977624e-05,
      "loss": 1.4577,
      "step": 3643
    },
    {
      "epoch": 0.7470274702747027,
      "grad_norm": 0.46314649034235084,
      "learning_rate": 3.173624600613666e-05,
      "loss": 1.5696,
      "step": 3644
    },
    {
      "epoch": 0.7472324723247232,
      "grad_norm": 0.45585573972573246,
      "learning_rate": 3.168773556371034e-05,
      "loss": 1.547,
      "step": 3645
    },
    {
      "epoch": 0.7474374743747437,
      "grad_norm": 0.43666099128498387,
      "learning_rate": 3.163925524388822e-05,
      "loss": 1.5721,
      "step": 3646
    },
    {
      "epoch": 0.7476424764247642,
      "grad_norm": 0.4013041281766275,
      "learning_rate": 3.1590805068047865e-05,
      "loss": 1.5191,
      "step": 3647
    },
    {
      "epoch": 0.7478474784747847,
      "grad_norm": 0.43691889920123905,
      "learning_rate": 3.154238505755367e-05,
      "loss": 1.5473,
      "step": 3648
    },
    {
      "epoch": 0.7480524805248052,
      "grad_norm": 0.40429236742212604,
      "learning_rate": 3.149399523375661e-05,
      "loss": 1.4341,
      "step": 3649
    },
    {
      "epoch": 0.7482574825748257,
      "grad_norm": 0.449955144337816,
      "learning_rate": 3.144563561799438e-05,
      "loss": 1.5013,
      "step": 3650
    },
    {
      "epoch": 0.7484624846248462,
      "grad_norm": 0.4290599110243751,
      "learning_rate": 3.139730623159144e-05,
      "loss": 1.502,
      "step": 3651
    },
    {
      "epoch": 0.7486674866748667,
      "grad_norm": 0.42449902930507555,
      "learning_rate": 3.1349007095858786e-05,
      "loss": 1.5444,
      "step": 3652
    },
    {
      "epoch": 0.7488724887248872,
      "grad_norm": 0.4134993070461911,
      "learning_rate": 3.1300738232094184e-05,
      "loss": 1.4699,
      "step": 3653
    },
    {
      "epoch": 0.7490774907749077,
      "grad_norm": 0.457843350728091,
      "learning_rate": 3.125249966158201e-05,
      "loss": 1.5093,
      "step": 3654
    },
    {
      "epoch": 0.7492824928249282,
      "grad_norm": 0.42878523779372857,
      "learning_rate": 3.120429140559327e-05,
      "loss": 1.5284,
      "step": 3655
    },
    {
      "epoch": 0.7494874948749487,
      "grad_norm": 0.43503319476679875,
      "learning_rate": 3.115611348538556e-05,
      "loss": 1.5945,
      "step": 3656
    },
    {
      "epoch": 0.7496924969249692,
      "grad_norm": 0.4240016972637633,
      "learning_rate": 3.110796592220322e-05,
      "loss": 1.5532,
      "step": 3657
    },
    {
      "epoch": 0.7498974989749897,
      "grad_norm": 0.41016701176151243,
      "learning_rate": 3.1059848737277144e-05,
      "loss": 1.5155,
      "step": 3658
    },
    {
      "epoch": 0.7501025010250103,
      "grad_norm": 0.4308807045137664,
      "learning_rate": 3.10117619518248e-05,
      "loss": 1.4694,
      "step": 3659
    },
    {
      "epoch": 0.7503075030750308,
      "grad_norm": 0.43167365000093716,
      "learning_rate": 3.096370558705028e-05,
      "loss": 1.4964,
      "step": 3660
    },
    {
      "epoch": 0.7505125051250513,
      "grad_norm": 0.472214979490928,
      "learning_rate": 3.091567966414424e-05,
      "loss": 1.5867,
      "step": 3661
    },
    {
      "epoch": 0.7507175071750718,
      "grad_norm": 0.40197351486886024,
      "learning_rate": 3.086768420428392e-05,
      "loss": 1.5349,
      "step": 3662
    },
    {
      "epoch": 0.7509225092250923,
      "grad_norm": 0.448438770927794,
      "learning_rate": 3.081971922863319e-05,
      "loss": 1.4753,
      "step": 3663
    },
    {
      "epoch": 0.7511275112751128,
      "grad_norm": 0.4077326521952239,
      "learning_rate": 3.07717847583424e-05,
      "loss": 1.5048,
      "step": 3664
    },
    {
      "epoch": 0.7513325133251333,
      "grad_norm": 0.38823784037535536,
      "learning_rate": 3.072388081454848e-05,
      "loss": 1.5137,
      "step": 3665
    },
    {
      "epoch": 0.7515375153751538,
      "grad_norm": 0.47072160673614233,
      "learning_rate": 3.067600741837483e-05,
      "loss": 1.5082,
      "step": 3666
    },
    {
      "epoch": 0.7517425174251743,
      "grad_norm": 0.4735992508468832,
      "learning_rate": 3.0628164590931506e-05,
      "loss": 1.5767,
      "step": 3667
    },
    {
      "epoch": 0.7519475194751948,
      "grad_norm": 0.4488441421334702,
      "learning_rate": 3.0580352353315025e-05,
      "loss": 1.5457,
      "step": 3668
    },
    {
      "epoch": 0.7521525215252153,
      "grad_norm": 0.4401092562868568,
      "learning_rate": 3.05325707266084e-05,
      "loss": 1.5085,
      "step": 3669
    },
    {
      "epoch": 0.7523575235752358,
      "grad_norm": 0.4860138617916938,
      "learning_rate": 3.048481973188114e-05,
      "loss": 1.4712,
      "step": 3670
    },
    {
      "epoch": 0.7525625256252563,
      "grad_norm": 0.4383024949833615,
      "learning_rate": 3.043709939018925e-05,
      "loss": 1.5238,
      "step": 3671
    },
    {
      "epoch": 0.7527675276752768,
      "grad_norm": 0.39104857183460157,
      "learning_rate": 3.0389409722575258e-05,
      "loss": 1.5181,
      "step": 3672
    },
    {
      "epoch": 0.7529725297252973,
      "grad_norm": 0.4595604434658205,
      "learning_rate": 3.0341750750068164e-05,
      "loss": 1.5245,
      "step": 3673
    },
    {
      "epoch": 0.7531775317753178,
      "grad_norm": 0.4868060482313587,
      "learning_rate": 3.0294122493683374e-05,
      "loss": 1.5371,
      "step": 3674
    },
    {
      "epoch": 0.7533825338253383,
      "grad_norm": 0.5347300234622846,
      "learning_rate": 3.0246524974422808e-05,
      "loss": 1.5285,
      "step": 3675
    },
    {
      "epoch": 0.7535875358753588,
      "grad_norm": 0.41544729019195703,
      "learning_rate": 3.0198958213274753e-05,
      "loss": 1.451,
      "step": 3676
    },
    {
      "epoch": 0.7537925379253793,
      "grad_norm": 0.39348240252758643,
      "learning_rate": 3.0151422231214022e-05,
      "loss": 1.4317,
      "step": 3677
    },
    {
      "epoch": 0.7539975399753998,
      "grad_norm": 0.41045158941177545,
      "learning_rate": 3.010391704920187e-05,
      "loss": 1.4807,
      "step": 3678
    },
    {
      "epoch": 0.7542025420254203,
      "grad_norm": 0.4969316040115307,
      "learning_rate": 3.005644268818588e-05,
      "loss": 1.5515,
      "step": 3679
    },
    {
      "epoch": 0.7544075440754408,
      "grad_norm": 0.4212094218849045,
      "learning_rate": 3.0008999169100103e-05,
      "loss": 1.5152,
      "step": 3680
    },
    {
      "epoch": 0.7546125461254612,
      "grad_norm": 0.44353103713081743,
      "learning_rate": 2.9961586512864947e-05,
      "loss": 1.4816,
      "step": 3681
    },
    {
      "epoch": 0.7548175481754817,
      "grad_norm": 0.4032243754555605,
      "learning_rate": 2.991420474038721e-05,
      "loss": 1.5447,
      "step": 3682
    },
    {
      "epoch": 0.7550225502255022,
      "grad_norm": 0.41984635839243095,
      "learning_rate": 2.9866853872560198e-05,
      "loss": 1.5768,
      "step": 3683
    },
    {
      "epoch": 0.7552275522755227,
      "grad_norm": 0.4162000812606667,
      "learning_rate": 2.9819533930263433e-05,
      "loss": 1.513,
      "step": 3684
    },
    {
      "epoch": 0.7554325543255432,
      "grad_norm": 0.45318731289781133,
      "learning_rate": 2.977224493436288e-05,
      "loss": 1.6093,
      "step": 3685
    },
    {
      "epoch": 0.7556375563755637,
      "grad_norm": 0.5160044152215718,
      "learning_rate": 2.9724986905710815e-05,
      "loss": 1.5745,
      "step": 3686
    },
    {
      "epoch": 0.7558425584255842,
      "grad_norm": 0.4170517604407413,
      "learning_rate": 2.967775986514585e-05,
      "loss": 1.5149,
      "step": 3687
    },
    {
      "epoch": 0.7560475604756047,
      "grad_norm": 0.44447149009848486,
      "learning_rate": 2.963056383349301e-05,
      "loss": 1.5859,
      "step": 3688
    },
    {
      "epoch": 0.7562525625256252,
      "grad_norm": 0.4370067312058709,
      "learning_rate": 2.9583398831563625e-05,
      "loss": 1.5362,
      "step": 3689
    },
    {
      "epoch": 0.7564575645756457,
      "grad_norm": 0.4635620192162683,
      "learning_rate": 2.9536264880155285e-05,
      "loss": 1.5339,
      "step": 3690
    },
    {
      "epoch": 0.7566625666256662,
      "grad_norm": 0.44190281887382143,
      "learning_rate": 2.9489162000051918e-05,
      "loss": 1.4467,
      "step": 3691
    },
    {
      "epoch": 0.7568675686756867,
      "grad_norm": 0.45642912860996926,
      "learning_rate": 2.944209021202372e-05,
      "loss": 1.4937,
      "step": 3692
    },
    {
      "epoch": 0.7570725707257072,
      "grad_norm": 0.421560964474826,
      "learning_rate": 2.9395049536827255e-05,
      "loss": 1.4959,
      "step": 3693
    },
    {
      "epoch": 0.7572775727757277,
      "grad_norm": 0.41581850160653455,
      "learning_rate": 2.9348039995205347e-05,
      "loss": 1.528,
      "step": 3694
    },
    {
      "epoch": 0.7574825748257482,
      "grad_norm": 0.42611710433717004,
      "learning_rate": 2.9301061607887025e-05,
      "loss": 1.494,
      "step": 3695
    },
    {
      "epoch": 0.7576875768757687,
      "grad_norm": 0.4052948784561073,
      "learning_rate": 2.925411439558764e-05,
      "loss": 1.447,
      "step": 3696
    },
    {
      "epoch": 0.7578925789257892,
      "grad_norm": 0.41957288838456885,
      "learning_rate": 2.9207198379008736e-05,
      "loss": 1.5447,
      "step": 3697
    },
    {
      "epoch": 0.7580975809758097,
      "grad_norm": 0.40537087922710396,
      "learning_rate": 2.9160313578838184e-05,
      "loss": 1.5367,
      "step": 3698
    },
    {
      "epoch": 0.7583025830258303,
      "grad_norm": 0.4377696412426714,
      "learning_rate": 2.9113460015750072e-05,
      "loss": 1.4184,
      "step": 3699
    },
    {
      "epoch": 0.7585075850758508,
      "grad_norm": 0.4677032980285699,
      "learning_rate": 2.9066637710404675e-05,
      "loss": 1.5133,
      "step": 3700
    },
    {
      "epoch": 0.7587125871258713,
      "grad_norm": 0.4225395198726482,
      "learning_rate": 2.901984668344848e-05,
      "loss": 1.5249,
      "step": 3701
    },
    {
      "epoch": 0.7589175891758918,
      "grad_norm": 0.4499503976105507,
      "learning_rate": 2.8973086955514195e-05,
      "loss": 1.5726,
      "step": 3702
    },
    {
      "epoch": 0.7591225912259123,
      "grad_norm": 0.43737479063184526,
      "learning_rate": 2.892635854722079e-05,
      "loss": 1.5447,
      "step": 3703
    },
    {
      "epoch": 0.7593275932759328,
      "grad_norm": 0.43614796834189923,
      "learning_rate": 2.8879661479173305e-05,
      "loss": 1.4912,
      "step": 3704
    },
    {
      "epoch": 0.7595325953259533,
      "grad_norm": 0.4184546091278081,
      "learning_rate": 2.883299577196308e-05,
      "loss": 1.4966,
      "step": 3705
    },
    {
      "epoch": 0.7597375973759738,
      "grad_norm": 0.42526362858254263,
      "learning_rate": 2.8786361446167554e-05,
      "loss": 1.4569,
      "step": 3706
    },
    {
      "epoch": 0.7599425994259943,
      "grad_norm": 0.4333005626080129,
      "learning_rate": 2.87397585223503e-05,
      "loss": 1.4848,
      "step": 3707
    },
    {
      "epoch": 0.7601476014760148,
      "grad_norm": 0.43166615036754713,
      "learning_rate": 2.8693187021061162e-05,
      "loss": 1.5183,
      "step": 3708
    },
    {
      "epoch": 0.7603526035260353,
      "grad_norm": 0.4484933790335611,
      "learning_rate": 2.8646646962836e-05,
      "loss": 1.5217,
      "step": 3709
    },
    {
      "epoch": 0.7605576055760558,
      "grad_norm": 0.45378671876285576,
      "learning_rate": 2.8600138368196906e-05,
      "loss": 1.4772,
      "step": 3710
    },
    {
      "epoch": 0.7607626076260763,
      "grad_norm": 0.41390657265820774,
      "learning_rate": 2.855366125765204e-05,
      "loss": 1.5491,
      "step": 3711
    },
    {
      "epoch": 0.7609676096760968,
      "grad_norm": 0.4412676751496288,
      "learning_rate": 2.8507215651695662e-05,
      "loss": 1.4833,
      "step": 3712
    },
    {
      "epoch": 0.7611726117261173,
      "grad_norm": 0.3907761825902976,
      "learning_rate": 2.846080157080824e-05,
      "loss": 1.43,
      "step": 3713
    },
    {
      "epoch": 0.7613776137761378,
      "grad_norm": 0.40938901971793557,
      "learning_rate": 2.84144190354562e-05,
      "loss": 1.546,
      "step": 3714
    },
    {
      "epoch": 0.7615826158261583,
      "grad_norm": 0.4446398114123376,
      "learning_rate": 2.83680680660922e-05,
      "loss": 1.5346,
      "step": 3715
    },
    {
      "epoch": 0.7617876178761788,
      "grad_norm": 0.4211783605102619,
      "learning_rate": 2.8321748683154893e-05,
      "loss": 1.4768,
      "step": 3716
    },
    {
      "epoch": 0.7619926199261993,
      "grad_norm": 0.5057481065322842,
      "learning_rate": 2.8275460907068973e-05,
      "loss": 1.5884,
      "step": 3717
    },
    {
      "epoch": 0.7621976219762198,
      "grad_norm": 0.4171268273113572,
      "learning_rate": 2.822920475824531e-05,
      "loss": 1.5121,
      "step": 3718
    },
    {
      "epoch": 0.7624026240262403,
      "grad_norm": 0.41297430867511664,
      "learning_rate": 2.818298025708075e-05,
      "loss": 1.481,
      "step": 3719
    },
    {
      "epoch": 0.7626076260762608,
      "grad_norm": 0.48358578305399613,
      "learning_rate": 2.8136787423958143e-05,
      "loss": 1.6013,
      "step": 3720
    },
    {
      "epoch": 0.7628126281262813,
      "grad_norm": 0.44146485953194536,
      "learning_rate": 2.80906262792465e-05,
      "loss": 1.5073,
      "step": 3721
    },
    {
      "epoch": 0.7630176301763018,
      "grad_norm": 0.45987917876869283,
      "learning_rate": 2.804449684330074e-05,
      "loss": 1.5012,
      "step": 3722
    },
    {
      "epoch": 0.7632226322263223,
      "grad_norm": 0.4481092401353202,
      "learning_rate": 2.7998399136461883e-05,
      "loss": 1.5307,
      "step": 3723
    },
    {
      "epoch": 0.7634276342763427,
      "grad_norm": 0.4293490599275573,
      "learning_rate": 2.795233317905691e-05,
      "loss": 1.4638,
      "step": 3724
    },
    {
      "epoch": 0.7636326363263632,
      "grad_norm": 0.4307387926492202,
      "learning_rate": 2.790629899139878e-05,
      "loss": 1.4905,
      "step": 3725
    },
    {
      "epoch": 0.7638376383763837,
      "grad_norm": 0.419529915008727,
      "learning_rate": 2.786029659378655e-05,
      "loss": 1.5036,
      "step": 3726
    },
    {
      "epoch": 0.7640426404264042,
      "grad_norm": 0.44201203902214875,
      "learning_rate": 2.7814326006505108e-05,
      "loss": 1.4848,
      "step": 3727
    },
    {
      "epoch": 0.7642476424764247,
      "grad_norm": 0.44574520823671954,
      "learning_rate": 2.7768387249825457e-05,
      "loss": 1.4805,
      "step": 3728
    },
    {
      "epoch": 0.7644526445264452,
      "grad_norm": 0.446416947006107,
      "learning_rate": 2.7722480344004488e-05,
      "loss": 1.5296,
      "step": 3729
    },
    {
      "epoch": 0.7646576465764657,
      "grad_norm": 0.4147510631807401,
      "learning_rate": 2.767660530928503e-05,
      "loss": 1.5204,
      "step": 3730
    },
    {
      "epoch": 0.7648626486264862,
      "grad_norm": 0.3886932394257091,
      "learning_rate": 2.7630762165895884e-05,
      "loss": 1.5465,
      "step": 3731
    },
    {
      "epoch": 0.7650676506765067,
      "grad_norm": 0.45896780333900133,
      "learning_rate": 2.7584950934051824e-05,
      "loss": 1.5259,
      "step": 3732
    },
    {
      "epoch": 0.7652726527265272,
      "grad_norm": 0.3553960761467297,
      "learning_rate": 2.753917163395353e-05,
      "loss": 1.4615,
      "step": 3733
    },
    {
      "epoch": 0.7654776547765477,
      "grad_norm": 0.4052434456126481,
      "learning_rate": 2.7493424285787584e-05,
      "loss": 1.4696,
      "step": 3734
    },
    {
      "epoch": 0.7656826568265682,
      "grad_norm": 0.42573193143725796,
      "learning_rate": 2.7447708909726477e-05,
      "loss": 1.4805,
      "step": 3735
    },
    {
      "epoch": 0.7658876588765887,
      "grad_norm": 0.419615061517486,
      "learning_rate": 2.7402025525928586e-05,
      "loss": 1.512,
      "step": 3736
    },
    {
      "epoch": 0.7660926609266092,
      "grad_norm": 0.48440651924170874,
      "learning_rate": 2.7356374154538254e-05,
      "loss": 1.5044,
      "step": 3737
    },
    {
      "epoch": 0.7662976629766297,
      "grad_norm": 0.43254781622370125,
      "learning_rate": 2.7310754815685624e-05,
      "loss": 1.439,
      "step": 3738
    },
    {
      "epoch": 0.7665026650266503,
      "grad_norm": 0.41026555705590156,
      "learning_rate": 2.72651675294868e-05,
      "loss": 1.5002,
      "step": 3739
    },
    {
      "epoch": 0.7667076670766708,
      "grad_norm": 0.42262351065521786,
      "learning_rate": 2.7219612316043675e-05,
      "loss": 1.5122,
      "step": 3740
    },
    {
      "epoch": 0.7669126691266913,
      "grad_norm": 0.4161255322382909,
      "learning_rate": 2.7174089195443987e-05,
      "loss": 1.5414,
      "step": 3741
    },
    {
      "epoch": 0.7671176711767118,
      "grad_norm": 0.45746283051097364,
      "learning_rate": 2.7128598187761445e-05,
      "loss": 1.4854,
      "step": 3742
    },
    {
      "epoch": 0.7673226732267323,
      "grad_norm": 0.43693662542708245,
      "learning_rate": 2.7083139313055427e-05,
      "loss": 1.5436,
      "step": 3743
    },
    {
      "epoch": 0.7675276752767528,
      "grad_norm": 0.4130326354056413,
      "learning_rate": 2.7037712591371322e-05,
      "loss": 1.4443,
      "step": 3744
    },
    {
      "epoch": 0.7677326773267733,
      "grad_norm": 0.4418125758373944,
      "learning_rate": 2.6992318042740207e-05,
      "loss": 1.5299,
      "step": 3745
    },
    {
      "epoch": 0.7679376793767938,
      "grad_norm": 0.416507573368006,
      "learning_rate": 2.6946955687178997e-05,
      "loss": 1.5628,
      "step": 3746
    },
    {
      "epoch": 0.7681426814268143,
      "grad_norm": 0.4767874644098243,
      "learning_rate": 2.6901625544690434e-05,
      "loss": 1.5442,
      "step": 3747
    },
    {
      "epoch": 0.7683476834768348,
      "grad_norm": 0.4311472497908615,
      "learning_rate": 2.6856327635263045e-05,
      "loss": 1.5098,
      "step": 3748
    },
    {
      "epoch": 0.7685526855268553,
      "grad_norm": 0.41426591296824494,
      "learning_rate": 2.681106197887121e-05,
      "loss": 1.504,
      "step": 3749
    },
    {
      "epoch": 0.7687576875768758,
      "grad_norm": 0.49482238443008325,
      "learning_rate": 2.6765828595474984e-05,
      "loss": 1.5308,
      "step": 3750
    },
    {
      "epoch": 0.7689626896268963,
      "grad_norm": 0.4224667104451078,
      "learning_rate": 2.6720627505020223e-05,
      "loss": 1.4902,
      "step": 3751
    },
    {
      "epoch": 0.7691676916769168,
      "grad_norm": 0.423281626185111,
      "learning_rate": 2.667545872743854e-05,
      "loss": 1.4967,
      "step": 3752
    },
    {
      "epoch": 0.7693726937269373,
      "grad_norm": 0.4742526981176098,
      "learning_rate": 2.6630322282647334e-05,
      "loss": 1.527,
      "step": 3753
    },
    {
      "epoch": 0.7695776957769578,
      "grad_norm": 0.3730177478733768,
      "learning_rate": 2.6585218190549765e-05,
      "loss": 1.4448,
      "step": 3754
    },
    {
      "epoch": 0.7697826978269783,
      "grad_norm": 0.41758851794312185,
      "learning_rate": 2.6540146471034655e-05,
      "loss": 1.5105,
      "step": 3755
    },
    {
      "epoch": 0.7699876998769988,
      "grad_norm": 0.4211132054987742,
      "learning_rate": 2.6495107143976572e-05,
      "loss": 1.4988,
      "step": 3756
    },
    {
      "epoch": 0.7701927019270193,
      "grad_norm": 0.43243630872812017,
      "learning_rate": 2.6450100229235795e-05,
      "loss": 1.514,
      "step": 3757
    },
    {
      "epoch": 0.7703977039770398,
      "grad_norm": 0.3954026428145335,
      "learning_rate": 2.6405125746658354e-05,
      "loss": 1.468,
      "step": 3758
    },
    {
      "epoch": 0.7706027060270603,
      "grad_norm": 0.44975464494216955,
      "learning_rate": 2.636018371607598e-05,
      "loss": 1.524,
      "step": 3759
    },
    {
      "epoch": 0.7708077080770808,
      "grad_norm": 0.46102853090538465,
      "learning_rate": 2.6315274157306037e-05,
      "loss": 1.525,
      "step": 3760
    },
    {
      "epoch": 0.7710127101271013,
      "grad_norm": 0.43896308471768103,
      "learning_rate": 2.62703970901516e-05,
      "loss": 1.5073,
      "step": 3761
    },
    {
      "epoch": 0.7712177121771218,
      "grad_norm": 0.45642037283080394,
      "learning_rate": 2.6225552534401388e-05,
      "loss": 1.5704,
      "step": 3762
    },
    {
      "epoch": 0.7714227142271423,
      "grad_norm": 0.4447712367579291,
      "learning_rate": 2.618074050982985e-05,
      "loss": 1.5141,
      "step": 3763
    },
    {
      "epoch": 0.7716277162771628,
      "grad_norm": 0.4482384118927603,
      "learning_rate": 2.6135961036197064e-05,
      "loss": 1.5647,
      "step": 3764
    },
    {
      "epoch": 0.7718327183271833,
      "grad_norm": 0.39946221087218386,
      "learning_rate": 2.609121413324872e-05,
      "loss": 1.4841,
      "step": 3765
    },
    {
      "epoch": 0.7720377203772038,
      "grad_norm": 0.4481475554027438,
      "learning_rate": 2.6046499820716175e-05,
      "loss": 1.4693,
      "step": 3766
    },
    {
      "epoch": 0.7722427224272242,
      "grad_norm": 0.44491421232607,
      "learning_rate": 2.600181811831638e-05,
      "loss": 1.5038,
      "step": 3767
    },
    {
      "epoch": 0.7724477244772447,
      "grad_norm": 0.41022193169088217,
      "learning_rate": 2.595716904575196e-05,
      "loss": 1.5196,
      "step": 3768
    },
    {
      "epoch": 0.7726527265272652,
      "grad_norm": 0.44776584797913777,
      "learning_rate": 2.591255262271115e-05,
      "loss": 1.5324,
      "step": 3769
    },
    {
      "epoch": 0.7728577285772857,
      "grad_norm": 0.4468971228438905,
      "learning_rate": 2.5867968868867742e-05,
      "loss": 1.4574,
      "step": 3770
    },
    {
      "epoch": 0.7730627306273062,
      "grad_norm": 0.4154437197892019,
      "learning_rate": 2.5823417803881146e-05,
      "loss": 1.5207,
      "step": 3771
    },
    {
      "epoch": 0.7732677326773267,
      "grad_norm": 0.40255791747056074,
      "learning_rate": 2.5778899447396333e-05,
      "loss": 1.5128,
      "step": 3772
    },
    {
      "epoch": 0.7734727347273472,
      "grad_norm": 0.4227332462897162,
      "learning_rate": 2.573441381904389e-05,
      "loss": 1.4981,
      "step": 3773
    },
    {
      "epoch": 0.7736777367773677,
      "grad_norm": 0.4548120702954003,
      "learning_rate": 2.5689960938440007e-05,
      "loss": 1.5347,
      "step": 3774
    },
    {
      "epoch": 0.7738827388273882,
      "grad_norm": 0.4105391306983932,
      "learning_rate": 2.564554082518633e-05,
      "loss": 1.5332,
      "step": 3775
    },
    {
      "epoch": 0.7740877408774087,
      "grad_norm": 0.4425576334630604,
      "learning_rate": 2.5601153498870134e-05,
      "loss": 1.4919,
      "step": 3776
    },
    {
      "epoch": 0.7742927429274292,
      "grad_norm": 0.40735099549486947,
      "learning_rate": 2.5556798979064167e-05,
      "loss": 1.4936,
      "step": 3777
    },
    {
      "epoch": 0.7744977449774497,
      "grad_norm": 0.4295662874853571,
      "learning_rate": 2.551247728532682e-05,
      "loss": 1.5295,
      "step": 3778
    },
    {
      "epoch": 0.7747027470274703,
      "grad_norm": 0.4294894073726961,
      "learning_rate": 2.546818843720189e-05,
      "loss": 1.5059,
      "step": 3779
    },
    {
      "epoch": 0.7749077490774908,
      "grad_norm": 0.4071088870604697,
      "learning_rate": 2.5423932454218802e-05,
      "loss": 1.4806,
      "step": 3780
    },
    {
      "epoch": 0.7751127511275113,
      "grad_norm": 0.40890864399769267,
      "learning_rate": 2.53797093558924e-05,
      "loss": 1.5332,
      "step": 3781
    },
    {
      "epoch": 0.7753177531775318,
      "grad_norm": 0.3821532203417036,
      "learning_rate": 2.5335519161723042e-05,
      "loss": 1.4722,
      "step": 3782
    },
    {
      "epoch": 0.7755227552275523,
      "grad_norm": 0.44180206360413943,
      "learning_rate": 2.5291361891196652e-05,
      "loss": 1.5044,
      "step": 3783
    },
    {
      "epoch": 0.7757277572775728,
      "grad_norm": 0.4375162346001822,
      "learning_rate": 2.524723756378452e-05,
      "loss": 1.5256,
      "step": 3784
    },
    {
      "epoch": 0.7759327593275933,
      "grad_norm": 0.4520353377518609,
      "learning_rate": 2.5203146198943518e-05,
      "loss": 1.4936,
      "step": 3785
    },
    {
      "epoch": 0.7761377613776138,
      "grad_norm": 0.4239453294922117,
      "learning_rate": 2.515908781611591e-05,
      "loss": 1.4457,
      "step": 3786
    },
    {
      "epoch": 0.7763427634276343,
      "grad_norm": 0.4736515691015482,
      "learning_rate": 2.511506243472944e-05,
      "loss": 1.5642,
      "step": 3787
    },
    {
      "epoch": 0.7765477654776548,
      "grad_norm": 0.4586532717800692,
      "learning_rate": 2.507107007419729e-05,
      "loss": 1.5255,
      "step": 3788
    },
    {
      "epoch": 0.7767527675276753,
      "grad_norm": 0.4602919398343417,
      "learning_rate": 2.5027110753918094e-05,
      "loss": 1.4812,
      "step": 3789
    },
    {
      "epoch": 0.7769577695776958,
      "grad_norm": 0.45572410680384345,
      "learning_rate": 2.4983184493275947e-05,
      "loss": 1.5065,
      "step": 3790
    },
    {
      "epoch": 0.7771627716277163,
      "grad_norm": 0.39740112063896643,
      "learning_rate": 2.4939291311640324e-05,
      "loss": 1.4841,
      "step": 3791
    },
    {
      "epoch": 0.7773677736777368,
      "grad_norm": 0.4497098650548763,
      "learning_rate": 2.4895431228366107e-05,
      "loss": 1.509,
      "step": 3792
    },
    {
      "epoch": 0.7775727757277573,
      "grad_norm": 0.4824480597776566,
      "learning_rate": 2.485160426279357e-05,
      "loss": 1.5171,
      "step": 3793
    },
    {
      "epoch": 0.7777777777777778,
      "grad_norm": 0.4018467361664024,
      "learning_rate": 2.480781043424849e-05,
      "loss": 1.4425,
      "step": 3794
    },
    {
      "epoch": 0.7779827798277983,
      "grad_norm": 0.4574345048065237,
      "learning_rate": 2.4764049762041874e-05,
      "loss": 1.4661,
      "step": 3795
    },
    {
      "epoch": 0.7781877818778188,
      "grad_norm": 0.48877264975976215,
      "learning_rate": 2.4720322265470254e-05,
      "loss": 1.5726,
      "step": 3796
    },
    {
      "epoch": 0.7783927839278393,
      "grad_norm": 0.3935915969637343,
      "learning_rate": 2.4676627963815447e-05,
      "loss": 1.4706,
      "step": 3797
    },
    {
      "epoch": 0.7785977859778598,
      "grad_norm": 0.4493680826055248,
      "learning_rate": 2.463296687634462e-05,
      "loss": 1.4976,
      "step": 3798
    },
    {
      "epoch": 0.7788027880278803,
      "grad_norm": 0.43619506147666215,
      "learning_rate": 2.4589339022310386e-05,
      "loss": 1.5902,
      "step": 3799
    },
    {
      "epoch": 0.7790077900779008,
      "grad_norm": 0.4270168149107795,
      "learning_rate": 2.454574442095059e-05,
      "loss": 1.4938,
      "step": 3800
    },
    {
      "epoch": 0.7792127921279213,
      "grad_norm": 0.46626381524641214,
      "learning_rate": 2.4502183091488527e-05,
      "loss": 1.5499,
      "step": 3801
    },
    {
      "epoch": 0.7794177941779418,
      "grad_norm": 0.4406457859726337,
      "learning_rate": 2.445865505313274e-05,
      "loss": 1.5216,
      "step": 3802
    },
    {
      "epoch": 0.7796227962279623,
      "grad_norm": 0.4267729505363921,
      "learning_rate": 2.441516032507708e-05,
      "loss": 1.4458,
      "step": 3803
    },
    {
      "epoch": 0.7798277982779828,
      "grad_norm": 0.42507403015298584,
      "learning_rate": 2.4371698926500806e-05,
      "loss": 1.498,
      "step": 3804
    },
    {
      "epoch": 0.7800328003280033,
      "grad_norm": 0.4601021312533811,
      "learning_rate": 2.432827087656836e-05,
      "loss": 1.523,
      "step": 3805
    },
    {
      "epoch": 0.7802378023780238,
      "grad_norm": 0.4462546492731634,
      "learning_rate": 2.4284876194429596e-05,
      "loss": 1.6108,
      "step": 3806
    },
    {
      "epoch": 0.7804428044280443,
      "grad_norm": 0.4088646354381824,
      "learning_rate": 2.4241514899219575e-05,
      "loss": 1.5244,
      "step": 3807
    },
    {
      "epoch": 0.7806478064780648,
      "grad_norm": 0.4644591353783464,
      "learning_rate": 2.4198187010058614e-05,
      "loss": 1.4856,
      "step": 3808
    },
    {
      "epoch": 0.7808528085280853,
      "grad_norm": 0.471857822205609,
      "learning_rate": 2.415489254605242e-05,
      "loss": 1.5513,
      "step": 3809
    },
    {
      "epoch": 0.7810578105781057,
      "grad_norm": 0.4721726959382495,
      "learning_rate": 2.4111631526291846e-05,
      "loss": 1.5458,
      "step": 3810
    },
    {
      "epoch": 0.7812628126281262,
      "grad_norm": 0.4426483710039198,
      "learning_rate": 2.4068403969852992e-05,
      "loss": 1.5261,
      "step": 3811
    },
    {
      "epoch": 0.7814678146781467,
      "grad_norm": 0.4153366517004238,
      "learning_rate": 2.4025209895797328e-05,
      "loss": 1.4858,
      "step": 3812
    },
    {
      "epoch": 0.7816728167281672,
      "grad_norm": 0.4335632999552693,
      "learning_rate": 2.3982049323171407e-05,
      "loss": 1.5498,
      "step": 3813
    },
    {
      "epoch": 0.7818778187781877,
      "grad_norm": 0.4467160923534647,
      "learning_rate": 2.3938922271007147e-05,
      "loss": 1.5025,
      "step": 3814
    },
    {
      "epoch": 0.7820828208282082,
      "grad_norm": 0.42915778177766384,
      "learning_rate": 2.389582875832157e-05,
      "loss": 1.5277,
      "step": 3815
    },
    {
      "epoch": 0.7822878228782287,
      "grad_norm": 0.44923044257124095,
      "learning_rate": 2.3852768804116955e-05,
      "loss": 1.5139,
      "step": 3816
    },
    {
      "epoch": 0.7824928249282492,
      "grad_norm": 0.4593342289691706,
      "learning_rate": 2.3809742427380823e-05,
      "loss": 1.4862,
      "step": 3817
    },
    {
      "epoch": 0.7826978269782697,
      "grad_norm": 0.4280863693950598,
      "learning_rate": 2.3766749647085783e-05,
      "loss": 1.4976,
      "step": 3818
    },
    {
      "epoch": 0.7829028290282903,
      "grad_norm": 0.4577424306022073,
      "learning_rate": 2.372379048218979e-05,
      "loss": 1.5314,
      "step": 3819
    },
    {
      "epoch": 0.7831078310783108,
      "grad_norm": 0.4543289912894854,
      "learning_rate": 2.3680864951635828e-05,
      "loss": 1.4736,
      "step": 3820
    },
    {
      "epoch": 0.7833128331283313,
      "grad_norm": 0.42654384403920753,
      "learning_rate": 2.3637973074352114e-05,
      "loss": 1.5521,
      "step": 3821
    },
    {
      "epoch": 0.7835178351783518,
      "grad_norm": 0.4562389187444123,
      "learning_rate": 2.359511486925199e-05,
      "loss": 1.5124,
      "step": 3822
    },
    {
      "epoch": 0.7837228372283723,
      "grad_norm": 0.40968177686695917,
      "learning_rate": 2.3552290355233998e-05,
      "loss": 1.4837,
      "step": 3823
    },
    {
      "epoch": 0.7839278392783928,
      "grad_norm": 0.4059699670313258,
      "learning_rate": 2.3509499551181825e-05,
      "loss": 1.4929,
      "step": 3824
    },
    {
      "epoch": 0.7841328413284133,
      "grad_norm": 0.405307950074977,
      "learning_rate": 2.3466742475964264e-05,
      "loss": 1.5005,
      "step": 3825
    },
    {
      "epoch": 0.7843378433784338,
      "grad_norm": 0.40571983667174233,
      "learning_rate": 2.342401914843523e-05,
      "loss": 1.4375,
      "step": 3826
    },
    {
      "epoch": 0.7845428454284543,
      "grad_norm": 0.44588680860509766,
      "learning_rate": 2.3381329587433732e-05,
      "loss": 1.5187,
      "step": 3827
    },
    {
      "epoch": 0.7847478474784748,
      "grad_norm": 0.4251091228911665,
      "learning_rate": 2.3338673811783973e-05,
      "loss": 1.5011,
      "step": 3828
    },
    {
      "epoch": 0.7849528495284953,
      "grad_norm": 0.5775313580442965,
      "learning_rate": 2.329605184029523e-05,
      "loss": 1.4137,
      "step": 3829
    },
    {
      "epoch": 0.7851578515785158,
      "grad_norm": 0.4372130112039747,
      "learning_rate": 2.3253463691761822e-05,
      "loss": 1.5516,
      "step": 3830
    },
    {
      "epoch": 0.7853628536285363,
      "grad_norm": 0.40720589927125134,
      "learning_rate": 2.3210909384963196e-05,
      "loss": 1.4588,
      "step": 3831
    },
    {
      "epoch": 0.7855678556785568,
      "grad_norm": 0.4308913794465252,
      "learning_rate": 2.3168388938663834e-05,
      "loss": 1.4919,
      "step": 3832
    },
    {
      "epoch": 0.7857728577285773,
      "grad_norm": 0.45017543815028255,
      "learning_rate": 2.312590237161335e-05,
      "loss": 1.5043,
      "step": 3833
    },
    {
      "epoch": 0.7859778597785978,
      "grad_norm": 0.42222677565598876,
      "learning_rate": 2.3083449702546424e-05,
      "loss": 1.4907,
      "step": 3834
    },
    {
      "epoch": 0.7861828618286183,
      "grad_norm": 0.46441047332515534,
      "learning_rate": 2.3041030950182706e-05,
      "loss": 1.5141,
      "step": 3835
    },
    {
      "epoch": 0.7863878638786388,
      "grad_norm": 0.43232420546690575,
      "learning_rate": 2.2998646133226966e-05,
      "loss": 1.5137,
      "step": 3836
    },
    {
      "epoch": 0.7865928659286593,
      "grad_norm": 0.4416596017767906,
      "learning_rate": 2.2956295270368965e-05,
      "loss": 1.5068,
      "step": 3837
    },
    {
      "epoch": 0.7867978679786798,
      "grad_norm": 0.4207706303938015,
      "learning_rate": 2.2913978380283452e-05,
      "loss": 1.5564,
      "step": 3838
    },
    {
      "epoch": 0.7870028700287003,
      "grad_norm": 0.4533635629054324,
      "learning_rate": 2.2871695481630374e-05,
      "loss": 1.5492,
      "step": 3839
    },
    {
      "epoch": 0.7872078720787208,
      "grad_norm": 0.39896485535293114,
      "learning_rate": 2.2829446593054493e-05,
      "loss": 1.521,
      "step": 3840
    },
    {
      "epoch": 0.7874128741287413,
      "grad_norm": 0.3840082909604336,
      "learning_rate": 2.2787231733185677e-05,
      "loss": 1.3995,
      "step": 3841
    },
    {
      "epoch": 0.7876178761787618,
      "grad_norm": 0.44081458862908096,
      "learning_rate": 2.274505092063873e-05,
      "loss": 1.5166,
      "step": 3842
    },
    {
      "epoch": 0.7878228782287823,
      "grad_norm": 0.4761511352908657,
      "learning_rate": 2.2702904174013473e-05,
      "loss": 1.5456,
      "step": 3843
    },
    {
      "epoch": 0.7880278802788028,
      "grad_norm": 0.41244978122658543,
      "learning_rate": 2.266079151189472e-05,
      "loss": 1.5009,
      "step": 3844
    },
    {
      "epoch": 0.7882328823288233,
      "grad_norm": 0.41179707509773916,
      "learning_rate": 2.2618712952852272e-05,
      "loss": 1.4848,
      "step": 3845
    },
    {
      "epoch": 0.7884378843788438,
      "grad_norm": 0.40759702927550595,
      "learning_rate": 2.2576668515440825e-05,
      "loss": 1.5098,
      "step": 3846
    },
    {
      "epoch": 0.7886428864288643,
      "grad_norm": 0.43357995391244253,
      "learning_rate": 2.2534658218200076e-05,
      "loss": 1.4126,
      "step": 3847
    },
    {
      "epoch": 0.7888478884788848,
      "grad_norm": 0.43548024078421804,
      "learning_rate": 2.2492682079654636e-05,
      "loss": 1.4655,
      "step": 3848
    },
    {
      "epoch": 0.7890528905289053,
      "grad_norm": 0.4647389956747395,
      "learning_rate": 2.2450740118314085e-05,
      "loss": 1.4997,
      "step": 3849
    },
    {
      "epoch": 0.7892578925789258,
      "grad_norm": 0.4407727544031389,
      "learning_rate": 2.2408832352672947e-05,
      "loss": 1.5237,
      "step": 3850
    },
    {
      "epoch": 0.7894628946289463,
      "grad_norm": 0.42325027119403114,
      "learning_rate": 2.2366958801210636e-05,
      "loss": 1.5467,
      "step": 3851
    },
    {
      "epoch": 0.7896678966789668,
      "grad_norm": 0.4578310381512447,
      "learning_rate": 2.2325119482391467e-05,
      "loss": 1.5244,
      "step": 3852
    },
    {
      "epoch": 0.7898728987289872,
      "grad_norm": 0.44357675293153465,
      "learning_rate": 2.2283314414664656e-05,
      "loss": 1.5064,
      "step": 3853
    },
    {
      "epoch": 0.7900779007790077,
      "grad_norm": 0.4401491866332217,
      "learning_rate": 2.2241543616464377e-05,
      "loss": 1.5059,
      "step": 3854
    },
    {
      "epoch": 0.7902829028290282,
      "grad_norm": 0.46135008317059095,
      "learning_rate": 2.2199807106209668e-05,
      "loss": 1.5634,
      "step": 3855
    },
    {
      "epoch": 0.7904879048790487,
      "grad_norm": 0.4694352985697873,
      "learning_rate": 2.21581049023044e-05,
      "loss": 1.4694,
      "step": 3856
    },
    {
      "epoch": 0.7906929069290692,
      "grad_norm": 0.4392205865831773,
      "learning_rate": 2.211643702313736e-05,
      "loss": 1.53,
      "step": 3857
    },
    {
      "epoch": 0.7908979089790897,
      "grad_norm": 0.41607616712802875,
      "learning_rate": 2.2074803487082162e-05,
      "loss": 1.4657,
      "step": 3858
    },
    {
      "epoch": 0.7911029110291103,
      "grad_norm": 0.44218210236891825,
      "learning_rate": 2.2033204312497334e-05,
      "loss": 1.5534,
      "step": 3859
    },
    {
      "epoch": 0.7913079130791308,
      "grad_norm": 0.4932536598157732,
      "learning_rate": 2.1991639517726237e-05,
      "loss": 1.5265,
      "step": 3860
    },
    {
      "epoch": 0.7915129151291513,
      "grad_norm": 0.4105257144866682,
      "learning_rate": 2.195010912109704e-05,
      "loss": 1.4614,
      "step": 3861
    },
    {
      "epoch": 0.7917179171791718,
      "grad_norm": 0.4672175431037312,
      "learning_rate": 2.1908613140922763e-05,
      "loss": 1.5033,
      "step": 3862
    },
    {
      "epoch": 0.7919229192291923,
      "grad_norm": 0.46516287665259515,
      "learning_rate": 2.18671515955012e-05,
      "loss": 1.476,
      "step": 3863
    },
    {
      "epoch": 0.7921279212792128,
      "grad_norm": 0.46221775791896275,
      "learning_rate": 2.1825724503115062e-05,
      "loss": 1.5368,
      "step": 3864
    },
    {
      "epoch": 0.7923329233292333,
      "grad_norm": 0.41708550953083207,
      "learning_rate": 2.1784331882031816e-05,
      "loss": 1.5361,
      "step": 3865
    },
    {
      "epoch": 0.7925379253792538,
      "grad_norm": 0.45251360223720916,
      "learning_rate": 2.1742973750503725e-05,
      "loss": 1.5699,
      "step": 3866
    },
    {
      "epoch": 0.7927429274292743,
      "grad_norm": 0.46660172623426666,
      "learning_rate": 2.1701650126767824e-05,
      "loss": 1.5297,
      "step": 3867
    },
    {
      "epoch": 0.7929479294792948,
      "grad_norm": 0.44926145668220285,
      "learning_rate": 2.166036102904594e-05,
      "loss": 1.4898,
      "step": 3868
    },
    {
      "epoch": 0.7931529315293153,
      "grad_norm": 0.41078091933158717,
      "learning_rate": 2.1619106475544738e-05,
      "loss": 1.5204,
      "step": 3869
    },
    {
      "epoch": 0.7933579335793358,
      "grad_norm": 0.4512073660324291,
      "learning_rate": 2.1577886484455535e-05,
      "loss": 1.467,
      "step": 3870
    },
    {
      "epoch": 0.7935629356293563,
      "grad_norm": 0.42707121612272103,
      "learning_rate": 2.1536701073954558e-05,
      "loss": 1.4385,
      "step": 3871
    },
    {
      "epoch": 0.7937679376793768,
      "grad_norm": 0.44128032778263787,
      "learning_rate": 2.1495550262202645e-05,
      "loss": 1.5384,
      "step": 3872
    },
    {
      "epoch": 0.7939729397293973,
      "grad_norm": 0.40817953139629315,
      "learning_rate": 2.145443406734542e-05,
      "loss": 1.4615,
      "step": 3873
    },
    {
      "epoch": 0.7941779417794178,
      "grad_norm": 0.46034884654284425,
      "learning_rate": 2.141335250751331e-05,
      "loss": 1.5602,
      "step": 3874
    },
    {
      "epoch": 0.7943829438294383,
      "grad_norm": 0.43680395226362767,
      "learning_rate": 2.1372305600821353e-05,
      "loss": 1.5229,
      "step": 3875
    },
    {
      "epoch": 0.7945879458794588,
      "grad_norm": 0.42813043860691685,
      "learning_rate": 2.133129336536944e-05,
      "loss": 1.5277,
      "step": 3876
    },
    {
      "epoch": 0.7947929479294793,
      "grad_norm": 0.42083255647281076,
      "learning_rate": 2.1290315819242067e-05,
      "loss": 1.5161,
      "step": 3877
    },
    {
      "epoch": 0.7949979499794998,
      "grad_norm": 0.4183760460201756,
      "learning_rate": 2.124937298050843e-05,
      "loss": 1.4829,
      "step": 3878
    },
    {
      "epoch": 0.7952029520295203,
      "grad_norm": 0.43184957674153296,
      "learning_rate": 2.1208464867222544e-05,
      "loss": 1.517,
      "step": 3879
    },
    {
      "epoch": 0.7954079540795408,
      "grad_norm": 0.4963474553298808,
      "learning_rate": 2.1167591497422943e-05,
      "loss": 1.5076,
      "step": 3880
    },
    {
      "epoch": 0.7956129561295613,
      "grad_norm": 0.4104173597290443,
      "learning_rate": 2.1126752889133007e-05,
      "loss": 1.4726,
      "step": 3881
    },
    {
      "epoch": 0.7958179581795818,
      "grad_norm": 0.4510374130605548,
      "learning_rate": 2.1085949060360654e-05,
      "loss": 1.484,
      "step": 3882
    },
    {
      "epoch": 0.7960229602296023,
      "grad_norm": 0.43901542900714546,
      "learning_rate": 2.104518002909851e-05,
      "loss": 1.5393,
      "step": 3883
    },
    {
      "epoch": 0.7962279622796228,
      "grad_norm": 0.43084078735416814,
      "learning_rate": 2.1004445813323904e-05,
      "loss": 1.531,
      "step": 3884
    },
    {
      "epoch": 0.7964329643296433,
      "grad_norm": 0.4016919037064875,
      "learning_rate": 2.0963746430998756e-05,
      "loss": 1.4731,
      "step": 3885
    },
    {
      "epoch": 0.7966379663796638,
      "grad_norm": 0.45113990388412745,
      "learning_rate": 2.0923081900069618e-05,
      "loss": 1.4641,
      "step": 3886
    },
    {
      "epoch": 0.7968429684296843,
      "grad_norm": 0.4419446300529957,
      "learning_rate": 2.0882452238467755e-05,
      "loss": 1.487,
      "step": 3887
    },
    {
      "epoch": 0.7970479704797048,
      "grad_norm": 0.3995835644738447,
      "learning_rate": 2.084185746410894e-05,
      "loss": 1.4708,
      "step": 3888
    },
    {
      "epoch": 0.7972529725297253,
      "grad_norm": 0.4434416885001936,
      "learning_rate": 2.0801297594893687e-05,
      "loss": 1.5754,
      "step": 3889
    },
    {
      "epoch": 0.7974579745797458,
      "grad_norm": 0.47581857973876973,
      "learning_rate": 2.0760772648707016e-05,
      "loss": 1.5696,
      "step": 3890
    },
    {
      "epoch": 0.7976629766297663,
      "grad_norm": 0.4605248574137105,
      "learning_rate": 2.0720282643418576e-05,
      "loss": 1.5073,
      "step": 3891
    },
    {
      "epoch": 0.7978679786797868,
      "grad_norm": 0.4230751268743984,
      "learning_rate": 2.0679827596882663e-05,
      "loss": 1.4895,
      "step": 3892
    },
    {
      "epoch": 0.7980729807298073,
      "grad_norm": 0.4258769779341127,
      "learning_rate": 2.0639407526938082e-05,
      "loss": 1.4905,
      "step": 3893
    },
    {
      "epoch": 0.7982779827798278,
      "grad_norm": 0.4480096158252072,
      "learning_rate": 2.0599022451408222e-05,
      "loss": 1.4983,
      "step": 3894
    },
    {
      "epoch": 0.7984829848298483,
      "grad_norm": 0.4550163765958765,
      "learning_rate": 2.055867238810113e-05,
      "loss": 1.5582,
      "step": 3895
    },
    {
      "epoch": 0.7986879868798687,
      "grad_norm": 0.4391129718992249,
      "learning_rate": 2.0518357354809293e-05,
      "loss": 1.4178,
      "step": 3896
    },
    {
      "epoch": 0.7988929889298892,
      "grad_norm": 0.4421320958025758,
      "learning_rate": 2.0478077369309855e-05,
      "loss": 1.4899,
      "step": 3897
    },
    {
      "epoch": 0.7990979909799097,
      "grad_norm": 0.4022570824055907,
      "learning_rate": 2.0437832449364447e-05,
      "loss": 1.515,
      "step": 3898
    },
    {
      "epoch": 0.7993029930299304,
      "grad_norm": 0.4054839765666077,
      "learning_rate": 2.0397622612719202e-05,
      "loss": 1.4254,
      "step": 3899
    },
    {
      "epoch": 0.7995079950799509,
      "grad_norm": 0.4223934393524632,
      "learning_rate": 2.0357447877104895e-05,
      "loss": 1.4388,
      "step": 3900
    },
    {
      "epoch": 0.7997129971299713,
      "grad_norm": 0.41923905777489334,
      "learning_rate": 2.0317308260236732e-05,
      "loss": 1.4717,
      "step": 3901
    },
    {
      "epoch": 0.7999179991799918,
      "grad_norm": 0.4094556483191631,
      "learning_rate": 2.0277203779814447e-05,
      "loss": 1.5066,
      "step": 3902
    },
    {
      "epoch": 0.8001230012300123,
      "grad_norm": 0.4396441396721408,
      "learning_rate": 2.023713445352232e-05,
      "loss": 1.4918,
      "step": 3903
    },
    {
      "epoch": 0.8003280032800328,
      "grad_norm": 0.42859447348634844,
      "learning_rate": 2.0197100299029058e-05,
      "loss": 1.5656,
      "step": 3904
    },
    {
      "epoch": 0.8005330053300533,
      "grad_norm": 0.4468662706630017,
      "learning_rate": 2.015710133398797e-05,
      "loss": 1.5262,
      "step": 3905
    },
    {
      "epoch": 0.8007380073800738,
      "grad_norm": 0.43107449757424876,
      "learning_rate": 2.011713757603675e-05,
      "loss": 1.5235,
      "step": 3906
    },
    {
      "epoch": 0.8009430094300943,
      "grad_norm": 0.4134033966088403,
      "learning_rate": 2.0077209042797562e-05,
      "loss": 1.5365,
      "step": 3907
    },
    {
      "epoch": 0.8011480114801148,
      "grad_norm": 0.44781800542569394,
      "learning_rate": 2.003731575187714e-05,
      "loss": 1.5678,
      "step": 3908
    },
    {
      "epoch": 0.8013530135301353,
      "grad_norm": 0.4370194704013605,
      "learning_rate": 1.999745772086655e-05,
      "loss": 1.4628,
      "step": 3909
    },
    {
      "epoch": 0.8015580155801558,
      "grad_norm": 0.45195350027248254,
      "learning_rate": 1.995763496734143e-05,
      "loss": 1.47,
      "step": 3910
    },
    {
      "epoch": 0.8017630176301763,
      "grad_norm": 0.46484496837610184,
      "learning_rate": 1.9917847508861775e-05,
      "loss": 1.5817,
      "step": 3911
    },
    {
      "epoch": 0.8019680196801968,
      "grad_norm": 0.43686203379316896,
      "learning_rate": 1.9878095362972037e-05,
      "loss": 1.515,
      "step": 3912
    },
    {
      "epoch": 0.8021730217302173,
      "grad_norm": 0.3990794971106179,
      "learning_rate": 1.9838378547201132e-05,
      "loss": 1.4321,
      "step": 3913
    },
    {
      "epoch": 0.8023780237802378,
      "grad_norm": 0.38340188102664985,
      "learning_rate": 1.9798697079062332e-05,
      "loss": 1.5401,
      "step": 3914
    },
    {
      "epoch": 0.8025830258302583,
      "grad_norm": 0.41838839024518004,
      "learning_rate": 1.9759050976053407e-05,
      "loss": 1.4874,
      "step": 3915
    },
    {
      "epoch": 0.8027880278802788,
      "grad_norm": 0.39920374043593765,
      "learning_rate": 1.9719440255656474e-05,
      "loss": 1.4701,
      "step": 3916
    },
    {
      "epoch": 0.8029930299302993,
      "grad_norm": 0.4104848011698136,
      "learning_rate": 1.9679864935338042e-05,
      "loss": 1.5035,
      "step": 3917
    },
    {
      "epoch": 0.8031980319803198,
      "grad_norm": 0.41447789993178596,
      "learning_rate": 1.9640325032549024e-05,
      "loss": 1.4034,
      "step": 3918
    },
    {
      "epoch": 0.8034030340303403,
      "grad_norm": 0.42359174165888985,
      "learning_rate": 1.960082056472473e-05,
      "loss": 1.5512,
      "step": 3919
    },
    {
      "epoch": 0.8036080360803608,
      "grad_norm": 0.4631687679898118,
      "learning_rate": 1.956135154928487e-05,
      "loss": 1.5347,
      "step": 3920
    },
    {
      "epoch": 0.8038130381303813,
      "grad_norm": 0.405143043430309,
      "learning_rate": 1.9521918003633442e-05,
      "loss": 1.502,
      "step": 3921
    },
    {
      "epoch": 0.8040180401804018,
      "grad_norm": 0.4242304095629188,
      "learning_rate": 1.9482519945158872e-05,
      "loss": 1.5119,
      "step": 3922
    },
    {
      "epoch": 0.8042230422304223,
      "grad_norm": 0.4420297907550419,
      "learning_rate": 1.944315739123388e-05,
      "loss": 1.4655,
      "step": 3923
    },
    {
      "epoch": 0.8044280442804428,
      "grad_norm": 0.4531359342668859,
      "learning_rate": 1.940383035921558e-05,
      "loss": 1.5734,
      "step": 3924
    },
    {
      "epoch": 0.8046330463304633,
      "grad_norm": 0.40392690504627216,
      "learning_rate": 1.9364538866445436e-05,
      "loss": 1.4831,
      "step": 3925
    },
    {
      "epoch": 0.8048380483804838,
      "grad_norm": 0.4186726502831277,
      "learning_rate": 1.93252829302492e-05,
      "loss": 1.4891,
      "step": 3926
    },
    {
      "epoch": 0.8050430504305043,
      "grad_norm": 0.4193498525156862,
      "learning_rate": 1.9286062567936937e-05,
      "loss": 1.3818,
      "step": 3927
    },
    {
      "epoch": 0.8052480524805248,
      "grad_norm": 0.407285709244237,
      "learning_rate": 1.924687779680302e-05,
      "loss": 1.4818,
      "step": 3928
    },
    {
      "epoch": 0.8054530545305453,
      "grad_norm": 0.42675081334514614,
      "learning_rate": 1.9207728634126187e-05,
      "loss": 1.5283,
      "step": 3929
    },
    {
      "epoch": 0.8056580565805658,
      "grad_norm": 0.43830907293385163,
      "learning_rate": 1.916861509716945e-05,
      "loss": 1.5343,
      "step": 3930
    },
    {
      "epoch": 0.8058630586305863,
      "grad_norm": 0.43622926126059786,
      "learning_rate": 1.9129537203180102e-05,
      "loss": 1.4821,
      "step": 3931
    },
    {
      "epoch": 0.8060680606806068,
      "grad_norm": 0.40695212633150213,
      "learning_rate": 1.9090494969389696e-05,
      "loss": 1.4858,
      "step": 3932
    },
    {
      "epoch": 0.8062730627306273,
      "grad_norm": 0.44589757663149765,
      "learning_rate": 1.9051488413014064e-05,
      "loss": 1.5816,
      "step": 3933
    },
    {
      "epoch": 0.8064780647806478,
      "grad_norm": 0.3736714034831028,
      "learning_rate": 1.901251755125335e-05,
      "loss": 1.4148,
      "step": 3934
    },
    {
      "epoch": 0.8066830668306683,
      "grad_norm": 0.4262764833584188,
      "learning_rate": 1.8973582401291967e-05,
      "loss": 1.4005,
      "step": 3935
    },
    {
      "epoch": 0.8068880688806888,
      "grad_norm": 0.4119971066379156,
      "learning_rate": 1.8934682980298502e-05,
      "loss": 1.4796,
      "step": 3936
    },
    {
      "epoch": 0.8070930709307093,
      "grad_norm": 0.4056155244054997,
      "learning_rate": 1.8895819305425856e-05,
      "loss": 1.4493,
      "step": 3937
    },
    {
      "epoch": 0.8072980729807298,
      "grad_norm": 0.437297861093439,
      "learning_rate": 1.8856991393811097e-05,
      "loss": 1.5224,
      "step": 3938
    },
    {
      "epoch": 0.8075030750307504,
      "grad_norm": 0.427907085522041,
      "learning_rate": 1.88181992625756e-05,
      "loss": 1.5368,
      "step": 3939
    },
    {
      "epoch": 0.8077080770807709,
      "grad_norm": 0.4026043917708178,
      "learning_rate": 1.8779442928824963e-05,
      "loss": 1.4875,
      "step": 3940
    },
    {
      "epoch": 0.8079130791307914,
      "grad_norm": 0.4175682348813712,
      "learning_rate": 1.8740722409648947e-05,
      "loss": 1.4663,
      "step": 3941
    },
    {
      "epoch": 0.8081180811808119,
      "grad_norm": 0.44468124745085347,
      "learning_rate": 1.8702037722121523e-05,
      "loss": 1.5126,
      "step": 3942
    },
    {
      "epoch": 0.8083230832308324,
      "grad_norm": 0.4554108651114654,
      "learning_rate": 1.8663388883300905e-05,
      "loss": 1.5663,
      "step": 3943
    },
    {
      "epoch": 0.8085280852808528,
      "grad_norm": 0.4346400885220679,
      "learning_rate": 1.8624775910229418e-05,
      "loss": 1.4504,
      "step": 3944
    },
    {
      "epoch": 0.8087330873308733,
      "grad_norm": 0.4289981398511209,
      "learning_rate": 1.8586198819933686e-05,
      "loss": 1.4381,
      "step": 3945
    },
    {
      "epoch": 0.8089380893808938,
      "grad_norm": 0.4515270386667911,
      "learning_rate": 1.854765762942445e-05,
      "loss": 1.5434,
      "step": 3946
    },
    {
      "epoch": 0.8091430914309143,
      "grad_norm": 0.43332729340732434,
      "learning_rate": 1.8509152355696623e-05,
      "loss": 1.491,
      "step": 3947
    },
    {
      "epoch": 0.8093480934809348,
      "grad_norm": 0.4365381482984787,
      "learning_rate": 1.8470683015729272e-05,
      "loss": 1.4699,
      "step": 3948
    },
    {
      "epoch": 0.8095530955309553,
      "grad_norm": 0.39474061589884146,
      "learning_rate": 1.84322496264856e-05,
      "loss": 1.4424,
      "step": 3949
    },
    {
      "epoch": 0.8097580975809758,
      "grad_norm": 0.418374954458447,
      "learning_rate": 1.8393852204912997e-05,
      "loss": 1.4725,
      "step": 3950
    },
    {
      "epoch": 0.8099630996309963,
      "grad_norm": 0.47782494393803604,
      "learning_rate": 1.8355490767943028e-05,
      "loss": 1.5437,
      "step": 3951
    },
    {
      "epoch": 0.8101681016810168,
      "grad_norm": 0.42455096584701413,
      "learning_rate": 1.8317165332491303e-05,
      "loss": 1.5727,
      "step": 3952
    },
    {
      "epoch": 0.8103731037310373,
      "grad_norm": 0.4425374586757662,
      "learning_rate": 1.8278875915457618e-05,
      "loss": 1.4644,
      "step": 3953
    },
    {
      "epoch": 0.8105781057810578,
      "grad_norm": 0.3935435645028859,
      "learning_rate": 1.8240622533725814e-05,
      "loss": 1.5015,
      "step": 3954
    },
    {
      "epoch": 0.8107831078310783,
      "grad_norm": 0.47922103077379513,
      "learning_rate": 1.820240520416394e-05,
      "loss": 1.5803,
      "step": 3955
    },
    {
      "epoch": 0.8109881098810988,
      "grad_norm": 0.44487982531220893,
      "learning_rate": 1.8164223943624113e-05,
      "loss": 1.5156,
      "step": 3956
    },
    {
      "epoch": 0.8111931119311193,
      "grad_norm": 0.43633927709533915,
      "learning_rate": 1.8126078768942512e-05,
      "loss": 1.4503,
      "step": 3957
    },
    {
      "epoch": 0.8113981139811398,
      "grad_norm": 0.43501552606041066,
      "learning_rate": 1.8087969696939433e-05,
      "loss": 1.4699,
      "step": 3958
    },
    {
      "epoch": 0.8116031160311603,
      "grad_norm": 0.43768237180377356,
      "learning_rate": 1.8049896744419216e-05,
      "loss": 1.5217,
      "step": 3959
    },
    {
      "epoch": 0.8118081180811808,
      "grad_norm": 0.4233845148180157,
      "learning_rate": 1.801185992817034e-05,
      "loss": 1.4626,
      "step": 3960
    },
    {
      "epoch": 0.8120131201312013,
      "grad_norm": 0.5149738101072708,
      "learning_rate": 1.7973859264965288e-05,
      "loss": 1.537,
      "step": 3961
    },
    {
      "epoch": 0.8122181221812218,
      "grad_norm": 0.4469575878396721,
      "learning_rate": 1.7935894771560647e-05,
      "loss": 1.4982,
      "step": 3962
    },
    {
      "epoch": 0.8124231242312423,
      "grad_norm": 0.40035316266162,
      "learning_rate": 1.7897966464697034e-05,
      "loss": 1.4477,
      "step": 3963
    },
    {
      "epoch": 0.8126281262812628,
      "grad_norm": 0.3951811453006624,
      "learning_rate": 1.7860074361099067e-05,
      "loss": 1.4195,
      "step": 3964
    },
    {
      "epoch": 0.8128331283312833,
      "grad_norm": 0.4246267451127877,
      "learning_rate": 1.7822218477475494e-05,
      "loss": 1.5358,
      "step": 3965
    },
    {
      "epoch": 0.8130381303813038,
      "grad_norm": 0.43655999424816394,
      "learning_rate": 1.7784398830519e-05,
      "loss": 1.4701,
      "step": 3966
    },
    {
      "epoch": 0.8132431324313243,
      "grad_norm": 0.43946568029204325,
      "learning_rate": 1.7746615436906365e-05,
      "loss": 1.4982,
      "step": 3967
    },
    {
      "epoch": 0.8134481344813448,
      "grad_norm": 0.4001602392142344,
      "learning_rate": 1.7708868313298332e-05,
      "loss": 1.4555,
      "step": 3968
    },
    {
      "epoch": 0.8136531365313653,
      "grad_norm": 0.4371306036918452,
      "learning_rate": 1.767115747633965e-05,
      "loss": 1.5094,
      "step": 3969
    },
    {
      "epoch": 0.8138581385813858,
      "grad_norm": 0.45326464186107945,
      "learning_rate": 1.763348294265912e-05,
      "loss": 1.5277,
      "step": 3970
    },
    {
      "epoch": 0.8140631406314063,
      "grad_norm": 0.3970819593068182,
      "learning_rate": 1.7595844728869448e-05,
      "loss": 1.441,
      "step": 3971
    },
    {
      "epoch": 0.8142681426814268,
      "grad_norm": 0.39982819099467,
      "learning_rate": 1.7558242851567442e-05,
      "loss": 1.4894,
      "step": 3972
    },
    {
      "epoch": 0.8144731447314473,
      "grad_norm": 0.44818881706609065,
      "learning_rate": 1.752067732733378e-05,
      "loss": 1.4921,
      "step": 3973
    },
    {
      "epoch": 0.8146781467814678,
      "grad_norm": 0.3899482663502815,
      "learning_rate": 1.7483148172733145e-05,
      "loss": 1.4848,
      "step": 3974
    },
    {
      "epoch": 0.8148831488314883,
      "grad_norm": 0.4363047074612505,
      "learning_rate": 1.7445655404314208e-05,
      "loss": 1.5074,
      "step": 3975
    },
    {
      "epoch": 0.8150881508815088,
      "grad_norm": 0.45261111833740086,
      "learning_rate": 1.7408199038609586e-05,
      "loss": 1.5241,
      "step": 3976
    },
    {
      "epoch": 0.8152931529315293,
      "grad_norm": 0.45222049740683995,
      "learning_rate": 1.737077909213579e-05,
      "loss": 1.5032,
      "step": 3977
    },
    {
      "epoch": 0.8154981549815498,
      "grad_norm": 0.4480219783858042,
      "learning_rate": 1.7333395581393365e-05,
      "loss": 1.5194,
      "step": 3978
    },
    {
      "epoch": 0.8157031570315704,
      "grad_norm": 0.4117301766459738,
      "learning_rate": 1.7296048522866692e-05,
      "loss": 1.4641,
      "step": 3979
    },
    {
      "epoch": 0.8159081590815909,
      "grad_norm": 0.46184463055887004,
      "learning_rate": 1.7258737933024182e-05,
      "loss": 1.567,
      "step": 3980
    },
    {
      "epoch": 0.8161131611316114,
      "grad_norm": 0.42427677205086417,
      "learning_rate": 1.7221463828318073e-05,
      "loss": 1.4557,
      "step": 3981
    },
    {
      "epoch": 0.8163181631816319,
      "grad_norm": 0.42847948146774567,
      "learning_rate": 1.718422622518455e-05,
      "loss": 1.5771,
      "step": 3982
    },
    {
      "epoch": 0.8165231652316524,
      "grad_norm": 0.41843569083962945,
      "learning_rate": 1.7147025140043727e-05,
      "loss": 1.4736,
      "step": 3983
    },
    {
      "epoch": 0.8167281672816729,
      "grad_norm": 0.41914757382645457,
      "learning_rate": 1.7109860589299552e-05,
      "loss": 1.4841,
      "step": 3984
    },
    {
      "epoch": 0.8169331693316934,
      "grad_norm": 0.41093866418822345,
      "learning_rate": 1.7072732589339955e-05,
      "loss": 1.4927,
      "step": 3985
    },
    {
      "epoch": 0.8171381713817139,
      "grad_norm": 0.4060646927708431,
      "learning_rate": 1.7035641156536675e-05,
      "loss": 1.5237,
      "step": 3986
    },
    {
      "epoch": 0.8173431734317343,
      "grad_norm": 0.4553394166052772,
      "learning_rate": 1.6998586307245313e-05,
      "loss": 1.5359,
      "step": 3987
    },
    {
      "epoch": 0.8175481754817548,
      "grad_norm": 0.48167049978287974,
      "learning_rate": 1.696156805780543e-05,
      "loss": 1.492,
      "step": 3988
    },
    {
      "epoch": 0.8177531775317753,
      "grad_norm": 0.45061653390771095,
      "learning_rate": 1.6924586424540346e-05,
      "loss": 1.5123,
      "step": 3989
    },
    {
      "epoch": 0.8179581795817958,
      "grad_norm": 0.38615392837604146,
      "learning_rate": 1.688764142375733e-05,
      "loss": 1.4522,
      "step": 3990
    },
    {
      "epoch": 0.8181631816318163,
      "grad_norm": 0.40736554512568307,
      "learning_rate": 1.6850733071747405e-05,
      "loss": 1.4938,
      "step": 3991
    },
    {
      "epoch": 0.8183681836818368,
      "grad_norm": 0.40560956105470275,
      "learning_rate": 1.68138613847855e-05,
      "loss": 1.5218,
      "step": 3992
    },
    {
      "epoch": 0.8185731857318573,
      "grad_norm": 0.41524187637052146,
      "learning_rate": 1.6777026379130324e-05,
      "loss": 1.426,
      "step": 3993
    },
    {
      "epoch": 0.8187781877818778,
      "grad_norm": 0.4325522247923781,
      "learning_rate": 1.6740228071024454e-05,
      "loss": 1.5093,
      "step": 3994
    },
    {
      "epoch": 0.8189831898318983,
      "grad_norm": 0.40411280399317184,
      "learning_rate": 1.6703466476694307e-05,
      "loss": 1.4346,
      "step": 3995
    },
    {
      "epoch": 0.8191881918819188,
      "grad_norm": 0.435211648607629,
      "learning_rate": 1.6666741612350034e-05,
      "loss": 1.5092,
      "step": 3996
    },
    {
      "epoch": 0.8193931939319393,
      "grad_norm": 0.4101536614618035,
      "learning_rate": 1.663005349418566e-05,
      "loss": 1.4298,
      "step": 3997
    },
    {
      "epoch": 0.8195981959819598,
      "grad_norm": 0.4396975459223735,
      "learning_rate": 1.6593402138378934e-05,
      "loss": 1.4857,
      "step": 3998
    },
    {
      "epoch": 0.8198031980319803,
      "grad_norm": 0.430574743329003,
      "learning_rate": 1.6556787561091492e-05,
      "loss": 1.4968,
      "step": 3999
    },
    {
      "epoch": 0.8200082000820008,
      "grad_norm": 0.40527803868456297,
      "learning_rate": 1.652020977846864e-05,
      "loss": 1.4781,
      "step": 4000
    },
    {
      "epoch": 0.8202132021320213,
      "grad_norm": 0.40336534632349746,
      "learning_rate": 1.6483668806639584e-05,
      "loss": 1.482,
      "step": 4001
    },
    {
      "epoch": 0.8204182041820418,
      "grad_norm": 0.43810860147071806,
      "learning_rate": 1.6447164661717197e-05,
      "loss": 1.4851,
      "step": 4002
    },
    {
      "epoch": 0.8206232062320623,
      "grad_norm": 0.4659763698416814,
      "learning_rate": 1.6410697359798122e-05,
      "loss": 1.5014,
      "step": 4003
    },
    {
      "epoch": 0.8208282082820828,
      "grad_norm": 0.4008227714251599,
      "learning_rate": 1.6374266916962832e-05,
      "loss": 1.4652,
      "step": 4004
    },
    {
      "epoch": 0.8210332103321033,
      "grad_norm": 0.44413733118072135,
      "learning_rate": 1.6337873349275456e-05,
      "loss": 1.4906,
      "step": 4005
    },
    {
      "epoch": 0.8212382123821238,
      "grad_norm": 0.4387813098192902,
      "learning_rate": 1.6301516672783945e-05,
      "loss": 1.5313,
      "step": 4006
    },
    {
      "epoch": 0.8214432144321443,
      "grad_norm": 0.43535659339060073,
      "learning_rate": 1.6265196903519918e-05,
      "loss": 1.5104,
      "step": 4007
    },
    {
      "epoch": 0.8216482164821648,
      "grad_norm": 0.4231400775472947,
      "learning_rate": 1.6228914057498746e-05,
      "loss": 1.4356,
      "step": 4008
    },
    {
      "epoch": 0.8218532185321853,
      "grad_norm": 0.4139599366722531,
      "learning_rate": 1.619266815071948e-05,
      "loss": 1.499,
      "step": 4009
    },
    {
      "epoch": 0.8220582205822058,
      "grad_norm": 0.42914480568592833,
      "learning_rate": 1.6156459199164952e-05,
      "loss": 1.4612,
      "step": 4010
    },
    {
      "epoch": 0.8222632226322263,
      "grad_norm": 0.4251087675544272,
      "learning_rate": 1.612028721880169e-05,
      "loss": 1.4902,
      "step": 4011
    },
    {
      "epoch": 0.8224682246822468,
      "grad_norm": 0.45028571857062394,
      "learning_rate": 1.6084152225579863e-05,
      "loss": 1.5486,
      "step": 4012
    },
    {
      "epoch": 0.8226732267322673,
      "grad_norm": 0.43502191375973126,
      "learning_rate": 1.6048054235433364e-05,
      "loss": 1.5055,
      "step": 4013
    },
    {
      "epoch": 0.8228782287822878,
      "grad_norm": 0.40416886315713924,
      "learning_rate": 1.6011993264279734e-05,
      "loss": 1.4766,
      "step": 4014
    },
    {
      "epoch": 0.8230832308323083,
      "grad_norm": 0.44534120537498745,
      "learning_rate": 1.5975969328020257e-05,
      "loss": 1.4721,
      "step": 4015
    },
    {
      "epoch": 0.8232882328823288,
      "grad_norm": 0.43038848235478067,
      "learning_rate": 1.593998244253988e-05,
      "loss": 1.4657,
      "step": 4016
    },
    {
      "epoch": 0.8234932349323493,
      "grad_norm": 0.4447565003571557,
      "learning_rate": 1.5904032623707144e-05,
      "loss": 1.5198,
      "step": 4017
    },
    {
      "epoch": 0.8236982369823698,
      "grad_norm": 0.41264652318624195,
      "learning_rate": 1.58681198873743e-05,
      "loss": 1.4702,
      "step": 4018
    },
    {
      "epoch": 0.8239032390323904,
      "grad_norm": 0.39932582475887524,
      "learning_rate": 1.5832244249377204e-05,
      "loss": 1.4538,
      "step": 4019
    },
    {
      "epoch": 0.8241082410824109,
      "grad_norm": 0.4157218665292185,
      "learning_rate": 1.5796405725535402e-05,
      "loss": 1.5086,
      "step": 4020
    },
    {
      "epoch": 0.8243132431324314,
      "grad_norm": 0.4526047443566796,
      "learning_rate": 1.5760604331652075e-05,
      "loss": 1.5082,
      "step": 4021
    },
    {
      "epoch": 0.8245182451824519,
      "grad_norm": 0.4309206360954103,
      "learning_rate": 1.5724840083514005e-05,
      "loss": 1.5425,
      "step": 4022
    },
    {
      "epoch": 0.8247232472324724,
      "grad_norm": 0.44352321613135137,
      "learning_rate": 1.5689112996891576e-05,
      "loss": 1.5858,
      "step": 4023
    },
    {
      "epoch": 0.8249282492824929,
      "grad_norm": 0.4273080895507938,
      "learning_rate": 1.56534230875388e-05,
      "loss": 1.4687,
      "step": 4024
    },
    {
      "epoch": 0.8251332513325134,
      "grad_norm": 0.4649697050286364,
      "learning_rate": 1.561777037119333e-05,
      "loss": 1.5155,
      "step": 4025
    },
    {
      "epoch": 0.8253382533825339,
      "grad_norm": 0.43779085962740655,
      "learning_rate": 1.5582154863576414e-05,
      "loss": 1.4941,
      "step": 4026
    },
    {
      "epoch": 0.8255432554325544,
      "grad_norm": 0.42229959210095425,
      "learning_rate": 1.5546576580392846e-05,
      "loss": 1.485,
      "step": 4027
    },
    {
      "epoch": 0.8257482574825749,
      "grad_norm": 0.4050420990768643,
      "learning_rate": 1.551103553733104e-05,
      "loss": 1.4905,
      "step": 4028
    },
    {
      "epoch": 0.8259532595325954,
      "grad_norm": 0.39236556199383155,
      "learning_rate": 1.5475531750062955e-05,
      "loss": 1.4381,
      "step": 4029
    },
    {
      "epoch": 0.8261582615826158,
      "grad_norm": 0.4037580485948823,
      "learning_rate": 1.5440065234244162e-05,
      "loss": 1.5478,
      "step": 4030
    },
    {
      "epoch": 0.8263632636326363,
      "grad_norm": 0.42387433258676543,
      "learning_rate": 1.5404636005513805e-05,
      "loss": 1.5062,
      "step": 4031
    },
    {
      "epoch": 0.8265682656826568,
      "grad_norm": 0.4003384499584203,
      "learning_rate": 1.536924407949456e-05,
      "loss": 1.5205,
      "step": 4032
    },
    {
      "epoch": 0.8267732677326773,
      "grad_norm": 0.4461250096921476,
      "learning_rate": 1.533388947179264e-05,
      "loss": 1.5256,
      "step": 4033
    },
    {
      "epoch": 0.8269782697826978,
      "grad_norm": 0.4261393558615221,
      "learning_rate": 1.5298572197997797e-05,
      "loss": 1.5124,
      "step": 4034
    },
    {
      "epoch": 0.8271832718327183,
      "grad_norm": 0.4543693909621413,
      "learning_rate": 1.5263292273683404e-05,
      "loss": 1.5163,
      "step": 4035
    },
    {
      "epoch": 0.8273882738827388,
      "grad_norm": 0.394591475219828,
      "learning_rate": 1.5228049714406246e-05,
      "loss": 1.4583,
      "step": 4036
    },
    {
      "epoch": 0.8275932759327593,
      "grad_norm": 0.42123587137950136,
      "learning_rate": 1.5192844535706741e-05,
      "loss": 1.5258,
      "step": 4037
    },
    {
      "epoch": 0.8277982779827798,
      "grad_norm": 0.42469824137124834,
      "learning_rate": 1.5157676753108752e-05,
      "loss": 1.4822,
      "step": 4038
    },
    {
      "epoch": 0.8280032800328003,
      "grad_norm": 0.46304443157857894,
      "learning_rate": 1.512254638211964e-05,
      "loss": 1.4912,
      "step": 4039
    },
    {
      "epoch": 0.8282082820828208,
      "grad_norm": 0.41214509834924007,
      "learning_rate": 1.508745343823037e-05,
      "loss": 1.4867,
      "step": 4040
    },
    {
      "epoch": 0.8284132841328413,
      "grad_norm": 0.43337673883934186,
      "learning_rate": 1.5052397936915264e-05,
      "loss": 1.5154,
      "step": 4041
    },
    {
      "epoch": 0.8286182861828618,
      "grad_norm": 0.42173127670929955,
      "learning_rate": 1.5017379893632255e-05,
      "loss": 1.4693,
      "step": 4042
    },
    {
      "epoch": 0.8288232882328823,
      "grad_norm": 0.40105356786212654,
      "learning_rate": 1.4982399323822705e-05,
      "loss": 1.4558,
      "step": 4043
    },
    {
      "epoch": 0.8290282902829028,
      "grad_norm": 0.4134886774282266,
      "learning_rate": 1.4947456242911406e-05,
      "loss": 1.4603,
      "step": 4044
    },
    {
      "epoch": 0.8292332923329233,
      "grad_norm": 0.407878632342707,
      "learning_rate": 1.4912550666306747e-05,
      "loss": 1.4935,
      "step": 4045
    },
    {
      "epoch": 0.8294382943829438,
      "grad_norm": 0.4287419898274544,
      "learning_rate": 1.4877682609400423e-05,
      "loss": 1.4902,
      "step": 4046
    },
    {
      "epoch": 0.8296432964329643,
      "grad_norm": 0.4287778705999938,
      "learning_rate": 1.4842852087567727e-05,
      "loss": 1.539,
      "step": 4047
    },
    {
      "epoch": 0.8298482984829848,
      "grad_norm": 0.39679546858806153,
      "learning_rate": 1.4808059116167305e-05,
      "loss": 1.4377,
      "step": 4048
    },
    {
      "epoch": 0.8300533005330053,
      "grad_norm": 0.40336653116303844,
      "learning_rate": 1.4773303710541275e-05,
      "loss": 1.4537,
      "step": 4049
    },
    {
      "epoch": 0.8302583025830258,
      "grad_norm": 0.4434921199495249,
      "learning_rate": 1.4738585886015178e-05,
      "loss": 1.4598,
      "step": 4050
    },
    {
      "epoch": 0.8304633046330463,
      "grad_norm": 0.4862007546427389,
      "learning_rate": 1.4703905657898043e-05,
      "loss": 1.5946,
      "step": 4051
    },
    {
      "epoch": 0.8306683066830668,
      "grad_norm": 0.4508330156615731,
      "learning_rate": 1.4669263041482218e-05,
      "loss": 1.556,
      "step": 4052
    },
    {
      "epoch": 0.8308733087330873,
      "grad_norm": 0.40871876236025084,
      "learning_rate": 1.4634658052043582e-05,
      "loss": 1.4902,
      "step": 4053
    },
    {
      "epoch": 0.8310783107831078,
      "grad_norm": 0.40849565334971594,
      "learning_rate": 1.4600090704841318e-05,
      "loss": 1.4955,
      "step": 4054
    },
    {
      "epoch": 0.8312833128331283,
      "grad_norm": 0.44929879887737095,
      "learning_rate": 1.4565561015118057e-05,
      "loss": 1.5442,
      "step": 4055
    },
    {
      "epoch": 0.8314883148831488,
      "grad_norm": 0.44082578485236096,
      "learning_rate": 1.453106899809985e-05,
      "loss": 1.5276,
      "step": 4056
    },
    {
      "epoch": 0.8316933169331693,
      "grad_norm": 0.41489356572284175,
      "learning_rate": 1.4496614668996077e-05,
      "loss": 1.4893,
      "step": 4057
    },
    {
      "epoch": 0.8318983189831898,
      "grad_norm": 0.44764787361825054,
      "learning_rate": 1.4462198042999565e-05,
      "loss": 1.4971,
      "step": 4058
    },
    {
      "epoch": 0.8321033210332104,
      "grad_norm": 0.397129027366937,
      "learning_rate": 1.4427819135286469e-05,
      "loss": 1.4764,
      "step": 4059
    },
    {
      "epoch": 0.8323083230832309,
      "grad_norm": 0.44725605325884854,
      "learning_rate": 1.43934779610163e-05,
      "loss": 1.5393,
      "step": 4060
    },
    {
      "epoch": 0.8325133251332514,
      "grad_norm": 0.4017898606940109,
      "learning_rate": 1.4359174535331999e-05,
      "loss": 1.5071,
      "step": 4061
    },
    {
      "epoch": 0.8327183271832719,
      "grad_norm": 0.4165684258014967,
      "learning_rate": 1.4324908873359766e-05,
      "loss": 1.514,
      "step": 4062
    },
    {
      "epoch": 0.8329233292332924,
      "grad_norm": 0.4608710002230284,
      "learning_rate": 1.429068099020926e-05,
      "loss": 1.5183,
      "step": 4063
    },
    {
      "epoch": 0.8331283312833129,
      "grad_norm": 0.4607040741739288,
      "learning_rate": 1.4256490900973385e-05,
      "loss": 1.4828,
      "step": 4064
    },
    {
      "epoch": 0.8333333333333334,
      "grad_norm": 0.4067657365697677,
      "learning_rate": 1.4222338620728404e-05,
      "loss": 1.5249,
      "step": 4065
    },
    {
      "epoch": 0.8335383353833539,
      "grad_norm": 0.4312227878730755,
      "learning_rate": 1.418822416453397e-05,
      "loss": 1.4182,
      "step": 4066
    },
    {
      "epoch": 0.8337433374333744,
      "grad_norm": 0.4260066857339835,
      "learning_rate": 1.4154147547432971e-05,
      "loss": 1.4421,
      "step": 4067
    },
    {
      "epoch": 0.8339483394833949,
      "grad_norm": 0.44743336030759656,
      "learning_rate": 1.4120108784451625e-05,
      "loss": 1.4926,
      "step": 4068
    },
    {
      "epoch": 0.8341533415334154,
      "grad_norm": 0.41742696939301815,
      "learning_rate": 1.4086107890599543e-05,
      "loss": 1.5156,
      "step": 4069
    },
    {
      "epoch": 0.8343583435834359,
      "grad_norm": 0.41146580888849155,
      "learning_rate": 1.405214488086951e-05,
      "loss": 1.4777,
      "step": 4070
    },
    {
      "epoch": 0.8345633456334564,
      "grad_norm": 0.4748583072519751,
      "learning_rate": 1.4018219770237717e-05,
      "loss": 1.5351,
      "step": 4071
    },
    {
      "epoch": 0.8347683476834769,
      "grad_norm": 0.4219038451439774,
      "learning_rate": 1.3984332573663584e-05,
      "loss": 1.455,
      "step": 4072
    },
    {
      "epoch": 0.8349733497334973,
      "grad_norm": 0.4327922181388339,
      "learning_rate": 1.39504833060898e-05,
      "loss": 1.4365,
      "step": 4073
    },
    {
      "epoch": 0.8351783517835178,
      "grad_norm": 0.4239352174034709,
      "learning_rate": 1.3916671982442386e-05,
      "loss": 1.5566,
      "step": 4074
    },
    {
      "epoch": 0.8353833538335383,
      "grad_norm": 0.40703109422690775,
      "learning_rate": 1.3882898617630569e-05,
      "loss": 1.4885,
      "step": 4075
    },
    {
      "epoch": 0.8355883558835588,
      "grad_norm": 0.44321100817299836,
      "learning_rate": 1.3849163226546902e-05,
      "loss": 1.4935,
      "step": 4076
    },
    {
      "epoch": 0.8357933579335793,
      "grad_norm": 0.411309352068566,
      "learning_rate": 1.3815465824067153e-05,
      "loss": 1.4615,
      "step": 4077
    },
    {
      "epoch": 0.8359983599835998,
      "grad_norm": 0.4211245698043953,
      "learning_rate": 1.3781806425050303e-05,
      "loss": 1.5172,
      "step": 4078
    },
    {
      "epoch": 0.8362033620336203,
      "grad_norm": 0.4185606618630242,
      "learning_rate": 1.3748185044338669e-05,
      "loss": 1.4531,
      "step": 4079
    },
    {
      "epoch": 0.8364083640836408,
      "grad_norm": 0.4270472197962592,
      "learning_rate": 1.3714601696757712e-05,
      "loss": 1.5748,
      "step": 4080
    },
    {
      "epoch": 0.8366133661336613,
      "grad_norm": 0.42700289622647514,
      "learning_rate": 1.3681056397116198e-05,
      "loss": 1.4934,
      "step": 4081
    },
    {
      "epoch": 0.8368183681836818,
      "grad_norm": 0.43607904301376715,
      "learning_rate": 1.3647549160206075e-05,
      "loss": 1.4756,
      "step": 4082
    },
    {
      "epoch": 0.8370233702337023,
      "grad_norm": 0.39851169789580204,
      "learning_rate": 1.3614080000802487e-05,
      "loss": 1.443,
      "step": 4083
    },
    {
      "epoch": 0.8372283722837228,
      "grad_norm": 0.43119601429217375,
      "learning_rate": 1.358064893366382e-05,
      "loss": 1.5449,
      "step": 4084
    },
    {
      "epoch": 0.8374333743337433,
      "grad_norm": 0.4251188737325228,
      "learning_rate": 1.3547255973531648e-05,
      "loss": 1.505,
      "step": 4085
    },
    {
      "epoch": 0.8376383763837638,
      "grad_norm": 0.46717160726417184,
      "learning_rate": 1.351390113513078e-05,
      "loss": 1.5261,
      "step": 4086
    },
    {
      "epoch": 0.8378433784337843,
      "grad_norm": 0.44398503012810975,
      "learning_rate": 1.3480584433169174e-05,
      "loss": 1.5036,
      "step": 4087
    },
    {
      "epoch": 0.8380483804838048,
      "grad_norm": 0.43747392112788513,
      "learning_rate": 1.3447305882337968e-05,
      "loss": 1.4621,
      "step": 4088
    },
    {
      "epoch": 0.8382533825338253,
      "grad_norm": 0.4424733247384556,
      "learning_rate": 1.3414065497311478e-05,
      "loss": 1.5323,
      "step": 4089
    },
    {
      "epoch": 0.8384583845838458,
      "grad_norm": 0.43783215535801406,
      "learning_rate": 1.3380863292747214e-05,
      "loss": 1.472,
      "step": 4090
    },
    {
      "epoch": 0.8386633866338663,
      "grad_norm": 0.40336961353481776,
      "learning_rate": 1.3347699283285875e-05,
      "loss": 1.4637,
      "step": 4091
    },
    {
      "epoch": 0.8388683886838868,
      "grad_norm": 0.4323102209883956,
      "learning_rate": 1.331457348355125e-05,
      "loss": 1.5092,
      "step": 4092
    },
    {
      "epoch": 0.8390733907339073,
      "grad_norm": 0.41398547995065305,
      "learning_rate": 1.3281485908150315e-05,
      "loss": 1.513,
      "step": 4093
    },
    {
      "epoch": 0.8392783927839278,
      "grad_norm": 0.4104016327713903,
      "learning_rate": 1.3248436571673162e-05,
      "loss": 1.5075,
      "step": 4094
    },
    {
      "epoch": 0.8394833948339483,
      "grad_norm": 0.45945721944098505,
      "learning_rate": 1.3215425488693078e-05,
      "loss": 1.5099,
      "step": 4095
    },
    {
      "epoch": 0.8396883968839688,
      "grad_norm": 0.41985751920089753,
      "learning_rate": 1.3182452673766454e-05,
      "loss": 1.4774,
      "step": 4096
    },
    {
      "epoch": 0.8398933989339893,
      "grad_norm": 0.4424241484434764,
      "learning_rate": 1.3149518141432804e-05,
      "loss": 1.4572,
      "step": 4097
    },
    {
      "epoch": 0.8400984009840098,
      "grad_norm": 0.434656299403232,
      "learning_rate": 1.3116621906214743e-05,
      "loss": 1.4646,
      "step": 4098
    },
    {
      "epoch": 0.8403034030340304,
      "grad_norm": 0.4157358420521401,
      "learning_rate": 1.3083763982618025e-05,
      "loss": 1.4931,
      "step": 4099
    },
    {
      "epoch": 0.8405084050840509,
      "grad_norm": 0.4457006185067519,
      "learning_rate": 1.3050944385131447e-05,
      "loss": 1.5404,
      "step": 4100
    },
    {
      "epoch": 0.8407134071340714,
      "grad_norm": 0.44221406300249744,
      "learning_rate": 1.3018163128227057e-05,
      "loss": 1.5158,
      "step": 4101
    },
    {
      "epoch": 0.8409184091840919,
      "grad_norm": 0.4218222688641252,
      "learning_rate": 1.2985420226359846e-05,
      "loss": 1.4694,
      "step": 4102
    },
    {
      "epoch": 0.8411234112341124,
      "grad_norm": 0.46390991997664527,
      "learning_rate": 1.2952715693967964e-05,
      "loss": 1.5141,
      "step": 4103
    },
    {
      "epoch": 0.8413284132841329,
      "grad_norm": 0.4365061168954958,
      "learning_rate": 1.2920049545472602e-05,
      "loss": 1.4831,
      "step": 4104
    },
    {
      "epoch": 0.8415334153341534,
      "grad_norm": 0.3834868482231454,
      "learning_rate": 1.2887421795278044e-05,
      "loss": 1.4766,
      "step": 4105
    },
    {
      "epoch": 0.8417384173841739,
      "grad_norm": 0.4150280334364376,
      "learning_rate": 1.2854832457771648e-05,
      "loss": 1.4822,
      "step": 4106
    },
    {
      "epoch": 0.8419434194341944,
      "grad_norm": 0.4132803357544623,
      "learning_rate": 1.2822281547323867e-05,
      "loss": 1.4133,
      "step": 4107
    },
    {
      "epoch": 0.8421484214842149,
      "grad_norm": 0.4423716162927889,
      "learning_rate": 1.278976907828815e-05,
      "loss": 1.5493,
      "step": 4108
    },
    {
      "epoch": 0.8423534235342354,
      "grad_norm": 0.43658856717276107,
      "learning_rate": 1.2757295065001007e-05,
      "loss": 1.5239,
      "step": 4109
    },
    {
      "epoch": 0.8425584255842559,
      "grad_norm": 0.42346545413445763,
      "learning_rate": 1.2724859521781996e-05,
      "loss": 1.5179,
      "step": 4110
    },
    {
      "epoch": 0.8427634276342764,
      "grad_norm": 0.42778902649789075,
      "learning_rate": 1.269246246293374e-05,
      "loss": 1.401,
      "step": 4111
    },
    {
      "epoch": 0.8429684296842969,
      "grad_norm": 0.4473573092377989,
      "learning_rate": 1.2660103902741871e-05,
      "loss": 1.5053,
      "step": 4112
    },
    {
      "epoch": 0.8431734317343174,
      "grad_norm": 0.41684770909553376,
      "learning_rate": 1.262778385547504e-05,
      "loss": 1.5044,
      "step": 4113
    },
    {
      "epoch": 0.8433784337843379,
      "grad_norm": 0.4922006371516634,
      "learning_rate": 1.2595502335384912e-05,
      "loss": 1.4563,
      "step": 4114
    },
    {
      "epoch": 0.8435834358343584,
      "grad_norm": 0.43190174808785736,
      "learning_rate": 1.2563259356706147e-05,
      "loss": 1.5369,
      "step": 4115
    },
    {
      "epoch": 0.8437884378843789,
      "grad_norm": 0.4313134129047217,
      "learning_rate": 1.253105493365646e-05,
      "loss": 1.4374,
      "step": 4116
    },
    {
      "epoch": 0.8439934399343993,
      "grad_norm": 0.4023212907054902,
      "learning_rate": 1.2498889080436549e-05,
      "loss": 1.4541,
      "step": 4117
    },
    {
      "epoch": 0.8441984419844198,
      "grad_norm": 0.42201721492040123,
      "learning_rate": 1.2466761811230098e-05,
      "loss": 1.4871,
      "step": 4118
    },
    {
      "epoch": 0.8444034440344403,
      "grad_norm": 0.46135414242003453,
      "learning_rate": 1.2434673140203745e-05,
      "loss": 1.5035,
      "step": 4119
    },
    {
      "epoch": 0.8446084460844608,
      "grad_norm": 0.4290380524381426,
      "learning_rate": 1.2402623081507126e-05,
      "loss": 1.5248,
      "step": 4120
    },
    {
      "epoch": 0.8448134481344813,
      "grad_norm": 0.4125116008014693,
      "learning_rate": 1.2370611649272878e-05,
      "loss": 1.5291,
      "step": 4121
    },
    {
      "epoch": 0.8450184501845018,
      "grad_norm": 0.41585347883182866,
      "learning_rate": 1.2338638857616613e-05,
      "loss": 1.463,
      "step": 4122
    },
    {
      "epoch": 0.8452234522345223,
      "grad_norm": 0.4045722738621591,
      "learning_rate": 1.2306704720636852e-05,
      "loss": 1.4222,
      "step": 4123
    },
    {
      "epoch": 0.8454284542845428,
      "grad_norm": 0.48315250910190094,
      "learning_rate": 1.227480925241511e-05,
      "loss": 1.5062,
      "step": 4124
    },
    {
      "epoch": 0.8456334563345633,
      "grad_norm": 0.4262544886355197,
      "learning_rate": 1.22429524670158e-05,
      "loss": 1.5121,
      "step": 4125
    },
    {
      "epoch": 0.8458384583845838,
      "grad_norm": 0.4728518838554152,
      "learning_rate": 1.2211134378486378e-05,
      "loss": 1.4759,
      "step": 4126
    },
    {
      "epoch": 0.8460434604346043,
      "grad_norm": 0.38890553968018216,
      "learning_rate": 1.2179355000857119e-05,
      "loss": 1.506,
      "step": 4127
    },
    {
      "epoch": 0.8462484624846248,
      "grad_norm": 0.4217522643826517,
      "learning_rate": 1.2147614348141335e-05,
      "loss": 1.5279,
      "step": 4128
    },
    {
      "epoch": 0.8464534645346453,
      "grad_norm": 0.40840689414950304,
      "learning_rate": 1.2115912434335187e-05,
      "loss": 1.4975,
      "step": 4129
    },
    {
      "epoch": 0.8466584665846658,
      "grad_norm": 0.4337534256898972,
      "learning_rate": 1.2084249273417759e-05,
      "loss": 1.4863,
      "step": 4130
    },
    {
      "epoch": 0.8468634686346863,
      "grad_norm": 0.44411041644152915,
      "learning_rate": 1.2052624879351104e-05,
      "loss": 1.5731,
      "step": 4131
    },
    {
      "epoch": 0.8470684706847068,
      "grad_norm": 0.4020542945675568,
      "learning_rate": 1.2021039266080104e-05,
      "loss": 1.4646,
      "step": 4132
    },
    {
      "epoch": 0.8472734727347273,
      "grad_norm": 0.4031473741737517,
      "learning_rate": 1.1989492447532613e-05,
      "loss": 1.4506,
      "step": 4133
    },
    {
      "epoch": 0.8474784747847478,
      "grad_norm": 0.3980220562439092,
      "learning_rate": 1.195798443761933e-05,
      "loss": 1.4365,
      "step": 4134
    },
    {
      "epoch": 0.8476834768347683,
      "grad_norm": 0.44446476612360397,
      "learning_rate": 1.1926515250233839e-05,
      "loss": 1.5009,
      "step": 4135
    },
    {
      "epoch": 0.8478884788847888,
      "grad_norm": 0.4360524507061598,
      "learning_rate": 1.1895084899252663e-05,
      "loss": 1.4678,
      "step": 4136
    },
    {
      "epoch": 0.8480934809348093,
      "grad_norm": 0.4334059789136883,
      "learning_rate": 1.1863693398535114e-05,
      "loss": 1.4738,
      "step": 4137
    },
    {
      "epoch": 0.8482984829848298,
      "grad_norm": 0.3767589040866388,
      "learning_rate": 1.1832340761923444e-05,
      "loss": 1.4884,
      "step": 4138
    },
    {
      "epoch": 0.8485034850348504,
      "grad_norm": 0.42308555232716255,
      "learning_rate": 1.1801027003242749e-05,
      "loss": 1.4751,
      "step": 4139
    },
    {
      "epoch": 0.8487084870848709,
      "grad_norm": 0.37688539315308583,
      "learning_rate": 1.1769752136300927e-05,
      "loss": 1.4351,
      "step": 4140
    },
    {
      "epoch": 0.8489134891348914,
      "grad_norm": 0.40308373744729736,
      "learning_rate": 1.1738516174888836e-05,
      "loss": 1.4775,
      "step": 4141
    },
    {
      "epoch": 0.8491184911849119,
      "grad_norm": 0.4557699319332778,
      "learning_rate": 1.170731913278007e-05,
      "loss": 1.4945,
      "step": 4142
    },
    {
      "epoch": 0.8493234932349324,
      "grad_norm": 0.4262927329939197,
      "learning_rate": 1.1676161023731114e-05,
      "loss": 1.5031,
      "step": 4143
    },
    {
      "epoch": 0.8495284952849529,
      "grad_norm": 0.40261358364790967,
      "learning_rate": 1.1645041861481288e-05,
      "loss": 1.4495,
      "step": 4144
    },
    {
      "epoch": 0.8497334973349734,
      "grad_norm": 0.44112937158919285,
      "learning_rate": 1.1613961659752715e-05,
      "loss": 1.5159,
      "step": 4145
    },
    {
      "epoch": 0.8499384993849939,
      "grad_norm": 0.4204954968162001,
      "learning_rate": 1.1582920432250388e-05,
      "loss": 1.5136,
      "step": 4146
    },
    {
      "epoch": 0.8501435014350144,
      "grad_norm": 0.4125128231081385,
      "learning_rate": 1.1551918192662048e-05,
      "loss": 1.4665,
      "step": 4147
    },
    {
      "epoch": 0.8503485034850349,
      "grad_norm": 0.4337987555691318,
      "learning_rate": 1.1520954954658247e-05,
      "loss": 1.4972,
      "step": 4148
    },
    {
      "epoch": 0.8505535055350554,
      "grad_norm": 0.4546567480474969,
      "learning_rate": 1.149003073189242e-05,
      "loss": 1.5252,
      "step": 4149
    },
    {
      "epoch": 0.8507585075850759,
      "grad_norm": 0.4052719865626236,
      "learning_rate": 1.1459145538000705e-05,
      "loss": 1.4715,
      "step": 4150
    },
    {
      "epoch": 0.8509635096350964,
      "grad_norm": 0.4425343036806855,
      "learning_rate": 1.1428299386602104e-05,
      "loss": 1.5461,
      "step": 4151
    },
    {
      "epoch": 0.8511685116851169,
      "grad_norm": 0.4179351243946177,
      "learning_rate": 1.139749229129834e-05,
      "loss": 1.4199,
      "step": 4152
    },
    {
      "epoch": 0.8513735137351374,
      "grad_norm": 0.39347999398500727,
      "learning_rate": 1.1366724265673933e-05,
      "loss": 1.4521,
      "step": 4153
    },
    {
      "epoch": 0.8515785157851579,
      "grad_norm": 0.49832723323802064,
      "learning_rate": 1.1335995323296222e-05,
      "loss": 1.4861,
      "step": 4154
    },
    {
      "epoch": 0.8517835178351784,
      "grad_norm": 0.4168686029341923,
      "learning_rate": 1.1305305477715256e-05,
      "loss": 1.5202,
      "step": 4155
    },
    {
      "epoch": 0.8519885198851989,
      "grad_norm": 0.4824042221259518,
      "learning_rate": 1.1274654742463841e-05,
      "loss": 1.5418,
      "step": 4156
    },
    {
      "epoch": 0.8521935219352194,
      "grad_norm": 0.4225640926450485,
      "learning_rate": 1.1244043131057592e-05,
      "loss": 1.4781,
      "step": 4157
    },
    {
      "epoch": 0.8523985239852399,
      "grad_norm": 0.4083433149237101,
      "learning_rate": 1.1213470656994817e-05,
      "loss": 1.4527,
      "step": 4158
    },
    {
      "epoch": 0.8526035260352604,
      "grad_norm": 0.39471799456166173,
      "learning_rate": 1.1182937333756582e-05,
      "loss": 1.4227,
      "step": 4159
    },
    {
      "epoch": 0.8528085280852808,
      "grad_norm": 0.4162050440104547,
      "learning_rate": 1.1152443174806725e-05,
      "loss": 1.4783,
      "step": 4160
    },
    {
      "epoch": 0.8530135301353013,
      "grad_norm": 0.4929613126886529,
      "learning_rate": 1.1121988193591737e-05,
      "loss": 1.5586,
      "step": 4161
    },
    {
      "epoch": 0.8532185321853218,
      "grad_norm": 0.4340736942317783,
      "learning_rate": 1.109157240354094e-05,
      "loss": 1.4842,
      "step": 4162
    },
    {
      "epoch": 0.8534235342353423,
      "grad_norm": 0.41727988054721904,
      "learning_rate": 1.1061195818066284e-05,
      "loss": 1.4777,
      "step": 4163
    },
    {
      "epoch": 0.8536285362853628,
      "grad_norm": 0.43616071509035476,
      "learning_rate": 1.1030858450562442e-05,
      "loss": 1.4103,
      "step": 4164
    },
    {
      "epoch": 0.8538335383353833,
      "grad_norm": 0.4246995343847903,
      "learning_rate": 1.100056031440685e-05,
      "loss": 1.4944,
      "step": 4165
    },
    {
      "epoch": 0.8540385403854038,
      "grad_norm": 0.42822049962519326,
      "learning_rate": 1.0970301422959583e-05,
      "loss": 1.4971,
      "step": 4166
    },
    {
      "epoch": 0.8542435424354243,
      "grad_norm": 0.43015427213922725,
      "learning_rate": 1.0940081789563461e-05,
      "loss": 1.496,
      "step": 4167
    },
    {
      "epoch": 0.8544485444854448,
      "grad_norm": 0.4510875221798475,
      "learning_rate": 1.0909901427543968e-05,
      "loss": 1.5228,
      "step": 4168
    },
    {
      "epoch": 0.8546535465354653,
      "grad_norm": 0.41347933720167424,
      "learning_rate": 1.0879760350209234e-05,
      "loss": 1.501,
      "step": 4169
    },
    {
      "epoch": 0.8548585485854858,
      "grad_norm": 0.3948400745747476,
      "learning_rate": 1.0849658570850152e-05,
      "loss": 1.5529,
      "step": 4170
    },
    {
      "epoch": 0.8550635506355063,
      "grad_norm": 0.4064343396996718,
      "learning_rate": 1.0819596102740193e-05,
      "loss": 1.4636,
      "step": 4171
    },
    {
      "epoch": 0.8552685526855268,
      "grad_norm": 0.42274600906019216,
      "learning_rate": 1.0789572959135597e-05,
      "loss": 1.4089,
      "step": 4172
    },
    {
      "epoch": 0.8554735547355473,
      "grad_norm": 0.45452119318393625,
      "learning_rate": 1.0759589153275163e-05,
      "loss": 1.4974,
      "step": 4173
    },
    {
      "epoch": 0.8556785567855678,
      "grad_norm": 0.4309073461580129,
      "learning_rate": 1.0729644698380403e-05,
      "loss": 1.5576,
      "step": 4174
    },
    {
      "epoch": 0.8558835588355883,
      "grad_norm": 0.42434758651070265,
      "learning_rate": 1.0699739607655435e-05,
      "loss": 1.4597,
      "step": 4175
    },
    {
      "epoch": 0.8560885608856088,
      "grad_norm": 0.4077456640838372,
      "learning_rate": 1.0669873894287052e-05,
      "loss": 1.4549,
      "step": 4176
    },
    {
      "epoch": 0.8562935629356293,
      "grad_norm": 0.4380778822990617,
      "learning_rate": 1.0640047571444722e-05,
      "loss": 1.4855,
      "step": 4177
    },
    {
      "epoch": 0.8564985649856498,
      "grad_norm": 0.47499256880701546,
      "learning_rate": 1.0610260652280469e-05,
      "loss": 1.537,
      "step": 4178
    },
    {
      "epoch": 0.8567035670356704,
      "grad_norm": 0.4848525994042098,
      "learning_rate": 1.058051314992896e-05,
      "loss": 1.4817,
      "step": 4179
    },
    {
      "epoch": 0.8569085690856909,
      "grad_norm": 0.4382805146474575,
      "learning_rate": 1.0550805077507475e-05,
      "loss": 1.5312,
      "step": 4180
    },
    {
      "epoch": 0.8571135711357114,
      "grad_norm": 0.43980349710025407,
      "learning_rate": 1.0521136448115954e-05,
      "loss": 1.5196,
      "step": 4181
    },
    {
      "epoch": 0.8573185731857319,
      "grad_norm": 0.4279471456597939,
      "learning_rate": 1.049150727483692e-05,
      "loss": 1.4811,
      "step": 4182
    },
    {
      "epoch": 0.8575235752357524,
      "grad_norm": 0.42014260987782875,
      "learning_rate": 1.0461917570735491e-05,
      "loss": 1.4568,
      "step": 4183
    },
    {
      "epoch": 0.8577285772857729,
      "grad_norm": 0.41942169326765594,
      "learning_rate": 1.0432367348859362e-05,
      "loss": 1.4781,
      "step": 4184
    },
    {
      "epoch": 0.8579335793357934,
      "grad_norm": 0.40641492600052964,
      "learning_rate": 1.0402856622238832e-05,
      "loss": 1.499,
      "step": 4185
    },
    {
      "epoch": 0.8581385813858139,
      "grad_norm": 0.41570132072114757,
      "learning_rate": 1.0373385403886792e-05,
      "loss": 1.4406,
      "step": 4186
    },
    {
      "epoch": 0.8583435834358344,
      "grad_norm": 0.4223311677636492,
      "learning_rate": 1.034395370679876e-05,
      "loss": 1.4797,
      "step": 4187
    },
    {
      "epoch": 0.8585485854858549,
      "grad_norm": 0.42141961497704095,
      "learning_rate": 1.0314561543952729e-05,
      "loss": 1.4437,
      "step": 4188
    },
    {
      "epoch": 0.8587535875358754,
      "grad_norm": 0.4363700177051303,
      "learning_rate": 1.028520892830932e-05,
      "loss": 1.5351,
      "step": 4189
    },
    {
      "epoch": 0.8589585895858959,
      "grad_norm": 0.4385994172438943,
      "learning_rate": 1.0255895872811683e-05,
      "loss": 1.4796,
      "step": 4190
    },
    {
      "epoch": 0.8591635916359164,
      "grad_norm": 0.444436929476376,
      "learning_rate": 1.0226622390385554e-05,
      "loss": 1.4326,
      "step": 4191
    },
    {
      "epoch": 0.8593685936859369,
      "grad_norm": 0.3943030917234177,
      "learning_rate": 1.0197388493939242e-05,
      "loss": 1.4727,
      "step": 4192
    },
    {
      "epoch": 0.8595735957359574,
      "grad_norm": 0.42236713500496903,
      "learning_rate": 1.0168194196363534e-05,
      "loss": 1.4825,
      "step": 4193
    },
    {
      "epoch": 0.8597785977859779,
      "grad_norm": 0.3747138113033476,
      "learning_rate": 1.01390395105318e-05,
      "loss": 1.4119,
      "step": 4194
    },
    {
      "epoch": 0.8599835998359984,
      "grad_norm": 0.4678661263058261,
      "learning_rate": 1.0109924449299901e-05,
      "loss": 1.4742,
      "step": 4195
    },
    {
      "epoch": 0.8601886018860189,
      "grad_norm": 0.4444176502569113,
      "learning_rate": 1.0080849025506279e-05,
      "loss": 1.4235,
      "step": 4196
    },
    {
      "epoch": 0.8603936039360394,
      "grad_norm": 0.43547922030464714,
      "learning_rate": 1.0051813251971898e-05,
      "loss": 1.4836,
      "step": 4197
    },
    {
      "epoch": 0.8605986059860599,
      "grad_norm": 0.4315554574480792,
      "learning_rate": 1.0022817141500196e-05,
      "loss": 1.5749,
      "step": 4198
    },
    {
      "epoch": 0.8608036080360804,
      "grad_norm": 0.4929229154152233,
      "learning_rate": 9.993860706877135e-06,
      "loss": 1.5223,
      "step": 4199
    },
    {
      "epoch": 0.8610086100861009,
      "grad_norm": 0.4230669418185058,
      "learning_rate": 9.964943960871186e-06,
      "loss": 1.4754,
      "step": 4200
    },
    {
      "epoch": 0.8612136121361214,
      "grad_norm": 0.4065688935199412,
      "learning_rate": 9.93606691623329e-06,
      "loss": 1.471,
      "step": 4201
    },
    {
      "epoch": 0.8614186141861419,
      "grad_norm": 0.42764817804893823,
      "learning_rate": 9.907229585696986e-06,
      "loss": 1.4991,
      "step": 4202
    },
    {
      "epoch": 0.8616236162361623,
      "grad_norm": 0.3699568942068727,
      "learning_rate": 9.878431981978176e-06,
      "loss": 1.4631,
      "step": 4203
    },
    {
      "epoch": 0.8618286182861828,
      "grad_norm": 0.4057556544599368,
      "learning_rate": 9.849674117775299e-06,
      "loss": 1.4801,
      "step": 4204
    },
    {
      "epoch": 0.8620336203362033,
      "grad_norm": 0.45998238538523883,
      "learning_rate": 9.820956005769278e-06,
      "loss": 1.535,
      "step": 4205
    },
    {
      "epoch": 0.8622386223862238,
      "grad_norm": 0.4489879082621855,
      "learning_rate": 9.792277658623461e-06,
      "loss": 1.5197,
      "step": 4206
    },
    {
      "epoch": 0.8624436244362443,
      "grad_norm": 0.42107743766253847,
      "learning_rate": 9.763639088983722e-06,
      "loss": 1.4833,
      "step": 4207
    },
    {
      "epoch": 0.8626486264862648,
      "grad_norm": 0.3804768939174557,
      "learning_rate": 9.735040309478394e-06,
      "loss": 1.4694,
      "step": 4208
    },
    {
      "epoch": 0.8628536285362853,
      "grad_norm": 0.40233385635308083,
      "learning_rate": 9.706481332718208e-06,
      "loss": 1.4571,
      "step": 4209
    },
    {
      "epoch": 0.8630586305863058,
      "grad_norm": 0.39613851885223883,
      "learning_rate": 9.677962171296395e-06,
      "loss": 1.472,
      "step": 4210
    },
    {
      "epoch": 0.8632636326363263,
      "grad_norm": 0.4129840646275053,
      "learning_rate": 9.649482837788559e-06,
      "loss": 1.4308,
      "step": 4211
    },
    {
      "epoch": 0.8634686346863468,
      "grad_norm": 0.4394922531420773,
      "learning_rate": 9.621043344752834e-06,
      "loss": 1.4982,
      "step": 4212
    },
    {
      "epoch": 0.8636736367363673,
      "grad_norm": 0.38478743119041986,
      "learning_rate": 9.592643704729753e-06,
      "loss": 1.5066,
      "step": 4213
    },
    {
      "epoch": 0.8638786387863878,
      "grad_norm": 0.4101073038765194,
      "learning_rate": 9.564283930242257e-06,
      "loss": 1.5096,
      "step": 4214
    },
    {
      "epoch": 0.8640836408364083,
      "grad_norm": 0.3925096037052107,
      "learning_rate": 9.535964033795708e-06,
      "loss": 1.4631,
      "step": 4215
    },
    {
      "epoch": 0.8642886428864288,
      "grad_norm": 0.39001733363386804,
      "learning_rate": 9.507684027877884e-06,
      "loss": 1.4507,
      "step": 4216
    },
    {
      "epoch": 0.8644936449364493,
      "grad_norm": 0.3972002241726259,
      "learning_rate": 9.479443924959008e-06,
      "loss": 1.3943,
      "step": 4217
    },
    {
      "epoch": 0.8646986469864698,
      "grad_norm": 0.4507564362276557,
      "learning_rate": 9.451243737491654e-06,
      "loss": 1.5709,
      "step": 4218
    },
    {
      "epoch": 0.8649036490364904,
      "grad_norm": 0.38149621005196965,
      "learning_rate": 9.423083477910854e-06,
      "loss": 1.4806,
      "step": 4219
    },
    {
      "epoch": 0.8651086510865109,
      "grad_norm": 0.44735436609976137,
      "learning_rate": 9.394963158633995e-06,
      "loss": 1.4901,
      "step": 4220
    },
    {
      "epoch": 0.8653136531365314,
      "grad_norm": 0.4369129859042145,
      "learning_rate": 9.366882792060827e-06,
      "loss": 1.5002,
      "step": 4221
    },
    {
      "epoch": 0.8655186551865519,
      "grad_norm": 0.42121606214503354,
      "learning_rate": 9.338842390573566e-06,
      "loss": 1.478,
      "step": 4222
    },
    {
      "epoch": 0.8657236572365724,
      "grad_norm": 0.3897416846088761,
      "learning_rate": 9.31084196653671e-06,
      "loss": 1.4581,
      "step": 4223
    },
    {
      "epoch": 0.8659286592865929,
      "grad_norm": 0.40879126238424246,
      "learning_rate": 9.282881532297205e-06,
      "loss": 1.4583,
      "step": 4224
    },
    {
      "epoch": 0.8661336613366134,
      "grad_norm": 0.44276131025032817,
      "learning_rate": 9.254961100184333e-06,
      "loss": 1.509,
      "step": 4225
    },
    {
      "epoch": 0.8663386633866339,
      "grad_norm": 0.4394094281916402,
      "learning_rate": 9.227080682509693e-06,
      "loss": 1.5744,
      "step": 4226
    },
    {
      "epoch": 0.8665436654366544,
      "grad_norm": 0.410236230190187,
      "learning_rate": 9.199240291567336e-06,
      "loss": 1.4625,
      "step": 4227
    },
    {
      "epoch": 0.8667486674866749,
      "grad_norm": 0.4708193078012326,
      "learning_rate": 9.171439939633564e-06,
      "loss": 1.5104,
      "step": 4228
    },
    {
      "epoch": 0.8669536695366954,
      "grad_norm": 0.4028185800653282,
      "learning_rate": 9.143679638967106e-06,
      "loss": 1.4632,
      "step": 4229
    },
    {
      "epoch": 0.8671586715867159,
      "grad_norm": 0.36620970294973504,
      "learning_rate": 9.115959401808983e-06,
      "loss": 1.4854,
      "step": 4230
    },
    {
      "epoch": 0.8673636736367364,
      "grad_norm": 0.39047334149894813,
      "learning_rate": 9.088279240382536e-06,
      "loss": 1.4853,
      "step": 4231
    },
    {
      "epoch": 0.8675686756867569,
      "grad_norm": 0.40868567495099245,
      "learning_rate": 9.060639166893493e-06,
      "loss": 1.5342,
      "step": 4232
    },
    {
      "epoch": 0.8677736777367774,
      "grad_norm": 0.4747592219893723,
      "learning_rate": 9.033039193529857e-06,
      "loss": 1.5702,
      "step": 4233
    },
    {
      "epoch": 0.8679786797867979,
      "grad_norm": 0.4599886482785605,
      "learning_rate": 9.00547933246193e-06,
      "loss": 1.467,
      "step": 4234
    },
    {
      "epoch": 0.8681836818368184,
      "grad_norm": 0.39518441818371003,
      "learning_rate": 8.977959595842412e-06,
      "loss": 1.421,
      "step": 4235
    },
    {
      "epoch": 0.8683886838868389,
      "grad_norm": 0.4170806018914923,
      "learning_rate": 8.950479995806215e-06,
      "loss": 1.4702,
      "step": 4236
    },
    {
      "epoch": 0.8685936859368594,
      "grad_norm": 0.4344307439072057,
      "learning_rate": 8.923040544470629e-06,
      "loss": 1.5347,
      "step": 4237
    },
    {
      "epoch": 0.8687986879868799,
      "grad_norm": 0.46008997249726113,
      "learning_rate": 8.895641253935182e-06,
      "loss": 1.536,
      "step": 4238
    },
    {
      "epoch": 0.8690036900369004,
      "grad_norm": 0.4382179003295939,
      "learning_rate": 8.868282136281703e-06,
      "loss": 1.5422,
      "step": 4239
    },
    {
      "epoch": 0.8692086920869209,
      "grad_norm": 0.3935056480118967,
      "learning_rate": 8.840963203574348e-06,
      "loss": 1.5094,
      "step": 4240
    },
    {
      "epoch": 0.8694136941369414,
      "grad_norm": 0.4756323404083744,
      "learning_rate": 8.813684467859507e-06,
      "loss": 1.5178,
      "step": 4241
    },
    {
      "epoch": 0.8696186961869619,
      "grad_norm": 0.42978310761280036,
      "learning_rate": 8.786445941165878e-06,
      "loss": 1.4612,
      "step": 4242
    },
    {
      "epoch": 0.8698236982369824,
      "grad_norm": 0.43004323838058317,
      "learning_rate": 8.759247635504408e-06,
      "loss": 1.4408,
      "step": 4243
    },
    {
      "epoch": 0.8700287002870029,
      "grad_norm": 0.412104087334505,
      "learning_rate": 8.732089562868295e-06,
      "loss": 1.4698,
      "step": 4244
    },
    {
      "epoch": 0.8702337023370234,
      "grad_norm": 0.4366207912804082,
      "learning_rate": 8.704971735233048e-06,
      "loss": 1.4923,
      "step": 4245
    },
    {
      "epoch": 0.8704387043870438,
      "grad_norm": 0.45767928416432124,
      "learning_rate": 8.677894164556356e-06,
      "loss": 1.5269,
      "step": 4246
    },
    {
      "epoch": 0.8706437064370643,
      "grad_norm": 0.4178834734045859,
      "learning_rate": 8.650856862778245e-06,
      "loss": 1.4923,
      "step": 4247
    },
    {
      "epoch": 0.8708487084870848,
      "grad_norm": 0.38339128832077085,
      "learning_rate": 8.623859841820903e-06,
      "loss": 1.4559,
      "step": 4248
    },
    {
      "epoch": 0.8710537105371053,
      "grad_norm": 0.3641696256162045,
      "learning_rate": 8.596903113588806e-06,
      "loss": 1.4266,
      "step": 4249
    },
    {
      "epoch": 0.8712587125871258,
      "grad_norm": 0.4227922466624673,
      "learning_rate": 8.569986689968611e-06,
      "loss": 1.4577,
      "step": 4250
    },
    {
      "epoch": 0.8714637146371463,
      "grad_norm": 0.4074902888434092,
      "learning_rate": 8.543110582829272e-06,
      "loss": 1.4784,
      "step": 4251
    },
    {
      "epoch": 0.8716687166871668,
      "grad_norm": 0.3779493043347049,
      "learning_rate": 8.51627480402193e-06,
      "loss": 1.4892,
      "step": 4252
    },
    {
      "epoch": 0.8718737187371873,
      "grad_norm": 0.40829898208490795,
      "learning_rate": 8.489479365379949e-06,
      "loss": 1.5291,
      "step": 4253
    },
    {
      "epoch": 0.8720787207872078,
      "grad_norm": 0.4526017370721461,
      "learning_rate": 8.462724278718882e-06,
      "loss": 1.4941,
      "step": 4254
    },
    {
      "epoch": 0.8722837228372283,
      "grad_norm": 0.41561719708546446,
      "learning_rate": 8.4360095558365e-06,
      "loss": 1.4973,
      "step": 4255
    },
    {
      "epoch": 0.8724887248872488,
      "grad_norm": 0.43827811565348723,
      "learning_rate": 8.409335208512803e-06,
      "loss": 1.5097,
      "step": 4256
    },
    {
      "epoch": 0.8726937269372693,
      "grad_norm": 0.41387120080020373,
      "learning_rate": 8.382701248509949e-06,
      "loss": 1.5527,
      "step": 4257
    },
    {
      "epoch": 0.8728987289872898,
      "grad_norm": 0.39352095279882277,
      "learning_rate": 8.356107687572324e-06,
      "loss": 1.4886,
      "step": 4258
    },
    {
      "epoch": 0.8731037310373104,
      "grad_norm": 0.4465117634916217,
      "learning_rate": 8.329554537426465e-06,
      "loss": 1.4878,
      "step": 4259
    },
    {
      "epoch": 0.8733087330873309,
      "grad_norm": 0.44303676088370064,
      "learning_rate": 8.303041809781088e-06,
      "loss": 1.4686,
      "step": 4260
    },
    {
      "epoch": 0.8735137351373514,
      "grad_norm": 0.5095313285999533,
      "learning_rate": 8.27656951632715e-06,
      "loss": 1.5157,
      "step": 4261
    },
    {
      "epoch": 0.8737187371873719,
      "grad_norm": 0.39851770952819626,
      "learning_rate": 8.250137668737667e-06,
      "loss": 1.458,
      "step": 4262
    },
    {
      "epoch": 0.8739237392373924,
      "grad_norm": 0.4203748814655289,
      "learning_rate": 8.223746278667942e-06,
      "loss": 1.53,
      "step": 4263
    },
    {
      "epoch": 0.8741287412874129,
      "grad_norm": 0.4693413530901096,
      "learning_rate": 8.197395357755355e-06,
      "loss": 1.5987,
      "step": 4264
    },
    {
      "epoch": 0.8743337433374334,
      "grad_norm": 0.3998092303594904,
      "learning_rate": 8.171084917619454e-06,
      "loss": 1.4713,
      "step": 4265
    },
    {
      "epoch": 0.8745387453874539,
      "grad_norm": 0.40235441996765325,
      "learning_rate": 8.144814969861936e-06,
      "loss": 1.5324,
      "step": 4266
    },
    {
      "epoch": 0.8747437474374744,
      "grad_norm": 0.405840537687244,
      "learning_rate": 8.11858552606668e-06,
      "loss": 1.4868,
      "step": 4267
    },
    {
      "epoch": 0.8749487494874949,
      "grad_norm": 0.4351379641523539,
      "learning_rate": 8.092396597799689e-06,
      "loss": 1.4358,
      "step": 4268
    },
    {
      "epoch": 0.8751537515375154,
      "grad_norm": 0.4022996855143869,
      "learning_rate": 8.066248196609072e-06,
      "loss": 1.4827,
      "step": 4269
    },
    {
      "epoch": 0.8753587535875359,
      "grad_norm": 0.4619074994391109,
      "learning_rate": 8.040140334025082e-06,
      "loss": 1.515,
      "step": 4270
    },
    {
      "epoch": 0.8755637556375564,
      "grad_norm": 0.3989868523353025,
      "learning_rate": 8.014073021560086e-06,
      "loss": 1.4552,
      "step": 4271
    },
    {
      "epoch": 0.8757687576875769,
      "grad_norm": 0.41904062067240616,
      "learning_rate": 7.988046270708616e-06,
      "loss": 1.4556,
      "step": 4272
    },
    {
      "epoch": 0.8759737597375974,
      "grad_norm": 0.4504709236134617,
      "learning_rate": 7.962060092947277e-06,
      "loss": 1.4927,
      "step": 4273
    },
    {
      "epoch": 0.8761787617876179,
      "grad_norm": 0.44010629176765953,
      "learning_rate": 7.936114499734792e-06,
      "loss": 1.5425,
      "step": 4274
    },
    {
      "epoch": 0.8763837638376384,
      "grad_norm": 0.4100243902057019,
      "learning_rate": 7.91020950251199e-06,
      "loss": 1.4879,
      "step": 4275
    },
    {
      "epoch": 0.8765887658876589,
      "grad_norm": 0.42681235433148745,
      "learning_rate": 7.884345112701764e-06,
      "loss": 1.4558,
      "step": 4276
    },
    {
      "epoch": 0.8767937679376794,
      "grad_norm": 0.39334661084575034,
      "learning_rate": 7.858521341709168e-06,
      "loss": 1.5146,
      "step": 4277
    },
    {
      "epoch": 0.8769987699876999,
      "grad_norm": 0.38736696773150486,
      "learning_rate": 7.83273820092133e-06,
      "loss": 1.4604,
      "step": 4278
    },
    {
      "epoch": 0.8772037720377204,
      "grad_norm": 0.39709696775443526,
      "learning_rate": 7.80699570170742e-06,
      "loss": 1.432,
      "step": 4279
    },
    {
      "epoch": 0.8774087740877409,
      "grad_norm": 0.41612812643447483,
      "learning_rate": 7.781293855418703e-06,
      "loss": 1.4924,
      "step": 4280
    },
    {
      "epoch": 0.8776137761377614,
      "grad_norm": 0.46812269509607357,
      "learning_rate": 7.755632673388525e-06,
      "loss": 1.5099,
      "step": 4281
    },
    {
      "epoch": 0.8778187781877819,
      "grad_norm": 0.4170027286603011,
      "learning_rate": 7.7300121669323e-06,
      "loss": 1.4918,
      "step": 4282
    },
    {
      "epoch": 0.8780237802378024,
      "grad_norm": 0.373162785284076,
      "learning_rate": 7.704432347347535e-06,
      "loss": 1.4712,
      "step": 4283
    },
    {
      "epoch": 0.8782287822878229,
      "grad_norm": 0.41775581774521026,
      "learning_rate": 7.67889322591374e-06,
      "loss": 1.4908,
      "step": 4284
    },
    {
      "epoch": 0.8784337843378434,
      "grad_norm": 0.440317165079676,
      "learning_rate": 7.653394813892523e-06,
      "loss": 1.4927,
      "step": 4285
    },
    {
      "epoch": 0.8786387863878639,
      "grad_norm": 0.41508998038173794,
      "learning_rate": 7.627937122527507e-06,
      "loss": 1.5474,
      "step": 4286
    },
    {
      "epoch": 0.8788437884378844,
      "grad_norm": 0.44862409038027806,
      "learning_rate": 7.6025201630443795e-06,
      "loss": 1.5374,
      "step": 4287
    },
    {
      "epoch": 0.8790487904879049,
      "grad_norm": 0.4386583881089859,
      "learning_rate": 7.577143946650889e-06,
      "loss": 1.5678,
      "step": 4288
    },
    {
      "epoch": 0.8792537925379253,
      "grad_norm": 0.41497831541122543,
      "learning_rate": 7.551808484536782e-06,
      "loss": 1.4867,
      "step": 4289
    },
    {
      "epoch": 0.8794587945879458,
      "grad_norm": 0.3986380875785754,
      "learning_rate": 7.526513787873835e-06,
      "loss": 1.5332,
      "step": 4290
    },
    {
      "epoch": 0.8796637966379663,
      "grad_norm": 0.4397287389332245,
      "learning_rate": 7.501259867815847e-06,
      "loss": 1.4814,
      "step": 4291
    },
    {
      "epoch": 0.8798687986879868,
      "grad_norm": 0.40803663498113696,
      "learning_rate": 7.476046735498676e-06,
      "loss": 1.459,
      "step": 4292
    },
    {
      "epoch": 0.8800738007380073,
      "grad_norm": 0.4113532718027076,
      "learning_rate": 7.450874402040176e-06,
      "loss": 1.4226,
      "step": 4293
    },
    {
      "epoch": 0.8802788027880278,
      "grad_norm": 0.4696285085274754,
      "learning_rate": 7.4257428785401764e-06,
      "loss": 1.4852,
      "step": 4294
    },
    {
      "epoch": 0.8804838048380483,
      "grad_norm": 0.48518595019249383,
      "learning_rate": 7.400652176080558e-06,
      "loss": 1.5037,
      "step": 4295
    },
    {
      "epoch": 0.8806888068880688,
      "grad_norm": 0.4099246803436755,
      "learning_rate": 7.375602305725138e-06,
      "loss": 1.4554,
      "step": 4296
    },
    {
      "epoch": 0.8808938089380893,
      "grad_norm": 0.3764477098910107,
      "learning_rate": 7.350593278519824e-06,
      "loss": 1.4645,
      "step": 4297
    },
    {
      "epoch": 0.8810988109881098,
      "grad_norm": 0.39809139125590737,
      "learning_rate": 7.325625105492428e-06,
      "loss": 1.4922,
      "step": 4298
    },
    {
      "epoch": 0.8813038130381304,
      "grad_norm": 0.40945707627666006,
      "learning_rate": 7.3006977976528004e-06,
      "loss": 1.4548,
      "step": 4299
    },
    {
      "epoch": 0.8815088150881509,
      "grad_norm": 0.38372289975393553,
      "learning_rate": 7.275811365992735e-06,
      "loss": 1.448,
      "step": 4300
    },
    {
      "epoch": 0.8817138171381714,
      "grad_norm": 0.47339773925572554,
      "learning_rate": 7.250965821486011e-06,
      "loss": 1.5102,
      "step": 4301
    },
    {
      "epoch": 0.8819188191881919,
      "grad_norm": 0.4036016508790276,
      "learning_rate": 7.22616117508842e-06,
      "loss": 1.58,
      "step": 4302
    },
    {
      "epoch": 0.8821238212382124,
      "grad_norm": 0.4572132388300134,
      "learning_rate": 7.201397437737634e-06,
      "loss": 1.469,
      "step": 4303
    },
    {
      "epoch": 0.8823288232882329,
      "grad_norm": 0.41993333750887724,
      "learning_rate": 7.176674620353374e-06,
      "loss": 1.4395,
      "step": 4304
    },
    {
      "epoch": 0.8825338253382534,
      "grad_norm": 0.4491953266519826,
      "learning_rate": 7.151992733837276e-06,
      "loss": 1.5266,
      "step": 4305
    },
    {
      "epoch": 0.8827388273882739,
      "grad_norm": 0.42705442120420445,
      "learning_rate": 7.127351789072911e-06,
      "loss": 1.5475,
      "step": 4306
    },
    {
      "epoch": 0.8829438294382944,
      "grad_norm": 0.3786944821213462,
      "learning_rate": 7.1027517969258104e-06,
      "loss": 1.4423,
      "step": 4307
    },
    {
      "epoch": 0.8831488314883149,
      "grad_norm": 0.4187861064282639,
      "learning_rate": 7.078192768243486e-06,
      "loss": 1.483,
      "step": 4308
    },
    {
      "epoch": 0.8833538335383354,
      "grad_norm": 0.4475937168579179,
      "learning_rate": 7.053674713855319e-06,
      "loss": 1.4535,
      "step": 4309
    },
    {
      "epoch": 0.8835588355883559,
      "grad_norm": 0.4117847844233788,
      "learning_rate": 7.029197644572694e-06,
      "loss": 1.5013,
      "step": 4310
    },
    {
      "epoch": 0.8837638376383764,
      "grad_norm": 0.42153387898085704,
      "learning_rate": 7.004761571188856e-06,
      "loss": 1.4541,
      "step": 4311
    },
    {
      "epoch": 0.8839688396883969,
      "grad_norm": 0.42043585894663543,
      "learning_rate": 6.980366504479008e-06,
      "loss": 1.4958,
      "step": 4312
    },
    {
      "epoch": 0.8841738417384174,
      "grad_norm": 0.39502934468044415,
      "learning_rate": 6.956012455200278e-06,
      "loss": 1.5121,
      "step": 4313
    },
    {
      "epoch": 0.8843788437884379,
      "grad_norm": 0.4262024364755581,
      "learning_rate": 6.931699434091676e-06,
      "loss": 1.4827,
      "step": 4314
    },
    {
      "epoch": 0.8845838458384584,
      "grad_norm": 0.4103014261063067,
      "learning_rate": 6.90742745187416e-06,
      "loss": 1.4557,
      "step": 4315
    },
    {
      "epoch": 0.8847888478884789,
      "grad_norm": 0.4273168355342068,
      "learning_rate": 6.883196519250568e-06,
      "loss": 1.4911,
      "step": 4316
    },
    {
      "epoch": 0.8849938499384994,
      "grad_norm": 0.44556455395940653,
      "learning_rate": 6.859006646905619e-06,
      "loss": 1.4668,
      "step": 4317
    },
    {
      "epoch": 0.8851988519885199,
      "grad_norm": 0.4154958473790276,
      "learning_rate": 6.834857845505971e-06,
      "loss": 1.46,
      "step": 4318
    },
    {
      "epoch": 0.8854038540385404,
      "grad_norm": 0.4063941186814045,
      "learning_rate": 6.810750125700127e-06,
      "loss": 1.4725,
      "step": 4319
    },
    {
      "epoch": 0.8856088560885609,
      "grad_norm": 0.4007699245160234,
      "learning_rate": 6.7866834981185175e-06,
      "loss": 1.4726,
      "step": 4320
    },
    {
      "epoch": 0.8858138581385814,
      "grad_norm": 0.43461484437042064,
      "learning_rate": 6.762657973373432e-06,
      "loss": 1.4577,
      "step": 4321
    },
    {
      "epoch": 0.8860188601886019,
      "grad_norm": 0.387988967347175,
      "learning_rate": 6.738673562059006e-06,
      "loss": 1.4631,
      "step": 4322
    },
    {
      "epoch": 0.8862238622386224,
      "grad_norm": 0.3888541817566676,
      "learning_rate": 6.714730274751302e-06,
      "loss": 1.462,
      "step": 4323
    },
    {
      "epoch": 0.8864288642886429,
      "grad_norm": 0.398133842018511,
      "learning_rate": 6.69082812200823e-06,
      "loss": 1.4749,
      "step": 4324
    },
    {
      "epoch": 0.8866338663386634,
      "grad_norm": 0.39116550605028294,
      "learning_rate": 6.666967114369504e-06,
      "loss": 1.4567,
      "step": 4325
    },
    {
      "epoch": 0.8868388683886839,
      "grad_norm": 0.4240717354402672,
      "learning_rate": 6.643147262356808e-06,
      "loss": 1.4466,
      "step": 4326
    },
    {
      "epoch": 0.8870438704387044,
      "grad_norm": 0.41052038294049636,
      "learning_rate": 6.61936857647355e-06,
      "loss": 1.4516,
      "step": 4327
    },
    {
      "epoch": 0.8872488724887249,
      "grad_norm": 0.4224533246185816,
      "learning_rate": 6.59563106720511e-06,
      "loss": 1.4958,
      "step": 4328
    },
    {
      "epoch": 0.8874538745387454,
      "grad_norm": 0.45107254009332937,
      "learning_rate": 6.571934745018626e-06,
      "loss": 1.5279,
      "step": 4329
    },
    {
      "epoch": 0.8876588765887659,
      "grad_norm": 0.4035330292488205,
      "learning_rate": 6.548279620363074e-06,
      "loss": 1.4749,
      "step": 4330
    },
    {
      "epoch": 0.8878638786387864,
      "grad_norm": 0.4367041439211082,
      "learning_rate": 6.524665703669331e-06,
      "loss": 1.4704,
      "step": 4331
    },
    {
      "epoch": 0.8880688806888068,
      "grad_norm": 0.43217366715307726,
      "learning_rate": 6.501093005350023e-06,
      "loss": 1.441,
      "step": 4332
    },
    {
      "epoch": 0.8882738827388273,
      "grad_norm": 0.45891051651383613,
      "learning_rate": 6.477561535799681e-06,
      "loss": 1.5233,
      "step": 4333
    },
    {
      "epoch": 0.8884788847888478,
      "grad_norm": 0.4183459553877193,
      "learning_rate": 6.454071305394582e-06,
      "loss": 1.4864,
      "step": 4334
    },
    {
      "epoch": 0.8886838868388683,
      "grad_norm": 0.41425569288733005,
      "learning_rate": 6.430622324492852e-06,
      "loss": 1.4419,
      "step": 4335
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 0.4397703457312809,
      "learning_rate": 6.4072146034344415e-06,
      "loss": 1.4862,
      "step": 4336
    },
    {
      "epoch": 0.8890938909389093,
      "grad_norm": 0.42607442763953934,
      "learning_rate": 6.383848152541072e-06,
      "loss": 1.493,
      "step": 4337
    },
    {
      "epoch": 0.8892988929889298,
      "grad_norm": 0.4145253654502572,
      "learning_rate": 6.360522982116301e-06,
      "loss": 1.475,
      "step": 4338
    },
    {
      "epoch": 0.8895038950389504,
      "grad_norm": 0.39209549785040015,
      "learning_rate": 6.33723910244548e-06,
      "loss": 1.4827,
      "step": 4339
    },
    {
      "epoch": 0.8897088970889709,
      "grad_norm": 0.43560293264071587,
      "learning_rate": 6.313996523795717e-06,
      "loss": 1.5003,
      "step": 4340
    },
    {
      "epoch": 0.8899138991389914,
      "grad_norm": 0.5100273568085638,
      "learning_rate": 6.290795256415927e-06,
      "loss": 1.5628,
      "step": 4341
    },
    {
      "epoch": 0.8901189011890119,
      "grad_norm": 0.40451975659736783,
      "learning_rate": 6.2676353105368346e-06,
      "loss": 1.4569,
      "step": 4342
    },
    {
      "epoch": 0.8903239032390324,
      "grad_norm": 0.4219556299115111,
      "learning_rate": 6.244516696370928e-06,
      "loss": 1.5075,
      "step": 4343
    },
    {
      "epoch": 0.8905289052890529,
      "grad_norm": 0.4381633536779827,
      "learning_rate": 6.221439424112463e-06,
      "loss": 1.507,
      "step": 4344
    },
    {
      "epoch": 0.8907339073390734,
      "grad_norm": 0.4274465298179701,
      "learning_rate": 6.198403503937467e-06,
      "loss": 1.4717,
      "step": 4345
    },
    {
      "epoch": 0.8909389093890939,
      "grad_norm": 0.4357826663308224,
      "learning_rate": 6.175408946003703e-06,
      "loss": 1.5013,
      "step": 4346
    },
    {
      "epoch": 0.8911439114391144,
      "grad_norm": 0.46362662071190486,
      "learning_rate": 6.152455760450748e-06,
      "loss": 1.5811,
      "step": 4347
    },
    {
      "epoch": 0.8913489134891349,
      "grad_norm": 0.4260789101277547,
      "learning_rate": 6.1295439573999415e-06,
      "loss": 1.5608,
      "step": 4348
    },
    {
      "epoch": 0.8915539155391554,
      "grad_norm": 0.39134435123359895,
      "learning_rate": 6.106673546954322e-06,
      "loss": 1.4752,
      "step": 4349
    },
    {
      "epoch": 0.8917589175891759,
      "grad_norm": 0.41104812737606716,
      "learning_rate": 6.083844539198691e-06,
      "loss": 1.4942,
      "step": 4350
    },
    {
      "epoch": 0.8919639196391964,
      "grad_norm": 0.3850017330641284,
      "learning_rate": 6.061056944199606e-06,
      "loss": 1.4849,
      "step": 4351
    },
    {
      "epoch": 0.8921689216892169,
      "grad_norm": 0.39519086707393414,
      "learning_rate": 6.0383107720053735e-06,
      "loss": 1.4646,
      "step": 4352
    },
    {
      "epoch": 0.8923739237392374,
      "grad_norm": 0.38712510667130645,
      "learning_rate": 6.0156060326460264e-06,
      "loss": 1.4497,
      "step": 4353
    },
    {
      "epoch": 0.8925789257892579,
      "grad_norm": 0.3929588382694851,
      "learning_rate": 5.992942736133322e-06,
      "loss": 1.4404,
      "step": 4354
    },
    {
      "epoch": 0.8927839278392784,
      "grad_norm": 0.39897917401525135,
      "learning_rate": 5.9703208924607345e-06,
      "loss": 1.4297,
      "step": 4355
    },
    {
      "epoch": 0.8929889298892989,
      "grad_norm": 0.3839579187159569,
      "learning_rate": 5.947740511603461e-06,
      "loss": 1.4661,
      "step": 4356
    },
    {
      "epoch": 0.8931939319393194,
      "grad_norm": 0.4413139091397072,
      "learning_rate": 5.925201603518415e-06,
      "loss": 1.4465,
      "step": 4357
    },
    {
      "epoch": 0.8933989339893399,
      "grad_norm": 0.40779105483386197,
      "learning_rate": 5.902704178144269e-06,
      "loss": 1.5334,
      "step": 4358
    },
    {
      "epoch": 0.8936039360393604,
      "grad_norm": 0.42566553139562646,
      "learning_rate": 5.880248245401354e-06,
      "loss": 1.4879,
      "step": 4359
    },
    {
      "epoch": 0.8938089380893809,
      "grad_norm": 0.43877516219285406,
      "learning_rate": 5.857833815191704e-06,
      "loss": 1.5261,
      "step": 4360
    },
    {
      "epoch": 0.8940139401394014,
      "grad_norm": 0.4126074243371334,
      "learning_rate": 5.835460897399059e-06,
      "loss": 1.5265,
      "step": 4361
    },
    {
      "epoch": 0.8942189421894219,
      "grad_norm": 0.4197863581825808,
      "learning_rate": 5.813129501888859e-06,
      "loss": 1.4589,
      "step": 4362
    },
    {
      "epoch": 0.8944239442394424,
      "grad_norm": 0.4162512697969112,
      "learning_rate": 5.79083963850825e-06,
      "loss": 1.5739,
      "step": 4363
    },
    {
      "epoch": 0.8946289462894629,
      "grad_norm": 0.40482578653204193,
      "learning_rate": 5.768591317086047e-06,
      "loss": 1.4723,
      "step": 4364
    },
    {
      "epoch": 0.8948339483394834,
      "grad_norm": 0.5031846296513467,
      "learning_rate": 5.746384547432737e-06,
      "loss": 1.5114,
      "step": 4365
    },
    {
      "epoch": 0.8950389503895039,
      "grad_norm": 0.41312402290596706,
      "learning_rate": 5.724219339340508e-06,
      "loss": 1.4926,
      "step": 4366
    },
    {
      "epoch": 0.8952439524395244,
      "grad_norm": 0.4476948110488828,
      "learning_rate": 5.702095702583188e-06,
      "loss": 1.4996,
      "step": 4367
    },
    {
      "epoch": 0.8954489544895449,
      "grad_norm": 0.41201960879148447,
      "learning_rate": 5.6800136469163156e-06,
      "loss": 1.5225,
      "step": 4368
    },
    {
      "epoch": 0.8956539565395654,
      "grad_norm": 0.38809535534862694,
      "learning_rate": 5.657973182077081e-06,
      "loss": 1.4487,
      "step": 4369
    },
    {
      "epoch": 0.8958589585895859,
      "grad_norm": 0.40451963783488726,
      "learning_rate": 5.635974317784309e-06,
      "loss": 1.4064,
      "step": 4370
    },
    {
      "epoch": 0.8960639606396064,
      "grad_norm": 0.43671851752447083,
      "learning_rate": 5.614017063738519e-06,
      "loss": 1.5012,
      "step": 4371
    },
    {
      "epoch": 0.8962689626896269,
      "grad_norm": 0.3791427685973977,
      "learning_rate": 5.592101429621821e-06,
      "loss": 1.4475,
      "step": 4372
    },
    {
      "epoch": 0.8964739647396474,
      "grad_norm": 0.41866960933738107,
      "learning_rate": 5.570227425098051e-06,
      "loss": 1.5183,
      "step": 4373
    },
    {
      "epoch": 0.8966789667896679,
      "grad_norm": 0.4105122314505505,
      "learning_rate": 5.54839505981265e-06,
      "loss": 1.4754,
      "step": 4374
    },
    {
      "epoch": 0.8968839688396884,
      "grad_norm": 0.4168407798458137,
      "learning_rate": 5.526604343392694e-06,
      "loss": 1.4888,
      "step": 4375
    },
    {
      "epoch": 0.8970889708897088,
      "grad_norm": 0.3777130109190239,
      "learning_rate": 5.504855285446897e-06,
      "loss": 1.4894,
      "step": 4376
    },
    {
      "epoch": 0.8972939729397293,
      "grad_norm": 0.4572916946220848,
      "learning_rate": 5.483147895565588e-06,
      "loss": 1.5311,
      "step": 4377
    },
    {
      "epoch": 0.8974989749897498,
      "grad_norm": 0.3817856617287783,
      "learning_rate": 5.461482183320754e-06,
      "loss": 1.4343,
      "step": 4378
    },
    {
      "epoch": 0.8977039770397705,
      "grad_norm": 0.37738546683367635,
      "learning_rate": 5.439858158266009e-06,
      "loss": 1.4145,
      "step": 4379
    },
    {
      "epoch": 0.897908979089791,
      "grad_norm": 0.3952501044243125,
      "learning_rate": 5.418275829936537e-06,
      "loss": 1.4981,
      "step": 4380
    },
    {
      "epoch": 0.8981139811398114,
      "grad_norm": 0.4167985162739483,
      "learning_rate": 5.396735207849179e-06,
      "loss": 1.4914,
      "step": 4381
    },
    {
      "epoch": 0.8983189831898319,
      "grad_norm": 0.38839115524610257,
      "learning_rate": 5.375236301502351e-06,
      "loss": 1.4656,
      "step": 4382
    },
    {
      "epoch": 0.8985239852398524,
      "grad_norm": 0.42589564232486415,
      "learning_rate": 5.353779120376101e-06,
      "loss": 1.471,
      "step": 4383
    },
    {
      "epoch": 0.8987289872898729,
      "grad_norm": 0.3959255466586945,
      "learning_rate": 5.332363673932106e-06,
      "loss": 1.4355,
      "step": 4384
    },
    {
      "epoch": 0.8989339893398934,
      "grad_norm": 0.41647974152015393,
      "learning_rate": 5.310989971613567e-06,
      "loss": 1.4511,
      "step": 4385
    },
    {
      "epoch": 0.8991389913899139,
      "grad_norm": 0.4438481168520766,
      "learning_rate": 5.289658022845323e-06,
      "loss": 1.4887,
      "step": 4386
    },
    {
      "epoch": 0.8993439934399344,
      "grad_norm": 0.42804066215262604,
      "learning_rate": 5.268367837033783e-06,
      "loss": 1.4732,
      "step": 4387
    },
    {
      "epoch": 0.8995489954899549,
      "grad_norm": 0.4001595505609998,
      "learning_rate": 5.247119423566982e-06,
      "loss": 1.4831,
      "step": 4388
    },
    {
      "epoch": 0.8997539975399754,
      "grad_norm": 0.4357052463910056,
      "learning_rate": 5.225912791814469e-06,
      "loss": 1.4839,
      "step": 4389
    },
    {
      "epoch": 0.8999589995899959,
      "grad_norm": 0.37603492396208343,
      "learning_rate": 5.204747951127442e-06,
      "loss": 1.4468,
      "step": 4390
    },
    {
      "epoch": 0.9001640016400164,
      "grad_norm": 0.4362640423523535,
      "learning_rate": 5.183624910838602e-06,
      "loss": 1.4993,
      "step": 4391
    },
    {
      "epoch": 0.9003690036900369,
      "grad_norm": 0.42870809562502443,
      "learning_rate": 5.162543680262266e-06,
      "loss": 1.4781,
      "step": 4392
    },
    {
      "epoch": 0.9005740057400574,
      "grad_norm": 0.4912405403685505,
      "learning_rate": 5.141504268694297e-06,
      "loss": 1.5798,
      "step": 4393
    },
    {
      "epoch": 0.9007790077900779,
      "grad_norm": 0.4237245324241607,
      "learning_rate": 5.120506685412108e-06,
      "loss": 1.4802,
      "step": 4394
    },
    {
      "epoch": 0.9009840098400984,
      "grad_norm": 0.4298838163077209,
      "learning_rate": 5.099550939674691e-06,
      "loss": 1.5339,
      "step": 4395
    },
    {
      "epoch": 0.9011890118901189,
      "grad_norm": 0.4261848250576572,
      "learning_rate": 5.078637040722589e-06,
      "loss": 1.4837,
      "step": 4396
    },
    {
      "epoch": 0.9013940139401394,
      "grad_norm": 0.4087698210055334,
      "learning_rate": 5.057764997777847e-06,
      "loss": 1.472,
      "step": 4397
    },
    {
      "epoch": 0.9015990159901599,
      "grad_norm": 0.40548444024710545,
      "learning_rate": 5.036934820044126e-06,
      "loss": 1.4768,
      "step": 4398
    },
    {
      "epoch": 0.9018040180401804,
      "grad_norm": 0.39760237227440365,
      "learning_rate": 5.016146516706566e-06,
      "loss": 1.4723,
      "step": 4399
    },
    {
      "epoch": 0.9020090200902009,
      "grad_norm": 0.45649469436456097,
      "learning_rate": 4.995400096931846e-06,
      "loss": 1.4644,
      "step": 4400
    },
    {
      "epoch": 0.9022140221402214,
      "grad_norm": 0.3840876202952878,
      "learning_rate": 4.974695569868237e-06,
      "loss": 1.4985,
      "step": 4401
    },
    {
      "epoch": 0.9024190241902419,
      "grad_norm": 0.48057576272888625,
      "learning_rate": 4.954032944645459e-06,
      "loss": 1.455,
      "step": 4402
    },
    {
      "epoch": 0.9026240262402624,
      "grad_norm": 0.3772560654634475,
      "learning_rate": 4.933412230374812e-06,
      "loss": 1.4148,
      "step": 4403
    },
    {
      "epoch": 0.9028290282902829,
      "grad_norm": 0.40135335040807557,
      "learning_rate": 4.9128334361491e-06,
      "loss": 1.4279,
      "step": 4404
    },
    {
      "epoch": 0.9030340303403034,
      "grad_norm": 0.4313473353658115,
      "learning_rate": 4.892296571042598e-06,
      "loss": 1.4683,
      "step": 4405
    },
    {
      "epoch": 0.9032390323903239,
      "grad_norm": 0.40619140288261074,
      "learning_rate": 4.871801644111173e-06,
      "loss": 1.5104,
      "step": 4406
    },
    {
      "epoch": 0.9034440344403444,
      "grad_norm": 0.4084792119047769,
      "learning_rate": 4.8513486643921195e-06,
      "loss": 1.4179,
      "step": 4407
    },
    {
      "epoch": 0.9036490364903649,
      "grad_norm": 0.4254646699105221,
      "learning_rate": 4.830937640904309e-06,
      "loss": 1.5176,
      "step": 4408
    },
    {
      "epoch": 0.9038540385403854,
      "grad_norm": 0.40021577405821507,
      "learning_rate": 4.810568582648056e-06,
      "loss": 1.4892,
      "step": 4409
    },
    {
      "epoch": 0.9040590405904059,
      "grad_norm": 0.41631489380610787,
      "learning_rate": 4.790241498605174e-06,
      "loss": 1.445,
      "step": 4410
    },
    {
      "epoch": 0.9042640426404264,
      "grad_norm": 0.45863252323245773,
      "learning_rate": 4.769956397739017e-06,
      "loss": 1.4509,
      "step": 4411
    },
    {
      "epoch": 0.9044690446904469,
      "grad_norm": 0.38862650656314107,
      "learning_rate": 4.749713288994373e-06,
      "loss": 1.4867,
      "step": 4412
    },
    {
      "epoch": 0.9046740467404674,
      "grad_norm": 0.46409065828767937,
      "learning_rate": 4.729512181297524e-06,
      "loss": 1.5809,
      "step": 4413
    },
    {
      "epoch": 0.9048790487904879,
      "grad_norm": 0.9264537185083902,
      "learning_rate": 4.709353083556267e-06,
      "loss": 1.5154,
      "step": 4414
    },
    {
      "epoch": 0.9050840508405084,
      "grad_norm": 0.43895061320621714,
      "learning_rate": 4.689236004659825e-06,
      "loss": 1.5032,
      "step": 4415
    },
    {
      "epoch": 0.9052890528905289,
      "grad_norm": 0.4284209690941435,
      "learning_rate": 4.669160953478913e-06,
      "loss": 1.5023,
      "step": 4416
    },
    {
      "epoch": 0.9054940549405494,
      "grad_norm": 0.42568915094453375,
      "learning_rate": 4.649127938865749e-06,
      "loss": 1.5208,
      "step": 4417
    },
    {
      "epoch": 0.9056990569905699,
      "grad_norm": 0.40906890209747016,
      "learning_rate": 4.629136969653936e-06,
      "loss": 1.4958,
      "step": 4418
    },
    {
      "epoch": 0.9059040590405905,
      "grad_norm": 0.4296248118132678,
      "learning_rate": 4.609188054658631e-06,
      "loss": 1.5011,
      "step": 4419
    },
    {
      "epoch": 0.906109061090611,
      "grad_norm": 0.44818031416034615,
      "learning_rate": 4.589281202676366e-06,
      "loss": 1.4743,
      "step": 4420
    },
    {
      "epoch": 0.9063140631406315,
      "grad_norm": 0.4074294804428407,
      "learning_rate": 4.569416422485151e-06,
      "loss": 1.5113,
      "step": 4421
    },
    {
      "epoch": 0.906519065190652,
      "grad_norm": 0.45007376441789937,
      "learning_rate": 4.549593722844492e-06,
      "loss": 1.4651,
      "step": 4422
    },
    {
      "epoch": 0.9067240672406724,
      "grad_norm": 0.4337646770574745,
      "learning_rate": 4.529813112495251e-06,
      "loss": 1.5071,
      "step": 4423
    },
    {
      "epoch": 0.906929069290693,
      "grad_norm": 0.4768651715415433,
      "learning_rate": 4.5100746001598194e-06,
      "loss": 1.5528,
      "step": 4424
    },
    {
      "epoch": 0.9071340713407134,
      "grad_norm": 0.4207868137640321,
      "learning_rate": 4.490378194541955e-06,
      "loss": 1.4583,
      "step": 4425
    },
    {
      "epoch": 0.9073390733907339,
      "grad_norm": 0.3956413454971181,
      "learning_rate": 4.47072390432689e-06,
      "loss": 1.4614,
      "step": 4426
    },
    {
      "epoch": 0.9075440754407544,
      "grad_norm": 0.41323175023849396,
      "learning_rate": 4.451111738181279e-06,
      "loss": 1.4825,
      "step": 4427
    },
    {
      "epoch": 0.9077490774907749,
      "grad_norm": 0.42953635702334275,
      "learning_rate": 4.431541704753173e-06,
      "loss": 1.5218,
      "step": 4428
    },
    {
      "epoch": 0.9079540795407954,
      "grad_norm": 0.4599875582005663,
      "learning_rate": 4.412013812672089e-06,
      "loss": 1.4912,
      "step": 4429
    },
    {
      "epoch": 0.9081590815908159,
      "grad_norm": 0.43144965301230526,
      "learning_rate": 4.392528070548951e-06,
      "loss": 1.5,
      "step": 4430
    },
    {
      "epoch": 0.9083640836408364,
      "grad_norm": 0.4220856446726741,
      "learning_rate": 4.373084486976053e-06,
      "loss": 1.4709,
      "step": 4431
    },
    {
      "epoch": 0.9085690856908569,
      "grad_norm": 0.4178747146754646,
      "learning_rate": 4.353683070527148e-06,
      "loss": 1.4553,
      "step": 4432
    },
    {
      "epoch": 0.9087740877408774,
      "grad_norm": 0.4175328176169357,
      "learning_rate": 4.3343238297573695e-06,
      "loss": 1.5339,
      "step": 4433
    },
    {
      "epoch": 0.9089790897908979,
      "grad_norm": 0.3971593483845302,
      "learning_rate": 4.31500677320329e-06,
      "loss": 1.4506,
      "step": 4434
    },
    {
      "epoch": 0.9091840918409184,
      "grad_norm": 0.4050944744498629,
      "learning_rate": 4.295731909382827e-06,
      "loss": 1.4915,
      "step": 4435
    },
    {
      "epoch": 0.9093890938909389,
      "grad_norm": 0.4462672095431968,
      "learning_rate": 4.276499246795329e-06,
      "loss": 1.533,
      "step": 4436
    },
    {
      "epoch": 0.9095940959409594,
      "grad_norm": 0.39833843094022603,
      "learning_rate": 4.257308793921522e-06,
      "loss": 1.4537,
      "step": 4437
    },
    {
      "epoch": 0.9097990979909799,
      "grad_norm": 0.37633381980439107,
      "learning_rate": 4.238160559223514e-06,
      "loss": 1.4676,
      "step": 4438
    },
    {
      "epoch": 0.9100041000410004,
      "grad_norm": 0.39343908088699486,
      "learning_rate": 4.219054551144841e-06,
      "loss": 1.5038,
      "step": 4439
    },
    {
      "epoch": 0.9102091020910209,
      "grad_norm": 0.3890658655239858,
      "learning_rate": 4.199990778110363e-06,
      "loss": 1.4893,
      "step": 4440
    },
    {
      "epoch": 0.9104141041410414,
      "grad_norm": 0.42989092807082757,
      "learning_rate": 4.180969248526334e-06,
      "loss": 1.3789,
      "step": 4441
    },
    {
      "epoch": 0.9106191061910619,
      "grad_norm": 0.4090079859228634,
      "learning_rate": 4.161989970780366e-06,
      "loss": 1.4405,
      "step": 4442
    },
    {
      "epoch": 0.9108241082410824,
      "grad_norm": 0.43302347273677383,
      "learning_rate": 4.143052953241488e-06,
      "loss": 1.4864,
      "step": 4443
    },
    {
      "epoch": 0.9110291102911029,
      "grad_norm": 0.36420720128816036,
      "learning_rate": 4.124158204260064e-06,
      "loss": 1.4864,
      "step": 4444
    },
    {
      "epoch": 0.9112341123411234,
      "grad_norm": 0.4015953520775453,
      "learning_rate": 4.105305732167819e-06,
      "loss": 1.4506,
      "step": 4445
    },
    {
      "epoch": 0.9114391143911439,
      "grad_norm": 0.4028126478295471,
      "learning_rate": 4.086495545277824e-06,
      "loss": 1.4718,
      "step": 4446
    },
    {
      "epoch": 0.9116441164411644,
      "grad_norm": 0.44682379527963934,
      "learning_rate": 4.067727651884501e-06,
      "loss": 1.4948,
      "step": 4447
    },
    {
      "epoch": 0.9118491184911849,
      "grad_norm": 0.3951347802065758,
      "learning_rate": 4.049002060263663e-06,
      "loss": 1.498,
      "step": 4448
    },
    {
      "epoch": 0.9120541205412054,
      "grad_norm": 0.48157101415511205,
      "learning_rate": 4.030318778672448e-06,
      "loss": 1.5343,
      "step": 4449
    },
    {
      "epoch": 0.9122591225912259,
      "grad_norm": 0.42879029221658904,
      "learning_rate": 4.011677815349335e-06,
      "loss": 1.4847,
      "step": 4450
    },
    {
      "epoch": 0.9124641246412464,
      "grad_norm": 0.3888410941702474,
      "learning_rate": 3.993079178514125e-06,
      "loss": 1.4367,
      "step": 4451
    },
    {
      "epoch": 0.9126691266912669,
      "grad_norm": 0.4298310911224064,
      "learning_rate": 3.97452287636797e-06,
      "loss": 1.4636,
      "step": 4452
    },
    {
      "epoch": 0.9128741287412874,
      "grad_norm": 0.4663686910828376,
      "learning_rate": 3.9560089170933565e-06,
      "loss": 1.5502,
      "step": 4453
    },
    {
      "epoch": 0.9130791307913079,
      "grad_norm": 0.45299088870355975,
      "learning_rate": 3.937537308854133e-06,
      "loss": 1.5273,
      "step": 4454
    },
    {
      "epoch": 0.9132841328413284,
      "grad_norm": 0.44430264039157247,
      "learning_rate": 3.919108059795406e-06,
      "loss": 1.4836,
      "step": 4455
    },
    {
      "epoch": 0.9134891348913489,
      "grad_norm": 0.40772800661490394,
      "learning_rate": 3.900721178043654e-06,
      "loss": 1.4779,
      "step": 4456
    },
    {
      "epoch": 0.9136941369413694,
      "grad_norm": 0.45301093810920023,
      "learning_rate": 3.882376671706622e-06,
      "loss": 1.5227,
      "step": 4457
    },
    {
      "epoch": 0.9138991389913899,
      "grad_norm": 0.4090074161991738,
      "learning_rate": 3.864074548873431e-06,
      "loss": 1.4663,
      "step": 4458
    },
    {
      "epoch": 0.9141041410414105,
      "grad_norm": 0.42870978658495645,
      "learning_rate": 3.845814817614502e-06,
      "loss": 1.4628,
      "step": 4459
    },
    {
      "epoch": 0.914309143091431,
      "grad_norm": 0.4251652358938823,
      "learning_rate": 3.827597485981527e-06,
      "loss": 1.4736,
      "step": 4460
    },
    {
      "epoch": 0.9145141451414515,
      "grad_norm": 0.43075237257587945,
      "learning_rate": 3.8094225620075253e-06,
      "loss": 1.4821,
      "step": 4461
    },
    {
      "epoch": 0.914719147191472,
      "grad_norm": 0.40917543461982836,
      "learning_rate": 3.7912900537067976e-06,
      "loss": 1.5301,
      "step": 4462
    },
    {
      "epoch": 0.9149241492414925,
      "grad_norm": 0.4016414367307887,
      "learning_rate": 3.7731999690749585e-06,
      "loss": 1.4666,
      "step": 4463
    },
    {
      "epoch": 0.915129151291513,
      "grad_norm": 0.43876664911346974,
      "learning_rate": 3.7551523160889278e-06,
      "loss": 1.4816,
      "step": 4464
    },
    {
      "epoch": 0.9153341533415335,
      "grad_norm": 0.4088555704556853,
      "learning_rate": 3.737147102706906e-06,
      "loss": 1.4387,
      "step": 4465
    },
    {
      "epoch": 0.915539155391554,
      "grad_norm": 0.42785444759701224,
      "learning_rate": 3.7191843368683645e-06,
      "loss": 1.5544,
      "step": 4466
    },
    {
      "epoch": 0.9157441574415744,
      "grad_norm": 0.368992514426279,
      "learning_rate": 3.701264026494067e-06,
      "loss": 1.4259,
      "step": 4467
    },
    {
      "epoch": 0.9159491594915949,
      "grad_norm": 0.41956990721389625,
      "learning_rate": 3.683386179486037e-06,
      "loss": 1.4885,
      "step": 4468
    },
    {
      "epoch": 0.9161541615416154,
      "grad_norm": 0.38627572576098745,
      "learning_rate": 3.665550803727613e-06,
      "loss": 1.4264,
      "step": 4469
    },
    {
      "epoch": 0.9163591635916359,
      "grad_norm": 0.43194735714894306,
      "learning_rate": 3.6477579070833933e-06,
      "loss": 1.5109,
      "step": 4470
    },
    {
      "epoch": 0.9165641656416564,
      "grad_norm": 0.4276156872704722,
      "learning_rate": 3.630007497399224e-06,
      "loss": 1.4828,
      "step": 4471
    },
    {
      "epoch": 0.9167691676916769,
      "grad_norm": 0.3947192775074258,
      "learning_rate": 3.612299582502232e-06,
      "loss": 1.4867,
      "step": 4472
    },
    {
      "epoch": 0.9169741697416974,
      "grad_norm": 0.41848054789026884,
      "learning_rate": 3.5946341702007836e-06,
      "loss": 1.4689,
      "step": 4473
    },
    {
      "epoch": 0.9171791717917179,
      "grad_norm": 0.40582495797375623,
      "learning_rate": 3.5770112682845468e-06,
      "loss": 1.4884,
      "step": 4474
    },
    {
      "epoch": 0.9173841738417384,
      "grad_norm": 0.39984132673246475,
      "learning_rate": 3.5594308845244286e-06,
      "loss": 1.4434,
      "step": 4475
    },
    {
      "epoch": 0.9175891758917589,
      "grad_norm": 0.44777856455775167,
      "learning_rate": 3.5418930266725605e-06,
      "loss": 1.4633,
      "step": 4476
    },
    {
      "epoch": 0.9177941779417794,
      "grad_norm": 0.419848422486166,
      "learning_rate": 3.5243977024623453e-06,
      "loss": 1.518,
      "step": 4477
    },
    {
      "epoch": 0.9179991799917999,
      "grad_norm": 0.42144448705675136,
      "learning_rate": 3.5069449196084126e-06,
      "loss": 1.5145,
      "step": 4478
    },
    {
      "epoch": 0.9182041820418204,
      "grad_norm": 0.4297846511344611,
      "learning_rate": 3.4895346858066724e-06,
      "loss": 1.4509,
      "step": 4479
    },
    {
      "epoch": 0.9184091840918409,
      "grad_norm": 0.43441525212771925,
      "learning_rate": 3.4721670087342282e-06,
      "loss": 1.4791,
      "step": 4480
    },
    {
      "epoch": 0.9186141861418614,
      "grad_norm": 0.41349419501085394,
      "learning_rate": 3.4548418960494433e-06,
      "loss": 1.4903,
      "step": 4481
    },
    {
      "epoch": 0.9188191881918819,
      "grad_norm": 0.4608204075693658,
      "learning_rate": 3.437559355391917e-06,
      "loss": 1.5354,
      "step": 4482
    },
    {
      "epoch": 0.9190241902419024,
      "grad_norm": 0.44247800859939584,
      "learning_rate": 3.420319394382432e-06,
      "loss": 1.5231,
      "step": 4483
    },
    {
      "epoch": 0.9192291922919229,
      "grad_norm": 0.41916841496271157,
      "learning_rate": 3.4031220206230617e-06,
      "loss": 1.4664,
      "step": 4484
    },
    {
      "epoch": 0.9194341943419434,
      "grad_norm": 0.41373473630690827,
      "learning_rate": 3.385967241697041e-06,
      "loss": 1.38,
      "step": 4485
    },
    {
      "epoch": 0.9196391963919639,
      "grad_norm": 0.418404612139362,
      "learning_rate": 3.3688550651688632e-06,
      "loss": 1.4544,
      "step": 4486
    },
    {
      "epoch": 0.9198441984419844,
      "grad_norm": 0.4181703166344262,
      "learning_rate": 3.3517854985842147e-06,
      "loss": 1.4959,
      "step": 4487
    },
    {
      "epoch": 0.9200492004920049,
      "grad_norm": 0.4222133418582104,
      "learning_rate": 3.3347585494699963e-06,
      "loss": 1.443,
      "step": 4488
    },
    {
      "epoch": 0.9202542025420254,
      "grad_norm": 0.4101106198461772,
      "learning_rate": 3.317774225334336e-06,
      "loss": 1.4424,
      "step": 4489
    },
    {
      "epoch": 0.9204592045920459,
      "grad_norm": 0.39972761456560146,
      "learning_rate": 3.300832533666509e-06,
      "loss": 1.3824,
      "step": 4490
    },
    {
      "epoch": 0.9206642066420664,
      "grad_norm": 0.4282198779684472,
      "learning_rate": 3.2839334819370846e-06,
      "loss": 1.4889,
      "step": 4491
    },
    {
      "epoch": 0.9208692086920869,
      "grad_norm": 0.4483828393661136,
      "learning_rate": 3.2670770775977467e-06,
      "loss": 1.5322,
      "step": 4492
    },
    {
      "epoch": 0.9210742107421074,
      "grad_norm": 0.4381826226976708,
      "learning_rate": 3.250263328081382e-06,
      "loss": 1.3928,
      "step": 4493
    },
    {
      "epoch": 0.9212792127921279,
      "grad_norm": 0.4450788638055356,
      "learning_rate": 3.2334922408021384e-06,
      "loss": 1.4922,
      "step": 4494
    },
    {
      "epoch": 0.9214842148421484,
      "grad_norm": 0.4066091165142853,
      "learning_rate": 3.2167638231552777e-06,
      "loss": 1.4956,
      "step": 4495
    },
    {
      "epoch": 0.9216892168921689,
      "grad_norm": 0.4132591086685028,
      "learning_rate": 3.200078082517255e-06,
      "loss": 1.4969,
      "step": 4496
    },
    {
      "epoch": 0.9218942189421894,
      "grad_norm": 0.4103944125872859,
      "learning_rate": 3.1834350262457625e-06,
      "loss": 1.4701,
      "step": 4497
    },
    {
      "epoch": 0.9220992209922099,
      "grad_norm": 0.38472458134357945,
      "learning_rate": 3.1668346616795963e-06,
      "loss": 1.4379,
      "step": 4498
    },
    {
      "epoch": 0.9223042230422305,
      "grad_norm": 0.4267717543937199,
      "learning_rate": 3.1502769961387903e-06,
      "loss": 1.5257,
      "step": 4499
    },
    {
      "epoch": 0.922509225092251,
      "grad_norm": 0.41878321656621137,
      "learning_rate": 3.1337620369245037e-06,
      "loss": 1.5126,
      "step": 4500
    },
    {
      "epoch": 0.9227142271422715,
      "grad_norm": 0.418269177368489,
      "learning_rate": 3.117289791319089e-06,
      "loss": 1.4299,
      "step": 4501
    },
    {
      "epoch": 0.922919229192292,
      "grad_norm": 0.39811051005006315,
      "learning_rate": 3.1008602665860586e-06,
      "loss": 1.4175,
      "step": 4502
    },
    {
      "epoch": 0.9231242312423125,
      "grad_norm": 0.3991435884314393,
      "learning_rate": 3.0844734699700726e-06,
      "loss": 1.4738,
      "step": 4503
    },
    {
      "epoch": 0.923329233292333,
      "grad_norm": 0.408831360944762,
      "learning_rate": 3.0681294086969957e-06,
      "loss": 1.5171,
      "step": 4504
    },
    {
      "epoch": 0.9235342353423535,
      "grad_norm": 0.4138747600359767,
      "learning_rate": 3.051828089973796e-06,
      "loss": 1.4676,
      "step": 4505
    },
    {
      "epoch": 0.923739237392374,
      "grad_norm": 0.4045311013648829,
      "learning_rate": 3.0355695209886126e-06,
      "loss": 1.4878,
      "step": 4506
    },
    {
      "epoch": 0.9239442394423945,
      "grad_norm": 0.407115128351299,
      "learning_rate": 3.019353708910733e-06,
      "loss": 1.4807,
      "step": 4507
    },
    {
      "epoch": 0.924149241492415,
      "grad_norm": 0.4003536976833089,
      "learning_rate": 3.003180660890592e-06,
      "loss": 1.4305,
      "step": 4508
    },
    {
      "epoch": 0.9243542435424354,
      "grad_norm": 0.4014028430429668,
      "learning_rate": 2.9870503840597973e-06,
      "loss": 1.4639,
      "step": 4509
    },
    {
      "epoch": 0.924559245592456,
      "grad_norm": 0.4244954691294579,
      "learning_rate": 2.9709628855310367e-06,
      "loss": 1.4561,
      "step": 4510
    },
    {
      "epoch": 0.9247642476424764,
      "grad_norm": 0.41609049800610626,
      "learning_rate": 2.95491817239818e-06,
      "loss": 1.4355,
      "step": 4511
    },
    {
      "epoch": 0.9249692496924969,
      "grad_norm": 0.41175801146054786,
      "learning_rate": 2.93891625173619e-06,
      "loss": 1.5555,
      "step": 4512
    },
    {
      "epoch": 0.9251742517425174,
      "grad_norm": 0.39186745527876476,
      "learning_rate": 2.9229571306012226e-06,
      "loss": 1.4959,
      "step": 4513
    },
    {
      "epoch": 0.9253792537925379,
      "grad_norm": 0.3806252485752179,
      "learning_rate": 2.9070408160305153e-06,
      "loss": 1.4773,
      "step": 4514
    },
    {
      "epoch": 0.9255842558425584,
      "grad_norm": 0.43030039070004605,
      "learning_rate": 2.8911673150424313e-06,
      "loss": 1.517,
      "step": 4515
    },
    {
      "epoch": 0.9257892578925789,
      "grad_norm": 0.3983980134298662,
      "learning_rate": 2.875336634636472e-06,
      "loss": 1.5188,
      "step": 4516
    },
    {
      "epoch": 0.9259942599425994,
      "grad_norm": 0.3708434983240725,
      "learning_rate": 2.859548781793242e-06,
      "loss": 1.449,
      "step": 4517
    },
    {
      "epoch": 0.9261992619926199,
      "grad_norm": 0.44734482673099224,
      "learning_rate": 2.8438037634744617e-06,
      "loss": 1.4864,
      "step": 4518
    },
    {
      "epoch": 0.9264042640426404,
      "grad_norm": 0.3998819406117882,
      "learning_rate": 2.8281015866229776e-06,
      "loss": 1.5016,
      "step": 4519
    },
    {
      "epoch": 0.9266092660926609,
      "grad_norm": 0.3758323865602232,
      "learning_rate": 2.8124422581627287e-06,
      "loss": 1.452,
      "step": 4520
    },
    {
      "epoch": 0.9268142681426814,
      "grad_norm": 0.4157014156700869,
      "learning_rate": 2.796825784998791e-06,
      "loss": 1.5021,
      "step": 4521
    },
    {
      "epoch": 0.9270192701927019,
      "grad_norm": 0.41259368015487885,
      "learning_rate": 2.7812521740172904e-06,
      "loss": 1.4436,
      "step": 4522
    },
    {
      "epoch": 0.9272242722427224,
      "grad_norm": 0.3876767690598938,
      "learning_rate": 2.7657214320854773e-06,
      "loss": 1.4316,
      "step": 4523
    },
    {
      "epoch": 0.9274292742927429,
      "grad_norm": 0.44028876979510134,
      "learning_rate": 2.7502335660517185e-06,
      "loss": 1.5091,
      "step": 4524
    },
    {
      "epoch": 0.9276342763427634,
      "grad_norm": 0.40281137782747767,
      "learning_rate": 2.734788582745473e-06,
      "loss": 1.3714,
      "step": 4525
    },
    {
      "epoch": 0.9278392783927839,
      "grad_norm": 0.3976192134347834,
      "learning_rate": 2.7193864889772603e-06,
      "loss": 1.5117,
      "step": 4526
    },
    {
      "epoch": 0.9280442804428044,
      "grad_norm": 0.40219946408255786,
      "learning_rate": 2.7040272915387022e-06,
      "loss": 1.4581,
      "step": 4527
    },
    {
      "epoch": 0.9282492824928249,
      "grad_norm": 0.38864124668356803,
      "learning_rate": 2.6887109972025037e-06,
      "loss": 1.4581,
      "step": 4528
    },
    {
      "epoch": 0.9284542845428454,
      "grad_norm": 0.4060343137796328,
      "learning_rate": 2.6734376127224625e-06,
      "loss": 1.4097,
      "step": 4529
    },
    {
      "epoch": 0.9286592865928659,
      "grad_norm": 0.39372678668660055,
      "learning_rate": 2.658207144833447e-06,
      "loss": 1.4593,
      "step": 4530
    },
    {
      "epoch": 0.9288642886428864,
      "grad_norm": 0.40514047654048657,
      "learning_rate": 2.6430196002514065e-06,
      "loss": 1.4801,
      "step": 4531
    },
    {
      "epoch": 0.9290692906929069,
      "grad_norm": 0.3775697515584575,
      "learning_rate": 2.627874985673351e-06,
      "loss": 1.3906,
      "step": 4532
    },
    {
      "epoch": 0.9292742927429274,
      "grad_norm": 0.4530877012632655,
      "learning_rate": 2.6127733077773497e-06,
      "loss": 1.4922,
      "step": 4533
    },
    {
      "epoch": 0.9294792947929479,
      "grad_norm": 0.3857017010570035,
      "learning_rate": 2.597714573222576e-06,
      "loss": 1.5006,
      "step": 4534
    },
    {
      "epoch": 0.9296842968429684,
      "grad_norm": 0.43168979898810245,
      "learning_rate": 2.5826987886492627e-06,
      "loss": 1.5039,
      "step": 4535
    },
    {
      "epoch": 0.9298892988929889,
      "grad_norm": 0.42538292559643276,
      "learning_rate": 2.5677259606786684e-06,
      "loss": 1.4594,
      "step": 4536
    },
    {
      "epoch": 0.9300943009430094,
      "grad_norm": 0.3840161220880411,
      "learning_rate": 2.552796095913124e-06,
      "loss": 1.4574,
      "step": 4537
    },
    {
      "epoch": 0.9302993029930299,
      "grad_norm": 0.4227867781729992,
      "learning_rate": 2.5379092009360284e-06,
      "loss": 1.4985,
      "step": 4538
    },
    {
      "epoch": 0.9305043050430505,
      "grad_norm": 0.40578227368197406,
      "learning_rate": 2.5230652823118204e-06,
      "loss": 1.4651,
      "step": 4539
    },
    {
      "epoch": 0.930709307093071,
      "grad_norm": 0.39310472945149344,
      "learning_rate": 2.50826434658602e-06,
      "loss": 1.4618,
      "step": 4540
    },
    {
      "epoch": 0.9309143091430915,
      "grad_norm": 0.4408790962793832,
      "learning_rate": 2.4935064002851395e-06,
      "loss": 1.4854,
      "step": 4541
    },
    {
      "epoch": 0.931119311193112,
      "grad_norm": 0.39326167074612256,
      "learning_rate": 2.478791449916773e-06,
      "loss": 1.4778,
      "step": 4542
    },
    {
      "epoch": 0.9313243132431325,
      "grad_norm": 0.42048960730523716,
      "learning_rate": 2.4641195019695306e-06,
      "loss": 1.4709,
      "step": 4543
    },
    {
      "epoch": 0.931529315293153,
      "grad_norm": 0.44237083359266777,
      "learning_rate": 2.4494905629130925e-06,
      "loss": 1.4857,
      "step": 4544
    },
    {
      "epoch": 0.9317343173431735,
      "grad_norm": 0.44301680845058533,
      "learning_rate": 2.4349046391981546e-06,
      "loss": 1.5511,
      "step": 4545
    },
    {
      "epoch": 0.931939319393194,
      "grad_norm": 0.43117470621383425,
      "learning_rate": 2.420361737256438e-06,
      "loss": 1.4436,
      "step": 4546
    },
    {
      "epoch": 0.9321443214432145,
      "grad_norm": 0.40973765300621207,
      "learning_rate": 2.4058618635007133e-06,
      "loss": 1.4737,
      "step": 4547
    },
    {
      "epoch": 0.932349323493235,
      "grad_norm": 0.4172904348028181,
      "learning_rate": 2.3914050243247445e-06,
      "loss": 1.5277,
      "step": 4548
    },
    {
      "epoch": 0.9325543255432555,
      "grad_norm": 0.40977859284544466,
      "learning_rate": 2.3769912261033533e-06,
      "loss": 1.463,
      "step": 4549
    },
    {
      "epoch": 0.932759327593276,
      "grad_norm": 0.4269107879704024,
      "learning_rate": 2.3626204751923784e-06,
      "loss": 1.4333,
      "step": 4550
    },
    {
      "epoch": 0.9329643296432965,
      "grad_norm": 0.4154100739547564,
      "learning_rate": 2.3482927779286623e-06,
      "loss": 1.5191,
      "step": 4551
    },
    {
      "epoch": 0.933169331693317,
      "grad_norm": 0.36183219664822497,
      "learning_rate": 2.334008140630062e-06,
      "loss": 1.3816,
      "step": 4552
    },
    {
      "epoch": 0.9333743337433374,
      "grad_norm": 0.452358022642829,
      "learning_rate": 2.3197665695954607e-06,
      "loss": 1.5761,
      "step": 4553
    },
    {
      "epoch": 0.933579335793358,
      "grad_norm": 0.4097403150247577,
      "learning_rate": 2.3055680711047355e-06,
      "loss": 1.5097,
      "step": 4554
    },
    {
      "epoch": 0.9337843378433784,
      "grad_norm": 0.43341915366844846,
      "learning_rate": 2.291412651418778e-06,
      "loss": 1.4573,
      "step": 4555
    },
    {
      "epoch": 0.9339893398933989,
      "grad_norm": 0.4427625650634552,
      "learning_rate": 2.277300316779507e-06,
      "loss": 1.471,
      "step": 4556
    },
    {
      "epoch": 0.9341943419434194,
      "grad_norm": 0.4007717730377636,
      "learning_rate": 2.2632310734097994e-06,
      "loss": 1.4694,
      "step": 4557
    },
    {
      "epoch": 0.9343993439934399,
      "grad_norm": 0.4798354637907963,
      "learning_rate": 2.2492049275135486e-06,
      "loss": 1.5728,
      "step": 4558
    },
    {
      "epoch": 0.9346043460434604,
      "grad_norm": 0.432366661718664,
      "learning_rate": 2.2352218852756625e-06,
      "loss": 1.4707,
      "step": 4559
    },
    {
      "epoch": 0.9348093480934809,
      "grad_norm": 0.4460440923710054,
      "learning_rate": 2.22128195286202e-06,
      "loss": 1.4969,
      "step": 4560
    },
    {
      "epoch": 0.9350143501435014,
      "grad_norm": 0.4684781583418065,
      "learning_rate": 2.207385136419504e-06,
      "loss": 1.5261,
      "step": 4561
    },
    {
      "epoch": 0.9352193521935219,
      "grad_norm": 0.43713686511410216,
      "learning_rate": 2.19353144207598e-06,
      "loss": 1.5117,
      "step": 4562
    },
    {
      "epoch": 0.9354243542435424,
      "grad_norm": 0.43758544839160146,
      "learning_rate": 2.179720875940272e-06,
      "loss": 1.5151,
      "step": 4563
    },
    {
      "epoch": 0.9356293562935629,
      "grad_norm": 0.4129268916678531,
      "learning_rate": 2.165953444102242e-06,
      "loss": 1.4388,
      "step": 4564
    },
    {
      "epoch": 0.9358343583435834,
      "grad_norm": 0.4491329177134426,
      "learning_rate": 2.1522291526326898e-06,
      "loss": 1.519,
      "step": 4565
    },
    {
      "epoch": 0.9360393603936039,
      "grad_norm": 0.40560543303076346,
      "learning_rate": 2.1385480075834076e-06,
      "loss": 1.435,
      "step": 4566
    },
    {
      "epoch": 0.9362443624436244,
      "grad_norm": 0.4248879851154153,
      "learning_rate": 2.1249100149871693e-06,
      "loss": 1.4664,
      "step": 4567
    },
    {
      "epoch": 0.9364493644936449,
      "grad_norm": 0.4368460992615822,
      "learning_rate": 2.111315180857687e-06,
      "loss": 1.4749,
      "step": 4568
    },
    {
      "epoch": 0.9366543665436654,
      "grad_norm": 0.36533356517722737,
      "learning_rate": 2.0977635111896654e-06,
      "loss": 1.4435,
      "step": 4569
    },
    {
      "epoch": 0.9368593685936859,
      "grad_norm": 0.4010820531220128,
      "learning_rate": 2.0842550119588024e-06,
      "loss": 1.4344,
      "step": 4570
    },
    {
      "epoch": 0.9370643706437064,
      "grad_norm": 0.4258966255979595,
      "learning_rate": 2.0707896891216995e-06,
      "loss": 1.5236,
      "step": 4571
    },
    {
      "epoch": 0.9372693726937269,
      "grad_norm": 0.37911085021378865,
      "learning_rate": 2.057367548615974e-06,
      "loss": 1.4584,
      "step": 4572
    },
    {
      "epoch": 0.9374743747437474,
      "grad_norm": 0.40465394133403687,
      "learning_rate": 2.04398859636018e-06,
      "loss": 1.4883,
      "step": 4573
    },
    {
      "epoch": 0.9376793767937679,
      "grad_norm": 0.4372930249798691,
      "learning_rate": 2.03065283825381e-06,
      "loss": 1.4839,
      "step": 4574
    },
    {
      "epoch": 0.9378843788437884,
      "grad_norm": 0.3937186132948349,
      "learning_rate": 2.0173602801773495e-06,
      "loss": 1.507,
      "step": 4575
    },
    {
      "epoch": 0.9380893808938089,
      "grad_norm": 0.411973499164815,
      "learning_rate": 2.0041109279921864e-06,
      "loss": 1.4148,
      "step": 4576
    },
    {
      "epoch": 0.9382943829438294,
      "grad_norm": 0.44493513969349274,
      "learning_rate": 1.990904787540704e-06,
      "loss": 1.5318,
      "step": 4577
    },
    {
      "epoch": 0.9384993849938499,
      "grad_norm": 0.396585601401724,
      "learning_rate": 1.97774186464621e-06,
      "loss": 1.4856,
      "step": 4578
    },
    {
      "epoch": 0.9387043870438705,
      "grad_norm": 0.3936346698096212,
      "learning_rate": 1.964622165112939e-06,
      "loss": 1.4564,
      "step": 4579
    },
    {
      "epoch": 0.938909389093891,
      "grad_norm": 0.44329940607578605,
      "learning_rate": 1.951545694726098e-06,
      "loss": 1.5444,
      "step": 4580
    },
    {
      "epoch": 0.9391143911439115,
      "grad_norm": 0.42419625776220793,
      "learning_rate": 1.9385124592518065e-06,
      "loss": 1.4748,
      "step": 4581
    },
    {
      "epoch": 0.939319393193932,
      "grad_norm": 0.4154748513673259,
      "learning_rate": 1.925522464437135e-06,
      "loss": 1.4738,
      "step": 4582
    },
    {
      "epoch": 0.9395243952439525,
      "grad_norm": 0.43896780954596126,
      "learning_rate": 1.91257571601009e-06,
      "loss": 1.4588,
      "step": 4583
    },
    {
      "epoch": 0.939729397293973,
      "grad_norm": 0.44005980233680353,
      "learning_rate": 1.8996722196795713e-06,
      "loss": 1.4943,
      "step": 4584
    },
    {
      "epoch": 0.9399343993439935,
      "grad_norm": 0.419656120553632,
      "learning_rate": 1.8868119811354611e-06,
      "loss": 1.5182,
      "step": 4585
    },
    {
      "epoch": 0.940139401394014,
      "grad_norm": 0.37749984616269355,
      "learning_rate": 1.8739950060485234e-06,
      "loss": 1.579,
      "step": 4586
    },
    {
      "epoch": 0.9403444034440345,
      "grad_norm": 0.41286570468517103,
      "learning_rate": 1.8612213000704704e-06,
      "loss": 1.5313,
      "step": 4587
    },
    {
      "epoch": 0.940549405494055,
      "grad_norm": 0.4106493276962578,
      "learning_rate": 1.8484908688339186e-06,
      "loss": 1.4712,
      "step": 4588
    },
    {
      "epoch": 0.9407544075440755,
      "grad_norm": 0.47054730258968175,
      "learning_rate": 1.8358037179524224e-06,
      "loss": 1.4799,
      "step": 4589
    },
    {
      "epoch": 0.940959409594096,
      "grad_norm": 0.4324103409490006,
      "learning_rate": 1.8231598530204287e-06,
      "loss": 1.5168,
      "step": 4590
    },
    {
      "epoch": 0.9411644116441165,
      "grad_norm": 0.41466057803080025,
      "learning_rate": 1.810559279613322e-06,
      "loss": 1.4592,
      "step": 4591
    },
    {
      "epoch": 0.941369413694137,
      "grad_norm": 0.4051172885912199,
      "learning_rate": 1.7980020032873468e-06,
      "loss": 1.4895,
      "step": 4592
    },
    {
      "epoch": 0.9415744157441575,
      "grad_norm": 0.40438540291362046,
      "learning_rate": 1.7854880295797405e-06,
      "loss": 1.4249,
      "step": 4593
    },
    {
      "epoch": 0.941779417794178,
      "grad_norm": 0.4430811178964784,
      "learning_rate": 1.7730173640085445e-06,
      "loss": 1.5037,
      "step": 4594
    },
    {
      "epoch": 0.9419844198441985,
      "grad_norm": 0.4055527400379786,
      "learning_rate": 1.7605900120728047e-06,
      "loss": 1.4637,
      "step": 4595
    },
    {
      "epoch": 0.942189421894219,
      "grad_norm": 0.4025049571858752,
      "learning_rate": 1.748205979252393e-06,
      "loss": 1.4788,
      "step": 4596
    },
    {
      "epoch": 0.9423944239442394,
      "grad_norm": 0.4123895047425271,
      "learning_rate": 1.7358652710081081e-06,
      "loss": 1.44,
      "step": 4597
    },
    {
      "epoch": 0.9425994259942599,
      "grad_norm": 0.4060466673787726,
      "learning_rate": 1.72356789278163e-06,
      "loss": 1.499,
      "step": 4598
    },
    {
      "epoch": 0.9428044280442804,
      "grad_norm": 0.39758461289611335,
      "learning_rate": 1.711313849995555e-06,
      "loss": 1.4974,
      "step": 4599
    },
    {
      "epoch": 0.9430094300943009,
      "grad_norm": 0.38959189170047004,
      "learning_rate": 1.6991031480533715e-06,
      "loss": 1.4806,
      "step": 4600
    },
    {
      "epoch": 0.9432144321443214,
      "grad_norm": 0.3882309791214617,
      "learning_rate": 1.686935792339439e-06,
      "loss": 1.4639,
      "step": 4601
    },
    {
      "epoch": 0.9434194341943419,
      "grad_norm": 0.47363832224666175,
      "learning_rate": 1.6748117882189883e-06,
      "loss": 1.5344,
      "step": 4602
    },
    {
      "epoch": 0.9436244362443624,
      "grad_norm": 0.4576299615926088,
      "learning_rate": 1.6627311410381652e-06,
      "loss": 1.4975,
      "step": 4603
    },
    {
      "epoch": 0.9438294382943829,
      "grad_norm": 0.40109436131476384,
      "learning_rate": 1.650693856123997e-06,
      "loss": 1.5051,
      "step": 4604
    },
    {
      "epoch": 0.9440344403444034,
      "grad_norm": 0.3986627337671457,
      "learning_rate": 1.6386999387843716e-06,
      "loss": 1.4442,
      "step": 4605
    },
    {
      "epoch": 0.9442394423944239,
      "grad_norm": 0.42906664911223463,
      "learning_rate": 1.626749394308058e-06,
      "loss": 1.4606,
      "step": 4606
    },
    {
      "epoch": 0.9444444444444444,
      "grad_norm": 0.44572873121667206,
      "learning_rate": 1.614842227964708e-06,
      "loss": 1.4834,
      "step": 4607
    },
    {
      "epoch": 0.9446494464944649,
      "grad_norm": 0.4143428816862491,
      "learning_rate": 1.6029784450048323e-06,
      "loss": 1.4857,
      "step": 4608
    },
    {
      "epoch": 0.9448544485444854,
      "grad_norm": 0.38526828484852355,
      "learning_rate": 1.5911580506598245e-06,
      "loss": 1.5359,
      "step": 4609
    },
    {
      "epoch": 0.9450594505945059,
      "grad_norm": 0.38978783684109825,
      "learning_rate": 1.579381050141948e-06,
      "loss": 1.4901,
      "step": 4610
    },
    {
      "epoch": 0.9452644526445264,
      "grad_norm": 0.42659787671916094,
      "learning_rate": 1.5676474486443272e-06,
      "loss": 1.4861,
      "step": 4611
    },
    {
      "epoch": 0.9454694546945469,
      "grad_norm": 0.4151510242273399,
      "learning_rate": 1.5559572513409338e-06,
      "loss": 1.4169,
      "step": 4612
    },
    {
      "epoch": 0.9456744567445674,
      "grad_norm": 0.4169208646551611,
      "learning_rate": 1.5443104633866112e-06,
      "loss": 1.445,
      "step": 4613
    },
    {
      "epoch": 0.9458794587945879,
      "grad_norm": 0.43940355005413406,
      "learning_rate": 1.5327070899170736e-06,
      "loss": 1.4002,
      "step": 4614
    },
    {
      "epoch": 0.9460844608446084,
      "grad_norm": 0.42457098167300533,
      "learning_rate": 1.521147136048895e-06,
      "loss": 1.4624,
      "step": 4615
    },
    {
      "epoch": 0.9462894628946289,
      "grad_norm": 0.427672103467531,
      "learning_rate": 1.5096306068794641e-06,
      "loss": 1.4695,
      "step": 4616
    },
    {
      "epoch": 0.9464944649446494,
      "grad_norm": 0.434582645115717,
      "learning_rate": 1.4981575074870635e-06,
      "loss": 1.4816,
      "step": 4617
    },
    {
      "epoch": 0.9466994669946699,
      "grad_norm": 0.3983691978804189,
      "learning_rate": 1.4867278429308018e-06,
      "loss": 1.4416,
      "step": 4618
    },
    {
      "epoch": 0.9469044690446905,
      "grad_norm": 0.40122954706122715,
      "learning_rate": 1.4753416182506363e-06,
      "loss": 1.472,
      "step": 4619
    },
    {
      "epoch": 0.947109471094711,
      "grad_norm": 0.4340474962124046,
      "learning_rate": 1.4639988384673842e-06,
      "loss": 1.5026,
      "step": 4620
    },
    {
      "epoch": 0.9473144731447315,
      "grad_norm": 0.42517397394612205,
      "learning_rate": 1.4526995085826888e-06,
      "loss": 1.5021,
      "step": 4621
    },
    {
      "epoch": 0.947519475194752,
      "grad_norm": 0.4030591178317383,
      "learning_rate": 1.4414436335790538e-06,
      "loss": 1.4922,
      "step": 4622
    },
    {
      "epoch": 0.9477244772447725,
      "grad_norm": 0.3886046334608059,
      "learning_rate": 1.4302312184197974e-06,
      "loss": 1.4632,
      "step": 4623
    },
    {
      "epoch": 0.947929479294793,
      "grad_norm": 0.40698460786524404,
      "learning_rate": 1.4190622680490873e-06,
      "loss": 1.4846,
      "step": 4624
    },
    {
      "epoch": 0.9481344813448135,
      "grad_norm": 0.401137773814169,
      "learning_rate": 1.4079367873919059e-06,
      "loss": 1.4501,
      "step": 4625
    },
    {
      "epoch": 0.948339483394834,
      "grad_norm": 0.4222612713513452,
      "learning_rate": 1.396854781354129e-06,
      "loss": 1.4041,
      "step": 4626
    },
    {
      "epoch": 0.9485444854448545,
      "grad_norm": 0.3859821651219978,
      "learning_rate": 1.3858162548223807e-06,
      "loss": 1.4602,
      "step": 4627
    },
    {
      "epoch": 0.948749487494875,
      "grad_norm": 0.4538824938074429,
      "learning_rate": 1.3748212126641569e-06,
      "loss": 1.5304,
      "step": 4628
    },
    {
      "epoch": 0.9489544895448955,
      "grad_norm": 0.3996911278353664,
      "learning_rate": 1.3638696597277679e-06,
      "loss": 1.467,
      "step": 4629
    },
    {
      "epoch": 0.949159491594916,
      "grad_norm": 0.4175656017910606,
      "learning_rate": 1.3529616008423506e-06,
      "loss": 1.491,
      "step": 4630
    },
    {
      "epoch": 0.9493644936449365,
      "grad_norm": 0.39212672558834766,
      "learning_rate": 1.3420970408178913e-06,
      "loss": 1.494,
      "step": 4631
    },
    {
      "epoch": 0.949569495694957,
      "grad_norm": 0.42694031876878125,
      "learning_rate": 1.331275984445135e-06,
      "loss": 1.5484,
      "step": 4632
    },
    {
      "epoch": 0.9497744977449775,
      "grad_norm": 0.4112477329366927,
      "learning_rate": 1.3204984364956874e-06,
      "loss": 1.4851,
      "step": 4633
    },
    {
      "epoch": 0.949979499794998,
      "grad_norm": 0.42367707448676856,
      "learning_rate": 1.3097644017219468e-06,
      "loss": 1.5204,
      "step": 4634
    },
    {
      "epoch": 0.9501845018450185,
      "grad_norm": 0.4244367293639013,
      "learning_rate": 1.2990738848571494e-06,
      "loss": 1.4597,
      "step": 4635
    },
    {
      "epoch": 0.950389503895039,
      "grad_norm": 0.46912325802861377,
      "learning_rate": 1.288426890615335e-06,
      "loss": 1.5255,
      "step": 4636
    },
    {
      "epoch": 0.9505945059450595,
      "grad_norm": 0.4526593116073533,
      "learning_rate": 1.2778234236913155e-06,
      "loss": 1.4554,
      "step": 4637
    },
    {
      "epoch": 0.95079950799508,
      "grad_norm": 0.43815685939747784,
      "learning_rate": 1.2672634887607614e-06,
      "loss": 1.4792,
      "step": 4638
    },
    {
      "epoch": 0.9510045100451004,
      "grad_norm": 0.43757886235224264,
      "learning_rate": 1.256747090480115e-06,
      "loss": 1.4616,
      "step": 4639
    },
    {
      "epoch": 0.951209512095121,
      "grad_norm": 0.40585951067855275,
      "learning_rate": 1.2462742334866218e-06,
      "loss": 1.5298,
      "step": 4640
    },
    {
      "epoch": 0.9514145141451414,
      "grad_norm": 0.4651036546841763,
      "learning_rate": 1.2358449223983547e-06,
      "loss": 1.5235,
      "step": 4641
    },
    {
      "epoch": 0.9516195161951619,
      "grad_norm": 0.48241150699281893,
      "learning_rate": 1.2254591618141686e-06,
      "loss": 1.508,
      "step": 4642
    },
    {
      "epoch": 0.9518245182451824,
      "grad_norm": 0.4260358302361235,
      "learning_rate": 1.2151169563136888e-06,
      "loss": 1.4758,
      "step": 4643
    },
    {
      "epoch": 0.9520295202952029,
      "grad_norm": 0.39055915828560206,
      "learning_rate": 1.2048183104573563e-06,
      "loss": 1.4866,
      "step": 4644
    },
    {
      "epoch": 0.9522345223452234,
      "grad_norm": 0.4335721694380325,
      "learning_rate": 1.1945632287864383e-06,
      "loss": 1.4905,
      "step": 4645
    },
    {
      "epoch": 0.9524395243952439,
      "grad_norm": 0.4446314618662908,
      "learning_rate": 1.1843517158229288e-06,
      "loss": 1.4474,
      "step": 4646
    },
    {
      "epoch": 0.9526445264452644,
      "grad_norm": 0.41233027881018863,
      "learning_rate": 1.1741837760696595e-06,
      "loss": 1.4767,
      "step": 4647
    },
    {
      "epoch": 0.9528495284952849,
      "grad_norm": 0.4340371603042303,
      "learning_rate": 1.1640594140102213e-06,
      "loss": 1.4323,
      "step": 4648
    },
    {
      "epoch": 0.9530545305453054,
      "grad_norm": 0.4366875349622588,
      "learning_rate": 1.1539786341089876e-06,
      "loss": 1.4748,
      "step": 4649
    },
    {
      "epoch": 0.9532595325953259,
      "grad_norm": 0.45489270178308966,
      "learning_rate": 1.143941440811147e-06,
      "loss": 1.5774,
      "step": 4650
    },
    {
      "epoch": 0.9534645346453464,
      "grad_norm": 0.39725661609282786,
      "learning_rate": 1.1339478385426262e-06,
      "loss": 1.4378,
      "step": 4651
    },
    {
      "epoch": 0.9536695366953669,
      "grad_norm": 0.4253148346347174,
      "learning_rate": 1.1239978317101662e-06,
      "loss": 1.4674,
      "step": 4652
    },
    {
      "epoch": 0.9538745387453874,
      "grad_norm": 0.4306067235387545,
      "learning_rate": 1.114091424701258e-06,
      "loss": 1.5214,
      "step": 4653
    },
    {
      "epoch": 0.9540795407954079,
      "grad_norm": 0.4015431996269364,
      "learning_rate": 1.1042286218841736e-06,
      "loss": 1.5021,
      "step": 4654
    },
    {
      "epoch": 0.9542845428454284,
      "grad_norm": 0.4722780786684882,
      "learning_rate": 1.0944094276079675e-06,
      "loss": 1.4955,
      "step": 4655
    },
    {
      "epoch": 0.9544895448954489,
      "grad_norm": 0.4313275113611618,
      "learning_rate": 1.0846338462024541e-06,
      "loss": 1.4772,
      "step": 4656
    },
    {
      "epoch": 0.9546945469454694,
      "grad_norm": 0.43272841039882,
      "learning_rate": 1.0749018819782297e-06,
      "loss": 1.4857,
      "step": 4657
    },
    {
      "epoch": 0.9548995489954899,
      "grad_norm": 0.43086454656866835,
      "learning_rate": 1.0652135392266394e-06,
      "loss": 1.5047,
      "step": 4658
    },
    {
      "epoch": 0.9551045510455105,
      "grad_norm": 0.3803434158641607,
      "learning_rate": 1.055568822219799e-06,
      "loss": 1.478,
      "step": 4659
    },
    {
      "epoch": 0.955309553095531,
      "grad_norm": 0.4023398093422054,
      "learning_rate": 1.0459677352106067e-06,
      "loss": 1.4558,
      "step": 4660
    },
    {
      "epoch": 0.9555145551455515,
      "grad_norm": 0.39676256246624875,
      "learning_rate": 1.036410282432687e-06,
      "loss": 1.5035,
      "step": 4661
    },
    {
      "epoch": 0.955719557195572,
      "grad_norm": 0.4255844743479049,
      "learning_rate": 1.0268964681004356e-06,
      "loss": 1.459,
      "step": 4662
    },
    {
      "epoch": 0.9559245592455925,
      "grad_norm": 0.448379318965663,
      "learning_rate": 1.0174262964090408e-06,
      "loss": 1.5082,
      "step": 4663
    },
    {
      "epoch": 0.956129561295613,
      "grad_norm": 0.3985825209838232,
      "learning_rate": 1.0079997715343959e-06,
      "loss": 1.4754,
      "step": 4664
    },
    {
      "epoch": 0.9563345633456335,
      "grad_norm": 0.43058774652209775,
      "learning_rate": 9.986168976331866e-07,
      "loss": 1.4845,
      "step": 4665
    },
    {
      "epoch": 0.956539565395654,
      "grad_norm": 0.3794112998420427,
      "learning_rate": 9.892776788428149e-07,
      "loss": 1.4555,
      "step": 4666
    },
    {
      "epoch": 0.9567445674456745,
      "grad_norm": 0.40235105057860043,
      "learning_rate": 9.79982119281464e-07,
      "loss": 1.4668,
      "step": 4667
    },
    {
      "epoch": 0.956949569495695,
      "grad_norm": 0.3792903886393411,
      "learning_rate": 9.707302230480553e-07,
      "loss": 1.5232,
      "step": 4668
    },
    {
      "epoch": 0.9571545715457155,
      "grad_norm": 0.41672471938273514,
      "learning_rate": 9.615219942222474e-07,
      "loss": 1.4407,
      "step": 4669
    },
    {
      "epoch": 0.957359573595736,
      "grad_norm": 0.42753742021273905,
      "learning_rate": 9.523574368644483e-07,
      "loss": 1.5231,
      "step": 4670
    },
    {
      "epoch": 0.9575645756457565,
      "grad_norm": 0.39432676701692954,
      "learning_rate": 9.432365550158251e-07,
      "loss": 1.4167,
      "step": 4671
    },
    {
      "epoch": 0.957769577695777,
      "grad_norm": 0.42024170158675034,
      "learning_rate": 9.341593526982606e-07,
      "loss": 1.4961,
      "step": 4672
    },
    {
      "epoch": 0.9579745797457975,
      "grad_norm": 0.41605481659116417,
      "learning_rate": 9.251258339143864e-07,
      "loss": 1.515,
      "step": 4673
    },
    {
      "epoch": 0.958179581795818,
      "grad_norm": 0.41798006553937567,
      "learning_rate": 9.161360026475829e-07,
      "loss": 1.4719,
      "step": 4674
    },
    {
      "epoch": 0.9583845838458385,
      "grad_norm": 0.4066078260983904,
      "learning_rate": 9.071898628619569e-07,
      "loss": 1.4714,
      "step": 4675
    },
    {
      "epoch": 0.958589585895859,
      "grad_norm": 0.40563154867170487,
      "learning_rate": 8.982874185023415e-07,
      "loss": 1.4738,
      "step": 4676
    },
    {
      "epoch": 0.9587945879458795,
      "grad_norm": 0.4023707092057499,
      "learning_rate": 8.89428673494308e-07,
      "loss": 1.4289,
      "step": 4677
    },
    {
      "epoch": 0.9589995899959,
      "grad_norm": 0.4294599983077088,
      "learning_rate": 8.80613631744176e-07,
      "loss": 1.4649,
      "step": 4678
    },
    {
      "epoch": 0.9592045920459205,
      "grad_norm": 0.42785494858028356,
      "learning_rate": 8.718422971389584e-07,
      "loss": 1.4668,
      "step": 4679
    },
    {
      "epoch": 0.959409594095941,
      "grad_norm": 0.4218830364066306,
      "learning_rate": 8.63114673546428e-07,
      "loss": 1.5228,
      "step": 4680
    },
    {
      "epoch": 0.9596145961459615,
      "grad_norm": 0.427049384612903,
      "learning_rate": 8.544307648150729e-07,
      "loss": 1.4823,
      "step": 4681
    },
    {
      "epoch": 0.959819598195982,
      "grad_norm": 0.45085133029000424,
      "learning_rate": 8.45790574774108e-07,
      "loss": 1.5397,
      "step": 4682
    },
    {
      "epoch": 0.9600246002460024,
      "grad_norm": 0.42410111273667755,
      "learning_rate": 8.371941072334299e-07,
      "loss": 1.4962,
      "step": 4683
    },
    {
      "epoch": 0.9602296022960229,
      "grad_norm": 0.41234092871602834,
      "learning_rate": 8.286413659837288e-07,
      "loss": 1.5249,
      "step": 4684
    },
    {
      "epoch": 0.9604346043460434,
      "grad_norm": 0.4120491097994356,
      "learning_rate": 8.201323547963547e-07,
      "loss": 1.4971,
      "step": 4685
    },
    {
      "epoch": 0.9606396063960639,
      "grad_norm": 0.418622126870649,
      "learning_rate": 8.116670774234058e-07,
      "loss": 1.4799,
      "step": 4686
    },
    {
      "epoch": 0.9608446084460844,
      "grad_norm": 0.4239843854934979,
      "learning_rate": 8.032455375976744e-07,
      "loss": 1.6007,
      "step": 4687
    },
    {
      "epoch": 0.9610496104961049,
      "grad_norm": 0.36989986033355643,
      "learning_rate": 7.948677390326786e-07,
      "loss": 1.4977,
      "step": 4688
    },
    {
      "epoch": 0.9612546125461254,
      "grad_norm": 0.3757542674850543,
      "learning_rate": 7.865336854226524e-07,
      "loss": 1.4711,
      "step": 4689
    },
    {
      "epoch": 0.9614596145961459,
      "grad_norm": 0.4709794100569463,
      "learning_rate": 7.782433804425227e-07,
      "loss": 1.4974,
      "step": 4690
    },
    {
      "epoch": 0.9616646166461664,
      "grad_norm": 0.41583974910439636,
      "learning_rate": 7.699968277479652e-07,
      "loss": 1.4301,
      "step": 4691
    },
    {
      "epoch": 0.9618696186961869,
      "grad_norm": 0.4265693210591978,
      "learning_rate": 7.617940309753047e-07,
      "loss": 1.4841,
      "step": 4692
    },
    {
      "epoch": 0.9620746207462074,
      "grad_norm": 0.40143824887457236,
      "learning_rate": 7.536349937416143e-07,
      "loss": 1.5417,
      "step": 4693
    },
    {
      "epoch": 0.9622796227962279,
      "grad_norm": 0.4361494700553997,
      "learning_rate": 7.455197196446495e-07,
      "loss": 1.4935,
      "step": 4694
    },
    {
      "epoch": 0.9624846248462484,
      "grad_norm": 0.41389207386012566,
      "learning_rate": 7.374482122628922e-07,
      "loss": 1.4911,
      "step": 4695
    },
    {
      "epoch": 0.9626896268962689,
      "grad_norm": 0.4182687843116768,
      "learning_rate": 7.294204751555067e-07,
      "loss": 1.4344,
      "step": 4696
    },
    {
      "epoch": 0.9628946289462894,
      "grad_norm": 0.41550251715708325,
      "learning_rate": 7.214365118623611e-07,
      "loss": 1.4439,
      "step": 4697
    },
    {
      "epoch": 0.9630996309963099,
      "grad_norm": 0.39844119833303215,
      "learning_rate": 7.134963259040172e-07,
      "loss": 1.5171,
      "step": 4698
    },
    {
      "epoch": 0.9633046330463305,
      "grad_norm": 0.4374964656548517,
      "learning_rate": 7.055999207817188e-07,
      "loss": 1.4718,
      "step": 4699
    },
    {
      "epoch": 0.963509635096351,
      "grad_norm": 0.42050163623983694,
      "learning_rate": 6.977472999774471e-07,
      "loss": 1.5295,
      "step": 4700
    },
    {
      "epoch": 0.9637146371463715,
      "grad_norm": 0.42229955545226977,
      "learning_rate": 6.899384669538433e-07,
      "loss": 1.4836,
      "step": 4701
    },
    {
      "epoch": 0.963919639196392,
      "grad_norm": 0.4253691954860886,
      "learning_rate": 6.821734251542533e-07,
      "loss": 1.5109,
      "step": 4702
    },
    {
      "epoch": 0.9641246412464125,
      "grad_norm": 0.4510284699224987,
      "learning_rate": 6.744521780026936e-07,
      "loss": 1.5006,
      "step": 4703
    },
    {
      "epoch": 0.964329643296433,
      "grad_norm": 0.40629505372498653,
      "learning_rate": 6.667747289038851e-07,
      "loss": 1.465,
      "step": 4704
    },
    {
      "epoch": 0.9645346453464535,
      "grad_norm": 0.4127960730083617,
      "learning_rate": 6.591410812432419e-07,
      "loss": 1.4636,
      "step": 4705
    },
    {
      "epoch": 0.964739647396474,
      "grad_norm": 0.3797148049331649,
      "learning_rate": 6.515512383868605e-07,
      "loss": 1.448,
      "step": 4706
    },
    {
      "epoch": 0.9649446494464945,
      "grad_norm": 0.39124058476983414,
      "learning_rate": 6.440052036815081e-07,
      "loss": 1.477,
      "step": 4707
    },
    {
      "epoch": 0.965149651496515,
      "grad_norm": 0.39564688560498384,
      "learning_rate": 6.365029804546452e-07,
      "loss": 1.51,
      "step": 4708
    },
    {
      "epoch": 0.9653546535465355,
      "grad_norm": 0.40609961326935906,
      "learning_rate": 6.290445720144144e-07,
      "loss": 1.4874,
      "step": 4709
    },
    {
      "epoch": 0.965559655596556,
      "grad_norm": 0.4606722567468486,
      "learning_rate": 6.216299816496185e-07,
      "loss": 1.4852,
      "step": 4710
    },
    {
      "epoch": 0.9657646576465765,
      "grad_norm": 0.4190921743361472,
      "learning_rate": 6.142592126297753e-07,
      "loss": 1.4367,
      "step": 4711
    },
    {
      "epoch": 0.965969659696597,
      "grad_norm": 0.43796809660481956,
      "learning_rate": 6.069322682050516e-07,
      "loss": 1.4808,
      "step": 4712
    },
    {
      "epoch": 0.9661746617466175,
      "grad_norm": 0.4149639021606424,
      "learning_rate": 5.996491516062963e-07,
      "loss": 1.4742,
      "step": 4713
    },
    {
      "epoch": 0.966379663796638,
      "grad_norm": 0.38530540213969156,
      "learning_rate": 5.924098660450295e-07,
      "loss": 1.5079,
      "step": 4714
    },
    {
      "epoch": 0.9665846658466585,
      "grad_norm": 0.4184697124524958,
      "learning_rate": 5.852144147134531e-07,
      "loss": 1.5402,
      "step": 4715
    },
    {
      "epoch": 0.966789667896679,
      "grad_norm": 0.4456164181437981,
      "learning_rate": 5.780628007844401e-07,
      "loss": 1.4669,
      "step": 4716
    },
    {
      "epoch": 0.9669946699466995,
      "grad_norm": 0.4454321522803196,
      "learning_rate": 5.709550274115128e-07,
      "loss": 1.5302,
      "step": 4717
    },
    {
      "epoch": 0.96719967199672,
      "grad_norm": 0.4414207175769216,
      "learning_rate": 5.638910977288747e-07,
      "loss": 1.4703,
      "step": 4718
    },
    {
      "epoch": 0.9674046740467405,
      "grad_norm": 0.4659810004199522,
      "learning_rate": 5.568710148514122e-07,
      "loss": 1.5372,
      "step": 4719
    },
    {
      "epoch": 0.967609676096761,
      "grad_norm": 0.4106833448285871,
      "learning_rate": 5.498947818746602e-07,
      "loss": 1.4944,
      "step": 4720
    },
    {
      "epoch": 0.9678146781467815,
      "grad_norm": 0.37805980965060865,
      "learning_rate": 5.429624018748136e-07,
      "loss": 1.4878,
      "step": 4721
    },
    {
      "epoch": 0.968019680196802,
      "grad_norm": 0.42625196752407407,
      "learning_rate": 5.360738779087382e-07,
      "loss": 1.4859,
      "step": 4722
    },
    {
      "epoch": 0.9682246822468225,
      "grad_norm": 0.43478381156727663,
      "learning_rate": 5.292292130139598e-07,
      "loss": 1.5147,
      "step": 4723
    },
    {
      "epoch": 0.968429684296843,
      "grad_norm": 0.4002246285727642,
      "learning_rate": 5.22428410208664e-07,
      "loss": 1.4095,
      "step": 4724
    },
    {
      "epoch": 0.9686346863468634,
      "grad_norm": 0.38366057516835866,
      "learning_rate": 5.156714724917078e-07,
      "loss": 1.4476,
      "step": 4725
    },
    {
      "epoch": 0.968839688396884,
      "grad_norm": 0.4337626103727618,
      "learning_rate": 5.089584028425743e-07,
      "loss": 1.5306,
      "step": 4726
    },
    {
      "epoch": 0.9690446904469044,
      "grad_norm": 0.40252390973139135,
      "learning_rate": 5.022892042214289e-07,
      "loss": 1.4648,
      "step": 4727
    },
    {
      "epoch": 0.9692496924969249,
      "grad_norm": 0.38813388482940925,
      "learning_rate": 4.956638795690971e-07,
      "loss": 1.4158,
      "step": 4728
    },
    {
      "epoch": 0.9694546945469454,
      "grad_norm": 0.37727259572021915,
      "learning_rate": 4.890824318070419e-07,
      "loss": 1.4199,
      "step": 4729
    },
    {
      "epoch": 0.9696596965969659,
      "grad_norm": 0.4256671856842206,
      "learning_rate": 4.825448638373642e-07,
      "loss": 1.4919,
      "step": 4730
    },
    {
      "epoch": 0.9698646986469864,
      "grad_norm": 0.47119797029152144,
      "learning_rate": 4.7605117854284676e-07,
      "loss": 1.5173,
      "step": 4731
    },
    {
      "epoch": 0.9700697006970069,
      "grad_norm": 0.414881165668109,
      "learning_rate": 4.6960137878692134e-07,
      "loss": 1.5033,
      "step": 4732
    },
    {
      "epoch": 0.9702747027470274,
      "grad_norm": 0.38346657561480907,
      "learning_rate": 4.631954674136463e-07,
      "loss": 1.5122,
      "step": 4733
    },
    {
      "epoch": 0.9704797047970479,
      "grad_norm": 0.4258394678264236,
      "learning_rate": 4.568334472477287e-07,
      "loss": 1.4828,
      "step": 4734
    },
    {
      "epoch": 0.9706847068470684,
      "grad_norm": 0.4156653037754415,
      "learning_rate": 4.505153210945467e-07,
      "loss": 1.5297,
      "step": 4735
    },
    {
      "epoch": 0.9708897088970889,
      "grad_norm": 0.43030859498255475,
      "learning_rate": 4.442410917400941e-07,
      "loss": 1.5136,
      "step": 4736
    },
    {
      "epoch": 0.9710947109471094,
      "grad_norm": 0.4424199234429633,
      "learning_rate": 4.380107619510243e-07,
      "loss": 1.4358,
      "step": 4737
    },
    {
      "epoch": 0.9712997129971299,
      "grad_norm": 0.39855901712653724,
      "learning_rate": 4.318243344746287e-07,
      "loss": 1.4813,
      "step": 4738
    },
    {
      "epoch": 0.9715047150471505,
      "grad_norm": 0.4160824523828196,
      "learning_rate": 4.2568181203884725e-07,
      "loss": 1.4974,
      "step": 4739
    },
    {
      "epoch": 0.971709717097171,
      "grad_norm": 0.44560965593088836,
      "learning_rate": 4.195831973522468e-07,
      "loss": 1.4886,
      "step": 4740
    },
    {
      "epoch": 0.9719147191471915,
      "grad_norm": 0.4410754690295941,
      "learning_rate": 4.1352849310404287e-07,
      "loss": 1.5187,
      "step": 4741
    },
    {
      "epoch": 0.972119721197212,
      "grad_norm": 0.401686922030873,
      "learning_rate": 4.0751770196407745e-07,
      "loss": 1.4874,
      "step": 4742
    },
    {
      "epoch": 0.9723247232472325,
      "grad_norm": 0.4375544509971017,
      "learning_rate": 4.015508265828527e-07,
      "loss": 1.5499,
      "step": 4743
    },
    {
      "epoch": 0.972529725297253,
      "grad_norm": 0.4071523224615726,
      "learning_rate": 3.95627869591475e-07,
      "loss": 1.4506,
      "step": 4744
    },
    {
      "epoch": 0.9727347273472735,
      "grad_norm": 0.38503078562721504,
      "learning_rate": 3.8974883360169966e-07,
      "loss": 1.4753,
      "step": 4745
    },
    {
      "epoch": 0.972939729397294,
      "grad_norm": 0.4491544181139598,
      "learning_rate": 3.8391372120591964e-07,
      "loss": 1.4853,
      "step": 4746
    },
    {
      "epoch": 0.9731447314473145,
      "grad_norm": 0.41571417430065877,
      "learning_rate": 3.7812253497715445e-07,
      "loss": 1.5118,
      "step": 4747
    },
    {
      "epoch": 0.973349733497335,
      "grad_norm": 0.5144917204932947,
      "learning_rate": 3.7237527746905034e-07,
      "loss": 1.4622,
      "step": 4748
    },
    {
      "epoch": 0.9735547355473555,
      "grad_norm": 0.48049674908218165,
      "learning_rate": 3.6667195121589115e-07,
      "loss": 1.5274,
      "step": 4749
    },
    {
      "epoch": 0.973759737597376,
      "grad_norm": 0.4126431333133009,
      "learning_rate": 3.6101255873257634e-07,
      "loss": 1.4432,
      "step": 4750
    },
    {
      "epoch": 0.9739647396473965,
      "grad_norm": 0.42070510748039774,
      "learning_rate": 3.553971025146541e-07,
      "loss": 1.5106,
      "step": 4751
    },
    {
      "epoch": 0.974169741697417,
      "grad_norm": 0.4427413259298651,
      "learning_rate": 3.498255850382659e-07,
      "loss": 1.5155,
      "step": 4752
    },
    {
      "epoch": 0.9743747437474375,
      "grad_norm": 0.4420356563874356,
      "learning_rate": 3.4429800876021324e-07,
      "loss": 1.523,
      "step": 4753
    },
    {
      "epoch": 0.974579745797458,
      "grad_norm": 0.39347009097955726,
      "learning_rate": 3.38814376117913e-07,
      "loss": 1.4368,
      "step": 4754
    },
    {
      "epoch": 0.9747847478474785,
      "grad_norm": 0.34641017895382636,
      "learning_rate": 3.3337468952937546e-07,
      "loss": 1.351,
      "step": 4755
    },
    {
      "epoch": 0.974989749897499,
      "grad_norm": 0.41593376371532825,
      "learning_rate": 3.2797895139327074e-07,
      "loss": 1.4888,
      "step": 4756
    },
    {
      "epoch": 0.9751947519475195,
      "grad_norm": 0.4328847199977505,
      "learning_rate": 3.226271640888734e-07,
      "loss": 1.4963,
      "step": 4757
    },
    {
      "epoch": 0.97539975399754,
      "grad_norm": 0.3986398509048014,
      "learning_rate": 3.173193299760735e-07,
      "loss": 1.5254,
      "step": 4758
    },
    {
      "epoch": 0.9756047560475605,
      "grad_norm": 0.4188488332973968,
      "learning_rate": 3.1205545139538775e-07,
      "loss": 1.5053,
      "step": 4759
    },
    {
      "epoch": 0.975809758097581,
      "grad_norm": 0.40041212468923243,
      "learning_rate": 3.0683553066793715e-07,
      "loss": 1.4585,
      "step": 4760
    },
    {
      "epoch": 0.9760147601476015,
      "grad_norm": 0.4201099386305583,
      "learning_rate": 3.0165957009549163e-07,
      "loss": 1.5044,
      "step": 4761
    },
    {
      "epoch": 0.976219762197622,
      "grad_norm": 0.41623579088204754,
      "learning_rate": 2.9652757196039216e-07,
      "loss": 1.4784,
      "step": 4762
    },
    {
      "epoch": 0.9764247642476425,
      "grad_norm": 0.43686370709521993,
      "learning_rate": 2.9143953852562856e-07,
      "loss": 1.4495,
      "step": 4763
    },
    {
      "epoch": 0.976629766297663,
      "grad_norm": 0.44562982363815135,
      "learning_rate": 2.86395472034795e-07,
      "loss": 1.4928,
      "step": 4764
    },
    {
      "epoch": 0.9768347683476835,
      "grad_norm": 0.3599273158143983,
      "learning_rate": 2.813953747120901e-07,
      "loss": 1.401,
      "step": 4765
    },
    {
      "epoch": 0.977039770397704,
      "grad_norm": 0.4292916923316039,
      "learning_rate": 2.7643924876232794e-07,
      "loss": 1.5005,
      "step": 4766
    },
    {
      "epoch": 0.9772447724477245,
      "grad_norm": 0.4145690105930325,
      "learning_rate": 2.715270963709382e-07,
      "loss": 1.5025,
      "step": 4767
    },
    {
      "epoch": 0.977449774497745,
      "grad_norm": 0.3959551114664484,
      "learning_rate": 2.6665891970395487e-07,
      "loss": 1.4598,
      "step": 4768
    },
    {
      "epoch": 0.9776547765477654,
      "grad_norm": 0.46354582222510776,
      "learning_rate": 2.618347209080163e-07,
      "loss": 1.5248,
      "step": 4769
    },
    {
      "epoch": 0.977859778597786,
      "grad_norm": 0.4060424677087937,
      "learning_rate": 2.570545021103876e-07,
      "loss": 1.4273,
      "step": 4770
    },
    {
      "epoch": 0.9780647806478064,
      "grad_norm": 0.3675780316713538,
      "learning_rate": 2.52318265418916e-07,
      "loss": 1.4516,
      "step": 4771
    },
    {
      "epoch": 0.9782697826978269,
      "grad_norm": 0.43631992772309053,
      "learning_rate": 2.476260129220864e-07,
      "loss": 1.491,
      "step": 4772
    },
    {
      "epoch": 0.9784747847478474,
      "grad_norm": 0.4515275514757521,
      "learning_rate": 2.429777466889438e-07,
      "loss": 1.4841,
      "step": 4773
    },
    {
      "epoch": 0.9786797867978679,
      "grad_norm": 0.4189350600473239,
      "learning_rate": 2.3837346876918187e-07,
      "loss": 1.4818,
      "step": 4774
    },
    {
      "epoch": 0.9788847888478884,
      "grad_norm": 0.4449532304337471,
      "learning_rate": 2.338131811930655e-07,
      "loss": 1.4825,
      "step": 4775
    },
    {
      "epoch": 0.9790897908979089,
      "grad_norm": 0.3975122126376216,
      "learning_rate": 2.2929688597147503e-07,
      "loss": 1.4481,
      "step": 4776
    },
    {
      "epoch": 0.9792947929479294,
      "grad_norm": 0.43959619992170107,
      "learning_rate": 2.2482458509590632e-07,
      "loss": 1.4507,
      "step": 4777
    },
    {
      "epoch": 0.9794997949979499,
      "grad_norm": 0.45413831139471583,
      "learning_rate": 2.203962805384263e-07,
      "loss": 1.4736,
      "step": 4778
    },
    {
      "epoch": 0.9797047970479705,
      "grad_norm": 0.41676242646049017,
      "learning_rate": 2.1601197425170638e-07,
      "loss": 1.4565,
      "step": 4779
    },
    {
      "epoch": 0.979909799097991,
      "grad_norm": 0.4636818934551148,
      "learning_rate": 2.116716681690556e-07,
      "loss": 1.5103,
      "step": 4780
    },
    {
      "epoch": 0.9801148011480115,
      "grad_norm": 0.44965586992755496,
      "learning_rate": 2.073753642043208e-07,
      "loss": 1.4701,
      "step": 4781
    },
    {
      "epoch": 0.980319803198032,
      "grad_norm": 0.38922209473061264,
      "learning_rate": 2.0312306425200877e-07,
      "loss": 1.457,
      "step": 4782
    },
    {
      "epoch": 0.9805248052480525,
      "grad_norm": 0.432587203511188,
      "learning_rate": 1.989147701871641e-07,
      "loss": 1.4537,
      "step": 4783
    },
    {
      "epoch": 0.980729807298073,
      "grad_norm": 0.405219187548281,
      "learning_rate": 1.9475048386546902e-07,
      "loss": 1.4231,
      "step": 4784
    },
    {
      "epoch": 0.9809348093480935,
      "grad_norm": 0.4308760032805639,
      "learning_rate": 1.906302071231658e-07,
      "loss": 1.4876,
      "step": 4785
    },
    {
      "epoch": 0.981139811398114,
      "grad_norm": 0.4550905711420411,
      "learning_rate": 1.8655394177712336e-07,
      "loss": 1.4504,
      "step": 4786
    },
    {
      "epoch": 0.9813448134481345,
      "grad_norm": 0.42541104380579137,
      "learning_rate": 1.8252168962479277e-07,
      "loss": 1.5008,
      "step": 4787
    },
    {
      "epoch": 0.981549815498155,
      "grad_norm": 0.3829103190083179,
      "learning_rate": 1.7853345244420726e-07,
      "loss": 1.4594,
      "step": 4788
    },
    {
      "epoch": 0.9817548175481755,
      "grad_norm": 0.4159772737147079,
      "learning_rate": 1.7458923199400457e-07,
      "loss": 1.4758,
      "step": 4789
    },
    {
      "epoch": 0.981959819598196,
      "grad_norm": 0.4486934333537602,
      "learning_rate": 1.7068903001339342e-07,
      "loss": 1.4261,
      "step": 4790
    },
    {
      "epoch": 0.9821648216482165,
      "grad_norm": 0.42196303577083094,
      "learning_rate": 1.6683284822219813e-07,
      "loss": 1.4501,
      "step": 4791
    },
    {
      "epoch": 0.982369823698237,
      "grad_norm": 0.41955075559599125,
      "learning_rate": 1.6302068832081407e-07,
      "loss": 1.4533,
      "step": 4792
    },
    {
      "epoch": 0.9825748257482575,
      "grad_norm": 0.4007589889956423,
      "learning_rate": 1.5925255199024104e-07,
      "loss": 1.4553,
      "step": 4793
    },
    {
      "epoch": 0.982779827798278,
      "grad_norm": 0.40973927985609965,
      "learning_rate": 1.5552844089203877e-07,
      "loss": 1.4301,
      "step": 4794
    },
    {
      "epoch": 0.9829848298482985,
      "grad_norm": 0.4290011542576445,
      "learning_rate": 1.518483566683826e-07,
      "loss": 1.4963,
      "step": 4795
    },
    {
      "epoch": 0.983189831898319,
      "grad_norm": 0.39700694003378206,
      "learning_rate": 1.4821230094200777e-07,
      "loss": 1.4408,
      "step": 4796
    },
    {
      "epoch": 0.9833948339483395,
      "grad_norm": 0.4468864256479169,
      "learning_rate": 1.446202753162762e-07,
      "loss": 1.4466,
      "step": 4797
    },
    {
      "epoch": 0.98359983599836,
      "grad_norm": 0.3857946333915545,
      "learning_rate": 1.4107228137508754e-07,
      "loss": 1.452,
      "step": 4798
    },
    {
      "epoch": 0.9838048380483805,
      "grad_norm": 0.4088873547940262,
      "learning_rate": 1.3756832068294588e-07,
      "loss": 1.4245,
      "step": 4799
    },
    {
      "epoch": 0.984009840098401,
      "grad_norm": 0.41877538837429634,
      "learning_rate": 1.3410839478493754e-07,
      "loss": 1.5136,
      "step": 4800
    },
    {
      "epoch": 0.9842148421484215,
      "grad_norm": 0.3890165045313717,
      "learning_rate": 1.3069250520675314e-07,
      "loss": 1.417,
      "step": 4801
    },
    {
      "epoch": 0.984419844198442,
      "grad_norm": 0.41222355070570493,
      "learning_rate": 1.2732065345462118e-07,
      "loss": 1.4547,
      "step": 4802
    },
    {
      "epoch": 0.9846248462484625,
      "grad_norm": 0.4149785733436685,
      "learning_rate": 1.2399284101538566e-07,
      "loss": 1.4654,
      "step": 4803
    },
    {
      "epoch": 0.984829848298483,
      "grad_norm": 0.4480904638205778,
      "learning_rate": 1.2070906935646165e-07,
      "loss": 1.4858,
      "step": 4804
    },
    {
      "epoch": 0.9850348503485035,
      "grad_norm": 0.3965491757927723,
      "learning_rate": 1.1746933992584642e-07,
      "loss": 1.4479,
      "step": 4805
    },
    {
      "epoch": 0.985239852398524,
      "grad_norm": 0.4519263292626218,
      "learning_rate": 1.1427365415209723e-07,
      "loss": 1.4798,
      "step": 4806
    },
    {
      "epoch": 0.9854448544485445,
      "grad_norm": 0.4286642561964939,
      "learning_rate": 1.1112201344438689e-07,
      "loss": 1.4771,
      "step": 4807
    },
    {
      "epoch": 0.985649856498565,
      "grad_norm": 0.40895973603783775,
      "learning_rate": 1.0801441919242594e-07,
      "loss": 1.444,
      "step": 4808
    },
    {
      "epoch": 0.9858548585485855,
      "grad_norm": 0.45609641739854473,
      "learning_rate": 1.0495087276654048e-07,
      "loss": 1.5198,
      "step": 4809
    },
    {
      "epoch": 0.986059860598606,
      "grad_norm": 0.444124756879576,
      "learning_rate": 1.0193137551759436e-07,
      "loss": 1.4881,
      "step": 4810
    },
    {
      "epoch": 0.9862648626486265,
      "grad_norm": 0.4255164966940527,
      "learning_rate": 9.895592877706695e-08,
      "loss": 1.4958,
      "step": 4811
    },
    {
      "epoch": 0.986469864698647,
      "grad_norm": 0.39245316606553554,
      "learning_rate": 9.602453385699762e-08,
      "loss": 1.4554,
      "step": 4812
    },
    {
      "epoch": 0.9866748667486674,
      "grad_norm": 0.39263972070907505,
      "learning_rate": 9.313719204997462e-08,
      "loss": 1.4835,
      "step": 4813
    },
    {
      "epoch": 0.9868798687986879,
      "grad_norm": 0.4833814181160818,
      "learning_rate": 9.029390462921284e-08,
      "loss": 1.518,
      "step": 4814
    },
    {
      "epoch": 0.9870848708487084,
      "grad_norm": 0.4062334357911581,
      "learning_rate": 8.749467284845381e-08,
      "loss": 1.4927,
      "step": 4815
    },
    {
      "epoch": 0.9872898728987289,
      "grad_norm": 0.41795026215893444,
      "learning_rate": 8.47394979420324e-08,
      "loss": 1.4618,
      "step": 4816
    },
    {
      "epoch": 0.9874948749487494,
      "grad_norm": 0.4159697756371367,
      "learning_rate": 8.202838112486566e-08,
      "loss": 1.4918,
      "step": 4817
    },
    {
      "epoch": 0.9876998769987699,
      "grad_norm": 0.40522404226150216,
      "learning_rate": 7.936132359243064e-08,
      "loss": 1.469,
      "step": 4818
    },
    {
      "epoch": 0.9879048790487905,
      "grad_norm": 0.3864043385033911,
      "learning_rate": 7.673832652077551e-08,
      "loss": 1.4025,
      "step": 4819
    },
    {
      "epoch": 0.988109881098811,
      "grad_norm": 0.4079666960997385,
      "learning_rate": 7.415939106651948e-08,
      "loss": 1.4871,
      "step": 4820
    },
    {
      "epoch": 0.9883148831488315,
      "grad_norm": 0.37265363065866836,
      "learning_rate": 7.162451836685291e-08,
      "loss": 1.4625,
      "step": 4821
    },
    {
      "epoch": 0.988519885198852,
      "grad_norm": 0.4075121362007614,
      "learning_rate": 6.913370953955945e-08,
      "loss": 1.4401,
      "step": 4822
    },
    {
      "epoch": 0.9887248872488725,
      "grad_norm": 0.36437806832101377,
      "learning_rate": 6.66869656829494e-08,
      "loss": 1.4126,
      "step": 4823
    },
    {
      "epoch": 0.988929889298893,
      "grad_norm": 0.41253358562330233,
      "learning_rate": 6.428428787593754e-08,
      "loss": 1.4728,
      "step": 4824
    },
    {
      "epoch": 0.9891348913489135,
      "grad_norm": 0.4309973749764567,
      "learning_rate": 6.192567717798747e-08,
      "loss": 1.4838,
      "step": 4825
    },
    {
      "epoch": 0.989339893398934,
      "grad_norm": 0.400551961981492,
      "learning_rate": 5.961113462915613e-08,
      "loss": 1.4772,
      "step": 4826
    },
    {
      "epoch": 0.9895448954489545,
      "grad_norm": 0.42354319590893913,
      "learning_rate": 5.734066125003823e-08,
      "loss": 1.4291,
      "step": 4827
    },
    {
      "epoch": 0.989749897498975,
      "grad_norm": 0.4432523371427619,
      "learning_rate": 5.5114258041799596e-08,
      "loss": 1.4644,
      "step": 4828
    },
    {
      "epoch": 0.9899548995489955,
      "grad_norm": 0.44130520419860686,
      "learning_rate": 5.293192598621044e-08,
      "loss": 1.4911,
      "step": 4829
    },
    {
      "epoch": 0.990159901599016,
      "grad_norm": 0.4159219994022452,
      "learning_rate": 5.079366604555658e-08,
      "loss": 1.419,
      "step": 4830
    },
    {
      "epoch": 0.9903649036490365,
      "grad_norm": 0.3898255846453458,
      "learning_rate": 4.869947916271711e-08,
      "loss": 1.4689,
      "step": 4831
    },
    {
      "epoch": 0.990569905699057,
      "grad_norm": 0.43950526834446957,
      "learning_rate": 4.6649366261142243e-08,
      "loss": 1.4529,
      "step": 4832
    },
    {
      "epoch": 0.9907749077490775,
      "grad_norm": 0.40683627802699623,
      "learning_rate": 4.4643328244831085e-08,
      "loss": 1.4632,
      "step": 4833
    },
    {
      "epoch": 0.990979909799098,
      "grad_norm": 0.4198115460878348,
      "learning_rate": 4.2681365998364916e-08,
      "loss": 1.4538,
      "step": 4834
    },
    {
      "epoch": 0.9911849118491185,
      "grad_norm": 0.3978429993506763,
      "learning_rate": 4.076348038687394e-08,
      "loss": 1.4341,
      "step": 4835
    },
    {
      "epoch": 0.991389913899139,
      "grad_norm": 0.4568937319876624,
      "learning_rate": 3.888967225604834e-08,
      "loss": 1.5123,
      "step": 4836
    },
    {
      "epoch": 0.9915949159491595,
      "grad_norm": 0.3697961838990177,
      "learning_rate": 3.705994243217159e-08,
      "loss": 1.4678,
      "step": 4837
    },
    {
      "epoch": 0.99179991799918,
      "grad_norm": 0.37875620032364216,
      "learning_rate": 3.5274291722053877e-08,
      "loss": 1.4273,
      "step": 4838
    },
    {
      "epoch": 0.9920049200492005,
      "grad_norm": 0.3851465693595728,
      "learning_rate": 3.353272091309867e-08,
      "loss": 1.4268,
      "step": 4839
    },
    {
      "epoch": 0.992209922099221,
      "grad_norm": 0.43416259824483977,
      "learning_rate": 3.183523077324724e-08,
      "loss": 1.5395,
      "step": 4840
    },
    {
      "epoch": 0.9924149241492415,
      "grad_norm": 0.3877409953073631,
      "learning_rate": 3.018182205102304e-08,
      "loss": 1.4802,
      "step": 4841
    },
    {
      "epoch": 0.992619926199262,
      "grad_norm": 0.43388487465277253,
      "learning_rate": 2.8572495475509555e-08,
      "loss": 1.5293,
      "step": 4842
    },
    {
      "epoch": 0.9928249282492825,
      "grad_norm": 0.4090449695298792,
      "learning_rate": 2.7007251756339113e-08,
      "loss": 1.488,
      "step": 4843
    },
    {
      "epoch": 0.993029930299303,
      "grad_norm": 0.4054120593911369,
      "learning_rate": 2.5486091583715176e-08,
      "loss": 1.4195,
      "step": 4844
    },
    {
      "epoch": 0.9932349323493235,
      "grad_norm": 0.42585963293773976,
      "learning_rate": 2.400901562840119e-08,
      "loss": 1.477,
      "step": 4845
    },
    {
      "epoch": 0.993439934399344,
      "grad_norm": 0.40256109649364047,
      "learning_rate": 2.2576024541720587e-08,
      "loss": 1.4544,
      "step": 4846
    },
    {
      "epoch": 0.9936449364493645,
      "grad_norm": 0.42821230925476933,
      "learning_rate": 2.1187118955556805e-08,
      "loss": 1.5493,
      "step": 4847
    },
    {
      "epoch": 0.993849938499385,
      "grad_norm": 0.42042153418732986,
      "learning_rate": 1.9842299482353277e-08,
      "loss": 1.5471,
      "step": 4848
    },
    {
      "epoch": 0.9940549405494055,
      "grad_norm": 0.3768683670469088,
      "learning_rate": 1.8541566715113424e-08,
      "loss": 1.395,
      "step": 4849
    },
    {
      "epoch": 0.994259942599426,
      "grad_norm": 0.4369814528504174,
      "learning_rate": 1.7284921227400662e-08,
      "loss": 1.5094,
      "step": 4850
    },
    {
      "epoch": 0.9944649446494465,
      "grad_norm": 0.3841243443062922,
      "learning_rate": 1.607236357333841e-08,
      "loss": 1.4953,
      "step": 4851
    },
    {
      "epoch": 0.994669946699467,
      "grad_norm": 0.4315875249745099,
      "learning_rate": 1.4903894287610075e-08,
      "loss": 1.5117,
      "step": 4852
    },
    {
      "epoch": 0.9948749487494875,
      "grad_norm": 0.3728734240249131,
      "learning_rate": 1.3779513885470163e-08,
      "loss": 1.4637,
      "step": 4853
    },
    {
      "epoch": 0.995079950799508,
      "grad_norm": 0.4321237298735864,
      "learning_rate": 1.2699222862699867e-08,
      "loss": 1.4809,
      "step": 4854
    },
    {
      "epoch": 0.9952849528495284,
      "grad_norm": 0.40885449785880884,
      "learning_rate": 1.166302169566258e-08,
      "loss": 1.5055,
      "step": 4855
    },
    {
      "epoch": 0.995489954899549,
      "grad_norm": 0.4496581284435422,
      "learning_rate": 1.0670910841281689e-08,
      "loss": 1.5178,
      "step": 4856
    },
    {
      "epoch": 0.9956949569495694,
      "grad_norm": 0.42099984898874393,
      "learning_rate": 9.722890737029478e-09,
      "loss": 1.4703,
      "step": 4857
    },
    {
      "epoch": 0.9958999589995899,
      "grad_norm": 0.43382081968722686,
      "learning_rate": 8.818961800949321e-09,
      "loss": 1.4281,
      "step": 4858
    },
    {
      "epoch": 0.9961049610496105,
      "grad_norm": 0.4338470114430421,
      "learning_rate": 7.959124431622389e-09,
      "loss": 1.5158,
      "step": 4859
    },
    {
      "epoch": 0.996309963099631,
      "grad_norm": 0.4132278308700588,
      "learning_rate": 7.1433790082009456e-09,
      "loss": 1.5049,
      "step": 4860
    },
    {
      "epoch": 0.9965149651496515,
      "grad_norm": 0.43225488319709143,
      "learning_rate": 6.371725890386149e-09,
      "loss": 1.4922,
      "step": 4861
    },
    {
      "epoch": 0.996719967199672,
      "grad_norm": 0.41067329939197106,
      "learning_rate": 5.644165418450253e-09,
      "loss": 1.4199,
      "step": 4862
    },
    {
      "epoch": 0.9969249692496925,
      "grad_norm": 0.3685522577926695,
      "learning_rate": 4.960697913203305e-09,
      "loss": 1.4375,
      "step": 4863
    },
    {
      "epoch": 0.997129971299713,
      "grad_norm": 0.452913901435384,
      "learning_rate": 4.321323676037547e-09,
      "loss": 1.5359,
      "step": 4864
    },
    {
      "epoch": 0.9973349733497335,
      "grad_norm": 0.3931547223995384,
      "learning_rate": 3.726042988883016e-09,
      "loss": 1.5742,
      "step": 4865
    },
    {
      "epoch": 0.997539975399754,
      "grad_norm": 0.39494048963820855,
      "learning_rate": 3.1748561142297407e-09,
      "loss": 1.4847,
      "step": 4866
    },
    {
      "epoch": 0.9977449774497745,
      "grad_norm": 0.4412776056160834,
      "learning_rate": 2.6677632951277454e-09,
      "loss": 1.4449,
      "step": 4867
    },
    {
      "epoch": 0.997949979499795,
      "grad_norm": 0.41272395540200857,
      "learning_rate": 2.2047647551759474e-09,
      "loss": 1.4359,
      "step": 4868
    },
    {
      "epoch": 0.9981549815498155,
      "grad_norm": 0.40679571389735636,
      "learning_rate": 1.7858606985443616e-09,
      "loss": 1.3884,
      "step": 4869
    },
    {
      "epoch": 0.998359983599836,
      "grad_norm": 0.41502356742027807,
      "learning_rate": 1.411051309940792e-09,
      "loss": 1.481,
      "step": 4870
    },
    {
      "epoch": 0.9985649856498565,
      "grad_norm": 0.43328652001773216,
      "learning_rate": 1.08033675464414e-09,
      "loss": 1.516,
      "step": 4871
    },
    {
      "epoch": 0.998769987699877,
      "grad_norm": 0.40995482546673423,
      "learning_rate": 7.937171784933029e-10,
      "loss": 1.5096,
      "step": 4872
    },
    {
      "epoch": 0.9989749897498975,
      "grad_norm": 0.40505711826231994,
      "learning_rate": 5.51192707864967e-10,
      "loss": 1.532,
      "step": 4873
    },
    {
      "epoch": 0.999179991799918,
      "grad_norm": 0.4058151772170968,
      "learning_rate": 3.527634496958143e-10,
      "loss": 1.4582,
      "step": 4874
    },
    {
      "epoch": 0.9993849938499385,
      "grad_norm": 0.3900087938588259,
      "learning_rate": 1.9842949149362355e-10,
      "loss": 1.468,
      "step": 4875
    },
    {
      "epoch": 0.999589995899959,
      "grad_norm": 0.42760432050046554,
      "learning_rate": 8.819090131506613e-11,
      "loss": 1.4997,
      "step": 4876
    },
    {
      "epoch": 0.9997949979499795,
      "grad_norm": 0.44867171037446835,
      "learning_rate": 2.204772775460384e-11,
      "loss": 1.5601,
      "step": 4877
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.4227223601971009,
      "learning_rate": 0.0,
      "loss": 1.4835,
      "step": 4878
    },
    {
      "epoch": 1.0,
      "step": 4878,
      "total_flos": 1495789164199936.0,
      "train_loss": 1.5954388801364343,
      "train_runtime": 53290.4384,
      "train_samples_per_second": 11.715,
      "train_steps_per_second": 0.092
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 4878,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1495789164199936.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}