{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 4154,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00024077529645458377,
      "grad_norm": 26.920116424560547,
      "learning_rate": 0.0,
      "loss": 4.5785,
      "step": 1
    },
    {
      "epoch": 0.00048155059290916753,
      "grad_norm": 8.383347511291504,
      "learning_rate": 1.6000000000000001e-06,
      "loss": 5.9464,
      "step": 2
    },
    {
      "epoch": 0.0007223258893637513,
      "grad_norm": 16.050535202026367,
      "learning_rate": 3.2000000000000003e-06,
      "loss": 7.4406,
      "step": 3
    },
    {
      "epoch": 0.0009631011858183351,
      "grad_norm": 20.88637351989746,
      "learning_rate": 4.800000000000001e-06,
      "loss": 9.6416,
      "step": 4
    },
    {
      "epoch": 0.0012038764822729189,
      "grad_norm": 10.887105941772461,
      "learning_rate": 6.4000000000000006e-06,
      "loss": 4.9843,
      "step": 5
    },
    {
      "epoch": 0.0014446517787275025,
      "grad_norm": 12.895423889160156,
      "learning_rate": 8.000000000000001e-06,
      "loss": 4.7898,
      "step": 6
    },
    {
      "epoch": 0.0016854270751820862,
      "grad_norm": 11.34997844696045,
      "learning_rate": 9.600000000000001e-06,
      "loss": 6.4794,
      "step": 7
    },
    {
      "epoch": 0.0019262023716366701,
      "grad_norm": 11.045844078063965,
      "learning_rate": 1.1200000000000001e-05,
      "loss": 5.5106,
      "step": 8
    },
    {
      "epoch": 0.002166977668091254,
      "grad_norm": 6.337793827056885,
      "learning_rate": 1.2800000000000001e-05,
      "loss": 4.1446,
      "step": 9
    },
    {
      "epoch": 0.0024077529645458377,
      "grad_norm": 16.309860229492188,
      "learning_rate": 1.44e-05,
      "loss": 6.5782,
      "step": 10
    },
    {
      "epoch": 0.002648528261000421,
      "grad_norm": 18.269319534301758,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 7.8044,
      "step": 11
    },
    {
      "epoch": 0.002889303557455005,
      "grad_norm": 3.6918132305145264,
      "learning_rate": 1.76e-05,
      "loss": 2.8253,
      "step": 12
    },
    {
      "epoch": 0.003130078853909589,
      "grad_norm": 13.319107055664062,
      "learning_rate": 1.9200000000000003e-05,
      "loss": 5.9873,
      "step": 13
    },
    {
      "epoch": 0.0033708541503641725,
      "grad_norm": 16.075435638427734,
      "learning_rate": 2.08e-05,
      "loss": 5.123,
      "step": 14
    },
    {
      "epoch": 0.0036116294468187564,
      "grad_norm": 13.996861457824707,
      "learning_rate": 2.2400000000000002e-05,
      "loss": 6.5848,
      "step": 15
    },
    {
      "epoch": 0.0038524047432733403,
      "grad_norm": 23.012784957885742,
      "learning_rate": 2.4e-05,
      "loss": 9.885,
      "step": 16
    },
    {
      "epoch": 0.004093180039727924,
      "grad_norm": 8.31369686126709,
      "learning_rate": 2.5600000000000002e-05,
      "loss": 6.0589,
      "step": 17
    },
    {
      "epoch": 0.004333955336182508,
      "grad_norm": 20.620271682739258,
      "learning_rate": 2.7200000000000004e-05,
      "loss": 5.3127,
      "step": 18
    },
    {
      "epoch": 0.004574730632637091,
      "grad_norm": 13.044432640075684,
      "learning_rate": 2.88e-05,
      "loss": 5.1087,
      "step": 19
    },
    {
      "epoch": 0.004815505929091675,
      "grad_norm": 31.605579376220703,
      "learning_rate": 3.04e-05,
      "loss": 6.1106,
      "step": 20
    },
    {
      "epoch": 0.005056281225546259,
      "grad_norm": 8.28500747680664,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 4.5685,
      "step": 21
    },
    {
      "epoch": 0.005297056522000842,
      "grad_norm": 14.513694763183594,
      "learning_rate": 3.3600000000000004e-05,
      "loss": 7.3525,
      "step": 22
    },
    {
      "epoch": 0.005537831818455427,
      "grad_norm": 10.957548141479492,
      "learning_rate": 3.52e-05,
      "loss": 6.184,
      "step": 23
    },
    {
      "epoch": 0.00577860711491001,
      "grad_norm": 6.602078914642334,
      "learning_rate": 3.68e-05,
      "loss": 3.2581,
      "step": 24
    },
    {
      "epoch": 0.006019382411364594,
      "grad_norm": 10.11325454711914,
      "learning_rate": 3.8400000000000005e-05,
      "loss": 7.9033,
      "step": 25
    },
    {
      "epoch": 0.006260157707819178,
      "grad_norm": 10.033835411071777,
      "learning_rate": 4e-05,
      "loss": 4.9469,
      "step": 26
    },
    {
      "epoch": 0.0065009330042737614,
      "grad_norm": 10.819141387939453,
      "learning_rate": 4.16e-05,
      "loss": 7.46,
      "step": 27
    },
    {
      "epoch": 0.006741708300728345,
      "grad_norm": 25.810640335083008,
      "learning_rate": 4.32e-05,
      "loss": 5.7763,
      "step": 28
    },
    {
      "epoch": 0.006982483597182929,
      "grad_norm": 10.680785179138184,
      "learning_rate": 4.4800000000000005e-05,
      "loss": 5.9753,
      "step": 29
    },
    {
      "epoch": 0.007223258893637513,
      "grad_norm": 14.47507381439209,
      "learning_rate": 4.64e-05,
      "loss": 5.2033,
      "step": 30
    },
    {
      "epoch": 0.007464034190092096,
      "grad_norm": 17.440105438232422,
      "learning_rate": 4.8e-05,
      "loss": 5.6511,
      "step": 31
    },
    {
      "epoch": 0.0077048094865466805,
      "grad_norm": 10.847347259521484,
      "learning_rate": 4.96e-05,
      "loss": 6.7464,
      "step": 32
    },
    {
      "epoch": 0.007945584783001265,
      "grad_norm": 9.08476734161377,
      "learning_rate": 5.1200000000000004e-05,
      "loss": 5.9733,
      "step": 33
    },
    {
      "epoch": 0.008186360079455848,
      "grad_norm": 6.558286190032959,
      "learning_rate": 5.28e-05,
      "loss": 4.6828,
      "step": 34
    },
    {
      "epoch": 0.008427135375910432,
      "grad_norm": 33.23648452758789,
      "learning_rate": 5.440000000000001e-05,
      "loss": 5.5094,
      "step": 35
    },
    {
      "epoch": 0.008667910672365015,
      "grad_norm": 8.349298477172852,
      "learning_rate": 5.6000000000000006e-05,
      "loss": 4.1564,
      "step": 36
    },
    {
      "epoch": 0.008908685968819599,
      "grad_norm": 42.14231491088867,
      "learning_rate": 5.76e-05,
      "loss": 4.5537,
      "step": 37
    },
    {
      "epoch": 0.009149461265274182,
      "grad_norm": 14.191291809082031,
      "learning_rate": 5.92e-05,
      "loss": 6.6075,
      "step": 38
    },
    {
      "epoch": 0.009390236561728767,
      "grad_norm": 73.26921844482422,
      "learning_rate": 6.08e-05,
      "loss": 6.399,
      "step": 39
    },
    {
      "epoch": 0.00963101185818335,
      "grad_norm": 14.339468002319336,
      "learning_rate": 6.24e-05,
      "loss": 5.774,
      "step": 40
    },
    {
      "epoch": 0.009871787154637934,
      "grad_norm": 15.463168144226074,
      "learning_rate": 6.400000000000001e-05,
      "loss": 5.1967,
      "step": 41
    },
    {
      "epoch": 0.010112562451092518,
      "grad_norm": 49.4256477355957,
      "learning_rate": 6.560000000000001e-05,
      "loss": 3.4196,
      "step": 42
    },
    {
      "epoch": 0.010353337747547101,
      "grad_norm": 28.241819381713867,
      "learning_rate": 6.720000000000001e-05,
      "loss": 5.9708,
      "step": 43
    },
    {
      "epoch": 0.010594113044001685,
      "grad_norm": 7.766085624694824,
      "learning_rate": 6.879999999999999e-05,
      "loss": 5.5671,
      "step": 44
    },
    {
      "epoch": 0.01083488834045627,
      "grad_norm": 6.781948566436768,
      "learning_rate": 7.04e-05,
      "loss": 3.8236,
      "step": 45
    },
    {
      "epoch": 0.011075663636910853,
      "grad_norm": 22.539283752441406,
      "learning_rate": 7.2e-05,
      "loss": 5.7164,
      "step": 46
    },
    {
      "epoch": 0.011316438933365437,
      "grad_norm": 138.10426330566406,
      "learning_rate": 7.36e-05,
      "loss": 4.4738,
      "step": 47
    },
    {
      "epoch": 0.01155721422982002,
      "grad_norm": 6.446707725524902,
      "learning_rate": 7.52e-05,
      "loss": 3.4618,
      "step": 48
    },
    {
      "epoch": 0.011797989526274604,
      "grad_norm": 11.496111869812012,
      "learning_rate": 7.680000000000001e-05,
      "loss": 7.4023,
      "step": 49
    },
    {
      "epoch": 0.012038764822729187,
      "grad_norm": 12.171050071716309,
      "learning_rate": 7.840000000000001e-05,
      "loss": 5.3548,
      "step": 50
    },
    {
      "epoch": 0.012279540119183772,
      "grad_norm": 6.225719928741455,
      "learning_rate": 8e-05,
      "loss": 3.7378,
      "step": 51
    },
    {
      "epoch": 0.012520315415638356,
      "grad_norm": 30.29821014404297,
      "learning_rate": 8.16e-05,
      "loss": 4.3953,
      "step": 52
    },
    {
      "epoch": 0.01276109071209294,
      "grad_norm": 8.839107513427734,
      "learning_rate": 8.32e-05,
      "loss": 6.1286,
      "step": 53
    },
    {
      "epoch": 0.013001866008547523,
      "grad_norm": 15.737375259399414,
      "learning_rate": 8.48e-05,
      "loss": 4.3585,
      "step": 54
    },
    {
      "epoch": 0.013242641305002106,
      "grad_norm": 13.612770080566406,
      "learning_rate": 8.64e-05,
      "loss": 5.2315,
      "step": 55
    },
    {
      "epoch": 0.01348341660145669,
      "grad_norm": 20.0008544921875,
      "learning_rate": 8.800000000000001e-05,
      "loss": 5.8228,
      "step": 56
    },
    {
      "epoch": 0.013724191897911275,
      "grad_norm": 5.86611795425415,
      "learning_rate": 8.960000000000001e-05,
      "loss": 4.7417,
      "step": 57
    },
    {
      "epoch": 0.013964967194365858,
      "grad_norm": 11.262532234191895,
      "learning_rate": 9.120000000000001e-05,
      "loss": 4.7822,
      "step": 58
    },
    {
      "epoch": 0.014205742490820442,
      "grad_norm": 12.375737190246582,
      "learning_rate": 9.28e-05,
      "loss": 5.4579,
      "step": 59
    },
    {
      "epoch": 0.014446517787275025,
      "grad_norm": 10.653188705444336,
      "learning_rate": 9.44e-05,
      "loss": 6.4277,
      "step": 60
    },
    {
      "epoch": 0.014687293083729609,
      "grad_norm": 39.603145599365234,
      "learning_rate": 9.6e-05,
      "loss": 6.2061,
      "step": 61
    },
    {
      "epoch": 0.014928068380184192,
      "grad_norm": 16.928340911865234,
      "learning_rate": 9.76e-05,
      "loss": 7.8526,
      "step": 62
    },
    {
      "epoch": 0.015168843676638778,
      "grad_norm": 13.248088836669922,
      "learning_rate": 9.92e-05,
      "loss": 3.6139,
      "step": 63
    },
    {
      "epoch": 0.015409618973093361,
      "grad_norm": 4.931760311126709,
      "learning_rate": 0.00010080000000000001,
      "loss": 3.006,
      "step": 64
    },
    {
      "epoch": 0.015650394269547944,
      "grad_norm": 11.34759521484375,
      "learning_rate": 0.00010240000000000001,
      "loss": 6.164,
      "step": 65
    },
    {
      "epoch": 0.01589116956600253,
      "grad_norm": 9.345470428466797,
      "learning_rate": 0.00010400000000000001,
      "loss": 4.9446,
      "step": 66
    },
    {
      "epoch": 0.01613194486245711,
      "grad_norm": 10.860252380371094,
      "learning_rate": 0.0001056,
      "loss": 3.235,
      "step": 67
    },
    {
      "epoch": 0.016372720158911697,
      "grad_norm": 22.121963500976562,
      "learning_rate": 0.00010720000000000002,
      "loss": 6.3519,
      "step": 68
    },
    {
      "epoch": 0.01661349545536628,
      "grad_norm": 23.994407653808594,
      "learning_rate": 0.00010880000000000002,
      "loss": 5.4503,
      "step": 69
    },
    {
      "epoch": 0.016854270751820864,
      "grad_norm": 5.721750736236572,
      "learning_rate": 0.00011040000000000001,
      "loss": 3.448,
      "step": 70
    },
    {
      "epoch": 0.017095046048275445,
      "grad_norm": 46.67560577392578,
      "learning_rate": 0.00011200000000000001,
      "loss": 3.8795,
      "step": 71
    },
    {
      "epoch": 0.01733582134473003,
      "grad_norm": 11.732275009155273,
      "learning_rate": 0.0001136,
      "loss": 3.7436,
      "step": 72
    },
    {
      "epoch": 0.017576596641184616,
      "grad_norm": 19.560314178466797,
      "learning_rate": 0.0001152,
      "loss": 5.2577,
      "step": 73
    },
    {
      "epoch": 0.017817371937639197,
      "grad_norm": 47.164306640625,
      "learning_rate": 0.00011679999999999999,
      "loss": 5.2273,
      "step": 74
    },
    {
      "epoch": 0.018058147234093783,
      "grad_norm": 25.40642738342285,
      "learning_rate": 0.0001184,
      "loss": 5.912,
      "step": 75
    },
    {
      "epoch": 0.018298922530548364,
      "grad_norm": 10.063149452209473,
      "learning_rate": 0.00012,
      "loss": 5.6455,
      "step": 76
    },
    {
      "epoch": 0.01853969782700295,
      "grad_norm": 15.481316566467285,
      "learning_rate": 0.0001216,
      "loss": 3.1296,
      "step": 77
    },
    {
      "epoch": 0.018780473123457535,
      "grad_norm": 10.528315544128418,
      "learning_rate": 0.0001232,
      "loss": 4.7821,
      "step": 78
    },
    {
      "epoch": 0.019021248419912117,
      "grad_norm": 17.255693435668945,
      "learning_rate": 0.0001248,
      "loss": 3.7784,
      "step": 79
    },
    {
      "epoch": 0.0192620237163667,
      "grad_norm": 26.67706298828125,
      "learning_rate": 0.0001264,
      "loss": 6.29,
      "step": 80
    },
    {
      "epoch": 0.019502799012821283,
      "grad_norm": 10.499022483825684,
      "learning_rate": 0.00012800000000000002,
      "loss": 5.3771,
      "step": 81
    },
    {
      "epoch": 0.01974357430927587,
      "grad_norm": 8.131221771240234,
      "learning_rate": 0.0001296,
      "loss": 4.0706,
      "step": 82
    },
    {
      "epoch": 0.01998434960573045,
      "grad_norm": 45.455230712890625,
      "learning_rate": 0.00013120000000000002,
      "loss": 3.3879,
      "step": 83
    },
    {
      "epoch": 0.020225124902185036,
      "grad_norm": 19.001127243041992,
      "learning_rate": 0.0001328,
      "loss": 7.6711,
      "step": 84
    },
    {
      "epoch": 0.02046590019863962,
      "grad_norm": 6.148112773895264,
      "learning_rate": 0.00013440000000000001,
      "loss": 3.472,
      "step": 85
    },
    {
      "epoch": 0.020706675495094203,
      "grad_norm": 11.255337715148926,
      "learning_rate": 0.00013600000000000003,
      "loss": 8.2827,
      "step": 86
    },
    {
      "epoch": 0.020947450791548788,
      "grad_norm": 9.724173545837402,
      "learning_rate": 0.00013759999999999998,
      "loss": 4.0604,
      "step": 87
    },
    {
      "epoch": 0.02118822608800337,
      "grad_norm": 58.57503128051758,
      "learning_rate": 0.0001392,
      "loss": 3.8051,
      "step": 88
    },
    {
      "epoch": 0.021429001384457955,
      "grad_norm": 19.386682510375977,
      "learning_rate": 0.0001408,
      "loss": 7.015,
      "step": 89
    },
    {
      "epoch": 0.02166977668091254,
      "grad_norm": 7.111973762512207,
      "learning_rate": 0.0001424,
      "loss": 2.9986,
      "step": 90
    },
    {
      "epoch": 0.02191055197736712,
      "grad_norm": 10.568584442138672,
      "learning_rate": 0.000144,
      "loss": 4.6358,
      "step": 91
    },
    {
      "epoch": 0.022151327273821707,
      "grad_norm": 10.066975593566895,
      "learning_rate": 0.00014560000000000002,
      "loss": 4.6346,
      "step": 92
    },
    {
      "epoch": 0.02239210257027629,
      "grad_norm": 25.012971878051758,
      "learning_rate": 0.0001472,
      "loss": 5.2808,
      "step": 93
    },
    {
      "epoch": 0.022632877866730874,
      "grad_norm": 8.683295249938965,
      "learning_rate": 0.0001488,
      "loss": 5.2811,
      "step": 94
    },
    {
      "epoch": 0.022873653163185455,
      "grad_norm": 5.454954624176025,
      "learning_rate": 0.0001504,
      "loss": 3.4226,
      "step": 95
    },
    {
      "epoch": 0.02311442845964004,
      "grad_norm": 5.077779293060303,
      "learning_rate": 0.000152,
      "loss": 2.7948,
      "step": 96
    },
    {
      "epoch": 0.023355203756094626,
      "grad_norm": 33.022857666015625,
      "learning_rate": 0.00015360000000000002,
      "loss": 7.5081,
      "step": 97
    },
    {
      "epoch": 0.023595979052549208,
      "grad_norm": 15.922677040100098,
      "learning_rate": 0.0001552,
      "loss": 5.6511,
      "step": 98
    },
    {
      "epoch": 0.023836754349003793,
      "grad_norm": 7.067165851593018,
      "learning_rate": 0.00015680000000000002,
      "loss": 3.4348,
      "step": 99
    },
    {
      "epoch": 0.024077529645458375,
      "grad_norm": 23.432310104370117,
      "learning_rate": 0.00015840000000000003,
      "loss": 4.1903,
      "step": 100
    },
    {
      "epoch": 0.02431830494191296,
      "grad_norm": 6.100611686706543,
      "learning_rate": 0.00016,
      "loss": 4.3271,
      "step": 101
    },
    {
      "epoch": 0.024559080238367545,
      "grad_norm": 32.087772369384766,
      "learning_rate": 0.00016160000000000002,
      "loss": 3.9315,
      "step": 102
    },
    {
      "epoch": 0.024799855534822127,
      "grad_norm": 6.975442886352539,
      "learning_rate": 0.0001632,
      "loss": 4.5229,
      "step": 103
    },
    {
      "epoch": 0.025040630831276712,
      "grad_norm": 15.898163795471191,
      "learning_rate": 0.0001648,
      "loss": 4.6573,
      "step": 104
    },
    {
      "epoch": 0.025281406127731294,
      "grad_norm": 22.853700637817383,
      "learning_rate": 0.0001664,
      "loss": 5.0038,
      "step": 105
    },
    {
      "epoch": 0.02552218142418588,
      "grad_norm": 11.595605850219727,
      "learning_rate": 0.000168,
      "loss": 2.2838,
      "step": 106
    },
    {
      "epoch": 0.025762956720640464,
      "grad_norm": 32.837886810302734,
      "learning_rate": 0.0001696,
      "loss": 3.5895,
      "step": 107
    },
    {
      "epoch": 0.026003732017095046,
      "grad_norm": 46.952144622802734,
      "learning_rate": 0.00017120000000000001,
      "loss": 2.7184,
      "step": 108
    },
    {
      "epoch": 0.02624450731354963,
      "grad_norm": 10.757477760314941,
      "learning_rate": 0.0001728,
      "loss": 5.1609,
      "step": 109
    },
    {
      "epoch": 0.026485282610004213,
      "grad_norm": 7.686053276062012,
      "learning_rate": 0.0001744,
      "loss": 4.8981,
      "step": 110
    },
    {
      "epoch": 0.026726057906458798,
      "grad_norm": 9.239624977111816,
      "learning_rate": 0.00017600000000000002,
      "loss": 4.208,
      "step": 111
    },
    {
      "epoch": 0.02696683320291338,
      "grad_norm": 12.09170150756836,
      "learning_rate": 0.0001776,
      "loss": 4.2583,
      "step": 112
    },
    {
      "epoch": 0.027207608499367965,
      "grad_norm": 9.89987850189209,
      "learning_rate": 0.00017920000000000002,
      "loss": 4.0053,
      "step": 113
    },
    {
      "epoch": 0.02744838379582255,
      "grad_norm": 10.28636360168457,
      "learning_rate": 0.0001808,
      "loss": 5.7005,
      "step": 114
    },
    {
      "epoch": 0.027689159092277132,
      "grad_norm": 4.695734024047852,
      "learning_rate": 0.00018240000000000002,
      "loss": 3.4277,
      "step": 115
    },
    {
      "epoch": 0.027929934388731717,
      "grad_norm": 105.01580810546875,
      "learning_rate": 0.00018400000000000003,
      "loss": 3.761,
      "step": 116
    },
    {
      "epoch": 0.0281707096851863,
      "grad_norm": 9.191910743713379,
      "learning_rate": 0.0001856,
      "loss": 4.2246,
      "step": 117
    },
    {
      "epoch": 0.028411484981640884,
      "grad_norm": 26.174537658691406,
      "learning_rate": 0.00018720000000000002,
      "loss": 2.9553,
      "step": 118
    },
    {
      "epoch": 0.02865226027809547,
      "grad_norm": 9.34518814086914,
      "learning_rate": 0.0001888,
      "loss": 3.3358,
      "step": 119
    },
    {
      "epoch": 0.02889303557455005,
      "grad_norm": 9.987439155578613,
      "learning_rate": 0.0001904,
      "loss": 3.6771,
      "step": 120
    },
    {
      "epoch": 0.029133810871004636,
      "grad_norm": 7.954049587249756,
      "learning_rate": 0.000192,
      "loss": 3.2811,
      "step": 121
    },
    {
      "epoch": 0.029374586167459218,
      "grad_norm": 8.947925567626953,
      "learning_rate": 0.00019360000000000002,
      "loss": 2.854,
      "step": 122
    },
    {
      "epoch": 0.029615361463913803,
      "grad_norm": 31.957183837890625,
      "learning_rate": 0.0001952,
      "loss": 7.1923,
      "step": 123
    },
    {
      "epoch": 0.029856136760368385,
      "grad_norm": 10.06078815460205,
      "learning_rate": 0.0001968,
      "loss": 3.0629,
      "step": 124
    },
    {
      "epoch": 0.03009691205682297,
      "grad_norm": 9.508298873901367,
      "learning_rate": 0.0001984,
      "loss": 4.4433,
      "step": 125
    },
    {
      "epoch": 0.030337687353277555,
      "grad_norm": 49.658111572265625,
      "learning_rate": 0.0002,
      "loss": 4.6153,
      "step": 126
    },
    {
      "epoch": 0.030578462649732137,
      "grad_norm": 21.386220932006836,
      "learning_rate": 0.00019999996959988735,
      "loss": 5.5672,
      "step": 127
    },
    {
      "epoch": 0.030819237946186722,
      "grad_norm": 122.65118408203125,
      "learning_rate": 0.0001999998783995679,
      "loss": 3.5313,
      "step": 128
    },
    {
      "epoch": 0.031060013242641304,
      "grad_norm": 13.517218589782715,
      "learning_rate": 0.00019999972639909706,
      "loss": 4.7874,
      "step": 129
    },
    {
      "epoch": 0.03130078853909589,
      "grad_norm": 18.364986419677734,
      "learning_rate": 0.00019999951359856726,
      "loss": 6.3622,
      "step": 130
    },
    {
      "epoch": 0.03154156383555047,
      "grad_norm": 10.25970458984375,
      "learning_rate": 0.0001999992399981079,
      "loss": 3.7715,
      "step": 131
    },
    {
      "epoch": 0.03178233913200506,
      "grad_norm": 15.492377281188965,
      "learning_rate": 0.0001999989055978853,
      "loss": 3.5824,
      "step": 132
    },
    {
      "epoch": 0.03202311442845964,
      "grad_norm": 28.90912437438965,
      "learning_rate": 0.00019999851039810283,
      "loss": 4.791,
      "step": 133
    },
    {
      "epoch": 0.03226388972491422,
      "grad_norm": 9.603219032287598,
      "learning_rate": 0.00019999805439900072,
      "loss": 3.1532,
      "step": 134
    },
    {
      "epoch": 0.032504665021368805,
      "grad_norm": 7.891742706298828,
      "learning_rate": 0.0001999975376008562,
      "loss": 2.59,
      "step": 135
    },
    {
      "epoch": 0.03274544031782339,
      "grad_norm": 14.559179306030273,
      "learning_rate": 0.0001999969600039836,
      "loss": 3.9376,
      "step": 136
    },
    {
      "epoch": 0.032986215614277975,
      "grad_norm": 17.962955474853516,
      "learning_rate": 0.00019999632160873398,
      "loss": 3.7606,
      "step": 137
    },
    {
      "epoch": 0.03322699091073256,
      "grad_norm": 13.648564338684082,
      "learning_rate": 0.0001999956224154955,
      "loss": 4.019,
      "step": 138
    },
    {
      "epoch": 0.033467766207187145,
      "grad_norm": 14.759313583374023,
      "learning_rate": 0.00019999486242469337,
      "loss": 3.5558,
      "step": 139
    },
    {
      "epoch": 0.03370854150364173,
      "grad_norm": 15.668071746826172,
      "learning_rate": 0.00019999404163678955,
      "loss": 3.7936,
      "step": 140
    },
    {
      "epoch": 0.03394931680009631,
      "grad_norm": 17.56260108947754,
      "learning_rate": 0.00019999316005228312,
      "loss": 2.4151,
      "step": 141
    },
    {
      "epoch": 0.03419009209655089,
      "grad_norm": 5.186138153076172,
      "learning_rate": 0.0001999922176717101,
      "loss": 2.7492,
      "step": 142
    },
    {
      "epoch": 0.03443086739300548,
      "grad_norm": 12.366766929626465,
      "learning_rate": 0.00019999121449564347,
      "loss": 3.1902,
      "step": 143
    },
    {
      "epoch": 0.03467164268946006,
      "grad_norm": 16.707490921020508,
      "learning_rate": 0.0001999901505246931,
      "loss": 2.3057,
      "step": 144
    },
    {
      "epoch": 0.03491241798591464,
      "grad_norm": 9.578150749206543,
      "learning_rate": 0.00019998902575950596,
      "loss": 4.3383,
      "step": 145
    },
    {
      "epoch": 0.03515319328236923,
      "grad_norm": 19.261411666870117,
      "learning_rate": 0.0001999878402007659,
      "loss": 5.0221,
      "step": 146
    },
    {
      "epoch": 0.03539396857882381,
      "grad_norm": 8.17841911315918,
      "learning_rate": 0.0001999865938491937,
      "loss": 2.2664,
      "step": 147
    },
    {
      "epoch": 0.035634743875278395,
      "grad_norm": 109.94926452636719,
      "learning_rate": 0.00019998528670554715,
      "loss": 6.5844,
      "step": 148
    },
    {
      "epoch": 0.035875519171732984,
      "grad_norm": 8.02511215209961,
      "learning_rate": 0.00019998391877062104,
      "loss": 3.7571,
      "step": 149
    },
    {
      "epoch": 0.036116294468187565,
      "grad_norm": 8.986191749572754,
      "learning_rate": 0.00019998249004524703,
      "loss": 5.3496,
      "step": 150
    },
    {
      "epoch": 0.03635706976464215,
      "grad_norm": 5.070540904998779,
      "learning_rate": 0.0001999810005302938,
      "loss": 2.581,
      "step": 151
    },
    {
      "epoch": 0.03659784506109673,
      "grad_norm": 10.687249183654785,
      "learning_rate": 0.00019997945022666701,
      "loss": 3.4334,
      "step": 152
    },
    {
      "epoch": 0.03683862035755132,
      "grad_norm": 7.046168327331543,
      "learning_rate": 0.00019997783913530923,
      "loss": 2.8572,
      "step": 153
    },
    {
      "epoch": 0.0370793956540059,
      "grad_norm": 4.520480632781982,
      "learning_rate": 0.0001999761672572,
      "loss": 2.4164,
      "step": 154
    },
    {
      "epoch": 0.03732017095046048,
      "grad_norm": 20.134994506835938,
      "learning_rate": 0.0001999744345933558,
      "loss": 5.1227,
      "step": 155
    },
    {
      "epoch": 0.03756094624691507,
      "grad_norm": 8.454794883728027,
      "learning_rate": 0.00019997264114483015,
      "loss": 4.3214,
      "step": 156
    },
    {
      "epoch": 0.03780172154336965,
      "grad_norm": 19.004796981811523,
      "learning_rate": 0.00019997078691271348,
      "loss": 2.8001,
      "step": 157
    },
    {
      "epoch": 0.03804249683982423,
      "grad_norm": 8.622836112976074,
      "learning_rate": 0.00019996887189813306,
      "loss": 2.6805,
      "step": 158
    },
    {
      "epoch": 0.038283272136278815,
      "grad_norm": 11.92911434173584,
      "learning_rate": 0.00019996689610225332,
      "loss": 3.4712,
      "step": 159
    },
    {
      "epoch": 0.0385240474327334,
      "grad_norm": 8.337674140930176,
      "learning_rate": 0.00019996485952627552,
      "loss": 3.6351,
      "step": 160
    },
    {
      "epoch": 0.038764822729187985,
      "grad_norm": 16.739974975585938,
      "learning_rate": 0.00019996276217143792,
      "loss": 2.8034,
      "step": 161
    },
    {
      "epoch": 0.03900559802564257,
      "grad_norm": 19.424123764038086,
      "learning_rate": 0.0001999606040390157,
      "loss": 3.6437,
      "step": 162
    },
    {
      "epoch": 0.039246373322097156,
      "grad_norm": 6.484769344329834,
      "learning_rate": 0.000199958385130321,
      "loss": 1.9908,
      "step": 163
    },
    {
      "epoch": 0.03948714861855174,
      "grad_norm": 12.377532005310059,
      "learning_rate": 0.0001999561054467029,
      "loss": 4.8526,
      "step": 164
    },
    {
      "epoch": 0.03972792391500632,
      "grad_norm": 13.827719688415527,
      "learning_rate": 0.00019995376498954754,
      "loss": 3.6073,
      "step": 165
    },
    {
      "epoch": 0.0399686992114609,
      "grad_norm": 7.668979167938232,
      "learning_rate": 0.00019995136376027786,
      "loss": 2.496,
      "step": 166
    },
    {
      "epoch": 0.04020947450791549,
      "grad_norm": 8.068209648132324,
      "learning_rate": 0.00019994890176035378,
      "loss": 4.0669,
      "step": 167
    },
    {
      "epoch": 0.04045024980437007,
      "grad_norm": 11.890876770019531,
      "learning_rate": 0.00019994637899127228,
      "loss": 2.6487,
      "step": 168
    },
    {
      "epoch": 0.04069102510082465,
      "grad_norm": 16.064224243164062,
      "learning_rate": 0.00019994379545456713,
      "loss": 2.9892,
      "step": 169
    },
    {
      "epoch": 0.04093180039727924,
      "grad_norm": 7.469193458557129,
      "learning_rate": 0.00019994115115180922,
      "loss": 3.3422,
      "step": 170
    },
    {
      "epoch": 0.04117257569373382,
      "grad_norm": 14.787521362304688,
      "learning_rate": 0.00019993844608460622,
      "loss": 3.911,
      "step": 171
    },
    {
      "epoch": 0.041413350990188405,
      "grad_norm": 24.229990005493164,
      "learning_rate": 0.00019993568025460283,
      "loss": 3.3516,
      "step": 172
    },
    {
      "epoch": 0.041654126286642994,
      "grad_norm": 4.197109222412109,
      "learning_rate": 0.0001999328536634807,
      "loss": 1.3666,
      "step": 173
    },
    {
      "epoch": 0.041894901583097575,
      "grad_norm": 9.006143569946289,
      "learning_rate": 0.00019992996631295836,
      "loss": 4.234,
      "step": 174
    },
    {
      "epoch": 0.04213567687955216,
      "grad_norm": 21.24369239807129,
      "learning_rate": 0.00019992701820479138,
      "loss": 3.2965,
      "step": 175
    },
    {
      "epoch": 0.04237645217600674,
      "grad_norm": 21.48784828186035,
      "learning_rate": 0.0001999240093407722,
      "loss": 1.7589,
      "step": 176
    },
    {
      "epoch": 0.04261722747246133,
      "grad_norm": 8.93320369720459,
      "learning_rate": 0.00019992093972273018,
      "loss": 1.9561,
      "step": 177
    },
    {
      "epoch": 0.04285800276891591,
      "grad_norm": 12.301058769226074,
      "learning_rate": 0.0001999178093525317,
      "loss": 2.0668,
      "step": 178
    },
    {
      "epoch": 0.04309877806537049,
      "grad_norm": 18.54864501953125,
      "learning_rate": 0.00019991461823208004,
      "loss": 3.1243,
      "step": 179
    },
    {
      "epoch": 0.04333955336182508,
      "grad_norm": 14.172440528869629,
      "learning_rate": 0.00019991136636331538,
      "loss": 2.7406,
      "step": 180
    },
    {
      "epoch": 0.04358032865827966,
      "grad_norm": 42.0859375,
      "learning_rate": 0.00019990805374821483,
      "loss": 1.4452,
      "step": 181
    },
    {
      "epoch": 0.04382110395473424,
      "grad_norm": 10.7669677734375,
      "learning_rate": 0.00019990468038879255,
      "loss": 3.331,
      "step": 182
    },
    {
      "epoch": 0.044061879251188825,
      "grad_norm": 11.51449966430664,
      "learning_rate": 0.0001999012462870995,
      "loss": 1.4512,
      "step": 183
    },
    {
      "epoch": 0.044302654547643414,
      "grad_norm": 21.03165054321289,
      "learning_rate": 0.00019989775144522358,
      "loss": 3.0687,
      "step": 184
    },
    {
      "epoch": 0.044543429844097995,
      "grad_norm": 11.455255508422852,
      "learning_rate": 0.00019989419586528975,
      "loss": 3.6598,
      "step": 185
    },
    {
      "epoch": 0.04478420514055258,
      "grad_norm": 10.61294174194336,
      "learning_rate": 0.00019989057954945976,
      "loss": 2.4758,
      "step": 186
    },
    {
      "epoch": 0.045024980437007166,
      "grad_norm": 96.17725372314453,
      "learning_rate": 0.00019988690249993235,
      "loss": 2.7045,
      "step": 187
    },
    {
      "epoch": 0.04526575573346175,
      "grad_norm": 8.609686851501465,
      "learning_rate": 0.00019988316471894314,
      "loss": 1.7687,
      "step": 188
    },
    {
      "epoch": 0.04550653102991633,
      "grad_norm": 7.83888053894043,
      "learning_rate": 0.00019987936620876478,
      "loss": 1.7098,
      "step": 189
    },
    {
      "epoch": 0.04574730632637091,
      "grad_norm": 6.7235941886901855,
      "learning_rate": 0.00019987550697170674,
      "loss": 1.6275,
      "step": 190
    },
    {
      "epoch": 0.0459880816228255,
      "grad_norm": 14.214694023132324,
      "learning_rate": 0.0001998715870101154,
      "loss": 4.1546,
      "step": 191
    },
    {
      "epoch": 0.04622885691928008,
      "grad_norm": 3.766120433807373,
      "learning_rate": 0.0001998676063263742,
      "loss": 2.2139,
      "step": 192
    },
    {
      "epoch": 0.04646963221573466,
      "grad_norm": 4.959268093109131,
      "learning_rate": 0.0001998635649229033,
      "loss": 1.4615,
      "step": 193
    },
    {
      "epoch": 0.04671040751218925,
      "grad_norm": 6.699900150299072,
      "learning_rate": 0.00019985946280215994,
      "loss": 1.9309,
      "step": 194
    },
    {
      "epoch": 0.046951182808643833,
      "grad_norm": 8.718276023864746,
      "learning_rate": 0.00019985529996663823,
      "loss": 2.1614,
      "step": 195
    },
    {
      "epoch": 0.047191958105098415,
      "grad_norm": 13.810513496398926,
      "learning_rate": 0.00019985107641886917,
      "loss": 3.8401,
      "step": 196
    },
    {
      "epoch": 0.047432733401553004,
      "grad_norm": 12.379217147827148,
      "learning_rate": 0.00019984679216142066,
      "loss": 1.5629,
      "step": 197
    },
    {
      "epoch": 0.047673508698007586,
      "grad_norm": 10.015958786010742,
      "learning_rate": 0.00019984244719689756,
      "loss": 1.6573,
      "step": 198
    },
    {
      "epoch": 0.04791428399446217,
      "grad_norm": 12.203784942626953,
      "learning_rate": 0.00019983804152794163,
      "loss": 1.9251,
      "step": 199
    },
    {
      "epoch": 0.04815505929091675,
      "grad_norm": 8.036340713500977,
      "learning_rate": 0.0001998335751572315,
      "loss": 1.7192,
      "step": 200
    },
    {
      "epoch": 0.04839583458737134,
      "grad_norm": 8.729804039001465,
      "learning_rate": 0.00019982904808748275,
      "loss": 0.9223,
      "step": 201
    },
    {
      "epoch": 0.04863660988382592,
      "grad_norm": 32.94856262207031,
      "learning_rate": 0.00019982446032144785,
      "loss": 3.5147,
      "step": 202
    },
    {
      "epoch": 0.0488773851802805,
      "grad_norm": 5.190202713012695,
      "learning_rate": 0.00019981981186191616,
      "loss": 1.0766,
      "step": 203
    },
    {
      "epoch": 0.04911816047673509,
      "grad_norm": 15.163110733032227,
      "learning_rate": 0.00019981510271171394,
      "loss": 3.0481,
      "step": 204
    },
    {
      "epoch": 0.04935893577318967,
      "grad_norm": 35.894718170166016,
      "learning_rate": 0.00019981033287370443,
      "loss": 3.3266,
      "step": 205
    },
    {
      "epoch": 0.04959971106964425,
      "grad_norm": 12.832849502563477,
      "learning_rate": 0.0001998055023507876,
      "loss": 1.614,
      "step": 206
    },
    {
      "epoch": 0.049840486366098835,
      "grad_norm": 12.771391868591309,
      "learning_rate": 0.00019980061114590055,
      "loss": 1.63,
      "step": 207
    },
    {
      "epoch": 0.050081261662553424,
      "grad_norm": 20.120861053466797,
      "learning_rate": 0.00019979565926201703,
      "loss": 3.5633,
      "step": 208
    },
    {
      "epoch": 0.050322036959008005,
      "grad_norm": 10.067777633666992,
      "learning_rate": 0.00019979064670214782,
      "loss": 1.7442,
      "step": 209
    },
    {
      "epoch": 0.05056281225546259,
      "grad_norm": 5.05864143371582,
      "learning_rate": 0.0001997855734693406,
      "loss": 2.6813,
      "step": 210
    },
    {
      "epoch": 0.050803587551917176,
      "grad_norm": 5.616927623748779,
      "learning_rate": 0.0001997804395666799,
      "loss": 1.4455,
      "step": 211
    },
    {
      "epoch": 0.05104436284837176,
      "grad_norm": 18.011022567749023,
      "learning_rate": 0.00019977524499728712,
      "loss": 1.095,
      "step": 212
    },
    {
      "epoch": 0.05128513814482634,
      "grad_norm": 4.923522472381592,
      "learning_rate": 0.0001997699897643206,
      "loss": 1.7786,
      "step": 213
    },
    {
      "epoch": 0.05152591344128093,
      "grad_norm": 8.910199165344238,
      "learning_rate": 0.00019976467387097552,
      "loss": 2.6016,
      "step": 214
    },
    {
      "epoch": 0.05176668873773551,
      "grad_norm": 6.376938343048096,
      "learning_rate": 0.00019975929732048394,
      "loss": 1.3324,
      "step": 215
    },
    {
      "epoch": 0.05200746403419009,
      "grad_norm": 25.141647338867188,
      "learning_rate": 0.00019975386011611483,
      "loss": 1.0559,
      "step": 216
    },
    {
      "epoch": 0.05224823933064467,
      "grad_norm": 69.8543472290039,
      "learning_rate": 0.00019974836226117405,
      "loss": 2.2873,
      "step": 217
    },
    {
      "epoch": 0.05248901462709926,
      "grad_norm": 13.50328254699707,
      "learning_rate": 0.00019974280375900424,
      "loss": 2.7354,
      "step": 218
    },
    {
      "epoch": 0.052729789923553844,
      "grad_norm": 9.282197952270508,
      "learning_rate": 0.00019973718461298502,
      "loss": 2.1698,
      "step": 219
    },
    {
      "epoch": 0.052970565220008425,
      "grad_norm": 12.128793716430664,
      "learning_rate": 0.00019973150482653287,
      "loss": 2.3485,
      "step": 220
    },
    {
      "epoch": 0.053211340516463014,
      "grad_norm": 6.763794422149658,
      "learning_rate": 0.00019972576440310105,
      "loss": 1.2761,
      "step": 221
    },
    {
      "epoch": 0.053452115812917596,
      "grad_norm": 4.751701354980469,
      "learning_rate": 0.00019971996334617985,
      "loss": 1.0254,
      "step": 222
    },
    {
      "epoch": 0.05369289110937218,
      "grad_norm": 10.452568054199219,
      "learning_rate": 0.00019971410165929622,
      "loss": 1.5987,
      "step": 223
    },
    {
      "epoch": 0.05393366640582676,
      "grad_norm": 5.189295768737793,
      "learning_rate": 0.00019970817934601413,
      "loss": 0.9901,
      "step": 224
    },
    {
      "epoch": 0.05417444170228135,
      "grad_norm": 6.027712821960449,
      "learning_rate": 0.00019970219640993438,
      "loss": 2.048,
      "step": 225
    },
    {
      "epoch": 0.05441521699873593,
      "grad_norm": 5.749260425567627,
      "learning_rate": 0.00019969615285469455,
      "loss": 1.1023,
      "step": 226
    },
    {
      "epoch": 0.05465599229519051,
      "grad_norm": 14.753028869628906,
      "learning_rate": 0.0001996900486839692,
      "loss": 1.657,
      "step": 227
    },
    {
      "epoch": 0.0548967675916451,
      "grad_norm": 19.311214447021484,
      "learning_rate": 0.0001996838839014696,
      "loss": 1.5839,
      "step": 228
    },
    {
      "epoch": 0.05513754288809968,
      "grad_norm": 2.4820916652679443,
      "learning_rate": 0.000199677658510944,
      "loss": 1.2341,
      "step": 229
    },
    {
      "epoch": 0.055378318184554264,
      "grad_norm": 16.423561096191406,
      "learning_rate": 0.0001996713725161775,
      "loss": 2.0571,
      "step": 230
    },
    {
      "epoch": 0.055619093481008845,
      "grad_norm": 10.467788696289062,
      "learning_rate": 0.00019966502592099188,
      "loss": 1.5059,
      "step": 231
    },
    {
      "epoch": 0.055859868777463434,
      "grad_norm": 3.1604106426239014,
      "learning_rate": 0.000199658618729246,
      "loss": 1.099,
      "step": 232
    },
    {
      "epoch": 0.056100644073918016,
      "grad_norm": 9.024856567382812,
      "learning_rate": 0.00019965215094483539,
      "loss": 1.2078,
      "step": 233
    },
    {
      "epoch": 0.0563414193703726,
      "grad_norm": 2.999100923538208,
      "learning_rate": 0.00019964562257169247,
      "loss": 0.792,
      "step": 234
    },
    {
      "epoch": 0.056582194666827186,
      "grad_norm": 16.542631149291992,
      "learning_rate": 0.00019963903361378655,
      "loss": 2.7538,
      "step": 235
    },
    {
      "epoch": 0.05682296996328177,
      "grad_norm": 5.134494781494141,
      "learning_rate": 0.00019963238407512366,
      "loss": 2.8096,
      "step": 236
    },
    {
      "epoch": 0.05706374525973635,
      "grad_norm": 7.121161460876465,
      "learning_rate": 0.0001996256739597468,
      "loss": 1.1293,
      "step": 237
    },
    {
      "epoch": 0.05730452055619094,
      "grad_norm": 3.483020544052124,
      "learning_rate": 0.00019961890327173574,
      "loss": 1.8818,
      "step": 238
    },
    {
      "epoch": 0.05754529585264552,
      "grad_norm": 4.338151454925537,
      "learning_rate": 0.00019961207201520703,
      "loss": 1.0518,
      "step": 239
    },
    {
      "epoch": 0.0577860711491001,
      "grad_norm": 6.399717807769775,
      "learning_rate": 0.00019960518019431408,
      "loss": 1.2687,
      "step": 240
    },
    {
      "epoch": 0.05802684644555468,
      "grad_norm": 1.5952820777893066,
      "learning_rate": 0.00019959822781324718,
      "loss": 0.6345,
      "step": 241
    },
    {
      "epoch": 0.05826762174200927,
      "grad_norm": 9.329618453979492,
      "learning_rate": 0.0001995912148762334,
      "loss": 1.3564,
      "step": 242
    },
    {
      "epoch": 0.058508397038463854,
      "grad_norm": 7.548645973205566,
      "learning_rate": 0.00019958414138753657,
      "loss": 1.0375,
      "step": 243
    },
    {
      "epoch": 0.058749172334918436,
      "grad_norm": 18.993824005126953,
      "learning_rate": 0.00019957700735145738,
      "loss": 2.458,
      "step": 244
    },
    {
      "epoch": 0.058989947631373024,
      "grad_norm": 20.46088981628418,
      "learning_rate": 0.0001995698127723334,
      "loss": 2.1789,
      "step": 245
    },
    {
      "epoch": 0.059230722927827606,
      "grad_norm": 6.136659145355225,
      "learning_rate": 0.00019956255765453892,
      "loss": 1.3776,
      "step": 246
    },
    {
      "epoch": 0.05947149822428219,
      "grad_norm": 61.323387145996094,
      "learning_rate": 0.00019955524200248505,
      "loss": 1.657,
      "step": 247
    },
    {
      "epoch": 0.05971227352073677,
      "grad_norm": 4.754699230194092,
      "learning_rate": 0.00019954786582061977,
      "loss": 1.0319,
      "step": 248
    },
    {
      "epoch": 0.05995304881719136,
      "grad_norm": 10.321673393249512,
      "learning_rate": 0.0001995404291134278,
      "loss": 2.1272,
      "step": 249
    },
    {
      "epoch": 0.06019382411364594,
      "grad_norm": 8.861504554748535,
      "learning_rate": 0.0001995329318854306,
      "loss": 1.4962,
      "step": 250
    },
    {
      "epoch": 0.06043459941010052,
      "grad_norm": 44.048126220703125,
      "learning_rate": 0.0001995253741411866,
      "loss": 2.3729,
      "step": 251
    },
    {
      "epoch": 0.06067537470655511,
      "grad_norm": 2.5337188243865967,
      "learning_rate": 0.0001995177558852909,
      "loss": 0.564,
      "step": 252
    },
    {
      "epoch": 0.06091615000300969,
      "grad_norm": 11.171781539916992,
      "learning_rate": 0.0001995100771223754,
      "loss": 1.2622,
      "step": 253
    },
    {
      "epoch": 0.061156925299464274,
      "grad_norm": 10.259223937988281,
      "learning_rate": 0.0001995023378571088,
      "loss": 1.9257,
      "step": 254
    },
    {
      "epoch": 0.061397700595918855,
      "grad_norm": 8.954612731933594,
      "learning_rate": 0.0001994945380941966,
      "loss": 1.008,
      "step": 255
    },
    {
      "epoch": 0.061638475892373444,
      "grad_norm": 26.720203399658203,
      "learning_rate": 0.0001994866778383811,
      "loss": 1.8667,
      "step": 256
    },
    {
      "epoch": 0.061879251188828026,
      "grad_norm": 8.071576118469238,
      "learning_rate": 0.00019947875709444131,
      "loss": 1.5516,
      "step": 257
    },
    {
      "epoch": 0.06212002648528261,
      "grad_norm": 1.8321843147277832,
      "learning_rate": 0.00019947077586719307,
      "loss": 0.8952,
      "step": 258
    },
    {
      "epoch": 0.062360801781737196,
      "grad_norm": 10.932100296020508,
      "learning_rate": 0.000199462734161489,
      "loss": 1.3651,
      "step": 259
    },
    {
      "epoch": 0.06260157707819178,
      "grad_norm": 4.2766828536987305,
      "learning_rate": 0.00019945463198221846,
      "loss": 1.0024,
      "step": 260
    },
    {
      "epoch": 0.06284235237464636,
      "grad_norm": 7.300168991088867,
      "learning_rate": 0.00019944646933430762,
      "loss": 1.1335,
      "step": 261
    },
    {
      "epoch": 0.06308312767110094,
      "grad_norm": 2.313037395477295,
      "learning_rate": 0.00019943824622271935,
      "loss": 1.1619,
      "step": 262
    },
    {
      "epoch": 0.06332390296755552,
      "grad_norm": 3.020617961883545,
      "learning_rate": 0.00019942996265245335,
      "loss": 1.0354,
      "step": 263
    },
    {
      "epoch": 0.06356467826401012,
      "grad_norm": 14.848864555358887,
      "learning_rate": 0.00019942161862854601,
      "loss": 1.4934,
      "step": 264
    },
    {
      "epoch": 0.0638054535604647,
      "grad_norm": 8.351217269897461,
      "learning_rate": 0.0001994132141560706,
      "loss": 1.5487,
      "step": 265
    },
    {
      "epoch": 0.06404622885691928,
      "grad_norm": 5.606395244598389,
      "learning_rate": 0.00019940474924013698,
      "loss": 1.6069,
      "step": 266
    },
    {
      "epoch": 0.06428700415337386,
      "grad_norm": 5.64864444732666,
      "learning_rate": 0.00019939622388589183,
      "loss": 1.0187,
      "step": 267
    },
    {
      "epoch": 0.06452777944982845,
      "grad_norm": 8.374507904052734,
      "learning_rate": 0.00019938763809851864,
      "loss": 1.2051,
      "step": 268
    },
    {
      "epoch": 0.06476855474628303,
      "grad_norm": 3.38839054107666,
      "learning_rate": 0.00019937899188323757,
      "loss": 0.6262,
      "step": 269
    },
    {
      "epoch": 0.06500933004273761,
      "grad_norm": 2.6535820960998535,
      "learning_rate": 0.00019937028524530552,
      "loss": 0.6246,
      "step": 270
    },
    {
      "epoch": 0.0652501053391922,
      "grad_norm": 5.218156337738037,
      "learning_rate": 0.00019936151819001618,
      "loss": 1.8041,
      "step": 271
    },
    {
      "epoch": 0.06549088063564679,
      "grad_norm": 6.953288555145264,
      "learning_rate": 0.00019935269072269987,
      "loss": 0.886,
      "step": 272
    },
    {
      "epoch": 0.06573165593210137,
      "grad_norm": 3.9206128120422363,
      "learning_rate": 0.00019934380284872377,
      "loss": 0.5613,
      "step": 273
    },
    {
      "epoch": 0.06597243122855595,
      "grad_norm": 3.6051864624023438,
      "learning_rate": 0.00019933485457349174,
      "loss": 0.8749,
      "step": 274
    },
    {
      "epoch": 0.06621320652501053,
      "grad_norm": 3.60562801361084,
      "learning_rate": 0.00019932584590244434,
      "loss": 0.892,
      "step": 275
    },
    {
      "epoch": 0.06645398182146511,
      "grad_norm": 3.6393070220947266,
      "learning_rate": 0.0001993167768410588,
      "loss": 0.8526,
      "step": 276
    },
    {
      "epoch": 0.0666947571179197,
      "grad_norm": 10.952275276184082,
      "learning_rate": 0.0001993076473948492,
      "loss": 1.1597,
      "step": 277
    },
    {
      "epoch": 0.06693553241437429,
      "grad_norm": 9.785892486572266,
      "learning_rate": 0.00019929845756936626,
      "loss": 1.1667,
      "step": 278
    },
    {
      "epoch": 0.06717630771082887,
      "grad_norm": 8.532158851623535,
      "learning_rate": 0.00019928920737019733,
      "loss": 1.4692,
      "step": 279
    },
    {
      "epoch": 0.06741708300728345,
      "grad_norm": 12.74774169921875,
      "learning_rate": 0.00019927989680296667,
      "loss": 2.1035,
      "step": 280
    },
    {
      "epoch": 0.06765785830373804,
      "grad_norm": 10.734175682067871,
      "learning_rate": 0.00019927052587333507,
      "loss": 1.8876,
      "step": 281
    },
    {
      "epoch": 0.06789863360019262,
      "grad_norm": 4.373108863830566,
      "learning_rate": 0.00019926109458700007,
      "loss": 0.9184,
      "step": 282
    },
    {
      "epoch": 0.0681394088966472,
      "grad_norm": 7.9593281745910645,
      "learning_rate": 0.00019925160294969593,
      "loss": 1.2637,
      "step": 283
    },
    {
      "epoch": 0.06838018419310178,
      "grad_norm": 5.800394058227539,
      "learning_rate": 0.0001992420509671936,
      "loss": 0.8262,
      "step": 284
    },
    {
      "epoch": 0.06862095948955638,
      "grad_norm": 5.995545864105225,
      "learning_rate": 0.00019923243864530064,
      "loss": 1.6762,
      "step": 285
    },
    {
      "epoch": 0.06886173478601096,
      "grad_norm": 21.66741371154785,
      "learning_rate": 0.00019922276598986145,
      "loss": 1.1287,
      "step": 286
    },
    {
      "epoch": 0.06910251008246554,
      "grad_norm": 12.231538772583008,
      "learning_rate": 0.00019921303300675697,
      "loss": 1.4966,
      "step": 287
    },
    {
      "epoch": 0.06934328537892012,
      "grad_norm": 19.181198120117188,
      "learning_rate": 0.00019920323970190487,
      "loss": 1.7811,
      "step": 288
    },
    {
      "epoch": 0.0695840606753747,
      "grad_norm": 4.649646282196045,
      "learning_rate": 0.00019919338608125956,
      "loss": 1.2632,
      "step": 289
    },
    {
      "epoch": 0.06982483597182929,
      "grad_norm": 5.04226541519165,
      "learning_rate": 0.00019918347215081204,
      "loss": 1.3552,
      "step": 290
    },
    {
      "epoch": 0.07006561126828387,
      "grad_norm": 4.240399360656738,
      "learning_rate": 0.00019917349791658996,
      "loss": 1.2266,
      "step": 291
    },
    {
      "epoch": 0.07030638656473846,
      "grad_norm": 13.989855766296387,
      "learning_rate": 0.0001991634633846577,
      "loss": 1.429,
      "step": 292
    },
    {
      "epoch": 0.07054716186119304,
      "grad_norm": 8.629983901977539,
      "learning_rate": 0.00019915336856111631,
      "loss": 1.0381,
      "step": 293
    },
    {
      "epoch": 0.07078793715764763,
      "grad_norm": 14.188498497009277,
      "learning_rate": 0.00019914321345210342,
      "loss": 2.8836,
      "step": 294
    },
    {
      "epoch": 0.07102871245410221,
      "grad_norm": 8.33694076538086,
      "learning_rate": 0.00019913299806379334,
      "loss": 0.6366,
      "step": 295
    },
    {
      "epoch": 0.07126948775055679,
      "grad_norm": 16.30498695373535,
      "learning_rate": 0.00019912272240239716,
      "loss": 1.3799,
      "step": 296
    },
    {
      "epoch": 0.07151026304701137,
      "grad_norm": 5.43389368057251,
      "learning_rate": 0.00019911238647416242,
      "loss": 1.1131,
      "step": 297
    },
    {
      "epoch": 0.07175103834346597,
      "grad_norm": 20.10192108154297,
      "learning_rate": 0.00019910199028537337,
      "loss": 1.1515,
      "step": 298
    },
    {
      "epoch": 0.07199181363992055,
      "grad_norm": 3.4195728302001953,
      "learning_rate": 0.00019909153384235095,
      "loss": 0.5817,
      "step": 299
    },
    {
      "epoch": 0.07223258893637513,
      "grad_norm": 6.387148857116699,
      "learning_rate": 0.00019908101715145272,
      "loss": 0.7634,
      "step": 300
    },
    {
      "epoch": 0.07247336423282971,
      "grad_norm": 4.05348539352417,
      "learning_rate": 0.00019907044021907281,
      "loss": 0.8352,
      "step": 301
    },
    {
      "epoch": 0.0727141395292843,
      "grad_norm": 2.757005214691162,
      "learning_rate": 0.00019905980305164205,
      "loss": 0.7532,
      "step": 302
    },
    {
      "epoch": 0.07295491482573888,
      "grad_norm": 2.14371919631958,
      "learning_rate": 0.00019904910565562785,
      "loss": 1.2168,
      "step": 303
    },
    {
      "epoch": 0.07319569012219346,
      "grad_norm": 5.939690589904785,
      "learning_rate": 0.00019903834803753425,
      "loss": 0.8704,
      "step": 304
    },
    {
      "epoch": 0.07343646541864805,
      "grad_norm": 7.156602382659912,
      "learning_rate": 0.0001990275302039019,
      "loss": 0.8243,
      "step": 305
    },
    {
      "epoch": 0.07367724071510263,
      "grad_norm": 3.6926629543304443,
      "learning_rate": 0.00019901665216130808,
      "loss": 0.8763,
      "step": 306
    },
    {
      "epoch": 0.07391801601155722,
      "grad_norm": 7.309814453125,
      "learning_rate": 0.00019900571391636665,
      "loss": 0.7731,
      "step": 307
    },
    {
      "epoch": 0.0741587913080118,
      "grad_norm": 12.59055233001709,
      "learning_rate": 0.00019899471547572811,
      "loss": 1.0003,
      "step": 308
    },
    {
      "epoch": 0.07439956660446638,
      "grad_norm": 3.9260809421539307,
      "learning_rate": 0.00019898365684607952,
      "loss": 0.9478,
      "step": 309
    },
    {
      "epoch": 0.07464034190092096,
      "grad_norm": 3.1046080589294434,
      "learning_rate": 0.00019897253803414456,
      "loss": 0.7514,
      "step": 310
    },
    {
      "epoch": 0.07488111719737554,
      "grad_norm": 2.8333990573883057,
      "learning_rate": 0.0001989613590466835,
      "loss": 0.4307,
      "step": 311
    },
    {
      "epoch": 0.07512189249383014,
      "grad_norm": 11.99578857421875,
      "learning_rate": 0.00019895011989049316,
      "loss": 1.0123,
      "step": 312
    },
    {
      "epoch": 0.07536266779028472,
      "grad_norm": 2.916750431060791,
      "learning_rate": 0.000198938820572407,
      "loss": 0.9809,
      "step": 313
    },
    {
      "epoch": 0.0756034430867393,
      "grad_norm": 3.6491167545318604,
      "learning_rate": 0.00019892746109929498,
      "loss": 0.3447,
      "step": 314
    },
    {
      "epoch": 0.07584421838319388,
      "grad_norm": 3.625203847885132,
      "learning_rate": 0.00019891604147806376,
      "loss": 1.0226,
      "step": 315
    },
    {
      "epoch": 0.07608499367964847,
      "grad_norm": 3.9918270111083984,
      "learning_rate": 0.00019890456171565643,
      "loss": 0.6953,
      "step": 316
    },
    {
      "epoch": 0.07632576897610305,
      "grad_norm": 7.0212554931640625,
      "learning_rate": 0.00019889302181905278,
      "loss": 1.1393,
      "step": 317
    },
    {
      "epoch": 0.07656654427255763,
      "grad_norm": 6.060014247894287,
      "learning_rate": 0.00019888142179526902,
      "loss": 0.9609,
      "step": 318
    },
    {
      "epoch": 0.07680731956901223,
      "grad_norm": 6.098717212677002,
      "learning_rate": 0.00019886976165135807,
      "loss": 1.3731,
      "step": 319
    },
    {
      "epoch": 0.0770480948654668,
      "grad_norm": 8.985902786254883,
      "learning_rate": 0.00019885804139440925,
      "loss": 1.4469,
      "step": 320
    },
    {
      "epoch": 0.07728887016192139,
      "grad_norm": 6.856400966644287,
      "learning_rate": 0.00019884626103154856,
      "loss": 1.4352,
      "step": 321
    },
    {
      "epoch": 0.07752964545837597,
      "grad_norm": 4.309900283813477,
      "learning_rate": 0.00019883442056993841,
      "loss": 0.4605,
      "step": 322
    },
    {
      "epoch": 0.07777042075483055,
      "grad_norm": 2.33298397064209,
      "learning_rate": 0.00019882252001677793,
      "loss": 1.2381,
      "step": 323
    },
    {
      "epoch": 0.07801119605128513,
      "grad_norm": 3.6052260398864746,
      "learning_rate": 0.0001988105593793026,
      "loss": 1.0468,
      "step": 324
    },
    {
      "epoch": 0.07825197134773972,
      "grad_norm": 4.753766059875488,
      "learning_rate": 0.00019879853866478455,
      "loss": 1.1693,
      "step": 325
    },
    {
      "epoch": 0.07849274664419431,
      "grad_norm": 3.6719765663146973,
      "learning_rate": 0.00019878645788053238,
      "loss": 0.7712,
      "step": 326
    },
    {
      "epoch": 0.07873352194064889,
      "grad_norm": 3.6164121627807617,
      "learning_rate": 0.00019877431703389128,
      "loss": 1.2832,
      "step": 327
    },
    {
      "epoch": 0.07897429723710347,
      "grad_norm": 9.66127872467041,
      "learning_rate": 0.00019876211613224288,
      "loss": 2.2482,
      "step": 328
    },
    {
      "epoch": 0.07921507253355806,
      "grad_norm": 2.208888053894043,
      "learning_rate": 0.00019874985518300532,
      "loss": 1.1646,
      "step": 329
    },
    {
      "epoch": 0.07945584783001264,
      "grad_norm": 1.7235151529312134,
      "learning_rate": 0.00019873753419363336,
      "loss": 0.5038,
      "step": 330
    },
    {
      "epoch": 0.07969662312646722,
      "grad_norm": 1.9844493865966797,
      "learning_rate": 0.00019872515317161812,
      "loss": 1.1001,
      "step": 331
    },
    {
      "epoch": 0.0799373984229218,
      "grad_norm": 7.393949508666992,
      "learning_rate": 0.00019871271212448734,
      "loss": 1.7001,
      "step": 332
    },
    {
      "epoch": 0.0801781737193764,
      "grad_norm": 10.367690086364746,
      "learning_rate": 0.00019870021105980522,
      "loss": 0.8829,
      "step": 333
    },
    {
      "epoch": 0.08041894901583098,
      "grad_norm": 6.111469745635986,
      "learning_rate": 0.00019868764998517236,
      "loss": 1.6088,
      "step": 334
    },
    {
      "epoch": 0.08065972431228556,
      "grad_norm": 4.986114978790283,
      "learning_rate": 0.00019867502890822598,
      "loss": 0.3513,
      "step": 335
    },
    {
      "epoch": 0.08090049960874014,
      "grad_norm": 4.137001037597656,
      "learning_rate": 0.00019866234783663968,
      "loss": 1.1246,
      "step": 336
    },
    {
      "epoch": 0.08114127490519472,
      "grad_norm": 2.4128201007843018,
      "learning_rate": 0.00019864960677812364,
      "loss": 0.7535,
      "step": 337
    },
    {
      "epoch": 0.0813820502016493,
      "grad_norm": 19.265674591064453,
      "learning_rate": 0.0001986368057404244,
      "loss": 1.0217,
      "step": 338
    },
    {
      "epoch": 0.0816228254981039,
      "grad_norm": 5.218925952911377,
      "learning_rate": 0.00019862394473132503,
      "loss": 0.6478,
      "step": 339
    },
    {
      "epoch": 0.08186360079455848,
      "grad_norm": 9.463326454162598,
      "learning_rate": 0.00019861102375864508,
      "loss": 0.4951,
      "step": 340
    },
    {
      "epoch": 0.08210437609101306,
      "grad_norm": 4.882657527923584,
      "learning_rate": 0.0001985980428302405,
      "loss": 0.5187,
      "step": 341
    },
    {
      "epoch": 0.08234515138746765,
      "grad_norm": 9.088946342468262,
      "learning_rate": 0.00019858500195400373,
      "loss": 1.6635,
      "step": 342
    },
    {
      "epoch": 0.08258592668392223,
      "grad_norm": 3.0154218673706055,
      "learning_rate": 0.0001985719011378637,
      "loss": 1.4851,
      "step": 343
    },
    {
      "epoch": 0.08282670198037681,
      "grad_norm": 8.918438911437988,
      "learning_rate": 0.00019855874038978563,
      "loss": 0.8483,
      "step": 344
    },
    {
      "epoch": 0.08306747727683139,
      "grad_norm": 3.460216760635376,
      "learning_rate": 0.00019854551971777137,
      "loss": 0.858,
      "step": 345
    },
    {
      "epoch": 0.08330825257328599,
      "grad_norm": 5.214385032653809,
      "learning_rate": 0.00019853223912985913,
      "loss": 0.6952,
      "step": 346
    },
    {
      "epoch": 0.08354902786974057,
      "grad_norm": 9.299979209899902,
      "learning_rate": 0.00019851889863412345,
      "loss": 0.5402,
      "step": 347
    },
    {
      "epoch": 0.08378980316619515,
      "grad_norm": 3.114903211593628,
      "learning_rate": 0.0001985054982386755,
      "loss": 0.5039,
      "step": 348
    },
    {
      "epoch": 0.08403057846264973,
      "grad_norm": 1.686824917793274,
      "learning_rate": 0.00019849203795166263,
      "loss": 0.5443,
      "step": 349
    },
    {
      "epoch": 0.08427135375910431,
      "grad_norm": 21.62729835510254,
      "learning_rate": 0.00019847851778126877,
      "loss": 0.9847,
      "step": 350
    },
    {
      "epoch": 0.0845121290555589,
      "grad_norm": 1.997676134109497,
      "learning_rate": 0.00019846493773571425,
      "loss": 0.2535,
      "step": 351
    },
    {
      "epoch": 0.08475290435201348,
      "grad_norm": 3.0039217472076416,
      "learning_rate": 0.0001984512978232558,
      "loss": 1.1073,
      "step": 352
    },
    {
      "epoch": 0.08499367964846807,
      "grad_norm": 1.8206866979599,
      "learning_rate": 0.00019843759805218637,
      "loss": 1.4459,
      "step": 353
    },
    {
      "epoch": 0.08523445494492266,
      "grad_norm": 2.975524663925171,
      "learning_rate": 0.0001984238384308356,
      "loss": 1.5481,
      "step": 354
    },
    {
      "epoch": 0.08547523024137724,
      "grad_norm": 2.0778095722198486,
      "learning_rate": 0.0001984100189675693,
      "loss": 0.8862,
      "step": 355
    },
    {
      "epoch": 0.08571600553783182,
      "grad_norm": 15.60510540008545,
      "learning_rate": 0.0001983961396707897,
      "loss": 0.6816,
      "step": 356
    },
    {
      "epoch": 0.0859567808342864,
      "grad_norm": 3.4831383228302,
      "learning_rate": 0.00019838220054893552,
      "loss": 0.6734,
      "step": 357
    },
    {
      "epoch": 0.08619755613074098,
      "grad_norm": 3.5622880458831787,
      "learning_rate": 0.00019836820161048176,
      "loss": 1.166,
      "step": 358
    },
    {
      "epoch": 0.08643833142719556,
      "grad_norm": 0.6584992408752441,
      "learning_rate": 0.00019835414286393979,
      "loss": 0.563,
      "step": 359
    },
    {
      "epoch": 0.08667910672365016,
      "grad_norm": 3.729058027267456,
      "learning_rate": 0.00019834002431785735,
      "loss": 0.8303,
      "step": 360
    },
    {
      "epoch": 0.08691988202010474,
      "grad_norm": 5.881722450256348,
      "learning_rate": 0.0001983258459808186,
      "loss": 1.3364,
      "step": 361
    },
    {
      "epoch": 0.08716065731655932,
      "grad_norm": 5.635914325714111,
      "learning_rate": 0.00019831160786144394,
      "loss": 0.7647,
      "step": 362
    },
    {
      "epoch": 0.0874014326130139,
      "grad_norm": 3.507514715194702,
      "learning_rate": 0.0001982973099683902,
      "loss": 0.5602,
      "step": 363
    },
    {
      "epoch": 0.08764220790946849,
      "grad_norm": 5.673732757568359,
      "learning_rate": 0.00019828295231035051,
      "loss": 0.946,
      "step": 364
    },
    {
      "epoch": 0.08788298320592307,
      "grad_norm": 2.3530821800231934,
      "learning_rate": 0.0001982685348960544,
      "loss": 0.3095,
      "step": 365
    },
    {
      "epoch": 0.08812375850237765,
      "grad_norm": 3.0282411575317383,
      "learning_rate": 0.00019825405773426767,
      "loss": 0.791,
      "step": 366
    },
    {
      "epoch": 0.08836453379883225,
      "grad_norm": 5.266041278839111,
      "learning_rate": 0.0001982395208337925,
      "loss": 1.4795,
      "step": 367
    },
    {
      "epoch": 0.08860530909528683,
      "grad_norm": 2.5949831008911133,
      "learning_rate": 0.0001982249242034673,
      "loss": 0.4774,
      "step": 368
    },
    {
      "epoch": 0.08884608439174141,
      "grad_norm": 2.186204195022583,
      "learning_rate": 0.00019821026785216687,
      "loss": 0.8617,
      "step": 369
    },
    {
      "epoch": 0.08908685968819599,
      "grad_norm": 18.886642456054688,
      "learning_rate": 0.00019819555178880234,
      "loss": 0.7616,
      "step": 370
    },
    {
      "epoch": 0.08932763498465057,
      "grad_norm": 2.88727068901062,
      "learning_rate": 0.00019818077602232106,
      "loss": 0.5059,
      "step": 371
    },
    {
      "epoch": 0.08956841028110515,
      "grad_norm": 6.770381927490234,
      "learning_rate": 0.00019816594056170676,
      "loss": 1.5388,
      "step": 372
    },
    {
      "epoch": 0.08980918557755974,
      "grad_norm": 4.677947044372559,
      "learning_rate": 0.00019815104541597944,
      "loss": 0.5632,
      "step": 373
    },
    {
      "epoch": 0.09004996087401433,
      "grad_norm": 2.849351406097412,
      "learning_rate": 0.00019813609059419538,
      "loss": 0.3689,
      "step": 374
    },
    {
      "epoch": 0.09029073617046891,
      "grad_norm": 2.1919734477996826,
      "learning_rate": 0.0001981210761054471,
      "loss": 1.003,
      "step": 375
    },
    {
      "epoch": 0.0905315114669235,
      "grad_norm": 3.296410083770752,
      "learning_rate": 0.0001981060019588635,
      "loss": 0.5615,
      "step": 376
    },
    {
      "epoch": 0.09077228676337808,
      "grad_norm": 2.373533248901367,
      "learning_rate": 0.00019809086816360968,
      "loss": 0.7389,
      "step": 377
    },
    {
      "epoch": 0.09101306205983266,
      "grad_norm": 4.461115837097168,
      "learning_rate": 0.00019807567472888702,
      "loss": 0.98,
      "step": 378
    },
    {
      "epoch": 0.09125383735628724,
      "grad_norm": 6.4342427253723145,
      "learning_rate": 0.00019806042166393314,
      "loss": 0.8969,
      "step": 379
    },
    {
      "epoch": 0.09149461265274182,
      "grad_norm": 2.5169475078582764,
      "learning_rate": 0.00019804510897802197,
      "loss": 0.3081,
      "step": 380
    },
    {
      "epoch": 0.09173538794919642,
      "grad_norm": 5.742027282714844,
      "learning_rate": 0.00019802973668046363,
      "loss": 1.2418,
      "step": 381
    },
    {
      "epoch": 0.091976163245651,
      "grad_norm": 1.0783274173736572,
      "learning_rate": 0.00019801430478060453,
      "loss": 0.4456,
      "step": 382
    },
    {
      "epoch": 0.09221693854210558,
      "grad_norm": 5.443319797515869,
      "learning_rate": 0.0001979988132878273,
      "loss": 1.0314,
      "step": 383
    },
    {
      "epoch": 0.09245771383856016,
      "grad_norm": 1.8633432388305664,
      "learning_rate": 0.00019798326221155078,
      "loss": 1.3362,
      "step": 384
    },
    {
      "epoch": 0.09269848913501474,
      "grad_norm": 8.395817756652832,
      "learning_rate": 0.00019796765156123008,
      "loss": 1.7206,
      "step": 385
    },
    {
      "epoch": 0.09293926443146933,
      "grad_norm": 0.9301803112030029,
      "learning_rate": 0.00019795198134635653,
      "loss": 0.3155,
      "step": 386
    },
    {
      "epoch": 0.09318003972792392,
      "grad_norm": 6.0776047706604,
      "learning_rate": 0.00019793625157645762,
      "loss": 0.7454,
      "step": 387
    },
    {
      "epoch": 0.0934208150243785,
      "grad_norm": 4.320910453796387,
      "learning_rate": 0.00019792046226109708,
      "loss": 0.9696,
      "step": 388
    },
    {
      "epoch": 0.09366159032083309,
      "grad_norm": 8.204424858093262,
      "learning_rate": 0.0001979046134098749,
      "loss": 1.1431,
      "step": 389
    },
    {
      "epoch": 0.09390236561728767,
      "grad_norm": 0.629797101020813,
      "learning_rate": 0.00019788870503242715,
      "loss": 0.4199,
      "step": 390
    },
    {
      "epoch": 0.09414314091374225,
      "grad_norm": 3.0499680042266846,
      "learning_rate": 0.00019787273713842623,
      "loss": 0.722,
      "step": 391
    },
    {
      "epoch": 0.09438391621019683,
      "grad_norm": 2.0613560676574707,
      "learning_rate": 0.00019785670973758058,
      "loss": 0.8111,
      "step": 392
    },
    {
      "epoch": 0.09462469150665141,
      "grad_norm": 14.847646713256836,
      "learning_rate": 0.00019784062283963495,
      "loss": 1.0207,
      "step": 393
    },
    {
      "epoch": 0.09486546680310601,
      "grad_norm": 2.1953060626983643,
      "learning_rate": 0.00019782447645437022,
      "loss": 0.3284,
      "step": 394
    },
    {
      "epoch": 0.09510624209956059,
      "grad_norm": 6.55955171585083,
      "learning_rate": 0.00019780827059160338,
      "loss": 1.3168,
      "step": 395
    },
    {
      "epoch": 0.09534701739601517,
      "grad_norm": 4.817495822906494,
      "learning_rate": 0.0001977920052611877,
      "loss": 0.6965,
      "step": 396
    },
    {
      "epoch": 0.09558779269246975,
      "grad_norm": 2.0958549976348877,
      "learning_rate": 0.00019777568047301243,
      "loss": 1.2996,
      "step": 397
    },
    {
      "epoch": 0.09582856798892433,
      "grad_norm": 3.6508209705352783,
      "learning_rate": 0.00019775929623700318,
      "loss": 0.4667,
      "step": 398
    },
    {
      "epoch": 0.09606934328537892,
      "grad_norm": 4.169986724853516,
      "learning_rate": 0.00019774285256312152,
      "loss": 1.0308,
      "step": 399
    },
    {
      "epoch": 0.0963101185818335,
      "grad_norm": 4.545289516448975,
      "learning_rate": 0.00019772634946136535,
      "loss": 1.4587,
      "step": 400
    },
    {
      "epoch": 0.0965508938782881,
      "grad_norm": 2.637938976287842,
      "learning_rate": 0.00019770978694176846,
      "loss": 0.7042,
      "step": 401
    },
    {
      "epoch": 0.09679166917474268,
      "grad_norm": 5.515408992767334,
      "learning_rate": 0.00019769316501440102,
      "loss": 1.0088,
      "step": 402
    },
    {
      "epoch": 0.09703244447119726,
      "grad_norm": 1.7717092037200928,
      "learning_rate": 0.00019767648368936914,
      "loss": 0.3585,
      "step": 403
    },
    {
      "epoch": 0.09727321976765184,
      "grad_norm": 5.126103401184082,
      "learning_rate": 0.0001976597429768151,
      "loss": 1.5234,
      "step": 404
    },
    {
      "epoch": 0.09751399506410642,
      "grad_norm": 0.473143607378006,
      "learning_rate": 0.00019764294288691727,
      "loss": 0.2934,
      "step": 405
    },
    {
      "epoch": 0.097754770360561,
      "grad_norm": 7.283068656921387,
      "learning_rate": 0.0001976260834298902,
      "loss": 1.1666,
      "step": 406
    },
    {
      "epoch": 0.09799554565701558,
      "grad_norm": 2.16549015045166,
      "learning_rate": 0.00019760916461598446,
      "loss": 0.4612,
      "step": 407
    },
    {
      "epoch": 0.09823632095347018,
      "grad_norm": 1.2254639863967896,
      "learning_rate": 0.0001975921864554867,
      "loss": 0.7512,
      "step": 408
    },
    {
      "epoch": 0.09847709624992476,
      "grad_norm": 1.8601148128509521,
      "learning_rate": 0.0001975751489587197,
      "loss": 0.8824,
      "step": 409
    },
    {
      "epoch": 0.09871787154637934,
      "grad_norm": 2.0946712493896484,
      "learning_rate": 0.0001975580521360423,
      "loss": 0.6299,
      "step": 410
    },
    {
      "epoch": 0.09895864684283392,
      "grad_norm": 5.10854434967041,
      "learning_rate": 0.00019754089599784938,
      "loss": 1.3609,
      "step": 411
    },
    {
      "epoch": 0.0991994221392885,
      "grad_norm": 2.166837453842163,
      "learning_rate": 0.00019752368055457197,
      "loss": 0.7314,
      "step": 412
    },
    {
      "epoch": 0.09944019743574309,
      "grad_norm": 3.2308640480041504,
      "learning_rate": 0.00019750640581667702,
      "loss": 1.679,
      "step": 413
    },
    {
      "epoch": 0.09968097273219767,
      "grad_norm": 3.175098180770874,
      "learning_rate": 0.00019748907179466767,
      "loss": 0.7682,
      "step": 414
    },
    {
      "epoch": 0.09992174802865227,
      "grad_norm": 6.487977981567383,
      "learning_rate": 0.00019747167849908304,
      "loss": 0.926,
      "step": 415
    },
    {
      "epoch": 0.10016252332510685,
      "grad_norm": 3.602936029434204,
      "learning_rate": 0.00019745422594049825,
      "loss": 1.0786,
      "step": 416
    },
    {
      "epoch": 0.10040329862156143,
      "grad_norm": 2.2537026405334473,
      "learning_rate": 0.00019743671412952453,
      "loss": 0.3749,
      "step": 417
    },
    {
      "epoch": 0.10064407391801601,
      "grad_norm": 3.4394688606262207,
      "learning_rate": 0.00019741914307680908,
      "loss": 0.6582,
      "step": 418
    },
    {
      "epoch": 0.10088484921447059,
      "grad_norm": 4.710788726806641,
      "learning_rate": 0.00019740151279303518,
      "loss": 1.0236,
      "step": 419
    },
    {
      "epoch": 0.10112562451092517,
      "grad_norm": 2.518106698989868,
      "learning_rate": 0.000197383823288922,
      "loss": 0.7708,
      "step": 420
    },
    {
      "epoch": 0.10136639980737976,
      "grad_norm": 2.9978835582733154,
      "learning_rate": 0.0001973660745752249,
      "loss": 0.4426,
      "step": 421
    },
    {
      "epoch": 0.10160717510383435,
      "grad_norm": 2.2193732261657715,
      "learning_rate": 0.0001973482666627351,
      "loss": 1.0488,
      "step": 422
    },
    {
      "epoch": 0.10184795040028893,
      "grad_norm": 2.385712146759033,
      "learning_rate": 0.0001973303995622798,
      "loss": 0.5798,
      "step": 423
    },
    {
      "epoch": 0.10208872569674352,
      "grad_norm": 6.944875240325928,
      "learning_rate": 0.00019731247328472228,
      "loss": 0.9012,
      "step": 424
    },
    {
      "epoch": 0.1023295009931981,
      "grad_norm": 1.5543016195297241,
      "learning_rate": 0.00019729448784096179,
      "loss": 0.5052,
      "step": 425
    },
    {
      "epoch": 0.10257027628965268,
      "grad_norm": 4.3643317222595215,
      "learning_rate": 0.00019727644324193347,
      "loss": 1.0582,
      "step": 426
    },
    {
      "epoch": 0.10281105158610726,
      "grad_norm": 3.4253134727478027,
      "learning_rate": 0.00019725833949860847,
      "loss": 0.8646,
      "step": 427
    },
    {
      "epoch": 0.10305182688256186,
      "grad_norm": 6.012450218200684,
      "learning_rate": 0.00019724017662199397,
      "loss": 0.9271,
      "step": 428
    },
    {
      "epoch": 0.10329260217901644,
      "grad_norm": 4.788900375366211,
      "learning_rate": 0.00019722195462313296,
      "loss": 0.6417,
      "step": 429
    },
    {
      "epoch": 0.10353337747547102,
      "grad_norm": 1.210336446762085,
      "learning_rate": 0.00019720367351310452,
      "loss": 0.6169,
      "step": 430
    },
    {
      "epoch": 0.1037741527719256,
      "grad_norm": 3.0183141231536865,
      "learning_rate": 0.00019718533330302358,
      "loss": 1.077,
      "step": 431
    },
    {
      "epoch": 0.10401492806838018,
      "grad_norm": 1.4695411920547485,
      "learning_rate": 0.000197166934004041,
      "loss": 0.423,
      "step": 432
    },
    {
      "epoch": 0.10425570336483476,
      "grad_norm": 3.7340753078460693,
      "learning_rate": 0.00019714847562734365,
      "loss": 0.506,
      "step": 433
    },
    {
      "epoch": 0.10449647866128935,
      "grad_norm": 0.8714501261711121,
      "learning_rate": 0.00019712995818415424,
      "loss": 0.3461,
      "step": 434
    },
    {
      "epoch": 0.10473725395774394,
      "grad_norm": 1.6766986846923828,
      "learning_rate": 0.00019711138168573142,
      "loss": 0.9932,
      "step": 435
    },
    {
      "epoch": 0.10497802925419852,
      "grad_norm": 2.76531720161438,
      "learning_rate": 0.00019709274614336975,
      "loss": 0.7046,
      "step": 436
    },
    {
      "epoch": 0.1052188045506531,
      "grad_norm": 6.036025524139404,
      "learning_rate": 0.00019707405156839966,
      "loss": 0.9637,
      "step": 437
    },
    {
      "epoch": 0.10545957984710769,
      "grad_norm": 4.022448539733887,
      "learning_rate": 0.0001970552979721875,
      "loss": 0.2239,
      "step": 438
    },
    {
      "epoch": 0.10570035514356227,
      "grad_norm": 2.6519360542297363,
      "learning_rate": 0.0001970364853661355,
      "loss": 1.0182,
      "step": 439
    },
    {
      "epoch": 0.10594113044001685,
      "grad_norm": 5.855311870574951,
      "learning_rate": 0.0001970176137616818,
      "loss": 0.7519,
      "step": 440
    },
    {
      "epoch": 0.10618190573647143,
      "grad_norm": 3.5491368770599365,
      "learning_rate": 0.00019699868317030035,
      "loss": 0.8588,
      "step": 441
    },
    {
      "epoch": 0.10642268103292603,
      "grad_norm": 4.17829704284668,
      "learning_rate": 0.00019697969360350098,
      "loss": 0.9785,
      "step": 442
    },
    {
      "epoch": 0.10666345632938061,
      "grad_norm": 4.346673488616943,
      "learning_rate": 0.00019696064507282937,
      "loss": 0.7598,
      "step": 443
    },
    {
      "epoch": 0.10690423162583519,
      "grad_norm": 2.906926155090332,
      "learning_rate": 0.00019694153758986714,
      "loss": 0.6547,
      "step": 444
    },
    {
      "epoch": 0.10714500692228977,
      "grad_norm": 1.955552339553833,
      "learning_rate": 0.00019692237116623163,
      "loss": 0.925,
      "step": 445
    },
    {
      "epoch": 0.10738578221874436,
      "grad_norm": 4.8115739822387695,
      "learning_rate": 0.00019690314581357607,
      "loss": 0.9647,
      "step": 446
    },
    {
      "epoch": 0.10762655751519894,
      "grad_norm": 2.199876308441162,
      "learning_rate": 0.00019688386154358955,
      "loss": 1.2637,
      "step": 447
    },
    {
      "epoch": 0.10786733281165352,
      "grad_norm": 8.052813529968262,
      "learning_rate": 0.0001968645183679969,
      "loss": 0.7113,
      "step": 448
    },
    {
      "epoch": 0.10810810810810811,
      "grad_norm": 6.857846260070801,
      "learning_rate": 0.00019684511629855888,
      "loss": 1.0796,
      "step": 449
    },
    {
      "epoch": 0.1083488834045627,
      "grad_norm": 3.3255105018615723,
      "learning_rate": 0.00019682565534707194,
      "loss": 0.4504,
      "step": 450
    },
    {
      "epoch": 0.10858965870101728,
      "grad_norm": 4.091807842254639,
      "learning_rate": 0.0001968061355253684,
      "loss": 0.9397,
      "step": 451
    },
    {
      "epoch": 0.10883043399747186,
      "grad_norm": 2.051816701889038,
      "learning_rate": 0.00019678655684531634,
      "loss": 0.5485,
      "step": 452
    },
    {
      "epoch": 0.10907120929392644,
      "grad_norm": 1.8907794952392578,
      "learning_rate": 0.00019676691931881968,
      "loss": 0.567,
      "step": 453
    },
    {
      "epoch": 0.10931198459038102,
      "grad_norm": 4.47649621963501,
      "learning_rate": 0.00019674722295781805,
      "loss": 0.8856,
      "step": 454
    },
    {
      "epoch": 0.1095527598868356,
      "grad_norm": 5.481165409088135,
      "learning_rate": 0.0001967274677742869,
      "loss": 0.4616,
      "step": 455
    },
    {
      "epoch": 0.1097935351832902,
      "grad_norm": 8.510377883911133,
      "learning_rate": 0.0001967076537802374,
      "loss": 0.3674,
      "step": 456
    },
    {
      "epoch": 0.11003431047974478,
      "grad_norm": 3.4752211570739746,
      "learning_rate": 0.00019668778098771647,
      "loss": 0.7903,
      "step": 457
    },
    {
      "epoch": 0.11027508577619936,
      "grad_norm": 3.52034330368042,
      "learning_rate": 0.00019666784940880691,
      "loss": 0.5652,
      "step": 458
    },
    {
      "epoch": 0.11051586107265395,
      "grad_norm": 4.425768852233887,
      "learning_rate": 0.0001966478590556271,
      "loss": 0.6404,
      "step": 459
    },
    {
      "epoch": 0.11075663636910853,
      "grad_norm": 9.201542854309082,
      "learning_rate": 0.00019662780994033125,
      "loss": 1.0613,
      "step": 460
    },
    {
      "epoch": 0.11099741166556311,
      "grad_norm": 3.8637278079986572,
      "learning_rate": 0.00019660770207510924,
      "loss": 1.1498,
      "step": 461
    },
    {
      "epoch": 0.11123818696201769,
      "grad_norm": 5.719259738922119,
      "learning_rate": 0.0001965875354721867,
      "loss": 1.0628,
      "step": 462
    },
    {
      "epoch": 0.11147896225847229,
      "grad_norm": 1.5758776664733887,
      "learning_rate": 0.00019656731014382501,
      "loss": 0.5364,
      "step": 463
    },
    {
      "epoch": 0.11171973755492687,
      "grad_norm": 7.384488582611084,
      "learning_rate": 0.00019654702610232114,
      "loss": 0.7939,
      "step": 464
    },
    {
      "epoch": 0.11196051285138145,
      "grad_norm": 5.359811782836914,
      "learning_rate": 0.0001965266833600079,
      "loss": 0.7241,
      "step": 465
    },
    {
      "epoch": 0.11220128814783603,
      "grad_norm": 3.234246015548706,
      "learning_rate": 0.0001965062819292537,
      "loss": 0.974,
      "step": 466
    },
    {
      "epoch": 0.11244206344429061,
      "grad_norm": 2.34318208694458,
      "learning_rate": 0.00019648582182246266,
      "loss": 0.3588,
      "step": 467
    },
    {
      "epoch": 0.1126828387407452,
      "grad_norm": 3.9500319957733154,
      "learning_rate": 0.0001964653030520746,
      "loss": 0.6119,
      "step": 468
    },
    {
      "epoch": 0.11292361403719978,
      "grad_norm": 2.85276198387146,
      "learning_rate": 0.00019644472563056485,
      "loss": 0.6573,
      "step": 469
    },
    {
      "epoch": 0.11316438933365437,
      "grad_norm": 1.7280099391937256,
      "learning_rate": 0.0001964240895704447,
      "loss": 0.8111,
      "step": 470
    },
    {
      "epoch": 0.11340516463010895,
      "grad_norm": 2.8521628379821777,
      "learning_rate": 0.00019640339488426084,
      "loss": 0.4068,
      "step": 471
    },
    {
      "epoch": 0.11364593992656354,
      "grad_norm": 3.4895570278167725,
      "learning_rate": 0.00019638264158459566,
      "loss": 0.8143,
      "step": 472
    },
    {
      "epoch": 0.11388671522301812,
      "grad_norm": 1.5952945947647095,
      "learning_rate": 0.00019636182968406726,
      "loss": 0.5789,
      "step": 473
    },
    {
      "epoch": 0.1141274905194727,
      "grad_norm": 3.6532886028289795,
      "learning_rate": 0.00019634095919532932,
      "loss": 0.4563,
      "step": 474
    },
    {
      "epoch": 0.11436826581592728,
      "grad_norm": 1.950562596321106,
      "learning_rate": 0.00019632003013107113,
      "loss": 0.6839,
      "step": 475
    },
    {
      "epoch": 0.11460904111238188,
      "grad_norm": 6.8443779945373535,
      "learning_rate": 0.00019629904250401757,
      "loss": 0.5238,
      "step": 476
    },
    {
      "epoch": 0.11484981640883646,
      "grad_norm": 3.7890400886535645,
      "learning_rate": 0.00019627799632692923,
      "loss": 0.6927,
      "step": 477
    },
    {
      "epoch": 0.11509059170529104,
      "grad_norm": 6.215263366699219,
      "learning_rate": 0.0001962568916126022,
      "loss": 0.962,
      "step": 478
    },
    {
      "epoch": 0.11533136700174562,
      "grad_norm": 2.3885769844055176,
      "learning_rate": 0.0001962357283738682,
      "loss": 0.5657,
      "step": 479
    },
    {
      "epoch": 0.1155721422982002,
      "grad_norm": 2.069955587387085,
      "learning_rate": 0.00019621450662359456,
      "loss": 0.5302,
      "step": 480
    },
    {
      "epoch": 0.11581291759465479,
      "grad_norm": 2.8343095779418945,
      "learning_rate": 0.0001961932263746841,
      "loss": 0.3862,
      "step": 481
    },
    {
      "epoch": 0.11605369289110937,
      "grad_norm": 2.8576223850250244,
      "learning_rate": 0.00019617188764007524,
      "loss": 1.4014,
      "step": 482
    },
    {
      "epoch": 0.11629446818756396,
      "grad_norm": 2.8722829818725586,
      "learning_rate": 0.00019615049043274205,
      "loss": 0.3474,
      "step": 483
    },
    {
      "epoch": 0.11653524348401854,
      "grad_norm": 3.647714376449585,
      "learning_rate": 0.00019612903476569406,
      "loss": 0.8658,
      "step": 484
    },
    {
      "epoch": 0.11677601878047313,
      "grad_norm": 6.923486232757568,
      "learning_rate": 0.00019610752065197634,
      "loss": 1.0191,
      "step": 485
    },
    {
      "epoch": 0.11701679407692771,
      "grad_norm": 2.716620683670044,
      "learning_rate": 0.0001960859481046695,
      "loss": 0.6586,
      "step": 486
    },
    {
      "epoch": 0.11725756937338229,
      "grad_norm": 3.657470941543579,
      "learning_rate": 0.00019606431713688975,
      "loss": 0.5454,
      "step": 487
    },
    {
      "epoch": 0.11749834466983687,
      "grad_norm": 4.070058345794678,
      "learning_rate": 0.00019604262776178876,
      "loss": 0.5342,
      "step": 488
    },
    {
      "epoch": 0.11773911996629145,
      "grad_norm": 5.016479015350342,
      "learning_rate": 0.0001960208799925537,
      "loss": 0.4871,
      "step": 489
    },
    {
      "epoch": 0.11797989526274605,
      "grad_norm": 0.8800312280654907,
      "learning_rate": 0.00019599907384240726,
      "loss": 0.7974,
      "step": 490
    },
    {
      "epoch": 0.11822067055920063,
      "grad_norm": 1.534217119216919,
      "learning_rate": 0.00019597720932460763,
      "loss": 0.5083,
      "step": 491
    },
    {
      "epoch": 0.11846144585565521,
      "grad_norm": 2.766813278198242,
      "learning_rate": 0.0001959552864524485,
      "loss": 0.7765,
      "step": 492
    },
    {
      "epoch": 0.1187022211521098,
      "grad_norm": 8.230446815490723,
      "learning_rate": 0.00019593330523925902,
      "loss": 0.6511,
      "step": 493
    },
    {
      "epoch": 0.11894299644856438,
      "grad_norm": 2.781522750854492,
      "learning_rate": 0.00019591126569840382,
      "loss": 0.4147,
      "step": 494
    },
    {
      "epoch": 0.11918377174501896,
      "grad_norm": 4.93475341796875,
      "learning_rate": 0.00019588916784328295,
      "loss": 0.4109,
      "step": 495
    },
    {
      "epoch": 0.11942454704147354,
      "grad_norm": 3.2182798385620117,
      "learning_rate": 0.00019586701168733202,
      "loss": 0.5109,
      "step": 496
    },
    {
      "epoch": 0.11966532233792813,
      "grad_norm": 5.978203773498535,
      "learning_rate": 0.00019584479724402197,
      "loss": 0.5933,
      "step": 497
    },
    {
      "epoch": 0.11990609763438272,
      "grad_norm": 2.445081949234009,
      "learning_rate": 0.00019582252452685927,
      "loss": 1.1266,
      "step": 498
    },
    {
      "epoch": 0.1201468729308373,
      "grad_norm": 2.4677765369415283,
      "learning_rate": 0.0001958001935493858,
      "loss": 0.4697,
      "step": 499
    },
    {
      "epoch": 0.12038764822729188,
      "grad_norm": 0.8610912561416626,
      "learning_rate": 0.00019577780432517879,
      "loss": 1.0763,
      "step": 500
    },
    {
      "epoch": 0.12062842352374646,
      "grad_norm": 1.9866464138031006,
      "learning_rate": 0.0001957553568678509,
      "loss": 0.729,
      "step": 501
    },
    {
      "epoch": 0.12086919882020104,
      "grad_norm": 2.3875463008880615,
      "learning_rate": 0.00019573285119105037,
      "loss": 0.8719,
      "step": 502
    },
    {
      "epoch": 0.12110997411665562,
      "grad_norm": 4.172793388366699,
      "learning_rate": 0.0001957102873084606,
      "loss": 1.0308,
      "step": 503
    },
    {
      "epoch": 0.12135074941311022,
      "grad_norm": 1.5716460943222046,
      "learning_rate": 0.0001956876652338005,
      "loss": 1.0994,
      "step": 504
    },
    {
      "epoch": 0.1215915247095648,
      "grad_norm": 8.024327278137207,
      "learning_rate": 0.00019566498498082438,
      "loss": 0.437,
      "step": 505
    },
    {
      "epoch": 0.12183230000601938,
      "grad_norm": 2.5161705017089844,
      "learning_rate": 0.0001956422465633218,
      "loss": 1.0868,
      "step": 506
    },
    {
      "epoch": 0.12207307530247397,
      "grad_norm": 4.083341598510742,
      "learning_rate": 0.0001956194499951179,
      "loss": 0.694,
      "step": 507
    },
    {
      "epoch": 0.12231385059892855,
      "grad_norm": 2.113607406616211,
      "learning_rate": 0.00019559659529007293,
      "loss": 0.8918,
      "step": 508
    },
    {
      "epoch": 0.12255462589538313,
      "grad_norm": 2.2010605335235596,
      "learning_rate": 0.00019557368246208263,
      "loss": 0.2703,
      "step": 509
    },
    {
      "epoch": 0.12279540119183771,
      "grad_norm": 2.9058799743652344,
      "learning_rate": 0.0001955507115250781,
      "loss": 1.207,
      "step": 510
    },
    {
      "epoch": 0.1230361764882923,
      "grad_norm": 7.344447612762451,
      "learning_rate": 0.00019552768249302566,
      "loss": 1.1835,
      "step": 511
    },
    {
      "epoch": 0.12327695178474689,
      "grad_norm": 1.3118301630020142,
      "learning_rate": 0.00019550459537992704,
      "loss": 0.5164,
      "step": 512
    },
    {
      "epoch": 0.12351772708120147,
      "grad_norm": 1.65935480594635,
      "learning_rate": 0.00019548145019981924,
      "loss": 0.7932,
      "step": 513
    },
    {
      "epoch": 0.12375850237765605,
      "grad_norm": 3.032277822494507,
      "learning_rate": 0.0001954582469667746,
      "loss": 0.2637,
      "step": 514
    },
    {
      "epoch": 0.12399927767411063,
      "grad_norm": 4.980113983154297,
      "learning_rate": 0.00019543498569490076,
      "loss": 0.8955,
      "step": 515
    },
    {
      "epoch": 0.12424005297056522,
      "grad_norm": 4.82036828994751,
      "learning_rate": 0.00019541166639834058,
      "loss": 1.1343,
      "step": 516
    },
    {
      "epoch": 0.1244808282670198,
      "grad_norm": 1.410509705543518,
      "learning_rate": 0.0001953882890912723,
      "loss": 0.8404,
      "step": 517
    },
    {
      "epoch": 0.12472160356347439,
      "grad_norm": 4.177162170410156,
      "learning_rate": 0.00019536485378790928,
      "loss": 1.0445,
      "step": 518
    },
    {
      "epoch": 0.12496237885992897,
      "grad_norm": 1.0933364629745483,
      "learning_rate": 0.00019534136050250033,
      "loss": 0.5387,
      "step": 519
    },
    {
      "epoch": 0.12520315415638356,
      "grad_norm": 1.1372244358062744,
      "learning_rate": 0.00019531780924932939,
      "loss": 0.5226,
      "step": 520
    },
    {
      "epoch": 0.12544392945283814,
      "grad_norm": 43.66477966308594,
      "learning_rate": 0.00019529420004271567,
      "loss": 0.2868,
      "step": 521
    },
    {
      "epoch": 0.12568470474929272,
      "grad_norm": 3.931898593902588,
      "learning_rate": 0.0001952705328970136,
      "loss": 0.5439,
      "step": 522
    },
    {
      "epoch": 0.1259254800457473,
      "grad_norm": 1.9407854080200195,
      "learning_rate": 0.00019524680782661294,
      "loss": 0.9395,
      "step": 523
    },
    {
      "epoch": 0.12616625534220188,
      "grad_norm": 3.817629814147949,
      "learning_rate": 0.0001952230248459385,
      "loss": 1.0245,
      "step": 524
    },
    {
      "epoch": 0.12640703063865646,
      "grad_norm": 6.78740119934082,
      "learning_rate": 0.0001951991839694504,
      "loss": 0.5898,
      "step": 525
    },
    {
      "epoch": 0.12664780593511105,
      "grad_norm": 3.067821979522705,
      "learning_rate": 0.00019517528521164395,
      "loss": 1.1765,
      "step": 526
    },
    {
      "epoch": 0.12688858123156563,
      "grad_norm": 3.173957109451294,
      "learning_rate": 0.00019515132858704965,
      "loss": 0.9526,
      "step": 527
    },
    {
      "epoch": 0.12712935652802024,
      "grad_norm": 2.5016558170318604,
      "learning_rate": 0.00019512731411023323,
      "loss": 1.0662,
      "step": 528
    },
    {
      "epoch": 0.12737013182447482,
      "grad_norm": 3.3116912841796875,
      "learning_rate": 0.00019510324179579548,
      "loss": 1.3049,
      "step": 529
    },
    {
      "epoch": 0.1276109071209294,
      "grad_norm": 4.086653709411621,
      "learning_rate": 0.00019507911165837248,
      "loss": 1.0897,
      "step": 530
    },
    {
      "epoch": 0.12785168241738398,
      "grad_norm": 7.5260329246521,
      "learning_rate": 0.00019505492371263533,
      "loss": 1.0542,
      "step": 531
    },
    {
      "epoch": 0.12809245771383856,
      "grad_norm": 4.74697208404541,
      "learning_rate": 0.00019503067797329044,
      "loss": 1.4031,
      "step": 532
    },
    {
      "epoch": 0.12833323301029315,
      "grad_norm": 3.090668201446533,
      "learning_rate": 0.0001950063744550792,
      "loss": 0.4726,
      "step": 533
    },
    {
      "epoch": 0.12857400830674773,
      "grad_norm": 2.889418840408325,
      "learning_rate": 0.00019498201317277828,
      "loss": 1.3182,
      "step": 534
    },
    {
      "epoch": 0.1288147836032023,
      "grad_norm": 2.548130989074707,
      "learning_rate": 0.00019495759414119932,
      "loss": 0.6617,
      "step": 535
    },
    {
      "epoch": 0.1290555588996569,
      "grad_norm": 2.9702346324920654,
      "learning_rate": 0.0001949331173751892,
      "loss": 0.7535,
      "step": 536
    },
    {
      "epoch": 0.12929633419611147,
      "grad_norm": 6.834994316101074,
      "learning_rate": 0.00019490858288962983,
      "loss": 1.2718,
      "step": 537
    },
    {
      "epoch": 0.12953710949256605,
      "grad_norm": 6.125328540802002,
      "learning_rate": 0.00019488399069943823,
      "loss": 0.6736,
      "step": 538
    },
    {
      "epoch": 0.12977788478902064,
      "grad_norm": 5.69896125793457,
      "learning_rate": 0.0001948593408195665,
      "loss": 0.6771,
      "step": 539
    },
    {
      "epoch": 0.13001866008547522,
      "grad_norm": 2.1542887687683105,
      "learning_rate": 0.0001948346332650018,
      "loss": 0.2843,
      "step": 540
    },
    {
      "epoch": 0.13025943538192983,
      "grad_norm": 4.776561737060547,
      "learning_rate": 0.0001948098680507665,
      "loss": 0.5372,
      "step": 541
    },
    {
      "epoch": 0.1305002106783844,
      "grad_norm": 1.1416128873825073,
      "learning_rate": 0.00019478504519191773,
      "loss": 0.7292,
      "step": 542
    },
    {
      "epoch": 0.130740985974839,
      "grad_norm": 1.7264859676361084,
      "learning_rate": 0.00019476016470354796,
      "loss": 0.5956,
      "step": 543
    },
    {
      "epoch": 0.13098176127129357,
      "grad_norm": 2.4325296878814697,
      "learning_rate": 0.00019473522660078455,
      "loss": 0.819,
      "step": 544
    },
    {
      "epoch": 0.13122253656774815,
      "grad_norm": 2.0552382469177246,
      "learning_rate": 0.00019471023089878995,
      "loss": 1.0633,
      "step": 545
    },
    {
      "epoch": 0.13146331186420274,
      "grad_norm": 6.430831432342529,
      "learning_rate": 0.00019468517761276154,
      "loss": 0.711,
      "step": 546
    },
    {
      "epoch": 0.13170408716065732,
      "grad_norm": 3.12066650390625,
      "learning_rate": 0.00019466006675793185,
      "loss": 0.525,
      "step": 547
    },
    {
      "epoch": 0.1319448624571119,
      "grad_norm": 18.034626007080078,
      "learning_rate": 0.00019463489834956827,
      "loss": 0.7595,
      "step": 548
    },
    {
      "epoch": 0.13218563775356648,
      "grad_norm": 17.345428466796875,
      "learning_rate": 0.0001946096724029733,
      "loss": 1.1251,
      "step": 549
    },
    {
      "epoch": 0.13242641305002106,
      "grad_norm": 1.709258794784546,
      "learning_rate": 0.00019458438893348433,
      "loss": 1.4069,
      "step": 550
    },
    {
      "epoch": 0.13266718834647565,
      "grad_norm": 2.9705605506896973,
      "learning_rate": 0.0001945590479564738,
      "loss": 0.8629,
      "step": 551
    },
    {
      "epoch": 0.13290796364293023,
      "grad_norm": 1.2169429063796997,
      "learning_rate": 0.00019453364948734906,
      "loss": 0.4889,
      "step": 552
    },
    {
      "epoch": 0.1331487389393848,
      "grad_norm": 2.59025502204895,
      "learning_rate": 0.00019450819354155244,
      "loss": 0.1758,
      "step": 553
    },
    {
      "epoch": 0.1333895142358394,
      "grad_norm": 1.7973146438598633,
      "learning_rate": 0.00019448268013456125,
      "loss": 0.9624,
      "step": 554
    },
    {
      "epoch": 0.133630289532294,
      "grad_norm": 1.6008778810501099,
      "learning_rate": 0.00019445710928188764,
      "loss": 0.8347,
      "step": 555
    },
    {
      "epoch": 0.13387106482874858,
      "grad_norm": 2.505977153778076,
      "learning_rate": 0.00019443148099907877,
      "loss": 0.3091,
      "step": 556
    },
    {
      "epoch": 0.13411184012520316,
      "grad_norm": 3.7619707584381104,
      "learning_rate": 0.0001944057953017167,
      "loss": 0.7405,
      "step": 557
    },
    {
      "epoch": 0.13435261542165775,
      "grad_norm": 2.6600496768951416,
      "learning_rate": 0.0001943800522054184,
      "loss": 0.5151,
      "step": 558
    },
    {
      "epoch": 0.13459339071811233,
      "grad_norm": 5.565666198730469,
      "learning_rate": 0.0001943542517258357,
      "loss": 1.0332,
      "step": 559
    },
    {
      "epoch": 0.1348341660145669,
      "grad_norm": 2.515794277191162,
      "learning_rate": 0.00019432839387865537,
      "loss": 1.1725,
      "step": 560
    },
    {
      "epoch": 0.1350749413110215,
      "grad_norm": 3.981748104095459,
      "learning_rate": 0.00019430247867959906,
      "loss": 0.5203,
      "step": 561
    },
    {
      "epoch": 0.13531571660747607,
      "grad_norm": 2.138054847717285,
      "learning_rate": 0.00019427650614442323,
      "loss": 0.0975,
      "step": 562
    },
    {
      "epoch": 0.13555649190393065,
      "grad_norm": 4.705209255218506,
      "learning_rate": 0.00019425047628891925,
      "loss": 0.8184,
      "step": 563
    },
    {
      "epoch": 0.13579726720038524,
      "grad_norm": 1.8869285583496094,
      "learning_rate": 0.00019422438912891337,
      "loss": 1.061,
      "step": 564
    },
    {
      "epoch": 0.13603804249683982,
      "grad_norm": 5.188673973083496,
      "learning_rate": 0.00019419824468026655,
      "loss": 0.9384,
      "step": 565
    },
    {
      "epoch": 0.1362788177932944,
      "grad_norm": 3.5460383892059326,
      "learning_rate": 0.0001941720429588748,
      "loss": 0.6326,
      "step": 566
    },
    {
      "epoch": 0.13651959308974898,
      "grad_norm": 3.3124594688415527,
      "learning_rate": 0.00019414578398066872,
      "loss": 1.625,
      "step": 567
    },
    {
      "epoch": 0.13676036838620356,
      "grad_norm": 1.9925857782363892,
      "learning_rate": 0.00019411946776161387,
      "loss": 0.86,
      "step": 568
    },
    {
      "epoch": 0.13700114368265817,
      "grad_norm": 2.7330362796783447,
      "learning_rate": 0.00019409309431771057,
      "loss": 0.5012,
      "step": 569
    },
    {
      "epoch": 0.13724191897911275,
      "grad_norm": 5.8978776931762695,
      "learning_rate": 0.00019406666366499393,
      "loss": 0.8465,
      "step": 570
    },
    {
      "epoch": 0.13748269427556734,
      "grad_norm": 1.46619713306427,
      "learning_rate": 0.00019404017581953385,
      "loss": 0.5121,
      "step": 571
    },
    {
      "epoch": 0.13772346957202192,
      "grad_norm": 3.0455288887023926,
      "learning_rate": 0.000194013630797435,
      "loss": 0.6288,
      "step": 572
    },
    {
      "epoch": 0.1379642448684765,
      "grad_norm": 2.932802677154541,
      "learning_rate": 0.00019398702861483678,
      "loss": 0.9645,
      "step": 573
    },
    {
      "epoch": 0.13820502016493108,
      "grad_norm": 4.07331657409668,
      "learning_rate": 0.00019396036928791345,
      "loss": 0.7568,
      "step": 574
    },
    {
      "epoch": 0.13844579546138566,
      "grad_norm": 2.658447027206421,
      "learning_rate": 0.00019393365283287386,
      "loss": 0.9391,
      "step": 575
    },
    {
      "epoch": 0.13868657075784024,
      "grad_norm": 1.1309797763824463,
      "learning_rate": 0.00019390687926596173,
      "loss": 0.8911,
      "step": 576
    },
    {
      "epoch": 0.13892734605429483,
      "grad_norm": 6.038357734680176,
      "learning_rate": 0.00019388004860345544,
      "loss": 0.4398,
      "step": 577
    },
    {
      "epoch": 0.1391681213507494,
      "grad_norm": 5.158764362335205,
      "learning_rate": 0.0001938531608616681,
      "loss": 0.5778,
      "step": 578
    },
    {
      "epoch": 0.139408896647204,
      "grad_norm": 0.5642886161804199,
      "learning_rate": 0.00019382621605694745,
      "loss": 0.5383,
      "step": 579
    },
    {
      "epoch": 0.13964967194365857,
      "grad_norm": 2.256866455078125,
      "learning_rate": 0.00019379921420567607,
      "loss": 0.5772,
      "step": 580
    },
    {
      "epoch": 0.13989044724011315,
      "grad_norm": 2.100571870803833,
      "learning_rate": 0.00019377215532427115,
      "loss": 0.9185,
      "step": 581
    },
    {
      "epoch": 0.14013122253656773,
      "grad_norm": 6.56969690322876,
      "learning_rate": 0.0001937450394291845,
      "loss": 1.0287,
      "step": 582
    },
    {
      "epoch": 0.14037199783302234,
      "grad_norm": 2.601640462875366,
      "learning_rate": 0.00019371786653690266,
      "loss": 1.5671,
      "step": 583
    },
    {
      "epoch": 0.14061277312947693,
      "grad_norm": 5.737114906311035,
      "learning_rate": 0.00019369063666394682,
      "loss": 0.9538,
      "step": 584
    },
    {
      "epoch": 0.1408535484259315,
      "grad_norm": 3.682819128036499,
      "learning_rate": 0.0001936633498268728,
      "loss": 0.8473,
      "step": 585
    },
    {
      "epoch": 0.1410943237223861,
      "grad_norm": 3.1207540035247803,
      "learning_rate": 0.00019363600604227105,
      "loss": 0.5173,
      "step": 586
    },
    {
      "epoch": 0.14133509901884067,
      "grad_norm": 2.8920652866363525,
      "learning_rate": 0.0001936086053267667,
      "loss": 0.7551,
      "step": 587
    },
    {
      "epoch": 0.14157587431529525,
      "grad_norm": 4.445816993713379,
      "learning_rate": 0.00019358114769701937,
      "loss": 0.6121,
      "step": 588
    },
    {
      "epoch": 0.14181664961174983,
      "grad_norm": 2.9601528644561768,
      "learning_rate": 0.00019355363316972342,
      "loss": 1.3002,
      "step": 589
    },
    {
      "epoch": 0.14205742490820442,
      "grad_norm": 6.101936340332031,
      "learning_rate": 0.0001935260617616077,
      "loss": 0.7764,
      "step": 590
    },
    {
      "epoch": 0.142298200204659,
      "grad_norm": 4.4996562004089355,
      "learning_rate": 0.00019349843348943574,
      "loss": 0.744,
      "step": 591
    },
    {
      "epoch": 0.14253897550111358,
      "grad_norm": 1.1355993747711182,
      "learning_rate": 0.00019347074837000554,
      "loss": 0.3457,
      "step": 592
    },
    {
      "epoch": 0.14277975079756816,
      "grad_norm": 5.778316497802734,
      "learning_rate": 0.00019344300642014974,
      "loss": 1.1338,
      "step": 593
    },
    {
      "epoch": 0.14302052609402274,
      "grad_norm": 1.4276717901229858,
      "learning_rate": 0.00019341520765673553,
      "loss": 0.4207,
      "step": 594
    },
    {
      "epoch": 0.14326130139047732,
      "grad_norm": 2.5159173011779785,
      "learning_rate": 0.00019338735209666457,
      "loss": 0.6524,
      "step": 595
    },
    {
      "epoch": 0.14350207668693193,
      "grad_norm": 1.4529104232788086,
      "learning_rate": 0.00019335943975687316,
      "loss": 0.4851,
      "step": 596
    },
    {
      "epoch": 0.14374285198338652,
      "grad_norm": 3.2440574169158936,
      "learning_rate": 0.000193331470654332,
      "loss": 0.8624,
      "step": 597
    },
    {
      "epoch": 0.1439836272798411,
      "grad_norm": 0.9937834739685059,
      "learning_rate": 0.00019330344480604646,
      "loss": 0.5917,
      "step": 598
    },
    {
      "epoch": 0.14422440257629568,
      "grad_norm": 2.456488609313965,
      "learning_rate": 0.00019327536222905623,
      "loss": 0.3061,
      "step": 599
    },
    {
      "epoch": 0.14446517787275026,
      "grad_norm": 4.499001979827881,
      "learning_rate": 0.00019324722294043558,
      "loss": 0.8591,
      "step": 600
    },
    {
      "epoch": 0.14470595316920484,
      "grad_norm": 1.5652315616607666,
      "learning_rate": 0.0001932190269572933,
      "loss": 0.1562,
      "step": 601
    },
    {
      "epoch": 0.14494672846565942,
      "grad_norm": 2.7683820724487305,
      "learning_rate": 0.00019319077429677268,
      "loss": 0.4619,
      "step": 602
    },
    {
      "epoch": 0.145187503762114,
      "grad_norm": 2.41717529296875,
      "learning_rate": 0.00019316246497605127,
      "loss": 0.8059,
      "step": 603
    },
    {
      "epoch": 0.1454282790585686,
      "grad_norm": 3.7864205837249756,
      "learning_rate": 0.00019313409901234127,
      "loss": 0.9378,
      "step": 604
    },
    {
      "epoch": 0.14566905435502317,
      "grad_norm": 2.745898723602295,
      "learning_rate": 0.00019310567642288922,
      "loss": 0.4861,
      "step": 605
    },
    {
      "epoch": 0.14590982965147775,
      "grad_norm": 2.9701218605041504,
      "learning_rate": 0.00019307719722497612,
      "loss": 0.9418,
      "step": 606
    },
    {
      "epoch": 0.14615060494793233,
      "grad_norm": 5.684365749359131,
      "learning_rate": 0.00019304866143591746,
      "loss": 0.7847,
      "step": 607
    },
    {
      "epoch": 0.14639138024438691,
      "grad_norm": 18.988853454589844,
      "learning_rate": 0.00019302006907306296,
      "loss": 1.1569,
      "step": 608
    },
    {
      "epoch": 0.1466321555408415,
      "grad_norm": 3.721798896789551,
      "learning_rate": 0.0001929914201537969,
      "loss": 0.8478,
      "step": 609
    },
    {
      "epoch": 0.1468729308372961,
      "grad_norm": 1.7376899719238281,
      "learning_rate": 0.00019296271469553786,
      "loss": 0.5951,
      "step": 610
    },
    {
      "epoch": 0.1471137061337507,
      "grad_norm": 13.979349136352539,
      "learning_rate": 0.00019293395271573885,
      "loss": 0.6307,
      "step": 611
    },
    {
      "epoch": 0.14735448143020527,
      "grad_norm": 3.307643175125122,
      "learning_rate": 0.00019290513423188724,
      "loss": 0.954,
      "step": 612
    },
    {
      "epoch": 0.14759525672665985,
      "grad_norm": 2.229158878326416,
      "learning_rate": 0.00019287625926150465,
      "loss": 0.9561,
      "step": 613
    },
    {
      "epoch": 0.14783603202311443,
      "grad_norm": 1.0365084409713745,
      "learning_rate": 0.0001928473278221472,
      "loss": 1.1344,
      "step": 614
    },
    {
      "epoch": 0.14807680731956901,
      "grad_norm": 2.6409239768981934,
      "learning_rate": 0.00019281833993140525,
      "loss": 0.657,
      "step": 615
    },
    {
      "epoch": 0.1483175826160236,
      "grad_norm": 1.3793067932128906,
      "learning_rate": 0.00019278929560690347,
      "loss": 0.6292,
      "step": 616
    },
    {
      "epoch": 0.14855835791247818,
      "grad_norm": 1.6385407447814941,
      "learning_rate": 0.00019276019486630093,
      "loss": 0.4795,
      "step": 617
    },
    {
      "epoch": 0.14879913320893276,
      "grad_norm": 4.453542232513428,
      "learning_rate": 0.00019273103772729093,
      "loss": 1.0279,
      "step": 618
    },
    {
      "epoch": 0.14903990850538734,
      "grad_norm": 0.5888392925262451,
      "learning_rate": 0.00019270182420760102,
      "loss": 0.3529,
      "step": 619
    },
    {
      "epoch": 0.14928068380184192,
      "grad_norm": 2.5179574489593506,
      "learning_rate": 0.00019267255432499318,
      "loss": 0.5329,
      "step": 620
    },
    {
      "epoch": 0.1495214590982965,
      "grad_norm": 10.619978904724121,
      "learning_rate": 0.0001926432280972635,
      "loss": 0.828,
      "step": 621
    },
    {
      "epoch": 0.1497622343947511,
      "grad_norm": 0.41896963119506836,
      "learning_rate": 0.0001926138455422424,
      "loss": 0.5892,
      "step": 622
    },
    {
      "epoch": 0.15000300969120567,
      "grad_norm": 3.307152032852173,
      "learning_rate": 0.00019258440667779456,
      "loss": 0.9538,
      "step": 623
    },
    {
      "epoch": 0.15024378498766028,
      "grad_norm": 1.9945799112319946,
      "learning_rate": 0.00019255491152181885,
      "loss": 0.3184,
      "step": 624
    },
    {
      "epoch": 0.15048456028411486,
      "grad_norm": 2.7123000621795654,
      "learning_rate": 0.00019252536009224845,
      "loss": 0.5069,
      "step": 625
    },
    {
      "epoch": 0.15072533558056944,
      "grad_norm": 2.0505239963531494,
      "learning_rate": 0.0001924957524070506,
      "loss": 0.6904,
      "step": 626
    },
    {
      "epoch": 0.15096611087702402,
      "grad_norm": 2.483839273452759,
      "learning_rate": 0.00019246608848422691,
      "loss": 1.4015,
      "step": 627
    },
    {
      "epoch": 0.1512068861734786,
      "grad_norm": 3.842451333999634,
      "learning_rate": 0.00019243636834181312,
      "loss": 0.5501,
      "step": 628
    },
    {
      "epoch": 0.1514476614699332,
      "grad_norm": 1.5859034061431885,
      "learning_rate": 0.00019240659199787908,
      "loss": 0.5125,
      "step": 629
    },
    {
      "epoch": 0.15168843676638777,
      "grad_norm": 1.8935115337371826,
      "learning_rate": 0.0001923767594705289,
      "loss": 0.4354,
      "step": 630
    },
    {
      "epoch": 0.15192921206284235,
      "grad_norm": 4.2323384284973145,
      "learning_rate": 0.00019234687077790085,
      "loss": 0.8988,
      "step": 631
    },
    {
      "epoch": 0.15216998735929693,
      "grad_norm": 1.3674668073654175,
      "learning_rate": 0.00019231692593816733,
      "loss": 0.3303,
      "step": 632
    },
    {
      "epoch": 0.1524107626557515,
      "grad_norm": 7.714446544647217,
      "learning_rate": 0.0001922869249695348,
      "loss": 0.2196,
      "step": 633
    },
    {
      "epoch": 0.1526515379522061,
      "grad_norm": 3.0279879570007324,
      "learning_rate": 0.00019225686789024402,
      "loss": 0.6256,
      "step": 634
    },
    {
      "epoch": 0.15289231324866068,
      "grad_norm": 4.163952350616455,
      "learning_rate": 0.0001922267547185697,
      "loss": 0.9441,
      "step": 635
    },
    {
      "epoch": 0.15313308854511526,
      "grad_norm": 1.383583426475525,
      "learning_rate": 0.00019219658547282067,
      "loss": 0.7899,
      "step": 636
    },
    {
      "epoch": 0.15337386384156987,
      "grad_norm": 1.438839077949524,
      "learning_rate": 0.00019216636017133998,
      "loss": 0.4349,
      "step": 637
    },
    {
      "epoch": 0.15361463913802445,
      "grad_norm": 7.890371322631836,
      "learning_rate": 0.00019213607883250466,
      "loss": 1.5545,
      "step": 638
    },
    {
      "epoch": 0.15385541443447903,
      "grad_norm": 6.0160746574401855,
      "learning_rate": 0.0001921057414747258,
      "loss": 1.8333,
      "step": 639
    },
    {
      "epoch": 0.1540961897309336,
      "grad_norm": 1.7680754661560059,
      "learning_rate": 0.00019207534811644864,
      "loss": 0.805,
      "step": 640
    },
    {
      "epoch": 0.1543369650273882,
      "grad_norm": 3.0242257118225098,
      "learning_rate": 0.00019204489877615237,
      "loss": 0.4745,
      "step": 641
    },
    {
      "epoch": 0.15457774032384278,
      "grad_norm": 1.6106970310211182,
      "learning_rate": 0.00019201439347235025,
      "loss": 0.5615,
      "step": 642
    },
    {
      "epoch": 0.15481851562029736,
      "grad_norm": 3.6016252040863037,
      "learning_rate": 0.0001919838322235896,
      "loss": 1.3254,
      "step": 643
    },
    {
      "epoch": 0.15505929091675194,
      "grad_norm": 6.142489433288574,
      "learning_rate": 0.00019195321504845173,
      "loss": 0.5939,
      "step": 644
    },
    {
      "epoch": 0.15530006621320652,
      "grad_norm": 2.9963788986206055,
      "learning_rate": 0.00019192254196555191,
      "loss": 0.8563,
      "step": 645
    },
    {
      "epoch": 0.1555408415096611,
      "grad_norm": 2.010145664215088,
      "learning_rate": 0.00019189181299353946,
      "loss": 0.6641,
      "step": 646
    },
    {
      "epoch": 0.15578161680611569,
      "grad_norm": 3.030747890472412,
      "learning_rate": 0.0001918610281510977,
      "loss": 1.0257,
      "step": 647
    },
    {
      "epoch": 0.15602239210257027,
      "grad_norm": 3.0926742553710938,
      "learning_rate": 0.0001918301874569439,
      "loss": 0.7438,
      "step": 648
    },
    {
      "epoch": 0.15626316739902485,
      "grad_norm": 3.063593864440918,
      "learning_rate": 0.00019179929092982912,
      "loss": 0.6192,
      "step": 649
    },
    {
      "epoch": 0.15650394269547943,
      "grad_norm": 1.6936414241790771,
      "learning_rate": 0.0001917683385885387,
      "loss": 0.3439,
      "step": 650
    },
    {
      "epoch": 0.15674471799193404,
      "grad_norm": 27.274925231933594,
      "learning_rate": 0.0001917373304518917,
      "loss": 0.8737,
      "step": 651
    },
    {
      "epoch": 0.15698549328838862,
      "grad_norm": 2.2580983638763428,
      "learning_rate": 0.000191706266538741,
      "loss": 0.9577,
      "step": 652
    },
    {
      "epoch": 0.1572262685848432,
      "grad_norm": 1.4257384538650513,
      "learning_rate": 0.00019167514686797369,
      "loss": 0.1513,
      "step": 653
    },
    {
      "epoch": 0.15746704388129779,
      "grad_norm": 2.24150013923645,
      "learning_rate": 0.00019164397145851055,
      "loss": 0.6569,
      "step": 654
    },
    {
      "epoch": 0.15770781917775237,
      "grad_norm": 5.1359758377075195,
      "learning_rate": 0.00019161274032930626,
      "loss": 0.9886,
      "step": 655
    },
    {
      "epoch": 0.15794859447420695,
      "grad_norm": 2.413954734802246,
      "learning_rate": 0.00019158145349934945,
      "loss": 0.2666,
      "step": 656
    },
    {
      "epoch": 0.15818936977066153,
      "grad_norm": 0.6739373803138733,
      "learning_rate": 0.00019155011098766255,
      "loss": 0.5449,
      "step": 657
    },
    {
      "epoch": 0.1584301450671161,
      "grad_norm": 0.7366794943809509,
      "learning_rate": 0.00019151871281330193,
      "loss": 0.2757,
      "step": 658
    },
    {
      "epoch": 0.1586709203635707,
      "grad_norm": 2.2127983570098877,
      "learning_rate": 0.00019148725899535774,
      "loss": 0.5392,
      "step": 659
    },
    {
      "epoch": 0.15891169566002528,
      "grad_norm": 1.907882571220398,
      "learning_rate": 0.00019145574955295395,
      "loss": 0.4752,
      "step": 660
    },
    {
      "epoch": 0.15915247095647986,
      "grad_norm": 4.098206520080566,
      "learning_rate": 0.00019142418450524836,
      "loss": 0.9706,
      "step": 661
    },
    {
      "epoch": 0.15939324625293444,
      "grad_norm": 3.782545804977417,
      "learning_rate": 0.00019139256387143262,
      "loss": 1.0815,
      "step": 662
    },
    {
      "epoch": 0.15963402154938902,
      "grad_norm": 2.8690521717071533,
      "learning_rate": 0.00019136088767073215,
      "loss": 1.0296,
      "step": 663
    },
    {
      "epoch": 0.1598747968458436,
      "grad_norm": 6.640118598937988,
      "learning_rate": 0.00019132915592240613,
      "loss": 0.6574,
      "step": 664
    },
    {
      "epoch": 0.1601155721422982,
      "grad_norm": 5.299488067626953,
      "learning_rate": 0.00019129736864574755,
      "loss": 0.9321,
      "step": 665
    },
    {
      "epoch": 0.1603563474387528,
      "grad_norm": 1.4800339937210083,
      "learning_rate": 0.0001912655258600831,
      "loss": 1.0515,
      "step": 666
    },
    {
      "epoch": 0.16059712273520738,
      "grad_norm": 4.096741199493408,
      "learning_rate": 0.00019123362758477334,
      "loss": 0.8097,
      "step": 667
    },
    {
      "epoch": 0.16083789803166196,
      "grad_norm": 1.2806522846221924,
      "learning_rate": 0.00019120167383921243,
      "loss": 0.5217,
      "step": 668
    },
    {
      "epoch": 0.16107867332811654,
      "grad_norm": 2.5771350860595703,
      "learning_rate": 0.0001911696646428284,
      "loss": 0.725,
      "step": 669
    },
    {
      "epoch": 0.16131944862457112,
      "grad_norm": 2.9327738285064697,
      "learning_rate": 0.0001911376000150828,
      "loss": 0.7475,
      "step": 670
    },
    {
      "epoch": 0.1615602239210257,
      "grad_norm": 3.3815646171569824,
      "learning_rate": 0.00019110547997547108,
      "loss": 0.935,
      "step": 671
    },
    {
      "epoch": 0.16180099921748028,
      "grad_norm": 7.282792568206787,
      "learning_rate": 0.00019107330454352228,
      "loss": 1.0584,
      "step": 672
    },
    {
      "epoch": 0.16204177451393487,
      "grad_norm": 12.47275447845459,
      "learning_rate": 0.00019104107373879909,
      "loss": 0.6211,
      "step": 673
    },
    {
      "epoch": 0.16228254981038945,
      "grad_norm": 1.406531572341919,
      "learning_rate": 0.00019100878758089798,
      "loss": 0.5329,
      "step": 674
    },
    {
      "epoch": 0.16252332510684403,
      "grad_norm": 2.693037748336792,
      "learning_rate": 0.00019097644608944897,
      "loss": 0.6528,
      "step": 675
    },
    {
      "epoch": 0.1627641004032986,
      "grad_norm": 0.5329806804656982,
      "learning_rate": 0.0001909440492841158,
      "loss": 0.4698,
      "step": 676
    },
    {
      "epoch": 0.1630048756997532,
      "grad_norm": 3.925929069519043,
      "learning_rate": 0.0001909115971845957,
      "loss": 0.6919,
      "step": 677
    },
    {
      "epoch": 0.1632456509962078,
      "grad_norm": 9.350509643554688,
      "learning_rate": 0.00019087908981061972,
      "loss": 1.1159,
      "step": 678
    },
    {
      "epoch": 0.16348642629266238,
      "grad_norm": 6.900551795959473,
      "learning_rate": 0.00019084652718195238,
      "loss": 0.5557,
      "step": 679
    },
    {
      "epoch": 0.16372720158911697,
      "grad_norm": 1.4014828205108643,
      "learning_rate": 0.00019081390931839181,
      "loss": 0.997,
      "step": 680
    },
    {
      "epoch": 0.16396797688557155,
      "grad_norm": 7.637568950653076,
      "learning_rate": 0.0001907812362397698,
      "loss": 1.3175,
      "step": 681
    },
    {
      "epoch": 0.16420875218202613,
      "grad_norm": 1.3787779808044434,
      "learning_rate": 0.00019074850796595163,
      "loss": 0.4951,
      "step": 682
    },
    {
      "epoch": 0.1644495274784807,
      "grad_norm": 3.6682255268096924,
      "learning_rate": 0.00019071572451683614,
      "loss": 1.0832,
      "step": 683
    },
    {
      "epoch": 0.1646903027749353,
      "grad_norm": 25.37391471862793,
      "learning_rate": 0.00019068288591235578,
      "loss": 0.6875,
      "step": 684
    },
    {
      "epoch": 0.16493107807138987,
      "grad_norm": 2.674971580505371,
      "learning_rate": 0.00019064999217247643,
      "loss": 0.9103,
      "step": 685
    },
    {
      "epoch": 0.16517185336784446,
      "grad_norm": 3.5297887325286865,
      "learning_rate": 0.00019061704331719764,
      "loss": 0.8173,
      "step": 686
    },
    {
      "epoch": 0.16541262866429904,
      "grad_norm": 1.2813355922698975,
      "learning_rate": 0.00019058403936655233,
      "loss": 0.3151,
      "step": 687
    },
    {
      "epoch": 0.16565340396075362,
      "grad_norm": 3.667281150817871,
      "learning_rate": 0.000190550980340607,
      "loss": 0.6559,
      "step": 688
    },
    {
      "epoch": 0.1658941792572082,
      "grad_norm": 2.3366219997406006,
      "learning_rate": 0.00019051786625946162,
      "loss": 0.5158,
      "step": 689
    },
    {
      "epoch": 0.16613495455366278,
      "grad_norm": 1.1751844882965088,
      "learning_rate": 0.00019048469714324958,
      "loss": 0.8607,
      "step": 690
    },
    {
      "epoch": 0.16637572985011737,
      "grad_norm": 3.535374164581299,
      "learning_rate": 0.00019045147301213788,
      "loss": 1.228,
      "step": 691
    },
    {
      "epoch": 0.16661650514657197,
      "grad_norm": 4.35559606552124,
      "learning_rate": 0.00019041819388632676,
      "loss": 0.8601,
      "step": 692
    },
    {
      "epoch": 0.16685728044302656,
      "grad_norm": 2.7030580043792725,
      "learning_rate": 0.00019038485978605004,
      "loss": 1.0164,
      "step": 693
    },
    {
      "epoch": 0.16709805573948114,
      "grad_norm": 3.0144922733306885,
      "learning_rate": 0.00019035147073157493,
      "loss": 0.8172,
      "step": 694
    },
    {
      "epoch": 0.16733883103593572,
      "grad_norm": 2.4854543209075928,
      "learning_rate": 0.00019031802674320206,
      "loss": 0.924,
      "step": 695
    },
    {
      "epoch": 0.1675796063323903,
      "grad_norm": 2.9239442348480225,
      "learning_rate": 0.00019028452784126542,
      "loss": 0.796,
      "step": 696
    },
    {
      "epoch": 0.16782038162884488,
      "grad_norm": 3.872009038925171,
      "learning_rate": 0.00019025097404613245,
      "loss": 0.4696,
      "step": 697
    },
    {
      "epoch": 0.16806115692529947,
      "grad_norm": 1.675231695175171,
      "learning_rate": 0.00019021736537820394,
      "loss": 0.4549,
      "step": 698
    },
    {
      "epoch": 0.16830193222175405,
      "grad_norm": 2.725574493408203,
      "learning_rate": 0.000190183701857914,
      "loss": 0.6834,
      "step": 699
    },
    {
      "epoch": 0.16854270751820863,
      "grad_norm": 2.2455711364746094,
      "learning_rate": 0.00019014998350573014,
      "loss": 0.4471,
      "step": 700
    },
    {
      "epoch": 0.1687834828146632,
      "grad_norm": 0.9234648942947388,
      "learning_rate": 0.00019011621034215322,
      "loss": 0.1788,
      "step": 701
    },
    {
      "epoch": 0.1690242581111178,
      "grad_norm": 1.5781611204147339,
      "learning_rate": 0.00019008238238771736,
      "loss": 0.244,
      "step": 702
    },
    {
      "epoch": 0.16926503340757237,
      "grad_norm": 5.697232246398926,
      "learning_rate": 0.00019004849966299005,
      "loss": 0.4329,
      "step": 703
    },
    {
      "epoch": 0.16950580870402696,
      "grad_norm": 4.987598896026611,
      "learning_rate": 0.00019001456218857208,
      "loss": 0.9072,
      "step": 704
    },
    {
      "epoch": 0.16974658400048154,
      "grad_norm": 2.579894781112671,
      "learning_rate": 0.00018998056998509747,
      "loss": 0.717,
      "step": 705
    },
    {
      "epoch": 0.16998735929693615,
      "grad_norm": 3.0871734619140625,
      "learning_rate": 0.00018994652307323363,
      "loss": 0.2763,
      "step": 706
    },
    {
      "epoch": 0.17022813459339073,
      "grad_norm": 2.6915767192840576,
      "learning_rate": 0.00018991242147368105,
      "loss": 0.8432,
      "step": 707
    },
    {
      "epoch": 0.1704689098898453,
      "grad_norm": 4.125692844390869,
      "learning_rate": 0.00018987826520717365,
      "loss": 1.2892,
      "step": 708
    },
    {
      "epoch": 0.1707096851862999,
      "grad_norm": 3.3036179542541504,
      "learning_rate": 0.00018984405429447852,
      "loss": 0.9282,
      "step": 709
    },
    {
      "epoch": 0.17095046048275447,
      "grad_norm": 2.7406651973724365,
      "learning_rate": 0.00018980978875639596,
      "loss": 1.1154,
      "step": 710
    },
    {
      "epoch": 0.17119123577920906,
      "grad_norm": 0.8988383412361145,
      "learning_rate": 0.00018977546861375947,
      "loss": 0.4264,
      "step": 711
    },
    {
      "epoch": 0.17143201107566364,
      "grad_norm": 0.4057740867137909,
      "learning_rate": 0.00018974109388743583,
      "loss": 0.9764,
      "step": 712
    },
    {
      "epoch": 0.17167278637211822,
      "grad_norm": 3.4650371074676514,
      "learning_rate": 0.0001897066645983249,
      "loss": 1.0979,
      "step": 713
    },
    {
      "epoch": 0.1719135616685728,
      "grad_norm": 4.947608947753906,
      "learning_rate": 0.00018967218076735976,
      "loss": 0.7168,
      "step": 714
    },
    {
      "epoch": 0.17215433696502738,
      "grad_norm": 1.033057451248169,
      "learning_rate": 0.0001896376424155067,
      "loss": 0.2137,
      "step": 715
    },
    {
      "epoch": 0.17239511226148196,
      "grad_norm": 5.465882778167725,
      "learning_rate": 0.00018960304956376511,
      "loss": 1.7501,
      "step": 716
    },
    {
      "epoch": 0.17263588755793655,
      "grad_norm": 3.3956429958343506,
      "learning_rate": 0.00018956840223316752,
      "loss": 0.5464,
      "step": 717
    },
    {
      "epoch": 0.17287666285439113,
      "grad_norm": 0.9355387687683105,
      "learning_rate": 0.00018953370044477955,
      "loss": 0.3183,
      "step": 718
    },
    {
      "epoch": 0.1731174381508457,
      "grad_norm": 0.6955990195274353,
      "learning_rate": 0.00018949894421969998,
      "loss": 0.4827,
      "step": 719
    },
    {
      "epoch": 0.17335821344730032,
      "grad_norm": 9.664114952087402,
      "learning_rate": 0.00018946413357906068,
      "loss": 0.8839,
      "step": 720
    },
    {
      "epoch": 0.1735989887437549,
      "grad_norm": 3.0460386276245117,
      "learning_rate": 0.0001894292685440266,
      "loss": 0.4881,
      "step": 721
    },
    {
      "epoch": 0.17383976404020948,
      "grad_norm": 3.0840280055999756,
      "learning_rate": 0.00018939434913579578,
      "loss": 1.0241,
      "step": 722
    },
    {
      "epoch": 0.17408053933666406,
      "grad_norm": 3.3748912811279297,
      "learning_rate": 0.00018935937537559926,
      "loss": 1.2437,
      "step": 723
    },
    {
      "epoch": 0.17432131463311865,
      "grad_norm": 10.365636825561523,
      "learning_rate": 0.00018932434728470118,
      "loss": 0.762,
      "step": 724
    },
    {
      "epoch": 0.17456208992957323,
      "grad_norm": 4.329830169677734,
      "learning_rate": 0.00018928926488439869,
      "loss": 0.7613,
      "step": 725
    },
    {
      "epoch": 0.1748028652260278,
      "grad_norm": 4.144877910614014,
      "learning_rate": 0.00018925412819602202,
      "loss": 1.1638,
      "step": 726
    },
    {
      "epoch": 0.1750436405224824,
      "grad_norm": 1.3736963272094727,
      "learning_rate": 0.00018921893724093428,
      "loss": 0.6176,
      "step": 727
    },
    {
      "epoch": 0.17528441581893697,
      "grad_norm": 0.9337141513824463,
      "learning_rate": 0.0001891836920405317,
      "loss": 0.2855,
      "step": 728
    },
    {
      "epoch": 0.17552519111539155,
      "grad_norm": 5.704214572906494,
      "learning_rate": 0.0001891483926162434,
      "loss": 0.5566,
      "step": 729
    },
    {
      "epoch": 0.17576596641184614,
      "grad_norm": 1.9563344717025757,
      "learning_rate": 0.00018911303898953158,
      "loss": 0.5568,
      "step": 730
    },
    {
      "epoch": 0.17600674170830072,
      "grad_norm": 5.422361850738525,
      "learning_rate": 0.00018907763118189124,
      "loss": 0.7783,
      "step": 731
    },
    {
      "epoch": 0.1762475170047553,
      "grad_norm": 3.7933502197265625,
      "learning_rate": 0.00018904216921485046,
      "loss": 1.178,
      "step": 732
    },
    {
      "epoch": 0.1764882923012099,
      "grad_norm": 2.3435802459716797,
      "learning_rate": 0.00018900665310997018,
      "loss": 0.5904,
      "step": 733
    },
    {
      "epoch": 0.1767290675976645,
      "grad_norm": 6.887885093688965,
      "learning_rate": 0.0001889710828888443,
      "loss": 1.1331,
      "step": 734
    },
    {
      "epoch": 0.17696984289411907,
      "grad_norm": 2.859257221221924,
      "learning_rate": 0.00018893545857309954,
      "loss": 0.8934,
      "step": 735
    },
    {
      "epoch": 0.17721061819057365,
      "grad_norm": 3.1216025352478027,
      "learning_rate": 0.0001888997801843956,
      "loss": 0.604,
      "step": 736
    },
    {
      "epoch": 0.17745139348702824,
      "grad_norm": 2.1345009803771973,
      "learning_rate": 0.00018886404774442502,
      "loss": 1.0628,
      "step": 737
    },
    {
      "epoch": 0.17769216878348282,
      "grad_norm": 3.882951021194458,
      "learning_rate": 0.0001888282612749132,
      "loss": 0.4992,
      "step": 738
    },
    {
      "epoch": 0.1779329440799374,
      "grad_norm": 6.192306041717529,
      "learning_rate": 0.0001887924207976184,
      "loss": 0.7377,
      "step": 739
    },
    {
      "epoch": 0.17817371937639198,
      "grad_norm": 7.351373672485352,
      "learning_rate": 0.00018875652633433166,
      "loss": 1.103,
      "step": 740
    },
    {
      "epoch": 0.17841449467284656,
      "grad_norm": 1.2278997898101807,
      "learning_rate": 0.00018872057790687697,
      "loss": 0.2774,
      "step": 741
    },
    {
      "epoch": 0.17865526996930114,
      "grad_norm": 2.035078525543213,
      "learning_rate": 0.00018868457553711102,
      "loss": 0.3135,
      "step": 742
    },
    {
      "epoch": 0.17889604526575573,
      "grad_norm": 3.5295181274414062,
      "learning_rate": 0.00018864851924692335,
      "loss": 0.8756,
      "step": 743
    },
    {
      "epoch": 0.1791368205622103,
      "grad_norm": 1.8237663507461548,
      "learning_rate": 0.00018861240905823623,
      "loss": 0.986,
      "step": 744
    },
    {
      "epoch": 0.1793775958586649,
      "grad_norm": 4.102538108825684,
      "learning_rate": 0.00018857624499300476,
      "loss": 0.3661,
      "step": 745
    },
    {
      "epoch": 0.17961837115511947,
      "grad_norm": 1.7040005922317505,
      "learning_rate": 0.0001885400270732168,
      "loss": 0.5499,
      "step": 746
    },
    {
      "epoch": 0.17985914645157408,
      "grad_norm": 1.8217339515686035,
      "learning_rate": 0.00018850375532089285,
      "loss": 0.3162,
      "step": 747
    },
    {
      "epoch": 0.18009992174802866,
      "grad_norm": 4.074040412902832,
      "learning_rate": 0.00018846742975808632,
      "loss": 1.4644,
      "step": 748
    },
    {
      "epoch": 0.18034069704448324,
      "grad_norm": 4.6111016273498535,
      "learning_rate": 0.00018843105040688312,
      "loss": 0.7778,
      "step": 749
    },
    {
      "epoch": 0.18058147234093783,
      "grad_norm": 2.9776699542999268,
      "learning_rate": 0.00018839461728940203,
      "loss": 0.7832,
      "step": 750
    },
    {
      "epoch": 0.1808222476373924,
      "grad_norm": 1.9872022867202759,
      "learning_rate": 0.0001883581304277945,
      "loss": 0.9256,
      "step": 751
    },
    {
      "epoch": 0.181063022933847,
      "grad_norm": 2.69476580619812,
      "learning_rate": 0.00018832158984424463,
      "loss": 0.9596,
      "step": 752
    },
    {
      "epoch": 0.18130379823030157,
      "grad_norm": 5.690935134887695,
      "learning_rate": 0.00018828499556096907,
      "loss": 0.9447,
      "step": 753
    },
    {
      "epoch": 0.18154457352675615,
      "grad_norm": 6.152745723724365,
      "learning_rate": 0.00018824834760021737,
      "loss": 1.0374,
      "step": 754
    },
    {
      "epoch": 0.18178534882321074,
      "grad_norm": 0.8274415135383606,
      "learning_rate": 0.00018821164598427145,
      "loss": 0.5589,
      "step": 755
    },
    {
      "epoch": 0.18202612411966532,
      "grad_norm": 0.797907829284668,
      "learning_rate": 0.00018817489073544609,
      "loss": 0.198,
      "step": 756
    },
    {
      "epoch": 0.1822668994161199,
      "grad_norm": 2.9858620166778564,
      "learning_rate": 0.00018813808187608845,
      "loss": 0.8879,
      "step": 757
    },
    {
      "epoch": 0.18250767471257448,
      "grad_norm": 3.2753536701202393,
      "learning_rate": 0.00018810121942857845,
      "loss": 0.9035,
      "step": 758
    },
    {
      "epoch": 0.18274845000902906,
      "grad_norm": 2.3199586868286133,
      "learning_rate": 0.00018806430341532858,
      "loss": 0.3536,
      "step": 759
    },
    {
      "epoch": 0.18298922530548364,
      "grad_norm": 2.436077833175659,
      "learning_rate": 0.0001880273338587838,
      "loss": 0.5789,
      "step": 760
    },
    {
      "epoch": 0.18323000060193825,
      "grad_norm": 4.57729959487915,
      "learning_rate": 0.0001879903107814217,
      "loss": 0.5619,
      "step": 761
    },
    {
      "epoch": 0.18347077589839283,
      "grad_norm": 2.3822367191314697,
      "learning_rate": 0.0001879532342057524,
      "loss": 0.6583,
      "step": 762
    },
    {
      "epoch": 0.18371155119484742,
      "grad_norm": 5.95395565032959,
      "learning_rate": 0.00018791610415431855,
      "loss": 0.9503,
      "step": 763
    },
    {
      "epoch": 0.183952326491302,
      "grad_norm": 10.346938133239746,
      "learning_rate": 0.0001878789206496953,
      "loss": 1.0378,
      "step": 764
    },
    {
      "epoch": 0.18419310178775658,
      "grad_norm": 2.6373162269592285,
      "learning_rate": 0.0001878416837144903,
      "loss": 0.2419,
      "step": 765
    },
    {
      "epoch": 0.18443387708421116,
      "grad_norm": 1.50508451461792,
      "learning_rate": 0.00018780439337134368,
      "loss": 0.5883,
      "step": 766
    },
    {
      "epoch": 0.18467465238066574,
      "grad_norm": 1.039527416229248,
      "learning_rate": 0.0001877670496429281,
      "loss": 0.586,
      "step": 767
    },
    {
      "epoch": 0.18491542767712033,
      "grad_norm": 3.885326862335205,
      "learning_rate": 0.00018772965255194857,
      "loss": 0.9222,
      "step": 768
    },
    {
      "epoch": 0.1851562029735749,
      "grad_norm": 5.3813605308532715,
      "learning_rate": 0.0001876922021211426,
      "loss": 0.7393,
      "step": 769
    },
    {
      "epoch": 0.1853969782700295,
      "grad_norm": 3.15456223487854,
      "learning_rate": 0.0001876546983732802,
      "loss": 0.7792,
      "step": 770
    },
    {
      "epoch": 0.18563775356648407,
      "grad_norm": 3.184206962585449,
      "learning_rate": 0.0001876171413311637,
      "loss": 1.2433,
      "step": 771
    },
    {
      "epoch": 0.18587852886293865,
      "grad_norm": 1.582762598991394,
      "learning_rate": 0.00018757953101762787,
      "loss": 0.5598,
      "step": 772
    },
    {
      "epoch": 0.18611930415939323,
      "grad_norm": 1.884548306465149,
      "learning_rate": 0.00018754186745553985,
      "loss": 0.4477,
      "step": 773
    },
    {
      "epoch": 0.18636007945584784,
      "grad_norm": 5.777435302734375,
      "learning_rate": 0.0001875041506677992,
      "loss": 0.4906,
      "step": 774
    },
    {
      "epoch": 0.18660085475230243,
      "grad_norm": 1.3165128231048584,
      "learning_rate": 0.00018746638067733778,
      "loss": 0.6351,
      "step": 775
    },
    {
      "epoch": 0.186841630048757,
      "grad_norm": 1.5441575050354004,
      "learning_rate": 0.00018742855750711988,
      "loss": 0.7108,
      "step": 776
    },
    {
      "epoch": 0.1870824053452116,
      "grad_norm": 2.326465606689453,
      "learning_rate": 0.00018739068118014198,
      "loss": 0.861,
      "step": 777
    },
    {
      "epoch": 0.18732318064166617,
      "grad_norm": 3.9939534664154053,
      "learning_rate": 0.00018735275171943307,
      "loss": 0.6814,
      "step": 778
    },
    {
      "epoch": 0.18756395593812075,
      "grad_norm": 1.1253992319107056,
      "learning_rate": 0.00018731476914805425,
      "loss": 0.1546,
      "step": 779
    },
    {
      "epoch": 0.18780473123457533,
      "grad_norm": 2.305006980895996,
      "learning_rate": 0.00018727673348909913,
      "loss": 1.0963,
      "step": 780
    },
    {
      "epoch": 0.18804550653102992,
      "grad_norm": 3.0463790893554688,
      "learning_rate": 0.0001872386447656934,
      "loss": 0.734,
      "step": 781
    },
    {
      "epoch": 0.1882862818274845,
      "grad_norm": 2.357088088989258,
      "learning_rate": 0.00018720050300099507,
      "loss": 0.7065,
      "step": 782
    },
    {
      "epoch": 0.18852705712393908,
      "grad_norm": 2.2680745124816895,
      "learning_rate": 0.0001871623082181945,
      "loss": 1.4469,
      "step": 783
    },
    {
      "epoch": 0.18876783242039366,
      "grad_norm": 2.114755392074585,
      "learning_rate": 0.0001871240604405141,
      "loss": 0.7899,
      "step": 784
    },
    {
      "epoch": 0.18900860771684824,
      "grad_norm": 1.0055882930755615,
      "learning_rate": 0.0001870857596912087,
      "loss": 0.1715,
      "step": 785
    },
    {
      "epoch": 0.18924938301330282,
      "grad_norm": 1.9801616668701172,
      "learning_rate": 0.00018704740599356518,
      "loss": 0.5179,
      "step": 786
    },
    {
      "epoch": 0.1894901583097574,
      "grad_norm": 2.5894370079040527,
      "learning_rate": 0.0001870089993709027,
      "loss": 0.4325,
      "step": 787
    },
    {
      "epoch": 0.18973093360621202,
      "grad_norm": 3.895353078842163,
      "learning_rate": 0.00018697053984657256,
      "loss": 0.3835,
      "step": 788
    },
    {
      "epoch": 0.1899717089026666,
      "grad_norm": 1.0935512781143188,
      "learning_rate": 0.00018693202744395827,
      "loss": 1.1042,
      "step": 789
    },
    {
      "epoch": 0.19021248419912118,
      "grad_norm": 1.6422269344329834,
      "learning_rate": 0.0001868934621864754,
      "loss": 0.718,
      "step": 790
    },
    {
      "epoch": 0.19045325949557576,
      "grad_norm": 2.844287633895874,
      "learning_rate": 0.00018685484409757178,
      "loss": 1.2023,
      "step": 791
    },
    {
      "epoch": 0.19069403479203034,
      "grad_norm": 1.130077600479126,
      "learning_rate": 0.00018681617320072725,
      "loss": 0.2922,
      "step": 792
    },
    {
      "epoch": 0.19093481008848492,
      "grad_norm": 2.1571900844573975,
      "learning_rate": 0.0001867774495194538,
      "loss": 0.7212,
      "step": 793
    },
    {
      "epoch": 0.1911755853849395,
      "grad_norm": 6.230739593505859,
      "learning_rate": 0.00018673867307729555,
      "loss": 0.8975,
      "step": 794
    },
    {
      "epoch": 0.1914163606813941,
      "grad_norm": 2.590592622756958,
      "learning_rate": 0.00018669984389782865,
      "loss": 0.3676,
      "step": 795
    },
    {
      "epoch": 0.19165713597784867,
      "grad_norm": 8.08610725402832,
      "learning_rate": 0.00018666096200466132,
      "loss": 0.7873,
      "step": 796
    },
    {
      "epoch": 0.19189791127430325,
      "grad_norm": 1.4064202308654785,
      "learning_rate": 0.00018662202742143383,
      "loss": 0.5145,
      "step": 797
    },
    {
      "epoch": 0.19213868657075783,
      "grad_norm": 1.37117338180542,
      "learning_rate": 0.0001865830401718185,
      "loss": 0.8417,
      "step": 798
    },
    {
      "epoch": 0.19237946186721241,
      "grad_norm": 2.1927073001861572,
      "learning_rate": 0.00018654400027951967,
      "loss": 0.9088,
      "step": 799
    },
    {
      "epoch": 0.192620237163667,
      "grad_norm": 2.8337302207946777,
      "learning_rate": 0.0001865049077682737,
      "loss": 0.5877,
      "step": 800
    },
    {
      "epoch": 0.19286101246012158,
      "grad_norm": 6.606812000274658,
      "learning_rate": 0.00018646576266184893,
      "loss": 0.9887,
      "step": 801
    },
    {
      "epoch": 0.1931017877565762,
      "grad_norm": 2.9909074306488037,
      "learning_rate": 0.00018642656498404564,
      "loss": 0.5693,
      "step": 802
    },
    {
      "epoch": 0.19334256305303077,
      "grad_norm": 0.7477906346321106,
      "learning_rate": 0.0001863873147586961,
      "loss": 0.2322,
      "step": 803
    },
    {
      "epoch": 0.19358333834948535,
      "grad_norm": 2.028005599975586,
      "learning_rate": 0.00018634801200966453,
      "loss": 0.3557,
      "step": 804
    },
    {
      "epoch": 0.19382411364593993,
      "grad_norm": 5.629332065582275,
      "learning_rate": 0.00018630865676084714,
      "loss": 0.6842,
      "step": 805
    },
    {
      "epoch": 0.19406488894239451,
      "grad_norm": 0.9226589202880859,
      "learning_rate": 0.000186269249036172,
      "loss": 0.2885,
      "step": 806
    },
    {
      "epoch": 0.1943056642388491,
      "grad_norm": 1.8051038980484009,
      "learning_rate": 0.00018622978885959906,
      "loss": 0.8416,
      "step": 807
    },
    {
      "epoch": 0.19454643953530368,
      "grad_norm": 4.140893936157227,
      "learning_rate": 0.0001861902762551202,
      "loss": 1.0417,
      "step": 808
    },
    {
      "epoch": 0.19478721483175826,
      "grad_norm": 7.981260776519775,
      "learning_rate": 0.0001861507112467592,
      "loss": 0.525,
      "step": 809
    },
    {
      "epoch": 0.19502799012821284,
      "grad_norm": 5.369372367858887,
      "learning_rate": 0.0001861110938585717,
      "loss": 0.5619,
      "step": 810
    },
    {
      "epoch": 0.19526876542466742,
      "grad_norm": 1.8795945644378662,
      "learning_rate": 0.0001860714241146451,
      "loss": 1.0825,
      "step": 811
    },
    {
      "epoch": 0.195509540721122,
      "grad_norm": 3.486668586730957,
      "learning_rate": 0.0001860317020390987,
      "loss": 0.3657,
      "step": 812
    },
    {
      "epoch": 0.1957503160175766,
      "grad_norm": 1.3779692649841309,
      "learning_rate": 0.00018599192765608364,
      "loss": 0.9127,
      "step": 813
    },
    {
      "epoch": 0.19599109131403117,
      "grad_norm": 2.563727617263794,
      "learning_rate": 0.00018595210098978283,
      "loss": 0.5109,
      "step": 814
    },
    {
      "epoch": 0.19623186661048578,
      "grad_norm": 0.7977485656738281,
      "learning_rate": 0.00018591222206441096,
      "loss": 0.5252,
      "step": 815
    },
    {
      "epoch": 0.19647264190694036,
      "grad_norm": 4.5069475173950195,
      "learning_rate": 0.0001858722909042145,
      "loss": 0.3426,
      "step": 816
    },
    {
      "epoch": 0.19671341720339494,
      "grad_norm": 6.430407524108887,
      "learning_rate": 0.00018583230753347173,
      "loss": 0.9264,
      "step": 817
    },
    {
      "epoch": 0.19695419249984952,
      "grad_norm": 2.3652713298797607,
      "learning_rate": 0.00018579227197649257,
      "loss": 0.6739,
      "step": 818
    },
    {
      "epoch": 0.1971949677963041,
      "grad_norm": 2.2648465633392334,
      "learning_rate": 0.00018575218425761876,
      "loss": 0.3986,
      "step": 819
    },
    {
      "epoch": 0.1974357430927587,
      "grad_norm": 2.1836869716644287,
      "learning_rate": 0.0001857120444012237,
      "loss": 0.2466,
      "step": 820
    },
    {
      "epoch": 0.19767651838921327,
      "grad_norm": 1.898180603981018,
      "learning_rate": 0.00018567185243171256,
      "loss": 0.5558,
      "step": 821
    },
    {
      "epoch": 0.19791729368566785,
      "grad_norm": 0.8913256525993347,
      "learning_rate": 0.00018563160837352212,
      "loss": 0.6096,
      "step": 822
    },
    {
      "epoch": 0.19815806898212243,
      "grad_norm": 3.458024024963379,
      "learning_rate": 0.00018559131225112085,
      "loss": 0.7502,
      "step": 823
    },
    {
      "epoch": 0.198398844278577,
      "grad_norm": 3.377265691757202,
      "learning_rate": 0.00018555096408900889,
      "loss": 0.9659,
      "step": 824
    },
    {
      "epoch": 0.1986396195750316,
      "grad_norm": 5.404399394989014,
      "learning_rate": 0.00018551056391171803,
      "loss": 0.8436,
      "step": 825
    },
    {
      "epoch": 0.19888039487148618,
      "grad_norm": 2.176090717315674,
      "learning_rate": 0.00018547011174381163,
      "loss": 0.6543,
      "step": 826
    },
    {
      "epoch": 0.19912117016794076,
      "grad_norm": 1.4764220714569092,
      "learning_rate": 0.00018542960760988475,
      "loss": 0.4371,
      "step": 827
    },
    {
      "epoch": 0.19936194546439534,
      "grad_norm": 4.111733913421631,
      "learning_rate": 0.00018538905153456394,
      "loss": 0.7307,
      "step": 828
    },
    {
      "epoch": 0.19960272076084995,
      "grad_norm": 3.4664177894592285,
      "learning_rate": 0.0001853484435425074,
      "loss": 0.8896,
      "step": 829
    },
    {
      "epoch": 0.19984349605730453,
      "grad_norm": 1.9064959287643433,
      "learning_rate": 0.00018530778365840497,
      "loss": 0.5491,
      "step": 830
    },
    {
      "epoch": 0.2000842713537591,
      "grad_norm": 1.8238356113433838,
      "learning_rate": 0.00018526707190697782,
      "loss": 0.564,
      "step": 831
    },
    {
      "epoch": 0.2003250466502137,
      "grad_norm": 1.4021512269973755,
      "learning_rate": 0.00018522630831297886,
      "loss": 0.2522,
      "step": 832
    },
    {
      "epoch": 0.20056582194666828,
      "grad_norm": 1.9710665941238403,
      "learning_rate": 0.0001851854929011924,
      "loss": 0.2168,
      "step": 833
    },
    {
      "epoch": 0.20080659724312286,
      "grad_norm": 1.932867407798767,
      "learning_rate": 0.00018514462569643435,
      "loss": 0.5669,
      "step": 834
    },
    {
      "epoch": 0.20104737253957744,
      "grad_norm": 1.412558674812317,
      "learning_rate": 0.00018510370672355204,
      "loss": 0.5655,
      "step": 835
    },
    {
      "epoch": 0.20128814783603202,
      "grad_norm": 5.750187873840332,
      "learning_rate": 0.00018506273600742433,
      "loss": 0.8122,
      "step": 836
    },
    {
      "epoch": 0.2015289231324866,
      "grad_norm": 4.016916275024414,
      "learning_rate": 0.00018502171357296144,
      "loss": 0.5478,
      "step": 837
    },
    {
      "epoch": 0.20176969842894119,
      "grad_norm": 1.5730372667312622,
      "learning_rate": 0.00018498063944510516,
      "loss": 0.3524,
      "step": 838
    },
    {
      "epoch": 0.20201047372539577,
      "grad_norm": 1.1213641166687012,
      "learning_rate": 0.0001849395136488286,
      "loss": 0.386,
      "step": 839
    },
    {
      "epoch": 0.20225124902185035,
      "grad_norm": 1.455862045288086,
      "learning_rate": 0.00018489833620913642,
      "loss": 0.2709,
      "step": 840
    },
    {
      "epoch": 0.20249202431830493,
      "grad_norm": 3.3921029567718506,
      "learning_rate": 0.0001848571071510645,
      "loss": 0.2738,
      "step": 841
    },
    {
      "epoch": 0.2027327996147595,
      "grad_norm": 1.9654597043991089,
      "learning_rate": 0.00018481582649968028,
      "loss": 0.5441,
      "step": 842
    },
    {
      "epoch": 0.20297357491121412,
      "grad_norm": 8.712904930114746,
      "learning_rate": 0.00018477449428008246,
      "loss": 0.5047,
      "step": 843
    },
    {
      "epoch": 0.2032143502076687,
      "grad_norm": 4.064781665802002,
      "learning_rate": 0.0001847331105174011,
      "loss": 0.6401,
      "step": 844
    },
    {
      "epoch": 0.20345512550412329,
      "grad_norm": 10.879172325134277,
      "learning_rate": 0.0001846916752367976,
      "loss": 0.7271,
      "step": 845
    },
    {
      "epoch": 0.20369590080057787,
      "grad_norm": 1.46236252784729,
      "learning_rate": 0.00018465018846346482,
      "loss": 0.3446,
      "step": 846
    },
    {
      "epoch": 0.20393667609703245,
      "grad_norm": 1.9737117290496826,
      "learning_rate": 0.0001846086502226267,
      "loss": 0.5821,
      "step": 847
    },
    {
      "epoch": 0.20417745139348703,
      "grad_norm": 2.094733715057373,
      "learning_rate": 0.00018456706053953862,
      "loss": 0.2923,
      "step": 848
    },
    {
      "epoch": 0.2044182266899416,
      "grad_norm": 1.962471842765808,
      "learning_rate": 0.0001845254194394872,
      "loss": 0.756,
      "step": 849
    },
    {
      "epoch": 0.2046590019863962,
      "grad_norm": 3.4438953399658203,
      "learning_rate": 0.00018448372694779034,
      "loss": 0.4609,
      "step": 850
    },
    {
      "epoch": 0.20489977728285078,
      "grad_norm": 1.1954097747802734,
      "learning_rate": 0.00018444198308979713,
      "loss": 0.6803,
      "step": 851
    },
    {
      "epoch": 0.20514055257930536,
      "grad_norm": 2.8534281253814697,
      "learning_rate": 0.00018440018789088794,
      "loss": 0.8631,
      "step": 852
    },
    {
      "epoch": 0.20538132787575994,
      "grad_norm": 0.7627564072608948,
      "learning_rate": 0.0001843583413764744,
      "loss": 0.3575,
      "step": 853
    },
    {
      "epoch": 0.20562210317221452,
      "grad_norm": 2.954674482345581,
      "learning_rate": 0.0001843164435719992,
      "loss": 1.2672,
      "step": 854
    },
    {
      "epoch": 0.2058628784686691,
      "grad_norm": 3.019871950149536,
      "learning_rate": 0.00018427449450293635,
      "loss": 0.5769,
      "step": 855
    },
    {
      "epoch": 0.2061036537651237,
      "grad_norm": 1.2849375009536743,
      "learning_rate": 0.00018423249419479099,
      "loss": 1.0092,
      "step": 856
    },
    {
      "epoch": 0.2063444290615783,
      "grad_norm": 2.783853054046631,
      "learning_rate": 0.00018419044267309939,
      "loss": 0.6801,
      "step": 857
    },
    {
      "epoch": 0.20658520435803288,
      "grad_norm": 3.1100003719329834,
      "learning_rate": 0.0001841483399634289,
      "loss": 1.2878,
      "step": 858
    },
    {
      "epoch": 0.20682597965448746,
      "grad_norm": 1.7785344123840332,
      "learning_rate": 0.00018410618609137816,
      "loss": 0.5104,
      "step": 859
    },
    {
      "epoch": 0.20706675495094204,
      "grad_norm": 1.5101239681243896,
      "learning_rate": 0.0001840639810825768,
      "loss": 0.6032,
      "step": 860
    },
    {
      "epoch": 0.20730753024739662,
      "grad_norm": 4.038559913635254,
      "learning_rate": 0.00018402172496268554,
      "loss": 0.6457,
      "step": 861
    },
    {
      "epoch": 0.2075483055438512,
      "grad_norm": 8.409773826599121,
      "learning_rate": 0.0001839794177573962,
      "loss": 1.5939,
      "step": 862
    },
    {
      "epoch": 0.20778908084030578,
      "grad_norm": 2.086423635482788,
      "learning_rate": 0.00018393705949243164,
      "loss": 0.7663,
      "step": 863
    },
    {
      "epoch": 0.20802985613676037,
      "grad_norm": 4.5612945556640625,
      "learning_rate": 0.00018389465019354577,
      "loss": 0.5459,
      "step": 864
    },
    {
      "epoch": 0.20827063143321495,
      "grad_norm": 1.9495208263397217,
      "learning_rate": 0.0001838521898865236,
      "loss": 0.1955,
      "step": 865
    },
    {
      "epoch": 0.20851140672966953,
      "grad_norm": 2.232084035873413,
      "learning_rate": 0.00018380967859718105,
      "loss": 0.798,
      "step": 866
    },
    {
      "epoch": 0.2087521820261241,
      "grad_norm": 5.387617111206055,
      "learning_rate": 0.0001837671163513651,
      "loss": 0.7414,
      "step": 867
    },
    {
      "epoch": 0.2089929573225787,
      "grad_norm": 7.861992359161377,
      "learning_rate": 0.00018372450317495365,
      "loss": 0.9128,
      "step": 868
    },
    {
      "epoch": 0.20923373261903327,
      "grad_norm": 2.3675897121429443,
      "learning_rate": 0.00018368183909385567,
      "loss": 0.6167,
      "step": 869
    },
    {
      "epoch": 0.20947450791548788,
      "grad_norm": 3.206550121307373,
      "learning_rate": 0.00018363912413401097,
      "loss": 0.918,
      "step": 870
    },
    {
      "epoch": 0.20971528321194247,
      "grad_norm": 11.829947471618652,
      "learning_rate": 0.00018359635832139034,
      "loss": 1.1065,
      "step": 871
    },
    {
      "epoch": 0.20995605850839705,
      "grad_norm": 2.4670798778533936,
      "learning_rate": 0.00018355354168199552,
      "loss": 0.52,
      "step": 872
    },
    {
      "epoch": 0.21019683380485163,
      "grad_norm": 2.387666702270508,
      "learning_rate": 0.00018351067424185913,
      "loss": 0.3961,
      "step": 873
    },
    {
      "epoch": 0.2104376091013062,
      "grad_norm": 0.41803881525993347,
      "learning_rate": 0.00018346775602704464,
      "loss": 0.1675,
      "step": 874
    },
    {
      "epoch": 0.2106783843977608,
      "grad_norm": 5.301272869110107,
      "learning_rate": 0.0001834247870636464,
      "loss": 1.194,
      "step": 875
    },
    {
      "epoch": 0.21091915969421537,
      "grad_norm": 2.4999866485595703,
      "learning_rate": 0.0001833817673777897,
      "loss": 0.1707,
      "step": 876
    },
    {
      "epoch": 0.21115993499066996,
      "grad_norm": 1.3982088565826416,
      "learning_rate": 0.00018333869699563055,
      "loss": 1.0266,
      "step": 877
    },
    {
      "epoch": 0.21140071028712454,
      "grad_norm": 3.187394380569458,
      "learning_rate": 0.00018329557594335585,
      "loss": 1.0817,
      "step": 878
    },
    {
      "epoch": 0.21164148558357912,
      "grad_norm": 3.2300422191619873,
      "learning_rate": 0.00018325240424718335,
      "loss": 0.6478,
      "step": 879
    },
    {
      "epoch": 0.2118822608800337,
      "grad_norm": 3.521116018295288,
      "learning_rate": 0.00018320918193336148,
      "loss": 0.8387,
      "step": 880
    },
    {
      "epoch": 0.21212303617648828,
      "grad_norm": 9.480287551879883,
      "learning_rate": 0.00018316590902816952,
      "loss": 0.9253,
      "step": 881
    },
    {
      "epoch": 0.21236381147294286,
      "grad_norm": 2.395949602127075,
      "learning_rate": 0.0001831225855579175,
      "loss": 0.8792,
      "step": 882
    },
    {
      "epoch": 0.21260458676939745,
      "grad_norm": 1.681579351425171,
      "learning_rate": 0.0001830792115489462,
      "loss": 0.9965,
      "step": 883
    },
    {
      "epoch": 0.21284536206585206,
      "grad_norm": 1.3200875520706177,
      "learning_rate": 0.00018303578702762705,
      "loss": 0.2478,
      "step": 884
    },
    {
      "epoch": 0.21308613736230664,
      "grad_norm": 2.904762029647827,
      "learning_rate": 0.00018299231202036233,
      "loss": 0.4818,
      "step": 885
    },
    {
      "epoch": 0.21332691265876122,
      "grad_norm": 2.1330971717834473,
      "learning_rate": 0.00018294878655358493,
      "loss": 0.1786,
      "step": 886
    },
    {
      "epoch": 0.2135676879552158,
      "grad_norm": 4.824681758880615,
      "learning_rate": 0.0001829052106537584,
      "loss": 0.8048,
      "step": 887
    },
    {
      "epoch": 0.21380846325167038,
      "grad_norm": 2.336089849472046,
      "learning_rate": 0.000182861584347377,
      "loss": 0.7041,
      "step": 888
    },
    {
      "epoch": 0.21404923854812496,
      "grad_norm": 2.5671005249023438,
      "learning_rate": 0.00018281790766096564,
      "loss": 0.6426,
      "step": 889
    },
    {
      "epoch": 0.21429001384457955,
      "grad_norm": 18.460041046142578,
      "learning_rate": 0.00018277418062107986,
      "loss": 0.9763,
      "step": 890
    },
    {
      "epoch": 0.21453078914103413,
      "grad_norm": 2.5273513793945312,
      "learning_rate": 0.00018273040325430574,
      "loss": 0.5831,
      "step": 891
    },
    {
      "epoch": 0.2147715644374887,
      "grad_norm": 1.38306725025177,
      "learning_rate": 0.00018268657558726003,
      "loss": 0.8044,
      "step": 892
    },
    {
      "epoch": 0.2150123397339433,
      "grad_norm": 1.9609812498092651,
      "learning_rate": 0.00018264269764659013,
      "loss": 0.3049,
      "step": 893
    },
    {
      "epoch": 0.21525311503039787,
      "grad_norm": 4.538389205932617,
      "learning_rate": 0.0001825987694589738,
      "loss": 0.8865,
      "step": 894
    },
    {
      "epoch": 0.21549389032685246,
      "grad_norm": 2.368454933166504,
      "learning_rate": 0.00018255479105111957,
      "loss": 1.0822,
      "step": 895
    },
    {
      "epoch": 0.21573466562330704,
      "grad_norm": 4.19332218170166,
      "learning_rate": 0.00018251076244976637,
      "loss": 1.0274,
      "step": 896
    },
    {
      "epoch": 0.21597544091976162,
      "grad_norm": 0.907124400138855,
      "learning_rate": 0.00018246668368168372,
      "loss": 0.5454,
      "step": 897
    },
    {
      "epoch": 0.21621621621621623,
      "grad_norm": 2.2195355892181396,
      "learning_rate": 0.0001824225547736716,
      "loss": 0.4168,
      "step": 898
    },
    {
      "epoch": 0.2164569915126708,
      "grad_norm": 4.278376579284668,
      "learning_rate": 0.00018237837575256044,
      "loss": 0.6395,
      "step": 899
    },
    {
      "epoch": 0.2166977668091254,
      "grad_norm": 3.1869797706604004,
      "learning_rate": 0.00018233414664521123,
      "loss": 0.9863,
      "step": 900
    },
    {
      "epoch": 0.21693854210557997,
      "grad_norm": 1.9933998584747314,
      "learning_rate": 0.00018228986747851537,
      "loss": 0.6143,
      "step": 901
    },
    {
      "epoch": 0.21717931740203456,
      "grad_norm": 1.5613797903060913,
      "learning_rate": 0.00018224553827939468,
      "loss": 0.4492,
      "step": 902
    },
    {
      "epoch": 0.21742009269848914,
      "grad_norm": 2.306579351425171,
      "learning_rate": 0.00018220115907480143,
      "loss": 0.5864,
      "step": 903
    },
    {
      "epoch": 0.21766086799494372,
      "grad_norm": 3.8171541690826416,
      "learning_rate": 0.00018215672989171824,
      "loss": 0.8157,
      "step": 904
    },
    {
      "epoch": 0.2179016432913983,
      "grad_norm": 1.4388493299484253,
      "learning_rate": 0.00018211225075715816,
      "loss": 0.8506,
      "step": 905
    },
    {
      "epoch": 0.21814241858785288,
      "grad_norm": 1.82477867603302,
      "learning_rate": 0.00018206772169816467,
      "loss": 0.7865,
      "step": 906
    },
    {
      "epoch": 0.21838319388430746,
      "grad_norm": 3.2749521732330322,
      "learning_rate": 0.00018202314274181144,
      "loss": 1.3825,
      "step": 907
    },
    {
      "epoch": 0.21862396918076205,
      "grad_norm": 1.8761945962905884,
      "learning_rate": 0.00018197851391520264,
      "loss": 0.8722,
      "step": 908
    },
    {
      "epoch": 0.21886474447721663,
      "grad_norm": 1.6125880479812622,
      "learning_rate": 0.0001819338352454727,
      "loss": 0.5524,
      "step": 909
    },
    {
      "epoch": 0.2191055197736712,
      "grad_norm": 1.2524000406265259,
      "learning_rate": 0.0001818891067597863,
      "loss": 0.8105,
      "step": 910
    },
    {
      "epoch": 0.21934629507012582,
      "grad_norm": 3.3656504154205322,
      "learning_rate": 0.0001818443284853385,
      "loss": 1.6029,
      "step": 911
    },
    {
      "epoch": 0.2195870703665804,
      "grad_norm": 1.9755463600158691,
      "learning_rate": 0.00018179950044935458,
      "loss": 0.401,
      "step": 912
    },
    {
      "epoch": 0.21982784566303498,
      "grad_norm": 3.240755081176758,
      "learning_rate": 0.0001817546226790901,
      "loss": 1.0564,
      "step": 913
    },
    {
      "epoch": 0.22006862095948956,
      "grad_norm": 5.947300910949707,
      "learning_rate": 0.00018170969520183084,
      "loss": 0.4548,
      "step": 914
    },
    {
      "epoch": 0.22030939625594415,
      "grad_norm": 3.0205721855163574,
      "learning_rate": 0.0001816647180448928,
      "loss": 0.8396,
      "step": 915
    },
    {
      "epoch": 0.22055017155239873,
      "grad_norm": 1.6607885360717773,
      "learning_rate": 0.0001816196912356222,
      "loss": 1.1035,
      "step": 916
    },
    {
      "epoch": 0.2207909468488533,
      "grad_norm": 1.3007737398147583,
      "learning_rate": 0.0001815746148013954,
      "loss": 0.1121,
      "step": 917
    },
    {
      "epoch": 0.2210317221453079,
      "grad_norm": 2.658994674682617,
      "learning_rate": 0.00018152948876961906,
      "loss": 0.3838,
      "step": 918
    },
    {
      "epoch": 0.22127249744176247,
      "grad_norm": 1.1010584831237793,
      "learning_rate": 0.00018148431316772983,
      "loss": 0.1575,
      "step": 919
    },
    {
      "epoch": 0.22151327273821705,
      "grad_norm": 4.701428413391113,
      "learning_rate": 0.0001814390880231946,
      "loss": 0.4306,
      "step": 920
    },
    {
      "epoch": 0.22175404803467164,
      "grad_norm": 3.2852671146392822,
      "learning_rate": 0.0001813938133635104,
      "loss": 0.2974,
      "step": 921
    },
    {
      "epoch": 0.22199482333112622,
      "grad_norm": 3.092611312866211,
      "learning_rate": 0.0001813484892162043,
      "loss": 0.7887,
      "step": 922
    },
    {
      "epoch": 0.2222355986275808,
      "grad_norm": 2.474486827850342,
      "learning_rate": 0.00018130311560883344,
      "loss": 0.7599,
      "step": 923
    },
    {
      "epoch": 0.22247637392403538,
      "grad_norm": 5.097280025482178,
      "learning_rate": 0.00018125769256898511,
      "loss": 0.6548,
      "step": 924
    },
    {
      "epoch": 0.22271714922049,
      "grad_norm": 3.1248862743377686,
      "learning_rate": 0.00018121222012427665,
      "loss": 1.0051,
      "step": 925
    },
    {
      "epoch": 0.22295792451694457,
      "grad_norm": 4.130378723144531,
      "learning_rate": 0.00018116669830235536,
      "loss": 0.8515,
      "step": 926
    },
    {
      "epoch": 0.22319869981339915,
      "grad_norm": 3.8639516830444336,
      "learning_rate": 0.00018112112713089863,
      "loss": 0.3418,
      "step": 927
    },
    {
      "epoch": 0.22343947510985374,
      "grad_norm": 5.733872890472412,
      "learning_rate": 0.00018107550663761386,
      "loss": 0.4249,
      "step": 928
    },
    {
      "epoch": 0.22368025040630832,
      "grad_norm": 2.717703104019165,
      "learning_rate": 0.0001810298368502384,
      "loss": 0.3455,
      "step": 929
    },
    {
      "epoch": 0.2239210257027629,
      "grad_norm": 4.0550689697265625,
      "learning_rate": 0.00018098411779653953,
      "loss": 0.6515,
      "step": 930
    },
    {
      "epoch": 0.22416180099921748,
      "grad_norm": 1.4261348247528076,
      "learning_rate": 0.00018093834950431458,
      "loss": 0.7618,
      "step": 931
    },
    {
      "epoch": 0.22440257629567206,
      "grad_norm": 1.7245268821716309,
      "learning_rate": 0.0001808925320013908,
      "loss": 0.7967,
      "step": 932
    },
    {
      "epoch": 0.22464335159212664,
      "grad_norm": 4.139218807220459,
      "learning_rate": 0.0001808466653156253,
      "loss": 0.7014,
      "step": 933
    },
    {
      "epoch": 0.22488412688858123,
      "grad_norm": 2.1172738075256348,
      "learning_rate": 0.00018080074947490516,
      "loss": 0.4765,
      "step": 934
    },
    {
      "epoch": 0.2251249021850358,
      "grad_norm": 4.761689186096191,
      "learning_rate": 0.00018075478450714724,
      "loss": 0.699,
      "step": 935
    },
    {
      "epoch": 0.2253656774814904,
      "grad_norm": 2.6363584995269775,
      "learning_rate": 0.00018070877044029846,
      "loss": 0.8263,
      "step": 936
    },
    {
      "epoch": 0.22560645277794497,
      "grad_norm": 1.930909276008606,
      "learning_rate": 0.00018066270730233538,
      "loss": 0.6952,
      "step": 937
    },
    {
      "epoch": 0.22584722807439955,
      "grad_norm": 0.8242762684822083,
      "learning_rate": 0.00018061659512126453,
      "loss": 0.5675,
      "step": 938
    },
    {
      "epoch": 0.22608800337085416,
      "grad_norm": 1.3294146060943604,
      "learning_rate": 0.0001805704339251222,
      "loss": 0.5123,
      "step": 939
    },
    {
      "epoch": 0.22632877866730874,
      "grad_norm": 0.8458835482597351,
      "learning_rate": 0.00018052422374197454,
      "loss": 0.2988,
      "step": 940
    },
    {
      "epoch": 0.22656955396376333,
      "grad_norm": 1.0856271982192993,
      "learning_rate": 0.00018047796459991742,
      "loss": 0.7522,
      "step": 941
    },
    {
      "epoch": 0.2268103292602179,
      "grad_norm": 5.306552410125732,
      "learning_rate": 0.00018043165652707649,
      "loss": 0.7063,
      "step": 942
    },
    {
      "epoch": 0.2270511045566725,
      "grad_norm": 5.354522228240967,
      "learning_rate": 0.00018038529955160718,
      "loss": 0.7462,
      "step": 943
    },
    {
      "epoch": 0.22729187985312707,
      "grad_norm": 1.556826114654541,
      "learning_rate": 0.00018033889370169465,
      "loss": 0.7949,
      "step": 944
    },
    {
      "epoch": 0.22753265514958165,
      "grad_norm": 0.9913277626037598,
      "learning_rate": 0.00018029243900555373,
      "loss": 0.5612,
      "step": 945
    },
    {
      "epoch": 0.22777343044603623,
      "grad_norm": 1.7368444204330444,
      "learning_rate": 0.000180245935491429,
      "loss": 0.3213,
      "step": 946
    },
    {
      "epoch": 0.22801420574249082,
      "grad_norm": 2.660506010055542,
      "learning_rate": 0.0001801993831875947,
      "loss": 0.39,
      "step": 947
    },
    {
      "epoch": 0.2282549810389454,
      "grad_norm": 3.1855568885803223,
      "learning_rate": 0.0001801527821223547,
      "loss": 0.4602,
      "step": 948
    },
    {
      "epoch": 0.22849575633539998,
      "grad_norm": 2.8115875720977783,
      "learning_rate": 0.0001801061323240426,
      "loss": 0.3065,
      "step": 949
    },
    {
      "epoch": 0.22873653163185456,
      "grad_norm": 18.071075439453125,
      "learning_rate": 0.00018005943382102158,
      "loss": 0.8023,
      "step": 950
    },
    {
      "epoch": 0.22897730692830914,
      "grad_norm": 1.1732177734375,
      "learning_rate": 0.00018001268664168439,
      "loss": 0.8773,
      "step": 951
    },
    {
      "epoch": 0.22921808222476375,
      "grad_norm": 2.2807600498199463,
      "learning_rate": 0.00017996589081445348,
      "loss": 0.7107,
      "step": 952
    },
    {
      "epoch": 0.22945885752121833,
      "grad_norm": 2.0999910831451416,
      "learning_rate": 0.00017991904636778077,
      "loss": 0.6253,
      "step": 953
    },
    {
      "epoch": 0.22969963281767292,
      "grad_norm": 13.20639419555664,
      "learning_rate": 0.00017987215333014782,
      "loss": 0.9696,
      "step": 954
    },
    {
      "epoch": 0.2299404081141275,
      "grad_norm": 1.124551773071289,
      "learning_rate": 0.00017982521173006568,
      "loss": 0.3418,
      "step": 955
    },
    {
      "epoch": 0.23018118341058208,
      "grad_norm": 0.3517683148384094,
      "learning_rate": 0.00017977822159607497,
      "loss": 0.2291,
      "step": 956
    },
    {
      "epoch": 0.23042195870703666,
      "grad_norm": 2.7812604904174805,
      "learning_rate": 0.0001797311829567458,
      "loss": 0.966,
      "step": 957
    },
    {
      "epoch": 0.23066273400349124,
      "grad_norm": 1.8114944696426392,
      "learning_rate": 0.0001796840958406777,
      "loss": 0.5787,
      "step": 958
    },
    {
      "epoch": 0.23090350929994582,
      "grad_norm": 2.012598991394043,
      "learning_rate": 0.00017963696027649986,
      "loss": 1.1201,
      "step": 959
    },
    {
      "epoch": 0.2311442845964004,
      "grad_norm": 1.5761219263076782,
      "learning_rate": 0.00017958977629287074,
      "loss": 0.9017,
      "step": 960
    },
    {
      "epoch": 0.231385059892855,
      "grad_norm": 1.2920587062835693,
      "learning_rate": 0.0001795425439184783,
      "loss": 0.319,
      "step": 961
    },
    {
      "epoch": 0.23162583518930957,
      "grad_norm": 6.733016014099121,
      "learning_rate": 0.00017949526318203997,
      "loss": 0.7354,
      "step": 962
    },
    {
      "epoch": 0.23186661048576415,
      "grad_norm": 1.5943965911865234,
      "learning_rate": 0.0001794479341123025,
      "loss": 0.2783,
      "step": 963
    },
    {
      "epoch": 0.23210738578221873,
      "grad_norm": 1.023605227470398,
      "learning_rate": 0.00017940055673804208,
      "loss": 0.5166,
      "step": 964
    },
    {
      "epoch": 0.23234816107867332,
      "grad_norm": 0.7512199282646179,
      "learning_rate": 0.00017935313108806427,
      "loss": 0.1101,
      "step": 965
    },
    {
      "epoch": 0.23258893637512792,
      "grad_norm": 3.7386422157287598,
      "learning_rate": 0.000179305657191204,
      "loss": 0.6783,
      "step": 966
    },
    {
      "epoch": 0.2328297116715825,
      "grad_norm": 1.3405836820602417,
      "learning_rate": 0.00017925813507632546,
      "loss": 0.5868,
      "step": 967
    },
    {
      "epoch": 0.2330704869680371,
      "grad_norm": 3.388740301132202,
      "learning_rate": 0.00017921056477232224,
      "loss": 0.5516,
      "step": 968
    },
    {
      "epoch": 0.23331126226449167,
      "grad_norm": 2.8512704372406006,
      "learning_rate": 0.00017916294630811717,
      "loss": 0.383,
      "step": 969
    },
    {
      "epoch": 0.23355203756094625,
      "grad_norm": 1.5921225547790527,
      "learning_rate": 0.00017911527971266238,
      "loss": 0.4268,
      "step": 970
    },
    {
      "epoch": 0.23379281285740083,
      "grad_norm": 8.35683536529541,
      "learning_rate": 0.00017906756501493925,
      "loss": 0.3925,
      "step": 971
    },
    {
      "epoch": 0.23403358815385542,
      "grad_norm": 1.589657187461853,
      "learning_rate": 0.0001790198022439585,
      "loss": 0.5233,
      "step": 972
    },
    {
      "epoch": 0.23427436345031,
      "grad_norm": 2.5263054370880127,
      "learning_rate": 0.00017897199142875994,
      "loss": 0.3526,
      "step": 973
    },
    {
      "epoch": 0.23451513874676458,
      "grad_norm": 1.696166753768921,
      "learning_rate": 0.00017892413259841265,
      "loss": 0.3805,
      "step": 974
    },
    {
      "epoch": 0.23475591404321916,
      "grad_norm": 3.3580451011657715,
      "learning_rate": 0.0001788762257820149,
      "loss": 0.66,
      "step": 975
    },
    {
      "epoch": 0.23499668933967374,
      "grad_norm": 2.4022610187530518,
      "learning_rate": 0.0001788282710086942,
      "loss": 0.4526,
      "step": 976
    },
    {
      "epoch": 0.23523746463612832,
      "grad_norm": 2.932914972305298,
      "learning_rate": 0.00017878026830760714,
      "loss": 0.8118,
      "step": 977
    },
    {
      "epoch": 0.2354782399325829,
      "grad_norm": 2.4748735427856445,
      "learning_rate": 0.00017873221770793943,
      "loss": 0.7625,
      "step": 978
    },
    {
      "epoch": 0.2357190152290375,
      "grad_norm": 7.512228488922119,
      "learning_rate": 0.00017868411923890597,
      "loss": 0.8987,
      "step": 979
    },
    {
      "epoch": 0.2359597905254921,
      "grad_norm": 1.6160115003585815,
      "learning_rate": 0.00017863597292975075,
      "loss": 0.5894,
      "step": 980
    },
    {
      "epoch": 0.23620056582194668,
      "grad_norm": 1.4038505554199219,
      "learning_rate": 0.00017858777880974677,
      "loss": 0.411,
      "step": 981
    },
    {
      "epoch": 0.23644134111840126,
      "grad_norm": 0.987040102481842,
      "learning_rate": 0.00017853953690819628,
      "loss": 0.4793,
      "step": 982
    },
    {
      "epoch": 0.23668211641485584,
      "grad_norm": 12.90198802947998,
      "learning_rate": 0.00017849124725443033,
      "loss": 0.7816,
      "step": 983
    },
    {
      "epoch": 0.23692289171131042,
      "grad_norm": 1.474013090133667,
      "learning_rate": 0.00017844290987780926,
      "loss": 0.8878,
      "step": 984
    },
    {
      "epoch": 0.237163667007765,
      "grad_norm": 4.9217963218688965,
      "learning_rate": 0.0001783945248077222,
      "loss": 0.7382,
      "step": 985
    },
    {
      "epoch": 0.2374044423042196,
      "grad_norm": 3.483311891555786,
      "learning_rate": 0.0001783460920735875,
      "loss": 1.2593,
      "step": 986
    },
    {
      "epoch": 0.23764521760067417,
      "grad_norm": 4.8503594398498535,
      "learning_rate": 0.00017829761170485228,
      "loss": 1.5008,
      "step": 987
    },
    {
      "epoch": 0.23788599289712875,
      "grad_norm": 1.4968628883361816,
      "learning_rate": 0.0001782490837309927,
      "loss": 0.62,
      "step": 988
    },
    {
      "epoch": 0.23812676819358333,
      "grad_norm": 2.4329562187194824,
      "learning_rate": 0.00017820050818151395,
      "loss": 0.7213,
      "step": 989
    },
    {
      "epoch": 0.23836754349003791,
      "grad_norm": 7.601263523101807,
      "learning_rate": 0.00017815188508595002,
      "loss": 0.4269,
      "step": 990
    },
    {
      "epoch": 0.2386083187864925,
      "grad_norm": 2.8010635375976562,
      "learning_rate": 0.00017810321447386387,
      "loss": 0.9812,
      "step": 991
    },
    {
      "epoch": 0.23884909408294708,
      "grad_norm": 4.355586051940918,
      "learning_rate": 0.0001780544963748474,
      "loss": 0.6753,
      "step": 992
    },
    {
      "epoch": 0.2390898693794017,
      "grad_norm": 1.6186625957489014,
      "learning_rate": 0.00017800573081852122,
      "loss": 0.5759,
      "step": 993
    },
    {
      "epoch": 0.23933064467585627,
      "grad_norm": 1.3594582080841064,
      "learning_rate": 0.000177956917834535,
      "loss": 0.6777,
      "step": 994
    },
    {
      "epoch": 0.23957141997231085,
      "grad_norm": 0.8430949449539185,
      "learning_rate": 0.00017790805745256704,
      "loss": 0.4463,
      "step": 995
    },
    {
      "epoch": 0.23981219526876543,
      "grad_norm": 3.564265012741089,
      "learning_rate": 0.00017785914970232467,
      "loss": 0.7162,
      "step": 996
    },
    {
      "epoch": 0.24005297056522001,
      "grad_norm": 2.442955255508423,
      "learning_rate": 0.00017781019461354385,
      "loss": 1.1975,
      "step": 997
    },
    {
      "epoch": 0.2402937458616746,
      "grad_norm": 2.008604049682617,
      "learning_rate": 0.00017776119221598938,
      "loss": 0.3523,
      "step": 998
    },
    {
      "epoch": 0.24053452115812918,
      "grad_norm": 5.036071300506592,
      "learning_rate": 0.00017771214253945488,
      "loss": 0.7299,
      "step": 999
    },
    {
      "epoch": 0.24077529645458376,
      "grad_norm": 2.059300661087036,
      "learning_rate": 0.0001776630456137626,
      "loss": 1.0976,
      "step": 1000
    },
    {
      "epoch": 0.24101607175103834,
      "grad_norm": 1.5523993968963623,
      "learning_rate": 0.0001776139014687636,
      "loss": 0.3973,
      "step": 1001
    },
    {
      "epoch": 0.24125684704749292,
      "grad_norm": 2.268207311630249,
      "learning_rate": 0.00017756471013433766,
      "loss": 0.6189,
      "step": 1002
    },
    {
      "epoch": 0.2414976223439475,
      "grad_norm": 1.0523104667663574,
      "learning_rate": 0.0001775154716403932,
      "loss": 0.6191,
      "step": 1003
    },
    {
      "epoch": 0.2417383976404021,
      "grad_norm": 1.8148690462112427,
      "learning_rate": 0.00017746618601686734,
      "loss": 0.5895,
      "step": 1004
    },
    {
      "epoch": 0.24197917293685667,
      "grad_norm": 2.2843098640441895,
      "learning_rate": 0.00017741685329372584,
      "loss": 0.4135,
      "step": 1005
    },
    {
      "epoch": 0.24221994823331125,
      "grad_norm": 1.5911093950271606,
      "learning_rate": 0.00017736747350096313,
      "loss": 0.3805,
      "step": 1006
    },
    {
      "epoch": 0.24246072352976586,
      "grad_norm": 1.609438180923462,
      "learning_rate": 0.00017731804666860218,
      "loss": 0.4508,
      "step": 1007
    },
    {
      "epoch": 0.24270149882622044,
      "grad_norm": 9.2236328125,
      "learning_rate": 0.0001772685728266947,
      "loss": 0.7403,
      "step": 1008
    },
    {
      "epoch": 0.24294227412267502,
      "grad_norm": 4.165558815002441,
      "learning_rate": 0.00017721905200532084,
      "loss": 0.4195,
      "step": 1009
    },
    {
      "epoch": 0.2431830494191296,
      "grad_norm": 2.679929494857788,
      "learning_rate": 0.00017716948423458938,
      "loss": 0.696,
      "step": 1010
    },
    {
      "epoch": 0.24342382471558419,
      "grad_norm": 2.558372974395752,
      "learning_rate": 0.00017711986954463765,
      "loss": 0.8344,
      "step": 1011
    },
    {
      "epoch": 0.24366460001203877,
      "grad_norm": 2.897308588027954,
      "learning_rate": 0.0001770702079656315,
      "loss": 0.4203,
      "step": 1012
    },
    {
      "epoch": 0.24390537530849335,
      "grad_norm": 3.2203593254089355,
      "learning_rate": 0.00017702049952776522,
      "loss": 0.7664,
      "step": 1013
    },
    {
      "epoch": 0.24414615060494793,
      "grad_norm": 4.204813480377197,
      "learning_rate": 0.00017697074426126173,
      "loss": 0.3801,
      "step": 1014
    },
    {
      "epoch": 0.2443869259014025,
      "grad_norm": 0.8308073878288269,
      "learning_rate": 0.0001769209421963723,
      "loss": 0.2596,
      "step": 1015
    },
    {
      "epoch": 0.2446277011978571,
      "grad_norm": 2.2909529209136963,
      "learning_rate": 0.00017687109336337673,
      "loss": 0.3914,
      "step": 1016
    },
    {
      "epoch": 0.24486847649431168,
      "grad_norm": 3.4535796642303467,
      "learning_rate": 0.00017682119779258317,
      "loss": 0.7128,
      "step": 1017
    },
    {
      "epoch": 0.24510925179076626,
      "grad_norm": 2.2746803760528564,
      "learning_rate": 0.0001767712555143283,
      "loss": 0.6153,
      "step": 1018
    },
    {
      "epoch": 0.24535002708722084,
      "grad_norm": 3.151444435119629,
      "learning_rate": 0.00017672126655897708,
      "loss": 0.8,
      "step": 1019
    },
    {
      "epoch": 0.24559080238367542,
      "grad_norm": 7.057896614074707,
      "learning_rate": 0.00017667123095692296,
      "loss": 0.4853,
      "step": 1020
    },
    {
      "epoch": 0.24583157768013003,
      "grad_norm": 1.5912202596664429,
      "learning_rate": 0.00017662114873858768,
      "loss": 0.5406,
      "step": 1021
    },
    {
      "epoch": 0.2460723529765846,
      "grad_norm": 4.36636209487915,
      "learning_rate": 0.00017657101993442132,
      "loss": 1.5037,
      "step": 1022
    },
    {
      "epoch": 0.2463131282730392,
      "grad_norm": 3.4972012042999268,
      "learning_rate": 0.00017652084457490233,
      "loss": 0.4583,
      "step": 1023
    },
    {
      "epoch": 0.24655390356949378,
      "grad_norm": 1.963361382484436,
      "learning_rate": 0.00017647062269053745,
      "loss": 0.5212,
      "step": 1024
    },
    {
      "epoch": 0.24679467886594836,
      "grad_norm": 8.170878410339355,
      "learning_rate": 0.00017642035431186166,
      "loss": 0.3219,
      "step": 1025
    },
    {
      "epoch": 0.24703545416240294,
      "grad_norm": 5.340506076812744,
      "learning_rate": 0.00017637003946943826,
      "loss": 0.8826,
      "step": 1026
    },
    {
      "epoch": 0.24727622945885752,
      "grad_norm": 0.9775887727737427,
      "learning_rate": 0.00017631967819385885,
      "loss": 0.7689,
      "step": 1027
    },
    {
      "epoch": 0.2475170047553121,
      "grad_norm": 5.842097759246826,
      "learning_rate": 0.0001762692705157431,
      "loss": 1.3133,
      "step": 1028
    },
    {
      "epoch": 0.24775778005176669,
      "grad_norm": 3.479212999343872,
      "learning_rate": 0.00017621881646573905,
      "loss": 0.6421,
      "step": 1029
    },
    {
      "epoch": 0.24799855534822127,
      "grad_norm": 2.3998911380767822,
      "learning_rate": 0.00017616831607452288,
      "loss": 0.9605,
      "step": 1030
    },
    {
      "epoch": 0.24823933064467585,
      "grad_norm": 2.134242057800293,
      "learning_rate": 0.00017611776937279894,
      "loss": 0.5968,
      "step": 1031
    },
    {
      "epoch": 0.24848010594113043,
      "grad_norm": 1.5552438497543335,
      "learning_rate": 0.00017606717639129967,
      "loss": 0.5313,
      "step": 1032
    },
    {
      "epoch": 0.248720881237585,
      "grad_norm": 1.7223352193832397,
      "learning_rate": 0.00017601653716078583,
      "loss": 0.6771,
      "step": 1033
    },
    {
      "epoch": 0.2489616565340396,
      "grad_norm": 1.0817844867706299,
      "learning_rate": 0.00017596585171204612,
      "loss": 0.0747,
      "step": 1034
    },
    {
      "epoch": 0.2492024318304942,
      "grad_norm": 16.8873291015625,
      "learning_rate": 0.0001759151200758974,
      "loss": 0.7068,
      "step": 1035
    },
    {
      "epoch": 0.24944320712694878,
      "grad_norm": 3.909327983856201,
      "learning_rate": 0.00017586434228318462,
      "loss": 1.1171,
      "step": 1036
    },
    {
      "epoch": 0.24968398242340337,
      "grad_norm": 1.0942474603652954,
      "learning_rate": 0.00017581351836478085,
      "loss": 0.3179,
      "step": 1037
    },
    {
      "epoch": 0.24992475771985795,
      "grad_norm": 1.4328174591064453,
      "learning_rate": 0.00017576264835158706,
      "loss": 0.5279,
      "step": 1038
    },
    {
      "epoch": 0.2501655330163125,
      "grad_norm": 1.4774179458618164,
      "learning_rate": 0.0001757117322745324,
      "loss": 0.5594,
      "step": 1039
    },
    {
      "epoch": 0.2504063083127671,
      "grad_norm": 5.494201183319092,
      "learning_rate": 0.00017566077016457394,
      "loss": 0.892,
      "step": 1040
    },
    {
      "epoch": 0.2506470836092217,
      "grad_norm": 0.7356401085853577,
      "learning_rate": 0.00017560976205269673,
      "loss": 0.3253,
      "step": 1041
    },
    {
      "epoch": 0.2508878589056763,
      "grad_norm": 1.2550084590911865,
      "learning_rate": 0.00017555870796991387,
      "loss": 0.4044,
      "step": 1042
    },
    {
      "epoch": 0.2511286342021309,
      "grad_norm": 5.170292377471924,
      "learning_rate": 0.00017550760794726633,
      "loss": 0.5862,
      "step": 1043
    },
    {
      "epoch": 0.25136940949858544,
      "grad_norm": 1.0770255327224731,
      "learning_rate": 0.00017545646201582303,
      "loss": 0.7886,
      "step": 1044
    },
    {
      "epoch": 0.25161018479504005,
      "grad_norm": 1.4369720220565796,
      "learning_rate": 0.0001754052702066808,
      "loss": 0.3612,
      "step": 1045
    },
    {
      "epoch": 0.2518509600914946,
      "grad_norm": 1.905137300491333,
      "learning_rate": 0.00017535403255096444,
      "loss": 0.9435,
      "step": 1046
    },
    {
      "epoch": 0.2520917353879492,
      "grad_norm": 16.227949142456055,
      "learning_rate": 0.00017530274907982647,
      "loss": 1.7011,
      "step": 1047
    },
    {
      "epoch": 0.25233251068440377,
      "grad_norm": 1.4642868041992188,
      "learning_rate": 0.0001752514198244474,
      "loss": 0.5992,
      "step": 1048
    },
    {
      "epoch": 0.2525732859808584,
      "grad_norm": 2.769197463989258,
      "learning_rate": 0.00017520004481603554,
      "loss": 1.3272,
      "step": 1049
    },
    {
      "epoch": 0.25281406127731293,
      "grad_norm": 2.604154586791992,
      "learning_rate": 0.00017514862408582701,
      "loss": 0.6135,
      "step": 1050
    },
    {
      "epoch": 0.25305483657376754,
      "grad_norm": 4.081873416900635,
      "learning_rate": 0.00017509715766508575,
      "loss": 1.1481,
      "step": 1051
    },
    {
      "epoch": 0.2532956118702221,
      "grad_norm": 5.054668426513672,
      "learning_rate": 0.0001750456455851034,
      "loss": 0.8622,
      "step": 1052
    },
    {
      "epoch": 0.2535363871666767,
      "grad_norm": 3.3023860454559326,
      "learning_rate": 0.00017499408787719945,
      "loss": 0.6033,
      "step": 1053
    },
    {
      "epoch": 0.25377716246313126,
      "grad_norm": 1.7069976329803467,
      "learning_rate": 0.00017494248457272112,
      "loss": 0.4344,
      "step": 1054
    },
    {
      "epoch": 0.25401793775958587,
      "grad_norm": 4.372264385223389,
      "learning_rate": 0.00017489083570304333,
      "loss": 1.2165,
      "step": 1055
    },
    {
      "epoch": 0.2542587130560405,
      "grad_norm": 3.081066608428955,
      "learning_rate": 0.00017483914129956868,
      "loss": 0.8693,
      "step": 1056
    },
    {
      "epoch": 0.25449948835249503,
      "grad_norm": 3.6236472129821777,
      "learning_rate": 0.00017478740139372753,
      "loss": 0.8538,
      "step": 1057
    },
    {
      "epoch": 0.25474026364894964,
      "grad_norm": 1.3119887113571167,
      "learning_rate": 0.00017473561601697783,
      "loss": 0.6279,
      "step": 1058
    },
    {
      "epoch": 0.2549810389454042,
      "grad_norm": 1.9135150909423828,
      "learning_rate": 0.0001746837852008052,
      "loss": 0.2616,
      "step": 1059
    },
    {
      "epoch": 0.2552218142418588,
      "grad_norm": 3.3330864906311035,
      "learning_rate": 0.0001746319089767229,
      "loss": 0.5011,
      "step": 1060
    },
    {
      "epoch": 0.25546258953831336,
      "grad_norm": 1.3728001117706299,
      "learning_rate": 0.00017457998737627182,
      "loss": 0.3416,
      "step": 1061
    },
    {
      "epoch": 0.25570336483476797,
      "grad_norm": 1.3205347061157227,
      "learning_rate": 0.00017452802043102034,
      "loss": 0.4671,
      "step": 1062
    },
    {
      "epoch": 0.2559441401312225,
      "grad_norm": 1.428043246269226,
      "learning_rate": 0.00017447600817256458,
      "loss": 0.9892,
      "step": 1063
    },
    {
      "epoch": 0.25618491542767713,
      "grad_norm": 6.333396911621094,
      "learning_rate": 0.000174423950632528,
      "loss": 0.2126,
      "step": 1064
    },
    {
      "epoch": 0.2564256907241317,
      "grad_norm": 4.501138210296631,
      "learning_rate": 0.00017437184784256177,
      "loss": 1.0723,
      "step": 1065
    },
    {
      "epoch": 0.2566664660205863,
      "grad_norm": 6.165459632873535,
      "learning_rate": 0.0001743196998343445,
      "loss": 0.7911,
      "step": 1066
    },
    {
      "epoch": 0.25690724131704085,
      "grad_norm": 2.045748710632324,
      "learning_rate": 0.00017426750663958231,
      "loss": 0.6512,
      "step": 1067
    },
    {
      "epoch": 0.25714801661349546,
      "grad_norm": 15.06201457977295,
      "learning_rate": 0.00017421526829000872,
      "loss": 1.1931,
      "step": 1068
    },
    {
      "epoch": 0.25738879190995007,
      "grad_norm": 1.8321692943572998,
      "learning_rate": 0.00017416298481738482,
      "loss": 0.5883,
      "step": 1069
    },
    {
      "epoch": 0.2576295672064046,
      "grad_norm": 1.6688590049743652,
      "learning_rate": 0.00017411065625349905,
      "loss": 0.2832,
      "step": 1070
    },
    {
      "epoch": 0.25787034250285923,
      "grad_norm": 3.1032514572143555,
      "learning_rate": 0.00017405828263016734,
      "loss": 0.6419,
      "step": 1071
    },
    {
      "epoch": 0.2581111177993138,
      "grad_norm": 1.9432801008224487,
      "learning_rate": 0.00017400586397923288,
      "loss": 0.5049,
      "step": 1072
    },
    {
      "epoch": 0.2583518930957684,
      "grad_norm": 1.8286429643630981,
      "learning_rate": 0.0001739534003325664,
      "loss": 0.9415,
      "step": 1073
    },
    {
      "epoch": 0.25859266839222295,
      "grad_norm": 1.2921900749206543,
      "learning_rate": 0.00017390089172206592,
      "loss": 0.1063,
      "step": 1074
    },
    {
      "epoch": 0.25883344368867756,
      "grad_norm": 2.301280975341797,
      "learning_rate": 0.00017384833817965674,
      "loss": 0.6128,
      "step": 1075
    },
    {
      "epoch": 0.2590742189851321,
      "grad_norm": 2.117572069168091,
      "learning_rate": 0.00017379573973729163,
      "loss": 0.2485,
      "step": 1076
    },
    {
      "epoch": 0.2593149942815867,
      "grad_norm": 44.768497467041016,
      "learning_rate": 0.0001737430964269504,
      "loss": 1.1191,
      "step": 1077
    },
    {
      "epoch": 0.2595557695780413,
      "grad_norm": 3.337317943572998,
      "learning_rate": 0.00017369040828064047,
      "loss": 0.372,
      "step": 1078
    },
    {
      "epoch": 0.2597965448744959,
      "grad_norm": 2.307708263397217,
      "learning_rate": 0.00017363767533039626,
      "loss": 0.9921,
      "step": 1079
    },
    {
      "epoch": 0.26003732017095044,
      "grad_norm": 1.9676494598388672,
      "learning_rate": 0.00017358489760827954,
      "loss": 0.1324,
      "step": 1080
    },
    {
      "epoch": 0.26027809546740505,
      "grad_norm": 2.810729503631592,
      "learning_rate": 0.00017353207514637928,
      "loss": 0.5826,
      "step": 1081
    },
    {
      "epoch": 0.26051887076385966,
      "grad_norm": 0.4161555767059326,
      "learning_rate": 0.00017347920797681165,
      "loss": 0.1594,
      "step": 1082
    },
    {
      "epoch": 0.2607596460603142,
      "grad_norm": 2.319537878036499,
      "learning_rate": 0.00017342629613172005,
      "loss": 1.0077,
      "step": 1083
    },
    {
      "epoch": 0.2610004213567688,
      "grad_norm": 6.007627487182617,
      "learning_rate": 0.00017337333964327493,
      "loss": 0.7686,
      "step": 1084
    },
    {
      "epoch": 0.2612411966532234,
      "grad_norm": 1.204407811164856,
      "learning_rate": 0.00017332033854367405,
      "loss": 0.3591,
      "step": 1085
    },
    {
      "epoch": 0.261481971949678,
      "grad_norm": 1.132603406906128,
      "learning_rate": 0.00017326729286514208,
      "loss": 0.5379,
      "step": 1086
    },
    {
      "epoch": 0.26172274724613254,
      "grad_norm": 2.5757410526275635,
      "learning_rate": 0.00017321420263993102,
      "loss": 0.4672,
      "step": 1087
    },
    {
      "epoch": 0.26196352254258715,
      "grad_norm": 4.104795932769775,
      "learning_rate": 0.0001731610679003198,
      "loss": 0.9948,
      "step": 1088
    },
    {
      "epoch": 0.2622042978390417,
      "grad_norm": 2.2880449295043945,
      "learning_rate": 0.00017310788867861446,
      "loss": 0.5483,
      "step": 1089
    },
    {
      "epoch": 0.2624450731354963,
      "grad_norm": 0.9389917254447937,
      "learning_rate": 0.00017305466500714808,
      "loss": 0.4569,
      "step": 1090
    },
    {
      "epoch": 0.26268584843195086,
      "grad_norm": 1.6185779571533203,
      "learning_rate": 0.00017300139691828076,
      "loss": 0.5776,
      "step": 1091
    },
    {
      "epoch": 0.2629266237284055,
      "grad_norm": 1.6307772397994995,
      "learning_rate": 0.00017294808444439966,
      "loss": 0.3469,
      "step": 1092
    },
    {
      "epoch": 0.26316739902486,
      "grad_norm": 1.6767011880874634,
      "learning_rate": 0.00017289472761791887,
      "loss": 0.4124,
      "step": 1093
    },
    {
      "epoch": 0.26340817432131464,
      "grad_norm": 3.5887739658355713,
      "learning_rate": 0.00017284132647127947,
      "loss": 0.7729,
      "step": 1094
    },
    {
      "epoch": 0.2636489496177692,
      "grad_norm": 2.98939847946167,
      "learning_rate": 0.00017278788103694943,
      "loss": 0.547,
      "step": 1095
    },
    {
      "epoch": 0.2638897249142238,
      "grad_norm": 3.3976495265960693,
      "learning_rate": 0.00017273439134742372,
      "loss": 0.9218,
      "step": 1096
    },
    {
      "epoch": 0.2641305002106784,
      "grad_norm": 4.070367813110352,
      "learning_rate": 0.00017268085743522423,
      "loss": 0.6816,
      "step": 1097
    },
    {
      "epoch": 0.26437127550713296,
      "grad_norm": 2.875377893447876,
      "learning_rate": 0.00017262727933289965,
      "loss": 0.3219,
      "step": 1098
    },
    {
      "epoch": 0.2646120508035876,
      "grad_norm": 2.2826290130615234,
      "learning_rate": 0.0001725736570730256,
      "loss": 0.4502,
      "step": 1099
    },
    {
      "epoch": 0.2648528261000421,
      "grad_norm": 1.6275043487548828,
      "learning_rate": 0.00017251999068820456,
      "loss": 0.2921,
      "step": 1100
    },
    {
      "epoch": 0.26509360139649674,
      "grad_norm": 7.831151962280273,
      "learning_rate": 0.00017246628021106577,
      "loss": 0.6283,
      "step": 1101
    },
    {
      "epoch": 0.2653343766929513,
      "grad_norm": 2.219731569290161,
      "learning_rate": 0.00017241252567426534,
      "loss": 0.4064,
      "step": 1102
    },
    {
      "epoch": 0.2655751519894059,
      "grad_norm": 3.912492036819458,
      "learning_rate": 0.00017235872711048617,
      "loss": 1.037,
      "step": 1103
    },
    {
      "epoch": 0.26581592728586045,
      "grad_norm": 2.191307783126831,
      "learning_rate": 0.00017230488455243788,
      "loss": 0.8365,
      "step": 1104
    },
    {
      "epoch": 0.26605670258231506,
      "grad_norm": 4.49420690536499,
      "learning_rate": 0.00017225099803285692,
      "loss": 0.6711,
      "step": 1105
    },
    {
      "epoch": 0.2662974778787696,
      "grad_norm": 3.0059874057769775,
      "learning_rate": 0.00017219706758450631,
      "loss": 0.8782,
      "step": 1106
    },
    {
      "epoch": 0.2665382531752242,
      "grad_norm": 0.6084616780281067,
      "learning_rate": 0.00017214309324017598,
      "loss": 0.146,
      "step": 1107
    },
    {
      "epoch": 0.2667790284716788,
      "grad_norm": 3.9558541774749756,
      "learning_rate": 0.0001720890750326824,
      "loss": 0.4695,
      "step": 1108
    },
    {
      "epoch": 0.2670198037681334,
      "grad_norm": 3.0784409046173096,
      "learning_rate": 0.00017203501299486881,
      "loss": 0.9544,
      "step": 1109
    },
    {
      "epoch": 0.267260579064588,
      "grad_norm": 0.6500839591026306,
      "learning_rate": 0.000171980907159605,
      "loss": 0.2016,
      "step": 1110
    },
    {
      "epoch": 0.26750135436104255,
      "grad_norm": 4.700311660766602,
      "learning_rate": 0.00017192675755978748,
      "loss": 1.1171,
      "step": 1111
    },
    {
      "epoch": 0.26774212965749716,
      "grad_norm": 1.3093310594558716,
      "learning_rate": 0.00017187256422833929,
      "loss": 0.2874,
      "step": 1112
    },
    {
      "epoch": 0.2679829049539517,
      "grad_norm": 2.463928461074829,
      "learning_rate": 0.0001718183271982101,
      "loss": 0.4196,
      "step": 1113
    },
    {
      "epoch": 0.2682236802504063,
      "grad_norm": 3.2348127365112305,
      "learning_rate": 0.0001717640465023762,
      "loss": 1.1128,
      "step": 1114
    },
    {
      "epoch": 0.2684644555468609,
      "grad_norm": 2.890456438064575,
      "learning_rate": 0.00017170972217384035,
      "loss": 0.5629,
      "step": 1115
    },
    {
      "epoch": 0.2687052308433155,
      "grad_norm": 4.826290607452393,
      "learning_rate": 0.00017165535424563185,
      "loss": 0.7627,
      "step": 1116
    },
    {
      "epoch": 0.26894600613977004,
      "grad_norm": 2.350214719772339,
      "learning_rate": 0.00017160094275080648,
      "loss": 0.8664,
      "step": 1117
    },
    {
      "epoch": 0.26918678143622465,
      "grad_norm": 2.407381772994995,
      "learning_rate": 0.00017154648772244664,
      "loss": 0.5608,
      "step": 1118
    },
    {
      "epoch": 0.2694275567326792,
      "grad_norm": 4.348508834838867,
      "learning_rate": 0.00017149198919366105,
      "loss": 0.7147,
      "step": 1119
    },
    {
      "epoch": 0.2696683320291338,
      "grad_norm": 2.3149821758270264,
      "learning_rate": 0.00017143744719758499,
      "loss": 0.8603,
      "step": 1120
    },
    {
      "epoch": 0.26990910732558837,
      "grad_norm": 2.070988893508911,
      "learning_rate": 0.00017138286176738006,
      "loss": 0.3237,
      "step": 1121
    },
    {
      "epoch": 0.270149882622043,
      "grad_norm": 2.3443868160247803,
      "learning_rate": 0.00017132823293623432,
      "loss": 0.313,
      "step": 1122
    },
    {
      "epoch": 0.27039065791849753,
      "grad_norm": 2.001828908920288,
      "learning_rate": 0.0001712735607373623,
      "loss": 0.665,
      "step": 1123
    },
    {
      "epoch": 0.27063143321495214,
      "grad_norm": 8.432289123535156,
      "learning_rate": 0.00017121884520400474,
      "loss": 0.7836,
      "step": 1124
    },
    {
      "epoch": 0.27087220851140675,
      "grad_norm": 2.163132429122925,
      "learning_rate": 0.00017116408636942888,
      "loss": 0.2619,
      "step": 1125
    },
    {
      "epoch": 0.2711129838078613,
      "grad_norm": 1.5865484476089478,
      "learning_rate": 0.0001711092842669281,
      "loss": 0.4622,
      "step": 1126
    },
    {
      "epoch": 0.2713537591043159,
      "grad_norm": 2.0779945850372314,
      "learning_rate": 0.0001710544389298223,
      "loss": 0.3762,
      "step": 1127
    },
    {
      "epoch": 0.27159453440077047,
      "grad_norm": 7.750448703765869,
      "learning_rate": 0.00017099955039145758,
      "loss": 0.7578,
      "step": 1128
    },
    {
      "epoch": 0.2718353096972251,
      "grad_norm": 1.3454210758209229,
      "learning_rate": 0.00017094461868520622,
      "loss": 0.5281,
      "step": 1129
    },
    {
      "epoch": 0.27207608499367963,
      "grad_norm": 1.0360485315322876,
      "learning_rate": 0.0001708896438444669,
      "loss": 0.7575,
      "step": 1130
    },
    {
      "epoch": 0.27231686029013424,
      "grad_norm": 1.6887176036834717,
      "learning_rate": 0.00017083462590266438,
      "loss": 0.4004,
      "step": 1131
    },
    {
      "epoch": 0.2725576355865888,
      "grad_norm": 2.2301809787750244,
      "learning_rate": 0.00017077956489324972,
      "loss": 0.4566,
      "step": 1132
    },
    {
      "epoch": 0.2727984108830434,
      "grad_norm": 1.984376311302185,
      "learning_rate": 0.00017072446084970014,
      "loss": 0.3397,
      "step": 1133
    },
    {
      "epoch": 0.27303918617949796,
      "grad_norm": 1.465584397315979,
      "learning_rate": 0.000170669313805519,
      "loss": 1.118,
      "step": 1134
    },
    {
      "epoch": 0.27327996147595257,
      "grad_norm": 3.79280948638916,
      "learning_rate": 0.00017061412379423588,
      "loss": 1.0574,
      "step": 1135
    },
    {
      "epoch": 0.2735207367724071,
      "grad_norm": 2.3764195442199707,
      "learning_rate": 0.00017055889084940638,
      "loss": 0.66,
      "step": 1136
    },
    {
      "epoch": 0.27376151206886173,
      "grad_norm": 4.677147388458252,
      "learning_rate": 0.00017050361500461225,
      "loss": 1.764,
      "step": 1137
    },
    {
      "epoch": 0.27400228736531634,
      "grad_norm": 0.9030484557151794,
      "learning_rate": 0.00017044829629346138,
      "loss": 0.6526,
      "step": 1138
    },
    {
      "epoch": 0.2742430626617709,
      "grad_norm": 0.627707302570343,
      "learning_rate": 0.00017039293474958766,
      "loss": 0.3727,
      "step": 1139
    },
    {
      "epoch": 0.2744838379582255,
      "grad_norm": 1.3778057098388672,
      "learning_rate": 0.00017033753040665098,
      "loss": 0.6493,
      "step": 1140
    },
    {
      "epoch": 0.27472461325468006,
      "grad_norm": 4.6707305908203125,
      "learning_rate": 0.00017028208329833734,
      "loss": 0.9048,
      "step": 1141
    },
    {
      "epoch": 0.27496538855113467,
      "grad_norm": 0.9319252967834473,
      "learning_rate": 0.00017022659345835873,
      "loss": 0.199,
      "step": 1142
    },
    {
      "epoch": 0.2752061638475892,
      "grad_norm": 4.65252685546875,
      "learning_rate": 0.00017017106092045308,
      "loss": 0.5977,
      "step": 1143
    },
    {
      "epoch": 0.27544693914404383,
      "grad_norm": 2.0846447944641113,
      "learning_rate": 0.00017011548571838425,
      "loss": 0.8822,
      "step": 1144
    },
    {
      "epoch": 0.2756877144404984,
      "grad_norm": 7.9237961769104,
      "learning_rate": 0.00017005986788594217,
      "loss": 0.9254,
      "step": 1145
    },
    {
      "epoch": 0.275928489736953,
      "grad_norm": 1.8218225240707397,
      "learning_rate": 0.00017000420745694254,
      "loss": 0.8144,
      "step": 1146
    },
    {
      "epoch": 0.27616926503340755,
      "grad_norm": 2.215475082397461,
      "learning_rate": 0.00016994850446522708,
      "loss": 0.5376,
      "step": 1147
    },
    {
      "epoch": 0.27641004032986216,
      "grad_norm": 2.7972052097320557,
      "learning_rate": 0.0001698927589446633,
      "loss": 0.7172,
      "step": 1148
    },
    {
      "epoch": 0.2766508156263167,
      "grad_norm": 3.2082738876342773,
      "learning_rate": 0.00016983697092914462,
      "loss": 0.7779,
      "step": 1149
    },
    {
      "epoch": 0.2768915909227713,
      "grad_norm": 7.983036041259766,
      "learning_rate": 0.00016978114045259024,
      "loss": 0.6586,
      "step": 1150
    },
    {
      "epoch": 0.27713236621922593,
      "grad_norm": 1.2389219999313354,
      "learning_rate": 0.00016972526754894526,
      "loss": 0.4504,
      "step": 1151
    },
    {
      "epoch": 0.2773731415156805,
      "grad_norm": 4.120885848999023,
      "learning_rate": 0.00016966935225218055,
      "loss": 0.9209,
      "step": 1152
    },
    {
      "epoch": 0.2776139168121351,
      "grad_norm": 1.707640528678894,
      "learning_rate": 0.0001696133945962927,
      "loss": 0.4769,
      "step": 1153
    },
    {
      "epoch": 0.27785469210858965,
      "grad_norm": 2.347038745880127,
      "learning_rate": 0.00016955739461530403,
      "loss": 0.6969,
      "step": 1154
    },
    {
      "epoch": 0.27809546740504426,
      "grad_norm": 1.5984582901000977,
      "learning_rate": 0.0001695013523432628,
      "loss": 0.7477,
      "step": 1155
    },
    {
      "epoch": 0.2783362427014988,
      "grad_norm": 3.808624267578125,
      "learning_rate": 0.0001694452678142427,
      "loss": 0.6706,
      "step": 1156
    },
    {
      "epoch": 0.2785770179979534,
      "grad_norm": 2.3861489295959473,
      "learning_rate": 0.00016938914106234333,
      "loss": 0.492,
      "step": 1157
    },
    {
      "epoch": 0.278817793294408,
      "grad_norm": 6.34063196182251,
      "learning_rate": 0.00016933297212168985,
      "loss": 0.9194,
      "step": 1158
    },
    {
      "epoch": 0.2790585685908626,
      "grad_norm": 2.32570743560791,
      "learning_rate": 0.0001692767610264331,
      "loss": 0.3936,
      "step": 1159
    },
    {
      "epoch": 0.27929934388731714,
      "grad_norm": 2.573622226715088,
      "learning_rate": 0.0001692205078107496,
      "loss": 0.8134,
      "step": 1160
    },
    {
      "epoch": 0.27954011918377175,
      "grad_norm": 2.499985933303833,
      "learning_rate": 0.00016916421250884138,
      "loss": 0.4928,
      "step": 1161
    },
    {
      "epoch": 0.2797808944802263,
      "grad_norm": 1.9372178316116333,
      "learning_rate": 0.00016910787515493611,
      "loss": 0.6883,
      "step": 1162
    },
    {
      "epoch": 0.2800216697766809,
      "grad_norm": 1.012056589126587,
      "learning_rate": 0.00016905149578328702,
      "loss": 0.3567,
      "step": 1163
    },
    {
      "epoch": 0.28026244507313547,
      "grad_norm": 1.383881688117981,
      "learning_rate": 0.00016899507442817298,
      "loss": 0.7005,
      "step": 1164
    },
    {
      "epoch": 0.2805032203695901,
      "grad_norm": 7.843169212341309,
      "learning_rate": 0.00016893861112389822,
      "loss": 0.59,
      "step": 1165
    },
    {
      "epoch": 0.2807439956660447,
      "grad_norm": 15.78963851928711,
      "learning_rate": 0.00016888210590479256,
      "loss": 0.7168,
      "step": 1166
    },
    {
      "epoch": 0.28098477096249924,
      "grad_norm": 1.2202370166778564,
      "learning_rate": 0.0001688255588052113,
      "loss": 0.2079,
      "step": 1167
    },
    {
      "epoch": 0.28122554625895385,
      "grad_norm": 1.067835807800293,
      "learning_rate": 0.0001687689698595353,
      "loss": 0.6354,
      "step": 1168
    },
    {
      "epoch": 0.2814663215554084,
      "grad_norm": 0.6400854587554932,
      "learning_rate": 0.0001687123391021706,
      "loss": 0.307,
      "step": 1169
    },
    {
      "epoch": 0.281707096851863,
      "grad_norm": 2.6087357997894287,
      "learning_rate": 0.00016865566656754896,
      "loss": 0.4111,
      "step": 1170
    },
    {
      "epoch": 0.28194787214831757,
      "grad_norm": 1.9883902072906494,
      "learning_rate": 0.00016859895229012737,
      "loss": 0.6824,
      "step": 1171
    },
    {
      "epoch": 0.2821886474447722,
      "grad_norm": 2.6531500816345215,
      "learning_rate": 0.00016854219630438818,
      "loss": 0.745,
      "step": 1172
    },
    {
      "epoch": 0.28242942274122673,
      "grad_norm": 0.8592819571495056,
      "learning_rate": 0.00016848539864483926,
      "loss": 0.7847,
      "step": 1173
    },
    {
      "epoch": 0.28267019803768134,
      "grad_norm": 4.981196880340576,
      "learning_rate": 0.00016842855934601366,
      "loss": 0.9405,
      "step": 1174
    },
    {
      "epoch": 0.2829109733341359,
      "grad_norm": 1.9096482992172241,
      "learning_rate": 0.0001683716784424698,
      "loss": 0.8852,
      "step": 1175
    },
    {
      "epoch": 0.2831517486305905,
      "grad_norm": 7.072299003601074,
      "learning_rate": 0.0001683147559687914,
      "loss": 1.6136,
      "step": 1176
    },
    {
      "epoch": 0.28339252392704506,
      "grad_norm": 18.518299102783203,
      "learning_rate": 0.00016825779195958745,
      "loss": 0.2307,
      "step": 1177
    },
    {
      "epoch": 0.28363329922349967,
      "grad_norm": 2.7872228622436523,
      "learning_rate": 0.0001682007864494922,
      "loss": 0.6282,
      "step": 1178
    },
    {
      "epoch": 0.2838740745199543,
      "grad_norm": 1.4213825464248657,
      "learning_rate": 0.00016814373947316512,
      "loss": 0.6838,
      "step": 1179
    },
    {
      "epoch": 0.28411484981640883,
      "grad_norm": 1.1344329118728638,
      "learning_rate": 0.00016808665106529094,
      "loss": 0.4394,
      "step": 1180
    },
    {
      "epoch": 0.28435562511286344,
      "grad_norm": 1.0440508127212524,
      "learning_rate": 0.0001680295212605795,
      "loss": 0.1343,
      "step": 1181
    },
    {
      "epoch": 0.284596400409318,
      "grad_norm": 3.40962553024292,
      "learning_rate": 0.00016797235009376586,
      "loss": 0.6312,
      "step": 1182
    },
    {
      "epoch": 0.2848371757057726,
      "grad_norm": 3.0211853981018066,
      "learning_rate": 0.0001679151375996102,
      "loss": 0.6371,
      "step": 1183
    },
    {
      "epoch": 0.28507795100222716,
      "grad_norm": 8.21009635925293,
      "learning_rate": 0.0001678578838128979,
      "loss": 0.7002,
      "step": 1184
    },
    {
      "epoch": 0.28531872629868177,
      "grad_norm": 2.1480865478515625,
      "learning_rate": 0.00016780058876843934,
      "loss": 0.4914,
      "step": 1185
    },
    {
      "epoch": 0.2855595015951363,
      "grad_norm": 1.2523528337478638,
      "learning_rate": 0.00016774325250107006,
      "loss": 0.5931,
      "step": 1186
    },
    {
      "epoch": 0.28580027689159093,
      "grad_norm": 1.5123728513717651,
      "learning_rate": 0.00016768587504565062,
      "loss": 0.439,
      "step": 1187
    },
    {
      "epoch": 0.2860410521880455,
      "grad_norm": 1.9221967458724976,
      "learning_rate": 0.00016762845643706665,
      "loss": 0.6541,
      "step": 1188
    },
    {
      "epoch": 0.2862818274845001,
      "grad_norm": 4.153512477874756,
      "learning_rate": 0.00016757099671022883,
      "loss": 0.7725,
      "step": 1189
    },
    {
      "epoch": 0.28652260278095465,
      "grad_norm": 1.0292513370513916,
      "learning_rate": 0.00016751349590007274,
      "loss": 0.5082,
      "step": 1190
    },
    {
      "epoch": 0.28676337807740926,
      "grad_norm": 4.168222904205322,
      "learning_rate": 0.00016745595404155905,
      "loss": 0.5705,
      "step": 1191
    },
    {
      "epoch": 0.28700415337386387,
      "grad_norm": 1.6598914861679077,
      "learning_rate": 0.00016739837116967328,
      "loss": 0.8381,
      "step": 1192
    },
    {
      "epoch": 0.2872449286703184,
      "grad_norm": 2.8263731002807617,
      "learning_rate": 0.00016734074731942605,
      "loss": 0.7783,
      "step": 1193
    },
    {
      "epoch": 0.28748570396677303,
      "grad_norm": 1.6634050607681274,
      "learning_rate": 0.00016728308252585267,
      "loss": 0.3698,
      "step": 1194
    },
    {
      "epoch": 0.2877264792632276,
      "grad_norm": 2.690964937210083,
      "learning_rate": 0.00016722537682401357,
      "loss": 0.4771,
      "step": 1195
    },
    {
      "epoch": 0.2879672545596822,
      "grad_norm": 0.9511985778808594,
      "learning_rate": 0.0001671676302489939,
      "loss": 0.2755,
      "step": 1196
    },
    {
      "epoch": 0.28820802985613675,
      "grad_norm": 2.063718557357788,
      "learning_rate": 0.0001671098428359037,
      "loss": 0.4661,
      "step": 1197
    },
    {
      "epoch": 0.28844880515259136,
      "grad_norm": 3.1178414821624756,
      "learning_rate": 0.00016705201461987782,
      "loss": 1.1358,
      "step": 1198
    },
    {
      "epoch": 0.2886895804490459,
      "grad_norm": 1.8301066160202026,
      "learning_rate": 0.00016699414563607601,
      "loss": 0.3741,
      "step": 1199
    },
    {
      "epoch": 0.2889303557455005,
      "grad_norm": 2.6910312175750732,
      "learning_rate": 0.00016693623591968273,
      "loss": 1.0457,
      "step": 1200
    },
    {
      "epoch": 0.2891711310419551,
      "grad_norm": 1.9413840770721436,
      "learning_rate": 0.0001668782855059072,
      "loss": 0.7107,
      "step": 1201
    },
    {
      "epoch": 0.2894119063384097,
      "grad_norm": 1.9084299802780151,
      "learning_rate": 0.00016682029442998338,
      "loss": 0.9563,
      "step": 1202
    },
    {
      "epoch": 0.28965268163486424,
      "grad_norm": 6.873541831970215,
      "learning_rate": 0.00016676226272717,
      "loss": 0.8658,
      "step": 1203
    },
    {
      "epoch": 0.28989345693131885,
      "grad_norm": 2.0159761905670166,
      "learning_rate": 0.00016670419043275048,
      "loss": 0.7841,
      "step": 1204
    },
    {
      "epoch": 0.2901342322277734,
      "grad_norm": 1.8797401189804077,
      "learning_rate": 0.00016664607758203287,
      "loss": 0.7343,
      "step": 1205
    },
    {
      "epoch": 0.290375007524228,
      "grad_norm": 1.6734647750854492,
      "learning_rate": 0.00016658792421034996,
      "loss": 0.4975,
      "step": 1206
    },
    {
      "epoch": 0.2906157828206826,
      "grad_norm": 0.8860729932785034,
      "learning_rate": 0.00016652973035305907,
      "loss": 0.4253,
      "step": 1207
    },
    {
      "epoch": 0.2908565581171372,
      "grad_norm": 0.7764965295791626,
      "learning_rate": 0.00016647149604554227,
      "loss": 0.7893,
      "step": 1208
    },
    {
      "epoch": 0.2910973334135918,
      "grad_norm": 6.640602111816406,
      "learning_rate": 0.0001664132213232061,
      "loss": 0.9636,
      "step": 1209
    },
    {
      "epoch": 0.29133810871004634,
      "grad_norm": 6.02003288269043,
      "learning_rate": 0.00016635490622148177,
      "loss": 0.8415,
      "step": 1210
    },
    {
      "epoch": 0.29157888400650095,
      "grad_norm": 1.2742475271224976,
      "learning_rate": 0.00016629655077582487,
      "loss": 0.2262,
      "step": 1211
    },
    {
      "epoch": 0.2918196593029555,
      "grad_norm": 0.7330831289291382,
      "learning_rate": 0.0001662381550217158,
      "loss": 0.4596,
      "step": 1212
    },
    {
      "epoch": 0.2920604345994101,
      "grad_norm": 5.310278415679932,
      "learning_rate": 0.00016617971899465922,
      "loss": 0.4937,
      "step": 1213
    },
    {
      "epoch": 0.29230120989586467,
      "grad_norm": 3.351181983947754,
      "learning_rate": 0.0001661212427301844,
      "loss": 0.3122,
      "step": 1214
    },
    {
      "epoch": 0.2925419851923193,
      "grad_norm": 2.28200101852417,
      "learning_rate": 0.000166062726263845,
      "loss": 1.5276,
      "step": 1215
    },
    {
      "epoch": 0.29278276048877383,
      "grad_norm": 4.403338432312012,
      "learning_rate": 0.0001660041696312192,
      "loss": 0.8055,
      "step": 1216
    },
    {
      "epoch": 0.29302353578522844,
      "grad_norm": 2.3211700916290283,
      "learning_rate": 0.00016594557286790957,
      "loss": 0.715,
      "step": 1217
    },
    {
      "epoch": 0.293264311081683,
      "grad_norm": 2.3568782806396484,
      "learning_rate": 0.00016588693600954306,
      "loss": 0.4839,
      "step": 1218
    },
    {
      "epoch": 0.2935050863781376,
      "grad_norm": 3.552236795425415,
      "learning_rate": 0.00016582825909177099,
      "loss": 0.8309,
      "step": 1219
    },
    {
      "epoch": 0.2937458616745922,
      "grad_norm": 1.4845949411392212,
      "learning_rate": 0.0001657695421502691,
      "loss": 0.3576,
      "step": 1220
    },
    {
      "epoch": 0.29398663697104677,
      "grad_norm": 4.3355607986450195,
      "learning_rate": 0.00016571078522073737,
      "loss": 0.4216,
      "step": 1221
    },
    {
      "epoch": 0.2942274122675014,
      "grad_norm": 2.5869123935699463,
      "learning_rate": 0.0001656519883389002,
      "loss": 1.0778,
      "step": 1222
    },
    {
      "epoch": 0.29446818756395593,
      "grad_norm": 3.6160268783569336,
      "learning_rate": 0.0001655931515405062,
      "loss": 0.6609,
      "step": 1223
    },
    {
      "epoch": 0.29470896286041054,
      "grad_norm": 2.8097994327545166,
      "learning_rate": 0.00016553427486132828,
      "loss": 0.6801,
      "step": 1224
    },
    {
      "epoch": 0.2949497381568651,
      "grad_norm": 2.5700998306274414,
      "learning_rate": 0.00016547535833716362,
      "loss": 0.3883,
      "step": 1225
    },
    {
      "epoch": 0.2951905134533197,
      "grad_norm": 0.5326368808746338,
      "learning_rate": 0.00016541640200383356,
      "loss": 0.2599,
      "step": 1226
    },
    {
      "epoch": 0.29543128874977426,
      "grad_norm": 4.097855567932129,
      "learning_rate": 0.00016535740589718366,
      "loss": 1.4335,
      "step": 1227
    },
    {
      "epoch": 0.29567206404622887,
      "grad_norm": 1.7571992874145508,
      "learning_rate": 0.00016529837005308375,
      "loss": 0.7812,
      "step": 1228
    },
    {
      "epoch": 0.2959128393426834,
      "grad_norm": 2.6337194442749023,
      "learning_rate": 0.00016523929450742774,
      "loss": 0.3936,
      "step": 1229
    },
    {
      "epoch": 0.29615361463913803,
      "grad_norm": 0.9062210917472839,
      "learning_rate": 0.00016518017929613367,
      "loss": 0.3914,
      "step": 1230
    },
    {
      "epoch": 0.2963943899355926,
      "grad_norm": 0.581713080406189,
      "learning_rate": 0.00016512102445514375,
      "loss": 0.8761,
      "step": 1231
    },
    {
      "epoch": 0.2966351652320472,
      "grad_norm": 1.9768112897872925,
      "learning_rate": 0.0001650618300204242,
      "loss": 0.5862,
      "step": 1232
    },
    {
      "epoch": 0.2968759405285018,
      "grad_norm": 1.7873097658157349,
      "learning_rate": 0.00016500259602796546,
      "loss": 0.1979,
      "step": 1233
    },
    {
      "epoch": 0.29711671582495636,
      "grad_norm": 2.351323366165161,
      "learning_rate": 0.00016494332251378187,
      "loss": 0.6285,
      "step": 1234
    },
    {
      "epoch": 0.29735749112141097,
      "grad_norm": 2.609557628631592,
      "learning_rate": 0.00016488400951391186,
      "loss": 0.5139,
      "step": 1235
    },
    {
      "epoch": 0.2975982664178655,
      "grad_norm": 2.986835241317749,
      "learning_rate": 0.0001648246570644179,
      "loss": 0.3242,
      "step": 1236
    },
    {
      "epoch": 0.29783904171432013,
      "grad_norm": 1.083709716796875,
      "learning_rate": 0.00016476526520138636,
      "loss": 0.7125,
      "step": 1237
    },
    {
      "epoch": 0.2980798170107747,
      "grad_norm": 4.175523281097412,
      "learning_rate": 0.0001647058339609277,
      "loss": 0.6407,
      "step": 1238
    },
    {
      "epoch": 0.2983205923072293,
      "grad_norm": 1.0320210456848145,
      "learning_rate": 0.00016464636337917618,
      "loss": 0.4267,
      "step": 1239
    },
    {
      "epoch": 0.29856136760368385,
      "grad_norm": 1.7650171518325806,
      "learning_rate": 0.0001645868534922901,
      "loss": 0.8656,
      "step": 1240
    },
    {
      "epoch": 0.29880214290013846,
      "grad_norm": 0.3890477418899536,
      "learning_rate": 0.00016452730433645153,
      "loss": 0.4355,
      "step": 1241
    },
    {
      "epoch": 0.299042918196593,
      "grad_norm": 3.933539390563965,
      "learning_rate": 0.0001644677159478666,
      "loss": 0.7368,
      "step": 1242
    },
    {
      "epoch": 0.2992836934930476,
      "grad_norm": 3.1213431358337402,
      "learning_rate": 0.00016440808836276508,
      "loss": 0.5998,
      "step": 1243
    },
    {
      "epoch": 0.2995244687895022,
      "grad_norm": 2.534736156463623,
      "learning_rate": 0.00016434842161740075,
      "loss": 0.6373,
      "step": 1244
    },
    {
      "epoch": 0.2997652440859568,
      "grad_norm": 1.6457316875457764,
      "learning_rate": 0.0001642887157480511,
      "loss": 0.8746,
      "step": 1245
    },
    {
      "epoch": 0.30000601938241134,
      "grad_norm": 2.4500882625579834,
      "learning_rate": 0.0001642289707910174,
      "loss": 0.3703,
      "step": 1246
    },
    {
      "epoch": 0.30024679467886595,
      "grad_norm": 6.751053810119629,
      "learning_rate": 0.0001641691867826248,
      "loss": 0.9699,
      "step": 1247
    },
    {
      "epoch": 0.30048756997532056,
      "grad_norm": 2.2047617435455322,
      "learning_rate": 0.000164109363759222,
      "loss": 0.7187,
      "step": 1248
    },
    {
      "epoch": 0.3007283452717751,
      "grad_norm": 1.771125316619873,
      "learning_rate": 0.00016404950175718166,
      "loss": 0.4576,
      "step": 1249
    },
    {
      "epoch": 0.3009691205682297,
      "grad_norm": 2.1661245822906494,
      "learning_rate": 0.0001639896008128999,
      "loss": 0.6159,
      "step": 1250
    },
    {
      "epoch": 0.3012098958646843,
      "grad_norm": 4.253533363342285,
      "learning_rate": 0.0001639296609627967,
      "loss": 0.7709,
      "step": 1251
    },
    {
      "epoch": 0.3014506711611389,
      "grad_norm": 3.3934977054595947,
      "learning_rate": 0.00016386968224331558,
      "loss": 1.3822,
      "step": 1252
    },
    {
      "epoch": 0.30169144645759344,
      "grad_norm": 4.271642684936523,
      "learning_rate": 0.00016380966469092378,
      "loss": 1.5999,
      "step": 1253
    },
    {
      "epoch": 0.30193222175404805,
      "grad_norm": 1.2420214414596558,
      "learning_rate": 0.00016374960834211204,
      "loss": 0.3992,
      "step": 1254
    },
    {
      "epoch": 0.3021729970505026,
      "grad_norm": 1.2237993478775024,
      "learning_rate": 0.00016368951323339484,
      "loss": 0.2898,
      "step": 1255
    },
    {
      "epoch": 0.3024137723469572,
      "grad_norm": 1.4050495624542236,
      "learning_rate": 0.00016362937940131008,
      "loss": 0.2777,
      "step": 1256
    },
    {
      "epoch": 0.30265454764341176,
      "grad_norm": 1.4772244691848755,
      "learning_rate": 0.0001635692068824193,
      "loss": 0.9147,
      "step": 1257
    },
    {
      "epoch": 0.3028953229398664,
      "grad_norm": 4.798654556274414,
      "learning_rate": 0.0001635089957133075,
      "loss": 0.3208,
      "step": 1258
    },
    {
      "epoch": 0.30313609823632093,
      "grad_norm": 2.012327194213867,
      "learning_rate": 0.0001634487459305832,
      "loss": 0.691,
      "step": 1259
    },
    {
      "epoch": 0.30337687353277554,
      "grad_norm": 7.864597797393799,
      "learning_rate": 0.00016338845757087847,
      "loss": 0.7949,
      "step": 1260
    },
    {
      "epoch": 0.30361764882923015,
      "grad_norm": 1.5631287097930908,
      "learning_rate": 0.0001633281306708487,
      "loss": 0.8655,
      "step": 1261
    },
    {
      "epoch": 0.3038584241256847,
      "grad_norm": 3.419724225997925,
      "learning_rate": 0.0001632677652671728,
      "loss": 0.7764,
      "step": 1262
    },
    {
      "epoch": 0.3040991994221393,
      "grad_norm": 4.057196617126465,
      "learning_rate": 0.00016320736139655305,
      "loss": 0.4629,
      "step": 1263
    },
    {
      "epoch": 0.30433997471859386,
      "grad_norm": 2.437304735183716,
      "learning_rate": 0.0001631469190957152,
      "loss": 0.6043,
      "step": 1264
    },
    {
      "epoch": 0.3045807500150485,
      "grad_norm": 3.452397108078003,
      "learning_rate": 0.00016308643840140828,
      "loss": 0.9057,
      "step": 1265
    },
    {
      "epoch": 0.304821525311503,
      "grad_norm": 2.3599209785461426,
      "learning_rate": 0.00016302591935040463,
      "loss": 0.4477,
      "step": 1266
    },
    {
      "epoch": 0.30506230060795764,
      "grad_norm": 2.7127840518951416,
      "learning_rate": 0.0001629653619795,
      "loss": 0.4695,
      "step": 1267
    },
    {
      "epoch": 0.3053030759044122,
      "grad_norm": 1.4742056131362915,
      "learning_rate": 0.00016290476632551347,
      "loss": 0.7507,
      "step": 1268
    },
    {
      "epoch": 0.3055438512008668,
      "grad_norm": 1.4544012546539307,
      "learning_rate": 0.0001628441324252873,
      "loss": 0.5332,
      "step": 1269
    },
    {
      "epoch": 0.30578462649732135,
      "grad_norm": 3.388953685760498,
      "learning_rate": 0.000162783460315687,
      "loss": 0.9343,
      "step": 1270
    },
    {
      "epoch": 0.30602540179377596,
      "grad_norm": 3.447437047958374,
      "learning_rate": 0.00016272275003360135,
      "loss": 0.7331,
      "step": 1271
    },
    {
      "epoch": 0.3062661770902305,
      "grad_norm": 2.9696388244628906,
      "learning_rate": 0.0001626620016159424,
      "loss": 0.2491,
      "step": 1272
    },
    {
      "epoch": 0.3065069523866851,
      "grad_norm": 0.8574854135513306,
      "learning_rate": 0.0001626012150996453,
      "loss": 0.5318,
      "step": 1273
    },
    {
      "epoch": 0.30674772768313974,
      "grad_norm": 2.6496622562408447,
      "learning_rate": 0.00016254039052166833,
      "loss": 0.725,
      "step": 1274
    },
    {
      "epoch": 0.3069885029795943,
      "grad_norm": 1.233094334602356,
      "learning_rate": 0.00016247952791899307,
      "loss": 0.7075,
      "step": 1275
    },
    {
      "epoch": 0.3072292782760489,
      "grad_norm": 1.2451717853546143,
      "learning_rate": 0.00016241862732862403,
      "loss": 0.8067,
      "step": 1276
    },
    {
      "epoch": 0.30747005357250345,
      "grad_norm": 2.2256247997283936,
      "learning_rate": 0.00016235768878758897,
      "loss": 0.389,
      "step": 1277
    },
    {
      "epoch": 0.30771082886895806,
      "grad_norm": 2.2310009002685547,
      "learning_rate": 0.00016229671233293863,
      "loss": 1.3423,
      "step": 1278
    },
    {
      "epoch": 0.3079516041654126,
      "grad_norm": 2.3196895122528076,
      "learning_rate": 0.0001622356980017468,
      "loss": 0.5485,
      "step": 1279
    },
    {
      "epoch": 0.3081923794618672,
      "grad_norm": 6.365363121032715,
      "learning_rate": 0.0001621746458311104,
      "loss": 0.6047,
      "step": 1280
    },
    {
      "epoch": 0.3084331547583218,
      "grad_norm": 2.375135898590088,
      "learning_rate": 0.00016211355585814925,
      "loss": 1.0309,
      "step": 1281
    },
    {
      "epoch": 0.3086739300547764,
      "grad_norm": 3.856171131134033,
      "learning_rate": 0.00016205242812000617,
      "loss": 0.5747,
      "step": 1282
    },
    {
      "epoch": 0.30891470535123094,
      "grad_norm": 1.3465646505355835,
      "learning_rate": 0.00016199126265384702,
      "loss": 0.6992,
      "step": 1283
    },
    {
      "epoch": 0.30915548064768555,
      "grad_norm": 3.8031649589538574,
      "learning_rate": 0.0001619300594968605,
      "loss": 0.6855,
      "step": 1284
    },
    {
      "epoch": 0.3093962559441401,
      "grad_norm": 6.7793169021606445,
      "learning_rate": 0.00016186881868625826,
      "loss": 0.7541,
      "step": 1285
    },
    {
      "epoch": 0.3096370312405947,
      "grad_norm": 4.37679386138916,
      "learning_rate": 0.00016180754025927488,
      "loss": 0.7391,
      "step": 1286
    },
    {
      "epoch": 0.30987780653704927,
      "grad_norm": 2.1541247367858887,
      "learning_rate": 0.00016174622425316776,
      "loss": 0.6678,
      "step": 1287
    },
    {
      "epoch": 0.3101185818335039,
      "grad_norm": 1.5806964635849,
      "learning_rate": 0.00016168487070521717,
      "loss": 0.6008,
      "step": 1288
    },
    {
      "epoch": 0.3103593571299585,
      "grad_norm": 2.3984477519989014,
      "learning_rate": 0.00016162347965272624,
      "loss": 0.431,
      "step": 1289
    },
    {
      "epoch": 0.31060013242641304,
      "grad_norm": 5.19956111907959,
      "learning_rate": 0.00016156205113302083,
      "loss": 1.1046,
      "step": 1290
    },
    {
      "epoch": 0.31084090772286765,
      "grad_norm": 1.4966849088668823,
      "learning_rate": 0.00016150058518344963,
      "loss": 0.4343,
      "step": 1291
    },
    {
      "epoch": 0.3110816830193222,
      "grad_norm": 1.530099868774414,
      "learning_rate": 0.00016143908184138408,
      "loss": 0.3569,
      "step": 1292
    },
    {
      "epoch": 0.3113224583157768,
      "grad_norm": 6.92020845413208,
      "learning_rate": 0.00016137754114421834,
      "loss": 0.4397,
      "step": 1293
    },
    {
      "epoch": 0.31156323361223137,
      "grad_norm": 4.40862512588501,
      "learning_rate": 0.0001613159631293693,
      "loss": 0.7515,
      "step": 1294
    },
    {
      "epoch": 0.311804008908686,
      "grad_norm": 6.846129894256592,
      "learning_rate": 0.00016125434783427654,
      "loss": 0.9461,
      "step": 1295
    },
    {
      "epoch": 0.31204478420514054,
      "grad_norm": 6.169475078582764,
      "learning_rate": 0.0001611926952964023,
      "loss": 1.5009,
      "step": 1296
    },
    {
      "epoch": 0.31228555950159514,
      "grad_norm": 2.2589635848999023,
      "learning_rate": 0.0001611310055532314,
      "loss": 0.6197,
      "step": 1297
    },
    {
      "epoch": 0.3125263347980497,
      "grad_norm": 5.03438663482666,
      "learning_rate": 0.00016106927864227143,
      "loss": 1.2404,
      "step": 1298
    },
    {
      "epoch": 0.3127671100945043,
      "grad_norm": 2.119262456893921,
      "learning_rate": 0.00016100751460105243,
      "loss": 0.3959,
      "step": 1299
    },
    {
      "epoch": 0.31300788539095886,
      "grad_norm": 1.556208610534668,
      "learning_rate": 0.00016094571346712716,
      "loss": 0.3569,
      "step": 1300
    },
    {
      "epoch": 0.31324866068741347,
      "grad_norm": 3.7477822303771973,
      "learning_rate": 0.0001608838752780707,
      "loss": 0.9697,
      "step": 1301
    },
    {
      "epoch": 0.3134894359838681,
      "grad_norm": 1.226062297821045,
      "learning_rate": 0.000160822000071481,
      "loss": 0.2165,
      "step": 1302
    },
    {
      "epoch": 0.31373021128032264,
      "grad_norm": 1.5284736156463623,
      "learning_rate": 0.00016076008788497816,
      "loss": 0.3499,
      "step": 1303
    },
    {
      "epoch": 0.31397098657677724,
      "grad_norm": 1.2165570259094238,
      "learning_rate": 0.00016069813875620498,
      "loss": 0.3322,
      "step": 1304
    },
    {
      "epoch": 0.3142117618732318,
      "grad_norm": 2.2660257816314697,
      "learning_rate": 0.00016063615272282673,
      "loss": 0.9303,
      "step": 1305
    },
    {
      "epoch": 0.3144525371696864,
      "grad_norm": 3.506263494491577,
      "learning_rate": 0.00016057412982253098,
      "loss": 0.3677,
      "step": 1306
    },
    {
      "epoch": 0.31469331246614096,
      "grad_norm": 2.1276533603668213,
      "learning_rate": 0.00016051207009302781,
      "loss": 0.8432,
      "step": 1307
    },
    {
      "epoch": 0.31493408776259557,
      "grad_norm": 4.875666618347168,
      "learning_rate": 0.00016044997357204973,
      "loss": 0.4637,
      "step": 1308
    },
    {
      "epoch": 0.3151748630590501,
      "grad_norm": 0.7586674690246582,
      "learning_rate": 0.0001603878402973515,
      "loss": 0.3961,
      "step": 1309
    },
    {
      "epoch": 0.31541563835550473,
      "grad_norm": 1.5257422924041748,
      "learning_rate": 0.0001603256703067103,
      "loss": 0.6829,
      "step": 1310
    },
    {
      "epoch": 0.3156564136519593,
      "grad_norm": 2.0728249549865723,
      "learning_rate": 0.00016026346363792567,
      "loss": 0.5977,
      "step": 1311
    },
    {
      "epoch": 0.3158971889484139,
      "grad_norm": 6.4057936668396,
      "learning_rate": 0.00016020122032881932,
      "loss": 1.2481,
      "step": 1312
    },
    {
      "epoch": 0.31613796424486845,
      "grad_norm": 0.8619070649147034,
      "learning_rate": 0.00016013894041723542,
      "loss": 1.0521,
      "step": 1313
    },
    {
      "epoch": 0.31637873954132306,
      "grad_norm": 2.2395753860473633,
      "learning_rate": 0.00016007662394104024,
      "loss": 0.4075,
      "step": 1314
    },
    {
      "epoch": 0.31661951483777767,
      "grad_norm": 1.7755603790283203,
      "learning_rate": 0.00016001427093812235,
      "loss": 0.6441,
      "step": 1315
    },
    {
      "epoch": 0.3168602901342322,
      "grad_norm": 0.9650968909263611,
      "learning_rate": 0.0001599518814463925,
      "loss": 0.1726,
      "step": 1316
    },
    {
      "epoch": 0.31710106543068683,
      "grad_norm": 1.7282532453536987,
      "learning_rate": 0.0001598894555037837,
      "loss": 0.2156,
      "step": 1317
    },
    {
      "epoch": 0.3173418407271414,
      "grad_norm": 1.8310699462890625,
      "learning_rate": 0.000159826993148251,
      "loss": 0.4061,
      "step": 1318
    },
    {
      "epoch": 0.317582616023596,
      "grad_norm": 2.200747489929199,
      "learning_rate": 0.00015976449441777163,
      "loss": 0.3275,
      "step": 1319
    },
    {
      "epoch": 0.31782339132005055,
      "grad_norm": 3.546372175216675,
      "learning_rate": 0.00015970195935034506,
      "loss": 1.1699,
      "step": 1320
    },
    {
      "epoch": 0.31806416661650516,
      "grad_norm": 4.240285873413086,
      "learning_rate": 0.00015963938798399267,
      "loss": 1.0385,
      "step": 1321
    },
    {
      "epoch": 0.3183049419129597,
      "grad_norm": 1.8230444192886353,
      "learning_rate": 0.00015957678035675806,
      "loss": 0.8566,
      "step": 1322
    },
    {
      "epoch": 0.3185457172094143,
      "grad_norm": 1.3038523197174072,
      "learning_rate": 0.00015951413650670669,
      "loss": 0.545,
      "step": 1323
    },
    {
      "epoch": 0.3187864925058689,
      "grad_norm": 2.877883195877075,
      "learning_rate": 0.00015945145647192627,
      "loss": 0.522,
      "step": 1324
    },
    {
      "epoch": 0.3190272678023235,
      "grad_norm": 2.4238524436950684,
      "learning_rate": 0.0001593887402905264,
      "loss": 1.1966,
      "step": 1325
    },
    {
      "epoch": 0.31926804309877804,
      "grad_norm": 6.8975982666015625,
      "learning_rate": 0.0001593259880006386,
      "loss": 0.3491,
      "step": 1326
    },
    {
      "epoch": 0.31950881839523265,
      "grad_norm": 2.4402458667755127,
      "learning_rate": 0.0001592631996404164,
      "loss": 0.8293,
      "step": 1327
    },
    {
      "epoch": 0.3197495936916872,
      "grad_norm": 2.575347900390625,
      "learning_rate": 0.00015920037524803538,
      "loss": 0.9677,
      "step": 1328
    },
    {
      "epoch": 0.3199903689881418,
      "grad_norm": 1.8966193199157715,
      "learning_rate": 0.00015913751486169275,
      "loss": 0.4061,
      "step": 1329
    },
    {
      "epoch": 0.3202311442845964,
      "grad_norm": 3.9115090370178223,
      "learning_rate": 0.0001590746185196079,
      "loss": 0.8245,
      "step": 1330
    },
    {
      "epoch": 0.320471919581051,
      "grad_norm": 2.3119075298309326,
      "learning_rate": 0.00015901168626002184,
      "loss": 0.8401,
      "step": 1331
    },
    {
      "epoch": 0.3207126948775056,
      "grad_norm": 3.0290722846984863,
      "learning_rate": 0.00015894871812119764,
      "loss": 0.2644,
      "step": 1332
    },
    {
      "epoch": 0.32095347017396014,
      "grad_norm": 1.3376718759536743,
      "learning_rate": 0.00015888571414141996,
      "loss": 0.7519,
      "step": 1333
    },
    {
      "epoch": 0.32119424547041475,
      "grad_norm": 3.897224187850952,
      "learning_rate": 0.00015882267435899543,
      "loss": 0.5062,
      "step": 1334
    },
    {
      "epoch": 0.3214350207668693,
      "grad_norm": 3.285747766494751,
      "learning_rate": 0.00015875959881225238,
      "loss": 0.6907,
      "step": 1335
    },
    {
      "epoch": 0.3216757960633239,
      "grad_norm": 0.9387348294258118,
      "learning_rate": 0.00015869648753954083,
      "loss": 0.3543,
      "step": 1336
    },
    {
      "epoch": 0.32191657135977847,
      "grad_norm": 0.8521896600723267,
      "learning_rate": 0.00015863334057923263,
      "loss": 0.6814,
      "step": 1337
    },
    {
      "epoch": 0.3221573466562331,
      "grad_norm": 3.792236328125,
      "learning_rate": 0.00015857015796972126,
      "loss": 0.361,
      "step": 1338
    },
    {
      "epoch": 0.32239812195268763,
      "grad_norm": 1.9048930406570435,
      "learning_rate": 0.00015850693974942188,
      "loss": 0.841,
      "step": 1339
    },
    {
      "epoch": 0.32263889724914224,
      "grad_norm": 4.0882744789123535,
      "learning_rate": 0.00015844368595677128,
      "loss": 1.4357,
      "step": 1340
    },
    {
      "epoch": 0.3228796725455968,
      "grad_norm": 2.6341850757598877,
      "learning_rate": 0.000158380396630228,
      "loss": 0.7541,
      "step": 1341
    },
    {
      "epoch": 0.3231204478420514,
      "grad_norm": 1.2838685512542725,
      "learning_rate": 0.000158317071808272,
      "loss": 0.3621,
      "step": 1342
    },
    {
      "epoch": 0.323361223138506,
      "grad_norm": 2.2688121795654297,
      "learning_rate": 0.000158253711529405,
      "loss": 0.4616,
      "step": 1343
    },
    {
      "epoch": 0.32360199843496057,
      "grad_norm": 1.7393488883972168,
      "learning_rate": 0.00015819031583215007,
      "loss": 0.9108,
      "step": 1344
    },
    {
      "epoch": 0.3238427737314152,
      "grad_norm": 2.279599666595459,
      "learning_rate": 0.00015812688475505201,
      "loss": 0.4704,
      "step": 1345
    },
    {
      "epoch": 0.32408354902786973,
      "grad_norm": 1.3711464405059814,
      "learning_rate": 0.0001580634183366771,
      "loss": 0.5435,
      "step": 1346
    },
    {
      "epoch": 0.32432432432432434,
      "grad_norm": 2.167222499847412,
      "learning_rate": 0.00015799991661561303,
      "loss": 0.2528,
      "step": 1347
    },
    {
      "epoch": 0.3245650996207789,
      "grad_norm": 6.101914405822754,
      "learning_rate": 0.00015793637963046897,
      "loss": 1.3281,
      "step": 1348
    },
    {
      "epoch": 0.3248058749172335,
      "grad_norm": 2.9374330043792725,
      "learning_rate": 0.00015787280741987557,
      "loss": 0.5171,
      "step": 1349
    },
    {
      "epoch": 0.32504665021368806,
      "grad_norm": 1.7808711528778076,
      "learning_rate": 0.00015780920002248484,
      "loss": 0.6773,
      "step": 1350
    },
    {
      "epoch": 0.32528742551014267,
      "grad_norm": 2.1058807373046875,
      "learning_rate": 0.00015774555747697025,
      "loss": 0.5836,
      "step": 1351
    },
    {
      "epoch": 0.3255282008065972,
      "grad_norm": 3.5313520431518555,
      "learning_rate": 0.00015768187982202666,
      "loss": 0.6518,
      "step": 1352
    },
    {
      "epoch": 0.32576897610305183,
      "grad_norm": 4.409549236297607,
      "learning_rate": 0.00015761816709637015,
      "loss": 0.8503,
      "step": 1353
    },
    {
      "epoch": 0.3260097513995064,
      "grad_norm": 1.0890048742294312,
      "learning_rate": 0.00015755441933873823,
      "loss": 0.2637,
      "step": 1354
    },
    {
      "epoch": 0.326250526695961,
      "grad_norm": 1.0471165180206299,
      "learning_rate": 0.00015749063658788967,
      "loss": 0.4454,
      "step": 1355
    },
    {
      "epoch": 0.3264913019924156,
      "grad_norm": 1.7348659038543701,
      "learning_rate": 0.00015742681888260455,
      "loss": 0.977,
      "step": 1356
    },
    {
      "epoch": 0.32673207728887016,
      "grad_norm": 2.9363324642181396,
      "learning_rate": 0.0001573629662616842,
      "loss": 0.5051,
      "step": 1357
    },
    {
      "epoch": 0.32697285258532477,
      "grad_norm": 2.2017180919647217,
      "learning_rate": 0.00015729907876395105,
      "loss": 0.6374,
      "step": 1358
    },
    {
      "epoch": 0.3272136278817793,
      "grad_norm": 1.8804614543914795,
      "learning_rate": 0.00015723515642824894,
      "loss": 0.5544,
      "step": 1359
    },
    {
      "epoch": 0.32745440317823393,
      "grad_norm": 1.586624264717102,
      "learning_rate": 0.00015717119929344278,
      "loss": 1.0256,
      "step": 1360
    },
    {
      "epoch": 0.3276951784746885,
      "grad_norm": 3.861217737197876,
      "learning_rate": 0.00015710720739841864,
      "loss": 0.6251,
      "step": 1361
    },
    {
      "epoch": 0.3279359537711431,
      "grad_norm": 1.4513386487960815,
      "learning_rate": 0.00015704318078208374,
      "loss": 0.7021,
      "step": 1362
    },
    {
      "epoch": 0.32817672906759765,
      "grad_norm": 1.8319506645202637,
      "learning_rate": 0.00015697911948336641,
      "loss": 0.5171,
      "step": 1363
    },
    {
      "epoch": 0.32841750436405226,
      "grad_norm": 1.2202706336975098,
      "learning_rate": 0.00015691502354121605,
      "loss": 0.3055,
      "step": 1364
    },
    {
      "epoch": 0.3286582796605068,
      "grad_norm": 3.2873902320861816,
      "learning_rate": 0.00015685089299460317,
      "loss": 0.9132,
      "step": 1365
    },
    {
      "epoch": 0.3288990549569614,
      "grad_norm": 2.2419869899749756,
      "learning_rate": 0.00015678672788251922,
      "loss": 0.5913,
      "step": 1366
    },
    {
      "epoch": 0.329139830253416,
      "grad_norm": 5.597873210906982,
      "learning_rate": 0.0001567225282439768,
      "loss": 0.8836,
      "step": 1367
    },
    {
      "epoch": 0.3293806055498706,
      "grad_norm": 1.8877670764923096,
      "learning_rate": 0.0001566582941180094,
      "loss": 0.486,
      "step": 1368
    },
    {
      "epoch": 0.32962138084632514,
      "grad_norm": 3.7749109268188477,
      "learning_rate": 0.00015659402554367153,
      "loss": 0.8683,
      "step": 1369
    },
    {
      "epoch": 0.32986215614277975,
      "grad_norm": 1.6134521961212158,
      "learning_rate": 0.00015652972256003864,
      "loss": 0.7984,
      "step": 1370
    },
    {
      "epoch": 0.33010293143923436,
      "grad_norm": 2.474909782409668,
      "learning_rate": 0.00015646538520620705,
      "loss": 0.5438,
      "step": 1371
    },
    {
      "epoch": 0.3303437067356889,
      "grad_norm": 4.5085368156433105,
      "learning_rate": 0.00015640101352129402,
      "loss": 0.8213,
      "step": 1372
    },
    {
      "epoch": 0.3305844820321435,
      "grad_norm": 1.169089913368225,
      "learning_rate": 0.00015633660754443772,
      "loss": 0.2603,
      "step": 1373
    },
    {
      "epoch": 0.3308252573285981,
      "grad_norm": 1.1745972633361816,
      "learning_rate": 0.0001562721673147971,
      "loss": 0.3671,
      "step": 1374
    },
    {
      "epoch": 0.3310660326250527,
      "grad_norm": 2.545999765396118,
      "learning_rate": 0.00015620769287155197,
      "loss": 0.8987,
      "step": 1375
    },
    {
      "epoch": 0.33130680792150724,
      "grad_norm": 3.68367338180542,
      "learning_rate": 0.00015614318425390296,
      "loss": 0.7555,
      "step": 1376
    },
    {
      "epoch": 0.33154758321796185,
      "grad_norm": 1.6733169555664062,
      "learning_rate": 0.0001560786415010714,
      "loss": 0.5136,
      "step": 1377
    },
    {
      "epoch": 0.3317883585144164,
      "grad_norm": 6.26981258392334,
      "learning_rate": 0.00015601406465229947,
      "loss": 0.7942,
      "step": 1378
    },
    {
      "epoch": 0.332029133810871,
      "grad_norm": 2.717362880706787,
      "learning_rate": 0.00015594945374685002,
      "loss": 0.7386,
      "step": 1379
    },
    {
      "epoch": 0.33226990910732557,
      "grad_norm": 1.8613269329071045,
      "learning_rate": 0.00015588480882400662,
      "loss": 0.5929,
      "step": 1380
    },
    {
      "epoch": 0.3325106844037802,
      "grad_norm": 4.205772876739502,
      "learning_rate": 0.0001558201299230736,
      "loss": 0.6058,
      "step": 1381
    },
    {
      "epoch": 0.33275145970023473,
      "grad_norm": 3.043046474456787,
      "learning_rate": 0.0001557554170833758,
      "loss": 0.3609,
      "step": 1382
    },
    {
      "epoch": 0.33299223499668934,
      "grad_norm": 2.67464542388916,
      "learning_rate": 0.00015569067034425878,
      "loss": 0.9453,
      "step": 1383
    },
    {
      "epoch": 0.33323301029314395,
      "grad_norm": 1.8980488777160645,
      "learning_rate": 0.00015562588974508872,
      "loss": 0.8145,
      "step": 1384
    },
    {
      "epoch": 0.3334737855895985,
      "grad_norm": 1.8081344366073608,
      "learning_rate": 0.00015556107532525238,
      "loss": 0.5141,
      "step": 1385
    },
    {
      "epoch": 0.3337145608860531,
      "grad_norm": 2.477198362350464,
      "learning_rate": 0.00015549622712415702,
      "loss": 0.8897,
      "step": 1386
    },
    {
      "epoch": 0.33395533618250767,
      "grad_norm": 2.74221134185791,
      "learning_rate": 0.0001554313451812306,
      "loss": 0.8277,
      "step": 1387
    },
    {
      "epoch": 0.3341961114789623,
      "grad_norm": 1.0981510877609253,
      "learning_rate": 0.0001553664295359214,
      "loss": 1.2254,
      "step": 1388
    },
    {
      "epoch": 0.33443688677541683,
      "grad_norm": 1.967882752418518,
      "learning_rate": 0.0001553014802276983,
      "loss": 0.8044,
      "step": 1389
    },
    {
      "epoch": 0.33467766207187144,
      "grad_norm": 2.7293615341186523,
      "learning_rate": 0.0001552364972960506,
      "loss": 0.3669,
      "step": 1390
    },
    {
      "epoch": 0.334918437368326,
      "grad_norm": 0.9830564260482788,
      "learning_rate": 0.00015517148078048808,
      "loss": 0.0667,
      "step": 1391
    },
    {
      "epoch": 0.3351592126647806,
      "grad_norm": 2.215790033340454,
      "learning_rate": 0.00015510643072054098,
      "loss": 0.7652,
      "step": 1392
    },
    {
      "epoch": 0.33539998796123516,
      "grad_norm": 3.3328158855438232,
      "learning_rate": 0.00015504134715575986,
      "loss": 0.7612,
      "step": 1393
    },
    {
      "epoch": 0.33564076325768977,
      "grad_norm": 0.497371107339859,
      "learning_rate": 0.00015497623012571566,
      "loss": 0.5093,
      "step": 1394
    },
    {
      "epoch": 0.3358815385541443,
      "grad_norm": 3.333343744277954,
      "learning_rate": 0.00015491107966999964,
      "loss": 1.1697,
      "step": 1395
    },
    {
      "epoch": 0.33612231385059893,
      "grad_norm": 1.7939079999923706,
      "learning_rate": 0.00015484589582822348,
      "loss": 0.8186,
      "step": 1396
    },
    {
      "epoch": 0.33636308914705354,
      "grad_norm": 0.5831475257873535,
      "learning_rate": 0.00015478067864001908,
      "loss": 0.0296,
      "step": 1397
    },
    {
      "epoch": 0.3366038644435081,
      "grad_norm": 1.5100713968276978,
      "learning_rate": 0.00015471542814503867,
      "loss": 0.7465,
      "step": 1398
    },
    {
      "epoch": 0.3368446397399627,
      "grad_norm": 3.8856029510498047,
      "learning_rate": 0.00015465014438295467,
      "loss": 0.5473,
      "step": 1399
    },
    {
      "epoch": 0.33708541503641726,
      "grad_norm": 1.1712760925292969,
      "learning_rate": 0.00015458482739345974,
      "loss": 0.3689,
      "step": 1400
    },
    {
      "epoch": 0.33732619033287187,
      "grad_norm": 4.496668338775635,
      "learning_rate": 0.00015451947721626676,
      "loss": 1.0972,
      "step": 1401
    },
    {
      "epoch": 0.3375669656293264,
      "grad_norm": 5.929965496063232,
      "learning_rate": 0.00015445409389110883,
      "loss": 0.6352,
      "step": 1402
    },
    {
      "epoch": 0.33780774092578103,
      "grad_norm": 1.9079606533050537,
      "learning_rate": 0.00015438867745773912,
      "loss": 0.5129,
      "step": 1403
    },
    {
      "epoch": 0.3380485162222356,
      "grad_norm": 3.6617226600646973,
      "learning_rate": 0.00015432322795593098,
      "loss": 0.4049,
      "step": 1404
    },
    {
      "epoch": 0.3382892915186902,
      "grad_norm": 5.218686580657959,
      "learning_rate": 0.00015425774542547784,
      "loss": 0.3238,
      "step": 1405
    },
    {
      "epoch": 0.33853006681514475,
      "grad_norm": 1.2463502883911133,
      "learning_rate": 0.00015419222990619322,
      "loss": 0.4756,
      "step": 1406
    },
    {
      "epoch": 0.33877084211159936,
      "grad_norm": 3.0786678791046143,
      "learning_rate": 0.00015412668143791075,
      "loss": 0.8815,
      "step": 1407
    },
    {
      "epoch": 0.3390116174080539,
      "grad_norm": 2.135958194732666,
      "learning_rate": 0.000154061100060484,
      "loss": 0.6417,
      "step": 1408
    },
    {
      "epoch": 0.3392523927045085,
      "grad_norm": 3.0606963634490967,
      "learning_rate": 0.00015399548581378664,
      "loss": 0.573,
      "step": 1409
    },
    {
      "epoch": 0.3394931680009631,
      "grad_norm": 1.5275843143463135,
      "learning_rate": 0.00015392983873771223,
      "loss": 0.5197,
      "step": 1410
    },
    {
      "epoch": 0.3397339432974177,
      "grad_norm": 2.3803906440734863,
      "learning_rate": 0.00015386415887217437,
      "loss": 0.543,
      "step": 1411
    },
    {
      "epoch": 0.3399747185938723,
      "grad_norm": 5.230526924133301,
      "learning_rate": 0.00015379844625710654,
      "loss": 0.5216,
      "step": 1412
    },
    {
      "epoch": 0.34021549389032685,
      "grad_norm": 1.887787938117981,
      "learning_rate": 0.0001537327009324622,
      "loss": 0.7004,
      "step": 1413
    },
    {
      "epoch": 0.34045626918678146,
      "grad_norm": 3.1152963638305664,
      "learning_rate": 0.0001536669229382146,
      "loss": 0.65,
      "step": 1414
    },
    {
      "epoch": 0.340697044483236,
      "grad_norm": 4.267107009887695,
      "learning_rate": 0.00015360111231435693,
      "loss": 0.7265,
      "step": 1415
    },
    {
      "epoch": 0.3409378197796906,
      "grad_norm": 1.1614614725112915,
      "learning_rate": 0.0001535352691009023,
      "loss": 0.443,
      "step": 1416
    },
    {
      "epoch": 0.3411785950761452,
      "grad_norm": 2.7194442749023438,
      "learning_rate": 0.00015346939333788336,
      "loss": 0.93,
      "step": 1417
    },
    {
      "epoch": 0.3414193703725998,
      "grad_norm": 1.5683730840682983,
      "learning_rate": 0.00015340348506535283,
      "loss": 0.665,
      "step": 1418
    },
    {
      "epoch": 0.34166014566905434,
      "grad_norm": 0.9245167970657349,
      "learning_rate": 0.00015333754432338302,
      "loss": 0.3983,
      "step": 1419
    },
    {
      "epoch": 0.34190092096550895,
      "grad_norm": 3.776094913482666,
      "learning_rate": 0.00015327157115206614,
      "loss": 0.6996,
      "step": 1420
    },
    {
      "epoch": 0.3421416962619635,
      "grad_norm": 3.2278683185577393,
      "learning_rate": 0.00015320556559151398,
      "loss": 0.614,
      "step": 1421
    },
    {
      "epoch": 0.3423824715584181,
      "grad_norm": 1.4512388706207275,
      "learning_rate": 0.00015313952768185803,
      "loss": 0.8104,
      "step": 1422
    },
    {
      "epoch": 0.34262324685487267,
      "grad_norm": 1.858079195022583,
      "learning_rate": 0.00015307345746324954,
      "loss": 0.8088,
      "step": 1423
    },
    {
      "epoch": 0.3428640221513273,
      "grad_norm": 0.6770870685577393,
      "learning_rate": 0.00015300735497585934,
      "loss": 0.6674,
      "step": 1424
    },
    {
      "epoch": 0.3431047974477819,
      "grad_norm": 1.5875935554504395,
      "learning_rate": 0.00015294122025987788,
      "loss": 0.5163,
      "step": 1425
    },
    {
      "epoch": 0.34334557274423644,
      "grad_norm": 1.7607767581939697,
      "learning_rate": 0.00015287505335551525,
      "loss": 0.5005,
      "step": 1426
    },
    {
      "epoch": 0.34358634804069105,
      "grad_norm": 4.621982574462891,
      "learning_rate": 0.000152808854303001,
      "loss": 0.7541,
      "step": 1427
    },
    {
      "epoch": 0.3438271233371456,
      "grad_norm": 3.1218035221099854,
      "learning_rate": 0.00015274262314258442,
      "loss": 0.5221,
      "step": 1428
    },
    {
      "epoch": 0.3440678986336002,
      "grad_norm": 4.2029924392700195,
      "learning_rate": 0.00015267635991453408,
      "loss": 0.6852,
      "step": 1429
    },
    {
      "epoch": 0.34430867393005476,
      "grad_norm": 5.702292442321777,
      "learning_rate": 0.00015261006465913828,
      "loss": 0.7622,
      "step": 1430
    },
    {
      "epoch": 0.3445494492265094,
      "grad_norm": 3.05202054977417,
      "learning_rate": 0.00015254373741670457,
      "loss": 0.6527,
      "step": 1431
    },
    {
      "epoch": 0.34479022452296393,
      "grad_norm": 3.4306201934814453,
      "learning_rate": 0.00015247737822756018,
      "loss": 1.2398,
      "step": 1432
    },
    {
      "epoch": 0.34503099981941854,
      "grad_norm": 2.056917667388916,
      "learning_rate": 0.0001524109871320516,
      "loss": 0.3902,
      "step": 1433
    },
    {
      "epoch": 0.3452717751158731,
      "grad_norm": 1.1819590330123901,
      "learning_rate": 0.00015234456417054476,
      "loss": 0.0903,
      "step": 1434
    },
    {
      "epoch": 0.3455125504123277,
      "grad_norm": 1.0845695734024048,
      "learning_rate": 0.00015227810938342492,
      "loss": 0.3635,
      "step": 1435
    },
    {
      "epoch": 0.34575332570878226,
      "grad_norm": 2.537416458129883,
      "learning_rate": 0.00015221162281109683,
      "loss": 0.4821,
      "step": 1436
    },
    {
      "epoch": 0.34599410100523686,
      "grad_norm": 1.1138862371444702,
      "learning_rate": 0.00015214510449398442,
      "loss": 0.5671,
      "step": 1437
    },
    {
      "epoch": 0.3462348763016914,
      "grad_norm": 2.424607276916504,
      "learning_rate": 0.00015207855447253103,
      "loss": 0.8349,
      "step": 1438
    },
    {
      "epoch": 0.34647565159814603,
      "grad_norm": 5.5035176277160645,
      "learning_rate": 0.00015201197278719915,
      "loss": 0.9117,
      "step": 1439
    },
    {
      "epoch": 0.34671642689460064,
      "grad_norm": 2.757199287414551,
      "learning_rate": 0.00015194535947847063,
      "loss": 0.4329,
      "step": 1440
    },
    {
      "epoch": 0.3469572021910552,
      "grad_norm": 6.263975143432617,
      "learning_rate": 0.00015187871458684655,
      "loss": 0.7671,
      "step": 1441
    },
    {
      "epoch": 0.3471979774875098,
      "grad_norm": 2.1420156955718994,
      "learning_rate": 0.00015181203815284707,
      "loss": 0.5561,
      "step": 1442
    },
    {
      "epoch": 0.34743875278396436,
      "grad_norm": 2.368563175201416,
      "learning_rate": 0.00015174533021701167,
      "loss": 0.479,
      "step": 1443
    },
    {
      "epoch": 0.34767952808041896,
      "grad_norm": 20.131282806396484,
      "learning_rate": 0.00015167859081989895,
      "loss": 0.9437,
      "step": 1444
    },
    {
      "epoch": 0.3479203033768735,
      "grad_norm": 1.1026864051818848,
      "learning_rate": 0.00015161182000208653,
      "loss": 0.0633,
      "step": 1445
    },
    {
      "epoch": 0.34816107867332813,
      "grad_norm": 1.3895201683044434,
      "learning_rate": 0.0001515450178041713,
      "loss": 0.8124,
      "step": 1446
    },
    {
      "epoch": 0.3484018539697827,
      "grad_norm": 3.4181928634643555,
      "learning_rate": 0.0001514781842667691,
      "loss": 0.8081,
      "step": 1447
    },
    {
      "epoch": 0.3486426292662373,
      "grad_norm": 3.2324140071868896,
      "learning_rate": 0.0001514113194305149,
      "loss": 1.6239,
      "step": 1448
    },
    {
      "epoch": 0.34888340456269185,
      "grad_norm": 0.9572200775146484,
      "learning_rate": 0.00015134442333606264,
      "loss": 0.7945,
      "step": 1449
    },
    {
      "epoch": 0.34912417985914646,
      "grad_norm": 3.1057350635528564,
      "learning_rate": 0.00015127749602408529,
      "loss": 0.3813,
      "step": 1450
    },
    {
      "epoch": 0.349364955155601,
      "grad_norm": 3.538774251937866,
      "learning_rate": 0.00015121053753527485,
      "loss": 0.6439,
      "step": 1451
    },
    {
      "epoch": 0.3496057304520556,
      "grad_norm": 1.1300746202468872,
      "learning_rate": 0.00015114354791034225,
      "loss": 0.1153,
      "step": 1452
    },
    {
      "epoch": 0.34984650574851023,
      "grad_norm": 6.388082027435303,
      "learning_rate": 0.00015107652719001724,
      "loss": 1.2515,
      "step": 1453
    },
    {
      "epoch": 0.3500872810449648,
      "grad_norm": 3.4116952419281006,
      "learning_rate": 0.00015100947541504863,
      "loss": 0.4694,
      "step": 1454
    },
    {
      "epoch": 0.3503280563414194,
      "grad_norm": 1.212721586227417,
      "learning_rate": 0.00015094239262620406,
      "loss": 0.4981,
      "step": 1455
    },
    {
      "epoch": 0.35056883163787395,
      "grad_norm": 4.218289375305176,
      "learning_rate": 0.00015087527886426997,
      "loss": 0.7434,
      "step": 1456
    },
    {
      "epoch": 0.35080960693432856,
      "grad_norm": 1.7133764028549194,
      "learning_rate": 0.00015080813417005172,
      "loss": 0.5143,
      "step": 1457
    },
    {
      "epoch": 0.3510503822307831,
      "grad_norm": 2.915750503540039,
      "learning_rate": 0.00015074095858437343,
      "loss": 0.6977,
      "step": 1458
    },
    {
      "epoch": 0.3512911575272377,
      "grad_norm": 1.0086733102798462,
      "learning_rate": 0.00015067375214807796,
      "loss": 0.4913,
      "step": 1459
    },
    {
      "epoch": 0.3515319328236923,
      "grad_norm": 2.266055107116699,
      "learning_rate": 0.0001506065149020271,
      "loss": 0.4658,
      "step": 1460
    },
    {
      "epoch": 0.3517727081201469,
      "grad_norm": 1.7266699075698853,
      "learning_rate": 0.0001505392468871011,
      "loss": 1.0223,
      "step": 1461
    },
    {
      "epoch": 0.35201348341660144,
      "grad_norm": 4.561027526855469,
      "learning_rate": 0.00015047194814419914,
      "loss": 0.4841,
      "step": 1462
    },
    {
      "epoch": 0.35225425871305605,
      "grad_norm": 2.0526604652404785,
      "learning_rate": 0.00015040461871423897,
      "loss": 0.0654,
      "step": 1463
    },
    {
      "epoch": 0.3524950340095106,
      "grad_norm": 2.187910556793213,
      "learning_rate": 0.0001503372586381571,
      "loss": 0.8852,
      "step": 1464
    },
    {
      "epoch": 0.3527358093059652,
      "grad_norm": 1.6966273784637451,
      "learning_rate": 0.00015026986795690857,
      "loss": 0.5213,
      "step": 1465
    },
    {
      "epoch": 0.3529765846024198,
      "grad_norm": 1.83759343624115,
      "learning_rate": 0.00015020244671146702,
      "loss": 0.5114,
      "step": 1466
    },
    {
      "epoch": 0.3532173598988744,
      "grad_norm": 6.522552490234375,
      "learning_rate": 0.00015013499494282478,
      "loss": 0.7191,
      "step": 1467
    },
    {
      "epoch": 0.353458135195329,
      "grad_norm": 1.2005650997161865,
      "learning_rate": 0.00015006751269199263,
      "loss": 0.4789,
      "step": 1468
    },
    {
      "epoch": 0.35369891049178354,
      "grad_norm": 11.688396453857422,
      "learning_rate": 0.00015000000000000001,
      "loss": 1.0394,
      "step": 1469
    },
    {
      "epoch": 0.35393968578823815,
      "grad_norm": 2.6536548137664795,
      "learning_rate": 0.0001499324569078947,
      "loss": 0.7116,
      "step": 1470
    },
    {
      "epoch": 0.3541804610846927,
      "grad_norm": 5.886802673339844,
      "learning_rate": 0.00014986488345674313,
      "loss": 0.8322,
      "step": 1471
    },
    {
      "epoch": 0.3544212363811473,
      "grad_norm": 3.7790753841400146,
      "learning_rate": 0.00014979727968763003,
      "loss": 0.6478,
      "step": 1472
    },
    {
      "epoch": 0.35466201167760186,
      "grad_norm": 2.2750492095947266,
      "learning_rate": 0.0001497296456416587,
      "loss": 0.5024,
      "step": 1473
    },
    {
      "epoch": 0.35490278697405647,
      "grad_norm": 4.089879035949707,
      "learning_rate": 0.0001496619813599508,
      "loss": 0.5536,
      "step": 1474
    },
    {
      "epoch": 0.355143562270511,
      "grad_norm": 4.2792558670043945,
      "learning_rate": 0.00014959428688364633,
      "loss": 0.8609,
      "step": 1475
    },
    {
      "epoch": 0.35538433756696564,
      "grad_norm": 1.6434048414230347,
      "learning_rate": 0.0001495265622539037,
      "loss": 0.7308,
      "step": 1476
    },
    {
      "epoch": 0.3556251128634202,
      "grad_norm": 4.617370128631592,
      "learning_rate": 0.00014945880751189965,
      "loss": 0.8816,
      "step": 1477
    },
    {
      "epoch": 0.3558658881598748,
      "grad_norm": 1.2536977529525757,
      "learning_rate": 0.0001493910226988292,
      "loss": 0.3698,
      "step": 1478
    },
    {
      "epoch": 0.35610666345632935,
      "grad_norm": 1.1533249616622925,
      "learning_rate": 0.00014932320785590562,
      "loss": 0.7943,
      "step": 1479
    },
    {
      "epoch": 0.35634743875278396,
      "grad_norm": 3.6115407943725586,
      "learning_rate": 0.00014925536302436057,
      "loss": 0.8441,
      "step": 1480
    },
    {
      "epoch": 0.35658821404923857,
      "grad_norm": 3.1211764812469482,
      "learning_rate": 0.0001491874882454438,
      "loss": 0.3908,
      "step": 1481
    },
    {
      "epoch": 0.3568289893456931,
      "grad_norm": 1.8636207580566406,
      "learning_rate": 0.00014911958356042342,
      "loss": 0.7643,
      "step": 1482
    },
    {
      "epoch": 0.35706976464214774,
      "grad_norm": 2.321810722351074,
      "learning_rate": 0.00014905164901058551,
      "loss": 0.3578,
      "step": 1483
    },
    {
      "epoch": 0.3573105399386023,
      "grad_norm": 2.7864739894866943,
      "learning_rate": 0.0001489836846372345,
      "loss": 0.6989,
      "step": 1484
    },
    {
      "epoch": 0.3575513152350569,
      "grad_norm": 2.2333052158355713,
      "learning_rate": 0.0001489156904816929,
      "loss": 0.7687,
      "step": 1485
    },
    {
      "epoch": 0.35779209053151145,
      "grad_norm": 3.294875383377075,
      "learning_rate": 0.00014884766658530125,
      "loss": 0.7011,
      "step": 1486
    },
    {
      "epoch": 0.35803286582796606,
      "grad_norm": 4.5237250328063965,
      "learning_rate": 0.00014877961298941824,
      "loss": 0.7226,
      "step": 1487
    },
    {
      "epoch": 0.3582736411244206,
      "grad_norm": 3.1399903297424316,
      "learning_rate": 0.00014871152973542067,
      "loss": 1.068,
      "step": 1488
    },
    {
      "epoch": 0.3585144164208752,
      "grad_norm": 3.0712268352508545,
      "learning_rate": 0.00014864341686470324,
      "loss": 0.3788,
      "step": 1489
    },
    {
      "epoch": 0.3587551917173298,
      "grad_norm": 2.3056137561798096,
      "learning_rate": 0.0001485752744186788,
      "loss": 0.7056,
      "step": 1490
    },
    {
      "epoch": 0.3589959670137844,
      "grad_norm": 2.5554864406585693,
      "learning_rate": 0.00014850710243877803,
      "loss": 0.8528,
      "step": 1491
    },
    {
      "epoch": 0.35923674231023894,
      "grad_norm": 2.9446706771850586,
      "learning_rate": 0.0001484389009664497,
      "loss": 0.6246,
      "step": 1492
    },
    {
      "epoch": 0.35947751760669355,
      "grad_norm": 2.1668176651000977,
      "learning_rate": 0.00014837067004316049,
      "loss": 0.6075,
      "step": 1493
    },
    {
      "epoch": 0.35971829290314816,
      "grad_norm": 3.2588088512420654,
      "learning_rate": 0.00014830240971039487,
      "loss": 0.6044,
      "step": 1494
    },
    {
      "epoch": 0.3599590681996027,
      "grad_norm": 2.2925832271575928,
      "learning_rate": 0.00014823412000965533,
      "loss": 0.5931,
      "step": 1495
    },
    {
      "epoch": 0.3601998434960573,
      "grad_norm": 1.257023811340332,
      "learning_rate": 0.00014816580098246215,
      "loss": 0.3431,
      "step": 1496
    },
    {
      "epoch": 0.3604406187925119,
      "grad_norm": 2.16398024559021,
      "learning_rate": 0.00014809745267035346,
      "loss": 0.5645,
      "step": 1497
    },
    {
      "epoch": 0.3606813940889665,
      "grad_norm": 3.020810604095459,
      "learning_rate": 0.0001480290751148852,
      "loss": 0.8691,
      "step": 1498
    },
    {
      "epoch": 0.36092216938542104,
      "grad_norm": 2.6079869270324707,
      "learning_rate": 0.00014796066835763103,
      "loss": 0.7485,
      "step": 1499
    },
    {
      "epoch": 0.36116294468187565,
      "grad_norm": 4.599701881408691,
      "learning_rate": 0.00014789223244018244,
      "loss": 0.7325,
      "step": 1500
    },
    {
      "epoch": 0.3614037199783302,
      "grad_norm": 3.939009428024292,
      "learning_rate": 0.00014782376740414863,
      "loss": 0.6157,
      "step": 1501
    },
    {
      "epoch": 0.3616444952747848,
      "grad_norm": 1.3872252702713013,
      "learning_rate": 0.0001477552732911565,
      "loss": 0.7243,
      "step": 1502
    },
    {
      "epoch": 0.36188527057123937,
      "grad_norm": 2.120624542236328,
      "learning_rate": 0.00014768675014285062,
      "loss": 0.8203,
      "step": 1503
    },
    {
      "epoch": 0.362126045867694,
      "grad_norm": 1.584861397743225,
      "learning_rate": 0.0001476181980008932,
      "loss": 0.5647,
      "step": 1504
    },
    {
      "epoch": 0.36236682116414853,
      "grad_norm": 2.499906063079834,
      "learning_rate": 0.0001475496169069641,
      "loss": 0.4923,
      "step": 1505
    },
    {
      "epoch": 0.36260759646060314,
      "grad_norm": 4.404402732849121,
      "learning_rate": 0.0001474810069027608,
      "loss": 0.8503,
      "step": 1506
    },
    {
      "epoch": 0.36284837175705775,
      "grad_norm": 1.0523654222488403,
      "learning_rate": 0.00014741236802999835,
      "loss": 0.6165,
      "step": 1507
    },
    {
      "epoch": 0.3630891470535123,
      "grad_norm": 4.341505527496338,
      "learning_rate": 0.00014734370033040928,
      "loss": 0.8027,
      "step": 1508
    },
    {
      "epoch": 0.3633299223499669,
      "grad_norm": 1.6273728609085083,
      "learning_rate": 0.00014727500384574375,
      "loss": 0.4199,
      "step": 1509
    },
    {
      "epoch": 0.36357069764642147,
      "grad_norm": 2.939532995223999,
      "learning_rate": 0.00014720627861776939,
      "loss": 1.1424,
      "step": 1510
    },
    {
      "epoch": 0.3638114729428761,
      "grad_norm": 1.7308731079101562,
      "learning_rate": 0.00014713752468827128,
      "loss": 0.2182,
      "step": 1511
    },
    {
      "epoch": 0.36405224823933063,
      "grad_norm": 8.7367582321167,
      "learning_rate": 0.00014706874209905192,
      "loss": 0.7563,
      "step": 1512
    },
    {
      "epoch": 0.36429302353578524,
      "grad_norm": 0.6256684064865112,
      "learning_rate": 0.00014699993089193134,
      "loss": 0.7837,
      "step": 1513
    },
    {
      "epoch": 0.3645337988322398,
      "grad_norm": 2.6632747650146484,
      "learning_rate": 0.00014693109110874687,
      "loss": 0.5477,
      "step": 1514
    },
    {
      "epoch": 0.3647745741286944,
      "grad_norm": 2.9854331016540527,
      "learning_rate": 0.00014686222279135328,
      "loss": 0.6202,
      "step": 1515
    },
    {
      "epoch": 0.36501534942514896,
      "grad_norm": 4.577966690063477,
      "learning_rate": 0.00014679332598162265,
      "loss": 0.4136,
      "step": 1516
    },
    {
      "epoch": 0.36525612472160357,
      "grad_norm": 0.8396844863891602,
      "learning_rate": 0.00014672440072144443,
      "loss": 0.8962,
      "step": 1517
    },
    {
      "epoch": 0.3654969000180581,
      "grad_norm": 2.5254063606262207,
      "learning_rate": 0.00014665544705272525,
      "loss": 0.9689,
      "step": 1518
    },
    {
      "epoch": 0.36573767531451273,
      "grad_norm": 2.8939788341522217,
      "learning_rate": 0.0001465864650173892,
      "loss": 0.6918,
      "step": 1519
    },
    {
      "epoch": 0.3659784506109673,
      "grad_norm": 2.4425880908966064,
      "learning_rate": 0.00014651745465737737,
      "loss": 0.6507,
      "step": 1520
    },
    {
      "epoch": 0.3662192259074219,
      "grad_norm": 3.3433775901794434,
      "learning_rate": 0.00014644841601464838,
      "loss": 0.7875,
      "step": 1521
    },
    {
      "epoch": 0.3664600012038765,
      "grad_norm": 1.840368390083313,
      "learning_rate": 0.00014637934913117777,
      "loss": 0.6712,
      "step": 1522
    },
    {
      "epoch": 0.36670077650033106,
      "grad_norm": 0.7851834297180176,
      "learning_rate": 0.0001463102540489584,
      "loss": 0.3682,
      "step": 1523
    },
    {
      "epoch": 0.36694155179678567,
      "grad_norm": 4.149460792541504,
      "learning_rate": 0.00014624113081000023,
      "loss": 0.5221,
      "step": 1524
    },
    {
      "epoch": 0.3671823270932402,
      "grad_norm": 1.0604087114334106,
      "learning_rate": 0.00014617197945633037,
      "loss": 0.6734,
      "step": 1525
    },
    {
      "epoch": 0.36742310238969483,
      "grad_norm": 0.5313230752944946,
      "learning_rate": 0.00014610280002999291,
      "loss": 0.1435,
      "step": 1526
    },
    {
      "epoch": 0.3676638776861494,
      "grad_norm": 9.519638061523438,
      "learning_rate": 0.00014603359257304925,
      "loss": 0.8516,
      "step": 1527
    },
    {
      "epoch": 0.367904652982604,
      "grad_norm": 1.3151129484176636,
      "learning_rate": 0.0001459643571275775,
      "loss": 0.5194,
      "step": 1528
    },
    {
      "epoch": 0.36814542827905855,
      "grad_norm": 1.3871126174926758,
      "learning_rate": 0.00014589509373567314,
      "loss": 0.3852,
      "step": 1529
    },
    {
      "epoch": 0.36838620357551316,
      "grad_norm": 1.881598949432373,
      "learning_rate": 0.00014582580243944836,
      "loss": 0.5607,
      "step": 1530
    },
    {
      "epoch": 0.3686269788719677,
      "grad_norm": 3.6260831356048584,
      "learning_rate": 0.0001457564832810324,
      "loss": 0.4408,
      "step": 1531
    },
    {
      "epoch": 0.3688677541684223,
      "grad_norm": 2.3478870391845703,
      "learning_rate": 0.00014568713630257155,
      "loss": 0.8691,
      "step": 1532
    },
    {
      "epoch": 0.3691085294648769,
      "grad_norm": 1.4225029945373535,
      "learning_rate": 0.00014561776154622892,
      "loss": 0.6029,
      "step": 1533
    },
    {
      "epoch": 0.3693493047613315,
      "grad_norm": 2.58164381980896,
      "learning_rate": 0.00014554835905418448,
      "loss": 0.6517,
      "step": 1534
    },
    {
      "epoch": 0.3695900800577861,
      "grad_norm": 1.5946727991104126,
      "learning_rate": 0.00014547892886863508,
      "loss": 0.3034,
      "step": 1535
    },
    {
      "epoch": 0.36983085535424065,
      "grad_norm": 1.9315208196640015,
      "learning_rate": 0.00014540947103179448,
      "loss": 0.1705,
      "step": 1536
    },
    {
      "epoch": 0.37007163065069526,
      "grad_norm": 1.584106683731079,
      "learning_rate": 0.0001453399855858932,
      "loss": 0.6266,
      "step": 1537
    },
    {
      "epoch": 0.3703124059471498,
      "grad_norm": 1.4017444849014282,
      "learning_rate": 0.00014527047257317853,
      "loss": 0.2253,
      "step": 1538
    },
    {
      "epoch": 0.3705531812436044,
      "grad_norm": 1.8468575477600098,
      "learning_rate": 0.00014520093203591452,
      "loss": 0.6682,
      "step": 1539
    },
    {
      "epoch": 0.370793956540059,
      "grad_norm": 3.008110761642456,
      "learning_rate": 0.000145131364016382,
      "loss": 0.5261,
      "step": 1540
    },
    {
      "epoch": 0.3710347318365136,
      "grad_norm": 4.193415641784668,
      "learning_rate": 0.00014506176855687847,
      "loss": 0.9067,
      "step": 1541
    },
    {
      "epoch": 0.37127550713296814,
      "grad_norm": 1.9622831344604492,
      "learning_rate": 0.00014499214569971814,
      "loss": 1.1056,
      "step": 1542
    },
    {
      "epoch": 0.37151628242942275,
      "grad_norm": 1.8108497858047485,
      "learning_rate": 0.00014492249548723188,
      "loss": 1.02,
      "step": 1543
    },
    {
      "epoch": 0.3717570577258773,
      "grad_norm": 3.2026660442352295,
      "learning_rate": 0.00014485281796176714,
      "loss": 0.675,
      "step": 1544
    },
    {
      "epoch": 0.3719978330223319,
      "grad_norm": 2.4484424591064453,
      "learning_rate": 0.00014478311316568797,
      "loss": 0.3577,
      "step": 1545
    },
    {
      "epoch": 0.37223860831878647,
      "grad_norm": 1.7263269424438477,
      "learning_rate": 0.00014471338114137517,
      "loss": 0.7703,
      "step": 1546
    },
    {
      "epoch": 0.3724793836152411,
      "grad_norm": 18.308795928955078,
      "learning_rate": 0.00014464362193122586,
      "loss": 0.6747,
      "step": 1547
    },
    {
      "epoch": 0.3727201589116957,
      "grad_norm": 2.601501941680908,
      "learning_rate": 0.00014457383557765386,
      "loss": 1.0085,
      "step": 1548
    },
    {
      "epoch": 0.37296093420815024,
      "grad_norm": 1.59297513961792,
      "learning_rate": 0.00014450402212308936,
      "loss": 0.5779,
      "step": 1549
    },
    {
      "epoch": 0.37320170950460485,
      "grad_norm": 2.8175299167633057,
      "learning_rate": 0.00014443418160997918,
      "loss": 0.5384,
      "step": 1550
    },
    {
      "epoch": 0.3734424848010594,
      "grad_norm": 4.92849063873291,
      "learning_rate": 0.00014436431408078643,
      "loss": 0.5555,
      "step": 1551
    },
    {
      "epoch": 0.373683260097514,
      "grad_norm": 2.1497936248779297,
      "learning_rate": 0.00014429441957799078,
      "loss": 0.6927,
      "step": 1552
    },
    {
      "epoch": 0.37392403539396857,
      "grad_norm": 0.8706673979759216,
      "learning_rate": 0.00014422449814408824,
      "loss": 0.2299,
      "step": 1553
    },
    {
      "epoch": 0.3741648106904232,
      "grad_norm": 1.8380601406097412,
      "learning_rate": 0.0001441545498215912,
      "loss": 0.6146,
      "step": 1554
    },
    {
      "epoch": 0.37440558598687773,
      "grad_norm": 1.3901095390319824,
      "learning_rate": 0.0001440845746530284,
      "loss": 0.3206,
      "step": 1555
    },
    {
      "epoch": 0.37464636128333234,
      "grad_norm": 1.46050226688385,
      "learning_rate": 0.00014401457268094483,
      "loss": 0.7791,
      "step": 1556
    },
    {
      "epoch": 0.3748871365797869,
      "grad_norm": 4.091619968414307,
      "learning_rate": 0.0001439445439479019,
      "loss": 0.5138,
      "step": 1557
    },
    {
      "epoch": 0.3751279118762415,
      "grad_norm": 1.6713485717773438,
      "learning_rate": 0.00014387448849647732,
      "loss": 0.3188,
      "step": 1558
    },
    {
      "epoch": 0.37536868717269606,
      "grad_norm": 3.4357035160064697,
      "learning_rate": 0.00014380440636926485,
      "loss": 0.6026,
      "step": 1559
    },
    {
      "epoch": 0.37560946246915067,
      "grad_norm": 3.0857181549072266,
      "learning_rate": 0.00014373429760887457,
      "loss": 0.7203,
      "step": 1560
    },
    {
      "epoch": 0.3758502377656052,
      "grad_norm": 3.1348352432250977,
      "learning_rate": 0.00014366416225793284,
      "loss": 0.611,
      "step": 1561
    },
    {
      "epoch": 0.37609101306205983,
      "grad_norm": 1.1630622148513794,
      "learning_rate": 0.0001435940003590821,
      "loss": 0.3327,
      "step": 1562
    },
    {
      "epoch": 0.37633178835851444,
      "grad_norm": 1.690561294555664,
      "learning_rate": 0.00014352381195498093,
      "loss": 0.6988,
      "step": 1563
    },
    {
      "epoch": 0.376572563654969,
      "grad_norm": 3.028482437133789,
      "learning_rate": 0.000143453597088304,
      "loss": 0.594,
      "step": 1564
    },
    {
      "epoch": 0.3768133389514236,
      "grad_norm": 0.997052013874054,
      "learning_rate": 0.00014338335580174212,
      "loss": 0.8037,
      "step": 1565
    },
    {
      "epoch": 0.37705411424787816,
      "grad_norm": 7.6312079429626465,
      "learning_rate": 0.00014331308813800222,
      "loss": 1.306,
      "step": 1566
    },
    {
      "epoch": 0.37729488954433277,
      "grad_norm": 2.4936201572418213,
      "learning_rate": 0.00014324279413980713,
      "loss": 0.4458,
      "step": 1567
    },
    {
      "epoch": 0.3775356648407873,
      "grad_norm": 2.824725389480591,
      "learning_rate": 0.00014317247384989577,
      "loss": 0.5562,
      "step": 1568
    },
    {
      "epoch": 0.37777644013724193,
      "grad_norm": 1.7765711545944214,
      "learning_rate": 0.00014310212731102304,
      "loss": 0.5947,
      "step": 1569
    },
    {
      "epoch": 0.3780172154336965,
      "grad_norm": 9.701804161071777,
      "learning_rate": 0.00014303175456595977,
      "loss": 1.0711,
      "step": 1570
    },
    {
      "epoch": 0.3782579907301511,
      "grad_norm": 2.2651548385620117,
      "learning_rate": 0.0001429613556574928,
      "loss": 0.7001,
      "step": 1571
    },
    {
      "epoch": 0.37849876602660565,
      "grad_norm": 1.20858895778656,
      "learning_rate": 0.0001428909306284248,
      "loss": 0.6168,
      "step": 1572
    },
    {
      "epoch": 0.37873954132306026,
      "grad_norm": 1.3196377754211426,
      "learning_rate": 0.00014282047952157432,
      "loss": 0.8402,
      "step": 1573
    },
    {
      "epoch": 0.3789803166195148,
      "grad_norm": 1.9669349193572998,
      "learning_rate": 0.00014275000237977582,
      "loss": 0.5,
      "step": 1574
    },
    {
      "epoch": 0.3792210919159694,
      "grad_norm": 2.7113590240478516,
      "learning_rate": 0.00014267949924587958,
      "loss": 0.2134,
      "step": 1575
    },
    {
      "epoch": 0.37946186721242403,
      "grad_norm": 7.937801837921143,
      "learning_rate": 0.00014260897016275166,
      "loss": 0.4475,
      "step": 1576
    },
    {
      "epoch": 0.3797026425088786,
      "grad_norm": 1.9907861948013306,
      "learning_rate": 0.00014253841517327382,
      "loss": 0.7746,
      "step": 1577
    },
    {
      "epoch": 0.3799434178053332,
      "grad_norm": 2.06160569190979,
      "learning_rate": 0.00014246783432034373,
      "loss": 0.9227,
      "step": 1578
    },
    {
      "epoch": 0.38018419310178775,
      "grad_norm": 2.051358461380005,
      "learning_rate": 0.00014239722764687474,
      "loss": 0.7264,
      "step": 1579
    },
    {
      "epoch": 0.38042496839824236,
      "grad_norm": 3.846851110458374,
      "learning_rate": 0.0001423265951957958,
      "loss": 0.9719,
      "step": 1580
    },
    {
      "epoch": 0.3806657436946969,
      "grad_norm": 1.47300124168396,
      "learning_rate": 0.00014225593701005157,
      "loss": 1.17,
      "step": 1581
    },
    {
      "epoch": 0.3809065189911515,
      "grad_norm": 4.379542827606201,
      "learning_rate": 0.0001421852531326025,
      "loss": 0.5917,
      "step": 1582
    },
    {
      "epoch": 0.3811472942876061,
      "grad_norm": 1.3115028142929077,
      "learning_rate": 0.00014211454360642443,
      "loss": 0.1916,
      "step": 1583
    },
    {
      "epoch": 0.3813880695840607,
      "grad_norm": 2.608750343322754,
      "learning_rate": 0.00014204380847450897,
      "loss": 0.8763,
      "step": 1584
    },
    {
      "epoch": 0.38162884488051524,
      "grad_norm": 3.5941126346588135,
      "learning_rate": 0.00014197304777986325,
      "loss": 0.1222,
      "step": 1585
    },
    {
      "epoch": 0.38186962017696985,
      "grad_norm": 4.869987487792969,
      "learning_rate": 0.0001419022615655099,
      "loss": 0.8706,
      "step": 1586
    },
    {
      "epoch": 0.3821103954734244,
      "grad_norm": 3.05656099319458,
      "learning_rate": 0.00014183144987448711,
      "loss": 0.5847,
      "step": 1587
    },
    {
      "epoch": 0.382351170769879,
      "grad_norm": 1.0079351663589478,
      "learning_rate": 0.00014176061274984858,
      "loss": 0.2984,
      "step": 1588
    },
    {
      "epoch": 0.3825919460663336,
      "grad_norm": 3.344771146774292,
      "learning_rate": 0.00014168975023466337,
      "loss": 0.5847,
      "step": 1589
    },
    {
      "epoch": 0.3828327213627882,
      "grad_norm": 2.857647657394409,
      "learning_rate": 0.00014161886237201612,
      "loss": 1.2925,
      "step": 1590
    },
    {
      "epoch": 0.3830734966592428,
      "grad_norm": 2.705115795135498,
      "learning_rate": 0.00014154794920500673,
      "loss": 0.5277,
      "step": 1591
    },
    {
      "epoch": 0.38331427195569734,
      "grad_norm": 1.9536807537078857,
      "learning_rate": 0.00014147701077675065,
      "loss": 0.5553,
      "step": 1592
    },
    {
      "epoch": 0.38355504725215195,
      "grad_norm": 1.2713546752929688,
      "learning_rate": 0.00014140604713037857,
      "loss": 0.507,
      "step": 1593
    },
    {
      "epoch": 0.3837958225486065,
      "grad_norm": 2.593982219696045,
      "learning_rate": 0.00014133505830903658,
      "loss": 0.3527,
      "step": 1594
    },
    {
      "epoch": 0.3840365978450611,
      "grad_norm": 0.6847010254859924,
      "learning_rate": 0.00014126404435588596,
      "loss": 0.1223,
      "step": 1595
    },
    {
      "epoch": 0.38427737314151567,
      "grad_norm": 1.8529340028762817,
      "learning_rate": 0.00014119300531410342,
      "loss": 0.44,
      "step": 1596
    },
    {
      "epoch": 0.3845181484379703,
      "grad_norm": 2.9218854904174805,
      "learning_rate": 0.0001411219412268808,
      "loss": 0.4854,
      "step": 1597
    },
    {
      "epoch": 0.38475892373442483,
      "grad_norm": 2.5640389919281006,
      "learning_rate": 0.00014105085213742533,
      "loss": 0.7238,
      "step": 1598
    },
    {
      "epoch": 0.38499969903087944,
      "grad_norm": 0.7277923822402954,
      "learning_rate": 0.00014097973808895926,
      "loss": 0.2205,
      "step": 1599
    },
    {
      "epoch": 0.385240474327334,
      "grad_norm": 1.501104474067688,
      "learning_rate": 0.00014090859912472005,
      "loss": 0.1477,
      "step": 1600
    },
    {
      "epoch": 0.3854812496237886,
      "grad_norm": 3.788515329360962,
      "learning_rate": 0.00014083743528796045,
      "loss": 1.0636,
      "step": 1601
    },
    {
      "epoch": 0.38572202492024316,
      "grad_norm": 2.322822332382202,
      "learning_rate": 0.00014076624662194816,
      "loss": 0.283,
      "step": 1602
    },
    {
      "epoch": 0.38596280021669777,
      "grad_norm": 1.9796638488769531,
      "learning_rate": 0.00014069503316996613,
      "loss": 0.3978,
      "step": 1603
    },
    {
      "epoch": 0.3862035755131524,
      "grad_norm": 0.6974928379058838,
      "learning_rate": 0.0001406237949753122,
      "loss": 0.8248,
      "step": 1604
    },
    {
      "epoch": 0.38644435080960693,
      "grad_norm": 0.8106366991996765,
      "learning_rate": 0.00014055253208129938,
      "loss": 0.5371,
      "step": 1605
    },
    {
      "epoch": 0.38668512610606154,
      "grad_norm": 1.5011775493621826,
      "learning_rate": 0.00014048124453125573,
      "loss": 0.2772,
      "step": 1606
    },
    {
      "epoch": 0.3869259014025161,
      "grad_norm": 1.6291502714157104,
      "learning_rate": 0.0001404099323685242,
      "loss": 0.1682,
      "step": 1607
    },
    {
      "epoch": 0.3871666766989707,
      "grad_norm": 1.8147183656692505,
      "learning_rate": 0.00014033859563646276,
      "loss": 0.1837,
      "step": 1608
    },
    {
      "epoch": 0.38740745199542526,
      "grad_norm": 2.469822645187378,
      "learning_rate": 0.00014026723437844421,
      "loss": 0.7883,
      "step": 1609
    },
    {
      "epoch": 0.38764822729187987,
      "grad_norm": 6.157069683074951,
      "learning_rate": 0.00014019584863785652,
      "loss": 0.3593,
      "step": 1610
    },
    {
      "epoch": 0.3878890025883344,
      "grad_norm": 1.2629841566085815,
      "learning_rate": 0.00014012443845810223,
      "loss": 0.4991,
      "step": 1611
    },
    {
      "epoch": 0.38812977788478903,
      "grad_norm": 2.7113308906555176,
      "learning_rate": 0.000140053003882599,
      "loss": 1.136,
      "step": 1612
    },
    {
      "epoch": 0.3883705531812436,
      "grad_norm": 3.3584749698638916,
      "learning_rate": 0.00013998154495477912,
      "loss": 0.1191,
      "step": 1613
    },
    {
      "epoch": 0.3886113284776982,
      "grad_norm": 2.5008931159973145,
      "learning_rate": 0.0001399100617180899,
      "loss": 0.6197,
      "step": 1614
    },
    {
      "epoch": 0.38885210377415275,
      "grad_norm": 1.7047406435012817,
      "learning_rate": 0.00013983855421599318,
      "loss": 0.6819,
      "step": 1615
    },
    {
      "epoch": 0.38909287907060736,
      "grad_norm": 1.2568997144699097,
      "learning_rate": 0.0001397670224919658,
      "loss": 0.4986,
      "step": 1616
    },
    {
      "epoch": 0.38933365436706197,
      "grad_norm": 4.387941360473633,
      "learning_rate": 0.0001396954665894991,
      "loss": 0.4947,
      "step": 1617
    },
    {
      "epoch": 0.3895744296635165,
      "grad_norm": 1.8967385292053223,
      "learning_rate": 0.00013962388655209927,
      "loss": 0.6985,
      "step": 1618
    },
    {
      "epoch": 0.38981520495997113,
      "grad_norm": 3.39685320854187,
      "learning_rate": 0.00013955228242328718,
      "loss": 1.0637,
      "step": 1619
    },
    {
      "epoch": 0.3900559802564257,
      "grad_norm": 4.821850299835205,
      "learning_rate": 0.00013948065424659824,
      "loss": 0.4031,
      "step": 1620
    },
    {
      "epoch": 0.3902967555528803,
      "grad_norm": 2.4104623794555664,
      "learning_rate": 0.00013940900206558257,
      "loss": 0.9255,
      "step": 1621
    },
    {
      "epoch": 0.39053753084933485,
      "grad_norm": 2.2007462978363037,
      "learning_rate": 0.00013933732592380483,
      "loss": 0.5469,
      "step": 1622
    },
    {
      "epoch": 0.39077830614578946,
      "grad_norm": 2.2772059440612793,
      "learning_rate": 0.00013926562586484434,
      "loss": 0.4233,
      "step": 1623
    },
    {
      "epoch": 0.391019081442244,
      "grad_norm": 2.6534852981567383,
      "learning_rate": 0.00013919390193229485,
      "loss": 0.3978,
      "step": 1624
    },
    {
      "epoch": 0.3912598567386986,
      "grad_norm": 0.3831101357936859,
      "learning_rate": 0.00013912215416976467,
      "loss": 0.2271,
      "step": 1625
    },
    {
      "epoch": 0.3915006320351532,
      "grad_norm": 1.9152987003326416,
      "learning_rate": 0.00013905038262087662,
      "loss": 0.522,
      "step": 1626
    },
    {
      "epoch": 0.3917414073316078,
      "grad_norm": 2.0952141284942627,
      "learning_rate": 0.00013897858732926793,
      "loss": 0.2229,
      "step": 1627
    },
    {
      "epoch": 0.39198218262806234,
      "grad_norm": 10.112699508666992,
      "learning_rate": 0.00013890676833859037,
      "loss": 1.0788,
      "step": 1628
    },
    {
      "epoch": 0.39222295792451695,
      "grad_norm": 2.1068572998046875,
      "learning_rate": 0.00013883492569250998,
      "loss": 0.5627,
      "step": 1629
    },
    {
      "epoch": 0.39246373322097156,
      "grad_norm": 2.1683926582336426,
      "learning_rate": 0.00013876305943470724,
      "loss": 1.0251,
      "step": 1630
    },
    {
      "epoch": 0.3927045085174261,
      "grad_norm": 5.917585372924805,
      "learning_rate": 0.00013869116960887708,
      "loss": 0.6836,
      "step": 1631
    },
    {
      "epoch": 0.3929452838138807,
      "grad_norm": 2.575009346008301,
      "learning_rate": 0.0001386192562587286,
      "loss": 0.8661,
      "step": 1632
    },
    {
      "epoch": 0.3931860591103353,
      "grad_norm": 2.4185233116149902,
      "learning_rate": 0.00013854731942798532,
      "loss": 0.7001,
      "step": 1633
    },
    {
      "epoch": 0.3934268344067899,
      "grad_norm": 1.6709206104278564,
      "learning_rate": 0.00013847535916038496,
      "loss": 0.364,
      "step": 1634
    },
    {
      "epoch": 0.39366760970324444,
      "grad_norm": 3.425093650817871,
      "learning_rate": 0.00013840337549967955,
      "loss": 0.3667,
      "step": 1635
    },
    {
      "epoch": 0.39390838499969905,
      "grad_norm": 1.7669458389282227,
      "learning_rate": 0.00013833136848963532,
      "loss": 0.733,
      "step": 1636
    },
    {
      "epoch": 0.3941491602961536,
      "grad_norm": 2.1822469234466553,
      "learning_rate": 0.00013825933817403267,
      "loss": 0.7814,
      "step": 1637
    },
    {
      "epoch": 0.3943899355926082,
      "grad_norm": 8.053266525268555,
      "learning_rate": 0.00013818728459666623,
      "loss": 0.9111,
      "step": 1638
    },
    {
      "epoch": 0.39463071088906276,
      "grad_norm": 1.4243130683898926,
      "learning_rate": 0.0001381152078013447,
      "loss": 0.4235,
      "step": 1639
    },
    {
      "epoch": 0.3948714861855174,
      "grad_norm": 1.732535481452942,
      "learning_rate": 0.00013804310783189098,
      "loss": 0.3293,
      "step": 1640
    },
    {
      "epoch": 0.3951122614819719,
      "grad_norm": 1.332587718963623,
      "learning_rate": 0.00013797098473214197,
      "loss": 0.6848,
      "step": 1641
    },
    {
      "epoch": 0.39535303677842654,
      "grad_norm": 1.3026105165481567,
      "learning_rate": 0.0001378988385459487,
      "loss": 0.7852,
      "step": 1642
    },
    {
      "epoch": 0.3955938120748811,
      "grad_norm": 2.118013620376587,
      "learning_rate": 0.0001378266693171762,
      "loss": 0.4796,
      "step": 1643
    },
    {
      "epoch": 0.3958345873713357,
      "grad_norm": 2.2776410579681396,
      "learning_rate": 0.00013775447708970351,
      "loss": 1.0214,
      "step": 1644
    },
    {
      "epoch": 0.3960753626677903,
      "grad_norm": 1.8297806978225708,
      "learning_rate": 0.0001376822619074237,
      "loss": 0.4031,
      "step": 1645
    },
    {
      "epoch": 0.39631613796424486,
      "grad_norm": 1.5983656644821167,
      "learning_rate": 0.0001376100238142438,
      "loss": 0.2453,
      "step": 1646
    },
    {
      "epoch": 0.3965569132606995,
      "grad_norm": 1.8416905403137207,
      "learning_rate": 0.00013753776285408464,
      "loss": 0.5695,
      "step": 1647
    },
    {
      "epoch": 0.396797688557154,
      "grad_norm": 2.1590733528137207,
      "learning_rate": 0.00013746547907088108,
      "loss": 0.1617,
      "step": 1648
    },
    {
      "epoch": 0.39703846385360864,
      "grad_norm": 2.4669997692108154,
      "learning_rate": 0.00013739317250858186,
      "loss": 0.5653,
      "step": 1649
    },
    {
      "epoch": 0.3972792391500632,
      "grad_norm": 1.7538673877716064,
      "learning_rate": 0.0001373208432111495,
      "loss": 0.16,
      "step": 1650
    },
    {
      "epoch": 0.3975200144465178,
      "grad_norm": 2.019120216369629,
      "learning_rate": 0.00013724849122256035,
      "loss": 0.6373,
      "step": 1651
    },
    {
      "epoch": 0.39776078974297235,
      "grad_norm": 1.4879308938980103,
      "learning_rate": 0.00013717611658680464,
      "loss": 0.8454,
      "step": 1652
    },
    {
      "epoch": 0.39800156503942696,
      "grad_norm": 0.9595705270767212,
      "learning_rate": 0.00013710371934788632,
      "loss": 0.532,
      "step": 1653
    },
    {
      "epoch": 0.3982423403358815,
      "grad_norm": 1.8083183765411377,
      "learning_rate": 0.00013703129954982299,
      "loss": 0.4841,
      "step": 1654
    },
    {
      "epoch": 0.3984831156323361,
      "grad_norm": 1.0364370346069336,
      "learning_rate": 0.00013695885723664616,
      "loss": 0.2084,
      "step": 1655
    },
    {
      "epoch": 0.3987238909287907,
      "grad_norm": 6.035412788391113,
      "learning_rate": 0.00013688639245240078,
      "loss": 0.7487,
      "step": 1656
    },
    {
      "epoch": 0.3989646662252453,
      "grad_norm": 1.0442893505096436,
      "learning_rate": 0.00013681390524114575,
      "loss": 0.422,
      "step": 1657
    },
    {
      "epoch": 0.3992054415216999,
      "grad_norm": 2.071849822998047,
      "learning_rate": 0.00013674139564695333,
      "loss": 0.5663,
      "step": 1658
    },
    {
      "epoch": 0.39944621681815445,
      "grad_norm": 2.249422311782837,
      "learning_rate": 0.00013666886371390967,
      "loss": 0.679,
      "step": 1659
    },
    {
      "epoch": 0.39968699211460906,
      "grad_norm": 5.166494369506836,
      "learning_rate": 0.0001365963094861142,
      "loss": 0.9236,
      "step": 1660
    },
    {
      "epoch": 0.3999277674110636,
      "grad_norm": 2.5879993438720703,
      "learning_rate": 0.0001365237330076801,
      "loss": 0.642,
      "step": 1661
    },
    {
      "epoch": 0.4001685427075182,
      "grad_norm": 2.8723905086517334,
      "learning_rate": 0.00013645113432273403,
      "loss": 0.7538,
      "step": 1662
    },
    {
      "epoch": 0.4004093180039728,
      "grad_norm": 1.0138564109802246,
      "learning_rate": 0.0001363785134754162,
      "loss": 0.511,
      "step": 1663
    },
    {
      "epoch": 0.4006500933004274,
      "grad_norm": 3.8104164600372314,
      "learning_rate": 0.00013630587050988022,
      "loss": 0.4648,
      "step": 1664
    },
    {
      "epoch": 0.40089086859688194,
      "grad_norm": 2.2068583965301514,
      "learning_rate": 0.00013623320547029316,
      "loss": 0.6258,
      "step": 1665
    },
    {
      "epoch": 0.40113164389333655,
      "grad_norm": 1.245370864868164,
      "learning_rate": 0.0001361605184008355,
      "loss": 0.4723,
      "step": 1666
    },
    {
      "epoch": 0.4013724191897911,
      "grad_norm": 1.0925084352493286,
      "learning_rate": 0.00013608780934570123,
      "loss": 0.5381,
      "step": 1667
    },
    {
      "epoch": 0.4016131944862457,
      "grad_norm": 6.653575897216797,
      "learning_rate": 0.00013601507834909757,
      "loss": 0.5606,
      "step": 1668
    },
    {
      "epoch": 0.40185396978270027,
      "grad_norm": 1.6157435178756714,
      "learning_rate": 0.0001359423254552451,
      "loss": 0.8517,
      "step": 1669
    },
    {
      "epoch": 0.4020947450791549,
      "grad_norm": 1.4830398559570312,
      "learning_rate": 0.00013586955070837777,
      "loss": 0.895,
      "step": 1670
    },
    {
      "epoch": 0.4023355203756095,
      "grad_norm": 0.835504949092865,
      "learning_rate": 0.00013579675415274284,
      "loss": 0.3608,
      "step": 1671
    },
    {
      "epoch": 0.40257629567206404,
      "grad_norm": 3.575409173965454,
      "learning_rate": 0.00013572393583260073,
      "loss": 0.985,
      "step": 1672
    },
    {
      "epoch": 0.40281707096851865,
      "grad_norm": 2.397228479385376,
      "learning_rate": 0.0001356510957922251,
      "loss": 0.5574,
      "step": 1673
    },
    {
      "epoch": 0.4030578462649732,
      "grad_norm": 1.162008285522461,
      "learning_rate": 0.00013557823407590294,
      "loss": 0.4828,
      "step": 1674
    },
    {
      "epoch": 0.4032986215614278,
      "grad_norm": 2.0564050674438477,
      "learning_rate": 0.00013550535072793428,
      "loss": 1.0467,
      "step": 1675
    },
    {
      "epoch": 0.40353939685788237,
      "grad_norm": 4.555008888244629,
      "learning_rate": 0.00013543244579263244,
      "loss": 0.645,
      "step": 1676
    },
    {
      "epoch": 0.403780172154337,
      "grad_norm": 1.655927062034607,
      "learning_rate": 0.00013535951931432366,
      "loss": 0.5477,
      "step": 1677
    },
    {
      "epoch": 0.40402094745079153,
      "grad_norm": 2.4142045974731445,
      "learning_rate": 0.0001352865713373475,
      "loss": 0.5651,
      "step": 1678
    },
    {
      "epoch": 0.40426172274724614,
      "grad_norm": 2.2285380363464355,
      "learning_rate": 0.00013521360190605646,
      "loss": 0.648,
      "step": 1679
    },
    {
      "epoch": 0.4045024980437007,
      "grad_norm": 3.8250715732574463,
      "learning_rate": 0.00013514061106481614,
      "loss": 0.9591,
      "step": 1680
    },
    {
      "epoch": 0.4047432733401553,
      "grad_norm": 0.9585970640182495,
      "learning_rate": 0.0001350675988580051,
      "loss": 0.3991,
      "step": 1681
    },
    {
      "epoch": 0.40498404863660986,
      "grad_norm": 9.034631729125977,
      "learning_rate": 0.00013499456533001497,
      "loss": 0.5749,
      "step": 1682
    },
    {
      "epoch": 0.40522482393306447,
      "grad_norm": 2.0019724369049072,
      "learning_rate": 0.00013492151052525023,
      "loss": 0.1236,
      "step": 1683
    },
    {
      "epoch": 0.405465599229519,
      "grad_norm": 11.653858184814453,
      "learning_rate": 0.00013484843448812844,
      "loss": 0.785,
      "step": 1684
    },
    {
      "epoch": 0.40570637452597363,
      "grad_norm": 2.2401812076568604,
      "learning_rate": 0.00013477533726308,
      "loss": 0.8912,
      "step": 1685
    },
    {
      "epoch": 0.40594714982242824,
      "grad_norm": 12.922853469848633,
      "learning_rate": 0.0001347022188945481,
      "loss": 0.6012,
      "step": 1686
    },
    {
      "epoch": 0.4061879251188828,
      "grad_norm": 1.3376822471618652,
      "learning_rate": 0.00013462907942698895,
      "loss": 1.2057,
      "step": 1687
    },
    {
      "epoch": 0.4064287004153374,
      "grad_norm": 6.5069708824157715,
      "learning_rate": 0.00013455591890487148,
      "loss": 0.8799,
      "step": 1688
    },
    {
      "epoch": 0.40666947571179196,
      "grad_norm": 1.1161401271820068,
      "learning_rate": 0.0001344827373726775,
      "loss": 1.4456,
      "step": 1689
    },
    {
      "epoch": 0.40691025100824657,
      "grad_norm": 0.9486348032951355,
      "learning_rate": 0.00013440953487490144,
      "loss": 0.5933,
      "step": 1690
    },
    {
      "epoch": 0.4071510263047011,
      "grad_norm": 1.8005541563034058,
      "learning_rate": 0.0001343363114560507,
      "loss": 0.7821,
      "step": 1691
    },
    {
      "epoch": 0.40739180160115573,
      "grad_norm": 2.908756732940674,
      "learning_rate": 0.0001342630671606452,
      "loss": 0.8259,
      "step": 1692
    },
    {
      "epoch": 0.4076325768976103,
      "grad_norm": 1.161380648612976,
      "learning_rate": 0.00013418980203321772,
      "loss": 0.7767,
      "step": 1693
    },
    {
      "epoch": 0.4078733521940649,
      "grad_norm": 2.2439661026000977,
      "learning_rate": 0.00013411651611831352,
      "loss": 0.3818,
      "step": 1694
    },
    {
      "epoch": 0.40811412749051945,
      "grad_norm": 2.2217512130737305,
      "learning_rate": 0.00013404320946049068,
      "loss": 0.2162,
      "step": 1695
    },
    {
      "epoch": 0.40835490278697406,
      "grad_norm": 2.809119462966919,
      "learning_rate": 0.00013396988210431977,
      "loss": 0.7169,
      "step": 1696
    },
    {
      "epoch": 0.4085956780834286,
      "grad_norm": 2.8886725902557373,
      "learning_rate": 0.00013389653409438406,
      "loss": 0.349,
      "step": 1697
    },
    {
      "epoch": 0.4088364533798832,
      "grad_norm": 1.2677594423294067,
      "learning_rate": 0.00013382316547527919,
      "loss": 0.2073,
      "step": 1698
    },
    {
      "epoch": 0.40907722867633783,
      "grad_norm": 6.638054370880127,
      "learning_rate": 0.00013374977629161355,
      "loss": 0.9768,
      "step": 1699
    },
    {
      "epoch": 0.4093180039727924,
      "grad_norm": 2.200249195098877,
      "learning_rate": 0.00013367636658800783,
      "loss": 0.4204,
      "step": 1700
    },
    {
      "epoch": 0.409558779269247,
      "grad_norm": 2.565556526184082,
      "learning_rate": 0.0001336029364090954,
      "loss": 0.8401,
      "step": 1701
    },
    {
      "epoch": 0.40979955456570155,
      "grad_norm": 1.9111295938491821,
      "learning_rate": 0.0001335294857995219,
      "loss": 1.2343,
      "step": 1702
    },
    {
      "epoch": 0.41004032986215616,
      "grad_norm": 5.341217041015625,
      "learning_rate": 0.0001334560148039455,
      "loss": 0.371,
      "step": 1703
    },
    {
      "epoch": 0.4102811051586107,
      "grad_norm": 1.3484272956848145,
      "learning_rate": 0.00013338252346703673,
      "loss": 0.7788,
      "step": 1704
    },
    {
      "epoch": 0.4105218804550653,
      "grad_norm": 2.7777099609375,
      "learning_rate": 0.00013330901183347847,
      "loss": 0.4438,
      "step": 1705
    },
    {
      "epoch": 0.4107626557515199,
      "grad_norm": 2.4722752571105957,
      "learning_rate": 0.00013323547994796597,
      "loss": 0.4454,
      "step": 1706
    },
    {
      "epoch": 0.4110034310479745,
      "grad_norm": 2.2678263187408447,
      "learning_rate": 0.0001331619278552068,
      "loss": 0.4712,
      "step": 1707
    },
    {
      "epoch": 0.41124420634442904,
      "grad_norm": 2.552933692932129,
      "learning_rate": 0.00013308835559992075,
      "loss": 0.6171,
      "step": 1708
    },
    {
      "epoch": 0.41148498164088365,
      "grad_norm": 4.140172958374023,
      "learning_rate": 0.00013301476322683997,
      "loss": 1.2291,
      "step": 1709
    },
    {
      "epoch": 0.4117257569373382,
      "grad_norm": 1.883234977722168,
      "learning_rate": 0.00013294115078070875,
      "loss": 0.3714,
      "step": 1710
    },
    {
      "epoch": 0.4119665322337928,
      "grad_norm": 1.5748333930969238,
      "learning_rate": 0.00013286751830628363,
      "loss": 0.891,
      "step": 1711
    },
    {
      "epoch": 0.4122073075302474,
      "grad_norm": 1.558668613433838,
      "learning_rate": 0.00013279386584833335,
      "loss": 0.5892,
      "step": 1712
    },
    {
      "epoch": 0.412448082826702,
      "grad_norm": 6.810975074768066,
      "learning_rate": 0.00013272019345163873,
      "loss": 0.5012,
      "step": 1713
    },
    {
      "epoch": 0.4126888581231566,
      "grad_norm": 2.5344254970550537,
      "learning_rate": 0.00013264650116099277,
      "loss": 0.6199,
      "step": 1714
    },
    {
      "epoch": 0.41292963341961114,
      "grad_norm": 1.2778170108795166,
      "learning_rate": 0.00013257278902120058,
      "loss": 0.8041,
      "step": 1715
    },
    {
      "epoch": 0.41317040871606575,
      "grad_norm": 1.3319803476333618,
      "learning_rate": 0.00013249905707707926,
      "loss": 1.0953,
      "step": 1716
    },
    {
      "epoch": 0.4134111840125203,
      "grad_norm": 4.633189678192139,
      "learning_rate": 0.000132425305373458,
      "loss": 0.9773,
      "step": 1717
    },
    {
      "epoch": 0.4136519593089749,
      "grad_norm": 1.2184745073318481,
      "learning_rate": 0.00013235153395517804,
      "loss": 0.7046,
      "step": 1718
    },
    {
      "epoch": 0.41389273460542947,
      "grad_norm": 1.2916301488876343,
      "learning_rate": 0.00013227774286709253,
      "loss": 0.2718,
      "step": 1719
    },
    {
      "epoch": 0.4141335099018841,
      "grad_norm": 1.1648756265640259,
      "learning_rate": 0.00013220393215406664,
      "loss": 0.446,
      "step": 1720
    },
    {
      "epoch": 0.41437428519833863,
      "grad_norm": 2.0171449184417725,
      "learning_rate": 0.00013213010186097744,
      "loss": 0.1262,
      "step": 1721
    },
    {
      "epoch": 0.41461506049479324,
      "grad_norm": 2.397416591644287,
      "learning_rate": 0.00013205625203271395,
      "loss": 0.7722,
      "step": 1722
    },
    {
      "epoch": 0.4148558357912478,
      "grad_norm": 0.6799049377441406,
      "learning_rate": 0.00013198238271417697,
      "loss": 0.6582,
      "step": 1723
    },
    {
      "epoch": 0.4150966110877024,
      "grad_norm": 2.0616261959075928,
      "learning_rate": 0.00013190849395027928,
      "loss": 1.2671,
      "step": 1724
    },
    {
      "epoch": 0.41533738638415696,
      "grad_norm": 0.9546332955360413,
      "learning_rate": 0.00013183458578594533,
      "loss": 0.2217,
      "step": 1725
    },
    {
      "epoch": 0.41557816168061157,
      "grad_norm": 4.271639823913574,
      "learning_rate": 0.0001317606582661115,
      "loss": 0.6956,
      "step": 1726
    },
    {
      "epoch": 0.4158189369770662,
      "grad_norm": 1.4144961833953857,
      "learning_rate": 0.0001316867114357259,
      "loss": 0.6413,
      "step": 1727
    },
    {
      "epoch": 0.41605971227352073,
      "grad_norm": 0.7294138073921204,
      "learning_rate": 0.00013161274533974836,
      "loss": 0.3907,
      "step": 1728
    },
    {
      "epoch": 0.41630048756997534,
      "grad_norm": 1.5399507284164429,
      "learning_rate": 0.00013153876002315045,
      "loss": 0.635,
      "step": 1729
    },
    {
      "epoch": 0.4165412628664299,
      "grad_norm": 1.771852731704712,
      "learning_rate": 0.00013146475553091536,
      "loss": 0.4428,
      "step": 1730
    },
    {
      "epoch": 0.4167820381628845,
      "grad_norm": 1.6749565601348877,
      "learning_rate": 0.000131390731908038,
      "loss": 0.5446,
      "step": 1731
    },
    {
      "epoch": 0.41702281345933906,
      "grad_norm": 1.4587945938110352,
      "learning_rate": 0.00013131668919952495,
      "loss": 0.8724,
      "step": 1732
    },
    {
      "epoch": 0.41726358875579367,
      "grad_norm": 2.2476232051849365,
      "learning_rate": 0.0001312426274503943,
      "loss": 0.184,
      "step": 1733
    },
    {
      "epoch": 0.4175043640522482,
      "grad_norm": 1.1287919282913208,
      "learning_rate": 0.00013116854670567577,
      "loss": 0.4209,
      "step": 1734
    },
    {
      "epoch": 0.41774513934870283,
      "grad_norm": 0.6734319925308228,
      "learning_rate": 0.00013109444701041057,
      "loss": 0.2422,
      "step": 1735
    },
    {
      "epoch": 0.4179859146451574,
      "grad_norm": 2.265183448791504,
      "learning_rate": 0.0001310203284096516,
      "loss": 0.6902,
      "step": 1736
    },
    {
      "epoch": 0.418226689941612,
      "grad_norm": 3.4037933349609375,
      "learning_rate": 0.00013094619094846304,
      "loss": 0.628,
      "step": 1737
    },
    {
      "epoch": 0.41846746523806655,
      "grad_norm": 4.971876621246338,
      "learning_rate": 0.00013087203467192067,
      "loss": 0.9363,
      "step": 1738
    },
    {
      "epoch": 0.41870824053452116,
      "grad_norm": 2.3928446769714355,
      "learning_rate": 0.00013079785962511164,
      "loss": 0.5608,
      "step": 1739
    },
    {
      "epoch": 0.41894901583097577,
      "grad_norm": 1.1700559854507446,
      "learning_rate": 0.0001307236658531346,
      "loss": 0.4105,
      "step": 1740
    },
    {
      "epoch": 0.4191897911274303,
      "grad_norm": 1.4026082754135132,
      "learning_rate": 0.00013064945340109948,
      "loss": 0.6884,
      "step": 1741
    },
    {
      "epoch": 0.41943056642388493,
      "grad_norm": 2.345377206802368,
      "learning_rate": 0.00013057522231412765,
      "loss": 0.6579,
      "step": 1742
    },
    {
      "epoch": 0.4196713417203395,
      "grad_norm": 0.7213815450668335,
      "learning_rate": 0.00013050097263735174,
      "loss": 0.4405,
      "step": 1743
    },
    {
      "epoch": 0.4199121170167941,
      "grad_norm": 0.8045918941497803,
      "learning_rate": 0.0001304267044159158,
      "loss": 0.4975,
      "step": 1744
    },
    {
      "epoch": 0.42015289231324865,
      "grad_norm": 1.4894392490386963,
      "learning_rate": 0.000130352417694975,
      "loss": 0.5714,
      "step": 1745
    },
    {
      "epoch": 0.42039366760970326,
      "grad_norm": 8.357844352722168,
      "learning_rate": 0.00013027811251969585,
      "loss": 0.6262,
      "step": 1746
    },
    {
      "epoch": 0.4206344429061578,
      "grad_norm": 1.4566922187805176,
      "learning_rate": 0.00013020378893525603,
      "loss": 0.1933,
      "step": 1747
    },
    {
      "epoch": 0.4208752182026124,
      "grad_norm": 0.6821098327636719,
      "learning_rate": 0.00013012944698684455,
      "loss": 0.4767,
      "step": 1748
    },
    {
      "epoch": 0.421115993499067,
      "grad_norm": 2.3084802627563477,
      "learning_rate": 0.00013005508671966141,
      "loss": 0.6336,
      "step": 1749
    },
    {
      "epoch": 0.4213567687955216,
      "grad_norm": 2.982093572616577,
      "learning_rate": 0.0001299807081789178,
      "loss": 0.5048,
      "step": 1750
    },
    {
      "epoch": 0.42159754409197614,
      "grad_norm": 1.2381023168563843,
      "learning_rate": 0.0001299063114098361,
      "loss": 0.6217,
      "step": 1751
    },
    {
      "epoch": 0.42183831938843075,
      "grad_norm": 2.4936861991882324,
      "learning_rate": 0.00012983189645764966,
      "loss": 0.5497,
      "step": 1752
    },
    {
      "epoch": 0.4220790946848853,
      "grad_norm": 0.9683302044868469,
      "learning_rate": 0.00012975746336760298,
      "loss": 0.3565,
      "step": 1753
    },
    {
      "epoch": 0.4223198699813399,
      "grad_norm": 3.492793083190918,
      "learning_rate": 0.00012968301218495152,
      "loss": 0.5133,
      "step": 1754
    },
    {
      "epoch": 0.4225606452777945,
      "grad_norm": 2.7869482040405273,
      "learning_rate": 0.00012960854295496178,
      "loss": 0.9106,
      "step": 1755
    },
    {
      "epoch": 0.4228014205742491,
      "grad_norm": 2.8004496097564697,
      "learning_rate": 0.00012953405572291117,
      "loss": 0.5493,
      "step": 1756
    },
    {
      "epoch": 0.4230421958707037,
      "grad_norm": 1.4663894176483154,
      "learning_rate": 0.0001294595505340882,
      "loss": 0.4555,
      "step": 1757
    },
    {
      "epoch": 0.42328297116715824,
      "grad_norm": 14.337491035461426,
      "learning_rate": 0.00012938502743379212,
      "loss": 0.7048,
      "step": 1758
    },
    {
      "epoch": 0.42352374646361285,
      "grad_norm": 1.1422491073608398,
      "learning_rate": 0.00012931048646733313,
      "loss": 0.6569,
      "step": 1759
    },
    {
      "epoch": 0.4237645217600674,
      "grad_norm": 0.4564094841480255,
      "learning_rate": 0.00012923592768003235,
      "loss": 0.1381,
      "step": 1760
    },
    {
      "epoch": 0.424005297056522,
      "grad_norm": 2.7814853191375732,
      "learning_rate": 0.00012916135111722165,
      "loss": 0.5271,
      "step": 1761
    },
    {
      "epoch": 0.42424607235297657,
      "grad_norm": 3.1444740295410156,
      "learning_rate": 0.0001290867568242438,
      "loss": 0.9703,
      "step": 1762
    },
    {
      "epoch": 0.4244868476494312,
      "grad_norm": 0.9466924071311951,
      "learning_rate": 0.00012901214484645226,
      "loss": 0.64,
      "step": 1763
    },
    {
      "epoch": 0.42472762294588573,
      "grad_norm": 0.9237557053565979,
      "learning_rate": 0.00012893751522921124,
      "loss": 0.6848,
      "step": 1764
    },
    {
      "epoch": 0.42496839824234034,
      "grad_norm": 2.4244697093963623,
      "learning_rate": 0.00012886286801789583,
      "loss": 0.6039,
      "step": 1765
    },
    {
      "epoch": 0.4252091735387949,
      "grad_norm": 9.26452922821045,
      "learning_rate": 0.00012878820325789162,
      "loss": 0.6834,
      "step": 1766
    },
    {
      "epoch": 0.4254499488352495,
      "grad_norm": 0.7539100646972656,
      "learning_rate": 0.00012871352099459496,
      "loss": 0.3441,
      "step": 1767
    },
    {
      "epoch": 0.4256907241317041,
      "grad_norm": 2.2698490619659424,
      "learning_rate": 0.00012863882127341284,
      "loss": 0.9277,
      "step": 1768
    },
    {
      "epoch": 0.42593149942815867,
      "grad_norm": 5.280149936676025,
      "learning_rate": 0.00012856410413976285,
      "loss": 0.9697,
      "step": 1769
    },
    {
      "epoch": 0.4261722747246133,
      "grad_norm": 3.395625591278076,
      "learning_rate": 0.0001284893696390732,
      "loss": 1.0316,
      "step": 1770
    },
    {
      "epoch": 0.42641305002106783,
      "grad_norm": 1.7464591264724731,
      "learning_rate": 0.00012841461781678263,
      "loss": 0.7941,
      "step": 1771
    },
    {
      "epoch": 0.42665382531752244,
      "grad_norm": 2.47660493850708,
      "learning_rate": 0.00012833984871834042,
      "loss": 0.9419,
      "step": 1772
    },
    {
      "epoch": 0.426894600613977,
      "grad_norm": 1.8424837589263916,
      "learning_rate": 0.00012826506238920632,
      "loss": 0.6514,
      "step": 1773
    },
    {
      "epoch": 0.4271353759104316,
      "grad_norm": 11.50127124786377,
      "learning_rate": 0.00012819025887485062,
      "loss": 1.3198,
      "step": 1774
    },
    {
      "epoch": 0.42737615120688616,
      "grad_norm": 1.003143548965454,
      "learning_rate": 0.00012811543822075397,
      "loss": 0.2809,
      "step": 1775
    },
    {
      "epoch": 0.42761692650334077,
      "grad_norm": 1.2120084762573242,
      "learning_rate": 0.00012804060047240756,
      "loss": 0.4469,
      "step": 1776
    },
    {
      "epoch": 0.4278577017997953,
      "grad_norm": 2.660773515701294,
      "learning_rate": 0.0001279657456753129,
      "loss": 0.8044,
      "step": 1777
    },
    {
      "epoch": 0.42809847709624993,
      "grad_norm": 3.651428461074829,
      "learning_rate": 0.00012789087387498187,
      "loss": 0.7613,
      "step": 1778
    },
    {
      "epoch": 0.4283392523927045,
      "grad_norm": 1.7895033359527588,
      "learning_rate": 0.00012781598511693666,
      "loss": 0.5028,
      "step": 1779
    },
    {
      "epoch": 0.4285800276891591,
      "grad_norm": 2.3747005462646484,
      "learning_rate": 0.00012774107944670983,
      "loss": 0.6884,
      "step": 1780
    },
    {
      "epoch": 0.4288208029856137,
      "grad_norm": 0.8780110478401184,
      "learning_rate": 0.00012766615690984422,
      "loss": 0.5869,
      "step": 1781
    },
    {
      "epoch": 0.42906157828206826,
      "grad_norm": 1.754726529121399,
      "learning_rate": 0.00012759121755189282,
      "loss": 0.883,
      "step": 1782
    },
    {
      "epoch": 0.42930235357852287,
      "grad_norm": 1.48545241355896,
      "learning_rate": 0.00012751626141841902,
      "loss": 0.6704,
      "step": 1783
    },
    {
      "epoch": 0.4295431288749774,
      "grad_norm": 1.908327579498291,
      "learning_rate": 0.0001274412885549963,
      "loss": 0.5986,
      "step": 1784
    },
    {
      "epoch": 0.42978390417143203,
      "grad_norm": 2.356943130493164,
      "learning_rate": 0.0001273662990072083,
      "loss": 0.5414,
      "step": 1785
    },
    {
      "epoch": 0.4300246794678866,
      "grad_norm": 2.7557711601257324,
      "learning_rate": 0.00012729129282064886,
      "loss": 0.7579,
      "step": 1786
    },
    {
      "epoch": 0.4302654547643412,
      "grad_norm": 1.6613632440567017,
      "learning_rate": 0.00012721627004092184,
      "loss": 0.7389,
      "step": 1787
    },
    {
      "epoch": 0.43050623006079575,
      "grad_norm": 0.23793041706085205,
      "learning_rate": 0.00012714123071364138,
      "loss": 0.3544,
      "step": 1788
    },
    {
      "epoch": 0.43074700535725036,
      "grad_norm": 0.7207126617431641,
      "learning_rate": 0.0001270661748844315,
      "loss": 0.4286,
      "step": 1789
    },
    {
      "epoch": 0.4309877806537049,
      "grad_norm": 2.2661566734313965,
      "learning_rate": 0.00012699110259892625,
      "loss": 0.8774,
      "step": 1790
    },
    {
      "epoch": 0.4312285559501595,
      "grad_norm": 2.410264730453491,
      "learning_rate": 0.00012691601390276983,
      "loss": 0.6303,
      "step": 1791
    },
    {
      "epoch": 0.4314693312466141,
      "grad_norm": 3.2463836669921875,
      "learning_rate": 0.00012684090884161636,
      "loss": 0.4901,
      "step": 1792
    },
    {
      "epoch": 0.4317101065430687,
      "grad_norm": 2.540635824203491,
      "learning_rate": 0.0001267657874611298,
      "loss": 0.4825,
      "step": 1793
    },
    {
      "epoch": 0.43195088183952324,
      "grad_norm": 0.8397485613822937,
      "learning_rate": 0.00012669064980698418,
      "loss": 0.1515,
      "step": 1794
    },
    {
      "epoch": 0.43219165713597785,
      "grad_norm": 3.4554481506347656,
      "learning_rate": 0.00012661549592486327,
      "loss": 0.8663,
      "step": 1795
    },
    {
      "epoch": 0.43243243243243246,
      "grad_norm": 2.4448556900024414,
      "learning_rate": 0.00012654032586046097,
      "loss": 0.2905,
      "step": 1796
    },
    {
      "epoch": 0.432673207728887,
      "grad_norm": 2.5620980262756348,
      "learning_rate": 0.0001264651396594807,
      "loss": 0.4889,
      "step": 1797
    },
    {
      "epoch": 0.4329139830253416,
      "grad_norm": 1.970119833946228,
      "learning_rate": 0.0001263899373676359,
      "loss": 0.6237,
      "step": 1798
    },
    {
      "epoch": 0.4331547583217962,
      "grad_norm": 1.576965093612671,
      "learning_rate": 0.00012631471903064973,
      "loss": 0.4789,
      "step": 1799
    },
    {
      "epoch": 0.4333955336182508,
      "grad_norm": 2.5547585487365723,
      "learning_rate": 0.0001262394846942551,
      "loss": 0.2264,
      "step": 1800
    },
    {
      "epoch": 0.43363630891470534,
      "grad_norm": 1.3450043201446533,
      "learning_rate": 0.00012616423440419468,
      "loss": 0.6556,
      "step": 1801
    },
    {
      "epoch": 0.43387708421115995,
      "grad_norm": 2.3657472133636475,
      "learning_rate": 0.00012608896820622077,
      "loss": 0.5502,
      "step": 1802
    },
    {
      "epoch": 0.4341178595076145,
      "grad_norm": 0.8529106974601746,
      "learning_rate": 0.0001260136861460954,
      "loss": 0.5755,
      "step": 1803
    },
    {
      "epoch": 0.4343586348040691,
      "grad_norm": 1.913244366645813,
      "learning_rate": 0.00012593838826959023,
      "loss": 0.4943,
      "step": 1804
    },
    {
      "epoch": 0.43459941010052366,
      "grad_norm": 2.5146071910858154,
      "learning_rate": 0.0001258630746224866,
      "loss": 0.7254,
      "step": 1805
    },
    {
      "epoch": 0.4348401853969783,
      "grad_norm": 1.7178691625595093,
      "learning_rate": 0.00012578774525057532,
      "loss": 0.5247,
      "step": 1806
    },
    {
      "epoch": 0.4350809606934328,
      "grad_norm": 3.7382612228393555,
      "learning_rate": 0.0001257124001996568,
      "loss": 0.6197,
      "step": 1807
    },
    {
      "epoch": 0.43532173598988744,
      "grad_norm": 4.024393558502197,
      "learning_rate": 0.00012563703951554102,
      "loss": 0.6969,
      "step": 1808
    },
    {
      "epoch": 0.43556251128634205,
      "grad_norm": 2.9647786617279053,
      "learning_rate": 0.0001255616632440475,
      "loss": 0.4495,
      "step": 1809
    },
    {
      "epoch": 0.4358032865827966,
      "grad_norm": 3.270671844482422,
      "learning_rate": 0.0001254862714310051,
      "loss": 1.3434,
      "step": 1810
    },
    {
      "epoch": 0.4360440618792512,
      "grad_norm": 2.660315752029419,
      "learning_rate": 0.00012541086412225225,
      "loss": 0.3752,
      "step": 1811
    },
    {
      "epoch": 0.43628483717570576,
      "grad_norm": 3.405566453933716,
      "learning_rate": 0.00012533544136363677,
      "loss": 0.6865,
      "step": 1812
    },
    {
      "epoch": 0.4365256124721604,
      "grad_norm": 1.3535075187683105,
      "learning_rate": 0.00012526000320101584,
      "loss": 0.5975,
      "step": 1813
    },
    {
      "epoch": 0.4367663877686149,
      "grad_norm": 7.082382678985596,
      "learning_rate": 0.0001251845496802561,
      "loss": 0.9345,
      "step": 1814
    },
    {
      "epoch": 0.43700716306506954,
      "grad_norm": 3.6921160221099854,
      "learning_rate": 0.00012510908084723335,
      "loss": 0.7298,
      "step": 1815
    },
    {
      "epoch": 0.4372479383615241,
      "grad_norm": 1.1721895933151245,
      "learning_rate": 0.00012503359674783293,
      "loss": 0.7013,
      "step": 1816
    },
    {
      "epoch": 0.4374887136579787,
      "grad_norm": 3.0077133178710938,
      "learning_rate": 0.00012495809742794927,
      "loss": 1.094,
      "step": 1817
    },
    {
      "epoch": 0.43772948895443325,
      "grad_norm": 0.8102177381515503,
      "learning_rate": 0.00012488258293348614,
      "loss": 0.5695,
      "step": 1818
    },
    {
      "epoch": 0.43797026425088786,
      "grad_norm": 2.2918097972869873,
      "learning_rate": 0.0001248070533103565,
      "loss": 0.3564,
      "step": 1819
    },
    {
      "epoch": 0.4382110395473424,
      "grad_norm": 0.9323842525482178,
      "learning_rate": 0.0001247315086044826,
      "loss": 0.38,
      "step": 1820
    },
    {
      "epoch": 0.438451814843797,
      "grad_norm": 3.6484107971191406,
      "learning_rate": 0.0001246559488617957,
      "loss": 0.4016,
      "step": 1821
    },
    {
      "epoch": 0.43869259014025164,
      "grad_norm": 2.140214204788208,
      "learning_rate": 0.0001245803741282364,
      "loss": 0.4333,
      "step": 1822
    },
    {
      "epoch": 0.4389333654367062,
      "grad_norm": 2.356504201889038,
      "learning_rate": 0.00012450478444975423,
      "loss": 0.3219,
      "step": 1823
    },
    {
      "epoch": 0.4391741407331608,
      "grad_norm": 1.8598113059997559,
      "learning_rate": 0.0001244291798723079,
      "loss": 0.4902,
      "step": 1824
    },
    {
      "epoch": 0.43941491602961535,
      "grad_norm": 3.3434224128723145,
      "learning_rate": 0.00012435356044186512,
      "loss": 0.63,
      "step": 1825
    },
    {
      "epoch": 0.43965569132606996,
      "grad_norm": 0.9114461541175842,
      "learning_rate": 0.00012427792620440278,
      "loss": 0.1613,
      "step": 1826
    },
    {
      "epoch": 0.4398964666225245,
      "grad_norm": 3.5121147632598877,
      "learning_rate": 0.00012420227720590657,
      "loss": 0.9353,
      "step": 1827
    },
    {
      "epoch": 0.4401372419189791,
      "grad_norm": 1.0356240272521973,
      "learning_rate": 0.00012412661349237134,
      "loss": 0.2213,
      "step": 1828
    },
    {
      "epoch": 0.4403780172154337,
      "grad_norm": 2.9189321994781494,
      "learning_rate": 0.00012405093510980072,
      "loss": 0.8359,
      "step": 1829
    },
    {
      "epoch": 0.4406187925118883,
      "grad_norm": 2.3521268367767334,
      "learning_rate": 0.00012397524210420736,
      "loss": 0.6263,
      "step": 1830
    },
    {
      "epoch": 0.44085956780834284,
      "grad_norm": 1.810509443283081,
      "learning_rate": 0.0001238995345216128,
      "loss": 0.415,
      "step": 1831
    },
    {
      "epoch": 0.44110034310479745,
      "grad_norm": 1.4703214168548584,
      "learning_rate": 0.0001238238124080474,
      "loss": 1.0236,
      "step": 1832
    },
    {
      "epoch": 0.441341118401252,
      "grad_norm": 1.8066413402557373,
      "learning_rate": 0.0001237480758095504,
      "loss": 0.5183,
      "step": 1833
    },
    {
      "epoch": 0.4415818936977066,
      "grad_norm": 2.030515670776367,
      "learning_rate": 0.00012367232477216973,
      "loss": 0.963,
      "step": 1834
    },
    {
      "epoch": 0.44182266899416117,
      "grad_norm": 4.339605808258057,
      "learning_rate": 0.00012359655934196236,
      "loss": 0.9798,
      "step": 1835
    },
    {
      "epoch": 0.4420634442906158,
      "grad_norm": 1.818166971206665,
      "learning_rate": 0.00012352077956499365,
      "loss": 0.4265,
      "step": 1836
    },
    {
      "epoch": 0.4423042195870704,
      "grad_norm": 3.2408132553100586,
      "learning_rate": 0.00012344498548733806,
      "loss": 0.9419,
      "step": 1837
    },
    {
      "epoch": 0.44254499488352494,
      "grad_norm": 2.5602407455444336,
      "learning_rate": 0.0001233691771550784,
      "loss": 0.7254,
      "step": 1838
    },
    {
      "epoch": 0.44278577017997955,
      "grad_norm": 2.216360330581665,
      "learning_rate": 0.0001232933546143064,
      "loss": 0.9503,
      "step": 1839
    },
    {
      "epoch": 0.4430265454764341,
      "grad_norm": 1.1760109663009644,
      "learning_rate": 0.00012321751791112234,
      "loss": 0.6887,
      "step": 1840
    },
    {
      "epoch": 0.4432673207728887,
      "grad_norm": 3.627732276916504,
      "learning_rate": 0.00012314166709163508,
      "loss": 0.5434,
      "step": 1841
    },
    {
      "epoch": 0.44350809606934327,
      "grad_norm": 2.6786983013153076,
      "learning_rate": 0.00012306580220196206,
      "loss": 1.46,
      "step": 1842
    },
    {
      "epoch": 0.4437488713657979,
      "grad_norm": 1.4511840343475342,
      "learning_rate": 0.00012298992328822937,
      "loss": 0.739,
      "step": 1843
    },
    {
      "epoch": 0.44398964666225244,
      "grad_norm": 6.163101673126221,
      "learning_rate": 0.00012291403039657147,
      "loss": 0.6288,
      "step": 1844
    },
    {
      "epoch": 0.44423042195870704,
      "grad_norm": 3.6202635765075684,
      "learning_rate": 0.00012283812357313152,
      "loss": 0.3465,
      "step": 1845
    },
    {
      "epoch": 0.4444711972551616,
      "grad_norm": 2.3283517360687256,
      "learning_rate": 0.00012276220286406097,
      "loss": 0.9489,
      "step": 1846
    },
    {
      "epoch": 0.4447119725516162,
      "grad_norm": 0.9239123463630676,
      "learning_rate": 0.00012268626831551978,
      "loss": 0.2347,
      "step": 1847
    },
    {
      "epoch": 0.44495274784807076,
      "grad_norm": 2.289092779159546,
      "learning_rate": 0.00012261031997367632,
      "loss": 0.6748,
      "step": 1848
    },
    {
      "epoch": 0.44519352314452537,
      "grad_norm": 3.025836706161499,
      "learning_rate": 0.0001225343578847074,
      "loss": 0.9556,
      "step": 1849
    },
    {
      "epoch": 0.44543429844098,
      "grad_norm": 2.054135322570801,
      "learning_rate": 0.00012245838209479812,
      "loss": 0.6107,
      "step": 1850
    },
    {
      "epoch": 0.44567507373743454,
      "grad_norm": 2.0344197750091553,
      "learning_rate": 0.0001223823926501419,
      "loss": 0.4859,
      "step": 1851
    },
    {
      "epoch": 0.44591584903388914,
      "grad_norm": 1.1552016735076904,
      "learning_rate": 0.00012230638959694054,
      "loss": 0.4992,
      "step": 1852
    },
    {
      "epoch": 0.4461566243303437,
      "grad_norm": 4.617137908935547,
      "learning_rate": 0.00012223037298140406,
      "loss": 0.4169,
      "step": 1853
    },
    {
      "epoch": 0.4463973996267983,
      "grad_norm": 1.8090236186981201,
      "learning_rate": 0.00012215434284975073,
      "loss": 1.1123,
      "step": 1854
    },
    {
      "epoch": 0.44663817492325286,
      "grad_norm": 1.46204674243927,
      "learning_rate": 0.000122078299248207,
      "loss": 0.3101,
      "step": 1855
    },
    {
      "epoch": 0.44687895021970747,
      "grad_norm": 2.3522417545318604,
      "learning_rate": 0.00012200224222300758,
      "loss": 0.3873,
      "step": 1856
    },
    {
      "epoch": 0.447119725516162,
      "grad_norm": 2.6018474102020264,
      "learning_rate": 0.00012192617182039534,
      "loss": 1.0129,
      "step": 1857
    },
    {
      "epoch": 0.44736050081261663,
      "grad_norm": 4.971423149108887,
      "learning_rate": 0.00012185008808662124,
      "loss": 0.7512,
      "step": 1858
    },
    {
      "epoch": 0.4476012761090712,
      "grad_norm": 1.7388516664505005,
      "learning_rate": 0.00012177399106794433,
      "loss": 0.9286,
      "step": 1859
    },
    {
      "epoch": 0.4478420514055258,
      "grad_norm": 2.4302382469177246,
      "learning_rate": 0.0001216978808106318,
      "loss": 1.2512,
      "step": 1860
    },
    {
      "epoch": 0.44808282670198035,
      "grad_norm": 2.7931926250457764,
      "learning_rate": 0.00012162175736095887,
      "loss": 0.5571,
      "step": 1861
    },
    {
      "epoch": 0.44832360199843496,
      "grad_norm": 1.046998381614685,
      "learning_rate": 0.00012154562076520874,
      "loss": 0.5747,
      "step": 1862
    },
    {
      "epoch": 0.44856437729488957,
      "grad_norm": 1.4877816438674927,
      "learning_rate": 0.00012146947106967266,
      "loss": 0.9024,
      "step": 1863
    },
    {
      "epoch": 0.4488051525913441,
      "grad_norm": 0.7219827175140381,
      "learning_rate": 0.00012139330832064974,
      "loss": 0.3575,
      "step": 1864
    },
    {
      "epoch": 0.44904592788779873,
      "grad_norm": 2.1786413192749023,
      "learning_rate": 0.00012131713256444722,
      "loss": 0.5989,
      "step": 1865
    },
    {
      "epoch": 0.4492867031842533,
      "grad_norm": 3.176419973373413,
      "learning_rate": 0.00012124094384738005,
      "loss": 0.6001,
      "step": 1866
    },
    {
      "epoch": 0.4495274784807079,
      "grad_norm": 4.134557723999023,
      "learning_rate": 0.00012116474221577116,
      "loss": 0.3355,
      "step": 1867
    },
    {
      "epoch": 0.44976825377716245,
      "grad_norm": 1.623186707496643,
      "learning_rate": 0.00012108852771595129,
      "loss": 0.4517,
      "step": 1868
    },
    {
      "epoch": 0.45000902907361706,
      "grad_norm": 0.6475129723548889,
      "learning_rate": 0.00012101230039425911,
      "loss": 0.3038,
      "step": 1869
    },
    {
      "epoch": 0.4502498043700716,
      "grad_norm": 1.8964297771453857,
      "learning_rate": 0.00012093606029704094,
      "loss": 0.1228,
      "step": 1870
    },
    {
      "epoch": 0.4504905796665262,
      "grad_norm": 3.343824625015259,
      "learning_rate": 0.00012085980747065093,
      "loss": 0.7346,
      "step": 1871
    },
    {
      "epoch": 0.4507313549629808,
      "grad_norm": 2.4471538066864014,
      "learning_rate": 0.00012078354196145099,
      "loss": 0.7198,
      "step": 1872
    },
    {
      "epoch": 0.4509721302594354,
      "grad_norm": 1.736475944519043,
      "learning_rate": 0.00012070726381581068,
      "loss": 1.0944,
      "step": 1873
    },
    {
      "epoch": 0.45121290555588994,
      "grad_norm": 2.5887935161590576,
      "learning_rate": 0.00012063097308010734,
      "loss": 0.7277,
      "step": 1874
    },
    {
      "epoch": 0.45145368085234455,
      "grad_norm": 2.683844804763794,
      "learning_rate": 0.0001205546698007259,
      "loss": 0.263,
      "step": 1875
    },
    {
      "epoch": 0.4516944561487991,
      "grad_norm": 2.049633741378784,
      "learning_rate": 0.00012047835402405887,
      "loss": 0.7888,
      "step": 1876
    },
    {
      "epoch": 0.4519352314452537,
      "grad_norm": 1.6313300132751465,
      "learning_rate": 0.00012040202579650648,
      "loss": 0.5099,
      "step": 1877
    },
    {
      "epoch": 0.4521760067417083,
      "grad_norm": 2.1740105152130127,
      "learning_rate": 0.00012032568516447645,
      "loss": 0.5025,
      "step": 1878
    },
    {
      "epoch": 0.4524167820381629,
      "grad_norm": 0.8934720754623413,
      "learning_rate": 0.00012024933217438403,
      "loss": 0.3097,
      "step": 1879
    },
    {
      "epoch": 0.4526575573346175,
      "grad_norm": 3.051832675933838,
      "learning_rate": 0.00012017296687265201,
      "loss": 0.5882,
      "step": 1880
    },
    {
      "epoch": 0.45289833263107204,
      "grad_norm": 0.8491730093955994,
      "learning_rate": 0.00012009658930571069,
      "loss": 0.3899,
      "step": 1881
    },
    {
      "epoch": 0.45313910792752665,
      "grad_norm": 1.7300267219543457,
      "learning_rate": 0.0001200201995199978,
      "loss": 0.6787,
      "step": 1882
    },
    {
      "epoch": 0.4533798832239812,
      "grad_norm": 1.650277853012085,
      "learning_rate": 0.00011994379756195852,
      "loss": 0.6297,
      "step": 1883
    },
    {
      "epoch": 0.4536206585204358,
      "grad_norm": 1.1548956632614136,
      "learning_rate": 0.00011986738347804536,
      "loss": 0.519,
      "step": 1884
    },
    {
      "epoch": 0.45386143381689037,
      "grad_norm": 1.0197851657867432,
      "learning_rate": 0.0001197909573147183,
      "loss": 0.6041,
      "step": 1885
    },
    {
      "epoch": 0.454102209113345,
      "grad_norm": 1.3757448196411133,
      "learning_rate": 0.00011971451911844457,
      "loss": 0.6228,
      "step": 1886
    },
    {
      "epoch": 0.45434298440979953,
      "grad_norm": 3.988311767578125,
      "learning_rate": 0.00011963806893569885,
      "loss": 0.5685,
      "step": 1887
    },
    {
      "epoch": 0.45458375970625414,
      "grad_norm": 1.5994011163711548,
      "learning_rate": 0.00011956160681296293,
      "loss": 0.5188,
      "step": 1888
    },
    {
      "epoch": 0.4548245350027087,
      "grad_norm": 1.7165995836257935,
      "learning_rate": 0.00011948513279672602,
      "loss": 0.8819,
      "step": 1889
    },
    {
      "epoch": 0.4550653102991633,
      "grad_norm": 1.711625337600708,
      "learning_rate": 0.00011940864693348444,
      "loss": 0.6248,
      "step": 1890
    },
    {
      "epoch": 0.4553060855956179,
      "grad_norm": 3.749361038208008,
      "learning_rate": 0.00011933214926974183,
      "loss": 0.6371,
      "step": 1891
    },
    {
      "epoch": 0.45554686089207247,
      "grad_norm": 0.7839668393135071,
      "learning_rate": 0.00011925563985200887,
      "loss": 0.3796,
      "step": 1892
    },
    {
      "epoch": 0.4557876361885271,
      "grad_norm": 1.5670320987701416,
      "learning_rate": 0.00011917911872680354,
      "loss": 0.3806,
      "step": 1893
    },
    {
      "epoch": 0.45602841148498163,
      "grad_norm": 1.0422892570495605,
      "learning_rate": 0.00011910258594065078,
      "loss": 0.0708,
      "step": 1894
    },
    {
      "epoch": 0.45626918678143624,
      "grad_norm": 3.335632562637329,
      "learning_rate": 0.00011902604154008274,
      "loss": 0.5238,
      "step": 1895
    },
    {
      "epoch": 0.4565099620778908,
      "grad_norm": 4.482006072998047,
      "learning_rate": 0.00011894948557163859,
      "loss": 0.7926,
      "step": 1896
    },
    {
      "epoch": 0.4567507373743454,
      "grad_norm": 2.2744340896606445,
      "learning_rate": 0.00011887291808186452,
      "loss": 1.2551,
      "step": 1897
    },
    {
      "epoch": 0.45699151267079996,
      "grad_norm": 1.9892547130584717,
      "learning_rate": 0.00011879633911731372,
      "loss": 0.6706,
      "step": 1898
    },
    {
      "epoch": 0.45723228796725457,
      "grad_norm": 0.8157358169555664,
      "learning_rate": 0.00011871974872454639,
      "loss": 0.2129,
      "step": 1899
    },
    {
      "epoch": 0.4574730632637091,
      "grad_norm": 0.6796861886978149,
      "learning_rate": 0.00011864314695012963,
      "loss": 0.5986,
      "step": 1900
    },
    {
      "epoch": 0.45771383856016373,
      "grad_norm": 1.5171664953231812,
      "learning_rate": 0.00011856653384063756,
      "loss": 0.4835,
      "step": 1901
    },
    {
      "epoch": 0.4579546138566183,
      "grad_norm": 1.7098067998886108,
      "learning_rate": 0.00011848990944265111,
      "loss": 0.4977,
      "step": 1902
    },
    {
      "epoch": 0.4581953891530729,
      "grad_norm": 1.370509386062622,
      "learning_rate": 0.00011841327380275799,
      "loss": 0.7172,
      "step": 1903
    },
    {
      "epoch": 0.4584361644495275,
      "grad_norm": 3.1343603134155273,
      "learning_rate": 0.00011833662696755295,
      "loss": 0.6409,
      "step": 1904
    },
    {
      "epoch": 0.45867693974598206,
      "grad_norm": 2.920408010482788,
      "learning_rate": 0.00011825996898363741,
      "loss": 0.496,
      "step": 1905
    },
    {
      "epoch": 0.45891771504243667,
      "grad_norm": 1.1497353315353394,
      "learning_rate": 0.00011818329989761959,
      "loss": 0.4073,
      "step": 1906
    },
    {
      "epoch": 0.4591584903388912,
      "grad_norm": 1.3592454195022583,
      "learning_rate": 0.00011810661975611444,
      "loss": 0.7055,
      "step": 1907
    },
    {
      "epoch": 0.45939926563534583,
      "grad_norm": 4.271294593811035,
      "learning_rate": 0.0001180299286057437,
      "loss": 0.6676,
      "step": 1908
    },
    {
      "epoch": 0.4596400409318004,
      "grad_norm": 3.51015567779541,
      "learning_rate": 0.00011795322649313574,
      "loss": 0.7051,
      "step": 1909
    },
    {
      "epoch": 0.459880816228255,
      "grad_norm": 0.6392609477043152,
      "learning_rate": 0.00011787651346492561,
      "loss": 0.2079,
      "step": 1910
    },
    {
      "epoch": 0.46012159152470955,
      "grad_norm": 5.496769905090332,
      "learning_rate": 0.00011779978956775506,
      "loss": 0.6687,
      "step": 1911
    },
    {
      "epoch": 0.46036236682116416,
      "grad_norm": 2.1029446125030518,
      "learning_rate": 0.00011772305484827231,
      "loss": 1.0162,
      "step": 1912
    },
    {
      "epoch": 0.4606031421176187,
      "grad_norm": 3.9741029739379883,
      "learning_rate": 0.00011764630935313228,
      "loss": 1.0211,
      "step": 1913
    },
    {
      "epoch": 0.4608439174140733,
      "grad_norm": 3.1617109775543213,
      "learning_rate": 0.00011756955312899642,
      "loss": 1.1738,
      "step": 1914
    },
    {
      "epoch": 0.4610846927105279,
      "grad_norm": 0.6556163430213928,
      "learning_rate": 0.00011749278622253268,
      "loss": 0.037,
      "step": 1915
    },
    {
      "epoch": 0.4613254680069825,
      "grad_norm": 3.8767170906066895,
      "learning_rate": 0.00011741600868041549,
      "loss": 0.8335,
      "step": 1916
    },
    {
      "epoch": 0.46156624330343704,
      "grad_norm": 2.683124542236328,
      "learning_rate": 0.00011733922054932577,
      "loss": 1.3765,
      "step": 1917
    },
    {
      "epoch": 0.46180701859989165,
      "grad_norm": 1.8836538791656494,
      "learning_rate": 0.00011726242187595091,
      "loss": 0.2703,
      "step": 1918
    },
    {
      "epoch": 0.46204779389634626,
      "grad_norm": 1.551708698272705,
      "learning_rate": 0.00011718561270698467,
      "loss": 0.1608,
      "step": 1919
    },
    {
      "epoch": 0.4622885691928008,
      "grad_norm": 2.271167278289795,
      "learning_rate": 0.00011710879308912717,
      "loss": 0.5994,
      "step": 1920
    },
    {
      "epoch": 0.4625293444892554,
      "grad_norm": 2.433912992477417,
      "learning_rate": 0.0001170319630690849,
      "loss": 0.756,
      "step": 1921
    },
    {
      "epoch": 0.46277011978571,
      "grad_norm": 0.924586296081543,
      "learning_rate": 0.00011695512269357076,
      "loss": 0.655,
      "step": 1922
    },
    {
      "epoch": 0.4630108950821646,
      "grad_norm": 2.5200753211975098,
      "learning_rate": 0.00011687827200930381,
      "loss": 0.5851,
      "step": 1923
    },
    {
      "epoch": 0.46325167037861914,
      "grad_norm": 1.0272419452667236,
      "learning_rate": 0.00011680141106300943,
      "loss": 0.3672,
      "step": 1924
    },
    {
      "epoch": 0.46349244567507375,
      "grad_norm": 5.197723865509033,
      "learning_rate": 0.00011672453990141927,
      "loss": 0.8146,
      "step": 1925
    },
    {
      "epoch": 0.4637332209715283,
      "grad_norm": 1.0704439878463745,
      "learning_rate": 0.00011664765857127118,
      "loss": 0.1931,
      "step": 1926
    },
    {
      "epoch": 0.4639739962679829,
      "grad_norm": 2.9581477642059326,
      "learning_rate": 0.00011657076711930919,
      "loss": 0.9693,
      "step": 1927
    },
    {
      "epoch": 0.46421477156443747,
      "grad_norm": 3.3531832695007324,
      "learning_rate": 0.00011649386559228341,
      "loss": 0.6575,
      "step": 1928
    },
    {
      "epoch": 0.4644555468608921,
      "grad_norm": 1.1153233051300049,
      "learning_rate": 0.00011641695403695021,
      "loss": 0.3665,
      "step": 1929
    },
    {
      "epoch": 0.46469632215734663,
      "grad_norm": 1.21510910987854,
      "learning_rate": 0.000116340032500072,
      "loss": 0.4691,
      "step": 1930
    },
    {
      "epoch": 0.46493709745380124,
      "grad_norm": 1.5340150594711304,
      "learning_rate": 0.00011626310102841718,
      "loss": 0.5084,
      "step": 1931
    },
    {
      "epoch": 0.46517787275025585,
      "grad_norm": 1.7374811172485352,
      "learning_rate": 0.0001161861596687603,
      "loss": 0.5055,
      "step": 1932
    },
    {
      "epoch": 0.4654186480467104,
      "grad_norm": 1.4668828248977661,
      "learning_rate": 0.00011610920846788184,
      "loss": 0.7268,
      "step": 1933
    },
    {
      "epoch": 0.465659423343165,
      "grad_norm": 2.211509943008423,
      "learning_rate": 0.0001160322474725684,
      "loss": 1.0503,
      "step": 1934
    },
    {
      "epoch": 0.46590019863961957,
      "grad_norm": 2.9162306785583496,
      "learning_rate": 0.00011595527672961235,
      "loss": 0.987,
      "step": 1935
    },
    {
      "epoch": 0.4661409739360742,
      "grad_norm": 0.8210351467132568,
      "learning_rate": 0.00011587829628581213,
      "loss": 0.2187,
      "step": 1936
    },
    {
      "epoch": 0.46638174923252873,
      "grad_norm": 2.2448573112487793,
      "learning_rate": 0.00011580130618797193,
      "loss": 0.7065,
      "step": 1937
    },
    {
      "epoch": 0.46662252452898334,
      "grad_norm": 2.9925882816314697,
      "learning_rate": 0.000115724306482902,
      "loss": 0.9504,
      "step": 1938
    },
    {
      "epoch": 0.4668632998254379,
      "grad_norm": 4.324154376983643,
      "learning_rate": 0.00011564729721741829,
      "loss": 0.8914,
      "step": 1939
    },
    {
      "epoch": 0.4671040751218925,
      "grad_norm": 0.7890626192092896,
      "learning_rate": 0.00011557027843834265,
      "loss": 0.4613,
      "step": 1940
    },
    {
      "epoch": 0.46734485041834706,
      "grad_norm": 0.8351976275444031,
      "learning_rate": 0.00011549325019250261,
      "loss": 0.262,
      "step": 1941
    },
    {
      "epoch": 0.46758562571480167,
      "grad_norm": 5.956714153289795,
      "learning_rate": 0.00011541621252673153,
      "loss": 0.5128,
      "step": 1942
    },
    {
      "epoch": 0.4678264010112562,
      "grad_norm": 1.704748511314392,
      "learning_rate": 0.00011533916548786857,
      "loss": 0.5645,
      "step": 1943
    },
    {
      "epoch": 0.46806717630771083,
      "grad_norm": 2.2155847549438477,
      "learning_rate": 0.00011526210912275836,
      "loss": 0.7139,
      "step": 1944
    },
    {
      "epoch": 0.46830795160416544,
      "grad_norm": 3.7036075592041016,
      "learning_rate": 0.00011518504347825145,
      "loss": 0.6394,
      "step": 1945
    },
    {
      "epoch": 0.46854872690062,
      "grad_norm": 1.530531406402588,
      "learning_rate": 0.00011510796860120388,
      "loss": 0.8155,
      "step": 1946
    },
    {
      "epoch": 0.4687895021970746,
      "grad_norm": 3.814300298690796,
      "learning_rate": 0.00011503088453847739,
      "loss": 0.9626,
      "step": 1947
    },
    {
      "epoch": 0.46903027749352916,
      "grad_norm": 2.3494253158569336,
      "learning_rate": 0.00011495379133693922,
      "loss": 0.3687,
      "step": 1948
    },
    {
      "epoch": 0.46927105278998377,
      "grad_norm": 2.5800893306732178,
      "learning_rate": 0.00011487668904346221,
      "loss": 0.8505,
      "step": 1949
    },
    {
      "epoch": 0.4695118280864383,
      "grad_norm": 1.1892086267471313,
      "learning_rate": 0.00011479957770492476,
      "loss": 0.8398,
      "step": 1950
    },
    {
      "epoch": 0.46975260338289293,
      "grad_norm": 4.8080034255981445,
      "learning_rate": 0.00011472245736821072,
      "loss": 0.8072,
      "step": 1951
    },
    {
      "epoch": 0.4699933786793475,
      "grad_norm": 4.775472164154053,
      "learning_rate": 0.00011464532808020943,
      "loss": 0.8344,
      "step": 1952
    },
    {
      "epoch": 0.4702341539758021,
      "grad_norm": 4.2183966636657715,
      "learning_rate": 0.00011456818988781565,
      "loss": 0.2391,
      "step": 1953
    },
    {
      "epoch": 0.47047492927225665,
      "grad_norm": 0.8757205605506897,
      "learning_rate": 0.00011449104283792964,
      "loss": 0.6855,
      "step": 1954
    },
    {
      "epoch": 0.47071570456871126,
      "grad_norm": 4.9031524658203125,
      "learning_rate": 0.0001144138869774569,
      "loss": 0.7411,
      "step": 1955
    },
    {
      "epoch": 0.4709564798651658,
      "grad_norm": 3.9868388175964355,
      "learning_rate": 0.0001143367223533084,
      "loss": 1.3733,
      "step": 1956
    },
    {
      "epoch": 0.4711972551616204,
      "grad_norm": 2.6897597312927246,
      "learning_rate": 0.0001142595490124004,
      "loss": 1.1645,
      "step": 1957
    },
    {
      "epoch": 0.471438030458075,
      "grad_norm": 0.6126354932785034,
      "learning_rate": 0.00011418236700165452,
      "loss": 0.4618,
      "step": 1958
    },
    {
      "epoch": 0.4716788057545296,
      "grad_norm": 0.5356245040893555,
      "learning_rate": 0.00011410517636799751,
      "loss": 0.805,
      "step": 1959
    },
    {
      "epoch": 0.4719195810509842,
      "grad_norm": 0.8628101944923401,
      "learning_rate": 0.00011402797715836153,
      "loss": 0.291,
      "step": 1960
    },
    {
      "epoch": 0.47216035634743875,
      "grad_norm": 1.2963393926620483,
      "learning_rate": 0.00011395076941968379,
      "loss": 0.7377,
      "step": 1961
    },
    {
      "epoch": 0.47240113164389336,
      "grad_norm": 1.1663508415222168,
      "learning_rate": 0.00011387355319890685,
      "loss": 0.1149,
      "step": 1962
    },
    {
      "epoch": 0.4726419069403479,
      "grad_norm": 1.1222305297851562,
      "learning_rate": 0.00011379632854297828,
      "loss": 0.8273,
      "step": 1963
    },
    {
      "epoch": 0.4728826822368025,
      "grad_norm": 1.7846665382385254,
      "learning_rate": 0.00011371909549885087,
      "loss": 0.5701,
      "step": 1964
    },
    {
      "epoch": 0.4731234575332571,
      "grad_norm": 2.4753174781799316,
      "learning_rate": 0.00011364185411348247,
      "loss": 0.6405,
      "step": 1965
    },
    {
      "epoch": 0.4733642328297117,
      "grad_norm": 2.678506374359131,
      "learning_rate": 0.00011356460443383607,
      "loss": 0.5651,
      "step": 1966
    },
    {
      "epoch": 0.47360500812616624,
      "grad_norm": 2.547746181488037,
      "learning_rate": 0.00011348734650687962,
      "loss": 0.4664,
      "step": 1967
    },
    {
      "epoch": 0.47384578342262085,
      "grad_norm": 2.901313304901123,
      "learning_rate": 0.00011341008037958607,
      "loss": 0.2748,
      "step": 1968
    },
    {
      "epoch": 0.4740865587190754,
      "grad_norm": 2.7114925384521484,
      "learning_rate": 0.00011333280609893344,
      "loss": 0.7454,
      "step": 1969
    },
    {
      "epoch": 0.47432733401553,
      "grad_norm": 2.3827106952667236,
      "learning_rate": 0.0001132555237119047,
      "loss": 1.0865,
      "step": 1970
    },
    {
      "epoch": 0.47456810931198457,
      "grad_norm": 1.0428249835968018,
      "learning_rate": 0.00011317823326548765,
      "loss": 0.6484,
      "step": 1971
    },
    {
      "epoch": 0.4748088846084392,
      "grad_norm": 2.74362850189209,
      "learning_rate": 0.00011310093480667507,
      "loss": 0.4957,
      "step": 1972
    },
    {
      "epoch": 0.4750496599048938,
      "grad_norm": 2.0484142303466797,
      "learning_rate": 0.00011302362838246463,
      "loss": 0.5256,
      "step": 1973
    },
    {
      "epoch": 0.47529043520134834,
      "grad_norm": 2.8634374141693115,
      "learning_rate": 0.0001129463140398588,
      "loss": 0.4557,
      "step": 1974
    },
    {
      "epoch": 0.47553121049780295,
      "grad_norm": 2.5055246353149414,
      "learning_rate": 0.00011286899182586485,
      "loss": 0.9222,
      "step": 1975
    },
    {
      "epoch": 0.4757719857942575,
      "grad_norm": 1.0836631059646606,
      "learning_rate": 0.00011279166178749489,
      "loss": 0.3692,
      "step": 1976
    },
    {
      "epoch": 0.4760127610907121,
      "grad_norm": 2.4149179458618164,
      "learning_rate": 0.0001127143239717657,
      "loss": 0.675,
      "step": 1977
    },
    {
      "epoch": 0.47625353638716666,
      "grad_norm": 1.899614930152893,
      "learning_rate": 0.00011263697842569894,
      "loss": 0.988,
      "step": 1978
    },
    {
      "epoch": 0.4764943116836213,
      "grad_norm": 3.755749464035034,
      "learning_rate": 0.00011255962519632081,
      "loss": 0.8779,
      "step": 1979
    },
    {
      "epoch": 0.47673508698007583,
      "grad_norm": 2.487436056137085,
      "learning_rate": 0.0001124822643306623,
      "loss": 0.4025,
      "step": 1980
    },
    {
      "epoch": 0.47697586227653044,
      "grad_norm": 0.9542964100837708,
      "learning_rate": 0.00011240489587575889,
      "loss": 0.5613,
      "step": 1981
    },
    {
      "epoch": 0.477216637572985,
      "grad_norm": 1.5301231145858765,
      "learning_rate": 0.00011232751987865084,
      "loss": 0.2873,
      "step": 1982
    },
    {
      "epoch": 0.4774574128694396,
      "grad_norm": 1.3803631067276,
      "learning_rate": 0.00011225013638638297,
      "loss": 0.2015,
      "step": 1983
    },
    {
      "epoch": 0.47769818816589416,
      "grad_norm": 2.5215346813201904,
      "learning_rate": 0.00011217274544600458,
      "loss": 0.7079,
      "step": 1984
    },
    {
      "epoch": 0.47793896346234876,
      "grad_norm": 0.8680809736251831,
      "learning_rate": 0.00011209534710456951,
      "loss": 0.8823,
      "step": 1985
    },
    {
      "epoch": 0.4781797387588034,
      "grad_norm": 1.0122793912887573,
      "learning_rate": 0.00011201794140913613,
      "loss": 0.3876,
      "step": 1986
    },
    {
      "epoch": 0.47842051405525793,
      "grad_norm": 2.795023202896118,
      "learning_rate": 0.00011194052840676735,
      "loss": 0.9525,
      "step": 1987
    },
    {
      "epoch": 0.47866128935171254,
      "grad_norm": 4.1220784187316895,
      "learning_rate": 0.00011186310814453035,
      "loss": 0.8727,
      "step": 1988
    },
    {
      "epoch": 0.4789020646481671,
      "grad_norm": 0.5881559252738953,
      "learning_rate": 0.00011178568066949688,
      "loss": 0.1987,
      "step": 1989
    },
    {
      "epoch": 0.4791428399446217,
      "grad_norm": 2.1437673568725586,
      "learning_rate": 0.00011170824602874301,
      "loss": 0.7281,
      "step": 1990
    },
    {
      "epoch": 0.47938361524107626,
      "grad_norm": 2.513075113296509,
      "learning_rate": 0.0001116308042693492,
      "loss": 0.6271,
      "step": 1991
    },
    {
      "epoch": 0.47962439053753086,
      "grad_norm": 2.7916321754455566,
      "learning_rate": 0.00011155335543840017,
      "loss": 0.4875,
      "step": 1992
    },
    {
      "epoch": 0.4798651658339854,
      "grad_norm": 1.7488362789154053,
      "learning_rate": 0.000111475899582985,
      "loss": 0.4013,
      "step": 1993
    },
    {
      "epoch": 0.48010594113044003,
      "grad_norm": 8.35679817199707,
      "learning_rate": 0.00011139843675019704,
      "loss": 0.7598,
      "step": 1994
    },
    {
      "epoch": 0.4803467164268946,
      "grad_norm": 1.7272447347640991,
      "learning_rate": 0.00011132096698713385,
      "loss": 0.4311,
      "step": 1995
    },
    {
      "epoch": 0.4805874917233492,
      "grad_norm": 2.1946487426757812,
      "learning_rate": 0.00011124349034089723,
      "loss": 0.6132,
      "step": 1996
    },
    {
      "epoch": 0.48082826701980375,
      "grad_norm": 1.6528022289276123,
      "learning_rate": 0.00011116600685859313,
      "loss": 0.738,
      "step": 1997
    },
    {
      "epoch": 0.48106904231625836,
      "grad_norm": 2.6232638359069824,
      "learning_rate": 0.0001110885165873317,
      "loss": 0.7694,
      "step": 1998
    },
    {
      "epoch": 0.4813098176127129,
      "grad_norm": 1.2000987529754639,
      "learning_rate": 0.00011101101957422723,
      "loss": 0.4693,
      "step": 1999
    },
    {
      "epoch": 0.4815505929091675,
      "grad_norm": 1.4360319375991821,
      "learning_rate": 0.00011093351586639806,
      "loss": 0.7783,
      "step": 2000
    },
    {
      "epoch": 0.48179136820562213,
      "grad_norm": 1.4586645364761353,
      "learning_rate": 0.00011085600551096657,
      "loss": 0.7863,
      "step": 2001
    },
    {
      "epoch": 0.4820321435020767,
      "grad_norm": 1.3981388807296753,
      "learning_rate": 0.0001107784885550593,
      "loss": 0.7304,
      "step": 2002
    },
    {
      "epoch": 0.4822729187985313,
      "grad_norm": 2.226198196411133,
      "learning_rate": 0.00011070096504580669,
      "loss": 0.5331,
      "step": 2003
    },
    {
      "epoch": 0.48251369409498585,
      "grad_norm": 1.673223614692688,
      "learning_rate": 0.00011062343503034325,
      "loss": 0.5965,
      "step": 2004
    },
    {
      "epoch": 0.48275446939144045,
      "grad_norm": 3.5956525802612305,
      "learning_rate": 0.00011054589855580732,
      "loss": 0.9231,
      "step": 2005
    },
    {
      "epoch": 0.482995244687895,
      "grad_norm": 2.030714273452759,
      "learning_rate": 0.00011046835566934138,
      "loss": 0.71,
      "step": 2006
    },
    {
      "epoch": 0.4832360199843496,
      "grad_norm": 6.168741226196289,
      "learning_rate": 0.00011039080641809154,
      "loss": 0.9501,
      "step": 2007
    },
    {
      "epoch": 0.4834767952808042,
      "grad_norm": 2.15983510017395,
      "learning_rate": 0.00011031325084920802,
      "loss": 1.0474,
      "step": 2008
    },
    {
      "epoch": 0.4837175705772588,
      "grad_norm": 3.2638587951660156,
      "learning_rate": 0.00011023568900984473,
      "loss": 0.4585,
      "step": 2009
    },
    {
      "epoch": 0.48395834587371334,
      "grad_norm": 0.8049036264419556,
      "learning_rate": 0.0001101581209471595,
      "loss": 0.3537,
      "step": 2010
    },
    {
      "epoch": 0.48419912117016795,
      "grad_norm": 2.6150450706481934,
      "learning_rate": 0.00011008054670831381,
      "loss": 0.4149,
      "step": 2011
    },
    {
      "epoch": 0.4844398964666225,
      "grad_norm": 1.5464622974395752,
      "learning_rate": 0.00011000296634047302,
      "loss": 0.1812,
      "step": 2012
    },
    {
      "epoch": 0.4846806717630771,
      "grad_norm": 1.843767762184143,
      "learning_rate": 0.00010992537989080618,
      "loss": 0.3838,
      "step": 2013
    },
    {
      "epoch": 0.4849214470595317,
      "grad_norm": 1.0260145664215088,
      "learning_rate": 0.00010984778740648598,
      "loss": 0.2033,
      "step": 2014
    },
    {
      "epoch": 0.4851622223559863,
      "grad_norm": 1.7894840240478516,
      "learning_rate": 0.00010977018893468884,
      "loss": 0.5762,
      "step": 2015
    },
    {
      "epoch": 0.4854029976524409,
      "grad_norm": 2.454301118850708,
      "learning_rate": 0.00010969258452259483,
      "loss": 0.8953,
      "step": 2016
    },
    {
      "epoch": 0.48564377294889544,
      "grad_norm": 0.5999788045883179,
      "learning_rate": 0.0001096149742173876,
      "loss": 0.4977,
      "step": 2017
    },
    {
      "epoch": 0.48588454824535005,
      "grad_norm": 2.6491451263427734,
      "learning_rate": 0.00010953735806625439,
      "loss": 1.0362,
      "step": 2018
    },
    {
      "epoch": 0.4861253235418046,
      "grad_norm": 2.1559669971466064,
      "learning_rate": 0.00010945973611638596,
      "loss": 0.4835,
      "step": 2019
    },
    {
      "epoch": 0.4863660988382592,
      "grad_norm": 4.336763381958008,
      "learning_rate": 0.00010938210841497667,
      "loss": 1.0278,
      "step": 2020
    },
    {
      "epoch": 0.48660687413471376,
      "grad_norm": 0.9082402586936951,
      "learning_rate": 0.00010930447500922433,
      "loss": 0.7064,
      "step": 2021
    },
    {
      "epoch": 0.48684764943116837,
      "grad_norm": 11.172735214233398,
      "learning_rate": 0.00010922683594633021,
      "loss": 0.9112,
      "step": 2022
    },
    {
      "epoch": 0.4870884247276229,
      "grad_norm": 1.7960487604141235,
      "learning_rate": 0.00010914919127349906,
      "loss": 0.4387,
      "step": 2023
    },
    {
      "epoch": 0.48732920002407754,
      "grad_norm": 2.4477851390838623,
      "learning_rate": 0.00010907154103793899,
      "loss": 0.2548,
      "step": 2024
    },
    {
      "epoch": 0.4875699753205321,
      "grad_norm": 1.2202852964401245,
      "learning_rate": 0.00010899388528686154,
      "loss": 0.4231,
      "step": 2025
    },
    {
      "epoch": 0.4878107506169867,
      "grad_norm": 2.1632204055786133,
      "learning_rate": 0.00010891622406748157,
      "loss": 0.5211,
      "step": 2026
    },
    {
      "epoch": 0.4880515259134413,
      "grad_norm": 2.416361093521118,
      "learning_rate": 0.00010883855742701727,
      "loss": 0.5395,
      "step": 2027
    },
    {
      "epoch": 0.48829230120989586,
      "grad_norm": 3.4709837436676025,
      "learning_rate": 0.00010876088541269014,
      "loss": 0.959,
      "step": 2028
    },
    {
      "epoch": 0.48853307650635047,
      "grad_norm": 4.083737373352051,
      "learning_rate": 0.00010868320807172496,
      "loss": 0.4737,
      "step": 2029
    },
    {
      "epoch": 0.488773851802805,
      "grad_norm": 2.2041704654693604,
      "learning_rate": 0.0001086055254513497,
      "loss": 0.7522,
      "step": 2030
    },
    {
      "epoch": 0.48901462709925964,
      "grad_norm": 1.5947551727294922,
      "learning_rate": 0.00010852783759879557,
      "loss": 0.1179,
      "step": 2031
    },
    {
      "epoch": 0.4892554023957142,
      "grad_norm": 2.6516928672790527,
      "learning_rate": 0.00010845014456129698,
      "loss": 0.9625,
      "step": 2032
    },
    {
      "epoch": 0.4894961776921688,
      "grad_norm": 3.6693668365478516,
      "learning_rate": 0.00010837244638609145,
      "loss": 0.4759,
      "step": 2033
    },
    {
      "epoch": 0.48973695298862335,
      "grad_norm": 5.227980613708496,
      "learning_rate": 0.00010829474312041963,
      "loss": 0.66,
      "step": 2034
    },
    {
      "epoch": 0.48997772828507796,
      "grad_norm": 1.135461688041687,
      "learning_rate": 0.00010821703481152534,
      "loss": 0.3187,
      "step": 2035
    },
    {
      "epoch": 0.4902185035815325,
      "grad_norm": 0.8220135569572449,
      "learning_rate": 0.00010813932150665538,
      "loss": 0.4416,
      "step": 2036
    },
    {
      "epoch": 0.4904592788779871,
      "grad_norm": 2.479522943496704,
      "learning_rate": 0.00010806160325305956,
      "loss": 0.6935,
      "step": 2037
    },
    {
      "epoch": 0.4907000541744417,
      "grad_norm": 3.262054920196533,
      "learning_rate": 0.00010798388009799084,
      "loss": 1.5196,
      "step": 2038
    },
    {
      "epoch": 0.4909408294708963,
      "grad_norm": 3.85654354095459,
      "learning_rate": 0.000107906152088705,
      "loss": 0.3237,
      "step": 2039
    },
    {
      "epoch": 0.49118160476735084,
      "grad_norm": 2.709144353866577,
      "learning_rate": 0.0001078284192724609,
      "loss": 0.806,
      "step": 2040
    },
    {
      "epoch": 0.49142238006380545,
      "grad_norm": 4.338006019592285,
      "learning_rate": 0.00010775068169652023,
      "loss": 0.6148,
      "step": 2041
    },
    {
      "epoch": 0.49166315536026006,
      "grad_norm": 1.0794256925582886,
      "learning_rate": 0.00010767293940814762,
      "loss": 0.3614,
      "step": 2042
    },
    {
      "epoch": 0.4919039306567146,
      "grad_norm": 0.8536688089370728,
      "learning_rate": 0.0001075951924546106,
      "loss": 0.3124,
      "step": 2043
    },
    {
      "epoch": 0.4921447059531692,
      "grad_norm": 2.3540027141571045,
      "learning_rate": 0.00010751744088317943,
      "loss": 0.8265,
      "step": 2044
    },
    {
      "epoch": 0.4923854812496238,
      "grad_norm": 1.918283462524414,
      "learning_rate": 0.00010743968474112728,
      "loss": 0.8652,
      "step": 2045
    },
    {
      "epoch": 0.4926262565460784,
      "grad_norm": 2.034250497817993,
      "learning_rate": 0.00010736192407573,
      "loss": 0.8617,
      "step": 2046
    },
    {
      "epoch": 0.49286703184253294,
      "grad_norm": 3.235872268676758,
      "learning_rate": 0.00010728415893426635,
      "loss": 0.4167,
      "step": 2047
    },
    {
      "epoch": 0.49310780713898755,
      "grad_norm": 1.3588740825653076,
      "learning_rate": 0.00010720638936401766,
      "loss": 0.8502,
      "step": 2048
    },
    {
      "epoch": 0.4933485824354421,
      "grad_norm": 2.3015613555908203,
      "learning_rate": 0.00010712861541226797,
      "loss": 0.8856,
      "step": 2049
    },
    {
      "epoch": 0.4935893577318967,
      "grad_norm": 3.480872631072998,
      "learning_rate": 0.00010705083712630401,
      "loss": 0.9697,
      "step": 2050
    },
    {
      "epoch": 0.49383013302835127,
      "grad_norm": 8.409546852111816,
      "learning_rate": 0.00010697305455341526,
      "loss": 0.3575,
      "step": 2051
    },
    {
      "epoch": 0.4940709083248059,
      "grad_norm": 1.3223494291305542,
      "learning_rate": 0.00010689526774089362,
      "loss": 0.3494,
      "step": 2052
    },
    {
      "epoch": 0.49431168362126043,
      "grad_norm": 1.0988234281539917,
      "learning_rate": 0.00010681747673603366,
      "loss": 0.2256,
      "step": 2053
    },
    {
      "epoch": 0.49455245891771504,
      "grad_norm": 1.517215609550476,
      "learning_rate": 0.00010673968158613243,
      "loss": 0.5634,
      "step": 2054
    },
    {
      "epoch": 0.49479323421416965,
      "grad_norm": 3.4470624923706055,
      "learning_rate": 0.00010666188233848967,
      "loss": 0.5364,
      "step": 2055
    },
    {
      "epoch": 0.4950340095106242,
      "grad_norm": 2.27813720703125,
      "learning_rate": 0.00010658407904040743,
      "loss": 0.7642,
      "step": 2056
    },
    {
      "epoch": 0.4952747848070788,
      "grad_norm": 0.9174807667732239,
      "learning_rate": 0.0001065062717391903,
      "loss": 0.4644,
      "step": 2057
    },
    {
      "epoch": 0.49551556010353337,
      "grad_norm": 1.2668373584747314,
      "learning_rate": 0.00010642846048214527,
      "loss": 0.3014,
      "step": 2058
    },
    {
      "epoch": 0.495756335399988,
      "grad_norm": 3.347287893295288,
      "learning_rate": 0.00010635064531658178,
      "loss": 0.283,
      "step": 2059
    },
    {
      "epoch": 0.49599711069644253,
      "grad_norm": 0.6961964964866638,
      "learning_rate": 0.00010627282628981165,
      "loss": 0.2452,
      "step": 2060
    },
    {
      "epoch": 0.49623788599289714,
      "grad_norm": 4.018993377685547,
      "learning_rate": 0.00010619500344914902,
      "loss": 0.7302,
      "step": 2061
    },
    {
      "epoch": 0.4964786612893517,
      "grad_norm": 1.290248990058899,
      "learning_rate": 0.0001061171768419103,
      "loss": 0.6912,
      "step": 2062
    },
    {
      "epoch": 0.4967194365858063,
      "grad_norm": 2.4808475971221924,
      "learning_rate": 0.00010603934651541427,
      "loss": 0.2996,
      "step": 2063
    },
    {
      "epoch": 0.49696021188226086,
      "grad_norm": 2.5349011421203613,
      "learning_rate": 0.00010596151251698199,
      "loss": 0.5265,
      "step": 2064
    },
    {
      "epoch": 0.49720098717871547,
      "grad_norm": 2.832211494445801,
      "learning_rate": 0.00010588367489393666,
      "loss": 0.9041,
      "step": 2065
    },
    {
      "epoch": 0.49744176247517,
      "grad_norm": 1.3861429691314697,
      "learning_rate": 0.00010580583369360373,
      "loss": 0.5222,
      "step": 2066
    },
    {
      "epoch": 0.49768253777162463,
      "grad_norm": 1.224226951599121,
      "learning_rate": 0.00010572798896331082,
      "loss": 0.5713,
      "step": 2067
    },
    {
      "epoch": 0.4979233130680792,
      "grad_norm": 2.4965927600860596,
      "learning_rate": 0.00010565014075038775,
      "loss": 0.3679,
      "step": 2068
    },
    {
      "epoch": 0.4981640883645338,
      "grad_norm": 2.0286030769348145,
      "learning_rate": 0.00010557228910216637,
      "loss": 0.3128,
      "step": 2069
    },
    {
      "epoch": 0.4984048636609884,
      "grad_norm": 1.7408385276794434,
      "learning_rate": 0.00010549443406598063,
      "loss": 0.6847,
      "step": 2070
    },
    {
      "epoch": 0.49864563895744296,
      "grad_norm": 2.918757915496826,
      "learning_rate": 0.00010541657568916661,
      "loss": 0.4012,
      "step": 2071
    },
    {
      "epoch": 0.49888641425389757,
      "grad_norm": 0.9126492142677307,
      "learning_rate": 0.00010533871401906237,
      "loss": 0.3021,
      "step": 2072
    },
    {
      "epoch": 0.4991271895503521,
      "grad_norm": 1.8159611225128174,
      "learning_rate": 0.00010526084910300798,
      "loss": 0.5893,
      "step": 2073
    },
    {
      "epoch": 0.49936796484680673,
      "grad_norm": 1.3606966733932495,
      "learning_rate": 0.00010518298098834547,
      "loss": 0.4645,
      "step": 2074
    },
    {
      "epoch": 0.4996087401432613,
      "grad_norm": 3.8433918952941895,
      "learning_rate": 0.00010510510972241887,
      "loss": 0.3448,
      "step": 2075
    },
    {
      "epoch": 0.4998495154397159,
      "grad_norm": 2.995986223220825,
      "learning_rate": 0.00010502723535257401,
      "loss": 0.6148,
      "step": 2076
    },
    {
      "epoch": 0.5000902907361705,
      "grad_norm": 2.552739381790161,
      "learning_rate": 0.00010494935792615879,
      "loss": 0.5938,
      "step": 2077
    },
    {
      "epoch": 0.500331066032625,
      "grad_norm": 2.2203798294067383,
      "learning_rate": 0.00010487147749052275,
      "loss": 0.8364,
      "step": 2078
    },
    {
      "epoch": 0.5005718413290796,
      "grad_norm": 1.519313097000122,
      "learning_rate": 0.00010479359409301745,
      "loss": 0.6105,
      "step": 2079
    },
    {
      "epoch": 0.5008126166255342,
      "grad_norm": 1.471633791923523,
      "learning_rate": 0.00010471570778099611,
      "loss": 0.8271,
      "step": 2080
    },
    {
      "epoch": 0.5010533919219888,
      "grad_norm": 3.146540880203247,
      "learning_rate": 0.00010463781860181385,
      "loss": 0.6785,
      "step": 2081
    },
    {
      "epoch": 0.5012941672184434,
      "grad_norm": 2.4692275524139404,
      "learning_rate": 0.00010455992660282741,
      "loss": 0.8448,
      "step": 2082
    },
    {
      "epoch": 0.5015349425148979,
      "grad_norm": 2.2308695316314697,
      "learning_rate": 0.00010448203183139533,
      "loss": 0.72,
      "step": 2083
    },
    {
      "epoch": 0.5017757178113526,
      "grad_norm": 0.7109373807907104,
      "learning_rate": 0.00010440413433487781,
      "loss": 0.1728,
      "step": 2084
    },
    {
      "epoch": 0.5020164931078072,
      "grad_norm": 5.810349464416504,
      "learning_rate": 0.00010432623416063667,
      "loss": 1.3146,
      "step": 2085
    },
    {
      "epoch": 0.5022572684042618,
      "grad_norm": 1.7806396484375,
      "learning_rate": 0.0001042483313560354,
      "loss": 0.224,
      "step": 2086
    },
    {
      "epoch": 0.5024980437007163,
      "grad_norm": 4.550583362579346,
      "learning_rate": 0.00010417042596843914,
      "loss": 0.5014,
      "step": 2087
    },
    {
      "epoch": 0.5027388189971709,
      "grad_norm": 0.9690256118774414,
      "learning_rate": 0.00010409251804521447,
      "loss": 0.2506,
      "step": 2088
    },
    {
      "epoch": 0.5029795942936255,
      "grad_norm": 1.3459006547927856,
      "learning_rate": 0.00010401460763372961,
      "loss": 0.212,
      "step": 2089
    },
    {
      "epoch": 0.5032203695900801,
      "grad_norm": 1.2357487678527832,
      "learning_rate": 0.00010393669478135426,
      "loss": 0.6829,
      "step": 2090
    },
    {
      "epoch": 0.5034611448865346,
      "grad_norm": 0.7511969804763794,
      "learning_rate": 0.00010385877953545961,
      "loss": 0.737,
      "step": 2091
    },
    {
      "epoch": 0.5037019201829892,
      "grad_norm": 1.3373340368270874,
      "learning_rate": 0.00010378086194341832,
      "loss": 0.9976,
      "step": 2092
    },
    {
      "epoch": 0.5039426954794438,
      "grad_norm": 2.1753182411193848,
      "learning_rate": 0.00010370294205260443,
      "loss": 0.3736,
      "step": 2093
    },
    {
      "epoch": 0.5041834707758984,
      "grad_norm": 0.6808569431304932,
      "learning_rate": 0.00010362501991039347,
      "loss": 0.4928,
      "step": 2094
    },
    {
      "epoch": 0.5044242460723529,
      "grad_norm": 5.135721683502197,
      "learning_rate": 0.00010354709556416218,
      "loss": 0.6557,
      "step": 2095
    },
    {
      "epoch": 0.5046650213688075,
      "grad_norm": 3.574115037918091,
      "learning_rate": 0.00010346916906128883,
      "loss": 0.6108,
      "step": 2096
    },
    {
      "epoch": 0.5049057966652621,
      "grad_norm": 5.21065092086792,
      "learning_rate": 0.0001033912404491529,
      "loss": 0.5354,
      "step": 2097
    },
    {
      "epoch": 0.5051465719617168,
      "grad_norm": 4.044327259063721,
      "learning_rate": 0.00010331330977513509,
      "loss": 0.4002,
      "step": 2098
    },
    {
      "epoch": 0.5053873472581714,
      "grad_norm": 1.0108164548873901,
      "learning_rate": 0.00010323537708661748,
      "loss": 0.5534,
      "step": 2099
    },
    {
      "epoch": 0.5056281225546259,
      "grad_norm": 0.433327317237854,
      "learning_rate": 0.00010315744243098333,
      "loss": 0.4697,
      "step": 2100
    },
    {
      "epoch": 0.5058688978510805,
      "grad_norm": 1.2929291725158691,
      "learning_rate": 0.00010307950585561706,
      "loss": 0.6741,
      "step": 2101
    },
    {
      "epoch": 0.5061096731475351,
      "grad_norm": 1.6541675329208374,
      "learning_rate": 0.00010300156740790427,
      "loss": 0.3582,
      "step": 2102
    },
    {
      "epoch": 0.5063504484439897,
      "grad_norm": 2.3018059730529785,
      "learning_rate": 0.00010292362713523176,
      "loss": 1.1002,
      "step": 2103
    },
    {
      "epoch": 0.5065912237404442,
      "grad_norm": 1.3195204734802246,
      "learning_rate": 0.00010284568508498735,
      "loss": 0.4559,
      "step": 2104
    },
    {
      "epoch": 0.5068319990368988,
      "grad_norm": 1.7798513174057007,
      "learning_rate": 0.00010276774130456001,
      "loss": 0.3002,
      "step": 2105
    },
    {
      "epoch": 0.5070727743333534,
      "grad_norm": 1.1935960054397583,
      "learning_rate": 0.00010268979584133971,
      "loss": 0.5571,
      "step": 2106
    },
    {
      "epoch": 0.507313549629808,
      "grad_norm": 2.004664421081543,
      "learning_rate": 0.00010261184874271748,
      "loss": 0.5307,
      "step": 2107
    },
    {
      "epoch": 0.5075543249262625,
      "grad_norm": 1.2251675128936768,
      "learning_rate": 0.00010253390005608534,
      "loss": 0.4798,
      "step": 2108
    },
    {
      "epoch": 0.5077951002227171,
      "grad_norm": 1.0275200605392456,
      "learning_rate": 0.00010245594982883626,
      "loss": 0.8242,
      "step": 2109
    },
    {
      "epoch": 0.5080358755191717,
      "grad_norm": 0.9734987616539001,
      "learning_rate": 0.00010237799810836413,
      "loss": 0.5406,
      "step": 2110
    },
    {
      "epoch": 0.5082766508156263,
      "grad_norm": 2.428023099899292,
      "learning_rate": 0.0001023000449420638,
      "loss": 0.3063,
      "step": 2111
    },
    {
      "epoch": 0.508517426112081,
      "grad_norm": 2.5370419025421143,
      "learning_rate": 0.00010222209037733097,
      "loss": 0.7001,
      "step": 2112
    },
    {
      "epoch": 0.5087582014085354,
      "grad_norm": 8.222167015075684,
      "learning_rate": 0.0001021441344615622,
      "loss": 1.3225,
      "step": 2113
    },
    {
      "epoch": 0.5089989767049901,
      "grad_norm": 8.197820663452148,
      "learning_rate": 0.00010206617724215481,
      "loss": 0.1596,
      "step": 2114
    },
    {
      "epoch": 0.5092397520014447,
      "grad_norm": 4.041478157043457,
      "learning_rate": 0.00010198821876650701,
      "loss": 0.4862,
      "step": 2115
    },
    {
      "epoch": 0.5094805272978993,
      "grad_norm": 1.4023808240890503,
      "learning_rate": 0.00010191025908201774,
      "loss": 0.337,
      "step": 2116
    },
    {
      "epoch": 0.5097213025943538,
      "grad_norm": 0.8638352751731873,
      "learning_rate": 0.00010183229823608665,
      "loss": 0.3498,
      "step": 2117
    },
    {
      "epoch": 0.5099620778908084,
      "grad_norm": 3.4716315269470215,
      "learning_rate": 0.00010175433627611408,
      "loss": 1.0205,
      "step": 2118
    },
    {
      "epoch": 0.510202853187263,
      "grad_norm": 9.713912010192871,
      "learning_rate": 0.0001016763732495011,
      "loss": 0.7581,
      "step": 2119
    },
    {
      "epoch": 0.5104436284837176,
      "grad_norm": 3.348017930984497,
      "learning_rate": 0.00010159840920364943,
      "loss": 0.1819,
      "step": 2120
    },
    {
      "epoch": 0.5106844037801721,
      "grad_norm": 9.675308227539062,
      "learning_rate": 0.00010152044418596136,
      "loss": 0.5749,
      "step": 2121
    },
    {
      "epoch": 0.5109251790766267,
      "grad_norm": 0.7371659278869629,
      "learning_rate": 0.00010144247824383979,
      "loss": 0.2887,
      "step": 2122
    },
    {
      "epoch": 0.5111659543730813,
      "grad_norm": 1.746598720550537,
      "learning_rate": 0.00010136451142468819,
      "loss": 0.9139,
      "step": 2123
    },
    {
      "epoch": 0.5114067296695359,
      "grad_norm": 0.3207070827484131,
      "learning_rate": 0.00010128654377591056,
      "loss": 0.3856,
      "step": 2124
    },
    {
      "epoch": 0.5116475049659905,
      "grad_norm": 1.747492790222168,
      "learning_rate": 0.00010120857534491144,
      "loss": 0.4888,
      "step": 2125
    },
    {
      "epoch": 0.511888280262445,
      "grad_norm": 1.8366111516952515,
      "learning_rate": 0.0001011306061790958,
      "loss": 0.8371,
      "step": 2126
    },
    {
      "epoch": 0.5121290555588996,
      "grad_norm": 2.3959193229675293,
      "learning_rate": 0.00010105263632586904,
      "loss": 1.0204,
      "step": 2127
    },
    {
      "epoch": 0.5123698308553543,
      "grad_norm": 1.2648195028305054,
      "learning_rate": 0.00010097466583263699,
      "loss": 0.3782,
      "step": 2128
    },
    {
      "epoch": 0.5126106061518089,
      "grad_norm": 3.5460050106048584,
      "learning_rate": 0.00010089669474680596,
      "loss": 0.697,
      "step": 2129
    },
    {
      "epoch": 0.5128513814482634,
      "grad_norm": 0.989863932132721,
      "learning_rate": 0.00010081872311578249,
      "loss": 0.217,
      "step": 2130
    },
    {
      "epoch": 0.513092156744718,
      "grad_norm": 5.3702921867370605,
      "learning_rate": 0.00010074075098697351,
      "loss": 0.7093,
      "step": 2131
    },
    {
      "epoch": 0.5133329320411726,
      "grad_norm": 8.320046424865723,
      "learning_rate": 0.00010066277840778626,
      "loss": 0.8629,
      "step": 2132
    },
    {
      "epoch": 0.5135737073376272,
      "grad_norm": 3.336007833480835,
      "learning_rate": 0.00010058480542562828,
      "loss": 1.1258,
      "step": 2133
    },
    {
      "epoch": 0.5138144826340817,
      "grad_norm": 0.6159772276878357,
      "learning_rate": 0.00010050683208790726,
      "loss": 0.3306,
      "step": 2134
    },
    {
      "epoch": 0.5140552579305363,
      "grad_norm": 1.654181957244873,
      "learning_rate": 0.00010042885844203119,
      "loss": 0.7766,
      "step": 2135
    },
    {
      "epoch": 0.5142960332269909,
      "grad_norm": 1.8773746490478516,
      "learning_rate": 0.00010035088453540822,
      "loss": 0.2017,
      "step": 2136
    },
    {
      "epoch": 0.5145368085234455,
      "grad_norm": 1.3991271257400513,
      "learning_rate": 0.00010027291041544664,
      "loss": 0.643,
      "step": 2137
    },
    {
      "epoch": 0.5147775838199001,
      "grad_norm": 2.1096439361572266,
      "learning_rate": 0.00010019493612955495,
      "loss": 0.6112,
      "step": 2138
    },
    {
      "epoch": 0.5150183591163546,
      "grad_norm": 2.802321195602417,
      "learning_rate": 0.00010011696172514162,
      "loss": 0.7492,
      "step": 2139
    },
    {
      "epoch": 0.5152591344128092,
      "grad_norm": 2.361962080001831,
      "learning_rate": 0.00010003898724961533,
      "loss": 0.2983,
      "step": 2140
    },
    {
      "epoch": 0.5154999097092638,
      "grad_norm": 2.6102824211120605,
      "learning_rate": 9.99610127503847e-05,
      "loss": 0.8425,
      "step": 2141
    },
    {
      "epoch": 0.5157406850057185,
      "grad_norm": 0.7321549654006958,
      "learning_rate": 9.988303827485839e-05,
      "loss": 0.2544,
      "step": 2142
    },
    {
      "epoch": 0.515981460302173,
      "grad_norm": 3.4591763019561768,
      "learning_rate": 9.980506387044508e-05,
      "loss": 0.6845,
      "step": 2143
    },
    {
      "epoch": 0.5162222355986276,
      "grad_norm": 6.815724849700928,
      "learning_rate": 9.972708958455337e-05,
      "loss": 0.6039,
      "step": 2144
    },
    {
      "epoch": 0.5164630108950822,
      "grad_norm": 3.7558867931365967,
      "learning_rate": 9.964911546459181e-05,
      "loss": 0.6514,
      "step": 2145
    },
    {
      "epoch": 0.5167037861915368,
      "grad_norm": 1.1329708099365234,
      "learning_rate": 9.957114155796884e-05,
      "loss": 1.0924,
      "step": 2146
    },
    {
      "epoch": 0.5169445614879913,
      "grad_norm": 2.772102117538452,
      "learning_rate": 9.949316791209275e-05,
      "loss": 0.3061,
      "step": 2147
    },
    {
      "epoch": 0.5171853367844459,
      "grad_norm": 1.8187817335128784,
      "learning_rate": 9.941519457437173e-05,
      "loss": 0.4169,
      "step": 2148
    },
    {
      "epoch": 0.5174261120809005,
      "grad_norm": 0.46912047266960144,
      "learning_rate": 9.933722159221376e-05,
      "loss": 0.336,
      "step": 2149
    },
    {
      "epoch": 0.5176668873773551,
      "grad_norm": 1.7679054737091064,
      "learning_rate": 9.925924901302651e-05,
      "loss": 0.4573,
      "step": 2150
    },
    {
      "epoch": 0.5179076626738097,
      "grad_norm": 1.68385648727417,
      "learning_rate": 9.918127688421755e-05,
      "loss": 0.529,
      "step": 2151
    },
    {
      "epoch": 0.5181484379702642,
      "grad_norm": 0.7433429956436157,
      "learning_rate": 9.910330525319406e-05,
      "loss": 0.3717,
      "step": 2152
    },
    {
      "epoch": 0.5183892132667188,
      "grad_norm": 1.197072148323059,
      "learning_rate": 9.902533416736302e-05,
      "loss": 0.2179,
      "step": 2153
    },
    {
      "epoch": 0.5186299885631734,
      "grad_norm": 1.157617211341858,
      "learning_rate": 9.894736367413102e-05,
      "loss": 0.6772,
      "step": 2154
    },
    {
      "epoch": 0.518870763859628,
      "grad_norm": 2.84462308883667,
      "learning_rate": 9.886939382090422e-05,
      "loss": 0.4376,
      "step": 2155
    },
    {
      "epoch": 0.5191115391560825,
      "grad_norm": 1.1269418001174927,
      "learning_rate": 9.879142465508856e-05,
      "loss": 0.5879,
      "step": 2156
    },
    {
      "epoch": 0.5193523144525372,
      "grad_norm": 1.6317634582519531,
      "learning_rate": 9.871345622408946e-05,
      "loss": 0.8341,
      "step": 2157
    },
    {
      "epoch": 0.5195930897489918,
      "grad_norm": 2.172504425048828,
      "learning_rate": 9.863548857531183e-05,
      "loss": 0.4717,
      "step": 2158
    },
    {
      "epoch": 0.5198338650454464,
      "grad_norm": 0.8946624994277954,
      "learning_rate": 9.855752175616025e-05,
      "loss": 0.9934,
      "step": 2159
    },
    {
      "epoch": 0.5200746403419009,
      "grad_norm": 1.0163549184799194,
      "learning_rate": 9.847955581403866e-05,
      "loss": 0.6364,
      "step": 2160
    },
    {
      "epoch": 0.5203154156383555,
      "grad_norm": 1.2340433597564697,
      "learning_rate": 9.840159079635057e-05,
      "loss": 0.681,
      "step": 2161
    },
    {
      "epoch": 0.5205561909348101,
      "grad_norm": 2.015260934829712,
      "learning_rate": 9.832362675049893e-05,
      "loss": 0.7061,
      "step": 2162
    },
    {
      "epoch": 0.5207969662312647,
      "grad_norm": 1.6834375858306885,
      "learning_rate": 9.824566372388596e-05,
      "loss": 0.6874,
      "step": 2163
    },
    {
      "epoch": 0.5210377415277193,
      "grad_norm": 2.863741874694824,
      "learning_rate": 9.81677017639134e-05,
      "loss": 0.1785,
      "step": 2164
    },
    {
      "epoch": 0.5212785168241738,
      "grad_norm": 0.741033673286438,
      "learning_rate": 9.808974091798227e-05,
      "loss": 0.3825,
      "step": 2165
    },
    {
      "epoch": 0.5215192921206284,
      "grad_norm": 2.9215714931488037,
      "learning_rate": 9.801178123349298e-05,
      "loss": 0.5243,
      "step": 2166
    },
    {
      "epoch": 0.521760067417083,
      "grad_norm": 2.389853000640869,
      "learning_rate": 9.793382275784521e-05,
      "loss": 0.5792,
      "step": 2167
    },
    {
      "epoch": 0.5220008427135376,
      "grad_norm": 4.854155540466309,
      "learning_rate": 9.785586553843781e-05,
      "loss": 0.7133,
      "step": 2168
    },
    {
      "epoch": 0.5222416180099921,
      "grad_norm": 1.7137115001678467,
      "learning_rate": 9.777790962266903e-05,
      "loss": 0.8245,
      "step": 2169
    },
    {
      "epoch": 0.5224823933064467,
      "grad_norm": 5.3910603523254395,
      "learning_rate": 9.769995505793622e-05,
      "loss": 0.5916,
      "step": 2170
    },
    {
      "epoch": 0.5227231686029014,
      "grad_norm": 9.350793838500977,
      "learning_rate": 9.762200189163588e-05,
      "loss": 0.7286,
      "step": 2171
    },
    {
      "epoch": 0.522963943899356,
      "grad_norm": 2.6609160900115967,
      "learning_rate": 9.754405017116379e-05,
      "loss": 0.5725,
      "step": 2172
    },
    {
      "epoch": 0.5232047191958105,
      "grad_norm": 2.54089617729187,
      "learning_rate": 9.746609994391468e-05,
      "loss": 0.7312,
      "step": 2173
    },
    {
      "epoch": 0.5234454944922651,
      "grad_norm": 1.6947931051254272,
      "learning_rate": 9.738815125728252e-05,
      "loss": 1.0029,
      "step": 2174
    },
    {
      "epoch": 0.5236862697887197,
      "grad_norm": 1.9103237390518188,
      "learning_rate": 9.73102041586603e-05,
      "loss": 0.6121,
      "step": 2175
    },
    {
      "epoch": 0.5239270450851743,
      "grad_norm": 3.6913580894470215,
      "learning_rate": 9.723225869544001e-05,
      "loss": 0.8657,
      "step": 2176
    },
    {
      "epoch": 0.5241678203816288,
      "grad_norm": 1.9038362503051758,
      "learning_rate": 9.715431491501269e-05,
      "loss": 0.5313,
      "step": 2177
    },
    {
      "epoch": 0.5244085956780834,
      "grad_norm": 3.199769973754883,
      "learning_rate": 9.707637286476827e-05,
      "loss": 0.7072,
      "step": 2178
    },
    {
      "epoch": 0.524649370974538,
      "grad_norm": 1.5751662254333496,
      "learning_rate": 9.699843259209574e-05,
      "loss": 0.2701,
      "step": 2179
    },
    {
      "epoch": 0.5248901462709926,
      "grad_norm": 1.8176679611206055,
      "learning_rate": 9.692049414438299e-05,
      "loss": 0.2336,
      "step": 2180
    },
    {
      "epoch": 0.5251309215674472,
      "grad_norm": 7.185880661010742,
      "learning_rate": 9.68425575690167e-05,
      "loss": 0.4916,
      "step": 2181
    },
    {
      "epoch": 0.5253716968639017,
      "grad_norm": 3.68613338470459,
      "learning_rate": 9.676462291338253e-05,
      "loss": 0.5863,
      "step": 2182
    },
    {
      "epoch": 0.5256124721603563,
      "grad_norm": 1.8995952606201172,
      "learning_rate": 9.668669022486494e-05,
      "loss": 0.1889,
      "step": 2183
    },
    {
      "epoch": 0.525853247456811,
      "grad_norm": 1.6753265857696533,
      "learning_rate": 9.660875955084713e-05,
      "loss": 0.539,
      "step": 2184
    },
    {
      "epoch": 0.5260940227532656,
      "grad_norm": 0.9983983039855957,
      "learning_rate": 9.65308309387112e-05,
      "loss": 0.3609,
      "step": 2185
    },
    {
      "epoch": 0.52633479804972,
      "grad_norm": 3.3040006160736084,
      "learning_rate": 9.645290443583785e-05,
      "loss": 1.2302,
      "step": 2186
    },
    {
      "epoch": 0.5265755733461747,
      "grad_norm": 2.018064498901367,
      "learning_rate": 9.637498008960657e-05,
      "loss": 0.443,
      "step": 2187
    },
    {
      "epoch": 0.5268163486426293,
      "grad_norm": 2.3584113121032715,
      "learning_rate": 9.629705794739558e-05,
      "loss": 0.8664,
      "step": 2188
    },
    {
      "epoch": 0.5270571239390839,
      "grad_norm": 0.6062427163124084,
      "learning_rate": 9.62191380565817e-05,
      "loss": 0.2761,
      "step": 2189
    },
    {
      "epoch": 0.5272978992355384,
      "grad_norm": 4.201809406280518,
      "learning_rate": 9.614122046454044e-05,
      "loss": 1.1502,
      "step": 2190
    },
    {
      "epoch": 0.527538674531993,
      "grad_norm": 6.053175449371338,
      "learning_rate": 9.606330521864576e-05,
      "loss": 0.465,
      "step": 2191
    },
    {
      "epoch": 0.5277794498284476,
      "grad_norm": 1.6828287839889526,
      "learning_rate": 9.59853923662704e-05,
      "loss": 0.7583,
      "step": 2192
    },
    {
      "epoch": 0.5280202251249022,
      "grad_norm": 2.127516746520996,
      "learning_rate": 9.590748195478557e-05,
      "loss": 0.581,
      "step": 2193
    },
    {
      "epoch": 0.5282610004213568,
      "grad_norm": 2.426520824432373,
      "learning_rate": 9.582957403156089e-05,
      "loss": 0.729,
      "step": 2194
    },
    {
      "epoch": 0.5285017757178113,
      "grad_norm": 0.5099361538887024,
      "learning_rate": 9.575166864396459e-05,
      "loss": 0.2235,
      "step": 2195
    },
    {
      "epoch": 0.5287425510142659,
      "grad_norm": 2.9863169193267822,
      "learning_rate": 9.567376583936335e-05,
      "loss": 0.5938,
      "step": 2196
    },
    {
      "epoch": 0.5289833263107205,
      "grad_norm": 1.6381510496139526,
      "learning_rate": 9.559586566512221e-05,
      "loss": 0.7708,
      "step": 2197
    },
    {
      "epoch": 0.5292241016071751,
      "grad_norm": 2.1702208518981934,
      "learning_rate": 9.551796816860471e-05,
      "loss": 0.2262,
      "step": 2198
    },
    {
      "epoch": 0.5294648769036296,
      "grad_norm": 1.5045363903045654,
      "learning_rate": 9.544007339717261e-05,
      "loss": 0.6521,
      "step": 2199
    },
    {
      "epoch": 0.5297056522000843,
      "grad_norm": 1.3283405303955078,
      "learning_rate": 9.536218139818614e-05,
      "loss": 0.386,
      "step": 2200
    },
    {
      "epoch": 0.5299464274965389,
      "grad_norm": 2.6849524974823,
      "learning_rate": 9.52842922190039e-05,
      "loss": 0.5514,
      "step": 2201
    },
    {
      "epoch": 0.5301872027929935,
      "grad_norm": 1.1004747152328491,
      "learning_rate": 9.520640590698258e-05,
      "loss": 0.5606,
      "step": 2202
    },
    {
      "epoch": 0.530427978089448,
      "grad_norm": 2.8887600898742676,
      "learning_rate": 9.512852250947727e-05,
      "loss": 0.7519,
      "step": 2203
    },
    {
      "epoch": 0.5306687533859026,
      "grad_norm": 2.1143975257873535,
      "learning_rate": 9.505064207384124e-05,
      "loss": 0.3216,
      "step": 2204
    },
    {
      "epoch": 0.5309095286823572,
      "grad_norm": 1.3769932985305786,
      "learning_rate": 9.497276464742598e-05,
      "loss": 0.2864,
      "step": 2205
    },
    {
      "epoch": 0.5311503039788118,
      "grad_norm": 1.131319284439087,
      "learning_rate": 9.489489027758118e-05,
      "loss": 0.5236,
      "step": 2206
    },
    {
      "epoch": 0.5313910792752664,
      "grad_norm": 1.2855147123336792,
      "learning_rate": 9.481701901165455e-05,
      "loss": 0.8535,
      "step": 2207
    },
    {
      "epoch": 0.5316318545717209,
      "grad_norm": 4.562783718109131,
      "learning_rate": 9.473915089699203e-05,
      "loss": 1.103,
      "step": 2208
    },
    {
      "epoch": 0.5318726298681755,
      "grad_norm": 1.491631269454956,
      "learning_rate": 9.466128598093767e-05,
      "loss": 0.328,
      "step": 2209
    },
    {
      "epoch": 0.5321134051646301,
      "grad_norm": 1.7544147968292236,
      "learning_rate": 9.458342431083342e-05,
      "loss": 0.0794,
      "step": 2210
    },
    {
      "epoch": 0.5323541804610847,
      "grad_norm": 1.3631882667541504,
      "learning_rate": 9.45055659340194e-05,
      "loss": 0.2153,
      "step": 2211
    },
    {
      "epoch": 0.5325949557575392,
      "grad_norm": 6.174732208251953,
      "learning_rate": 9.442771089783366e-05,
      "loss": 0.7058,
      "step": 2212
    },
    {
      "epoch": 0.5328357310539938,
      "grad_norm": 1.6120647192001343,
      "learning_rate": 9.434985924961226e-05,
      "loss": 0.5721,
      "step": 2213
    },
    {
      "epoch": 0.5330765063504485,
      "grad_norm": 0.5557000637054443,
      "learning_rate": 9.42720110366892e-05,
      "loss": 0.255,
      "step": 2214
    },
    {
      "epoch": 0.5333172816469031,
      "grad_norm": 3.7805826663970947,
      "learning_rate": 9.41941663063963e-05,
      "loss": 0.6903,
      "step": 2215
    },
    {
      "epoch": 0.5335580569433576,
      "grad_norm": 4.721010684967041,
      "learning_rate": 9.411632510606337e-05,
      "loss": 1.1333,
      "step": 2216
    },
    {
      "epoch": 0.5337988322398122,
      "grad_norm": 3.89003849029541,
      "learning_rate": 9.403848748301802e-05,
      "loss": 0.9563,
      "step": 2217
    },
    {
      "epoch": 0.5340396075362668,
      "grad_norm": 1.9357439279556274,
      "learning_rate": 9.396065348458571e-05,
      "loss": 0.6106,
      "step": 2218
    },
    {
      "epoch": 0.5342803828327214,
      "grad_norm": 1.0858145952224731,
      "learning_rate": 9.388282315808971e-05,
      "loss": 0.4984,
      "step": 2219
    },
    {
      "epoch": 0.534521158129176,
      "grad_norm": 2.763885259628296,
      "learning_rate": 9.3804996550851e-05,
      "loss": 0.3943,
      "step": 2220
    },
    {
      "epoch": 0.5347619334256305,
      "grad_norm": 0.8865588903427124,
      "learning_rate": 9.372717371018834e-05,
      "loss": 0.2669,
      "step": 2221
    },
    {
      "epoch": 0.5350027087220851,
      "grad_norm": 1.0072959661483765,
      "learning_rate": 9.364935468341824e-05,
      "loss": 0.2614,
      "step": 2222
    },
    {
      "epoch": 0.5352434840185397,
      "grad_norm": 1.3582466840744019,
      "learning_rate": 9.357153951785475e-05,
      "loss": 0.8149,
      "step": 2223
    },
    {
      "epoch": 0.5354842593149943,
      "grad_norm": 1.8487718105316162,
      "learning_rate": 9.349372826080974e-05,
      "loss": 1.013,
      "step": 2224
    },
    {
      "epoch": 0.5357250346114488,
      "grad_norm": 2.25203275680542,
      "learning_rate": 9.341592095959259e-05,
      "loss": 0.4711,
      "step": 2225
    },
    {
      "epoch": 0.5359658099079034,
      "grad_norm": 4.066526889801025,
      "learning_rate": 9.333811766151033e-05,
      "loss": 1.3851,
      "step": 2226
    },
    {
      "epoch": 0.536206585204358,
      "grad_norm": 3.2181577682495117,
      "learning_rate": 9.326031841386759e-05,
      "loss": 0.7188,
      "step": 2227
    },
    {
      "epoch": 0.5364473605008127,
      "grad_norm": 4.251607894897461,
      "learning_rate": 9.318252326396635e-05,
      "loss": 0.9096,
      "step": 2228
    },
    {
      "epoch": 0.5366881357972672,
      "grad_norm": 3.6044514179229736,
      "learning_rate": 9.310473225910641e-05,
      "loss": 0.4364,
      "step": 2229
    },
    {
      "epoch": 0.5369289110937218,
      "grad_norm": 0.8138754367828369,
      "learning_rate": 9.302694544658475e-05,
      "loss": 0.3227,
      "step": 2230
    },
    {
      "epoch": 0.5371696863901764,
      "grad_norm": 1.5204187631607056,
      "learning_rate": 9.294916287369597e-05,
      "loss": 0.3241,
      "step": 2231
    },
    {
      "epoch": 0.537410461686631,
      "grad_norm": 2.078233242034912,
      "learning_rate": 9.287138458773208e-05,
      "loss": 0.5936,
      "step": 2232
    },
    {
      "epoch": 0.5376512369830856,
      "grad_norm": 6.410951614379883,
      "learning_rate": 9.279361063598238e-05,
      "loss": 0.4392,
      "step": 2233
    },
    {
      "epoch": 0.5378920122795401,
      "grad_norm": 1.241186499595642,
      "learning_rate": 9.271584106573364e-05,
      "loss": 0.4729,
      "step": 2234
    },
    {
      "epoch": 0.5381327875759947,
      "grad_norm": 1.808719515800476,
      "learning_rate": 9.263807592427001e-05,
      "loss": 0.5305,
      "step": 2235
    },
    {
      "epoch": 0.5383735628724493,
      "grad_norm": 0.6988890171051025,
      "learning_rate": 9.256031525887273e-05,
      "loss": 0.5642,
      "step": 2236
    },
    {
      "epoch": 0.5386143381689039,
      "grad_norm": 2.4080259799957275,
      "learning_rate": 9.24825591168206e-05,
      "loss": 0.7976,
      "step": 2237
    },
    {
      "epoch": 0.5388551134653584,
      "grad_norm": 4.949229717254639,
      "learning_rate": 9.240480754538942e-05,
      "loss": 1.2054,
      "step": 2238
    },
    {
      "epoch": 0.539095888761813,
      "grad_norm": 1.403643250465393,
      "learning_rate": 9.232706059185236e-05,
      "loss": 0.9002,
      "step": 2239
    },
    {
      "epoch": 0.5393366640582676,
      "grad_norm": 2.1335864067077637,
      "learning_rate": 9.224931830347978e-05,
      "loss": 0.9663,
      "step": 2240
    },
    {
      "epoch": 0.5395774393547222,
      "grad_norm": 2.4091343879699707,
      "learning_rate": 9.21715807275391e-05,
      "loss": 0.9484,
      "step": 2241
    },
    {
      "epoch": 0.5398182146511767,
      "grad_norm": 2.391929864883423,
      "learning_rate": 9.209384791129504e-05,
      "loss": 0.6072,
      "step": 2242
    },
    {
      "epoch": 0.5400589899476314,
      "grad_norm": 5.663161754608154,
      "learning_rate": 9.20161199020092e-05,
      "loss": 0.3371,
      "step": 2243
    },
    {
      "epoch": 0.540299765244086,
      "grad_norm": 1.5023120641708374,
      "learning_rate": 9.193839674694046e-05,
      "loss": 0.7458,
      "step": 2244
    },
    {
      "epoch": 0.5405405405405406,
      "grad_norm": 2.3951783180236816,
      "learning_rate": 9.186067849334467e-05,
      "loss": 0.8693,
      "step": 2245
    },
    {
      "epoch": 0.5407813158369951,
      "grad_norm": 1.6337603330612183,
      "learning_rate": 9.178296518847467e-05,
      "loss": 0.8064,
      "step": 2246
    },
    {
      "epoch": 0.5410220911334497,
      "grad_norm": 4.101715564727783,
      "learning_rate": 9.170525687958035e-05,
      "loss": 0.7042,
      "step": 2247
    },
    {
      "epoch": 0.5412628664299043,
      "grad_norm": 0.9086791276931763,
      "learning_rate": 9.162755361390858e-05,
      "loss": 0.8873,
      "step": 2248
    },
    {
      "epoch": 0.5415036417263589,
      "grad_norm": 1.7184299230575562,
      "learning_rate": 9.154985543870304e-05,
      "loss": 0.8026,
      "step": 2249
    },
    {
      "epoch": 0.5417444170228135,
      "grad_norm": 2.9949686527252197,
      "learning_rate": 9.147216240120446e-05,
      "loss": 0.6126,
      "step": 2250
    },
    {
      "epoch": 0.541985192319268,
      "grad_norm": 2.2674872875213623,
      "learning_rate": 9.139447454865033e-05,
      "loss": 0.8358,
      "step": 2251
    },
    {
      "epoch": 0.5422259676157226,
      "grad_norm": 0.7034595012664795,
      "learning_rate": 9.131679192827506e-05,
      "loss": 0.4057,
      "step": 2252
    },
    {
      "epoch": 0.5424667429121772,
      "grad_norm": 3.044638156890869,
      "learning_rate": 9.123911458730988e-05,
      "loss": 0.7883,
      "step": 2253
    },
    {
      "epoch": 0.5427075182086318,
      "grad_norm": 4.1872239112854,
      "learning_rate": 9.116144257298274e-05,
      "loss": 1.4448,
      "step": 2254
    },
    {
      "epoch": 0.5429482935050863,
      "grad_norm": 1.9178543090820312,
      "learning_rate": 9.108377593251847e-05,
      "loss": 1.2404,
      "step": 2255
    },
    {
      "epoch": 0.5431890688015409,
      "grad_norm": 1.3553639650344849,
      "learning_rate": 9.100611471313849e-05,
      "loss": 0.4571,
      "step": 2256
    },
    {
      "epoch": 0.5434298440979956,
      "grad_norm": 5.682826042175293,
      "learning_rate": 9.092845896206102e-05,
      "loss": 0.6029,
      "step": 2257
    },
    {
      "epoch": 0.5436706193944502,
      "grad_norm": 3.233644485473633,
      "learning_rate": 9.085080872650098e-05,
      "loss": 0.7475,
      "step": 2258
    },
    {
      "epoch": 0.5439113946909047,
      "grad_norm": 0.8178972601890564,
      "learning_rate": 9.077316405366981e-05,
      "loss": 0.4826,
      "step": 2259
    },
    {
      "epoch": 0.5441521699873593,
      "grad_norm": 1.9637796878814697,
      "learning_rate": 9.069552499077569e-05,
      "loss": 0.7773,
      "step": 2260
    },
    {
      "epoch": 0.5443929452838139,
      "grad_norm": 4.2175188064575195,
      "learning_rate": 9.061789158502336e-05,
      "loss": 0.5585,
      "step": 2261
    },
    {
      "epoch": 0.5446337205802685,
      "grad_norm": 1.7888754606246948,
      "learning_rate": 9.054026388361405e-05,
      "loss": 0.5089,
      "step": 2262
    },
    {
      "epoch": 0.5448744958767231,
      "grad_norm": 1.9590795040130615,
      "learning_rate": 9.046264193374568e-05,
      "loss": 0.5263,
      "step": 2263
    },
    {
      "epoch": 0.5451152711731776,
      "grad_norm": 2.484314441680908,
      "learning_rate": 9.038502578261241e-05,
      "loss": 0.5187,
      "step": 2264
    },
    {
      "epoch": 0.5453560464696322,
      "grad_norm": 1.6243886947631836,
      "learning_rate": 9.030741547740517e-05,
      "loss": 0.6487,
      "step": 2265
    },
    {
      "epoch": 0.5455968217660868,
      "grad_norm": 3.200514793395996,
      "learning_rate": 9.022981106531119e-05,
      "loss": 0.4566,
      "step": 2266
    },
    {
      "epoch": 0.5458375970625414,
      "grad_norm": 2.8995554447174072,
      "learning_rate": 9.015221259351405e-05,
      "loss": 1.1906,
      "step": 2267
    },
    {
      "epoch": 0.5460783723589959,
      "grad_norm": 1.6960794925689697,
      "learning_rate": 9.007462010919386e-05,
      "loss": 0.8553,
      "step": 2268
    },
    {
      "epoch": 0.5463191476554505,
      "grad_norm": 0.8978815674781799,
      "learning_rate": 8.999703365952699e-05,
      "loss": 0.9352,
      "step": 2269
    },
    {
      "epoch": 0.5465599229519051,
      "grad_norm": 1.4150447845458984,
      "learning_rate": 8.99194532916862e-05,
      "loss": 0.3387,
      "step": 2270
    },
    {
      "epoch": 0.5468006982483598,
      "grad_norm": 1.1384726762771606,
      "learning_rate": 8.984187905284055e-05,
      "loss": 0.2762,
      "step": 2271
    },
    {
      "epoch": 0.5470414735448142,
      "grad_norm": 1.1837869882583618,
      "learning_rate": 8.976431099015528e-05,
      "loss": 0.43,
      "step": 2272
    },
    {
      "epoch": 0.5472822488412689,
      "grad_norm": 3.328984498977661,
      "learning_rate": 8.968674915079197e-05,
      "loss": 0.9047,
      "step": 2273
    },
    {
      "epoch": 0.5475230241377235,
      "grad_norm": 2.5467495918273926,
      "learning_rate": 8.960919358190848e-05,
      "loss": 0.7412,
      "step": 2274
    },
    {
      "epoch": 0.5477637994341781,
      "grad_norm": 0.357572466135025,
      "learning_rate": 8.953164433065866e-05,
      "loss": 0.2749,
      "step": 2275
    },
    {
      "epoch": 0.5480045747306327,
      "grad_norm": 1.4513580799102783,
      "learning_rate": 8.945410144419269e-05,
      "loss": 0.4484,
      "step": 2276
    },
    {
      "epoch": 0.5482453500270872,
      "grad_norm": 2.9045469760894775,
      "learning_rate": 8.937656496965678e-05,
      "loss": 0.8804,
      "step": 2277
    },
    {
      "epoch": 0.5484861253235418,
      "grad_norm": 2.212029218673706,
      "learning_rate": 8.929903495419331e-05,
      "loss": 0.5796,
      "step": 2278
    },
    {
      "epoch": 0.5487269006199964,
      "grad_norm": 5.107553482055664,
      "learning_rate": 8.922151144494072e-05,
      "loss": 0.6931,
      "step": 2279
    },
    {
      "epoch": 0.548967675916451,
      "grad_norm": 0.923570990562439,
      "learning_rate": 8.914399448903344e-05,
      "loss": 0.2629,
      "step": 2280
    },
    {
      "epoch": 0.5492084512129055,
      "grad_norm": 4.435163974761963,
      "learning_rate": 8.906648413360197e-05,
      "loss": 0.4986,
      "step": 2281
    },
    {
      "epoch": 0.5494492265093601,
      "grad_norm": 0.577694296836853,
      "learning_rate": 8.898898042577279e-05,
      "loss": 0.4683,
      "step": 2282
    },
    {
      "epoch": 0.5496900018058147,
      "grad_norm": 3.198882579803467,
      "learning_rate": 8.891148341266828e-05,
      "loss": 0.4887,
      "step": 2283
    },
    {
      "epoch": 0.5499307771022693,
      "grad_norm": 2.20881724357605,
      "learning_rate": 8.883399314140689e-05,
      "loss": 0.6167,
      "step": 2284
    },
    {
      "epoch": 0.5501715523987238,
      "grad_norm": 2.165309429168701,
      "learning_rate": 8.875650965910279e-05,
      "loss": 0.6205,
      "step": 2285
    },
    {
      "epoch": 0.5504123276951784,
      "grad_norm": 1.3588035106658936,
      "learning_rate": 8.867903301286616e-05,
      "loss": 0.3225,
      "step": 2286
    },
    {
      "epoch": 0.5506531029916331,
      "grad_norm": 1.6632091999053955,
      "learning_rate": 8.8601563249803e-05,
      "loss": 0.5279,
      "step": 2287
    },
    {
      "epoch": 0.5508938782880877,
      "grad_norm": 1.157415509223938,
      "learning_rate": 8.852410041701502e-05,
      "loss": 0.4965,
      "step": 2288
    },
    {
      "epoch": 0.5511346535845423,
      "grad_norm": 3.8233842849731445,
      "learning_rate": 8.844664456159985e-05,
      "loss": 0.7001,
      "step": 2289
    },
    {
      "epoch": 0.5513754288809968,
      "grad_norm": 1.0012489557266235,
      "learning_rate": 8.836919573065082e-05,
      "loss": 0.4657,
      "step": 2290
    },
    {
      "epoch": 0.5516162041774514,
      "grad_norm": 1.7905609607696533,
      "learning_rate": 8.829175397125698e-05,
      "loss": 0.3764,
      "step": 2291
    },
    {
      "epoch": 0.551856979473906,
      "grad_norm": 3.6006107330322266,
      "learning_rate": 8.821431933050313e-05,
      "loss": 0.7817,
      "step": 2292
    },
    {
      "epoch": 0.5520977547703606,
      "grad_norm": 0.9073820114135742,
      "learning_rate": 8.813689185546965e-05,
      "loss": 0.322,
      "step": 2293
    },
    {
      "epoch": 0.5523385300668151,
      "grad_norm": 3.195746660232544,
      "learning_rate": 8.80594715932327e-05,
      "loss": 0.6941,
      "step": 2294
    },
    {
      "epoch": 0.5525793053632697,
      "grad_norm": 1.6812855005264282,
      "learning_rate": 8.798205859086388e-05,
      "loss": 0.7138,
      "step": 2295
    },
    {
      "epoch": 0.5528200806597243,
      "grad_norm": 1.5866107940673828,
      "learning_rate": 8.790465289543051e-05,
      "loss": 0.4609,
      "step": 2296
    },
    {
      "epoch": 0.5530608559561789,
      "grad_norm": 1.2990373373031616,
      "learning_rate": 8.782725455399546e-05,
      "loss": 0.5497,
      "step": 2297
    },
    {
      "epoch": 0.5533016312526334,
      "grad_norm": 0.8197939395904541,
      "learning_rate": 8.774986361361705e-05,
      "loss": 0.3533,
      "step": 2298
    },
    {
      "epoch": 0.553542406549088,
      "grad_norm": 2.288421869277954,
      "learning_rate": 8.767248012134914e-05,
      "loss": 0.1527,
      "step": 2299
    },
    {
      "epoch": 0.5537831818455426,
      "grad_norm": 6.408196449279785,
      "learning_rate": 8.759510412424113e-05,
      "loss": 0.6184,
      "step": 2300
    },
    {
      "epoch": 0.5540239571419973,
      "grad_norm": 4.457020282745361,
      "learning_rate": 8.751773566933774e-05,
      "loss": 0.665,
      "step": 2301
    },
    {
      "epoch": 0.5542647324384519,
      "grad_norm": 2.0285515785217285,
      "learning_rate": 8.744037480367921e-05,
      "loss": 0.9767,
      "step": 2302
    },
    {
      "epoch": 0.5545055077349064,
      "grad_norm": 4.255732536315918,
      "learning_rate": 8.736302157430107e-05,
      "loss": 0.7522,
      "step": 2303
    },
    {
      "epoch": 0.554746283031361,
      "grad_norm": 1.1508095264434814,
      "learning_rate": 8.728567602823429e-05,
      "loss": 0.4259,
      "step": 2304
    },
    {
      "epoch": 0.5549870583278156,
      "grad_norm": 0.9924709796905518,
      "learning_rate": 8.720833821250513e-05,
      "loss": 1.6025,
      "step": 2305
    },
    {
      "epoch": 0.5552278336242702,
      "grad_norm": 1.755651593208313,
      "learning_rate": 8.713100817413516e-05,
      "loss": 0.3882,
      "step": 2306
    },
    {
      "epoch": 0.5554686089207247,
      "grad_norm": 1.430647850036621,
      "learning_rate": 8.705368596014125e-05,
      "loss": 0.5597,
      "step": 2307
    },
    {
      "epoch": 0.5557093842171793,
      "grad_norm": 1.2561583518981934,
      "learning_rate": 8.697637161753538e-05,
      "loss": 0.8822,
      "step": 2308
    },
    {
      "epoch": 0.5559501595136339,
      "grad_norm": 1.0225826501846313,
      "learning_rate": 8.689906519332491e-05,
      "loss": 0.8633,
      "step": 2309
    },
    {
      "epoch": 0.5561909348100885,
      "grad_norm": 1.079167366027832,
      "learning_rate": 8.682176673451239e-05,
      "loss": 0.4746,
      "step": 2310
    },
    {
      "epoch": 0.556431710106543,
      "grad_norm": 1.3175033330917358,
      "learning_rate": 8.674447628809533e-05,
      "loss": 0.4305,
      "step": 2311
    },
    {
      "epoch": 0.5566724854029976,
      "grad_norm": 4.170149326324463,
      "learning_rate": 8.666719390106655e-05,
      "loss": 0.8164,
      "step": 2312
    },
    {
      "epoch": 0.5569132606994522,
      "grad_norm": 0.9638872742652893,
      "learning_rate": 8.658991962041395e-05,
      "loss": 0.5429,
      "step": 2313
    },
    {
      "epoch": 0.5571540359959068,
      "grad_norm": 1.9414424896240234,
      "learning_rate": 8.65126534931204e-05,
      "loss": 0.9223,
      "step": 2314
    },
    {
      "epoch": 0.5573948112923615,
      "grad_norm": 1.570064902305603,
      "learning_rate": 8.643539556616397e-05,
      "loss": 1.0301,
      "step": 2315
    },
    {
      "epoch": 0.557635586588816,
      "grad_norm": 3.4186506271362305,
      "learning_rate": 8.635814588651754e-05,
      "loss": 0.8375,
      "step": 2316
    },
    {
      "epoch": 0.5578763618852706,
      "grad_norm": 2.636807441711426,
      "learning_rate": 8.628090450114916e-05,
      "loss": 0.4639,
      "step": 2317
    },
    {
      "epoch": 0.5581171371817252,
      "grad_norm": 4.225121974945068,
      "learning_rate": 8.620367145702177e-05,
      "loss": 0.5046,
      "step": 2318
    },
    {
      "epoch": 0.5583579124781798,
      "grad_norm": 0.9116895198822021,
      "learning_rate": 8.612644680109319e-05,
      "loss": 0.2553,
      "step": 2319
    },
    {
      "epoch": 0.5585986877746343,
      "grad_norm": 2.3729517459869385,
      "learning_rate": 8.604923058031624e-05,
      "loss": 0.592,
      "step": 2320
    },
    {
      "epoch": 0.5588394630710889,
      "grad_norm": 1.5719141960144043,
      "learning_rate": 8.59720228416385e-05,
      "loss": 0.9508,
      "step": 2321
    },
    {
      "epoch": 0.5590802383675435,
      "grad_norm": 3.1368796825408936,
      "learning_rate": 8.589482363200247e-05,
      "loss": 0.9687,
      "step": 2322
    },
    {
      "epoch": 0.5593210136639981,
      "grad_norm": 2.1668570041656494,
      "learning_rate": 8.581763299834551e-05,
      "loss": 0.0668,
      "step": 2323
    },
    {
      "epoch": 0.5595617889604526,
      "grad_norm": 0.7108801007270813,
      "learning_rate": 8.57404509875996e-05,
      "loss": 0.2144,
      "step": 2324
    },
    {
      "epoch": 0.5598025642569072,
      "grad_norm": 2.860525369644165,
      "learning_rate": 8.56632776466916e-05,
      "loss": 0.619,
      "step": 2325
    },
    {
      "epoch": 0.5600433395533618,
      "grad_norm": 4.221729278564453,
      "learning_rate": 8.558611302254314e-05,
      "loss": 0.828,
      "step": 2326
    },
    {
      "epoch": 0.5602841148498164,
      "grad_norm": 1.6991534233093262,
      "learning_rate": 8.55089571620704e-05,
      "loss": 0.909,
      "step": 2327
    },
    {
      "epoch": 0.5605248901462709,
      "grad_norm": 4.212416648864746,
      "learning_rate": 8.543181011218437e-05,
      "loss": 1.5328,
      "step": 2328
    },
    {
      "epoch": 0.5607656654427255,
      "grad_norm": 4.365540504455566,
      "learning_rate": 8.535467191979058e-05,
      "loss": 0.6489,
      "step": 2329
    },
    {
      "epoch": 0.5610064407391802,
      "grad_norm": 0.9320734143257141,
      "learning_rate": 8.527754263178929e-05,
      "loss": 0.6582,
      "step": 2330
    },
    {
      "epoch": 0.5612472160356348,
      "grad_norm": 4.166979789733887,
      "learning_rate": 8.520042229507528e-05,
      "loss": 0.5757,
      "step": 2331
    },
    {
      "epoch": 0.5614879913320894,
      "grad_norm": 3.1154069900512695,
      "learning_rate": 8.512331095653781e-05,
      "loss": 0.8792,
      "step": 2332
    },
    {
      "epoch": 0.5617287666285439,
      "grad_norm": 4.849252700805664,
      "learning_rate": 8.504620866306083e-05,
      "loss": 0.4272,
      "step": 2333
    },
    {
      "epoch": 0.5619695419249985,
      "grad_norm": 2.375708818435669,
      "learning_rate": 8.496911546152265e-05,
      "loss": 0.971,
      "step": 2334
    },
    {
      "epoch": 0.5622103172214531,
      "grad_norm": 2.0698773860931396,
      "learning_rate": 8.489203139879612e-05,
      "loss": 0.4473,
      "step": 2335
    },
    {
      "epoch": 0.5624510925179077,
      "grad_norm": 6.773448944091797,
      "learning_rate": 8.481495652174859e-05,
      "loss": 0.363,
      "step": 2336
    },
    {
      "epoch": 0.5626918678143622,
      "grad_norm": 5.320286750793457,
      "learning_rate": 8.473789087724165e-05,
      "loss": 0.5259,
      "step": 2337
    },
    {
      "epoch": 0.5629326431108168,
      "grad_norm": 2.9927375316619873,
      "learning_rate": 8.466083451213144e-05,
      "loss": 0.5302,
      "step": 2338
    },
    {
      "epoch": 0.5631734184072714,
      "grad_norm": 1.8399150371551514,
      "learning_rate": 8.458378747326848e-05,
      "loss": 0.9814,
      "step": 2339
    },
    {
      "epoch": 0.563414193703726,
      "grad_norm": 1.0915262699127197,
      "learning_rate": 8.450674980749742e-05,
      "loss": 0.2,
      "step": 2340
    },
    {
      "epoch": 0.5636549690001805,
      "grad_norm": 6.239700794219971,
      "learning_rate": 8.442972156165738e-05,
      "loss": 0.754,
      "step": 2341
    },
    {
      "epoch": 0.5638957442966351,
      "grad_norm": 3.9862194061279297,
      "learning_rate": 8.435270278258172e-05,
      "loss": 0.306,
      "step": 2342
    },
    {
      "epoch": 0.5641365195930897,
      "grad_norm": 3.2919952869415283,
      "learning_rate": 8.427569351709801e-05,
      "loss": 0.776,
      "step": 2343
    },
    {
      "epoch": 0.5643772948895444,
      "grad_norm": 1.855094075202942,
      "learning_rate": 8.41986938120281e-05,
      "loss": 0.6994,
      "step": 2344
    },
    {
      "epoch": 0.564618070185999,
      "grad_norm": 1.7668780088424683,
      "learning_rate": 8.41217037141879e-05,
      "loss": 0.3419,
      "step": 2345
    },
    {
      "epoch": 0.5648588454824535,
      "grad_norm": 3.746309280395508,
      "learning_rate": 8.404472327038768e-05,
      "loss": 1.1026,
      "step": 2346
    },
    {
      "epoch": 0.5650996207789081,
      "grad_norm": 2.670344591140747,
      "learning_rate": 8.396775252743162e-05,
      "loss": 0.7391,
      "step": 2347
    },
    {
      "epoch": 0.5653403960753627,
      "grad_norm": 1.6550657749176025,
      "learning_rate": 8.389079153211814e-05,
      "loss": 0.4773,
      "step": 2348
    },
    {
      "epoch": 0.5655811713718173,
      "grad_norm": 2.2174558639526367,
      "learning_rate": 8.381384033123974e-05,
      "loss": 0.6246,
      "step": 2349
    },
    {
      "epoch": 0.5658219466682718,
      "grad_norm": 0.4945906400680542,
      "learning_rate": 8.373689897158284e-05,
      "loss": 0.1936,
      "step": 2350
    },
    {
      "epoch": 0.5660627219647264,
      "grad_norm": 1.8350954055786133,
      "learning_rate": 8.365996749992801e-05,
      "loss": 0.3785,
      "step": 2351
    },
    {
      "epoch": 0.566303497261181,
      "grad_norm": 0.7016525864601135,
      "learning_rate": 8.358304596304982e-05,
      "loss": 0.567,
      "step": 2352
    },
    {
      "epoch": 0.5665442725576356,
      "grad_norm": 5.016156196594238,
      "learning_rate": 8.35061344077166e-05,
      "loss": 0.6756,
      "step": 2353
    },
    {
      "epoch": 0.5667850478540901,
      "grad_norm": 1.9168941974639893,
      "learning_rate": 8.342923288069086e-05,
      "loss": 0.888,
      "step": 2354
    },
    {
      "epoch": 0.5670258231505447,
      "grad_norm": 1.5404551029205322,
      "learning_rate": 8.335234142872885e-05,
      "loss": 0.4729,
      "step": 2355
    },
    {
      "epoch": 0.5672665984469993,
      "grad_norm": 1.9677037000656128,
      "learning_rate": 8.327546009858074e-05,
      "loss": 0.3468,
      "step": 2356
    },
    {
      "epoch": 0.567507373743454,
      "grad_norm": 1.9757428169250488,
      "learning_rate": 8.319858893699059e-05,
      "loss": 0.2262,
      "step": 2357
    },
    {
      "epoch": 0.5677481490399086,
      "grad_norm": 1.3826395273208618,
      "learning_rate": 8.312172799069621e-05,
      "loss": 0.5705,
      "step": 2358
    },
    {
      "epoch": 0.567988924336363,
      "grad_norm": 1.7746422290802002,
      "learning_rate": 8.304487730642929e-05,
      "loss": 0.7911,
      "step": 2359
    },
    {
      "epoch": 0.5682296996328177,
      "grad_norm": 1.2216047048568726,
      "learning_rate": 8.296803693091511e-05,
      "loss": 0.5022,
      "step": 2360
    },
    {
      "epoch": 0.5684704749292723,
      "grad_norm": 0.8310643434524536,
      "learning_rate": 8.289120691087285e-05,
      "loss": 0.3669,
      "step": 2361
    },
    {
      "epoch": 0.5687112502257269,
      "grad_norm": 0.5129712820053101,
      "learning_rate": 8.281438729301536e-05,
      "loss": 0.436,
      "step": 2362
    },
    {
      "epoch": 0.5689520255221814,
      "grad_norm": 3.883026599884033,
      "learning_rate": 8.27375781240491e-05,
      "loss": 1.1273,
      "step": 2363
    },
    {
      "epoch": 0.569192800818636,
      "grad_norm": 2.724834680557251,
      "learning_rate": 8.266077945067424e-05,
      "loss": 0.8467,
      "step": 2364
    },
    {
      "epoch": 0.5694335761150906,
      "grad_norm": 2.839754343032837,
      "learning_rate": 8.258399131958454e-05,
      "loss": 0.9973,
      "step": 2365
    },
    {
      "epoch": 0.5696743514115452,
      "grad_norm": 1.3639193773269653,
      "learning_rate": 8.250721377746734e-05,
      "loss": 0.3668,
      "step": 2366
    },
    {
      "epoch": 0.5699151267079997,
      "grad_norm": 4.23447322845459,
      "learning_rate": 8.243044687100363e-05,
      "loss": 0.3128,
      "step": 2367
    },
    {
      "epoch": 0.5701559020044543,
      "grad_norm": 1.0347940921783447,
      "learning_rate": 8.235369064686776e-05,
      "loss": 0.4905,
      "step": 2368
    },
    {
      "epoch": 0.5703966773009089,
      "grad_norm": 3.1089839935302734,
      "learning_rate": 8.227694515172773e-05,
      "loss": 0.4338,
      "step": 2369
    },
    {
      "epoch": 0.5706374525973635,
      "grad_norm": 4.172400951385498,
      "learning_rate": 8.2200210432245e-05,
      "loss": 1.1137,
      "step": 2370
    },
    {
      "epoch": 0.5708782278938181,
      "grad_norm": 3.9930694103240967,
      "learning_rate": 8.21234865350744e-05,
      "loss": 0.1863,
      "step": 2371
    },
    {
      "epoch": 0.5711190031902726,
      "grad_norm": 2.020798921585083,
      "learning_rate": 8.204677350686432e-05,
      "loss": 0.379,
      "step": 2372
    },
    {
      "epoch": 0.5713597784867273,
      "grad_norm": 3.6490232944488525,
      "learning_rate": 8.197007139425631e-05,
      "loss": 0.4755,
      "step": 2373
    },
    {
      "epoch": 0.5716005537831819,
      "grad_norm": 2.922484874725342,
      "learning_rate": 8.189338024388557e-05,
      "loss": 1.0381,
      "step": 2374
    },
    {
      "epoch": 0.5718413290796365,
      "grad_norm": 3.1068320274353027,
      "learning_rate": 8.181670010238046e-05,
      "loss": 0.8434,
      "step": 2375
    },
    {
      "epoch": 0.572082104376091,
      "grad_norm": 2.6153829097747803,
      "learning_rate": 8.174003101636261e-05,
      "loss": 1.216,
      "step": 2376
    },
    {
      "epoch": 0.5723228796725456,
      "grad_norm": 1.5444633960723877,
      "learning_rate": 8.166337303244705e-05,
      "loss": 0.5995,
      "step": 2377
    },
    {
      "epoch": 0.5725636549690002,
      "grad_norm": 4.170453071594238,
      "learning_rate": 8.158672619724203e-05,
      "loss": 0.6781,
      "step": 2378
    },
    {
      "epoch": 0.5728044302654548,
      "grad_norm": 4.247837543487549,
      "learning_rate": 8.151009055734893e-05,
      "loss": 0.3414,
      "step": 2379
    },
    {
      "epoch": 0.5730452055619093,
      "grad_norm": 1.5872865915298462,
      "learning_rate": 8.143346615936247e-05,
      "loss": 0.6584,
      "step": 2380
    },
    {
      "epoch": 0.5732859808583639,
      "grad_norm": 1.2567731142044067,
      "learning_rate": 8.135685304987039e-05,
      "loss": 0.6352,
      "step": 2381
    },
    {
      "epoch": 0.5735267561548185,
      "grad_norm": 3.6656978130340576,
      "learning_rate": 8.128025127545362e-05,
      "loss": 1.2404,
      "step": 2382
    },
    {
      "epoch": 0.5737675314512731,
      "grad_norm": 2.5888733863830566,
      "learning_rate": 8.120366088268632e-05,
      "loss": 0.3153,
      "step": 2383
    },
    {
      "epoch": 0.5740083067477277,
      "grad_norm": 0.779647171497345,
      "learning_rate": 8.112708191813552e-05,
      "loss": 0.4345,
      "step": 2384
    },
    {
      "epoch": 0.5742490820441822,
      "grad_norm": 0.7447169423103333,
      "learning_rate": 8.105051442836145e-05,
      "loss": 0.2654,
      "step": 2385
    },
    {
      "epoch": 0.5744898573406368,
      "grad_norm": 0.5837435722351074,
      "learning_rate": 8.097395845991727e-05,
      "loss": 0.5411,
      "step": 2386
    },
    {
      "epoch": 0.5747306326370915,
      "grad_norm": 1.1887192726135254,
      "learning_rate": 8.089741405934922e-05,
      "loss": 0.3803,
      "step": 2387
    },
    {
      "epoch": 0.5749714079335461,
      "grad_norm": 2.842036724090576,
      "learning_rate": 8.08208812731965e-05,
      "loss": 0.8952,
      "step": 2388
    },
    {
      "epoch": 0.5752121832300006,
      "grad_norm": 3.2157955169677734,
      "learning_rate": 8.074436014799114e-05,
      "loss": 0.2237,
      "step": 2389
    },
    {
      "epoch": 0.5754529585264552,
      "grad_norm": 3.5656988620758057,
      "learning_rate": 8.06678507302582e-05,
      "loss": 0.2823,
      "step": 2390
    },
    {
      "epoch": 0.5756937338229098,
      "grad_norm": 2.8901267051696777,
      "learning_rate": 8.059135306651557e-05,
      "loss": 0.6339,
      "step": 2391
    },
    {
      "epoch": 0.5759345091193644,
      "grad_norm": 1.380159854888916,
      "learning_rate": 8.0514867203274e-05,
      "loss": 1.2206,
      "step": 2392
    },
    {
      "epoch": 0.5761752844158189,
      "grad_norm": 5.57066011428833,
      "learning_rate": 8.043839318703709e-05,
      "loss": 0.932,
      "step": 2393
    },
    {
      "epoch": 0.5764160597122735,
      "grad_norm": 2.786633253097534,
      "learning_rate": 8.036193106430118e-05,
      "loss": 0.4513,
      "step": 2394
    },
    {
      "epoch": 0.5766568350087281,
      "grad_norm": 2.4537577629089355,
      "learning_rate": 8.028548088155542e-05,
      "loss": 0.8421,
      "step": 2395
    },
    {
      "epoch": 0.5768976103051827,
      "grad_norm": 3.0305957794189453,
      "learning_rate": 8.020904268528175e-05,
      "loss": 0.7525,
      "step": 2396
    },
    {
      "epoch": 0.5771383856016373,
      "grad_norm": 1.3954887390136719,
      "learning_rate": 8.013261652195466e-05,
      "loss": 0.3742,
      "step": 2397
    },
    {
      "epoch": 0.5773791608980918,
      "grad_norm": 2.359279155731201,
      "learning_rate": 8.00562024380415e-05,
      "loss": 1.0482,
      "step": 2398
    },
    {
      "epoch": 0.5776199361945464,
      "grad_norm": 4.190445899963379,
      "learning_rate": 7.99798004800022e-05,
      "loss": 1.031,
      "step": 2399
    },
    {
      "epoch": 0.577860711491001,
      "grad_norm": 3.663658618927002,
      "learning_rate": 7.990341069428931e-05,
      "loss": 0.4797,
      "step": 2400
    },
    {
      "epoch": 0.5781014867874557,
      "grad_norm": 1.6564805507659912,
      "learning_rate": 7.9827033127348e-05,
      "loss": 0.8746,
      "step": 2401
    },
    {
      "epoch": 0.5783422620839102,
      "grad_norm": 1.9383180141448975,
      "learning_rate": 7.9750667825616e-05,
      "loss": 0.7385,
      "step": 2402
    },
    {
      "epoch": 0.5785830373803648,
      "grad_norm": 0.28531309962272644,
      "learning_rate": 7.967431483552356e-05,
      "loss": 0.2861,
      "step": 2403
    },
    {
      "epoch": 0.5788238126768194,
      "grad_norm": 2.478971004486084,
      "learning_rate": 7.959797420349355e-05,
      "loss": 0.3581,
      "step": 2404
    },
    {
      "epoch": 0.579064587973274,
      "grad_norm": 3.229998826980591,
      "learning_rate": 7.952164597594115e-05,
      "loss": 0.7698,
      "step": 2405
    },
    {
      "epoch": 0.5793053632697285,
      "grad_norm": 1.7557350397109985,
      "learning_rate": 7.944533019927414e-05,
      "loss": 0.758,
      "step": 2406
    },
    {
      "epoch": 0.5795461385661831,
      "grad_norm": 7.241235256195068,
      "learning_rate": 7.936902691989267e-05,
      "loss": 1.014,
      "step": 2407
    },
    {
      "epoch": 0.5797869138626377,
      "grad_norm": 4.189211368560791,
      "learning_rate": 7.929273618418933e-05,
      "loss": 0.7462,
      "step": 2408
    },
    {
      "epoch": 0.5800276891590923,
      "grad_norm": 4.705471515655518,
      "learning_rate": 7.921645803854907e-05,
      "loss": 0.9831,
      "step": 2409
    },
    {
      "epoch": 0.5802684644555468,
      "grad_norm": 2.0091071128845215,
      "learning_rate": 7.914019252934908e-05,
      "loss": 0.8221,
      "step": 2410
    },
    {
      "epoch": 0.5805092397520014,
      "grad_norm": 2.49102783203125,
      "learning_rate": 7.906393970295905e-05,
      "loss": 0.5716,
      "step": 2411
    },
    {
      "epoch": 0.580750015048456,
      "grad_norm": 2.925053119659424,
      "learning_rate": 7.89876996057409e-05,
      "loss": 0.9088,
      "step": 2412
    },
    {
      "epoch": 0.5809907903449106,
      "grad_norm": 4.885961532592773,
      "learning_rate": 7.891147228404869e-05,
      "loss": 0.5873,
      "step": 2413
    },
    {
      "epoch": 0.5812315656413652,
      "grad_norm": 1.2176140546798706,
      "learning_rate": 7.883525778422887e-05,
      "loss": 0.8426,
      "step": 2414
    },
    {
      "epoch": 0.5814723409378197,
      "grad_norm": 1.188421368598938,
      "learning_rate": 7.875905615261997e-05,
      "loss": 0.6984,
      "step": 2415
    },
    {
      "epoch": 0.5817131162342744,
      "grad_norm": 3.3436102867126465,
      "learning_rate": 7.868286743555279e-05,
      "loss": 0.7285,
      "step": 2416
    },
    {
      "epoch": 0.581953891530729,
      "grad_norm": 2.9441144466400146,
      "learning_rate": 7.860669167935028e-05,
      "loss": 0.3281,
      "step": 2417
    },
    {
      "epoch": 0.5821946668271836,
      "grad_norm": 1.1844704151153564,
      "learning_rate": 7.853052893032736e-05,
      "loss": 0.3296,
      "step": 2418
    },
    {
      "epoch": 0.5824354421236381,
      "grad_norm": 1.122290849685669,
      "learning_rate": 7.84543792347913e-05,
      "loss": 0.3637,
      "step": 2419
    },
    {
      "epoch": 0.5826762174200927,
      "grad_norm": 0.8115438222885132,
      "learning_rate": 7.837824263904116e-05,
      "loss": 0.1266,
      "step": 2420
    },
    {
      "epoch": 0.5829169927165473,
      "grad_norm": 2.9317989349365234,
      "learning_rate": 7.83021191893682e-05,
      "loss": 0.7098,
      "step": 2421
    },
    {
      "epoch": 0.5831577680130019,
      "grad_norm": 2.4324686527252197,
      "learning_rate": 7.822600893205569e-05,
      "loss": 0.384,
      "step": 2422
    },
    {
      "epoch": 0.5833985433094564,
      "grad_norm": 1.8341871500015259,
      "learning_rate": 7.814991191337877e-05,
      "loss": 0.6857,
      "step": 2423
    },
    {
      "epoch": 0.583639318605911,
      "grad_norm": 0.9151331782341003,
      "learning_rate": 7.807382817960464e-05,
      "loss": 0.2521,
      "step": 2424
    },
    {
      "epoch": 0.5838800939023656,
      "grad_norm": 3.2553586959838867,
      "learning_rate": 7.799775777699243e-05,
      "loss": 0.277,
      "step": 2425
    },
    {
      "epoch": 0.5841208691988202,
      "grad_norm": 5.162132263183594,
      "learning_rate": 7.792170075179302e-05,
      "loss": 0.3815,
      "step": 2426
    },
    {
      "epoch": 0.5843616444952748,
      "grad_norm": 0.9806712865829468,
      "learning_rate": 7.784565715024932e-05,
      "loss": 0.6379,
      "step": 2427
    },
    {
      "epoch": 0.5846024197917293,
      "grad_norm": 2.116602897644043,
      "learning_rate": 7.776962701859596e-05,
      "loss": 0.3267,
      "step": 2428
    },
    {
      "epoch": 0.5848431950881839,
      "grad_norm": 2.120924472808838,
      "learning_rate": 7.769361040305944e-05,
      "loss": 0.5844,
      "step": 2429
    },
    {
      "epoch": 0.5850839703846386,
      "grad_norm": 1.5902043581008911,
      "learning_rate": 7.76176073498581e-05,
      "loss": 0.3459,
      "step": 2430
    },
    {
      "epoch": 0.5853247456810932,
      "grad_norm": 2.4817397594451904,
      "learning_rate": 7.75416179052019e-05,
      "loss": 0.4125,
      "step": 2431
    },
    {
      "epoch": 0.5855655209775477,
      "grad_norm": 1.6833219528198242,
      "learning_rate": 7.746564211529264e-05,
      "loss": 0.422,
      "step": 2432
    },
    {
      "epoch": 0.5858062962740023,
      "grad_norm": 9.756152153015137,
      "learning_rate": 7.73896800263237e-05,
      "loss": 1.028,
      "step": 2433
    },
    {
      "epoch": 0.5860470715704569,
      "grad_norm": 1.5745316743850708,
      "learning_rate": 7.731373168448027e-05,
      "loss": 0.6256,
      "step": 2434
    },
    {
      "epoch": 0.5862878468669115,
      "grad_norm": 3.160309314727783,
      "learning_rate": 7.723779713593908e-05,
      "loss": 0.4354,
      "step": 2435
    },
    {
      "epoch": 0.586528622163366,
      "grad_norm": 1.0206762552261353,
      "learning_rate": 7.716187642686851e-05,
      "loss": 0.7593,
      "step": 2436
    },
    {
      "epoch": 0.5867693974598206,
      "grad_norm": 2.535022020339966,
      "learning_rate": 7.708596960342852e-05,
      "loss": 0.5759,
      "step": 2437
    },
    {
      "epoch": 0.5870101727562752,
      "grad_norm": 1.5657432079315186,
      "learning_rate": 7.701007671177067e-05,
      "loss": 0.4912,
      "step": 2438
    },
    {
      "epoch": 0.5872509480527298,
      "grad_norm": 0.8812488317489624,
      "learning_rate": 7.693419779803794e-05,
      "loss": 0.3876,
      "step": 2439
    },
    {
      "epoch": 0.5874917233491844,
      "grad_norm": 2.174088954925537,
      "learning_rate": 7.685833290836497e-05,
      "loss": 0.6519,
      "step": 2440
    },
    {
      "epoch": 0.5877324986456389,
      "grad_norm": 1.4618853330612183,
      "learning_rate": 7.678248208887767e-05,
      "loss": 0.1547,
      "step": 2441
    },
    {
      "epoch": 0.5879732739420935,
      "grad_norm": 1.048917293548584,
      "learning_rate": 7.670664538569358e-05,
      "loss": 0.7463,
      "step": 2442
    },
    {
      "epoch": 0.5882140492385481,
      "grad_norm": 0.559017539024353,
      "learning_rate": 7.663082284492161e-05,
      "loss": 0.4063,
      "step": 2443
    },
    {
      "epoch": 0.5884548245350028,
      "grad_norm": 1.1241803169250488,
      "learning_rate": 7.655501451266197e-05,
      "loss": 0.6386,
      "step": 2444
    },
    {
      "epoch": 0.5886955998314573,
      "grad_norm": 1.063376545906067,
      "learning_rate": 7.647922043500637e-05,
      "loss": 0.7574,
      "step": 2445
    },
    {
      "epoch": 0.5889363751279119,
      "grad_norm": 0.6335359811782837,
      "learning_rate": 7.640344065803768e-05,
      "loss": 0.4932,
      "step": 2446
    },
    {
      "epoch": 0.5891771504243665,
      "grad_norm": 5.459598064422607,
      "learning_rate": 7.632767522783025e-05,
      "loss": 0.7634,
      "step": 2447
    },
    {
      "epoch": 0.5894179257208211,
      "grad_norm": 2.985459089279175,
      "learning_rate": 7.625192419044966e-05,
      "loss": 1.101,
      "step": 2448
    },
    {
      "epoch": 0.5896587010172756,
      "grad_norm": 1.2922786474227905,
      "learning_rate": 7.617618759195262e-05,
      "loss": 0.2012,
      "step": 2449
    },
    {
      "epoch": 0.5898994763137302,
      "grad_norm": 3.0108511447906494,
      "learning_rate": 7.61004654783872e-05,
      "loss": 0.4321,
      "step": 2450
    },
    {
      "epoch": 0.5901402516101848,
      "grad_norm": 1.4148467779159546,
      "learning_rate": 7.602475789579265e-05,
      "loss": 0.4081,
      "step": 2451
    },
    {
      "epoch": 0.5903810269066394,
      "grad_norm": 3.129077911376953,
      "learning_rate": 7.594906489019928e-05,
      "loss": 0.3434,
      "step": 2452
    },
    {
      "epoch": 0.590621802203094,
      "grad_norm": 2.397958993911743,
      "learning_rate": 7.58733865076287e-05,
      "loss": 0.6114,
      "step": 2453
    },
    {
      "epoch": 0.5908625774995485,
      "grad_norm": 1.5747435092926025,
      "learning_rate": 7.579772279409342e-05,
      "loss": 0.6804,
      "step": 2454
    },
    {
      "epoch": 0.5911033527960031,
      "grad_norm": 2.1680166721343994,
      "learning_rate": 7.572207379559721e-05,
      "loss": 0.5044,
      "step": 2455
    },
    {
      "epoch": 0.5913441280924577,
      "grad_norm": 0.6241942644119263,
      "learning_rate": 7.564643955813489e-05,
      "loss": 0.7446,
      "step": 2456
    },
    {
      "epoch": 0.5915849033889123,
      "grad_norm": 4.499767780303955,
      "learning_rate": 7.557082012769213e-05,
      "loss": 0.8841,
      "step": 2457
    },
    {
      "epoch": 0.5918256786853668,
      "grad_norm": 1.2002981901168823,
      "learning_rate": 7.549521555024582e-05,
      "loss": 0.2635,
      "step": 2458
    },
    {
      "epoch": 0.5920664539818215,
      "grad_norm": 1.4949264526367188,
      "learning_rate": 7.541962587176361e-05,
      "loss": 0.3554,
      "step": 2459
    },
    {
      "epoch": 0.5923072292782761,
      "grad_norm": 3.360037326812744,
      "learning_rate": 7.534405113820427e-05,
      "loss": 0.1464,
      "step": 2460
    },
    {
      "epoch": 0.5925480045747307,
      "grad_norm": 1.4905561208724976,
      "learning_rate": 7.526849139551744e-05,
      "loss": 0.3034,
      "step": 2461
    },
    {
      "epoch": 0.5927887798711852,
      "grad_norm": 1.9774373769760132,
      "learning_rate": 7.51929466896435e-05,
      "loss": 0.7877,
      "step": 2462
    },
    {
      "epoch": 0.5930295551676398,
      "grad_norm": 1.1401469707489014,
      "learning_rate": 7.511741706651384e-05,
      "loss": 0.7026,
      "step": 2463
    },
    {
      "epoch": 0.5932703304640944,
      "grad_norm": 2.067647695541382,
      "learning_rate": 7.504190257205075e-05,
      "loss": 0.5986,
      "step": 2464
    },
    {
      "epoch": 0.593511105760549,
      "grad_norm": 2.496720790863037,
      "learning_rate": 7.496640325216708e-05,
      "loss": 0.4588,
      "step": 2465
    },
    {
      "epoch": 0.5937518810570036,
      "grad_norm": 0.8668129444122314,
      "learning_rate": 7.489091915276664e-05,
      "loss": 0.2105,
      "step": 2466
    },
    {
      "epoch": 0.5939926563534581,
      "grad_norm": 2.7461793422698975,
      "learning_rate": 7.481545031974392e-05,
      "loss": 0.829,
      "step": 2467
    },
    {
      "epoch": 0.5942334316499127,
      "grad_norm": 2.240567207336426,
      "learning_rate": 7.473999679898414e-05,
      "loss": 0.3528,
      "step": 2468
    },
    {
      "epoch": 0.5944742069463673,
      "grad_norm": 3.95941162109375,
      "learning_rate": 7.466455863636326e-05,
      "loss": 0.5933,
      "step": 2469
    },
    {
      "epoch": 0.5947149822428219,
      "grad_norm": 3.9699573516845703,
      "learning_rate": 7.458913587774777e-05,
      "loss": 1.0409,
      "step": 2470
    },
    {
      "epoch": 0.5949557575392764,
      "grad_norm": 1.2216235399246216,
      "learning_rate": 7.451372856899494e-05,
      "loss": 0.2177,
      "step": 2471
    },
    {
      "epoch": 0.595196532835731,
      "grad_norm": 7.556828022003174,
      "learning_rate": 7.443833675595255e-05,
      "loss": 1.0671,
      "step": 2472
    },
    {
      "epoch": 0.5954373081321856,
      "grad_norm": 3.3300185203552246,
      "learning_rate": 7.436296048445899e-05,
      "loss": 0.4654,
      "step": 2473
    },
    {
      "epoch": 0.5956780834286403,
      "grad_norm": 1.8964426517486572,
      "learning_rate": 7.428759980034324e-05,
      "loss": 0.8291,
      "step": 2474
    },
    {
      "epoch": 0.5959188587250948,
      "grad_norm": 2.5427963733673096,
      "learning_rate": 7.421225474942472e-05,
      "loss": 0.7374,
      "step": 2475
    },
    {
      "epoch": 0.5961596340215494,
      "grad_norm": 2.4423563480377197,
      "learning_rate": 7.413692537751341e-05,
      "loss": 0.5469,
      "step": 2476
    },
    {
      "epoch": 0.596400409318004,
      "grad_norm": 0.9203125834465027,
      "learning_rate": 7.40616117304098e-05,
      "loss": 0.3301,
      "step": 2477
    },
    {
      "epoch": 0.5966411846144586,
      "grad_norm": 2.929774284362793,
      "learning_rate": 7.398631385390464e-05,
      "loss": 0.4724,
      "step": 2478
    },
    {
      "epoch": 0.5968819599109132,
      "grad_norm": 1.285556674003601,
      "learning_rate": 7.391103179377927e-05,
      "loss": 0.5685,
      "step": 2479
    },
    {
      "epoch": 0.5971227352073677,
      "grad_norm": 2.1298601627349854,
      "learning_rate": 7.383576559580537e-05,
      "loss": 0.4238,
      "step": 2480
    },
    {
      "epoch": 0.5973635105038223,
      "grad_norm": 3.565706968307495,
      "learning_rate": 7.37605153057449e-05,
      "loss": 0.8206,
      "step": 2481
    },
    {
      "epoch": 0.5976042858002769,
      "grad_norm": 1.6290020942687988,
      "learning_rate": 7.368528096935028e-05,
      "loss": 0.7003,
      "step": 2482
    },
    {
      "epoch": 0.5978450610967315,
      "grad_norm": 0.8494675755500793,
      "learning_rate": 7.361006263236409e-05,
      "loss": 0.4155,
      "step": 2483
    },
    {
      "epoch": 0.598085836393186,
      "grad_norm": 6.1263508796691895,
      "learning_rate": 7.353486034051933e-05,
      "loss": 0.3906,
      "step": 2484
    },
    {
      "epoch": 0.5983266116896406,
      "grad_norm": 2.4993395805358887,
      "learning_rate": 7.345967413953906e-05,
      "loss": 0.8447,
      "step": 2485
    },
    {
      "epoch": 0.5985673869860952,
      "grad_norm": 1.5568212270736694,
      "learning_rate": 7.338450407513671e-05,
      "loss": 0.3203,
      "step": 2486
    },
    {
      "epoch": 0.5988081622825498,
      "grad_norm": 1.6858243942260742,
      "learning_rate": 7.330935019301587e-05,
      "loss": 0.5842,
      "step": 2487
    },
    {
      "epoch": 0.5990489375790043,
      "grad_norm": 1.3046417236328125,
      "learning_rate": 7.323421253887022e-05,
      "loss": 0.4694,
      "step": 2488
    },
    {
      "epoch": 0.599289712875459,
      "grad_norm": 2.6327929496765137,
      "learning_rate": 7.315909115838367e-05,
      "loss": 1.0909,
      "step": 2489
    },
    {
      "epoch": 0.5995304881719136,
      "grad_norm": 3.176302909851074,
      "learning_rate": 7.308398609723019e-05,
      "loss": 0.6372,
      "step": 2490
    },
    {
      "epoch": 0.5997712634683682,
      "grad_norm": 0.5345314145088196,
      "learning_rate": 7.300889740107376e-05,
      "loss": 0.1974,
      "step": 2491
    },
    {
      "epoch": 0.6000120387648227,
      "grad_norm": 3.916313886642456,
      "learning_rate": 7.293382511556856e-05,
      "loss": 1.1176,
      "step": 2492
    },
    {
      "epoch": 0.6002528140612773,
      "grad_norm": 1.7653621435165405,
      "learning_rate": 7.285876928635864e-05,
      "loss": 0.6719,
      "step": 2493
    },
    {
      "epoch": 0.6004935893577319,
      "grad_norm": 1.351683497428894,
      "learning_rate": 7.278372995907815e-05,
      "loss": 0.4,
      "step": 2494
    },
    {
      "epoch": 0.6007343646541865,
      "grad_norm": 1.6684333086013794,
      "learning_rate": 7.270870717935119e-05,
      "loss": 0.5533,
      "step": 2495
    },
    {
      "epoch": 0.6009751399506411,
      "grad_norm": 2.9273340702056885,
      "learning_rate": 7.263370099279172e-05,
      "loss": 0.7937,
      "step": 2496
    },
    {
      "epoch": 0.6012159152470956,
      "grad_norm": 1.5765647888183594,
      "learning_rate": 7.255871144500375e-05,
      "loss": 1.2214,
      "step": 2497
    },
    {
      "epoch": 0.6014566905435502,
      "grad_norm": 0.8081079125404358,
      "learning_rate": 7.248373858158099e-05,
      "loss": 0.2137,
      "step": 2498
    },
    {
      "epoch": 0.6016974658400048,
      "grad_norm": 1.117992877960205,
      "learning_rate": 7.240878244810718e-05,
      "loss": 0.3442,
      "step": 2499
    },
    {
      "epoch": 0.6019382411364594,
      "grad_norm": 2.1289424896240234,
      "learning_rate": 7.233384309015584e-05,
      "loss": 0.673,
      "step": 2500
    },
    {
      "epoch": 0.6021790164329139,
      "grad_norm": 1.3627246618270874,
      "learning_rate": 7.22589205532902e-05,
      "loss": 0.6268,
      "step": 2501
    },
    {
      "epoch": 0.6024197917293685,
      "grad_norm": 1.519879937171936,
      "learning_rate": 7.218401488306337e-05,
      "loss": 0.272,
      "step": 2502
    },
    {
      "epoch": 0.6026605670258232,
      "grad_norm": 2.85306978225708,
      "learning_rate": 7.210912612501817e-05,
      "loss": 0.6996,
      "step": 2503
    },
    {
      "epoch": 0.6029013423222778,
      "grad_norm": 1.7936211824417114,
      "learning_rate": 7.20342543246871e-05,
      "loss": 0.4967,
      "step": 2504
    },
    {
      "epoch": 0.6031421176187323,
      "grad_norm": 3.3115148544311523,
      "learning_rate": 7.195939952759248e-05,
      "loss": 0.2885,
      "step": 2505
    },
    {
      "epoch": 0.6033828929151869,
      "grad_norm": 0.8520734906196594,
      "learning_rate": 7.188456177924605e-05,
      "loss": 0.7537,
      "step": 2506
    },
    {
      "epoch": 0.6036236682116415,
      "grad_norm": 0.48104944825172424,
      "learning_rate": 7.180974112514943e-05,
      "loss": 0.1885,
      "step": 2507
    },
    {
      "epoch": 0.6038644435080961,
      "grad_norm": 0.6579359769821167,
      "learning_rate": 7.173493761079372e-05,
      "loss": 0.5065,
      "step": 2508
    },
    {
      "epoch": 0.6041052188045507,
      "grad_norm": 1.0354386568069458,
      "learning_rate": 7.166015128165962e-05,
      "loss": 0.2026,
      "step": 2509
    },
    {
      "epoch": 0.6043459941010052,
      "grad_norm": 0.9975037574768066,
      "learning_rate": 7.158538218321739e-05,
      "loss": 0.3232,
      "step": 2510
    },
    {
      "epoch": 0.6045867693974598,
      "grad_norm": 1.1760191917419434,
      "learning_rate": 7.15106303609268e-05,
      "loss": 0.8285,
      "step": 2511
    },
    {
      "epoch": 0.6048275446939144,
      "grad_norm": 1.8876464366912842,
      "learning_rate": 7.143589586023715e-05,
      "loss": 0.3947,
      "step": 2512
    },
    {
      "epoch": 0.605068319990369,
      "grad_norm": 1.618282437324524,
      "learning_rate": 7.136117872658721e-05,
      "loss": 0.5223,
      "step": 2513
    },
    {
      "epoch": 0.6053090952868235,
      "grad_norm": 3.051154851913452,
      "learning_rate": 7.128647900540506e-05,
      "loss": 0.6019,
      "step": 2514
    },
    {
      "epoch": 0.6055498705832781,
      "grad_norm": 1.742828130722046,
      "learning_rate": 7.121179674210841e-05,
      "loss": 0.4666,
      "step": 2515
    },
    {
      "epoch": 0.6057906458797327,
      "grad_norm": 1.7815309762954712,
      "learning_rate": 7.11371319821042e-05,
      "loss": 0.9279,
      "step": 2516
    },
    {
      "epoch": 0.6060314211761874,
      "grad_norm": 2.8688342571258545,
      "learning_rate": 7.106248477078874e-05,
      "loss": 0.7174,
      "step": 2517
    },
    {
      "epoch": 0.6062721964726419,
      "grad_norm": 0.4165075123310089,
      "learning_rate": 7.09878551535478e-05,
      "loss": 0.286,
      "step": 2518
    },
    {
      "epoch": 0.6065129717690965,
      "grad_norm": 1.0387226343154907,
      "learning_rate": 7.091324317575623e-05,
      "loss": 0.5322,
      "step": 2519
    },
    {
      "epoch": 0.6067537470655511,
      "grad_norm": 3.8460330963134766,
      "learning_rate": 7.083864888277833e-05,
      "loss": 0.5769,
      "step": 2520
    },
    {
      "epoch": 0.6069945223620057,
      "grad_norm": 3.067915201187134,
      "learning_rate": 7.076407231996768e-05,
      "loss": 0.8518,
      "step": 2521
    },
    {
      "epoch": 0.6072352976584603,
      "grad_norm": 0.5079042315483093,
      "learning_rate": 7.06895135326669e-05,
      "loss": 0.7296,
      "step": 2522
    },
    {
      "epoch": 0.6074760729549148,
      "grad_norm": 2.1162655353546143,
      "learning_rate": 7.061497256620793e-05,
      "loss": 0.4852,
      "step": 2523
    },
    {
      "epoch": 0.6077168482513694,
      "grad_norm": 0.4626848101615906,
      "learning_rate": 7.054044946591184e-05,
      "loss": 0.4988,
      "step": 2524
    },
    {
      "epoch": 0.607957623547824,
      "grad_norm": 1.59532630443573,
      "learning_rate": 7.046594427708882e-05,
      "loss": 0.2568,
      "step": 2525
    },
    {
      "epoch": 0.6081983988442786,
      "grad_norm": 4.2776384353637695,
      "learning_rate": 7.039145704503829e-05,
      "loss": 0.9273,
      "step": 2526
    },
    {
      "epoch": 0.6084391741407331,
      "grad_norm": 2.9044902324676514,
      "learning_rate": 7.031698781504849e-05,
      "loss": 0.6728,
      "step": 2527
    },
    {
      "epoch": 0.6086799494371877,
      "grad_norm": 4.779917240142822,
      "learning_rate": 7.024253663239704e-05,
      "loss": 0.6652,
      "step": 2528
    },
    {
      "epoch": 0.6089207247336423,
      "grad_norm": 2.6108837127685547,
      "learning_rate": 7.016810354235038e-05,
      "loss": 0.6116,
      "step": 2529
    },
    {
      "epoch": 0.609161500030097,
      "grad_norm": 0.8134174942970276,
      "learning_rate": 7.009368859016393e-05,
      "loss": 0.5276,
      "step": 2530
    },
    {
      "epoch": 0.6094022753265514,
      "grad_norm": 0.9418330192565918,
      "learning_rate": 7.001929182108223e-05,
      "loss": 0.315,
      "step": 2531
    },
    {
      "epoch": 0.609643050623006,
      "grad_norm": 1.0904672145843506,
      "learning_rate": 6.994491328033862e-05,
      "loss": 0.4043,
      "step": 2532
    },
    {
      "epoch": 0.6098838259194607,
      "grad_norm": 3.003647565841675,
      "learning_rate": 6.987055301315546e-05,
      "loss": 1.1199,
      "step": 2533
    },
    {
      "epoch": 0.6101246012159153,
      "grad_norm": 5.428164958953857,
      "learning_rate": 6.979621106474399e-05,
      "loss": 0.6681,
      "step": 2534
    },
    {
      "epoch": 0.6103653765123699,
      "grad_norm": 3.2087454795837402,
      "learning_rate": 6.972188748030419e-05,
      "loss": 0.629,
      "step": 2535
    },
    {
      "epoch": 0.6106061518088244,
      "grad_norm": 4.468095779418945,
      "learning_rate": 6.964758230502503e-05,
      "loss": 0.9202,
      "step": 2536
    },
    {
      "epoch": 0.610846927105279,
      "grad_norm": 3.3546736240386963,
      "learning_rate": 6.957329558408423e-05,
      "loss": 0.1201,
      "step": 2537
    },
    {
      "epoch": 0.6110877024017336,
      "grad_norm": 1.5020190477371216,
      "learning_rate": 6.949902736264823e-05,
      "loss": 0.7108,
      "step": 2538
    },
    {
      "epoch": 0.6113284776981882,
      "grad_norm": 2.882939577102661,
      "learning_rate": 6.942477768587237e-05,
      "loss": 0.8403,
      "step": 2539
    },
    {
      "epoch": 0.6115692529946427,
      "grad_norm": 2.0902936458587646,
      "learning_rate": 6.935054659890052e-05,
      "loss": 0.5279,
      "step": 2540
    },
    {
      "epoch": 0.6118100282910973,
      "grad_norm": 0.7436076402664185,
      "learning_rate": 6.92763341468654e-05,
      "loss": 0.2855,
      "step": 2541
    },
    {
      "epoch": 0.6120508035875519,
      "grad_norm": 1.6438990831375122,
      "learning_rate": 6.920214037488837e-05,
      "loss": 0.9144,
      "step": 2542
    },
    {
      "epoch": 0.6122915788840065,
      "grad_norm": 4.628514766693115,
      "learning_rate": 6.912796532807934e-05,
      "loss": 0.4584,
      "step": 2543
    },
    {
      "epoch": 0.612532354180461,
      "grad_norm": 1.0494173765182495,
      "learning_rate": 6.905380905153699e-05,
      "loss": 0.8366,
      "step": 2544
    },
    {
      "epoch": 0.6127731294769156,
      "grad_norm": 1.7360765933990479,
      "learning_rate": 6.897967159034842e-05,
      "loss": 0.5281,
      "step": 2545
    },
    {
      "epoch": 0.6130139047733703,
      "grad_norm": 1.996323823928833,
      "learning_rate": 6.89055529895894e-05,
      "loss": 0.9566,
      "step": 2546
    },
    {
      "epoch": 0.6132546800698249,
      "grad_norm": 1.1495094299316406,
      "learning_rate": 6.883145329432427e-05,
      "loss": 0.2774,
      "step": 2547
    },
    {
      "epoch": 0.6134954553662795,
      "grad_norm": 1.1022604703903198,
      "learning_rate": 6.875737254960573e-05,
      "loss": 0.5837,
      "step": 2548
    },
    {
      "epoch": 0.613736230662734,
      "grad_norm": 1.230012059211731,
      "learning_rate": 6.86833108004751e-05,
      "loss": 0.3625,
      "step": 2549
    },
    {
      "epoch": 0.6139770059591886,
      "grad_norm": 2.1149914264678955,
      "learning_rate": 6.860926809196202e-05,
      "loss": 0.5104,
      "step": 2550
    },
    {
      "epoch": 0.6142177812556432,
      "grad_norm": 1.968151330947876,
      "learning_rate": 6.853524446908469e-05,
      "loss": 0.2779,
      "step": 2551
    },
    {
      "epoch": 0.6144585565520978,
      "grad_norm": 2.429940938949585,
      "learning_rate": 6.84612399768496e-05,
      "loss": 0.6746,
      "step": 2552
    },
    {
      "epoch": 0.6146993318485523,
      "grad_norm": 5.285004138946533,
      "learning_rate": 6.838725466025165e-05,
      "loss": 1.0503,
      "step": 2553
    },
    {
      "epoch": 0.6149401071450069,
      "grad_norm": 2.815894842147827,
      "learning_rate": 6.83132885642741e-05,
      "loss": 0.7085,
      "step": 2554
    },
    {
      "epoch": 0.6151808824414615,
      "grad_norm": 0.6075404286384583,
      "learning_rate": 6.823934173388851e-05,
      "loss": 0.772,
      "step": 2555
    },
    {
      "epoch": 0.6154216577379161,
      "grad_norm": 2.521939992904663,
      "learning_rate": 6.81654142140547e-05,
      "loss": 0.3452,
      "step": 2556
    },
    {
      "epoch": 0.6156624330343706,
      "grad_norm": 2.4346060752868652,
      "learning_rate": 6.809150604972079e-05,
      "loss": 0.4844,
      "step": 2557
    },
    {
      "epoch": 0.6159032083308252,
      "grad_norm": 3.9991800785064697,
      "learning_rate": 6.801761728582305e-05,
      "loss": 0.4445,
      "step": 2558
    },
    {
      "epoch": 0.6161439836272798,
      "grad_norm": 1.3528568744659424,
      "learning_rate": 6.794374796728606e-05,
      "loss": 0.558,
      "step": 2559
    },
    {
      "epoch": 0.6163847589237345,
      "grad_norm": 1.2810922861099243,
      "learning_rate": 6.786989813902256e-05,
      "loss": 0.1497,
      "step": 2560
    },
    {
      "epoch": 0.616625534220189,
      "grad_norm": 2.726228713989258,
      "learning_rate": 6.779606784593335e-05,
      "loss": 0.6956,
      "step": 2561
    },
    {
      "epoch": 0.6168663095166436,
      "grad_norm": 1.971341609954834,
      "learning_rate": 6.77222571329075e-05,
      "loss": 0.5738,
      "step": 2562
    },
    {
      "epoch": 0.6171070848130982,
      "grad_norm": 1.7685867547988892,
      "learning_rate": 6.764846604482198e-05,
      "loss": 0.596,
      "step": 2563
    },
    {
      "epoch": 0.6173478601095528,
      "grad_norm": 2.118589162826538,
      "learning_rate": 6.7574694626542e-05,
      "loss": 0.6178,
      "step": 2564
    },
    {
      "epoch": 0.6175886354060074,
      "grad_norm": 1.7449718713760376,
      "learning_rate": 6.750094292292077e-05,
      "loss": 0.478,
      "step": 2565
    },
    {
      "epoch": 0.6178294107024619,
      "grad_norm": 3.4893815517425537,
      "learning_rate": 6.742721097879944e-05,
      "loss": 0.7634,
      "step": 2566
    },
    {
      "epoch": 0.6180701859989165,
      "grad_norm": 1.0635044574737549,
      "learning_rate": 6.735349883900723e-05,
      "loss": 0.4162,
      "step": 2567
    },
    {
      "epoch": 0.6183109612953711,
      "grad_norm": 3.9489662647247314,
      "learning_rate": 6.727980654836128e-05,
      "loss": 0.3658,
      "step": 2568
    },
    {
      "epoch": 0.6185517365918257,
      "grad_norm": 0.335372656583786,
      "learning_rate": 6.720613415166666e-05,
      "loss": 0.3209,
      "step": 2569
    },
    {
      "epoch": 0.6187925118882802,
      "grad_norm": 5.411129474639893,
      "learning_rate": 6.71324816937164e-05,
      "loss": 0.9387,
      "step": 2570
    },
    {
      "epoch": 0.6190332871847348,
      "grad_norm": 3.5781424045562744,
      "learning_rate": 6.705884921929129e-05,
      "loss": 0.389,
      "step": 2571
    },
    {
      "epoch": 0.6192740624811894,
      "grad_norm": 1.238693356513977,
      "learning_rate": 6.698523677316005e-05,
      "loss": 0.2619,
      "step": 2572
    },
    {
      "epoch": 0.619514837777644,
      "grad_norm": 1.2711673974990845,
      "learning_rate": 6.691164440007927e-05,
      "loss": 0.5968,
      "step": 2573
    },
    {
      "epoch": 0.6197556130740985,
      "grad_norm": 1.706970453262329,
      "learning_rate": 6.683807214479323e-05,
      "loss": 0.512,
      "step": 2574
    },
    {
      "epoch": 0.6199963883705532,
      "grad_norm": 0.9450774192810059,
      "learning_rate": 6.676452005203406e-05,
      "loss": 0.1936,
      "step": 2575
    },
    {
      "epoch": 0.6202371636670078,
      "grad_norm": 2.2367053031921387,
      "learning_rate": 6.669098816652154e-05,
      "loss": 0.7918,
      "step": 2576
    },
    {
      "epoch": 0.6204779389634624,
      "grad_norm": 2.209228277206421,
      "learning_rate": 6.661747653296328e-05,
      "loss": 0.8082,
      "step": 2577
    },
    {
      "epoch": 0.620718714259917,
      "grad_norm": 2.171247720718384,
      "learning_rate": 6.654398519605453e-05,
      "loss": 0.1968,
      "step": 2578
    },
    {
      "epoch": 0.6209594895563715,
      "grad_norm": 1.3200645446777344,
      "learning_rate": 6.647051420047811e-05,
      "loss": 0.3753,
      "step": 2579
    },
    {
      "epoch": 0.6212002648528261,
      "grad_norm": 1.0066230297088623,
      "learning_rate": 6.63970635909046e-05,
      "loss": 0.2922,
      "step": 2580
    },
    {
      "epoch": 0.6214410401492807,
      "grad_norm": 1.6534557342529297,
      "learning_rate": 6.632363341199216e-05,
      "loss": 0.1991,
      "step": 2581
    },
    {
      "epoch": 0.6216818154457353,
      "grad_norm": 0.7948190569877625,
      "learning_rate": 6.625022370838649e-05,
      "loss": 0.6887,
      "step": 2582
    },
    {
      "epoch": 0.6219225907421898,
      "grad_norm": 0.8418195843696594,
      "learning_rate": 6.617683452472084e-05,
      "loss": 0.3701,
      "step": 2583
    },
    {
      "epoch": 0.6221633660386444,
      "grad_norm": 0.7696940898895264,
      "learning_rate": 6.610346590561597e-05,
      "loss": 0.2781,
      "step": 2584
    },
    {
      "epoch": 0.622404141335099,
      "grad_norm": 4.234709739685059,
      "learning_rate": 6.603011789568021e-05,
      "loss": 0.4584,
      "step": 2585
    },
    {
      "epoch": 0.6226449166315536,
      "grad_norm": 2.2976908683776855,
      "learning_rate": 6.595679053950933e-05,
      "loss": 0.9089,
      "step": 2586
    },
    {
      "epoch": 0.6228856919280081,
      "grad_norm": 3.831660270690918,
      "learning_rate": 6.588348388168649e-05,
      "loss": 1.088,
      "step": 2587
    },
    {
      "epoch": 0.6231264672244627,
      "grad_norm": 1.276307463645935,
      "learning_rate": 6.581019796678231e-05,
      "loss": 0.5542,
      "step": 2588
    },
    {
      "epoch": 0.6233672425209174,
      "grad_norm": 1.1596672534942627,
      "learning_rate": 6.57369328393548e-05,
      "loss": 0.5181,
      "step": 2589
    },
    {
      "epoch": 0.623608017817372,
      "grad_norm": 37.87815856933594,
      "learning_rate": 6.566368854394931e-05,
      "loss": 0.9077,
      "step": 2590
    },
    {
      "epoch": 0.6238487931138266,
      "grad_norm": 2.3513474464416504,
      "learning_rate": 6.55904651250986e-05,
      "loss": 0.4156,
      "step": 2591
    },
    {
      "epoch": 0.6240895684102811,
      "grad_norm": 2.0487992763519287,
      "learning_rate": 6.551726262732253e-05,
      "loss": 0.2226,
      "step": 2592
    },
    {
      "epoch": 0.6243303437067357,
      "grad_norm": 2.3378994464874268,
      "learning_rate": 6.54440810951285e-05,
      "loss": 0.1514,
      "step": 2593
    },
    {
      "epoch": 0.6245711190031903,
      "grad_norm": 0.9720037579536438,
      "learning_rate": 6.537092057301107e-05,
      "loss": 0.4916,
      "step": 2594
    },
    {
      "epoch": 0.6248118942996449,
      "grad_norm": 9.787001609802246,
      "learning_rate": 6.529778110545191e-05,
      "loss": 0.5348,
      "step": 2595
    },
    {
      "epoch": 0.6250526695960994,
      "grad_norm": 1.4247881174087524,
      "learning_rate": 6.522466273692006e-05,
      "loss": 0.6283,
      "step": 2596
    },
    {
      "epoch": 0.625293444892554,
      "grad_norm": 0.8598988652229309,
      "learning_rate": 6.515156551187156e-05,
      "loss": 0.4231,
      "step": 2597
    },
    {
      "epoch": 0.6255342201890086,
      "grad_norm": 1.4908655881881714,
      "learning_rate": 6.507848947474976e-05,
      "loss": 0.5314,
      "step": 2598
    },
    {
      "epoch": 0.6257749954854632,
      "grad_norm": 1.3599947690963745,
      "learning_rate": 6.500543466998508e-05,
      "loss": 0.6969,
      "step": 2599
    },
    {
      "epoch": 0.6260157707819177,
      "grad_norm": 2.1686439514160156,
      "learning_rate": 6.49324011419949e-05,
      "loss": 1.0609,
      "step": 2600
    },
    {
      "epoch": 0.6262565460783723,
      "grad_norm": 2.399899482727051,
      "learning_rate": 6.48593889351839e-05,
      "loss": 0.433,
      "step": 2601
    },
    {
      "epoch": 0.6264973213748269,
      "grad_norm": 2.7841548919677734,
      "learning_rate": 6.478639809394355e-05,
      "loss": 0.9582,
      "step": 2602
    },
    {
      "epoch": 0.6267380966712816,
      "grad_norm": 1.8142119646072388,
      "learning_rate": 6.471342866265251e-05,
      "loss": 0.978,
      "step": 2603
    },
    {
      "epoch": 0.6269788719677362,
      "grad_norm": 2.9826908111572266,
      "learning_rate": 6.464048068567637e-05,
      "loss": 1.4267,
      "step": 2604
    },
    {
      "epoch": 0.6272196472641907,
      "grad_norm": 1.7013874053955078,
      "learning_rate": 6.45675542073676e-05,
      "loss": 0.7787,
      "step": 2605
    },
    {
      "epoch": 0.6274604225606453,
      "grad_norm": 2.441843032836914,
      "learning_rate": 6.44946492720657e-05,
      "loss": 0.6874,
      "step": 2606
    },
    {
      "epoch": 0.6277011978570999,
      "grad_norm": 2.108856439590454,
      "learning_rate": 6.44217659240971e-05,
      "loss": 0.1561,
      "step": 2607
    },
    {
      "epoch": 0.6279419731535545,
      "grad_norm": 2.3653995990753174,
      "learning_rate": 6.434890420777491e-05,
      "loss": 1.1851,
      "step": 2608
    },
    {
      "epoch": 0.628182748450009,
      "grad_norm": 2.6391708850860596,
      "learning_rate": 6.427606416739932e-05,
      "loss": 0.7138,
      "step": 2609
    },
    {
      "epoch": 0.6284235237464636,
      "grad_norm": 2.129570484161377,
      "learning_rate": 6.420324584725719e-05,
      "loss": 0.5445,
      "step": 2610
    },
    {
      "epoch": 0.6286642990429182,
      "grad_norm": 2.4864790439605713,
      "learning_rate": 6.413044929162221e-05,
      "loss": 0.3383,
      "step": 2611
    },
    {
      "epoch": 0.6289050743393728,
      "grad_norm": 1.6433722972869873,
      "learning_rate": 6.405767454475492e-05,
      "loss": 0.5752,
      "step": 2612
    },
    {
      "epoch": 0.6291458496358273,
      "grad_norm": 1.772709846496582,
      "learning_rate": 6.398492165090246e-05,
      "loss": 0.4523,
      "step": 2613
    },
    {
      "epoch": 0.6293866249322819,
      "grad_norm": 1.9673948287963867,
      "learning_rate": 6.391219065429882e-05,
      "loss": 0.7943,
      "step": 2614
    },
    {
      "epoch": 0.6296274002287365,
      "grad_norm": 1.1356581449508667,
      "learning_rate": 6.383948159916453e-05,
      "loss": 0.4172,
      "step": 2615
    },
    {
      "epoch": 0.6298681755251911,
      "grad_norm": 1.4601411819458008,
      "learning_rate": 6.376679452970689e-05,
      "loss": 0.6976,
      "step": 2616
    },
    {
      "epoch": 0.6301089508216458,
      "grad_norm": 1.9286946058273315,
      "learning_rate": 6.369412949011983e-05,
      "loss": 0.4763,
      "step": 2617
    },
    {
      "epoch": 0.6303497261181003,
      "grad_norm": 44.064327239990234,
      "learning_rate": 6.362148652458382e-05,
      "loss": 0.3406,
      "step": 2618
    },
    {
      "epoch": 0.6305905014145549,
      "grad_norm": 1.2986640930175781,
      "learning_rate": 6.354886567726596e-05,
      "loss": 0.3705,
      "step": 2619
    },
    {
      "epoch": 0.6308312767110095,
      "grad_norm": 2.005955219268799,
      "learning_rate": 6.347626699231995e-05,
      "loss": 0.7816,
      "step": 2620
    },
    {
      "epoch": 0.6310720520074641,
      "grad_norm": 5.238460063934326,
      "learning_rate": 6.340369051388583e-05,
      "loss": 0.8997,
      "step": 2621
    },
    {
      "epoch": 0.6313128273039186,
      "grad_norm": 2.330120801925659,
      "learning_rate": 6.33311362860904e-05,
      "loss": 1.2146,
      "step": 2622
    },
    {
      "epoch": 0.6315536026003732,
      "grad_norm": 1.597697138786316,
      "learning_rate": 6.325860435304668e-05,
      "loss": 0.5971,
      "step": 2623
    },
    {
      "epoch": 0.6317943778968278,
      "grad_norm": 3.000946521759033,
      "learning_rate": 6.318609475885427e-05,
      "loss": 1.0077,
      "step": 2624
    },
    {
      "epoch": 0.6320351531932824,
      "grad_norm": 1.6477075815200806,
      "learning_rate": 6.311360754759923e-05,
      "loss": 0.2647,
      "step": 2625
    },
    {
      "epoch": 0.6322759284897369,
      "grad_norm": 3.0683581829071045,
      "learning_rate": 6.30411427633539e-05,
      "loss": 0.5957,
      "step": 2626
    },
    {
      "epoch": 0.6325167037861915,
      "grad_norm": 2.284569025039673,
      "learning_rate": 6.296870045017704e-05,
      "loss": 0.5671,
      "step": 2627
    },
    {
      "epoch": 0.6327574790826461,
      "grad_norm": 1.2169504165649414,
      "learning_rate": 6.28962806521137e-05,
      "loss": 0.8968,
      "step": 2628
    },
    {
      "epoch": 0.6329982543791007,
      "grad_norm": 0.7323624491691589,
      "learning_rate": 6.282388341319534e-05,
      "loss": 0.2924,
      "step": 2629
    },
    {
      "epoch": 0.6332390296755553,
      "grad_norm": 1.0401545763015747,
      "learning_rate": 6.275150877743968e-05,
      "loss": 0.4025,
      "step": 2630
    },
    {
      "epoch": 0.6334798049720098,
      "grad_norm": 1.7448832988739014,
      "learning_rate": 6.267915678885054e-05,
      "loss": 0.5393,
      "step": 2631
    },
    {
      "epoch": 0.6337205802684645,
      "grad_norm": 2.505074977874756,
      "learning_rate": 6.260682749141816e-05,
      "loss": 0.5271,
      "step": 2632
    },
    {
      "epoch": 0.6339613555649191,
      "grad_norm": 1.562775731086731,
      "learning_rate": 6.253452092911893e-05,
      "loss": 0.6703,
      "step": 2633
    },
    {
      "epoch": 0.6342021308613737,
      "grad_norm": 7.656639099121094,
      "learning_rate": 6.24622371459154e-05,
      "loss": 0.6791,
      "step": 2634
    },
    {
      "epoch": 0.6344429061578282,
      "grad_norm": 1.8805732727050781,
      "learning_rate": 6.238997618575625e-05,
      "loss": 0.5853,
      "step": 2635
    },
    {
      "epoch": 0.6346836814542828,
      "grad_norm": 2.444486141204834,
      "learning_rate": 6.231773809257631e-05,
      "loss": 0.463,
      "step": 2636
    },
    {
      "epoch": 0.6349244567507374,
      "grad_norm": 2.5845704078674316,
      "learning_rate": 6.224552291029648e-05,
      "loss": 1.1734,
      "step": 2637
    },
    {
      "epoch": 0.635165232047192,
      "grad_norm": 2.266542434692383,
      "learning_rate": 6.217333068282383e-05,
      "loss": 0.6734,
      "step": 2638
    },
    {
      "epoch": 0.6354060073436465,
      "grad_norm": 1.0296664237976074,
      "learning_rate": 6.210116145405132e-05,
      "loss": 0.5709,
      "step": 2639
    },
    {
      "epoch": 0.6356467826401011,
      "grad_norm": 1.0958337783813477,
      "learning_rate": 6.202901526785806e-05,
      "loss": 0.4469,
      "step": 2640
    },
    {
      "epoch": 0.6358875579365557,
      "grad_norm": 4.823302745819092,
      "learning_rate": 6.195689216810903e-05,
      "loss": 0.5806,
      "step": 2641
    },
    {
      "epoch": 0.6361283332330103,
      "grad_norm": 2.3272790908813477,
      "learning_rate": 6.188479219865529e-05,
      "loss": 0.7147,
      "step": 2642
    },
    {
      "epoch": 0.6363691085294648,
      "grad_norm": 1.8934577703475952,
      "learning_rate": 6.181271540333379e-05,
      "loss": 0.4858,
      "step": 2643
    },
    {
      "epoch": 0.6366098838259194,
      "grad_norm": 3.648973226547241,
      "learning_rate": 6.174066182596734e-05,
      "loss": 0.6624,
      "step": 2644
    },
    {
      "epoch": 0.636850659122374,
      "grad_norm": 1.5058507919311523,
      "learning_rate": 6.166863151036468e-05,
      "loss": 0.4902,
      "step": 2645
    },
    {
      "epoch": 0.6370914344188287,
      "grad_norm": 1.450817584991455,
      "learning_rate": 6.159662450032046e-05,
      "loss": 0.5773,
      "step": 2646
    },
    {
      "epoch": 0.6373322097152833,
      "grad_norm": 1.6548353433609009,
      "learning_rate": 6.152464083961506e-05,
      "loss": 0.7833,
      "step": 2647
    },
    {
      "epoch": 0.6375729850117378,
      "grad_norm": 2.030325174331665,
      "learning_rate": 6.145268057201473e-05,
      "loss": 0.3633,
      "step": 2648
    },
    {
      "epoch": 0.6378137603081924,
      "grad_norm": 0.9832929968833923,
      "learning_rate": 6.138074374127141e-05,
      "loss": 0.8045,
      "step": 2649
    },
    {
      "epoch": 0.638054535604647,
      "grad_norm": 3.2297143936157227,
      "learning_rate": 6.130883039112292e-05,
      "loss": 0.8928,
      "step": 2650
    },
    {
      "epoch": 0.6382953109011016,
      "grad_norm": 1.0606904029846191,
      "learning_rate": 6.123694056529277e-05,
      "loss": 0.2497,
      "step": 2651
    },
    {
      "epoch": 0.6385360861975561,
      "grad_norm": 1.3311941623687744,
      "learning_rate": 6.116507430749005e-05,
      "loss": 1.0808,
      "step": 2652
    },
    {
      "epoch": 0.6387768614940107,
      "grad_norm": 1.9613722562789917,
      "learning_rate": 6.109323166140968e-05,
      "loss": 1.0504,
      "step": 2653
    },
    {
      "epoch": 0.6390176367904653,
      "grad_norm": 1.6116615533828735,
      "learning_rate": 6.102141267073207e-05,
      "loss": 0.5613,
      "step": 2654
    },
    {
      "epoch": 0.6392584120869199,
      "grad_norm": 1.1715492010116577,
      "learning_rate": 6.094961737912339e-05,
      "loss": 0.3594,
      "step": 2655
    },
    {
      "epoch": 0.6394991873833744,
      "grad_norm": 1.0914250612258911,
      "learning_rate": 6.087784583023535e-05,
      "loss": 0.4884,
      "step": 2656
    },
    {
      "epoch": 0.639739962679829,
      "grad_norm": 3.499825954437256,
      "learning_rate": 6.080609806770516e-05,
      "loss": 1.0449,
      "step": 2657
    },
    {
      "epoch": 0.6399807379762836,
      "grad_norm": 1.0770723819732666,
      "learning_rate": 6.073437413515566e-05,
      "loss": 0.5479,
      "step": 2658
    },
    {
      "epoch": 0.6402215132727382,
      "grad_norm": 1.7570230960845947,
      "learning_rate": 6.0662674076195194e-05,
      "loss": 0.3681,
      "step": 2659
    },
    {
      "epoch": 0.6404622885691929,
      "grad_norm": 1.4941871166229248,
      "learning_rate": 6.059099793441746e-05,
      "loss": 0.4912,
      "step": 2660
    },
    {
      "epoch": 0.6407030638656473,
      "grad_norm": 2.4457526206970215,
      "learning_rate": 6.05193457534018e-05,
      "loss": 0.7875,
      "step": 2661
    },
    {
      "epoch": 0.640943839162102,
      "grad_norm": 0.5564669966697693,
      "learning_rate": 6.044771757671286e-05,
      "loss": 0.4049,
      "step": 2662
    },
    {
      "epoch": 0.6411846144585566,
      "grad_norm": 2.4950759410858154,
      "learning_rate": 6.037611344790073e-05,
      "loss": 0.7389,
      "step": 2663
    },
    {
      "epoch": 0.6414253897550112,
      "grad_norm": 4.833079814910889,
      "learning_rate": 6.030453341050093e-05,
      "loss": 0.9588,
      "step": 2664
    },
    {
      "epoch": 0.6416661650514657,
      "grad_norm": 5.021981716156006,
      "learning_rate": 6.023297750803423e-05,
      "loss": 0.9724,
      "step": 2665
    },
    {
      "epoch": 0.6419069403479203,
      "grad_norm": 1.1995772123336792,
      "learning_rate": 6.0161445784006845e-05,
      "loss": 0.6185,
      "step": 2666
    },
    {
      "epoch": 0.6421477156443749,
      "grad_norm": 1.3051928281784058,
      "learning_rate": 6.008993828191013e-05,
      "loss": 0.382,
      "step": 2667
    },
    {
      "epoch": 0.6423884909408295,
      "grad_norm": 3.9973623752593994,
      "learning_rate": 6.001845504522086e-05,
      "loss": 0.4625,
      "step": 2668
    },
    {
      "epoch": 0.642629266237284,
      "grad_norm": 1.3775721788406372,
      "learning_rate": 5.994699611740102e-05,
      "loss": 0.5969,
      "step": 2669
    },
    {
      "epoch": 0.6428700415337386,
      "grad_norm": 1.1527438163757324,
      "learning_rate": 5.987556154189777e-05,
      "loss": 0.6209,
      "step": 2670
    },
    {
      "epoch": 0.6431108168301932,
      "grad_norm": 1.8281904458999634,
      "learning_rate": 5.98041513621435e-05,
      "loss": 0.636,
      "step": 2671
    },
    {
      "epoch": 0.6433515921266478,
      "grad_norm": 2.8507192134857178,
      "learning_rate": 5.973276562155581e-05,
      "loss": 0.8928,
      "step": 2672
    },
    {
      "epoch": 0.6435923674231024,
      "grad_norm": 2.5010929107666016,
      "learning_rate": 5.9661404363537287e-05,
      "loss": 1.1794,
      "step": 2673
    },
    {
      "epoch": 0.6438331427195569,
      "grad_norm": 2.6095519065856934,
      "learning_rate": 5.959006763147584e-05,
      "loss": 1.2778,
      "step": 2674
    },
    {
      "epoch": 0.6440739180160115,
      "grad_norm": 1.2294814586639404,
      "learning_rate": 5.951875546874428e-05,
      "loss": 0.3187,
      "step": 2675
    },
    {
      "epoch": 0.6443146933124662,
      "grad_norm": 4.2607316970825195,
      "learning_rate": 5.9447467918700614e-05,
      "loss": 0.4591,
      "step": 2676
    },
    {
      "epoch": 0.6445554686089208,
      "grad_norm": 1.8376699686050415,
      "learning_rate": 5.9376205024687835e-05,
      "loss": 0.6134,
      "step": 2677
    },
    {
      "epoch": 0.6447962439053753,
      "grad_norm": 1.8157228231430054,
      "learning_rate": 5.9304966830033907e-05,
      "loss": 0.6103,
      "step": 2678
    },
    {
      "epoch": 0.6450370192018299,
      "grad_norm": 1.8555665016174316,
      "learning_rate": 5.923375337805186e-05,
      "loss": 0.5274,
      "step": 2679
    },
    {
      "epoch": 0.6452777944982845,
      "grad_norm": 1.7795042991638184,
      "learning_rate": 5.916256471203958e-05,
      "loss": 0.6227,
      "step": 2680
    },
    {
      "epoch": 0.6455185697947391,
      "grad_norm": 2.5843842029571533,
      "learning_rate": 5.909140087527996e-05,
      "loss": 1.0466,
      "step": 2681
    },
    {
      "epoch": 0.6457593450911936,
      "grad_norm": 3.1203784942626953,
      "learning_rate": 5.9020261911040796e-05,
      "loss": 0.874,
      "step": 2682
    },
    {
      "epoch": 0.6460001203876482,
      "grad_norm": 2.341921091079712,
      "learning_rate": 5.89491478625747e-05,
      "loss": 0.3058,
      "step": 2683
    },
    {
      "epoch": 0.6462408956841028,
      "grad_norm": 3.351788282394409,
      "learning_rate": 5.8878058773119185e-05,
      "loss": 0.7692,
      "step": 2684
    },
    {
      "epoch": 0.6464816709805574,
      "grad_norm": 3.12622332572937,
      "learning_rate": 5.880699468589661e-05,
      "loss": 1.2673,
      "step": 2685
    },
    {
      "epoch": 0.646722446277012,
      "grad_norm": 1.2435944080352783,
      "learning_rate": 5.8735955644114046e-05,
      "loss": 0.7739,
      "step": 2686
    },
    {
      "epoch": 0.6469632215734665,
      "grad_norm": 0.624167799949646,
      "learning_rate": 5.866494169096348e-05,
      "loss": 0.7543,
      "step": 2687
    },
    {
      "epoch": 0.6472039968699211,
      "grad_norm": 1.9075957536697388,
      "learning_rate": 5.8593952869621436e-05,
      "loss": 0.4008,
      "step": 2688
    },
    {
      "epoch": 0.6474447721663757,
      "grad_norm": 2.9206345081329346,
      "learning_rate": 5.852298922324935e-05,
      "loss": 1.4938,
      "step": 2689
    },
    {
      "epoch": 0.6476855474628304,
      "grad_norm": 2.629241704940796,
      "learning_rate": 5.8452050794993275e-05,
      "loss": 0.55,
      "step": 2690
    },
    {
      "epoch": 0.6479263227592849,
      "grad_norm": 1.5018374919891357,
      "learning_rate": 5.8381137627983915e-05,
      "loss": 0.6912,
      "step": 2691
    },
    {
      "epoch": 0.6481670980557395,
      "grad_norm": 4.23275089263916,
      "learning_rate": 5.831024976533668e-05,
      "loss": 0.6019,
      "step": 2692
    },
    {
      "epoch": 0.6484078733521941,
      "grad_norm": 1.633299708366394,
      "learning_rate": 5.823938725015148e-05,
      "loss": 0.5051,
      "step": 2693
    },
    {
      "epoch": 0.6486486486486487,
      "grad_norm": 7.270058631896973,
      "learning_rate": 5.816855012551291e-05,
      "loss": 0.6392,
      "step": 2694
    },
    {
      "epoch": 0.6488894239451032,
      "grad_norm": 1.3112475872039795,
      "learning_rate": 5.809773843449011e-05,
      "loss": 0.4469,
      "step": 2695
    },
    {
      "epoch": 0.6491301992415578,
      "grad_norm": 1.903131365776062,
      "learning_rate": 5.802695222013676e-05,
      "loss": 0.381,
      "step": 2696
    },
    {
      "epoch": 0.6493709745380124,
      "grad_norm": 2.8745779991149902,
      "learning_rate": 5.795619152549102e-05,
      "loss": 0.8397,
      "step": 2697
    },
    {
      "epoch": 0.649611749834467,
      "grad_norm": 5.189337730407715,
      "learning_rate": 5.78854563935756e-05,
      "loss": 0.4703,
      "step": 2698
    },
    {
      "epoch": 0.6498525251309216,
      "grad_norm": 1.0739030838012695,
      "learning_rate": 5.781474686739754e-05,
      "loss": 0.6484,
      "step": 2699
    },
    {
      "epoch": 0.6500933004273761,
      "grad_norm": 1.2836296558380127,
      "learning_rate": 5.7744062989948464e-05,
      "loss": 0.5722,
      "step": 2700
    },
    {
      "epoch": 0.6503340757238307,
      "grad_norm": 7.686650276184082,
      "learning_rate": 5.767340480420426e-05,
      "loss": 0.4963,
      "step": 2701
    },
    {
      "epoch": 0.6505748510202853,
      "grad_norm": 2.3548178672790527,
      "learning_rate": 5.760277235312529e-05,
      "loss": 0.757,
      "step": 2702
    },
    {
      "epoch": 0.65081562631674,
      "grad_norm": 1.2598122358322144,
      "learning_rate": 5.753216567965626e-05,
      "loss": 0.397,
      "step": 2703
    },
    {
      "epoch": 0.6510564016131944,
      "grad_norm": 2.0660560131073,
      "learning_rate": 5.746158482672617e-05,
      "loss": 0.8815,
      "step": 2704
    },
    {
      "epoch": 0.6512971769096491,
      "grad_norm": 1.225748896598816,
      "learning_rate": 5.73910298372484e-05,
      "loss": 0.8635,
      "step": 2705
    },
    {
      "epoch": 0.6515379522061037,
      "grad_norm": 1.400314211845398,
      "learning_rate": 5.7320500754120434e-05,
      "loss": 0.5746,
      "step": 2706
    },
    {
      "epoch": 0.6517787275025583,
      "grad_norm": 2.5364537239074707,
      "learning_rate": 5.724999762022416e-05,
      "loss": 0.657,
      "step": 2707
    },
    {
      "epoch": 0.6520195027990128,
      "grad_norm": 2.604710340499878,
      "learning_rate": 5.717952047842571e-05,
      "loss": 0.564,
      "step": 2708
    },
    {
      "epoch": 0.6522602780954674,
      "grad_norm": 1.6602225303649902,
      "learning_rate": 5.710906937157523e-05,
      "loss": 0.7511,
      "step": 2709
    },
    {
      "epoch": 0.652501053391922,
      "grad_norm": 1.7182340621948242,
      "learning_rate": 5.7038644342507205e-05,
      "loss": 0.3137,
      "step": 2710
    },
    {
      "epoch": 0.6527418286883766,
      "grad_norm": 1.6290512084960938,
      "learning_rate": 5.6968245434040225e-05,
      "loss": 0.8388,
      "step": 2711
    },
    {
      "epoch": 0.6529826039848312,
      "grad_norm": 1.5818265676498413,
      "learning_rate": 5.689787268897697e-05,
      "loss": 0.3487,
      "step": 2712
    },
    {
      "epoch": 0.6532233792812857,
      "grad_norm": 3.120393753051758,
      "learning_rate": 5.682752615010427e-05,
      "loss": 0.9698,
      "step": 2713
    },
    {
      "epoch": 0.6534641545777403,
      "grad_norm": 1.175986886024475,
      "learning_rate": 5.6757205860192905e-05,
      "loss": 0.44,
      "step": 2714
    },
    {
      "epoch": 0.6537049298741949,
      "grad_norm": 1.2981712818145752,
      "learning_rate": 5.6686911861997795e-05,
      "loss": 0.2365,
      "step": 2715
    },
    {
      "epoch": 0.6539457051706495,
      "grad_norm": 1.5336500406265259,
      "learning_rate": 5.66166441982579e-05,
      "loss": 0.9387,
      "step": 2716
    },
    {
      "epoch": 0.654186480467104,
      "grad_norm": 1.408539056777954,
      "learning_rate": 5.654640291169604e-05,
      "loss": 0.6778,
      "step": 2717
    },
    {
      "epoch": 0.6544272557635586,
      "grad_norm": 0.782248854637146,
      "learning_rate": 5.647618804501915e-05,
      "loss": 0.803,
      "step": 2718
    },
    {
      "epoch": 0.6546680310600133,
      "grad_norm": 2.8610918521881104,
      "learning_rate": 5.640599964091791e-05,
      "loss": 0.828,
      "step": 2719
    },
    {
      "epoch": 0.6549088063564679,
      "grad_norm": 2.616344690322876,
      "learning_rate": 5.6335837742067145e-05,
      "loss": 0.8763,
      "step": 2720
    },
    {
      "epoch": 0.6551495816529224,
      "grad_norm": 2.1979098320007324,
      "learning_rate": 5.6265702391125444e-05,
      "loss": 0.383,
      "step": 2721
    },
    {
      "epoch": 0.655390356949377,
      "grad_norm": 2.1095833778381348,
      "learning_rate": 5.6195593630735185e-05,
      "loss": 0.6834,
      "step": 2722
    },
    {
      "epoch": 0.6556311322458316,
      "grad_norm": 3.0333285331726074,
      "learning_rate": 5.61255115035227e-05,
      "loss": 0.4893,
      "step": 2723
    },
    {
      "epoch": 0.6558719075422862,
      "grad_norm": 3.033856153488159,
      "learning_rate": 5.60554560520981e-05,
      "loss": 0.6269,
      "step": 2724
    },
    {
      "epoch": 0.6561126828387407,
      "grad_norm": 1.2351047992706299,
      "learning_rate": 5.5985427319055195e-05,
      "loss": 0.2402,
      "step": 2725
    },
    {
      "epoch": 0.6563534581351953,
      "grad_norm": 0.572399914264679,
      "learning_rate": 5.5915425346971683e-05,
      "loss": 0.4192,
      "step": 2726
    },
    {
      "epoch": 0.6565942334316499,
      "grad_norm": 16.815656661987305,
      "learning_rate": 5.584545017840885e-05,
      "loss": 0.7806,
      "step": 2727
    },
    {
      "epoch": 0.6568350087281045,
      "grad_norm": 1.1614093780517578,
      "learning_rate": 5.577550185591174e-05,
      "loss": 0.7885,
      "step": 2728
    },
    {
      "epoch": 0.6570757840245591,
      "grad_norm": 0.8286291360855103,
      "learning_rate": 5.570558042200923e-05,
      "loss": 0.3164,
      "step": 2729
    },
    {
      "epoch": 0.6573165593210136,
      "grad_norm": 1.0357860326766968,
      "learning_rate": 5.563568591921358e-05,
      "loss": 0.2793,
      "step": 2730
    },
    {
      "epoch": 0.6575573346174682,
      "grad_norm": 1.7955443859100342,
      "learning_rate": 5.5565818390020886e-05,
      "loss": 0.4327,
      "step": 2731
    },
    {
      "epoch": 0.6577981099139228,
      "grad_norm": 25.76378631591797,
      "learning_rate": 5.5495977876910675e-05,
      "loss": 1.1611,
      "step": 2732
    },
    {
      "epoch": 0.6580388852103775,
      "grad_norm": 1.9328374862670898,
      "learning_rate": 5.542616442234618e-05,
      "loss": 0.4974,
      "step": 2733
    },
    {
      "epoch": 0.658279660506832,
      "grad_norm": 1.2386356592178345,
      "learning_rate": 5.535637806877419e-05,
      "loss": 0.8787,
      "step": 2734
    },
    {
      "epoch": 0.6585204358032866,
      "grad_norm": 3.446852684020996,
      "learning_rate": 5.5286618858624874e-05,
      "loss": 0.4847,
      "step": 2735
    },
    {
      "epoch": 0.6587612110997412,
      "grad_norm": 1.4139548540115356,
      "learning_rate": 5.5216886834312e-05,
      "loss": 0.6242,
      "step": 2736
    },
    {
      "epoch": 0.6590019863961958,
      "grad_norm": 0.9220794439315796,
      "learning_rate": 5.51471820382329e-05,
      "loss": 0.3538,
      "step": 2737
    },
    {
      "epoch": 0.6592427616926503,
      "grad_norm": 0.6702575087547302,
      "learning_rate": 5.507750451276814e-05,
      "loss": 0.4045,
      "step": 2738
    },
    {
      "epoch": 0.6594835369891049,
      "grad_norm": 0.9865245223045349,
      "learning_rate": 5.500785430028188e-05,
      "loss": 0.4853,
      "step": 2739
    },
    {
      "epoch": 0.6597243122855595,
      "grad_norm": 0.48536545038223267,
      "learning_rate": 5.4938231443121546e-05,
      "loss": 0.2637,
      "step": 2740
    },
    {
      "epoch": 0.6599650875820141,
      "grad_norm": 10.391051292419434,
      "learning_rate": 5.4868635983618014e-05,
      "loss": 0.4469,
      "step": 2741
    },
    {
      "epoch": 0.6602058628784687,
      "grad_norm": 1.1766020059585571,
      "learning_rate": 5.4799067964085526e-05,
      "loss": 0.3332,
      "step": 2742
    },
    {
      "epoch": 0.6604466381749232,
      "grad_norm": 1.8510931730270386,
      "learning_rate": 5.4729527426821514e-05,
      "loss": 0.6154,
      "step": 2743
    },
    {
      "epoch": 0.6606874134713778,
      "grad_norm": 1.353156328201294,
      "learning_rate": 5.466001441410682e-05,
      "loss": 0.3427,
      "step": 2744
    },
    {
      "epoch": 0.6609281887678324,
      "grad_norm": 1.8044596910476685,
      "learning_rate": 5.459052896820551e-05,
      "loss": 0.2089,
      "step": 2745
    },
    {
      "epoch": 0.661168964064287,
      "grad_norm": 2.0654067993164062,
      "learning_rate": 5.4521071131364906e-05,
      "loss": 0.781,
      "step": 2746
    },
    {
      "epoch": 0.6614097393607415,
      "grad_norm": 1.7442854642868042,
      "learning_rate": 5.4451640945815564e-05,
      "loss": 0.3432,
      "step": 2747
    },
    {
      "epoch": 0.6616505146571962,
      "grad_norm": 1.4665995836257935,
      "learning_rate": 5.438223845377111e-05,
      "loss": 0.5632,
      "step": 2748
    },
    {
      "epoch": 0.6618912899536508,
      "grad_norm": 3.585455894470215,
      "learning_rate": 5.431286369742844e-05,
      "loss": 1.3084,
      "step": 2749
    },
    {
      "epoch": 0.6621320652501054,
      "grad_norm": 3.5575718879699707,
      "learning_rate": 5.424351671896761e-05,
      "loss": 0.3184,
      "step": 2750
    },
    {
      "epoch": 0.6623728405465599,
      "grad_norm": 17.34886932373047,
      "learning_rate": 5.4174197560551685e-05,
      "loss": 1.1173,
      "step": 2751
    },
    {
      "epoch": 0.6626136158430145,
      "grad_norm": 2.6678974628448486,
      "learning_rate": 5.4104906264326884e-05,
      "loss": 0.6505,
      "step": 2752
    },
    {
      "epoch": 0.6628543911394691,
      "grad_norm": 4.722652912139893,
      "learning_rate": 5.403564287242248e-05,
      "loss": 1.3932,
      "step": 2753
    },
    {
      "epoch": 0.6630951664359237,
      "grad_norm": 1.4697628021240234,
      "learning_rate": 5.396640742695076e-05,
      "loss": 0.5875,
      "step": 2754
    },
    {
      "epoch": 0.6633359417323783,
      "grad_norm": 2.3093106746673584,
      "learning_rate": 5.389719997000708e-05,
      "loss": 0.8362,
      "step": 2755
    },
    {
      "epoch": 0.6635767170288328,
      "grad_norm": 1.6689960956573486,
      "learning_rate": 5.382802054366966e-05,
      "loss": 0.3395,
      "step": 2756
    },
    {
      "epoch": 0.6638174923252874,
      "grad_norm": 1.0874135494232178,
      "learning_rate": 5.37588691899998e-05,
      "loss": 0.757,
      "step": 2757
    },
    {
      "epoch": 0.664058267621742,
      "grad_norm": 1.348419427871704,
      "learning_rate": 5.3689745951041626e-05,
      "loss": 0.6488,
      "step": 2758
    },
    {
      "epoch": 0.6642990429181966,
      "grad_norm": 2.6158974170684814,
      "learning_rate": 5.3620650868822256e-05,
      "loss": 0.251,
      "step": 2759
    },
    {
      "epoch": 0.6645398182146511,
      "grad_norm": 1.7711026668548584,
      "learning_rate": 5.3551583985351636e-05,
      "loss": 0.607,
      "step": 2760
    },
    {
      "epoch": 0.6647805935111057,
      "grad_norm": 1.2356005907058716,
      "learning_rate": 5.348254534262262e-05,
      "loss": 0.2628,
      "step": 2761
    },
    {
      "epoch": 0.6650213688075604,
      "grad_norm": 4.986912727355957,
      "learning_rate": 5.3413534982610836e-05,
      "loss": 0.4364,
      "step": 2762
    },
    {
      "epoch": 0.665262144104015,
      "grad_norm": 2.007636785507202,
      "learning_rate": 5.3344552947274776e-05,
      "loss": 0.6385,
      "step": 2763
    },
    {
      "epoch": 0.6655029194004695,
      "grad_norm": 1.343567132949829,
      "learning_rate": 5.32755992785556e-05,
      "loss": 0.4478,
      "step": 2764
    },
    {
      "epoch": 0.6657436946969241,
      "grad_norm": 0.5190923810005188,
      "learning_rate": 5.320667401837738e-05,
      "loss": 0.5346,
      "step": 2765
    },
    {
      "epoch": 0.6659844699933787,
      "grad_norm": 1.11277437210083,
      "learning_rate": 5.313777720864674e-05,
      "loss": 0.2615,
      "step": 2766
    },
    {
      "epoch": 0.6662252452898333,
      "grad_norm": 1.8751391172409058,
      "learning_rate": 5.3068908891253134e-05,
      "loss": 0.632,
      "step": 2767
    },
    {
      "epoch": 0.6664660205862879,
      "grad_norm": 1.2252461910247803,
      "learning_rate": 5.3000069108068674e-05,
      "loss": 0.2374,
      "step": 2768
    },
    {
      "epoch": 0.6667067958827424,
      "grad_norm": 0.743096113204956,
      "learning_rate": 5.293125790094809e-05,
      "loss": 0.4381,
      "step": 2769
    },
    {
      "epoch": 0.666947571179197,
      "grad_norm": 1.818084478378296,
      "learning_rate": 5.286247531172877e-05,
      "loss": 0.3025,
      "step": 2770
    },
    {
      "epoch": 0.6671883464756516,
      "grad_norm": 1.5191317796707153,
      "learning_rate": 5.2793721382230624e-05,
      "loss": 0.8121,
      "step": 2771
    },
    {
      "epoch": 0.6674291217721062,
      "grad_norm": 1.5100692510604858,
      "learning_rate": 5.272499615425624e-05,
      "loss": 0.2726,
      "step": 2772
    },
    {
      "epoch": 0.6676698970685607,
      "grad_norm": 3.111989974975586,
      "learning_rate": 5.2656299669590756e-05,
      "loss": 0.9423,
      "step": 2773
    },
    {
      "epoch": 0.6679106723650153,
      "grad_norm": 2.021303415298462,
      "learning_rate": 5.2587631970001697e-05,
      "loss": 0.6443,
      "step": 2774
    },
    {
      "epoch": 0.6681514476614699,
      "grad_norm": 3.0573856830596924,
      "learning_rate": 5.251899309723921e-05,
      "loss": 0.7831,
      "step": 2775
    },
    {
      "epoch": 0.6683922229579246,
      "grad_norm": 3.3146016597747803,
      "learning_rate": 5.2450383093035905e-05,
      "loss": 0.4461,
      "step": 2776
    },
    {
      "epoch": 0.668632998254379,
      "grad_norm": 2.406733989715576,
      "learning_rate": 5.2381801999106806e-05,
      "loss": 0.9433,
      "step": 2777
    },
    {
      "epoch": 0.6688737735508337,
      "grad_norm": 1.2738914489746094,
      "learning_rate": 5.2313249857149414e-05,
      "loss": 0.5913,
      "step": 2778
    },
    {
      "epoch": 0.6691145488472883,
      "grad_norm": 1.1874566078186035,
      "learning_rate": 5.2244726708843516e-05,
      "loss": 0.6504,
      "step": 2779
    },
    {
      "epoch": 0.6693553241437429,
      "grad_norm": 2.1480422019958496,
      "learning_rate": 5.217623259585136e-05,
      "loss": 0.9203,
      "step": 2780
    },
    {
      "epoch": 0.6695960994401975,
      "grad_norm": 3.107542037963867,
      "learning_rate": 5.2107767559817586e-05,
      "loss": 0.4462,
      "step": 2781
    },
    {
      "epoch": 0.669836874736652,
      "grad_norm": 1.504056453704834,
      "learning_rate": 5.2039331642369004e-05,
      "loss": 0.6573,
      "step": 2782
    },
    {
      "epoch": 0.6700776500331066,
      "grad_norm": 0.6759874224662781,
      "learning_rate": 5.197092488511482e-05,
      "loss": 0.284,
      "step": 2783
    },
    {
      "epoch": 0.6703184253295612,
      "grad_norm": 0.775605320930481,
      "learning_rate": 5.1902547329646536e-05,
      "loss": 0.5416,
      "step": 2784
    },
    {
      "epoch": 0.6705592006260158,
      "grad_norm": 0.199252650141716,
      "learning_rate": 5.1834199017537834e-05,
      "loss": 0.4752,
      "step": 2785
    },
    {
      "epoch": 0.6707999759224703,
      "grad_norm": 3.240574359893799,
      "learning_rate": 5.176587999034468e-05,
      "loss": 0.6012,
      "step": 2786
    },
    {
      "epoch": 0.6710407512189249,
      "grad_norm": 3.1757545471191406,
      "learning_rate": 5.1697590289605136e-05,
      "loss": 1.1006,
      "step": 2787
    },
    {
      "epoch": 0.6712815265153795,
      "grad_norm": 1.6893806457519531,
      "learning_rate": 5.162932995683951e-05,
      "loss": 0.7829,
      "step": 2788
    },
    {
      "epoch": 0.6715223018118341,
      "grad_norm": 1.8709641695022583,
      "learning_rate": 5.156109903355031e-05,
      "loss": 0.9529,
      "step": 2789
    },
    {
      "epoch": 0.6717630771082886,
      "grad_norm": 1.4354405403137207,
      "learning_rate": 5.1492897561221976e-05,
      "loss": 0.3602,
      "step": 2790
    },
    {
      "epoch": 0.6720038524047433,
      "grad_norm": 1.0905989408493042,
      "learning_rate": 5.142472558132125e-05,
      "loss": 0.6715,
      "step": 2791
    },
    {
      "epoch": 0.6722446277011979,
      "grad_norm": 0.8870560526847839,
      "learning_rate": 5.1356583135296744e-05,
      "loss": 0.3977,
      "step": 2792
    },
    {
      "epoch": 0.6724854029976525,
      "grad_norm": 2.73464298248291,
      "learning_rate": 5.1288470264579327e-05,
      "loss": 0.8096,
      "step": 2793
    },
    {
      "epoch": 0.6727261782941071,
      "grad_norm": 3.191851854324341,
      "learning_rate": 5.122038701058176e-05,
      "loss": 0.8286,
      "step": 2794
    },
    {
      "epoch": 0.6729669535905616,
      "grad_norm": 0.7643956542015076,
      "learning_rate": 5.115233341469877e-05,
      "loss": 0.3281,
      "step": 2795
    },
    {
      "epoch": 0.6732077288870162,
      "grad_norm": 4.106560230255127,
      "learning_rate": 5.108430951830716e-05,
      "loss": 0.7662,
      "step": 2796
    },
    {
      "epoch": 0.6734485041834708,
      "grad_norm": 1.1065987348556519,
      "learning_rate": 5.101631536276552e-05,
      "loss": 0.9248,
      "step": 2797
    },
    {
      "epoch": 0.6736892794799254,
      "grad_norm": 2.439703941345215,
      "learning_rate": 5.094835098941451e-05,
      "loss": 1.0613,
      "step": 2798
    },
    {
      "epoch": 0.6739300547763799,
      "grad_norm": 1.7999013662338257,
      "learning_rate": 5.088041643957664e-05,
      "loss": 0.4121,
      "step": 2799
    },
    {
      "epoch": 0.6741708300728345,
      "grad_norm": 2.1599056720733643,
      "learning_rate": 5.081251175455617e-05,
      "loss": 0.3685,
      "step": 2800
    },
    {
      "epoch": 0.6744116053692891,
      "grad_norm": 3.4274089336395264,
      "learning_rate": 5.0744636975639424e-05,
      "loss": 0.3434,
      "step": 2801
    },
    {
      "epoch": 0.6746523806657437,
      "grad_norm": 1.6600308418273926,
      "learning_rate": 5.06767921440944e-05,
      "loss": 0.402,
      "step": 2802
    },
    {
      "epoch": 0.6748931559621982,
      "grad_norm": 1.426318883895874,
      "learning_rate": 5.0608977301170845e-05,
      "loss": 0.2329,
      "step": 2803
    },
    {
      "epoch": 0.6751339312586528,
      "grad_norm": 1.2392008304595947,
      "learning_rate": 5.05411924881004e-05,
      "loss": 0.5493,
      "step": 2804
    },
    {
      "epoch": 0.6753747065551075,
      "grad_norm": 0.7665623426437378,
      "learning_rate": 5.047343774609632e-05,
      "loss": 0.3614,
      "step": 2805
    },
    {
      "epoch": 0.6756154818515621,
      "grad_norm": 2.566469192504883,
      "learning_rate": 5.040571311635367e-05,
      "loss": 0.7117,
      "step": 2806
    },
    {
      "epoch": 0.6758562571480166,
      "grad_norm": 1.9703136682510376,
      "learning_rate": 5.033801864004923e-05,
      "loss": 0.7024,
      "step": 2807
    },
    {
      "epoch": 0.6760970324444712,
      "grad_norm": 0.3591214716434479,
      "learning_rate": 5.0270354358341307e-05,
      "loss": 0.1396,
      "step": 2808
    },
    {
      "epoch": 0.6763378077409258,
      "grad_norm": 3.92927622795105,
      "learning_rate": 5.020272031236996e-05,
      "loss": 0.7112,
      "step": 2809
    },
    {
      "epoch": 0.6765785830373804,
      "grad_norm": 10.165884971618652,
      "learning_rate": 5.013511654325689e-05,
      "loss": 0.3902,
      "step": 2810
    },
    {
      "epoch": 0.676819358333835,
      "grad_norm": 4.904458045959473,
      "learning_rate": 5.0067543092105284e-05,
      "loss": 0.9305,
      "step": 2811
    },
    {
      "epoch": 0.6770601336302895,
      "grad_norm": 3.849393844604492,
      "learning_rate": 5.000000000000002e-05,
      "loss": 0.9527,
      "step": 2812
    },
    {
      "epoch": 0.6773009089267441,
      "grad_norm": 1.6533546447753906,
      "learning_rate": 4.993248730800737e-05,
      "loss": 0.2365,
      "step": 2813
    },
    {
      "epoch": 0.6775416842231987,
      "grad_norm": 2.609186887741089,
      "learning_rate": 4.986500505717524e-05,
      "loss": 0.8003,
      "step": 2814
    },
    {
      "epoch": 0.6777824595196533,
      "grad_norm": 4.835500240325928,
      "learning_rate": 4.9797553288533036e-05,
      "loss": 1.3338,
      "step": 2815
    },
    {
      "epoch": 0.6780232348161078,
      "grad_norm": 4.737950325012207,
      "learning_rate": 4.9730132043091494e-05,
      "loss": 0.9067,
      "step": 2816
    },
    {
      "epoch": 0.6782640101125624,
      "grad_norm": 0.9720152616500854,
      "learning_rate": 4.9662741361842934e-05,
      "loss": 0.5726,
      "step": 2817
    },
    {
      "epoch": 0.678504785409017,
      "grad_norm": 2.1223130226135254,
      "learning_rate": 4.9595381285761036e-05,
      "loss": 0.2327,
      "step": 2818
    },
    {
      "epoch": 0.6787455607054717,
      "grad_norm": 1.109189748764038,
      "learning_rate": 4.9528051855800874e-05,
      "loss": 0.8678,
      "step": 2819
    },
    {
      "epoch": 0.6789863360019261,
      "grad_norm": 1.6633493900299072,
      "learning_rate": 4.946075311289894e-05,
      "loss": 0.1905,
      "step": 2820
    },
    {
      "epoch": 0.6792271112983808,
      "grad_norm": 2.95998477935791,
      "learning_rate": 4.939348509797293e-05,
      "loss": 0.6686,
      "step": 2821
    },
    {
      "epoch": 0.6794678865948354,
      "grad_norm": 1.455351710319519,
      "learning_rate": 4.932624785192206e-05,
      "loss": 0.3546,
      "step": 2822
    },
    {
      "epoch": 0.67970866189129,
      "grad_norm": 3.9884376525878906,
      "learning_rate": 4.9259041415626615e-05,
      "loss": 1.0358,
      "step": 2823
    },
    {
      "epoch": 0.6799494371877446,
      "grad_norm": 3.427591562271118,
      "learning_rate": 4.91918658299483e-05,
      "loss": 0.9694,
      "step": 2824
    },
    {
      "epoch": 0.6801902124841991,
      "grad_norm": 0.9723843336105347,
      "learning_rate": 4.912472113573005e-05,
      "loss": 0.0587,
      "step": 2825
    },
    {
      "epoch": 0.6804309877806537,
      "grad_norm": 2.2842836380004883,
      "learning_rate": 4.905760737379597e-05,
      "loss": 0.9764,
      "step": 2826
    },
    {
      "epoch": 0.6806717630771083,
      "grad_norm": 3.051936626434326,
      "learning_rate": 4.899052458495137e-05,
      "loss": 0.7743,
      "step": 2827
    },
    {
      "epoch": 0.6809125383735629,
      "grad_norm": 4.598084926605225,
      "learning_rate": 4.8923472809982795e-05,
      "loss": 0.9498,
      "step": 2828
    },
    {
      "epoch": 0.6811533136700174,
      "grad_norm": 5.4813947677612305,
      "learning_rate": 4.885645208965779e-05,
      "loss": 0.5918,
      "step": 2829
    },
    {
      "epoch": 0.681394088966472,
      "grad_norm": 5.616296768188477,
      "learning_rate": 4.8789462464725176e-05,
      "loss": 0.3233,
      "step": 2830
    },
    {
      "epoch": 0.6816348642629266,
      "grad_norm": 2.7440338134765625,
      "learning_rate": 4.8722503975914724e-05,
      "loss": 0.834,
      "step": 2831
    },
    {
      "epoch": 0.6818756395593812,
      "grad_norm": 2.0612759590148926,
      "learning_rate": 4.865557666393739e-05,
      "loss": 0.6478,
      "step": 2832
    },
    {
      "epoch": 0.6821164148558357,
      "grad_norm": 2.663640260696411,
      "learning_rate": 4.858868056948512e-05,
      "loss": 0.3945,
      "step": 2833
    },
    {
      "epoch": 0.6823571901522903,
      "grad_norm": 1.9246824979782104,
      "learning_rate": 4.8521815733230894e-05,
      "loss": 0.7715,
      "step": 2834
    },
    {
      "epoch": 0.682597965448745,
      "grad_norm": 1.4412755966186523,
      "learning_rate": 4.8454982195828725e-05,
      "loss": 0.1277,
      "step": 2835
    },
    {
      "epoch": 0.6828387407451996,
      "grad_norm": 2.150667190551758,
      "learning_rate": 4.838817999791348e-05,
      "loss": 0.694,
      "step": 2836
    },
    {
      "epoch": 0.6830795160416542,
      "grad_norm": 2.0565173625946045,
      "learning_rate": 4.832140918010107e-05,
      "loss": 0.4911,
      "step": 2837
    },
    {
      "epoch": 0.6833202913381087,
      "grad_norm": 2.131206750869751,
      "learning_rate": 4.825466978298835e-05,
      "loss": 0.3765,
      "step": 2838
    },
    {
      "epoch": 0.6835610666345633,
      "grad_norm": 1.4898698329925537,
      "learning_rate": 4.818796184715295e-05,
      "loss": 0.8131,
      "step": 2839
    },
    {
      "epoch": 0.6838018419310179,
      "grad_norm": 1.2278820276260376,
      "learning_rate": 4.812128541315348e-05,
      "loss": 0.6781,
      "step": 2840
    },
    {
      "epoch": 0.6840426172274725,
      "grad_norm": 1.6083769798278809,
      "learning_rate": 4.805464052152937e-05,
      "loss": 0.3348,
      "step": 2841
    },
    {
      "epoch": 0.684283392523927,
      "grad_norm": 2.421626091003418,
      "learning_rate": 4.7988027212800856e-05,
      "loss": 1.0312,
      "step": 2842
    },
    {
      "epoch": 0.6845241678203816,
      "grad_norm": 2.5446934700012207,
      "learning_rate": 4.7921445527469014e-05,
      "loss": 0.6185,
      "step": 2843
    },
    {
      "epoch": 0.6847649431168362,
      "grad_norm": 2.2218170166015625,
      "learning_rate": 4.7854895506015587e-05,
      "loss": 0.5071,
      "step": 2844
    },
    {
      "epoch": 0.6850057184132908,
      "grad_norm": 1.7741551399230957,
      "learning_rate": 4.7788377188903176e-05,
      "loss": 0.1996,
      "step": 2845
    },
    {
      "epoch": 0.6852464937097453,
      "grad_norm": 0.4389905035495758,
      "learning_rate": 4.7721890616575103e-05,
      "loss": 0.2294,
      "step": 2846
    },
    {
      "epoch": 0.6854872690061999,
      "grad_norm": 2.4608020782470703,
      "learning_rate": 4.76554358294553e-05,
      "loss": 0.4453,
      "step": 2847
    },
    {
      "epoch": 0.6857280443026545,
      "grad_norm": 1.8286683559417725,
      "learning_rate": 4.758901286794842e-05,
      "loss": 0.3692,
      "step": 2848
    },
    {
      "epoch": 0.6859688195991092,
      "grad_norm": 3.2650294303894043,
      "learning_rate": 4.7522621772439826e-05,
      "loss": 0.2029,
      "step": 2849
    },
    {
      "epoch": 0.6862095948955638,
      "grad_norm": 1.0558974742889404,
      "learning_rate": 4.7456262583295406e-05,
      "loss": 0.2386,
      "step": 2850
    },
    {
      "epoch": 0.6864503701920183,
      "grad_norm": 3.462625741958618,
      "learning_rate": 4.7389935340861766e-05,
      "loss": 0.5172,
      "step": 2851
    },
    {
      "epoch": 0.6866911454884729,
      "grad_norm": 1.7738730907440186,
      "learning_rate": 4.732364008546593e-05,
      "loss": 0.5665,
      "step": 2852
    },
    {
      "epoch": 0.6869319207849275,
      "grad_norm": 1.2178890705108643,
      "learning_rate": 4.72573768574156e-05,
      "loss": 0.4615,
      "step": 2853
    },
    {
      "epoch": 0.6871726960813821,
      "grad_norm": 4.225795745849609,
      "learning_rate": 4.719114569699902e-05,
      "loss": 1.0835,
      "step": 2854
    },
    {
      "epoch": 0.6874134713778366,
      "grad_norm": 3.400425434112549,
      "learning_rate": 4.712494664448479e-05,
      "loss": 0.8196,
      "step": 2855
    },
    {
      "epoch": 0.6876542466742912,
      "grad_norm": 4.688882827758789,
      "learning_rate": 4.705877974012213e-05,
      "loss": 0.9437,
      "step": 2856
    },
    {
      "epoch": 0.6878950219707458,
      "grad_norm": 1.8264660835266113,
      "learning_rate": 4.699264502414066e-05,
      "loss": 0.649,
      "step": 2857
    },
    {
      "epoch": 0.6881357972672004,
      "grad_norm": 4.103915214538574,
      "learning_rate": 4.6926542536750454e-05,
      "loss": 0.5432,
      "step": 2858
    },
    {
      "epoch": 0.6883765725636549,
      "grad_norm": 8.779718399047852,
      "learning_rate": 4.686047231814199e-05,
      "loss": 0.8389,
      "step": 2859
    },
    {
      "epoch": 0.6886173478601095,
      "grad_norm": 1.3146713972091675,
      "learning_rate": 4.6794434408486043e-05,
      "loss": 0.5095,
      "step": 2860
    },
    {
      "epoch": 0.6888581231565641,
      "grad_norm": 2.284715175628662,
      "learning_rate": 4.6728428847933893e-05,
      "loss": 0.7908,
      "step": 2861
    },
    {
      "epoch": 0.6890988984530187,
      "grad_norm": 1.923722267150879,
      "learning_rate": 4.666245567661699e-05,
      "loss": 0.7053,
      "step": 2862
    },
    {
      "epoch": 0.6893396737494734,
      "grad_norm": 1.3939085006713867,
      "learning_rate": 4.659651493464721e-05,
      "loss": 0.5569,
      "step": 2863
    },
    {
      "epoch": 0.6895804490459279,
      "grad_norm": 1.071938157081604,
      "learning_rate": 4.653060666211665e-05,
      "loss": 0.474,
      "step": 2864
    },
    {
      "epoch": 0.6898212243423825,
      "grad_norm": 2.894726514816284,
      "learning_rate": 4.646473089909772e-05,
      "loss": 0.5261,
      "step": 2865
    },
    {
      "epoch": 0.6900619996388371,
      "grad_norm": 2.7686641216278076,
      "learning_rate": 4.639888768564302e-05,
      "loss": 0.8032,
      "step": 2866
    },
    {
      "epoch": 0.6903027749352917,
      "grad_norm": 1.58405601978302,
      "learning_rate": 4.633307706178541e-05,
      "loss": 0.8255,
      "step": 2867
    },
    {
      "epoch": 0.6905435502317462,
      "grad_norm": 2.1864027976989746,
      "learning_rate": 4.626729906753782e-05,
      "loss": 0.5292,
      "step": 2868
    },
    {
      "epoch": 0.6907843255282008,
      "grad_norm": 2.6647164821624756,
      "learning_rate": 4.62015537428935e-05,
      "loss": 1.1816,
      "step": 2869
    },
    {
      "epoch": 0.6910251008246554,
      "grad_norm": 0.903181254863739,
      "learning_rate": 4.613584112782567e-05,
      "loss": 0.5345,
      "step": 2870
    },
    {
      "epoch": 0.69126587612111,
      "grad_norm": 1.2306076288223267,
      "learning_rate": 4.607016126228779e-05,
      "loss": 0.5126,
      "step": 2871
    },
    {
      "epoch": 0.6915066514175645,
      "grad_norm": 1.6878161430358887,
      "learning_rate": 4.600451418621341e-05,
      "loss": 0.5813,
      "step": 2872
    },
    {
      "epoch": 0.6917474267140191,
      "grad_norm": 1.6797889471054077,
      "learning_rate": 4.593889993951599e-05,
      "loss": 0.4037,
      "step": 2873
    },
    {
      "epoch": 0.6919882020104737,
      "grad_norm": 1.061113715171814,
      "learning_rate": 4.587331856208927e-05,
      "loss": 0.3819,
      "step": 2874
    },
    {
      "epoch": 0.6922289773069283,
      "grad_norm": 2.49900484085083,
      "learning_rate": 4.580777009380678e-05,
      "loss": 0.4709,
      "step": 2875
    },
    {
      "epoch": 0.6924697526033828,
      "grad_norm": 1.850058674812317,
      "learning_rate": 4.574225457452217e-05,
      "loss": 0.4061,
      "step": 2876
    },
    {
      "epoch": 0.6927105278998374,
      "grad_norm": 4.711109638214111,
      "learning_rate": 4.5676772044069064e-05,
      "loss": 0.6784,
      "step": 2877
    },
    {
      "epoch": 0.6929513031962921,
      "grad_norm": 2.337125778198242,
      "learning_rate": 4.5611322542260906e-05,
      "loss": 1.2925,
      "step": 2878
    },
    {
      "epoch": 0.6931920784927467,
      "grad_norm": 1.2772440910339355,
      "learning_rate": 4.554590610889118e-05,
      "loss": 0.453,
      "step": 2879
    },
    {
      "epoch": 0.6934328537892013,
      "grad_norm": 1.6413322687149048,
      "learning_rate": 4.548052278373327e-05,
      "loss": 0.509,
      "step": 2880
    },
    {
      "epoch": 0.6936736290856558,
      "grad_norm": 2.3125624656677246,
      "learning_rate": 4.54151726065403e-05,
      "loss": 0.68,
      "step": 2881
    },
    {
      "epoch": 0.6939144043821104,
      "grad_norm": 2.5944857597351074,
      "learning_rate": 4.534985561704537e-05,
      "loss": 0.9755,
      "step": 2882
    },
    {
      "epoch": 0.694155179678565,
      "grad_norm": 5.304381370544434,
      "learning_rate": 4.528457185496134e-05,
      "loss": 0.6764,
      "step": 2883
    },
    {
      "epoch": 0.6943959549750196,
      "grad_norm": 1.1039294004440308,
      "learning_rate": 4.521932135998092e-05,
      "loss": 0.4513,
      "step": 2884
    },
    {
      "epoch": 0.6946367302714741,
      "grad_norm": 4.094736576080322,
      "learning_rate": 4.5154104171776546e-05,
      "loss": 1.098,
      "step": 2885
    },
    {
      "epoch": 0.6948775055679287,
      "grad_norm": 2.167951822280884,
      "learning_rate": 4.5088920330000386e-05,
      "loss": 0.9008,
      "step": 2886
    },
    {
      "epoch": 0.6951182808643833,
      "grad_norm": 1.435927152633667,
      "learning_rate": 4.502376987428442e-05,
      "loss": 0.3153,
      "step": 2887
    },
    {
      "epoch": 0.6953590561608379,
      "grad_norm": 1.3964961767196655,
      "learning_rate": 4.495865284424018e-05,
      "loss": 0.9771,
      "step": 2888
    },
    {
      "epoch": 0.6955998314572924,
      "grad_norm": 1.8884319067001343,
      "learning_rate": 4.4893569279459034e-05,
      "loss": 0.5999,
      "step": 2889
    },
    {
      "epoch": 0.695840606753747,
      "grad_norm": 1.9889702796936035,
      "learning_rate": 4.4828519219511914e-05,
      "loss": 0.408,
      "step": 2890
    },
    {
      "epoch": 0.6960813820502016,
      "grad_norm": 0.6705599427223206,
      "learning_rate": 4.476350270394942e-05,
      "loss": 1.1983,
      "step": 2891
    },
    {
      "epoch": 0.6963221573466563,
      "grad_norm": 1.9054640531539917,
      "learning_rate": 4.469851977230173e-05,
      "loss": 0.6402,
      "step": 2892
    },
    {
      "epoch": 0.6965629326431109,
      "grad_norm": 0.7746975421905518,
      "learning_rate": 4.463357046407864e-05,
      "loss": 0.3632,
      "step": 2893
    },
    {
      "epoch": 0.6968037079395654,
      "grad_norm": 0.40516397356987,
      "learning_rate": 4.456865481876943e-05,
      "loss": 0.1903,
      "step": 2894
    },
    {
      "epoch": 0.69704448323602,
      "grad_norm": 3.2087621688842773,
      "learning_rate": 4.4503772875843e-05,
      "loss": 0.568,
      "step": 2895
    },
    {
      "epoch": 0.6972852585324746,
      "grad_norm": 2.681427478790283,
      "learning_rate": 4.4438924674747663e-05,
      "loss": 0.9806,
      "step": 2896
    },
    {
      "epoch": 0.6975260338289292,
      "grad_norm": 1.9525858163833618,
      "learning_rate": 4.4374110254911306e-05,
      "loss": 0.4023,
      "step": 2897
    },
    {
      "epoch": 0.6977668091253837,
      "grad_norm": 5.92275857925415,
      "learning_rate": 4.430932965574125e-05,
      "loss": 0.8938,
      "step": 2898
    },
    {
      "epoch": 0.6980075844218383,
      "grad_norm": 5.130187034606934,
      "learning_rate": 4.424458291662422e-05,
      "loss": 0.5991,
      "step": 2899
    },
    {
      "epoch": 0.6982483597182929,
      "grad_norm": 1.512535810470581,
      "learning_rate": 4.417987007692641e-05,
      "loss": 0.6119,
      "step": 2900
    },
    {
      "epoch": 0.6984891350147475,
      "grad_norm": 1.4863414764404297,
      "learning_rate": 4.4115191175993385e-05,
      "loss": 0.5287,
      "step": 2901
    },
    {
      "epoch": 0.698729910311202,
      "grad_norm": 0.5450987219810486,
      "learning_rate": 4.405054625314999e-05,
      "loss": 0.4031,
      "step": 2902
    },
    {
      "epoch": 0.6989706856076566,
      "grad_norm": 2.9713079929351807,
      "learning_rate": 4.398593534770058e-05,
      "loss": 0.8828,
      "step": 2903
    },
    {
      "epoch": 0.6992114609041112,
      "grad_norm": 1.440027117729187,
      "learning_rate": 4.3921358498928645e-05,
      "loss": 0.3911,
      "step": 2904
    },
    {
      "epoch": 0.6994522362005658,
      "grad_norm": 1.3297288417816162,
      "learning_rate": 4.385681574609708e-05,
      "loss": 0.3319,
      "step": 2905
    },
    {
      "epoch": 0.6996930114970205,
      "grad_norm": 0.7073665261268616,
      "learning_rate": 4.379230712844804e-05,
      "loss": 0.6385,
      "step": 2906
    },
    {
      "epoch": 0.699933786793475,
      "grad_norm": 2.934152126312256,
      "learning_rate": 4.37278326852029e-05,
      "loss": 1.4158,
      "step": 2907
    },
    {
      "epoch": 0.7001745620899296,
      "grad_norm": 2.335797071456909,
      "learning_rate": 4.36633924555623e-05,
      "loss": 0.9337,
      "step": 2908
    },
    {
      "epoch": 0.7004153373863842,
      "grad_norm": 1.474564552307129,
      "learning_rate": 4.359898647870599e-05,
      "loss": 0.5355,
      "step": 2909
    },
    {
      "epoch": 0.7006561126828388,
      "grad_norm": 1.9566766023635864,
      "learning_rate": 4.353461479379297e-05,
      "loss": 0.4216,
      "step": 2910
    },
    {
      "epoch": 0.7008968879792933,
      "grad_norm": 1.7746264934539795,
      "learning_rate": 4.34702774399614e-05,
      "loss": 0.5385,
      "step": 2911
    },
    {
      "epoch": 0.7011376632757479,
      "grad_norm": 2.327068567276001,
      "learning_rate": 4.340597445632849e-05,
      "loss": 0.1434,
      "step": 2912
    },
    {
      "epoch": 0.7013784385722025,
      "grad_norm": 0.7720171809196472,
      "learning_rate": 4.334170588199061e-05,
      "loss": 0.327,
      "step": 2913
    },
    {
      "epoch": 0.7016192138686571,
      "grad_norm": 0.9734980463981628,
      "learning_rate": 4.32774717560232e-05,
      "loss": 0.6511,
      "step": 2914
    },
    {
      "epoch": 0.7018599891651116,
      "grad_norm": 2.17838191986084,
      "learning_rate": 4.321327211748077e-05,
      "loss": 0.6218,
      "step": 2915
    },
    {
      "epoch": 0.7021007644615662,
      "grad_norm": 1.358054757118225,
      "learning_rate": 4.314910700539687e-05,
      "loss": 0.8311,
      "step": 2916
    },
    {
      "epoch": 0.7023415397580208,
      "grad_norm": 7.809467792510986,
      "learning_rate": 4.308497645878396e-05,
      "loss": 1.164,
      "step": 2917
    },
    {
      "epoch": 0.7025823150544754,
      "grad_norm": 2.3735713958740234,
      "learning_rate": 4.302088051663359e-05,
      "loss": 0.5243,
      "step": 2918
    },
    {
      "epoch": 0.70282309035093,
      "grad_norm": 1.5434727668762207,
      "learning_rate": 4.2956819217916275e-05,
      "loss": 0.2084,
      "step": 2919
    },
    {
      "epoch": 0.7030638656473845,
      "grad_norm": 2.580521821975708,
      "learning_rate": 4.289279260158137e-05,
      "loss": 0.666,
      "step": 2920
    },
    {
      "epoch": 0.7033046409438392,
      "grad_norm": 3.4632489681243896,
      "learning_rate": 4.282880070655723e-05,
      "loss": 0.7674,
      "step": 2921
    },
    {
      "epoch": 0.7035454162402938,
      "grad_norm": 3.7505438327789307,
      "learning_rate": 4.2764843571751046e-05,
      "loss": 0.8833,
      "step": 2922
    },
    {
      "epoch": 0.7037861915367484,
      "grad_norm": 1.1136095523834229,
      "learning_rate": 4.270092123604894e-05,
      "loss": 0.5675,
      "step": 2923
    },
    {
      "epoch": 0.7040269668332029,
      "grad_norm": 2.523184299468994,
      "learning_rate": 4.263703373831586e-05,
      "loss": 0.621,
      "step": 2924
    },
    {
      "epoch": 0.7042677421296575,
      "grad_norm": 1.7620470523834229,
      "learning_rate": 4.2573181117395455e-05,
      "loss": 0.2796,
      "step": 2925
    },
    {
      "epoch": 0.7045085174261121,
      "grad_norm": 0.7479153275489807,
      "learning_rate": 4.250936341211032e-05,
      "loss": 0.7364,
      "step": 2926
    },
    {
      "epoch": 0.7047492927225667,
      "grad_norm": 1.9243773221969604,
      "learning_rate": 4.2445580661261794e-05,
      "loss": 0.5447,
      "step": 2927
    },
    {
      "epoch": 0.7049900680190212,
      "grad_norm": 2.319751501083374,
      "learning_rate": 4.238183290362987e-05,
      "loss": 0.4302,
      "step": 2928
    },
    {
      "epoch": 0.7052308433154758,
      "grad_norm": 1.1263662576675415,
      "learning_rate": 4.231812017797335e-05,
      "loss": 0.5473,
      "step": 2929
    },
    {
      "epoch": 0.7054716186119304,
      "grad_norm": 1.7234480381011963,
      "learning_rate": 4.225444252302973e-05,
      "loss": 0.3453,
      "step": 2930
    },
    {
      "epoch": 0.705712393908385,
      "grad_norm": 1.5523897409439087,
      "learning_rate": 4.219079997751515e-05,
      "loss": 0.2537,
      "step": 2931
    },
    {
      "epoch": 0.7059531692048396,
      "grad_norm": 2.044769287109375,
      "learning_rate": 4.212719258012447e-05,
      "loss": 0.3151,
      "step": 2932
    },
    {
      "epoch": 0.7061939445012941,
      "grad_norm": 3.085174322128296,
      "learning_rate": 4.206362036953104e-05,
      "loss": 0.4571,
      "step": 2933
    },
    {
      "epoch": 0.7064347197977487,
      "grad_norm": 7.409231185913086,
      "learning_rate": 4.2000083384387e-05,
      "loss": 1.0109,
      "step": 2934
    },
    {
      "epoch": 0.7066754950942034,
      "grad_norm": 4.059498310089111,
      "learning_rate": 4.193658166332291e-05,
      "loss": 0.4508,
      "step": 2935
    },
    {
      "epoch": 0.706916270390658,
      "grad_norm": 3.295271873474121,
      "learning_rate": 4.187311524494798e-05,
      "loss": 0.5282,
      "step": 2936
    },
    {
      "epoch": 0.7071570456871125,
      "grad_norm": 1.7621487379074097,
      "learning_rate": 4.1809684167849936e-05,
      "loss": 1.1533,
      "step": 2937
    },
    {
      "epoch": 0.7073978209835671,
      "grad_norm": 3.32817006111145,
      "learning_rate": 4.1746288470595044e-05,
      "loss": 0.7824,
      "step": 2938
    },
    {
      "epoch": 0.7076385962800217,
      "grad_norm": 2.0236010551452637,
      "learning_rate": 4.1682928191727985e-05,
      "loss": 0.4317,
      "step": 2939
    },
    {
      "epoch": 0.7078793715764763,
      "grad_norm": 2.6577980518341064,
      "learning_rate": 4.161960336977203e-05,
      "loss": 0.5246,
      "step": 2940
    },
    {
      "epoch": 0.7081201468729308,
      "grad_norm": 45.47622299194336,
      "learning_rate": 4.1556314043228705e-05,
      "loss": 0.5691,
      "step": 2941
    },
    {
      "epoch": 0.7083609221693854,
      "grad_norm": 2.584383249282837,
      "learning_rate": 4.1493060250578165e-05,
      "loss": 0.4159,
      "step": 2942
    },
    {
      "epoch": 0.70860169746584,
      "grad_norm": 2.8023557662963867,
      "learning_rate": 4.1429842030278774e-05,
      "loss": 0.8909,
      "step": 2943
    },
    {
      "epoch": 0.7088424727622946,
      "grad_norm": 1.6718467473983765,
      "learning_rate": 4.1366659420767384e-05,
      "loss": 0.5008,
      "step": 2944
    },
    {
      "epoch": 0.7090832480587492,
      "grad_norm": 4.263134956359863,
      "learning_rate": 4.1303512460459214e-05,
      "loss": 0.6134,
      "step": 2945
    },
    {
      "epoch": 0.7093240233552037,
      "grad_norm": 3.2568228244781494,
      "learning_rate": 4.124040118774763e-05,
      "loss": 0.4874,
      "step": 2946
    },
    {
      "epoch": 0.7095647986516583,
      "grad_norm": 0.5476480722427368,
      "learning_rate": 4.1177325641004595e-05,
      "loss": 0.341,
      "step": 2947
    },
    {
      "epoch": 0.7098055739481129,
      "grad_norm": 2.8167431354522705,
      "learning_rate": 4.1114285858580045e-05,
      "loss": 0.3281,
      "step": 2948
    },
    {
      "epoch": 0.7100463492445676,
      "grad_norm": 4.154418468475342,
      "learning_rate": 4.105128187880238e-05,
      "loss": 0.5594,
      "step": 2949
    },
    {
      "epoch": 0.710287124541022,
      "grad_norm": 0.7136004567146301,
      "learning_rate": 4.098831373997818e-05,
      "loss": 0.4894,
      "step": 2950
    },
    {
      "epoch": 0.7105278998374767,
      "grad_norm": 2.967937707901001,
      "learning_rate": 4.0925381480392135e-05,
      "loss": 0.6342,
      "step": 2951
    },
    {
      "epoch": 0.7107686751339313,
      "grad_norm": 2.3439087867736816,
      "learning_rate": 4.086248513830725e-05,
      "loss": 0.514,
      "step": 2952
    },
    {
      "epoch": 0.7110094504303859,
      "grad_norm": 1.2852379083633423,
      "learning_rate": 4.079962475196468e-05,
      "loss": 0.9783,
      "step": 2953
    },
    {
      "epoch": 0.7112502257268404,
      "grad_norm": 3.063833713531494,
      "learning_rate": 4.0736800359583605e-05,
      "loss": 0.3231,
      "step": 2954
    },
    {
      "epoch": 0.711491001023295,
      "grad_norm": 0.8757096529006958,
      "learning_rate": 4.067401199936143e-05,
      "loss": 0.3563,
      "step": 2955
    },
    {
      "epoch": 0.7117317763197496,
      "grad_norm": 2.1515250205993652,
      "learning_rate": 4.061125970947363e-05,
      "loss": 0.5002,
      "step": 2956
    },
    {
      "epoch": 0.7119725516162042,
      "grad_norm": 1.7841241359710693,
      "learning_rate": 4.054854352807372e-05,
      "loss": 0.2222,
      "step": 2957
    },
    {
      "epoch": 0.7122133269126587,
      "grad_norm": 1.669628620147705,
      "learning_rate": 4.048586349329333e-05,
      "loss": 0.8098,
      "step": 2958
    },
    {
      "epoch": 0.7124541022091133,
      "grad_norm": 1.4398468732833862,
      "learning_rate": 4.0423219643241985e-05,
      "loss": 0.3151,
      "step": 2959
    },
    {
      "epoch": 0.7126948775055679,
      "grad_norm": 3.6351101398468018,
      "learning_rate": 4.036061201600737e-05,
      "loss": 0.3961,
      "step": 2960
    },
    {
      "epoch": 0.7129356528020225,
      "grad_norm": 0.8414926528930664,
      "learning_rate": 4.029804064965498e-05,
      "loss": 0.2666,
      "step": 2961
    },
    {
      "epoch": 0.7131764280984771,
      "grad_norm": 1.7037287950515747,
      "learning_rate": 4.023550558222837e-05,
      "loss": 0.4597,
      "step": 2962
    },
    {
      "epoch": 0.7134172033949316,
      "grad_norm": 2.4606921672821045,
      "learning_rate": 4.017300685174903e-05,
      "loss": 0.6738,
      "step": 2963
    },
    {
      "epoch": 0.7136579786913863,
      "grad_norm": 0.9210506677627563,
      "learning_rate": 4.011054449621632e-05,
      "loss": 0.5534,
      "step": 2964
    },
    {
      "epoch": 0.7138987539878409,
      "grad_norm": 1.8560645580291748,
      "learning_rate": 4.004811855360748e-05,
      "loss": 0.2176,
      "step": 2965
    },
    {
      "epoch": 0.7141395292842955,
      "grad_norm": 1.5320228338241577,
      "learning_rate": 3.998572906187767e-05,
      "loss": 0.6553,
      "step": 2966
    },
    {
      "epoch": 0.71438030458075,
      "grad_norm": 7.649412155151367,
      "learning_rate": 3.9923376058959774e-05,
      "loss": 0.8473,
      "step": 2967
    },
    {
      "epoch": 0.7146210798772046,
      "grad_norm": 0.8672193288803101,
      "learning_rate": 3.986105958276463e-05,
      "loss": 0.4563,
      "step": 2968
    },
    {
      "epoch": 0.7148618551736592,
      "grad_norm": 3.8993074893951416,
      "learning_rate": 3.97987796711807e-05,
      "loss": 0.5047,
      "step": 2969
    },
    {
      "epoch": 0.7151026304701138,
      "grad_norm": 2.695249319076538,
      "learning_rate": 3.973653636207437e-05,
      "loss": 0.8572,
      "step": 2970
    },
    {
      "epoch": 0.7153434057665683,
      "grad_norm": 1.0926902294158936,
      "learning_rate": 3.967432969328971e-05,
      "loss": 0.4632,
      "step": 2971
    },
    {
      "epoch": 0.7155841810630229,
      "grad_norm": 2.5427393913269043,
      "learning_rate": 3.961215970264852e-05,
      "loss": 0.4715,
      "step": 2972
    },
    {
      "epoch": 0.7158249563594775,
      "grad_norm": 1.8015666007995605,
      "learning_rate": 3.9550026427950315e-05,
      "loss": 0.4259,
      "step": 2973
    },
    {
      "epoch": 0.7160657316559321,
      "grad_norm": 2.0264315605163574,
      "learning_rate": 3.94879299069722e-05,
      "loss": 0.7189,
      "step": 2974
    },
    {
      "epoch": 0.7163065069523867,
      "grad_norm": 2.786452531814575,
      "learning_rate": 3.942587017746904e-05,
      "loss": 1.0023,
      "step": 2975
    },
    {
      "epoch": 0.7165472822488412,
      "grad_norm": 1.3321934938430786,
      "learning_rate": 3.936384727717332e-05,
      "loss": 0.5356,
      "step": 2976
    },
    {
      "epoch": 0.7167880575452958,
      "grad_norm": 1.1468703746795654,
      "learning_rate": 3.930186124379503e-05,
      "loss": 0.6806,
      "step": 2977
    },
    {
      "epoch": 0.7170288328417505,
      "grad_norm": 3.6442174911499023,
      "learning_rate": 3.923991211502187e-05,
      "loss": 0.2468,
      "step": 2978
    },
    {
      "epoch": 0.7172696081382051,
      "grad_norm": 1.8191343545913696,
      "learning_rate": 3.917799992851903e-05,
      "loss": 1.0023,
      "step": 2979
    },
    {
      "epoch": 0.7175103834346596,
      "grad_norm": 2.2116637229919434,
      "learning_rate": 3.911612472192927e-05,
      "loss": 0.3557,
      "step": 2980
    },
    {
      "epoch": 0.7177511587311142,
      "grad_norm": 0.8968959450721741,
      "learning_rate": 3.9054286532872884e-05,
      "loss": 0.3245,
      "step": 2981
    },
    {
      "epoch": 0.7179919340275688,
      "grad_norm": 1.421441674232483,
      "learning_rate": 3.899248539894757e-05,
      "loss": 0.3783,
      "step": 2982
    },
    {
      "epoch": 0.7182327093240234,
      "grad_norm": 2.168306827545166,
      "learning_rate": 3.8930721357728584e-05,
      "loss": 0.2099,
      "step": 2983
    },
    {
      "epoch": 0.7184734846204779,
      "grad_norm": 1.9068177938461304,
      "learning_rate": 3.886899444676863e-05,
      "loss": 0.8279,
      "step": 2984
    },
    {
      "epoch": 0.7187142599169325,
      "grad_norm": 1.3989911079406738,
      "learning_rate": 3.880730470359776e-05,
      "loss": 0.9995,
      "step": 2985
    },
    {
      "epoch": 0.7189550352133871,
      "grad_norm": 5.264814376831055,
      "learning_rate": 3.8745652165723486e-05,
      "loss": 0.9829,
      "step": 2986
    },
    {
      "epoch": 0.7191958105098417,
      "grad_norm": 2.0597469806671143,
      "learning_rate": 3.8684036870630705e-05,
      "loss": 0.2443,
      "step": 2987
    },
    {
      "epoch": 0.7194365858062963,
      "grad_norm": 1.258255958557129,
      "learning_rate": 3.862245885578166e-05,
      "loss": 0.7055,
      "step": 2988
    },
    {
      "epoch": 0.7196773611027508,
      "grad_norm": 3.225368022918701,
      "learning_rate": 3.856091815861595e-05,
      "loss": 0.3839,
      "step": 2989
    },
    {
      "epoch": 0.7199181363992054,
      "grad_norm": 2.35640025138855,
      "learning_rate": 3.8499414816550384e-05,
      "loss": 0.8443,
      "step": 2990
    },
    {
      "epoch": 0.72015891169566,
      "grad_norm": 2.4103639125823975,
      "learning_rate": 3.843794886697917e-05,
      "loss": 0.693,
      "step": 2991
    },
    {
      "epoch": 0.7203996869921147,
      "grad_norm": 0.8811191320419312,
      "learning_rate": 3.837652034727378e-05,
      "loss": 0.3415,
      "step": 2992
    },
    {
      "epoch": 0.7206404622885692,
      "grad_norm": 1.7266875505447388,
      "learning_rate": 3.8315129294782835e-05,
      "loss": 0.4295,
      "step": 2993
    },
    {
      "epoch": 0.7208812375850238,
      "grad_norm": 1.2905570268630981,
      "learning_rate": 3.8253775746832244e-05,
      "loss": 0.248,
      "step": 2994
    },
    {
      "epoch": 0.7211220128814784,
      "grad_norm": 0.3409409821033478,
      "learning_rate": 3.819245974072513e-05,
      "loss": 0.6092,
      "step": 2995
    },
    {
      "epoch": 0.721362788177933,
      "grad_norm": 1.4550303220748901,
      "learning_rate": 3.8131181313741735e-05,
      "loss": 0.6874,
      "step": 2996
    },
    {
      "epoch": 0.7216035634743875,
      "grad_norm": 0.9903691411018372,
      "learning_rate": 3.806994050313953e-05,
      "loss": 0.1963,
      "step": 2997
    },
    {
      "epoch": 0.7218443387708421,
      "grad_norm": 0.8208291530609131,
      "learning_rate": 3.800873734615299e-05,
      "loss": 0.3679,
      "step": 2998
    },
    {
      "epoch": 0.7220851140672967,
      "grad_norm": 1.0615532398223877,
      "learning_rate": 3.794757187999386e-05,
      "loss": 0.9426,
      "step": 2999
    },
    {
      "epoch": 0.7223258893637513,
      "grad_norm": 2.0494561195373535,
      "learning_rate": 3.788644414185078e-05,
      "loss": 0.2539,
      "step": 3000
    },
    {
      "epoch": 0.7225666646602059,
      "grad_norm": 2.378437042236328,
      "learning_rate": 3.782535416888963e-05,
      "loss": 0.7789,
      "step": 3001
    },
    {
      "epoch": 0.7228074399566604,
      "grad_norm": 1.714324951171875,
      "learning_rate": 3.776430199825321e-05,
      "loss": 0.9774,
      "step": 3002
    },
    {
      "epoch": 0.723048215253115,
      "grad_norm": 2.891805648803711,
      "learning_rate": 3.770328766706139e-05,
      "loss": 0.6982,
      "step": 3003
    },
    {
      "epoch": 0.7232889905495696,
      "grad_norm": 4.66194486618042,
      "learning_rate": 3.764231121241103e-05,
      "loss": 0.6659,
      "step": 3004
    },
    {
      "epoch": 0.7235297658460242,
      "grad_norm": 3.184102773666382,
      "learning_rate": 3.758137267137598e-05,
      "loss": 0.5286,
      "step": 3005
    },
    {
      "epoch": 0.7237705411424787,
      "grad_norm": 5.212895393371582,
      "learning_rate": 3.752047208100694e-05,
      "loss": 0.2767,
      "step": 3006
    },
    {
      "epoch": 0.7240113164389333,
      "grad_norm": 1.256901741027832,
      "learning_rate": 3.745960947833168e-05,
      "loss": 0.477,
      "step": 3007
    },
    {
      "epoch": 0.724252091735388,
      "grad_norm": 1.1887600421905518,
      "learning_rate": 3.739878490035473e-05,
      "loss": 0.7814,
      "step": 3008
    },
    {
      "epoch": 0.7244928670318426,
      "grad_norm": 2.5815846920013428,
      "learning_rate": 3.73379983840576e-05,
      "loss": 0.5839,
      "step": 3009
    },
    {
      "epoch": 0.7247336423282971,
      "grad_norm": 4.950305938720703,
      "learning_rate": 3.727724996639863e-05,
      "loss": 0.4643,
      "step": 3010
    },
    {
      "epoch": 0.7249744176247517,
      "grad_norm": 6.662084102630615,
      "learning_rate": 3.7216539684313004e-05,
      "loss": 0.5806,
      "step": 3011
    },
    {
      "epoch": 0.7252151929212063,
      "grad_norm": 2.3618359565734863,
      "learning_rate": 3.715586757471273e-05,
      "loss": 0.5451,
      "step": 3012
    },
    {
      "epoch": 0.7254559682176609,
      "grad_norm": 1.401696801185608,
      "learning_rate": 3.709523367448653e-05,
      "loss": 0.8228,
      "step": 3013
    },
    {
      "epoch": 0.7256967435141155,
      "grad_norm": 0.5292275547981262,
      "learning_rate": 3.7034638020499976e-05,
      "loss": 0.3713,
      "step": 3014
    },
    {
      "epoch": 0.72593751881057,
      "grad_norm": 0.8193963766098022,
      "learning_rate": 3.697408064959541e-05,
      "loss": 0.1659,
      "step": 3015
    },
    {
      "epoch": 0.7261782941070246,
      "grad_norm": 2.547407388687134,
      "learning_rate": 3.691356159859177e-05,
      "loss": 0.3945,
      "step": 3016
    },
    {
      "epoch": 0.7264190694034792,
      "grad_norm": 1.281667709350586,
      "learning_rate": 3.685308090428481e-05,
      "loss": 0.6264,
      "step": 3017
    },
    {
      "epoch": 0.7266598446999338,
      "grad_norm": 0.9349974989891052,
      "learning_rate": 3.6792638603446974e-05,
      "loss": 0.5355,
      "step": 3018
    },
    {
      "epoch": 0.7269006199963883,
      "grad_norm": 1.2555688619613647,
      "learning_rate": 3.67322347328272e-05,
      "loss": 0.1645,
      "step": 3019
    },
    {
      "epoch": 0.7271413952928429,
      "grad_norm": 2.40930438041687,
      "learning_rate": 3.667186932915133e-05,
      "loss": 0.3945,
      "step": 3020
    },
    {
      "epoch": 0.7273821705892975,
      "grad_norm": 1.6507692337036133,
      "learning_rate": 3.661154242912155e-05,
      "loss": 0.3394,
      "step": 3021
    },
    {
      "epoch": 0.7276229458857522,
      "grad_norm": 0.6924558877944946,
      "learning_rate": 3.6551254069416774e-05,
      "loss": 0.2132,
      "step": 3022
    },
    {
      "epoch": 0.7278637211822067,
      "grad_norm": 1.6599589586257935,
      "learning_rate": 3.649100428669253e-05,
      "loss": 0.5329,
      "step": 3023
    },
    {
      "epoch": 0.7281044964786613,
      "grad_norm": 1.3489158153533936,
      "learning_rate": 3.643079311758072e-05,
      "loss": 0.4529,
      "step": 3024
    },
    {
      "epoch": 0.7283452717751159,
      "grad_norm": 3.1767184734344482,
      "learning_rate": 3.637062059868996e-05,
      "loss": 1.4829,
      "step": 3025
    },
    {
      "epoch": 0.7285860470715705,
      "grad_norm": 2.8698904514312744,
      "learning_rate": 3.63104867666052e-05,
      "loss": 0.4736,
      "step": 3026
    },
    {
      "epoch": 0.7288268223680251,
      "grad_norm": 1.75603187084198,
      "learning_rate": 3.625039165788794e-05,
      "loss": 0.8231,
      "step": 3027
    },
    {
      "epoch": 0.7290675976644796,
      "grad_norm": 0.7908713221549988,
      "learning_rate": 3.619033530907625e-05,
      "loss": 0.5338,
      "step": 3028
    },
    {
      "epoch": 0.7293083729609342,
      "grad_norm": 1.7771409749984741,
      "learning_rate": 3.613031775668443e-05,
      "loss": 0.7482,
      "step": 3029
    },
    {
      "epoch": 0.7295491482573888,
      "grad_norm": 2.4424712657928467,
      "learning_rate": 3.6070339037203306e-05,
      "loss": 0.4881,
      "step": 3030
    },
    {
      "epoch": 0.7297899235538434,
      "grad_norm": 0.698549211025238,
      "learning_rate": 3.601039918710012e-05,
      "loss": 0.3092,
      "step": 3031
    },
    {
      "epoch": 0.7300306988502979,
      "grad_norm": 0.616523802280426,
      "learning_rate": 3.595049824281837e-05,
      "loss": 0.5394,
      "step": 3032
    },
    {
      "epoch": 0.7302714741467525,
      "grad_norm": 1.3015395402908325,
      "learning_rate": 3.589063624077802e-05,
      "loss": 0.5671,
      "step": 3033
    },
    {
      "epoch": 0.7305122494432071,
      "grad_norm": 0.953938364982605,
      "learning_rate": 3.583081321737525e-05,
      "loss": 0.7368,
      "step": 3034
    },
    {
      "epoch": 0.7307530247396617,
      "grad_norm": 0.6559523344039917,
      "learning_rate": 3.577102920898261e-05,
      "loss": 0.2857,
      "step": 3035
    },
    {
      "epoch": 0.7309938000361162,
      "grad_norm": 0.8794732689857483,
      "learning_rate": 3.5711284251948914e-05,
      "loss": 0.2559,
      "step": 3036
    },
    {
      "epoch": 0.7312345753325709,
      "grad_norm": 1.048971176147461,
      "learning_rate": 3.565157838259925e-05,
      "loss": 0.2112,
      "step": 3037
    },
    {
      "epoch": 0.7314753506290255,
      "grad_norm": 1.1826798915863037,
      "learning_rate": 3.5591911637234945e-05,
      "loss": 0.2799,
      "step": 3038
    },
    {
      "epoch": 0.7317161259254801,
      "grad_norm": 2.4413845539093018,
      "learning_rate": 3.5532284052133436e-05,
      "loss": 0.5779,
      "step": 3039
    },
    {
      "epoch": 0.7319569012219346,
      "grad_norm": 0.9847295880317688,
      "learning_rate": 3.547269566354847e-05,
      "loss": 0.6497,
      "step": 3040
    },
    {
      "epoch": 0.7321976765183892,
      "grad_norm": 1.833725094795227,
      "learning_rate": 3.541314650770996e-05,
      "loss": 0.3938,
      "step": 3041
    },
    {
      "epoch": 0.7324384518148438,
      "grad_norm": 2.012840747833252,
      "learning_rate": 3.535363662082385e-05,
      "loss": 0.4187,
      "step": 3042
    },
    {
      "epoch": 0.7326792271112984,
      "grad_norm": 3.2702102661132812,
      "learning_rate": 3.529416603907233e-05,
      "loss": 0.9575,
      "step": 3043
    },
    {
      "epoch": 0.732920002407753,
      "grad_norm": 1.4701731204986572,
      "learning_rate": 3.523473479861365e-05,
      "loss": 0.5232,
      "step": 3044
    },
    {
      "epoch": 0.7331607777042075,
      "grad_norm": 1.68658447265625,
      "learning_rate": 3.5175342935582114e-05,
      "loss": 0.6121,
      "step": 3045
    },
    {
      "epoch": 0.7334015530006621,
      "grad_norm": 1.9545087814331055,
      "learning_rate": 3.5115990486088166e-05,
      "loss": 0.31,
      "step": 3046
    },
    {
      "epoch": 0.7336423282971167,
      "grad_norm": 4.512576580047607,
      "learning_rate": 3.5056677486218145e-05,
      "loss": 0.8468,
      "step": 3047
    },
    {
      "epoch": 0.7338831035935713,
      "grad_norm": 2.4108033180236816,
      "learning_rate": 3.4997403972034546e-05,
      "loss": 0.826,
      "step": 3048
    },
    {
      "epoch": 0.7341238788900258,
      "grad_norm": 3.4939920902252197,
      "learning_rate": 3.493816997957582e-05,
      "loss": 0.4593,
      "step": 3049
    },
    {
      "epoch": 0.7343646541864804,
      "grad_norm": 2.438183307647705,
      "learning_rate": 3.487897554485628e-05,
      "loss": 0.6518,
      "step": 3050
    },
    {
      "epoch": 0.7346054294829351,
      "grad_norm": 3.4589779376983643,
      "learning_rate": 3.4819820703866344e-05,
      "loss": 0.6474,
      "step": 3051
    },
    {
      "epoch": 0.7348462047793897,
      "grad_norm": 4.573122978210449,
      "learning_rate": 3.4760705492572266e-05,
      "loss": 0.529,
      "step": 3052
    },
    {
      "epoch": 0.7350869800758442,
      "grad_norm": 0.7465322017669678,
      "learning_rate": 3.470162994691624e-05,
      "loss": 0.4171,
      "step": 3053
    },
    {
      "epoch": 0.7353277553722988,
      "grad_norm": 1.0964757204055786,
      "learning_rate": 3.464259410281635e-05,
      "loss": 0.4091,
      "step": 3054
    },
    {
      "epoch": 0.7355685306687534,
      "grad_norm": 3.490908145904541,
      "learning_rate": 3.458359799616647e-05,
      "loss": 1.0212,
      "step": 3055
    },
    {
      "epoch": 0.735809305965208,
      "grad_norm": 1.6229488849639893,
      "learning_rate": 3.45246416628364e-05,
      "loss": 0.5396,
      "step": 3056
    },
    {
      "epoch": 0.7360500812616626,
      "grad_norm": 2.6889917850494385,
      "learning_rate": 3.446572513867175e-05,
      "loss": 0.8915,
      "step": 3057
    },
    {
      "epoch": 0.7362908565581171,
      "grad_norm": 2.3369765281677246,
      "learning_rate": 3.4406848459493814e-05,
      "loss": 0.62,
      "step": 3058
    },
    {
      "epoch": 0.7365316318545717,
      "grad_norm": 1.6141836643218994,
      "learning_rate": 3.434801166109981e-05,
      "loss": 0.4647,
      "step": 3059
    },
    {
      "epoch": 0.7367724071510263,
      "grad_norm": 4.394378662109375,
      "learning_rate": 3.4289214779262636e-05,
      "loss": 1.081,
      "step": 3060
    },
    {
      "epoch": 0.7370131824474809,
      "grad_norm": 2.108896255493164,
      "learning_rate": 3.423045784973091e-05,
      "loss": 0.6174,
      "step": 3061
    },
    {
      "epoch": 0.7372539577439354,
      "grad_norm": 6.742406845092773,
      "learning_rate": 3.4171740908229044e-05,
      "loss": 1.3335,
      "step": 3062
    },
    {
      "epoch": 0.73749473304039,
      "grad_norm": 2.831634998321533,
      "learning_rate": 3.411306399045697e-05,
      "loss": 0.492,
      "step": 3063
    },
    {
      "epoch": 0.7377355083368446,
      "grad_norm": 0.8104602694511414,
      "learning_rate": 3.405442713209047e-05,
      "loss": 0.6458,
      "step": 3064
    },
    {
      "epoch": 0.7379762836332993,
      "grad_norm": 1.0663022994995117,
      "learning_rate": 3.3995830368780825e-05,
      "loss": 0.3529,
      "step": 3065
    },
    {
      "epoch": 0.7382170589297538,
      "grad_norm": 2.1759705543518066,
      "learning_rate": 3.393727373615503e-05,
      "loss": 0.7057,
      "step": 3066
    },
    {
      "epoch": 0.7384578342262084,
      "grad_norm": 2.893615245819092,
      "learning_rate": 3.387875726981563e-05,
      "loss": 0.7425,
      "step": 3067
    },
    {
      "epoch": 0.738698609522663,
      "grad_norm": 1.8920822143554688,
      "learning_rate": 3.3820281005340794e-05,
      "loss": 0.4257,
      "step": 3068
    },
    {
      "epoch": 0.7389393848191176,
      "grad_norm": 2.6992859840393066,
      "learning_rate": 3.3761844978284205e-05,
      "loss": 1.193,
      "step": 3069
    },
    {
      "epoch": 0.7391801601155722,
      "grad_norm": 2.974738836288452,
      "learning_rate": 3.370344922417513e-05,
      "loss": 1.1457,
      "step": 3070
    },
    {
      "epoch": 0.7394209354120267,
      "grad_norm": 0.7591432929039001,
      "learning_rate": 3.364509377851828e-05,
      "loss": 0.4777,
      "step": 3071
    },
    {
      "epoch": 0.7396617107084813,
      "grad_norm": 2.3580071926116943,
      "learning_rate": 3.358677867679394e-05,
      "loss": 0.5326,
      "step": 3072
    },
    {
      "epoch": 0.7399024860049359,
      "grad_norm": 2.4343063831329346,
      "learning_rate": 3.3528503954457756e-05,
      "loss": 0.4066,
      "step": 3073
    },
    {
      "epoch": 0.7401432613013905,
      "grad_norm": 1.2667893171310425,
      "learning_rate": 3.3470269646940935e-05,
      "loss": 0.423,
      "step": 3074
    },
    {
      "epoch": 0.740384036597845,
      "grad_norm": 2.454868793487549,
      "learning_rate": 3.341207578965005e-05,
      "loss": 0.2659,
      "step": 3075
    },
    {
      "epoch": 0.7406248118942996,
      "grad_norm": 1.9105570316314697,
      "learning_rate": 3.335392241796712e-05,
      "loss": 1.0031,
      "step": 3076
    },
    {
      "epoch": 0.7408655871907542,
      "grad_norm": 2.387080669403076,
      "learning_rate": 3.329580956724955e-05,
      "loss": 0.5239,
      "step": 3077
    },
    {
      "epoch": 0.7411063624872088,
      "grad_norm": 8.040419578552246,
      "learning_rate": 3.3237737272830013e-05,
      "loss": 0.4703,
      "step": 3078
    },
    {
      "epoch": 0.7413471377836633,
      "grad_norm": 1.0667513608932495,
      "learning_rate": 3.317970557001664e-05,
      "loss": 0.5395,
      "step": 3079
    },
    {
      "epoch": 0.741587913080118,
      "grad_norm": 0.727729082107544,
      "learning_rate": 3.312171449409285e-05,
      "loss": 0.2785,
      "step": 3080
    },
    {
      "epoch": 0.7418286883765726,
      "grad_norm": 1.5719585418701172,
      "learning_rate": 3.306376408031729e-05,
      "loss": 0.54,
      "step": 3081
    },
    {
      "epoch": 0.7420694636730272,
      "grad_norm": 2.5653600692749023,
      "learning_rate": 3.3005854363923995e-05,
      "loss": 0.2214,
      "step": 3082
    },
    {
      "epoch": 0.7423102389694818,
      "grad_norm": 1.5638865232467651,
      "learning_rate": 3.294798538012217e-05,
      "loss": 0.7477,
      "step": 3083
    },
    {
      "epoch": 0.7425510142659363,
      "grad_norm": 1.88933527469635,
      "learning_rate": 3.289015716409631e-05,
      "loss": 0.7616,
      "step": 3084
    },
    {
      "epoch": 0.7427917895623909,
      "grad_norm": 0.9233277440071106,
      "learning_rate": 3.283236975100613e-05,
      "loss": 0.3405,
      "step": 3085
    },
    {
      "epoch": 0.7430325648588455,
      "grad_norm": 2.3473784923553467,
      "learning_rate": 3.277462317598644e-05,
      "loss": 0.8511,
      "step": 3086
    },
    {
      "epoch": 0.7432733401553001,
      "grad_norm": 1.4704930782318115,
      "learning_rate": 3.271691747414731e-05,
      "loss": 0.5758,
      "step": 3087
    },
    {
      "epoch": 0.7435141154517546,
      "grad_norm": 1.2950267791748047,
      "learning_rate": 3.265925268057398e-05,
      "loss": 0.7987,
      "step": 3088
    },
    {
      "epoch": 0.7437548907482092,
      "grad_norm": 0.8450798392295837,
      "learning_rate": 3.2601628830326726e-05,
      "loss": 0.3298,
      "step": 3089
    },
    {
      "epoch": 0.7439956660446638,
      "grad_norm": 1.7670706510543823,
      "learning_rate": 3.2544045958441004e-05,
      "loss": 0.4484,
      "step": 3090
    },
    {
      "epoch": 0.7442364413411184,
      "grad_norm": 1.2544729709625244,
      "learning_rate": 3.248650409992726e-05,
      "loss": 0.4268,
      "step": 3091
    },
    {
      "epoch": 0.7444772166375729,
      "grad_norm": 1.582452416419983,
      "learning_rate": 3.2429003289771176e-05,
      "loss": 0.5207,
      "step": 3092
    },
    {
      "epoch": 0.7447179919340275,
      "grad_norm": 1.0165259838104248,
      "learning_rate": 3.237154356293336e-05,
      "loss": 0.7176,
      "step": 3093
    },
    {
      "epoch": 0.7449587672304822,
      "grad_norm": 1.914751410484314,
      "learning_rate": 3.231412495434939e-05,
      "loss": 0.5358,
      "step": 3094
    },
    {
      "epoch": 0.7451995425269368,
      "grad_norm": 4.326685428619385,
      "learning_rate": 3.225674749892994e-05,
      "loss": 0.7129,
      "step": 3095
    },
    {
      "epoch": 0.7454403178233914,
      "grad_norm": 0.8451967239379883,
      "learning_rate": 3.219941123156068e-05,
      "loss": 0.4402,
      "step": 3096
    },
    {
      "epoch": 0.7456810931198459,
      "grad_norm": 0.9839834570884705,
      "learning_rate": 3.214211618710211e-05,
      "loss": 0.3726,
      "step": 3097
    },
    {
      "epoch": 0.7459218684163005,
      "grad_norm": 1.0465095043182373,
      "learning_rate": 3.208486240038982e-05,
      "loss": 0.1241,
      "step": 3098
    },
    {
      "epoch": 0.7461626437127551,
      "grad_norm": 1.104686975479126,
      "learning_rate": 3.202764990623417e-05,
      "loss": 0.5279,
      "step": 3099
    },
    {
      "epoch": 0.7464034190092097,
      "grad_norm": 1.0594794750213623,
      "learning_rate": 3.1970478739420496e-05,
      "loss": 0.3273,
      "step": 3100
    },
    {
      "epoch": 0.7466441943056642,
      "grad_norm": 0.9185763597488403,
      "learning_rate": 3.191334893470907e-05,
      "loss": 0.3357,
      "step": 3101
    },
    {
      "epoch": 0.7468849696021188,
      "grad_norm": 2.1206271648406982,
      "learning_rate": 3.185626052683487e-05,
      "loss": 0.5291,
      "step": 3102
    },
    {
      "epoch": 0.7471257448985734,
      "grad_norm": 0.9549693465232849,
      "learning_rate": 3.1799213550507835e-05,
      "loss": 0.6672,
      "step": 3103
    },
    {
      "epoch": 0.747366520195028,
      "grad_norm": 1.769875407218933,
      "learning_rate": 3.174220804041258e-05,
      "loss": 0.9207,
      "step": 3104
    },
    {
      "epoch": 0.7476072954914825,
      "grad_norm": 10.123749732971191,
      "learning_rate": 3.168524403120863e-05,
      "loss": 0.9403,
      "step": 3105
    },
    {
      "epoch": 0.7478480707879371,
      "grad_norm": 3.946068525314331,
      "learning_rate": 3.1628321557530246e-05,
      "loss": 0.6703,
      "step": 3106
    },
    {
      "epoch": 0.7480888460843917,
      "grad_norm": 1.5204689502716064,
      "learning_rate": 3.157144065398638e-05,
      "loss": 0.6827,
      "step": 3107
    },
    {
      "epoch": 0.7483296213808464,
      "grad_norm": 0.9539960026741028,
      "learning_rate": 3.151460135516075e-05,
      "loss": 0.6948,
      "step": 3108
    },
    {
      "epoch": 0.748570396677301,
      "grad_norm": 2.0044784545898438,
      "learning_rate": 3.145780369561182e-05,
      "loss": 0.6487,
      "step": 3109
    },
    {
      "epoch": 0.7488111719737555,
      "grad_norm": 2.3419203758239746,
      "learning_rate": 3.140104770987265e-05,
      "loss": 0.4121,
      "step": 3110
    },
    {
      "epoch": 0.7490519472702101,
      "grad_norm": 2.2572646141052246,
      "learning_rate": 3.1344333432451066e-05,
      "loss": 0.2235,
      "step": 3111
    },
    {
      "epoch": 0.7492927225666647,
      "grad_norm": 1.7564064264297485,
      "learning_rate": 3.1287660897829404e-05,
      "loss": 0.2708,
      "step": 3112
    },
    {
      "epoch": 0.7495334978631193,
      "grad_norm": 1.6659893989562988,
      "learning_rate": 3.1231030140464736e-05,
      "loss": 0.7538,
      "step": 3113
    },
    {
      "epoch": 0.7497742731595738,
      "grad_norm": 4.775331497192383,
      "learning_rate": 3.117444119478871e-05,
      "loss": 1.2959,
      "step": 3114
    },
    {
      "epoch": 0.7500150484560284,
      "grad_norm": 4.739798545837402,
      "learning_rate": 3.111789409520746e-05,
      "loss": 0.5799,
      "step": 3115
    },
    {
      "epoch": 0.750255823752483,
      "grad_norm": 1.0320911407470703,
      "learning_rate": 3.1061388876101804e-05,
      "loss": 0.4581,
      "step": 3116
    },
    {
      "epoch": 0.7504965990489376,
      "grad_norm": 3.4287285804748535,
      "learning_rate": 3.1004925571827023e-05,
      "loss": 0.8336,
      "step": 3117
    },
    {
      "epoch": 0.7507373743453921,
      "grad_norm": 2.3229026794433594,
      "learning_rate": 3.094850421671295e-05,
      "loss": 0.591,
      "step": 3118
    },
    {
      "epoch": 0.7509781496418467,
      "grad_norm": 1.660323977470398,
      "learning_rate": 3.089212484506392e-05,
      "loss": 0.7506,
      "step": 3119
    },
    {
      "epoch": 0.7512189249383013,
      "grad_norm": 2.4399898052215576,
      "learning_rate": 3.083578749115865e-05,
      "loss": 0.7181,
      "step": 3120
    },
    {
      "epoch": 0.751459700234756,
      "grad_norm": 1.1477172374725342,
      "learning_rate": 3.0779492189250414e-05,
      "loss": 0.6411,
      "step": 3121
    },
    {
      "epoch": 0.7517004755312104,
      "grad_norm": 3.424316167831421,
      "learning_rate": 3.0723238973566925e-05,
      "loss": 0.6226,
      "step": 3122
    },
    {
      "epoch": 0.751941250827665,
      "grad_norm": 3.0182266235351562,
      "learning_rate": 3.066702787831017e-05,
      "loss": 0.3055,
      "step": 3123
    },
    {
      "epoch": 0.7521820261241197,
      "grad_norm": 4.055928707122803,
      "learning_rate": 3.06108589376567e-05,
      "loss": 0.9499,
      "step": 3124
    },
    {
      "epoch": 0.7524228014205743,
      "grad_norm": 2.966586112976074,
      "learning_rate": 3.0554732185757315e-05,
      "loss": 0.4065,
      "step": 3125
    },
    {
      "epoch": 0.7526635767170289,
      "grad_norm": 2.517282247543335,
      "learning_rate": 3.0498647656737223e-05,
      "loss": 0.5657,
      "step": 3126
    },
    {
      "epoch": 0.7529043520134834,
      "grad_norm": 5.178724765777588,
      "learning_rate": 3.0442605384695977e-05,
      "loss": 0.7705,
      "step": 3127
    },
    {
      "epoch": 0.753145127309938,
      "grad_norm": 2.8488965034484863,
      "learning_rate": 3.0386605403707346e-05,
      "loss": 0.4091,
      "step": 3128
    },
    {
      "epoch": 0.7533859026063926,
      "grad_norm": 0.804840087890625,
      "learning_rate": 3.0330647747819496e-05,
      "loss": 0.3117,
      "step": 3129
    },
    {
      "epoch": 0.7536266779028472,
      "grad_norm": 1.8321592807769775,
      "learning_rate": 3.0274732451054756e-05,
      "loss": 0.58,
      "step": 3130
    },
    {
      "epoch": 0.7538674531993017,
      "grad_norm": 0.4262060225009918,
      "learning_rate": 3.021885954740977e-05,
      "loss": 0.206,
      "step": 3131
    },
    {
      "epoch": 0.7541082284957563,
      "grad_norm": 1.1734882593154907,
      "learning_rate": 3.016302907085541e-05,
      "loss": 0.9527,
      "step": 3132
    },
    {
      "epoch": 0.7543490037922109,
      "grad_norm": 1.2724254131317139,
      "learning_rate": 3.010724105533671e-05,
      "loss": 0.7622,
      "step": 3133
    },
    {
      "epoch": 0.7545897790886655,
      "grad_norm": 1.9372936487197876,
      "learning_rate": 3.005149553477292e-05,
      "loss": 0.5003,
      "step": 3134
    },
    {
      "epoch": 0.75483055438512,
      "grad_norm": 4.942528247833252,
      "learning_rate": 2.9995792543057478e-05,
      "loss": 0.2299,
      "step": 3135
    },
    {
      "epoch": 0.7550713296815746,
      "grad_norm": 2.330275535583496,
      "learning_rate": 2.994013211405785e-05,
      "loss": 0.5149,
      "step": 3136
    },
    {
      "epoch": 0.7553121049780293,
      "grad_norm": 3.635746717453003,
      "learning_rate": 2.988451428161578e-05,
      "loss": 0.7856,
      "step": 3137
    },
    {
      "epoch": 0.7555528802744839,
      "grad_norm": 1.8431618213653564,
      "learning_rate": 2.982893907954697e-05,
      "loss": 0.3647,
      "step": 3138
    },
    {
      "epoch": 0.7557936555709385,
      "grad_norm": 3.0256638526916504,
      "learning_rate": 2.977340654164129e-05,
      "loss": 0.3034,
      "step": 3139
    },
    {
      "epoch": 0.756034430867393,
      "grad_norm": 1.4221413135528564,
      "learning_rate": 2.9717916701662662e-05,
      "loss": 0.4793,
      "step": 3140
    },
    {
      "epoch": 0.7562752061638476,
      "grad_norm": 1.2902501821517944,
      "learning_rate": 2.966246959334903e-05,
      "loss": 0.5462,
      "step": 3141
    },
    {
      "epoch": 0.7565159814603022,
      "grad_norm": 2.2602968215942383,
      "learning_rate": 2.960706525041238e-05,
      "loss": 0.5961,
      "step": 3142
    },
    {
      "epoch": 0.7567567567567568,
      "grad_norm": 2.0314295291900635,
      "learning_rate": 2.9551703706538623e-05,
      "loss": 0.9683,
      "step": 3143
    },
    {
      "epoch": 0.7569975320532113,
      "grad_norm": 1.4725910425186157,
      "learning_rate": 2.949638499538774e-05,
      "loss": 0.5248,
      "step": 3144
    },
    {
      "epoch": 0.7572383073496659,
      "grad_norm": 1.5069992542266846,
      "learning_rate": 2.944110915059366e-05,
      "loss": 0.5018,
      "step": 3145
    },
    {
      "epoch": 0.7574790826461205,
      "grad_norm": 1.0725562572479248,
      "learning_rate": 2.938587620576415e-05,
      "loss": 0.7976,
      "step": 3146
    },
    {
      "epoch": 0.7577198579425751,
      "grad_norm": 2.012692451477051,
      "learning_rate": 2.9330686194481006e-05,
      "loss": 0.5563,
      "step": 3147
    },
    {
      "epoch": 0.7579606332390296,
      "grad_norm": 1.9315499067306519,
      "learning_rate": 2.927553915029987e-05,
      "loss": 0.8436,
      "step": 3148
    },
    {
      "epoch": 0.7582014085354842,
      "grad_norm": 1.7731233835220337,
      "learning_rate": 2.9220435106750276e-05,
      "loss": 0.6159,
      "step": 3149
    },
    {
      "epoch": 0.7584421838319388,
      "grad_norm": 4.7184977531433105,
      "learning_rate": 2.9165374097335642e-05,
      "loss": 0.625,
      "step": 3150
    },
    {
      "epoch": 0.7586829591283935,
      "grad_norm": 3.9251320362091064,
      "learning_rate": 2.9110356155533113e-05,
      "loss": 0.6392,
      "step": 3151
    },
    {
      "epoch": 0.7589237344248481,
      "grad_norm": 2.422001600265503,
      "learning_rate": 2.905538131479376e-05,
      "loss": 0.4932,
      "step": 3152
    },
    {
      "epoch": 0.7591645097213026,
      "grad_norm": 3.3069140911102295,
      "learning_rate": 2.9000449608542447e-05,
      "loss": 0.7679,
      "step": 3153
    },
    {
      "epoch": 0.7594052850177572,
      "grad_norm": 2.4573240280151367,
      "learning_rate": 2.8945561070177696e-05,
      "loss": 0.8308,
      "step": 3154
    },
    {
      "epoch": 0.7596460603142118,
      "grad_norm": 1.1037508249282837,
      "learning_rate": 2.8890715733071927e-05,
      "loss": 0.4607,
      "step": 3155
    },
    {
      "epoch": 0.7598868356106664,
      "grad_norm": 1.992222785949707,
      "learning_rate": 2.8835913630571155e-05,
      "loss": 1.0511,
      "step": 3156
    },
    {
      "epoch": 0.7601276109071209,
      "grad_norm": 3.1501615047454834,
      "learning_rate": 2.8781154795995247e-05,
      "loss": 0.7244,
      "step": 3157
    },
    {
      "epoch": 0.7603683862035755,
      "grad_norm": 5.186891078948975,
      "learning_rate": 2.8726439262637727e-05,
      "loss": 0.5768,
      "step": 3158
    },
    {
      "epoch": 0.7606091615000301,
      "grad_norm": 3.4781057834625244,
      "learning_rate": 2.8671767063765676e-05,
      "loss": 0.4973,
      "step": 3159
    },
    {
      "epoch": 0.7608499367964847,
      "grad_norm": 0.9576385617256165,
      "learning_rate": 2.8617138232619955e-05,
      "loss": 0.6546,
      "step": 3160
    },
    {
      "epoch": 0.7610907120929392,
      "grad_norm": 1.434462070465088,
      "learning_rate": 2.8562552802415055e-05,
      "loss": 0.5047,
      "step": 3161
    },
    {
      "epoch": 0.7613314873893938,
      "grad_norm": 1.7557677030563354,
      "learning_rate": 2.850801080633896e-05,
      "loss": 0.6268,
      "step": 3162
    },
    {
      "epoch": 0.7615722626858484,
      "grad_norm": 1.2142372131347656,
      "learning_rate": 2.8453512277553406e-05,
      "loss": 0.5757,
      "step": 3163
    },
    {
      "epoch": 0.761813037982303,
      "grad_norm": 1.8882231712341309,
      "learning_rate": 2.8399057249193518e-05,
      "loss": 0.9265,
      "step": 3164
    },
    {
      "epoch": 0.7620538132787577,
      "grad_norm": 0.7379496693611145,
      "learning_rate": 2.8344645754368172e-05,
      "loss": 0.6167,
      "step": 3165
    },
    {
      "epoch": 0.7622945885752122,
      "grad_norm": 1.584207534790039,
      "learning_rate": 2.8290277826159683e-05,
      "loss": 0.7881,
      "step": 3166
    },
    {
      "epoch": 0.7625353638716668,
      "grad_norm": 2.564490556716919,
      "learning_rate": 2.8235953497623803e-05,
      "loss": 0.4444,
      "step": 3167
    },
    {
      "epoch": 0.7627761391681214,
      "grad_norm": 2.109895706176758,
      "learning_rate": 2.8181672801789917e-05,
      "loss": 0.5114,
      "step": 3168
    },
    {
      "epoch": 0.763016914464576,
      "grad_norm": 2.212892770767212,
      "learning_rate": 2.8127435771660747e-05,
      "loss": 0.9194,
      "step": 3169
    },
    {
      "epoch": 0.7632576897610305,
      "grad_norm": 0.5335499048233032,
      "learning_rate": 2.8073242440212556e-05,
      "loss": 0.2802,
      "step": 3170
    },
    {
      "epoch": 0.7634984650574851,
      "grad_norm": 0.7556986808776855,
      "learning_rate": 2.8019092840395044e-05,
      "loss": 0.4347,
      "step": 3171
    },
    {
      "epoch": 0.7637392403539397,
      "grad_norm": 4.619632244110107,
      "learning_rate": 2.796498700513124e-05,
      "loss": 0.8844,
      "step": 3172
    },
    {
      "epoch": 0.7639800156503943,
      "grad_norm": 4.419466018676758,
      "learning_rate": 2.7910924967317585e-05,
      "loss": 0.7078,
      "step": 3173
    },
    {
      "epoch": 0.7642207909468488,
      "grad_norm": 2.6079466342926025,
      "learning_rate": 2.785690675982404e-05,
      "loss": 0.3685,
      "step": 3174
    },
    {
      "epoch": 0.7644615662433034,
      "grad_norm": 2.4883298873901367,
      "learning_rate": 2.7802932415493698e-05,
      "loss": 1.2917,
      "step": 3175
    },
    {
      "epoch": 0.764702341539758,
      "grad_norm": 2.154827356338501,
      "learning_rate": 2.7749001967143128e-05,
      "loss": 1.0546,
      "step": 3176
    },
    {
      "epoch": 0.7649431168362126,
      "grad_norm": 1.366364598274231,
      "learning_rate": 2.7695115447562126e-05,
      "loss": 0.3194,
      "step": 3177
    },
    {
      "epoch": 0.7651838921326672,
      "grad_norm": 2.245346784591675,
      "learning_rate": 2.7641272889513837e-05,
      "loss": 0.5605,
      "step": 3178
    },
    {
      "epoch": 0.7654246674291217,
      "grad_norm": 8.141434669494629,
      "learning_rate": 2.7587474325734687e-05,
      "loss": 0.3617,
      "step": 3179
    },
    {
      "epoch": 0.7656654427255764,
      "grad_norm": 1.830678105354309,
      "learning_rate": 2.7533719788934255e-05,
      "loss": 0.6659,
      "step": 3180
    },
    {
      "epoch": 0.765906218022031,
      "grad_norm": 2.041790246963501,
      "learning_rate": 2.7480009311795473e-05,
      "loss": 0.7169,
      "step": 3181
    },
    {
      "epoch": 0.7661469933184856,
      "grad_norm": 2.693058967590332,
      "learning_rate": 2.7426342926974413e-05,
      "loss": 0.6781,
      "step": 3182
    },
    {
      "epoch": 0.7663877686149401,
      "grad_norm": 1.7061842679977417,
      "learning_rate": 2.737272066710036e-05,
      "loss": 0.5184,
      "step": 3183
    },
    {
      "epoch": 0.7666285439113947,
      "grad_norm": 1.023386001586914,
      "learning_rate": 2.73191425647758e-05,
      "loss": 0.2387,
      "step": 3184
    },
    {
      "epoch": 0.7668693192078493,
      "grad_norm": 0.27525773644447327,
      "learning_rate": 2.726560865257629e-05,
      "loss": 0.1579,
      "step": 3185
    },
    {
      "epoch": 0.7671100945043039,
      "grad_norm": 0.31351879239082336,
      "learning_rate": 2.7212118963050592e-05,
      "loss": 0.2697,
      "step": 3186
    },
    {
      "epoch": 0.7673508698007584,
      "grad_norm": 1.223887324333191,
      "learning_rate": 2.715867352872058e-05,
      "loss": 0.5606,
      "step": 3187
    },
    {
      "epoch": 0.767591645097213,
      "grad_norm": 4.273595333099365,
      "learning_rate": 2.710527238208116e-05,
      "loss": 0.6982,
      "step": 3188
    },
    {
      "epoch": 0.7678324203936676,
      "grad_norm": 3.2111504077911377,
      "learning_rate": 2.705191555560035e-05,
      "loss": 0.5278,
      "step": 3189
    },
    {
      "epoch": 0.7680731956901222,
      "grad_norm": 1.322572112083435,
      "learning_rate": 2.6998603081719243e-05,
      "loss": 0.6689,
      "step": 3190
    },
    {
      "epoch": 0.7683139709865767,
      "grad_norm": 2.3791556358337402,
      "learning_rate": 2.6945334992851933e-05,
      "loss": 0.3503,
      "step": 3191
    },
    {
      "epoch": 0.7685547462830313,
      "grad_norm": 3.1207807064056396,
      "learning_rate": 2.6892111321385584e-05,
      "loss": 0.6434,
      "step": 3192
    },
    {
      "epoch": 0.7687955215794859,
      "grad_norm": 1.937662124633789,
      "learning_rate": 2.6838932099680225e-05,
      "loss": 0.2284,
      "step": 3193
    },
    {
      "epoch": 0.7690362968759406,
      "grad_norm": 1.8253540992736816,
      "learning_rate": 2.678579736006901e-05,
      "loss": 0.4102,
      "step": 3194
    },
    {
      "epoch": 0.7692770721723952,
      "grad_norm": 1.696462869644165,
      "learning_rate": 2.6732707134857937e-05,
      "loss": 0.603,
      "step": 3195
    },
    {
      "epoch": 0.7695178474688497,
      "grad_norm": 1.8397753238677979,
      "learning_rate": 2.6679661456325988e-05,
      "loss": 0.3548,
      "step": 3196
    },
    {
      "epoch": 0.7697586227653043,
      "grad_norm": 2.073573350906372,
      "learning_rate": 2.6626660356725064e-05,
      "loss": 0.6005,
      "step": 3197
    },
    {
      "epoch": 0.7699993980617589,
      "grad_norm": 0.84525465965271,
      "learning_rate": 2.6573703868279963e-05,
      "loss": 0.3808,
      "step": 3198
    },
    {
      "epoch": 0.7702401733582135,
      "grad_norm": 2.4383602142333984,
      "learning_rate": 2.6520792023188333e-05,
      "loss": 0.8604,
      "step": 3199
    },
    {
      "epoch": 0.770480948654668,
      "grad_norm": 0.7531054019927979,
      "learning_rate": 2.646792485362074e-05,
      "loss": 0.5609,
      "step": 3200
    },
    {
      "epoch": 0.7707217239511226,
      "grad_norm": 0.8975092768669128,
      "learning_rate": 2.6415102391720482e-05,
      "loss": 0.4972,
      "step": 3201
    },
    {
      "epoch": 0.7709624992475772,
      "grad_norm": 0.7330169081687927,
      "learning_rate": 2.6362324669603776e-05,
      "loss": 0.3688,
      "step": 3202
    },
    {
      "epoch": 0.7712032745440318,
      "grad_norm": 1.5205063819885254,
      "learning_rate": 2.630959171935956e-05,
      "loss": 0.7152,
      "step": 3203
    },
    {
      "epoch": 0.7714440498404863,
      "grad_norm": 4.553707122802734,
      "learning_rate": 2.6256903573049597e-05,
      "loss": 0.5479,
      "step": 3204
    },
    {
      "epoch": 0.7716848251369409,
      "grad_norm": 0.9805248379707336,
      "learning_rate": 2.6204260262708403e-05,
      "loss": 0.655,
      "step": 3205
    },
    {
      "epoch": 0.7719256004333955,
      "grad_norm": 1.8487534523010254,
      "learning_rate": 2.6151661820343243e-05,
      "loss": 0.6114,
      "step": 3206
    },
    {
      "epoch": 0.7721663757298501,
      "grad_norm": 0.826151967048645,
      "learning_rate": 2.6099108277934103e-05,
      "loss": 0.2134,
      "step": 3207
    },
    {
      "epoch": 0.7724071510263048,
      "grad_norm": 3.3528854846954346,
      "learning_rate": 2.6046599667433603e-05,
      "loss": 0.8448,
      "step": 3208
    },
    {
      "epoch": 0.7726479263227592,
      "grad_norm": 1.5255182981491089,
      "learning_rate": 2.5994136020767124e-05,
      "loss": 0.5631,
      "step": 3209
    },
    {
      "epoch": 0.7728887016192139,
      "grad_norm": 2.4078643321990967,
      "learning_rate": 2.5941717369832707e-05,
      "loss": 0.5426,
      "step": 3210
    },
    {
      "epoch": 0.7731294769156685,
      "grad_norm": 4.288626670837402,
      "learning_rate": 2.588934374650096e-05,
      "loss": 0.4884,
      "step": 3211
    },
    {
      "epoch": 0.7733702522121231,
      "grad_norm": 3.0660624504089355,
      "learning_rate": 2.583701518261519e-05,
      "loss": 0.4575,
      "step": 3212
    },
    {
      "epoch": 0.7736110275085776,
      "grad_norm": 0.8354116678237915,
      "learning_rate": 2.5784731709991272e-05,
      "loss": 0.6711,
      "step": 3213
    },
    {
      "epoch": 0.7738518028050322,
      "grad_norm": 3.1987497806549072,
      "learning_rate": 2.57324933604177e-05,
      "loss": 0.6139,
      "step": 3214
    },
    {
      "epoch": 0.7740925781014868,
      "grad_norm": 0.9417548179626465,
      "learning_rate": 2.5680300165655503e-05,
      "loss": 0.6099,
      "step": 3215
    },
    {
      "epoch": 0.7743333533979414,
      "grad_norm": 1.8997162580490112,
      "learning_rate": 2.5628152157438222e-05,
      "loss": 0.6094,
      "step": 3216
    },
    {
      "epoch": 0.7745741286943959,
      "grad_norm": 1.4700846672058105,
      "learning_rate": 2.5576049367471998e-05,
      "loss": 0.2409,
      "step": 3217
    },
    {
      "epoch": 0.7748149039908505,
      "grad_norm": 7.270529747009277,
      "learning_rate": 2.5523991827435468e-05,
      "loss": 0.6279,
      "step": 3218
    },
    {
      "epoch": 0.7750556792873051,
      "grad_norm": 1.6653450727462769,
      "learning_rate": 2.5471979568979666e-05,
      "loss": 0.6544,
      "step": 3219
    },
    {
      "epoch": 0.7752964545837597,
      "grad_norm": 4.143406391143799,
      "learning_rate": 2.5420012623728208e-05,
      "loss": 0.733,
      "step": 3220
    },
    {
      "epoch": 0.7755372298802143,
      "grad_norm": 0.922996997833252,
      "learning_rate": 2.5368091023277096e-05,
      "loss": 0.3873,
      "step": 3221
    },
    {
      "epoch": 0.7757780051766688,
      "grad_norm": 1.7943379878997803,
      "learning_rate": 2.5316214799194805e-05,
      "loss": 0.1036,
      "step": 3222
    },
    {
      "epoch": 0.7760187804731234,
      "grad_norm": 3.269728422164917,
      "learning_rate": 2.5264383983022198e-05,
      "loss": 0.697,
      "step": 3223
    },
    {
      "epoch": 0.7762595557695781,
      "grad_norm": 1.3404314517974854,
      "learning_rate": 2.5212598606272486e-05,
      "loss": 0.7117,
      "step": 3224
    },
    {
      "epoch": 0.7765003310660327,
      "grad_norm": 3.5856986045837402,
      "learning_rate": 2.516085870043131e-05,
      "loss": 0.6111,
      "step": 3225
    },
    {
      "epoch": 0.7767411063624872,
      "grad_norm": 1.1721508502960205,
      "learning_rate": 2.51091642969567e-05,
      "loss": 0.2938,
      "step": 3226
    },
    {
      "epoch": 0.7769818816589418,
      "grad_norm": 2.648401975631714,
      "learning_rate": 2.50575154272789e-05,
      "loss": 0.8979,
      "step": 3227
    },
    {
      "epoch": 0.7772226569553964,
      "grad_norm": 4.080894947052002,
      "learning_rate": 2.5005912122800557e-05,
      "loss": 0.284,
      "step": 3228
    },
    {
      "epoch": 0.777463432251851,
      "grad_norm": 2.2749102115631104,
      "learning_rate": 2.495435441489661e-05,
      "loss": 0.6611,
      "step": 3229
    },
    {
      "epoch": 0.7777042075483055,
      "grad_norm": 2.709296226501465,
      "learning_rate": 2.4902842334914266e-05,
      "loss": 0.6276,
      "step": 3230
    },
    {
      "epoch": 0.7779449828447601,
      "grad_norm": 1.5649709701538086,
      "learning_rate": 2.4851375914173003e-05,
      "loss": 0.706,
      "step": 3231
    },
    {
      "epoch": 0.7781857581412147,
      "grad_norm": 1.1615535020828247,
      "learning_rate": 2.4799955183964463e-05,
      "loss": 0.4029,
      "step": 3232
    },
    {
      "epoch": 0.7784265334376693,
      "grad_norm": 1.1178641319274902,
      "learning_rate": 2.4748580175552627e-05,
      "loss": 0.6088,
      "step": 3233
    },
    {
      "epoch": 0.7786673087341239,
      "grad_norm": 1.7456036806106567,
      "learning_rate": 2.4697250920173566e-05,
      "loss": 0.6452,
      "step": 3234
    },
    {
      "epoch": 0.7789080840305784,
      "grad_norm": 1.5968141555786133,
      "learning_rate": 2.46459674490356e-05,
      "loss": 0.2782,
      "step": 3235
    },
    {
      "epoch": 0.779148859327033,
      "grad_norm": 1.4153774976730347,
      "learning_rate": 2.4594729793319227e-05,
      "loss": 0.9929,
      "step": 3236
    },
    {
      "epoch": 0.7793896346234876,
      "grad_norm": 4.274727821350098,
      "learning_rate": 2.4543537984176978e-05,
      "loss": 0.4176,
      "step": 3237
    },
    {
      "epoch": 0.7796304099199423,
      "grad_norm": 2.977787494659424,
      "learning_rate": 2.449239205273367e-05,
      "loss": 0.5403,
      "step": 3238
    },
    {
      "epoch": 0.7798711852163968,
      "grad_norm": 2.9022774696350098,
      "learning_rate": 2.4441292030086137e-05,
      "loss": 0.4639,
      "step": 3239
    },
    {
      "epoch": 0.7801119605128514,
      "grad_norm": 1.2932614088058472,
      "learning_rate": 2.439023794730326e-05,
      "loss": 0.6634,
      "step": 3240
    },
    {
      "epoch": 0.780352735809306,
      "grad_norm": 3.5876283645629883,
      "learning_rate": 2.433922983542609e-05,
      "loss": 1.0981,
      "step": 3241
    },
    {
      "epoch": 0.7805935111057606,
      "grad_norm": 1.4793999195098877,
      "learning_rate": 2.4288267725467618e-05,
      "loss": 0.398,
      "step": 3242
    },
    {
      "epoch": 0.7808342864022151,
      "grad_norm": 3.516136884689331,
      "learning_rate": 2.4237351648412942e-05,
      "loss": 0.6531,
      "step": 3243
    },
    {
      "epoch": 0.7810750616986697,
      "grad_norm": 0.46728962659835815,
      "learning_rate": 2.4186481635219193e-05,
      "loss": 0.0365,
      "step": 3244
    },
    {
      "epoch": 0.7813158369951243,
      "grad_norm": 5.394861221313477,
      "learning_rate": 2.4135657716815397e-05,
      "loss": 0.18,
      "step": 3245
    },
    {
      "epoch": 0.7815566122915789,
      "grad_norm": 2.0945961475372314,
      "learning_rate": 2.408487992410263e-05,
      "loss": 0.5442,
      "step": 3246
    },
    {
      "epoch": 0.7817973875880335,
      "grad_norm": 0.8790675401687622,
      "learning_rate": 2.4034148287953904e-05,
      "loss": 0.434,
      "step": 3247
    },
    {
      "epoch": 0.782038162884488,
      "grad_norm": 5.560616493225098,
      "learning_rate": 2.3983462839214177e-05,
      "loss": 0.4532,
      "step": 3248
    },
    {
      "epoch": 0.7822789381809426,
      "grad_norm": 1.3845301866531372,
      "learning_rate": 2.3932823608700338e-05,
      "loss": 0.6569,
      "step": 3249
    },
    {
      "epoch": 0.7825197134773972,
      "grad_norm": 4.446075916290283,
      "learning_rate": 2.3882230627201096e-05,
      "loss": 0.2362,
      "step": 3250
    },
    {
      "epoch": 0.7827604887738518,
      "grad_norm": 3.534898281097412,
      "learning_rate": 2.3831683925477134e-05,
      "loss": 0.3983,
      "step": 3251
    },
    {
      "epoch": 0.7830012640703063,
      "grad_norm": 2.6837666034698486,
      "learning_rate": 2.3781183534260975e-05,
      "loss": 0.6582,
      "step": 3252
    },
    {
      "epoch": 0.783242039366761,
      "grad_norm": 1.4857863187789917,
      "learning_rate": 2.373072948425692e-05,
      "loss": 0.3928,
      "step": 3253
    },
    {
      "epoch": 0.7834828146632156,
      "grad_norm": 7.157393932342529,
      "learning_rate": 2.368032180614118e-05,
      "loss": 0.7341,
      "step": 3254
    },
    {
      "epoch": 0.7837235899596702,
      "grad_norm": 1.6006975173950195,
      "learning_rate": 2.3629960530561736e-05,
      "loss": 0.8314,
      "step": 3255
    },
    {
      "epoch": 0.7839643652561247,
      "grad_norm": 2.4584901332855225,
      "learning_rate": 2.3579645688138352e-05,
      "loss": 0.4926,
      "step": 3256
    },
    {
      "epoch": 0.7842051405525793,
      "grad_norm": 2.698150396347046,
      "learning_rate": 2.3529377309462585e-05,
      "loss": 0.7207,
      "step": 3257
    },
    {
      "epoch": 0.7844459158490339,
      "grad_norm": 2.509859561920166,
      "learning_rate": 2.347915542509769e-05,
      "loss": 0.8804,
      "step": 3258
    },
    {
      "epoch": 0.7846866911454885,
      "grad_norm": 2.254075765609741,
      "learning_rate": 2.342898006557872e-05,
      "loss": 0.4099,
      "step": 3259
    },
    {
      "epoch": 0.7849274664419431,
      "grad_norm": 1.3479466438293457,
      "learning_rate": 2.337885126141236e-05,
      "loss": 0.4939,
      "step": 3260
    },
    {
      "epoch": 0.7851682417383976,
      "grad_norm": 1.9788506031036377,
      "learning_rate": 2.3328769043077058e-05,
      "loss": 0.6189,
      "step": 3261
    },
    {
      "epoch": 0.7854090170348522,
      "grad_norm": 1.8684098720550537,
      "learning_rate": 2.3278733441022925e-05,
      "loss": 0.8066,
      "step": 3262
    },
    {
      "epoch": 0.7856497923313068,
      "grad_norm": 3.792185068130493,
      "learning_rate": 2.3228744485671718e-05,
      "loss": 0.4835,
      "step": 3263
    },
    {
      "epoch": 0.7858905676277614,
      "grad_norm": 0.6826027035713196,
      "learning_rate": 2.3178802207416828e-05,
      "loss": 0.4087,
      "step": 3264
    },
    {
      "epoch": 0.7861313429242159,
      "grad_norm": 1.6336182355880737,
      "learning_rate": 2.3128906636623303e-05,
      "loss": 0.5187,
      "step": 3265
    },
    {
      "epoch": 0.7863721182206705,
      "grad_norm": 2.8685998916625977,
      "learning_rate": 2.3079057803627713e-05,
      "loss": 0.3996,
      "step": 3266
    },
    {
      "epoch": 0.7866128935171252,
      "grad_norm": 1.4814997911453247,
      "learning_rate": 2.3029255738738308e-05,
      "loss": 0.2919,
      "step": 3267
    },
    {
      "epoch": 0.7868536688135798,
      "grad_norm": 2.786038398742676,
      "learning_rate": 2.2979500472234806e-05,
      "loss": 0.62,
      "step": 3268
    },
    {
      "epoch": 0.7870944441100343,
      "grad_norm": 4.719537734985352,
      "learning_rate": 2.2929792034368535e-05,
      "loss": 0.822,
      "step": 3269
    },
    {
      "epoch": 0.7873352194064889,
      "grad_norm": 8.993035316467285,
      "learning_rate": 2.2880130455362358e-05,
      "loss": 0.4918,
      "step": 3270
    },
    {
      "epoch": 0.7875759947029435,
      "grad_norm": 1.7135777473449707,
      "learning_rate": 2.2830515765410622e-05,
      "loss": 0.4715,
      "step": 3271
    },
    {
      "epoch": 0.7878167699993981,
      "grad_norm": 2.256098508834839,
      "learning_rate": 2.278094799467918e-05,
      "loss": 1.0771,
      "step": 3272
    },
    {
      "epoch": 0.7880575452958526,
      "grad_norm": 1.1801178455352783,
      "learning_rate": 2.2731427173305307e-05,
      "loss": 0.6812,
      "step": 3273
    },
    {
      "epoch": 0.7882983205923072,
      "grad_norm": 1.6812212467193604,
      "learning_rate": 2.268195333139781e-05,
      "loss": 0.567,
      "step": 3274
    },
    {
      "epoch": 0.7885390958887618,
      "grad_norm": 2.234989881515503,
      "learning_rate": 2.263252649903691e-05,
      "loss": 0.5069,
      "step": 3275
    },
    {
      "epoch": 0.7887798711852164,
      "grad_norm": 1.5656296014785767,
      "learning_rate": 2.2583146706274184e-05,
      "loss": 0.3535,
      "step": 3276
    },
    {
      "epoch": 0.789020646481671,
      "grad_norm": 1.6030066013336182,
      "learning_rate": 2.253381398313269e-05,
      "loss": 0.9362,
      "step": 3277
    },
    {
      "epoch": 0.7892614217781255,
      "grad_norm": 1.6599286794662476,
      "learning_rate": 2.2484528359606816e-05,
      "loss": 0.2563,
      "step": 3278
    },
    {
      "epoch": 0.7895021970745801,
      "grad_norm": 3.2402637004852295,
      "learning_rate": 2.2435289865662344e-05,
      "loss": 0.8971,
      "step": 3279
    },
    {
      "epoch": 0.7897429723710347,
      "grad_norm": 3.251466751098633,
      "learning_rate": 2.2386098531236422e-05,
      "loss": 1.0431,
      "step": 3280
    },
    {
      "epoch": 0.7899837476674894,
      "grad_norm": 1.1390844583511353,
      "learning_rate": 2.233695438623743e-05,
      "loss": 0.381,
      "step": 3281
    },
    {
      "epoch": 0.7902245229639439,
      "grad_norm": 1.3459970951080322,
      "learning_rate": 2.228785746054515e-05,
      "loss": 0.5704,
      "step": 3282
    },
    {
      "epoch": 0.7904652982603985,
      "grad_norm": 6.388441562652588,
      "learning_rate": 2.223880778401065e-05,
      "loss": 0.7198,
      "step": 3283
    },
    {
      "epoch": 0.7907060735568531,
      "grad_norm": 2.3066797256469727,
      "learning_rate": 2.2189805386456186e-05,
      "loss": 0.3931,
      "step": 3284
    },
    {
      "epoch": 0.7909468488533077,
      "grad_norm": 4.641172409057617,
      "learning_rate": 2.2140850297675353e-05,
      "loss": 0.9101,
      "step": 3285
    },
    {
      "epoch": 0.7911876241497622,
      "grad_norm": 2.48939847946167,
      "learning_rate": 2.2091942547432955e-05,
      "loss": 0.5307,
      "step": 3286
    },
    {
      "epoch": 0.7914283994462168,
      "grad_norm": 1.4391555786132812,
      "learning_rate": 2.2043082165465023e-05,
      "loss": 0.3424,
      "step": 3287
    },
    {
      "epoch": 0.7916691747426714,
      "grad_norm": 2.05548357963562,
      "learning_rate": 2.19942691814788e-05,
      "loss": 0.5474,
      "step": 3288
    },
    {
      "epoch": 0.791909950039126,
      "grad_norm": 3.470940589904785,
      "learning_rate": 2.194550362515263e-05,
      "loss": 0.5817,
      "step": 3289
    },
    {
      "epoch": 0.7921507253355806,
      "grad_norm": 6.029779434204102,
      "learning_rate": 2.189678552613612e-05,
      "loss": 0.8264,
      "step": 3290
    },
    {
      "epoch": 0.7923915006320351,
      "grad_norm": 2.177302837371826,
      "learning_rate": 2.184811491405001e-05,
      "loss": 0.5883,
      "step": 3291
    },
    {
      "epoch": 0.7926322759284897,
      "grad_norm": 3.795201063156128,
      "learning_rate": 2.1799491818486083e-05,
      "loss": 0.8381,
      "step": 3292
    },
    {
      "epoch": 0.7928730512249443,
      "grad_norm": 2.861975908279419,
      "learning_rate": 2.1750916269007316e-05,
      "loss": 0.5125,
      "step": 3293
    },
    {
      "epoch": 0.793113826521399,
      "grad_norm": 2.659313917160034,
      "learning_rate": 2.1702388295147747e-05,
      "loss": 0.5038,
      "step": 3294
    },
    {
      "epoch": 0.7933546018178534,
      "grad_norm": 2.762467384338379,
      "learning_rate": 2.165390792641251e-05,
      "loss": 0.5655,
      "step": 3295
    },
    {
      "epoch": 0.793595377114308,
      "grad_norm": 1.6343928575515747,
      "learning_rate": 2.160547519227779e-05,
      "loss": 0.4066,
      "step": 3296
    },
    {
      "epoch": 0.7938361524107627,
      "grad_norm": 0.6321638822555542,
      "learning_rate": 2.155709012219076e-05,
      "loss": 0.3478,
      "step": 3297
    },
    {
      "epoch": 0.7940769277072173,
      "grad_norm": 4.435551643371582,
      "learning_rate": 2.1508752745569695e-05,
      "loss": 1.071,
      "step": 3298
    },
    {
      "epoch": 0.7943177030036718,
      "grad_norm": 11.227981567382812,
      "learning_rate": 2.1460463091803773e-05,
      "loss": 0.741,
      "step": 3299
    },
    {
      "epoch": 0.7945584783001264,
      "grad_norm": 1.14915931224823,
      "learning_rate": 2.1412221190253245e-05,
      "loss": 0.5523,
      "step": 3300
    },
    {
      "epoch": 0.794799253596581,
      "grad_norm": 0.4120855927467346,
      "learning_rate": 2.1364027070249282e-05,
      "loss": 0.1457,
      "step": 3301
    },
    {
      "epoch": 0.7950400288930356,
      "grad_norm": 3.5175342559814453,
      "learning_rate": 2.1315880761094044e-05,
      "loss": 1.1578,
      "step": 3302
    },
    {
      "epoch": 0.7952808041894902,
      "grad_norm": 1.621909260749817,
      "learning_rate": 2.126778229206058e-05,
      "loss": 1.1031,
      "step": 3303
    },
    {
      "epoch": 0.7955215794859447,
      "grad_norm": 1.1469271183013916,
      "learning_rate": 2.1219731692392887e-05,
      "loss": 0.16,
      "step": 3304
    },
    {
      "epoch": 0.7957623547823993,
      "grad_norm": 1.0267417430877686,
      "learning_rate": 2.1171728991305795e-05,
      "loss": 0.6588,
      "step": 3305
    },
    {
      "epoch": 0.7960031300788539,
      "grad_norm": 0.7822784781455994,
      "learning_rate": 2.1123774217985116e-05,
      "loss": 0.4397,
      "step": 3306
    },
    {
      "epoch": 0.7962439053753085,
      "grad_norm": 0.9245109558105469,
      "learning_rate": 2.107586740158738e-05,
      "loss": 0.2842,
      "step": 3307
    },
    {
      "epoch": 0.796484680671763,
      "grad_norm": 1.0245726108551025,
      "learning_rate": 2.1028008571240088e-05,
      "loss": 1.107,
      "step": 3308
    },
    {
      "epoch": 0.7967254559682176,
      "grad_norm": 4.2257466316223145,
      "learning_rate": 2.0980197756041542e-05,
      "loss": 0.7681,
      "step": 3309
    },
    {
      "epoch": 0.7969662312646723,
      "grad_norm": 1.4079909324645996,
      "learning_rate": 2.0932434985060733e-05,
      "loss": 0.3071,
      "step": 3310
    },
    {
      "epoch": 0.7972070065611269,
      "grad_norm": 2.9457712173461914,
      "learning_rate": 2.0884720287337657e-05,
      "loss": 0.5083,
      "step": 3311
    },
    {
      "epoch": 0.7974477818575814,
      "grad_norm": 1.8760308027267456,
      "learning_rate": 2.0837053691882856e-05,
      "loss": 0.1079,
      "step": 3312
    },
    {
      "epoch": 0.797688557154036,
      "grad_norm": 2.5826492309570312,
      "learning_rate": 2.0789435227677777e-05,
      "loss": 0.8308,
      "step": 3313
    },
    {
      "epoch": 0.7979293324504906,
      "grad_norm": 1.930856466293335,
      "learning_rate": 2.074186492367457e-05,
      "loss": 0.6475,
      "step": 3314
    },
    {
      "epoch": 0.7981701077469452,
      "grad_norm": 1.8756681680679321,
      "learning_rate": 2.069434280879603e-05,
      "loss": 0.4886,
      "step": 3315
    },
    {
      "epoch": 0.7984108830433998,
      "grad_norm": 3.851440668106079,
      "learning_rate": 2.0646868911935735e-05,
      "loss": 1.2528,
      "step": 3316
    },
    {
      "epoch": 0.7986516583398543,
      "grad_norm": 1.4915354251861572,
      "learning_rate": 2.0599443261957962e-05,
      "loss": 0.534,
      "step": 3317
    },
    {
      "epoch": 0.7988924336363089,
      "grad_norm": 0.9275015592575073,
      "learning_rate": 2.0552065887697546e-05,
      "loss": 0.3342,
      "step": 3318
    },
    {
      "epoch": 0.7991332089327635,
      "grad_norm": 2.0070860385894775,
      "learning_rate": 2.0504736817960068e-05,
      "loss": 0.7064,
      "step": 3319
    },
    {
      "epoch": 0.7993739842292181,
      "grad_norm": 2.1603689193725586,
      "learning_rate": 2.045745608152171e-05,
      "loss": 0.7129,
      "step": 3320
    },
    {
      "epoch": 0.7996147595256726,
      "grad_norm": 1.2571876049041748,
      "learning_rate": 2.0410223707129274e-05,
      "loss": 0.8612,
      "step": 3321
    },
    {
      "epoch": 0.7998555348221272,
      "grad_norm": 2.6326212882995605,
      "learning_rate": 2.0363039723500156e-05,
      "loss": 0.4104,
      "step": 3322
    },
    {
      "epoch": 0.8000963101185818,
      "grad_norm": 1.6665747165679932,
      "learning_rate": 2.0315904159322287e-05,
      "loss": 0.6619,
      "step": 3323
    },
    {
      "epoch": 0.8003370854150365,
      "grad_norm": 1.7550292015075684,
      "learning_rate": 2.026881704325425e-05,
      "loss": 0.8006,
      "step": 3324
    },
    {
      "epoch": 0.800577860711491,
      "grad_norm": 0.9334133267402649,
      "learning_rate": 2.0221778403925062e-05,
      "loss": 0.4847,
      "step": 3325
    },
    {
      "epoch": 0.8008186360079456,
      "grad_norm": 1.6787467002868652,
      "learning_rate": 2.0174788269934343e-05,
      "loss": 0.4084,
      "step": 3326
    },
    {
      "epoch": 0.8010594113044002,
      "grad_norm": 1.1613237857818604,
      "learning_rate": 2.01278466698522e-05,
      "loss": 0.4913,
      "step": 3327
    },
    {
      "epoch": 0.8013001866008548,
      "grad_norm": 1.7713500261306763,
      "learning_rate": 2.0080953632219247e-05,
      "loss": 0.3606,
      "step": 3328
    },
    {
      "epoch": 0.8015409618973094,
      "grad_norm": 2.774338960647583,
      "learning_rate": 2.0034109185546534e-05,
      "loss": 0.6157,
      "step": 3329
    },
    {
      "epoch": 0.8017817371937639,
      "grad_norm": 2.1375133991241455,
      "learning_rate": 1.9987313358315628e-05,
      "loss": 0.8029,
      "step": 3330
    },
    {
      "epoch": 0.8020225124902185,
      "grad_norm": 4.05165958404541,
      "learning_rate": 1.994056617897846e-05,
      "loss": 0.9467,
      "step": 3331
    },
    {
      "epoch": 0.8022632877866731,
      "grad_norm": 1.2718948125839233,
      "learning_rate": 1.9893867675957445e-05,
      "loss": 0.6438,
      "step": 3332
    },
    {
      "epoch": 0.8025040630831277,
      "grad_norm": 2.1101791858673096,
      "learning_rate": 1.984721787764534e-05,
      "loss": 0.6168,
      "step": 3333
    },
    {
      "epoch": 0.8027448383795822,
      "grad_norm": 3.8065285682678223,
      "learning_rate": 1.9800616812405348e-05,
      "loss": 0.331,
      "step": 3334
    },
    {
      "epoch": 0.8029856136760368,
      "grad_norm": 1.6323808431625366,
      "learning_rate": 1.9754064508571036e-05,
      "loss": 0.4418,
      "step": 3335
    },
    {
      "epoch": 0.8032263889724914,
      "grad_norm": 1.982974648475647,
      "learning_rate": 1.9707560994446284e-05,
      "loss": 0.6296,
      "step": 3336
    },
    {
      "epoch": 0.803467164268946,
      "grad_norm": 0.8455390334129333,
      "learning_rate": 1.9661106298305387e-05,
      "loss": 0.4286,
      "step": 3337
    },
    {
      "epoch": 0.8037079395654005,
      "grad_norm": 1.4520126581192017,
      "learning_rate": 1.9614700448392832e-05,
      "loss": 0.7171,
      "step": 3338
    },
    {
      "epoch": 0.8039487148618552,
      "grad_norm": 1.9037809371948242,
      "learning_rate": 1.9568343472923524e-05,
      "loss": 0.4656,
      "step": 3339
    },
    {
      "epoch": 0.8041894901583098,
      "grad_norm": 1.7248350381851196,
      "learning_rate": 1.9522035400082615e-05,
      "loss": 0.3961,
      "step": 3340
    },
    {
      "epoch": 0.8044302654547644,
      "grad_norm": 2.145430326461792,
      "learning_rate": 1.947577625802548e-05,
      "loss": 0.5493,
      "step": 3341
    },
    {
      "epoch": 0.804671040751219,
      "grad_norm": 0.38401633501052856,
      "learning_rate": 1.9429566074877816e-05,
      "loss": 0.4645,
      "step": 3342
    },
    {
      "epoch": 0.8049118160476735,
      "grad_norm": 4.1669840812683105,
      "learning_rate": 1.938340487873549e-05,
      "loss": 0.6155,
      "step": 3343
    },
    {
      "epoch": 0.8051525913441281,
      "grad_norm": 1.835777997970581,
      "learning_rate": 1.9337292697664633e-05,
      "loss": 0.7442,
      "step": 3344
    },
    {
      "epoch": 0.8053933666405827,
      "grad_norm": 1.9592784643173218,
      "learning_rate": 1.9291229559701572e-05,
      "loss": 0.6318,
      "step": 3345
    },
    {
      "epoch": 0.8056341419370373,
      "grad_norm": 2.0905535221099854,
      "learning_rate": 1.9245215492852766e-05,
      "loss": 0.5699,
      "step": 3346
    },
    {
      "epoch": 0.8058749172334918,
      "grad_norm": 1.7609286308288574,
      "learning_rate": 1.919925052509487e-05,
      "loss": 0.9583,
      "step": 3347
    },
    {
      "epoch": 0.8061156925299464,
      "grad_norm": 2.8224740028381348,
      "learning_rate": 1.9153334684374725e-05,
      "loss": 0.8957,
      "step": 3348
    },
    {
      "epoch": 0.806356467826401,
      "grad_norm": 0.9539978504180908,
      "learning_rate": 1.9107467998609228e-05,
      "loss": 0.7801,
      "step": 3349
    },
    {
      "epoch": 0.8065972431228556,
      "grad_norm": 3.402155637741089,
      "learning_rate": 1.9061650495685433e-05,
      "loss": 0.8503,
      "step": 3350
    },
    {
      "epoch": 0.8068380184193101,
      "grad_norm": 1.3554385900497437,
      "learning_rate": 1.9015882203460488e-05,
      "loss": 0.5393,
      "step": 3351
    },
    {
      "epoch": 0.8070787937157647,
      "grad_norm": 3.57460618019104,
      "learning_rate": 1.8970163149761634e-05,
      "loss": 0.9425,
      "step": 3352
    },
    {
      "epoch": 0.8073195690122194,
      "grad_norm": 7.111121654510498,
      "learning_rate": 1.8924493362386166e-05,
      "loss": 0.5293,
      "step": 3353
    },
    {
      "epoch": 0.807560344308674,
      "grad_norm": 2.1528825759887695,
      "learning_rate": 1.887887286910137e-05,
      "loss": 0.2559,
      "step": 3354
    },
    {
      "epoch": 0.8078011196051285,
      "grad_norm": 0.7006820440292358,
      "learning_rate": 1.8833301697644644e-05,
      "loss": 0.431,
      "step": 3355
    },
    {
      "epoch": 0.8080418949015831,
      "grad_norm": 2.09385085105896,
      "learning_rate": 1.878777987572339e-05,
      "loss": 1.159,
      "step": 3356
    },
    {
      "epoch": 0.8082826701980377,
      "grad_norm": 3.343334674835205,
      "learning_rate": 1.8742307431014905e-05,
      "loss": 1.0441,
      "step": 3357
    },
    {
      "epoch": 0.8085234454944923,
      "grad_norm": 4.183254241943359,
      "learning_rate": 1.869688439116659e-05,
      "loss": 1.0862,
      "step": 3358
    },
    {
      "epoch": 0.8087642207909469,
      "grad_norm": 2.1016793251037598,
      "learning_rate": 1.8651510783795734e-05,
      "loss": 0.5553,
      "step": 3359
    },
    {
      "epoch": 0.8090049960874014,
      "grad_norm": 0.7969531416893005,
      "learning_rate": 1.8606186636489596e-05,
      "loss": 0.54,
      "step": 3360
    },
    {
      "epoch": 0.809245771383856,
      "grad_norm": 1.4294320344924927,
      "learning_rate": 1.8560911976805405e-05,
      "loss": 0.4665,
      "step": 3361
    },
    {
      "epoch": 0.8094865466803106,
      "grad_norm": 3.3648643493652344,
      "learning_rate": 1.8515686832270184e-05,
      "loss": 1.236,
      "step": 3362
    },
    {
      "epoch": 0.8097273219767652,
      "grad_norm": 1.2407772541046143,
      "learning_rate": 1.8470511230380983e-05,
      "loss": 0.553,
      "step": 3363
    },
    {
      "epoch": 0.8099680972732197,
      "grad_norm": 3.098479986190796,
      "learning_rate": 1.8425385198604615e-05,
      "loss": 0.4733,
      "step": 3364
    },
    {
      "epoch": 0.8102088725696743,
      "grad_norm": 1.627521276473999,
      "learning_rate": 1.8380308764377842e-05,
      "loss": 0.4355,
      "step": 3365
    },
    {
      "epoch": 0.8104496478661289,
      "grad_norm": 1.2290712594985962,
      "learning_rate": 1.833528195510722e-05,
      "loss": 0.4626,
      "step": 3366
    },
    {
      "epoch": 0.8106904231625836,
      "grad_norm": 1.5985430479049683,
      "learning_rate": 1.8290304798169176e-05,
      "loss": 0.2771,
      "step": 3367
    },
    {
      "epoch": 0.810931198459038,
      "grad_norm": 2.063868522644043,
      "learning_rate": 1.8245377320909894e-05,
      "loss": 0.3625,
      "step": 3368
    },
    {
      "epoch": 0.8111719737554927,
      "grad_norm": 17.49557113647461,
      "learning_rate": 1.8200499550645433e-05,
      "loss": 1.085,
      "step": 3369
    },
    {
      "epoch": 0.8114127490519473,
      "grad_norm": 2.0841176509857178,
      "learning_rate": 1.815567151466151e-05,
      "loss": 0.722,
      "step": 3370
    },
    {
      "epoch": 0.8116535243484019,
      "grad_norm": 2.4572806358337402,
      "learning_rate": 1.8110893240213733e-05,
      "loss": 0.365,
      "step": 3371
    },
    {
      "epoch": 0.8118942996448565,
      "grad_norm": 0.8181131482124329,
      "learning_rate": 1.806616475452734e-05,
      "loss": 0.4139,
      "step": 3372
    },
    {
      "epoch": 0.812135074941311,
      "grad_norm": 2.0539329051971436,
      "learning_rate": 1.8021486084797368e-05,
      "loss": 0.8089,
      "step": 3373
    },
    {
      "epoch": 0.8123758502377656,
      "grad_norm": 1.458526611328125,
      "learning_rate": 1.797685725818856e-05,
      "loss": 0.7537,
      "step": 3374
    },
    {
      "epoch": 0.8126166255342202,
      "grad_norm": 0.4736323058605194,
      "learning_rate": 1.7932278301835347e-05,
      "loss": 0.1747,
      "step": 3375
    },
    {
      "epoch": 0.8128574008306748,
      "grad_norm": 2.3120036125183105,
      "learning_rate": 1.7887749242841844e-05,
      "loss": 0.4192,
      "step": 3376
    },
    {
      "epoch": 0.8130981761271293,
      "grad_norm": 15.435149192810059,
      "learning_rate": 1.7843270108281772e-05,
      "loss": 0.6507,
      "step": 3377
    },
    {
      "epoch": 0.8133389514235839,
      "grad_norm": 1.2103174924850464,
      "learning_rate": 1.779884092519859e-05,
      "loss": 0.6697,
      "step": 3378
    },
    {
      "epoch": 0.8135797267200385,
      "grad_norm": 1.7959200143814087,
      "learning_rate": 1.7754461720605342e-05,
      "loss": 0.4094,
      "step": 3379
    },
    {
      "epoch": 0.8138205020164931,
      "grad_norm": 2.2297651767730713,
      "learning_rate": 1.7710132521484646e-05,
      "loss": 1.1822,
      "step": 3380
    },
    {
      "epoch": 0.8140612773129476,
      "grad_norm": 1.7390764951705933,
      "learning_rate": 1.7665853354788774e-05,
      "loss": 0.6164,
      "step": 3381
    },
    {
      "epoch": 0.8143020526094022,
      "grad_norm": 1.3549991846084595,
      "learning_rate": 1.7621624247439594e-05,
      "loss": 0.3209,
      "step": 3382
    },
    {
      "epoch": 0.8145428279058569,
      "grad_norm": 0.4444354176521301,
      "learning_rate": 1.7577445226328425e-05,
      "loss": 0.3606,
      "step": 3383
    },
    {
      "epoch": 0.8147836032023115,
      "grad_norm": 10.032902717590332,
      "learning_rate": 1.7533316318316307e-05,
      "loss": 0.7676,
      "step": 3384
    },
    {
      "epoch": 0.8150243784987661,
      "grad_norm": 1.579328179359436,
      "learning_rate": 1.748923755023364e-05,
      "loss": 0.7432,
      "step": 3385
    },
    {
      "epoch": 0.8152651537952206,
      "grad_norm": 2.0636134147644043,
      "learning_rate": 1.7445208948880442e-05,
      "loss": 0.9798,
      "step": 3386
    },
    {
      "epoch": 0.8155059290916752,
      "grad_norm": 1.8931350708007812,
      "learning_rate": 1.7401230541026226e-05,
      "loss": 0.4045,
      "step": 3387
    },
    {
      "epoch": 0.8157467043881298,
      "grad_norm": 29.280317306518555,
      "learning_rate": 1.735730235340991e-05,
      "loss": 0.8831,
      "step": 3388
    },
    {
      "epoch": 0.8159874796845844,
      "grad_norm": 1.0704580545425415,
      "learning_rate": 1.7313424412739987e-05,
      "loss": 0.5779,
      "step": 3389
    },
    {
      "epoch": 0.8162282549810389,
      "grad_norm": 1.8337838649749756,
      "learning_rate": 1.7269596745694295e-05,
      "loss": 0.6469,
      "step": 3390
    },
    {
      "epoch": 0.8164690302774935,
      "grad_norm": 1.4514000415802002,
      "learning_rate": 1.722581937892015e-05,
      "loss": 0.4172,
      "step": 3391
    },
    {
      "epoch": 0.8167098055739481,
      "grad_norm": 2.778085231781006,
      "learning_rate": 1.718209233903436e-05,
      "loss": 0.622,
      "step": 3392
    },
    {
      "epoch": 0.8169505808704027,
      "grad_norm": 1.9710346460342407,
      "learning_rate": 1.7138415652622995e-05,
      "loss": 0.3566,
      "step": 3393
    },
    {
      "epoch": 0.8171913561668572,
      "grad_norm": 3.470649003982544,
      "learning_rate": 1.70947893462416e-05,
      "loss": 0.6804,
      "step": 3394
    },
    {
      "epoch": 0.8174321314633118,
      "grad_norm": 0.8175150752067566,
      "learning_rate": 1.7051213446415104e-05,
      "loss": 0.1711,
      "step": 3395
    },
    {
      "epoch": 0.8176729067597664,
      "grad_norm": 2.38569974899292,
      "learning_rate": 1.7007687979637687e-05,
      "loss": 1.0114,
      "step": 3396
    },
    {
      "epoch": 0.8179136820562211,
      "grad_norm": 1.0082086324691772,
      "learning_rate": 1.6964212972372995e-05,
      "loss": 0.6251,
      "step": 3397
    },
    {
      "epoch": 0.8181544573526757,
      "grad_norm": 3.498782157897949,
      "learning_rate": 1.692078845105386e-05,
      "loss": 1.078,
      "step": 3398
    },
    {
      "epoch": 0.8183952326491302,
      "grad_norm": 1.055105447769165,
      "learning_rate": 1.6877414442082528e-05,
      "loss": 0.45,
      "step": 3399
    },
    {
      "epoch": 0.8186360079455848,
      "grad_norm": 3.0139036178588867,
      "learning_rate": 1.6834090971830507e-05,
      "loss": 0.5409,
      "step": 3400
    },
    {
      "epoch": 0.8188767832420394,
      "grad_norm": 1.671664834022522,
      "learning_rate": 1.6790818066638536e-05,
      "loss": 0.5133,
      "step": 3401
    },
    {
      "epoch": 0.819117558538494,
      "grad_norm": 3.366199254989624,
      "learning_rate": 1.6747595752816658e-05,
      "loss": 0.6439,
      "step": 3402
    },
    {
      "epoch": 0.8193583338349485,
      "grad_norm": 1.2832306623458862,
      "learning_rate": 1.6704424056644154e-05,
      "loss": 0.7887,
      "step": 3403
    },
    {
      "epoch": 0.8195991091314031,
      "grad_norm": 1.5629595518112183,
      "learning_rate": 1.6661303004369468e-05,
      "loss": 0.5992,
      "step": 3404
    },
    {
      "epoch": 0.8198398844278577,
      "grad_norm": 1.5762869119644165,
      "learning_rate": 1.661823262221035e-05,
      "loss": 0.6254,
      "step": 3405
    },
    {
      "epoch": 0.8200806597243123,
      "grad_norm": 4.563362121582031,
      "learning_rate": 1.6575212936353625e-05,
      "loss": 0.9719,
      "step": 3406
    },
    {
      "epoch": 0.8203214350207668,
      "grad_norm": 0.8489302396774292,
      "learning_rate": 1.6532243972955398e-05,
      "loss": 0.3529,
      "step": 3407
    },
    {
      "epoch": 0.8205622103172214,
      "grad_norm": 4.638950824737549,
      "learning_rate": 1.6489325758140895e-05,
      "loss": 0.3409,
      "step": 3408
    },
    {
      "epoch": 0.820802985613676,
      "grad_norm": 1.3469526767730713,
      "learning_rate": 1.6446458318004477e-05,
      "loss": 0.5872,
      "step": 3409
    },
    {
      "epoch": 0.8210437609101306,
      "grad_norm": 0.8786214590072632,
      "learning_rate": 1.640364167860967e-05,
      "loss": 0.3984,
      "step": 3410
    },
    {
      "epoch": 0.8212845362065853,
      "grad_norm": 1.392174482345581,
      "learning_rate": 1.6360875865989046e-05,
      "loss": 0.293,
      "step": 3411
    },
    {
      "epoch": 0.8215253115030398,
      "grad_norm": 1.987600326538086,
      "learning_rate": 1.631816090614434e-05,
      "loss": 0.2162,
      "step": 3412
    },
    {
      "epoch": 0.8217660867994944,
      "grad_norm": 0.8357548713684082,
      "learning_rate": 1.6275496825046367e-05,
      "loss": 0.5193,
      "step": 3413
    },
    {
      "epoch": 0.822006862095949,
      "grad_norm": 2.0921974182128906,
      "learning_rate": 1.6232883648634933e-05,
      "loss": 0.5682,
      "step": 3414
    },
    {
      "epoch": 0.8222476373924036,
      "grad_norm": 2.1746203899383545,
      "learning_rate": 1.6190321402818963e-05,
      "loss": 0.6858,
      "step": 3415
    },
    {
      "epoch": 0.8224884126888581,
      "grad_norm": 4.1129584312438965,
      "learning_rate": 1.6147810113476413e-05,
      "loss": 0.6928,
      "step": 3416
    },
    {
      "epoch": 0.8227291879853127,
      "grad_norm": 3.485736846923828,
      "learning_rate": 1.610534980645423e-05,
      "loss": 0.9683,
      "step": 3417
    },
    {
      "epoch": 0.8229699632817673,
      "grad_norm": 1.9745628833770752,
      "learning_rate": 1.60629405075684e-05,
      "loss": 0.5169,
      "step": 3418
    },
    {
      "epoch": 0.8232107385782219,
      "grad_norm": 1.6666935682296753,
      "learning_rate": 1.6020582242603844e-05,
      "loss": 0.6159,
      "step": 3419
    },
    {
      "epoch": 0.8234515138746764,
      "grad_norm": 4.859829425811768,
      "learning_rate": 1.5978275037314482e-05,
      "loss": 0.9885,
      "step": 3420
    },
    {
      "epoch": 0.823692289171131,
      "grad_norm": 2.979112148284912,
      "learning_rate": 1.5936018917423236e-05,
      "loss": 0.6964,
      "step": 3421
    },
    {
      "epoch": 0.8239330644675856,
      "grad_norm": 2.812305450439453,
      "learning_rate": 1.5893813908621857e-05,
      "loss": 0.9054,
      "step": 3422
    },
    {
      "epoch": 0.8241738397640402,
      "grad_norm": 8.65831184387207,
      "learning_rate": 1.5851660036571115e-05,
      "loss": 1.0088,
      "step": 3423
    },
    {
      "epoch": 0.8244146150604948,
      "grad_norm": 0.610390305519104,
      "learning_rate": 1.5809557326900647e-05,
      "loss": 0.4041,
      "step": 3424
    },
    {
      "epoch": 0.8246553903569493,
      "grad_norm": 2.6373860836029053,
      "learning_rate": 1.5767505805209027e-05,
      "loss": 0.7424,
      "step": 3425
    },
    {
      "epoch": 0.824896165653404,
      "grad_norm": 2.2015843391418457,
      "learning_rate": 1.5725505497063664e-05,
      "loss": 0.9575,
      "step": 3426
    },
    {
      "epoch": 0.8251369409498586,
      "grad_norm": 2.0284852981567383,
      "learning_rate": 1.568355642800081e-05,
      "loss": 0.6706,
      "step": 3427
    },
    {
      "epoch": 0.8253777162463132,
      "grad_norm": 3.0381412506103516,
      "learning_rate": 1.5641658623525623e-05,
      "loss": 0.3887,
      "step": 3428
    },
    {
      "epoch": 0.8256184915427677,
      "grad_norm": 3.0191638469696045,
      "learning_rate": 1.5599812109112076e-05,
      "loss": 0.8594,
      "step": 3429
    },
    {
      "epoch": 0.8258592668392223,
      "grad_norm": 2.608114719390869,
      "learning_rate": 1.55580169102029e-05,
      "loss": 0.2382,
      "step": 3430
    },
    {
      "epoch": 0.8261000421356769,
      "grad_norm": 2.528092384338379,
      "learning_rate": 1.5516273052209683e-05,
      "loss": 0.6125,
      "step": 3431
    },
    {
      "epoch": 0.8263408174321315,
      "grad_norm": 1.7258909940719604,
      "learning_rate": 1.547458056051281e-05,
      "loss": 0.2416,
      "step": 3432
    },
    {
      "epoch": 0.826581592728586,
      "grad_norm": 1.4426395893096924,
      "learning_rate": 1.5432939460461384e-05,
      "loss": 0.5668,
      "step": 3433
    },
    {
      "epoch": 0.8268223680250406,
      "grad_norm": 1.0259637832641602,
      "learning_rate": 1.539134977737332e-05,
      "loss": 0.2167,
      "step": 3434
    },
    {
      "epoch": 0.8270631433214952,
      "grad_norm": 1.5782815217971802,
      "learning_rate": 1.5349811536535196e-05,
      "loss": 0.9795,
      "step": 3435
    },
    {
      "epoch": 0.8273039186179498,
      "grad_norm": 1.5504636764526367,
      "learning_rate": 1.5308324763202397e-05,
      "loss": 0.6967,
      "step": 3436
    },
    {
      "epoch": 0.8275446939144043,
      "grad_norm": 0.8018413186073303,
      "learning_rate": 1.5266889482598934e-05,
      "loss": 0.1634,
      "step": 3437
    },
    {
      "epoch": 0.8277854692108589,
      "grad_norm": 2.674348831176758,
      "learning_rate": 1.5225505719917577e-05,
      "loss": 0.8628,
      "step": 3438
    },
    {
      "epoch": 0.8280262445073135,
      "grad_norm": 1.5734491348266602,
      "learning_rate": 1.5184173500319731e-05,
      "loss": 0.6495,
      "step": 3439
    },
    {
      "epoch": 0.8282670198037682,
      "grad_norm": 3.662338972091675,
      "learning_rate": 1.5142892848935497e-05,
      "loss": 0.9257,
      "step": 3440
    },
    {
      "epoch": 0.8285077951002228,
      "grad_norm": 4.754350185394287,
      "learning_rate": 1.5101663790863596e-05,
      "loss": 0.2461,
      "step": 3441
    },
    {
      "epoch": 0.8287485703966773,
      "grad_norm": 3.429853677749634,
      "learning_rate": 1.5060486351171411e-05,
      "loss": 1.0115,
      "step": 3442
    },
    {
      "epoch": 0.8289893456931319,
      "grad_norm": 2.6595869064331055,
      "learning_rate": 1.5019360554894868e-05,
      "loss": 0.4968,
      "step": 3443
    },
    {
      "epoch": 0.8292301209895865,
      "grad_norm": 1.3187874555587769,
      "learning_rate": 1.4978286427038601e-05,
      "loss": 0.6117,
      "step": 3444
    },
    {
      "epoch": 0.8294708962860411,
      "grad_norm": 1.491859793663025,
      "learning_rate": 1.4937263992575712e-05,
      "loss": 0.4465,
      "step": 3445
    },
    {
      "epoch": 0.8297116715824956,
      "grad_norm": 2.291027784347534,
      "learning_rate": 1.489629327644797e-05,
      "loss": 0.8872,
      "step": 3446
    },
    {
      "epoch": 0.8299524468789502,
      "grad_norm": 1.3068852424621582,
      "learning_rate": 1.4855374303565662e-05,
      "loss": 0.358,
      "step": 3447
    },
    {
      "epoch": 0.8301932221754048,
      "grad_norm": 1.2296390533447266,
      "learning_rate": 1.4814507098807595e-05,
      "loss": 0.5845,
      "step": 3448
    },
    {
      "epoch": 0.8304339974718594,
      "grad_norm": 2.593040943145752,
      "learning_rate": 1.4773691687021174e-05,
      "loss": 0.6584,
      "step": 3449
    },
    {
      "epoch": 0.8306747727683139,
      "grad_norm": 1.2656725645065308,
      "learning_rate": 1.473292809302219e-05,
      "loss": 0.6618,
      "step": 3450
    },
    {
      "epoch": 0.8309155480647685,
      "grad_norm": 1.9369158744812012,
      "learning_rate": 1.4692216341595044e-05,
      "loss": 0.4147,
      "step": 3451
    },
    {
      "epoch": 0.8311563233612231,
      "grad_norm": 1.4402110576629639,
      "learning_rate": 1.4651556457492588e-05,
      "loss": 0.2083,
      "step": 3452
    },
    {
      "epoch": 0.8313970986576777,
      "grad_norm": 0.478405237197876,
      "learning_rate": 1.4610948465436069e-05,
      "loss": 0.2323,
      "step": 3453
    },
    {
      "epoch": 0.8316378739541324,
      "grad_norm": 2.096238613128662,
      "learning_rate": 1.4570392390115261e-05,
      "loss": 0.2422,
      "step": 3454
    },
    {
      "epoch": 0.8318786492505869,
      "grad_norm": 0.71112060546875,
      "learning_rate": 1.4529888256188363e-05,
      "loss": 0.2833,
      "step": 3455
    },
    {
      "epoch": 0.8321194245470415,
      "grad_norm": 4.690402984619141,
      "learning_rate": 1.448943608828197e-05,
      "loss": 1.0562,
      "step": 3456
    },
    {
      "epoch": 0.8323601998434961,
      "grad_norm": 1.7524763345718384,
      "learning_rate": 1.4449035910991115e-05,
      "loss": 0.087,
      "step": 3457
    },
    {
      "epoch": 0.8326009751399507,
      "grad_norm": 1.4712945222854614,
      "learning_rate": 1.4408687748879156e-05,
      "loss": 0.4935,
      "step": 3458
    },
    {
      "epoch": 0.8328417504364052,
      "grad_norm": 2.9652466773986816,
      "learning_rate": 1.4368391626477884e-05,
      "loss": 0.7135,
      "step": 3459
    },
    {
      "epoch": 0.8330825257328598,
      "grad_norm": 1.4162325859069824,
      "learning_rate": 1.4328147568287453e-05,
      "loss": 0.4518,
      "step": 3460
    },
    {
      "epoch": 0.8333233010293144,
      "grad_norm": 0.665979266166687,
      "learning_rate": 1.4287955598776304e-05,
      "loss": 0.3329,
      "step": 3461
    },
    {
      "epoch": 0.833564076325769,
      "grad_norm": 2.3450982570648193,
      "learning_rate": 1.4247815742381277e-05,
      "loss": 0.4297,
      "step": 3462
    },
    {
      "epoch": 0.8338048516222235,
      "grad_norm": 2.609652519226074,
      "learning_rate": 1.4207728023507471e-05,
      "loss": 0.595,
      "step": 3463
    },
    {
      "epoch": 0.8340456269186781,
      "grad_norm": 1.1684465408325195,
      "learning_rate": 1.4167692466528281e-05,
      "loss": 0.2792,
      "step": 3464
    },
    {
      "epoch": 0.8342864022151327,
      "grad_norm": 1.9600780010223389,
      "learning_rate": 1.4127709095785513e-05,
      "loss": 0.1156,
      "step": 3465
    },
    {
      "epoch": 0.8345271775115873,
      "grad_norm": 1.2096495628356934,
      "learning_rate": 1.4087777935589052e-05,
      "loss": 0.6391,
      "step": 3466
    },
    {
      "epoch": 0.834767952808042,
      "grad_norm": 2.56876540184021,
      "learning_rate": 1.404789901021717e-05,
      "loss": 0.5451,
      "step": 3467
    },
    {
      "epoch": 0.8350087281044964,
      "grad_norm": 2.7339911460876465,
      "learning_rate": 1.4008072343916379e-05,
      "loss": 0.7596,
      "step": 3468
    },
    {
      "epoch": 0.835249503400951,
      "grad_norm": 1.9011280536651611,
      "learning_rate": 1.396829796090131e-05,
      "loss": 0.9727,
      "step": 3469
    },
    {
      "epoch": 0.8354902786974057,
      "grad_norm": 1.2117639780044556,
      "learning_rate": 1.3928575885354933e-05,
      "loss": 0.2906,
      "step": 3470
    },
    {
      "epoch": 0.8357310539938603,
      "grad_norm": 1.8486530780792236,
      "learning_rate": 1.3888906141428325e-05,
      "loss": 0.395,
      "step": 3471
    },
    {
      "epoch": 0.8359718292903148,
      "grad_norm": 4.039324760437012,
      "learning_rate": 1.3849288753240786e-05,
      "loss": 0.4848,
      "step": 3472
    },
    {
      "epoch": 0.8362126045867694,
      "grad_norm": 1.2622008323669434,
      "learning_rate": 1.3809723744879788e-05,
      "loss": 0.6475,
      "step": 3473
    },
    {
      "epoch": 0.836453379883224,
      "grad_norm": 0.6000483632087708,
      "learning_rate": 1.3770211140400946e-05,
      "loss": 0.1693,
      "step": 3474
    },
    {
      "epoch": 0.8366941551796786,
      "grad_norm": 2.0849924087524414,
      "learning_rate": 1.3730750963828032e-05,
      "loss": 0.1984,
      "step": 3475
    },
    {
      "epoch": 0.8369349304761331,
      "grad_norm": 2.24556040763855,
      "learning_rate": 1.3691343239152864e-05,
      "loss": 0.4181,
      "step": 3476
    },
    {
      "epoch": 0.8371757057725877,
      "grad_norm": 1.9239386320114136,
      "learning_rate": 1.3651987990335469e-05,
      "loss": 0.4543,
      "step": 3477
    },
    {
      "epoch": 0.8374164810690423,
      "grad_norm": 1.7591582536697388,
      "learning_rate": 1.3612685241303947e-05,
      "loss": 0.5755,
      "step": 3478
    },
    {
      "epoch": 0.8376572563654969,
      "grad_norm": 0.7575153112411499,
      "learning_rate": 1.3573435015954406e-05,
      "loss": 0.0756,
      "step": 3479
    },
    {
      "epoch": 0.8378980316619515,
      "grad_norm": 1.6970840692520142,
      "learning_rate": 1.3534237338151102e-05,
      "loss": 0.2317,
      "step": 3480
    },
    {
      "epoch": 0.838138806958406,
      "grad_norm": 1.827329158782959,
      "learning_rate": 1.3495092231726304e-05,
      "loss": 0.7636,
      "step": 3481
    },
    {
      "epoch": 0.8383795822548606,
      "grad_norm": 2.604074239730835,
      "learning_rate": 1.3455999720480316e-05,
      "loss": 0.6374,
      "step": 3482
    },
    {
      "epoch": 0.8386203575513153,
      "grad_norm": 2.306910514831543,
      "learning_rate": 1.341695982818152e-05,
      "loss": 0.7725,
      "step": 3483
    },
    {
      "epoch": 0.8388611328477699,
      "grad_norm": 1.2861391305923462,
      "learning_rate": 1.337797257856619e-05,
      "loss": 0.6311,
      "step": 3484
    },
    {
      "epoch": 0.8391019081442244,
      "grad_norm": 2.032479763031006,
      "learning_rate": 1.33390379953387e-05,
      "loss": 0.4538,
      "step": 3485
    },
    {
      "epoch": 0.839342683440679,
      "grad_norm": 0.6007648706436157,
      "learning_rate": 1.3300156102171379e-05,
      "loss": 0.1634,
      "step": 3486
    },
    {
      "epoch": 0.8395834587371336,
      "grad_norm": 2.5209765434265137,
      "learning_rate": 1.3261326922704464e-05,
      "loss": 0.1593,
      "step": 3487
    },
    {
      "epoch": 0.8398242340335882,
      "grad_norm": 0.9033012390136719,
      "learning_rate": 1.3222550480546203e-05,
      "loss": 0.3819,
      "step": 3488
    },
    {
      "epoch": 0.8400650093300427,
      "grad_norm": 7.269277572631836,
      "learning_rate": 1.3183826799272758e-05,
      "loss": 0.5313,
      "step": 3489
    },
    {
      "epoch": 0.8403057846264973,
      "grad_norm": 2.5734009742736816,
      "learning_rate": 1.3145155902428219e-05,
      "loss": 0.6007,
      "step": 3490
    },
    {
      "epoch": 0.8405465599229519,
      "grad_norm": 2.136906147003174,
      "learning_rate": 1.310653781352461e-05,
      "loss": 0.7098,
      "step": 3491
    },
    {
      "epoch": 0.8407873352194065,
      "grad_norm": 1.183111310005188,
      "learning_rate": 1.3067972556041752e-05,
      "loss": 0.5929,
      "step": 3492
    },
    {
      "epoch": 0.8410281105158611,
      "grad_norm": 2.32429575920105,
      "learning_rate": 1.3029460153427442e-05,
      "loss": 0.6398,
      "step": 3493
    },
    {
      "epoch": 0.8412688858123156,
      "grad_norm": 2.2042758464813232,
      "learning_rate": 1.2991000629097328e-05,
      "loss": 0.4816,
      "step": 3494
    },
    {
      "epoch": 0.8415096611087702,
      "grad_norm": 2.461498498916626,
      "learning_rate": 1.2952594006434849e-05,
      "loss": 0.74,
      "step": 3495
    },
    {
      "epoch": 0.8417504364052248,
      "grad_norm": 1.1167387962341309,
      "learning_rate": 1.2914240308791326e-05,
      "loss": 1.0576,
      "step": 3496
    },
    {
      "epoch": 0.8419912117016795,
      "grad_norm": 1.2157386541366577,
      "learning_rate": 1.2875939559485905e-05,
      "loss": 0.5803,
      "step": 3497
    },
    {
      "epoch": 0.842231986998134,
      "grad_norm": 1.176676869392395,
      "learning_rate": 1.2837691781805516e-05,
      "loss": 0.7533,
      "step": 3498
    },
    {
      "epoch": 0.8424727622945886,
      "grad_norm": 1.823185920715332,
      "learning_rate": 1.2799496999004935e-05,
      "loss": 0.3445,
      "step": 3499
    },
    {
      "epoch": 0.8427135375910432,
      "grad_norm": 2.291487216949463,
      "learning_rate": 1.2761355234306626e-05,
      "loss": 0.6968,
      "step": 3500
    },
    {
      "epoch": 0.8429543128874978,
      "grad_norm": 3.8804900646209717,
      "learning_rate": 1.27232665109009e-05,
      "loss": 0.626,
      "step": 3501
    },
    {
      "epoch": 0.8431950881839523,
      "grad_norm": 3.4894497394561768,
      "learning_rate": 1.268523085194575e-05,
      "loss": 0.6506,
      "step": 3502
    },
    {
      "epoch": 0.8434358634804069,
      "grad_norm": 4.037177085876465,
      "learning_rate": 1.264724828056696e-05,
      "loss": 0.5966,
      "step": 3503
    },
    {
      "epoch": 0.8436766387768615,
      "grad_norm": 2.5394115447998047,
      "learning_rate": 1.2609318819858029e-05,
      "loss": 0.8267,
      "step": 3504
    },
    {
      "epoch": 0.8439174140733161,
      "grad_norm": 4.015261173248291,
      "learning_rate": 1.2571442492880159e-05,
      "loss": 0.9835,
      "step": 3505
    },
    {
      "epoch": 0.8441581893697706,
      "grad_norm": 3.58262300491333,
      "learning_rate": 1.2533619322662216e-05,
      "loss": 0.4845,
      "step": 3506
    },
    {
      "epoch": 0.8443989646662252,
      "grad_norm": 1.208625316619873,
      "learning_rate": 1.2495849332200815e-05,
      "loss": 0.4519,
      "step": 3507
    },
    {
      "epoch": 0.8446397399626798,
      "grad_norm": 6.022768497467041,
      "learning_rate": 1.2458132544460155e-05,
      "loss": 0.4975,
      "step": 3508
    },
    {
      "epoch": 0.8448805152591344,
      "grad_norm": 0.27254337072372437,
      "learning_rate": 1.2420468982372158e-05,
      "loss": 0.4787,
      "step": 3509
    },
    {
      "epoch": 0.845121290555589,
      "grad_norm": 3.7382776737213135,
      "learning_rate": 1.2382858668836317e-05,
      "loss": 0.7591,
      "step": 3510
    },
    {
      "epoch": 0.8453620658520435,
      "grad_norm": 1.5053765773773193,
      "learning_rate": 1.2345301626719808e-05,
      "loss": 0.4858,
      "step": 3511
    },
    {
      "epoch": 0.8456028411484982,
      "grad_norm": 2.0983424186706543,
      "learning_rate": 1.2307797878857396e-05,
      "loss": 0.4047,
      "step": 3512
    },
    {
      "epoch": 0.8458436164449528,
      "grad_norm": 1.0963115692138672,
      "learning_rate": 1.2270347448051456e-05,
      "loss": 0.6322,
      "step": 3513
    },
    {
      "epoch": 0.8460843917414074,
      "grad_norm": 2.32830548286438,
      "learning_rate": 1.2232950357071937e-05,
      "loss": 0.339,
      "step": 3514
    },
    {
      "epoch": 0.8463251670378619,
      "grad_norm": 2.1017560958862305,
      "learning_rate": 1.219560662865633e-05,
      "loss": 0.6883,
      "step": 3515
    },
    {
      "epoch": 0.8465659423343165,
      "grad_norm": 2.3535349369049072,
      "learning_rate": 1.2158316285509708e-05,
      "loss": 0.3383,
      "step": 3516
    },
    {
      "epoch": 0.8468067176307711,
      "grad_norm": 0.24175573885440826,
      "learning_rate": 1.2121079350304732e-05,
      "loss": 0.3707,
      "step": 3517
    },
    {
      "epoch": 0.8470474929272257,
      "grad_norm": 3.2046167850494385,
      "learning_rate": 1.208389584568147e-05,
      "loss": 0.7788,
      "step": 3518
    },
    {
      "epoch": 0.8472882682236802,
      "grad_norm": 0.8707532286643982,
      "learning_rate": 1.2046765794247604e-05,
      "loss": 0.5653,
      "step": 3519
    },
    {
      "epoch": 0.8475290435201348,
      "grad_norm": 5.456550598144531,
      "learning_rate": 1.2009689218578313e-05,
      "loss": 0.5814,
      "step": 3520
    },
    {
      "epoch": 0.8477698188165894,
      "grad_norm": 3.579458236694336,
      "learning_rate": 1.1972666141216215e-05,
      "loss": 0.3246,
      "step": 3521
    },
    {
      "epoch": 0.848010594113044,
      "grad_norm": 1.3261420726776123,
      "learning_rate": 1.1935696584671452e-05,
      "loss": 0.4498,
      "step": 3522
    },
    {
      "epoch": 0.8482513694094986,
      "grad_norm": 1.701804280281067,
      "learning_rate": 1.1898780571421552e-05,
      "loss": 0.5852,
      "step": 3523
    },
    {
      "epoch": 0.8484921447059531,
      "grad_norm": 3.848027229309082,
      "learning_rate": 1.1861918123911564e-05,
      "loss": 0.8602,
      "step": 3524
    },
    {
      "epoch": 0.8487329200024077,
      "grad_norm": 2.2513511180877686,
      "learning_rate": 1.1825109264553947e-05,
      "loss": 1.1043,
      "step": 3525
    },
    {
      "epoch": 0.8489736952988624,
      "grad_norm": 3.580653429031372,
      "learning_rate": 1.1788354015728543e-05,
      "loss": 0.3012,
      "step": 3526
    },
    {
      "epoch": 0.849214470595317,
      "grad_norm": 2.1842987537384033,
      "learning_rate": 1.1751652399782665e-05,
      "loss": 0.5502,
      "step": 3527
    },
    {
      "epoch": 0.8494552458917715,
      "grad_norm": 1.2588567733764648,
      "learning_rate": 1.1715004439030908e-05,
      "loss": 0.5083,
      "step": 3528
    },
    {
      "epoch": 0.8496960211882261,
      "grad_norm": 2.6544106006622314,
      "learning_rate": 1.1678410155755382e-05,
      "loss": 0.342,
      "step": 3529
    },
    {
      "epoch": 0.8499367964846807,
      "grad_norm": 0.7514671087265015,
      "learning_rate": 1.1641869572205489e-05,
      "loss": 0.323,
      "step": 3530
    },
    {
      "epoch": 0.8501775717811353,
      "grad_norm": 2.6009557247161865,
      "learning_rate": 1.1605382710597957e-05,
      "loss": 0.4687,
      "step": 3531
    },
    {
      "epoch": 0.8504183470775898,
      "grad_norm": 1.5390700101852417,
      "learning_rate": 1.1568949593116884e-05,
      "loss": 0.4879,
      "step": 3532
    },
    {
      "epoch": 0.8506591223740444,
      "grad_norm": 4.1019110679626465,
      "learning_rate": 1.1532570241913721e-05,
      "loss": 0.9231,
      "step": 3533
    },
    {
      "epoch": 0.850899897670499,
      "grad_norm": 1.2922954559326172,
      "learning_rate": 1.1496244679107148e-05,
      "loss": 0.546,
      "step": 3534
    },
    {
      "epoch": 0.8511406729669536,
      "grad_norm": 1.7373534440994263,
      "learning_rate": 1.1459972926783236e-05,
      "loss": 0.2165,
      "step": 3535
    },
    {
      "epoch": 0.8513814482634082,
      "grad_norm": 1.6212053298950195,
      "learning_rate": 1.1423755006995241e-05,
      "loss": 0.8171,
      "step": 3536
    },
    {
      "epoch": 0.8516222235598627,
      "grad_norm": 6.341080665588379,
      "learning_rate": 1.1387590941763749e-05,
      "loss": 0.8483,
      "step": 3537
    },
    {
      "epoch": 0.8518629988563173,
      "grad_norm": 0.9660471081733704,
      "learning_rate": 1.135148075307666e-05,
      "loss": 0.284,
      "step": 3538
    },
    {
      "epoch": 0.8521037741527719,
      "grad_norm": 1.2168993949890137,
      "learning_rate": 1.1315424462888968e-05,
      "loss": 0.3476,
      "step": 3539
    },
    {
      "epoch": 0.8523445494492266,
      "grad_norm": 2.224290609359741,
      "learning_rate": 1.1279422093123037e-05,
      "loss": 0.4849,
      "step": 3540
    },
    {
      "epoch": 0.852585324745681,
      "grad_norm": 4.049657821655273,
      "learning_rate": 1.1243473665668336e-05,
      "loss": 0.4054,
      "step": 3541
    },
    {
      "epoch": 0.8528261000421357,
      "grad_norm": 1.0879812240600586,
      "learning_rate": 1.1207579202381625e-05,
      "loss": 0.2502,
      "step": 3542
    },
    {
      "epoch": 0.8530668753385903,
      "grad_norm": 1.2523934841156006,
      "learning_rate": 1.1171738725086833e-05,
      "loss": 0.3098,
      "step": 3543
    },
    {
      "epoch": 0.8533076506350449,
      "grad_norm": 0.9704805612564087,
      "learning_rate": 1.1135952255574999e-05,
      "loss": 0.5721,
      "step": 3544
    },
    {
      "epoch": 0.8535484259314994,
      "grad_norm": 3.1758830547332764,
      "learning_rate": 1.1100219815604418e-05,
      "loss": 0.4685,
      "step": 3545
    },
    {
      "epoch": 0.853789201227954,
      "grad_norm": 0.9316069483757019,
      "learning_rate": 1.1064541426900476e-05,
      "loss": 0.575,
      "step": 3546
    },
    {
      "epoch": 0.8540299765244086,
      "grad_norm": 5.6592912673950195,
      "learning_rate": 1.1028917111155712e-05,
      "loss": 1.1386,
      "step": 3547
    },
    {
      "epoch": 0.8542707518208632,
      "grad_norm": 2.0701656341552734,
      "learning_rate": 1.0993346890029832e-05,
      "loss": 1.1186,
      "step": 3548
    },
    {
      "epoch": 0.8545115271173178,
      "grad_norm": 2.067758560180664,
      "learning_rate": 1.0957830785149548e-05,
      "loss": 0.7525,
      "step": 3549
    },
    {
      "epoch": 0.8547523024137723,
      "grad_norm": 1.7323333024978638,
      "learning_rate": 1.0922368818108774e-05,
      "loss": 0.4399,
      "step": 3550
    },
    {
      "epoch": 0.8549930777102269,
      "grad_norm": 1.6739343404769897,
      "learning_rate": 1.0886961010468466e-05,
      "loss": 0.4477,
      "step": 3551
    },
    {
      "epoch": 0.8552338530066815,
      "grad_norm": 1.9262574911117554,
      "learning_rate": 1.0851607383756612e-05,
      "loss": 0.733,
      "step": 3552
    },
    {
      "epoch": 0.8554746283031361,
      "grad_norm": 1.7514442205429077,
      "learning_rate": 1.081630795946833e-05,
      "loss": 0.7005,
      "step": 3553
    },
    {
      "epoch": 0.8557154035995906,
      "grad_norm": 1.4854047298431396,
      "learning_rate": 1.078106275906573e-05,
      "loss": 0.926,
      "step": 3554
    },
    {
      "epoch": 0.8559561788960452,
      "grad_norm": 1.791135549545288,
      "learning_rate": 1.0745871803978002e-05,
      "loss": 0.4316,
      "step": 3555
    },
    {
      "epoch": 0.8561969541924999,
      "grad_norm": 0.6483386754989624,
      "learning_rate": 1.0710735115601311e-05,
      "loss": 0.4356,
      "step": 3556
    },
    {
      "epoch": 0.8564377294889545,
      "grad_norm": 6.167766094207764,
      "learning_rate": 1.0675652715298835e-05,
      "loss": 0.8807,
      "step": 3557
    },
    {
      "epoch": 0.856678504785409,
      "grad_norm": 3.2649612426757812,
      "learning_rate": 1.0640624624400752e-05,
      "loss": 0.724,
      "step": 3558
    },
    {
      "epoch": 0.8569192800818636,
      "grad_norm": 1.8932918310165405,
      "learning_rate": 1.0605650864204252e-05,
      "loss": 0.6025,
      "step": 3559
    },
    {
      "epoch": 0.8571600553783182,
      "grad_norm": 1.1560626029968262,
      "learning_rate": 1.0570731455973414e-05,
      "loss": 0.4024,
      "step": 3560
    },
    {
      "epoch": 0.8574008306747728,
      "grad_norm": 0.9457545280456543,
      "learning_rate": 1.0535866420939332e-05,
      "loss": 0.2836,
      "step": 3561
    },
    {
      "epoch": 0.8576416059712274,
      "grad_norm": 1.101394772529602,
      "learning_rate": 1.050105578030003e-05,
      "loss": 0.4336,
      "step": 3562
    },
    {
      "epoch": 0.8578823812676819,
      "grad_norm": 2.297769784927368,
      "learning_rate": 1.046629955522046e-05,
      "loss": 1.2278,
      "step": 3563
    },
    {
      "epoch": 0.8581231565641365,
      "grad_norm": 6.07118034362793,
      "learning_rate": 1.0431597766832502e-05,
      "loss": 0.6956,
      "step": 3564
    },
    {
      "epoch": 0.8583639318605911,
      "grad_norm": 0.6239258646965027,
      "learning_rate": 1.0396950436234887e-05,
      "loss": 0.051,
      "step": 3565
    },
    {
      "epoch": 0.8586047071570457,
      "grad_norm": 3.072779417037964,
      "learning_rate": 1.0362357584493298e-05,
      "loss": 0.9781,
      "step": 3566
    },
    {
      "epoch": 0.8588454824535002,
      "grad_norm": 1.9962631464004517,
      "learning_rate": 1.0327819232640235e-05,
      "loss": 0.5025,
      "step": 3567
    },
    {
      "epoch": 0.8590862577499548,
      "grad_norm": 1.4594467878341675,
      "learning_rate": 1.029333540167512e-05,
      "loss": 0.4257,
      "step": 3568
    },
    {
      "epoch": 0.8593270330464094,
      "grad_norm": 3.9603610038757324,
      "learning_rate": 1.0258906112564181e-05,
      "loss": 0.3696,
      "step": 3569
    },
    {
      "epoch": 0.8595678083428641,
      "grad_norm": 3.076791286468506,
      "learning_rate": 1.0224531386240522e-05,
      "loss": 0.7587,
      "step": 3570
    },
    {
      "epoch": 0.8598085836393186,
      "grad_norm": 3.732264995574951,
      "learning_rate": 1.0190211243604043e-05,
      "loss": 0.5357,
      "step": 3571
    },
    {
      "epoch": 0.8600493589357732,
      "grad_norm": 1.9595264196395874,
      "learning_rate": 1.0155945705521486e-05,
      "loss": 0.4164,
      "step": 3572
    },
    {
      "epoch": 0.8602901342322278,
      "grad_norm": 1.8622673749923706,
      "learning_rate": 1.0121734792826353e-05,
      "loss": 0.5882,
      "step": 3573
    },
    {
      "epoch": 0.8605309095286824,
      "grad_norm": 2.3604211807250977,
      "learning_rate": 1.0087578526318975e-05,
      "loss": 0.7776,
      "step": 3574
    },
    {
      "epoch": 0.860771684825137,
      "grad_norm": 2.476921796798706,
      "learning_rate": 1.0053476926766414e-05,
      "loss": 0.5129,
      "step": 3575
    },
    {
      "epoch": 0.8610124601215915,
      "grad_norm": 0.9629519581794739,
      "learning_rate": 1.0019430014902531e-05,
      "loss": 0.6324,
      "step": 3576
    },
    {
      "epoch": 0.8612532354180461,
      "grad_norm": 1.2336691617965698,
      "learning_rate": 9.985437811427933e-06,
      "loss": 0.236,
      "step": 3577
    },
    {
      "epoch": 0.8614940107145007,
      "grad_norm": 1.3842549324035645,
      "learning_rate": 9.951500337009945e-06,
      "loss": 0.8148,
      "step": 3578
    },
    {
      "epoch": 0.8617347860109553,
      "grad_norm": 1.1485180854797363,
      "learning_rate": 9.917617612282648e-06,
      "loss": 0.4162,
      "step": 3579
    },
    {
      "epoch": 0.8619755613074098,
      "grad_norm": 1.5087698698043823,
      "learning_rate": 9.883789657846799e-06,
      "loss": 1.0833,
      "step": 3580
    },
    {
      "epoch": 0.8622163366038644,
      "grad_norm": 0.747292697429657,
      "learning_rate": 9.850016494269853e-06,
      "loss": 0.4187,
      "step": 3581
    },
    {
      "epoch": 0.862457111900319,
      "grad_norm": 0.6730207204818726,
      "learning_rate": 9.816298142086022e-06,
      "loss": 0.5837,
      "step": 3582
    },
    {
      "epoch": 0.8626978871967736,
      "grad_norm": 3.047215461730957,
      "learning_rate": 9.782634621796083e-06,
      "loss": 0.3071,
      "step": 3583
    },
    {
      "epoch": 0.8629386624932281,
      "grad_norm": 0.9594640135765076,
      "learning_rate": 9.749025953867552e-06,
      "loss": 0.2057,
      "step": 3584
    },
    {
      "epoch": 0.8631794377896828,
      "grad_norm": 1.894709587097168,
      "learning_rate": 9.715472158734585e-06,
      "loss": 0.4201,
      "step": 3585
    },
    {
      "epoch": 0.8634202130861374,
      "grad_norm": 1.63250732421875,
      "learning_rate": 9.68197325679795e-06,
      "loss": 0.7254,
      "step": 3586
    },
    {
      "epoch": 0.863660988382592,
      "grad_norm": 8.452190399169922,
      "learning_rate": 9.648529268425088e-06,
      "loss": 0.2811,
      "step": 3587
    },
    {
      "epoch": 0.8639017636790465,
      "grad_norm": 0.6347200870513916,
      "learning_rate": 9.61514021394998e-06,
      "loss": 0.355,
      "step": 3588
    },
    {
      "epoch": 0.8641425389755011,
      "grad_norm": 1.1674455404281616,
      "learning_rate": 9.581806113673253e-06,
      "loss": 0.5386,
      "step": 3589
    },
    {
      "epoch": 0.8643833142719557,
      "grad_norm": 1.5471045970916748,
      "learning_rate": 9.548526987862149e-06,
      "loss": 0.6897,
      "step": 3590
    },
    {
      "epoch": 0.8646240895684103,
      "grad_norm": 1.1853959560394287,
      "learning_rate": 9.515302856750408e-06,
      "loss": 0.7887,
      "step": 3591
    },
    {
      "epoch": 0.8648648648648649,
      "grad_norm": 0.8306871056556702,
      "learning_rate": 9.48213374053839e-06,
      "loss": 0.4411,
      "step": 3592
    },
    {
      "epoch": 0.8651056401613194,
      "grad_norm": 1.9106206893920898,
      "learning_rate": 9.449019659392999e-06,
      "loss": 0.4598,
      "step": 3593
    },
    {
      "epoch": 0.865346415457774,
      "grad_norm": 2.1204633712768555,
      "learning_rate": 9.415960633447674e-06,
      "loss": 0.6254,
      "step": 3594
    },
    {
      "epoch": 0.8655871907542286,
      "grad_norm": 1.09833562374115,
      "learning_rate": 9.382956682802379e-06,
      "loss": 0.4423,
      "step": 3595
    },
    {
      "epoch": 0.8658279660506832,
      "grad_norm": 1.1681790351867676,
      "learning_rate": 9.350007827523577e-06,
      "loss": 0.9621,
      "step": 3596
    },
    {
      "epoch": 0.8660687413471377,
      "grad_norm": 2.5067391395568848,
      "learning_rate": 9.317114087644252e-06,
      "loss": 0.5401,
      "step": 3597
    },
    {
      "epoch": 0.8663095166435923,
      "grad_norm": 2.4286880493164062,
      "learning_rate": 9.284275483163885e-06,
      "loss": 0.4498,
      "step": 3598
    },
    {
      "epoch": 0.866550291940047,
      "grad_norm": 3.0217394828796387,
      "learning_rate": 9.251492034048393e-06,
      "loss": 0.769,
      "step": 3599
    },
    {
      "epoch": 0.8667910672365016,
      "grad_norm": 0.8035622239112854,
      "learning_rate": 9.21876376023022e-06,
      "loss": 0.1272,
      "step": 3600
    },
    {
      "epoch": 0.8670318425329561,
      "grad_norm": 0.6220622658729553,
      "learning_rate": 9.186090681608173e-06,
      "loss": 0.2411,
      "step": 3601
    },
    {
      "epoch": 0.8672726178294107,
      "grad_norm": 2.2393176555633545,
      "learning_rate": 9.153472818047625e-06,
      "loss": 0.3123,
      "step": 3602
    },
    {
      "epoch": 0.8675133931258653,
      "grad_norm": 5.026528835296631,
      "learning_rate": 9.120910189380294e-06,
      "loss": 1.1053,
      "step": 3603
    },
    {
      "epoch": 0.8677541684223199,
      "grad_norm": 4.139969825744629,
      "learning_rate": 9.088402815404306e-06,
      "loss": 0.6011,
      "step": 3604
    },
    {
      "epoch": 0.8679949437187745,
      "grad_norm": 2.963592290878296,
      "learning_rate": 9.055950715884254e-06,
      "loss": 0.629,
      "step": 3605
    },
    {
      "epoch": 0.868235719015229,
      "grad_norm": 4.244201183319092,
      "learning_rate": 9.023553910551041e-06,
      "loss": 0.2936,
      "step": 3606
    },
    {
      "epoch": 0.8684764943116836,
      "grad_norm": 1.2388718128204346,
      "learning_rate": 8.991212419102025e-06,
      "loss": 0.6559,
      "step": 3607
    },
    {
      "epoch": 0.8687172696081382,
      "grad_norm": 1.3638206720352173,
      "learning_rate": 8.958926261200928e-06,
      "loss": 0.6455,
      "step": 3608
    },
    {
      "epoch": 0.8689580449045928,
      "grad_norm": 1.8364553451538086,
      "learning_rate": 8.926695456477751e-06,
      "loss": 0.325,
      "step": 3609
    },
    {
      "epoch": 0.8691988202010473,
      "grad_norm": 2.5603854656219482,
      "learning_rate": 8.894520024528918e-06,
      "loss": 0.4407,
      "step": 3610
    },
    {
      "epoch": 0.8694395954975019,
      "grad_norm": 1.9255558252334595,
      "learning_rate": 8.862399984917213e-06,
      "loss": 0.8542,
      "step": 3611
    },
    {
      "epoch": 0.8696803707939565,
      "grad_norm": 1.358974575996399,
      "learning_rate": 8.830335357171627e-06,
      "loss": 0.9968,
      "step": 3612
    },
    {
      "epoch": 0.8699211460904112,
      "grad_norm": 1.3834187984466553,
      "learning_rate": 8.798326160787573e-06,
      "loss": 0.9395,
      "step": 3613
    },
    {
      "epoch": 0.8701619213868657,
      "grad_norm": 2.3378701210021973,
      "learning_rate": 8.766372415226675e-06,
      "loss": 0.2571,
      "step": 3614
    },
    {
      "epoch": 0.8704026966833203,
      "grad_norm": 1.1119276285171509,
      "learning_rate": 8.734474139916903e-06,
      "loss": 0.4873,
      "step": 3615
    },
    {
      "epoch": 0.8706434719797749,
      "grad_norm": 0.519648551940918,
      "learning_rate": 8.702631354252489e-06,
      "loss": 0.4369,
      "step": 3616
    },
    {
      "epoch": 0.8708842472762295,
      "grad_norm": 6.27766227722168,
      "learning_rate": 8.670844077593899e-06,
      "loss": 0.6788,
      "step": 3617
    },
    {
      "epoch": 0.8711250225726841,
      "grad_norm": 1.280344009399414,
      "learning_rate": 8.639112329267862e-06,
      "loss": 0.6255,
      "step": 3618
    },
    {
      "epoch": 0.8713657978691386,
      "grad_norm": 4.4702067375183105,
      "learning_rate": 8.60743612856738e-06,
      "loss": 0.4173,
      "step": 3619
    },
    {
      "epoch": 0.8716065731655932,
      "grad_norm": 0.5965597033500671,
      "learning_rate": 8.575815494751637e-06,
      "loss": 0.1958,
      "step": 3620
    },
    {
      "epoch": 0.8718473484620478,
      "grad_norm": 1.7509022951126099,
      "learning_rate": 8.544250447046075e-06,
      "loss": 0.1665,
      "step": 3621
    },
    {
      "epoch": 0.8720881237585024,
      "grad_norm": 1.8660122156143188,
      "learning_rate": 8.512741004642277e-06,
      "loss": 0.3934,
      "step": 3622
    },
    {
      "epoch": 0.8723288990549569,
      "grad_norm": 0.804557204246521,
      "learning_rate": 8.481287186698061e-06,
      "loss": 0.383,
      "step": 3623
    },
    {
      "epoch": 0.8725696743514115,
      "grad_norm": 1.3217666149139404,
      "learning_rate": 8.449889012337453e-06,
      "loss": 0.1176,
      "step": 3624
    },
    {
      "epoch": 0.8728104496478661,
      "grad_norm": 3.857081651687622,
      "learning_rate": 8.418546500650582e-06,
      "loss": 0.5942,
      "step": 3625
    },
    {
      "epoch": 0.8730512249443207,
      "grad_norm": 1.6486952304840088,
      "learning_rate": 8.387259670693759e-06,
      "loss": 0.525,
      "step": 3626
    },
    {
      "epoch": 0.8732920002407752,
      "grad_norm": 0.7626795768737793,
      "learning_rate": 8.356028541489468e-06,
      "loss": 0.2785,
      "step": 3627
    },
    {
      "epoch": 0.8735327755372299,
      "grad_norm": 2.2588295936584473,
      "learning_rate": 8.3248531320263e-06,
      "loss": 0.3082,
      "step": 3628
    },
    {
      "epoch": 0.8737735508336845,
      "grad_norm": 6.79339075088501,
      "learning_rate": 8.293733461259002e-06,
      "loss": 0.9379,
      "step": 3629
    },
    {
      "epoch": 0.8740143261301391,
      "grad_norm": 1.2690285444259644,
      "learning_rate": 8.262669548108349e-06,
      "loss": 0.9261,
      "step": 3630
    },
    {
      "epoch": 0.8742551014265937,
      "grad_norm": 1.7360615730285645,
      "learning_rate": 8.23166141146131e-06,
      "loss": 0.3663,
      "step": 3631
    },
    {
      "epoch": 0.8744958767230482,
      "grad_norm": 1.55054771900177,
      "learning_rate": 8.200709070170876e-06,
      "loss": 0.4774,
      "step": 3632
    },
    {
      "epoch": 0.8747366520195028,
      "grad_norm": 7.235383987426758,
      "learning_rate": 8.169812543056155e-06,
      "loss": 0.4964,
      "step": 3633
    },
    {
      "epoch": 0.8749774273159574,
      "grad_norm": 1.6751036643981934,
      "learning_rate": 8.13897184890231e-06,
      "loss": 1.2376,
      "step": 3634
    },
    {
      "epoch": 0.875218202612412,
      "grad_norm": 2.576141119003296,
      "learning_rate": 8.108187006460533e-06,
      "loss": 0.543,
      "step": 3635
    },
    {
      "epoch": 0.8754589779088665,
      "grad_norm": 1.2057117223739624,
      "learning_rate": 8.077458034448105e-06,
      "loss": 0.2369,
      "step": 3636
    },
    {
      "epoch": 0.8756997532053211,
      "grad_norm": 1.8533183336257935,
      "learning_rate": 8.046784951548302e-06,
      "loss": 0.6765,
      "step": 3637
    },
    {
      "epoch": 0.8759405285017757,
      "grad_norm": 1.2572599649429321,
      "learning_rate": 8.01616777641041e-06,
      "loss": 0.5824,
      "step": 3638
    },
    {
      "epoch": 0.8761813037982303,
      "grad_norm": 1.9591710567474365,
      "learning_rate": 7.985606527649769e-06,
      "loss": 0.9164,
      "step": 3639
    },
    {
      "epoch": 0.8764220790946848,
      "grad_norm": 0.9350030422210693,
      "learning_rate": 7.955101223847649e-06,
      "loss": 0.1639,
      "step": 3640
    },
    {
      "epoch": 0.8766628543911394,
      "grad_norm": 1.2530356645584106,
      "learning_rate": 7.92465188355137e-06,
      "loss": 0.6552,
      "step": 3641
    },
    {
      "epoch": 0.876903629687594,
      "grad_norm": 2.239734649658203,
      "learning_rate": 7.894258525274189e-06,
      "loss": 0.9524,
      "step": 3642
    },
    {
      "epoch": 0.8771444049840487,
      "grad_norm": 1.1480050086975098,
      "learning_rate": 7.863921167495348e-06,
      "loss": 0.5416,
      "step": 3643
    },
    {
      "epoch": 0.8773851802805033,
      "grad_norm": 1.4632046222686768,
      "learning_rate": 7.833639828660033e-06,
      "loss": 0.756,
      "step": 3644
    },
    {
      "epoch": 0.8776259555769578,
      "grad_norm": 2.0627739429473877,
      "learning_rate": 7.803414527179343e-06,
      "loss": 0.2886,
      "step": 3645
    },
    {
      "epoch": 0.8778667308734124,
      "grad_norm": 1.060166835784912,
      "learning_rate": 7.77324528143033e-06,
      "loss": 0.4151,
      "step": 3646
    },
    {
      "epoch": 0.878107506169867,
      "grad_norm": 0.9938207864761353,
      "learning_rate": 7.743132109756001e-06,
      "loss": 0.4678,
      "step": 3647
    },
    {
      "epoch": 0.8783482814663216,
      "grad_norm": 3.3694393634796143,
      "learning_rate": 7.713075030465199e-06,
      "loss": 0.7547,
      "step": 3648
    },
    {
      "epoch": 0.8785890567627761,
      "grad_norm": 5.267509460449219,
      "learning_rate": 7.683074061832685e-06,
      "loss": 0.8142,
      "step": 3649
    },
    {
      "epoch": 0.8788298320592307,
      "grad_norm": 1.6653624773025513,
      "learning_rate": 7.653129222099143e-06,
      "loss": 0.5309,
      "step": 3650
    },
    {
      "epoch": 0.8790706073556853,
      "grad_norm": 6.179348468780518,
      "learning_rate": 7.623240529471099e-06,
      "loss": 0.5801,
      "step": 3651
    },
    {
      "epoch": 0.8793113826521399,
      "grad_norm": 1.773995041847229,
      "learning_rate": 7.5934080021209496e-06,
      "loss": 0.2403,
      "step": 3652
    },
    {
      "epoch": 0.8795521579485944,
      "grad_norm": 1.451350450515747,
      "learning_rate": 7.563631658186921e-06,
      "loss": 0.4058,
      "step": 3653
    },
    {
      "epoch": 0.879792933245049,
      "grad_norm": 1.8011319637298584,
      "learning_rate": 7.533911515773096e-06,
      "loss": 0.6627,
      "step": 3654
    },
    {
      "epoch": 0.8800337085415036,
      "grad_norm": 1.6576850414276123,
      "learning_rate": 7.5042475929494205e-06,
      "loss": 0.4088,
      "step": 3655
    },
    {
      "epoch": 0.8802744838379583,
      "grad_norm": 3.480985403060913,
      "learning_rate": 7.4746399077515905e-06,
      "loss": 0.6784,
      "step": 3656
    },
    {
      "epoch": 0.8805152591344129,
      "grad_norm": 6.059078693389893,
      "learning_rate": 7.445088478181151e-06,
      "loss": 0.6168,
      "step": 3657
    },
    {
      "epoch": 0.8807560344308674,
      "grad_norm": 2.815342903137207,
      "learning_rate": 7.4155933222054494e-06,
      "loss": 0.6268,
      "step": 3658
    },
    {
      "epoch": 0.880996809727322,
      "grad_norm": 1.5780977010726929,
      "learning_rate": 7.386154457757599e-06,
      "loss": 0.5302,
      "step": 3659
    },
    {
      "epoch": 0.8812375850237766,
      "grad_norm": 1.6322784423828125,
      "learning_rate": 7.356771902736514e-06,
      "loss": 0.8104,
      "step": 3660
    },
    {
      "epoch": 0.8814783603202312,
      "grad_norm": 2.2666542530059814,
      "learning_rate": 7.327445675006839e-06,
      "loss": 0.4232,
      "step": 3661
    },
    {
      "epoch": 0.8817191356166857,
      "grad_norm": 2.0304696559906006,
      "learning_rate": 7.2981757923989755e-06,
      "loss": 0.7694,
      "step": 3662
    },
    {
      "epoch": 0.8819599109131403,
      "grad_norm": 4.598212242126465,
      "learning_rate": 7.268962272709101e-06,
      "loss": 0.8297,
      "step": 3663
    },
    {
      "epoch": 0.8822006862095949,
      "grad_norm": 0.6841728687286377,
      "learning_rate": 7.239805133699085e-06,
      "loss": 0.4114,
      "step": 3664
    },
    {
      "epoch": 0.8824414615060495,
      "grad_norm": 0.9421213269233704,
      "learning_rate": 7.210704393096534e-06,
      "loss": 0.24,
      "step": 3665
    },
    {
      "epoch": 0.882682236802504,
      "grad_norm": 3.708446979522705,
      "learning_rate": 7.181660068594764e-06,
      "loss": 0.8693,
      "step": 3666
    },
    {
      "epoch": 0.8829230120989586,
      "grad_norm": 0.668267548084259,
      "learning_rate": 7.152672177852804e-06,
      "loss": 0.2204,
      "step": 3667
    },
    {
      "epoch": 0.8831637873954132,
      "grad_norm": 1.9290603399276733,
      "learning_rate": 7.1237407384953655e-06,
      "loss": 0.3814,
      "step": 3668
    },
    {
      "epoch": 0.8834045626918678,
      "grad_norm": 1.4112284183502197,
      "learning_rate": 7.09486576811278e-06,
      "loss": 0.5461,
      "step": 3669
    },
    {
      "epoch": 0.8836453379883223,
      "grad_norm": 2.2663886547088623,
      "learning_rate": 7.066047284261157e-06,
      "loss": 0.4108,
      "step": 3670
    },
    {
      "epoch": 0.883886113284777,
      "grad_norm": 1.4161934852600098,
      "learning_rate": 7.037285304462138e-06,
      "loss": 0.4483,
      "step": 3671
    },
    {
      "epoch": 0.8841268885812316,
      "grad_norm": 2.718825101852417,
      "learning_rate": 7.008579846203112e-06,
      "loss": 0.454,
      "step": 3672
    },
    {
      "epoch": 0.8843676638776862,
      "grad_norm": 1.7748380899429321,
      "learning_rate": 6.979930926937062e-06,
      "loss": 0.343,
      "step": 3673
    },
    {
      "epoch": 0.8846084391741408,
      "grad_norm": 3.08974552154541,
      "learning_rate": 6.951338564082555e-06,
      "loss": 0.9658,
      "step": 3674
    },
    {
      "epoch": 0.8848492144705953,
      "grad_norm": 1.58262300491333,
      "learning_rate": 6.922802775023862e-06,
      "loss": 0.7142,
      "step": 3675
    },
    {
      "epoch": 0.8850899897670499,
      "grad_norm": 2.7481048107147217,
      "learning_rate": 6.894323577110795e-06,
      "loss": 0.4091,
      "step": 3676
    },
    {
      "epoch": 0.8853307650635045,
      "grad_norm": 1.3223680257797241,
      "learning_rate": 6.865900987658758e-06,
      "loss": 0.6664,
      "step": 3677
    },
    {
      "epoch": 0.8855715403599591,
      "grad_norm": 2.7666354179382324,
      "learning_rate": 6.83753502394876e-06,
      "loss": 0.7086,
      "step": 3678
    },
    {
      "epoch": 0.8858123156564136,
      "grad_norm": 1.754913091659546,
      "learning_rate": 6.809225703227351e-06,
      "loss": 0.8025,
      "step": 3679
    },
    {
      "epoch": 0.8860530909528682,
      "grad_norm": 1.091379165649414,
      "learning_rate": 6.780973042706673e-06,
      "loss": 0.1816,
      "step": 3680
    },
    {
      "epoch": 0.8862938662493228,
      "grad_norm": 0.8506015539169312,
      "learning_rate": 6.75277705956443e-06,
      "loss": 0.3386,
      "step": 3681
    },
    {
      "epoch": 0.8865346415457774,
      "grad_norm": 2.4108307361602783,
      "learning_rate": 6.724637770943798e-06,
      "loss": 0.6603,
      "step": 3682
    },
    {
      "epoch": 0.8867754168422319,
      "grad_norm": 7.575955390930176,
      "learning_rate": 6.6965551939535795e-06,
      "loss": 0.7667,
      "step": 3683
    },
    {
      "epoch": 0.8870161921386865,
      "grad_norm": 1.8191972970962524,
      "learning_rate": 6.668529345667995e-06,
      "loss": 0.5581,
      "step": 3684
    },
    {
      "epoch": 0.8872569674351412,
      "grad_norm": 1.4198626279830933,
      "learning_rate": 6.640560243126859e-06,
      "loss": 0.3262,
      "step": 3685
    },
    {
      "epoch": 0.8874977427315958,
      "grad_norm": 1.5749354362487793,
      "learning_rate": 6.612647903335445e-06,
      "loss": 0.5869,
      "step": 3686
    },
    {
      "epoch": 0.8877385180280504,
      "grad_norm": 5.316013813018799,
      "learning_rate": 6.58479234326449e-06,
      "loss": 0.3689,
      "step": 3687
    },
    {
      "epoch": 0.8879792933245049,
      "grad_norm": 1.4162142276763916,
      "learning_rate": 6.556993579850268e-06,
      "loss": 0.4004,
      "step": 3688
    },
    {
      "epoch": 0.8882200686209595,
      "grad_norm": 2.608461618423462,
      "learning_rate": 6.529251629994482e-06,
      "loss": 0.4771,
      "step": 3689
    },
    {
      "epoch": 0.8884608439174141,
      "grad_norm": 4.454953670501709,
      "learning_rate": 6.501566510564295e-06,
      "loss": 1.0927,
      "step": 3690
    },
    {
      "epoch": 0.8887016192138687,
      "grad_norm": 5.660929203033447,
      "learning_rate": 6.4739382383923185e-06,
      "loss": 0.5887,
      "step": 3691
    },
    {
      "epoch": 0.8889423945103232,
      "grad_norm": 5.009692668914795,
      "learning_rate": 6.446366830276607e-06,
      "loss": 0.8515,
      "step": 3692
    },
    {
      "epoch": 0.8891831698067778,
      "grad_norm": 1.05977201461792,
      "learning_rate": 6.4188523029806495e-06,
      "loss": 0.5923,
      "step": 3693
    },
    {
      "epoch": 0.8894239451032324,
      "grad_norm": 2.408989906311035,
      "learning_rate": 6.3913946732333414e-06,
      "loss": 0.5728,
      "step": 3694
    },
    {
      "epoch": 0.889664720399687,
      "grad_norm": 1.7964873313903809,
      "learning_rate": 6.363993957728953e-06,
      "loss": 0.616,
      "step": 3695
    },
    {
      "epoch": 0.8899054956961415,
      "grad_norm": 1.842602252960205,
      "learning_rate": 6.336650173127223e-06,
      "loss": 0.5101,
      "step": 3696
    },
    {
      "epoch": 0.8901462709925961,
      "grad_norm": 1.9197190999984741,
      "learning_rate": 6.309363336053209e-06,
      "loss": 0.6051,
      "step": 3697
    },
    {
      "epoch": 0.8903870462890507,
      "grad_norm": 14.988290786743164,
      "learning_rate": 6.282133463097362e-06,
      "loss": 0.4667,
      "step": 3698
    },
    {
      "epoch": 0.8906278215855054,
      "grad_norm": 2.018468141555786,
      "learning_rate": 6.254960570815527e-06,
      "loss": 0.3706,
      "step": 3699
    },
    {
      "epoch": 0.89086859688196,
      "grad_norm": 1.9267723560333252,
      "learning_rate": 6.227844675728867e-06,
      "loss": 1.4215,
      "step": 3700
    },
    {
      "epoch": 0.8911093721784145,
      "grad_norm": 4.696993350982666,
      "learning_rate": 6.2007857943239155e-06,
      "loss": 0.7318,
      "step": 3701
    },
    {
      "epoch": 0.8913501474748691,
      "grad_norm": 2.4501404762268066,
      "learning_rate": 6.1737839430525575e-06,
      "loss": 0.7474,
      "step": 3702
    },
    {
      "epoch": 0.8915909227713237,
      "grad_norm": 3.1551716327667236,
      "learning_rate": 6.146839138331928e-06,
      "loss": 0.4585,
      "step": 3703
    },
    {
      "epoch": 0.8918316980677783,
      "grad_norm": 8.161409378051758,
      "learning_rate": 6.119951396544576e-06,
      "loss": 0.3907,
      "step": 3704
    },
    {
      "epoch": 0.8920724733642328,
      "grad_norm": 5.239265441894531,
      "learning_rate": 6.093120734038283e-06,
      "loss": 0.8809,
      "step": 3705
    },
    {
      "epoch": 0.8923132486606874,
      "grad_norm": 2.640469789505005,
      "learning_rate": 6.0663471671261515e-06,
      "loss": 0.5767,
      "step": 3706
    },
    {
      "epoch": 0.892554023957142,
      "grad_norm": 2.713616371154785,
      "learning_rate": 6.0396307120865746e-06,
      "loss": 1.0203,
      "step": 3707
    },
    {
      "epoch": 0.8927947992535966,
      "grad_norm": 1.149683952331543,
      "learning_rate": 6.012971385163224e-06,
      "loss": 0.481,
      "step": 3708
    },
    {
      "epoch": 0.8930355745500511,
      "grad_norm": 0.7889773845672607,
      "learning_rate": 5.986369202565034e-06,
      "loss": 0.4251,
      "step": 3709
    },
    {
      "epoch": 0.8932763498465057,
      "grad_norm": 0.5434550046920776,
      "learning_rate": 5.959824180466178e-06,
      "loss": 0.1314,
      "step": 3710
    },
    {
      "epoch": 0.8935171251429603,
      "grad_norm": 4.118932723999023,
      "learning_rate": 5.93333633500609e-06,
      "loss": 0.3042,
      "step": 3711
    },
    {
      "epoch": 0.8937579004394149,
      "grad_norm": 1.7867053747177124,
      "learning_rate": 5.906905682289465e-06,
      "loss": 0.6089,
      "step": 3712
    },
    {
      "epoch": 0.8939986757358696,
      "grad_norm": 3.315713405609131,
      "learning_rate": 5.880532238386161e-06,
      "loss": 0.8192,
      "step": 3713
    },
    {
      "epoch": 0.894239451032324,
      "grad_norm": 2.045057535171509,
      "learning_rate": 5.854216019331305e-06,
      "loss": 0.6398,
      "step": 3714
    },
    {
      "epoch": 0.8944802263287787,
      "grad_norm": 1.3669121265411377,
      "learning_rate": 5.8279570411252316e-06,
      "loss": 0.2998,
      "step": 3715
    },
    {
      "epoch": 0.8947210016252333,
      "grad_norm": 1.606748104095459,
      "learning_rate": 5.801755319733438e-06,
      "loss": 0.4933,
      "step": 3716
    },
    {
      "epoch": 0.8949617769216879,
      "grad_norm": 1.4545626640319824,
      "learning_rate": 5.775610871086667e-06,
      "loss": 0.7581,
      "step": 3717
    },
    {
      "epoch": 0.8952025522181424,
      "grad_norm": 2.5948798656463623,
      "learning_rate": 5.749523711080762e-06,
      "loss": 0.7313,
      "step": 3718
    },
    {
      "epoch": 0.895443327514597,
      "grad_norm": 3.4522511959075928,
      "learning_rate": 5.723493855576778e-06,
      "loss": 0.4838,
      "step": 3719
    },
    {
      "epoch": 0.8956841028110516,
      "grad_norm": 1.7841429710388184,
      "learning_rate": 5.697521320400967e-06,
      "loss": 0.5223,
      "step": 3720
    },
    {
      "epoch": 0.8959248781075062,
      "grad_norm": 2.9225831031799316,
      "learning_rate": 5.67160612134463e-06,
      "loss": 0.8148,
      "step": 3721
    },
    {
      "epoch": 0.8961656534039607,
      "grad_norm": 1.33102285861969,
      "learning_rate": 5.645748274164309e-06,
      "loss": 0.1499,
      "step": 3722
    },
    {
      "epoch": 0.8964064287004153,
      "grad_norm": 0.6809419989585876,
      "learning_rate": 5.619947794581615e-06,
      "loss": 0.1958,
      "step": 3723
    },
    {
      "epoch": 0.8966472039968699,
      "grad_norm": 2.1398706436157227,
      "learning_rate": 5.594204698283301e-06,
      "loss": 0.4343,
      "step": 3724
    },
    {
      "epoch": 0.8968879792933245,
      "grad_norm": 2.5033817291259766,
      "learning_rate": 5.568519000921235e-06,
      "loss": 0.7202,
      "step": 3725
    },
    {
      "epoch": 0.8971287545897791,
      "grad_norm": 1.002875566482544,
      "learning_rate": 5.54289071811237e-06,
      "loss": 0.479,
      "step": 3726
    },
    {
      "epoch": 0.8973695298862336,
      "grad_norm": 2.744338035583496,
      "learning_rate": 5.517319865438764e-06,
      "loss": 0.923,
      "step": 3727
    },
    {
      "epoch": 0.8976103051826883,
      "grad_norm": 2.405243396759033,
      "learning_rate": 5.491806458447557e-06,
      "loss": 1.0923,
      "step": 3728
    },
    {
      "epoch": 0.8978510804791429,
      "grad_norm": 2.02970814704895,
      "learning_rate": 5.466350512650953e-06,
      "loss": 0.9937,
      "step": 3729
    },
    {
      "epoch": 0.8980918557755975,
      "grad_norm": 1.6559299230575562,
      "learning_rate": 5.440952043526215e-06,
      "loss": 0.5801,
      "step": 3730
    },
    {
      "epoch": 0.898332631072052,
      "grad_norm": 4.650358200073242,
      "learning_rate": 5.41561106651568e-06,
      "loss": 0.812,
      "step": 3731
    },
    {
      "epoch": 0.8985734063685066,
      "grad_norm": 2.2116572856903076,
      "learning_rate": 5.390327597026712e-06,
      "loss": 0.5227,
      "step": 3732
    },
    {
      "epoch": 0.8988141816649612,
      "grad_norm": 2.8776440620422363,
      "learning_rate": 5.3651016504317475e-06,
      "loss": 1.0063,
      "step": 3733
    },
    {
      "epoch": 0.8990549569614158,
      "grad_norm": 2.672783136367798,
      "learning_rate": 5.339933242068174e-06,
      "loss": 0.5567,
      "step": 3734
    },
    {
      "epoch": 0.8992957322578703,
      "grad_norm": 2.6852715015411377,
      "learning_rate": 5.3148223872384715e-06,
      "loss": 0.4038,
      "step": 3735
    },
    {
      "epoch": 0.8995365075543249,
      "grad_norm": 4.501379013061523,
      "learning_rate": 5.289769101210074e-06,
      "loss": 0.6712,
      "step": 3736
    },
    {
      "epoch": 0.8997772828507795,
      "grad_norm": 1.5511711835861206,
      "learning_rate": 5.26477339921545e-06,
      "loss": 0.2077,
      "step": 3737
    },
    {
      "epoch": 0.9000180581472341,
      "grad_norm": 1.7630692720413208,
      "learning_rate": 5.239835296452045e-06,
      "loss": 0.4195,
      "step": 3738
    },
    {
      "epoch": 0.9002588334436887,
      "grad_norm": 1.3953133821487427,
      "learning_rate": 5.214954808082273e-06,
      "loss": 0.6487,
      "step": 3739
    },
    {
      "epoch": 0.9004996087401432,
      "grad_norm": 1.0030934810638428,
      "learning_rate": 5.190131949233523e-06,
      "loss": 0.6278,
      "step": 3740
    },
    {
      "epoch": 0.9007403840365978,
      "grad_norm": 0.8308902978897095,
      "learning_rate": 5.165366734998178e-06,
      "loss": 0.301,
      "step": 3741
    },
    {
      "epoch": 0.9009811593330525,
      "grad_norm": 2.4039227962493896,
      "learning_rate": 5.140659180433516e-06,
      "loss": 0.8232,
      "step": 3742
    },
    {
      "epoch": 0.9012219346295071,
      "grad_norm": 1.496519923210144,
      "learning_rate": 5.116009300561797e-06,
      "loss": 0.5532,
      "step": 3743
    },
    {
      "epoch": 0.9014627099259616,
      "grad_norm": 2.6047515869140625,
      "learning_rate": 5.0914171103701895e-06,
      "loss": 0.6708,
      "step": 3744
    },
    {
      "epoch": 0.9017034852224162,
      "grad_norm": 4.155404090881348,
      "learning_rate": 5.066882624810809e-06,
      "loss": 0.4486,
      "step": 3745
    },
    {
      "epoch": 0.9019442605188708,
      "grad_norm": 5.623289108276367,
      "learning_rate": 5.042405858800692e-06,
      "loss": 0.6778,
      "step": 3746
    },
    {
      "epoch": 0.9021850358153254,
      "grad_norm": 2.981544256210327,
      "learning_rate": 5.017986827221733e-06,
      "loss": 0.6458,
      "step": 3747
    },
    {
      "epoch": 0.9024258111117799,
      "grad_norm": 0.939237117767334,
      "learning_rate": 4.993625544920799e-06,
      "loss": 0.1,
      "step": 3748
    },
    {
      "epoch": 0.9026665864082345,
      "grad_norm": 1.9405542612075806,
      "learning_rate": 4.969322026709577e-06,
      "loss": 0.5831,
      "step": 3749
    },
    {
      "epoch": 0.9029073617046891,
      "grad_norm": 0.6850067973136902,
      "learning_rate": 4.945076287364669e-06,
      "loss": 0.3371,
      "step": 3750
    },
    {
      "epoch": 0.9031481370011437,
      "grad_norm": 2.4914636611938477,
      "learning_rate": 4.9208883416275495e-06,
      "loss": 0.726,
      "step": 3751
    },
    {
      "epoch": 0.9033889122975982,
      "grad_norm": 2.531623601913452,
      "learning_rate": 4.896758204204532e-06,
      "loss": 0.4892,
      "step": 3752
    },
    {
      "epoch": 0.9036296875940528,
      "grad_norm": 0.5930827856063843,
      "learning_rate": 4.8726858897667816e-06,
      "loss": 0.2482,
      "step": 3753
    },
    {
      "epoch": 0.9038704628905074,
      "grad_norm": 1.0176321268081665,
      "learning_rate": 4.8486714129503565e-06,
      "loss": 0.4158,
      "step": 3754
    },
    {
      "epoch": 0.904111238186962,
      "grad_norm": 2.1726293563842773,
      "learning_rate": 4.824714788356066e-06,
      "loss": 0.4867,
      "step": 3755
    },
    {
      "epoch": 0.9043520134834167,
      "grad_norm": 0.8435872793197632,
      "learning_rate": 4.800816030549638e-06,
      "loss": 0.5242,
      "step": 3756
    },
    {
      "epoch": 0.9045927887798711,
      "grad_norm": 3.269883394241333,
      "learning_rate": 4.776975154061536e-06,
      "loss": 1.0293,
      "step": 3757
    },
    {
      "epoch": 0.9048335640763258,
      "grad_norm": 1.6402075290679932,
      "learning_rate": 4.753192173387089e-06,
      "loss": 0.5345,
      "step": 3758
    },
    {
      "epoch": 0.9050743393727804,
      "grad_norm": 1.5480372905731201,
      "learning_rate": 4.729467102986396e-06,
      "loss": 0.3328,
      "step": 3759
    },
    {
      "epoch": 0.905315114669235,
      "grad_norm": 3.9539589881896973,
      "learning_rate": 4.705799957284351e-06,
      "loss": 0.5114,
      "step": 3760
    },
    {
      "epoch": 0.9055558899656895,
      "grad_norm": 1.5085039138793945,
      "learning_rate": 4.6821907506706345e-06,
      "loss": 0.928,
      "step": 3761
    },
    {
      "epoch": 0.9057966652621441,
      "grad_norm": 1.067598819732666,
      "learning_rate": 4.6586394974996836e-06,
      "loss": 0.9092,
      "step": 3762
    },
    {
      "epoch": 0.9060374405585987,
      "grad_norm": 2.00384783744812,
      "learning_rate": 4.635146212090735e-06,
      "loss": 0.5831,
      "step": 3763
    },
    {
      "epoch": 0.9062782158550533,
      "grad_norm": 3.4452266693115234,
      "learning_rate": 4.61171090872774e-06,
      "loss": 0.2842,
      "step": 3764
    },
    {
      "epoch": 0.9065189911515078,
      "grad_norm": 2.7296142578125,
      "learning_rate": 4.588333601659423e-06,
      "loss": 0.5461,
      "step": 3765
    },
    {
      "epoch": 0.9067597664479624,
      "grad_norm": 1.4393811225891113,
      "learning_rate": 4.565014305099247e-06,
      "loss": 0.329,
      "step": 3766
    },
    {
      "epoch": 0.907000541744417,
      "grad_norm": 1.5528326034545898,
      "learning_rate": 4.541753033225393e-06,
      "loss": 0.9322,
      "step": 3767
    },
    {
      "epoch": 0.9072413170408716,
      "grad_norm": 1.016589879989624,
      "learning_rate": 4.5185498001807605e-06,
      "loss": 0.4822,
      "step": 3768
    },
    {
      "epoch": 0.9074820923373262,
      "grad_norm": 1.8541332483291626,
      "learning_rate": 4.495404620072985e-06,
      "loss": 0.3784,
      "step": 3769
    },
    {
      "epoch": 0.9077228676337807,
      "grad_norm": 2.7582716941833496,
      "learning_rate": 4.472317506974366e-06,
      "loss": 0.2414,
      "step": 3770
    },
    {
      "epoch": 0.9079636429302353,
      "grad_norm": 1.1082451343536377,
      "learning_rate": 4.44928847492192e-06,
      "loss": 0.6802,
      "step": 3771
    },
    {
      "epoch": 0.90820441822669,
      "grad_norm": 1.2040634155273438,
      "learning_rate": 4.426317537917368e-06,
      "loss": 0.333,
      "step": 3772
    },
    {
      "epoch": 0.9084451935231446,
      "grad_norm": 2.672924280166626,
      "learning_rate": 4.403404709927084e-06,
      "loss": 0.5014,
      "step": 3773
    },
    {
      "epoch": 0.9086859688195991,
      "grad_norm": 3.4659922122955322,
      "learning_rate": 4.3805500048821225e-06,
      "loss": 0.2261,
      "step": 3774
    },
    {
      "epoch": 0.9089267441160537,
      "grad_norm": 2.050246477127075,
      "learning_rate": 4.35775343667818e-06,
      "loss": 0.7365,
      "step": 3775
    },
    {
      "epoch": 0.9091675194125083,
      "grad_norm": 0.8071643710136414,
      "learning_rate": 4.335015019175637e-06,
      "loss": 0.7056,
      "step": 3776
    },
    {
      "epoch": 0.9094082947089629,
      "grad_norm": 2.027353525161743,
      "learning_rate": 4.3123347661995105e-06,
      "loss": 0.2542,
      "step": 3777
    },
    {
      "epoch": 0.9096490700054174,
      "grad_norm": 2.125551700592041,
      "learning_rate": 4.289712691539416e-06,
      "loss": 0.1728,
      "step": 3778
    },
    {
      "epoch": 0.909889845301872,
      "grad_norm": 1.8031567335128784,
      "learning_rate": 4.267148808949639e-06,
      "loss": 0.4307,
      "step": 3779
    },
    {
      "epoch": 0.9101306205983266,
      "grad_norm": 0.9503381252288818,
      "learning_rate": 4.244643132149084e-06,
      "loss": 0.4191,
      "step": 3780
    },
    {
      "epoch": 0.9103713958947812,
      "grad_norm": 1.0291205644607544,
      "learning_rate": 4.2221956748212384e-06,
      "loss": 0.7377,
      "step": 3781
    },
    {
      "epoch": 0.9106121711912358,
      "grad_norm": 0.7489404082298279,
      "learning_rate": 4.19980645061423e-06,
      "loss": 0.5554,
      "step": 3782
    },
    {
      "epoch": 0.9108529464876903,
      "grad_norm": 1.0450713634490967,
      "learning_rate": 4.177475473140724e-06,
      "loss": 0.3164,
      "step": 3783
    },
    {
      "epoch": 0.9110937217841449,
      "grad_norm": 1.918267011642456,
      "learning_rate": 4.155202755978027e-06,
      "loss": 0.759,
      "step": 3784
    },
    {
      "epoch": 0.9113344970805995,
      "grad_norm": 1.4207653999328613,
      "learning_rate": 4.132988312667996e-06,
      "loss": 0.6942,
      "step": 3785
    },
    {
      "epoch": 0.9115752723770542,
      "grad_norm": 1.9554914236068726,
      "learning_rate": 4.110832156717059e-06,
      "loss": 0.6352,
      "step": 3786
    },
    {
      "epoch": 0.9118160476735087,
      "grad_norm": 2.0343658924102783,
      "learning_rate": 4.088734301596209e-06,
      "loss": 0.7284,
      "step": 3787
    },
    {
      "epoch": 0.9120568229699633,
      "grad_norm": 2.685506820678711,
      "learning_rate": 4.066694760740996e-06,
      "loss": 0.732,
      "step": 3788
    },
    {
      "epoch": 0.9122975982664179,
      "grad_norm": 0.8585965633392334,
      "learning_rate": 4.044713547551504e-06,
      "loss": 0.4282,
      "step": 3789
    },
    {
      "epoch": 0.9125383735628725,
      "grad_norm": 1.1643730401992798,
      "learning_rate": 4.022790675392385e-06,
      "loss": 0.3473,
      "step": 3790
    },
    {
      "epoch": 0.912779148859327,
      "grad_norm": 1.323641061782837,
      "learning_rate": 4.0009261575927545e-06,
      "loss": 0.3057,
      "step": 3791
    },
    {
      "epoch": 0.9130199241557816,
      "grad_norm": 0.6881577968597412,
      "learning_rate": 3.979120007446313e-06,
      "loss": 0.3613,
      "step": 3792
    },
    {
      "epoch": 0.9132606994522362,
      "grad_norm": 2.510683298110962,
      "learning_rate": 3.957372238211254e-06,
      "loss": 0.8894,
      "step": 3793
    },
    {
      "epoch": 0.9135014747486908,
      "grad_norm": 1.2805593013763428,
      "learning_rate": 3.935682863110246e-06,
      "loss": 0.3263,
      "step": 3794
    },
    {
      "epoch": 0.9137422500451454,
      "grad_norm": 0.9712691903114319,
      "learning_rate": 3.914051895330506e-06,
      "loss": 0.3099,
      "step": 3795
    },
    {
      "epoch": 0.9139830253415999,
      "grad_norm": 6.864174842834473,
      "learning_rate": 3.892479348023681e-06,
      "loss": 0.5236,
      "step": 3796
    },
    {
      "epoch": 0.9142238006380545,
      "grad_norm": 3.510481834411621,
      "learning_rate": 3.8709652343059565e-06,
      "loss": 0.6298,
      "step": 3797
    },
    {
      "epoch": 0.9144645759345091,
      "grad_norm": 1.1707494258880615,
      "learning_rate": 3.849509567257959e-06,
      "loss": 0.5126,
      "step": 3798
    },
    {
      "epoch": 0.9147053512309637,
      "grad_norm": 1.940595269203186,
      "learning_rate": 3.828112359924763e-06,
      "loss": 0.5885,
      "step": 3799
    },
    {
      "epoch": 0.9149461265274182,
      "grad_norm": 3.032635450363159,
      "learning_rate": 3.8067736253159404e-06,
      "loss": 0.4042,
      "step": 3800
    },
    {
      "epoch": 0.9151869018238729,
      "grad_norm": 1.1645691394805908,
      "learning_rate": 3.785493376405469e-06,
      "loss": 0.441,
      "step": 3801
    },
    {
      "epoch": 0.9154276771203275,
      "grad_norm": 4.984523296356201,
      "learning_rate": 3.764271626131799e-06,
      "loss": 0.4769,
      "step": 3802
    },
    {
      "epoch": 0.9156684524167821,
      "grad_norm": 1.2044367790222168,
      "learning_rate": 3.743108387397798e-06,
      "loss": 0.3186,
      "step": 3803
    },
    {
      "epoch": 0.9159092277132366,
      "grad_norm": 0.9009220004081726,
      "learning_rate": 3.722003673070773e-06,
      "loss": 0.3869,
      "step": 3804
    },
    {
      "epoch": 0.9161500030096912,
      "grad_norm": 3.3949239253997803,
      "learning_rate": 3.700957495982427e-06,
      "loss": 0.4886,
      "step": 3805
    },
    {
      "epoch": 0.9163907783061458,
      "grad_norm": 1.1531803607940674,
      "learning_rate": 3.6799698689289007e-06,
      "loss": 0.488,
      "step": 3806
    },
    {
      "epoch": 0.9166315536026004,
      "grad_norm": 1.1033868789672852,
      "learning_rate": 3.659040804670699e-06,
      "loss": 0.4561,
      "step": 3807
    },
    {
      "epoch": 0.916872328899055,
      "grad_norm": 0.8439015746116638,
      "learning_rate": 3.638170315932754e-06,
      "loss": 0.443,
      "step": 3808
    },
    {
      "epoch": 0.9171131041955095,
      "grad_norm": 3.8729732036590576,
      "learning_rate": 3.6173584154043484e-06,
      "loss": 0.4908,
      "step": 3809
    },
    {
      "epoch": 0.9173538794919641,
      "grad_norm": 1.3288518190383911,
      "learning_rate": 3.5966051157391824e-06,
      "loss": 0.5959,
      "step": 3810
    },
    {
      "epoch": 0.9175946547884187,
      "grad_norm": 1.7388828992843628,
      "learning_rate": 3.575910429555307e-06,
      "loss": 0.1869,
      "step": 3811
    },
    {
      "epoch": 0.9178354300848733,
      "grad_norm": 1.7654670476913452,
      "learning_rate": 3.5552743694351354e-06,
      "loss": 0.5691,
      "step": 3812
    },
    {
      "epoch": 0.9180762053813278,
      "grad_norm": 1.1300573348999023,
      "learning_rate": 3.5346969479254532e-06,
      "loss": 0.5942,
      "step": 3813
    },
    {
      "epoch": 0.9183169806777824,
      "grad_norm": 1.7620266675949097,
      "learning_rate": 3.5141781775373527e-06,
      "loss": 0.623,
      "step": 3814
    },
    {
      "epoch": 0.9185577559742371,
      "grad_norm": 2.159961223602295,
      "learning_rate": 3.493718070746299e-06,
      "loss": 0.5568,
      "step": 3815
    },
    {
      "epoch": 0.9187985312706917,
      "grad_norm": 1.376558542251587,
      "learning_rate": 3.473316639992108e-06,
      "loss": 0.4562,
      "step": 3816
    },
    {
      "epoch": 0.9190393065671462,
      "grad_norm": 2.9691762924194336,
      "learning_rate": 3.4529738976788574e-06,
      "loss": 0.2444,
      "step": 3817
    },
    {
      "epoch": 0.9192800818636008,
      "grad_norm": 2.1220481395721436,
      "learning_rate": 3.4326898561750087e-06,
      "loss": 0.7534,
      "step": 3818
    },
    {
      "epoch": 0.9195208571600554,
      "grad_norm": 3.4113810062408447,
      "learning_rate": 3.412464527813297e-06,
      "loss": 0.4182,
      "step": 3819
    },
    {
      "epoch": 0.91976163245651,
      "grad_norm": 0.48114293813705444,
      "learning_rate": 3.3922979248907638e-06,
      "loss": 0.3866,
      "step": 3820
    },
    {
      "epoch": 0.9200024077529645,
      "grad_norm": 0.9137384295463562,
      "learning_rate": 3.372190059668756e-06,
      "loss": 0.4303,
      "step": 3821
    },
    {
      "epoch": 0.9202431830494191,
      "grad_norm": 2.026947021484375,
      "learning_rate": 3.3521409443728947e-06,
      "loss": 0.813,
      "step": 3822
    },
    {
      "epoch": 0.9204839583458737,
      "grad_norm": 4.139359474182129,
      "learning_rate": 3.332150591193095e-06,
      "loss": 0.5473,
      "step": 3823
    },
    {
      "epoch": 0.9207247336423283,
      "grad_norm": 0.5374311208724976,
      "learning_rate": 3.312219012283535e-06,
      "loss": 0.3422,
      "step": 3824
    },
    {
      "epoch": 0.9209655089387829,
      "grad_norm": 1.6601024866104126,
      "learning_rate": 3.2923462197626433e-06,
      "loss": 0.777,
      "step": 3825
    },
    {
      "epoch": 0.9212062842352374,
      "grad_norm": 1.9013340473175049,
      "learning_rate": 3.272532225713143e-06,
      "loss": 0.8191,
      "step": 3826
    },
    {
      "epoch": 0.921447059531692,
      "grad_norm": 1.4580706357955933,
      "learning_rate": 3.252777042181976e-06,
      "loss": 0.5187,
      "step": 3827
    },
    {
      "epoch": 0.9216878348281466,
      "grad_norm": 4.314640522003174,
      "learning_rate": 3.233080681180323e-06,
      "loss": 1.1448,
      "step": 3828
    },
    {
      "epoch": 0.9219286101246013,
      "grad_norm": 0.6895533800125122,
      "learning_rate": 3.21344315468366e-06,
      "loss": 0.3816,
      "step": 3829
    },
    {
      "epoch": 0.9221693854210558,
      "grad_norm": 0.710097074508667,
      "learning_rate": 3.193864474631614e-06,
      "loss": 0.317,
      "step": 3830
    },
    {
      "epoch": 0.9224101607175104,
      "grad_norm": 3.7332968711853027,
      "learning_rate": 3.174344652928063e-06,
      "loss": 0.261,
      "step": 3831
    },
    {
      "epoch": 0.922650936013965,
      "grad_norm": 1.7163169384002686,
      "learning_rate": 3.1548837014411357e-06,
      "loss": 0.5922,
      "step": 3832
    },
    {
      "epoch": 0.9228917113104196,
      "grad_norm": 1.793519139289856,
      "learning_rate": 3.135481632003101e-06,
      "loss": 1.0053,
      "step": 3833
    },
    {
      "epoch": 0.9231324866068741,
      "grad_norm": 2.4100849628448486,
      "learning_rate": 3.116138456410478e-06,
      "loss": 0.5618,
      "step": 3834
    },
    {
      "epoch": 0.9233732619033287,
      "grad_norm": 1.54547119140625,
      "learning_rate": 3.0968541864239476e-06,
      "loss": 0.1529,
      "step": 3835
    },
    {
      "epoch": 0.9236140371997833,
      "grad_norm": 1.1684705018997192,
      "learning_rate": 3.0776288337683977e-06,
      "loss": 0.2276,
      "step": 3836
    },
    {
      "epoch": 0.9238548124962379,
      "grad_norm": 2.4300782680511475,
      "learning_rate": 3.0584624101328785e-06,
      "loss": 0.4722,
      "step": 3837
    },
    {
      "epoch": 0.9240955877926925,
      "grad_norm": 1.5282294750213623,
      "learning_rate": 3.0393549271706345e-06,
      "loss": 0.5003,
      "step": 3838
    },
    {
      "epoch": 0.924336363089147,
      "grad_norm": 2.3679680824279785,
      "learning_rate": 3.0203063964990617e-06,
      "loss": 0.6853,
      "step": 3839
    },
    {
      "epoch": 0.9245771383856016,
      "grad_norm": 0.8373381495475769,
      "learning_rate": 3.001316829699685e-06,
      "loss": 0.2153,
      "step": 3840
    },
    {
      "epoch": 0.9248179136820562,
      "grad_norm": 3.2224361896514893,
      "learning_rate": 2.982386238318213e-06,
      "loss": 0.4978,
      "step": 3841
    },
    {
      "epoch": 0.9250586889785108,
      "grad_norm": 3.300617218017578,
      "learning_rate": 2.963514633864506e-06,
      "loss": 0.747,
      "step": 3842
    },
    {
      "epoch": 0.9252994642749653,
      "grad_norm": 1.0810372829437256,
      "learning_rate": 2.9447020278125072e-06,
      "loss": 0.4873,
      "step": 3843
    },
    {
      "epoch": 0.92554023957142,
      "grad_norm": 1.0733656883239746,
      "learning_rate": 2.925948431600356e-06,
      "loss": 0.6448,
      "step": 3844
    },
    {
      "epoch": 0.9257810148678746,
      "grad_norm": 1.8431061506271362,
      "learning_rate": 2.9072538566302654e-06,
      "loss": 0.3739,
      "step": 3845
    },
    {
      "epoch": 0.9260217901643292,
      "grad_norm": 4.552158832550049,
      "learning_rate": 2.8886183142685763e-06,
      "loss": 1.5292,
      "step": 3846
    },
    {
      "epoch": 0.9262625654607837,
      "grad_norm": 1.8968464136123657,
      "learning_rate": 2.87004181584577e-06,
      "loss": 0.3096,
      "step": 3847
    },
    {
      "epoch": 0.9265033407572383,
      "grad_norm": 1.599402904510498,
      "learning_rate": 2.8515243726563557e-06,
      "loss": 0.8706,
      "step": 3848
    },
    {
      "epoch": 0.9267441160536929,
      "grad_norm": 1.9225425720214844,
      "learning_rate": 2.8330659959589946e-06,
      "loss": 0.5347,
      "step": 3849
    },
    {
      "epoch": 0.9269848913501475,
      "grad_norm": 2.9519357681274414,
      "learning_rate": 2.8146666969764535e-06,
      "loss": 0.7436,
      "step": 3850
    },
    {
      "epoch": 0.9272256666466021,
      "grad_norm": 0.88628089427948,
      "learning_rate": 2.7963264868955065e-06,
      "loss": 0.5481,
      "step": 3851
    },
    {
      "epoch": 0.9274664419430566,
      "grad_norm": 2.948660373687744,
      "learning_rate": 2.7780453768670557e-06,
      "loss": 0.8285,
      "step": 3852
    },
    {
      "epoch": 0.9277072172395112,
      "grad_norm": 0.3918326497077942,
      "learning_rate": 2.7598233780060543e-06,
      "loss": 0.3189,
      "step": 3853
    },
    {
      "epoch": 0.9279479925359658,
      "grad_norm": 0.8619207739830017,
      "learning_rate": 2.7416605013915297e-06,
      "loss": 0.2851,
      "step": 3854
    },
    {
      "epoch": 0.9281887678324204,
      "grad_norm": 2.232577323913574,
      "learning_rate": 2.7235567580665587e-06,
      "loss": 0.5436,
      "step": 3855
    },
    {
      "epoch": 0.9284295431288749,
      "grad_norm": 3.190218448638916,
      "learning_rate": 2.705512159038226e-06,
      "loss": 0.6544,
      "step": 3856
    },
    {
      "epoch": 0.9286703184253295,
      "grad_norm": 2.477781295776367,
      "learning_rate": 2.687526715277722e-06,
      "loss": 0.5965,
      "step": 3857
    },
    {
      "epoch": 0.9289110937217842,
      "grad_norm": 1.608775019645691,
      "learning_rate": 2.669600437720221e-06,
      "loss": 0.3984,
      "step": 3858
    },
    {
      "epoch": 0.9291518690182388,
      "grad_norm": 4.561281204223633,
      "learning_rate": 2.651733337264928e-06,
      "loss": 0.4893,
      "step": 3859
    },
    {
      "epoch": 0.9293926443146933,
      "grad_norm": 6.173032760620117,
      "learning_rate": 2.6339254247751078e-06,
      "loss": 0.7945,
      "step": 3860
    },
    {
      "epoch": 0.9296334196111479,
      "grad_norm": 1.251524806022644,
      "learning_rate": 2.616176711077989e-06,
      "loss": 0.6277,
      "step": 3861
    },
    {
      "epoch": 0.9298741949076025,
      "grad_norm": 1.0110701322555542,
      "learning_rate": 2.5984872069648393e-06,
      "loss": 0.5838,
      "step": 3862
    },
    {
      "epoch": 0.9301149702040571,
      "grad_norm": 2.0771772861480713,
      "learning_rate": 2.580856923190933e-06,
      "loss": 0.9169,
      "step": 3863
    },
    {
      "epoch": 0.9303557455005117,
      "grad_norm": 1.6572563648223877,
      "learning_rate": 2.5632858704754848e-06,
      "loss": 0.4677,
      "step": 3864
    },
    {
      "epoch": 0.9305965207969662,
      "grad_norm": 1.9874509572982788,
      "learning_rate": 2.5457740595017707e-06,
      "loss": 0.4567,
      "step": 3865
    },
    {
      "epoch": 0.9308372960934208,
      "grad_norm": 2.116501808166504,
      "learning_rate": 2.5283215009169857e-06,
      "loss": 0.4592,
      "step": 3866
    },
    {
      "epoch": 0.9310780713898754,
      "grad_norm": 2.0553879737854004,
      "learning_rate": 2.51092820533233e-06,
      "loss": 0.3902,
      "step": 3867
    },
    {
      "epoch": 0.93131884668633,
      "grad_norm": 2.063753604888916,
      "learning_rate": 2.4935941833229782e-06,
      "loss": 0.7985,
      "step": 3868
    },
    {
      "epoch": 0.9315596219827845,
      "grad_norm": 1.6043528318405151,
      "learning_rate": 2.4763194454280435e-06,
      "loss": 0.7647,
      "step": 3869
    },
    {
      "epoch": 0.9318003972792391,
      "grad_norm": 1.9053353071212769,
      "learning_rate": 2.4591040021506027e-06,
      "loss": 0.6018,
      "step": 3870
    },
    {
      "epoch": 0.9320411725756937,
      "grad_norm": 2.5057151317596436,
      "learning_rate": 2.4419478639577164e-06,
      "loss": 0.7383,
      "step": 3871
    },
    {
      "epoch": 0.9322819478721484,
      "grad_norm": 2.225681781768799,
      "learning_rate": 2.424851041280307e-06,
      "loss": 0.4332,
      "step": 3872
    },
    {
      "epoch": 0.9325227231686029,
      "grad_norm": 0.5860837697982788,
      "learning_rate": 2.4078135445133156e-06,
      "loss": 0.6404,
      "step": 3873
    },
    {
      "epoch": 0.9327634984650575,
      "grad_norm": 3.1039059162139893,
      "learning_rate": 2.390835384015555e-06,
      "loss": 0.6935,
      "step": 3874
    },
    {
      "epoch": 0.9330042737615121,
      "grad_norm": 2.5515451431274414,
      "learning_rate": 2.373916570109802e-06,
      "loss": 0.3266,
      "step": 3875
    },
    {
      "epoch": 0.9332450490579667,
      "grad_norm": 3.696157455444336,
      "learning_rate": 2.357057113082728e-06,
      "loss": 0.3768,
      "step": 3876
    },
    {
      "epoch": 0.9334858243544213,
      "grad_norm": 2.1884636878967285,
      "learning_rate": 2.340257023184922e-06,
      "loss": 0.2111,
      "step": 3877
    },
    {
      "epoch": 0.9337265996508758,
      "grad_norm": 4.436749458312988,
      "learning_rate": 2.323516310630891e-06,
      "loss": 0.9444,
      "step": 3878
    },
    {
      "epoch": 0.9339673749473304,
      "grad_norm": 1.7192350625991821,
      "learning_rate": 2.3068349855989936e-06,
      "loss": 0.3693,
      "step": 3879
    },
    {
      "epoch": 0.934208150243785,
      "grad_norm": 5.450645446777344,
      "learning_rate": 2.2902130582315274e-06,
      "loss": 0.4809,
      "step": 3880
    },
    {
      "epoch": 0.9344489255402396,
      "grad_norm": 1.2411659955978394,
      "learning_rate": 2.2736505386346863e-06,
      "loss": 1.0273,
      "step": 3881
    },
    {
      "epoch": 0.9346897008366941,
      "grad_norm": 0.8129162192344666,
      "learning_rate": 2.2571474368784707e-06,
      "loss": 0.0588,
      "step": 3882
    },
    {
      "epoch": 0.9349304761331487,
      "grad_norm": 1.5056270360946655,
      "learning_rate": 2.240703762996843e-06,
      "loss": 0.6161,
      "step": 3883
    },
    {
      "epoch": 0.9351712514296033,
      "grad_norm": 1.0241050720214844,
      "learning_rate": 2.224319526987584e-06,
      "loss": 0.4896,
      "step": 3884
    },
    {
      "epoch": 0.9354120267260579,
      "grad_norm": 0.8922635316848755,
      "learning_rate": 2.2079947388123356e-06,
      "loss": 0.5838,
      "step": 3885
    },
    {
      "epoch": 0.9356528020225124,
      "grad_norm": 3.8598411083221436,
      "learning_rate": 2.1917294083966254e-06,
      "loss": 0.5277,
      "step": 3886
    },
    {
      "epoch": 0.935893577318967,
      "grad_norm": 10.636117935180664,
      "learning_rate": 2.1755235456297986e-06,
      "loss": 0.9566,
      "step": 3887
    },
    {
      "epoch": 0.9361343526154217,
      "grad_norm": 0.4968515932559967,
      "learning_rate": 2.15937716036505e-06,
      "loss": 0.563,
      "step": 3888
    },
    {
      "epoch": 0.9363751279118763,
      "grad_norm": 8.35496711730957,
      "learning_rate": 2.1432902624194286e-06,
      "loss": 0.2234,
      "step": 3889
    },
    {
      "epoch": 0.9366159032083309,
      "grad_norm": 1.9201698303222656,
      "learning_rate": 2.1272628615737977e-06,
      "loss": 0.797,
      "step": 3890
    },
    {
      "epoch": 0.9368566785047854,
      "grad_norm": 3.2286055088043213,
      "learning_rate": 2.1112949675728743e-06,
      "loss": 0.6248,
      "step": 3891
    },
    {
      "epoch": 0.93709745380124,
      "grad_norm": 1.6388925313949585,
      "learning_rate": 2.0953865901251255e-06,
      "loss": 0.685,
      "step": 3892
    },
    {
      "epoch": 0.9373382290976946,
      "grad_norm": 1.8927644491195679,
      "learning_rate": 2.0795377389029257e-06,
      "loss": 0.3773,
      "step": 3893
    },
    {
      "epoch": 0.9375790043941492,
      "grad_norm": 11.435422897338867,
      "learning_rate": 2.063748423542411e-06,
      "loss": 0.7658,
      "step": 3894
    },
    {
      "epoch": 0.9378197796906037,
      "grad_norm": 1.6999096870422363,
      "learning_rate": 2.048018653643491e-06,
      "loss": 0.727,
      "step": 3895
    },
    {
      "epoch": 0.9380605549870583,
      "grad_norm": 0.23305965960025787,
      "learning_rate": 2.0323484387699264e-06,
      "loss": 0.1286,
      "step": 3896
    },
    {
      "epoch": 0.9383013302835129,
      "grad_norm": 1.2219979763031006,
      "learning_rate": 2.0167377884492412e-06,
      "loss": 0.3562,
      "step": 3897
    },
    {
      "epoch": 0.9385421055799675,
      "grad_norm": 1.201636552810669,
      "learning_rate": 2.0011867121727313e-06,
      "loss": 0.6063,
      "step": 3898
    },
    {
      "epoch": 0.938782880876422,
      "grad_norm": 9.849644660949707,
      "learning_rate": 1.9856952193955005e-06,
      "loss": 0.9423,
      "step": 3899
    },
    {
      "epoch": 0.9390236561728766,
      "grad_norm": 1.613932490348816,
      "learning_rate": 1.9702633195363917e-06,
      "loss": 0.522,
      "step": 3900
    },
    {
      "epoch": 0.9392644314693313,
      "grad_norm": 2.0753109455108643,
      "learning_rate": 1.954891021978045e-06,
      "loss": 0.844,
      "step": 3901
    },
    {
      "epoch": 0.9395052067657859,
      "grad_norm": 2.056060552597046,
      "learning_rate": 1.9395783360668718e-06,
      "loss": 0.5813,
      "step": 3902
    },
    {
      "epoch": 0.9397459820622404,
      "grad_norm": 1.1586860418319702,
      "learning_rate": 1.9243252711129923e-06,
      "loss": 0.8256,
      "step": 3903
    },
    {
      "epoch": 0.939986757358695,
      "grad_norm": 2.10019850730896,
      "learning_rate": 1.909131836390321e-06,
      "loss": 1.029,
      "step": 3904
    },
    {
      "epoch": 0.9402275326551496,
      "grad_norm": 0.8406896591186523,
      "learning_rate": 1.893998041136502e-06,
      "loss": 0.4226,
      "step": 3905
    },
    {
      "epoch": 0.9404683079516042,
      "grad_norm": 1.0460152626037598,
      "learning_rate": 1.8789238945528976e-06,
      "loss": 0.5012,
      "step": 3906
    },
    {
      "epoch": 0.9407090832480588,
      "grad_norm": 3.7730448246002197,
      "learning_rate": 1.8639094058046425e-06,
      "loss": 1.1832,
      "step": 3907
    },
    {
      "epoch": 0.9409498585445133,
      "grad_norm": 3.430011034011841,
      "learning_rate": 1.848954584020568e-06,
      "loss": 0.3868,
      "step": 3908
    },
    {
      "epoch": 0.9411906338409679,
      "grad_norm": 3.7194321155548096,
      "learning_rate": 1.834059438293234e-06,
      "loss": 0.8586,
      "step": 3909
    },
    {
      "epoch": 0.9414314091374225,
      "grad_norm": 1.3014260530471802,
      "learning_rate": 1.819223977678941e-06,
      "loss": 0.5398,
      "step": 3910
    },
    {
      "epoch": 0.9416721844338771,
      "grad_norm": 0.9454424381256104,
      "learning_rate": 1.8044482111976735e-06,
      "loss": 0.6537,
      "step": 3911
    },
    {
      "epoch": 0.9419129597303316,
      "grad_norm": 1.6029918193817139,
      "learning_rate": 1.7897321478331342e-06,
      "loss": 0.3148,
      "step": 3912
    },
    {
      "epoch": 0.9421537350267862,
      "grad_norm": 3.4292304515838623,
      "learning_rate": 1.7750757965327213e-06,
      "loss": 0.5388,
      "step": 3913
    },
    {
      "epoch": 0.9423945103232408,
      "grad_norm": 2.228184461593628,
      "learning_rate": 1.7604791662075181e-06,
      "loss": 0.7117,
      "step": 3914
    },
    {
      "epoch": 0.9426352856196955,
      "grad_norm": 0.6483622193336487,
      "learning_rate": 1.7459422657323254e-06,
      "loss": 0.4383,
      "step": 3915
    },
    {
      "epoch": 0.94287606091615,
      "grad_norm": 2.6446633338928223,
      "learning_rate": 1.7314651039455954e-06,
      "loss": 0.6101,
      "step": 3916
    },
    {
      "epoch": 0.9431168362126046,
      "grad_norm": 2.0396006107330322,
      "learning_rate": 1.717047689649487e-06,
      "loss": 0.5838,
      "step": 3917
    },
    {
      "epoch": 0.9433576115090592,
      "grad_norm": 0.8823184370994568,
      "learning_rate": 1.7026900316098215e-06,
      "loss": 0.3099,
      "step": 3918
    },
    {
      "epoch": 0.9435983868055138,
      "grad_norm": 2.7773776054382324,
      "learning_rate": 1.688392138556083e-06,
      "loss": 0.9675,
      "step": 3919
    },
    {
      "epoch": 0.9438391621019684,
      "grad_norm": 5.874734878540039,
      "learning_rate": 1.6741540191814287e-06,
      "loss": 0.3189,
      "step": 3920
    },
    {
      "epoch": 0.9440799373984229,
      "grad_norm": 3.0924699306488037,
      "learning_rate": 1.6599756821426449e-06,
      "loss": 0.1751,
      "step": 3921
    },
    {
      "epoch": 0.9443207126948775,
      "grad_norm": 0.5945261120796204,
      "learning_rate": 1.6458571360602248e-06,
      "loss": 0.6427,
      "step": 3922
    },
    {
      "epoch": 0.9445614879913321,
      "grad_norm": 4.261098861694336,
      "learning_rate": 1.6317983895182575e-06,
      "loss": 0.7257,
      "step": 3923
    },
    {
      "epoch": 0.9448022632877867,
      "grad_norm": 3.0185914039611816,
      "learning_rate": 1.6177994510644834e-06,
      "loss": 0.6177,
      "step": 3924
    },
    {
      "epoch": 0.9450430385842412,
      "grad_norm": 1.362781286239624,
      "learning_rate": 1.603860329210316e-06,
      "loss": 1.1531,
      "step": 3925
    },
    {
      "epoch": 0.9452838138806958,
      "grad_norm": 1.4825752973556519,
      "learning_rate": 1.589981032430743e-06,
      "loss": 0.7275,
      "step": 3926
    },
    {
      "epoch": 0.9455245891771504,
      "grad_norm": 1.4190683364868164,
      "learning_rate": 1.576161569164436e-06,
      "loss": 0.7995,
      "step": 3927
    },
    {
      "epoch": 0.945765364473605,
      "grad_norm": 1.807726263999939,
      "learning_rate": 1.5624019478136408e-06,
      "loss": 0.3198,
      "step": 3928
    },
    {
      "epoch": 0.9460061397700595,
      "grad_norm": 2.1518940925598145,
      "learning_rate": 1.5487021767442433e-06,
      "loss": 0.4084,
      "step": 3929
    },
    {
      "epoch": 0.9462469150665141,
      "grad_norm": 1.5749576091766357,
      "learning_rate": 1.535062264285736e-06,
      "loss": 0.6711,
      "step": 3930
    },
    {
      "epoch": 0.9464876903629688,
      "grad_norm": 0.5705631375312805,
      "learning_rate": 1.5214822187312294e-06,
      "loss": 0.3036,
      "step": 3931
    },
    {
      "epoch": 0.9467284656594234,
      "grad_norm": 0.7009626030921936,
      "learning_rate": 1.5079620483373857e-06,
      "loss": 0.5722,
      "step": 3932
    },
    {
      "epoch": 0.946969240955878,
      "grad_norm": 1.3820369243621826,
      "learning_rate": 1.4945017613245294e-06,
      "loss": 0.1829,
      "step": 3933
    },
    {
      "epoch": 0.9472100162523325,
      "grad_norm": 5.041447639465332,
      "learning_rate": 1.481101365876547e-06,
      "loss": 0.6447,
      "step": 3934
    },
    {
      "epoch": 0.9474507915487871,
      "grad_norm": 3.8214685916900635,
      "learning_rate": 1.4677608701408886e-06,
      "loss": 0.7699,
      "step": 3935
    },
    {
      "epoch": 0.9476915668452417,
      "grad_norm": 2.032578468322754,
      "learning_rate": 1.4544802822286318e-06,
      "loss": 0.5696,
      "step": 3936
    },
    {
      "epoch": 0.9479323421416963,
      "grad_norm": 0.9442195892333984,
      "learning_rate": 1.4412596102143738e-06,
      "loss": 0.2906,
      "step": 3937
    },
    {
      "epoch": 0.9481731174381508,
      "grad_norm": 1.667283535003662,
      "learning_rate": 1.42809886213634e-06,
      "loss": 0.4485,
      "step": 3938
    },
    {
      "epoch": 0.9484138927346054,
      "grad_norm": 2.698345184326172,
      "learning_rate": 1.4149980459962742e-06,
      "loss": 0.3603,
      "step": 3939
    },
    {
      "epoch": 0.94865466803106,
      "grad_norm": 1.8065334558486938,
      "learning_rate": 1.4019571697595156e-06,
      "loss": 0.2997,
      "step": 3940
    },
    {
      "epoch": 0.9488954433275146,
      "grad_norm": 2.3097822666168213,
      "learning_rate": 1.3889762413549333e-06,
      "loss": 0.5277,
      "step": 3941
    },
    {
      "epoch": 0.9491362186239691,
      "grad_norm": 2.765949249267578,
      "learning_rate": 1.3760552686749806e-06,
      "loss": 0.3425,
      "step": 3942
    },
    {
      "epoch": 0.9493769939204237,
      "grad_norm": 0.5022979378700256,
      "learning_rate": 1.3631942595756175e-06,
      "loss": 0.5907,
      "step": 3943
    },
    {
      "epoch": 0.9496177692168783,
      "grad_norm": 1.2168604135513306,
      "learning_rate": 1.3503932218763893e-06,
      "loss": 0.2498,
      "step": 3944
    },
    {
      "epoch": 0.949858544513333,
      "grad_norm": 0.7240424752235413,
      "learning_rate": 1.3376521633603256e-06,
      "loss": 0.2152,
      "step": 3945
    },
    {
      "epoch": 0.9500993198097876,
      "grad_norm": 5.824214935302734,
      "learning_rate": 1.324971091774052e-06,
      "loss": 0.7278,
      "step": 3946
    },
    {
      "epoch": 0.9503400951062421,
      "grad_norm": 2.0212886333465576,
      "learning_rate": 1.312350014827668e-06,
      "loss": 0.8276,
      "step": 3947
    },
    {
      "epoch": 0.9505808704026967,
      "grad_norm": 4.131972789764404,
      "learning_rate": 1.2997889401948126e-06,
      "loss": 0.4576,
      "step": 3948
    },
    {
      "epoch": 0.9508216456991513,
      "grad_norm": 0.5388569831848145,
      "learning_rate": 1.287287875512655e-06,
      "loss": 0.3775,
      "step": 3949
    },
    {
      "epoch": 0.9510624209956059,
      "grad_norm": 0.646866500377655,
      "learning_rate": 1.2748468283818815e-06,
      "loss": 0.2518,
      "step": 3950
    },
    {
      "epoch": 0.9513031962920604,
      "grad_norm": 1.9133360385894775,
      "learning_rate": 1.2624658063666639e-06,
      "loss": 0.6595,
      "step": 3951
    },
    {
      "epoch": 0.951543971588515,
      "grad_norm": 1.385985255241394,
      "learning_rate": 1.2501448169946916e-06,
      "loss": 0.6742,
      "step": 3952
    },
    {
      "epoch": 0.9517847468849696,
      "grad_norm": 2.3750483989715576,
      "learning_rate": 1.2378838677571503e-06,
      "loss": 0.9347,
      "step": 3953
    },
    {
      "epoch": 0.9520255221814242,
      "grad_norm": 2.106820583343506,
      "learning_rate": 1.2256829661087432e-06,
      "loss": 0.4332,
      "step": 3954
    },
    {
      "epoch": 0.9522662974778787,
      "grad_norm": 1.1812132596969604,
      "learning_rate": 1.2135421194676256e-06,
      "loss": 0.4027,
      "step": 3955
    },
    {
      "epoch": 0.9525070727743333,
      "grad_norm": 1.4752898216247559,
      "learning_rate": 1.2014613352154702e-06,
      "loss": 0.2301,
      "step": 3956
    },
    {
      "epoch": 0.9527478480707879,
      "grad_norm": 1.9427971839904785,
      "learning_rate": 1.189440620697424e-06,
      "loss": 0.6347,
      "step": 3957
    },
    {
      "epoch": 0.9529886233672425,
      "grad_norm": 2.3208911418914795,
      "learning_rate": 1.1774799832220961e-06,
      "loss": 0.9104,
      "step": 3958
    },
    {
      "epoch": 0.9532293986636972,
      "grad_norm": 1.113741397857666,
      "learning_rate": 1.1655794300615918e-06,
      "loss": 0.8192,
      "step": 3959
    },
    {
      "epoch": 0.9534701739601517,
      "grad_norm": 2.645212411880493,
      "learning_rate": 1.1537389684514787e-06,
      "loss": 0.7612,
      "step": 3960
    },
    {
      "epoch": 0.9537109492566063,
      "grad_norm": 4.026910781860352,
      "learning_rate": 1.141958605590765e-06,
      "loss": 0.8181,
      "step": 3961
    },
    {
      "epoch": 0.9539517245530609,
      "grad_norm": 3.301568031311035,
      "learning_rate": 1.1302383486419544e-06,
      "loss": 1.2559,
      "step": 3962
    },
    {
      "epoch": 0.9541924998495155,
      "grad_norm": 1.3520029783248901,
      "learning_rate": 1.11857820473098e-06,
      "loss": 0.6955,
      "step": 3963
    },
    {
      "epoch": 0.95443327514597,
      "grad_norm": 3.8628885746002197,
      "learning_rate": 1.106978180947238e-06,
      "loss": 0.9347,
      "step": 3964
    },
    {
      "epoch": 0.9546740504424246,
      "grad_norm": 1.2216380834579468,
      "learning_rate": 1.095438284343575e-06,
      "loss": 0.6449,
      "step": 3965
    },
    {
      "epoch": 0.9549148257388792,
      "grad_norm": 0.4715072214603424,
      "learning_rate": 1.083958521936257e-06,
      "loss": 0.2787,
      "step": 3966
    },
    {
      "epoch": 0.9551556010353338,
      "grad_norm": 2.9943132400512695,
      "learning_rate": 1.0725389007050446e-06,
      "loss": 0.8323,
      "step": 3967
    },
    {
      "epoch": 0.9553963763317883,
      "grad_norm": 3.0367226600646973,
      "learning_rate": 1.0611794275930399e-06,
      "loss": 0.8128,
      "step": 3968
    },
    {
      "epoch": 0.9556371516282429,
      "grad_norm": 1.5612680912017822,
      "learning_rate": 1.0498801095068733e-06,
      "loss": 0.2859,
      "step": 3969
    },
    {
      "epoch": 0.9558779269246975,
      "grad_norm": 1.2354720830917358,
      "learning_rate": 1.0386409533165276e-06,
      "loss": 0.6305,
      "step": 3970
    },
    {
      "epoch": 0.9561187022211521,
      "grad_norm": 3.0069570541381836,
      "learning_rate": 1.0274619658554475e-06,
      "loss": 0.6211,
      "step": 3971
    },
    {
      "epoch": 0.9563594775176067,
      "grad_norm": 2.1763761043548584,
      "learning_rate": 1.0163431539204847e-06,
      "loss": 0.7641,
      "step": 3972
    },
    {
      "epoch": 0.9566002528140612,
      "grad_norm": 2.459559202194214,
      "learning_rate": 1.005284524271899e-06,
      "loss": 0.822,
      "step": 3973
    },
    {
      "epoch": 0.9568410281105159,
      "grad_norm": 1.9608721733093262,
      "learning_rate": 9.942860836333445e-07,
      "loss": 0.8843,
      "step": 3974
    },
    {
      "epoch": 0.9570818034069705,
      "grad_norm": 2.488222599029541,
      "learning_rate": 9.833478386919282e-07,
      "loss": 0.9933,
      "step": 3975
    },
    {
      "epoch": 0.9573225787034251,
      "grad_norm": 1.750231146812439,
      "learning_rate": 9.724697960981077e-07,
      "loss": 0.3185,
      "step": 3976
    },
    {
      "epoch": 0.9575633539998796,
      "grad_norm": 1.300431489944458,
      "learning_rate": 9.616519624657706e-07,
      "loss": 0.4801,
      "step": 3977
    },
    {
      "epoch": 0.9578041292963342,
      "grad_norm": 0.6917396783828735,
      "learning_rate": 9.508943443721663e-07,
      "loss": 0.5667,
      "step": 3978
    },
    {
      "epoch": 0.9580449045927888,
      "grad_norm": 3.407341480255127,
      "learning_rate": 9.401969483579632e-07,
      "loss": 0.5509,
      "step": 3979
    },
    {
      "epoch": 0.9582856798892434,
      "grad_norm": 2.0416157245635986,
      "learning_rate": 9.295597809272028e-07,
      "loss": 0.1392,
      "step": 3980
    },
    {
      "epoch": 0.9585264551856979,
      "grad_norm": 2.7798619270324707,
      "learning_rate": 9.189828485473006e-07,
      "loss": 0.9261,
      "step": 3981
    },
    {
      "epoch": 0.9587672304821525,
      "grad_norm": 0.8763837218284607,
      "learning_rate": 9.084661576490461e-07,
      "loss": 0.398,
      "step": 3982
    },
    {
      "epoch": 0.9590080057786071,
      "grad_norm": 1.7817946672439575,
      "learning_rate": 8.980097146266464e-07,
      "loss": 0.4061,
      "step": 3983
    },
    {
      "epoch": 0.9592487810750617,
      "grad_norm": 1.229095458984375,
      "learning_rate": 8.876135258376051e-07,
      "loss": 0.4831,
      "step": 3984
    },
    {
      "epoch": 0.9594895563715162,
      "grad_norm": 3.4042162895202637,
      "learning_rate": 8.772775976028546e-07,
      "loss": 0.4386,
      "step": 3985
    },
    {
      "epoch": 0.9597303316679708,
      "grad_norm": 1.3494471311569214,
      "learning_rate": 8.670019362066461e-07,
      "loss": 0.1705,
      "step": 3986
    },
    {
      "epoch": 0.9599711069644254,
      "grad_norm": 1.1241267919540405,
      "learning_rate": 8.567865478966042e-07,
      "loss": 0.3146,
      "step": 3987
    },
    {
      "epoch": 0.9602118822608801,
      "grad_norm": 0.9588642120361328,
      "learning_rate": 8.466314388837271e-07,
      "loss": 0.2576,
      "step": 3988
    },
    {
      "epoch": 0.9604526575573347,
      "grad_norm": 0.49164265394210815,
      "learning_rate": 8.365366153423204e-07,
      "loss": 0.2641,
      "step": 3989
    },
    {
      "epoch": 0.9606934328537892,
      "grad_norm": 1.6935783624649048,
      "learning_rate": 8.265020834100635e-07,
      "loss": 0.5948,
      "step": 3990
    },
    {
      "epoch": 0.9609342081502438,
      "grad_norm": 1.3072270154953003,
      "learning_rate": 8.165278491879868e-07,
      "loss": 0.5994,
      "step": 3991
    },
    {
      "epoch": 0.9611749834466984,
      "grad_norm": 4.324315071105957,
      "learning_rate": 8.066139187404398e-07,
      "loss": 0.4573,
      "step": 3992
    },
    {
      "epoch": 0.961415758743153,
      "grad_norm": 6.5658087730407715,
      "learning_rate": 7.967602980951228e-07,
      "loss": 0.6555,
      "step": 3993
    },
    {
      "epoch": 0.9616565340396075,
      "grad_norm": 2.510852813720703,
      "learning_rate": 7.869669932430435e-07,
      "loss": 0.617,
      "step": 3994
    },
    {
      "epoch": 0.9618973093360621,
      "grad_norm": 1.054416298866272,
      "learning_rate": 7.772340101385611e-07,
      "loss": 0.7014,
      "step": 3995
    },
    {
      "epoch": 0.9621380846325167,
      "grad_norm": 3.1223275661468506,
      "learning_rate": 7.675613546993643e-07,
      "loss": 0.7056,
      "step": 3996
    },
    {
      "epoch": 0.9623788599289713,
      "grad_norm": 2.15596866607666,
      "learning_rate": 7.579490328064265e-07,
      "loss": 0.7612,
      "step": 3997
    },
    {
      "epoch": 0.9626196352254258,
      "grad_norm": 3.02179217338562,
      "learning_rate": 7.483970503040726e-07,
      "loss": 0.6353,
      "step": 3998
    },
    {
      "epoch": 0.9628604105218804,
      "grad_norm": 1.420333743095398,
      "learning_rate": 7.38905412999924e-07,
      "loss": 0.3904,
      "step": 3999
    },
    {
      "epoch": 0.963101185818335,
      "grad_norm": 2.189934253692627,
      "learning_rate": 7.294741266649307e-07,
      "loss": 0.4709,
      "step": 4000
    },
    {
      "epoch": 0.9633419611147896,
      "grad_norm": 3.9134743213653564,
      "learning_rate": 7.201031970333283e-07,
      "loss": 0.3967,
      "step": 4001
    },
    {
      "epoch": 0.9635827364112443,
      "grad_norm": 14.885796546936035,
      "learning_rate": 7.10792629802659e-07,
      "loss": 0.9829,
      "step": 4002
    },
    {
      "epoch": 0.9638235117076988,
      "grad_norm": 2.1734344959259033,
      "learning_rate": 7.015424306337725e-07,
      "loss": 0.3751,
      "step": 4003
    },
    {
      "epoch": 0.9640642870041534,
      "grad_norm": 2.0911247730255127,
      "learning_rate": 6.923526051508145e-07,
      "loss": 0.7239,
      "step": 4004
    },
    {
      "epoch": 0.964305062300608,
      "grad_norm": 2.525022029876709,
      "learning_rate": 6.832231589412042e-07,
      "loss": 0.2855,
      "step": 4005
    },
    {
      "epoch": 0.9645458375970626,
      "grad_norm": 1.239410161972046,
      "learning_rate": 6.741540975556903e-07,
      "loss": 0.3171,
      "step": 4006
    },
    {
      "epoch": 0.9647866128935171,
      "grad_norm": 1.3856205940246582,
      "learning_rate": 6.651454265082512e-07,
      "loss": 0.5041,
      "step": 4007
    },
    {
      "epoch": 0.9650273881899717,
      "grad_norm": 1.1256098747253418,
      "learning_rate": 6.561971512762055e-07,
      "loss": 0.4639,
      "step": 4008
    },
    {
      "epoch": 0.9652681634864263,
      "grad_norm": 1.6934860944747925,
      "learning_rate": 6.473092773001233e-07,
      "loss": 0.488,
      "step": 4009
    },
    {
      "epoch": 0.9655089387828809,
      "grad_norm": 3.070348024368286,
      "learning_rate": 6.384818099838374e-07,
      "loss": 0.9032,
      "step": 4010
    },
    {
      "epoch": 0.9657497140793354,
      "grad_norm": 1.458402395248413,
      "learning_rate": 6.297147546944882e-07,
      "loss": 0.4057,
      "step": 4011
    },
    {
      "epoch": 0.96599048937579,
      "grad_norm": 2.034212589263916,
      "learning_rate": 6.210081167624338e-07,
      "loss": 0.265,
      "step": 4012
    },
    {
      "epoch": 0.9662312646722446,
      "grad_norm": 1.1797361373901367,
      "learning_rate": 6.12361901481362e-07,
      "loss": 0.6248,
      "step": 4013
    },
    {
      "epoch": 0.9664720399686992,
      "grad_norm": 4.208076000213623,
      "learning_rate": 6.037761141081677e-07,
      "loss": 0.8388,
      "step": 4014
    },
    {
      "epoch": 0.9667128152651538,
      "grad_norm": 3.974991798400879,
      "learning_rate": 5.952507598630419e-07,
      "loss": 0.8926,
      "step": 4015
    },
    {
      "epoch": 0.9669535905616083,
      "grad_norm": 1.3050296306610107,
      "learning_rate": 5.86785843929416e-07,
      "loss": 0.438,
      "step": 4016
    },
    {
      "epoch": 0.967194365858063,
      "grad_norm": 4.134682655334473,
      "learning_rate": 5.783813714539731e-07,
      "loss": 1.4216,
      "step": 4017
    },
    {
      "epoch": 0.9674351411545176,
      "grad_norm": 1.5222718715667725,
      "learning_rate": 5.700373475466592e-07,
      "loss": 0.5619,
      "step": 4018
    },
    {
      "epoch": 0.9676759164509722,
      "grad_norm": 1.8690755367279053,
      "learning_rate": 5.617537772806602e-07,
      "loss": 0.45,
      "step": 4019
    },
    {
      "epoch": 0.9679166917474267,
      "grad_norm": 2.2518856525421143,
      "learning_rate": 5.535306656923922e-07,
      "loss": 0.2202,
      "step": 4020
    },
    {
      "epoch": 0.9681574670438813,
      "grad_norm": 0.996590793132782,
      "learning_rate": 5.453680177815445e-07,
      "loss": 0.599,
      "step": 4021
    },
    {
      "epoch": 0.9683982423403359,
      "grad_norm": 2.219210624694824,
      "learning_rate": 5.372658385110141e-07,
      "loss": 0.4227,
      "step": 4022
    },
    {
      "epoch": 0.9686390176367905,
      "grad_norm": 4.723870754241943,
      "learning_rate": 5.29224132806938e-07,
      "loss": 0.6357,
      "step": 4023
    },
    {
      "epoch": 0.968879792933245,
      "grad_norm": 3.3653030395507812,
      "learning_rate": 5.212429055587165e-07,
      "loss": 0.5787,
      "step": 4024
    },
    {
      "epoch": 0.9691205682296996,
      "grad_norm": 2.3071415424346924,
      "learning_rate": 5.133221616189232e-07,
      "loss": 0.6784,
      "step": 4025
    },
    {
      "epoch": 0.9693613435261542,
      "grad_norm": 2.037489414215088,
      "learning_rate": 5.054619058033949e-07,
      "loss": 0.7301,
      "step": 4026
    },
    {
      "epoch": 0.9696021188226088,
      "grad_norm": 0.8913125991821289,
      "learning_rate": 4.976621428912087e-07,
      "loss": 0.3828,
      "step": 4027
    },
    {
      "epoch": 0.9698428941190634,
      "grad_norm": 2.0508530139923096,
      "learning_rate": 4.899228776246157e-07,
      "loss": 0.4969,
      "step": 4028
    },
    {
      "epoch": 0.9700836694155179,
      "grad_norm": 2.9134552478790283,
      "learning_rate": 4.822441147091072e-07,
      "loss": 0.4914,
      "step": 4029
    },
    {
      "epoch": 0.9703244447119725,
      "grad_norm": 1.360295295715332,
      "learning_rate": 4.7462585881339337e-07,
      "loss": 0.5586,
      "step": 4030
    },
    {
      "epoch": 0.9705652200084272,
      "grad_norm": 0.46363896131515503,
      "learning_rate": 4.6706811456939116e-07,
      "loss": 0.1493,
      "step": 4031
    },
    {
      "epoch": 0.9708059953048818,
      "grad_norm": 1.6321947574615479,
      "learning_rate": 4.595708865722359e-07,
      "loss": 0.7034,
      "step": 4032
    },
    {
      "epoch": 0.9710467706013363,
      "grad_norm": 3.976177930831909,
      "learning_rate": 4.5213417938023693e-07,
      "loss": 0.6017,
      "step": 4033
    },
    {
      "epoch": 0.9712875458977909,
      "grad_norm": 3.0852105617523193,
      "learning_rate": 4.4475799751494405e-07,
      "loss": 0.8562,
      "step": 4034
    },
    {
      "epoch": 0.9715283211942455,
      "grad_norm": 1.4149786233901978,
      "learning_rate": 4.374423454610921e-07,
      "loss": 0.331,
      "step": 4035
    },
    {
      "epoch": 0.9717690964907001,
      "grad_norm": 3.825847625732422,
      "learning_rate": 4.3018722766661193e-07,
      "loss": 0.7702,
      "step": 4036
    },
    {
      "epoch": 0.9720098717871546,
      "grad_norm": 5.9385175704956055,
      "learning_rate": 4.2299264854263056e-07,
      "loss": 0.4582,
      "step": 4037
    },
    {
      "epoch": 0.9722506470836092,
      "grad_norm": 2.5086779594421387,
      "learning_rate": 4.1585861246346e-07,
      "loss": 0.4805,
      "step": 4038
    },
    {
      "epoch": 0.9724914223800638,
      "grad_norm": 1.3712728023529053,
      "learning_rate": 4.087851237666196e-07,
      "loss": 0.4194,
      "step": 4039
    },
    {
      "epoch": 0.9727321976765184,
      "grad_norm": 4.936484336853027,
      "learning_rate": 4.017721867528246e-07,
      "loss": 0.4498,
      "step": 4040
    },
    {
      "epoch": 0.972972972972973,
      "grad_norm": 1.3534749746322632,
      "learning_rate": 3.948198056859198e-07,
      "loss": 0.6054,
      "step": 4041
    },
    {
      "epoch": 0.9732137482694275,
      "grad_norm": 3.1654248237609863,
      "learning_rate": 3.8792798479299066e-07,
      "loss": 0.8157,
      "step": 4042
    },
    {
      "epoch": 0.9734545235658821,
      "grad_norm": 1.9799362421035767,
      "learning_rate": 3.810967282642741e-07,
      "loss": 0.5287,
      "step": 4043
    },
    {
      "epoch": 0.9736952988623367,
      "grad_norm": 0.713421642780304,
      "learning_rate": 3.743260402531923e-07,
      "loss": 0.4187,
      "step": 4044
    },
    {
      "epoch": 0.9739360741587914,
      "grad_norm": 2.7310409545898438,
      "learning_rate": 3.676159248763411e-07,
      "loss": 0.8605,
      "step": 4045
    },
    {
      "epoch": 0.9741768494552459,
      "grad_norm": 0.2964976131916046,
      "learning_rate": 3.6096638621346824e-07,
      "loss": 0.176,
      "step": 4046
    },
    {
      "epoch": 0.9744176247517005,
      "grad_norm": 4.847579002380371,
      "learning_rate": 3.543774283075396e-07,
      "loss": 0.6553,
      "step": 4047
    },
    {
      "epoch": 0.9746584000481551,
      "grad_norm": 1.6431396007537842,
      "learning_rate": 3.478490551646285e-07,
      "loss": 0.5625,
      "step": 4048
    },
    {
      "epoch": 0.9748991753446097,
      "grad_norm": 1.354458212852478,
      "learning_rate": 3.413812707540154e-07,
      "loss": 0.3545,
      "step": 4049
    },
    {
      "epoch": 0.9751399506410642,
      "grad_norm": 0.7734440565109253,
      "learning_rate": 3.3497407900812126e-07,
      "loss": 0.3589,
      "step": 4050
    },
    {
      "epoch": 0.9753807259375188,
      "grad_norm": 2.87133526802063,
      "learning_rate": 3.2862748382253006e-07,
      "loss": 0.2974,
      "step": 4051
    },
    {
      "epoch": 0.9756215012339734,
      "grad_norm": 8.928435325622559,
      "learning_rate": 3.223414890559995e-07,
      "loss": 0.4755,
      "step": 4052
    },
    {
      "epoch": 0.975862276530428,
      "grad_norm": 3.5270206928253174,
      "learning_rate": 3.161160985304168e-07,
      "loss": 0.4525,
      "step": 4053
    },
    {
      "epoch": 0.9761030518268826,
      "grad_norm": 1.8198820352554321,
      "learning_rate": 3.0995131603083205e-07,
      "loss": 0.7032,
      "step": 4054
    },
    {
      "epoch": 0.9763438271233371,
      "grad_norm": 2.2425918579101562,
      "learning_rate": 3.038471453054581e-07,
      "loss": 0.8367,
      "step": 4055
    },
    {
      "epoch": 0.9765846024197917,
      "grad_norm": 2.4997448921203613,
      "learning_rate": 2.978035900656373e-07,
      "loss": 0.7143,
      "step": 4056
    },
    {
      "epoch": 0.9768253777162463,
      "grad_norm": 2.2880537509918213,
      "learning_rate": 2.918206539858637e-07,
      "loss": 0.5019,
      "step": 4057
    },
    {
      "epoch": 0.9770661530127009,
      "grad_norm": 0.7689948081970215,
      "learning_rate": 2.8589834070378295e-07,
      "loss": 0.2252,
      "step": 4058
    },
    {
      "epoch": 0.9773069283091554,
      "grad_norm": 1.0523358583450317,
      "learning_rate": 2.800366538201593e-07,
      "loss": 0.3874,
      "step": 4059
    },
    {
      "epoch": 0.97754770360561,
      "grad_norm": 2.757550001144409,
      "learning_rate": 2.742355968989307e-07,
      "loss": 0.3381,
      "step": 4060
    },
    {
      "epoch": 0.9777884789020647,
      "grad_norm": 0.9217396378517151,
      "learning_rate": 2.684951734671426e-07,
      "loss": 0.1354,
      "step": 4061
    },
    {
      "epoch": 0.9780292541985193,
      "grad_norm": 0.9151739478111267,
      "learning_rate": 2.6281538701498075e-07,
      "loss": 0.4916,
      "step": 4062
    },
    {
      "epoch": 0.9782700294949738,
      "grad_norm": 1.2322125434875488,
      "learning_rate": 2.571962409957718e-07,
      "loss": 0.5954,
      "step": 4063
    },
    {
      "epoch": 0.9785108047914284,
      "grad_norm": 6.46744441986084,
      "learning_rate": 2.5163773882598274e-07,
      "loss": 0.917,
      "step": 4064
    },
    {
      "epoch": 0.978751580087883,
      "grad_norm": 2.882272958755493,
      "learning_rate": 2.4613988388517696e-07,
      "loss": 0.7739,
      "step": 4065
    },
    {
      "epoch": 0.9789923553843376,
      "grad_norm": 1.9530011415481567,
      "learning_rate": 2.407026795160694e-07,
      "loss": 0.8486,
      "step": 4066
    },
    {
      "epoch": 0.9792331306807921,
      "grad_norm": 2.2306883335113525,
      "learning_rate": 2.3532612902449346e-07,
      "loss": 0.5382,
      "step": 4067
    },
    {
      "epoch": 0.9794739059772467,
      "grad_norm": 2.601823568344116,
      "learning_rate": 2.3001023567941205e-07,
      "loss": 0.7243,
      "step": 4068
    },
    {
      "epoch": 0.9797146812737013,
      "grad_norm": 1.1144752502441406,
      "learning_rate": 2.247550027128842e-07,
      "loss": 0.8548,
      "step": 4069
    },
    {
      "epoch": 0.9799554565701559,
      "grad_norm": 10.666824340820312,
      "learning_rate": 2.1956043332010955e-07,
      "loss": 0.9193,
      "step": 4070
    },
    {
      "epoch": 0.9801962318666105,
      "grad_norm": 1.2598254680633545,
      "learning_rate": 2.144265306594062e-07,
      "loss": 0.3549,
      "step": 4071
    },
    {
      "epoch": 0.980437007163065,
      "grad_norm": 2.1722021102905273,
      "learning_rate": 2.093532978521884e-07,
      "loss": 0.5457,
      "step": 4072
    },
    {
      "epoch": 0.9806777824595196,
      "grad_norm": 3.0609018802642822,
      "learning_rate": 2.0434073798298869e-07,
      "loss": 0.9473,
      "step": 4073
    },
    {
      "epoch": 0.9809185577559743,
      "grad_norm": 1.4855046272277832,
      "learning_rate": 1.9938885409948038e-07,
      "loss": 0.428,
      "step": 4074
    },
    {
      "epoch": 0.9811593330524289,
      "grad_norm": 2.298407793045044,
      "learning_rate": 1.9449764921238845e-07,
      "loss": 1.0341,
      "step": 4075
    },
    {
      "epoch": 0.9814001083488834,
      "grad_norm": 1.82069730758667,
      "learning_rate": 1.8966712629558957e-07,
      "loss": 0.9768,
      "step": 4076
    },
    {
      "epoch": 0.981640883645338,
      "grad_norm": 3.1224253177642822,
      "learning_rate": 1.848972882860567e-07,
      "loss": 0.6785,
      "step": 4077
    },
    {
      "epoch": 0.9818816589417926,
      "grad_norm": 3.6556875705718994,
      "learning_rate": 1.8018813808385883e-07,
      "loss": 0.6481,
      "step": 4078
    },
    {
      "epoch": 0.9821224342382472,
      "grad_norm": 1.220012903213501,
      "learning_rate": 1.7553967855217235e-07,
      "loss": 0.9078,
      "step": 4079
    },
    {
      "epoch": 0.9823632095347017,
      "grad_norm": 0.8602136373519897,
      "learning_rate": 1.7095191251726982e-07,
      "loss": 0.0683,
      "step": 4080
    },
    {
      "epoch": 0.9826039848311563,
      "grad_norm": 2.155679941177368,
      "learning_rate": 1.6642484276852e-07,
      "loss": 0.7771,
      "step": 4081
    },
    {
      "epoch": 0.9828447601276109,
      "grad_norm": 2.459348440170288,
      "learning_rate": 1.6195847205838777e-07,
      "loss": 1.0204,
      "step": 4082
    },
    {
      "epoch": 0.9830855354240655,
      "grad_norm": 1.9554654359817505,
      "learning_rate": 1.5755280310244536e-07,
      "loss": 0.4039,
      "step": 4083
    },
    {
      "epoch": 0.9833263107205201,
      "grad_norm": 1.885136604309082,
      "learning_rate": 1.5320783857935005e-07,
      "loss": 0.4138,
      "step": 4084
    },
    {
      "epoch": 0.9835670860169746,
      "grad_norm": 1.192893385887146,
      "learning_rate": 1.4892358113084426e-07,
      "loss": 0.5029,
      "step": 4085
    },
    {
      "epoch": 0.9838078613134292,
      "grad_norm": 3.7182071208953857,
      "learning_rate": 1.447000333617665e-07,
      "loss": 1.1002,
      "step": 4086
    },
    {
      "epoch": 0.9840486366098838,
      "grad_norm": 1.4601658582687378,
      "learning_rate": 1.405371978400516e-07,
      "loss": 0.5054,
      "step": 4087
    },
    {
      "epoch": 0.9842894119063385,
      "grad_norm": 2.312633752822876,
      "learning_rate": 1.3643507709669713e-07,
      "loss": 0.4722,
      "step": 4088
    },
    {
      "epoch": 0.984530187202793,
      "grad_norm": 0.7593234181404114,
      "learning_rate": 1.3239367362581912e-07,
      "loss": 0.1804,
      "step": 4089
    },
    {
      "epoch": 0.9847709624992476,
      "grad_norm": 2.778722047805786,
      "learning_rate": 1.284129898845854e-07,
      "loss": 0.6931,
      "step": 4090
    },
    {
      "epoch": 0.9850117377957022,
      "grad_norm": 3.4330999851226807,
      "learning_rate": 1.2449302829327102e-07,
      "loss": 1.1589,
      "step": 4091
    },
    {
      "epoch": 0.9852525130921568,
      "grad_norm": 2.7237799167633057,
      "learning_rate": 1.20633791235214e-07,
      "loss": 0.58,
      "step": 4092
    },
    {
      "epoch": 0.9854932883886113,
      "grad_norm": 1.8853704929351807,
      "learning_rate": 1.1683528105684848e-07,
      "loss": 0.9436,
      "step": 4093
    },
    {
      "epoch": 0.9857340636850659,
      "grad_norm": 1.500649094581604,
      "learning_rate": 1.130975000676715e-07,
      "loss": 0.8359,
      "step": 4094
    },
    {
      "epoch": 0.9859748389815205,
      "grad_norm": 2.9082491397857666,
      "learning_rate": 1.0942045054025407e-07,
      "loss": 1.1474,
      "step": 4095
    },
    {
      "epoch": 0.9862156142779751,
      "grad_norm": 0.6749841570854187,
      "learning_rate": 1.058041347102634e-07,
      "loss": 0.5816,
      "step": 4096
    },
    {
      "epoch": 0.9864563895744297,
      "grad_norm": 1.8493642807006836,
      "learning_rate": 1.0224855477642959e-07,
      "loss": 1.1293,
      "step": 4097
    },
    {
      "epoch": 0.9866971648708842,
      "grad_norm": 0.32945817708969116,
      "learning_rate": 9.875371290053447e-08,
      "loss": 0.3363,
      "step": 4098
    },
    {
      "epoch": 0.9869379401673388,
      "grad_norm": 1.6765530109405518,
      "learning_rate": 9.531961120746724e-08,
      "loss": 0.892,
      "step": 4099
    },
    {
      "epoch": 0.9871787154637934,
      "grad_norm": 2.7227275371551514,
      "learning_rate": 9.19462517851688e-08,
      "loss": 0.8791,
      "step": 4100
    },
    {
      "epoch": 0.987419490760248,
      "grad_norm": 0.8839995265007019,
      "learning_rate": 8.863363668464297e-08,
      "loss": 0.5598,
      "step": 4101
    },
    {
      "epoch": 0.9876602660567025,
      "grad_norm": 1.8077160120010376,
      "learning_rate": 8.538176791996754e-08,
      "loss": 0.3711,
      "step": 4102
    },
    {
      "epoch": 0.9879010413531571,
      "grad_norm": 2.301443338394165,
      "learning_rate": 8.21906474682943e-08,
      "loss": 1.0025,
      "step": 4103
    },
    {
      "epoch": 0.9881418166496118,
      "grad_norm": 1.6749955415725708,
      "learning_rate": 7.906027726981568e-08,
      "loss": 1.0008,
      "step": 4104
    },
    {
      "epoch": 0.9883825919460664,
      "grad_norm": 2.291646957397461,
      "learning_rate": 7.599065922780924e-08,
      "loss": 0.4288,
      "step": 4105
    },
    {
      "epoch": 0.9886233672425209,
      "grad_norm": 3.5161843299865723,
      "learning_rate": 7.298179520862647e-08,
      "loss": 0.8569,
      "step": 4106
    },
    {
      "epoch": 0.9888641425389755,
      "grad_norm": 0.49535292387008667,
      "learning_rate": 7.003368704164847e-08,
      "loss": 0.3556,
      "step": 4107
    },
    {
      "epoch": 0.9891049178354301,
      "grad_norm": 2.1845948696136475,
      "learning_rate": 6.714633651931923e-08,
      "loss": 0.4408,
      "step": 4108
    },
    {
      "epoch": 0.9893456931318847,
      "grad_norm": 2.3166656494140625,
      "learning_rate": 6.431974539717888e-08,
      "loss": 0.4087,
      "step": 4109
    },
    {
      "epoch": 0.9895864684283393,
      "grad_norm": 0.6833885312080383,
      "learning_rate": 6.155391539379718e-08,
      "loss": 0.362,
      "step": 4110
    },
    {
      "epoch": 0.9898272437247938,
      "grad_norm": 1.6554492712020874,
      "learning_rate": 5.884884819079561e-08,
      "loss": 0.4277,
      "step": 4111
    },
    {
      "epoch": 0.9900680190212484,
      "grad_norm": 1.1396666765213013,
      "learning_rate": 5.620454543285858e-08,
      "loss": 0.5649,
      "step": 4112
    },
    {
      "epoch": 0.990308794317703,
      "grad_norm": 2.3340067863464355,
      "learning_rate": 5.362100872773334e-08,
      "loss": 0.5773,
      "step": 4113
    },
    {
      "epoch": 0.9905495696141576,
      "grad_norm": 2.0202200412750244,
      "learning_rate": 5.109823964621896e-08,
      "loss": 0.5604,
      "step": 4114
    },
    {
      "epoch": 0.9907903449106121,
      "grad_norm": 1.5799890756607056,
      "learning_rate": 4.863623972216624e-08,
      "loss": 0.5799,
      "step": 4115
    },
    {
      "epoch": 0.9910311202070667,
      "grad_norm": 3.0115244388580322,
      "learning_rate": 4.62350104524778e-08,
      "loss": 0.631,
      "step": 4116
    },
    {
      "epoch": 0.9912718955035213,
      "grad_norm": 1.998792290687561,
      "learning_rate": 4.3894553297085805e-08,
      "loss": 0.8206,
      "step": 4117
    },
    {
      "epoch": 0.991512670799976,
      "grad_norm": 1.306921124458313,
      "learning_rate": 4.161486967901862e-08,
      "loss": 0.6584,
      "step": 4118
    },
    {
      "epoch": 0.9917534460964305,
      "grad_norm": 2.2372684478759766,
      "learning_rate": 3.9395960984323076e-08,
      "loss": 0.4433,
      "step": 4119
    },
    {
      "epoch": 0.9919942213928851,
      "grad_norm": 1.8838356733322144,
      "learning_rate": 3.723782856208669e-08,
      "loss": 0.7484,
      "step": 4120
    },
    {
      "epoch": 0.9922349966893397,
      "grad_norm": 0.9679247140884399,
      "learning_rate": 3.5140473724482034e-08,
      "loss": 0.1638,
      "step": 4121
    },
    {
      "epoch": 0.9924757719857943,
      "grad_norm": 1.0013998746871948,
      "learning_rate": 3.3103897746689097e-08,
      "loss": 0.2034,
      "step": 4122
    },
    {
      "epoch": 0.9927165472822489,
      "grad_norm": 1.7386034727096558,
      "learning_rate": 3.11281018669507e-08,
      "loss": 0.6798,
      "step": 4123
    },
    {
      "epoch": 0.9929573225787034,
      "grad_norm": 1.7022814750671387,
      "learning_rate": 2.921308728656147e-08,
      "loss": 0.9177,
      "step": 4124
    },
    {
      "epoch": 0.993198097875158,
      "grad_norm": 1.6356173753738403,
      "learning_rate": 2.7358855169845598e-08,
      "loss": 0.397,
      "step": 4125
    },
    {
      "epoch": 0.9934388731716126,
      "grad_norm": 2.2541160583496094,
      "learning_rate": 2.556540664419016e-08,
      "loss": 0.3977,
      "step": 4126
    },
    {
      "epoch": 0.9936796484680672,
      "grad_norm": 4.046707630157471,
      "learning_rate": 2.38327428000118e-08,
      "loss": 0.4311,
      "step": 4127
    },
    {
      "epoch": 0.9939204237645217,
      "grad_norm": 3.0581321716308594,
      "learning_rate": 2.216086469077894e-08,
      "loss": 1.07,
      "step": 4128
    },
    {
      "epoch": 0.9941611990609763,
      "grad_norm": 1.8278604745864868,
      "learning_rate": 2.0549773332989575e-08,
      "loss": 0.2437,
      "step": 4129
    },
    {
      "epoch": 0.9944019743574309,
      "grad_norm": 1.3549528121948242,
      "learning_rate": 1.8999469706193484e-08,
      "loss": 0.3801,
      "step": 4130
    },
    {
      "epoch": 0.9946427496538855,
      "grad_norm": 2.3834619522094727,
      "learning_rate": 1.750995475299222e-08,
      "loss": 0.6792,
      "step": 4131
    },
    {
      "epoch": 0.99488352495034,
      "grad_norm": 2.10779070854187,
      "learning_rate": 1.6081229378983598e-08,
      "loss": 0.4779,
      "step": 4132
    },
    {
      "epoch": 0.9951243002467947,
      "grad_norm": 3.692774534225464,
      "learning_rate": 1.4713294452861626e-08,
      "loss": 0.9011,
      "step": 4133
    },
    {
      "epoch": 0.9953650755432493,
      "grad_norm": 1.1384838819503784,
      "learning_rate": 1.3406150806327678e-08,
      "loss": 0.4314,
      "step": 4134
    },
    {
      "epoch": 0.9956058508397039,
      "grad_norm": 1.2060233354568481,
      "learning_rate": 1.2159799234134905e-08,
      "loss": 0.4898,
      "step": 4135
    },
    {
      "epoch": 0.9958466261361584,
      "grad_norm": 2.4775209426879883,
      "learning_rate": 1.097424049404383e-08,
      "loss": 0.4144,
      "step": 4136
    },
    {
      "epoch": 0.996087401432613,
      "grad_norm": 6.3312177658081055,
      "learning_rate": 9.849475306900058e-09,
      "loss": 0.9015,
      "step": 4137
    },
    {
      "epoch": 0.9963281767290676,
      "grad_norm": 1.8564362525939941,
      "learning_rate": 8.785504356556563e-09,
      "loss": 0.3928,
      "step": 4138
    },
    {
      "epoch": 0.9965689520255222,
      "grad_norm": 1.6765766143798828,
      "learning_rate": 7.782328289906992e-09,
      "loss": 1.0499,
      "step": 4139
    },
    {
      "epoch": 0.9968097273219768,
      "grad_norm": 2.188523054122925,
      "learning_rate": 6.839947716885675e-09,
      "loss": 0.9049,
      "step": 4140
    },
    {
      "epoch": 0.9970505026184313,
      "grad_norm": 0.7010088562965393,
      "learning_rate": 5.95836321046761e-09,
      "loss": 0.4464,
      "step": 4141
    },
    {
      "epoch": 0.9972912779148859,
      "grad_norm": 1.611911416053772,
      "learning_rate": 5.137575306646269e-09,
      "loss": 0.3036,
      "step": 4142
    },
    {
      "epoch": 0.9975320532113405,
      "grad_norm": 2.555997371673584,
      "learning_rate": 4.377584504478005e-09,
      "loss": 0.4531,
      "step": 4143
    },
    {
      "epoch": 0.9977728285077951,
      "grad_norm": 1.4258787631988525,
      "learning_rate": 3.6783912660265372e-09,
      "loss": 0.4331,
      "step": 4144
    },
    {
      "epoch": 0.9980136038042496,
      "grad_norm": 1.0815415382385254,
      "learning_rate": 3.039996016407365e-09,
      "loss": 0.3503,
      "step": 4145
    },
    {
      "epoch": 0.9982543791007042,
      "grad_norm": 4.800786972045898,
      "learning_rate": 2.4623991437766614e-09,
      "loss": 0.6749,
      "step": 4146
    },
    {
      "epoch": 0.9984951543971589,
      "grad_norm": 1.161253809928894,
      "learning_rate": 1.9456009992979696e-09,
      "loss": 0.5973,
      "step": 4147
    },
    {
      "epoch": 0.9987359296936135,
      "grad_norm": 3.008234739303589,
      "learning_rate": 1.48960189718661e-09,
      "loss": 0.9873,
      "step": 4148
    },
    {
      "epoch": 0.998976704990068,
      "grad_norm": 1.616468071937561,
      "learning_rate": 1.0944021146985784e-09,
      "loss": 0.5877,
      "step": 4149
    },
    {
      "epoch": 0.9992174802865226,
      "grad_norm": 1.8020235300064087,
      "learning_rate": 7.60001892119444e-10,
      "loss": 0.6324,
      "step": 4150
    },
    {
      "epoch": 0.9994582555829772,
      "grad_norm": 2.7583658695220947,
      "learning_rate": 4.864014327532474e-10,
      "loss": 0.8355,
      "step": 4151
    },
    {
      "epoch": 0.9996990308794318,
      "grad_norm": 1.8322501182556152,
      "learning_rate": 2.7360090296690846e-10,
      "loss": 0.706,
      "step": 4152
    },
    {
      "epoch": 0.9999398061758864,
      "grad_norm": 1.8930530548095703,
      "learning_rate": 1.2160043212361417e-10,
      "loss": 0.6687,
      "step": 4153
    },
    {
      "epoch": 1.0,
      "grad_norm": 5.636228084564209,
      "learning_rate": 3.0400112649431325e-11,
      "loss": 0.9409,
      "step": 4154
    },
    {
      "epoch": 1.0,
      "step": 4154,
      "total_flos": 7.629858860247867e+17,
      "train_loss": 0.8297393302427364,
      "train_runtime": 10162.4154,
      "train_samples_per_second": 3.269,
      "train_steps_per_second": 0.409
    }
  ],
  "logging_steps": 1,
  "max_steps": 4154,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 2400000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7.629858860247867e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}