{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 6750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00014814814814814815, "grad_norm": 2.1235709190368652, "learning_rate": 4e-05, "loss": 2.8688, "step": 1 }, { "epoch": 0.0002962962962962963, "grad_norm": 3.357743978500366, "learning_rate": 8e-05, "loss": 2.6483, "step": 2 }, { "epoch": 0.00044444444444444447, "grad_norm": 2.011920213699341, "learning_rate": 0.00012, "loss": 2.696, "step": 3 }, { "epoch": 0.0005925925925925926, "grad_norm": 3.150110960006714, "learning_rate": 0.00016, "loss": 2.9999, "step": 4 }, { "epoch": 0.0007407407407407407, "grad_norm": 2.520505905151367, "learning_rate": 0.0002, "loss": 2.0684, "step": 5 }, { "epoch": 0.0008888888888888889, "grad_norm": 1.2569918632507324, "learning_rate": 0.00019997034840622683, "loss": 2.1813, "step": 6 }, { "epoch": 0.001037037037037037, "grad_norm": 1.747957706451416, "learning_rate": 0.0001999406968124537, "loss": 2.0401, "step": 7 }, { "epoch": 0.0011851851851851852, "grad_norm": 1.1225775480270386, "learning_rate": 0.00019991104521868052, "loss": 1.7564, "step": 8 }, { "epoch": 0.0013333333333333333, "grad_norm": 1.16183340549469, "learning_rate": 0.00019988139362490733, "loss": 1.7111, "step": 9 }, { "epoch": 0.0014814814814814814, "grad_norm": 1.3514748811721802, "learning_rate": 0.0001998517420311342, "loss": 1.9612, "step": 10 }, { "epoch": 0.0016296296296296295, "grad_norm": 1.3388046026229858, "learning_rate": 0.00019982209043736102, "loss": 1.6278, "step": 11 }, { "epoch": 0.0017777777777777779, "grad_norm": 1.4107303619384766, "learning_rate": 0.00019979243884358784, "loss": 1.8498, "step": 12 }, { "epoch": 0.001925925925925926, "grad_norm": 1.2822532653808594, "learning_rate": 0.0001997627872498147, "loss": 1.4322, "step": 13 }, { "epoch": 0.002074074074074074, "grad_norm": 1.0885272026062012, "learning_rate": 
0.00019973313565604153, "loss": 1.691, "step": 14 }, { "epoch": 0.0022222222222222222, "grad_norm": 0.9695473313331604, "learning_rate": 0.00019970348406226835, "loss": 1.6351, "step": 15 }, { "epoch": 0.0023703703703703703, "grad_norm": 1.0999016761779785, "learning_rate": 0.0001996738324684952, "loss": 1.4707, "step": 16 }, { "epoch": 0.0025185185185185185, "grad_norm": 1.2312846183776855, "learning_rate": 0.00019964418087472203, "loss": 1.5608, "step": 17 }, { "epoch": 0.0026666666666666666, "grad_norm": 1.980854868888855, "learning_rate": 0.00019961452928094885, "loss": 1.5096, "step": 18 }, { "epoch": 0.0028148148148148147, "grad_norm": 1.4264863729476929, "learning_rate": 0.0001995848776871757, "loss": 1.4313, "step": 19 }, { "epoch": 0.002962962962962963, "grad_norm": 1.5596296787261963, "learning_rate": 0.00019955522609340254, "loss": 1.2828, "step": 20 }, { "epoch": 0.003111111111111111, "grad_norm": 1.3198904991149902, "learning_rate": 0.00019952557449962936, "loss": 1.5241, "step": 21 }, { "epoch": 0.003259259259259259, "grad_norm": 1.263428807258606, "learning_rate": 0.0001994959229058562, "loss": 1.499, "step": 22 }, { "epoch": 0.0034074074074074076, "grad_norm": 1.1441594362258911, "learning_rate": 0.00019946627131208305, "loss": 1.2826, "step": 23 }, { "epoch": 0.0035555555555555557, "grad_norm": 1.7674256563186646, "learning_rate": 0.00019943661971830986, "loss": 1.3383, "step": 24 }, { "epoch": 0.003703703703703704, "grad_norm": 1.1815110445022583, "learning_rate": 0.0001994069681245367, "loss": 1.269, "step": 25 }, { "epoch": 0.003851851851851852, "grad_norm": 1.5254690647125244, "learning_rate": 0.00019937731653076355, "loss": 1.161, "step": 26 }, { "epoch": 0.004, "grad_norm": 1.4483976364135742, "learning_rate": 0.00019934766493699037, "loss": 1.2054, "step": 27 }, { "epoch": 0.004148148148148148, "grad_norm": 1.6680272817611694, "learning_rate": 0.00019931801334321722, "loss": 1.3768, "step": 28 }, { "epoch": 0.004296296296296296, "grad_norm": 
1.6189104318618774, "learning_rate": 0.00019928836174944406, "loss": 1.5359, "step": 29 }, { "epoch": 0.0044444444444444444, "grad_norm": 1.8396241664886475, "learning_rate": 0.00019925871015567088, "loss": 1.2609, "step": 30 }, { "epoch": 0.0045925925925925926, "grad_norm": 1.2982949018478394, "learning_rate": 0.0001992290585618977, "loss": 1.3573, "step": 31 }, { "epoch": 0.004740740740740741, "grad_norm": 1.2802674770355225, "learning_rate": 0.00019919940696812454, "loss": 1.3206, "step": 32 }, { "epoch": 0.004888888888888889, "grad_norm": 1.320926547050476, "learning_rate": 0.00019916975537435138, "loss": 1.313, "step": 33 }, { "epoch": 0.005037037037037037, "grad_norm": 0.9515355825424194, "learning_rate": 0.0001991401037805782, "loss": 1.377, "step": 34 }, { "epoch": 0.005185185185185185, "grad_norm": 1.0064878463745117, "learning_rate": 0.00019911045218680505, "loss": 1.2234, "step": 35 }, { "epoch": 0.005333333333333333, "grad_norm": 0.8821234703063965, "learning_rate": 0.0001990808005930319, "loss": 1.2351, "step": 36 }, { "epoch": 0.005481481481481481, "grad_norm": 1.1091601848602295, "learning_rate": 0.0001990511489992587, "loss": 1.3856, "step": 37 }, { "epoch": 0.005629629629629629, "grad_norm": 1.2138992547988892, "learning_rate": 0.00019902149740548555, "loss": 1.489, "step": 38 }, { "epoch": 0.0057777777777777775, "grad_norm": 1.040979266166687, "learning_rate": 0.0001989918458117124, "loss": 1.2152, "step": 39 }, { "epoch": 0.005925925925925926, "grad_norm": 1.0536178350448608, "learning_rate": 0.0001989621942179392, "loss": 1.2148, "step": 40 }, { "epoch": 0.006074074074074074, "grad_norm": 1.0311094522476196, "learning_rate": 0.00019893254262416606, "loss": 1.3002, "step": 41 }, { "epoch": 0.006222222222222222, "grad_norm": 0.8636873960494995, "learning_rate": 0.0001989028910303929, "loss": 1.2371, "step": 42 }, { "epoch": 0.00637037037037037, "grad_norm": 0.8415317535400391, "learning_rate": 0.00019887323943661972, "loss": 1.4714, "step": 43 }, 
{ "epoch": 0.006518518518518518, "grad_norm": 1.3478702306747437, "learning_rate": 0.00019884358784284656, "loss": 1.5319, "step": 44 }, { "epoch": 0.006666666666666667, "grad_norm": 0.9216668605804443, "learning_rate": 0.0001988139362490734, "loss": 1.3122, "step": 45 }, { "epoch": 0.006814814814814815, "grad_norm": 0.9138181209564209, "learning_rate": 0.00019878428465530023, "loss": 1.0854, "step": 46 }, { "epoch": 0.006962962962962963, "grad_norm": 0.890804648399353, "learning_rate": 0.00019875463306152707, "loss": 1.3739, "step": 47 }, { "epoch": 0.0071111111111111115, "grad_norm": 1.20708167552948, "learning_rate": 0.00019872498146775391, "loss": 1.3702, "step": 48 }, { "epoch": 0.00725925925925926, "grad_norm": 1.4061371088027954, "learning_rate": 0.00019869532987398073, "loss": 1.3931, "step": 49 }, { "epoch": 0.007407407407407408, "grad_norm": 0.9063286781311035, "learning_rate": 0.00019866567828020758, "loss": 1.2466, "step": 50 }, { "epoch": 0.007555555555555556, "grad_norm": 1.1027034521102905, "learning_rate": 0.0001986360266864344, "loss": 1.3566, "step": 51 }, { "epoch": 0.007703703703703704, "grad_norm": 0.9901520609855652, "learning_rate": 0.00019860637509266124, "loss": 1.3878, "step": 52 }, { "epoch": 0.007851851851851851, "grad_norm": 0.9693011045455933, "learning_rate": 0.00019857672349888808, "loss": 1.1488, "step": 53 }, { "epoch": 0.008, "grad_norm": 0.792296826839447, "learning_rate": 0.0001985470719051149, "loss": 1.4231, "step": 54 }, { "epoch": 0.008148148148148147, "grad_norm": 0.9005415439605713, "learning_rate": 0.00019851742031134174, "loss": 1.2315, "step": 55 }, { "epoch": 0.008296296296296296, "grad_norm": 0.7594810128211975, "learning_rate": 0.0001984877687175686, "loss": 1.3575, "step": 56 }, { "epoch": 0.008444444444444444, "grad_norm": 1.562156081199646, "learning_rate": 0.0001984581171237954, "loss": 1.3894, "step": 57 }, { "epoch": 0.008592592592592593, "grad_norm": 0.9959694743156433, "learning_rate": 0.00019842846553002225, 
"loss": 1.3206, "step": 58 }, { "epoch": 0.00874074074074074, "grad_norm": 1.3404779434204102, "learning_rate": 0.0001983988139362491, "loss": 1.2058, "step": 59 }, { "epoch": 0.008888888888888889, "grad_norm": 1.0158309936523438, "learning_rate": 0.0001983691623424759, "loss": 1.1776, "step": 60 }, { "epoch": 0.009037037037037038, "grad_norm": 1.3582422733306885, "learning_rate": 0.00019833951074870276, "loss": 1.4476, "step": 61 }, { "epoch": 0.009185185185185185, "grad_norm": 1.078850269317627, "learning_rate": 0.0001983098591549296, "loss": 1.3258, "step": 62 }, { "epoch": 0.009333333333333334, "grad_norm": 0.9518314599990845, "learning_rate": 0.00019828020756115642, "loss": 1.2733, "step": 63 }, { "epoch": 0.009481481481481481, "grad_norm": NaN, "learning_rate": 0.00019828020756115642, "loss": 1.2108, "step": 64 }, { "epoch": 0.00962962962962963, "grad_norm": 0.8653088212013245, "learning_rate": 0.00019825055596738326, "loss": 1.1989, "step": 65 }, { "epoch": 0.009777777777777778, "grad_norm": 1.1971681118011475, "learning_rate": 0.0001982209043736101, "loss": 1.316, "step": 66 }, { "epoch": 0.009925925925925927, "grad_norm": 0.8550506830215454, "learning_rate": 0.00019819125277983693, "loss": 1.5866, "step": 67 }, { "epoch": 0.010074074074074074, "grad_norm": 0.7203028798103333, "learning_rate": 0.00019816160118606374, "loss": 1.1993, "step": 68 }, { "epoch": 0.010222222222222223, "grad_norm": 1.342175841331482, "learning_rate": 0.0001981319495922906, "loss": 1.4759, "step": 69 }, { "epoch": 0.01037037037037037, "grad_norm": 0.8517811894416809, "learning_rate": 0.00019810229799851743, "loss": 1.3233, "step": 70 }, { "epoch": 0.010518518518518519, "grad_norm": 0.991734504699707, "learning_rate": 0.00019807264640474425, "loss": 1.3343, "step": 71 }, { "epoch": 0.010666666666666666, "grad_norm": 0.9519258737564087, "learning_rate": 0.0001980429948109711, "loss": 1.2763, "step": 72 }, { "epoch": 0.010814814814814815, "grad_norm": 2.1876156330108643, 
"learning_rate": 0.00019801334321719794, "loss": 1.4588, "step": 73 }, { "epoch": 0.010962962962962963, "grad_norm": 1.0274714231491089, "learning_rate": 0.00019798369162342476, "loss": 1.1687, "step": 74 }, { "epoch": 0.011111111111111112, "grad_norm": 0.8644994497299194, "learning_rate": 0.0001979540400296516, "loss": 1.4098, "step": 75 }, { "epoch": 0.011259259259259259, "grad_norm": 0.8542577624320984, "learning_rate": 0.00019792438843587844, "loss": 1.5521, "step": 76 }, { "epoch": 0.011407407407407408, "grad_norm": 0.9984826445579529, "learning_rate": 0.00019789473684210526, "loss": 1.1638, "step": 77 }, { "epoch": 0.011555555555555555, "grad_norm": 0.7212148904800415, "learning_rate": 0.0001978650852483321, "loss": 1.1242, "step": 78 }, { "epoch": 0.011703703703703704, "grad_norm": 1.859447717666626, "learning_rate": 0.00019783543365455895, "loss": 1.3361, "step": 79 }, { "epoch": 0.011851851851851851, "grad_norm": 0.8758236765861511, "learning_rate": 0.00019780578206078577, "loss": 1.0517, "step": 80 }, { "epoch": 0.012, "grad_norm": 1.2347968816757202, "learning_rate": 0.0001977761304670126, "loss": 1.0352, "step": 81 }, { "epoch": 0.012148148148148148, "grad_norm": 0.7254672050476074, "learning_rate": 0.00019774647887323946, "loss": 1.1428, "step": 82 }, { "epoch": 0.012296296296296296, "grad_norm": 1.1427414417266846, "learning_rate": 0.00019771682727946627, "loss": 1.3168, "step": 83 }, { "epoch": 0.012444444444444444, "grad_norm": 0.8925330638885498, "learning_rate": 0.0001976871756856931, "loss": 1.2553, "step": 84 }, { "epoch": 0.012592592592592593, "grad_norm": 0.6908546090126038, "learning_rate": 0.00019765752409191996, "loss": 1.2659, "step": 85 }, { "epoch": 0.01274074074074074, "grad_norm": 0.8214904069900513, "learning_rate": 0.00019762787249814678, "loss": 1.2111, "step": 86 }, { "epoch": 0.012888888888888889, "grad_norm": 0.9786846041679382, "learning_rate": 0.0001975982209043736, "loss": 1.3289, "step": 87 }, { "epoch": 0.013037037037037036, 
"grad_norm": 0.8723524212837219, "learning_rate": 0.00019756856931060047, "loss": 1.5589, "step": 88 }, { "epoch": 0.013185185185185185, "grad_norm": 1.081800937652588, "learning_rate": 0.0001975389177168273, "loss": 1.2323, "step": 89 }, { "epoch": 0.013333333333333334, "grad_norm": 0.8913035988807678, "learning_rate": 0.0001975092661230541, "loss": 1.2034, "step": 90 }, { "epoch": 0.013481481481481481, "grad_norm": 0.7650095820426941, "learning_rate": 0.00019747961452928098, "loss": 1.2324, "step": 91 }, { "epoch": 0.01362962962962963, "grad_norm": 2.0756051540374756, "learning_rate": 0.0001974499629355078, "loss": 1.0702, "step": 92 }, { "epoch": 0.013777777777777778, "grad_norm": 1.5064998865127563, "learning_rate": 0.0001974203113417346, "loss": 1.2101, "step": 93 }, { "epoch": 0.013925925925925927, "grad_norm": 0.9713321924209595, "learning_rate": 0.00019739065974796148, "loss": 1.4464, "step": 94 }, { "epoch": 0.014074074074074074, "grad_norm": 1.0733256340026855, "learning_rate": 0.0001973610081541883, "loss": 1.3696, "step": 95 }, { "epoch": 0.014222222222222223, "grad_norm": 1.0742157697677612, "learning_rate": 0.00019733135656041512, "loss": 1.3056, "step": 96 }, { "epoch": 0.01437037037037037, "grad_norm": 1.2496274709701538, "learning_rate": 0.000197301704966642, "loss": 1.179, "step": 97 }, { "epoch": 0.01451851851851852, "grad_norm": 1.0434441566467285, "learning_rate": 0.0001972720533728688, "loss": 1.0619, "step": 98 }, { "epoch": 0.014666666666666666, "grad_norm": 0.7900615334510803, "learning_rate": 0.00019724240177909562, "loss": 1.0743, "step": 99 }, { "epoch": 0.014814814814814815, "grad_norm": 0.84372878074646, "learning_rate": 0.0001972127501853225, "loss": 1.1668, "step": 100 }, { "epoch": 0.014962962962962963, "grad_norm": 1.3834422826766968, "learning_rate": 0.0001971830985915493, "loss": 1.2704, "step": 101 }, { "epoch": 0.015111111111111112, "grad_norm": 1.077656865119934, "learning_rate": 0.00019715344699777613, "loss": 1.2489, "step": 
102 }, { "epoch": 0.015259259259259259, "grad_norm": 1.0293247699737549, "learning_rate": 0.00019712379540400297, "loss": 1.1533, "step": 103 }, { "epoch": 0.015407407407407408, "grad_norm": 0.8808808922767639, "learning_rate": 0.00019709414381022982, "loss": 1.1044, "step": 104 }, { "epoch": 0.015555555555555555, "grad_norm": 2.607654571533203, "learning_rate": 0.00019706449221645664, "loss": 1.2633, "step": 105 }, { "epoch": 0.015703703703703702, "grad_norm": 1.1236181259155273, "learning_rate": 0.00019703484062268348, "loss": 1.3062, "step": 106 }, { "epoch": 0.015851851851851853, "grad_norm": 0.9974605441093445, "learning_rate": 0.00019700518902891032, "loss": 1.42, "step": 107 }, { "epoch": 0.016, "grad_norm": 0.9278566241264343, "learning_rate": 0.00019697553743513714, "loss": 1.3455, "step": 108 }, { "epoch": 0.016148148148148148, "grad_norm": 1.077166199684143, "learning_rate": 0.00019694588584136399, "loss": 1.2423, "step": 109 }, { "epoch": 0.016296296296296295, "grad_norm": 1.052445411682129, "learning_rate": 0.00019691623424759083, "loss": 1.1738, "step": 110 }, { "epoch": 0.016444444444444446, "grad_norm": 0.9899265170097351, "learning_rate": 0.00019688658265381765, "loss": 1.4544, "step": 111 }, { "epoch": 0.016592592592592593, "grad_norm": 1.0286669731140137, "learning_rate": 0.0001968569310600445, "loss": 1.3348, "step": 112 }, { "epoch": 0.01674074074074074, "grad_norm": 2.1646833419799805, "learning_rate": 0.00019682727946627134, "loss": 1.1908, "step": 113 }, { "epoch": 0.016888888888888887, "grad_norm": 1.1112664937973022, "learning_rate": 0.00019679762787249815, "loss": 1.0064, "step": 114 }, { "epoch": 0.017037037037037038, "grad_norm": 0.9720861315727234, "learning_rate": 0.000196767976278725, "loss": 1.2794, "step": 115 }, { "epoch": 0.017185185185185185, "grad_norm": 1.5623220205307007, "learning_rate": 0.00019673832468495184, "loss": 1.2101, "step": 116 }, { "epoch": 0.017333333333333333, "grad_norm": 1.046308994293213, "learning_rate": 
0.00019670867309117866, "loss": 1.1242, "step": 117 }, { "epoch": 0.01748148148148148, "grad_norm": 1.716864824295044, "learning_rate": 0.00019667902149740548, "loss": 1.1136, "step": 118 }, { "epoch": 0.01762962962962963, "grad_norm": 0.9031343460083008, "learning_rate": 0.00019664936990363232, "loss": 1.0777, "step": 119 }, { "epoch": 0.017777777777777778, "grad_norm": 0.8096110820770264, "learning_rate": 0.00019661971830985917, "loss": 1.2524, "step": 120 }, { "epoch": 0.017925925925925925, "grad_norm": 0.7985750436782837, "learning_rate": 0.00019659006671608598, "loss": 1.3701, "step": 121 }, { "epoch": 0.018074074074074076, "grad_norm": 0.6972211599349976, "learning_rate": 0.00019656041512231283, "loss": 1.2531, "step": 122 }, { "epoch": 0.018222222222222223, "grad_norm": 0.8134813904762268, "learning_rate": 0.00019653076352853967, "loss": 1.3938, "step": 123 }, { "epoch": 0.01837037037037037, "grad_norm": 1.100056767463684, "learning_rate": 0.0001965011119347665, "loss": 1.3501, "step": 124 }, { "epoch": 0.018518518518518517, "grad_norm": 0.6855674982070923, "learning_rate": 0.00019647146034099333, "loss": 1.3988, "step": 125 }, { "epoch": 0.018666666666666668, "grad_norm": 1.3612394332885742, "learning_rate": 0.00019644180874722018, "loss": 1.289, "step": 126 }, { "epoch": 0.018814814814814815, "grad_norm": null, "learning_rate": 0.00019644180874722018, "loss": 1.2401, "step": 127 }, { "epoch": 0.018962962962962963, "grad_norm": 0.6646182537078857, "learning_rate": 0.000196412157153447, "loss": 1.0761, "step": 128 }, { "epoch": 0.01911111111111111, "grad_norm": 1.1864745616912842, "learning_rate": 0.00019638250555967384, "loss": 1.3836, "step": 129 }, { "epoch": 0.01925925925925926, "grad_norm": 1.0745700597763062, "learning_rate": 0.00019635285396590069, "loss": 1.1912, "step": 130 }, { "epoch": 0.019407407407407408, "grad_norm": 1.649613618850708, "learning_rate": 0.0001963232023721275, "loss": 1.3176, "step": 131 }, { "epoch": 0.019555555555555555, 
"grad_norm": 1.0064952373504639, "learning_rate": 0.00019629355077835435, "loss": 1.3275, "step": 132 }, { "epoch": 0.019703703703703702, "grad_norm": 0.9298867583274841, "learning_rate": 0.0001962638991845812, "loss": 1.2191, "step": 133 }, { "epoch": 0.019851851851851853, "grad_norm": 1.6126947402954102, "learning_rate": 0.000196234247590808, "loss": 1.3715, "step": 134 }, { "epoch": 0.02, "grad_norm": 1.1250628232955933, "learning_rate": 0.00019620459599703485, "loss": 1.2638, "step": 135 }, { "epoch": 0.020148148148148148, "grad_norm": 1.1649386882781982, "learning_rate": 0.0001961749444032617, "loss": 1.2886, "step": 136 }, { "epoch": 0.020296296296296295, "grad_norm": 1.7831798791885376, "learning_rate": 0.00019614529280948852, "loss": 1.0661, "step": 137 }, { "epoch": 0.020444444444444446, "grad_norm": 1.2029664516448975, "learning_rate": 0.00019611564121571536, "loss": 1.455, "step": 138 }, { "epoch": 0.020592592592592593, "grad_norm": 0.9483388662338257, "learning_rate": 0.00019608598962194218, "loss": 1.2736, "step": 139 }, { "epoch": 0.02074074074074074, "grad_norm": 0.9334256649017334, "learning_rate": 0.00019605633802816902, "loss": 1.0822, "step": 140 }, { "epoch": 0.020888888888888887, "grad_norm": 0.8102211952209473, "learning_rate": 0.00019602668643439587, "loss": 1.3988, "step": 141 }, { "epoch": 0.021037037037037038, "grad_norm": 0.9568620324134827, "learning_rate": 0.00019599703484062268, "loss": 1.3997, "step": 142 }, { "epoch": 0.021185185185185185, "grad_norm": 1.1323370933532715, "learning_rate": 0.00019596738324684953, "loss": 1.3574, "step": 143 }, { "epoch": 0.021333333333333333, "grad_norm": 0.9975650310516357, "learning_rate": 0.00019593773165307637, "loss": 1.1898, "step": 144 }, { "epoch": 0.02148148148148148, "grad_norm": 0.9566569924354553, "learning_rate": 0.0001959080800593032, "loss": 1.2258, "step": 145 }, { "epoch": 0.02162962962962963, "grad_norm": 0.8966617584228516, "learning_rate": 0.00019587842846553003, "loss": 1.2678, 
"step": 146 }, { "epoch": 0.021777777777777778, "grad_norm": 0.8554395437240601, "learning_rate": 0.00019584877687175688, "loss": 1.1973, "step": 147 }, { "epoch": 0.021925925925925925, "grad_norm": 0.8343006372451782, "learning_rate": 0.0001958191252779837, "loss": 1.4802, "step": 148 }, { "epoch": 0.022074074074074072, "grad_norm": 1.0070141553878784, "learning_rate": 0.00019578947368421054, "loss": 1.1165, "step": 149 }, { "epoch": 0.022222222222222223, "grad_norm": 0.8232426643371582, "learning_rate": 0.00019575982209043739, "loss": 1.268, "step": 150 }, { "epoch": 0.02237037037037037, "grad_norm": 1.051567554473877, "learning_rate": 0.0001957301704966642, "loss": 1.2323, "step": 151 }, { "epoch": 0.022518518518518518, "grad_norm": 2.46091628074646, "learning_rate": 0.00019570051890289105, "loss": 1.2043, "step": 152 }, { "epoch": 0.02266666666666667, "grad_norm": 1.0516281127929688, "learning_rate": 0.0001956708673091179, "loss": 1.1408, "step": 153 }, { "epoch": 0.022814814814814816, "grad_norm": 0.8365291953086853, "learning_rate": 0.0001956412157153447, "loss": 1.2778, "step": 154 }, { "epoch": 0.022962962962962963, "grad_norm": 0.6768907904624939, "learning_rate": 0.00019561156412157153, "loss": 0.8589, "step": 155 }, { "epoch": 0.02311111111111111, "grad_norm": 2.7300822734832764, "learning_rate": 0.00019558191252779837, "loss": 1.5669, "step": 156 }, { "epoch": 0.02325925925925926, "grad_norm": 1.3450238704681396, "learning_rate": 0.00019555226093402521, "loss": 1.3563, "step": 157 }, { "epoch": 0.023407407407407408, "grad_norm": 0.7438297867774963, "learning_rate": 0.00019552260934025203, "loss": 1.1264, "step": 158 }, { "epoch": 0.023555555555555555, "grad_norm": 0.9578141570091248, "learning_rate": 0.00019549295774647888, "loss": 1.3149, "step": 159 }, { "epoch": 0.023703703703703703, "grad_norm": 1.0204038619995117, "learning_rate": 0.00019546330615270572, "loss": 1.394, "step": 160 }, { "epoch": 0.023851851851851853, "grad_norm": 1.0684940814971924, 
"learning_rate": 0.00019543365455893254, "loss": 1.2583, "step": 161 }, { "epoch": 0.024, "grad_norm": 0.7158825993537903, "learning_rate": 0.00019540400296515938, "loss": 1.3733, "step": 162 }, { "epoch": 0.024148148148148148, "grad_norm": 0.8597478866577148, "learning_rate": 0.00019537435137138623, "loss": 1.5522, "step": 163 }, { "epoch": 0.024296296296296295, "grad_norm": 0.7853899598121643, "learning_rate": 0.00019534469977761304, "loss": 1.3182, "step": 164 }, { "epoch": 0.024444444444444446, "grad_norm": 0.938490092754364, "learning_rate": 0.0001953150481838399, "loss": 1.0182, "step": 165 }, { "epoch": 0.024592592592592593, "grad_norm": 1.2965304851531982, "learning_rate": 0.00019528539659006673, "loss": 1.04, "step": 166 }, { "epoch": 0.02474074074074074, "grad_norm": 1.3206931352615356, "learning_rate": 0.00019525574499629355, "loss": 1.1929, "step": 167 }, { "epoch": 0.024888888888888887, "grad_norm": 0.8876661658287048, "learning_rate": 0.0001952260934025204, "loss": 1.1008, "step": 168 }, { "epoch": 0.025037037037037038, "grad_norm": 1.236785888671875, "learning_rate": 0.00019519644180874724, "loss": 1.4694, "step": 169 }, { "epoch": 0.025185185185185185, "grad_norm": 0.8049394488334656, "learning_rate": 0.00019516679021497406, "loss": 1.1324, "step": 170 }, { "epoch": 0.025333333333333333, "grad_norm": 1.1204288005828857, "learning_rate": 0.00019513713862120087, "loss": 1.2757, "step": 171 }, { "epoch": 0.02548148148148148, "grad_norm": 1.8202202320098877, "learning_rate": 0.00019510748702742775, "loss": 1.1399, "step": 172 }, { "epoch": 0.02562962962962963, "grad_norm": 0.9476074576377869, "learning_rate": 0.00019507783543365456, "loss": 1.2111, "step": 173 }, { "epoch": 0.025777777777777778, "grad_norm": 0.9113478660583496, "learning_rate": 0.00019504818383988138, "loss": 1.0864, "step": 174 }, { "epoch": 0.025925925925925925, "grad_norm": 0.8001992106437683, "learning_rate": 0.00019501853224610825, "loss": 1.0861, "step": 175 }, { "epoch": 
0.026074074074074072, "grad_norm": 0.9687372446060181, "learning_rate": 0.00019498888065233507, "loss": 1.0022, "step": 176 }, { "epoch": 0.026222222222222223, "grad_norm": 1.6159669160842896, "learning_rate": 0.0001949592290585619, "loss": 1.4007, "step": 177 }, { "epoch": 0.02637037037037037, "grad_norm": 0.8484355211257935, "learning_rate": 0.00019492957746478876, "loss": 1.3777, "step": 178 }, { "epoch": 0.026518518518518518, "grad_norm": 1.052414894104004, "learning_rate": 0.00019489992587101558, "loss": 1.1446, "step": 179 }, { "epoch": 0.02666666666666667, "grad_norm": 0.8121458292007446, "learning_rate": 0.0001948702742772424, "loss": 1.3327, "step": 180 }, { "epoch": 0.026814814814814816, "grad_norm": 0.797144889831543, "learning_rate": 0.00019484062268346927, "loss": 1.2831, "step": 181 }, { "epoch": 0.026962962962962963, "grad_norm": 1.1285954713821411, "learning_rate": 0.00019481097108969608, "loss": 1.1895, "step": 182 }, { "epoch": 0.02711111111111111, "grad_norm": 0.9754137396812439, "learning_rate": 0.0001947813194959229, "loss": 1.1129, "step": 183 }, { "epoch": 0.02725925925925926, "grad_norm": 1.1544840335845947, "learning_rate": 0.00019475166790214977, "loss": 1.2382, "step": 184 }, { "epoch": 0.027407407407407408, "grad_norm": 0.8209052085876465, "learning_rate": 0.0001947220163083766, "loss": 1.3218, "step": 185 }, { "epoch": 0.027555555555555555, "grad_norm": 0.9780836701393127, "learning_rate": 0.0001946923647146034, "loss": 1.2419, "step": 186 }, { "epoch": 0.027703703703703703, "grad_norm": 0.9858911633491516, "learning_rate": 0.00019466271312083028, "loss": 1.3105, "step": 187 }, { "epoch": 0.027851851851851853, "grad_norm": 1.0222773551940918, "learning_rate": 0.0001946330615270571, "loss": 1.0789, "step": 188 }, { "epoch": 0.028, "grad_norm": 0.8119564652442932, "learning_rate": 0.0001946034099332839, "loss": 1.1691, "step": 189 }, { "epoch": 0.028148148148148148, "grad_norm": 1.0559577941894531, "learning_rate": 0.00019457375833951076, 
"loss": 1.2803, "step": 190 }, { "epoch": 0.028296296296296295, "grad_norm": 0.8176133036613464, "learning_rate": 0.0001945441067457376, "loss": 1.0209, "step": 191 }, { "epoch": 0.028444444444444446, "grad_norm": 0.9877428412437439, "learning_rate": 0.00019451445515196442, "loss": 1.3936, "step": 192 }, { "epoch": 0.028592592592592593, "grad_norm": 1.2009166479110718, "learning_rate": 0.00019448480355819126, "loss": 1.3871, "step": 193 }, { "epoch": 0.02874074074074074, "grad_norm": 0.8686572313308716, "learning_rate": 0.0001944551519644181, "loss": 1.0833, "step": 194 }, { "epoch": 0.028888888888888888, "grad_norm": 1.1291698217391968, "learning_rate": 0.00019442550037064492, "loss": 1.4868, "step": 195 }, { "epoch": 0.02903703703703704, "grad_norm": 0.7083054184913635, "learning_rate": 0.00019439584877687177, "loss": 0.9583, "step": 196 }, { "epoch": 0.029185185185185186, "grad_norm": 0.8085622787475586, "learning_rate": 0.00019436619718309861, "loss": 1.3064, "step": 197 }, { "epoch": 0.029333333333333333, "grad_norm": 0.8528979420661926, "learning_rate": 0.00019433654558932543, "loss": 1.3791, "step": 198 }, { "epoch": 0.02948148148148148, "grad_norm": 0.8347198963165283, "learning_rate": 0.00019430689399555228, "loss": 1.3034, "step": 199 }, { "epoch": 0.02962962962962963, "grad_norm": 1.3489162921905518, "learning_rate": 0.00019427724240177912, "loss": 1.4172, "step": 200 }, { "epoch": 0.029777777777777778, "grad_norm": 0.9368568062782288, "learning_rate": 0.00019424759080800594, "loss": 1.1568, "step": 201 }, { "epoch": 0.029925925925925925, "grad_norm": 0.8276304006576538, "learning_rate": 0.00019421793921423278, "loss": 1.2429, "step": 202 }, { "epoch": 0.030074074074074073, "grad_norm": 0.9591898322105408, "learning_rate": 0.00019418828762045963, "loss": 1.2785, "step": 203 }, { "epoch": 0.030222222222222223, "grad_norm": 1.008541464805603, "learning_rate": 0.00019415863602668644, "loss": 1.5194, "step": 204 }, { "epoch": 0.03037037037037037, 
"grad_norm": 0.7464162707328796, "learning_rate": 0.00019412898443291326, "loss": 1.275, "step": 205 }, { "epoch": 0.030518518518518518, "grad_norm": 1.6648197174072266, "learning_rate": 0.0001940993328391401, "loss": 1.3538, "step": 206 }, { "epoch": 0.030666666666666665, "grad_norm": 1.067299723625183, "learning_rate": 0.00019406968124536695, "loss": 1.2387, "step": 207 }, { "epoch": 0.030814814814814816, "grad_norm": 0.9523988366127014, "learning_rate": 0.00019404002965159377, "loss": 1.4373, "step": 208 }, { "epoch": 0.030962962962962963, "grad_norm": 1.2426611185073853, "learning_rate": 0.0001940103780578206, "loss": 1.3875, "step": 209 }, { "epoch": 0.03111111111111111, "grad_norm": 1.0543694496154785, "learning_rate": 0.00019398072646404746, "loss": 1.2702, "step": 210 }, { "epoch": 0.03125925925925926, "grad_norm": 0.78801429271698, "learning_rate": 0.00019395107487027427, "loss": 1.3753, "step": 211 }, { "epoch": 0.031407407407407405, "grad_norm": 0.9147347807884216, "learning_rate": 0.00019392142327650112, "loss": 1.4496, "step": 212 }, { "epoch": 0.03155555555555556, "grad_norm": 0.9836535453796387, "learning_rate": 0.00019389177168272796, "loss": 1.1552, "step": 213 }, { "epoch": 0.031703703703703706, "grad_norm": 0.9383349418640137, "learning_rate": 0.00019386212008895478, "loss": 1.2849, "step": 214 }, { "epoch": 0.03185185185185185, "grad_norm": 0.8178645968437195, "learning_rate": 0.00019383246849518162, "loss": 1.3611, "step": 215 }, { "epoch": 0.032, "grad_norm": 1.2790336608886719, "learning_rate": 0.00019380281690140847, "loss": 1.544, "step": 216 }, { "epoch": 0.03214814814814815, "grad_norm": 1.6234021186828613, "learning_rate": 0.00019377316530763529, "loss": 1.1774, "step": 217 }, { "epoch": 0.032296296296296295, "grad_norm": 1.1184484958648682, "learning_rate": 0.00019374351371386213, "loss": 1.1779, "step": 218 }, { "epoch": 0.03244444444444444, "grad_norm": 0.7729263305664062, "learning_rate": 0.00019371386212008898, "loss": 1.2238, 
"step": 219 }, { "epoch": 0.03259259259259259, "grad_norm": 1.2453947067260742, "learning_rate": 0.0001936842105263158, "loss": 1.1779, "step": 220 }, { "epoch": 0.032740740740740744, "grad_norm": 1.0809444189071655, "learning_rate": 0.00019365455893254264, "loss": 1.1391, "step": 221 }, { "epoch": 0.03288888888888889, "grad_norm": 2.4422430992126465, "learning_rate": 0.00019362490733876948, "loss": 1.3656, "step": 222 }, { "epoch": 0.03303703703703704, "grad_norm": 1.194951057434082, "learning_rate": 0.0001935952557449963, "loss": 1.1398, "step": 223 }, { "epoch": 0.033185185185185186, "grad_norm": 0.9725684523582458, "learning_rate": 0.00019356560415122314, "loss": 1.2123, "step": 224 }, { "epoch": 0.03333333333333333, "grad_norm": 0.9106444716453552, "learning_rate": 0.00019353595255744996, "loss": 1.1304, "step": 225 }, { "epoch": 0.03348148148148148, "grad_norm": 0.8902103900909424, "learning_rate": 0.0001935063009636768, "loss": 1.3702, "step": 226 }, { "epoch": 0.03362962962962963, "grad_norm": 0.9396015405654907, "learning_rate": 0.00019347664936990365, "loss": 1.6152, "step": 227 }, { "epoch": 0.033777777777777775, "grad_norm": 1.4745655059814453, "learning_rate": 0.00019344699777613047, "loss": 1.1397, "step": 228 }, { "epoch": 0.03392592592592593, "grad_norm": 0.685135543346405, "learning_rate": 0.0001934173461823573, "loss": 1.3612, "step": 229 }, { "epoch": 0.034074074074074076, "grad_norm": 1.0589948892593384, "learning_rate": 0.00019338769458858416, "loss": 1.0377, "step": 230 }, { "epoch": 0.03422222222222222, "grad_norm": 0.82380610704422, "learning_rate": 0.00019335804299481097, "loss": 1.3124, "step": 231 }, { "epoch": 0.03437037037037037, "grad_norm": 0.9715389609336853, "learning_rate": 0.00019332839140103782, "loss": 1.1181, "step": 232 }, { "epoch": 0.03451851851851852, "grad_norm": 0.8186538815498352, "learning_rate": 0.00019329873980726466, "loss": 1.2927, "step": 233 }, { "epoch": 0.034666666666666665, "grad_norm": 1.2029539346694946, 
"learning_rate": 0.00019326908821349148, "loss": 1.4152, "step": 234 }, { "epoch": 0.03481481481481481, "grad_norm": 1.2466408014297485, "learning_rate": 0.00019323943661971832, "loss": 1.1035, "step": 235 }, { "epoch": 0.03496296296296296, "grad_norm": 0.7909786105155945, "learning_rate": 0.00019320978502594517, "loss": 1.1657, "step": 236 }, { "epoch": 0.035111111111111114, "grad_norm": 1.3812874555587769, "learning_rate": 0.00019318013343217199, "loss": 1.2617, "step": 237 }, { "epoch": 0.03525925925925926, "grad_norm": 0.9587541222572327, "learning_rate": 0.00019315048183839883, "loss": 1.3889, "step": 238 }, { "epoch": 0.03540740740740741, "grad_norm": 1.1368465423583984, "learning_rate": 0.00019312083024462567, "loss": 1.0769, "step": 239 }, { "epoch": 0.035555555555555556, "grad_norm": 0.9174418449401855, "learning_rate": 0.0001930911786508525, "loss": 1.3081, "step": 240 }, { "epoch": 0.0357037037037037, "grad_norm": 0.7816482782363892, "learning_rate": 0.0001930615270570793, "loss": 1.142, "step": 241 }, { "epoch": 0.03585185185185185, "grad_norm": 1.1396574974060059, "learning_rate": 0.00019303187546330615, "loss": 1.2102, "step": 242 }, { "epoch": 0.036, "grad_norm": 0.8678709864616394, "learning_rate": 0.000193002223869533, "loss": 1.2445, "step": 243 }, { "epoch": 0.03614814814814815, "grad_norm": 0.7911355495452881, "learning_rate": 0.00019297257227575982, "loss": 0.9574, "step": 244 }, { "epoch": 0.0362962962962963, "grad_norm": 0.7782396078109741, "learning_rate": 0.00019294292068198666, "loss": 1.1546, "step": 245 }, { "epoch": 0.036444444444444446, "grad_norm": 0.9505060315132141, "learning_rate": 0.0001929132690882135, "loss": 1.1287, "step": 246 }, { "epoch": 0.03659259259259259, "grad_norm": 1.238294005393982, "learning_rate": 0.00019288361749444032, "loss": 1.0658, "step": 247 }, { "epoch": 0.03674074074074074, "grad_norm": 0.742830753326416, "learning_rate": 0.00019285396590066717, "loss": 1.1948, "step": 248 }, { "epoch": 
0.03688888888888889, "grad_norm": 0.7183875441551208, "learning_rate": 0.000192824314306894, "loss": 0.9899, "step": 249 }, { "epoch": 0.037037037037037035, "grad_norm": 0.773074209690094, "learning_rate": 0.00019279466271312083, "loss": 1.2474, "step": 250 }, { "epoch": 0.03718518518518518, "grad_norm": 1.0374746322631836, "learning_rate": 0.00019276501111934767, "loss": 1.0381, "step": 251 }, { "epoch": 0.037333333333333336, "grad_norm": 0.8164530396461487, "learning_rate": 0.00019273535952557452, "loss": 1.0381, "step": 252 }, { "epoch": 0.037481481481481484, "grad_norm": 0.9365907311439514, "learning_rate": 0.00019270570793180133, "loss": 1.4101, "step": 253 }, { "epoch": 0.03762962962962963, "grad_norm": 0.8881521224975586, "learning_rate": 0.00019267605633802818, "loss": 1.1966, "step": 254 }, { "epoch": 0.03777777777777778, "grad_norm": 2.7169456481933594, "learning_rate": 0.00019264640474425502, "loss": 1.1894, "step": 255 }, { "epoch": 0.037925925925925925, "grad_norm": 1.2184417247772217, "learning_rate": 0.00019261675315048184, "loss": 1.2353, "step": 256 }, { "epoch": 0.03807407407407407, "grad_norm": 0.9313091039657593, "learning_rate": 0.00019258710155670869, "loss": 1.321, "step": 257 }, { "epoch": 0.03822222222222222, "grad_norm": 0.8754948973655701, "learning_rate": 0.00019255744996293553, "loss": 1.296, "step": 258 }, { "epoch": 0.03837037037037037, "grad_norm": 1.6140278577804565, "learning_rate": 0.00019252779836916235, "loss": 1.2427, "step": 259 }, { "epoch": 0.03851851851851852, "grad_norm": 0.7046688795089722, "learning_rate": 0.00019249814677538916, "loss": 1.2533, "step": 260 }, { "epoch": 0.03866666666666667, "grad_norm": 1.3836346864700317, "learning_rate": 0.00019246849518161604, "loss": 1.0986, "step": 261 }, { "epoch": 0.038814814814814816, "grad_norm": 0.8215917348861694, "learning_rate": 0.00019243884358784285, "loss": 1.0424, "step": 262 }, { "epoch": 0.03896296296296296, "grad_norm": 1.0078061819076538, "learning_rate": 
0.00019240919199406967, "loss": 1.2892, "step": 263 }, { "epoch": 0.03911111111111111, "grad_norm": 1.0687581300735474, "learning_rate": 0.00019237954040029654, "loss": 1.3148, "step": 264 }, { "epoch": 0.03925925925925926, "grad_norm": 0.7134751081466675, "learning_rate": 0.00019234988880652336, "loss": 1.1001, "step": 265 }, { "epoch": 0.039407407407407405, "grad_norm": 0.726123034954071, "learning_rate": 0.00019232023721275018, "loss": 1.3147, "step": 266 }, { "epoch": 0.03955555555555555, "grad_norm": 1.0302845239639282, "learning_rate": 0.00019229058561897705, "loss": 1.3559, "step": 267 }, { "epoch": 0.039703703703703706, "grad_norm": 0.8920096158981323, "learning_rate": 0.00019226093402520387, "loss": 1.4267, "step": 268 }, { "epoch": 0.039851851851851854, "grad_norm": 1.2920289039611816, "learning_rate": 0.00019223128243143068, "loss": 1.2495, "step": 269 }, { "epoch": 0.04, "grad_norm": 0.8217295408248901, "learning_rate": 0.00019220163083765755, "loss": 1.1455, "step": 270 }, { "epoch": 0.04014814814814815, "grad_norm": 0.9490004777908325, "learning_rate": 0.00019217197924388437, "loss": 1.2003, "step": 271 }, { "epoch": 0.040296296296296295, "grad_norm": 0.8513433337211609, "learning_rate": 0.0001921423276501112, "loss": 1.1811, "step": 272 }, { "epoch": 0.04044444444444444, "grad_norm": 1.884893774986267, "learning_rate": 0.00019211267605633806, "loss": 1.2256, "step": 273 }, { "epoch": 0.04059259259259259, "grad_norm": 1.2672890424728394, "learning_rate": 0.00019208302446256488, "loss": 1.4415, "step": 274 }, { "epoch": 0.040740740740740744, "grad_norm": 0.9679176807403564, "learning_rate": 0.0001920533728687917, "loss": 1.2014, "step": 275 }, { "epoch": 0.04088888888888889, "grad_norm": 0.9189028739929199, "learning_rate": 0.00019202372127501854, "loss": 1.3192, "step": 276 }, { "epoch": 0.04103703703703704, "grad_norm": 0.8371793627738953, "learning_rate": 0.00019199406968124538, "loss": 1.2433, "step": 277 }, { "epoch": 0.041185185185185186, 
"grad_norm": 0.7926132678985596, "learning_rate": 0.0001919644180874722, "loss": 0.9882, "step": 278 }, { "epoch": 0.04133333333333333, "grad_norm": 2.006680488586426, "learning_rate": 0.00019193476649369905, "loss": 1.1837, "step": 279 }, { "epoch": 0.04148148148148148, "grad_norm": 1.062570571899414, "learning_rate": 0.0001919051148999259, "loss": 1.2128, "step": 280 }, { "epoch": 0.04162962962962963, "grad_norm": 0.7273450493812561, "learning_rate": 0.0001918754633061527, "loss": 1.3036, "step": 281 }, { "epoch": 0.041777777777777775, "grad_norm": 0.837178111076355, "learning_rate": 0.00019184581171237955, "loss": 1.2514, "step": 282 }, { "epoch": 0.04192592592592593, "grad_norm": 1.1254032850265503, "learning_rate": 0.0001918161601186064, "loss": 1.1421, "step": 283 }, { "epoch": 0.042074074074074076, "grad_norm": 0.915274441242218, "learning_rate": 0.00019178650852483321, "loss": 1.5691, "step": 284 }, { "epoch": 0.042222222222222223, "grad_norm": 4.748012065887451, "learning_rate": 0.00019175685693106006, "loss": 1.3856, "step": 285 }, { "epoch": 0.04237037037037037, "grad_norm": 1.5594621896743774, "learning_rate": 0.0001917272053372869, "loss": 1.3815, "step": 286 }, { "epoch": 0.04251851851851852, "grad_norm": 0.9001341462135315, "learning_rate": 0.00019169755374351372, "loss": 1.2276, "step": 287 }, { "epoch": 0.042666666666666665, "grad_norm": 1.085966944694519, "learning_rate": 0.00019166790214974057, "loss": 1.3204, "step": 288 }, { "epoch": 0.04281481481481481, "grad_norm": 0.9728178381919861, "learning_rate": 0.0001916382505559674, "loss": 1.1833, "step": 289 }, { "epoch": 0.04296296296296296, "grad_norm": 1.1957199573516846, "learning_rate": 0.00019160859896219423, "loss": 1.1297, "step": 290 }, { "epoch": 0.043111111111111114, "grad_norm": 2.2784461975097656, "learning_rate": 0.00019157894736842104, "loss": 1.3982, "step": 291 }, { "epoch": 0.04325925925925926, "grad_norm": 0.998017430305481, "learning_rate": 0.0001915492957746479, "loss": 1.0903, 
"step": 292 }, { "epoch": 0.04340740740740741, "grad_norm": 0.8035924434661865, "learning_rate": 0.00019151964418087473, "loss": 1.3742, "step": 293 }, { "epoch": 0.043555555555555556, "grad_norm": 0.9830217361450195, "learning_rate": 0.00019148999258710155, "loss": 1.1857, "step": 294 }, { "epoch": 0.0437037037037037, "grad_norm": 1.034332513809204, "learning_rate": 0.0001914603409933284, "loss": 1.0875, "step": 295 }, { "epoch": 0.04385185185185185, "grad_norm": 0.9594345688819885, "learning_rate": 0.00019143068939955524, "loss": 1.4199, "step": 296 }, { "epoch": 0.044, "grad_norm": 1.0057716369628906, "learning_rate": 0.00019140103780578206, "loss": 1.3263, "step": 297 }, { "epoch": 0.044148148148148145, "grad_norm": 1.2575480937957764, "learning_rate": 0.0001913713862120089, "loss": 1.3083, "step": 298 }, { "epoch": 0.0442962962962963, "grad_norm": 0.9955440759658813, "learning_rate": 0.00019134173461823575, "loss": 1.3674, "step": 299 }, { "epoch": 0.044444444444444446, "grad_norm": 0.7672873139381409, "learning_rate": 0.00019131208302446256, "loss": 1.2488, "step": 300 }, { "epoch": 0.04459259259259259, "grad_norm": 0.950467050075531, "learning_rate": 0.0001912824314306894, "loss": 1.3096, "step": 301 }, { "epoch": 0.04474074074074074, "grad_norm": 1.3131276369094849, "learning_rate": 0.00019125277983691625, "loss": 1.0787, "step": 302 }, { "epoch": 0.04488888888888889, "grad_norm": 1.0086424350738525, "learning_rate": 0.00019122312824314307, "loss": 1.3342, "step": 303 }, { "epoch": 0.045037037037037035, "grad_norm": 1.195286750793457, "learning_rate": 0.00019119347664936991, "loss": 1.109, "step": 304 }, { "epoch": 0.04518518518518518, "grad_norm": 0.877375602722168, "learning_rate": 0.00019116382505559676, "loss": 1.3081, "step": 305 }, { "epoch": 0.04533333333333334, "grad_norm": 0.8676083087921143, "learning_rate": 0.00019113417346182358, "loss": 1.1603, "step": 306 }, { "epoch": 0.045481481481481484, "grad_norm": 1.1526929140090942, "learning_rate": 
0.00019110452186805042, "loss": 1.1906, "step": 307 }, { "epoch": 0.04562962962962963, "grad_norm": 0.9462252259254456, "learning_rate": 0.00019107487027427726, "loss": 1.0573, "step": 308 }, { "epoch": 0.04577777777777778, "grad_norm": 0.7923760414123535, "learning_rate": 0.00019104521868050408, "loss": 1.2772, "step": 309 }, { "epoch": 0.045925925925925926, "grad_norm": 1.2533591985702515, "learning_rate": 0.00019101556708673093, "loss": 1.3882, "step": 310 }, { "epoch": 0.04607407407407407, "grad_norm": 0.8806333541870117, "learning_rate": 0.00019098591549295774, "loss": 1.0505, "step": 311 }, { "epoch": 0.04622222222222222, "grad_norm": 0.7805909514427185, "learning_rate": 0.0001909562638991846, "loss": 1.4897, "step": 312 }, { "epoch": 0.04637037037037037, "grad_norm": 0.812323272228241, "learning_rate": 0.00019092661230541143, "loss": 1.1407, "step": 313 }, { "epoch": 0.04651851851851852, "grad_norm": 1.0240209102630615, "learning_rate": 0.00019089696071163825, "loss": 1.2754, "step": 314 }, { "epoch": 0.04666666666666667, "grad_norm": 0.7502384781837463, "learning_rate": 0.0001908673091178651, "loss": 1.178, "step": 315 }, { "epoch": 0.046814814814814816, "grad_norm": 0.7480301856994629, "learning_rate": 0.00019083765752409194, "loss": 0.9225, "step": 316 }, { "epoch": 0.04696296296296296, "grad_norm": 0.8027053475379944, "learning_rate": 0.00019080800593031876, "loss": 1.2066, "step": 317 }, { "epoch": 0.04711111111111111, "grad_norm": 1.5919809341430664, "learning_rate": 0.0001907783543365456, "loss": 1.4634, "step": 318 }, { "epoch": 0.04725925925925926, "grad_norm": 0.9461173415184021, "learning_rate": 0.00019074870274277245, "loss": 1.2798, "step": 319 }, { "epoch": 0.047407407407407405, "grad_norm": 0.9098615050315857, "learning_rate": 0.00019071905114899926, "loss": 1.2508, "step": 320 }, { "epoch": 0.04755555555555555, "grad_norm": 2.0828206539154053, "learning_rate": 0.0001906893995552261, "loss": 1.0461, "step": 321 }, { "epoch": 
0.047703703703703706, "grad_norm": 0.975915253162384, "learning_rate": 0.00019065974796145295, "loss": 1.0488, "step": 322 }, { "epoch": 0.047851851851851854, "grad_norm": 0.8300365209579468, "learning_rate": 0.00019063009636767977, "loss": 1.2716, "step": 323 }, { "epoch": 0.048, "grad_norm": 0.8969668745994568, "learning_rate": 0.0001906004447739066, "loss": 1.3268, "step": 324 }, { "epoch": 0.04814814814814815, "grad_norm": 0.7707619071006775, "learning_rate": 0.00019057079318013346, "loss": 1.1056, "step": 325 }, { "epoch": 0.048296296296296296, "grad_norm": 0.941278874874115, "learning_rate": 0.00019054114158636028, "loss": 1.2243, "step": 326 }, { "epoch": 0.04844444444444444, "grad_norm": 2.3351869583129883, "learning_rate": 0.0001905114899925871, "loss": 1.1401, "step": 327 }, { "epoch": 0.04859259259259259, "grad_norm": 0.9738073945045471, "learning_rate": 0.00019048183839881394, "loss": 1.3564, "step": 328 }, { "epoch": 0.048740740740740744, "grad_norm": 1.029203176498413, "learning_rate": 0.00019045218680504078, "loss": 1.2079, "step": 329 }, { "epoch": 0.04888888888888889, "grad_norm": 1.3182631731033325, "learning_rate": 0.0001904225352112676, "loss": 0.9833, "step": 330 }, { "epoch": 0.04903703703703704, "grad_norm": 0.7868254780769348, "learning_rate": 0.00019039288361749444, "loss": 1.0738, "step": 331 }, { "epoch": 0.049185185185185186, "grad_norm": 0.949598491191864, "learning_rate": 0.0001903632320237213, "loss": 1.2144, "step": 332 }, { "epoch": 0.04933333333333333, "grad_norm": 0.8689578175544739, "learning_rate": 0.0001903335804299481, "loss": 0.9466, "step": 333 }, { "epoch": 0.04948148148148148, "grad_norm": 0.6478769183158875, "learning_rate": 0.00019030392883617495, "loss": 1.1753, "step": 334 }, { "epoch": 0.04962962962962963, "grad_norm": 1.025097131729126, "learning_rate": 0.0001902742772424018, "loss": 1.4284, "step": 335 }, { "epoch": 0.049777777777777775, "grad_norm": 1.3263148069381714, "learning_rate": 0.0001902446256486286, 
"loss": 1.1652, "step": 336 }, { "epoch": 0.04992592592592593, "grad_norm": 0.79034423828125, "learning_rate": 0.00019021497405485546, "loss": 0.9998, "step": 337 }, { "epoch": 0.050074074074074076, "grad_norm": 0.8130358457565308, "learning_rate": 0.0001901853224610823, "loss": 1.2194, "step": 338 }, { "epoch": 0.050222222222222224, "grad_norm": 1.3800885677337646, "learning_rate": 0.00019015567086730912, "loss": 1.0348, "step": 339 }, { "epoch": 0.05037037037037037, "grad_norm": 0.9516430497169495, "learning_rate": 0.00019012601927353596, "loss": 1.354, "step": 340 }, { "epoch": 0.05051851851851852, "grad_norm": 0.8971577286720276, "learning_rate": 0.0001900963676797628, "loss": 1.0234, "step": 341 }, { "epoch": 0.050666666666666665, "grad_norm": 0.7143462896347046, "learning_rate": 0.00019006671608598962, "loss": 1.129, "step": 342 }, { "epoch": 0.05081481481481481, "grad_norm": 1.2593697309494019, "learning_rate": 0.00019003706449221647, "loss": 1.1932, "step": 343 }, { "epoch": 0.05096296296296296, "grad_norm": 1.0689709186553955, "learning_rate": 0.0001900074128984433, "loss": 0.9627, "step": 344 }, { "epoch": 0.051111111111111114, "grad_norm": 0.9543367028236389, "learning_rate": 0.00018997776130467013, "loss": 1.155, "step": 345 }, { "epoch": 0.05125925925925926, "grad_norm": 1.1440194845199585, "learning_rate": 0.00018994810971089695, "loss": 1.4789, "step": 346 }, { "epoch": 0.05140740740740741, "grad_norm": 0.8633337020874023, "learning_rate": 0.00018991845811712382, "loss": 1.3178, "step": 347 }, { "epoch": 0.051555555555555556, "grad_norm": 0.8850125670433044, "learning_rate": 0.00018988880652335064, "loss": 1.1834, "step": 348 }, { "epoch": 0.0517037037037037, "grad_norm": 0.9416225552558899, "learning_rate": 0.00018985915492957745, "loss": 1.1306, "step": 349 }, { "epoch": 0.05185185185185185, "grad_norm": 1.0465419292449951, "learning_rate": 0.00018982950333580433, "loss": 0.9964, "step": 350 }, { "epoch": 0.052, "grad_norm": 0.9418424367904663, 
"learning_rate": 0.00018979985174203114, "loss": 1.4032, "step": 351 }, { "epoch": 0.052148148148148145, "grad_norm": 1.0868874788284302, "learning_rate": 0.00018977020014825796, "loss": 1.2295, "step": 352 }, { "epoch": 0.0522962962962963, "grad_norm": 1.3639134168624878, "learning_rate": 0.00018974054855448483, "loss": 1.1505, "step": 353 }, { "epoch": 0.052444444444444446, "grad_norm": 0.7342366576194763, "learning_rate": 0.00018971089696071165, "loss": 1.072, "step": 354 }, { "epoch": 0.052592592592592594, "grad_norm": 1.0624979734420776, "learning_rate": 0.00018968124536693847, "loss": 1.3847, "step": 355 }, { "epoch": 0.05274074074074074, "grad_norm": 1.0965144634246826, "learning_rate": 0.00018965159377316534, "loss": 1.1309, "step": 356 }, { "epoch": 0.05288888888888889, "grad_norm": 2.761899948120117, "learning_rate": 0.00018962194217939216, "loss": 1.396, "step": 357 }, { "epoch": 0.053037037037037035, "grad_norm": 1.303941011428833, "learning_rate": 0.00018959229058561897, "loss": 1.3632, "step": 358 }, { "epoch": 0.05318518518518518, "grad_norm": 0.8362237215042114, "learning_rate": 0.00018956263899184584, "loss": 0.9956, "step": 359 }, { "epoch": 0.05333333333333334, "grad_norm": 0.8556802272796631, "learning_rate": 0.00018953298739807266, "loss": 1.3789, "step": 360 }, { "epoch": 0.053481481481481484, "grad_norm": 0.9617559313774109, "learning_rate": 0.00018950333580429948, "loss": 1.1837, "step": 361 }, { "epoch": 0.05362962962962963, "grad_norm": 1.0323243141174316, "learning_rate": 0.00018947368421052632, "loss": 1.3796, "step": 362 }, { "epoch": 0.05377777777777778, "grad_norm": 1.2637794017791748, "learning_rate": 0.00018944403261675317, "loss": 1.1701, "step": 363 }, { "epoch": 0.053925925925925926, "grad_norm": 0.9685412645339966, "learning_rate": 0.00018941438102297999, "loss": 1.3551, "step": 364 }, { "epoch": 0.05407407407407407, "grad_norm": 0.9208672046661377, "learning_rate": 0.00018938472942920683, "loss": 1.5171, "step": 365 }, { 
"epoch": 0.05422222222222222, "grad_norm": 0.9151871204376221, "learning_rate": 0.00018935507783543367, "loss": 1.1876, "step": 366 }, { "epoch": 0.05437037037037037, "grad_norm": 1.1648972034454346, "learning_rate": 0.0001893254262416605, "loss": 1.2662, "step": 367 }, { "epoch": 0.05451851851851852, "grad_norm": 0.9495143294334412, "learning_rate": 0.00018929577464788734, "loss": 1.2151, "step": 368 }, { "epoch": 0.05466666666666667, "grad_norm": 1.1094683408737183, "learning_rate": 0.00018926612305411418, "loss": 1.2581, "step": 369 }, { "epoch": 0.054814814814814816, "grad_norm": 1.038244605064392, "learning_rate": 0.000189236471460341, "loss": 1.2898, "step": 370 }, { "epoch": 0.05496296296296296, "grad_norm": 1.372955322265625, "learning_rate": 0.00018920681986656784, "loss": 1.2511, "step": 371 }, { "epoch": 0.05511111111111111, "grad_norm": 1.5707471370697021, "learning_rate": 0.0001891771682727947, "loss": 1.4273, "step": 372 }, { "epoch": 0.05525925925925926, "grad_norm": 0.8700966835021973, "learning_rate": 0.0001891475166790215, "loss": 1.2627, "step": 373 }, { "epoch": 0.055407407407407405, "grad_norm": 1.0520998239517212, "learning_rate": 0.00018911786508524835, "loss": 1.1392, "step": 374 }, { "epoch": 0.05555555555555555, "grad_norm": 1.1595327854156494, "learning_rate": 0.0001890882134914752, "loss": 1.2995, "step": 375 }, { "epoch": 0.05570370370370371, "grad_norm": 1.009542465209961, "learning_rate": 0.000189058561897702, "loss": 1.3476, "step": 376 }, { "epoch": 0.055851851851851854, "grad_norm": 1.0016463994979858, "learning_rate": 0.00018902891030392883, "loss": 1.2419, "step": 377 }, { "epoch": 0.056, "grad_norm": 0.843089759349823, "learning_rate": 0.00018899925871015567, "loss": 1.0925, "step": 378 }, { "epoch": 0.05614814814814815, "grad_norm": 1.0765223503112793, "learning_rate": 0.00018896960711638252, "loss": 1.4006, "step": 379 }, { "epoch": 0.056296296296296296, "grad_norm": 0.8268885612487793, "learning_rate": 0.00018893995552260933, 
"loss": 1.2053, "step": 380 }, { "epoch": 0.05644444444444444, "grad_norm": 1.0966840982437134, "learning_rate": 0.00018891030392883618, "loss": 1.0795, "step": 381 }, { "epoch": 0.05659259259259259, "grad_norm": 1.574753999710083, "learning_rate": 0.00018888065233506302, "loss": 1.0723, "step": 382 }, { "epoch": 0.05674074074074074, "grad_norm": 1.1856094598770142, "learning_rate": 0.00018885100074128984, "loss": 0.9495, "step": 383 }, { "epoch": 0.05688888888888889, "grad_norm": 0.9048572778701782, "learning_rate": 0.00018882134914751668, "loss": 1.1942, "step": 384 }, { "epoch": 0.05703703703703704, "grad_norm": 0.7475482225418091, "learning_rate": 0.00018879169755374353, "loss": 1.0352, "step": 385 }, { "epoch": 0.057185185185185186, "grad_norm": 1.051243543624878, "learning_rate": 0.00018876204595997035, "loss": 1.0815, "step": 386 }, { "epoch": 0.05733333333333333, "grad_norm": 0.8005509972572327, "learning_rate": 0.0001887323943661972, "loss": 1.1604, "step": 387 }, { "epoch": 0.05748148148148148, "grad_norm": 0.951156497001648, "learning_rate": 0.00018870274277242404, "loss": 1.2127, "step": 388 }, { "epoch": 0.05762962962962963, "grad_norm": 0.998855471611023, "learning_rate": 0.00018867309117865085, "loss": 1.1835, "step": 389 }, { "epoch": 0.057777777777777775, "grad_norm": 1.2722110748291016, "learning_rate": 0.0001886434395848777, "loss": 1.2818, "step": 390 }, { "epoch": 0.05792592592592593, "grad_norm": 1.2306925058364868, "learning_rate": 0.00018861378799110454, "loss": 1.0018, "step": 391 }, { "epoch": 0.05807407407407408, "grad_norm": 1.022120475769043, "learning_rate": 0.00018858413639733136, "loss": 1.1503, "step": 392 }, { "epoch": 0.058222222222222224, "grad_norm": 0.9722836017608643, "learning_rate": 0.0001885544848035582, "loss": 1.3875, "step": 393 }, { "epoch": 0.05837037037037037, "grad_norm": 1.0112863779067993, "learning_rate": 0.00018852483320978505, "loss": 1.2048, "step": 394 }, { "epoch": 0.05851851851851852, "grad_norm": 
2.09438157081604, "learning_rate": 0.00018849518161601187, "loss": 1.3112, "step": 395 }, { "epoch": 0.058666666666666666, "grad_norm": 1.14156973361969, "learning_rate": 0.0001884655300222387, "loss": 1.2865, "step": 396 }, { "epoch": 0.05881481481481481, "grad_norm": 0.8264205455780029, "learning_rate": 0.00018843587842846553, "loss": 1.0833, "step": 397 }, { "epoch": 0.05896296296296296, "grad_norm": 0.8042988181114197, "learning_rate": 0.00018840622683469237, "loss": 1.2443, "step": 398 }, { "epoch": 0.059111111111111114, "grad_norm": 0.8299493789672852, "learning_rate": 0.00018837657524091922, "loss": 1.1525, "step": 399 }, { "epoch": 0.05925925925925926, "grad_norm": 0.8911139965057373, "learning_rate": 0.00018834692364714603, "loss": 1.1735, "step": 400 }, { "epoch": 0.05940740740740741, "grad_norm": 0.8134204149246216, "learning_rate": 0.00018831727205337288, "loss": 1.0875, "step": 401 }, { "epoch": 0.059555555555555556, "grad_norm": 1.022357702255249, "learning_rate": 0.00018828762045959972, "loss": 1.109, "step": 402 }, { "epoch": 0.0597037037037037, "grad_norm": 1.407626748085022, "learning_rate": 0.00018825796886582654, "loss": 1.232, "step": 403 }, { "epoch": 0.05985185185185185, "grad_norm": 2.914661407470703, "learning_rate": 0.00018822831727205338, "loss": 1.2303, "step": 404 }, { "epoch": 0.06, "grad_norm": 1.400601863861084, "learning_rate": 0.00018819866567828023, "loss": 1.311, "step": 405 }, { "epoch": 0.060148148148148145, "grad_norm": 0.8387115001678467, "learning_rate": 0.00018816901408450705, "loss": 1.2802, "step": 406 }, { "epoch": 0.0602962962962963, "grad_norm": 0.7756436467170715, "learning_rate": 0.0001881393624907339, "loss": 1.2511, "step": 407 }, { "epoch": 0.060444444444444446, "grad_norm": 0.9221392273902893, "learning_rate": 0.00018810971089696073, "loss": 1.0606, "step": 408 }, { "epoch": 0.060592592592592594, "grad_norm": 2.785463571548462, "learning_rate": 0.00018808005930318755, "loss": 1.0115, "step": 409 }, { "epoch": 
0.06074074074074074, "grad_norm": 1.5348591804504395, "learning_rate": 0.0001880504077094144, "loss": 1.2014, "step": 410 }, { "epoch": 0.06088888888888889, "grad_norm": 0.8930820822715759, "learning_rate": 0.00018802075611564124, "loss": 1.1861, "step": 411 }, { "epoch": 0.061037037037037035, "grad_norm": 0.9898925423622131, "learning_rate": 0.00018799110452186806, "loss": 1.2266, "step": 412 }, { "epoch": 0.06118518518518518, "grad_norm": 1.4079804420471191, "learning_rate": 0.00018796145292809488, "loss": 1.0795, "step": 413 }, { "epoch": 0.06133333333333333, "grad_norm": 0.7314396500587463, "learning_rate": 0.00018793180133432172, "loss": 1.2817, "step": 414 }, { "epoch": 0.061481481481481484, "grad_norm": 1.0503140687942505, "learning_rate": 0.00018790214974054856, "loss": 1.3144, "step": 415 }, { "epoch": 0.06162962962962963, "grad_norm": 1.046270728111267, "learning_rate": 0.00018787249814677538, "loss": 1.0213, "step": 416 }, { "epoch": 0.06177777777777778, "grad_norm": 0.6629498600959778, "learning_rate": 0.00018784284655300223, "loss": 1.1684, "step": 417 }, { "epoch": 0.061925925925925926, "grad_norm": 1.0851155519485474, "learning_rate": 0.00018781319495922907, "loss": 1.2505, "step": 418 }, { "epoch": 0.06207407407407407, "grad_norm": 1.239920973777771, "learning_rate": 0.0001877835433654559, "loss": 1.0287, "step": 419 }, { "epoch": 0.06222222222222222, "grad_norm": 0.9222789406776428, "learning_rate": 0.00018775389177168273, "loss": 1.4088, "step": 420 }, { "epoch": 0.06237037037037037, "grad_norm": 0.8636415600776672, "learning_rate": 0.00018772424017790958, "loss": 1.1611, "step": 421 }, { "epoch": 0.06251851851851851, "grad_norm": 1.038588047027588, "learning_rate": 0.0001876945885841364, "loss": 1.2972, "step": 422 }, { "epoch": 0.06266666666666666, "grad_norm": 1.2301207780838013, "learning_rate": 0.00018766493699036324, "loss": 1.2349, "step": 423 }, { "epoch": 0.06281481481481481, "grad_norm": 0.7099899053573608, "learning_rate": 
0.00018763528539659008, "loss": 1.1068, "step": 424 }, { "epoch": 0.06296296296296296, "grad_norm": 1.002394676208496, "learning_rate": 0.0001876056338028169, "loss": 1.3357, "step": 425 }, { "epoch": 0.06311111111111112, "grad_norm": 0.8066929578781128, "learning_rate": 0.00018757598220904375, "loss": 1.6873, "step": 426 }, { "epoch": 0.06325925925925927, "grad_norm": 0.9624059200286865, "learning_rate": 0.0001875463306152706, "loss": 1.137, "step": 427 }, { "epoch": 0.06340740740740741, "grad_norm": 2.6266281604766846, "learning_rate": 0.0001875166790214974, "loss": 1.216, "step": 428 }, { "epoch": 0.06355555555555556, "grad_norm": 1.0235140323638916, "learning_rate": 0.00018748702742772425, "loss": 1.3116, "step": 429 }, { "epoch": 0.0637037037037037, "grad_norm": 1.022839903831482, "learning_rate": 0.0001874573758339511, "loss": 1.3446, "step": 430 }, { "epoch": 0.06385185185185185, "grad_norm": 0.9741849303245544, "learning_rate": 0.00018742772424017791, "loss": 1.0908, "step": 431 }, { "epoch": 0.064, "grad_norm": 0.7452192306518555, "learning_rate": 0.00018739807264640473, "loss": 1.0021, "step": 432 }, { "epoch": 0.06414814814814815, "grad_norm": 0.8392693400382996, "learning_rate": 0.0001873684210526316, "loss": 1.6745, "step": 433 }, { "epoch": 0.0642962962962963, "grad_norm": 0.937454342842102, "learning_rate": 0.00018733876945885842, "loss": 1.3547, "step": 434 }, { "epoch": 0.06444444444444444, "grad_norm": 4.195993423461914, "learning_rate": 0.00018730911786508524, "loss": 1.2984, "step": 435 }, { "epoch": 0.06459259259259259, "grad_norm": 0.9298253059387207, "learning_rate": 0.0001872794662713121, "loss": 1.1374, "step": 436 }, { "epoch": 0.06474074074074074, "grad_norm": 1.1820027828216553, "learning_rate": 0.00018724981467753893, "loss": 1.169, "step": 437 }, { "epoch": 0.06488888888888888, "grad_norm": 0.7461003065109253, "learning_rate": 0.00018722016308376574, "loss": 1.1656, "step": 438 }, { "epoch": 0.06503703703703703, "grad_norm": 
0.9983235001564026, "learning_rate": 0.00018719051148999262, "loss": 1.3256, "step": 439 }, { "epoch": 0.06518518518518518, "grad_norm": 1.3242130279541016, "learning_rate": 0.00018716085989621943, "loss": 1.0237, "step": 440 }, { "epoch": 0.06533333333333333, "grad_norm": 1.371294617652893, "learning_rate": 0.00018713120830244625, "loss": 1.1403, "step": 441 }, { "epoch": 0.06548148148148149, "grad_norm": 1.2605317831039429, "learning_rate": 0.00018710155670867312, "loss": 1.1972, "step": 442 }, { "epoch": 0.06562962962962963, "grad_norm": 1.1461695432662964, "learning_rate": 0.00018707190511489994, "loss": 1.3529, "step": 443 }, { "epoch": 0.06577777777777778, "grad_norm": 0.8331106901168823, "learning_rate": 0.00018704225352112676, "loss": 1.1217, "step": 444 }, { "epoch": 0.06592592592592593, "grad_norm": 1.2743287086486816, "learning_rate": 0.00018701260192735363, "loss": 1.2748, "step": 445 }, { "epoch": 0.06607407407407408, "grad_norm": 1.6404948234558105, "learning_rate": 0.00018698295033358044, "loss": 1.1709, "step": 446 }, { "epoch": 0.06622222222222222, "grad_norm": 1.4575682878494263, "learning_rate": 0.00018695329873980726, "loss": 1.1936, "step": 447 }, { "epoch": 0.06637037037037037, "grad_norm": 1.053147315979004, "learning_rate": 0.0001869236471460341, "loss": 1.0561, "step": 448 }, { "epoch": 0.06651851851851852, "grad_norm": 1.0501112937927246, "learning_rate": 0.00018689399555226095, "loss": 1.1234, "step": 449 }, { "epoch": 0.06666666666666667, "grad_norm": 0.9422358274459839, "learning_rate": 0.00018686434395848777, "loss": 1.1992, "step": 450 }, { "epoch": 0.06681481481481481, "grad_norm": 1.2424194812774658, "learning_rate": 0.0001868346923647146, "loss": 1.1539, "step": 451 }, { "epoch": 0.06696296296296296, "grad_norm": 0.9493004083633423, "learning_rate": 0.00018680504077094146, "loss": 1.0244, "step": 452 }, { "epoch": 0.06711111111111111, "grad_norm": 1.1930363178253174, "learning_rate": 0.00018677538917716827, "loss": 0.9746, "step": 
453 }, { "epoch": 0.06725925925925925, "grad_norm": 0.8539004921913147, "learning_rate": 0.00018674573758339512, "loss": 1.1283, "step": 454 }, { "epoch": 0.0674074074074074, "grad_norm": 1.1996976137161255, "learning_rate": 0.00018671608598962196, "loss": 1.3479, "step": 455 }, { "epoch": 0.06755555555555555, "grad_norm": 1.668487548828125, "learning_rate": 0.00018668643439584878, "loss": 1.1042, "step": 456 }, { "epoch": 0.06770370370370371, "grad_norm": 0.8974586129188538, "learning_rate": 0.00018665678280207563, "loss": 1.2239, "step": 457 }, { "epoch": 0.06785185185185186, "grad_norm": 0.8592154383659363, "learning_rate": 0.00018662713120830247, "loss": 0.924, "step": 458 }, { "epoch": 0.068, "grad_norm": 1.0311380624771118, "learning_rate": 0.0001865974796145293, "loss": 1.1042, "step": 459 }, { "epoch": 0.06814814814814815, "grad_norm": 0.8845215439796448, "learning_rate": 0.00018656782802075613, "loss": 1.0193, "step": 460 }, { "epoch": 0.0682962962962963, "grad_norm": 0.8208663463592529, "learning_rate": 0.00018653817642698298, "loss": 1.0014, "step": 461 }, { "epoch": 0.06844444444444445, "grad_norm": 1.7937710285186768, "learning_rate": 0.0001865085248332098, "loss": 1.0882, "step": 462 }, { "epoch": 0.0685925925925926, "grad_norm": 0.9765856266021729, "learning_rate": 0.0001864788732394366, "loss": 1.1428, "step": 463 }, { "epoch": 0.06874074074074074, "grad_norm": 1.0087882280349731, "learning_rate": 0.00018644922164566348, "loss": 1.1982, "step": 464 }, { "epoch": 0.06888888888888889, "grad_norm": 1.0147068500518799, "learning_rate": 0.0001864195700518903, "loss": 1.1694, "step": 465 }, { "epoch": 0.06903703703703704, "grad_norm": 0.9086894392967224, "learning_rate": 0.00018638991845811712, "loss": 1.1188, "step": 466 }, { "epoch": 0.06918518518518518, "grad_norm": 0.7124015092849731, "learning_rate": 0.00018636026686434396, "loss": 1.1703, "step": 467 }, { "epoch": 0.06933333333333333, "grad_norm": 1.072105884552002, "learning_rate": 
0.0001863306152705708, "loss": 1.2042, "step": 468 }, { "epoch": 0.06948148148148148, "grad_norm": 1.7827683687210083, "learning_rate": 0.00018630096367679762, "loss": 1.1303, "step": 469 }, { "epoch": 0.06962962962962962, "grad_norm": 1.5798730850219727, "learning_rate": 0.00018627131208302447, "loss": 1.0927, "step": 470 }, { "epoch": 0.06977777777777777, "grad_norm": 0.8328156471252441, "learning_rate": 0.0001862416604892513, "loss": 1.1426, "step": 471 }, { "epoch": 0.06992592592592592, "grad_norm": 0.8577338457107544, "learning_rate": 0.00018621200889547813, "loss": 1.2541, "step": 472 }, { "epoch": 0.07007407407407408, "grad_norm": 0.9907087087631226, "learning_rate": 0.00018618235730170497, "loss": 1.2424, "step": 473 }, { "epoch": 0.07022222222222223, "grad_norm": 0.7621241211891174, "learning_rate": 0.00018615270570793182, "loss": 1.1052, "step": 474 }, { "epoch": 0.07037037037037037, "grad_norm": 0.7610787153244019, "learning_rate": 0.00018612305411415864, "loss": 1.1561, "step": 475 }, { "epoch": 0.07051851851851852, "grad_norm": 1.0455390214920044, "learning_rate": 0.00018609340252038548, "loss": 1.1155, "step": 476 }, { "epoch": 0.07066666666666667, "grad_norm": 0.9702383279800415, "learning_rate": 0.00018606375092661233, "loss": 1.5349, "step": 477 }, { "epoch": 0.07081481481481482, "grad_norm": 0.9245896935462952, "learning_rate": 0.00018603409933283914, "loss": 1.629, "step": 478 }, { "epoch": 0.07096296296296296, "grad_norm": 0.9411025643348694, "learning_rate": 0.000186004447739066, "loss": 1.2111, "step": 479 }, { "epoch": 0.07111111111111111, "grad_norm": 3.217911958694458, "learning_rate": 0.00018597479614529283, "loss": 1.4453, "step": 480 }, { "epoch": 0.07125925925925926, "grad_norm": 0.8464000821113586, "learning_rate": 0.00018594514455151965, "loss": 1.131, "step": 481 }, { "epoch": 0.0714074074074074, "grad_norm": 6.36705207824707, "learning_rate": 0.0001859154929577465, "loss": 1.0089, "step": 482 }, { "epoch": 0.07155555555555555, 
"grad_norm": 0.8953663110733032, "learning_rate": 0.0001858858413639733, "loss": 1.2024, "step": 483 }, { "epoch": 0.0717037037037037, "grad_norm": 1.0531768798828125, "learning_rate": 0.00018585618977020015, "loss": 1.4019, "step": 484 }, { "epoch": 0.07185185185185185, "grad_norm": 2.170644998550415, "learning_rate": 0.000185826538176427, "loss": 1.376, "step": 485 }, { "epoch": 0.072, "grad_norm": 0.6142813563346863, "learning_rate": 0.00018579688658265382, "loss": 0.925, "step": 486 }, { "epoch": 0.07214814814814814, "grad_norm": 0.7336257100105286, "learning_rate": 0.00018576723498888066, "loss": 1.1232, "step": 487 }, { "epoch": 0.0722962962962963, "grad_norm": 0.6803194880485535, "learning_rate": 0.0001857375833951075, "loss": 1.1407, "step": 488 }, { "epoch": 0.07244444444444445, "grad_norm": 0.8505472540855408, "learning_rate": 0.00018570793180133432, "loss": 1.318, "step": 489 }, { "epoch": 0.0725925925925926, "grad_norm": 0.8538836240768433, "learning_rate": 0.00018567828020756117, "loss": 1.1678, "step": 490 }, { "epoch": 0.07274074074074074, "grad_norm": 0.8287623524665833, "learning_rate": 0.000185648628613788, "loss": 1.2591, "step": 491 }, { "epoch": 0.07288888888888889, "grad_norm": 1.3350564241409302, "learning_rate": 0.00018561897702001483, "loss": 1.4278, "step": 492 }, { "epoch": 0.07303703703703704, "grad_norm": 0.992910623550415, "learning_rate": 0.00018558932542624167, "loss": 1.2179, "step": 493 }, { "epoch": 0.07318518518518519, "grad_norm": 2.1098670959472656, "learning_rate": 0.00018555967383246852, "loss": 1.311, "step": 494 }, { "epoch": 0.07333333333333333, "grad_norm": 0.7079705595970154, "learning_rate": 0.00018553002223869534, "loss": 1.0287, "step": 495 }, { "epoch": 0.07348148148148148, "grad_norm": 0.9057304859161377, "learning_rate": 0.00018550037064492218, "loss": 1.2524, "step": 496 }, { "epoch": 0.07362962962962963, "grad_norm": 0.936161458492279, "learning_rate": 0.00018547071905114902, "loss": 1.1956, "step": 497 }, { 
"epoch": 0.07377777777777778, "grad_norm": 1.3111516237258911, "learning_rate": 0.00018544106745737584, "loss": 1.2858, "step": 498 }, { "epoch": 0.07392592592592592, "grad_norm": 0.8875294923782349, "learning_rate": 0.00018541141586360266, "loss": 1.0537, "step": 499 }, { "epoch": 0.07407407407407407, "grad_norm": 1.14637291431427, "learning_rate": 0.0001853817642698295, "loss": 1.4708, "step": 500 }, { "epoch": 0.07422222222222222, "grad_norm": 0.8916139006614685, "learning_rate": 0.00018535211267605635, "loss": 1.1583, "step": 501 }, { "epoch": 0.07437037037037036, "grad_norm": 0.798608124256134, "learning_rate": 0.00018532246108228317, "loss": 1.4402, "step": 502 }, { "epoch": 0.07451851851851851, "grad_norm": 0.8326433897018433, "learning_rate": 0.00018529280948851, "loss": 1.3123, "step": 503 }, { "epoch": 0.07466666666666667, "grad_norm": 1.6419179439544678, "learning_rate": 0.00018526315789473685, "loss": 1.1254, "step": 504 }, { "epoch": 0.07481481481481482, "grad_norm": 0.7575996518135071, "learning_rate": 0.00018523350630096367, "loss": 1.3618, "step": 505 }, { "epoch": 0.07496296296296297, "grad_norm": 0.8234692215919495, "learning_rate": 0.00018520385470719052, "loss": 1.1657, "step": 506 }, { "epoch": 0.07511111111111111, "grad_norm": 0.9207050204277039, "learning_rate": 0.00018517420311341736, "loss": 1.4156, "step": 507 }, { "epoch": 0.07525925925925926, "grad_norm": 0.9526640772819519, "learning_rate": 0.00018514455151964418, "loss": 1.2533, "step": 508 }, { "epoch": 0.07540740740740741, "grad_norm": 1.0630484819412231, "learning_rate": 0.00018511489992587102, "loss": 1.187, "step": 509 }, { "epoch": 0.07555555555555556, "grad_norm": 1.4691499471664429, "learning_rate": 0.00018508524833209787, "loss": 1.4744, "step": 510 }, { "epoch": 0.0757037037037037, "grad_norm": 0.8877632021903992, "learning_rate": 0.00018505559673832468, "loss": 1.3136, "step": 511 }, { "epoch": 0.07585185185185185, "grad_norm": 0.9979971051216125, "learning_rate": 
0.00018502594514455153, "loss": 1.0359, "step": 512 }, { "epoch": 0.076, "grad_norm": 0.7808818817138672, "learning_rate": 0.00018499629355077837, "loss": 1.1469, "step": 513 }, { "epoch": 0.07614814814814815, "grad_norm": 0.7668399810791016, "learning_rate": 0.0001849666419570052, "loss": 0.9518, "step": 514 }, { "epoch": 0.07629629629629629, "grad_norm": 0.6751396059989929, "learning_rate": 0.00018493699036323204, "loss": 1.0435, "step": 515 }, { "epoch": 0.07644444444444444, "grad_norm": 2.380500316619873, "learning_rate": 0.00018490733876945888, "loss": 1.2916, "step": 516 }, { "epoch": 0.07659259259259259, "grad_norm": 1.5779838562011719, "learning_rate": 0.0001848776871756857, "loss": 1.1652, "step": 517 }, { "epoch": 0.07674074074074073, "grad_norm": 0.8845908045768738, "learning_rate": 0.00018484803558191251, "loss": 1.3284, "step": 518 }, { "epoch": 0.0768888888888889, "grad_norm": 0.826222836971283, "learning_rate": 0.00018481838398813939, "loss": 0.9343, "step": 519 }, { "epoch": 0.07703703703703704, "grad_norm": 0.7673617601394653, "learning_rate": 0.0001847887323943662, "loss": 1.0863, "step": 520 }, { "epoch": 0.07718518518518519, "grad_norm": 0.8270271420478821, "learning_rate": 0.00018475908080059302, "loss": 1.3003, "step": 521 }, { "epoch": 0.07733333333333334, "grad_norm": 1.4127448797225952, "learning_rate": 0.0001847294292068199, "loss": 1.3734, "step": 522 }, { "epoch": 0.07748148148148148, "grad_norm": 0.8243331909179688, "learning_rate": 0.0001846997776130467, "loss": 1.1697, "step": 523 }, { "epoch": 0.07762962962962963, "grad_norm": 0.7939732670783997, "learning_rate": 0.00018467012601927353, "loss": 1.3484, "step": 524 }, { "epoch": 0.07777777777777778, "grad_norm": 1.120720386505127, "learning_rate": 0.0001846404744255004, "loss": 1.1484, "step": 525 }, { "epoch": 0.07792592592592593, "grad_norm": 0.8500152230262756, "learning_rate": 0.00018461082283172722, "loss": 1.4174, "step": 526 }, { "epoch": 0.07807407407407407, "grad_norm": 
0.9206627011299133, "learning_rate": 0.00018458117123795403, "loss": 1.4041, "step": 527 }, { "epoch": 0.07822222222222222, "grad_norm": 0.8654802441596985, "learning_rate": 0.0001845515196441809, "loss": 1.4201, "step": 528 }, { "epoch": 0.07837037037037037, "grad_norm": 0.9089827537536621, "learning_rate": 0.00018452186805040772, "loss": 1.3494, "step": 529 }, { "epoch": 0.07851851851851852, "grad_norm": 0.851271390914917, "learning_rate": 0.00018449221645663454, "loss": 1.1615, "step": 530 }, { "epoch": 0.07866666666666666, "grad_norm": 0.9235059022903442, "learning_rate": 0.0001844625648628614, "loss": 1.1841, "step": 531 }, { "epoch": 0.07881481481481481, "grad_norm": 1.1651939153671265, "learning_rate": 0.00018443291326908823, "loss": 1.1423, "step": 532 }, { "epoch": 0.07896296296296296, "grad_norm": 1.5201992988586426, "learning_rate": 0.00018440326167531505, "loss": 1.0807, "step": 533 }, { "epoch": 0.0791111111111111, "grad_norm": 0.9012873768806458, "learning_rate": 0.0001843736100815419, "loss": 1.0604, "step": 534 }, { "epoch": 0.07925925925925927, "grad_norm": 0.9370338320732117, "learning_rate": 0.00018434395848776873, "loss": 1.033, "step": 535 }, { "epoch": 0.07940740740740741, "grad_norm": 1.0847934484481812, "learning_rate": 0.00018431430689399555, "loss": 1.2449, "step": 536 }, { "epoch": 0.07955555555555556, "grad_norm": 1.0478984117507935, "learning_rate": 0.0001842846553002224, "loss": 1.2035, "step": 537 }, { "epoch": 0.07970370370370371, "grad_norm": 0.9668722152709961, "learning_rate": 0.00018425500370644924, "loss": 1.1051, "step": 538 }, { "epoch": 0.07985185185185185, "grad_norm": 1.078900933265686, "learning_rate": 0.00018422535211267606, "loss": 1.1831, "step": 539 }, { "epoch": 0.08, "grad_norm": 1.2907154560089111, "learning_rate": 0.0001841957005189029, "loss": 1.2905, "step": 540 }, { "epoch": 0.08014814814814815, "grad_norm": 0.8596047759056091, "learning_rate": 0.00018416604892512975, "loss": 1.0783, "step": 541 }, { "epoch": 
0.0802962962962963, "grad_norm": 1.0501773357391357, "learning_rate": 0.00018413639733135656, "loss": 1.2523, "step": 542 }, { "epoch": 0.08044444444444444, "grad_norm": 1.099936842918396, "learning_rate": 0.0001841067457375834, "loss": 1.0595, "step": 543 }, { "epoch": 0.08059259259259259, "grad_norm": 2.2023308277130127, "learning_rate": 0.00018407709414381025, "loss": 1.1073, "step": 544 }, { "epoch": 0.08074074074074074, "grad_norm": 1.065362572669983, "learning_rate": 0.00018404744255003707, "loss": 1.1908, "step": 545 }, { "epoch": 0.08088888888888889, "grad_norm": 1.262590765953064, "learning_rate": 0.00018401779095626392, "loss": 1.1085, "step": 546 }, { "epoch": 0.08103703703703703, "grad_norm": 2.105292558670044, "learning_rate": 0.00018398813936249076, "loss": 1.1783, "step": 547 }, { "epoch": 0.08118518518518518, "grad_norm": 0.8273268938064575, "learning_rate": 0.00018395848776871758, "loss": 1.3093, "step": 548 }, { "epoch": 0.08133333333333333, "grad_norm": 0.870421826839447, "learning_rate": 0.0001839288361749444, "loss": 1.2127, "step": 549 }, { "epoch": 0.08148148148148149, "grad_norm": 0.9928128123283386, "learning_rate": 0.00018389918458117127, "loss": 1.2102, "step": 550 }, { "epoch": 0.08162962962962964, "grad_norm": 1.2662063837051392, "learning_rate": 0.00018386953298739808, "loss": 1.2983, "step": 551 }, { "epoch": 0.08177777777777778, "grad_norm": 0.717269241809845, "learning_rate": 0.0001838398813936249, "loss": 1.0745, "step": 552 }, { "epoch": 0.08192592592592593, "grad_norm": 0.8523573875427246, "learning_rate": 0.00018381022979985175, "loss": 1.2344, "step": 553 }, { "epoch": 0.08207407407407408, "grad_norm": 0.8732525110244751, "learning_rate": 0.0001837805782060786, "loss": 1.1399, "step": 554 }, { "epoch": 0.08222222222222222, "grad_norm": 0.8165760636329651, "learning_rate": 0.0001837509266123054, "loss": 1.2417, "step": 555 }, { "epoch": 0.08237037037037037, "grad_norm": 0.7446961998939514, "learning_rate": 
0.00018372127501853225, "loss": 1.0925, "step": 556 }, { "epoch": 0.08251851851851852, "grad_norm": 0.8104601502418518, "learning_rate": 0.0001836916234247591, "loss": 1.2704, "step": 557 }, { "epoch": 0.08266666666666667, "grad_norm": 0.7920854091644287, "learning_rate": 0.0001836619718309859, "loss": 1.047, "step": 558 }, { "epoch": 0.08281481481481481, "grad_norm": 1.0525939464569092, "learning_rate": 0.00018363232023721276, "loss": 1.3214, "step": 559 }, { "epoch": 0.08296296296296296, "grad_norm": 0.9023261666297913, "learning_rate": 0.0001836026686434396, "loss": 1.059, "step": 560 }, { "epoch": 0.08311111111111111, "grad_norm": 1.2181390523910522, "learning_rate": 0.00018357301704966642, "loss": 1.1573, "step": 561 }, { "epoch": 0.08325925925925926, "grad_norm": 1.012555480003357, "learning_rate": 0.00018354336545589326, "loss": 1.2668, "step": 562 }, { "epoch": 0.0834074074074074, "grad_norm": 1.155202865600586, "learning_rate": 0.0001835137138621201, "loss": 1.1001, "step": 563 }, { "epoch": 0.08355555555555555, "grad_norm": 0.9464355707168579, "learning_rate": 0.00018348406226834693, "loss": 1.0413, "step": 564 }, { "epoch": 0.0837037037037037, "grad_norm": 0.8857262134552002, "learning_rate": 0.00018345441067457377, "loss": 1.243, "step": 565 }, { "epoch": 0.08385185185185186, "grad_norm": 0.9362701773643494, "learning_rate": 0.00018342475908080061, "loss": 1.2346, "step": 566 }, { "epoch": 0.084, "grad_norm": 0.7799801826477051, "learning_rate": 0.00018339510748702743, "loss": 0.9897, "step": 567 }, { "epoch": 0.08414814814814815, "grad_norm": 0.8452311754226685, "learning_rate": 0.00018336545589325428, "loss": 1.1353, "step": 568 }, { "epoch": 0.0842962962962963, "grad_norm": 0.8381460309028625, "learning_rate": 0.0001833358042994811, "loss": 1.3495, "step": 569 }, { "epoch": 0.08444444444444445, "grad_norm": 0.8649606108665466, "learning_rate": 0.00018330615270570794, "loss": 1.0649, "step": 570 }, { "epoch": 0.0845925925925926, "grad_norm": 
1.4266537427902222, "learning_rate": 0.00018327650111193478, "loss": 1.2434, "step": 571 }, { "epoch": 0.08474074074074074, "grad_norm": 1.5174586772918701, "learning_rate": 0.0001832468495181616, "loss": 0.9991, "step": 572 }, { "epoch": 0.08488888888888889, "grad_norm": 0.9220426082611084, "learning_rate": 0.00018321719792438844, "loss": 1.1843, "step": 573 }, { "epoch": 0.08503703703703704, "grad_norm": 1.0861490964889526, "learning_rate": 0.0001831875463306153, "loss": 1.1983, "step": 574 }, { "epoch": 0.08518518518518518, "grad_norm": 1.193626046180725, "learning_rate": 0.0001831578947368421, "loss": 1.0819, "step": 575 }, { "epoch": 0.08533333333333333, "grad_norm": 1.0166492462158203, "learning_rate": 0.00018312824314306895, "loss": 1.1051, "step": 576 }, { "epoch": 0.08548148148148148, "grad_norm": 1.6874605417251587, "learning_rate": 0.0001830985915492958, "loss": 1.3142, "step": 577 }, { "epoch": 0.08562962962962962, "grad_norm": 0.8959518074989319, "learning_rate": 0.0001830689399555226, "loss": 1.1314, "step": 578 }, { "epoch": 0.08577777777777777, "grad_norm": 0.89152592420578, "learning_rate": 0.00018303928836174946, "loss": 1.1101, "step": 579 }, { "epoch": 0.08592592592592592, "grad_norm": 0.9080824851989746, "learning_rate": 0.0001830096367679763, "loss": 1.3752, "step": 580 }, { "epoch": 0.08607407407407408, "grad_norm": 1.0774247646331787, "learning_rate": 0.00018297998517420312, "loss": 1.1069, "step": 581 }, { "epoch": 0.08622222222222223, "grad_norm": 0.9702334403991699, "learning_rate": 0.00018295033358042996, "loss": 0.9372, "step": 582 }, { "epoch": 0.08637037037037038, "grad_norm": 1.751625895500183, "learning_rate": 0.0001829206819866568, "loss": 1.1275, "step": 583 }, { "epoch": 0.08651851851851852, "grad_norm": 1.5056380033493042, "learning_rate": 0.00018289103039288363, "loss": 1.2005, "step": 584 }, { "epoch": 0.08666666666666667, "grad_norm": 0.9727258682250977, "learning_rate": 0.00018286137879911044, "loss": 1.2625, "step": 585 }, 
{ "epoch": 0.08681481481481482, "grad_norm": 1.2621508836746216, "learning_rate": 0.0001828317272053373, "loss": 1.114, "step": 586 }, { "epoch": 0.08696296296296296, "grad_norm": 1.268631100654602, "learning_rate": 0.00018280207561156413, "loss": 1.4013, "step": 587 }, { "epoch": 0.08711111111111111, "grad_norm": 1.8268654346466064, "learning_rate": 0.00018277242401779095, "loss": 1.251, "step": 588 }, { "epoch": 0.08725925925925926, "grad_norm": 1.0079928636550903, "learning_rate": 0.0001827427724240178, "loss": 1.4428, "step": 589 }, { "epoch": 0.0874074074074074, "grad_norm": 4.245646953582764, "learning_rate": 0.00018271312083024464, "loss": 1.2148, "step": 590 }, { "epoch": 0.08755555555555555, "grad_norm": 0.7979462742805481, "learning_rate": 0.00018268346923647146, "loss": 1.1625, "step": 591 }, { "epoch": 0.0877037037037037, "grad_norm": 0.8092358112335205, "learning_rate": 0.0001826538176426983, "loss": 1.3047, "step": 592 }, { "epoch": 0.08785185185185185, "grad_norm": 1.1461204290390015, "learning_rate": 0.00018262416604892514, "loss": 1.2695, "step": 593 }, { "epoch": 0.088, "grad_norm": 0.840783417224884, "learning_rate": 0.00018259451445515196, "loss": 1.1151, "step": 594 }, { "epoch": 0.08814814814814814, "grad_norm": 1.8119466304779053, "learning_rate": 0.0001825648628613788, "loss": 1.102, "step": 595 }, { "epoch": 0.08829629629629629, "grad_norm": 0.9149336814880371, "learning_rate": 0.00018253521126760565, "loss": 1.3933, "step": 596 }, { "epoch": 0.08844444444444445, "grad_norm": 1.2133201360702515, "learning_rate": 0.00018250555967383247, "loss": 1.5016, "step": 597 }, { "epoch": 0.0885925925925926, "grad_norm": 0.9949682950973511, "learning_rate": 0.0001824759080800593, "loss": 1.0059, "step": 598 }, { "epoch": 0.08874074074074075, "grad_norm": 0.8984311819076538, "learning_rate": 0.00018244625648628616, "loss": 1.3352, "step": 599 }, { "epoch": 0.08888888888888889, "grad_norm": 0.94035804271698, "learning_rate": 0.00018241660489251297, 
"loss": 1.2192, "step": 600 }, { "epoch": 0.08903703703703704, "grad_norm": 1.6803605556488037, "learning_rate": 0.00018238695329873982, "loss": 1.5195, "step": 601 }, { "epoch": 0.08918518518518519, "grad_norm": 0.9661155939102173, "learning_rate": 0.00018235730170496666, "loss": 1.1799, "step": 602 }, { "epoch": 0.08933333333333333, "grad_norm": 0.8699297904968262, "learning_rate": 0.00018232765011119348, "loss": 1.2836, "step": 603 }, { "epoch": 0.08948148148148148, "grad_norm": 1.2877745628356934, "learning_rate": 0.0001822979985174203, "loss": 1.3247, "step": 604 }, { "epoch": 0.08962962962962963, "grad_norm": 0.9368343949317932, "learning_rate": 0.00018226834692364717, "loss": 1.1784, "step": 605 }, { "epoch": 0.08977777777777778, "grad_norm": 1.4999383687973022, "learning_rate": 0.00018223869532987399, "loss": 1.0687, "step": 606 }, { "epoch": 0.08992592592592592, "grad_norm": 1.1466591358184814, "learning_rate": 0.0001822090437361008, "loss": 1.1705, "step": 607 }, { "epoch": 0.09007407407407407, "grad_norm": 0.9255340695381165, "learning_rate": 0.00018217939214232768, "loss": 1.1218, "step": 608 }, { "epoch": 0.09022222222222222, "grad_norm": 0.883299708366394, "learning_rate": 0.0001821497405485545, "loss": 1.1096, "step": 609 }, { "epoch": 0.09037037037037036, "grad_norm": 1.1371686458587646, "learning_rate": 0.0001821200889547813, "loss": 1.0435, "step": 610 }, { "epoch": 0.09051851851851851, "grad_norm": 1.531836748123169, "learning_rate": 0.00018209043736100818, "loss": 0.9625, "step": 611 }, { "epoch": 0.09066666666666667, "grad_norm": 1.1650729179382324, "learning_rate": 0.000182060785767235, "loss": 1.0427, "step": 612 }, { "epoch": 0.09081481481481482, "grad_norm": 0.9275507926940918, "learning_rate": 0.00018203113417346182, "loss": 1.2767, "step": 613 }, { "epoch": 0.09096296296296297, "grad_norm": 2.034724473953247, "learning_rate": 0.0001820014825796887, "loss": 1.068, "step": 614 }, { "epoch": 0.09111111111111111, "grad_norm": 
0.7717260122299194, "learning_rate": 0.0001819718309859155, "loss": 1.072, "step": 615 }, { "epoch": 0.09125925925925926, "grad_norm": 0.9517045021057129, "learning_rate": 0.00018194217939214232, "loss": 0.9528, "step": 616 }, { "epoch": 0.09140740740740741, "grad_norm": 0.9404609799385071, "learning_rate": 0.0001819125277983692, "loss": 1.0691, "step": 617 }, { "epoch": 0.09155555555555556, "grad_norm": 1.0251749753952026, "learning_rate": 0.000181882876204596, "loss": 1.2236, "step": 618 }, { "epoch": 0.0917037037037037, "grad_norm": 0.8374236226081848, "learning_rate": 0.00018185322461082283, "loss": 0.8373, "step": 619 }, { "epoch": 0.09185185185185185, "grad_norm": 2.0672378540039062, "learning_rate": 0.00018182357301704967, "loss": 1.3187, "step": 620 }, { "epoch": 0.092, "grad_norm": 1.762125849723816, "learning_rate": 0.00018179392142327652, "loss": 1.3841, "step": 621 }, { "epoch": 0.09214814814814815, "grad_norm": 0.9544039964675903, "learning_rate": 0.00018176426982950334, "loss": 1.2255, "step": 622 }, { "epoch": 0.09229629629629629, "grad_norm": 1.0860487222671509, "learning_rate": 0.00018173461823573018, "loss": 1.3558, "step": 623 }, { "epoch": 0.09244444444444444, "grad_norm": 1.07772958278656, "learning_rate": 0.00018170496664195702, "loss": 1.3927, "step": 624 }, { "epoch": 0.09259259259259259, "grad_norm": 0.7878835201263428, "learning_rate": 0.00018167531504818384, "loss": 1.2057, "step": 625 }, { "epoch": 0.09274074074074073, "grad_norm": 1.000695824623108, "learning_rate": 0.00018164566345441069, "loss": 1.0641, "step": 626 }, { "epoch": 0.09288888888888888, "grad_norm": 1.0378655195236206, "learning_rate": 0.00018161601186063753, "loss": 1.1221, "step": 627 }, { "epoch": 0.09303703703703704, "grad_norm": 1.1828982830047607, "learning_rate": 0.00018158636026686435, "loss": 1.1044, "step": 628 }, { "epoch": 0.09318518518518519, "grad_norm": 0.841529905796051, "learning_rate": 0.0001815567086730912, "loss": 1.0851, "step": 629 }, { "epoch": 
0.09333333333333334, "grad_norm": 0.9000698924064636, "learning_rate": 0.00018152705707931804, "loss": 1.2912, "step": 630 }, { "epoch": 0.09348148148148148, "grad_norm": 1.4891201257705688, "learning_rate": 0.00018149740548554485, "loss": 1.1035, "step": 631 }, { "epoch": 0.09362962962962963, "grad_norm": 0.7513223886489868, "learning_rate": 0.0001814677538917717, "loss": 1.0536, "step": 632 }, { "epoch": 0.09377777777777778, "grad_norm": 0.941815197467804, "learning_rate": 0.00018143810229799854, "loss": 1.0365, "step": 633 }, { "epoch": 0.09392592592592593, "grad_norm": 1.2023797035217285, "learning_rate": 0.00018140845070422536, "loss": 1.4159, "step": 634 }, { "epoch": 0.09407407407407407, "grad_norm": 1.007938027381897, "learning_rate": 0.00018137879911045218, "loss": 1.357, "step": 635 }, { "epoch": 0.09422222222222222, "grad_norm": 1.2334319353103638, "learning_rate": 0.00018134914751667905, "loss": 0.8031, "step": 636 }, { "epoch": 0.09437037037037037, "grad_norm": 1.6498353481292725, "learning_rate": 0.00018131949592290587, "loss": 1.4231, "step": 637 }, { "epoch": 0.09451851851851852, "grad_norm": 0.720307469367981, "learning_rate": 0.00018128984432913268, "loss": 0.8953, "step": 638 }, { "epoch": 0.09466666666666666, "grad_norm": 1.5557289123535156, "learning_rate": 0.00018126019273535953, "loss": 1.2389, "step": 639 }, { "epoch": 0.09481481481481481, "grad_norm": 1.052612543106079, "learning_rate": 0.00018123054114158637, "loss": 1.2308, "step": 640 }, { "epoch": 0.09496296296296296, "grad_norm": 0.9883959293365479, "learning_rate": 0.0001812008895478132, "loss": 1.1262, "step": 641 }, { "epoch": 0.0951111111111111, "grad_norm": 0.9268585443496704, "learning_rate": 0.00018117123795404003, "loss": 1.2834, "step": 642 }, { "epoch": 0.09525925925925927, "grad_norm": 1.0678269863128662, "learning_rate": 0.00018114158636026688, "loss": 1.1876, "step": 643 }, { "epoch": 0.09540740740740741, "grad_norm": 0.8583171963691711, "learning_rate": 
0.0001811119347664937, "loss": 1.1843, "step": 644 }, { "epoch": 0.09555555555555556, "grad_norm": 1.6480978727340698, "learning_rate": 0.00018108228317272054, "loss": 1.1791, "step": 645 }, { "epoch": 0.09570370370370371, "grad_norm": 0.9286375641822815, "learning_rate": 0.00018105263157894739, "loss": 1.0981, "step": 646 }, { "epoch": 0.09585185185185185, "grad_norm": 1.2206205129623413, "learning_rate": 0.0001810229799851742, "loss": 1.0437, "step": 647 }, { "epoch": 0.096, "grad_norm": 1.1012060642242432, "learning_rate": 0.00018099332839140105, "loss": 1.6352, "step": 648 }, { "epoch": 0.09614814814814815, "grad_norm": 0.9242655038833618, "learning_rate": 0.0001809636767976279, "loss": 1.3694, "step": 649 }, { "epoch": 0.0962962962962963, "grad_norm": 1.2646676301956177, "learning_rate": 0.0001809340252038547, "loss": 1.0467, "step": 650 }, { "epoch": 0.09644444444444444, "grad_norm": 1.1692533493041992, "learning_rate": 0.00018090437361008155, "loss": 1.2235, "step": 651 }, { "epoch": 0.09659259259259259, "grad_norm": 0.8595184087753296, "learning_rate": 0.0001808747220163084, "loss": 1.6275, "step": 652 }, { "epoch": 0.09674074074074074, "grad_norm": 1.010973572731018, "learning_rate": 0.00018084507042253522, "loss": 1.2827, "step": 653 }, { "epoch": 0.09688888888888889, "grad_norm": 0.9244717359542847, "learning_rate": 0.00018081541882876206, "loss": 0.8571, "step": 654 }, { "epoch": 0.09703703703703703, "grad_norm": 0.7265779376029968, "learning_rate": 0.00018078576723498888, "loss": 0.8999, "step": 655 }, { "epoch": 0.09718518518518518, "grad_norm": 0.7790964245796204, "learning_rate": 0.00018075611564121572, "loss": 1.0935, "step": 656 }, { "epoch": 0.09733333333333333, "grad_norm": 0.9619722962379456, "learning_rate": 0.00018072646404744257, "loss": 1.0339, "step": 657 }, { "epoch": 0.09748148148148149, "grad_norm": 1.0757215023040771, "learning_rate": 0.00018069681245366938, "loss": 1.1048, "step": 658 }, { "epoch": 0.09762962962962964, "grad_norm": 
1.017087459564209, "learning_rate": 0.00018066716085989623, "loss": 1.0618, "step": 659 }, { "epoch": 0.09777777777777778, "grad_norm": 1.15996515750885, "learning_rate": 0.00018063750926612307, "loss": 1.4437, "step": 660 }, { "epoch": 0.09792592592592593, "grad_norm": 1.0291752815246582, "learning_rate": 0.0001806078576723499, "loss": 1.3827, "step": 661 }, { "epoch": 0.09807407407407408, "grad_norm": 1.2072279453277588, "learning_rate": 0.00018057820607857673, "loss": 1.263, "step": 662 }, { "epoch": 0.09822222222222222, "grad_norm": 0.8682127594947815, "learning_rate": 0.00018054855448480358, "loss": 1.225, "step": 663 }, { "epoch": 0.09837037037037037, "grad_norm": 1.5121533870697021, "learning_rate": 0.0001805189028910304, "loss": 1.0637, "step": 664 }, { "epoch": 0.09851851851851852, "grad_norm": 1.2017015218734741, "learning_rate": 0.00018048925129725724, "loss": 1.348, "step": 665 }, { "epoch": 0.09866666666666667, "grad_norm": 0.8419830203056335, "learning_rate": 0.00018045959970348408, "loss": 1.0992, "step": 666 }, { "epoch": 0.09881481481481481, "grad_norm": 1.1485445499420166, "learning_rate": 0.0001804299481097109, "loss": 1.0252, "step": 667 }, { "epoch": 0.09896296296296296, "grad_norm": 1.7020227909088135, "learning_rate": 0.00018040029651593775, "loss": 1.1653, "step": 668 }, { "epoch": 0.09911111111111111, "grad_norm": 1.0807623863220215, "learning_rate": 0.0001803706449221646, "loss": 0.9006, "step": 669 }, { "epoch": 0.09925925925925926, "grad_norm": 1.100339651107788, "learning_rate": 0.0001803409933283914, "loss": 1.2011, "step": 670 }, { "epoch": 0.0994074074074074, "grad_norm": 1.3941802978515625, "learning_rate": 0.00018031134173461825, "loss": 1.3815, "step": 671 }, { "epoch": 0.09955555555555555, "grad_norm": 0.8945662379264832, "learning_rate": 0.00018028169014084507, "loss": 1.2971, "step": 672 }, { "epoch": 0.0997037037037037, "grad_norm": 0.9066154360771179, "learning_rate": 0.00018025203854707191, "loss": 1.4333, "step": 673 }, { 
"epoch": 0.09985185185185186, "grad_norm": 1.3932311534881592, "learning_rate": 0.00018022238695329873, "loss": 1.3624, "step": 674 }, { "epoch": 0.1, "grad_norm": 1.272756814956665, "learning_rate": 0.00018019273535952558, "loss": 1.5097, "step": 675 }, { "epoch": 0.10014814814814815, "grad_norm": 1.024742841720581, "learning_rate": 0.00018016308376575242, "loss": 1.1012, "step": 676 }, { "epoch": 0.1002962962962963, "grad_norm": 1.3817062377929688, "learning_rate": 0.00018013343217197924, "loss": 1.1021, "step": 677 }, { "epoch": 0.10044444444444445, "grad_norm": 1.0953186750411987, "learning_rate": 0.00018010378057820608, "loss": 1.2881, "step": 678 }, { "epoch": 0.1005925925925926, "grad_norm": 1.1058462858200073, "learning_rate": 0.00018007412898443293, "loss": 1.1268, "step": 679 }, { "epoch": 0.10074074074074074, "grad_norm": 0.9799386858940125, "learning_rate": 0.00018004447739065974, "loss": 1.0524, "step": 680 }, { "epoch": 0.10088888888888889, "grad_norm": 4.252458572387695, "learning_rate": 0.0001800148257968866, "loss": 1.0699, "step": 681 }, { "epoch": 0.10103703703703704, "grad_norm": 0.836798369884491, "learning_rate": 0.00017998517420311343, "loss": 0.8132, "step": 682 }, { "epoch": 0.10118518518518518, "grad_norm": 1.1359460353851318, "learning_rate": 0.00017995552260934025, "loss": 1.1005, "step": 683 }, { "epoch": 0.10133333333333333, "grad_norm": 0.8894252181053162, "learning_rate": 0.0001799258710155671, "loss": 1.0666, "step": 684 }, { "epoch": 0.10148148148148148, "grad_norm": 1.1909353733062744, "learning_rate": 0.00017989621942179394, "loss": 1.3341, "step": 685 }, { "epoch": 0.10162962962962963, "grad_norm": 1.236089825630188, "learning_rate": 0.00017986656782802076, "loss": 1.0689, "step": 686 }, { "epoch": 0.10177777777777777, "grad_norm": 1.7130799293518066, "learning_rate": 0.0001798369162342476, "loss": 0.9953, "step": 687 }, { "epoch": 0.10192592592592592, "grad_norm": 1.1958650350570679, "learning_rate": 0.00017980726464047445, 
"loss": 1.2324, "step": 688 }, { "epoch": 0.10207407407407408, "grad_norm": 1.11331045627594, "learning_rate": 0.00017977761304670126, "loss": 1.1123, "step": 689 }, { "epoch": 0.10222222222222223, "grad_norm": 1.2462921142578125, "learning_rate": 0.00017974796145292808, "loss": 1.1015, "step": 690 }, { "epoch": 0.10237037037037038, "grad_norm": 1.2919429540634155, "learning_rate": 0.00017971830985915495, "loss": 1.0648, "step": 691 }, { "epoch": 0.10251851851851852, "grad_norm": 1.6416014432907104, "learning_rate": 0.00017968865826538177, "loss": 1.2218, "step": 692 }, { "epoch": 0.10266666666666667, "grad_norm": 1.135933756828308, "learning_rate": 0.0001796590066716086, "loss": 1.2607, "step": 693 }, { "epoch": 0.10281481481481482, "grad_norm": 1.9447219371795654, "learning_rate": 0.00017962935507783546, "loss": 1.0926, "step": 694 }, { "epoch": 0.10296296296296296, "grad_norm": 2.0542564392089844, "learning_rate": 0.00017959970348406228, "loss": 1.1081, "step": 695 }, { "epoch": 0.10311111111111111, "grad_norm": 1.4082337617874146, "learning_rate": 0.0001795700518902891, "loss": 1.3908, "step": 696 }, { "epoch": 0.10325925925925926, "grad_norm": 1.6165364980697632, "learning_rate": 0.00017954040029651596, "loss": 1.2273, "step": 697 }, { "epoch": 0.1034074074074074, "grad_norm": 0.9326266646385193, "learning_rate": 0.00017951074870274278, "loss": 0.9182, "step": 698 }, { "epoch": 0.10355555555555555, "grad_norm": 2.343601942062378, "learning_rate": 0.0001794810971089696, "loss": 0.9752, "step": 699 }, { "epoch": 0.1037037037037037, "grad_norm": 1.147686243057251, "learning_rate": 0.00017945144551519647, "loss": 1.0687, "step": 700 }, { "epoch": 0.10385185185185185, "grad_norm": 1.8303850889205933, "learning_rate": 0.0001794217939214233, "loss": 0.998, "step": 701 }, { "epoch": 0.104, "grad_norm": 1.1002910137176514, "learning_rate": 0.0001793921423276501, "loss": 1.1393, "step": 702 }, { "epoch": 0.10414814814814814, "grad_norm": 1.217954397201538, 
"learning_rate": 0.00017936249073387698, "loss": 1.1456, "step": 703 }, { "epoch": 0.10429629629629629, "grad_norm": 0.9953997135162354, "learning_rate": 0.0001793328391401038, "loss": 1.1105, "step": 704 }, { "epoch": 0.10444444444444445, "grad_norm": 0.8896104097366333, "learning_rate": 0.0001793031875463306, "loss": 1.234, "step": 705 }, { "epoch": 0.1045925925925926, "grad_norm": 1.017242670059204, "learning_rate": 0.00017927353595255746, "loss": 1.0124, "step": 706 }, { "epoch": 0.10474074074074075, "grad_norm": 1.1732721328735352, "learning_rate": 0.0001792438843587843, "loss": 1.109, "step": 707 }, { "epoch": 0.10488888888888889, "grad_norm": 1.5762159824371338, "learning_rate": 0.00017921423276501112, "loss": 1.0034, "step": 708 }, { "epoch": 0.10503703703703704, "grad_norm": 0.991986870765686, "learning_rate": 0.00017918458117123796, "loss": 1.1069, "step": 709 }, { "epoch": 0.10518518518518519, "grad_norm": 0.9856958389282227, "learning_rate": 0.0001791549295774648, "loss": 1.2329, "step": 710 }, { "epoch": 0.10533333333333333, "grad_norm": 1.2896099090576172, "learning_rate": 0.00017912527798369162, "loss": 1.1182, "step": 711 }, { "epoch": 0.10548148148148148, "grad_norm": 1.8374145030975342, "learning_rate": 0.00017909562638991847, "loss": 1.1333, "step": 712 }, { "epoch": 0.10562962962962963, "grad_norm": 1.0625301599502563, "learning_rate": 0.00017906597479614531, "loss": 1.1764, "step": 713 }, { "epoch": 0.10577777777777778, "grad_norm": 1.678533673286438, "learning_rate": 0.00017903632320237213, "loss": 1.1756, "step": 714 }, { "epoch": 0.10592592592592592, "grad_norm": 1.2041304111480713, "learning_rate": 0.00017900667160859898, "loss": 1.1431, "step": 715 }, { "epoch": 0.10607407407407407, "grad_norm": 1.6350512504577637, "learning_rate": 0.00017897702001482582, "loss": 0.9288, "step": 716 }, { "epoch": 0.10622222222222222, "grad_norm": 0.8590714931488037, "learning_rate": 0.00017894736842105264, "loss": 0.9684, "step": 717 }, { "epoch": 
0.10637037037037037, "grad_norm": 1.4090867042541504, "learning_rate": 0.00017891771682727948, "loss": 1.0675, "step": 718 }, { "epoch": 0.10651851851851851, "grad_norm": 0.9573653340339661, "learning_rate": 0.00017888806523350633, "loss": 1.3571, "step": 719 }, { "epoch": 0.10666666666666667, "grad_norm": 1.591746211051941, "learning_rate": 0.00017885841363973314, "loss": 1.2832, "step": 720 }, { "epoch": 0.10681481481481482, "grad_norm": 1.0578999519348145, "learning_rate": 0.00017882876204595996, "loss": 1.3521, "step": 721 }, { "epoch": 0.10696296296296297, "grad_norm": 1.2212194204330444, "learning_rate": 0.00017879911045218683, "loss": 1.5566, "step": 722 }, { "epoch": 0.10711111111111112, "grad_norm": 0.9653564691543579, "learning_rate": 0.00017876945885841365, "loss": 1.3195, "step": 723 }, { "epoch": 0.10725925925925926, "grad_norm": 1.4364689588546753, "learning_rate": 0.00017873980726464047, "loss": 1.2876, "step": 724 }, { "epoch": 0.10740740740740741, "grad_norm": 1.106291651725769, "learning_rate": 0.0001787101556708673, "loss": 1.4856, "step": 725 }, { "epoch": 0.10755555555555556, "grad_norm": 1.1380616426467896, "learning_rate": 0.00017868050407709416, "loss": 1.2512, "step": 726 }, { "epoch": 0.1077037037037037, "grad_norm": 1.0685898065567017, "learning_rate": 0.00017865085248332097, "loss": 1.2495, "step": 727 }, { "epoch": 0.10785185185185185, "grad_norm": 1.0301077365875244, "learning_rate": 0.00017862120088954782, "loss": 1.17, "step": 728 }, { "epoch": 0.108, "grad_norm": 1.0483813285827637, "learning_rate": 0.00017859154929577466, "loss": 0.995, "step": 729 }, { "epoch": 0.10814814814814815, "grad_norm": 0.8973188400268555, "learning_rate": 0.00017856189770200148, "loss": 1.1569, "step": 730 }, { "epoch": 0.1082962962962963, "grad_norm": 0.9379794597625732, "learning_rate": 0.00017853224610822832, "loss": 1.2935, "step": 731 }, { "epoch": 0.10844444444444444, "grad_norm": 1.0541515350341797, "learning_rate": 0.00017850259451445517, "loss": 
1.3934, "step": 732 }, { "epoch": 0.10859259259259259, "grad_norm": 0.9139310121536255, "learning_rate": 0.00017847294292068199, "loss": 0.9618, "step": 733 }, { "epoch": 0.10874074074074074, "grad_norm": 2.54887056350708, "learning_rate": 0.00017844329132690883, "loss": 1.1157, "step": 734 }, { "epoch": 0.10888888888888888, "grad_norm": 1.2888338565826416, "learning_rate": 0.00017841363973313567, "loss": 1.1304, "step": 735 }, { "epoch": 0.10903703703703704, "grad_norm": 1.361585259437561, "learning_rate": 0.0001783839881393625, "loss": 1.116, "step": 736 }, { "epoch": 0.10918518518518519, "grad_norm": 1.1790413856506348, "learning_rate": 0.00017835433654558934, "loss": 0.9679, "step": 737 }, { "epoch": 0.10933333333333334, "grad_norm": 0.9589934349060059, "learning_rate": 0.00017832468495181618, "loss": 1.2085, "step": 738 }, { "epoch": 0.10948148148148149, "grad_norm": 1.2599700689315796, "learning_rate": 0.000178295033358043, "loss": 1.2303, "step": 739 }, { "epoch": 0.10962962962962963, "grad_norm": 2.133882761001587, "learning_rate": 0.00017826538176426984, "loss": 1.2466, "step": 740 }, { "epoch": 0.10977777777777778, "grad_norm": 1.2228134870529175, "learning_rate": 0.00017823573017049666, "loss": 1.4695, "step": 741 }, { "epoch": 0.10992592592592593, "grad_norm": 1.8407241106033325, "learning_rate": 0.0001782060785767235, "loss": 0.9995, "step": 742 }, { "epoch": 0.11007407407407407, "grad_norm": 1.3531813621520996, "learning_rate": 0.00017817642698295035, "loss": 1.2148, "step": 743 }, { "epoch": 0.11022222222222222, "grad_norm": 1.0597742795944214, "learning_rate": 0.00017814677538917717, "loss": 1.2799, "step": 744 }, { "epoch": 0.11037037037037037, "grad_norm": 1.049060583114624, "learning_rate": 0.000178117123795404, "loss": 0.9831, "step": 745 }, { "epoch": 0.11051851851851852, "grad_norm": 1.0642156600952148, "learning_rate": 0.00017808747220163086, "loss": 1.2659, "step": 746 }, { "epoch": 0.11066666666666666, "grad_norm": 1.0166077613830566, 
"learning_rate": 0.00017805782060785767, "loss": 1.1209, "step": 747 }, { "epoch": 0.11081481481481481, "grad_norm": 1.0508230924606323, "learning_rate": 0.00017802816901408452, "loss": 1.1257, "step": 748 }, { "epoch": 0.11096296296296296, "grad_norm": 1.1801953315734863, "learning_rate": 0.00017799851742031136, "loss": 1.2034, "step": 749 }, { "epoch": 0.1111111111111111, "grad_norm": 1.217050552368164, "learning_rate": 0.00017796886582653818, "loss": 1.1783, "step": 750 }, { "epoch": 0.11125925925925927, "grad_norm": 1.0050989389419556, "learning_rate": 0.00017793921423276502, "loss": 0.973, "step": 751 }, { "epoch": 0.11140740740740741, "grad_norm": 1.00999116897583, "learning_rate": 0.00017790956263899187, "loss": 1.0116, "step": 752 }, { "epoch": 0.11155555555555556, "grad_norm": 1.4370405673980713, "learning_rate": 0.00017787991104521869, "loss": 1.1189, "step": 753 }, { "epoch": 0.11170370370370371, "grad_norm": 1.6145302057266235, "learning_rate": 0.00017785025945144553, "loss": 1.2394, "step": 754 }, { "epoch": 0.11185185185185186, "grad_norm": 1.486937165260315, "learning_rate": 0.00017782060785767237, "loss": 1.2607, "step": 755 }, { "epoch": 0.112, "grad_norm": 1.037169098854065, "learning_rate": 0.0001777909562638992, "loss": 1.1249, "step": 756 }, { "epoch": 0.11214814814814815, "grad_norm": 1.543372631072998, "learning_rate": 0.00017776130467012604, "loss": 1.1736, "step": 757 }, { "epoch": 0.1122962962962963, "grad_norm": 1.5751320123672485, "learning_rate": 0.00017773165307635285, "loss": 1.3599, "step": 758 }, { "epoch": 0.11244444444444444, "grad_norm": 0.9869830012321472, "learning_rate": 0.0001777020014825797, "loss": 1.2253, "step": 759 }, { "epoch": 0.11259259259259259, "grad_norm": 1.6567195653915405, "learning_rate": 0.00017767234988880652, "loss": 1.4151, "step": 760 }, { "epoch": 0.11274074074074074, "grad_norm": 1.5292116403579712, "learning_rate": 0.00017764269829503336, "loss": 1.2516, "step": 761 }, { "epoch": 0.11288888888888889, 
"grad_norm": 2.383601188659668, "learning_rate": 0.0001776130467012602, "loss": 1.2525, "step": 762 }, { "epoch": 0.11303703703703703, "grad_norm": 1.6173779964447021, "learning_rate": 0.00017758339510748702, "loss": 1.326, "step": 763 }, { "epoch": 0.11318518518518518, "grad_norm": 1.1858290433883667, "learning_rate": 0.00017755374351371387, "loss": 1.2835, "step": 764 }, { "epoch": 0.11333333333333333, "grad_norm": 1.27888023853302, "learning_rate": 0.0001775240919199407, "loss": 1.4655, "step": 765 }, { "epoch": 0.11348148148148147, "grad_norm": 4.245856761932373, "learning_rate": 0.00017749444032616753, "loss": 1.2797, "step": 766 }, { "epoch": 0.11362962962962964, "grad_norm": 1.5455471277236938, "learning_rate": 0.00017746478873239437, "loss": 1.1448, "step": 767 }, { "epoch": 0.11377777777777778, "grad_norm": 1.727730631828308, "learning_rate": 0.00017743513713862122, "loss": 1.055, "step": 768 }, { "epoch": 0.11392592592592593, "grad_norm": 2.2069005966186523, "learning_rate": 0.00017740548554484803, "loss": 1.1359, "step": 769 }, { "epoch": 0.11407407407407408, "grad_norm": 2.4828689098358154, "learning_rate": 0.00017737583395107488, "loss": 1.1874, "step": 770 }, { "epoch": 0.11422222222222222, "grad_norm": 2.375779867172241, "learning_rate": 0.00017734618235730172, "loss": 1.2403, "step": 771 }, { "epoch": 0.11437037037037037, "grad_norm": 2.210402250289917, "learning_rate": 0.00017731653076352854, "loss": 1.0164, "step": 772 }, { "epoch": 0.11451851851851852, "grad_norm": 1.034450888633728, "learning_rate": 0.00017728687916975538, "loss": 1.0978, "step": 773 }, { "epoch": 0.11466666666666667, "grad_norm": 2.2124218940734863, "learning_rate": 0.00017725722757598223, "loss": 1.1375, "step": 774 }, { "epoch": 0.11481481481481481, "grad_norm": 2.627687931060791, "learning_rate": 0.00017722757598220905, "loss": 1.1139, "step": 775 }, { "epoch": 0.11496296296296296, "grad_norm": 1.6046452522277832, "learning_rate": 0.00017719792438843586, "loss": 1.4081, 
"step": 776 }, { "epoch": 0.11511111111111111, "grad_norm": 2.07920241355896, "learning_rate": 0.00017716827279466274, "loss": 0.9552, "step": 777 }, { "epoch": 0.11525925925925926, "grad_norm": 2.331041097640991, "learning_rate": 0.00017713862120088955, "loss": 1.396, "step": 778 }, { "epoch": 0.1154074074074074, "grad_norm": 4.453252792358398, "learning_rate": 0.00017710896960711637, "loss": 1.0503, "step": 779 }, { "epoch": 0.11555555555555555, "grad_norm": 2.3019750118255615, "learning_rate": 0.00017707931801334324, "loss": 0.9527, "step": 780 }, { "epoch": 0.1157037037037037, "grad_norm": 1.7517541646957397, "learning_rate": 0.00017704966641957006, "loss": 1.3174, "step": 781 }, { "epoch": 0.11585185185185186, "grad_norm": 2.5055603981018066, "learning_rate": 0.00017702001482579688, "loss": 1.0664, "step": 782 }, { "epoch": 0.116, "grad_norm": 1.181646466255188, "learning_rate": 0.00017699036323202375, "loss": 1.2119, "step": 783 }, { "epoch": 0.11614814814814815, "grad_norm": 1.1410211324691772, "learning_rate": 0.00017696071163825057, "loss": 1.232, "step": 784 }, { "epoch": 0.1162962962962963, "grad_norm": 1.6544922590255737, "learning_rate": 0.00017693106004447738, "loss": 1.1537, "step": 785 }, { "epoch": 0.11644444444444445, "grad_norm": 1.8878635168075562, "learning_rate": 0.00017690140845070425, "loss": 1.061, "step": 786 }, { "epoch": 0.1165925925925926, "grad_norm": 1.3190855979919434, "learning_rate": 0.00017687175685693107, "loss": 0.9475, "step": 787 }, { "epoch": 0.11674074074074074, "grad_norm": 1.0858039855957031, "learning_rate": 0.0001768421052631579, "loss": 1.2793, "step": 788 }, { "epoch": 0.11688888888888889, "grad_norm": 1.147354245185852, "learning_rate": 0.00017681245366938476, "loss": 0.9593, "step": 789 }, { "epoch": 0.11703703703703704, "grad_norm": 1.5271817445755005, "learning_rate": 0.00017678280207561158, "loss": 1.0165, "step": 790 }, { "epoch": 0.11718518518518518, "grad_norm": 2.1167054176330566, "learning_rate": 
0.0001767531504818384, "loss": 1.3867, "step": 791 }, { "epoch": 0.11733333333333333, "grad_norm": 1.4731441736221313, "learning_rate": 0.00017672349888806524, "loss": 1.2066, "step": 792 }, { "epoch": 0.11748148148148148, "grad_norm": 0.9553131461143494, "learning_rate": 0.00017669384729429208, "loss": 1.4145, "step": 793 }, { "epoch": 0.11762962962962963, "grad_norm": 6.962506294250488, "learning_rate": 0.0001766641957005189, "loss": 1.2297, "step": 794 }, { "epoch": 0.11777777777777777, "grad_norm": 1.519862174987793, "learning_rate": 0.00017663454410674575, "loss": 1.1655, "step": 795 }, { "epoch": 0.11792592592592592, "grad_norm": 1.0673969984054565, "learning_rate": 0.0001766048925129726, "loss": 1.1978, "step": 796 }, { "epoch": 0.11807407407407407, "grad_norm": 2.4118587970733643, "learning_rate": 0.0001765752409191994, "loss": 1.2152, "step": 797 }, { "epoch": 0.11822222222222223, "grad_norm": 1.513466477394104, "learning_rate": 0.00017654558932542625, "loss": 0.9631, "step": 798 }, { "epoch": 0.11837037037037038, "grad_norm": 1.4019991159439087, "learning_rate": 0.0001765159377316531, "loss": 1.0298, "step": 799 }, { "epoch": 0.11851851851851852, "grad_norm": 1.4815711975097656, "learning_rate": 0.00017648628613787991, "loss": 1.1588, "step": 800 }, { "epoch": 0.11866666666666667, "grad_norm": 2.318873882293701, "learning_rate": 0.00017645663454410676, "loss": 1.4133, "step": 801 }, { "epoch": 0.11881481481481482, "grad_norm": 1.6941622495651245, "learning_rate": 0.0001764269829503336, "loss": 1.1509, "step": 802 }, { "epoch": 0.11896296296296296, "grad_norm": 0.9023726582527161, "learning_rate": 0.00017639733135656042, "loss": 1.1994, "step": 803 }, { "epoch": 0.11911111111111111, "grad_norm": 2.4571475982666016, "learning_rate": 0.00017636767976278727, "loss": 1.0628, "step": 804 }, { "epoch": 0.11925925925925926, "grad_norm": 3.198491334915161, "learning_rate": 0.0001763380281690141, "loss": 1.0631, "step": 805 }, { "epoch": 0.1194074074074074, 
"grad_norm": 1.126410722732544, "learning_rate": 0.00017630837657524093, "loss": 1.2265, "step": 806 }, { "epoch": 0.11955555555555555, "grad_norm": 1.1916186809539795, "learning_rate": 0.00017627872498146774, "loss": 1.1347, "step": 807 }, { "epoch": 0.1197037037037037, "grad_norm": 1.1731302738189697, "learning_rate": 0.00017624907338769462, "loss": 0.9825, "step": 808 }, { "epoch": 0.11985185185185185, "grad_norm": 1.3539538383483887, "learning_rate": 0.00017621942179392143, "loss": 1.2031, "step": 809 }, { "epoch": 0.12, "grad_norm": 2.6558806896209717, "learning_rate": 0.00017618977020014825, "loss": 1.0797, "step": 810 }, { "epoch": 0.12014814814814814, "grad_norm": 2.5530829429626465, "learning_rate": 0.0001761601186063751, "loss": 1.0951, "step": 811 }, { "epoch": 0.12029629629629629, "grad_norm": 1.3450182676315308, "learning_rate": 0.00017613046701260194, "loss": 1.3404, "step": 812 }, { "epoch": 0.12044444444444445, "grad_norm": 1.4826587438583374, "learning_rate": 0.00017610081541882876, "loss": 1.3364, "step": 813 }, { "epoch": 0.1205925925925926, "grad_norm": 1.238110065460205, "learning_rate": 0.0001760711638250556, "loss": 1.2395, "step": 814 }, { "epoch": 0.12074074074074075, "grad_norm": 1.6360697746276855, "learning_rate": 0.00017604151223128245, "loss": 1.3256, "step": 815 }, { "epoch": 0.12088888888888889, "grad_norm": 2.127264976501465, "learning_rate": 0.00017601186063750926, "loss": 1.2677, "step": 816 }, { "epoch": 0.12103703703703704, "grad_norm": 2.0152833461761475, "learning_rate": 0.0001759822090437361, "loss": 1.1359, "step": 817 }, { "epoch": 0.12118518518518519, "grad_norm": 1.3907021284103394, "learning_rate": 0.00017595255744996295, "loss": 1.1803, "step": 818 }, { "epoch": 0.12133333333333333, "grad_norm": 1.9472498893737793, "learning_rate": 0.00017592290585618977, "loss": 1.1863, "step": 819 }, { "epoch": 0.12148148148148148, "grad_norm": 1.522857666015625, "learning_rate": 0.00017589325426241661, "loss": 1.1596, "step": 820 }, 
{ "epoch": 0.12162962962962963, "grad_norm": 1.0325987339019775, "learning_rate": 0.00017586360266864346, "loss": 0.9308, "step": 821 }, { "epoch": 0.12177777777777778, "grad_norm": 1.7727866172790527, "learning_rate": 0.00017583395107487028, "loss": 1.2795, "step": 822 }, { "epoch": 0.12192592592592592, "grad_norm": 1.6282867193222046, "learning_rate": 0.00017580429948109712, "loss": 1.1801, "step": 823 }, { "epoch": 0.12207407407407407, "grad_norm": 1.6989792585372925, "learning_rate": 0.00017577464788732396, "loss": 1.068, "step": 824 }, { "epoch": 0.12222222222222222, "grad_norm": 1.0644278526306152, "learning_rate": 0.00017574499629355078, "loss": 1.0973, "step": 825 }, { "epoch": 0.12237037037037037, "grad_norm": 0.9775753617286682, "learning_rate": 0.00017571534469977763, "loss": 1.2895, "step": 826 }, { "epoch": 0.12251851851851851, "grad_norm": 1.5573720932006836, "learning_rate": 0.00017568569310600444, "loss": 1.0362, "step": 827 }, { "epoch": 0.12266666666666666, "grad_norm": 1.888784408569336, "learning_rate": 0.0001756560415122313, "loss": 1.2377, "step": 828 }, { "epoch": 0.12281481481481482, "grad_norm": 1.6177492141723633, "learning_rate": 0.00017562638991845813, "loss": 1.2052, "step": 829 }, { "epoch": 0.12296296296296297, "grad_norm": 1.8377748727798462, "learning_rate": 0.00017559673832468495, "loss": 1.3072, "step": 830 }, { "epoch": 0.12311111111111112, "grad_norm": 1.541957139968872, "learning_rate": 0.0001755670867309118, "loss": 1.116, "step": 831 }, { "epoch": 0.12325925925925926, "grad_norm": 1.854828953742981, "learning_rate": 0.00017553743513713864, "loss": 1.1264, "step": 832 }, { "epoch": 0.12340740740740741, "grad_norm": 1.5496182441711426, "learning_rate": 0.00017550778354336546, "loss": 1.0248, "step": 833 }, { "epoch": 0.12355555555555556, "grad_norm": 1.3529809713363647, "learning_rate": 0.0001754781319495923, "loss": 1.0269, "step": 834 }, { "epoch": 0.1237037037037037, "grad_norm": 1.4036271572113037, "learning_rate": 
0.00017544848035581915, "loss": 1.1804, "step": 835 }, { "epoch": 0.12385185185185185, "grad_norm": 1.0726540088653564, "learning_rate": 0.00017541882876204596, "loss": 1.1089, "step": 836 }, { "epoch": 0.124, "grad_norm": 1.2027060985565186, "learning_rate": 0.0001753891771682728, "loss": 1.1218, "step": 837 }, { "epoch": 0.12414814814814815, "grad_norm": 1.0356816053390503, "learning_rate": 0.00017535952557449965, "loss": 1.0432, "step": 838 }, { "epoch": 0.1242962962962963, "grad_norm": 1.271623134613037, "learning_rate": 0.00017532987398072647, "loss": 1.246, "step": 839 }, { "epoch": 0.12444444444444444, "grad_norm": 1.7520036697387695, "learning_rate": 0.0001753002223869533, "loss": 1.1882, "step": 840 }, { "epoch": 0.12459259259259259, "grad_norm": 1.689392328262329, "learning_rate": 0.00017527057079318016, "loss": 1.0743, "step": 841 }, { "epoch": 0.12474074074074074, "grad_norm": 4.215272426605225, "learning_rate": 0.00017524091919940698, "loss": 0.983, "step": 842 }, { "epoch": 0.12488888888888888, "grad_norm": 1.8404631614685059, "learning_rate": 0.00017521126760563382, "loss": 1.0592, "step": 843 }, { "epoch": 0.12503703703703703, "grad_norm": 1.393333077430725, "learning_rate": 0.00017518161601186064, "loss": 1.568, "step": 844 }, { "epoch": 0.12518518518518518, "grad_norm": 1.4102659225463867, "learning_rate": 0.00017515196441808748, "loss": 1.1276, "step": 845 }, { "epoch": 0.12533333333333332, "grad_norm": 1.6933660507202148, "learning_rate": 0.0001751223128243143, "loss": 1.2158, "step": 846 }, { "epoch": 0.12548148148148147, "grad_norm": 2.3846960067749023, "learning_rate": 0.00017509266123054114, "loss": 1.1242, "step": 847 }, { "epoch": 0.12562962962962962, "grad_norm": 1.5024141073226929, "learning_rate": 0.000175063009636768, "loss": 1.1528, "step": 848 }, { "epoch": 0.12577777777777777, "grad_norm": 2.4547317028045654, "learning_rate": 0.0001750333580429948, "loss": 1.0205, "step": 849 }, { "epoch": 0.1259259259259259, "grad_norm": 
0.8433824181556702, "learning_rate": 0.00017500370644922165, "loss": 1.0368, "step": 850 }, { "epoch": 0.12607407407407406, "grad_norm": 1.5638988018035889, "learning_rate": 0.0001749740548554485, "loss": 1.2009, "step": 851 }, { "epoch": 0.12622222222222224, "grad_norm": 2.30916690826416, "learning_rate": 0.0001749444032616753, "loss": 1.1026, "step": 852 }, { "epoch": 0.12637037037037038, "grad_norm": 1.8860763311386108, "learning_rate": 0.00017491475166790216, "loss": 0.9593, "step": 853 }, { "epoch": 0.12651851851851853, "grad_norm": 1.499836802482605, "learning_rate": 0.000174885100074129, "loss": 1.0407, "step": 854 }, { "epoch": 0.12666666666666668, "grad_norm": 1.9133578538894653, "learning_rate": 0.00017485544848035582, "loss": 1.2543, "step": 855 }, { "epoch": 0.12681481481481482, "grad_norm": 1.678727149963379, "learning_rate": 0.00017482579688658266, "loss": 1.0556, "step": 856 }, { "epoch": 0.12696296296296297, "grad_norm": 1.3053762912750244, "learning_rate": 0.0001747961452928095, "loss": 1.057, "step": 857 }, { "epoch": 0.12711111111111112, "grad_norm": 1.5819666385650635, "learning_rate": 0.00017476649369903632, "loss": 1.3151, "step": 858 }, { "epoch": 0.12725925925925927, "grad_norm": 1.497564435005188, "learning_rate": 0.00017473684210526317, "loss": 1.1288, "step": 859 }, { "epoch": 0.1274074074074074, "grad_norm": 1.2003209590911865, "learning_rate": 0.00017470719051149, "loss": 1.2003, "step": 860 }, { "epoch": 0.12755555555555556, "grad_norm": 1.4014747142791748, "learning_rate": 0.00017467753891771683, "loss": 1.0234, "step": 861 }, { "epoch": 0.1277037037037037, "grad_norm": 1.9688879251480103, "learning_rate": 0.00017464788732394365, "loss": 1.0934, "step": 862 }, { "epoch": 0.12785185185185186, "grad_norm": 1.6965707540512085, "learning_rate": 0.00017461823573017052, "loss": 1.2557, "step": 863 }, { "epoch": 0.128, "grad_norm": 2.1013545989990234, "learning_rate": 0.00017458858413639734, "loss": 1.0513, "step": 864 }, { "epoch": 
0.12814814814814815, "grad_norm": 1.8798348903656006, "learning_rate": 0.00017455893254262415, "loss": 1.3548, "step": 865 }, { "epoch": 0.1282962962962963, "grad_norm": 2.6154160499572754, "learning_rate": 0.00017452928094885103, "loss": 1.0156, "step": 866 }, { "epoch": 0.12844444444444444, "grad_norm": 1.7234320640563965, "learning_rate": 0.00017449962935507784, "loss": 1.0047, "step": 867 }, { "epoch": 0.1285925925925926, "grad_norm": 2.064502716064453, "learning_rate": 0.00017446997776130466, "loss": 0.9285, "step": 868 }, { "epoch": 0.12874074074074074, "grad_norm": 1.2932170629501343, "learning_rate": 0.00017444032616753153, "loss": 1.3768, "step": 869 }, { "epoch": 0.1288888888888889, "grad_norm": 1.4662041664123535, "learning_rate": 0.00017441067457375835, "loss": 1.3052, "step": 870 }, { "epoch": 0.12903703703703703, "grad_norm": 2.8357980251312256, "learning_rate": 0.00017438102297998517, "loss": 1.0833, "step": 871 }, { "epoch": 0.12918518518518518, "grad_norm": 1.3351575136184692, "learning_rate": 0.00017435137138621204, "loss": 1.29, "step": 872 }, { "epoch": 0.12933333333333333, "grad_norm": 0.9936057925224304, "learning_rate": 0.00017432171979243886, "loss": 1.1774, "step": 873 }, { "epoch": 0.12948148148148148, "grad_norm": 1.349957823753357, "learning_rate": 0.00017429206819866567, "loss": 1.0534, "step": 874 }, { "epoch": 0.12962962962962962, "grad_norm": 2.102526903152466, "learning_rate": 0.00017426241660489254, "loss": 1.1639, "step": 875 }, { "epoch": 0.12977777777777777, "grad_norm": 2.4069101810455322, "learning_rate": 0.00017423276501111936, "loss": 1.2314, "step": 876 }, { "epoch": 0.12992592592592592, "grad_norm": 2.2288312911987305, "learning_rate": 0.00017420311341734618, "loss": 1.3554, "step": 877 }, { "epoch": 0.13007407407407406, "grad_norm": 1.6635648012161255, "learning_rate": 0.00017417346182357302, "loss": 1.3372, "step": 878 }, { "epoch": 0.1302222222222222, "grad_norm": 2.2414519786834717, "learning_rate": 
0.00017414381022979987, "loss": 1.067, "step": 879 }, { "epoch": 0.13037037037037036, "grad_norm": 3.241548538208008, "learning_rate": 0.00017411415863602669, "loss": 1.0293, "step": 880 }, { "epoch": 0.1305185185185185, "grad_norm": 1.651780605316162, "learning_rate": 0.00017408450704225353, "loss": 1.0491, "step": 881 }, { "epoch": 0.13066666666666665, "grad_norm": 1.4641205072402954, "learning_rate": 0.00017405485544848037, "loss": 1.1071, "step": 882 }, { "epoch": 0.13081481481481483, "grad_norm": 1.5232881307601929, "learning_rate": 0.0001740252038547072, "loss": 1.1261, "step": 883 }, { "epoch": 0.13096296296296298, "grad_norm": 1.9777206182479858, "learning_rate": 0.00017399555226093404, "loss": 1.1046, "step": 884 }, { "epoch": 0.13111111111111112, "grad_norm": 1.5309432744979858, "learning_rate": 0.00017396590066716088, "loss": 1.1768, "step": 885 }, { "epoch": 0.13125925925925927, "grad_norm": 1.988309621810913, "learning_rate": 0.0001739362490733877, "loss": 1.1637, "step": 886 }, { "epoch": 0.13140740740740742, "grad_norm": 1.3992940187454224, "learning_rate": 0.00017390659747961454, "loss": 1.1298, "step": 887 }, { "epoch": 0.13155555555555556, "grad_norm": 1.1663585901260376, "learning_rate": 0.0001738769458858414, "loss": 1.1667, "step": 888 }, { "epoch": 0.1317037037037037, "grad_norm": 1.6041878461837769, "learning_rate": 0.0001738472942920682, "loss": 1.2966, "step": 889 }, { "epoch": 0.13185185185185186, "grad_norm": 1.4323391914367676, "learning_rate": 0.00017381764269829505, "loss": 1.4035, "step": 890 }, { "epoch": 0.132, "grad_norm": 1.9315614700317383, "learning_rate": 0.0001737879911045219, "loss": 1.2564, "step": 891 }, { "epoch": 0.13214814814814815, "grad_norm": 1.3660720586776733, "learning_rate": 0.0001737583395107487, "loss": 1.0422, "step": 892 }, { "epoch": 0.1322962962962963, "grad_norm": 1.206908941268921, "learning_rate": 0.00017372868791697553, "loss": 1.1346, "step": 893 }, { "epoch": 0.13244444444444445, "grad_norm": 
2.7856850624084473, "learning_rate": 0.0001736990363232024, "loss": 1.1058, "step": 894 }, { "epoch": 0.1325925925925926, "grad_norm": 1.9803142547607422, "learning_rate": 0.00017366938472942922, "loss": 1.1389, "step": 895 }, { "epoch": 0.13274074074074074, "grad_norm": 2.0166566371917725, "learning_rate": 0.00017363973313565603, "loss": 0.9425, "step": 896 }, { "epoch": 0.1328888888888889, "grad_norm": 1.5326697826385498, "learning_rate": 0.00017361008154188288, "loss": 1.3628, "step": 897 }, { "epoch": 0.13303703703703704, "grad_norm": 1.4127566814422607, "learning_rate": 0.00017358042994810972, "loss": 1.1228, "step": 898 }, { "epoch": 0.13318518518518518, "grad_norm": 1.0187325477600098, "learning_rate": 0.00017355077835433654, "loss": 1.0937, "step": 899 }, { "epoch": 0.13333333333333333, "grad_norm": 1.1690553426742554, "learning_rate": 0.00017352112676056338, "loss": 0.9765, "step": 900 }, { "epoch": 0.13348148148148148, "grad_norm": 2.1607093811035156, "learning_rate": 0.00017349147516679023, "loss": 1.089, "step": 901 }, { "epoch": 0.13362962962962963, "grad_norm": 1.9213364124298096, "learning_rate": 0.00017346182357301705, "loss": 1.2745, "step": 902 }, { "epoch": 0.13377777777777777, "grad_norm": 2.478525161743164, "learning_rate": 0.0001734321719792439, "loss": 0.9527, "step": 903 }, { "epoch": 0.13392592592592592, "grad_norm": 1.3539713621139526, "learning_rate": 0.00017340252038547074, "loss": 1.3241, "step": 904 }, { "epoch": 0.13407407407407407, "grad_norm": 1.1803101301193237, "learning_rate": 0.00017337286879169755, "loss": 1.1488, "step": 905 }, { "epoch": 0.13422222222222221, "grad_norm": 1.7030516862869263, "learning_rate": 0.0001733432171979244, "loss": 1.5023, "step": 906 }, { "epoch": 0.13437037037037036, "grad_norm": 1.156157374382019, "learning_rate": 0.00017331356560415124, "loss": 1.15, "step": 907 }, { "epoch": 0.1345185185185185, "grad_norm": 1.3928496837615967, "learning_rate": 0.00017328391401037806, "loss": 1.0722, "step": 908 }, 
{ "epoch": 0.13466666666666666, "grad_norm": 1.8104082345962524, "learning_rate": 0.0001732542624166049, "loss": 1.1799, "step": 909 }, { "epoch": 0.1348148148148148, "grad_norm": 1.854732871055603, "learning_rate": 0.00017322461082283175, "loss": 0.84, "step": 910 }, { "epoch": 0.13496296296296295, "grad_norm": 1.7013317346572876, "learning_rate": 0.00017319495922905857, "loss": 1.2079, "step": 911 }, { "epoch": 0.1351111111111111, "grad_norm": 1.8916053771972656, "learning_rate": 0.0001731653076352854, "loss": 1.1653, "step": 912 }, { "epoch": 0.13525925925925925, "grad_norm": 1.6114717721939087, "learning_rate": 0.00017313565604151223, "loss": 1.1645, "step": 913 }, { "epoch": 0.13540740740740742, "grad_norm": 4.8172478675842285, "learning_rate": 0.00017310600444773907, "loss": 1.1203, "step": 914 }, { "epoch": 0.13555555555555557, "grad_norm": 2.4642679691314697, "learning_rate": 0.00017307635285396592, "loss": 1.3937, "step": 915 }, { "epoch": 0.13570370370370372, "grad_norm": 1.3399279117584229, "learning_rate": 0.00017304670126019273, "loss": 0.9886, "step": 916 }, { "epoch": 0.13585185185185186, "grad_norm": 1.9045206308364868, "learning_rate": 0.00017301704966641958, "loss": 0.9348, "step": 917 }, { "epoch": 0.136, "grad_norm": 1.8310236930847168, "learning_rate": 0.00017298739807264642, "loss": 1.1142, "step": 918 }, { "epoch": 0.13614814814814816, "grad_norm": 1.48021399974823, "learning_rate": 0.00017295774647887324, "loss": 1.0913, "step": 919 }, { "epoch": 0.1362962962962963, "grad_norm": 1.868577003479004, "learning_rate": 0.00017292809488510008, "loss": 1.0717, "step": 920 }, { "epoch": 0.13644444444444445, "grad_norm": 3.953855276107788, "learning_rate": 0.00017289844329132693, "loss": 1.1454, "step": 921 }, { "epoch": 0.1365925925925926, "grad_norm": 2.0253727436065674, "learning_rate": 0.00017286879169755375, "loss": 1.1603, "step": 922 }, { "epoch": 0.13674074074074075, "grad_norm": 1.7948057651519775, "learning_rate": 0.0001728391401037806, 
"loss": 0.854, "step": 923 }, { "epoch": 0.1368888888888889, "grad_norm": 1.3979237079620361, "learning_rate": 0.00017280948851000743, "loss": 1.297, "step": 924 }, { "epoch": 0.13703703703703704, "grad_norm": 2.3212087154388428, "learning_rate": 0.00017277983691623425, "loss": 1.1407, "step": 925 }, { "epoch": 0.1371851851851852, "grad_norm": 1.4935330152511597, "learning_rate": 0.0001727501853224611, "loss": 1.0982, "step": 926 }, { "epoch": 0.13733333333333334, "grad_norm": 1.5073734521865845, "learning_rate": 0.00017272053372868794, "loss": 1.2125, "step": 927 }, { "epoch": 0.13748148148148148, "grad_norm": 1.8712886571884155, "learning_rate": 0.00017269088213491476, "loss": 0.9262, "step": 928 }, { "epoch": 0.13762962962962963, "grad_norm": 1.6642515659332275, "learning_rate": 0.0001726612305411416, "loss": 1.2041, "step": 929 }, { "epoch": 0.13777777777777778, "grad_norm": 1.1925135850906372, "learning_rate": 0.00017263157894736842, "loss": 0.8896, "step": 930 }, { "epoch": 0.13792592592592592, "grad_norm": 5.4231038093566895, "learning_rate": 0.00017260192735359526, "loss": 1.15, "step": 931 }, { "epoch": 0.13807407407407407, "grad_norm": 1.3681412935256958, "learning_rate": 0.00017257227575982208, "loss": 1.1461, "step": 932 }, { "epoch": 0.13822222222222222, "grad_norm": 1.0649001598358154, "learning_rate": 0.00017254262416604893, "loss": 1.1943, "step": 933 }, { "epoch": 0.13837037037037037, "grad_norm": 1.8697491884231567, "learning_rate": 0.00017251297257227577, "loss": 0.9972, "step": 934 }, { "epoch": 0.1385185185185185, "grad_norm": 1.898934006690979, "learning_rate": 0.0001724833209785026, "loss": 1.2559, "step": 935 }, { "epoch": 0.13866666666666666, "grad_norm": 2.1527063846588135, "learning_rate": 0.00017245366938472943, "loss": 1.4038, "step": 936 }, { "epoch": 0.1388148148148148, "grad_norm": 2.2986197471618652, "learning_rate": 0.00017242401779095628, "loss": 1.1437, "step": 937 }, { "epoch": 0.13896296296296295, "grad_norm": 
2.1097805500030518, "learning_rate": 0.0001723943661971831, "loss": 0.8753, "step": 938 }, { "epoch": 0.1391111111111111, "grad_norm": 2.792574167251587, "learning_rate": 0.00017236471460340994, "loss": 1.2432, "step": 939 }, { "epoch": 0.13925925925925925, "grad_norm": 1.5301166772842407, "learning_rate": 0.00017233506300963678, "loss": 1.2332, "step": 940 }, { "epoch": 0.1394074074074074, "grad_norm": 2.1605138778686523, "learning_rate": 0.0001723054114158636, "loss": 1.0934, "step": 941 }, { "epoch": 0.13955555555555554, "grad_norm": 7.574090003967285, "learning_rate": 0.00017227575982209045, "loss": 1.1196, "step": 942 }, { "epoch": 0.1397037037037037, "grad_norm": 1.6092199087142944, "learning_rate": 0.0001722461082283173, "loss": 1.047, "step": 943 }, { "epoch": 0.13985185185185184, "grad_norm": 2.296266794204712, "learning_rate": 0.0001722164566345441, "loss": 1.1549, "step": 944 }, { "epoch": 0.14, "grad_norm": 1.8765658140182495, "learning_rate": 0.00017218680504077095, "loss": 1.1803, "step": 945 }, { "epoch": 0.14014814814814816, "grad_norm": 2.1477468013763428, "learning_rate": 0.0001721571534469978, "loss": 1.2277, "step": 946 }, { "epoch": 0.1402962962962963, "grad_norm": 1.460718035697937, "learning_rate": 0.0001721275018532246, "loss": 1.3237, "step": 947 }, { "epoch": 0.14044444444444446, "grad_norm": 1.4240020513534546, "learning_rate": 0.00017209785025945143, "loss": 1.0038, "step": 948 }, { "epoch": 0.1405925925925926, "grad_norm": 1.8419314622879028, "learning_rate": 0.0001720681986656783, "loss": 1.3067, "step": 949 }, { "epoch": 0.14074074074074075, "grad_norm": 1.5516164302825928, "learning_rate": 0.00017203854707190512, "loss": 1.0963, "step": 950 }, { "epoch": 0.1408888888888889, "grad_norm": 2.192901134490967, "learning_rate": 0.00017200889547813194, "loss": 1.022, "step": 951 }, { "epoch": 0.14103703703703704, "grad_norm": 2.8840384483337402, "learning_rate": 0.0001719792438843588, "loss": 1.1251, "step": 952 }, { "epoch": 
0.1411851851851852, "grad_norm": 2.4674930572509766, "learning_rate": 0.00017194959229058563, "loss": 0.8607, "step": 953 }, { "epoch": 0.14133333333333334, "grad_norm": 1.5810648202896118, "learning_rate": 0.00017191994069681244, "loss": 1.0845, "step": 954 }, { "epoch": 0.14148148148148149, "grad_norm": 2.392319440841675, "learning_rate": 0.00017189028910303931, "loss": 1.1497, "step": 955 }, { "epoch": 0.14162962962962963, "grad_norm": 2.7800850868225098, "learning_rate": 0.00017186063750926613, "loss": 1.0349, "step": 956 }, { "epoch": 0.14177777777777778, "grad_norm": 2.4474740028381348, "learning_rate": 0.00017183098591549295, "loss": 1.1785, "step": 957 }, { "epoch": 0.14192592592592593, "grad_norm": 1.2785381078720093, "learning_rate": 0.00017180133432171982, "loss": 1.1121, "step": 958 }, { "epoch": 0.14207407407407407, "grad_norm": 1.6383781433105469, "learning_rate": 0.00017177168272794664, "loss": 1.1978, "step": 959 }, { "epoch": 0.14222222222222222, "grad_norm": 2.8146605491638184, "learning_rate": 0.00017174203113417346, "loss": 1.0505, "step": 960 }, { "epoch": 0.14237037037037037, "grad_norm": 3.455076217651367, "learning_rate": 0.00017171237954040033, "loss": 1.1984, "step": 961 }, { "epoch": 0.14251851851851852, "grad_norm": 2.968580484390259, "learning_rate": 0.00017168272794662714, "loss": 1.2749, "step": 962 }, { "epoch": 0.14266666666666666, "grad_norm": 1.3419718742370605, "learning_rate": 0.00017165307635285396, "loss": 1.1617, "step": 963 }, { "epoch": 0.1428148148148148, "grad_norm": 4.6509904861450195, "learning_rate": 0.0001716234247590808, "loss": 1.0516, "step": 964 }, { "epoch": 0.14296296296296296, "grad_norm": 1.687054991722107, "learning_rate": 0.00017159377316530765, "loss": 1.1101, "step": 965 }, { "epoch": 0.1431111111111111, "grad_norm": 5.888388633728027, "learning_rate": 0.00017156412157153447, "loss": 1.2839, "step": 966 }, { "epoch": 0.14325925925925925, "grad_norm": 1.7805489301681519, "learning_rate": 
0.0001715344699777613, "loss": 1.0585, "step": 967 }, { "epoch": 0.1434074074074074, "grad_norm": 1.6146360635757446, "learning_rate": 0.00017150481838398816, "loss": 1.0834, "step": 968 }, { "epoch": 0.14355555555555555, "grad_norm": 1.9422056674957275, "learning_rate": 0.00017147516679021497, "loss": 0.9914, "step": 969 }, { "epoch": 0.1437037037037037, "grad_norm": 1.6852895021438599, "learning_rate": 0.00017144551519644182, "loss": 1.1439, "step": 970 }, { "epoch": 0.14385185185185184, "grad_norm": 1.960229516029358, "learning_rate": 0.00017141586360266866, "loss": 1.1256, "step": 971 }, { "epoch": 0.144, "grad_norm": 2.058276414871216, "learning_rate": 0.00017138621200889548, "loss": 1.0833, "step": 972 }, { "epoch": 0.14414814814814814, "grad_norm": 3.783475160598755, "learning_rate": 0.00017135656041512233, "loss": 1.1396, "step": 973 }, { "epoch": 0.14429629629629628, "grad_norm": 2.62729811668396, "learning_rate": 0.00017132690882134917, "loss": 0.9766, "step": 974 }, { "epoch": 0.14444444444444443, "grad_norm": 2.9100377559661865, "learning_rate": 0.000171297257227576, "loss": 1.1975, "step": 975 }, { "epoch": 0.1445925925925926, "grad_norm": 2.2580909729003906, "learning_rate": 0.00017126760563380283, "loss": 1.0595, "step": 976 }, { "epoch": 0.14474074074074075, "grad_norm": 1.8175936937332153, "learning_rate": 0.00017123795404002968, "loss": 1.1501, "step": 977 }, { "epoch": 0.1448888888888889, "grad_norm": 1.8463125228881836, "learning_rate": 0.0001712083024462565, "loss": 1.1216, "step": 978 }, { "epoch": 0.14503703703703705, "grad_norm": 7.846401214599609, "learning_rate": 0.0001711786508524833, "loss": 1.0921, "step": 979 }, { "epoch": 0.1451851851851852, "grad_norm": 2.4334380626678467, "learning_rate": 0.00017114899925871018, "loss": 1.1639, "step": 980 }, { "epoch": 0.14533333333333334, "grad_norm": 5.559380531311035, "learning_rate": 0.000171119347664937, "loss": 1.0065, "step": 981 }, { "epoch": 0.1454814814814815, "grad_norm": 
2.4271509647369385, "learning_rate": 0.00017108969607116382, "loss": 1.065, "step": 982 }, { "epoch": 0.14562962962962964, "grad_norm": 1.9057291746139526, "learning_rate": 0.00017106004447739066, "loss": 1.2109, "step": 983 }, { "epoch": 0.14577777777777778, "grad_norm": 1.6439474821090698, "learning_rate": 0.0001710303928836175, "loss": 1.0766, "step": 984 }, { "epoch": 0.14592592592592593, "grad_norm": 3.028550863265991, "learning_rate": 0.00017100074128984432, "loss": 1.2883, "step": 985 }, { "epoch": 0.14607407407407408, "grad_norm": 1.0586735010147095, "learning_rate": 0.00017097108969607117, "loss": 1.2154, "step": 986 }, { "epoch": 0.14622222222222223, "grad_norm": 1.668674111366272, "learning_rate": 0.000170941438102298, "loss": 1.0683, "step": 987 }, { "epoch": 0.14637037037037037, "grad_norm": 2.949061870574951, "learning_rate": 0.00017091178650852483, "loss": 1.2461, "step": 988 }, { "epoch": 0.14651851851851852, "grad_norm": 1.5150032043457031, "learning_rate": 0.00017088213491475167, "loss": 1.148, "step": 989 }, { "epoch": 0.14666666666666667, "grad_norm": 9.12592601776123, "learning_rate": 0.00017085248332097852, "loss": 1.2257, "step": 990 }, { "epoch": 0.14681481481481481, "grad_norm": 2.1272830963134766, "learning_rate": 0.00017082283172720534, "loss": 1.1114, "step": 991 }, { "epoch": 0.14696296296296296, "grad_norm": 2.5358645915985107, "learning_rate": 0.00017079318013343218, "loss": 1.1169, "step": 992 }, { "epoch": 0.1471111111111111, "grad_norm": 1.4151437282562256, "learning_rate": 0.00017076352853965902, "loss": 1.093, "step": 993 }, { "epoch": 0.14725925925925926, "grad_norm": 4.572996616363525, "learning_rate": 0.00017073387694588584, "loss": 1.2401, "step": 994 }, { "epoch": 0.1474074074074074, "grad_norm": 1.4581624269485474, "learning_rate": 0.0001707042253521127, "loss": 1.3273, "step": 995 }, { "epoch": 0.14755555555555555, "grad_norm": 2.1116039752960205, "learning_rate": 0.00017067457375833953, "loss": 1.2053, "step": 996 }, { 
"epoch": 0.1477037037037037, "grad_norm": 2.616701602935791, "learning_rate": 0.00017064492216456635, "loss": 1.0642, "step": 997 }, { "epoch": 0.14785185185185185, "grad_norm": 1.2618801593780518, "learning_rate": 0.0001706152705707932, "loss": 1.5356, "step": 998 }, { "epoch": 0.148, "grad_norm": 2.029723882675171, "learning_rate": 0.00017058561897702, "loss": 0.9674, "step": 999 }, { "epoch": 0.14814814814814814, "grad_norm": 1.6765875816345215, "learning_rate": 0.00017055596738324685, "loss": 1.1802, "step": 1000 }, { "epoch": 0.1482962962962963, "grad_norm": 3.5849623680114746, "learning_rate": 0.0001705263157894737, "loss": 1.1245, "step": 1001 }, { "epoch": 0.14844444444444443, "grad_norm": 2.4030394554138184, "learning_rate": 0.00017049666419570052, "loss": 1.2715, "step": 1002 }, { "epoch": 0.14859259259259258, "grad_norm": 5.763272285461426, "learning_rate": 0.00017046701260192736, "loss": 1.2968, "step": 1003 }, { "epoch": 0.14874074074074073, "grad_norm": 3.2482750415802, "learning_rate": 0.0001704373610081542, "loss": 1.1823, "step": 1004 }, { "epoch": 0.14888888888888888, "grad_norm": 1.9993109703063965, "learning_rate": 0.00017040770941438102, "loss": 1.1634, "step": 1005 }, { "epoch": 0.14903703703703702, "grad_norm": 3.8828346729278564, "learning_rate": 0.00017037805782060787, "loss": 1.4147, "step": 1006 }, { "epoch": 0.1491851851851852, "grad_norm": 1.2229200601577759, "learning_rate": 0.0001703484062268347, "loss": 1.2015, "step": 1007 }, { "epoch": 0.14933333333333335, "grad_norm": 2.444016456604004, "learning_rate": 0.00017031875463306153, "loss": 1.0146, "step": 1008 }, { "epoch": 0.1494814814814815, "grad_norm": 2.006105661392212, "learning_rate": 0.00017028910303928837, "loss": 1.3469, "step": 1009 }, { "epoch": 0.14962962962962964, "grad_norm": 2.110853433609009, "learning_rate": 0.00017025945144551522, "loss": 1.0897, "step": 1010 }, { "epoch": 0.1497777777777778, "grad_norm": 3.2394096851348877, "learning_rate": 0.00017022979985174204, 
"loss": 1.2767, "step": 1011 }, { "epoch": 0.14992592592592593, "grad_norm": 2.3130080699920654, "learning_rate": 0.00017020014825796888, "loss": 1.071, "step": 1012 }, { "epoch": 0.15007407407407408, "grad_norm": 1.756558895111084, "learning_rate": 0.00017017049666419572, "loss": 1.0203, "step": 1013 }, { "epoch": 0.15022222222222223, "grad_norm": 1.4772218465805054, "learning_rate": 0.00017014084507042254, "loss": 1.1404, "step": 1014 }, { "epoch": 0.15037037037037038, "grad_norm": 2.8919644355773926, "learning_rate": 0.00017011119347664939, "loss": 1.1955, "step": 1015 }, { "epoch": 0.15051851851851852, "grad_norm": 1.9548122882843018, "learning_rate": 0.0001700815418828762, "loss": 1.2325, "step": 1016 }, { "epoch": 0.15066666666666667, "grad_norm": 1.2817782163619995, "learning_rate": 0.00017005189028910305, "loss": 1.2533, "step": 1017 }, { "epoch": 0.15081481481481482, "grad_norm": 1.1539685726165771, "learning_rate": 0.00017002223869532987, "loss": 1.1358, "step": 1018 }, { "epoch": 0.15096296296296297, "grad_norm": 2.1873302459716797, "learning_rate": 0.0001699925871015567, "loss": 1.1667, "step": 1019 }, { "epoch": 0.1511111111111111, "grad_norm": 2.369619131088257, "learning_rate": 0.00016996293550778355, "loss": 1.1854, "step": 1020 }, { "epoch": 0.15125925925925926, "grad_norm": 2.369476556777954, "learning_rate": 0.00016993328391401037, "loss": 1.2106, "step": 1021 }, { "epoch": 0.1514074074074074, "grad_norm": 3.2865421772003174, "learning_rate": 0.00016990363232023722, "loss": 1.0389, "step": 1022 }, { "epoch": 0.15155555555555555, "grad_norm": 1.9383407831192017, "learning_rate": 0.00016987398072646406, "loss": 1.0633, "step": 1023 }, { "epoch": 0.1517037037037037, "grad_norm": 2.9727392196655273, "learning_rate": 0.00016984432913269088, "loss": 1.0315, "step": 1024 }, { "epoch": 0.15185185185185185, "grad_norm": 1.2354108095169067, "learning_rate": 0.00016981467753891772, "loss": 1.1532, "step": 1025 }, { "epoch": 0.152, "grad_norm": 
1.7172785997390747, "learning_rate": 0.00016978502594514457, "loss": 0.7329, "step": 1026 }, { "epoch": 0.15214814814814814, "grad_norm": 2.638615846633911, "learning_rate": 0.00016975537435137138, "loss": 1.1379, "step": 1027 }, { "epoch": 0.1522962962962963, "grad_norm": 2.0213356018066406, "learning_rate": 0.00016972572275759823, "loss": 1.1679, "step": 1028 }, { "epoch": 0.15244444444444444, "grad_norm": 2.057471990585327, "learning_rate": 0.00016969607116382507, "loss": 1.0414, "step": 1029 }, { "epoch": 0.15259259259259259, "grad_norm": 1.5801711082458496, "learning_rate": 0.0001696664195700519, "loss": 1.3041, "step": 1030 }, { "epoch": 0.15274074074074073, "grad_norm": 2.0575385093688965, "learning_rate": 0.00016963676797627873, "loss": 1.0981, "step": 1031 }, { "epoch": 0.15288888888888888, "grad_norm": 1.9751660823822021, "learning_rate": 0.00016960711638250558, "loss": 0.9827, "step": 1032 }, { "epoch": 0.15303703703703703, "grad_norm": 1.5350358486175537, "learning_rate": 0.0001695774647887324, "loss": 1.1458, "step": 1033 }, { "epoch": 0.15318518518518517, "grad_norm": 1.4879770278930664, "learning_rate": 0.00016954781319495921, "loss": 1.1881, "step": 1034 }, { "epoch": 0.15333333333333332, "grad_norm": 3.2379486560821533, "learning_rate": 0.00016951816160118609, "loss": 1.1286, "step": 1035 }, { "epoch": 0.15348148148148147, "grad_norm": 5.281479358673096, "learning_rate": 0.0001694885100074129, "loss": 1.1006, "step": 1036 }, { "epoch": 0.15362962962962962, "grad_norm": 1.7295136451721191, "learning_rate": 0.00016945885841363972, "loss": 1.1644, "step": 1037 }, { "epoch": 0.1537777777777778, "grad_norm": 1.8465901613235474, "learning_rate": 0.0001694292068198666, "loss": 1.1519, "step": 1038 }, { "epoch": 0.15392592592592594, "grad_norm": 6.92486047744751, "learning_rate": 0.0001693995552260934, "loss": 1.142, "step": 1039 }, { "epoch": 0.15407407407407409, "grad_norm": 1.9682059288024902, "learning_rate": 0.00016936990363232023, "loss": 0.9864, 
"step": 1040 }, { "epoch": 0.15422222222222223, "grad_norm": 1.4835504293441772, "learning_rate": 0.0001693402520385471, "loss": 1.0888, "step": 1041 }, { "epoch": 0.15437037037037038, "grad_norm": 1.9731804132461548, "learning_rate": 0.00016931060044477392, "loss": 1.4113, "step": 1042 }, { "epoch": 0.15451851851851853, "grad_norm": 1.9451696872711182, "learning_rate": 0.00016928094885100073, "loss": 1.0769, "step": 1043 }, { "epoch": 0.15466666666666667, "grad_norm": 2.1988463401794434, "learning_rate": 0.0001692512972572276, "loss": 1.0705, "step": 1044 }, { "epoch": 0.15481481481481482, "grad_norm": 4.193809509277344, "learning_rate": 0.00016922164566345442, "loss": 1.3807, "step": 1045 }, { "epoch": 0.15496296296296297, "grad_norm": 4.9835710525512695, "learning_rate": 0.00016919199406968124, "loss": 1.2299, "step": 1046 }, { "epoch": 0.15511111111111112, "grad_norm": 1.6631501913070679, "learning_rate": 0.0001691623424759081, "loss": 1.0437, "step": 1047 }, { "epoch": 0.15525925925925926, "grad_norm": 2.734771728515625, "learning_rate": 0.00016913269088213493, "loss": 1.2403, "step": 1048 }, { "epoch": 0.1554074074074074, "grad_norm": 1.514737844467163, "learning_rate": 0.00016910303928836175, "loss": 1.2392, "step": 1049 }, { "epoch": 0.15555555555555556, "grad_norm": 1.6169673204421997, "learning_rate": 0.0001690733876945886, "loss": 1.0107, "step": 1050 }, { "epoch": 0.1557037037037037, "grad_norm": 1.7885040044784546, "learning_rate": 0.00016904373610081543, "loss": 1.2452, "step": 1051 }, { "epoch": 0.15585185185185185, "grad_norm": 1.7115554809570312, "learning_rate": 0.00016901408450704225, "loss": 1.1554, "step": 1052 }, { "epoch": 0.156, "grad_norm": 1.8804570436477661, "learning_rate": 0.0001689844329132691, "loss": 1.2028, "step": 1053 }, { "epoch": 0.15614814814814815, "grad_norm": 2.993739128112793, "learning_rate": 0.00016895478131949594, "loss": 1.1684, "step": 1054 }, { "epoch": 0.1562962962962963, "grad_norm": 2.713289499282837, 
"learning_rate": 0.00016892512972572276, "loss": 0.8836, "step": 1055 }, { "epoch": 0.15644444444444444, "grad_norm": 2.5271599292755127, "learning_rate": 0.0001688954781319496, "loss": 1.4292, "step": 1056 }, { "epoch": 0.1565925925925926, "grad_norm": 4.2666425704956055, "learning_rate": 0.00016886582653817645, "loss": 1.3867, "step": 1057 }, { "epoch": 0.15674074074074074, "grad_norm": 1.8929147720336914, "learning_rate": 0.00016883617494440326, "loss": 1.2823, "step": 1058 }, { "epoch": 0.15688888888888888, "grad_norm": 2.1356215476989746, "learning_rate": 0.0001688065233506301, "loss": 1.3934, "step": 1059 }, { "epoch": 0.15703703703703703, "grad_norm": 2.8732924461364746, "learning_rate": 0.00016877687175685695, "loss": 1.3736, "step": 1060 }, { "epoch": 0.15718518518518518, "grad_norm": 2.7370901107788086, "learning_rate": 0.00016874722016308377, "loss": 0.995, "step": 1061 }, { "epoch": 0.15733333333333333, "grad_norm": 1.6540776491165161, "learning_rate": 0.00016871756856931061, "loss": 0.9875, "step": 1062 }, { "epoch": 0.15748148148148147, "grad_norm": 2.5256638526916504, "learning_rate": 0.00016868791697553746, "loss": 0.9672, "step": 1063 }, { "epoch": 0.15762962962962962, "grad_norm": 1.825677752494812, "learning_rate": 0.00016865826538176428, "loss": 1.1755, "step": 1064 }, { "epoch": 0.15777777777777777, "grad_norm": 3.3257060050964355, "learning_rate": 0.0001686286137879911, "loss": 1.1019, "step": 1065 }, { "epoch": 0.15792592592592591, "grad_norm": 3.0843424797058105, "learning_rate": 0.00016859896219421797, "loss": 1.2329, "step": 1066 }, { "epoch": 0.15807407407407406, "grad_norm": null, "learning_rate": 0.00016859896219421797, "loss": 1.0131, "step": 1067 }, { "epoch": 0.1582222222222222, "grad_norm": 3.285895347595215, "learning_rate": 0.00016856931060044478, "loss": 1.1322, "step": 1068 }, { "epoch": 0.15837037037037038, "grad_norm": 2.788299322128296, "learning_rate": 0.0001685396590066716, "loss": 1.1749, "step": 1069 }, { "epoch": 
0.15851851851851853, "grad_norm": 2.9314417839050293, "learning_rate": 0.00016851000741289844, "loss": 0.9868, "step": 1070 }, { "epoch": 0.15866666666666668, "grad_norm": 8.959638595581055, "learning_rate": 0.0001684803558191253, "loss": 1.1358, "step": 1071 }, { "epoch": 0.15881481481481483, "grad_norm": 3.2001211643218994, "learning_rate": 0.0001684507042253521, "loss": 1.2816, "step": 1072 }, { "epoch": 0.15896296296296297, "grad_norm": 8.979604721069336, "learning_rate": 0.00016842105263157895, "loss": 1.2513, "step": 1073 }, { "epoch": 0.15911111111111112, "grad_norm": 3.510807752609253, "learning_rate": 0.0001683914010378058, "loss": 1.2859, "step": 1074 }, { "epoch": 0.15925925925925927, "grad_norm": 4.32289981842041, "learning_rate": 0.0001683617494440326, "loss": 1.0888, "step": 1075 }, { "epoch": 0.15940740740740741, "grad_norm": 2.1474578380584717, "learning_rate": 0.00016833209785025946, "loss": 1.0386, "step": 1076 }, { "epoch": 0.15955555555555556, "grad_norm": 3.062962293624878, "learning_rate": 0.0001683024462564863, "loss": 1.0207, "step": 1077 }, { "epoch": 0.1597037037037037, "grad_norm": 3.040384292602539, "learning_rate": 0.00016827279466271312, "loss": 1.1263, "step": 1078 }, { "epoch": 0.15985185185185186, "grad_norm": 2.453765630722046, "learning_rate": 0.00016824314306893996, "loss": 1.3174, "step": 1079 }, { "epoch": 0.16, "grad_norm": 4.926877498626709, "learning_rate": 0.0001682134914751668, "loss": 0.9633, "step": 1080 }, { "epoch": 0.16014814814814815, "grad_norm": 3.5459671020507812, "learning_rate": 0.00016818383988139363, "loss": 1.4504, "step": 1081 }, { "epoch": 0.1602962962962963, "grad_norm": 1.3094695806503296, "learning_rate": 0.00016815418828762047, "loss": 1.2689, "step": 1082 }, { "epoch": 0.16044444444444445, "grad_norm": 1.9539918899536133, "learning_rate": 0.00016812453669384731, "loss": 0.8696, "step": 1083 }, { "epoch": 0.1605925925925926, "grad_norm": 4.95810079574585, "learning_rate": 0.00016809488510007413, "loss": 
1.026, "step": 1084 }, { "epoch": 0.16074074074074074, "grad_norm": 2.032973051071167, "learning_rate": 0.00016806523350630098, "loss": 0.8564, "step": 1085 }, { "epoch": 0.1608888888888889, "grad_norm": 4.3425068855285645, "learning_rate": 0.00016803558191252782, "loss": 1.1461, "step": 1086 }, { "epoch": 0.16103703703703703, "grad_norm": 2.2817065715789795, "learning_rate": 0.00016800593031875464, "loss": 1.112, "step": 1087 }, { "epoch": 0.16118518518518518, "grad_norm": 1.5987143516540527, "learning_rate": 0.00016797627872498148, "loss": 1.007, "step": 1088 }, { "epoch": 0.16133333333333333, "grad_norm": 1.582229495048523, "learning_rate": 0.0001679466271312083, "loss": 1.1057, "step": 1089 }, { "epoch": 0.16148148148148148, "grad_norm": 1.8761428594589233, "learning_rate": 0.00016791697553743514, "loss": 1.2869, "step": 1090 }, { "epoch": 0.16162962962962962, "grad_norm": 4.022858619689941, "learning_rate": 0.000167887323943662, "loss": 1.2291, "step": 1091 }, { "epoch": 0.16177777777777777, "grad_norm": 1.9940602779388428, "learning_rate": 0.0001678576723498888, "loss": 1.12, "step": 1092 }, { "epoch": 0.16192592592592592, "grad_norm": 2.525167942047119, "learning_rate": 0.00016782802075611565, "loss": 1.2224, "step": 1093 }, { "epoch": 0.16207407407407406, "grad_norm": 2.735330104827881, "learning_rate": 0.0001677983691623425, "loss": 1.1975, "step": 1094 }, { "epoch": 0.1622222222222222, "grad_norm": 2.2123138904571533, "learning_rate": 0.0001677687175685693, "loss": 1.0923, "step": 1095 }, { "epoch": 0.16237037037037036, "grad_norm": 4.129847049713135, "learning_rate": 0.00016773906597479616, "loss": 1.3661, "step": 1096 }, { "epoch": 0.1625185185185185, "grad_norm": 3.07590389251709, "learning_rate": 0.000167709414381023, "loss": 1.0667, "step": 1097 }, { "epoch": 0.16266666666666665, "grad_norm": 3.407912015914917, "learning_rate": 0.00016767976278724982, "loss": 0.9218, "step": 1098 }, { "epoch": 0.1628148148148148, "grad_norm": 4.002742290496826, 
"learning_rate": 0.00016765011119347666, "loss": 1.0098, "step": 1099 }, { "epoch": 0.16296296296296298, "grad_norm": 1.2676118612289429, "learning_rate": 0.0001676204595997035, "loss": 1.0311, "step": 1100 }, { "epoch": 0.16311111111111112, "grad_norm": 3.6772754192352295, "learning_rate": 0.00016759080800593032, "loss": 1.0986, "step": 1101 }, { "epoch": 0.16325925925925927, "grad_norm": 3.532989501953125, "learning_rate": 0.00016756115641215717, "loss": 1.1303, "step": 1102 }, { "epoch": 0.16340740740740742, "grad_norm": 2.35569429397583, "learning_rate": 0.000167531504818384, "loss": 1.1121, "step": 1103 }, { "epoch": 0.16355555555555557, "grad_norm": 1.7124059200286865, "learning_rate": 0.00016750185322461083, "loss": 1.1505, "step": 1104 }, { "epoch": 0.1637037037037037, "grad_norm": 2.458972692489624, "learning_rate": 0.00016747220163083765, "loss": 1.1398, "step": 1105 }, { "epoch": 0.16385185185185186, "grad_norm": 3.610698699951172, "learning_rate": 0.0001674425500370645, "loss": 1.3289, "step": 1106 }, { "epoch": 0.164, "grad_norm": 2.0191452503204346, "learning_rate": 0.00016741289844329134, "loss": 1.1516, "step": 1107 }, { "epoch": 0.16414814814814815, "grad_norm": 1.9639180898666382, "learning_rate": 0.00016738324684951815, "loss": 0.986, "step": 1108 }, { "epoch": 0.1642962962962963, "grad_norm": 1.9284876585006714, "learning_rate": 0.000167353595255745, "loss": 0.8722, "step": 1109 }, { "epoch": 0.16444444444444445, "grad_norm": 2.1016082763671875, "learning_rate": 0.00016732394366197184, "loss": 1.1622, "step": 1110 }, { "epoch": 0.1645925925925926, "grad_norm": 2.410661220550537, "learning_rate": 0.00016729429206819866, "loss": 1.2704, "step": 1111 }, { "epoch": 0.16474074074074074, "grad_norm": 4.063007354736328, "learning_rate": 0.0001672646404744255, "loss": 1.0752, "step": 1112 }, { "epoch": 0.1648888888888889, "grad_norm": 2.9490597248077393, "learning_rate": 0.00016723498888065235, "loss": 0.9154, "step": 1113 }, { "epoch": 
0.16503703703703704, "grad_norm": 2.2835164070129395, "learning_rate": 0.00016720533728687917, "loss": 0.9618, "step": 1114 }, { "epoch": 0.16518518518518518, "grad_norm": 1.3010778427124023, "learning_rate": 0.000167175685693106, "loss": 1.0763, "step": 1115 }, { "epoch": 0.16533333333333333, "grad_norm": 2.7012317180633545, "learning_rate": 0.00016714603409933286, "loss": 1.2822, "step": 1116 }, { "epoch": 0.16548148148148148, "grad_norm": 3.55961275100708, "learning_rate": 0.00016711638250555967, "loss": 1.3006, "step": 1117 }, { "epoch": 0.16562962962962963, "grad_norm": 4.551784992218018, "learning_rate": 0.00016708673091178652, "loss": 1.3546, "step": 1118 }, { "epoch": 0.16577777777777777, "grad_norm": 2.7167952060699463, "learning_rate": 0.00016705707931801336, "loss": 1.2642, "step": 1119 }, { "epoch": 0.16592592592592592, "grad_norm": 3.6416263580322266, "learning_rate": 0.00016702742772424018, "loss": 1.2168, "step": 1120 }, { "epoch": 0.16607407407407407, "grad_norm": 2.924973964691162, "learning_rate": 0.000166997776130467, "loss": 0.9433, "step": 1121 }, { "epoch": 0.16622222222222222, "grad_norm": 1.412368893623352, "learning_rate": 0.00016696812453669387, "loss": 0.9324, "step": 1122 }, { "epoch": 0.16637037037037036, "grad_norm": 1.3757219314575195, "learning_rate": 0.00016693847294292069, "loss": 1.1399, "step": 1123 }, { "epoch": 0.1665185185185185, "grad_norm": 1.7917617559432983, "learning_rate": 0.0001669088213491475, "loss": 1.3018, "step": 1124 }, { "epoch": 0.16666666666666666, "grad_norm": 3.3100757598876953, "learning_rate": 0.00016687916975537438, "loss": 1.0173, "step": 1125 }, { "epoch": 0.1668148148148148, "grad_norm": 1.7143326997756958, "learning_rate": 0.0001668495181616012, "loss": 1.0514, "step": 1126 }, { "epoch": 0.16696296296296295, "grad_norm": 3.0766828060150146, "learning_rate": 0.000166819866567828, "loss": 1.0553, "step": 1127 }, { "epoch": 0.1671111111111111, "grad_norm": 1.6713535785675049, "learning_rate": 
0.00016679021497405488, "loss": 1.0935, "step": 1128 }, { "epoch": 0.16725925925925925, "grad_norm": 2.5680131912231445, "learning_rate": 0.0001667605633802817, "loss": 1.096, "step": 1129 }, { "epoch": 0.1674074074074074, "grad_norm": 2.3472912311553955, "learning_rate": 0.00016673091178650852, "loss": 1.0766, "step": 1130 }, { "epoch": 0.16755555555555557, "grad_norm": 2.3274173736572266, "learning_rate": 0.0001667012601927354, "loss": 1.2308, "step": 1131 }, { "epoch": 0.16770370370370372, "grad_norm": 2.9745113849639893, "learning_rate": 0.0001666716085989622, "loss": 1.1308, "step": 1132 }, { "epoch": 0.16785185185185186, "grad_norm": 1.898444652557373, "learning_rate": 0.00016664195700518902, "loss": 1.1928, "step": 1133 }, { "epoch": 0.168, "grad_norm": 3.0440452098846436, "learning_rate": 0.0001666123054114159, "loss": 1.0433, "step": 1134 }, { "epoch": 0.16814814814814816, "grad_norm": 1.7012965679168701, "learning_rate": 0.0001665826538176427, "loss": 1.2063, "step": 1135 }, { "epoch": 0.1682962962962963, "grad_norm": 2.831171989440918, "learning_rate": 0.00016655300222386953, "loss": 0.8884, "step": 1136 }, { "epoch": 0.16844444444444445, "grad_norm": 1.5138641595840454, "learning_rate": 0.00016652335063009637, "loss": 1.255, "step": 1137 }, { "epoch": 0.1685925925925926, "grad_norm": 2.0016908645629883, "learning_rate": 0.00016649369903632322, "loss": 1.2003, "step": 1138 }, { "epoch": 0.16874074074074075, "grad_norm": 1.6556096076965332, "learning_rate": 0.00016646404744255003, "loss": 1.0039, "step": 1139 }, { "epoch": 0.1688888888888889, "grad_norm": 1.9967530965805054, "learning_rate": 0.00016643439584877688, "loss": 0.9737, "step": 1140 }, { "epoch": 0.16903703703703704, "grad_norm": 4.667789936065674, "learning_rate": 0.00016640474425500372, "loss": 1.4546, "step": 1141 }, { "epoch": 0.1691851851851852, "grad_norm": 2.0882341861724854, "learning_rate": 0.00016637509266123054, "loss": 0.9068, "step": 1142 }, { "epoch": 0.16933333333333334, 
"grad_norm": 2.6171019077301025, "learning_rate": 0.00016634544106745739, "loss": 1.3465, "step": 1143 }, { "epoch": 0.16948148148148148, "grad_norm": 2.0520472526550293, "learning_rate": 0.00016631578947368423, "loss": 1.1366, "step": 1144 }, { "epoch": 0.16962962962962963, "grad_norm": 2.103170156478882, "learning_rate": 0.00016628613787991105, "loss": 1.226, "step": 1145 }, { "epoch": 0.16977777777777778, "grad_norm": 13.238292694091797, "learning_rate": 0.0001662564862861379, "loss": 1.0785, "step": 1146 }, { "epoch": 0.16992592592592592, "grad_norm": 2.532947301864624, "learning_rate": 0.00016622683469236474, "loss": 1.0802, "step": 1147 }, { "epoch": 0.17007407407407407, "grad_norm": 1.3687976598739624, "learning_rate": 0.00016619718309859155, "loss": 1.0488, "step": 1148 }, { "epoch": 0.17022222222222222, "grad_norm": 2.4959707260131836, "learning_rate": 0.0001661675315048184, "loss": 1.1523, "step": 1149 }, { "epoch": 0.17037037037037037, "grad_norm": 1.4208955764770508, "learning_rate": 0.00016613787991104524, "loss": 1.2076, "step": 1150 }, { "epoch": 0.1705185185185185, "grad_norm": 3.3279201984405518, "learning_rate": 0.00016610822831727206, "loss": 0.9391, "step": 1151 }, { "epoch": 0.17066666666666666, "grad_norm": 1.680005669593811, "learning_rate": 0.00016607857672349888, "loss": 1.3179, "step": 1152 }, { "epoch": 0.1708148148148148, "grad_norm": 3.0134124755859375, "learning_rate": 0.00016604892512972575, "loss": 1.6055, "step": 1153 }, { "epoch": 0.17096296296296296, "grad_norm": 1.9432357549667358, "learning_rate": 0.00016601927353595257, "loss": 1.0273, "step": 1154 }, { "epoch": 0.1711111111111111, "grad_norm": 2.375060796737671, "learning_rate": 0.00016598962194217938, "loss": 1.3216, "step": 1155 }, { "epoch": 0.17125925925925925, "grad_norm": 2.5080406665802, "learning_rate": 0.00016595997034840623, "loss": 1.1713, "step": 1156 }, { "epoch": 0.1714074074074074, "grad_norm": 13.426639556884766, "learning_rate": 0.00016593031875463307, "loss": 
1.0199, "step": 1157 }, { "epoch": 0.17155555555555554, "grad_norm": 3.609325885772705, "learning_rate": 0.0001659006671608599, "loss": 0.9444, "step": 1158 }, { "epoch": 0.1717037037037037, "grad_norm": 2.1851866245269775, "learning_rate": 0.00016587101556708673, "loss": 1.103, "step": 1159 }, { "epoch": 0.17185185185185184, "grad_norm": 3.400322437286377, "learning_rate": 0.00016584136397331358, "loss": 1.1331, "step": 1160 }, { "epoch": 0.172, "grad_norm": 2.3655338287353516, "learning_rate": 0.0001658117123795404, "loss": 1.2707, "step": 1161 }, { "epoch": 0.17214814814814816, "grad_norm": 1.8025535345077515, "learning_rate": 0.00016578206078576724, "loss": 1.1592, "step": 1162 }, { "epoch": 0.1722962962962963, "grad_norm": 2.439143180847168, "learning_rate": 0.00016575240919199409, "loss": 1.066, "step": 1163 }, { "epoch": 0.17244444444444446, "grad_norm": 3.3526995182037354, "learning_rate": 0.0001657227575982209, "loss": 1.1781, "step": 1164 }, { "epoch": 0.1725925925925926, "grad_norm": 3.699305772781372, "learning_rate": 0.00016569310600444775, "loss": 1.2586, "step": 1165 }, { "epoch": 0.17274074074074075, "grad_norm": 1.3605479001998901, "learning_rate": 0.0001656634544106746, "loss": 0.9209, "step": 1166 }, { "epoch": 0.1728888888888889, "grad_norm": 2.2111520767211914, "learning_rate": 0.0001656338028169014, "loss": 1.1226, "step": 1167 }, { "epoch": 0.17303703703703704, "grad_norm": 2.446354866027832, "learning_rate": 0.00016560415122312825, "loss": 1.1782, "step": 1168 }, { "epoch": 0.1731851851851852, "grad_norm": 1.759831190109253, "learning_rate": 0.0001655744996293551, "loss": 1.3204, "step": 1169 }, { "epoch": 0.17333333333333334, "grad_norm": 1.6880930662155151, "learning_rate": 0.00016554484803558192, "loss": 0.9032, "step": 1170 }, { "epoch": 0.1734814814814815, "grad_norm": 1.861935019493103, "learning_rate": 0.00016551519644180876, "loss": 1.2485, "step": 1171 }, { "epoch": 0.17362962962962963, "grad_norm": 3.1562302112579346, 
"learning_rate": 0.0001654855448480356, "loss": 1.294, "step": 1172 }, { "epoch": 0.17377777777777778, "grad_norm": 1.7831099033355713, "learning_rate": 0.00016545589325426242, "loss": 1.0631, "step": 1173 }, { "epoch": 0.17392592592592593, "grad_norm": 2.3080148696899414, "learning_rate": 0.00016542624166048927, "loss": 1.04, "step": 1174 }, { "epoch": 0.17407407407407408, "grad_norm": 2.459798574447632, "learning_rate": 0.00016539659006671608, "loss": 1.2923, "step": 1175 }, { "epoch": 0.17422222222222222, "grad_norm": 2.508446455001831, "learning_rate": 0.00016536693847294293, "loss": 1.0659, "step": 1176 }, { "epoch": 0.17437037037037037, "grad_norm": 2.1636300086975098, "learning_rate": 0.00016533728687916977, "loss": 1.1215, "step": 1177 }, { "epoch": 0.17451851851851852, "grad_norm": 4.602794647216797, "learning_rate": 0.0001653076352853966, "loss": 1.3148, "step": 1178 }, { "epoch": 0.17466666666666666, "grad_norm": 1.9078799486160278, "learning_rate": 0.00016527798369162343, "loss": 0.9364, "step": 1179 }, { "epoch": 0.1748148148148148, "grad_norm": 5.264694690704346, "learning_rate": 0.00016524833209785028, "loss": 1.2738, "step": 1180 }, { "epoch": 0.17496296296296296, "grad_norm": 2.355531692504883, "learning_rate": 0.0001652186805040771, "loss": 1.2169, "step": 1181 }, { "epoch": 0.1751111111111111, "grad_norm": 1.698150396347046, "learning_rate": 0.00016518902891030394, "loss": 1.0232, "step": 1182 }, { "epoch": 0.17525925925925925, "grad_norm": 1.9937596321105957, "learning_rate": 0.00016515937731653078, "loss": 1.1532, "step": 1183 }, { "epoch": 0.1754074074074074, "grad_norm": 1.392584204673767, "learning_rate": 0.0001651297257227576, "loss": 1.1634, "step": 1184 }, { "epoch": 0.17555555555555555, "grad_norm": 2.6987364292144775, "learning_rate": 0.00016510007412898445, "loss": 1.418, "step": 1185 }, { "epoch": 0.1757037037037037, "grad_norm": 2.445852041244507, "learning_rate": 0.0001650704225352113, "loss": 0.9981, "step": 1186 }, { "epoch": 
0.17585185185185184, "grad_norm": 2.5761144161224365, "learning_rate": 0.0001650407709414381, "loss": 1.0874, "step": 1187 }, { "epoch": 0.176, "grad_norm": 1.2105910778045654, "learning_rate": 0.00016501111934766495, "loss": 1.0968, "step": 1188 }, { "epoch": 0.17614814814814814, "grad_norm": 2.399811029434204, "learning_rate": 0.00016498146775389177, "loss": 1.3, "step": 1189 }, { "epoch": 0.17629629629629628, "grad_norm": 2.0828731060028076, "learning_rate": 0.00016495181616011861, "loss": 1.2034, "step": 1190 }, { "epoch": 0.17644444444444443, "grad_norm": 1.969633936882019, "learning_rate": 0.00016492216456634543, "loss": 0.9231, "step": 1191 }, { "epoch": 0.17659259259259258, "grad_norm": 2.430616617202759, "learning_rate": 0.00016489251297257228, "loss": 1.2606, "step": 1192 }, { "epoch": 0.17674074074074075, "grad_norm": 4.084666728973389, "learning_rate": 0.00016486286137879912, "loss": 0.9322, "step": 1193 }, { "epoch": 0.1768888888888889, "grad_norm": 2.4692203998565674, "learning_rate": 0.00016483320978502594, "loss": 1.2591, "step": 1194 }, { "epoch": 0.17703703703703705, "grad_norm": 2.2353594303131104, "learning_rate": 0.00016480355819125278, "loss": 1.3139, "step": 1195 }, { "epoch": 0.1771851851851852, "grad_norm": 2.550607919692993, "learning_rate": 0.00016477390659747963, "loss": 1.1424, "step": 1196 }, { "epoch": 0.17733333333333334, "grad_norm": 1.8636503219604492, "learning_rate": 0.00016474425500370644, "loss": 0.9783, "step": 1197 }, { "epoch": 0.1774814814814815, "grad_norm": 3.616098165512085, "learning_rate": 0.0001647146034099333, "loss": 1.3557, "step": 1198 }, { "epoch": 0.17762962962962964, "grad_norm": 5.341982364654541, "learning_rate": 0.00016468495181616013, "loss": 0.9768, "step": 1199 }, { "epoch": 0.17777777777777778, "grad_norm": 1.3313606977462769, "learning_rate": 0.00016465530022238695, "loss": 1.1152, "step": 1200 }, { "epoch": 0.17792592592592593, "grad_norm": 3.144308567047119, "learning_rate": 0.0001646256486286138, 
"loss": 1.1852, "step": 1201 }, { "epoch": 0.17807407407407408, "grad_norm": 1.5455279350280762, "learning_rate": 0.00016459599703484064, "loss": 0.9879, "step": 1202 }, { "epoch": 0.17822222222222223, "grad_norm": 4.991955280303955, "learning_rate": 0.00016456634544106746, "loss": 1.1951, "step": 1203 }, { "epoch": 0.17837037037037037, "grad_norm": 2.8212974071502686, "learning_rate": 0.0001645366938472943, "loss": 1.1659, "step": 1204 }, { "epoch": 0.17851851851851852, "grad_norm": 3.9513165950775146, "learning_rate": 0.00016450704225352115, "loss": 1.3337, "step": 1205 }, { "epoch": 0.17866666666666667, "grad_norm": 3.328399181365967, "learning_rate": 0.00016447739065974796, "loss": 1.2649, "step": 1206 }, { "epoch": 0.17881481481481482, "grad_norm": 2.1954457759857178, "learning_rate": 0.00016444773906597478, "loss": 1.3181, "step": 1207 }, { "epoch": 0.17896296296296296, "grad_norm": 1.7748527526855469, "learning_rate": 0.00016441808747220165, "loss": 1.11, "step": 1208 }, { "epoch": 0.1791111111111111, "grad_norm": 2.2563819885253906, "learning_rate": 0.00016438843587842847, "loss": 1.0285, "step": 1209 }, { "epoch": 0.17925925925925926, "grad_norm": 5.612164497375488, "learning_rate": 0.0001643587842846553, "loss": 1.2267, "step": 1210 }, { "epoch": 0.1794074074074074, "grad_norm": 2.1129345893859863, "learning_rate": 0.00016432913269088216, "loss": 1.1477, "step": 1211 }, { "epoch": 0.17955555555555555, "grad_norm": 2.182945489883423, "learning_rate": 0.00016429948109710898, "loss": 1.1477, "step": 1212 }, { "epoch": 0.1797037037037037, "grad_norm": 2.216829299926758, "learning_rate": 0.0001642698295033358, "loss": 1.1976, "step": 1213 }, { "epoch": 0.17985185185185185, "grad_norm": 2.9617536067962646, "learning_rate": 0.00016424017790956266, "loss": 1.141, "step": 1214 }, { "epoch": 0.18, "grad_norm": 1.9161547422409058, "learning_rate": 0.00016421052631578948, "loss": 1.0007, "step": 1215 }, { "epoch": 0.18014814814814814, "grad_norm": 3.606161594390869, 
"learning_rate": 0.0001641808747220163, "loss": 1.0588, "step": 1216 }, { "epoch": 0.1802962962962963, "grad_norm": 2.439342737197876, "learning_rate": 0.00016415122312824317, "loss": 1.1524, "step": 1217 }, { "epoch": 0.18044444444444444, "grad_norm": 1.9793710708618164, "learning_rate": 0.00016412157153447, "loss": 0.9695, "step": 1218 }, { "epoch": 0.18059259259259258, "grad_norm": 1.7666544914245605, "learning_rate": 0.0001640919199406968, "loss": 1.3083, "step": 1219 }, { "epoch": 0.18074074074074073, "grad_norm": 2.4542181491851807, "learning_rate": 0.00016406226834692368, "loss": 1.1153, "step": 1220 }, { "epoch": 0.18088888888888888, "grad_norm": 3.1888298988342285, "learning_rate": 0.0001640326167531505, "loss": 0.944, "step": 1221 }, { "epoch": 0.18103703703703702, "grad_norm": 2.3188817501068115, "learning_rate": 0.0001640029651593773, "loss": 1.1371, "step": 1222 }, { "epoch": 0.18118518518518517, "grad_norm": 2.448969841003418, "learning_rate": 0.00016397331356560416, "loss": 1.1699, "step": 1223 }, { "epoch": 0.18133333333333335, "grad_norm": 3.9037697315216064, "learning_rate": 0.000163943661971831, "loss": 1.139, "step": 1224 }, { "epoch": 0.1814814814814815, "grad_norm": 2.6014204025268555, "learning_rate": 0.00016391401037805782, "loss": 1.2577, "step": 1225 }, { "epoch": 0.18162962962962964, "grad_norm": 25.857332229614258, "learning_rate": 0.00016388435878428466, "loss": 0.9306, "step": 1226 }, { "epoch": 0.1817777777777778, "grad_norm": 2.0890629291534424, "learning_rate": 0.0001638547071905115, "loss": 1.3022, "step": 1227 }, { "epoch": 0.18192592592592594, "grad_norm": 1.969598412513733, "learning_rate": 0.00016382505559673832, "loss": 1.1961, "step": 1228 }, { "epoch": 0.18207407407407408, "grad_norm": 1.7499252557754517, "learning_rate": 0.00016379540400296517, "loss": 0.9372, "step": 1229 }, { "epoch": 0.18222222222222223, "grad_norm": 2.8854031562805176, "learning_rate": 0.000163765752409192, "loss": 1.1053, "step": 1230 }, { "epoch": 
0.18237037037037038, "grad_norm": 4.263045310974121, "learning_rate": 0.00016373610081541883, "loss": 0.9321, "step": 1231 }, { "epoch": 0.18251851851851852, "grad_norm": 1.8082984685897827, "learning_rate": 0.00016370644922164568, "loss": 1.125, "step": 1232 }, { "epoch": 0.18266666666666667, "grad_norm": 13.007062911987305, "learning_rate": 0.00016367679762787252, "loss": 1.3134, "step": 1233 }, { "epoch": 0.18281481481481482, "grad_norm": 1.9080790281295776, "learning_rate": 0.00016364714603409934, "loss": 1.0975, "step": 1234 }, { "epoch": 0.18296296296296297, "grad_norm": 1.7352561950683594, "learning_rate": 0.00016361749444032618, "loss": 1.2799, "step": 1235 }, { "epoch": 0.1831111111111111, "grad_norm": 2.157141923904419, "learning_rate": 0.00016358784284655303, "loss": 1.2574, "step": 1236 }, { "epoch": 0.18325925925925926, "grad_norm": 2.416133403778076, "learning_rate": 0.00016355819125277984, "loss": 1.1067, "step": 1237 }, { "epoch": 0.1834074074074074, "grad_norm": 1.6308764219284058, "learning_rate": 0.00016352853965900666, "loss": 0.9436, "step": 1238 }, { "epoch": 0.18355555555555556, "grad_norm": 3.1649699211120605, "learning_rate": 0.00016349888806523353, "loss": 0.9486, "step": 1239 }, { "epoch": 0.1837037037037037, "grad_norm": 2.2168164253234863, "learning_rate": 0.00016346923647146035, "loss": 1.1415, "step": 1240 }, { "epoch": 0.18385185185185185, "grad_norm": 2.6989848613739014, "learning_rate": 0.00016343958487768717, "loss": 1.04, "step": 1241 }, { "epoch": 0.184, "grad_norm": 2.743046760559082, "learning_rate": 0.000163409933283914, "loss": 1.2542, "step": 1242 }, { "epoch": 0.18414814814814814, "grad_norm": 2.6614725589752197, "learning_rate": 0.00016338028169014086, "loss": 1.2448, "step": 1243 }, { "epoch": 0.1842962962962963, "grad_norm": 2.142125129699707, "learning_rate": 0.00016335063009636767, "loss": 1.0992, "step": 1244 }, { "epoch": 0.18444444444444444, "grad_norm": 4.400914669036865, "learning_rate": 0.00016332097850259452, 
"loss": 1.1357, "step": 1245 }, { "epoch": 0.18459259259259259, "grad_norm": 2.421738862991333, "learning_rate": 0.00016329132690882136, "loss": 1.3442, "step": 1246 }, { "epoch": 0.18474074074074073, "grad_norm": 3.050323486328125, "learning_rate": 0.00016326167531504818, "loss": 1.173, "step": 1247 }, { "epoch": 0.18488888888888888, "grad_norm": 1.4971282482147217, "learning_rate": 0.00016323202372127502, "loss": 1.137, "step": 1248 }, { "epoch": 0.18503703703703703, "grad_norm": 1.1616339683532715, "learning_rate": 0.00016320237212750187, "loss": 0.8902, "step": 1249 }, { "epoch": 0.18518518518518517, "grad_norm": 1.8049408197402954, "learning_rate": 0.00016317272053372869, "loss": 1.138, "step": 1250 }, { "epoch": 0.18533333333333332, "grad_norm": 1.7517163753509521, "learning_rate": 0.00016314306893995553, "loss": 1.0648, "step": 1251 }, { "epoch": 0.18548148148148147, "grad_norm": 3.4030308723449707, "learning_rate": 0.00016311341734618237, "loss": 0.9477, "step": 1252 }, { "epoch": 0.18562962962962962, "grad_norm": 2.556241512298584, "learning_rate": 0.0001630837657524092, "loss": 1.2402, "step": 1253 }, { "epoch": 0.18577777777777776, "grad_norm": 2.832058906555176, "learning_rate": 0.00016305411415863604, "loss": 1.1, "step": 1254 }, { "epoch": 0.18592592592592594, "grad_norm": 1.4326435327529907, "learning_rate": 0.00016302446256486288, "loss": 1.4222, "step": 1255 }, { "epoch": 0.1860740740740741, "grad_norm": 1.8035448789596558, "learning_rate": 0.0001629948109710897, "loss": 1.1173, "step": 1256 }, { "epoch": 0.18622222222222223, "grad_norm": 2.3016655445098877, "learning_rate": 0.00016296515937731654, "loss": 1.3771, "step": 1257 }, { "epoch": 0.18637037037037038, "grad_norm": 3.838371515274048, "learning_rate": 0.0001629355077835434, "loss": 1.1765, "step": 1258 }, { "epoch": 0.18651851851851853, "grad_norm": 2.1241445541381836, "learning_rate": 0.0001629058561897702, "loss": 1.1228, "step": 1259 }, { "epoch": 0.18666666666666668, "grad_norm": 
2.992121934890747, "learning_rate": 0.00016287620459599705, "loss": 1.4603, "step": 1260 }, { "epoch": 0.18681481481481482, "grad_norm": 2.247199296951294, "learning_rate": 0.00016284655300222387, "loss": 1.2536, "step": 1261 }, { "epoch": 0.18696296296296297, "grad_norm": 4.452000141143799, "learning_rate": 0.0001628169014084507, "loss": 1.2157, "step": 1262 }, { "epoch": 0.18711111111111112, "grad_norm": 2.1935882568359375, "learning_rate": 0.00016278724981467756, "loss": 1.2344, "step": 1263 }, { "epoch": 0.18725925925925926, "grad_norm": 1.2970445156097412, "learning_rate": 0.00016275759822090437, "loss": 1.0382, "step": 1264 }, { "epoch": 0.1874074074074074, "grad_norm": 2.174247980117798, "learning_rate": 0.00016272794662713122, "loss": 1.0524, "step": 1265 }, { "epoch": 0.18755555555555556, "grad_norm": 1.9508861303329468, "learning_rate": 0.00016269829503335806, "loss": 1.3446, "step": 1266 }, { "epoch": 0.1877037037037037, "grad_norm": 2.7138915061950684, "learning_rate": 0.00016266864343958488, "loss": 1.3338, "step": 1267 }, { "epoch": 0.18785185185185185, "grad_norm": 7.125894546508789, "learning_rate": 0.00016263899184581172, "loss": 1.0892, "step": 1268 }, { "epoch": 0.188, "grad_norm": 1.7355868816375732, "learning_rate": 0.00016260934025203857, "loss": 1.2031, "step": 1269 }, { "epoch": 0.18814814814814815, "grad_norm": 1.1919690370559692, "learning_rate": 0.00016257968865826539, "loss": 0.981, "step": 1270 }, { "epoch": 0.1882962962962963, "grad_norm": 1.8908437490463257, "learning_rate": 0.00016255003706449223, "loss": 1.1244, "step": 1271 }, { "epoch": 0.18844444444444444, "grad_norm": 1.9054700136184692, "learning_rate": 0.00016252038547071907, "loss": 1.1896, "step": 1272 }, { "epoch": 0.1885925925925926, "grad_norm": 2.030076742172241, "learning_rate": 0.0001624907338769459, "loss": 1.0656, "step": 1273 }, { "epoch": 0.18874074074074074, "grad_norm": 2.07568097114563, "learning_rate": 0.00016246108228317274, "loss": 1.2487, "step": 1274 }, { 
"epoch": 0.18888888888888888, "grad_norm": 1.4379876852035522, "learning_rate": 0.00016243143068939955, "loss": 1.1438, "step": 1275 }, { "epoch": 0.18903703703703703, "grad_norm": 3.3925724029541016, "learning_rate": 0.0001624017790956264, "loss": 1.482, "step": 1276 }, { "epoch": 0.18918518518518518, "grad_norm": 1.1773624420166016, "learning_rate": 0.00016237212750185322, "loss": 1.1702, "step": 1277 }, { "epoch": 0.18933333333333333, "grad_norm": 2.5804686546325684, "learning_rate": 0.00016234247590808006, "loss": 1.189, "step": 1278 }, { "epoch": 0.18948148148148147, "grad_norm": 2.311694860458374, "learning_rate": 0.0001623128243143069, "loss": 1.1001, "step": 1279 }, { "epoch": 0.18962962962962962, "grad_norm": 3.100853681564331, "learning_rate": 0.00016228317272053372, "loss": 1.4143, "step": 1280 }, { "epoch": 0.18977777777777777, "grad_norm": 1.9254438877105713, "learning_rate": 0.00016225352112676057, "loss": 1.0859, "step": 1281 }, { "epoch": 0.18992592592592591, "grad_norm": 2.69869065284729, "learning_rate": 0.0001622238695329874, "loss": 1.2862, "step": 1282 }, { "epoch": 0.19007407407407406, "grad_norm": 3.5101282596588135, "learning_rate": 0.00016219421793921423, "loss": 1.004, "step": 1283 }, { "epoch": 0.1902222222222222, "grad_norm": 1.9631364345550537, "learning_rate": 0.00016216456634544107, "loss": 1.2547, "step": 1284 }, { "epoch": 0.19037037037037038, "grad_norm": 4.261918067932129, "learning_rate": 0.00016213491475166792, "loss": 1.3082, "step": 1285 }, { "epoch": 0.19051851851851853, "grad_norm": 3.2264537811279297, "learning_rate": 0.00016210526315789473, "loss": 1.3426, "step": 1286 }, { "epoch": 0.19066666666666668, "grad_norm": 3.6949462890625, "learning_rate": 0.00016207561156412158, "loss": 1.2305, "step": 1287 }, { "epoch": 0.19081481481481483, "grad_norm": 3.31636118888855, "learning_rate": 0.00016204595997034842, "loss": 1.1111, "step": 1288 }, { "epoch": 0.19096296296296297, "grad_norm": 1.7275923490524292, "learning_rate": 
0.00016201630837657524, "loss": 1.1604, "step": 1289 }, { "epoch": 0.19111111111111112, "grad_norm": 1.5811316967010498, "learning_rate": 0.00016198665678280208, "loss": 1.0016, "step": 1290 }, { "epoch": 0.19125925925925927, "grad_norm": 2.3568472862243652, "learning_rate": 0.00016195700518902893, "loss": 1.1028, "step": 1291 }, { "epoch": 0.19140740740740741, "grad_norm": 2.253685235977173, "learning_rate": 0.00016192735359525575, "loss": 1.1587, "step": 1292 }, { "epoch": 0.19155555555555556, "grad_norm": 3.685878038406372, "learning_rate": 0.0001618977020014826, "loss": 1.315, "step": 1293 }, { "epoch": 0.1917037037037037, "grad_norm": 2.7187671661376953, "learning_rate": 0.00016186805040770944, "loss": 1.0894, "step": 1294 }, { "epoch": 0.19185185185185186, "grad_norm": 4.728074550628662, "learning_rate": 0.00016183839881393625, "loss": 0.9953, "step": 1295 }, { "epoch": 0.192, "grad_norm": 1.6997319459915161, "learning_rate": 0.00016180874722016307, "loss": 1.3237, "step": 1296 }, { "epoch": 0.19214814814814815, "grad_norm": 2.6872713565826416, "learning_rate": 0.00016177909562638994, "loss": 1.0687, "step": 1297 }, { "epoch": 0.1922962962962963, "grad_norm": 1.523756504058838, "learning_rate": 0.00016174944403261676, "loss": 1.1545, "step": 1298 }, { "epoch": 0.19244444444444445, "grad_norm": 2.7069854736328125, "learning_rate": 0.00016171979243884358, "loss": 1.035, "step": 1299 }, { "epoch": 0.1925925925925926, "grad_norm": 1.2767950296401978, "learning_rate": 0.00016169014084507045, "loss": 1.2159, "step": 1300 }, { "epoch": 0.19274074074074074, "grad_norm": 1.6256358623504639, "learning_rate": 0.00016166048925129727, "loss": 1.2178, "step": 1301 }, { "epoch": 0.1928888888888889, "grad_norm": 3.1067140102386475, "learning_rate": 0.00016163083765752408, "loss": 1.1665, "step": 1302 }, { "epoch": 0.19303703703703703, "grad_norm": 2.419325113296509, "learning_rate": 0.00016160118606375095, "loss": 1.0093, "step": 1303 }, { "epoch": 0.19318518518518518, 
"grad_norm": 1.551223635673523, "learning_rate": 0.00016157153446997777, "loss": 1.1413, "step": 1304 }, { "epoch": 0.19333333333333333, "grad_norm": 2.0792694091796875, "learning_rate": 0.0001615418828762046, "loss": 1.3, "step": 1305 }, { "epoch": 0.19348148148148148, "grad_norm": 1.577027440071106, "learning_rate": 0.00016151223128243146, "loss": 1.0933, "step": 1306 }, { "epoch": 0.19362962962962962, "grad_norm": 1.7525880336761475, "learning_rate": 0.00016148257968865828, "loss": 1.3021, "step": 1307 }, { "epoch": 0.19377777777777777, "grad_norm": 3.2240004539489746, "learning_rate": 0.0001614529280948851, "loss": 1.2007, "step": 1308 }, { "epoch": 0.19392592592592592, "grad_norm": 2.8471946716308594, "learning_rate": 0.00016142327650111194, "loss": 1.2284, "step": 1309 }, { "epoch": 0.19407407407407407, "grad_norm": 2.295551300048828, "learning_rate": 0.00016139362490733878, "loss": 1.099, "step": 1310 }, { "epoch": 0.1942222222222222, "grad_norm": 4.0938720703125, "learning_rate": 0.0001613639733135656, "loss": 0.8849, "step": 1311 }, { "epoch": 0.19437037037037036, "grad_norm": 1.349643349647522, "learning_rate": 0.00016133432171979245, "loss": 0.9514, "step": 1312 }, { "epoch": 0.1945185185185185, "grad_norm": 2.4366378784179688, "learning_rate": 0.0001613046701260193, "loss": 1.3515, "step": 1313 }, { "epoch": 0.19466666666666665, "grad_norm": 1.8238210678100586, "learning_rate": 0.0001612750185322461, "loss": 1.0687, "step": 1314 }, { "epoch": 0.1948148148148148, "grad_norm": 3.347797393798828, "learning_rate": 0.00016124536693847295, "loss": 1.2437, "step": 1315 }, { "epoch": 0.19496296296296298, "grad_norm": 2.944796562194824, "learning_rate": 0.0001612157153446998, "loss": 0.9848, "step": 1316 }, { "epoch": 0.19511111111111112, "grad_norm": 1.7708215713500977, "learning_rate": 0.00016118606375092661, "loss": 1.0974, "step": 1317 }, { "epoch": 0.19525925925925927, "grad_norm": 2.0808119773864746, "learning_rate": 0.00016115641215715346, "loss": 1.1477, 
"step": 1318 }, { "epoch": 0.19540740740740742, "grad_norm": 3.2274317741394043, "learning_rate": 0.0001611267605633803, "loss": 1.209, "step": 1319 }, { "epoch": 0.19555555555555557, "grad_norm": 2.4879109859466553, "learning_rate": 0.00016109710896960712, "loss": 1.0825, "step": 1320 }, { "epoch": 0.1957037037037037, "grad_norm": 4.362395286560059, "learning_rate": 0.00016106745737583396, "loss": 1.0334, "step": 1321 }, { "epoch": 0.19585185185185186, "grad_norm": 2.8710215091705322, "learning_rate": 0.0001610378057820608, "loss": 1.3673, "step": 1322 }, { "epoch": 0.196, "grad_norm": 1.7256768941879272, "learning_rate": 0.00016100815418828763, "loss": 1.1933, "step": 1323 }, { "epoch": 0.19614814814814815, "grad_norm": 1.6461589336395264, "learning_rate": 0.00016097850259451444, "loss": 1.0943, "step": 1324 }, { "epoch": 0.1962962962962963, "grad_norm": 2.648347854614258, "learning_rate": 0.00016094885100074132, "loss": 1.1845, "step": 1325 }, { "epoch": 0.19644444444444445, "grad_norm": 1.4237217903137207, "learning_rate": 0.00016091919940696813, "loss": 0.8626, "step": 1326 }, { "epoch": 0.1965925925925926, "grad_norm": 2.1890766620635986, "learning_rate": 0.00016088954781319495, "loss": 0.9657, "step": 1327 }, { "epoch": 0.19674074074074074, "grad_norm": 1.728020191192627, "learning_rate": 0.0001608598962194218, "loss": 1.2267, "step": 1328 }, { "epoch": 0.1968888888888889, "grad_norm": 1.2080868482589722, "learning_rate": 0.00016083024462564864, "loss": 0.9941, "step": 1329 }, { "epoch": 0.19703703703703704, "grad_norm": 2.4535627365112305, "learning_rate": 0.00016080059303187546, "loss": 1.2543, "step": 1330 }, { "epoch": 0.19718518518518519, "grad_norm": 2.3592395782470703, "learning_rate": 0.0001607709414381023, "loss": 1.0558, "step": 1331 }, { "epoch": 0.19733333333333333, "grad_norm": 2.684448480606079, "learning_rate": 0.00016074128984432915, "loss": 0.9364, "step": 1332 }, { "epoch": 0.19748148148148148, "grad_norm": 5.671911716461182, 
"learning_rate": 0.00016071163825055596, "loss": 1.1495, "step": 1333 }, { "epoch": 0.19762962962962963, "grad_norm": 1.8713393211364746, "learning_rate": 0.0001606819866567828, "loss": 0.9452, "step": 1334 }, { "epoch": 0.19777777777777777, "grad_norm": 1.614362359046936, "learning_rate": 0.00016065233506300965, "loss": 1.1946, "step": 1335 }, { "epoch": 0.19792592592592592, "grad_norm": 3.343510866165161, "learning_rate": 0.00016062268346923647, "loss": 1.3064, "step": 1336 }, { "epoch": 0.19807407407407407, "grad_norm": 2.7058181762695312, "learning_rate": 0.00016059303187546331, "loss": 1.11, "step": 1337 }, { "epoch": 0.19822222222222222, "grad_norm": 2.640108823776245, "learning_rate": 0.00016056338028169016, "loss": 1.2595, "step": 1338 }, { "epoch": 0.19837037037037036, "grad_norm": 3.210209369659424, "learning_rate": 0.00016053372868791698, "loss": 1.0694, "step": 1339 }, { "epoch": 0.1985185185185185, "grad_norm": 2.481147527694702, "learning_rate": 0.00016050407709414382, "loss": 1.1474, "step": 1340 }, { "epoch": 0.19866666666666666, "grad_norm": 1.3905922174453735, "learning_rate": 0.00016047442550037066, "loss": 1.2432, "step": 1341 }, { "epoch": 0.1988148148148148, "grad_norm": 1.8446251153945923, "learning_rate": 0.00016044477390659748, "loss": 1.6521, "step": 1342 }, { "epoch": 0.19896296296296295, "grad_norm": 3.1401638984680176, "learning_rate": 0.00016041512231282433, "loss": 1.0923, "step": 1343 }, { "epoch": 0.1991111111111111, "grad_norm": 2.527815341949463, "learning_rate": 0.00016038547071905117, "loss": 1.0247, "step": 1344 }, { "epoch": 0.19925925925925925, "grad_norm": 5.10316801071167, "learning_rate": 0.000160355819125278, "loss": 1.1991, "step": 1345 }, { "epoch": 0.1994074074074074, "grad_norm": 1.7162216901779175, "learning_rate": 0.00016032616753150483, "loss": 1.2135, "step": 1346 }, { "epoch": 0.19955555555555557, "grad_norm": 1.5862842798233032, "learning_rate": 0.00016029651593773165, "loss": 1.0167, "step": 1347 }, { "epoch": 
0.19970370370370372, "grad_norm": 1.537105679512024, "learning_rate": 0.0001602668643439585, "loss": 1.2985, "step": 1348 }, { "epoch": 0.19985185185185186, "grad_norm": 3.7510077953338623, "learning_rate": 0.00016023721275018534, "loss": 1.1062, "step": 1349 }, { "epoch": 0.2, "grad_norm": 1.5134105682373047, "learning_rate": 0.00016020756115641216, "loss": 1.2994, "step": 1350 }, { "epoch": 0.20014814814814816, "grad_norm": 2.0335676670074463, "learning_rate": 0.000160177909562639, "loss": 1.0951, "step": 1351 }, { "epoch": 0.2002962962962963, "grad_norm": 1.9692561626434326, "learning_rate": 0.00016014825796886584, "loss": 1.2001, "step": 1352 }, { "epoch": 0.20044444444444445, "grad_norm": 5.022037029266357, "learning_rate": 0.00016011860637509266, "loss": 1.1642, "step": 1353 }, { "epoch": 0.2005925925925926, "grad_norm": 2.4920811653137207, "learning_rate": 0.0001600889547813195, "loss": 1.3397, "step": 1354 }, { "epoch": 0.20074074074074075, "grad_norm": 1.7635655403137207, "learning_rate": 0.00016005930318754635, "loss": 1.0308, "step": 1355 }, { "epoch": 0.2008888888888889, "grad_norm": 1.5410326719284058, "learning_rate": 0.00016002965159377317, "loss": 1.1857, "step": 1356 }, { "epoch": 0.20103703703703704, "grad_norm": 2.2208480834960938, "learning_rate": 0.00016, "loss": 1.1641, "step": 1357 }, { "epoch": 0.2011851851851852, "grad_norm": 1.3878949880599976, "learning_rate": 0.00015997034840622686, "loss": 1.1996, "step": 1358 }, { "epoch": 0.20133333333333334, "grad_norm": 3.8697714805603027, "learning_rate": 0.00015994069681245367, "loss": 1.2625, "step": 1359 }, { "epoch": 0.20148148148148148, "grad_norm": 2.1626200675964355, "learning_rate": 0.00015991104521868052, "loss": 1.1042, "step": 1360 }, { "epoch": 0.20162962962962963, "grad_norm": 1.4656286239624023, "learning_rate": 0.00015988139362490734, "loss": 1.0454, "step": 1361 }, { "epoch": 0.20177777777777778, "grad_norm": 2.388040065765381, "learning_rate": 0.00015985174203113418, "loss": 
0.9111, "step": 1362 }, { "epoch": 0.20192592592592593, "grad_norm": 1.7776933908462524, "learning_rate": 0.000159822090437361, "loss": 1.2934, "step": 1363 }, { "epoch": 0.20207407407407407, "grad_norm": 3.2282278537750244, "learning_rate": 0.00015979243884358784, "loss": 0.9072, "step": 1364 }, { "epoch": 0.20222222222222222, "grad_norm": 2.2581164836883545, "learning_rate": 0.0001597627872498147, "loss": 1.235, "step": 1365 }, { "epoch": 0.20237037037037037, "grad_norm": 2.0510082244873047, "learning_rate": 0.0001597331356560415, "loss": 1.1715, "step": 1366 }, { "epoch": 0.20251851851851851, "grad_norm": 1.0332187414169312, "learning_rate": 0.00015970348406226835, "loss": 1.0326, "step": 1367 }, { "epoch": 0.20266666666666666, "grad_norm": 1.8021240234375, "learning_rate": 0.0001596738324684952, "loss": 1.267, "step": 1368 }, { "epoch": 0.2028148148148148, "grad_norm": 4.383606910705566, "learning_rate": 0.000159644180874722, "loss": 1.2257, "step": 1369 }, { "epoch": 0.20296296296296296, "grad_norm": 1.646316409111023, "learning_rate": 0.00015961452928094886, "loss": 1.0715, "step": 1370 }, { "epoch": 0.2031111111111111, "grad_norm": 2.4021363258361816, "learning_rate": 0.0001595848776871757, "loss": 1.1131, "step": 1371 }, { "epoch": 0.20325925925925925, "grad_norm": 3.759568214416504, "learning_rate": 0.00015955522609340252, "loss": 0.971, "step": 1372 }, { "epoch": 0.2034074074074074, "grad_norm": 1.7920905351638794, "learning_rate": 0.00015952557449962936, "loss": 1.2104, "step": 1373 }, { "epoch": 0.20355555555555555, "grad_norm": 1.354028344154358, "learning_rate": 0.0001594959229058562, "loss": 1.1061, "step": 1374 }, { "epoch": 0.2037037037037037, "grad_norm": 2.5420985221862793, "learning_rate": 0.00015946627131208302, "loss": 1.1685, "step": 1375 }, { "epoch": 0.20385185185185184, "grad_norm": 2.520581007003784, "learning_rate": 0.00015943661971830987, "loss": 1.286, "step": 1376 }, { "epoch": 0.204, "grad_norm": 3.1659348011016846, "learning_rate": 
0.0001594069681245367, "loss": 1.423, "step": 1377 }, { "epoch": 0.20414814814814816, "grad_norm": 1.2454274892807007, "learning_rate": 0.00015937731653076353, "loss": 1.06, "step": 1378 }, { "epoch": 0.2042962962962963, "grad_norm": 4.370316505432129, "learning_rate": 0.00015934766493699037, "loss": 1.1003, "step": 1379 }, { "epoch": 0.20444444444444446, "grad_norm": 1.9933221340179443, "learning_rate": 0.00015931801334321722, "loss": 1.201, "step": 1380 }, { "epoch": 0.2045925925925926, "grad_norm": 3.2642831802368164, "learning_rate": 0.00015928836174944404, "loss": 0.9817, "step": 1381 }, { "epoch": 0.20474074074074075, "grad_norm": 1.2640935182571411, "learning_rate": 0.00015925871015567085, "loss": 1.1943, "step": 1382 }, { "epoch": 0.2048888888888889, "grad_norm": 1.4914500713348389, "learning_rate": 0.00015922905856189773, "loss": 1.0585, "step": 1383 }, { "epoch": 0.20503703703703705, "grad_norm": 1.990502119064331, "learning_rate": 0.00015919940696812454, "loss": 1.217, "step": 1384 }, { "epoch": 0.2051851851851852, "grad_norm": 1.5243146419525146, "learning_rate": 0.00015916975537435136, "loss": 1.3086, "step": 1385 }, { "epoch": 0.20533333333333334, "grad_norm": 1.9028431177139282, "learning_rate": 0.00015914010378057823, "loss": 1.1409, "step": 1386 }, { "epoch": 0.2054814814814815, "grad_norm": 2.9515159130096436, "learning_rate": 0.00015911045218680505, "loss": 1.1486, "step": 1387 }, { "epoch": 0.20562962962962963, "grad_norm": 1.4709255695343018, "learning_rate": 0.00015908080059303187, "loss": 1.1272, "step": 1388 }, { "epoch": 0.20577777777777778, "grad_norm": 1.290727972984314, "learning_rate": 0.00015905114899925874, "loss": 1.2133, "step": 1389 }, { "epoch": 0.20592592592592593, "grad_norm": 1.8945701122283936, "learning_rate": 0.00015902149740548556, "loss": 1.2185, "step": 1390 }, { "epoch": 0.20607407407407408, "grad_norm": 1.6242437362670898, "learning_rate": 0.00015899184581171237, "loss": 1.2213, "step": 1391 }, { "epoch": 
0.20622222222222222, "grad_norm": 2.466501474380493, "learning_rate": 0.00015896219421793924, "loss": 0.9557, "step": 1392 }, { "epoch": 0.20637037037037037, "grad_norm": 2.058912992477417, "learning_rate": 0.00015893254262416606, "loss": 1.1384, "step": 1393 }, { "epoch": 0.20651851851851852, "grad_norm": 1.4795424938201904, "learning_rate": 0.00015890289103039288, "loss": 1.0766, "step": 1394 }, { "epoch": 0.20666666666666667, "grad_norm": 1.2131915092468262, "learning_rate": 0.00015887323943661972, "loss": 1.1701, "step": 1395 }, { "epoch": 0.2068148148148148, "grad_norm": 1.1304845809936523, "learning_rate": 0.00015884358784284657, "loss": 0.9965, "step": 1396 }, { "epoch": 0.20696296296296296, "grad_norm": 2.0751266479492188, "learning_rate": 0.00015881393624907338, "loss": 1.1225, "step": 1397 }, { "epoch": 0.2071111111111111, "grad_norm": 2.2350399494171143, "learning_rate": 0.00015878428465530023, "loss": 1.029, "step": 1398 }, { "epoch": 0.20725925925925925, "grad_norm": 1.6366523504257202, "learning_rate": 0.00015875463306152707, "loss": 1.0351, "step": 1399 }, { "epoch": 0.2074074074074074, "grad_norm": 1.5569686889648438, "learning_rate": 0.0001587249814677539, "loss": 1.2932, "step": 1400 }, { "epoch": 0.20755555555555555, "grad_norm": 6.361331939697266, "learning_rate": 0.00015869532987398074, "loss": 1.086, "step": 1401 }, { "epoch": 0.2077037037037037, "grad_norm": 1.6677700281143188, "learning_rate": 0.00015866567828020758, "loss": 1.0963, "step": 1402 }, { "epoch": 0.20785185185185184, "grad_norm": 1.473945140838623, "learning_rate": 0.0001586360266864344, "loss": 1.1528, "step": 1403 }, { "epoch": 0.208, "grad_norm": 1.6613620519638062, "learning_rate": 0.00015860637509266124, "loss": 0.9257, "step": 1404 }, { "epoch": 0.20814814814814814, "grad_norm": 5.979618549346924, "learning_rate": 0.00015857672349888809, "loss": 1.2903, "step": 1405 }, { "epoch": 0.20829629629629628, "grad_norm": 2.915433168411255, "learning_rate": 0.0001585470719051149, 
"loss": 1.1357, "step": 1406 }, { "epoch": 0.20844444444444443, "grad_norm": 2.8678345680236816, "learning_rate": 0.00015851742031134175, "loss": 1.1544, "step": 1407 }, { "epoch": 0.20859259259259258, "grad_norm": 2.497988224029541, "learning_rate": 0.0001584877687175686, "loss": 1.1563, "step": 1408 }, { "epoch": 0.20874074074074075, "grad_norm": 1.2685167789459229, "learning_rate": 0.0001584581171237954, "loss": 1.0737, "step": 1409 }, { "epoch": 0.2088888888888889, "grad_norm": 1.5436787605285645, "learning_rate": 0.00015842846553002223, "loss": 1.0743, "step": 1410 }, { "epoch": 0.20903703703703705, "grad_norm": 1.4325158596038818, "learning_rate": 0.0001583988139362491, "loss": 1.1472, "step": 1411 }, { "epoch": 0.2091851851851852, "grad_norm": 1.1780500411987305, "learning_rate": 0.00015836916234247592, "loss": 1.0339, "step": 1412 }, { "epoch": 0.20933333333333334, "grad_norm": 1.5698156356811523, "learning_rate": 0.00015833951074870273, "loss": 1.1295, "step": 1413 }, { "epoch": 0.2094814814814815, "grad_norm": 2.26485538482666, "learning_rate": 0.00015830985915492958, "loss": 1.2991, "step": 1414 }, { "epoch": 0.20962962962962964, "grad_norm": 2.012516498565674, "learning_rate": 0.00015828020756115642, "loss": 1.014, "step": 1415 }, { "epoch": 0.20977777777777779, "grad_norm": 1.6176815032958984, "learning_rate": 0.00015825055596738324, "loss": 1.2393, "step": 1416 }, { "epoch": 0.20992592592592593, "grad_norm": 1.2089728116989136, "learning_rate": 0.00015822090437361008, "loss": 1.0259, "step": 1417 }, { "epoch": 0.21007407407407408, "grad_norm": 1.3265610933303833, "learning_rate": 0.00015819125277983693, "loss": 1.0523, "step": 1418 }, { "epoch": 0.21022222222222223, "grad_norm": 1.7452551126480103, "learning_rate": 0.00015816160118606375, "loss": 0.9849, "step": 1419 }, { "epoch": 0.21037037037037037, "grad_norm": 1.428599238395691, "learning_rate": 0.0001581319495922906, "loss": 1.1105, "step": 1420 }, { "epoch": 0.21051851851851852, "grad_norm": 
1.663043737411499, "learning_rate": 0.00015810229799851744, "loss": 1.5132, "step": 1421 }, { "epoch": 0.21066666666666667, "grad_norm": 1.9859318733215332, "learning_rate": 0.00015807264640474425, "loss": 1.208, "step": 1422 }, { "epoch": 0.21081481481481482, "grad_norm": 1.3973498344421387, "learning_rate": 0.0001580429948109711, "loss": 0.9525, "step": 1423 }, { "epoch": 0.21096296296296296, "grad_norm": 1.3018242120742798, "learning_rate": 0.00015801334321719794, "loss": 1.2355, "step": 1424 }, { "epoch": 0.2111111111111111, "grad_norm": 1.5467562675476074, "learning_rate": 0.00015798369162342476, "loss": 1.2726, "step": 1425 }, { "epoch": 0.21125925925925926, "grad_norm": 2.138482093811035, "learning_rate": 0.0001579540400296516, "loss": 1.2641, "step": 1426 }, { "epoch": 0.2114074074074074, "grad_norm": 1.714917778968811, "learning_rate": 0.00015792438843587845, "loss": 0.9664, "step": 1427 }, { "epoch": 0.21155555555555555, "grad_norm": 1.4636965990066528, "learning_rate": 0.00015789473684210527, "loss": 1.3803, "step": 1428 }, { "epoch": 0.2117037037037037, "grad_norm": 1.4125659465789795, "learning_rate": 0.0001578650852483321, "loss": 1.0361, "step": 1429 }, { "epoch": 0.21185185185185185, "grad_norm": 3.2594969272613525, "learning_rate": 0.00015783543365455895, "loss": 1.115, "step": 1430 }, { "epoch": 0.212, "grad_norm": 1.4381506443023682, "learning_rate": 0.00015780578206078577, "loss": 1.1786, "step": 1431 }, { "epoch": 0.21214814814814814, "grad_norm": 1.2792737483978271, "learning_rate": 0.00015777613046701262, "loss": 0.9593, "step": 1432 }, { "epoch": 0.2122962962962963, "grad_norm": 1.6736551523208618, "learning_rate": 0.00015774647887323943, "loss": 1.1135, "step": 1433 }, { "epoch": 0.21244444444444444, "grad_norm": 1.3635329008102417, "learning_rate": 0.00015771682727946628, "loss": 0.9715, "step": 1434 }, { "epoch": 0.21259259259259258, "grad_norm": 1.551611065864563, "learning_rate": 0.00015768717568569312, "loss": 1.0314, "step": 1435 }, { 
"epoch": 0.21274074074074073, "grad_norm": 2.0477828979492188, "learning_rate": 0.00015765752409191994, "loss": 1.3357, "step": 1436 }, { "epoch": 0.21288888888888888, "grad_norm": 1.5487900972366333, "learning_rate": 0.00015762787249814678, "loss": 1.0907, "step": 1437 }, { "epoch": 0.21303703703703702, "grad_norm": 5.054993629455566, "learning_rate": 0.00015759822090437363, "loss": 1.0958, "step": 1438 }, { "epoch": 0.21318518518518517, "grad_norm": 1.6109741926193237, "learning_rate": 0.00015756856931060045, "loss": 1.0971, "step": 1439 }, { "epoch": 0.21333333333333335, "grad_norm": 1.518358588218689, "learning_rate": 0.0001575389177168273, "loss": 1.1, "step": 1440 }, { "epoch": 0.2134814814814815, "grad_norm": 1.6584968566894531, "learning_rate": 0.00015750926612305413, "loss": 1.2818, "step": 1441 }, { "epoch": 0.21362962962962964, "grad_norm": 3.345515727996826, "learning_rate": 0.00015747961452928095, "loss": 1.2602, "step": 1442 }, { "epoch": 0.2137777777777778, "grad_norm": 2.380998373031616, "learning_rate": 0.0001574499629355078, "loss": 1.0518, "step": 1443 }, { "epoch": 0.21392592592592594, "grad_norm": 2.689133882522583, "learning_rate": 0.00015742031134173464, "loss": 1.1277, "step": 1444 }, { "epoch": 0.21407407407407408, "grad_norm": 1.4815983772277832, "learning_rate": 0.00015739065974796146, "loss": 1.1333, "step": 1445 }, { "epoch": 0.21422222222222223, "grad_norm": 1.9596880674362183, "learning_rate": 0.0001573610081541883, "loss": 1.2099, "step": 1446 }, { "epoch": 0.21437037037037038, "grad_norm": 1.3206571340560913, "learning_rate": 0.00015733135656041512, "loss": 1.0269, "step": 1447 }, { "epoch": 0.21451851851851853, "grad_norm": 3.195244073867798, "learning_rate": 0.00015730170496664196, "loss": 1.0816, "step": 1448 }, { "epoch": 0.21466666666666667, "grad_norm": 2.082491874694824, "learning_rate": 0.00015727205337286878, "loss": 1.081, "step": 1449 }, { "epoch": 0.21481481481481482, "grad_norm": 1.4490270614624023, "learning_rate": 
0.00015724240177909563, "loss": 1.0375, "step": 1450 }, { "epoch": 0.21496296296296297, "grad_norm": 1.2920150756835938, "learning_rate": 0.00015721275018532247, "loss": 1.1174, "step": 1451 }, { "epoch": 0.21511111111111111, "grad_norm": 3.3416709899902344, "learning_rate": 0.0001571830985915493, "loss": 1.2194, "step": 1452 }, { "epoch": 0.21525925925925926, "grad_norm": 1.8580683469772339, "learning_rate": 0.00015715344699777613, "loss": 1.0884, "step": 1453 }, { "epoch": 0.2154074074074074, "grad_norm": 1.8695363998413086, "learning_rate": 0.00015712379540400298, "loss": 1.0645, "step": 1454 }, { "epoch": 0.21555555555555556, "grad_norm": 1.5951566696166992, "learning_rate": 0.0001570941438102298, "loss": 1.0675, "step": 1455 }, { "epoch": 0.2157037037037037, "grad_norm": 1.4753663539886475, "learning_rate": 0.00015706449221645664, "loss": 1.1141, "step": 1456 }, { "epoch": 0.21585185185185185, "grad_norm": 1.8130995035171509, "learning_rate": 0.00015703484062268348, "loss": 1.0426, "step": 1457 }, { "epoch": 0.216, "grad_norm": 1.3725109100341797, "learning_rate": 0.0001570051890289103, "loss": 1.1511, "step": 1458 }, { "epoch": 0.21614814814814814, "grad_norm": 1.8450452089309692, "learning_rate": 0.00015697553743513715, "loss": 0.9767, "step": 1459 }, { "epoch": 0.2162962962962963, "grad_norm": 1.2268054485321045, "learning_rate": 0.000156945885841364, "loss": 0.8711, "step": 1460 }, { "epoch": 0.21644444444444444, "grad_norm": 1.552537441253662, "learning_rate": 0.0001569162342475908, "loss": 1.0715, "step": 1461 }, { "epoch": 0.2165925925925926, "grad_norm": 5.605630397796631, "learning_rate": 0.00015688658265381765, "loss": 1.2085, "step": 1462 }, { "epoch": 0.21674074074074073, "grad_norm": 2.071743965148926, "learning_rate": 0.0001568569310600445, "loss": 1.1515, "step": 1463 }, { "epoch": 0.21688888888888888, "grad_norm": 2.2309176921844482, "learning_rate": 0.0001568272794662713, "loss": 1.1824, "step": 1464 }, { "epoch": 0.21703703703703703, 
"grad_norm": 2.1204428672790527, "learning_rate": 0.00015679762787249816, "loss": 0.9924, "step": 1465 }, { "epoch": 0.21718518518518518, "grad_norm": 1.2242668867111206, "learning_rate": 0.000156767976278725, "loss": 0.9135, "step": 1466 }, { "epoch": 0.21733333333333332, "grad_norm": 3.142942428588867, "learning_rate": 0.00015673832468495182, "loss": 1.2583, "step": 1467 }, { "epoch": 0.21748148148148147, "grad_norm": 2.464092969894409, "learning_rate": 0.00015670867309117864, "loss": 1.0696, "step": 1468 }, { "epoch": 0.21762962962962962, "grad_norm": 1.8219599723815918, "learning_rate": 0.0001566790214974055, "loss": 1.0506, "step": 1469 }, { "epoch": 0.21777777777777776, "grad_norm": 1.3650825023651123, "learning_rate": 0.00015664936990363233, "loss": 1.217, "step": 1470 }, { "epoch": 0.21792592592592594, "grad_norm": 1.90289306640625, "learning_rate": 0.00015661971830985914, "loss": 1.3279, "step": 1471 }, { "epoch": 0.2180740740740741, "grad_norm": 1.2393357753753662, "learning_rate": 0.00015659006671608601, "loss": 1.1024, "step": 1472 }, { "epoch": 0.21822222222222223, "grad_norm": 1.8062958717346191, "learning_rate": 0.00015656041512231283, "loss": 1.1021, "step": 1473 }, { "epoch": 0.21837037037037038, "grad_norm": 1.8752361536026, "learning_rate": 0.00015653076352853965, "loss": 1.4975, "step": 1474 }, { "epoch": 0.21851851851851853, "grad_norm": 1.5685244798660278, "learning_rate": 0.00015650111193476652, "loss": 0.9332, "step": 1475 }, { "epoch": 0.21866666666666668, "grad_norm": 1.5438872575759888, "learning_rate": 0.00015647146034099334, "loss": 1.2319, "step": 1476 }, { "epoch": 0.21881481481481482, "grad_norm": 1.8651984930038452, "learning_rate": 0.00015644180874722016, "loss": 1.1674, "step": 1477 }, { "epoch": 0.21896296296296297, "grad_norm": 1.3855433464050293, "learning_rate": 0.00015641215715344703, "loss": 1.0712, "step": 1478 }, { "epoch": 0.21911111111111112, "grad_norm": 1.3934948444366455, "learning_rate": 0.00015638250555967384, 
"loss": 1.0684, "step": 1479 }, { "epoch": 0.21925925925925926, "grad_norm": 1.8999346494674683, "learning_rate": 0.00015635285396590066, "loss": 0.9289, "step": 1480 }, { "epoch": 0.2194074074074074, "grad_norm": 1.4764175415039062, "learning_rate": 0.0001563232023721275, "loss": 1.1297, "step": 1481 }, { "epoch": 0.21955555555555556, "grad_norm": 1.6788074970245361, "learning_rate": 0.00015629355077835435, "loss": 0.8778, "step": 1482 }, { "epoch": 0.2197037037037037, "grad_norm": 1.6045212745666504, "learning_rate": 0.00015626389918458117, "loss": 1.1411, "step": 1483 }, { "epoch": 0.21985185185185185, "grad_norm": 1.5411138534545898, "learning_rate": 0.000156234247590808, "loss": 1.1933, "step": 1484 }, { "epoch": 0.22, "grad_norm": 1.9160683155059814, "learning_rate": 0.00015620459599703486, "loss": 0.9902, "step": 1485 }, { "epoch": 0.22014814814814815, "grad_norm": 1.3305299282073975, "learning_rate": 0.00015617494440326167, "loss": 0.8694, "step": 1486 }, { "epoch": 0.2202962962962963, "grad_norm": 2.571554660797119, "learning_rate": 0.00015614529280948852, "loss": 1.1888, "step": 1487 }, { "epoch": 0.22044444444444444, "grad_norm": 2.218079090118408, "learning_rate": 0.00015611564121571536, "loss": 1.2693, "step": 1488 }, { "epoch": 0.2205925925925926, "grad_norm": 1.1321967840194702, "learning_rate": 0.00015608598962194218, "loss": 1.2222, "step": 1489 }, { "epoch": 0.22074074074074074, "grad_norm": 1.371054768562317, "learning_rate": 0.00015605633802816903, "loss": 1.1179, "step": 1490 }, { "epoch": 0.22088888888888888, "grad_norm": 1.216324806213379, "learning_rate": 0.00015602668643439587, "loss": 1.1176, "step": 1491 }, { "epoch": 0.22103703703703703, "grad_norm": 1.2088987827301025, "learning_rate": 0.0001559970348406227, "loss": 1.2248, "step": 1492 }, { "epoch": 0.22118518518518518, "grad_norm": 1.882351279258728, "learning_rate": 0.00015596738324684953, "loss": 1.3442, "step": 1493 }, { "epoch": 0.22133333333333333, "grad_norm": 
1.4370276927947998, "learning_rate": 0.00015593773165307638, "loss": 1.3169, "step": 1494 }, { "epoch": 0.22148148148148147, "grad_norm": 2.124148368835449, "learning_rate": 0.0001559080800593032, "loss": 1.2403, "step": 1495 }, { "epoch": 0.22162962962962962, "grad_norm": 1.5456814765930176, "learning_rate": 0.00015587842846553, "loss": 1.2922, "step": 1496 }, { "epoch": 0.22177777777777777, "grad_norm": 1.3349412679672241, "learning_rate": 0.00015584877687175688, "loss": 1.351, "step": 1497 }, { "epoch": 0.22192592592592592, "grad_norm": 1.2368528842926025, "learning_rate": 0.0001558191252779837, "loss": 1.2629, "step": 1498 }, { "epoch": 0.22207407407407406, "grad_norm": 1.2499349117279053, "learning_rate": 0.00015578947368421052, "loss": 1.2562, "step": 1499 }, { "epoch": 0.2222222222222222, "grad_norm": 1.4824923276901245, "learning_rate": 0.0001557598220904374, "loss": 1.1449, "step": 1500 }, { "epoch": 0.22237037037037036, "grad_norm": 1.833436131477356, "learning_rate": 0.0001557301704966642, "loss": 1.1581, "step": 1501 }, { "epoch": 0.22251851851851853, "grad_norm": 1.676798701286316, "learning_rate": 0.00015570051890289102, "loss": 1.1682, "step": 1502 }, { "epoch": 0.22266666666666668, "grad_norm": 2.183213949203491, "learning_rate": 0.00015567086730911787, "loss": 0.9933, "step": 1503 }, { "epoch": 0.22281481481481483, "grad_norm": 1.5602302551269531, "learning_rate": 0.0001556412157153447, "loss": 1.1624, "step": 1504 }, { "epoch": 0.22296296296296297, "grad_norm": 1.3274348974227905, "learning_rate": 0.00015561156412157153, "loss": 0.9692, "step": 1505 }, { "epoch": 0.22311111111111112, "grad_norm": 1.28597891330719, "learning_rate": 0.00015558191252779837, "loss": 1.0527, "step": 1506 }, { "epoch": 0.22325925925925927, "grad_norm": 1.9549756050109863, "learning_rate": 0.00015555226093402522, "loss": 1.0626, "step": 1507 }, { "epoch": 0.22340740740740742, "grad_norm": 1.334690809249878, "learning_rate": 0.00015552260934025204, "loss": 1.1926, "step": 
1508 }, { "epoch": 0.22355555555555556, "grad_norm": 1.8309886455535889, "learning_rate": 0.00015549295774647888, "loss": 1.1651, "step": 1509 }, { "epoch": 0.2237037037037037, "grad_norm": 1.4169001579284668, "learning_rate": 0.00015546330615270572, "loss": 1.1697, "step": 1510 }, { "epoch": 0.22385185185185186, "grad_norm": 1.2612583637237549, "learning_rate": 0.00015543365455893254, "loss": 1.0604, "step": 1511 }, { "epoch": 0.224, "grad_norm": 1.4787007570266724, "learning_rate": 0.0001554040029651594, "loss": 1.1787, "step": 1512 }, { "epoch": 0.22414814814814815, "grad_norm": 2.134244918823242, "learning_rate": 0.00015537435137138623, "loss": 1.0592, "step": 1513 }, { "epoch": 0.2242962962962963, "grad_norm": 2.822035312652588, "learning_rate": 0.00015534469977761305, "loss": 1.1476, "step": 1514 }, { "epoch": 0.22444444444444445, "grad_norm": 1.7734057903289795, "learning_rate": 0.0001553150481838399, "loss": 1.039, "step": 1515 }, { "epoch": 0.2245925925925926, "grad_norm": 1.3871924877166748, "learning_rate": 0.00015528539659006674, "loss": 1.0588, "step": 1516 }, { "epoch": 0.22474074074074074, "grad_norm": 1.3794752359390259, "learning_rate": 0.00015525574499629355, "loss": 1.254, "step": 1517 }, { "epoch": 0.2248888888888889, "grad_norm": 1.6303880214691162, "learning_rate": 0.0001552260934025204, "loss": 1.5117, "step": 1518 }, { "epoch": 0.22503703703703704, "grad_norm": 2.4256293773651123, "learning_rate": 0.00015519644180874722, "loss": 1.1298, "step": 1519 }, { "epoch": 0.22518518518518518, "grad_norm": 2.3574140071868896, "learning_rate": 0.00015516679021497406, "loss": 1.2046, "step": 1520 }, { "epoch": 0.22533333333333333, "grad_norm": 1.2977325916290283, "learning_rate": 0.0001551371386212009, "loss": 1.0207, "step": 1521 }, { "epoch": 0.22548148148148148, "grad_norm": 2.1307456493377686, "learning_rate": 0.00015510748702742772, "loss": 1.2669, "step": 1522 }, { "epoch": 0.22562962962962962, "grad_norm": 2.4574077129364014, "learning_rate": 
0.00015507783543365457, "loss": 1.2679, "step": 1523 }, { "epoch": 0.22577777777777777, "grad_norm": 1.542917251586914, "learning_rate": 0.0001550481838398814, "loss": 1.1871, "step": 1524 }, { "epoch": 0.22592592592592592, "grad_norm": 1.4039915800094604, "learning_rate": 0.00015501853224610823, "loss": 0.9045, "step": 1525 }, { "epoch": 0.22607407407407407, "grad_norm": 4.266626834869385, "learning_rate": 0.00015498888065233507, "loss": 1.4225, "step": 1526 }, { "epoch": 0.2262222222222222, "grad_norm": 2.024946451187134, "learning_rate": 0.00015495922905856192, "loss": 1.1265, "step": 1527 }, { "epoch": 0.22637037037037036, "grad_norm": 3.2204794883728027, "learning_rate": 0.00015492957746478874, "loss": 1.4952, "step": 1528 }, { "epoch": 0.2265185185185185, "grad_norm": 2.093524932861328, "learning_rate": 0.00015489992587101558, "loss": 1.2728, "step": 1529 }, { "epoch": 0.22666666666666666, "grad_norm": 2.0702030658721924, "learning_rate": 0.00015487027427724242, "loss": 1.047, "step": 1530 }, { "epoch": 0.2268148148148148, "grad_norm": 1.324537754058838, "learning_rate": 0.00015484062268346924, "loss": 1.3573, "step": 1531 }, { "epoch": 0.22696296296296295, "grad_norm": 3.2942769527435303, "learning_rate": 0.00015481097108969609, "loss": 1.0047, "step": 1532 }, { "epoch": 0.22711111111111112, "grad_norm": 1.4537463188171387, "learning_rate": 0.0001547813194959229, "loss": 1.1266, "step": 1533 }, { "epoch": 0.22725925925925927, "grad_norm": 1.2477037906646729, "learning_rate": 0.00015475166790214975, "loss": 1.1559, "step": 1534 }, { "epoch": 0.22740740740740742, "grad_norm": 2.4413416385650635, "learning_rate": 0.00015472201630837657, "loss": 0.8902, "step": 1535 }, { "epoch": 0.22755555555555557, "grad_norm": 1.791354775428772, "learning_rate": 0.0001546923647146034, "loss": 1.1871, "step": 1536 }, { "epoch": 0.2277037037037037, "grad_norm": 4.12494421005249, "learning_rate": 0.00015466271312083025, "loss": 1.1432, "step": 1537 }, { "epoch": 
0.22785185185185186, "grad_norm": 1.4111377000808716, "learning_rate": 0.00015463306152705707, "loss": 1.1645, "step": 1538 }, { "epoch": 0.228, "grad_norm": 1.289906620979309, "learning_rate": 0.00015460340993328392, "loss": 1.1256, "step": 1539 }, { "epoch": 0.22814814814814816, "grad_norm": 1.2543591260910034, "learning_rate": 0.00015457375833951076, "loss": 1.1597, "step": 1540 }, { "epoch": 0.2282962962962963, "grad_norm": 1.2581937313079834, "learning_rate": 0.00015454410674573758, "loss": 1.0717, "step": 1541 }, { "epoch": 0.22844444444444445, "grad_norm": 2.8613216876983643, "learning_rate": 0.00015451445515196442, "loss": 1.1478, "step": 1542 }, { "epoch": 0.2285925925925926, "grad_norm": 1.1726185083389282, "learning_rate": 0.00015448480355819127, "loss": 1.1574, "step": 1543 }, { "epoch": 0.22874074074074074, "grad_norm": 1.0736782550811768, "learning_rate": 0.00015445515196441808, "loss": 0.7646, "step": 1544 }, { "epoch": 0.2288888888888889, "grad_norm": 1.8339978456497192, "learning_rate": 0.00015442550037064493, "loss": 1.0807, "step": 1545 }, { "epoch": 0.22903703703703704, "grad_norm": 1.7938997745513916, "learning_rate": 0.00015439584877687177, "loss": 1.4722, "step": 1546 }, { "epoch": 0.2291851851851852, "grad_norm": 1.0827606916427612, "learning_rate": 0.0001543661971830986, "loss": 1.0839, "step": 1547 }, { "epoch": 0.22933333333333333, "grad_norm": 1.4679458141326904, "learning_rate": 0.00015433654558932543, "loss": 1.0397, "step": 1548 }, { "epoch": 0.22948148148148148, "grad_norm": 1.7668004035949707, "learning_rate": 0.00015430689399555228, "loss": 0.9854, "step": 1549 }, { "epoch": 0.22962962962962963, "grad_norm": 1.282181739807129, "learning_rate": 0.0001542772424017791, "loss": 1.2771, "step": 1550 }, { "epoch": 0.22977777777777778, "grad_norm": 1.6717363595962524, "learning_rate": 0.00015424759080800594, "loss": 1.0952, "step": 1551 }, { "epoch": 0.22992592592592592, "grad_norm": 1.4578113555908203, "learning_rate": 
0.00015421793921423279, "loss": 1.1101, "step": 1552 }, { "epoch": 0.23007407407407407, "grad_norm": 1.8867779970169067, "learning_rate": 0.0001541882876204596, "loss": 1.0367, "step": 1553 }, { "epoch": 0.23022222222222222, "grad_norm": 1.3532156944274902, "learning_rate": 0.00015415863602668642, "loss": 0.972, "step": 1554 }, { "epoch": 0.23037037037037036, "grad_norm": 1.3631467819213867, "learning_rate": 0.0001541289844329133, "loss": 0.9205, "step": 1555 }, { "epoch": 0.2305185185185185, "grad_norm": 2.7105939388275146, "learning_rate": 0.0001540993328391401, "loss": 1.2536, "step": 1556 }, { "epoch": 0.23066666666666666, "grad_norm": 1.349776029586792, "learning_rate": 0.00015406968124536693, "loss": 1.3044, "step": 1557 }, { "epoch": 0.2308148148148148, "grad_norm": 1.2606697082519531, "learning_rate": 0.0001540400296515938, "loss": 1.2882, "step": 1558 }, { "epoch": 0.23096296296296295, "grad_norm": 4.026727199554443, "learning_rate": 0.00015401037805782062, "loss": 0.9589, "step": 1559 }, { "epoch": 0.2311111111111111, "grad_norm": 2.5214037895202637, "learning_rate": 0.00015398072646404743, "loss": 1.2341, "step": 1560 }, { "epoch": 0.23125925925925925, "grad_norm": 1.9056528806686401, "learning_rate": 0.0001539510748702743, "loss": 0.7476, "step": 1561 }, { "epoch": 0.2314074074074074, "grad_norm": 5.390804767608643, "learning_rate": 0.00015392142327650112, "loss": 1.2445, "step": 1562 }, { "epoch": 0.23155555555555554, "grad_norm": 1.888152837753296, "learning_rate": 0.00015389177168272794, "loss": 0.9465, "step": 1563 }, { "epoch": 0.23170370370370372, "grad_norm": 2.0070877075195312, "learning_rate": 0.0001538621200889548, "loss": 1.1051, "step": 1564 }, { "epoch": 0.23185185185185186, "grad_norm": 1.2344915866851807, "learning_rate": 0.00015383246849518163, "loss": 0.9859, "step": 1565 }, { "epoch": 0.232, "grad_norm": 1.742415428161621, "learning_rate": 0.00015380281690140845, "loss": 1.1889, "step": 1566 }, { "epoch": 0.23214814814814816, 
"grad_norm": 3.2890334129333496, "learning_rate": 0.0001537731653076353, "loss": 1.3544, "step": 1567 }, { "epoch": 0.2322962962962963, "grad_norm": 1.8093900680541992, "learning_rate": 0.00015374351371386213, "loss": 1.1094, "step": 1568 }, { "epoch": 0.23244444444444445, "grad_norm": 2.2620792388916016, "learning_rate": 0.00015371386212008895, "loss": 1.1049, "step": 1569 }, { "epoch": 0.2325925925925926, "grad_norm": 1.7136729955673218, "learning_rate": 0.0001536842105263158, "loss": 1.1256, "step": 1570 }, { "epoch": 0.23274074074074075, "grad_norm": 1.5654417276382446, "learning_rate": 0.00015365455893254264, "loss": 1.1673, "step": 1571 }, { "epoch": 0.2328888888888889, "grad_norm": 1.0836280584335327, "learning_rate": 0.00015362490733876946, "loss": 1.2182, "step": 1572 }, { "epoch": 0.23303703703703704, "grad_norm": 1.9667060375213623, "learning_rate": 0.0001535952557449963, "loss": 1.1324, "step": 1573 }, { "epoch": 0.2331851851851852, "grad_norm": 1.7171205282211304, "learning_rate": 0.00015356560415122315, "loss": 1.1833, "step": 1574 }, { "epoch": 0.23333333333333334, "grad_norm": 1.2634750604629517, "learning_rate": 0.00015353595255744996, "loss": 1.2652, "step": 1575 }, { "epoch": 0.23348148148148148, "grad_norm": 1.0828866958618164, "learning_rate": 0.0001535063009636768, "loss": 0.8981, "step": 1576 }, { "epoch": 0.23362962962962963, "grad_norm": 1.451007604598999, "learning_rate": 0.00015347664936990365, "loss": 1.0465, "step": 1577 }, { "epoch": 0.23377777777777778, "grad_norm": 1.5012891292572021, "learning_rate": 0.00015344699777613047, "loss": 1.3538, "step": 1578 }, { "epoch": 0.23392592592592593, "grad_norm": 1.3202357292175293, "learning_rate": 0.00015341734618235731, "loss": 1.192, "step": 1579 }, { "epoch": 0.23407407407407407, "grad_norm": 2.689044952392578, "learning_rate": 0.00015338769458858416, "loss": 1.127, "step": 1580 }, { "epoch": 0.23422222222222222, "grad_norm": 1.9422969818115234, "learning_rate": 0.00015335804299481098, 
"loss": 0.9591, "step": 1581 }, { "epoch": 0.23437037037037037, "grad_norm": 1.6667768955230713, "learning_rate": 0.0001533283914010378, "loss": 1.227, "step": 1582 }, { "epoch": 0.23451851851851852, "grad_norm": 2.025862216949463, "learning_rate": 0.00015329873980726467, "loss": 1.1708, "step": 1583 }, { "epoch": 0.23466666666666666, "grad_norm": 3.5528149604797363, "learning_rate": 0.00015326908821349148, "loss": 1.2254, "step": 1584 }, { "epoch": 0.2348148148148148, "grad_norm": 1.451198935508728, "learning_rate": 0.0001532394366197183, "loss": 1.1741, "step": 1585 }, { "epoch": 0.23496296296296296, "grad_norm": 1.3051122426986694, "learning_rate": 0.00015320978502594517, "loss": 1.0806, "step": 1586 }, { "epoch": 0.2351111111111111, "grad_norm": 1.3965210914611816, "learning_rate": 0.000153180133432172, "loss": 1.1295, "step": 1587 }, { "epoch": 0.23525925925925925, "grad_norm": 1.2600857019424438, "learning_rate": 0.0001531504818383988, "loss": 0.9167, "step": 1588 }, { "epoch": 0.2354074074074074, "grad_norm": 2.6228692531585693, "learning_rate": 0.00015312083024462565, "loss": 1.394, "step": 1589 }, { "epoch": 0.23555555555555555, "grad_norm": 1.4789807796478271, "learning_rate": 0.0001530911786508525, "loss": 1.2326, "step": 1590 }, { "epoch": 0.2357037037037037, "grad_norm": 1.1614006757736206, "learning_rate": 0.0001530615270570793, "loss": 0.8976, "step": 1591 }, { "epoch": 0.23585185185185184, "grad_norm": 3.0136983394622803, "learning_rate": 0.00015303187546330616, "loss": 1.0333, "step": 1592 }, { "epoch": 0.236, "grad_norm": 1.777445912361145, "learning_rate": 0.000153002223869533, "loss": 1.3849, "step": 1593 }, { "epoch": 0.23614814814814813, "grad_norm": 2.2834534645080566, "learning_rate": 0.00015297257227575982, "loss": 1.5408, "step": 1594 }, { "epoch": 0.2362962962962963, "grad_norm": 1.2909154891967773, "learning_rate": 0.00015294292068198666, "loss": 1.1083, "step": 1595 }, { "epoch": 0.23644444444444446, "grad_norm": 1.6266371011734009, 
"learning_rate": 0.0001529132690882135, "loss": 1.2947, "step": 1596 }, { "epoch": 0.2365925925925926, "grad_norm": 1.3940317630767822, "learning_rate": 0.00015288361749444033, "loss": 0.9727, "step": 1597 }, { "epoch": 0.23674074074074075, "grad_norm": 1.4263591766357422, "learning_rate": 0.00015285396590066717, "loss": 1.0053, "step": 1598 }, { "epoch": 0.2368888888888889, "grad_norm": 1.289556622505188, "learning_rate": 0.00015282431430689401, "loss": 1.1829, "step": 1599 }, { "epoch": 0.23703703703703705, "grad_norm": 1.2458724975585938, "learning_rate": 0.00015279466271312083, "loss": 0.9879, "step": 1600 }, { "epoch": 0.2371851851851852, "grad_norm": 0.9497737288475037, "learning_rate": 0.00015276501111934768, "loss": 1.1464, "step": 1601 }, { "epoch": 0.23733333333333334, "grad_norm": 1.2145698070526123, "learning_rate": 0.00015273535952557452, "loss": 1.176, "step": 1602 }, { "epoch": 0.2374814814814815, "grad_norm": 1.0378276109695435, "learning_rate": 0.00015270570793180134, "loss": 1.053, "step": 1603 }, { "epoch": 0.23762962962962964, "grad_norm": 1.327704668045044, "learning_rate": 0.00015267605633802818, "loss": 1.2492, "step": 1604 }, { "epoch": 0.23777777777777778, "grad_norm": 1.621195912361145, "learning_rate": 0.000152646404744255, "loss": 1.2555, "step": 1605 }, { "epoch": 0.23792592592592593, "grad_norm": 1.1727124452590942, "learning_rate": 0.00015261675315048184, "loss": 1.0607, "step": 1606 }, { "epoch": 0.23807407407407408, "grad_norm": 2.3401219844818115, "learning_rate": 0.0001525871015567087, "loss": 1.1388, "step": 1607 }, { "epoch": 0.23822222222222222, "grad_norm": 1.2623165845870972, "learning_rate": 0.0001525574499629355, "loss": 0.8555, "step": 1608 }, { "epoch": 0.23837037037037037, "grad_norm": 1.048319935798645, "learning_rate": 0.00015252779836916235, "loss": 1.0111, "step": 1609 }, { "epoch": 0.23851851851851852, "grad_norm": 2.189643383026123, "learning_rate": 0.0001524981467753892, "loss": 1.0949, "step": 1610 }, { "epoch": 
0.23866666666666667, "grad_norm": 3.7101433277130127, "learning_rate": 0.000152468495181616, "loss": 1.3376, "step": 1611 }, { "epoch": 0.2388148148148148, "grad_norm": 1.1837656497955322, "learning_rate": 0.00015243884358784286, "loss": 0.9051, "step": 1612 }, { "epoch": 0.23896296296296296, "grad_norm": 1.5767087936401367, "learning_rate": 0.0001524091919940697, "loss": 1.1235, "step": 1613 }, { "epoch": 0.2391111111111111, "grad_norm": 1.9962878227233887, "learning_rate": 0.00015237954040029652, "loss": 1.2182, "step": 1614 }, { "epoch": 0.23925925925925925, "grad_norm": 1.3688435554504395, "learning_rate": 0.00015234988880652336, "loss": 0.973, "step": 1615 }, { "epoch": 0.2394074074074074, "grad_norm": 1.3422621488571167, "learning_rate": 0.0001523202372127502, "loss": 1.266, "step": 1616 }, { "epoch": 0.23955555555555555, "grad_norm": 1.719692587852478, "learning_rate": 0.00015229058561897702, "loss": 1.3915, "step": 1617 }, { "epoch": 0.2397037037037037, "grad_norm": 1.1138745546340942, "learning_rate": 0.00015226093402520387, "loss": 0.8391, "step": 1618 }, { "epoch": 0.23985185185185184, "grad_norm": 1.1409568786621094, "learning_rate": 0.0001522312824314307, "loss": 1.0422, "step": 1619 }, { "epoch": 0.24, "grad_norm": 1.4203941822052002, "learning_rate": 0.00015220163083765753, "loss": 0.9852, "step": 1620 }, { "epoch": 0.24014814814814814, "grad_norm": 1.5709267854690552, "learning_rate": 0.00015217197924388435, "loss": 1.1323, "step": 1621 }, { "epoch": 0.24029629629629629, "grad_norm": 1.2581415176391602, "learning_rate": 0.0001521423276501112, "loss": 1.2246, "step": 1622 }, { "epoch": 0.24044444444444443, "grad_norm": 1.197487711906433, "learning_rate": 0.00015211267605633804, "loss": 1.2816, "step": 1623 }, { "epoch": 0.24059259259259258, "grad_norm": 1.5059443712234497, "learning_rate": 0.00015208302446256485, "loss": 1.2273, "step": 1624 }, { "epoch": 0.24074074074074073, "grad_norm": 1.7525125741958618, "learning_rate": 0.0001520533728687917, 
"loss": 0.9537, "step": 1625 }, { "epoch": 0.2408888888888889, "grad_norm": 1.6179605722427368, "learning_rate": 0.00015202372127501854, "loss": 1.276, "step": 1626 }, { "epoch": 0.24103703703703705, "grad_norm": 1.1902782917022705, "learning_rate": 0.00015199406968124536, "loss": 0.9585, "step": 1627 }, { "epoch": 0.2411851851851852, "grad_norm": 1.3122388124465942, "learning_rate": 0.0001519644180874722, "loss": 1.3009, "step": 1628 }, { "epoch": 0.24133333333333334, "grad_norm": 1.5132875442504883, "learning_rate": 0.00015193476649369905, "loss": 1.1741, "step": 1629 }, { "epoch": 0.2414814814814815, "grad_norm": 1.6310935020446777, "learning_rate": 0.00015190511489992587, "loss": 1.3513, "step": 1630 }, { "epoch": 0.24162962962962964, "grad_norm": 1.2689038515090942, "learning_rate": 0.0001518754633061527, "loss": 1.022, "step": 1631 }, { "epoch": 0.24177777777777779, "grad_norm": 6.270352363586426, "learning_rate": 0.00015184581171237956, "loss": 1.2905, "step": 1632 }, { "epoch": 0.24192592592592593, "grad_norm": 4.507718086242676, "learning_rate": 0.00015181616011860637, "loss": 1.1989, "step": 1633 }, { "epoch": 0.24207407407407408, "grad_norm": 1.1099902391433716, "learning_rate": 0.00015178650852483322, "loss": 1.2158, "step": 1634 }, { "epoch": 0.24222222222222223, "grad_norm": 2.4325876235961914, "learning_rate": 0.00015175685693106006, "loss": 0.954, "step": 1635 }, { "epoch": 0.24237037037037037, "grad_norm": 3.1827144622802734, "learning_rate": 0.00015172720533728688, "loss": 1.3827, "step": 1636 }, { "epoch": 0.24251851851851852, "grad_norm": 1.328933835029602, "learning_rate": 0.00015169755374351372, "loss": 1.0837, "step": 1637 }, { "epoch": 0.24266666666666667, "grad_norm": 1.7154287099838257, "learning_rate": 0.00015166790214974057, "loss": 1.1321, "step": 1638 }, { "epoch": 0.24281481481481482, "grad_norm": 2.116061210632324, "learning_rate": 0.00015163825055596739, "loss": 1.0563, "step": 1639 }, { "epoch": 0.24296296296296296, "grad_norm": 
1.2728734016418457, "learning_rate": 0.0001516085989621942, "loss": 1.1292, "step": 1640 }, { "epoch": 0.2431111111111111, "grad_norm": 1.740496039390564, "learning_rate": 0.00015157894736842108, "loss": 1.144, "step": 1641 }, { "epoch": 0.24325925925925926, "grad_norm": 1.2164138555526733, "learning_rate": 0.0001515492957746479, "loss": 0.9607, "step": 1642 }, { "epoch": 0.2434074074074074, "grad_norm": 1.3772945404052734, "learning_rate": 0.0001515196441808747, "loss": 1.204, "step": 1643 }, { "epoch": 0.24355555555555555, "grad_norm": 2.048625946044922, "learning_rate": 0.00015148999258710158, "loss": 1.2636, "step": 1644 }, { "epoch": 0.2437037037037037, "grad_norm": 1.2583869695663452, "learning_rate": 0.0001514603409933284, "loss": 0.97, "step": 1645 }, { "epoch": 0.24385185185185185, "grad_norm": 1.6000150442123413, "learning_rate": 0.00015143068939955522, "loss": 1.3946, "step": 1646 }, { "epoch": 0.244, "grad_norm": 1.0762135982513428, "learning_rate": 0.0001514010378057821, "loss": 1.0259, "step": 1647 }, { "epoch": 0.24414814814814814, "grad_norm": 1.103447675704956, "learning_rate": 0.0001513713862120089, "loss": 1.0098, "step": 1648 }, { "epoch": 0.2442962962962963, "grad_norm": 1.3879259824752808, "learning_rate": 0.00015134173461823572, "loss": 0.9891, "step": 1649 }, { "epoch": 0.24444444444444444, "grad_norm": 1.2222763299942017, "learning_rate": 0.0001513120830244626, "loss": 1.0233, "step": 1650 }, { "epoch": 0.24459259259259258, "grad_norm": 2.4225425720214844, "learning_rate": 0.0001512824314306894, "loss": 1.0827, "step": 1651 }, { "epoch": 0.24474074074074073, "grad_norm": 1.1073728799819946, "learning_rate": 0.00015125277983691623, "loss": 1.1398, "step": 1652 }, { "epoch": 0.24488888888888888, "grad_norm": 1.8750205039978027, "learning_rate": 0.00015122312824314307, "loss": 1.0011, "step": 1653 }, { "epoch": 0.24503703703703703, "grad_norm": 1.8179675340652466, "learning_rate": 0.00015119347664936992, "loss": 1.2671, "step": 1654 }, { 
"epoch": 0.24518518518518517, "grad_norm": 11.022109031677246, "learning_rate": 0.00015116382505559673, "loss": 1.124, "step": 1655 }, { "epoch": 0.24533333333333332, "grad_norm": 1.687408208847046, "learning_rate": 0.00015113417346182358, "loss": 1.1028, "step": 1656 }, { "epoch": 0.2454814814814815, "grad_norm": 1.3397771120071411, "learning_rate": 0.00015110452186805042, "loss": 1.2922, "step": 1657 }, { "epoch": 0.24562962962962964, "grad_norm": 1.9326858520507812, "learning_rate": 0.00015107487027427724, "loss": 1.0396, "step": 1658 }, { "epoch": 0.2457777777777778, "grad_norm": 2.765787124633789, "learning_rate": 0.00015104521868050409, "loss": 1.1338, "step": 1659 }, { "epoch": 0.24592592592592594, "grad_norm": 1.0958811044692993, "learning_rate": 0.00015101556708673093, "loss": 0.9947, "step": 1660 }, { "epoch": 0.24607407407407408, "grad_norm": 2.3440728187561035, "learning_rate": 0.00015098591549295775, "loss": 1.2989, "step": 1661 }, { "epoch": 0.24622222222222223, "grad_norm": 1.3007993698120117, "learning_rate": 0.0001509562638991846, "loss": 1.357, "step": 1662 }, { "epoch": 0.24637037037037038, "grad_norm": 1.1814420223236084, "learning_rate": 0.00015092661230541144, "loss": 1.417, "step": 1663 }, { "epoch": 0.24651851851851853, "grad_norm": 1.2551305294036865, "learning_rate": 0.00015089696071163825, "loss": 1.2175, "step": 1664 }, { "epoch": 0.24666666666666667, "grad_norm": 1.9871037006378174, "learning_rate": 0.0001508673091178651, "loss": 1.1472, "step": 1665 }, { "epoch": 0.24681481481481482, "grad_norm": 1.1334728002548218, "learning_rate": 0.00015083765752409194, "loss": 1.0093, "step": 1666 }, { "epoch": 0.24696296296296297, "grad_norm": 1.8311874866485596, "learning_rate": 0.00015080800593031876, "loss": 1.1317, "step": 1667 }, { "epoch": 0.24711111111111111, "grad_norm": 1.4071077108383179, "learning_rate": 0.0001507783543365456, "loss": 1.0525, "step": 1668 }, { "epoch": 0.24725925925925926, "grad_norm": 1.2422730922698975, 
"learning_rate": 0.00015074870274277245, "loss": 1.0748, "step": 1669 }, { "epoch": 0.2474074074074074, "grad_norm": 1.0326001644134521, "learning_rate": 0.00015071905114899927, "loss": 1.2629, "step": 1670 }, { "epoch": 0.24755555555555556, "grad_norm": 1.173397183418274, "learning_rate": 0.00015068939955522608, "loss": 1.2473, "step": 1671 }, { "epoch": 0.2477037037037037, "grad_norm": 0.9199334979057312, "learning_rate": 0.00015065974796145296, "loss": 0.9084, "step": 1672 }, { "epoch": 0.24785185185185185, "grad_norm": 2.1282355785369873, "learning_rate": 0.00015063009636767977, "loss": 1.0612, "step": 1673 }, { "epoch": 0.248, "grad_norm": 1.5971500873565674, "learning_rate": 0.0001506004447739066, "loss": 1.1243, "step": 1674 }, { "epoch": 0.24814814814814815, "grad_norm": 1.3204591274261475, "learning_rate": 0.00015057079318013343, "loss": 1.1751, "step": 1675 }, { "epoch": 0.2482962962962963, "grad_norm": 1.365966558456421, "learning_rate": 0.00015054114158636028, "loss": 1.2581, "step": 1676 }, { "epoch": 0.24844444444444444, "grad_norm": 2.5307705402374268, "learning_rate": 0.0001505114899925871, "loss": 1.0367, "step": 1677 }, { "epoch": 0.2485925925925926, "grad_norm": 1.1115132570266724, "learning_rate": 0.00015048183839881394, "loss": 1.0591, "step": 1678 }, { "epoch": 0.24874074074074073, "grad_norm": 1.1425161361694336, "learning_rate": 0.00015045218680504079, "loss": 0.9711, "step": 1679 }, { "epoch": 0.24888888888888888, "grad_norm": 0.990850031375885, "learning_rate": 0.0001504225352112676, "loss": 1.0807, "step": 1680 }, { "epoch": 0.24903703703703703, "grad_norm": 1.6098432540893555, "learning_rate": 0.00015039288361749445, "loss": 0.9325, "step": 1681 }, { "epoch": 0.24918518518518518, "grad_norm": 2.343644857406616, "learning_rate": 0.0001503632320237213, "loss": 1.0391, "step": 1682 }, { "epoch": 0.24933333333333332, "grad_norm": 1.5114959478378296, "learning_rate": 0.0001503335804299481, "loss": 1.3867, "step": 1683 }, { "epoch": 
0.24948148148148147, "grad_norm": 1.31886887550354, "learning_rate": 0.00015030392883617495, "loss": 1.1278, "step": 1684 }, { "epoch": 0.24962962962962962, "grad_norm": 1.9786417484283447, "learning_rate": 0.0001502742772424018, "loss": 1.4276, "step": 1685 }, { "epoch": 0.24977777777777777, "grad_norm": 1.586955189704895, "learning_rate": 0.00015024462564862861, "loss": 1.1962, "step": 1686 }, { "epoch": 0.2499259259259259, "grad_norm": 1.065201759338379, "learning_rate": 0.00015021497405485546, "loss": 1.0395, "step": 1687 }, { "epoch": 0.25007407407407406, "grad_norm": 2.2416906356811523, "learning_rate": 0.0001501853224610823, "loss": 0.9971, "step": 1688 }, { "epoch": 0.25022222222222223, "grad_norm": 1.2670091390609741, "learning_rate": 0.00015015567086730912, "loss": 1.0083, "step": 1689 }, { "epoch": 0.25037037037037035, "grad_norm": 1.256080150604248, "learning_rate": 0.00015012601927353597, "loss": 1.0631, "step": 1690 }, { "epoch": 0.25051851851851853, "grad_norm": 1.2797483205795288, "learning_rate": 0.00015009636767976278, "loss": 1.0362, "step": 1691 }, { "epoch": 0.25066666666666665, "grad_norm": 1.3483778238296509, "learning_rate": 0.00015006671608598963, "loss": 1.0629, "step": 1692 }, { "epoch": 0.2508148148148148, "grad_norm": 1.6776374578475952, "learning_rate": 0.00015003706449221647, "loss": 0.9733, "step": 1693 }, { "epoch": 0.25096296296296294, "grad_norm": 1.5913230180740356, "learning_rate": 0.0001500074128984433, "loss": 1.2711, "step": 1694 }, { "epoch": 0.2511111111111111, "grad_norm": 1.493897557258606, "learning_rate": 0.00014997776130467013, "loss": 1.2509, "step": 1695 }, { "epoch": 0.25125925925925924, "grad_norm": 1.1311514377593994, "learning_rate": 0.00014994810971089698, "loss": 1.0818, "step": 1696 }, { "epoch": 0.2514074074074074, "grad_norm": 1.299396276473999, "learning_rate": 0.0001499184581171238, "loss": 1.0392, "step": 1697 }, { "epoch": 0.25155555555555553, "grad_norm": 1.3660831451416016, "learning_rate": 
0.00014988880652335064, "loss": 1.0738, "step": 1698 }, { "epoch": 0.2517037037037037, "grad_norm": 2.1091654300689697, "learning_rate": 0.00014985915492957748, "loss": 1.1295, "step": 1699 }, { "epoch": 0.2518518518518518, "grad_norm": 1.4837119579315186, "learning_rate": 0.0001498295033358043, "loss": 1.3783, "step": 1700 }, { "epoch": 0.252, "grad_norm": 1.9091229438781738, "learning_rate": 0.00014979985174203115, "loss": 1.0157, "step": 1701 }, { "epoch": 0.2521481481481481, "grad_norm": 1.6074457168579102, "learning_rate": 0.000149770200148258, "loss": 1.1333, "step": 1702 }, { "epoch": 0.2522962962962963, "grad_norm": 1.9816997051239014, "learning_rate": 0.0001497405485544848, "loss": 1.1686, "step": 1703 }, { "epoch": 0.25244444444444447, "grad_norm": 1.0871987342834473, "learning_rate": 0.00014971089696071165, "loss": 1.1539, "step": 1704 }, { "epoch": 0.2525925925925926, "grad_norm": 1.8652485609054565, "learning_rate": 0.00014968124536693847, "loss": 1.2676, "step": 1705 }, { "epoch": 0.25274074074074077, "grad_norm": 1.4037657976150513, "learning_rate": 0.00014965159377316531, "loss": 0.8503, "step": 1706 }, { "epoch": 0.2528888888888889, "grad_norm": 1.6149505376815796, "learning_rate": 0.00014962194217939216, "loss": 1.3148, "step": 1707 }, { "epoch": 0.25303703703703706, "grad_norm": 2.128265142440796, "learning_rate": 0.00014959229058561898, "loss": 1.3584, "step": 1708 }, { "epoch": 0.2531851851851852, "grad_norm": 3.4360711574554443, "learning_rate": 0.00014956263899184582, "loss": 1.2946, "step": 1709 }, { "epoch": 0.25333333333333335, "grad_norm": 1.0533114671707153, "learning_rate": 0.00014953298739807264, "loss": 0.9652, "step": 1710 }, { "epoch": 0.2534814814814815, "grad_norm": 2.195936441421509, "learning_rate": 0.00014950333580429948, "loss": 1.0955, "step": 1711 }, { "epoch": 0.25362962962962965, "grad_norm": 1.2903773784637451, "learning_rate": 0.00014947368421052633, "loss": 1.1884, "step": 1712 }, { "epoch": 0.25377777777777777, 
"grad_norm": 2.6290183067321777, "learning_rate": 0.00014944403261675314, "loss": 1.0086, "step": 1713 }, { "epoch": 0.25392592592592594, "grad_norm": 1.4115326404571533, "learning_rate": 0.00014941438102298, "loss": 0.9618, "step": 1714 }, { "epoch": 0.25407407407407406, "grad_norm": 2.979445219039917, "learning_rate": 0.00014938472942920683, "loss": 1.3909, "step": 1715 }, { "epoch": 0.25422222222222224, "grad_norm": 1.03138267993927, "learning_rate": 0.00014935507783543365, "loss": 1.0494, "step": 1716 }, { "epoch": 0.25437037037037036, "grad_norm": 1.688761591911316, "learning_rate": 0.0001493254262416605, "loss": 1.0728, "step": 1717 }, { "epoch": 0.25451851851851853, "grad_norm": 1.0532830953598022, "learning_rate": 0.00014929577464788734, "loss": 1.0128, "step": 1718 }, { "epoch": 0.25466666666666665, "grad_norm": 2.199781656265259, "learning_rate": 0.00014926612305411416, "loss": 1.0623, "step": 1719 }, { "epoch": 0.2548148148148148, "grad_norm": 1.4258421659469604, "learning_rate": 0.000149236471460341, "loss": 1.1596, "step": 1720 }, { "epoch": 0.25496296296296295, "grad_norm": 0.9015241265296936, "learning_rate": 0.00014920681986656785, "loss": 1.1419, "step": 1721 }, { "epoch": 0.2551111111111111, "grad_norm": 1.3522428274154663, "learning_rate": 0.00014917716827279466, "loss": 1.2893, "step": 1722 }, { "epoch": 0.25525925925925924, "grad_norm": 1.257901906967163, "learning_rate": 0.0001491475166790215, "loss": 1.0008, "step": 1723 }, { "epoch": 0.2554074074074074, "grad_norm": 1.484906554222107, "learning_rate": 0.00014911786508524835, "loss": 1.2028, "step": 1724 }, { "epoch": 0.25555555555555554, "grad_norm": 1.1615350246429443, "learning_rate": 0.00014908821349147517, "loss": 0.8992, "step": 1725 }, { "epoch": 0.2557037037037037, "grad_norm": 2.3160672187805176, "learning_rate": 0.000149058561897702, "loss": 1.2325, "step": 1726 }, { "epoch": 0.25585185185185183, "grad_norm": 3.3724308013916016, "learning_rate": 0.00014902891030392886, "loss": 
1.1052, "step": 1727 }, { "epoch": 0.256, "grad_norm": 1.7354025840759277, "learning_rate": 0.00014899925871015568, "loss": 1.4236, "step": 1728 }, { "epoch": 0.2561481481481481, "grad_norm": 1.625180721282959, "learning_rate": 0.0001489696071163825, "loss": 1.2491, "step": 1729 }, { "epoch": 0.2562962962962963, "grad_norm": 2.191547155380249, "learning_rate": 0.00014893995552260936, "loss": 1.0228, "step": 1730 }, { "epoch": 0.2564444444444444, "grad_norm": 1.3447610139846802, "learning_rate": 0.00014891030392883618, "loss": 1.0485, "step": 1731 }, { "epoch": 0.2565925925925926, "grad_norm": 1.1098294258117676, "learning_rate": 0.000148880652335063, "loss": 0.9501, "step": 1732 }, { "epoch": 0.2567407407407407, "grad_norm": 1.87017822265625, "learning_rate": 0.00014885100074128987, "loss": 1.0179, "step": 1733 }, { "epoch": 0.2568888888888889, "grad_norm": 2.0555732250213623, "learning_rate": 0.0001488213491475167, "loss": 0.9778, "step": 1734 }, { "epoch": 0.25703703703703706, "grad_norm": 0.9215679168701172, "learning_rate": 0.0001487916975537435, "loss": 0.9877, "step": 1735 }, { "epoch": 0.2571851851851852, "grad_norm": 2.807542562484741, "learning_rate": 0.00014876204595997038, "loss": 1.2137, "step": 1736 }, { "epoch": 0.25733333333333336, "grad_norm": 1.4949638843536377, "learning_rate": 0.0001487323943661972, "loss": 1.3621, "step": 1737 }, { "epoch": 0.2574814814814815, "grad_norm": 1.8658875226974487, "learning_rate": 0.000148702742772424, "loss": 0.9667, "step": 1738 }, { "epoch": 0.25762962962962965, "grad_norm": 1.7364095449447632, "learning_rate": 0.00014867309117865086, "loss": 1.1931, "step": 1739 }, { "epoch": 0.2577777777777778, "grad_norm": 1.4463858604431152, "learning_rate": 0.0001486434395848777, "loss": 0.9939, "step": 1740 }, { "epoch": 0.25792592592592595, "grad_norm": 1.1945314407348633, "learning_rate": 0.00014861378799110452, "loss": 1.1426, "step": 1741 }, { "epoch": 0.25807407407407407, "grad_norm": 1.6998885869979858, 
"learning_rate": 0.00014858413639733136, "loss": 1.2283, "step": 1742 }, { "epoch": 0.25822222222222224, "grad_norm": 1.1630172729492188, "learning_rate": 0.0001485544848035582, "loss": 0.9967, "step": 1743 }, { "epoch": 0.25837037037037036, "grad_norm": 2.042039155960083, "learning_rate": 0.00014852483320978502, "loss": 1.1009, "step": 1744 }, { "epoch": 0.25851851851851854, "grad_norm": 1.3501461744308472, "learning_rate": 0.00014849518161601187, "loss": 1.5284, "step": 1745 }, { "epoch": 0.25866666666666666, "grad_norm": 1.1120213270187378, "learning_rate": 0.0001484655300222387, "loss": 0.8859, "step": 1746 }, { "epoch": 0.25881481481481483, "grad_norm": 1.3494079113006592, "learning_rate": 0.00014843587842846553, "loss": 1.2178, "step": 1747 }, { "epoch": 0.25896296296296295, "grad_norm": 1.2317906618118286, "learning_rate": 0.00014840622683469238, "loss": 1.0457, "step": 1748 }, { "epoch": 0.2591111111111111, "grad_norm": 1.3726729154586792, "learning_rate": 0.00014837657524091922, "loss": 0.9816, "step": 1749 }, { "epoch": 0.25925925925925924, "grad_norm": 1.264622688293457, "learning_rate": 0.00014834692364714604, "loss": 1.1851, "step": 1750 }, { "epoch": 0.2594074074074074, "grad_norm": 1.4444366693496704, "learning_rate": 0.00014831727205337288, "loss": 1.1572, "step": 1751 }, { "epoch": 0.25955555555555554, "grad_norm": 1.4414944648742676, "learning_rate": 0.00014828762045959973, "loss": 1.2479, "step": 1752 }, { "epoch": 0.2597037037037037, "grad_norm": 2.290572166442871, "learning_rate": 0.00014825796886582654, "loss": 1.0049, "step": 1753 }, { "epoch": 0.25985185185185183, "grad_norm": 1.5054208040237427, "learning_rate": 0.0001482283172720534, "loss": 1.1638, "step": 1754 }, { "epoch": 0.26, "grad_norm": 1.7715121507644653, "learning_rate": 0.00014819866567828023, "loss": 1.165, "step": 1755 }, { "epoch": 0.26014814814814813, "grad_norm": 1.6338270902633667, "learning_rate": 0.00014816901408450705, "loss": 1.2188, "step": 1756 }, { "epoch": 
0.2602962962962963, "grad_norm": 1.2027145624160767, "learning_rate": 0.00014813936249073387, "loss": 1.0042, "step": 1757 }, { "epoch": 0.2604444444444444, "grad_norm": 1.4290188550949097, "learning_rate": 0.00014810971089696074, "loss": 1.0109, "step": 1758 }, { "epoch": 0.2605925925925926, "grad_norm": 1.5197229385375977, "learning_rate": 0.00014808005930318756, "loss": 1.1907, "step": 1759 }, { "epoch": 0.2607407407407407, "grad_norm": 1.4386405944824219, "learning_rate": 0.00014805040770941437, "loss": 1.0953, "step": 1760 }, { "epoch": 0.2608888888888889, "grad_norm": 1.638106107711792, "learning_rate": 0.00014802075611564122, "loss": 1.1874, "step": 1761 }, { "epoch": 0.261037037037037, "grad_norm": 1.4412510395050049, "learning_rate": 0.00014799110452186806, "loss": 0.8044, "step": 1762 }, { "epoch": 0.2611851851851852, "grad_norm": 0.9868293404579163, "learning_rate": 0.00014796145292809488, "loss": 0.8231, "step": 1763 }, { "epoch": 0.2613333333333333, "grad_norm": 1.248249888420105, "learning_rate": 0.00014793180133432172, "loss": 0.9181, "step": 1764 }, { "epoch": 0.2614814814814815, "grad_norm": 1.0567678213119507, "learning_rate": 0.00014790214974054857, "loss": 1.0008, "step": 1765 }, { "epoch": 0.26162962962962966, "grad_norm": 2.1666345596313477, "learning_rate": 0.00014787249814677539, "loss": 1.0072, "step": 1766 }, { "epoch": 0.2617777777777778, "grad_norm": 1.361720085144043, "learning_rate": 0.00014784284655300223, "loss": 1.1215, "step": 1767 }, { "epoch": 0.26192592592592595, "grad_norm": 1.513120174407959, "learning_rate": 0.00014781319495922907, "loss": 1.1039, "step": 1768 }, { "epoch": 0.26207407407407407, "grad_norm": 1.3253802061080933, "learning_rate": 0.0001477835433654559, "loss": 1.0483, "step": 1769 }, { "epoch": 0.26222222222222225, "grad_norm": 1.3114228248596191, "learning_rate": 0.00014775389177168274, "loss": 0.8184, "step": 1770 }, { "epoch": 0.26237037037037036, "grad_norm": 1.4233734607696533, "learning_rate": 
0.00014772424017790958, "loss": 1.2382, "step": 1771 }, { "epoch": 0.26251851851851854, "grad_norm": 1.3835053443908691, "learning_rate": 0.0001476945885841364, "loss": 1.0324, "step": 1772 }, { "epoch": 0.26266666666666666, "grad_norm": 1.469097375869751, "learning_rate": 0.00014766493699036324, "loss": 1.3254, "step": 1773 }, { "epoch": 0.26281481481481483, "grad_norm": 2.058704137802124, "learning_rate": 0.0001476352853965901, "loss": 1.0667, "step": 1774 }, { "epoch": 0.26296296296296295, "grad_norm": 1.145105004310608, "learning_rate": 0.0001476056338028169, "loss": 1.0848, "step": 1775 }, { "epoch": 0.26311111111111113, "grad_norm": 0.9493306875228882, "learning_rate": 0.00014757598220904375, "loss": 0.8814, "step": 1776 }, { "epoch": 0.26325925925925925, "grad_norm": 1.336127519607544, "learning_rate": 0.00014754633061527057, "loss": 0.9214, "step": 1777 }, { "epoch": 0.2634074074074074, "grad_norm": 1.348656177520752, "learning_rate": 0.0001475166790214974, "loss": 1.179, "step": 1778 }, { "epoch": 0.26355555555555554, "grad_norm": 1.284738302230835, "learning_rate": 0.00014748702742772426, "loss": 1.1527, "step": 1779 }, { "epoch": 0.2637037037037037, "grad_norm": 1.421317219734192, "learning_rate": 0.00014745737583395107, "loss": 1.2755, "step": 1780 }, { "epoch": 0.26385185185185184, "grad_norm": 1.293117880821228, "learning_rate": 0.00014742772424017792, "loss": 0.9949, "step": 1781 }, { "epoch": 0.264, "grad_norm": 1.6930663585662842, "learning_rate": 0.00014739807264640476, "loss": 1.6129, "step": 1782 }, { "epoch": 0.26414814814814813, "grad_norm": 1.8110852241516113, "learning_rate": 0.00014736842105263158, "loss": 1.1731, "step": 1783 }, { "epoch": 0.2642962962962963, "grad_norm": 1.2088935375213623, "learning_rate": 0.00014733876945885842, "loss": 0.9807, "step": 1784 }, { "epoch": 0.2644444444444444, "grad_norm": 1.3096468448638916, "learning_rate": 0.00014730911786508527, "loss": 1.2637, "step": 1785 }, { "epoch": 0.2645925925925926, 
"grad_norm": 1.1938130855560303, "learning_rate": 0.00014727946627131209, "loss": 0.904, "step": 1786 }, { "epoch": 0.2647407407407407, "grad_norm": 2.0407581329345703, "learning_rate": 0.00014724981467753893, "loss": 1.0874, "step": 1787 }, { "epoch": 0.2648888888888889, "grad_norm": 1.9213590621948242, "learning_rate": 0.00014722016308376577, "loss": 0.9534, "step": 1788 }, { "epoch": 0.265037037037037, "grad_norm": 1.3738536834716797, "learning_rate": 0.0001471905114899926, "loss": 1.2112, "step": 1789 }, { "epoch": 0.2651851851851852, "grad_norm": 1.7585114240646362, "learning_rate": 0.00014716085989621944, "loss": 1.1066, "step": 1790 }, { "epoch": 0.2653333333333333, "grad_norm": 1.6683770418167114, "learning_rate": 0.00014713120830244625, "loss": 1.1725, "step": 1791 }, { "epoch": 0.2654814814814815, "grad_norm": 1.3317421674728394, "learning_rate": 0.0001471015567086731, "loss": 1.2168, "step": 1792 }, { "epoch": 0.2656296296296296, "grad_norm": 1.2089072465896606, "learning_rate": 0.00014707190511489994, "loss": 1.3568, "step": 1793 }, { "epoch": 0.2657777777777778, "grad_norm": 1.3464851379394531, "learning_rate": 0.00014704225352112676, "loss": 1.2184, "step": 1794 }, { "epoch": 0.2659259259259259, "grad_norm": 1.285253643989563, "learning_rate": 0.0001470126019273536, "loss": 1.0801, "step": 1795 }, { "epoch": 0.2660740740740741, "grad_norm": 1.6146527528762817, "learning_rate": 0.00014698295033358042, "loss": 1.0191, "step": 1796 }, { "epoch": 0.26622222222222225, "grad_norm": 1.29169762134552, "learning_rate": 0.00014695329873980727, "loss": 0.9172, "step": 1797 }, { "epoch": 0.26637037037037037, "grad_norm": 1.3831039667129517, "learning_rate": 0.0001469236471460341, "loss": 1.1838, "step": 1798 }, { "epoch": 0.26651851851851854, "grad_norm": 1.3449994325637817, "learning_rate": 0.00014689399555226093, "loss": 1.0419, "step": 1799 }, { "epoch": 0.26666666666666666, "grad_norm": 2.0927140712738037, "learning_rate": 0.00014686434395848777, "loss": 
1.117, "step": 1800 }, { "epoch": 0.26681481481481484, "grad_norm": 1.8944083452224731, "learning_rate": 0.00014683469236471462, "loss": 1.0172, "step": 1801 }, { "epoch": 0.26696296296296296, "grad_norm": 1.5123599767684937, "learning_rate": 0.00014680504077094143, "loss": 1.2678, "step": 1802 }, { "epoch": 0.26711111111111113, "grad_norm": 1.9740763902664185, "learning_rate": 0.00014677538917716828, "loss": 1.1469, "step": 1803 }, { "epoch": 0.26725925925925925, "grad_norm": 1.1890026330947876, "learning_rate": 0.00014674573758339512, "loss": 0.8493, "step": 1804 }, { "epoch": 0.2674074074074074, "grad_norm": 1.6631288528442383, "learning_rate": 0.00014671608598962194, "loss": 1.3172, "step": 1805 }, { "epoch": 0.26755555555555555, "grad_norm": 1.374815583229065, "learning_rate": 0.00014668643439584878, "loss": 1.1752, "step": 1806 }, { "epoch": 0.2677037037037037, "grad_norm": 1.6949161291122437, "learning_rate": 0.00014665678280207563, "loss": 1.4217, "step": 1807 }, { "epoch": 0.26785185185185184, "grad_norm": 1.3431637287139893, "learning_rate": 0.00014662713120830245, "loss": 1.4953, "step": 1808 }, { "epoch": 0.268, "grad_norm": 1.5021950006484985, "learning_rate": 0.0001465974796145293, "loss": 1.0382, "step": 1809 }, { "epoch": 0.26814814814814814, "grad_norm": 2.2632665634155273, "learning_rate": 0.00014656782802075614, "loss": 1.255, "step": 1810 }, { "epoch": 0.2682962962962963, "grad_norm": 1.290856122970581, "learning_rate": 0.00014653817642698295, "loss": 1.1311, "step": 1811 }, { "epoch": 0.26844444444444443, "grad_norm": 1.3417847156524658, "learning_rate": 0.00014650852483320977, "loss": 1.0782, "step": 1812 }, { "epoch": 0.2685925925925926, "grad_norm": 2.0849289894104004, "learning_rate": 0.00014647887323943664, "loss": 1.1883, "step": 1813 }, { "epoch": 0.2687407407407407, "grad_norm": 1.8363046646118164, "learning_rate": 0.00014644922164566346, "loss": 1.1608, "step": 1814 }, { "epoch": 0.2688888888888889, "grad_norm": 2.068054676055908, 
"learning_rate": 0.00014641957005189028, "loss": 1.1914, "step": 1815 }, { "epoch": 0.269037037037037, "grad_norm": 1.2311770915985107, "learning_rate": 0.00014638991845811715, "loss": 1.1122, "step": 1816 }, { "epoch": 0.2691851851851852, "grad_norm": 1.6212143898010254, "learning_rate": 0.00014636026686434397, "loss": 1.2723, "step": 1817 }, { "epoch": 0.2693333333333333, "grad_norm": 2.1663825511932373, "learning_rate": 0.00014633061527057078, "loss": 1.226, "step": 1818 }, { "epoch": 0.2694814814814815, "grad_norm": 1.633810043334961, "learning_rate": 0.00014630096367679765, "loss": 1.1611, "step": 1819 }, { "epoch": 0.2696296296296296, "grad_norm": 2.2274973392486572, "learning_rate": 0.00014627131208302447, "loss": 1.0705, "step": 1820 }, { "epoch": 0.2697777777777778, "grad_norm": 1.201061487197876, "learning_rate": 0.0001462416604892513, "loss": 1.0724, "step": 1821 }, { "epoch": 0.2699259259259259, "grad_norm": 1.2622113227844238, "learning_rate": 0.00014621200889547816, "loss": 1.3015, "step": 1822 }, { "epoch": 0.2700740740740741, "grad_norm": 1.0760676860809326, "learning_rate": 0.00014618235730170498, "loss": 1.1231, "step": 1823 }, { "epoch": 0.2702222222222222, "grad_norm": 1.277963638305664, "learning_rate": 0.0001461527057079318, "loss": 1.1237, "step": 1824 }, { "epoch": 0.27037037037037037, "grad_norm": 1.220734715461731, "learning_rate": 0.00014612305411415864, "loss": 0.9188, "step": 1825 }, { "epoch": 0.2705185185185185, "grad_norm": 1.4577242136001587, "learning_rate": 0.00014609340252038548, "loss": 1.1207, "step": 1826 }, { "epoch": 0.27066666666666667, "grad_norm": 1.287782073020935, "learning_rate": 0.0001460637509266123, "loss": 1.0625, "step": 1827 }, { "epoch": 0.27081481481481484, "grad_norm": 1.487561821937561, "learning_rate": 0.00014603409933283915, "loss": 1.0836, "step": 1828 }, { "epoch": 0.27096296296296296, "grad_norm": 1.497262716293335, "learning_rate": 0.000146004447739066, "loss": 1.162, "step": 1829 }, { "epoch": 
0.27111111111111114, "grad_norm": 1.4409953355789185, "learning_rate": 0.0001459747961452928, "loss": 0.9221, "step": 1830 }, { "epoch": 0.27125925925925926, "grad_norm": 1.2185614109039307, "learning_rate": 0.00014594514455151965, "loss": 1.2039, "step": 1831 }, { "epoch": 0.27140740740740743, "grad_norm": 1.4382011890411377, "learning_rate": 0.0001459154929577465, "loss": 1.1022, "step": 1832 }, { "epoch": 0.27155555555555555, "grad_norm": 1.8076633214950562, "learning_rate": 0.00014588584136397331, "loss": 1.0575, "step": 1833 }, { "epoch": 0.2717037037037037, "grad_norm": 1.1807422637939453, "learning_rate": 0.00014585618977020016, "loss": 1.1686, "step": 1834 }, { "epoch": 0.27185185185185184, "grad_norm": 1.8012399673461914, "learning_rate": 0.000145826538176427, "loss": 1.1746, "step": 1835 }, { "epoch": 0.272, "grad_norm": 3.0142524242401123, "learning_rate": 0.00014579688658265382, "loss": 1.1084, "step": 1836 }, { "epoch": 0.27214814814814814, "grad_norm": 1.2734559774398804, "learning_rate": 0.00014576723498888066, "loss": 0.9371, "step": 1837 }, { "epoch": 0.2722962962962963, "grad_norm": 2.423719882965088, "learning_rate": 0.0001457375833951075, "loss": 0.9749, "step": 1838 }, { "epoch": 0.27244444444444443, "grad_norm": 1.2738714218139648, "learning_rate": 0.00014570793180133433, "loss": 1.1978, "step": 1839 }, { "epoch": 0.2725925925925926, "grad_norm": 1.620475172996521, "learning_rate": 0.00014567828020756117, "loss": 1.0713, "step": 1840 }, { "epoch": 0.27274074074074073, "grad_norm": 1.6548757553100586, "learning_rate": 0.00014564862861378802, "loss": 1.3495, "step": 1841 }, { "epoch": 0.2728888888888889, "grad_norm": 1.3922020196914673, "learning_rate": 0.00014561897702001483, "loss": 1.0026, "step": 1842 }, { "epoch": 0.273037037037037, "grad_norm": 1.3025039434432983, "learning_rate": 0.00014558932542624165, "loss": 0.8775, "step": 1843 }, { "epoch": 0.2731851851851852, "grad_norm": 1.4768999814987183, "learning_rate": 0.00014555967383246852, 
"loss": 0.9688, "step": 1844 }, { "epoch": 0.2733333333333333, "grad_norm": 1.5161889791488647, "learning_rate": 0.00014553002223869534, "loss": 1.0882, "step": 1845 }, { "epoch": 0.2734814814814815, "grad_norm": 2.0973105430603027, "learning_rate": 0.00014550037064492216, "loss": 0.9726, "step": 1846 }, { "epoch": 0.2736296296296296, "grad_norm": 1.1360061168670654, "learning_rate": 0.000145470719051149, "loss": 0.9396, "step": 1847 }, { "epoch": 0.2737777777777778, "grad_norm": 2.1403005123138428, "learning_rate": 0.00014544106745737585, "loss": 1.0215, "step": 1848 }, { "epoch": 0.2739259259259259, "grad_norm": 2.610644817352295, "learning_rate": 0.00014541141586360266, "loss": 1.035, "step": 1849 }, { "epoch": 0.2740740740740741, "grad_norm": 1.1601502895355225, "learning_rate": 0.0001453817642698295, "loss": 0.8839, "step": 1850 }, { "epoch": 0.2742222222222222, "grad_norm": 1.4491620063781738, "learning_rate": 0.00014535211267605635, "loss": 1.0072, "step": 1851 }, { "epoch": 0.2743703703703704, "grad_norm": 2.7844150066375732, "learning_rate": 0.00014532246108228317, "loss": 1.0882, "step": 1852 }, { "epoch": 0.2745185185185185, "grad_norm": 1.5077942609786987, "learning_rate": 0.00014529280948851, "loss": 1.0161, "step": 1853 }, { "epoch": 0.27466666666666667, "grad_norm": 1.31179940700531, "learning_rate": 0.00014526315789473686, "loss": 1.1895, "step": 1854 }, { "epoch": 0.2748148148148148, "grad_norm": 2.904644012451172, "learning_rate": 0.00014523350630096368, "loss": 1.1659, "step": 1855 }, { "epoch": 0.27496296296296296, "grad_norm": 2.673091411590576, "learning_rate": 0.00014520385470719052, "loss": 1.1377, "step": 1856 }, { "epoch": 0.2751111111111111, "grad_norm": 1.848264455795288, "learning_rate": 0.00014517420311341736, "loss": 1.1158, "step": 1857 }, { "epoch": 0.27525925925925926, "grad_norm": 1.3115473985671997, "learning_rate": 0.00014514455151964418, "loss": 1.1346, "step": 1858 }, { "epoch": 0.27540740740740743, "grad_norm": 
1.5505434274673462, "learning_rate": 0.00014511489992587103, "loss": 1.3049, "step": 1859 }, { "epoch": 0.27555555555555555, "grad_norm": 1.2869677543640137, "learning_rate": 0.00014508524833209787, "loss": 0.9032, "step": 1860 }, { "epoch": 0.27570370370370373, "grad_norm": 3.9332666397094727, "learning_rate": 0.0001450555967383247, "loss": 1.351, "step": 1861 }, { "epoch": 0.27585185185185185, "grad_norm": 1.4140563011169434, "learning_rate": 0.00014502594514455153, "loss": 1.3197, "step": 1862 }, { "epoch": 0.276, "grad_norm": 1.3230453729629517, "learning_rate": 0.00014499629355077835, "loss": 1.0612, "step": 1863 }, { "epoch": 0.27614814814814814, "grad_norm": 1.4923207759857178, "learning_rate": 0.0001449666419570052, "loss": 0.9324, "step": 1864 }, { "epoch": 0.2762962962962963, "grad_norm": 1.35109281539917, "learning_rate": 0.00014493699036323204, "loss": 0.9941, "step": 1865 }, { "epoch": 0.27644444444444444, "grad_norm": 1.4291627407073975, "learning_rate": 0.00014490733876945886, "loss": 0.8801, "step": 1866 }, { "epoch": 0.2765925925925926, "grad_norm": 1.297644853591919, "learning_rate": 0.0001448776871756857, "loss": 0.9117, "step": 1867 }, { "epoch": 0.27674074074074073, "grad_norm": 1.4469108581542969, "learning_rate": 0.00014484803558191254, "loss": 1.1232, "step": 1868 }, { "epoch": 0.2768888888888889, "grad_norm": 2.2614119052886963, "learning_rate": 0.00014481838398813936, "loss": 1.4292, "step": 1869 }, { "epoch": 0.277037037037037, "grad_norm": 2.2086384296417236, "learning_rate": 0.0001447887323943662, "loss": 1.4552, "step": 1870 }, { "epoch": 0.2771851851851852, "grad_norm": 1.3284968137741089, "learning_rate": 0.00014475908080059305, "loss": 1.2932, "step": 1871 }, { "epoch": 0.2773333333333333, "grad_norm": 1.6751161813735962, "learning_rate": 0.00014472942920681987, "loss": 1.1819, "step": 1872 }, { "epoch": 0.2774814814814815, "grad_norm": 1.5120177268981934, "learning_rate": 0.0001446997776130467, "loss": 1.2286, "step": 1873 }, { 
"epoch": 0.2776296296296296, "grad_norm": 1.416845679283142, "learning_rate": 0.00014467012601927356, "loss": 1.0614, "step": 1874 }, { "epoch": 0.2777777777777778, "grad_norm": 1.9702194929122925, "learning_rate": 0.00014464047442550037, "loss": 1.0974, "step": 1875 }, { "epoch": 0.2779259259259259, "grad_norm": 1.1982643604278564, "learning_rate": 0.00014461082283172722, "loss": 1.3637, "step": 1876 }, { "epoch": 0.2780740740740741, "grad_norm": 1.339460849761963, "learning_rate": 0.00014458117123795404, "loss": 1.0512, "step": 1877 }, { "epoch": 0.2782222222222222, "grad_norm": 1.8185973167419434, "learning_rate": 0.00014455151964418088, "loss": 1.3702, "step": 1878 }, { "epoch": 0.2783703703703704, "grad_norm": 1.7184706926345825, "learning_rate": 0.00014452186805040773, "loss": 1.1731, "step": 1879 }, { "epoch": 0.2785185185185185, "grad_norm": 1.1334104537963867, "learning_rate": 0.00014449221645663454, "loss": 1.0635, "step": 1880 }, { "epoch": 0.2786666666666667, "grad_norm": 1.3710155487060547, "learning_rate": 0.0001444625648628614, "loss": 0.8815, "step": 1881 }, { "epoch": 0.2788148148148148, "grad_norm": 3.35996150970459, "learning_rate": 0.0001444329132690882, "loss": 1.1999, "step": 1882 }, { "epoch": 0.27896296296296297, "grad_norm": 1.0989127159118652, "learning_rate": 0.00014440326167531505, "loss": 1.0851, "step": 1883 }, { "epoch": 0.2791111111111111, "grad_norm": 1.127285122871399, "learning_rate": 0.0001443736100815419, "loss": 1.0653, "step": 1884 }, { "epoch": 0.27925925925925926, "grad_norm": 1.6573400497436523, "learning_rate": 0.0001443439584877687, "loss": 1.1363, "step": 1885 }, { "epoch": 0.2794074074074074, "grad_norm": 1.572654128074646, "learning_rate": 0.00014431430689399556, "loss": 1.0745, "step": 1886 }, { "epoch": 0.27955555555555556, "grad_norm": 2.3293049335479736, "learning_rate": 0.0001442846553002224, "loss": 1.1124, "step": 1887 }, { "epoch": 0.2797037037037037, "grad_norm": 2.035825729370117, "learning_rate": 
0.00014425500370644922, "loss": 1.3554, "step": 1888 }, { "epoch": 0.27985185185185185, "grad_norm": 2.500138998031616, "learning_rate": 0.00014422535211267606, "loss": 1.1704, "step": 1889 }, { "epoch": 0.28, "grad_norm": 1.3954805135726929, "learning_rate": 0.0001441957005189029, "loss": 1.1847, "step": 1890 }, { "epoch": 0.28014814814814815, "grad_norm": 1.9010459184646606, "learning_rate": 0.00014416604892512972, "loss": 1.3681, "step": 1891 }, { "epoch": 0.2802962962962963, "grad_norm": 1.322282314300537, "learning_rate": 0.00014413639733135657, "loss": 1.2975, "step": 1892 }, { "epoch": 0.28044444444444444, "grad_norm": 1.429682970046997, "learning_rate": 0.0001441067457375834, "loss": 1.0909, "step": 1893 }, { "epoch": 0.2805925925925926, "grad_norm": 2.190866231918335, "learning_rate": 0.00014407709414381023, "loss": 1.3857, "step": 1894 }, { "epoch": 0.28074074074074074, "grad_norm": 1.7194695472717285, "learning_rate": 0.00014404744255003707, "loss": 0.9431, "step": 1895 }, { "epoch": 0.2808888888888889, "grad_norm": 4.119308948516846, "learning_rate": 0.00014401779095626392, "loss": 1.2931, "step": 1896 }, { "epoch": 0.28103703703703703, "grad_norm": 1.2322664260864258, "learning_rate": 0.00014398813936249074, "loss": 0.9592, "step": 1897 }, { "epoch": 0.2811851851851852, "grad_norm": 1.7712771892547607, "learning_rate": 0.00014395848776871755, "loss": 0.9421, "step": 1898 }, { "epoch": 0.2813333333333333, "grad_norm": 1.3226734399795532, "learning_rate": 0.00014392883617494442, "loss": 0.9837, "step": 1899 }, { "epoch": 0.2814814814814815, "grad_norm": 2.522213935852051, "learning_rate": 0.00014389918458117124, "loss": 1.2813, "step": 1900 }, { "epoch": 0.2816296296296296, "grad_norm": 1.8988999128341675, "learning_rate": 0.00014386953298739806, "loss": 1.1405, "step": 1901 }, { "epoch": 0.2817777777777778, "grad_norm": 1.8001806735992432, "learning_rate": 0.00014383988139362493, "loss": 1.1198, "step": 1902 }, { "epoch": 0.2819259259259259, 
"grad_norm": 1.7499761581420898, "learning_rate": 0.00014381022979985175, "loss": 1.0926, "step": 1903 }, { "epoch": 0.2820740740740741, "grad_norm": 1.7617902755737305, "learning_rate": 0.00014378057820607857, "loss": 1.0601, "step": 1904 }, { "epoch": 0.2822222222222222, "grad_norm": 1.4161577224731445, "learning_rate": 0.00014375092661230544, "loss": 0.9908, "step": 1905 }, { "epoch": 0.2823703703703704, "grad_norm": 1.7820091247558594, "learning_rate": 0.00014372127501853225, "loss": 1.1655, "step": 1906 }, { "epoch": 0.2825185185185185, "grad_norm": 1.7779147624969482, "learning_rate": 0.00014369162342475907, "loss": 1.0416, "step": 1907 }, { "epoch": 0.2826666666666667, "grad_norm": 1.3166837692260742, "learning_rate": 0.00014366197183098594, "loss": 1.1624, "step": 1908 }, { "epoch": 0.2828148148148148, "grad_norm": 1.822948932647705, "learning_rate": 0.00014363232023721276, "loss": 1.1519, "step": 1909 }, { "epoch": 0.28296296296296297, "grad_norm": 2.3033623695373535, "learning_rate": 0.00014360266864343958, "loss": 1.1689, "step": 1910 }, { "epoch": 0.2831111111111111, "grad_norm": 1.3716576099395752, "learning_rate": 0.00014357301704966642, "loss": 1.0579, "step": 1911 }, { "epoch": 0.28325925925925927, "grad_norm": 1.3765064477920532, "learning_rate": 0.00014354336545589327, "loss": 1.0688, "step": 1912 }, { "epoch": 0.2834074074074074, "grad_norm": 2.1557159423828125, "learning_rate": 0.00014351371386212008, "loss": 1.0384, "step": 1913 }, { "epoch": 0.28355555555555556, "grad_norm": 2.223402976989746, "learning_rate": 0.00014348406226834693, "loss": 1.2898, "step": 1914 }, { "epoch": 0.2837037037037037, "grad_norm": 1.851702094078064, "learning_rate": 0.00014345441067457377, "loss": 1.0041, "step": 1915 }, { "epoch": 0.28385185185185186, "grad_norm": 4.681601524353027, "learning_rate": 0.0001434247590808006, "loss": 1.2566, "step": 1916 }, { "epoch": 0.284, "grad_norm": 4.030296325683594, "learning_rate": 0.00014339510748702744, "loss": 0.9683, 
"step": 1917 }, { "epoch": 0.28414814814814815, "grad_norm": 2.739288330078125, "learning_rate": 0.00014336545589325428, "loss": 1.2889, "step": 1918 }, { "epoch": 0.28429629629629627, "grad_norm": 1.4121339321136475, "learning_rate": 0.0001433358042994811, "loss": 1.1335, "step": 1919 }, { "epoch": 0.28444444444444444, "grad_norm": 3.6412529945373535, "learning_rate": 0.00014330615270570794, "loss": 1.001, "step": 1920 }, { "epoch": 0.2845925925925926, "grad_norm": 1.610365390777588, "learning_rate": 0.00014327650111193479, "loss": 1.1877, "step": 1921 }, { "epoch": 0.28474074074074074, "grad_norm": 6.543050289154053, "learning_rate": 0.0001432468495181616, "loss": 1.2976, "step": 1922 }, { "epoch": 0.2848888888888889, "grad_norm": 2.442750930786133, "learning_rate": 0.00014321719792438845, "loss": 1.2311, "step": 1923 }, { "epoch": 0.28503703703703703, "grad_norm": 1.7650247812271118, "learning_rate": 0.0001431875463306153, "loss": 1.0885, "step": 1924 }, { "epoch": 0.2851851851851852, "grad_norm": 1.3626705408096313, "learning_rate": 0.0001431578947368421, "loss": 1.168, "step": 1925 }, { "epoch": 0.2853333333333333, "grad_norm": 1.6422728300094604, "learning_rate": 0.00014312824314306895, "loss": 1.0842, "step": 1926 }, { "epoch": 0.2854814814814815, "grad_norm": 3.8954384326934814, "learning_rate": 0.0001430985915492958, "loss": 1.0006, "step": 1927 }, { "epoch": 0.2856296296296296, "grad_norm": 2.413081645965576, "learning_rate": 0.00014306893995552262, "loss": 1.2681, "step": 1928 }, { "epoch": 0.2857777777777778, "grad_norm": 1.2404167652130127, "learning_rate": 0.00014303928836174943, "loss": 1.0716, "step": 1929 }, { "epoch": 0.2859259259259259, "grad_norm": 2.429436683654785, "learning_rate": 0.0001430096367679763, "loss": 1.242, "step": 1930 }, { "epoch": 0.2860740740740741, "grad_norm": 1.9265131950378418, "learning_rate": 0.00014297998517420312, "loss": 1.376, "step": 1931 }, { "epoch": 0.2862222222222222, "grad_norm": 1.5621486902236938, 
"learning_rate": 0.00014295033358042994, "loss": 1.0938, "step": 1932 }, { "epoch": 0.2863703703703704, "grad_norm": 1.2274481058120728, "learning_rate": 0.00014292068198665678, "loss": 1.1254, "step": 1933 }, { "epoch": 0.2865185185185185, "grad_norm": 1.5727094411849976, "learning_rate": 0.00014289103039288363, "loss": 1.2272, "step": 1934 }, { "epoch": 0.2866666666666667, "grad_norm": 1.5376226902008057, "learning_rate": 0.00014286137879911045, "loss": 1.2636, "step": 1935 }, { "epoch": 0.2868148148148148, "grad_norm": 1.1617416143417358, "learning_rate": 0.0001428317272053373, "loss": 1.0181, "step": 1936 }, { "epoch": 0.286962962962963, "grad_norm": 2.308255672454834, "learning_rate": 0.00014280207561156413, "loss": 0.9628, "step": 1937 }, { "epoch": 0.2871111111111111, "grad_norm": 1.201524019241333, "learning_rate": 0.00014277242401779095, "loss": 0.8777, "step": 1938 }, { "epoch": 0.28725925925925927, "grad_norm": 1.079357385635376, "learning_rate": 0.0001427427724240178, "loss": 1.1408, "step": 1939 }, { "epoch": 0.2874074074074074, "grad_norm": 0.8939654231071472, "learning_rate": 0.00014271312083024464, "loss": 0.9137, "step": 1940 }, { "epoch": 0.28755555555555556, "grad_norm": 2.082834005355835, "learning_rate": 0.00014268346923647146, "loss": 1.1254, "step": 1941 }, { "epoch": 0.2877037037037037, "grad_norm": 1.8582286834716797, "learning_rate": 0.0001426538176426983, "loss": 1.2128, "step": 1942 }, { "epoch": 0.28785185185185186, "grad_norm": 1.6522938013076782, "learning_rate": 0.00014262416604892515, "loss": 1.3354, "step": 1943 }, { "epoch": 0.288, "grad_norm": 1.6389734745025635, "learning_rate": 0.00014259451445515196, "loss": 0.9965, "step": 1944 }, { "epoch": 0.28814814814814815, "grad_norm": 1.6375473737716675, "learning_rate": 0.0001425648628613788, "loss": 1.0244, "step": 1945 }, { "epoch": 0.2882962962962963, "grad_norm": 1.497673511505127, "learning_rate": 0.00014253521126760565, "loss": 1.1187, "step": 1946 }, { "epoch": 
0.28844444444444445, "grad_norm": 1.0881043672561646, "learning_rate": 0.00014250555967383247, "loss": 1.065, "step": 1947 }, { "epoch": 0.28859259259259257, "grad_norm": 1.5061964988708496, "learning_rate": 0.00014247590808005932, "loss": 1.0024, "step": 1948 }, { "epoch": 0.28874074074074074, "grad_norm": 1.6110973358154297, "learning_rate": 0.00014244625648628613, "loss": 1.3192, "step": 1949 }, { "epoch": 0.28888888888888886, "grad_norm": 1.792970895767212, "learning_rate": 0.00014241660489251298, "loss": 0.9602, "step": 1950 }, { "epoch": 0.28903703703703704, "grad_norm": 1.2316087484359741, "learning_rate": 0.00014238695329873982, "loss": 1.0634, "step": 1951 }, { "epoch": 0.2891851851851852, "grad_norm": 6.28628396987915, "learning_rate": 0.00014235730170496664, "loss": 0.9428, "step": 1952 }, { "epoch": 0.28933333333333333, "grad_norm": 3.0054290294647217, "learning_rate": 0.00014232765011119348, "loss": 1.0454, "step": 1953 }, { "epoch": 0.2894814814814815, "grad_norm": 1.8051791191101074, "learning_rate": 0.00014229799851742033, "loss": 1.3311, "step": 1954 }, { "epoch": 0.2896296296296296, "grad_norm": 1.9095637798309326, "learning_rate": 0.00014226834692364715, "loss": 1.2088, "step": 1955 }, { "epoch": 0.2897777777777778, "grad_norm": 2.037728786468506, "learning_rate": 0.000142238695329874, "loss": 1.238, "step": 1956 }, { "epoch": 0.2899259259259259, "grad_norm": 1.1061418056488037, "learning_rate": 0.00014220904373610083, "loss": 1.0668, "step": 1957 }, { "epoch": 0.2900740740740741, "grad_norm": 1.9614605903625488, "learning_rate": 0.00014217939214232765, "loss": 1.1359, "step": 1958 }, { "epoch": 0.2902222222222222, "grad_norm": 3.108888626098633, "learning_rate": 0.0001421497405485545, "loss": 1.0206, "step": 1959 }, { "epoch": 0.2903703703703704, "grad_norm": 2.745737314224243, "learning_rate": 0.00014212008895478134, "loss": 1.1219, "step": 1960 }, { "epoch": 0.2905185185185185, "grad_norm": 1.4380501508712769, "learning_rate": 
0.00014209043736100816, "loss": 1.0199, "step": 1961 }, { "epoch": 0.2906666666666667, "grad_norm": 1.7835073471069336, "learning_rate": 0.000142060785767235, "loss": 1.2319, "step": 1962 }, { "epoch": 0.2908148148148148, "grad_norm": 1.7886954545974731, "learning_rate": 0.00014203113417346182, "loss": 0.9066, "step": 1963 }, { "epoch": 0.290962962962963, "grad_norm": 2.375223159790039, "learning_rate": 0.00014200148257968866, "loss": 1.1687, "step": 1964 }, { "epoch": 0.2911111111111111, "grad_norm": 1.4279139041900635, "learning_rate": 0.0001419718309859155, "loss": 1.2445, "step": 1965 }, { "epoch": 0.2912592592592593, "grad_norm": 1.6330987215042114, "learning_rate": 0.00014194217939214233, "loss": 1.1252, "step": 1966 }, { "epoch": 0.2914074074074074, "grad_norm": 1.4857168197631836, "learning_rate": 0.00014191252779836917, "loss": 1.2034, "step": 1967 }, { "epoch": 0.29155555555555557, "grad_norm": 2.2815330028533936, "learning_rate": 0.000141882876204596, "loss": 1.0196, "step": 1968 }, { "epoch": 0.2917037037037037, "grad_norm": 2.0011632442474365, "learning_rate": 0.00014185322461082283, "loss": 1.2011, "step": 1969 }, { "epoch": 0.29185185185185186, "grad_norm": 2.2272956371307373, "learning_rate": 0.00014182357301704968, "loss": 1.3563, "step": 1970 }, { "epoch": 0.292, "grad_norm": 1.3123246431350708, "learning_rate": 0.0001417939214232765, "loss": 1.1925, "step": 1971 }, { "epoch": 0.29214814814814816, "grad_norm": 1.6946793794631958, "learning_rate": 0.00014176426982950334, "loss": 1.0575, "step": 1972 }, { "epoch": 0.2922962962962963, "grad_norm": 1.524677038192749, "learning_rate": 0.00014173461823573018, "loss": 1.2523, "step": 1973 }, { "epoch": 0.29244444444444445, "grad_norm": 1.8048510551452637, "learning_rate": 0.000141704966641957, "loss": 1.0267, "step": 1974 }, { "epoch": 0.29259259259259257, "grad_norm": 3.08186936378479, "learning_rate": 0.00014167531504818384, "loss": 0.9579, "step": 1975 }, { "epoch": 0.29274074074074075, "grad_norm": 
1.672932744026184, "learning_rate": 0.0001416456634544107, "loss": 1.0669, "step": 1976 }, { "epoch": 0.29288888888888887, "grad_norm": 1.2979950904846191, "learning_rate": 0.0001416160118606375, "loss": 0.8932, "step": 1977 }, { "epoch": 0.29303703703703704, "grad_norm": 1.031038761138916, "learning_rate": 0.00014158636026686435, "loss": 0.9491, "step": 1978 }, { "epoch": 0.29318518518518516, "grad_norm": 2.2748239040374756, "learning_rate": 0.0001415567086730912, "loss": 1.1151, "step": 1979 }, { "epoch": 0.29333333333333333, "grad_norm": 1.350414752960205, "learning_rate": 0.000141527057079318, "loss": 1.2051, "step": 1980 }, { "epoch": 0.29348148148148145, "grad_norm": 2.0504868030548096, "learning_rate": 0.00014149740548554486, "loss": 1.1326, "step": 1981 }, { "epoch": 0.29362962962962963, "grad_norm": 1.304865837097168, "learning_rate": 0.0001414677538917717, "loss": 1.0071, "step": 1982 }, { "epoch": 0.2937777777777778, "grad_norm": 2.612804412841797, "learning_rate": 0.00014143810229799852, "loss": 1.0072, "step": 1983 }, { "epoch": 0.2939259259259259, "grad_norm": 1.4876537322998047, "learning_rate": 0.00014140845070422534, "loss": 1.189, "step": 1984 }, { "epoch": 0.2940740740740741, "grad_norm": 1.6770356893539429, "learning_rate": 0.0001413787991104522, "loss": 1.1421, "step": 1985 }, { "epoch": 0.2942222222222222, "grad_norm": 1.357831597328186, "learning_rate": 0.00014134914751667903, "loss": 1.022, "step": 1986 }, { "epoch": 0.2943703703703704, "grad_norm": 1.505112648010254, "learning_rate": 0.00014131949592290584, "loss": 1.1427, "step": 1987 }, { "epoch": 0.2945185185185185, "grad_norm": 1.8086297512054443, "learning_rate": 0.00014128984432913271, "loss": 1.0027, "step": 1988 }, { "epoch": 0.2946666666666667, "grad_norm": 1.528651237487793, "learning_rate": 0.00014126019273535953, "loss": 1.2825, "step": 1989 }, { "epoch": 0.2948148148148148, "grad_norm": 2.0352816581726074, "learning_rate": 0.00014123054114158635, "loss": 1.1326, "step": 1990 }, 
{ "epoch": 0.294962962962963, "grad_norm": 1.5756343603134155, "learning_rate": 0.00014120088954781322, "loss": 0.9532, "step": 1991 }, { "epoch": 0.2951111111111111, "grad_norm": 1.6788939237594604, "learning_rate": 0.00014117123795404004, "loss": 1.1934, "step": 1992 }, { "epoch": 0.2952592592592593, "grad_norm": 2.541499376296997, "learning_rate": 0.00014114158636026686, "loss": 0.9294, "step": 1993 }, { "epoch": 0.2954074074074074, "grad_norm": 1.2268800735473633, "learning_rate": 0.00014111193476649373, "loss": 1.0078, "step": 1994 }, { "epoch": 0.29555555555555557, "grad_norm": 1.4305241107940674, "learning_rate": 0.00014108228317272054, "loss": 1.1887, "step": 1995 }, { "epoch": 0.2957037037037037, "grad_norm": 1.9692851305007935, "learning_rate": 0.00014105263157894736, "loss": 1.2435, "step": 1996 }, { "epoch": 0.29585185185185187, "grad_norm": 3.043774366378784, "learning_rate": 0.0001410229799851742, "loss": 1.1618, "step": 1997 }, { "epoch": 0.296, "grad_norm": 1.3506648540496826, "learning_rate": 0.00014099332839140105, "loss": 1.2005, "step": 1998 }, { "epoch": 0.29614814814814816, "grad_norm": 1.5097509622573853, "learning_rate": 0.00014096367679762787, "loss": 1.2464, "step": 1999 }, { "epoch": 0.2962962962962963, "grad_norm": 2.3342812061309814, "learning_rate": 0.0001409340252038547, "loss": 1.095, "step": 2000 }, { "epoch": 0.29644444444444445, "grad_norm": 5.315145969390869, "learning_rate": 0.00014090437361008156, "loss": 1.1415, "step": 2001 }, { "epoch": 0.2965925925925926, "grad_norm": 1.8004692792892456, "learning_rate": 0.00014087472201630837, "loss": 1.3195, "step": 2002 }, { "epoch": 0.29674074074074075, "grad_norm": 1.706508994102478, "learning_rate": 0.00014084507042253522, "loss": 0.98, "step": 2003 }, { "epoch": 0.29688888888888887, "grad_norm": 1.7100145816802979, "learning_rate": 0.00014081541882876206, "loss": 1.1068, "step": 2004 }, { "epoch": 0.29703703703703704, "grad_norm": 1.4298951625823975, "learning_rate": 
0.00014078576723498888, "loss": 1.0667, "step": 2005 }, { "epoch": 0.29718518518518516, "grad_norm": 2.0300867557525635, "learning_rate": 0.00014075611564121573, "loss": 1.1282, "step": 2006 }, { "epoch": 0.29733333333333334, "grad_norm": 2.285794258117676, "learning_rate": 0.00014072646404744257, "loss": 1.2465, "step": 2007 }, { "epoch": 0.29748148148148146, "grad_norm": 2.2818078994750977, "learning_rate": 0.0001406968124536694, "loss": 1.0622, "step": 2008 }, { "epoch": 0.29762962962962963, "grad_norm": 1.829458236694336, "learning_rate": 0.00014066716085989623, "loss": 1.3267, "step": 2009 }, { "epoch": 0.29777777777777775, "grad_norm": 2.451023817062378, "learning_rate": 0.00014063750926612308, "loss": 1.1424, "step": 2010 }, { "epoch": 0.2979259259259259, "grad_norm": 1.8001643419265747, "learning_rate": 0.0001406078576723499, "loss": 1.0965, "step": 2011 }, { "epoch": 0.29807407407407405, "grad_norm": 2.1611523628234863, "learning_rate": 0.00014057820607857674, "loss": 1.4567, "step": 2012 }, { "epoch": 0.2982222222222222, "grad_norm": 2.005830764770508, "learning_rate": 0.00014054855448480358, "loss": 1.179, "step": 2013 }, { "epoch": 0.2983703703703704, "grad_norm": 1.6628178358078003, "learning_rate": 0.0001405189028910304, "loss": 1.0446, "step": 2014 }, { "epoch": 0.2985185185185185, "grad_norm": 1.9607075452804565, "learning_rate": 0.00014048925129725722, "loss": 0.9936, "step": 2015 }, { "epoch": 0.2986666666666667, "grad_norm": 1.6360664367675781, "learning_rate": 0.0001404595997034841, "loss": 1.2731, "step": 2016 }, { "epoch": 0.2988148148148148, "grad_norm": 1.4931821823120117, "learning_rate": 0.0001404299481097109, "loss": 1.4205, "step": 2017 }, { "epoch": 0.298962962962963, "grad_norm": 2.0647025108337402, "learning_rate": 0.00014040029651593772, "loss": 0.9611, "step": 2018 }, { "epoch": 0.2991111111111111, "grad_norm": 3.6834490299224854, "learning_rate": 0.00014037064492216457, "loss": 1.118, "step": 2019 }, { "epoch": 0.2992592592592593, 
"grad_norm": 2.8627822399139404, "learning_rate": 0.0001403409933283914, "loss": 1.1278, "step": 2020 }, { "epoch": 0.2994074074074074, "grad_norm": 1.3225929737091064, "learning_rate": 0.00014031134173461823, "loss": 0.8782, "step": 2021 }, { "epoch": 0.2995555555555556, "grad_norm": 3.9964535236358643, "learning_rate": 0.00014028169014084507, "loss": 1.0422, "step": 2022 }, { "epoch": 0.2997037037037037, "grad_norm": 5.868827819824219, "learning_rate": 0.00014025203854707192, "loss": 1.0829, "step": 2023 }, { "epoch": 0.29985185185185187, "grad_norm": 1.5820839405059814, "learning_rate": 0.00014022238695329874, "loss": 1.285, "step": 2024 }, { "epoch": 0.3, "grad_norm": 2.345027208328247, "learning_rate": 0.00014019273535952558, "loss": 1.1556, "step": 2025 }, { "epoch": 0.30014814814814816, "grad_norm": 1.3832290172576904, "learning_rate": 0.00014016308376575242, "loss": 1.2876, "step": 2026 }, { "epoch": 0.3002962962962963, "grad_norm": 2.850512742996216, "learning_rate": 0.00014013343217197924, "loss": 1.1713, "step": 2027 }, { "epoch": 0.30044444444444446, "grad_norm": 2.576414108276367, "learning_rate": 0.00014010378057820609, "loss": 1.1124, "step": 2028 }, { "epoch": 0.3005925925925926, "grad_norm": 2.3272831439971924, "learning_rate": 0.00014007412898443293, "loss": 1.2297, "step": 2029 }, { "epoch": 0.30074074074074075, "grad_norm": 2.0849647521972656, "learning_rate": 0.00014004447739065975, "loss": 1.2055, "step": 2030 }, { "epoch": 0.3008888888888889, "grad_norm": 1.5891973972320557, "learning_rate": 0.0001400148257968866, "loss": 1.2693, "step": 2031 }, { "epoch": 0.30103703703703705, "grad_norm": 1.419865369796753, "learning_rate": 0.00013998517420311344, "loss": 1.1698, "step": 2032 }, { "epoch": 0.30118518518518517, "grad_norm": 1.8656340837478638, "learning_rate": 0.00013995552260934025, "loss": 1.1178, "step": 2033 }, { "epoch": 0.30133333333333334, "grad_norm": 1.6104474067687988, "learning_rate": 0.0001399258710155671, "loss": 1.1444, "step": 
2034 }, { "epoch": 0.30148148148148146, "grad_norm": 1.2462067604064941, "learning_rate": 0.00013989621942179392, "loss": 1.0419, "step": 2035 }, { "epoch": 0.30162962962962964, "grad_norm": 1.6338249444961548, "learning_rate": 0.00013986656782802076, "loss": 0.9944, "step": 2036 }, { "epoch": 0.30177777777777776, "grad_norm": 1.7983711957931519, "learning_rate": 0.0001398369162342476, "loss": 1.2706, "step": 2037 }, { "epoch": 0.30192592592592593, "grad_norm": 1.923130989074707, "learning_rate": 0.00013980726464047442, "loss": 1.21, "step": 2038 }, { "epoch": 0.30207407407407405, "grad_norm": 1.6995714902877808, "learning_rate": 0.00013977761304670127, "loss": 1.2053, "step": 2039 }, { "epoch": 0.3022222222222222, "grad_norm": 1.351297378540039, "learning_rate": 0.0001397479614529281, "loss": 0.9145, "step": 2040 }, { "epoch": 0.30237037037037034, "grad_norm": 1.4165565967559814, "learning_rate": 0.00013971830985915493, "loss": 1.2775, "step": 2041 }, { "epoch": 0.3025185185185185, "grad_norm": 1.3446037769317627, "learning_rate": 0.00013968865826538177, "loss": 1.0615, "step": 2042 }, { "epoch": 0.30266666666666664, "grad_norm": 3.5239148139953613, "learning_rate": 0.00013965900667160862, "loss": 1.2237, "step": 2043 }, { "epoch": 0.3028148148148148, "grad_norm": 2.3545305728912354, "learning_rate": 0.00013962935507783544, "loss": 1.0055, "step": 2044 }, { "epoch": 0.302962962962963, "grad_norm": 0.9781142473220825, "learning_rate": 0.00013959970348406228, "loss": 1.0277, "step": 2045 }, { "epoch": 0.3031111111111111, "grad_norm": 2.4959397315979004, "learning_rate": 0.00013957005189028912, "loss": 1.252, "step": 2046 }, { "epoch": 0.3032592592592593, "grad_norm": 2.1696665287017822, "learning_rate": 0.00013954040029651594, "loss": 1.1637, "step": 2047 }, { "epoch": 0.3034074074074074, "grad_norm": 1.444602608680725, "learning_rate": 0.00013951074870274279, "loss": 0.8602, "step": 2048 }, { "epoch": 0.3035555555555556, "grad_norm": 2.0049591064453125, 
"learning_rate": 0.0001394810971089696, "loss": 1.1302, "step": 2049 }, { "epoch": 0.3037037037037037, "grad_norm": 6.134702682495117, "learning_rate": 0.00013945144551519645, "loss": 1.1219, "step": 2050 }, { "epoch": 0.3038518518518519, "grad_norm": 1.8419837951660156, "learning_rate": 0.0001394217939214233, "loss": 0.9795, "step": 2051 }, { "epoch": 0.304, "grad_norm": 2.378662109375, "learning_rate": 0.0001393921423276501, "loss": 1.1208, "step": 2052 }, { "epoch": 0.30414814814814817, "grad_norm": 1.314644694328308, "learning_rate": 0.00013936249073387695, "loss": 1.2562, "step": 2053 }, { "epoch": 0.3042962962962963, "grad_norm": 1.2006711959838867, "learning_rate": 0.00013933283914010377, "loss": 1.1376, "step": 2054 }, { "epoch": 0.30444444444444446, "grad_norm": 1.1629737615585327, "learning_rate": 0.00013930318754633062, "loss": 1.1162, "step": 2055 }, { "epoch": 0.3045925925925926, "grad_norm": 1.5775014162063599, "learning_rate": 0.00013927353595255746, "loss": 1.2141, "step": 2056 }, { "epoch": 0.30474074074074076, "grad_norm": 1.3674976825714111, "learning_rate": 0.00013924388435878428, "loss": 1.0952, "step": 2057 }, { "epoch": 0.3048888888888889, "grad_norm": 1.9454426765441895, "learning_rate": 0.00013921423276501112, "loss": 1.1577, "step": 2058 }, { "epoch": 0.30503703703703705, "grad_norm": 1.3205631971359253, "learning_rate": 0.00013918458117123797, "loss": 1.0524, "step": 2059 }, { "epoch": 0.30518518518518517, "grad_norm": 1.596271276473999, "learning_rate": 0.00013915492957746478, "loss": 1.252, "step": 2060 }, { "epoch": 0.30533333333333335, "grad_norm": 1.1906851530075073, "learning_rate": 0.00013912527798369163, "loss": 1.0535, "step": 2061 }, { "epoch": 0.30548148148148146, "grad_norm": 1.4791088104248047, "learning_rate": 0.00013909562638991847, "loss": 1.1838, "step": 2062 }, { "epoch": 0.30562962962962964, "grad_norm": 1.7153657674789429, "learning_rate": 0.0001390659747961453, "loss": 1.1325, "step": 2063 }, { "epoch": 
0.30577777777777776, "grad_norm": 2.7629482746124268, "learning_rate": 0.00013903632320237213, "loss": 0.9909, "step": 2064 }, { "epoch": 0.30592592592592593, "grad_norm": 1.4287002086639404, "learning_rate": 0.00013900667160859898, "loss": 1.1571, "step": 2065 }, { "epoch": 0.30607407407407405, "grad_norm": 1.3045237064361572, "learning_rate": 0.0001389770200148258, "loss": 1.1581, "step": 2066 }, { "epoch": 0.30622222222222223, "grad_norm": 1.5022578239440918, "learning_rate": 0.00013894736842105264, "loss": 1.2141, "step": 2067 }, { "epoch": 0.30637037037037035, "grad_norm": 7.063483238220215, "learning_rate": 0.00013891771682727949, "loss": 1.3515, "step": 2068 }, { "epoch": 0.3065185185185185, "grad_norm": 1.825252652168274, "learning_rate": 0.0001388880652335063, "loss": 1.1497, "step": 2069 }, { "epoch": 0.30666666666666664, "grad_norm": 0.9816583395004272, "learning_rate": 0.00013885841363973312, "loss": 1.0345, "step": 2070 }, { "epoch": 0.3068148148148148, "grad_norm": 1.6378679275512695, "learning_rate": 0.00013882876204596, "loss": 1.2512, "step": 2071 }, { "epoch": 0.30696296296296294, "grad_norm": 1.7705243825912476, "learning_rate": 0.0001387991104521868, "loss": 0.924, "step": 2072 }, { "epoch": 0.3071111111111111, "grad_norm": 1.4243345260620117, "learning_rate": 0.00013876945885841363, "loss": 1.0156, "step": 2073 }, { "epoch": 0.30725925925925923, "grad_norm": 1.9182519912719727, "learning_rate": 0.0001387398072646405, "loss": 1.176, "step": 2074 }, { "epoch": 0.3074074074074074, "grad_norm": 1.8517405986785889, "learning_rate": 0.00013871015567086732, "loss": 1.3266, "step": 2075 }, { "epoch": 0.3075555555555556, "grad_norm": 2.758709192276001, "learning_rate": 0.00013868050407709413, "loss": 0.9802, "step": 2076 }, { "epoch": 0.3077037037037037, "grad_norm": 1.551541805267334, "learning_rate": 0.000138650852483321, "loss": 1.1485, "step": 2077 }, { "epoch": 0.3078518518518519, "grad_norm": 1.4198012351989746, "learning_rate": 
0.00013862120088954782, "loss": 1.3013, "step": 2078 }, { "epoch": 0.308, "grad_norm": 1.623287558555603, "learning_rate": 0.00013859154929577464, "loss": 1.0242, "step": 2079 }, { "epoch": 0.30814814814814817, "grad_norm": 1.7934820652008057, "learning_rate": 0.0001385618977020015, "loss": 0.9132, "step": 2080 }, { "epoch": 0.3082962962962963, "grad_norm": 1.3298826217651367, "learning_rate": 0.00013853224610822833, "loss": 0.9939, "step": 2081 }, { "epoch": 0.30844444444444447, "grad_norm": 1.3171889781951904, "learning_rate": 0.00013850259451445515, "loss": 1.4316, "step": 2082 }, { "epoch": 0.3085925925925926, "grad_norm": 1.2780592441558838, "learning_rate": 0.000138472942920682, "loss": 1.2374, "step": 2083 }, { "epoch": 0.30874074074074076, "grad_norm": 0.9781845211982727, "learning_rate": 0.00013844329132690883, "loss": 0.8591, "step": 2084 }, { "epoch": 0.3088888888888889, "grad_norm": 1.3549484014511108, "learning_rate": 0.00013841363973313565, "loss": 1.0935, "step": 2085 }, { "epoch": 0.30903703703703705, "grad_norm": 1.1546287536621094, "learning_rate": 0.0001383839881393625, "loss": 1.1185, "step": 2086 }, { "epoch": 0.3091851851851852, "grad_norm": 4.052056312561035, "learning_rate": 0.00013835433654558934, "loss": 0.9417, "step": 2087 }, { "epoch": 0.30933333333333335, "grad_norm": 3.001648426055908, "learning_rate": 0.00013832468495181616, "loss": 1.2758, "step": 2088 }, { "epoch": 0.30948148148148147, "grad_norm": 2.10091233253479, "learning_rate": 0.000138295033358043, "loss": 1.3074, "step": 2089 }, { "epoch": 0.30962962962962964, "grad_norm": 1.6234939098358154, "learning_rate": 0.00013826538176426985, "loss": 1.2339, "step": 2090 }, { "epoch": 0.30977777777777776, "grad_norm": 1.1966112852096558, "learning_rate": 0.00013823573017049666, "loss": 1.1366, "step": 2091 }, { "epoch": 0.30992592592592594, "grad_norm": 1.2139739990234375, "learning_rate": 0.0001382060785767235, "loss": 1.3649, "step": 2092 }, { "epoch": 0.31007407407407406, 
"grad_norm": 1.45710289478302, "learning_rate": 0.00013817642698295035, "loss": 1.0141, "step": 2093 }, { "epoch": 0.31022222222222223, "grad_norm": 1.7804147005081177, "learning_rate": 0.00013814677538917717, "loss": 1.206, "step": 2094 }, { "epoch": 0.31037037037037035, "grad_norm": 2.8588223457336426, "learning_rate": 0.00013811712379540401, "loss": 1.2005, "step": 2095 }, { "epoch": 0.3105185185185185, "grad_norm": 1.061313271522522, "learning_rate": 0.00013808747220163086, "loss": 0.8415, "step": 2096 }, { "epoch": 0.31066666666666665, "grad_norm": 1.0050193071365356, "learning_rate": 0.00013805782060785768, "loss": 0.8893, "step": 2097 }, { "epoch": 0.3108148148148148, "grad_norm": 1.303781270980835, "learning_rate": 0.00013802816901408452, "loss": 1.1208, "step": 2098 }, { "epoch": 0.31096296296296294, "grad_norm": 0.9755362868309021, "learning_rate": 0.00013799851742031137, "loss": 1.0903, "step": 2099 }, { "epoch": 0.3111111111111111, "grad_norm": 1.2339026927947998, "learning_rate": 0.00013796886582653818, "loss": 1.1237, "step": 2100 }, { "epoch": 0.31125925925925924, "grad_norm": 1.6841607093811035, "learning_rate": 0.000137939214232765, "loss": 0.9327, "step": 2101 }, { "epoch": 0.3114074074074074, "grad_norm": 1.405722737312317, "learning_rate": 0.00013790956263899187, "loss": 1.2344, "step": 2102 }, { "epoch": 0.31155555555555553, "grad_norm": 2.11942195892334, "learning_rate": 0.0001378799110452187, "loss": 0.86, "step": 2103 }, { "epoch": 0.3117037037037037, "grad_norm": 1.6581157445907593, "learning_rate": 0.0001378502594514455, "loss": 1.3143, "step": 2104 }, { "epoch": 0.3118518518518518, "grad_norm": 2.0134646892547607, "learning_rate": 0.00013782060785767235, "loss": 1.2018, "step": 2105 }, { "epoch": 0.312, "grad_norm": 2.4110283851623535, "learning_rate": 0.0001377909562638992, "loss": 1.3338, "step": 2106 }, { "epoch": 0.3121481481481482, "grad_norm": 1.8445804119110107, "learning_rate": 0.000137761304670126, "loss": 1.0447, "step": 2107 }, 
{ "epoch": 0.3122962962962963, "grad_norm": 2.610743761062622, "learning_rate": 0.00013773165307635286, "loss": 1.073, "step": 2108 }, { "epoch": 0.31244444444444447, "grad_norm": 1.6694021224975586, "learning_rate": 0.0001377020014825797, "loss": 1.0696, "step": 2109 }, { "epoch": 0.3125925925925926, "grad_norm": 1.3696857690811157, "learning_rate": 0.00013767234988880652, "loss": 1.0803, "step": 2110 }, { "epoch": 0.31274074074074076, "grad_norm": 1.8750032186508179, "learning_rate": 0.00013764269829503336, "loss": 1.2241, "step": 2111 }, { "epoch": 0.3128888888888889, "grad_norm": 2.962296962738037, "learning_rate": 0.0001376130467012602, "loss": 1.1358, "step": 2112 }, { "epoch": 0.31303703703703706, "grad_norm": 1.7400325536727905, "learning_rate": 0.00013758339510748703, "loss": 1.2508, "step": 2113 }, { "epoch": 0.3131851851851852, "grad_norm": 5.648115158081055, "learning_rate": 0.00013755374351371387, "loss": 1.2557, "step": 2114 }, { "epoch": 0.31333333333333335, "grad_norm": 2.5589797496795654, "learning_rate": 0.00013752409191994071, "loss": 1.1127, "step": 2115 }, { "epoch": 0.31348148148148147, "grad_norm": 2.085777997970581, "learning_rate": 0.00013749444032616753, "loss": 1.245, "step": 2116 }, { "epoch": 0.31362962962962965, "grad_norm": 2.188732147216797, "learning_rate": 0.00013746478873239438, "loss": 1.1694, "step": 2117 }, { "epoch": 0.31377777777777777, "grad_norm": 1.11812162399292, "learning_rate": 0.00013743513713862122, "loss": 1.323, "step": 2118 }, { "epoch": 0.31392592592592594, "grad_norm": 1.5886762142181396, "learning_rate": 0.00013740548554484804, "loss": 1.0427, "step": 2119 }, { "epoch": 0.31407407407407406, "grad_norm": 1.855367660522461, "learning_rate": 0.00013737583395107488, "loss": 0.9953, "step": 2120 }, { "epoch": 0.31422222222222224, "grad_norm": 1.6558690071105957, "learning_rate": 0.00013734618235730173, "loss": 1.0263, "step": 2121 }, { "epoch": 0.31437037037037036, "grad_norm": 0.9354804754257202, "learning_rate": 
0.00013731653076352854, "loss": 1.4811, "step": 2122 }, { "epoch": 0.31451851851851853, "grad_norm": 2.6394095420837402, "learning_rate": 0.0001372868791697554, "loss": 0.9087, "step": 2123 }, { "epoch": 0.31466666666666665, "grad_norm": 1.5338950157165527, "learning_rate": 0.0001372572275759822, "loss": 1.0878, "step": 2124 }, { "epoch": 0.3148148148148148, "grad_norm": 2.82747220993042, "learning_rate": 0.00013722757598220905, "loss": 1.2008, "step": 2125 }, { "epoch": 0.31496296296296294, "grad_norm": 1.4896637201309204, "learning_rate": 0.0001371979243884359, "loss": 1.1101, "step": 2126 }, { "epoch": 0.3151111111111111, "grad_norm": 3.165152072906494, "learning_rate": 0.0001371682727946627, "loss": 1.2841, "step": 2127 }, { "epoch": 0.31525925925925924, "grad_norm": 2.199152946472168, "learning_rate": 0.00013713862120088956, "loss": 1.3937, "step": 2128 }, { "epoch": 0.3154074074074074, "grad_norm": 5.559042930603027, "learning_rate": 0.0001371089696071164, "loss": 1.2933, "step": 2129 }, { "epoch": 0.31555555555555553, "grad_norm": 10.026233673095703, "learning_rate": 0.00013707931801334322, "loss": 1.1713, "step": 2130 }, { "epoch": 0.3157037037037037, "grad_norm": 1.5633232593536377, "learning_rate": 0.00013704966641957006, "loss": 1.054, "step": 2131 }, { "epoch": 0.31585185185185183, "grad_norm": 1.8309271335601807, "learning_rate": 0.0001370200148257969, "loss": 1.1635, "step": 2132 }, { "epoch": 0.316, "grad_norm": 1.3764691352844238, "learning_rate": 0.00013699036323202372, "loss": 1.0757, "step": 2133 }, { "epoch": 0.3161481481481481, "grad_norm": 1.3178585767745972, "learning_rate": 0.00013696071163825057, "loss": 0.8379, "step": 2134 }, { "epoch": 0.3162962962962963, "grad_norm": 1.8320229053497314, "learning_rate": 0.0001369310600444774, "loss": 1.0575, "step": 2135 }, { "epoch": 0.3164444444444444, "grad_norm": 2.8101818561553955, "learning_rate": 0.00013690140845070423, "loss": 1.1453, "step": 2136 }, { "epoch": 0.3165925925925926, "grad_norm": 
2.0384163856506348, "learning_rate": 0.00013687175685693108, "loss": 1.0169, "step": 2137 }, { "epoch": 0.31674074074074077, "grad_norm": 1.8953146934509277, "learning_rate": 0.0001368421052631579, "loss": 1.2889, "step": 2138 }, { "epoch": 0.3168888888888889, "grad_norm": 2.657672643661499, "learning_rate": 0.00013681245366938474, "loss": 1.1288, "step": 2139 }, { "epoch": 0.31703703703703706, "grad_norm": 4.052217960357666, "learning_rate": 0.00013678280207561155, "loss": 1.2616, "step": 2140 }, { "epoch": 0.3171851851851852, "grad_norm": 1.951884388923645, "learning_rate": 0.0001367531504818384, "loss": 1.0237, "step": 2141 }, { "epoch": 0.31733333333333336, "grad_norm": 2.485708236694336, "learning_rate": 0.00013672349888806524, "loss": 1.0757, "step": 2142 }, { "epoch": 0.3174814814814815, "grad_norm": 3.166104555130005, "learning_rate": 0.00013669384729429206, "loss": 1.0823, "step": 2143 }, { "epoch": 0.31762962962962965, "grad_norm": 4.034739017486572, "learning_rate": 0.0001366641957005189, "loss": 1.1093, "step": 2144 }, { "epoch": 0.31777777777777777, "grad_norm": 2.1679728031158447, "learning_rate": 0.00013663454410674575, "loss": 1.0586, "step": 2145 }, { "epoch": 0.31792592592592595, "grad_norm": 2.8911707401275635, "learning_rate": 0.00013660489251297257, "loss": 1.1043, "step": 2146 }, { "epoch": 0.31807407407407406, "grad_norm": 1.3390415906906128, "learning_rate": 0.0001365752409191994, "loss": 0.9645, "step": 2147 }, { "epoch": 0.31822222222222224, "grad_norm": 2.3080697059631348, "learning_rate": 0.00013654558932542626, "loss": 1.234, "step": 2148 }, { "epoch": 0.31837037037037036, "grad_norm": 1.975243091583252, "learning_rate": 0.00013651593773165307, "loss": 0.9722, "step": 2149 }, { "epoch": 0.31851851851851853, "grad_norm": 1.9367717504501343, "learning_rate": 0.00013648628613787992, "loss": 0.8212, "step": 2150 }, { "epoch": 0.31866666666666665, "grad_norm": 1.9093810319900513, "learning_rate": 0.00013645663454410676, "loss": 1.0883, 
"step": 2151 }, { "epoch": 0.31881481481481483, "grad_norm": 1.5951952934265137, "learning_rate": 0.00013642698295033358, "loss": 1.1901, "step": 2152 }, { "epoch": 0.31896296296296295, "grad_norm": 2.7034664154052734, "learning_rate": 0.00013639733135656042, "loss": 1.2348, "step": 2153 }, { "epoch": 0.3191111111111111, "grad_norm": 1.6457303762435913, "learning_rate": 0.00013636767976278727, "loss": 1.3959, "step": 2154 }, { "epoch": 0.31925925925925924, "grad_norm": 1.4474924802780151, "learning_rate": 0.00013633802816901409, "loss": 1.0256, "step": 2155 }, { "epoch": 0.3194074074074074, "grad_norm": 4.5285139083862305, "learning_rate": 0.0001363083765752409, "loss": 1.3252, "step": 2156 }, { "epoch": 0.31955555555555554, "grad_norm": 3.122760534286499, "learning_rate": 0.00013627872498146777, "loss": 1.4024, "step": 2157 }, { "epoch": 0.3197037037037037, "grad_norm": 3.5009829998016357, "learning_rate": 0.0001362490733876946, "loss": 1.0491, "step": 2158 }, { "epoch": 0.31985185185185183, "grad_norm": 2.080441951751709, "learning_rate": 0.0001362194217939214, "loss": 0.962, "step": 2159 }, { "epoch": 0.32, "grad_norm": 1.5678153038024902, "learning_rate": 0.00013618977020014828, "loss": 0.9275, "step": 2160 }, { "epoch": 0.3201481481481481, "grad_norm": 3.1842198371887207, "learning_rate": 0.0001361601186063751, "loss": 1.2926, "step": 2161 }, { "epoch": 0.3202962962962963, "grad_norm": 2.5489861965179443, "learning_rate": 0.00013613046701260192, "loss": 1.0225, "step": 2162 }, { "epoch": 0.3204444444444444, "grad_norm": 1.5396262407302856, "learning_rate": 0.0001361008154188288, "loss": 1.1842, "step": 2163 }, { "epoch": 0.3205925925925926, "grad_norm": 2.000410318374634, "learning_rate": 0.0001360711638250556, "loss": 1.1912, "step": 2164 }, { "epoch": 0.3207407407407407, "grad_norm": 3.4067788124084473, "learning_rate": 0.00013604151223128242, "loss": 1.0961, "step": 2165 }, { "epoch": 0.3208888888888889, "grad_norm": 2.5867462158203125, "learning_rate": 
0.0001360118606375093, "loss": 1.5042, "step": 2166 }, { "epoch": 0.321037037037037, "grad_norm": 1.708670735359192, "learning_rate": 0.0001359822090437361, "loss": 1.1048, "step": 2167 }, { "epoch": 0.3211851851851852, "grad_norm": 1.2881687879562378, "learning_rate": 0.00013595255744996293, "loss": 1.2155, "step": 2168 }, { "epoch": 0.32133333333333336, "grad_norm": 1.3208478689193726, "learning_rate": 0.00013592290585618977, "loss": 1.15, "step": 2169 }, { "epoch": 0.3214814814814815, "grad_norm": 2.9517972469329834, "learning_rate": 0.00013589325426241662, "loss": 1.2613, "step": 2170 }, { "epoch": 0.32162962962962965, "grad_norm": 1.9396419525146484, "learning_rate": 0.00013586360266864343, "loss": 1.0884, "step": 2171 }, { "epoch": 0.3217777777777778, "grad_norm": 1.2326692342758179, "learning_rate": 0.00013583395107487028, "loss": 1.2093, "step": 2172 }, { "epoch": 0.32192592592592595, "grad_norm": 1.4012480974197388, "learning_rate": 0.00013580429948109712, "loss": 1.1254, "step": 2173 }, { "epoch": 0.32207407407407407, "grad_norm": 3.0078437328338623, "learning_rate": 0.00013577464788732394, "loss": 1.0937, "step": 2174 }, { "epoch": 0.32222222222222224, "grad_norm": 1.1547621488571167, "learning_rate": 0.00013574499629355079, "loss": 0.9611, "step": 2175 }, { "epoch": 0.32237037037037036, "grad_norm": 2.170196771621704, "learning_rate": 0.00013571534469977763, "loss": 1.2364, "step": 2176 }, { "epoch": 0.32251851851851854, "grad_norm": 1.1854034662246704, "learning_rate": 0.00013568569310600445, "loss": 1.1208, "step": 2177 }, { "epoch": 0.32266666666666666, "grad_norm": 1.8629307746887207, "learning_rate": 0.0001356560415122313, "loss": 1.0558, "step": 2178 }, { "epoch": 0.32281481481481483, "grad_norm": 1.7712525129318237, "learning_rate": 0.00013562638991845814, "loss": 1.1729, "step": 2179 }, { "epoch": 0.32296296296296295, "grad_norm": 1.4765230417251587, "learning_rate": 0.00013559673832468495, "loss": 1.3337, "step": 2180 }, { "epoch": 
0.3231111111111111, "grad_norm": 1.1344188451766968, "learning_rate": 0.0001355670867309118, "loss": 0.9294, "step": 2181 }, { "epoch": 0.32325925925925925, "grad_norm": 1.6488782167434692, "learning_rate": 0.00013553743513713864, "loss": 1.2047, "step": 2182 }, { "epoch": 0.3234074074074074, "grad_norm": 1.8202825784683228, "learning_rate": 0.00013550778354336546, "loss": 1.0434, "step": 2183 }, { "epoch": 0.32355555555555554, "grad_norm": 2.084465265274048, "learning_rate": 0.0001354781319495923, "loss": 1.2276, "step": 2184 }, { "epoch": 0.3237037037037037, "grad_norm": 1.4090747833251953, "learning_rate": 0.00013544848035581915, "loss": 1.1444, "step": 2185 }, { "epoch": 0.32385185185185184, "grad_norm": 1.7574411630630493, "learning_rate": 0.00013541882876204597, "loss": 1.0567, "step": 2186 }, { "epoch": 0.324, "grad_norm": 1.773861050605774, "learning_rate": 0.00013538917716827278, "loss": 1.1064, "step": 2187 }, { "epoch": 0.32414814814814813, "grad_norm": 2.8019587993621826, "learning_rate": 0.00013535952557449965, "loss": 1.1687, "step": 2188 }, { "epoch": 0.3242962962962963, "grad_norm": 1.6108754873275757, "learning_rate": 0.00013532987398072647, "loss": 1.0169, "step": 2189 }, { "epoch": 0.3244444444444444, "grad_norm": 1.2681635618209839, "learning_rate": 0.0001353002223869533, "loss": 0.9246, "step": 2190 }, { "epoch": 0.3245925925925926, "grad_norm": 1.8541462421417236, "learning_rate": 0.00013527057079318013, "loss": 1.2053, "step": 2191 }, { "epoch": 0.3247407407407407, "grad_norm": 1.8434734344482422, "learning_rate": 0.00013524091919940698, "loss": 1.1271, "step": 2192 }, { "epoch": 0.3248888888888889, "grad_norm": 2.0871453285217285, "learning_rate": 0.0001352112676056338, "loss": 1.0076, "step": 2193 }, { "epoch": 0.325037037037037, "grad_norm": 1.3376096487045288, "learning_rate": 0.00013518161601186064, "loss": 1.0293, "step": 2194 }, { "epoch": 0.3251851851851852, "grad_norm": 1.2752751111984253, "learning_rate": 0.00013515196441808748, 
"loss": 1.0601, "step": 2195 }, { "epoch": 0.3253333333333333, "grad_norm": 5.159078121185303, "learning_rate": 0.0001351223128243143, "loss": 0.8771, "step": 2196 }, { "epoch": 0.3254814814814815, "grad_norm": 1.5362927913665771, "learning_rate": 0.00013509266123054115, "loss": 1.1814, "step": 2197 }, { "epoch": 0.3256296296296296, "grad_norm": 1.4354616403579712, "learning_rate": 0.000135063009636768, "loss": 1.5378, "step": 2198 }, { "epoch": 0.3257777777777778, "grad_norm": 2.6065075397491455, "learning_rate": 0.0001350333580429948, "loss": 1.1688, "step": 2199 }, { "epoch": 0.32592592592592595, "grad_norm": 1.4871209859848022, "learning_rate": 0.00013500370644922165, "loss": 1.125, "step": 2200 }, { "epoch": 0.32607407407407407, "grad_norm": 6.628143787384033, "learning_rate": 0.0001349740548554485, "loss": 1.1464, "step": 2201 }, { "epoch": 0.32622222222222225, "grad_norm": 2.4741086959838867, "learning_rate": 0.00013494440326167531, "loss": 1.0454, "step": 2202 }, { "epoch": 0.32637037037037037, "grad_norm": 1.276069164276123, "learning_rate": 0.00013491475166790216, "loss": 0.9532, "step": 2203 }, { "epoch": 0.32651851851851854, "grad_norm": 1.3667021989822388, "learning_rate": 0.000134885100074129, "loss": 1.0145, "step": 2204 }, { "epoch": 0.32666666666666666, "grad_norm": 1.7200194597244263, "learning_rate": 0.00013485544848035582, "loss": 0.8424, "step": 2205 }, { "epoch": 0.32681481481481484, "grad_norm": 1.6975458860397339, "learning_rate": 0.00013482579688658267, "loss": 1.1774, "step": 2206 }, { "epoch": 0.32696296296296296, "grad_norm": 2.316565990447998, "learning_rate": 0.0001347961452928095, "loss": 1.1567, "step": 2207 }, { "epoch": 0.32711111111111113, "grad_norm": 2.421337604522705, "learning_rate": 0.00013476649369903633, "loss": 1.195, "step": 2208 }, { "epoch": 0.32725925925925925, "grad_norm": 1.2722667455673218, "learning_rate": 0.00013473684210526317, "loss": 1.0311, "step": 2209 }, { "epoch": 0.3274074074074074, "grad_norm": 
1.2215676307678223, "learning_rate": 0.00013470719051149, "loss": 1.0461, "step": 2210 }, { "epoch": 0.32755555555555554, "grad_norm": 1.0788629055023193, "learning_rate": 0.00013467753891771683, "loss": 1.2845, "step": 2211 }, { "epoch": 0.3277037037037037, "grad_norm": 1.3409634828567505, "learning_rate": 0.00013464788732394368, "loss": 1.1823, "step": 2212 }, { "epoch": 0.32785185185185184, "grad_norm": 2.9622018337249756, "learning_rate": 0.0001346182357301705, "loss": 1.0053, "step": 2213 }, { "epoch": 0.328, "grad_norm": 1.644832730293274, "learning_rate": 0.00013458858413639734, "loss": 1.1964, "step": 2214 }, { "epoch": 0.32814814814814813, "grad_norm": 1.0058623552322388, "learning_rate": 0.00013455893254262418, "loss": 0.7503, "step": 2215 }, { "epoch": 0.3282962962962963, "grad_norm": 1.2541953325271606, "learning_rate": 0.000134529280948851, "loss": 1.1544, "step": 2216 }, { "epoch": 0.32844444444444443, "grad_norm": 2.020519495010376, "learning_rate": 0.00013449962935507785, "loss": 1.2573, "step": 2217 }, { "epoch": 0.3285925925925926, "grad_norm": 1.1754764318466187, "learning_rate": 0.0001344699777613047, "loss": 1.0214, "step": 2218 }, { "epoch": 0.3287407407407407, "grad_norm": 5.07017183303833, "learning_rate": 0.0001344403261675315, "loss": 0.9595, "step": 2219 }, { "epoch": 0.3288888888888889, "grad_norm": 1.5297579765319824, "learning_rate": 0.00013441067457375835, "loss": 1.2923, "step": 2220 }, { "epoch": 0.329037037037037, "grad_norm": 1.3993247747421265, "learning_rate": 0.00013438102297998517, "loss": 1.1458, "step": 2221 }, { "epoch": 0.3291851851851852, "grad_norm": 1.262351393699646, "learning_rate": 0.00013435137138621201, "loss": 0.966, "step": 2222 }, { "epoch": 0.3293333333333333, "grad_norm": 1.176295518875122, "learning_rate": 0.00013432171979243886, "loss": 1.028, "step": 2223 }, { "epoch": 0.3294814814814815, "grad_norm": 1.2695571184158325, "learning_rate": 0.00013429206819866568, "loss": 1.0897, "step": 2224 }, { "epoch": 
0.3296296296296296, "grad_norm": 1.488978385925293, "learning_rate": 0.00013426241660489252, "loss": 1.1638, "step": 2225 }, { "epoch": 0.3297777777777778, "grad_norm": 4.710011959075928, "learning_rate": 0.00013423276501111934, "loss": 1.2246, "step": 2226 }, { "epoch": 0.3299259259259259, "grad_norm": 1.6328994035720825, "learning_rate": 0.00013420311341734618, "loss": 0.9819, "step": 2227 }, { "epoch": 0.3300740740740741, "grad_norm": 2.277803421020508, "learning_rate": 0.00013417346182357303, "loss": 1.2242, "step": 2228 }, { "epoch": 0.3302222222222222, "grad_norm": 2.4489951133728027, "learning_rate": 0.00013414381022979984, "loss": 1.1634, "step": 2229 }, { "epoch": 0.33037037037037037, "grad_norm": 1.8839341402053833, "learning_rate": 0.0001341141586360267, "loss": 1.1036, "step": 2230 }, { "epoch": 0.33051851851851854, "grad_norm": 2.3348946571350098, "learning_rate": 0.00013408450704225353, "loss": 1.0797, "step": 2231 }, { "epoch": 0.33066666666666666, "grad_norm": 1.9656461477279663, "learning_rate": 0.00013405485544848035, "loss": 1.2083, "step": 2232 }, { "epoch": 0.33081481481481484, "grad_norm": 1.7312856912612915, "learning_rate": 0.0001340252038547072, "loss": 0.9045, "step": 2233 }, { "epoch": 0.33096296296296296, "grad_norm": 1.9389487504959106, "learning_rate": 0.00013399555226093404, "loss": 1.036, "step": 2234 }, { "epoch": 0.33111111111111113, "grad_norm": 1.2917704582214355, "learning_rate": 0.00013396590066716086, "loss": 1.0194, "step": 2235 }, { "epoch": 0.33125925925925925, "grad_norm": 1.4721956253051758, "learning_rate": 0.0001339362490733877, "loss": 1.1603, "step": 2236 }, { "epoch": 0.33140740740740743, "grad_norm": 1.4399042129516602, "learning_rate": 0.00013390659747961455, "loss": 0.8924, "step": 2237 }, { "epoch": 0.33155555555555555, "grad_norm": 1.5402852296829224, "learning_rate": 0.00013387694588584136, "loss": 0.9991, "step": 2238 }, { "epoch": 0.3317037037037037, "grad_norm": 1.7603425979614258, "learning_rate": 
0.0001338472942920682, "loss": 1.2753, "step": 2239 }, { "epoch": 0.33185185185185184, "grad_norm": 9.889684677124023, "learning_rate": 0.00013381764269829505, "loss": 1.0038, "step": 2240 }, { "epoch": 0.332, "grad_norm": 1.8739274740219116, "learning_rate": 0.00013378799110452187, "loss": 1.1637, "step": 2241 }, { "epoch": 0.33214814814814814, "grad_norm": 1.2513951063156128, "learning_rate": 0.0001337583395107487, "loss": 0.8515, "step": 2242 }, { "epoch": 0.3322962962962963, "grad_norm": 1.3573156595230103, "learning_rate": 0.00013372868791697556, "loss": 1.1591, "step": 2243 }, { "epoch": 0.33244444444444443, "grad_norm": 1.0235463380813599, "learning_rate": 0.00013369903632320238, "loss": 1.0468, "step": 2244 }, { "epoch": 0.3325925925925926, "grad_norm": 3.3065717220306396, "learning_rate": 0.0001336693847294292, "loss": 1.3364, "step": 2245 }, { "epoch": 0.3327407407407407, "grad_norm": 1.5484888553619385, "learning_rate": 0.00013363973313565606, "loss": 1.1987, "step": 2246 }, { "epoch": 0.3328888888888889, "grad_norm": 1.9932880401611328, "learning_rate": 0.00013361008154188288, "loss": 1.1988, "step": 2247 }, { "epoch": 0.333037037037037, "grad_norm": 1.6632351875305176, "learning_rate": 0.0001335804299481097, "loss": 1.236, "step": 2248 }, { "epoch": 0.3331851851851852, "grad_norm": 1.444595456123352, "learning_rate": 0.00013355077835433657, "loss": 0.977, "step": 2249 }, { "epoch": 0.3333333333333333, "grad_norm": 2.094960927963257, "learning_rate": 0.0001335211267605634, "loss": 1.2066, "step": 2250 }, { "epoch": 0.3334814814814815, "grad_norm": 1.1526453495025635, "learning_rate": 0.0001334914751667902, "loss": 1.0169, "step": 2251 }, { "epoch": 0.3336296296296296, "grad_norm": 1.17086923122406, "learning_rate": 0.00013346182357301708, "loss": 1.0256, "step": 2252 }, { "epoch": 0.3337777777777778, "grad_norm": 2.183746337890625, "learning_rate": 0.0001334321719792439, "loss": 1.1574, "step": 2253 }, { "epoch": 0.3339259259259259, "grad_norm": 
1.5439538955688477, "learning_rate": 0.0001334025203854707, "loss": 1.0676, "step": 2254 }, { "epoch": 0.3340740740740741, "grad_norm": 2.3153722286224365, "learning_rate": 0.00013337286879169756, "loss": 1.1764, "step": 2255 }, { "epoch": 0.3342222222222222, "grad_norm": 2.0121192932128906, "learning_rate": 0.0001333432171979244, "loss": 1.2408, "step": 2256 }, { "epoch": 0.3343703703703704, "grad_norm": 1.2326407432556152, "learning_rate": 0.00013331356560415122, "loss": 1.0246, "step": 2257 }, { "epoch": 0.3345185185185185, "grad_norm": 1.514545202255249, "learning_rate": 0.00013328391401037806, "loss": 1.1998, "step": 2258 }, { "epoch": 0.33466666666666667, "grad_norm": 2.469101667404175, "learning_rate": 0.0001332542624166049, "loss": 1.1136, "step": 2259 }, { "epoch": 0.3348148148148148, "grad_norm": 1.266455054283142, "learning_rate": 0.00013322461082283172, "loss": 1.1005, "step": 2260 }, { "epoch": 0.33496296296296296, "grad_norm": 1.3625801801681519, "learning_rate": 0.00013319495922905857, "loss": 0.9987, "step": 2261 }, { "epoch": 0.33511111111111114, "grad_norm": 2.7278425693511963, "learning_rate": 0.0001331653076352854, "loss": 1.2047, "step": 2262 }, { "epoch": 0.33525925925925926, "grad_norm": 1.64750075340271, "learning_rate": 0.00013313565604151223, "loss": 1.056, "step": 2263 }, { "epoch": 0.33540740740740743, "grad_norm": 1.3636395931243896, "learning_rate": 0.00013310600444773907, "loss": 1.2012, "step": 2264 }, { "epoch": 0.33555555555555555, "grad_norm": 1.602080225944519, "learning_rate": 0.00013307635285396592, "loss": 1.0032, "step": 2265 }, { "epoch": 0.3357037037037037, "grad_norm": 1.533245325088501, "learning_rate": 0.00013304670126019274, "loss": 1.1727, "step": 2266 }, { "epoch": 0.33585185185185185, "grad_norm": 5.972871780395508, "learning_rate": 0.00013301704966641958, "loss": 1.0066, "step": 2267 }, { "epoch": 0.336, "grad_norm": 2.36637282371521, "learning_rate": 0.00013298739807264643, "loss": 1.068, "step": 2268 }, { "epoch": 
0.33614814814814814, "grad_norm": 1.5738499164581299, "learning_rate": 0.00013295774647887324, "loss": 1.2732, "step": 2269 }, { "epoch": 0.3362962962962963, "grad_norm": 1.3270472288131714, "learning_rate": 0.0001329280948851001, "loss": 1.0262, "step": 2270 }, { "epoch": 0.33644444444444443, "grad_norm": 2.080435276031494, "learning_rate": 0.00013289844329132693, "loss": 1.0533, "step": 2271 }, { "epoch": 0.3365925925925926, "grad_norm": 1.3887215852737427, "learning_rate": 0.00013286879169755375, "loss": 1.0709, "step": 2272 }, { "epoch": 0.33674074074074073, "grad_norm": 2.125825881958008, "learning_rate": 0.00013283914010378057, "loss": 1.1915, "step": 2273 }, { "epoch": 0.3368888888888889, "grad_norm": 1.562106728553772, "learning_rate": 0.00013280948851000744, "loss": 1.1003, "step": 2274 }, { "epoch": 0.337037037037037, "grad_norm": 3.1373605728149414, "learning_rate": 0.00013277983691623426, "loss": 0.8193, "step": 2275 }, { "epoch": 0.3371851851851852, "grad_norm": 1.7475008964538574, "learning_rate": 0.00013275018532246107, "loss": 1.0573, "step": 2276 }, { "epoch": 0.3373333333333333, "grad_norm": 1.6964002847671509, "learning_rate": 0.00013272053372868792, "loss": 1.2842, "step": 2277 }, { "epoch": 0.3374814814814815, "grad_norm": 1.929476022720337, "learning_rate": 0.00013269088213491476, "loss": 1.2528, "step": 2278 }, { "epoch": 0.3376296296296296, "grad_norm": 1.4762523174285889, "learning_rate": 0.00013266123054114158, "loss": 1.3121, "step": 2279 }, { "epoch": 0.3377777777777778, "grad_norm": 1.1908106803894043, "learning_rate": 0.00013263157894736842, "loss": 0.9289, "step": 2280 }, { "epoch": 0.3379259259259259, "grad_norm": 2.59541654586792, "learning_rate": 0.00013260192735359527, "loss": 0.962, "step": 2281 }, { "epoch": 0.3380740740740741, "grad_norm": 1.5643655061721802, "learning_rate": 0.00013257227575982209, "loss": 0.912, "step": 2282 }, { "epoch": 0.3382222222222222, "grad_norm": 3.0142710208892822, "learning_rate": 
0.00013254262416604893, "loss": 1.2697, "step": 2283 }, { "epoch": 0.3383703703703704, "grad_norm": 1.6093851327896118, "learning_rate": 0.00013251297257227577, "loss": 1.0584, "step": 2284 }, { "epoch": 0.3385185185185185, "grad_norm": 2.0272083282470703, "learning_rate": 0.0001324833209785026, "loss": 1.5104, "step": 2285 }, { "epoch": 0.33866666666666667, "grad_norm": 2.518580198287964, "learning_rate": 0.00013245366938472944, "loss": 1.2991, "step": 2286 }, { "epoch": 0.3388148148148148, "grad_norm": 1.4523662328720093, "learning_rate": 0.00013242401779095628, "loss": 1.2912, "step": 2287 }, { "epoch": 0.33896296296296297, "grad_norm": 2.055608034133911, "learning_rate": 0.0001323943661971831, "loss": 0.9778, "step": 2288 }, { "epoch": 0.3391111111111111, "grad_norm": 1.3338221311569214, "learning_rate": 0.00013236471460340994, "loss": 0.8675, "step": 2289 }, { "epoch": 0.33925925925925926, "grad_norm": 1.8887592554092407, "learning_rate": 0.0001323350630096368, "loss": 1.0106, "step": 2290 }, { "epoch": 0.3394074074074074, "grad_norm": 1.289023756980896, "learning_rate": 0.0001323054114158636, "loss": 1.327, "step": 2291 }, { "epoch": 0.33955555555555555, "grad_norm": 1.4453749656677246, "learning_rate": 0.00013227575982209045, "loss": 1.3064, "step": 2292 }, { "epoch": 0.33970370370370373, "grad_norm": 1.5419834852218628, "learning_rate": 0.0001322461082283173, "loss": 1.2386, "step": 2293 }, { "epoch": 0.33985185185185185, "grad_norm": 1.6710052490234375, "learning_rate": 0.0001322164566345441, "loss": 1.0302, "step": 2294 }, { "epoch": 0.34, "grad_norm": 1.6174490451812744, "learning_rate": 0.00013218680504077096, "loss": 1.0299, "step": 2295 }, { "epoch": 0.34014814814814814, "grad_norm": 2.3813655376434326, "learning_rate": 0.00013215715344699777, "loss": 1.3254, "step": 2296 }, { "epoch": 0.3402962962962963, "grad_norm": 2.1109097003936768, "learning_rate": 0.00013212750185322462, "loss": 0.9789, "step": 2297 }, { "epoch": 0.34044444444444444, 
"grad_norm": 2.058483362197876, "learning_rate": 0.00013209785025945146, "loss": 1.065, "step": 2298 }, { "epoch": 0.3405925925925926, "grad_norm": 2.3258121013641357, "learning_rate": 0.00013206819866567828, "loss": 0.9045, "step": 2299 }, { "epoch": 0.34074074074074073, "grad_norm": 2.2851665019989014, "learning_rate": 0.00013203854707190512, "loss": 1.2529, "step": 2300 }, { "epoch": 0.3408888888888889, "grad_norm": 1.613037347793579, "learning_rate": 0.00013200889547813197, "loss": 1.2204, "step": 2301 }, { "epoch": 0.341037037037037, "grad_norm": 1.3707785606384277, "learning_rate": 0.00013197924388435878, "loss": 0.9235, "step": 2302 }, { "epoch": 0.3411851851851852, "grad_norm": 2.2744858264923096, "learning_rate": 0.00013194959229058563, "loss": 1.2832, "step": 2303 }, { "epoch": 0.3413333333333333, "grad_norm": 2.278960943222046, "learning_rate": 0.00013191994069681247, "loss": 1.1904, "step": 2304 }, { "epoch": 0.3414814814814815, "grad_norm": 1.6594761610031128, "learning_rate": 0.0001318902891030393, "loss": 1.3182, "step": 2305 }, { "epoch": 0.3416296296296296, "grad_norm": 1.9333387613296509, "learning_rate": 0.00013186063750926614, "loss": 1.1184, "step": 2306 }, { "epoch": 0.3417777777777778, "grad_norm": 2.1505253314971924, "learning_rate": 0.00013183098591549295, "loss": 1.343, "step": 2307 }, { "epoch": 0.3419259259259259, "grad_norm": 2.506364345550537, "learning_rate": 0.0001318013343217198, "loss": 1.0817, "step": 2308 }, { "epoch": 0.3420740740740741, "grad_norm": 1.2928565740585327, "learning_rate": 0.00013177168272794664, "loss": 1.1916, "step": 2309 }, { "epoch": 0.3422222222222222, "grad_norm": 1.342826247215271, "learning_rate": 0.00013174203113417346, "loss": 1.1338, "step": 2310 }, { "epoch": 0.3423703703703704, "grad_norm": 2.0148978233337402, "learning_rate": 0.0001317123795404003, "loss": 1.2262, "step": 2311 }, { "epoch": 0.3425185185185185, "grad_norm": 1.5498647689819336, "learning_rate": 0.00013168272794662712, "loss": 1.2198, 
"step": 2312 }, { "epoch": 0.3426666666666667, "grad_norm": 1.4745525121688843, "learning_rate": 0.00013165307635285397, "loss": 1.0997, "step": 2313 }, { "epoch": 0.3428148148148148, "grad_norm": 1.331832766532898, "learning_rate": 0.0001316234247590808, "loss": 1.162, "step": 2314 }, { "epoch": 0.34296296296296297, "grad_norm": 1.9498533010482788, "learning_rate": 0.00013159377316530763, "loss": 1.1283, "step": 2315 }, { "epoch": 0.3431111111111111, "grad_norm": 1.7941899299621582, "learning_rate": 0.00013156412157153447, "loss": 1.1096, "step": 2316 }, { "epoch": 0.34325925925925926, "grad_norm": 2.869532823562622, "learning_rate": 0.00013153446997776132, "loss": 0.9594, "step": 2317 }, { "epoch": 0.3434074074074074, "grad_norm": 1.5423548221588135, "learning_rate": 0.00013150481838398813, "loss": 1.0791, "step": 2318 }, { "epoch": 0.34355555555555556, "grad_norm": 2.9745006561279297, "learning_rate": 0.00013147516679021498, "loss": 1.1664, "step": 2319 }, { "epoch": 0.3437037037037037, "grad_norm": 2.872915506362915, "learning_rate": 0.00013144551519644182, "loss": 1.0395, "step": 2320 }, { "epoch": 0.34385185185185185, "grad_norm": 1.4737216234207153, "learning_rate": 0.00013141586360266864, "loss": 1.2031, "step": 2321 }, { "epoch": 0.344, "grad_norm": 2.5522093772888184, "learning_rate": 0.00013138621200889548, "loss": 1.316, "step": 2322 }, { "epoch": 0.34414814814814815, "grad_norm": 1.5588302612304688, "learning_rate": 0.00013135656041512233, "loss": 0.9322, "step": 2323 }, { "epoch": 0.3442962962962963, "grad_norm": 1.507179856300354, "learning_rate": 0.00013132690882134915, "loss": 1.0541, "step": 2324 }, { "epoch": 0.34444444444444444, "grad_norm": 1.4153691530227661, "learning_rate": 0.000131297257227576, "loss": 1.0663, "step": 2325 }, { "epoch": 0.3445925925925926, "grad_norm": 2.8796892166137695, "learning_rate": 0.00013126760563380284, "loss": 0.9802, "step": 2326 }, { "epoch": 0.34474074074074074, "grad_norm": 2.128862142562866, "learning_rate": 
0.00013123795404002965, "loss": 1.0643, "step": 2327 }, { "epoch": 0.3448888888888889, "grad_norm": 7.3458075523376465, "learning_rate": 0.0001312083024462565, "loss": 1.3487, "step": 2328 }, { "epoch": 0.34503703703703703, "grad_norm": 2.286179542541504, "learning_rate": 0.00013117865085248334, "loss": 1.092, "step": 2329 }, { "epoch": 0.3451851851851852, "grad_norm": 1.8476670980453491, "learning_rate": 0.00013114899925871016, "loss": 1.1717, "step": 2330 }, { "epoch": 0.3453333333333333, "grad_norm": 2.7225584983825684, "learning_rate": 0.00013111934766493698, "loss": 1.1744, "step": 2331 }, { "epoch": 0.3454814814814815, "grad_norm": 1.977587103843689, "learning_rate": 0.00013108969607116385, "loss": 1.0366, "step": 2332 }, { "epoch": 0.3456296296296296, "grad_norm": 1.4287402629852295, "learning_rate": 0.00013106004447739067, "loss": 0.94, "step": 2333 }, { "epoch": 0.3457777777777778, "grad_norm": 3.3908700942993164, "learning_rate": 0.00013103039288361748, "loss": 0.9075, "step": 2334 }, { "epoch": 0.3459259259259259, "grad_norm": 1.7279934883117676, "learning_rate": 0.00013100074128984435, "loss": 0.9598, "step": 2335 }, { "epoch": 0.3460740740740741, "grad_norm": 2.6617941856384277, "learning_rate": 0.00013097108969607117, "loss": 1.0555, "step": 2336 }, { "epoch": 0.3462222222222222, "grad_norm": 1.5456342697143555, "learning_rate": 0.000130941438102298, "loss": 0.9512, "step": 2337 }, { "epoch": 0.3463703703703704, "grad_norm": 1.7742456197738647, "learning_rate": 0.00013091178650852486, "loss": 1.1459, "step": 2338 }, { "epoch": 0.3465185185185185, "grad_norm": 1.6286709308624268, "learning_rate": 0.00013088213491475168, "loss": 1.0374, "step": 2339 }, { "epoch": 0.3466666666666667, "grad_norm": 2.609323024749756, "learning_rate": 0.0001308524833209785, "loss": 1.0615, "step": 2340 }, { "epoch": 0.3468148148148148, "grad_norm": 1.8275586366653442, "learning_rate": 0.00013082283172720534, "loss": 1.1204, "step": 2341 }, { "epoch": 0.346962962962963, 
"grad_norm": 1.8131769895553589, "learning_rate": 0.00013079318013343218, "loss": 1.2238, "step": 2342 }, { "epoch": 0.3471111111111111, "grad_norm": 1.819657802581787, "learning_rate": 0.000130763528539659, "loss": 1.1137, "step": 2343 }, { "epoch": 0.34725925925925927, "grad_norm": 1.9716880321502686, "learning_rate": 0.00013073387694588585, "loss": 0.9928, "step": 2344 }, { "epoch": 0.3474074074074074, "grad_norm": 1.4299585819244385, "learning_rate": 0.0001307042253521127, "loss": 1.1994, "step": 2345 }, { "epoch": 0.34755555555555556, "grad_norm": 3.061483383178711, "learning_rate": 0.0001306745737583395, "loss": 1.095, "step": 2346 }, { "epoch": 0.3477037037037037, "grad_norm": 1.1320656538009644, "learning_rate": 0.00013064492216456635, "loss": 1.1769, "step": 2347 }, { "epoch": 0.34785185185185186, "grad_norm": 1.1383968591690063, "learning_rate": 0.0001306152705707932, "loss": 1.2742, "step": 2348 }, { "epoch": 0.348, "grad_norm": 1.5431816577911377, "learning_rate": 0.00013058561897702001, "loss": 0.8871, "step": 2349 }, { "epoch": 0.34814814814814815, "grad_norm": 1.4152706861495972, "learning_rate": 0.00013055596738324686, "loss": 1.1823, "step": 2350 }, { "epoch": 0.34829629629629627, "grad_norm": 1.2930550575256348, "learning_rate": 0.0001305263157894737, "loss": 1.0146, "step": 2351 }, { "epoch": 0.34844444444444445, "grad_norm": 1.5922138690948486, "learning_rate": 0.00013049666419570052, "loss": 0.948, "step": 2352 }, { "epoch": 0.34859259259259257, "grad_norm": 2.2768802642822266, "learning_rate": 0.00013046701260192736, "loss": 1.0215, "step": 2353 }, { "epoch": 0.34874074074074074, "grad_norm": 3.258697986602783, "learning_rate": 0.0001304373610081542, "loss": 1.3143, "step": 2354 }, { "epoch": 0.3488888888888889, "grad_norm": 1.8296974897384644, "learning_rate": 0.00013040770941438103, "loss": 1.0664, "step": 2355 }, { "epoch": 0.34903703703703703, "grad_norm": 1.260080099105835, "learning_rate": 0.00013037805782060787, "loss": 1.1924, "step": 
2356 }, { "epoch": 0.3491851851851852, "grad_norm": 2.329533100128174, "learning_rate": 0.00013034840622683472, "loss": 1.1003, "step": 2357 }, { "epoch": 0.34933333333333333, "grad_norm": 1.5267668962478638, "learning_rate": 0.00013031875463306153, "loss": 1.1741, "step": 2358 }, { "epoch": 0.3494814814814815, "grad_norm": 1.801763653755188, "learning_rate": 0.00013028910303928835, "loss": 1.0512, "step": 2359 }, { "epoch": 0.3496296296296296, "grad_norm": 1.5702931880950928, "learning_rate": 0.00013025945144551522, "loss": 0.9878, "step": 2360 }, { "epoch": 0.3497777777777778, "grad_norm": 1.5275589227676392, "learning_rate": 0.00013022979985174204, "loss": 1.2239, "step": 2361 }, { "epoch": 0.3499259259259259, "grad_norm": 1.3607791662216187, "learning_rate": 0.00013020014825796886, "loss": 0.955, "step": 2362 }, { "epoch": 0.3500740740740741, "grad_norm": 1.9856244325637817, "learning_rate": 0.0001301704966641957, "loss": 1.059, "step": 2363 }, { "epoch": 0.3502222222222222, "grad_norm": 4.6814656257629395, "learning_rate": 0.00013014084507042255, "loss": 1.2651, "step": 2364 }, { "epoch": 0.3503703703703704, "grad_norm": 1.6786004304885864, "learning_rate": 0.00013011119347664936, "loss": 1.4956, "step": 2365 }, { "epoch": 0.3505185185185185, "grad_norm": 1.4799193143844604, "learning_rate": 0.0001300815418828762, "loss": 1.1083, "step": 2366 }, { "epoch": 0.3506666666666667, "grad_norm": 1.5611683130264282, "learning_rate": 0.00013005189028910305, "loss": 1.1887, "step": 2367 }, { "epoch": 0.3508148148148148, "grad_norm": 1.5283243656158447, "learning_rate": 0.00013002223869532987, "loss": 1.0457, "step": 2368 }, { "epoch": 0.350962962962963, "grad_norm": 1.9599016904830933, "learning_rate": 0.0001299925871015567, "loss": 1.2424, "step": 2369 }, { "epoch": 0.3511111111111111, "grad_norm": 2.5228400230407715, "learning_rate": 0.00012996293550778356, "loss": 1.1464, "step": 2370 }, { "epoch": 0.35125925925925927, "grad_norm": 2.2123970985412598, 
"learning_rate": 0.00012993328391401038, "loss": 0.9944, "step": 2371 }, { "epoch": 0.3514074074074074, "grad_norm": 1.8431411981582642, "learning_rate": 0.00012990363232023722, "loss": 0.9995, "step": 2372 }, { "epoch": 0.35155555555555557, "grad_norm": 1.4077692031860352, "learning_rate": 0.00012987398072646406, "loss": 1.195, "step": 2373 }, { "epoch": 0.3517037037037037, "grad_norm": 1.4908804893493652, "learning_rate": 0.00012984432913269088, "loss": 1.1056, "step": 2374 }, { "epoch": 0.35185185185185186, "grad_norm": 1.3918676376342773, "learning_rate": 0.00012981467753891773, "loss": 1.088, "step": 2375 }, { "epoch": 0.352, "grad_norm": 1.3568589687347412, "learning_rate": 0.00012978502594514457, "loss": 0.9954, "step": 2376 }, { "epoch": 0.35214814814814815, "grad_norm": 2.4987220764160156, "learning_rate": 0.0001297553743513714, "loss": 1.2841, "step": 2377 }, { "epoch": 0.3522962962962963, "grad_norm": 1.3742563724517822, "learning_rate": 0.00012972572275759823, "loss": 0.9693, "step": 2378 }, { "epoch": 0.35244444444444445, "grad_norm": 1.2668038606643677, "learning_rate": 0.00012969607116382508, "loss": 1.1097, "step": 2379 }, { "epoch": 0.35259259259259257, "grad_norm": 2.4651150703430176, "learning_rate": 0.0001296664195700519, "loss": 0.9591, "step": 2380 }, { "epoch": 0.35274074074074074, "grad_norm": 2.202927827835083, "learning_rate": 0.00012963676797627874, "loss": 1.0431, "step": 2381 }, { "epoch": 0.35288888888888886, "grad_norm": 3.5075621604919434, "learning_rate": 0.00012960711638250556, "loss": 1.0707, "step": 2382 }, { "epoch": 0.35303703703703704, "grad_norm": 1.142947793006897, "learning_rate": 0.0001295774647887324, "loss": 0.9011, "step": 2383 }, { "epoch": 0.35318518518518516, "grad_norm": 6.0171918869018555, "learning_rate": 0.00012954781319495924, "loss": 1.3032, "step": 2384 }, { "epoch": 0.35333333333333333, "grad_norm": 1.3418155908584595, "learning_rate": 0.00012951816160118606, "loss": 1.1346, "step": 2385 }, { "epoch": 
0.3534814814814815, "grad_norm": 4.483288764953613, "learning_rate": 0.0001294885100074129, "loss": 1.2346, "step": 2386 }, { "epoch": 0.3536296296296296, "grad_norm": 2.130213737487793, "learning_rate": 0.00012945885841363975, "loss": 1.0704, "step": 2387 }, { "epoch": 0.3537777777777778, "grad_norm": 1.1577987670898438, "learning_rate": 0.00012942920681986657, "loss": 1.1326, "step": 2388 }, { "epoch": 0.3539259259259259, "grad_norm": 3.3889710903167725, "learning_rate": 0.0001293995552260934, "loss": 1.2009, "step": 2389 }, { "epoch": 0.3540740740740741, "grad_norm": 1.7422261238098145, "learning_rate": 0.00012936990363232026, "loss": 1.0677, "step": 2390 }, { "epoch": 0.3542222222222222, "grad_norm": 2.3017358779907227, "learning_rate": 0.00012934025203854707, "loss": 1.1908, "step": 2391 }, { "epoch": 0.3543703703703704, "grad_norm": 1.8185927867889404, "learning_rate": 0.00012931060044477392, "loss": 1.0373, "step": 2392 }, { "epoch": 0.3545185185185185, "grad_norm": 2.478407144546509, "learning_rate": 0.00012928094885100074, "loss": 0.9484, "step": 2393 }, { "epoch": 0.3546666666666667, "grad_norm": 1.702976942062378, "learning_rate": 0.00012925129725722758, "loss": 1.105, "step": 2394 }, { "epoch": 0.3548148148148148, "grad_norm": 2.6446189880371094, "learning_rate": 0.00012922164566345443, "loss": 0.8852, "step": 2395 }, { "epoch": 0.354962962962963, "grad_norm": 1.8409467935562134, "learning_rate": 0.00012919199406968124, "loss": 1.031, "step": 2396 }, { "epoch": 0.3551111111111111, "grad_norm": 1.3260278701782227, "learning_rate": 0.0001291623424759081, "loss": 1.0098, "step": 2397 }, { "epoch": 0.3552592592592593, "grad_norm": 1.2395411729812622, "learning_rate": 0.0001291326908821349, "loss": 1.1051, "step": 2398 }, { "epoch": 0.3554074074074074, "grad_norm": 1.3518856763839722, "learning_rate": 0.00012910303928836175, "loss": 0.8233, "step": 2399 }, { "epoch": 0.35555555555555557, "grad_norm": 1.969286322593689, "learning_rate": 0.0001290733876945886, 
"loss": 1.1434, "step": 2400 }, { "epoch": 0.3557037037037037, "grad_norm": 7.381817817687988, "learning_rate": 0.0001290437361008154, "loss": 1.2454, "step": 2401 }, { "epoch": 0.35585185185185186, "grad_norm": 1.8303916454315186, "learning_rate": 0.00012901408450704226, "loss": 1.1364, "step": 2402 }, { "epoch": 0.356, "grad_norm": 2.781118631362915, "learning_rate": 0.0001289844329132691, "loss": 1.318, "step": 2403 }, { "epoch": 0.35614814814814816, "grad_norm": 2.0820934772491455, "learning_rate": 0.00012895478131949592, "loss": 1.1334, "step": 2404 }, { "epoch": 0.3562962962962963, "grad_norm": 2.527456283569336, "learning_rate": 0.00012892512972572276, "loss": 1.019, "step": 2405 }, { "epoch": 0.35644444444444445, "grad_norm": 1.662646770477295, "learning_rate": 0.0001288954781319496, "loss": 1.2195, "step": 2406 }, { "epoch": 0.35659259259259257, "grad_norm": 1.7455997467041016, "learning_rate": 0.00012886582653817642, "loss": 1.0235, "step": 2407 }, { "epoch": 0.35674074074074075, "grad_norm": 1.5952746868133545, "learning_rate": 0.00012883617494440327, "loss": 1.3457, "step": 2408 }, { "epoch": 0.35688888888888887, "grad_norm": 1.823468565940857, "learning_rate": 0.0001288065233506301, "loss": 1.2087, "step": 2409 }, { "epoch": 0.35703703703703704, "grad_norm": 1.7799134254455566, "learning_rate": 0.00012877687175685693, "loss": 1.2204, "step": 2410 }, { "epoch": 0.35718518518518516, "grad_norm": 3.1586012840270996, "learning_rate": 0.00012874722016308377, "loss": 1.117, "step": 2411 }, { "epoch": 0.35733333333333334, "grad_norm": 1.5659916400909424, "learning_rate": 0.00012871756856931062, "loss": 1.0821, "step": 2412 }, { "epoch": 0.35748148148148146, "grad_norm": 1.5964833498001099, "learning_rate": 0.00012868791697553744, "loss": 1.2492, "step": 2413 }, { "epoch": 0.35762962962962963, "grad_norm": 1.5727459192276, "learning_rate": 0.00012865826538176428, "loss": 1.0777, "step": 2414 }, { "epoch": 0.35777777777777775, "grad_norm": 2.3969857692718506, 
"learning_rate": 0.00012862861378799112, "loss": 1.0429, "step": 2415 }, { "epoch": 0.3579259259259259, "grad_norm": 1.8711894750595093, "learning_rate": 0.00012859896219421794, "loss": 1.1727, "step": 2416 }, { "epoch": 0.3580740740740741, "grad_norm": 1.7183738946914673, "learning_rate": 0.00012856931060044476, "loss": 1.1054, "step": 2417 }, { "epoch": 0.3582222222222222, "grad_norm": 19.228012084960938, "learning_rate": 0.00012853965900667163, "loss": 1.0972, "step": 2418 }, { "epoch": 0.3583703703703704, "grad_norm": 2.1944210529327393, "learning_rate": 0.00012851000741289845, "loss": 1.2464, "step": 2419 }, { "epoch": 0.3585185185185185, "grad_norm": 6.018752098083496, "learning_rate": 0.00012848035581912527, "loss": 0.8132, "step": 2420 }, { "epoch": 0.3586666666666667, "grad_norm": 1.8604862689971924, "learning_rate": 0.00012845070422535214, "loss": 1.2575, "step": 2421 }, { "epoch": 0.3588148148148148, "grad_norm": 2.9561004638671875, "learning_rate": 0.00012842105263157895, "loss": 0.9434, "step": 2422 }, { "epoch": 0.358962962962963, "grad_norm": 1.7331130504608154, "learning_rate": 0.00012839140103780577, "loss": 1.1185, "step": 2423 }, { "epoch": 0.3591111111111111, "grad_norm": 2.323881149291992, "learning_rate": 0.00012836174944403264, "loss": 1.067, "step": 2424 }, { "epoch": 0.3592592592592593, "grad_norm": 1.638932466506958, "learning_rate": 0.00012833209785025946, "loss": 1.1443, "step": 2425 }, { "epoch": 0.3594074074074074, "grad_norm": 2.301362991333008, "learning_rate": 0.00012830244625648628, "loss": 0.9504, "step": 2426 }, { "epoch": 0.3595555555555556, "grad_norm": 1.333407998085022, "learning_rate": 0.00012827279466271312, "loss": 0.8333, "step": 2427 }, { "epoch": 0.3597037037037037, "grad_norm": 1.5285286903381348, "learning_rate": 0.00012824314306893997, "loss": 1.1386, "step": 2428 }, { "epoch": 0.35985185185185187, "grad_norm": 1.8714796304702759, "learning_rate": 0.00012821349147516678, "loss": 1.3843, "step": 2429 }, { "epoch": 
0.36, "grad_norm": 1.7737548351287842, "learning_rate": 0.00012818383988139363, "loss": 0.9567, "step": 2430 }, { "epoch": 0.36014814814814816, "grad_norm": 2.0456058979034424, "learning_rate": 0.00012815418828762047, "loss": 0.8945, "step": 2431 }, { "epoch": 0.3602962962962963, "grad_norm": 3.461463212966919, "learning_rate": 0.0001281245366938473, "loss": 1.0448, "step": 2432 }, { "epoch": 0.36044444444444446, "grad_norm": 1.7812738418579102, "learning_rate": 0.00012809488510007414, "loss": 1.1725, "step": 2433 }, { "epoch": 0.3605925925925926, "grad_norm": 1.7331719398498535, "learning_rate": 0.00012806523350630098, "loss": 1.2265, "step": 2434 }, { "epoch": 0.36074074074074075, "grad_norm": 1.94052255153656, "learning_rate": 0.0001280355819125278, "loss": 1.2161, "step": 2435 }, { "epoch": 0.36088888888888887, "grad_norm": 2.128408908843994, "learning_rate": 0.00012800593031875464, "loss": 1.004, "step": 2436 }, { "epoch": 0.36103703703703705, "grad_norm": 1.4985857009887695, "learning_rate": 0.00012797627872498149, "loss": 0.9299, "step": 2437 }, { "epoch": 0.36118518518518516, "grad_norm": 2.550116539001465, "learning_rate": 0.0001279466271312083, "loss": 0.989, "step": 2438 }, { "epoch": 0.36133333333333334, "grad_norm": 1.5930713415145874, "learning_rate": 0.00012791697553743515, "loss": 1.3768, "step": 2439 }, { "epoch": 0.36148148148148146, "grad_norm": 1.9154518842697144, "learning_rate": 0.000127887323943662, "loss": 1.0797, "step": 2440 }, { "epoch": 0.36162962962962963, "grad_norm": 1.6080303192138672, "learning_rate": 0.0001278576723498888, "loss": 0.8371, "step": 2441 }, { "epoch": 0.36177777777777775, "grad_norm": 1.7143136262893677, "learning_rate": 0.00012782802075611565, "loss": 0.997, "step": 2442 }, { "epoch": 0.36192592592592593, "grad_norm": 2.2452192306518555, "learning_rate": 0.0001277983691623425, "loss": 1.0163, "step": 2443 }, { "epoch": 0.36207407407407405, "grad_norm": 1.7062718868255615, "learning_rate": 0.00012776871756856932, 
"loss": 1.0657, "step": 2444 }, { "epoch": 0.3622222222222222, "grad_norm": 1.5455044507980347, "learning_rate": 0.00012773906597479613, "loss": 0.8581, "step": 2445 }, { "epoch": 0.36237037037037034, "grad_norm": 1.2241191864013672, "learning_rate": 0.000127709414381023, "loss": 1.095, "step": 2446 }, { "epoch": 0.3625185185185185, "grad_norm": 1.5044482946395874, "learning_rate": 0.00012767976278724982, "loss": 1.0647, "step": 2447 }, { "epoch": 0.3626666666666667, "grad_norm": 2.371448278427124, "learning_rate": 0.00012765011119347664, "loss": 1.2282, "step": 2448 }, { "epoch": 0.3628148148148148, "grad_norm": 1.5230810642242432, "learning_rate": 0.00012762045959970348, "loss": 0.9934, "step": 2449 }, { "epoch": 0.362962962962963, "grad_norm": 1.5243303775787354, "learning_rate": 0.00012759080800593033, "loss": 0.9776, "step": 2450 }, { "epoch": 0.3631111111111111, "grad_norm": 2.3378851413726807, "learning_rate": 0.00012756115641215715, "loss": 1.2032, "step": 2451 }, { "epoch": 0.3632592592592593, "grad_norm": 1.9390791654586792, "learning_rate": 0.000127531504818384, "loss": 1.1387, "step": 2452 }, { "epoch": 0.3634074074074074, "grad_norm": 1.212646722793579, "learning_rate": 0.00012750185322461083, "loss": 1.2237, "step": 2453 }, { "epoch": 0.3635555555555556, "grad_norm": 2.331469774246216, "learning_rate": 0.00012747220163083765, "loss": 1.0961, "step": 2454 }, { "epoch": 0.3637037037037037, "grad_norm": 3.145979642868042, "learning_rate": 0.0001274425500370645, "loss": 1.1028, "step": 2455 }, { "epoch": 0.36385185185185187, "grad_norm": 3.360153913497925, "learning_rate": 0.00012741289844329134, "loss": 0.9997, "step": 2456 }, { "epoch": 0.364, "grad_norm": 1.4973174333572388, "learning_rate": 0.00012738324684951816, "loss": 1.0894, "step": 2457 }, { "epoch": 0.36414814814814817, "grad_norm": 1.7876518964767456, "learning_rate": 0.000127353595255745, "loss": 1.0056, "step": 2458 }, { "epoch": 0.3642962962962963, "grad_norm": 1.4665687084197998, 
"learning_rate": 0.00012732394366197185, "loss": 1.1241, "step": 2459 }, { "epoch": 0.36444444444444446, "grad_norm": 1.607784390449524, "learning_rate": 0.00012729429206819866, "loss": 1.1712, "step": 2460 }, { "epoch": 0.3645925925925926, "grad_norm": 5.57588005065918, "learning_rate": 0.0001272646404744255, "loss": 1.3167, "step": 2461 }, { "epoch": 0.36474074074074075, "grad_norm": 2.404571294784546, "learning_rate": 0.00012723498888065235, "loss": 1.1764, "step": 2462 }, { "epoch": 0.3648888888888889, "grad_norm": 1.4461548328399658, "learning_rate": 0.00012720533728687917, "loss": 1.2814, "step": 2463 }, { "epoch": 0.36503703703703705, "grad_norm": 1.2893413305282593, "learning_rate": 0.00012717568569310602, "loss": 1.0241, "step": 2464 }, { "epoch": 0.36518518518518517, "grad_norm": 1.6379495859146118, "learning_rate": 0.00012714603409933286, "loss": 1.0907, "step": 2465 }, { "epoch": 0.36533333333333334, "grad_norm": 2.1730432510375977, "learning_rate": 0.00012711638250555968, "loss": 1.2725, "step": 2466 }, { "epoch": 0.36548148148148146, "grad_norm": 1.6737717390060425, "learning_rate": 0.00012708673091178652, "loss": 0.9893, "step": 2467 }, { "epoch": 0.36562962962962964, "grad_norm": 1.4764902591705322, "learning_rate": 0.00012705707931801334, "loss": 1.315, "step": 2468 }, { "epoch": 0.36577777777777776, "grad_norm": 1.63857901096344, "learning_rate": 0.00012702742772424018, "loss": 1.1869, "step": 2469 }, { "epoch": 0.36592592592592593, "grad_norm": 2.2073915004730225, "learning_rate": 0.00012699777613046703, "loss": 1.2164, "step": 2470 }, { "epoch": 0.36607407407407405, "grad_norm": 1.5736753940582275, "learning_rate": 0.00012696812453669385, "loss": 0.9623, "step": 2471 }, { "epoch": 0.3662222222222222, "grad_norm": 4.185050964355469, "learning_rate": 0.0001269384729429207, "loss": 1.2394, "step": 2472 }, { "epoch": 0.36637037037037035, "grad_norm": 2.4215621948242188, "learning_rate": 0.00012690882134914753, "loss": 1.1873, "step": 2473 }, { 
"epoch": 0.3665185185185185, "grad_norm": 1.6673725843429565, "learning_rate": 0.00012687916975537435, "loss": 1.1829, "step": 2474 }, { "epoch": 0.36666666666666664, "grad_norm": 2.9597597122192383, "learning_rate": 0.0001268495181616012, "loss": 1.2275, "step": 2475 }, { "epoch": 0.3668148148148148, "grad_norm": 2.4930925369262695, "learning_rate": 0.00012681986656782804, "loss": 1.1223, "step": 2476 }, { "epoch": 0.36696296296296294, "grad_norm": 3.147881031036377, "learning_rate": 0.00012679021497405486, "loss": 1.0393, "step": 2477 }, { "epoch": 0.3671111111111111, "grad_norm": 3.007131576538086, "learning_rate": 0.0001267605633802817, "loss": 1.0715, "step": 2478 }, { "epoch": 0.3672592592592593, "grad_norm": 8.853863716125488, "learning_rate": 0.00012673091178650852, "loss": 1.0706, "step": 2479 }, { "epoch": 0.3674074074074074, "grad_norm": 2.0026261806488037, "learning_rate": 0.00012670126019273536, "loss": 1.1067, "step": 2480 }, { "epoch": 0.3675555555555556, "grad_norm": 1.833274483680725, "learning_rate": 0.0001266716085989622, "loss": 1.3698, "step": 2481 }, { "epoch": 0.3677037037037037, "grad_norm": 2.059183120727539, "learning_rate": 0.00012664195700518903, "loss": 1.0032, "step": 2482 }, { "epoch": 0.3678518518518519, "grad_norm": 1.295753002166748, "learning_rate": 0.00012661230541141587, "loss": 1.1414, "step": 2483 }, { "epoch": 0.368, "grad_norm": 1.6397961378097534, "learning_rate": 0.0001265826538176427, "loss": 1.1128, "step": 2484 }, { "epoch": 0.36814814814814817, "grad_norm": 1.9030511379241943, "learning_rate": 0.00012655300222386953, "loss": 1.1714, "step": 2485 }, { "epoch": 0.3682962962962963, "grad_norm": 1.5962088108062744, "learning_rate": 0.00012652335063009638, "loss": 1.1224, "step": 2486 }, { "epoch": 0.36844444444444446, "grad_norm": 1.4006625413894653, "learning_rate": 0.0001264936990363232, "loss": 1.0887, "step": 2487 }, { "epoch": 0.3685925925925926, "grad_norm": 4.466986656188965, "learning_rate": 0.00012646404744255004, 
"loss": 0.9706, "step": 2488 }, { "epoch": 0.36874074074074076, "grad_norm": 1.7184315919876099, "learning_rate": 0.00012643439584877688, "loss": 1.0607, "step": 2489 }, { "epoch": 0.3688888888888889, "grad_norm": 2.338414192199707, "learning_rate": 0.0001264047442550037, "loss": 1.2112, "step": 2490 }, { "epoch": 0.36903703703703705, "grad_norm": 1.548397183418274, "learning_rate": 0.00012637509266123054, "loss": 1.0891, "step": 2491 }, { "epoch": 0.36918518518518517, "grad_norm": 1.8215255737304688, "learning_rate": 0.0001263454410674574, "loss": 0.959, "step": 2492 }, { "epoch": 0.36933333333333335, "grad_norm": 1.2169244289398193, "learning_rate": 0.0001263157894736842, "loss": 1.093, "step": 2493 }, { "epoch": 0.36948148148148147, "grad_norm": 3.0639641284942627, "learning_rate": 0.00012628613787991105, "loss": 1.3668, "step": 2494 }, { "epoch": 0.36962962962962964, "grad_norm": 1.5741491317749023, "learning_rate": 0.0001262564862861379, "loss": 0.9399, "step": 2495 }, { "epoch": 0.36977777777777776, "grad_norm": 1.4462112188339233, "learning_rate": 0.0001262268346923647, "loss": 1.139, "step": 2496 }, { "epoch": 0.36992592592592594, "grad_norm": 1.1880522966384888, "learning_rate": 0.00012619718309859156, "loss": 1.1902, "step": 2497 }, { "epoch": 0.37007407407407406, "grad_norm": 1.3176915645599365, "learning_rate": 0.0001261675315048184, "loss": 0.9821, "step": 2498 }, { "epoch": 0.37022222222222223, "grad_norm": 1.6645140647888184, "learning_rate": 0.00012613787991104522, "loss": 1.0211, "step": 2499 }, { "epoch": 0.37037037037037035, "grad_norm": 1.8516615629196167, "learning_rate": 0.00012610822831727206, "loss": 1.101, "step": 2500 }, { "epoch": 0.3705185185185185, "grad_norm": 1.4071452617645264, "learning_rate": 0.0001260785767234989, "loss": 0.9341, "step": 2501 }, { "epoch": 0.37066666666666664, "grad_norm": 2.9631004333496094, "learning_rate": 0.00012604892512972573, "loss": 1.0563, "step": 2502 }, { "epoch": 0.3708148148148148, "grad_norm": 
4.322868824005127, "learning_rate": 0.00012601927353595254, "loss": 1.0793, "step": 2503 }, { "epoch": 0.37096296296296294, "grad_norm": 3.899007797241211, "learning_rate": 0.00012598962194217941, "loss": 1.2543, "step": 2504 }, { "epoch": 0.3711111111111111, "grad_norm": 1.6123673915863037, "learning_rate": 0.00012595997034840623, "loss": 1.1704, "step": 2505 }, { "epoch": 0.37125925925925923, "grad_norm": 2.163992404937744, "learning_rate": 0.00012593031875463305, "loss": 1.0913, "step": 2506 }, { "epoch": 0.3714074074074074, "grad_norm": 1.86015784740448, "learning_rate": 0.00012590066716085992, "loss": 0.9095, "step": 2507 }, { "epoch": 0.37155555555555553, "grad_norm": 1.3718888759613037, "learning_rate": 0.00012587101556708674, "loss": 1.1879, "step": 2508 }, { "epoch": 0.3717037037037037, "grad_norm": 5.352588653564453, "learning_rate": 0.00012584136397331356, "loss": 0.9394, "step": 2509 }, { "epoch": 0.3718518518518519, "grad_norm": 1.686468482017517, "learning_rate": 0.00012581171237954043, "loss": 1.18, "step": 2510 }, { "epoch": 0.372, "grad_norm": 2.798321485519409, "learning_rate": 0.00012578206078576724, "loss": 0.9624, "step": 2511 }, { "epoch": 0.3721481481481482, "grad_norm": 2.6296160221099854, "learning_rate": 0.00012575240919199406, "loss": 1.1563, "step": 2512 }, { "epoch": 0.3722962962962963, "grad_norm": 4.136303901672363, "learning_rate": 0.0001257227575982209, "loss": 1.12, "step": 2513 }, { "epoch": 0.37244444444444447, "grad_norm": 2.5504956245422363, "learning_rate": 0.00012569310600444775, "loss": 0.9938, "step": 2514 }, { "epoch": 0.3725925925925926, "grad_norm": 3.7541561126708984, "learning_rate": 0.00012566345441067457, "loss": 1.0822, "step": 2515 }, { "epoch": 0.37274074074074076, "grad_norm": 2.6331770420074463, "learning_rate": 0.0001256338028169014, "loss": 1.1897, "step": 2516 }, { "epoch": 0.3728888888888889, "grad_norm": 4.061927318572998, "learning_rate": 0.00012560415122312826, "loss": 1.2737, "step": 2517 }, { "epoch": 
0.37303703703703706, "grad_norm": 3.8827645778656006, "learning_rate": 0.00012557449962935507, "loss": 0.9835, "step": 2518 }, { "epoch": 0.3731851851851852, "grad_norm": 1.8469038009643555, "learning_rate": 0.00012554484803558192, "loss": 1.3418, "step": 2519 }, { "epoch": 0.37333333333333335, "grad_norm": 3.677811861038208, "learning_rate": 0.00012551519644180876, "loss": 1.123, "step": 2520 }, { "epoch": 0.37348148148148147, "grad_norm": 1.4161865711212158, "learning_rate": 0.00012548554484803558, "loss": 0.96, "step": 2521 }, { "epoch": 0.37362962962962964, "grad_norm": 16.41788101196289, "learning_rate": 0.00012545589325426242, "loss": 0.7189, "step": 2522 }, { "epoch": 0.37377777777777776, "grad_norm": 3.403994560241699, "learning_rate": 0.00012542624166048927, "loss": 1.0135, "step": 2523 }, { "epoch": 0.37392592592592594, "grad_norm": 3.407245397567749, "learning_rate": 0.0001253965900667161, "loss": 1.1582, "step": 2524 }, { "epoch": 0.37407407407407406, "grad_norm": 9.095651626586914, "learning_rate": 0.00012536693847294293, "loss": 1.4819, "step": 2525 }, { "epoch": 0.37422222222222223, "grad_norm": 3.44998836517334, "learning_rate": 0.00012533728687916978, "loss": 1.0873, "step": 2526 }, { "epoch": 0.37437037037037035, "grad_norm": 1.7175861597061157, "learning_rate": 0.0001253076352853966, "loss": 1.0546, "step": 2527 }, { "epoch": 0.37451851851851853, "grad_norm": 4.137767314910889, "learning_rate": 0.00012527798369162344, "loss": 1.0061, "step": 2528 }, { "epoch": 0.37466666666666665, "grad_norm": 3.3150739669799805, "learning_rate": 0.00012524833209785028, "loss": 1.1405, "step": 2529 }, { "epoch": 0.3748148148148148, "grad_norm": 6.849560737609863, "learning_rate": 0.0001252186805040771, "loss": 1.2259, "step": 2530 }, { "epoch": 0.37496296296296294, "grad_norm": 3.9576241970062256, "learning_rate": 0.00012518902891030392, "loss": 0.9823, "step": 2531 }, { "epoch": 0.3751111111111111, "grad_norm": 4.216405391693115, "learning_rate": 
0.0001251593773165308, "loss": 0.9662, "step": 2532 }, { "epoch": 0.37525925925925924, "grad_norm": 5.597860813140869, "learning_rate": 0.0001251297257227576, "loss": 0.9107, "step": 2533 }, { "epoch": 0.3754074074074074, "grad_norm": 12.664651870727539, "learning_rate": 0.00012510007412898442, "loss": 1.3503, "step": 2534 }, { "epoch": 0.37555555555555553, "grad_norm": 2.6865103244781494, "learning_rate": 0.0001250704225352113, "loss": 0.8692, "step": 2535 }, { "epoch": 0.3757037037037037, "grad_norm": 1.703162670135498, "learning_rate": 0.0001250407709414381, "loss": 1.1979, "step": 2536 }, { "epoch": 0.3758518518518518, "grad_norm": 8.578655242919922, "learning_rate": 0.00012501111934766493, "loss": 1.1494, "step": 2537 }, { "epoch": 0.376, "grad_norm": 4.007734775543213, "learning_rate": 0.00012498146775389177, "loss": 1.1308, "step": 2538 }, { "epoch": 0.3761481481481482, "grad_norm": 1.7926369905471802, "learning_rate": 0.00012495181616011862, "loss": 1.0621, "step": 2539 }, { "epoch": 0.3762962962962963, "grad_norm": 3.823511838912964, "learning_rate": 0.00012492216456634544, "loss": 1.0382, "step": 2540 }, { "epoch": 0.37644444444444447, "grad_norm": 3.8496482372283936, "learning_rate": 0.00012489251297257228, "loss": 1.075, "step": 2541 }, { "epoch": 0.3765925925925926, "grad_norm": 13.723682403564453, "learning_rate": 0.00012486286137879912, "loss": 1.0547, "step": 2542 }, { "epoch": 0.37674074074074076, "grad_norm": 1.737381935119629, "learning_rate": 0.00012483320978502594, "loss": 1.0844, "step": 2543 }, { "epoch": 0.3768888888888889, "grad_norm": 1.9060014486312866, "learning_rate": 0.00012480355819125279, "loss": 1.0751, "step": 2544 }, { "epoch": 0.37703703703703706, "grad_norm": 3.8223044872283936, "learning_rate": 0.00012477390659747963, "loss": 1.2052, "step": 2545 }, { "epoch": 0.3771851851851852, "grad_norm": 4.125335693359375, "learning_rate": 0.00012474425500370645, "loss": 1.2216, "step": 2546 }, { "epoch": 0.37733333333333335, "grad_norm": 
1.7740851640701294, "learning_rate": 0.0001247146034099333, "loss": 1.0954, "step": 2547 }, { "epoch": 0.3774814814814815, "grad_norm": 2.5749096870422363, "learning_rate": 0.00012468495181616014, "loss": 1.0394, "step": 2548 }, { "epoch": 0.37762962962962965, "grad_norm": 1.3280686140060425, "learning_rate": 0.00012465530022238695, "loss": 0.9565, "step": 2549 }, { "epoch": 0.37777777777777777, "grad_norm": 2.461038112640381, "learning_rate": 0.0001246256486286138, "loss": 1.0782, "step": 2550 }, { "epoch": 0.37792592592592594, "grad_norm": 1.3010717630386353, "learning_rate": 0.00012459599703484064, "loss": 1.0623, "step": 2551 }, { "epoch": 0.37807407407407406, "grad_norm": 3.0568859577178955, "learning_rate": 0.00012456634544106746, "loss": 1.2525, "step": 2552 }, { "epoch": 0.37822222222222224, "grad_norm": 1.7495825290679932, "learning_rate": 0.0001245366938472943, "loss": 1.2495, "step": 2553 }, { "epoch": 0.37837037037037036, "grad_norm": 5.899725437164307, "learning_rate": 0.00012450704225352112, "loss": 1.2229, "step": 2554 }, { "epoch": 0.37851851851851853, "grad_norm": 1.1927306652069092, "learning_rate": 0.00012447739065974797, "loss": 1.0398, "step": 2555 }, { "epoch": 0.37866666666666665, "grad_norm": 4.006344795227051, "learning_rate": 0.0001244477390659748, "loss": 1.3708, "step": 2556 }, { "epoch": 0.3788148148148148, "grad_norm": 3.9117653369903564, "learning_rate": 0.00012441808747220163, "loss": 0.8369, "step": 2557 }, { "epoch": 0.37896296296296295, "grad_norm": 1.193713665008545, "learning_rate": 0.00012438843587842847, "loss": 0.9799, "step": 2558 }, { "epoch": 0.3791111111111111, "grad_norm": 7.468748569488525, "learning_rate": 0.00012435878428465532, "loss": 1.0952, "step": 2559 }, { "epoch": 0.37925925925925924, "grad_norm": 9.303503036499023, "learning_rate": 0.00012432913269088213, "loss": 1.1702, "step": 2560 }, { "epoch": 0.3794074074074074, "grad_norm": 1.804075837135315, "learning_rate": 0.00012429948109710898, "loss": 1.3226, 
"step": 2561 }, { "epoch": 0.37955555555555553, "grad_norm": 2.3133153915405273, "learning_rate": 0.00012426982950333582, "loss": 1.1468, "step": 2562 }, { "epoch": 0.3797037037037037, "grad_norm": 3.307896375656128, "learning_rate": 0.00012424017790956264, "loss": 1.1266, "step": 2563 }, { "epoch": 0.37985185185185183, "grad_norm": 1.6659990549087524, "learning_rate": 0.00012421052631578949, "loss": 1.1707, "step": 2564 }, { "epoch": 0.38, "grad_norm": 2.732919454574585, "learning_rate": 0.0001241808747220163, "loss": 1.138, "step": 2565 }, { "epoch": 0.3801481481481481, "grad_norm": 1.9463250637054443, "learning_rate": 0.00012415122312824315, "loss": 0.9848, "step": 2566 }, { "epoch": 0.3802962962962963, "grad_norm": 1.2089314460754395, "learning_rate": 0.00012412157153447, "loss": 1.015, "step": 2567 }, { "epoch": 0.3804444444444444, "grad_norm": 1.5920319557189941, "learning_rate": 0.0001240919199406968, "loss": 1.191, "step": 2568 }, { "epoch": 0.3805925925925926, "grad_norm": 2.5458788871765137, "learning_rate": 0.00012406226834692365, "loss": 0.8645, "step": 2569 }, { "epoch": 0.38074074074074077, "grad_norm": 2.972222089767456, "learning_rate": 0.00012403261675315047, "loss": 1.1067, "step": 2570 }, { "epoch": 0.3808888888888889, "grad_norm": 2.4307265281677246, "learning_rate": 0.00012400296515937732, "loss": 1.3904, "step": 2571 }, { "epoch": 0.38103703703703706, "grad_norm": 1.4049432277679443, "learning_rate": 0.00012397331356560416, "loss": 1.0049, "step": 2572 }, { "epoch": 0.3811851851851852, "grad_norm": 1.3105792999267578, "learning_rate": 0.00012394366197183098, "loss": 0.9569, "step": 2573 }, { "epoch": 0.38133333333333336, "grad_norm": 1.5824027061462402, "learning_rate": 0.00012391401037805782, "loss": 0.9485, "step": 2574 }, { "epoch": 0.3814814814814815, "grad_norm": 1.0780473947525024, "learning_rate": 0.00012388435878428467, "loss": 1.1683, "step": 2575 }, { "epoch": 0.38162962962962965, "grad_norm": 1.4697093963623047, "learning_rate": 
0.00012385470719051148, "loss": 1.0345, "step": 2576 }, { "epoch": 0.38177777777777777, "grad_norm": 1.468275785446167, "learning_rate": 0.00012382505559673833, "loss": 1.191, "step": 2577 }, { "epoch": 0.38192592592592595, "grad_norm": 1.504564881324768, "learning_rate": 0.00012379540400296517, "loss": 1.1971, "step": 2578 }, { "epoch": 0.38207407407407407, "grad_norm": 2.012601375579834, "learning_rate": 0.000123765752409192, "loss": 1.1544, "step": 2579 }, { "epoch": 0.38222222222222224, "grad_norm": 1.1641924381256104, "learning_rate": 0.00012373610081541883, "loss": 0.9978, "step": 2580 }, { "epoch": 0.38237037037037036, "grad_norm": 1.745923638343811, "learning_rate": 0.00012370644922164568, "loss": 1.1242, "step": 2581 }, { "epoch": 0.38251851851851854, "grad_norm": 1.4969233274459839, "learning_rate": 0.0001236767976278725, "loss": 1.1654, "step": 2582 }, { "epoch": 0.38266666666666665, "grad_norm": 1.4478421211242676, "learning_rate": 0.00012364714603409934, "loss": 1.1774, "step": 2583 }, { "epoch": 0.38281481481481483, "grad_norm": 2.4453020095825195, "learning_rate": 0.00012361749444032619, "loss": 1.0666, "step": 2584 }, { "epoch": 0.38296296296296295, "grad_norm": 1.3032078742980957, "learning_rate": 0.000123587842846553, "loss": 1.2511, "step": 2585 }, { "epoch": 0.3831111111111111, "grad_norm": 1.3252873420715332, "learning_rate": 0.00012355819125277985, "loss": 0.9683, "step": 2586 }, { "epoch": 0.38325925925925924, "grad_norm": 1.6873570680618286, "learning_rate": 0.0001235285396590067, "loss": 1.0997, "step": 2587 }, { "epoch": 0.3834074074074074, "grad_norm": 1.2579675912857056, "learning_rate": 0.0001234988880652335, "loss": 1.0251, "step": 2588 }, { "epoch": 0.38355555555555554, "grad_norm": 1.1985095739364624, "learning_rate": 0.00012346923647146033, "loss": 1.2056, "step": 2589 }, { "epoch": 0.3837037037037037, "grad_norm": 6.052125453948975, "learning_rate": 0.0001234395848776872, "loss": 1.1525, "step": 2590 }, { "epoch": 
0.38385185185185183, "grad_norm": 1.3533743619918823, "learning_rate": 0.00012340993328391401, "loss": 1.0307, "step": 2591 }, { "epoch": 0.384, "grad_norm": 1.623461127281189, "learning_rate": 0.00012338028169014083, "loss": 1.053, "step": 2592 }, { "epoch": 0.3841481481481481, "grad_norm": 2.8680176734924316, "learning_rate": 0.0001233506300963677, "loss": 1.1363, "step": 2593 }, { "epoch": 0.3842962962962963, "grad_norm": 5.083739757537842, "learning_rate": 0.00012332097850259452, "loss": 1.051, "step": 2594 }, { "epoch": 0.3844444444444444, "grad_norm": 1.3027294874191284, "learning_rate": 0.00012329132690882134, "loss": 1.2212, "step": 2595 }, { "epoch": 0.3845925925925926, "grad_norm": 2.079653024673462, "learning_rate": 0.0001232616753150482, "loss": 1.0086, "step": 2596 }, { "epoch": 0.3847407407407407, "grad_norm": 1.485935926437378, "learning_rate": 0.00012323202372127503, "loss": 1.485, "step": 2597 }, { "epoch": 0.3848888888888889, "grad_norm": 1.3015352487564087, "learning_rate": 0.00012320237212750184, "loss": 0.9506, "step": 2598 }, { "epoch": 0.385037037037037, "grad_norm": 1.783921718597412, "learning_rate": 0.0001231727205337287, "loss": 1.085, "step": 2599 }, { "epoch": 0.3851851851851852, "grad_norm": 2.8830976486206055, "learning_rate": 0.00012314306893995553, "loss": 0.991, "step": 2600 }, { "epoch": 0.38533333333333336, "grad_norm": 1.2732336521148682, "learning_rate": 0.00012311341734618235, "loss": 1.1099, "step": 2601 }, { "epoch": 0.3854814814814815, "grad_norm": 2.5392229557037354, "learning_rate": 0.0001230837657524092, "loss": 1.0938, "step": 2602 }, { "epoch": 0.38562962962962966, "grad_norm": 2.4780893325805664, "learning_rate": 0.00012305411415863604, "loss": 1.1627, "step": 2603 }, { "epoch": 0.3857777777777778, "grad_norm": 1.658705711364746, "learning_rate": 0.00012302446256486286, "loss": 1.1664, "step": 2604 }, { "epoch": 0.38592592592592595, "grad_norm": 1.5529338121414185, "learning_rate": 0.0001229948109710897, "loss": 
1.1685, "step": 2605 }, { "epoch": 0.38607407407407407, "grad_norm": 3.1977758407592773, "learning_rate": 0.00012296515937731655, "loss": 1.1231, "step": 2606 }, { "epoch": 0.38622222222222224, "grad_norm": 1.6928820610046387, "learning_rate": 0.00012293550778354336, "loss": 1.1304, "step": 2607 }, { "epoch": 0.38637037037037036, "grad_norm": 1.929652452468872, "learning_rate": 0.0001229058561897702, "loss": 1.241, "step": 2608 }, { "epoch": 0.38651851851851854, "grad_norm": 1.2363271713256836, "learning_rate": 0.00012287620459599705, "loss": 1.1009, "step": 2609 }, { "epoch": 0.38666666666666666, "grad_norm": 1.6452393531799316, "learning_rate": 0.00012284655300222387, "loss": 1.1277, "step": 2610 }, { "epoch": 0.38681481481481483, "grad_norm": 1.3308392763137817, "learning_rate": 0.00012281690140845071, "loss": 1.429, "step": 2611 }, { "epoch": 0.38696296296296295, "grad_norm": 1.0953389406204224, "learning_rate": 0.00012278724981467756, "loss": 1.2, "step": 2612 }, { "epoch": 0.38711111111111113, "grad_norm": 2.9433095455169678, "learning_rate": 0.00012275759822090438, "loss": 1.3746, "step": 2613 }, { "epoch": 0.38725925925925925, "grad_norm": 1.1800731420516968, "learning_rate": 0.00012272794662713122, "loss": 1.323, "step": 2614 }, { "epoch": 0.3874074074074074, "grad_norm": 1.9630125761032104, "learning_rate": 0.00012269829503335807, "loss": 1.0169, "step": 2615 }, { "epoch": 0.38755555555555554, "grad_norm": 1.4434458017349243, "learning_rate": 0.00012266864343958488, "loss": 0.7611, "step": 2616 }, { "epoch": 0.3877037037037037, "grad_norm": 1.4789292812347412, "learning_rate": 0.0001226389918458117, "loss": 0.9573, "step": 2617 }, { "epoch": 0.38785185185185184, "grad_norm": 1.034011721611023, "learning_rate": 0.00012260934025203857, "loss": 1.1082, "step": 2618 }, { "epoch": 0.388, "grad_norm": 1.391062617301941, "learning_rate": 0.0001225796886582654, "loss": 1.0864, "step": 2619 }, { "epoch": 0.38814814814814813, "grad_norm": 1.247158169746399, 
"learning_rate": 0.0001225500370644922, "loss": 1.2264, "step": 2620 }, { "epoch": 0.3882962962962963, "grad_norm": 2.1945993900299072, "learning_rate": 0.00012252038547071908, "loss": 1.1592, "step": 2621 }, { "epoch": 0.3884444444444444, "grad_norm": 1.4600727558135986, "learning_rate": 0.0001224907338769459, "loss": 1.138, "step": 2622 }, { "epoch": 0.3885925925925926, "grad_norm": 1.4038389921188354, "learning_rate": 0.0001224610822831727, "loss": 1.0787, "step": 2623 }, { "epoch": 0.3887407407407407, "grad_norm": 1.3697896003723145, "learning_rate": 0.00012243143068939956, "loss": 1.2264, "step": 2624 }, { "epoch": 0.3888888888888889, "grad_norm": 2.064251184463501, "learning_rate": 0.0001224017790956264, "loss": 1.093, "step": 2625 }, { "epoch": 0.389037037037037, "grad_norm": 2.4867541790008545, "learning_rate": 0.00012237212750185322, "loss": 0.8753, "step": 2626 }, { "epoch": 0.3891851851851852, "grad_norm": 3.2699592113494873, "learning_rate": 0.00012234247590808006, "loss": 1.2429, "step": 2627 }, { "epoch": 0.3893333333333333, "grad_norm": 1.61955726146698, "learning_rate": 0.0001223128243143069, "loss": 1.0896, "step": 2628 }, { "epoch": 0.3894814814814815, "grad_norm": 0.942071259021759, "learning_rate": 0.00012228317272053373, "loss": 1.1198, "step": 2629 }, { "epoch": 0.3896296296296296, "grad_norm": 1.041031002998352, "learning_rate": 0.00012225352112676057, "loss": 1.1359, "step": 2630 }, { "epoch": 0.3897777777777778, "grad_norm": 1.2857797145843506, "learning_rate": 0.00012222386953298741, "loss": 1.3863, "step": 2631 }, { "epoch": 0.38992592592592595, "grad_norm": 1.5760654211044312, "learning_rate": 0.00012219421793921423, "loss": 1.0017, "step": 2632 }, { "epoch": 0.3900740740740741, "grad_norm": 1.2841405868530273, "learning_rate": 0.00012216456634544108, "loss": 0.9268, "step": 2633 }, { "epoch": 0.39022222222222225, "grad_norm": 1.0363614559173584, "learning_rate": 0.00012213491475166792, "loss": 0.9656, "step": 2634 }, { "epoch": 
0.39037037037037037, "grad_norm": 2.366209030151367, "learning_rate": 0.00012210526315789474, "loss": 1.4091, "step": 2635 }, { "epoch": 0.39051851851851854, "grad_norm": 1.3012871742248535, "learning_rate": 0.00012207561156412158, "loss": 0.9263, "step": 2636 }, { "epoch": 0.39066666666666666, "grad_norm": 1.8037832975387573, "learning_rate": 0.00012204595997034843, "loss": 0.8226, "step": 2637 }, { "epoch": 0.39081481481481484, "grad_norm": 1.461460828781128, "learning_rate": 0.00012201630837657524, "loss": 1.2998, "step": 2638 }, { "epoch": 0.39096296296296296, "grad_norm": 1.3180598020553589, "learning_rate": 0.00012198665678280207, "loss": 0.9662, "step": 2639 }, { "epoch": 0.39111111111111113, "grad_norm": 1.1122477054595947, "learning_rate": 0.0001219570051890289, "loss": 0.9911, "step": 2640 }, { "epoch": 0.39125925925925925, "grad_norm": 2.2250654697418213, "learning_rate": 0.00012192735359525575, "loss": 1.1587, "step": 2641 }, { "epoch": 0.3914074074074074, "grad_norm": 1.1606261730194092, "learning_rate": 0.00012189770200148258, "loss": 0.8492, "step": 2642 }, { "epoch": 0.39155555555555555, "grad_norm": 1.3771429061889648, "learning_rate": 0.00012186805040770941, "loss": 0.9648, "step": 2643 }, { "epoch": 0.3917037037037037, "grad_norm": 1.557597279548645, "learning_rate": 0.00012183839881393626, "loss": 1.0868, "step": 2644 }, { "epoch": 0.39185185185185184, "grad_norm": 2.8755409717559814, "learning_rate": 0.00012180874722016309, "loss": 1.2169, "step": 2645 }, { "epoch": 0.392, "grad_norm": 2.0587596893310547, "learning_rate": 0.00012177909562638992, "loss": 1.0257, "step": 2646 }, { "epoch": 0.39214814814814813, "grad_norm": 1.3243364095687866, "learning_rate": 0.00012174944403261676, "loss": 0.9206, "step": 2647 }, { "epoch": 0.3922962962962963, "grad_norm": 1.1226966381072998, "learning_rate": 0.0001217197924388436, "loss": 0.9565, "step": 2648 }, { "epoch": 0.39244444444444443, "grad_norm": 1.1772652864456177, "learning_rate": 
0.00012169014084507042, "loss": 0.9498, "step": 2649 }, { "epoch": 0.3925925925925926, "grad_norm": 1.1876426935195923, "learning_rate": 0.00012166048925129727, "loss": 1.2936, "step": 2650 }, { "epoch": 0.3927407407407407, "grad_norm": 1.5671477317810059, "learning_rate": 0.0001216308376575241, "loss": 1.3194, "step": 2651 }, { "epoch": 0.3928888888888889, "grad_norm": 1.7110168933868408, "learning_rate": 0.00012160118606375093, "loss": 1.3425, "step": 2652 }, { "epoch": 0.393037037037037, "grad_norm": 1.093193769454956, "learning_rate": 0.00012157153446997778, "loss": 1.1644, "step": 2653 }, { "epoch": 0.3931851851851852, "grad_norm": 0.9900262355804443, "learning_rate": 0.0001215418828762046, "loss": 1.2502, "step": 2654 }, { "epoch": 0.3933333333333333, "grad_norm": 1.0472580194473267, "learning_rate": 0.00012151223128243144, "loss": 0.9266, "step": 2655 }, { "epoch": 0.3934814814814815, "grad_norm": 1.7296568155288696, "learning_rate": 0.00012148257968865825, "loss": 1.0858, "step": 2656 }, { "epoch": 0.3936296296296296, "grad_norm": 1.21535325050354, "learning_rate": 0.00012145292809488511, "loss": 1.3982, "step": 2657 }, { "epoch": 0.3937777777777778, "grad_norm": 1.611433744430542, "learning_rate": 0.00012142327650111194, "loss": 1.1701, "step": 2658 }, { "epoch": 0.3939259259259259, "grad_norm": 1.3290176391601562, "learning_rate": 0.00012139362490733876, "loss": 0.858, "step": 2659 }, { "epoch": 0.3940740740740741, "grad_norm": 1.7268757820129395, "learning_rate": 0.00012136397331356562, "loss": 1.2488, "step": 2660 }, { "epoch": 0.3942222222222222, "grad_norm": 1.0796136856079102, "learning_rate": 0.00012133432171979245, "loss": 1.1617, "step": 2661 }, { "epoch": 0.39437037037037037, "grad_norm": 1.4177995920181274, "learning_rate": 0.00012130467012601927, "loss": 0.9514, "step": 2662 }, { "epoch": 0.39451851851851855, "grad_norm": 3.1777184009552, "learning_rate": 0.00012127501853224612, "loss": 1.1792, "step": 2663 }, { "epoch": 0.39466666666666667, 
"grad_norm": 1.3864431381225586, "learning_rate": 0.00012124536693847294, "loss": 1.0457, "step": 2664 }, { "epoch": 0.39481481481481484, "grad_norm": 1.309844732284546, "learning_rate": 0.00012121571534469977, "loss": 1.1057, "step": 2665 }, { "epoch": 0.39496296296296296, "grad_norm": 6.418210983276367, "learning_rate": 0.00012118606375092663, "loss": 1.3275, "step": 2666 }, { "epoch": 0.39511111111111114, "grad_norm": 1.2371841669082642, "learning_rate": 0.00012115641215715345, "loss": 1.2082, "step": 2667 }, { "epoch": 0.39525925925925925, "grad_norm": 1.165908932685852, "learning_rate": 0.00012112676056338028, "loss": 1.2033, "step": 2668 }, { "epoch": 0.39540740740740743, "grad_norm": 1.2984356880187988, "learning_rate": 0.00012109710896960714, "loss": 1.1775, "step": 2669 }, { "epoch": 0.39555555555555555, "grad_norm": 1.3221341371536255, "learning_rate": 0.00012106745737583395, "loss": 0.8594, "step": 2670 }, { "epoch": 0.3957037037037037, "grad_norm": 2.138890027999878, "learning_rate": 0.00012103780578206079, "loss": 1.0214, "step": 2671 }, { "epoch": 0.39585185185185184, "grad_norm": 2.4488580226898193, "learning_rate": 0.00012100815418828763, "loss": 0.95, "step": 2672 }, { "epoch": 0.396, "grad_norm": 1.6993088722229004, "learning_rate": 0.00012097850259451446, "loss": 0.8995, "step": 2673 }, { "epoch": 0.39614814814814814, "grad_norm": 1.2788841724395752, "learning_rate": 0.00012094885100074129, "loss": 1.0569, "step": 2674 }, { "epoch": 0.3962962962962963, "grad_norm": 1.178033471107483, "learning_rate": 0.00012091919940696812, "loss": 1.3261, "step": 2675 }, { "epoch": 0.39644444444444443, "grad_norm": 1.4852286577224731, "learning_rate": 0.00012088954781319497, "loss": 1.0252, "step": 2676 }, { "epoch": 0.3965925925925926, "grad_norm": 1.4918464422225952, "learning_rate": 0.0001208598962194218, "loss": 0.9957, "step": 2677 }, { "epoch": 0.3967407407407407, "grad_norm": 1.2921345233917236, "learning_rate": 0.00012083024462564863, "loss": 1.0395, 
"step": 2678 }, { "epoch": 0.3968888888888889, "grad_norm": 1.8526917695999146, "learning_rate": 0.00012080059303187547, "loss": 1.1959, "step": 2679 }, { "epoch": 0.397037037037037, "grad_norm": 1.2774417400360107, "learning_rate": 0.0001207709414381023, "loss": 1.2662, "step": 2680 }, { "epoch": 0.3971851851851852, "grad_norm": 1.7397370338439941, "learning_rate": 0.00012074128984432914, "loss": 0.987, "step": 2681 }, { "epoch": 0.3973333333333333, "grad_norm": 1.5014103651046753, "learning_rate": 0.00012071163825055598, "loss": 1.0948, "step": 2682 }, { "epoch": 0.3974814814814815, "grad_norm": 1.7596739530563354, "learning_rate": 0.00012068198665678281, "loss": 1.1562, "step": 2683 }, { "epoch": 0.3976296296296296, "grad_norm": 1.8999735116958618, "learning_rate": 0.00012065233506300964, "loss": 1.1918, "step": 2684 }, { "epoch": 0.3977777777777778, "grad_norm": 1.339568018913269, "learning_rate": 0.00012062268346923649, "loss": 1.2655, "step": 2685 }, { "epoch": 0.3979259259259259, "grad_norm": 1.881459355354309, "learning_rate": 0.00012059303187546332, "loss": 1.2279, "step": 2686 }, { "epoch": 0.3980740740740741, "grad_norm": 1.3578423261642456, "learning_rate": 0.00012056338028169015, "loss": 1.4419, "step": 2687 }, { "epoch": 0.3982222222222222, "grad_norm": 1.7409465312957764, "learning_rate": 0.00012053372868791699, "loss": 1.0041, "step": 2688 }, { "epoch": 0.3983703703703704, "grad_norm": 1.2117589712142944, "learning_rate": 0.00012050407709414382, "loss": 1.0102, "step": 2689 }, { "epoch": 0.3985185185185185, "grad_norm": 1.3980178833007812, "learning_rate": 0.00012047442550037064, "loss": 0.9528, "step": 2690 }, { "epoch": 0.39866666666666667, "grad_norm": 1.5781069993972778, "learning_rate": 0.00012044477390659747, "loss": 1.0334, "step": 2691 }, { "epoch": 0.3988148148148148, "grad_norm": 1.789389967918396, "learning_rate": 0.00012041512231282433, "loss": 1.1931, "step": 2692 }, { "epoch": 0.39896296296296296, "grad_norm": 2.0397837162017822, 
"learning_rate": 0.00012038547071905115, "loss": 1.0539, "step": 2693 }, { "epoch": 0.39911111111111114, "grad_norm": 1.8052537441253662, "learning_rate": 0.00012035581912527798, "loss": 0.9529, "step": 2694 }, { "epoch": 0.39925925925925926, "grad_norm": 1.8215285539627075, "learning_rate": 0.00012032616753150484, "loss": 1.1164, "step": 2695 }, { "epoch": 0.39940740740740743, "grad_norm": 1.65324866771698, "learning_rate": 0.00012029651593773165, "loss": 1.1857, "step": 2696 }, { "epoch": 0.39955555555555555, "grad_norm": 1.7311931848526, "learning_rate": 0.00012026686434395848, "loss": 1.1854, "step": 2697 }, { "epoch": 0.3997037037037037, "grad_norm": 1.9131438732147217, "learning_rate": 0.00012023721275018533, "loss": 1.2563, "step": 2698 }, { "epoch": 0.39985185185185185, "grad_norm": 1.6355929374694824, "learning_rate": 0.00012020756115641216, "loss": 1.0209, "step": 2699 }, { "epoch": 0.4, "grad_norm": 1.510325312614441, "learning_rate": 0.00012017790956263899, "loss": 1.0529, "step": 2700 }, { "epoch": 0.40014814814814814, "grad_norm": 1.334281325340271, "learning_rate": 0.00012014825796886583, "loss": 0.9216, "step": 2701 }, { "epoch": 0.4002962962962963, "grad_norm": 1.1154178380966187, "learning_rate": 0.00012011860637509267, "loss": 0.9629, "step": 2702 }, { "epoch": 0.40044444444444444, "grad_norm": 1.282820224761963, "learning_rate": 0.0001200889547813195, "loss": 0.9645, "step": 2703 }, { "epoch": 0.4005925925925926, "grad_norm": 1.3689979314804077, "learning_rate": 0.00012005930318754634, "loss": 1.1408, "step": 2704 }, { "epoch": 0.40074074074074073, "grad_norm": 2.0123424530029297, "learning_rate": 0.00012002965159377317, "loss": 1.087, "step": 2705 }, { "epoch": 0.4008888888888889, "grad_norm": 1.6564671993255615, "learning_rate": 0.00012, "loss": 1.0362, "step": 2706 }, { "epoch": 0.401037037037037, "grad_norm": 2.1356120109558105, "learning_rate": 0.00011997034840622685, "loss": 0.8953, "step": 2707 }, { "epoch": 0.4011851851851852, 
"grad_norm": 2.0640385150909424, "learning_rate": 0.00011994069681245368, "loss": 1.0647, "step": 2708 }, { "epoch": 0.4013333333333333, "grad_norm": 1.7021347284317017, "learning_rate": 0.00011991104521868051, "loss": 0.974, "step": 2709 }, { "epoch": 0.4014814814814815, "grad_norm": 2.582080364227295, "learning_rate": 0.00011988139362490734, "loss": 1.1002, "step": 2710 }, { "epoch": 0.4016296296296296, "grad_norm": 1.5332332849502563, "learning_rate": 0.00011985174203113418, "loss": 1.0432, "step": 2711 }, { "epoch": 0.4017777777777778, "grad_norm": 1.7648104429244995, "learning_rate": 0.00011982209043736102, "loss": 0.9823, "step": 2712 }, { "epoch": 0.4019259259259259, "grad_norm": 1.6554726362228394, "learning_rate": 0.00011979243884358785, "loss": 1.4986, "step": 2713 }, { "epoch": 0.4020740740740741, "grad_norm": 3.5072312355041504, "learning_rate": 0.00011976278724981469, "loss": 1.0405, "step": 2714 }, { "epoch": 0.4022222222222222, "grad_norm": 1.4579415321350098, "learning_rate": 0.00011973313565604152, "loss": 0.9843, "step": 2715 }, { "epoch": 0.4023703703703704, "grad_norm": 2.068174123764038, "learning_rate": 0.00011970348406226834, "loss": 1.1261, "step": 2716 }, { "epoch": 0.4025185185185185, "grad_norm": 2.4436683654785156, "learning_rate": 0.0001196738324684952, "loss": 0.9804, "step": 2717 }, { "epoch": 0.4026666666666667, "grad_norm": 1.9487828016281128, "learning_rate": 0.00011964418087472203, "loss": 1.1943, "step": 2718 }, { "epoch": 0.4028148148148148, "grad_norm": 1.7667052745819092, "learning_rate": 0.00011961452928094885, "loss": 1.1153, "step": 2719 }, { "epoch": 0.40296296296296297, "grad_norm": 1.949844479560852, "learning_rate": 0.0001195848776871757, "loss": 0.9712, "step": 2720 }, { "epoch": 0.4031111111111111, "grad_norm": 1.6448479890823364, "learning_rate": 0.00011955522609340253, "loss": 1.0122, "step": 2721 }, { "epoch": 0.40325925925925926, "grad_norm": 1.8042147159576416, "learning_rate": 0.00011952557449962935, "loss": 
1.1163, "step": 2722 }, { "epoch": 0.4034074074074074, "grad_norm": 1.6923189163208008, "learning_rate": 0.00011949592290585621, "loss": 1.2883, "step": 2723 }, { "epoch": 0.40355555555555556, "grad_norm": 2.571420907974243, "learning_rate": 0.00011946627131208303, "loss": 1.1161, "step": 2724 }, { "epoch": 0.40370370370370373, "grad_norm": 2.145495891571045, "learning_rate": 0.00011943661971830986, "loss": 1.0038, "step": 2725 }, { "epoch": 0.40385185185185185, "grad_norm": 1.3031134605407715, "learning_rate": 0.00011940696812453669, "loss": 1.0228, "step": 2726 }, { "epoch": 0.404, "grad_norm": 1.7984037399291992, "learning_rate": 0.00011937731653076353, "loss": 0.9614, "step": 2727 }, { "epoch": 0.40414814814814815, "grad_norm": 1.277489185333252, "learning_rate": 0.00011934766493699036, "loss": 1.0074, "step": 2728 }, { "epoch": 0.4042962962962963, "grad_norm": 2.0690696239471436, "learning_rate": 0.0001193180133432172, "loss": 1.0676, "step": 2729 }, { "epoch": 0.40444444444444444, "grad_norm": 1.6869933605194092, "learning_rate": 0.00011928836174944404, "loss": 1.1119, "step": 2730 }, { "epoch": 0.4045925925925926, "grad_norm": 2.0686795711517334, "learning_rate": 0.00011925871015567087, "loss": 1.0984, "step": 2731 }, { "epoch": 0.40474074074074073, "grad_norm": 2.3337457180023193, "learning_rate": 0.0001192290585618977, "loss": 1.0275, "step": 2732 }, { "epoch": 0.4048888888888889, "grad_norm": 1.5476855039596558, "learning_rate": 0.00011919940696812455, "loss": 1.0743, "step": 2733 }, { "epoch": 0.40503703703703703, "grad_norm": 5.566887378692627, "learning_rate": 0.00011916975537435138, "loss": 0.9098, "step": 2734 }, { "epoch": 0.4051851851851852, "grad_norm": 1.3219506740570068, "learning_rate": 0.00011914010378057821, "loss": 1.2043, "step": 2735 }, { "epoch": 0.4053333333333333, "grad_norm": 1.6084272861480713, "learning_rate": 0.00011911045218680505, "loss": 1.1581, "step": 2736 }, { "epoch": 0.4054814814814815, "grad_norm": 1.5224446058273315, 
"learning_rate": 0.00011908080059303188, "loss": 1.2197, "step": 2737 }, { "epoch": 0.4056296296296296, "grad_norm": 1.6093271970748901, "learning_rate": 0.00011905114899925871, "loss": 1.0036, "step": 2738 }, { "epoch": 0.4057777777777778, "grad_norm": 3.103193759918213, "learning_rate": 0.00011902149740548556, "loss": 1.0624, "step": 2739 }, { "epoch": 0.4059259259259259, "grad_norm": 2.020883560180664, "learning_rate": 0.00011899184581171239, "loss": 1.2473, "step": 2740 }, { "epoch": 0.4060740740740741, "grad_norm": 1.3161982297897339, "learning_rate": 0.00011896219421793922, "loss": 0.961, "step": 2741 }, { "epoch": 0.4062222222222222, "grad_norm": 1.6283003091812134, "learning_rate": 0.00011893254262416606, "loss": 0.9689, "step": 2742 }, { "epoch": 0.4063703703703704, "grad_norm": 3.9993278980255127, "learning_rate": 0.0001189028910303929, "loss": 1.1541, "step": 2743 }, { "epoch": 0.4065185185185185, "grad_norm": 2.251734495162964, "learning_rate": 0.00011887323943661973, "loss": 0.8959, "step": 2744 }, { "epoch": 0.4066666666666667, "grad_norm": 1.5558217763900757, "learning_rate": 0.00011884358784284654, "loss": 1.0407, "step": 2745 }, { "epoch": 0.4068148148148148, "grad_norm": 3.1227128505706787, "learning_rate": 0.0001188139362490734, "loss": 1.0737, "step": 2746 }, { "epoch": 0.40696296296296297, "grad_norm": 1.7677065134048462, "learning_rate": 0.00011878428465530023, "loss": 1.2473, "step": 2747 }, { "epoch": 0.4071111111111111, "grad_norm": 1.9715033769607544, "learning_rate": 0.00011875463306152705, "loss": 1.0825, "step": 2748 }, { "epoch": 0.40725925925925927, "grad_norm": 2.3912477493286133, "learning_rate": 0.00011872498146775391, "loss": 1.0649, "step": 2749 }, { "epoch": 0.4074074074074074, "grad_norm": 2.0233819484710693, "learning_rate": 0.00011869532987398073, "loss": 1.1185, "step": 2750 }, { "epoch": 0.40755555555555556, "grad_norm": 2.5465264320373535, "learning_rate": 0.00011866567828020756, "loss": 0.8653, "step": 2751 }, { "epoch": 
0.4077037037037037, "grad_norm": 1.1739938259124756, "learning_rate": 0.00011863602668643441, "loss": 0.9556, "step": 2752 }, { "epoch": 0.40785185185185185, "grad_norm": 2.1719088554382324, "learning_rate": 0.00011860637509266123, "loss": 1.2272, "step": 2753 }, { "epoch": 0.408, "grad_norm": 1.2471652030944824, "learning_rate": 0.00011857672349888806, "loss": 1.1197, "step": 2754 }, { "epoch": 0.40814814814814815, "grad_norm": 1.5716360807418823, "learning_rate": 0.00011854707190511492, "loss": 1.281, "step": 2755 }, { "epoch": 0.4082962962962963, "grad_norm": 1.5447243452072144, "learning_rate": 0.00011851742031134174, "loss": 1.0123, "step": 2756 }, { "epoch": 0.40844444444444444, "grad_norm": 2.2320587635040283, "learning_rate": 0.00011848776871756857, "loss": 1.1074, "step": 2757 }, { "epoch": 0.4085925925925926, "grad_norm": 1.7709585428237915, "learning_rate": 0.00011845811712379541, "loss": 0.8078, "step": 2758 }, { "epoch": 0.40874074074074074, "grad_norm": 1.587296485900879, "learning_rate": 0.00011842846553002224, "loss": 1.084, "step": 2759 }, { "epoch": 0.4088888888888889, "grad_norm": 1.9970415830612183, "learning_rate": 0.00011839881393624908, "loss": 1.1203, "step": 2760 }, { "epoch": 0.40903703703703703, "grad_norm": 2.760261297225952, "learning_rate": 0.0001183691623424759, "loss": 1.297, "step": 2761 }, { "epoch": 0.4091851851851852, "grad_norm": 1.9074251651763916, "learning_rate": 0.00011833951074870275, "loss": 1.266, "step": 2762 }, { "epoch": 0.4093333333333333, "grad_norm": 1.3241833448410034, "learning_rate": 0.00011830985915492958, "loss": 1.0471, "step": 2763 }, { "epoch": 0.4094814814814815, "grad_norm": 2.138880968093872, "learning_rate": 0.00011828020756115641, "loss": 1.2054, "step": 2764 }, { "epoch": 0.4096296296296296, "grad_norm": 2.0332658290863037, "learning_rate": 0.00011825055596738326, "loss": 1.2343, "step": 2765 }, { "epoch": 0.4097777777777778, "grad_norm": 1.1907109022140503, "learning_rate": 0.00011822090437361009, 
"loss": 1.0939, "step": 2766 }, { "epoch": 0.4099259259259259, "grad_norm": 1.491758108139038, "learning_rate": 0.00011819125277983692, "loss": 1.2937, "step": 2767 }, { "epoch": 0.4100740740740741, "grad_norm": 1.6654256582260132, "learning_rate": 0.00011816160118606376, "loss": 0.9628, "step": 2768 }, { "epoch": 0.4102222222222222, "grad_norm": 1.8008649349212646, "learning_rate": 0.0001181319495922906, "loss": 0.9737, "step": 2769 }, { "epoch": 0.4103703703703704, "grad_norm": 1.270262360572815, "learning_rate": 0.00011810229799851743, "loss": 0.9349, "step": 2770 }, { "epoch": 0.4105185185185185, "grad_norm": 1.9640406370162964, "learning_rate": 0.00011807264640474427, "loss": 1.3547, "step": 2771 }, { "epoch": 0.4106666666666667, "grad_norm": 2.8695318698883057, "learning_rate": 0.0001180429948109711, "loss": 1.1886, "step": 2772 }, { "epoch": 0.4108148148148148, "grad_norm": 1.2530816793441772, "learning_rate": 0.00011801334321719793, "loss": 1.0095, "step": 2773 }, { "epoch": 0.410962962962963, "grad_norm": 1.5395197868347168, "learning_rate": 0.00011798369162342478, "loss": 1.1235, "step": 2774 }, { "epoch": 0.4111111111111111, "grad_norm": 1.469625473022461, "learning_rate": 0.0001179540400296516, "loss": 1.0371, "step": 2775 }, { "epoch": 0.41125925925925927, "grad_norm": 1.732647180557251, "learning_rate": 0.00011792438843587842, "loss": 0.972, "step": 2776 }, { "epoch": 0.4114074074074074, "grad_norm": 1.5699431896209717, "learning_rate": 0.00011789473684210525, "loss": 1.0902, "step": 2777 }, { "epoch": 0.41155555555555556, "grad_norm": 2.115208387374878, "learning_rate": 0.00011786508524833211, "loss": 1.1188, "step": 2778 }, { "epoch": 0.4117037037037037, "grad_norm": 1.4656784534454346, "learning_rate": 0.00011783543365455893, "loss": 0.9521, "step": 2779 }, { "epoch": 0.41185185185185186, "grad_norm": 1.6068055629730225, "learning_rate": 0.00011780578206078576, "loss": 1.0153, "step": 2780 }, { "epoch": 0.412, "grad_norm": 2.062823534011841, 
"learning_rate": 0.00011777613046701262, "loss": 1.0661, "step": 2781 }, { "epoch": 0.41214814814814815, "grad_norm": 2.0889668464660645, "learning_rate": 0.00011774647887323944, "loss": 0.9254, "step": 2782 }, { "epoch": 0.41229629629629627, "grad_norm": 1.8792005777359009, "learning_rate": 0.00011771682727946627, "loss": 1.1005, "step": 2783 }, { "epoch": 0.41244444444444445, "grad_norm": 2.2597243785858154, "learning_rate": 0.00011768717568569311, "loss": 1.0809, "step": 2784 }, { "epoch": 0.41259259259259257, "grad_norm": 1.2650634050369263, "learning_rate": 0.00011765752409191994, "loss": 0.9761, "step": 2785 }, { "epoch": 0.41274074074074074, "grad_norm": 1.583432674407959, "learning_rate": 0.00011762787249814677, "loss": 1.2031, "step": 2786 }, { "epoch": 0.4128888888888889, "grad_norm": 1.9459563493728638, "learning_rate": 0.00011759822090437362, "loss": 1.2632, "step": 2787 }, { "epoch": 0.41303703703703704, "grad_norm": 1.8172357082366943, "learning_rate": 0.00011756856931060045, "loss": 1.199, "step": 2788 }, { "epoch": 0.4131851851851852, "grad_norm": 1.8270275592803955, "learning_rate": 0.00011753891771682728, "loss": 1.0676, "step": 2789 }, { "epoch": 0.41333333333333333, "grad_norm": 2.283125877380371, "learning_rate": 0.00011750926612305412, "loss": 1.2442, "step": 2790 }, { "epoch": 0.4134814814814815, "grad_norm": 1.7654433250427246, "learning_rate": 0.00011747961452928096, "loss": 0.8421, "step": 2791 }, { "epoch": 0.4136296296296296, "grad_norm": 1.8013252019882202, "learning_rate": 0.00011744996293550779, "loss": 0.9433, "step": 2792 }, { "epoch": 0.4137777777777778, "grad_norm": 1.6748549938201904, "learning_rate": 0.00011742031134173463, "loss": 1.2431, "step": 2793 }, { "epoch": 0.4139259259259259, "grad_norm": 2.0394046306610107, "learning_rate": 0.00011739065974796146, "loss": 1.2758, "step": 2794 }, { "epoch": 0.4140740740740741, "grad_norm": 1.8491860628128052, "learning_rate": 0.00011736100815418829, "loss": 1.1828, "step": 2795 }, { 
"epoch": 0.4142222222222222, "grad_norm": 1.5049947500228882, "learning_rate": 0.00011733135656041512, "loss": 1.0096, "step": 2796 }, { "epoch": 0.4143703703703704, "grad_norm": 3.2022616863250732, "learning_rate": 0.00011730170496664197, "loss": 1.1493, "step": 2797 }, { "epoch": 0.4145185185185185, "grad_norm": 3.3280324935913086, "learning_rate": 0.0001172720533728688, "loss": 1.1176, "step": 2798 }, { "epoch": 0.4146666666666667, "grad_norm": 1.351608395576477, "learning_rate": 0.00011724240177909563, "loss": 0.8128, "step": 2799 }, { "epoch": 0.4148148148148148, "grad_norm": 2.201603889465332, "learning_rate": 0.00011721275018532247, "loss": 1.2038, "step": 2800 }, { "epoch": 0.414962962962963, "grad_norm": 1.5568238496780396, "learning_rate": 0.0001171830985915493, "loss": 0.9006, "step": 2801 }, { "epoch": 0.4151111111111111, "grad_norm": 3.2228941917419434, "learning_rate": 0.00011715344699777612, "loss": 1.3825, "step": 2802 }, { "epoch": 0.4152592592592593, "grad_norm": 1.5834277868270874, "learning_rate": 0.00011712379540400298, "loss": 0.9658, "step": 2803 }, { "epoch": 0.4154074074074074, "grad_norm": 1.1434483528137207, "learning_rate": 0.00011709414381022981, "loss": 1.0708, "step": 2804 }, { "epoch": 0.41555555555555557, "grad_norm": 1.8442808389663696, "learning_rate": 0.00011706449221645663, "loss": 1.1493, "step": 2805 }, { "epoch": 0.4157037037037037, "grad_norm": 3.8759419918060303, "learning_rate": 0.00011703484062268349, "loss": 1.1004, "step": 2806 }, { "epoch": 0.41585185185185186, "grad_norm": 1.9965623617172241, "learning_rate": 0.00011700518902891032, "loss": 1.2326, "step": 2807 }, { "epoch": 0.416, "grad_norm": 1.3121302127838135, "learning_rate": 0.00011697553743513714, "loss": 1.2076, "step": 2808 }, { "epoch": 0.41614814814814816, "grad_norm": 1.7846583127975464, "learning_rate": 0.00011694588584136399, "loss": 0.9679, "step": 2809 }, { "epoch": 0.4162962962962963, "grad_norm": 3.997375011444092, "learning_rate": 
0.00011691623424759081, "loss": 1.1087, "step": 2810 }, { "epoch": 0.41644444444444445, "grad_norm": 1.307847499847412, "learning_rate": 0.00011688658265381764, "loss": 1.0535, "step": 2811 }, { "epoch": 0.41659259259259257, "grad_norm": 6.995899200439453, "learning_rate": 0.00011685693106004447, "loss": 0.9963, "step": 2812 }, { "epoch": 0.41674074074074074, "grad_norm": 2.9513638019561768, "learning_rate": 0.00011682727946627132, "loss": 1.1157, "step": 2813 }, { "epoch": 0.41688888888888886, "grad_norm": 1.6517165899276733, "learning_rate": 0.00011679762787249815, "loss": 1.0103, "step": 2814 }, { "epoch": 0.41703703703703704, "grad_norm": 1.0372458696365356, "learning_rate": 0.00011676797627872498, "loss": 0.9458, "step": 2815 }, { "epoch": 0.41718518518518516, "grad_norm": 1.507336974143982, "learning_rate": 0.00011673832468495182, "loss": 0.9639, "step": 2816 }, { "epoch": 0.41733333333333333, "grad_norm": 2.7597787380218506, "learning_rate": 0.00011670867309117865, "loss": 0.8869, "step": 2817 }, { "epoch": 0.4174814814814815, "grad_norm": 1.2514851093292236, "learning_rate": 0.00011667902149740548, "loss": 1.0918, "step": 2818 }, { "epoch": 0.41762962962962963, "grad_norm": 2.244161605834961, "learning_rate": 0.00011664936990363233, "loss": 1.2108, "step": 2819 }, { "epoch": 0.4177777777777778, "grad_norm": 1.3502081632614136, "learning_rate": 0.00011661971830985916, "loss": 1.0707, "step": 2820 }, { "epoch": 0.4179259259259259, "grad_norm": 1.668387532234192, "learning_rate": 0.00011659006671608599, "loss": 1.1988, "step": 2821 }, { "epoch": 0.4180740740740741, "grad_norm": 2.31921124458313, "learning_rate": 0.00011656041512231284, "loss": 0.8422, "step": 2822 }, { "epoch": 0.4182222222222222, "grad_norm": 1.1336761713027954, "learning_rate": 0.00011653076352853967, "loss": 0.7769, "step": 2823 }, { "epoch": 0.4183703703703704, "grad_norm": 7.61287784576416, "learning_rate": 0.0001165011119347665, "loss": 1.152, "step": 2824 }, { "epoch": 
0.4185185185185185, "grad_norm": 1.4002079963684082, "learning_rate": 0.00011647146034099334, "loss": 0.9023, "step": 2825 }, { "epoch": 0.4186666666666667, "grad_norm": 1.6339399814605713, "learning_rate": 0.00011644180874722017, "loss": 1.0011, "step": 2826 }, { "epoch": 0.4188148148148148, "grad_norm": 1.773979663848877, "learning_rate": 0.000116412157153447, "loss": 1.092, "step": 2827 }, { "epoch": 0.418962962962963, "grad_norm": 1.360486388206482, "learning_rate": 0.00011638250555967385, "loss": 1.0493, "step": 2828 }, { "epoch": 0.4191111111111111, "grad_norm": 1.9607775211334229, "learning_rate": 0.00011635285396590068, "loss": 1.2693, "step": 2829 }, { "epoch": 0.4192592592592593, "grad_norm": 2.056821346282959, "learning_rate": 0.00011632320237212751, "loss": 0.9182, "step": 2830 }, { "epoch": 0.4194074074074074, "grad_norm": 1.7508609294891357, "learning_rate": 0.00011629355077835433, "loss": 1.14, "step": 2831 }, { "epoch": 0.41955555555555557, "grad_norm": 2.5750112533569336, "learning_rate": 0.00011626389918458119, "loss": 1.0558, "step": 2832 }, { "epoch": 0.4197037037037037, "grad_norm": 1.7716282606124878, "learning_rate": 0.00011623424759080802, "loss": 0.9906, "step": 2833 }, { "epoch": 0.41985185185185186, "grad_norm": 1.949119210243225, "learning_rate": 0.00011620459599703483, "loss": 1.109, "step": 2834 }, { "epoch": 0.42, "grad_norm": 6.4630961418151855, "learning_rate": 0.00011617494440326169, "loss": 0.9866, "step": 2835 }, { "epoch": 0.42014814814814816, "grad_norm": 1.7842416763305664, "learning_rate": 0.00011614529280948851, "loss": 1.2651, "step": 2836 }, { "epoch": 0.4202962962962963, "grad_norm": 1.5930815935134888, "learning_rate": 0.00011611564121571534, "loss": 1.0781, "step": 2837 }, { "epoch": 0.42044444444444445, "grad_norm": 2.1503567695617676, "learning_rate": 0.0001160859896219422, "loss": 1.0805, "step": 2838 }, { "epoch": 0.4205925925925926, "grad_norm": 3.180330753326416, "learning_rate": 0.00011605633802816902, "loss": 
1.0535, "step": 2839 }, { "epoch": 0.42074074074074075, "grad_norm": 1.7494330406188965, "learning_rate": 0.00011602668643439585, "loss": 1.2937, "step": 2840 }, { "epoch": 0.42088888888888887, "grad_norm": 2.0975334644317627, "learning_rate": 0.0001159970348406227, "loss": 0.8353, "step": 2841 }, { "epoch": 0.42103703703703704, "grad_norm": 1.3983445167541504, "learning_rate": 0.00011596738324684952, "loss": 0.8618, "step": 2842 }, { "epoch": 0.42118518518518516, "grad_norm": 1.968269944190979, "learning_rate": 0.00011593773165307635, "loss": 0.8831, "step": 2843 }, { "epoch": 0.42133333333333334, "grad_norm": 2.646681070327759, "learning_rate": 0.0001159080800593032, "loss": 1.1054, "step": 2844 }, { "epoch": 0.42148148148148146, "grad_norm": 1.488573670387268, "learning_rate": 0.00011587842846553003, "loss": 1.0823, "step": 2845 }, { "epoch": 0.42162962962962963, "grad_norm": 6.95759391784668, "learning_rate": 0.00011584877687175686, "loss": 1.0718, "step": 2846 }, { "epoch": 0.42177777777777775, "grad_norm": 2.1569931507110596, "learning_rate": 0.00011581912527798369, "loss": 1.2504, "step": 2847 }, { "epoch": 0.4219259259259259, "grad_norm": 2.4960248470306396, "learning_rate": 0.00011578947368421053, "loss": 1.0442, "step": 2848 }, { "epoch": 0.4220740740740741, "grad_norm": 3.746467351913452, "learning_rate": 0.00011575982209043736, "loss": 1.1778, "step": 2849 }, { "epoch": 0.4222222222222222, "grad_norm": 2.5490500926971436, "learning_rate": 0.0001157301704966642, "loss": 1.1062, "step": 2850 }, { "epoch": 0.4223703703703704, "grad_norm": 1.6705260276794434, "learning_rate": 0.00011570051890289104, "loss": 1.3907, "step": 2851 }, { "epoch": 0.4225185185185185, "grad_norm": 4.421507835388184, "learning_rate": 0.00011567086730911787, "loss": 0.8469, "step": 2852 }, { "epoch": 0.4226666666666667, "grad_norm": 1.6530746221542358, "learning_rate": 0.0001156412157153447, "loss": 1.1554, "step": 2853 }, { "epoch": 0.4228148148148148, "grad_norm": 
2.410132884979248, "learning_rate": 0.00011561156412157155, "loss": 1.202, "step": 2854 }, { "epoch": 0.422962962962963, "grad_norm": 1.847341775894165, "learning_rate": 0.00011558191252779838, "loss": 1.0665, "step": 2855 }, { "epoch": 0.4231111111111111, "grad_norm": 4.5526347160339355, "learning_rate": 0.00011555226093402521, "loss": 1.1206, "step": 2856 }, { "epoch": 0.4232592592592593, "grad_norm": 8.143963813781738, "learning_rate": 0.00011552260934025205, "loss": 1.2316, "step": 2857 }, { "epoch": 0.4234074074074074, "grad_norm": 1.9890685081481934, "learning_rate": 0.00011549295774647888, "loss": 1.0101, "step": 2858 }, { "epoch": 0.4235555555555556, "grad_norm": 3.8913021087646484, "learning_rate": 0.00011546330615270571, "loss": 0.942, "step": 2859 }, { "epoch": 0.4237037037037037, "grad_norm": 2.6386547088623047, "learning_rate": 0.00011543365455893256, "loss": 1.1263, "step": 2860 }, { "epoch": 0.42385185185185187, "grad_norm": 5.978245735168457, "learning_rate": 0.00011540400296515939, "loss": 1.0431, "step": 2861 }, { "epoch": 0.424, "grad_norm": 7.629427433013916, "learning_rate": 0.00011537435137138621, "loss": 1.1471, "step": 2862 }, { "epoch": 0.42414814814814816, "grad_norm": 4.681686878204346, "learning_rate": 0.00011534469977761304, "loss": 0.9924, "step": 2863 }, { "epoch": 0.4242962962962963, "grad_norm": 3.3226373195648193, "learning_rate": 0.0001153150481838399, "loss": 1.0308, "step": 2864 }, { "epoch": 0.42444444444444446, "grad_norm": 2.119053840637207, "learning_rate": 0.00011528539659006671, "loss": 1.0734, "step": 2865 }, { "epoch": 0.4245925925925926, "grad_norm": 1.8275108337402344, "learning_rate": 0.00011525574499629354, "loss": 0.9565, "step": 2866 }, { "epoch": 0.42474074074074075, "grad_norm": 6.552157402038574, "learning_rate": 0.0001152260934025204, "loss": 1.0816, "step": 2867 }, { "epoch": 0.42488888888888887, "grad_norm": 3.380706548690796, "learning_rate": 0.00011519644180874722, "loss": 1.4825, "step": 2868 }, { "epoch": 
0.42503703703703705, "grad_norm": 4.735677242279053, "learning_rate": 0.00011516679021497405, "loss": 1.2556, "step": 2869 }, { "epoch": 0.42518518518518517, "grad_norm": 5.0357136726379395, "learning_rate": 0.0001151371386212009, "loss": 1.1886, "step": 2870 }, { "epoch": 0.42533333333333334, "grad_norm": 2.4263415336608887, "learning_rate": 0.00011510748702742773, "loss": 1.2772, "step": 2871 }, { "epoch": 0.42548148148148146, "grad_norm": 4.668264389038086, "learning_rate": 0.00011507783543365456, "loss": 1.0242, "step": 2872 }, { "epoch": 0.42562962962962964, "grad_norm": 2.4905412197113037, "learning_rate": 0.0001150481838398814, "loss": 1.1628, "step": 2873 }, { "epoch": 0.42577777777777776, "grad_norm": 2.1681697368621826, "learning_rate": 0.00011501853224610823, "loss": 0.8461, "step": 2874 }, { "epoch": 0.42592592592592593, "grad_norm": 2.9159774780273438, "learning_rate": 0.00011498888065233506, "loss": 1.0069, "step": 2875 }, { "epoch": 0.42607407407407405, "grad_norm": 3.7189862728118896, "learning_rate": 0.00011495922905856191, "loss": 1.6027, "step": 2876 }, { "epoch": 0.4262222222222222, "grad_norm": 2.600834608078003, "learning_rate": 0.00011492957746478874, "loss": 0.9963, "step": 2877 }, { "epoch": 0.42637037037037034, "grad_norm": 3.699162006378174, "learning_rate": 0.00011489992587101557, "loss": 0.9006, "step": 2878 }, { "epoch": 0.4265185185185185, "grad_norm": 5.747298717498779, "learning_rate": 0.00011487027427724241, "loss": 1.1509, "step": 2879 }, { "epoch": 0.4266666666666667, "grad_norm": 1.7831780910491943, "learning_rate": 0.00011484062268346925, "loss": 1.1276, "step": 2880 }, { "epoch": 0.4268148148148148, "grad_norm": 8.267875671386719, "learning_rate": 0.00011481097108969608, "loss": 0.9024, "step": 2881 }, { "epoch": 0.426962962962963, "grad_norm": 2.4939606189727783, "learning_rate": 0.00011478131949592291, "loss": 0.9272, "step": 2882 }, { "epoch": 0.4271111111111111, "grad_norm": 3.7283899784088135, "learning_rate": 
0.00011475166790214975, "loss": 0.9605, "step": 2883 }, { "epoch": 0.4272592592592593, "grad_norm": 2.7416200637817383, "learning_rate": 0.00011472201630837658, "loss": 0.9724, "step": 2884 }, { "epoch": 0.4274074074074074, "grad_norm": 4.044346809387207, "learning_rate": 0.00011469236471460341, "loss": 0.8317, "step": 2885 }, { "epoch": 0.4275555555555556, "grad_norm": 4.139947414398193, "learning_rate": 0.00011466271312083026, "loss": 1.1099, "step": 2886 }, { "epoch": 0.4277037037037037, "grad_norm": 5.855690002441406, "learning_rate": 0.00011463306152705709, "loss": 1.0223, "step": 2887 }, { "epoch": 0.42785185185185187, "grad_norm": 5.331573963165283, "learning_rate": 0.0001146034099332839, "loss": 1.1985, "step": 2888 }, { "epoch": 0.428, "grad_norm": 8.737065315246582, "learning_rate": 0.00011457375833951076, "loss": 1.4157, "step": 2889 }, { "epoch": 0.42814814814814817, "grad_norm": 9.862096786499023, "learning_rate": 0.0001145441067457376, "loss": 1.2015, "step": 2890 }, { "epoch": 0.4282962962962963, "grad_norm": 5.805713176727295, "learning_rate": 0.00011451445515196441, "loss": 1.2362, "step": 2891 }, { "epoch": 0.42844444444444446, "grad_norm": 6.242314338684082, "learning_rate": 0.00011448480355819127, "loss": 1.2281, "step": 2892 }, { "epoch": 0.4285925925925926, "grad_norm": 3.5984320640563965, "learning_rate": 0.0001144551519644181, "loss": 1.06, "step": 2893 }, { "epoch": 0.42874074074074076, "grad_norm": 2.9863204956054688, "learning_rate": 0.00011442550037064492, "loss": 1.4264, "step": 2894 }, { "epoch": 0.4288888888888889, "grad_norm": 7.7998223304748535, "learning_rate": 0.00011439584877687178, "loss": 1.1028, "step": 2895 }, { "epoch": 0.42903703703703705, "grad_norm": 4.614808559417725, "learning_rate": 0.0001143661971830986, "loss": 1.0646, "step": 2896 }, { "epoch": 0.42918518518518517, "grad_norm": 8.673526763916016, "learning_rate": 0.00011433654558932542, "loss": 1.1162, "step": 2897 }, { "epoch": 0.42933333333333334, "grad_norm": 
18.236242294311523, "learning_rate": 0.00011430689399555226, "loss": 1.1246, "step": 2898 }, { "epoch": 0.42948148148148146, "grad_norm": 5.567377090454102, "learning_rate": 0.0001142772424017791, "loss": 1.1503, "step": 2899 }, { "epoch": 0.42962962962962964, "grad_norm": null, "learning_rate": 0.0001142772424017791, "loss": 1.1606, "step": 2900 }, { "epoch": 0.42977777777777776, "grad_norm": 24.497400283813477, "learning_rate": 0.00011424759080800593, "loss": 0.9694, "step": 2901 }, { "epoch": 0.42992592592592593, "grad_norm": 9.419271469116211, "learning_rate": 0.00011421793921423276, "loss": 1.1911, "step": 2902 }, { "epoch": 0.43007407407407405, "grad_norm": 6.582805633544922, "learning_rate": 0.0001141882876204596, "loss": 1.2768, "step": 2903 }, { "epoch": 0.43022222222222223, "grad_norm": 10.915657043457031, "learning_rate": 0.00011415863602668644, "loss": 1.1907, "step": 2904 }, { "epoch": 0.43037037037037035, "grad_norm": 12.22497844696045, "learning_rate": 0.00011412898443291327, "loss": 1.234, "step": 2905 }, { "epoch": 0.4305185185185185, "grad_norm": 3.5595293045043945, "learning_rate": 0.00011409933283914011, "loss": 1.5526, "step": 2906 }, { "epoch": 0.43066666666666664, "grad_norm": 4.788974761962891, "learning_rate": 0.00011406968124536694, "loss": 1.0496, "step": 2907 }, { "epoch": 0.4308148148148148, "grad_norm": 3.6452412605285645, "learning_rate": 0.00011404002965159377, "loss": 1.1436, "step": 2908 }, { "epoch": 0.43096296296296294, "grad_norm": 13.032570838928223, "learning_rate": 0.00011401037805782062, "loss": 0.974, "step": 2909 }, { "epoch": 0.4311111111111111, "grad_norm": 2.916541337966919, "learning_rate": 0.00011398072646404745, "loss": 1.1616, "step": 2910 }, { "epoch": 0.4312592592592593, "grad_norm": 4.221134185791016, "learning_rate": 0.00011395107487027428, "loss": 1.1259, "step": 2911 }, { "epoch": 0.4314074074074074, "grad_norm": 3.0779690742492676, "learning_rate": 0.00011392142327650113, "loss": 0.8523, "step": 2912 }, { 
"epoch": 0.4315555555555556, "grad_norm": 9.860507011413574, "learning_rate": 0.00011389177168272796, "loss": 1.033, "step": 2913 }, { "epoch": 0.4317037037037037, "grad_norm": 2.1072463989257812, "learning_rate": 0.00011386212008895479, "loss": 0.9688, "step": 2914 }, { "epoch": 0.4318518518518519, "grad_norm": 4.792141914367676, "learning_rate": 0.00011383246849518163, "loss": 1.1055, "step": 2915 }, { "epoch": 0.432, "grad_norm": 2.455380439758301, "learning_rate": 0.00011380281690140846, "loss": 1.0688, "step": 2916 }, { "epoch": 0.43214814814814817, "grad_norm": 3.3475184440612793, "learning_rate": 0.00011377316530763529, "loss": 1.1049, "step": 2917 }, { "epoch": 0.4322962962962963, "grad_norm": 6.160754680633545, "learning_rate": 0.00011374351371386211, "loss": 0.9171, "step": 2918 }, { "epoch": 0.43244444444444446, "grad_norm": 2.9104185104370117, "learning_rate": 0.00011371386212008897, "loss": 1.0173, "step": 2919 }, { "epoch": 0.4325925925925926, "grad_norm": 12.792739868164062, "learning_rate": 0.0001136842105263158, "loss": 1.0066, "step": 2920 }, { "epoch": 0.43274074074074076, "grad_norm": 3.8627989292144775, "learning_rate": 0.00011365455893254262, "loss": 0.8961, "step": 2921 }, { "epoch": 0.4328888888888889, "grad_norm": 16.445222854614258, "learning_rate": 0.00011362490733876947, "loss": 1.2134, "step": 2922 }, { "epoch": 0.43303703703703705, "grad_norm": 4.181873798370361, "learning_rate": 0.00011359525574499629, "loss": 0.89, "step": 2923 }, { "epoch": 0.4331851851851852, "grad_norm": 2.4696426391601562, "learning_rate": 0.00011356560415122312, "loss": 1.0442, "step": 2924 }, { "epoch": 0.43333333333333335, "grad_norm": 8.993077278137207, "learning_rate": 0.00011353595255744998, "loss": 1.1675, "step": 2925 }, { "epoch": 0.43348148148148147, "grad_norm": 3.3576834201812744, "learning_rate": 0.0001135063009636768, "loss": 1.0312, "step": 2926 }, { "epoch": 0.43362962962962964, "grad_norm": 14.622025489807129, "learning_rate": 
0.00011347664936990363, "loss": 1.3223, "step": 2927 }, { "epoch": 0.43377777777777776, "grad_norm": 4.972567081451416, "learning_rate": 0.00011344699777613049, "loss": 1.1023, "step": 2928 }, { "epoch": 0.43392592592592594, "grad_norm": 2.8445241451263428, "learning_rate": 0.0001134173461823573, "loss": 1.2656, "step": 2929 }, { "epoch": 0.43407407407407406, "grad_norm": 4.684587478637695, "learning_rate": 0.00011338769458858414, "loss": 0.9524, "step": 2930 }, { "epoch": 0.43422222222222223, "grad_norm": 6.1581268310546875, "learning_rate": 0.00011335804299481098, "loss": 0.9127, "step": 2931 }, { "epoch": 0.43437037037037035, "grad_norm": 8.210823059082031, "learning_rate": 0.00011332839140103781, "loss": 1.0098, "step": 2932 }, { "epoch": 0.4345185185185185, "grad_norm": 3.8922359943389893, "learning_rate": 0.00011329873980726464, "loss": 0.9481, "step": 2933 }, { "epoch": 0.43466666666666665, "grad_norm": 24.909101486206055, "learning_rate": 0.00011326908821349147, "loss": 1.1517, "step": 2934 }, { "epoch": 0.4348148148148148, "grad_norm": 4.422623157501221, "learning_rate": 0.00011323943661971832, "loss": 1.0295, "step": 2935 }, { "epoch": 0.43496296296296294, "grad_norm": 17.422393798828125, "learning_rate": 0.00011320978502594515, "loss": 1.001, "step": 2936 }, { "epoch": 0.4351111111111111, "grad_norm": 4.383595943450928, "learning_rate": 0.00011318013343217198, "loss": 1.2922, "step": 2937 }, { "epoch": 0.43525925925925923, "grad_norm": 5.5702900886535645, "learning_rate": 0.00011315048183839882, "loss": 0.9702, "step": 2938 }, { "epoch": 0.4354074074074074, "grad_norm": 19.464635848999023, "learning_rate": 0.00011312083024462565, "loss": 1.0723, "step": 2939 }, { "epoch": 0.43555555555555553, "grad_norm": 6.28037691116333, "learning_rate": 0.00011309117865085249, "loss": 1.0345, "step": 2940 }, { "epoch": 0.4357037037037037, "grad_norm": 8.55496597290039, "learning_rate": 0.00011306152705707933, "loss": 0.9858, "step": 2941 }, { "epoch": 
0.4358518518518519, "grad_norm": 18.133056640625, "learning_rate": 0.00011303187546330616, "loss": 1.0856, "step": 2942 }, { "epoch": 0.436, "grad_norm": 3.8335206508636475, "learning_rate": 0.00011300222386953299, "loss": 0.9485, "step": 2943 }, { "epoch": 0.4361481481481482, "grad_norm": 6.911520957946777, "learning_rate": 0.00011297257227575984, "loss": 1.0289, "step": 2944 }, { "epoch": 0.4362962962962963, "grad_norm": 3.026308059692383, "learning_rate": 0.00011294292068198667, "loss": 0.9971, "step": 2945 }, { "epoch": 0.43644444444444447, "grad_norm": 7.223231315612793, "learning_rate": 0.0001129132690882135, "loss": 1.169, "step": 2946 }, { "epoch": 0.4365925925925926, "grad_norm": 7.4562153816223145, "learning_rate": 0.00011288361749444034, "loss": 1.0482, "step": 2947 }, { "epoch": 0.43674074074074076, "grad_norm": 5.943459987640381, "learning_rate": 0.00011285396590066717, "loss": 1.2589, "step": 2948 }, { "epoch": 0.4368888888888889, "grad_norm": 5.091812610626221, "learning_rate": 0.00011282431430689399, "loss": 1.1445, "step": 2949 }, { "epoch": 0.43703703703703706, "grad_norm": 7.933882236480713, "learning_rate": 0.00011279466271312085, "loss": 1.1507, "step": 2950 }, { "epoch": 0.4371851851851852, "grad_norm": 8.546607971191406, "learning_rate": 0.00011276501111934768, "loss": 1.0796, "step": 2951 }, { "epoch": 0.43733333333333335, "grad_norm": 8.824572563171387, "learning_rate": 0.0001127353595255745, "loss": 1.335, "step": 2952 }, { "epoch": 0.43748148148148147, "grad_norm": 8.691951751708984, "learning_rate": 0.00011270570793180133, "loss": 1.1626, "step": 2953 }, { "epoch": 0.43762962962962965, "grad_norm": 4.850937366485596, "learning_rate": 0.00011267605633802819, "loss": 1.1599, "step": 2954 }, { "epoch": 0.43777777777777777, "grad_norm": 5.646979331970215, "learning_rate": 0.000112646404744255, "loss": 0.9394, "step": 2955 }, { "epoch": 0.43792592592592594, "grad_norm": 4.293743133544922, "learning_rate": 0.00011261675315048183, "loss": 
1.0186, "step": 2956 }, { "epoch": 0.43807407407407406, "grad_norm": 2.938546895980835, "learning_rate": 0.00011258710155670868, "loss": 1.048, "step": 2957 }, { "epoch": 0.43822222222222224, "grad_norm": 7.627182483673096, "learning_rate": 0.00011255744996293551, "loss": 0.9949, "step": 2958 }, { "epoch": 0.43837037037037035, "grad_norm": 3.260939359664917, "learning_rate": 0.00011252779836916234, "loss": 1.2443, "step": 2959 }, { "epoch": 0.43851851851851853, "grad_norm": 2.731006145477295, "learning_rate": 0.00011249814677538918, "loss": 0.9315, "step": 2960 }, { "epoch": 0.43866666666666665, "grad_norm": 2.959324598312378, "learning_rate": 0.00011246849518161602, "loss": 1.0446, "step": 2961 }, { "epoch": 0.4388148148148148, "grad_norm": 5.511545181274414, "learning_rate": 0.00011243884358784285, "loss": 1.0153, "step": 2962 }, { "epoch": 0.43896296296296294, "grad_norm": 5.317894458770752, "learning_rate": 0.00011240919199406969, "loss": 0.9335, "step": 2963 }, { "epoch": 0.4391111111111111, "grad_norm": 12.788424491882324, "learning_rate": 0.00011237954040029652, "loss": 0.953, "step": 2964 }, { "epoch": 0.43925925925925924, "grad_norm": 24.395565032958984, "learning_rate": 0.00011234988880652335, "loss": 1.1692, "step": 2965 }, { "epoch": 0.4394074074074074, "grad_norm": 8.75581169128418, "learning_rate": 0.0001123202372127502, "loss": 1.0485, "step": 2966 }, { "epoch": 0.43955555555555553, "grad_norm": 17.51173973083496, "learning_rate": 0.00011229058561897703, "loss": 1.0157, "step": 2967 }, { "epoch": 0.4397037037037037, "grad_norm": 2.498159885406494, "learning_rate": 0.00011226093402520386, "loss": 1.1258, "step": 2968 }, { "epoch": 0.4398518518518518, "grad_norm": 17.463228225708008, "learning_rate": 0.00011223128243143069, "loss": 1.1448, "step": 2969 }, { "epoch": 0.44, "grad_norm": 10.378259658813477, "learning_rate": 0.00011220163083765753, "loss": 1.1837, "step": 2970 }, { "epoch": 0.4401481481481481, "grad_norm": 3.2528207302093506, 
"learning_rate": 0.00011217197924388437, "loss": 1.0357, "step": 2971 }, { "epoch": 0.4402962962962963, "grad_norm": 3.818711042404175, "learning_rate": 0.0001121423276501112, "loss": 1.2929, "step": 2972 }, { "epoch": 0.44044444444444447, "grad_norm": 5.835166931152344, "learning_rate": 0.00011211267605633804, "loss": 1.0996, "step": 2973 }, { "epoch": 0.4405925925925926, "grad_norm": 12.154253005981445, "learning_rate": 0.00011208302446256487, "loss": 1.102, "step": 2974 }, { "epoch": 0.44074074074074077, "grad_norm": 3.3864121437072754, "learning_rate": 0.00011205337286879169, "loss": 1.0104, "step": 2975 }, { "epoch": 0.4408888888888889, "grad_norm": 5.395549774169922, "learning_rate": 0.00011202372127501855, "loss": 0.9617, "step": 2976 }, { "epoch": 0.44103703703703706, "grad_norm": 2.800696849822998, "learning_rate": 0.00011199406968124538, "loss": 0.9404, "step": 2977 }, { "epoch": 0.4411851851851852, "grad_norm": 6.2743940353393555, "learning_rate": 0.0001119644180874722, "loss": 1.2399, "step": 2978 }, { "epoch": 0.44133333333333336, "grad_norm": 10.434906005859375, "learning_rate": 0.00011193476649369905, "loss": 1.0988, "step": 2979 }, { "epoch": 0.4414814814814815, "grad_norm": 7.3947858810424805, "learning_rate": 0.00011190511489992588, "loss": 1.1077, "step": 2980 }, { "epoch": 0.44162962962962965, "grad_norm": 9.96932315826416, "learning_rate": 0.0001118754633061527, "loss": 1.1874, "step": 2981 }, { "epoch": 0.44177777777777777, "grad_norm": 1.851417899131775, "learning_rate": 0.00011184581171237956, "loss": 0.866, "step": 2982 }, { "epoch": 0.44192592592592594, "grad_norm": 6.065192222595215, "learning_rate": 0.00011181616011860638, "loss": 0.9024, "step": 2983 }, { "epoch": 0.44207407407407406, "grad_norm": 10.450791358947754, "learning_rate": 0.00011178650852483321, "loss": 1.2193, "step": 2984 }, { "epoch": 0.44222222222222224, "grad_norm": 1.4706751108169556, "learning_rate": 0.00011175685693106004, "loss": 1.0355, "step": 2985 }, { "epoch": 
0.44237037037037036, "grad_norm": 3.530428886413574, "learning_rate": 0.00011172720533728688, "loss": 1.0946, "step": 2986 }, { "epoch": 0.44251851851851853, "grad_norm": 2.906909227371216, "learning_rate": 0.00011169755374351371, "loss": 1.3765, "step": 2987 }, { "epoch": 0.44266666666666665, "grad_norm": 4.955533027648926, "learning_rate": 0.00011166790214974055, "loss": 1.1557, "step": 2988 }, { "epoch": 0.44281481481481483, "grad_norm": 3.982163667678833, "learning_rate": 0.00011163825055596739, "loss": 1.1782, "step": 2989 }, { "epoch": 0.44296296296296295, "grad_norm": 2.162320852279663, "learning_rate": 0.00011160859896219422, "loss": 1.175, "step": 2990 }, { "epoch": 0.4431111111111111, "grad_norm": 2.021183967590332, "learning_rate": 0.00011157894736842105, "loss": 1.3226, "step": 2991 }, { "epoch": 0.44325925925925924, "grad_norm": 7.46436071395874, "learning_rate": 0.0001115492957746479, "loss": 1.0841, "step": 2992 }, { "epoch": 0.4434074074074074, "grad_norm": 2.1430652141571045, "learning_rate": 0.00011151964418087473, "loss": 1.1459, "step": 2993 }, { "epoch": 0.44355555555555554, "grad_norm": 1.8138455152511597, "learning_rate": 0.00011148999258710156, "loss": 1.3235, "step": 2994 }, { "epoch": 0.4437037037037037, "grad_norm": 5.439229488372803, "learning_rate": 0.0001114603409933284, "loss": 1.086, "step": 2995 }, { "epoch": 0.44385185185185183, "grad_norm": 2.4075825214385986, "learning_rate": 0.00011143068939955523, "loss": 1.0681, "step": 2996 }, { "epoch": 0.444, "grad_norm": 3.177294969558716, "learning_rate": 0.00011140103780578206, "loss": 1.11, "step": 2997 }, { "epoch": 0.4441481481481481, "grad_norm": 2.3719303607940674, "learning_rate": 0.00011137138621200891, "loss": 0.9737, "step": 2998 }, { "epoch": 0.4442962962962963, "grad_norm": 6.038328647613525, "learning_rate": 0.00011134173461823574, "loss": 0.9585, "step": 2999 }, { "epoch": 0.4444444444444444, "grad_norm": 2.2186923027038574, "learning_rate": 0.00011131208302446257, "loss": 
1.2404, "step": 3000 }, { "epoch": 0.4445925925925926, "grad_norm": 3.7171268463134766, "learning_rate": 0.00011128243143068941, "loss": 1.1925, "step": 3001 }, { "epoch": 0.4447407407407407, "grad_norm": 2.1566059589385986, "learning_rate": 0.00011125277983691625, "loss": 0.9764, "step": 3002 }, { "epoch": 0.4448888888888889, "grad_norm": 2.386497974395752, "learning_rate": 0.00011122312824314308, "loss": 1.1597, "step": 3003 }, { "epoch": 0.44503703703703706, "grad_norm": 2.0896761417388916, "learning_rate": 0.0001111934766493699, "loss": 0.996, "step": 3004 }, { "epoch": 0.4451851851851852, "grad_norm": 4.1234307289123535, "learning_rate": 0.00011116382505559675, "loss": 1.1501, "step": 3005 }, { "epoch": 0.44533333333333336, "grad_norm": 2.1813316345214844, "learning_rate": 0.00011113417346182358, "loss": 1.2474, "step": 3006 }, { "epoch": 0.4454814814814815, "grad_norm": 2.360969305038452, "learning_rate": 0.0001111045218680504, "loss": 0.7602, "step": 3007 }, { "epoch": 0.44562962962962965, "grad_norm": 2.4061508178710938, "learning_rate": 0.00011107487027427726, "loss": 0.9027, "step": 3008 }, { "epoch": 0.4457777777777778, "grad_norm": 1.799283504486084, "learning_rate": 0.00011104521868050408, "loss": 1.154, "step": 3009 }, { "epoch": 0.44592592592592595, "grad_norm": 1.6981852054595947, "learning_rate": 0.0001110155670867309, "loss": 1.0655, "step": 3010 }, { "epoch": 0.44607407407407407, "grad_norm": 2.8216686248779297, "learning_rate": 0.00011098591549295776, "loss": 1.2816, "step": 3011 }, { "epoch": 0.44622222222222224, "grad_norm": 2.0957045555114746, "learning_rate": 0.00011095626389918458, "loss": 1.0868, "step": 3012 }, { "epoch": 0.44637037037037036, "grad_norm": 1.7423616647720337, "learning_rate": 0.00011092661230541141, "loss": 1.0197, "step": 3013 }, { "epoch": 0.44651851851851854, "grad_norm": 1.9414492845535278, "learning_rate": 0.00011089696071163827, "loss": 0.926, "step": 3014 }, { "epoch": 0.44666666666666666, "grad_norm": 
3.2115371227264404, "learning_rate": 0.00011086730911786509, "loss": 1.1533, "step": 3015 }, { "epoch": 0.44681481481481483, "grad_norm": 2.3400022983551025, "learning_rate": 0.00011083765752409192, "loss": 1.0101, "step": 3016 }, { "epoch": 0.44696296296296295, "grad_norm": 5.575966835021973, "learning_rate": 0.00011080800593031876, "loss": 0.9246, "step": 3017 }, { "epoch": 0.4471111111111111, "grad_norm": 5.21854305267334, "learning_rate": 0.0001107783543365456, "loss": 0.9716, "step": 3018 }, { "epoch": 0.44725925925925925, "grad_norm": 1.3820289373397827, "learning_rate": 0.00011074870274277243, "loss": 1.0381, "step": 3019 }, { "epoch": 0.4474074074074074, "grad_norm": 1.17095947265625, "learning_rate": 0.00011071905114899926, "loss": 1.191, "step": 3020 }, { "epoch": 0.44755555555555554, "grad_norm": 1.6841038465499878, "learning_rate": 0.0001106893995552261, "loss": 1.0123, "step": 3021 }, { "epoch": 0.4477037037037037, "grad_norm": 2.9544711112976074, "learning_rate": 0.00011065974796145293, "loss": 1.0386, "step": 3022 }, { "epoch": 0.44785185185185183, "grad_norm": 2.053281307220459, "learning_rate": 0.00011063009636767976, "loss": 1.174, "step": 3023 }, { "epoch": 0.448, "grad_norm": 3.5765340328216553, "learning_rate": 0.00011060044477390661, "loss": 1.193, "step": 3024 }, { "epoch": 0.44814814814814813, "grad_norm": 1.4198112487792969, "learning_rate": 0.00011057079318013344, "loss": 1.0356, "step": 3025 }, { "epoch": 0.4482962962962963, "grad_norm": 2.1637237071990967, "learning_rate": 0.00011054114158636027, "loss": 1.1385, "step": 3026 }, { "epoch": 0.4484444444444444, "grad_norm": 2.439480781555176, "learning_rate": 0.00011051148999258711, "loss": 1.2328, "step": 3027 }, { "epoch": 0.4485925925925926, "grad_norm": 1.2427433729171753, "learning_rate": 0.00011048183839881394, "loss": 1.1139, "step": 3028 }, { "epoch": 0.4487407407407407, "grad_norm": 3.283803939819336, "learning_rate": 0.00011045218680504077, "loss": 1.1461, "step": 3029 }, { 
"epoch": 0.4488888888888889, "grad_norm": 1.2881802320480347, "learning_rate": 0.00011042253521126762, "loss": 0.9716, "step": 3030 }, { "epoch": 0.449037037037037, "grad_norm": 3.095428228378296, "learning_rate": 0.00011039288361749445, "loss": 1.1238, "step": 3031 }, { "epoch": 0.4491851851851852, "grad_norm": 1.309549331665039, "learning_rate": 0.00011036323202372128, "loss": 1.1793, "step": 3032 }, { "epoch": 0.4493333333333333, "grad_norm": 1.5135389566421509, "learning_rate": 0.00011033358042994813, "loss": 0.953, "step": 3033 }, { "epoch": 0.4494814814814815, "grad_norm": 2.1670916080474854, "learning_rate": 0.00011030392883617496, "loss": 0.9172, "step": 3034 }, { "epoch": 0.44962962962962966, "grad_norm": 2.9388227462768555, "learning_rate": 0.00011027427724240177, "loss": 1.1726, "step": 3035 }, { "epoch": 0.4497777777777778, "grad_norm": 3.653726100921631, "learning_rate": 0.00011024462564862863, "loss": 0.916, "step": 3036 }, { "epoch": 0.44992592592592595, "grad_norm": 1.2855350971221924, "learning_rate": 0.00011021497405485546, "loss": 1.0213, "step": 3037 }, { "epoch": 0.45007407407407407, "grad_norm": 1.843808889389038, "learning_rate": 0.00011018532246108228, "loss": 1.0107, "step": 3038 }, { "epoch": 0.45022222222222225, "grad_norm": 1.0858509540557861, "learning_rate": 0.00011015567086730911, "loss": 1.1499, "step": 3039 }, { "epoch": 0.45037037037037037, "grad_norm": 2.298110008239746, "learning_rate": 0.00011012601927353597, "loss": 1.0576, "step": 3040 }, { "epoch": 0.45051851851851854, "grad_norm": 2.0090999603271484, "learning_rate": 0.00011009636767976279, "loss": 0.9171, "step": 3041 }, { "epoch": 0.45066666666666666, "grad_norm": 1.9219692945480347, "learning_rate": 0.00011006671608598962, "loss": 0.9276, "step": 3042 }, { "epoch": 0.45081481481481483, "grad_norm": 1.3808432817459106, "learning_rate": 0.00011003706449221646, "loss": 0.692, "step": 3043 }, { "epoch": 0.45096296296296295, "grad_norm": 1.2714673280715942, "learning_rate": 
0.00011000741289844329, "loss": 0.9973, "step": 3044 }, { "epoch": 0.45111111111111113, "grad_norm": 5.028180122375488, "learning_rate": 0.00010997776130467012, "loss": 1.0399, "step": 3045 }, { "epoch": 0.45125925925925925, "grad_norm": 1.7946445941925049, "learning_rate": 0.00010994810971089697, "loss": 1.0096, "step": 3046 }, { "epoch": 0.4514074074074074, "grad_norm": 2.270958662033081, "learning_rate": 0.0001099184581171238, "loss": 1.2807, "step": 3047 }, { "epoch": 0.45155555555555554, "grad_norm": 2.8045239448547363, "learning_rate": 0.00010988880652335063, "loss": 0.9194, "step": 3048 }, { "epoch": 0.4517037037037037, "grad_norm": 5.084659576416016, "learning_rate": 0.00010985915492957747, "loss": 0.9291, "step": 3049 }, { "epoch": 0.45185185185185184, "grad_norm": 4.033753395080566, "learning_rate": 0.0001098295033358043, "loss": 1.0498, "step": 3050 }, { "epoch": 0.452, "grad_norm": 1.7575342655181885, "learning_rate": 0.00010979985174203114, "loss": 1.3233, "step": 3051 }, { "epoch": 0.45214814814814813, "grad_norm": 4.462325096130371, "learning_rate": 0.00010977020014825798, "loss": 1.1128, "step": 3052 }, { "epoch": 0.4522962962962963, "grad_norm": 2.3308298587799072, "learning_rate": 0.00010974054855448481, "loss": 1.0498, "step": 3053 }, { "epoch": 0.4524444444444444, "grad_norm": 1.3861079216003418, "learning_rate": 0.00010971089696071164, "loss": 0.9949, "step": 3054 }, { "epoch": 0.4525925925925926, "grad_norm": 4.199979305267334, "learning_rate": 0.00010968124536693847, "loss": 1.2077, "step": 3055 }, { "epoch": 0.4527407407407407, "grad_norm": 1.8853832483291626, "learning_rate": 0.00010965159377316532, "loss": 0.8667, "step": 3056 }, { "epoch": 0.4528888888888889, "grad_norm": 1.4810208082199097, "learning_rate": 0.00010962194217939215, "loss": 1.1541, "step": 3057 }, { "epoch": 0.453037037037037, "grad_norm": 1.997210144996643, "learning_rate": 0.00010959229058561898, "loss": 1.0197, "step": 3058 }, { "epoch": 0.4531851851851852, "grad_norm": 
1.7493116855621338, "learning_rate": 0.00010956263899184582, "loss": 1.0517, "step": 3059 }, { "epoch": 0.4533333333333333, "grad_norm": 2.0254781246185303, "learning_rate": 0.00010953298739807266, "loss": 1.1163, "step": 3060 }, { "epoch": 0.4534814814814815, "grad_norm": 9.770330429077148, "learning_rate": 0.00010950333580429947, "loss": 1.018, "step": 3061 }, { "epoch": 0.4536296296296296, "grad_norm": 3.976106882095337, "learning_rate": 0.00010947368421052633, "loss": 1.0003, "step": 3062 }, { "epoch": 0.4537777777777778, "grad_norm": 3.375537633895874, "learning_rate": 0.00010944403261675316, "loss": 1.1881, "step": 3063 }, { "epoch": 0.4539259259259259, "grad_norm": 7.4661865234375, "learning_rate": 0.00010941438102297998, "loss": 1.0032, "step": 3064 }, { "epoch": 0.4540740740740741, "grad_norm": 13.027575492858887, "learning_rate": 0.00010938472942920684, "loss": 1.1811, "step": 3065 }, { "epoch": 0.45422222222222225, "grad_norm": 9.472670555114746, "learning_rate": 0.00010935507783543367, "loss": 1.1753, "step": 3066 }, { "epoch": 0.45437037037037037, "grad_norm": 1.929787278175354, "learning_rate": 0.00010932542624166048, "loss": 1.2076, "step": 3067 }, { "epoch": 0.45451851851851854, "grad_norm": 3.8757476806640625, "learning_rate": 0.00010929577464788734, "loss": 1.1256, "step": 3068 }, { "epoch": 0.45466666666666666, "grad_norm": 4.183289527893066, "learning_rate": 0.00010926612305411416, "loss": 1.166, "step": 3069 }, { "epoch": 0.45481481481481484, "grad_norm": 3.670625925064087, "learning_rate": 0.00010923647146034099, "loss": 0.9806, "step": 3070 }, { "epoch": 0.45496296296296296, "grad_norm": 2.6360023021698, "learning_rate": 0.00010920681986656782, "loss": 1.1445, "step": 3071 }, { "epoch": 0.45511111111111113, "grad_norm": 1.8747177124023438, "learning_rate": 0.00010917716827279467, "loss": 0.9117, "step": 3072 }, { "epoch": 0.45525925925925925, "grad_norm": 3.689152479171753, "learning_rate": 0.0001091475166790215, "loss": 0.9284, "step": 3073 
}, { "epoch": 0.4554074074074074, "grad_norm": 2.021763563156128, "learning_rate": 0.00010911786508524833, "loss": 1.0789, "step": 3074 }, { "epoch": 0.45555555555555555, "grad_norm": 8.889338493347168, "learning_rate": 0.00010908821349147517, "loss": 1.1416, "step": 3075 }, { "epoch": 0.4557037037037037, "grad_norm": 1.4197607040405273, "learning_rate": 0.000109058561897702, "loss": 0.8435, "step": 3076 }, { "epoch": 0.45585185185185184, "grad_norm": 5.286947250366211, "learning_rate": 0.00010902891030392883, "loss": 1.1242, "step": 3077 }, { "epoch": 0.456, "grad_norm": 2.942779779434204, "learning_rate": 0.00010899925871015568, "loss": 1.1233, "step": 3078 }, { "epoch": 0.45614814814814814, "grad_norm": 2.4830679893493652, "learning_rate": 0.00010896960711638251, "loss": 0.9952, "step": 3079 }, { "epoch": 0.4562962962962963, "grad_norm": 3.3844051361083984, "learning_rate": 0.00010893995552260934, "loss": 1.0727, "step": 3080 }, { "epoch": 0.45644444444444443, "grad_norm": 1.5580177307128906, "learning_rate": 0.00010891030392883619, "loss": 1.0469, "step": 3081 }, { "epoch": 0.4565925925925926, "grad_norm": 2.341848850250244, "learning_rate": 0.00010888065233506302, "loss": 1.1389, "step": 3082 }, { "epoch": 0.4567407407407407, "grad_norm": 3.0161538124084473, "learning_rate": 0.00010885100074128985, "loss": 1.1048, "step": 3083 }, { "epoch": 0.4568888888888889, "grad_norm": 1.2393746376037598, "learning_rate": 0.00010882134914751669, "loss": 1.0041, "step": 3084 }, { "epoch": 0.457037037037037, "grad_norm": 2.847796678543091, "learning_rate": 0.00010879169755374352, "loss": 0.9235, "step": 3085 }, { "epoch": 0.4571851851851852, "grad_norm": 5.12452507019043, "learning_rate": 0.00010876204595997035, "loss": 1.2076, "step": 3086 }, { "epoch": 0.4573333333333333, "grad_norm": 1.9241137504577637, "learning_rate": 0.0001087323943661972, "loss": 0.9998, "step": 3087 }, { "epoch": 0.4574814814814815, "grad_norm": 3.328599452972412, "learning_rate": 
0.00010870274277242403, "loss": 1.2035, "step": 3088 }, { "epoch": 0.4576296296296296, "grad_norm": 2.1363518238067627, "learning_rate": 0.00010867309117865086, "loss": 1.0252, "step": 3089 }, { "epoch": 0.4577777777777778, "grad_norm": 2.4398419857025146, "learning_rate": 0.00010864343958487768, "loss": 1.1888, "step": 3090 }, { "epoch": 0.4579259259259259, "grad_norm": 3.3628358840942383, "learning_rate": 0.00010861378799110454, "loss": 1.0159, "step": 3091 }, { "epoch": 0.4580740740740741, "grad_norm": 2.5573160648345947, "learning_rate": 0.00010858413639733137, "loss": 0.7046, "step": 3092 }, { "epoch": 0.4582222222222222, "grad_norm": 4.458554744720459, "learning_rate": 0.00010855448480355818, "loss": 1.0843, "step": 3093 }, { "epoch": 0.4583703703703704, "grad_norm": 6.643269062042236, "learning_rate": 0.00010852483320978504, "loss": 0.9896, "step": 3094 }, { "epoch": 0.4585185185185185, "grad_norm": 1.6025092601776123, "learning_rate": 0.00010849518161601186, "loss": 0.8736, "step": 3095 }, { "epoch": 0.45866666666666667, "grad_norm": 2.3946588039398193, "learning_rate": 0.00010846553002223869, "loss": 0.9271, "step": 3096 }, { "epoch": 0.45881481481481484, "grad_norm": 4.110802173614502, "learning_rate": 0.00010843587842846555, "loss": 0.9321, "step": 3097 }, { "epoch": 0.45896296296296296, "grad_norm": 3.624279499053955, "learning_rate": 0.00010840622683469237, "loss": 1.1331, "step": 3098 }, { "epoch": 0.45911111111111114, "grad_norm": 1.5807106494903564, "learning_rate": 0.0001083765752409192, "loss": 1.279, "step": 3099 }, { "epoch": 0.45925925925925926, "grad_norm": 4.025631904602051, "learning_rate": 0.00010834692364714605, "loss": 1.2527, "step": 3100 }, { "epoch": 0.45940740740740743, "grad_norm": 1.865668773651123, "learning_rate": 0.00010831727205337287, "loss": 1.1178, "step": 3101 }, { "epoch": 0.45955555555555555, "grad_norm": 10.235735893249512, "learning_rate": 0.0001082876204595997, "loss": 1.0382, "step": 3102 }, { "epoch": 
0.4597037037037037, "grad_norm": 2.669966459274292, "learning_rate": 0.00010825796886582655, "loss": 1.1538, "step": 3103 }, { "epoch": 0.45985185185185184, "grad_norm": 5.778812885284424, "learning_rate": 0.00010822831727205338, "loss": 1.5724, "step": 3104 }, { "epoch": 0.46, "grad_norm": 3.6304867267608643, "learning_rate": 0.00010819866567828021, "loss": 0.8341, "step": 3105 }, { "epoch": 0.46014814814814814, "grad_norm": 3.9482040405273438, "learning_rate": 0.00010816901408450704, "loss": 1.1932, "step": 3106 }, { "epoch": 0.4602962962962963, "grad_norm": 9.13787841796875, "learning_rate": 0.00010813936249073388, "loss": 1.0207, "step": 3107 }, { "epoch": 0.46044444444444443, "grad_norm": 2.339956045150757, "learning_rate": 0.00010810971089696071, "loss": 0.9284, "step": 3108 }, { "epoch": 0.4605925925925926, "grad_norm": 2.340646743774414, "learning_rate": 0.00010808005930318755, "loss": 1.2671, "step": 3109 }, { "epoch": 0.46074074074074073, "grad_norm": 4.460016250610352, "learning_rate": 0.00010805040770941439, "loss": 1.145, "step": 3110 }, { "epoch": 0.4608888888888889, "grad_norm": 6.100407600402832, "learning_rate": 0.00010802075611564122, "loss": 1.029, "step": 3111 }, { "epoch": 0.461037037037037, "grad_norm": 3.0961480140686035, "learning_rate": 0.00010799110452186805, "loss": 1.2583, "step": 3112 }, { "epoch": 0.4611851851851852, "grad_norm": 8.958905220031738, "learning_rate": 0.0001079614529280949, "loss": 1.0846, "step": 3113 }, { "epoch": 0.4613333333333333, "grad_norm": 2.06352162361145, "learning_rate": 0.00010793180133432173, "loss": 1.0211, "step": 3114 }, { "epoch": 0.4614814814814815, "grad_norm": 2.211383104324341, "learning_rate": 0.00010790214974054856, "loss": 1.1329, "step": 3115 }, { "epoch": 0.4616296296296296, "grad_norm": 2.3058247566223145, "learning_rate": 0.0001078724981467754, "loss": 1.0362, "step": 3116 }, { "epoch": 0.4617777777777778, "grad_norm": 5.804232597351074, "learning_rate": 0.00010784284655300223, "loss": 1.1004, 
"step": 3117 }, { "epoch": 0.4619259259259259, "grad_norm": 3.8884549140930176, "learning_rate": 0.00010781319495922906, "loss": 1.0747, "step": 3118 }, { "epoch": 0.4620740740740741, "grad_norm": 1.8256559371948242, "learning_rate": 0.00010778354336545591, "loss": 1.2765, "step": 3119 }, { "epoch": 0.4622222222222222, "grad_norm": 3.3232460021972656, "learning_rate": 0.00010775389177168274, "loss": 0.9998, "step": 3120 }, { "epoch": 0.4623703703703704, "grad_norm": 4.4035139083862305, "learning_rate": 0.00010772424017790956, "loss": 1.1608, "step": 3121 }, { "epoch": 0.4625185185185185, "grad_norm": 2.6258749961853027, "learning_rate": 0.00010769458858413642, "loss": 1.168, "step": 3122 }, { "epoch": 0.46266666666666667, "grad_norm": 5.1971282958984375, "learning_rate": 0.00010766493699036325, "loss": 1.2523, "step": 3123 }, { "epoch": 0.4628148148148148, "grad_norm": 8.231294631958008, "learning_rate": 0.00010763528539659006, "loss": 0.945, "step": 3124 }, { "epoch": 0.46296296296296297, "grad_norm": 7.108486652374268, "learning_rate": 0.0001076056338028169, "loss": 1.1456, "step": 3125 }, { "epoch": 0.4631111111111111, "grad_norm": 4.68864107131958, "learning_rate": 0.00010757598220904375, "loss": 0.9352, "step": 3126 }, { "epoch": 0.46325925925925926, "grad_norm": 3.0115625858306885, "learning_rate": 0.00010754633061527057, "loss": 0.929, "step": 3127 }, { "epoch": 0.46340740740740743, "grad_norm": 2.0125746726989746, "learning_rate": 0.0001075166790214974, "loss": 1.1929, "step": 3128 }, { "epoch": 0.46355555555555555, "grad_norm": 2.8470137119293213, "learning_rate": 0.00010748702742772425, "loss": 1.1472, "step": 3129 }, { "epoch": 0.46370370370370373, "grad_norm": 3.7088513374328613, "learning_rate": 0.00010745737583395108, "loss": 1.3877, "step": 3130 }, { "epoch": 0.46385185185185185, "grad_norm": 2.3978278636932373, "learning_rate": 0.00010742772424017791, "loss": 0.9441, "step": 3131 }, { "epoch": 0.464, "grad_norm": 1.5147128105163574, "learning_rate": 
0.00010739807264640475, "loss": 1.1477, "step": 3132 }, { "epoch": 0.46414814814814814, "grad_norm": 2.765228271484375, "learning_rate": 0.00010736842105263158, "loss": 1.258, "step": 3133 }, { "epoch": 0.4642962962962963, "grad_norm": 7.870732307434082, "learning_rate": 0.00010733876945885841, "loss": 0.8307, "step": 3134 }, { "epoch": 0.46444444444444444, "grad_norm": 3.9861104488372803, "learning_rate": 0.00010730911786508526, "loss": 1.1876, "step": 3135 }, { "epoch": 0.4645925925925926, "grad_norm": 4.176854610443115, "learning_rate": 0.00010727946627131209, "loss": 1.1667, "step": 3136 }, { "epoch": 0.46474074074074073, "grad_norm": 3.1691198348999023, "learning_rate": 0.00010724981467753892, "loss": 1.2163, "step": 3137 }, { "epoch": 0.4648888888888889, "grad_norm": 2.161644220352173, "learning_rate": 0.00010722016308376576, "loss": 1.1669, "step": 3138 }, { "epoch": 0.465037037037037, "grad_norm": 5.941046237945557, "learning_rate": 0.0001071905114899926, "loss": 1.0284, "step": 3139 }, { "epoch": 0.4651851851851852, "grad_norm": 3.7416329383850098, "learning_rate": 0.00010716085989621943, "loss": 1.06, "step": 3140 }, { "epoch": 0.4653333333333333, "grad_norm": 3.7596495151519775, "learning_rate": 0.00010713120830244626, "loss": 1.2669, "step": 3141 }, { "epoch": 0.4654814814814815, "grad_norm": 1.4510890245437622, "learning_rate": 0.0001071015567086731, "loss": 0.9947, "step": 3142 }, { "epoch": 0.4656296296296296, "grad_norm": 2.532569169998169, "learning_rate": 0.00010707190511489993, "loss": 1.0805, "step": 3143 }, { "epoch": 0.4657777777777778, "grad_norm": 7.6669511795043945, "learning_rate": 0.00010704225352112676, "loss": 1.0176, "step": 3144 }, { "epoch": 0.4659259259259259, "grad_norm": 19.293540954589844, "learning_rate": 0.00010701260192735361, "loss": 1.4962, "step": 3145 }, { "epoch": 0.4660740740740741, "grad_norm": 8.845331192016602, "learning_rate": 0.00010698295033358044, "loss": 1.1374, "step": 3146 }, { "epoch": 0.4662222222222222, 
"grad_norm": 2.234372615814209, "learning_rate": 0.00010695329873980726, "loss": 0.961, "step": 3147 }, { "epoch": 0.4663703703703704, "grad_norm": 16.582304000854492, "learning_rate": 0.00010692364714603411, "loss": 1.0187, "step": 3148 }, { "epoch": 0.4665185185185185, "grad_norm": 3.5459134578704834, "learning_rate": 0.00010689399555226094, "loss": 1.0671, "step": 3149 }, { "epoch": 0.4666666666666667, "grad_norm": 3.5274243354797363, "learning_rate": 0.00010686434395848776, "loss": 1.0944, "step": 3150 }, { "epoch": 0.4668148148148148, "grad_norm": 3.0876269340515137, "learning_rate": 0.00010683469236471462, "loss": 1.1617, "step": 3151 }, { "epoch": 0.46696296296296297, "grad_norm": 3.162646532058716, "learning_rate": 0.00010680504077094145, "loss": 1.0033, "step": 3152 }, { "epoch": 0.4671111111111111, "grad_norm": 4.672357082366943, "learning_rate": 0.00010677538917716827, "loss": 1.1112, "step": 3153 }, { "epoch": 0.46725925925925926, "grad_norm": 2.4324915409088135, "learning_rate": 0.00010674573758339513, "loss": 1.2125, "step": 3154 }, { "epoch": 0.4674074074074074, "grad_norm": 3.274158239364624, "learning_rate": 0.00010671608598962194, "loss": 1.2154, "step": 3155 }, { "epoch": 0.46755555555555556, "grad_norm": 2.635927438735962, "learning_rate": 0.00010668643439584877, "loss": 1.1436, "step": 3156 }, { "epoch": 0.4677037037037037, "grad_norm": 3.1742031574249268, "learning_rate": 0.00010665678280207563, "loss": 0.9477, "step": 3157 }, { "epoch": 0.46785185185185185, "grad_norm": 2.651947021484375, "learning_rate": 0.00010662713120830245, "loss": 1.0503, "step": 3158 }, { "epoch": 0.468, "grad_norm": 2.1971304416656494, "learning_rate": 0.00010659747961452928, "loss": 0.995, "step": 3159 }, { "epoch": 0.46814814814814815, "grad_norm": 6.848816394805908, "learning_rate": 0.00010656782802075611, "loss": 0.9247, "step": 3160 }, { "epoch": 0.4682962962962963, "grad_norm": 2.959502935409546, "learning_rate": 0.00010653817642698296, "loss": 1.1678, "step": 
3161 }, { "epoch": 0.46844444444444444, "grad_norm": 2.398146629333496, "learning_rate": 0.00010650852483320979, "loss": 1.1378, "step": 3162 }, { "epoch": 0.4685925925925926, "grad_norm": 2.858717203140259, "learning_rate": 0.00010647887323943662, "loss": 1.017, "step": 3163 }, { "epoch": 0.46874074074074074, "grad_norm": 4.143274307250977, "learning_rate": 0.00010644922164566346, "loss": 0.962, "step": 3164 }, { "epoch": 0.4688888888888889, "grad_norm": 2.289644718170166, "learning_rate": 0.0001064195700518903, "loss": 1.042, "step": 3165 }, { "epoch": 0.46903703703703703, "grad_norm": 3.0466256141662598, "learning_rate": 0.00010638991845811712, "loss": 1.0398, "step": 3166 }, { "epoch": 0.4691851851851852, "grad_norm": 5.905399799346924, "learning_rate": 0.00010636026686434397, "loss": 1.0772, "step": 3167 }, { "epoch": 0.4693333333333333, "grad_norm": 2.9436914920806885, "learning_rate": 0.0001063306152705708, "loss": 0.99, "step": 3168 }, { "epoch": 0.4694814814814815, "grad_norm": 16.72655487060547, "learning_rate": 0.00010630096367679763, "loss": 1.0256, "step": 3169 }, { "epoch": 0.4696296296296296, "grad_norm": 5.662783622741699, "learning_rate": 0.00010627131208302448, "loss": 1.2278, "step": 3170 }, { "epoch": 0.4697777777777778, "grad_norm": 1.763562560081482, "learning_rate": 0.0001062416604892513, "loss": 0.8771, "step": 3171 }, { "epoch": 0.4699259259259259, "grad_norm": 3.000190496444702, "learning_rate": 0.00010621200889547814, "loss": 1.1496, "step": 3172 }, { "epoch": 0.4700740740740741, "grad_norm": 14.939896583557129, "learning_rate": 0.00010618235730170498, "loss": 1.2843, "step": 3173 }, { "epoch": 0.4702222222222222, "grad_norm": 21.34058380126953, "learning_rate": 0.00010615270570793181, "loss": 0.6499, "step": 3174 }, { "epoch": 0.4703703703703704, "grad_norm": 12.188255310058594, "learning_rate": 0.00010612305411415864, "loss": 1.0415, "step": 3175 }, { "epoch": 0.4705185185185185, "grad_norm": 5.454524517059326, "learning_rate": 
0.00010609340252038546, "loss": 1.1986, "step": 3176 }, { "epoch": 0.4706666666666667, "grad_norm": 8.059273719787598, "learning_rate": 0.00010606375092661232, "loss": 1.0591, "step": 3177 }, { "epoch": 0.4708148148148148, "grad_norm": 2.550055742263794, "learning_rate": 0.00010603409933283915, "loss": 1.1642, "step": 3178 }, { "epoch": 0.47096296296296297, "grad_norm": 5.875574588775635, "learning_rate": 0.00010600444773906597, "loss": 1.0948, "step": 3179 }, { "epoch": 0.4711111111111111, "grad_norm": 17.175174713134766, "learning_rate": 0.00010597479614529282, "loss": 1.1653, "step": 3180 }, { "epoch": 0.47125925925925927, "grad_norm": 3.404045581817627, "learning_rate": 0.00010594514455151964, "loss": 1.1246, "step": 3181 }, { "epoch": 0.4714074074074074, "grad_norm": 6.8494086265563965, "learning_rate": 0.00010591549295774647, "loss": 1.0845, "step": 3182 }, { "epoch": 0.47155555555555556, "grad_norm": 10.930824279785156, "learning_rate": 0.00010588584136397333, "loss": 0.9457, "step": 3183 }, { "epoch": 0.4717037037037037, "grad_norm": 2.7525134086608887, "learning_rate": 0.00010585618977020015, "loss": 1.2138, "step": 3184 }, { "epoch": 0.47185185185185186, "grad_norm": 5.941433906555176, "learning_rate": 0.00010582653817642698, "loss": 1.3032, "step": 3185 }, { "epoch": 0.472, "grad_norm": 5.860634803771973, "learning_rate": 0.00010579688658265384, "loss": 0.9713, "step": 3186 }, { "epoch": 0.47214814814814815, "grad_norm": 4.1480865478515625, "learning_rate": 0.00010576723498888065, "loss": 1.3524, "step": 3187 }, { "epoch": 0.47229629629629627, "grad_norm": 6.544581413269043, "learning_rate": 0.00010573758339510749, "loss": 1.1095, "step": 3188 }, { "epoch": 0.47244444444444444, "grad_norm": 4.920466899871826, "learning_rate": 0.00010570793180133433, "loss": 0.7559, "step": 3189 }, { "epoch": 0.4725925925925926, "grad_norm": 8.82383918762207, "learning_rate": 0.00010567828020756116, "loss": 1.0021, "step": 3190 }, { "epoch": 0.47274074074074074, 
"grad_norm": 3.9936301708221436, "learning_rate": 0.00010564862861378799, "loss": 1.1223, "step": 3191 }, { "epoch": 0.4728888888888889, "grad_norm": 5.304333209991455, "learning_rate": 0.00010561897702001482, "loss": 1.2125, "step": 3192 }, { "epoch": 0.47303703703703703, "grad_norm": 3.7484147548675537, "learning_rate": 0.00010558932542624167, "loss": 1.2072, "step": 3193 }, { "epoch": 0.4731851851851852, "grad_norm": 8.583340644836426, "learning_rate": 0.0001055596738324685, "loss": 0.8443, "step": 3194 }, { "epoch": 0.47333333333333333, "grad_norm": 6.135061740875244, "learning_rate": 0.00010553002223869533, "loss": 1.0808, "step": 3195 }, { "epoch": 0.4734814814814815, "grad_norm": 5.279104232788086, "learning_rate": 0.00010550037064492217, "loss": 1.0127, "step": 3196 }, { "epoch": 0.4736296296296296, "grad_norm": 9.33665943145752, "learning_rate": 0.000105470719051149, "loss": 1.0379, "step": 3197 }, { "epoch": 0.4737777777777778, "grad_norm": 1.8126426935195923, "learning_rate": 0.00010544106745737584, "loss": 1.0124, "step": 3198 }, { "epoch": 0.4739259259259259, "grad_norm": 2.6330509185791016, "learning_rate": 0.00010541141586360268, "loss": 1.1142, "step": 3199 }, { "epoch": 0.4740740740740741, "grad_norm": 16.036205291748047, "learning_rate": 0.00010538176426982951, "loss": 0.9784, "step": 3200 }, { "epoch": 0.4742222222222222, "grad_norm": 3.068990468978882, "learning_rate": 0.00010535211267605634, "loss": 1.1511, "step": 3201 }, { "epoch": 0.4743703703703704, "grad_norm": 7.242805480957031, "learning_rate": 0.00010532246108228319, "loss": 0.8941, "step": 3202 }, { "epoch": 0.4745185185185185, "grad_norm": 4.33395528793335, "learning_rate": 0.00010529280948851002, "loss": 0.8751, "step": 3203 }, { "epoch": 0.4746666666666667, "grad_norm": 4.307576656341553, "learning_rate": 0.00010526315789473685, "loss": 1.1531, "step": 3204 }, { "epoch": 0.4748148148148148, "grad_norm": 3.184480667114258, "learning_rate": 0.00010523350630096369, "loss": 1.2952, 
"step": 3205 }, { "epoch": 0.474962962962963, "grad_norm": 3.037388324737549, "learning_rate": 0.00010520385470719052, "loss": 1.0486, "step": 3206 }, { "epoch": 0.4751111111111111, "grad_norm": 2.3104970455169678, "learning_rate": 0.00010517420311341734, "loss": 1.0139, "step": 3207 }, { "epoch": 0.47525925925925927, "grad_norm": 7.615547180175781, "learning_rate": 0.0001051445515196442, "loss": 1.1018, "step": 3208 }, { "epoch": 0.4754074074074074, "grad_norm": 3.9027249813079834, "learning_rate": 0.00010511489992587103, "loss": 1.1016, "step": 3209 }, { "epoch": 0.47555555555555556, "grad_norm": 4.110639572143555, "learning_rate": 0.00010508524833209785, "loss": 0.9585, "step": 3210 }, { "epoch": 0.4757037037037037, "grad_norm": 3.2899138927459717, "learning_rate": 0.00010505559673832468, "loss": 1.0343, "step": 3211 }, { "epoch": 0.47585185185185186, "grad_norm": 6.333221435546875, "learning_rate": 0.00010502594514455154, "loss": 1.0174, "step": 3212 }, { "epoch": 0.476, "grad_norm": 6.963191032409668, "learning_rate": 0.00010499629355077835, "loss": 1.1656, "step": 3213 }, { "epoch": 0.47614814814814815, "grad_norm": 6.781939506530762, "learning_rate": 0.00010496664195700518, "loss": 1.086, "step": 3214 }, { "epoch": 0.4762962962962963, "grad_norm": 10.270206451416016, "learning_rate": 0.00010493699036323203, "loss": 0.7711, "step": 3215 }, { "epoch": 0.47644444444444445, "grad_norm": 6.469484806060791, "learning_rate": 0.00010490733876945886, "loss": 1.314, "step": 3216 }, { "epoch": 0.47659259259259257, "grad_norm": 2.9984261989593506, "learning_rate": 0.00010487768717568569, "loss": 1.2979, "step": 3217 }, { "epoch": 0.47674074074074074, "grad_norm": 4.488178253173828, "learning_rate": 0.00010484803558191253, "loss": 1.196, "step": 3218 }, { "epoch": 0.47688888888888886, "grad_norm": 6.778642177581787, "learning_rate": 0.00010481838398813937, "loss": 1.1411, "step": 3219 }, { "epoch": 0.47703703703703704, "grad_norm": 12.261344909667969, "learning_rate": 
0.0001047887323943662, "loss": 1.0381, "step": 3220 }, { "epoch": 0.4771851851851852, "grad_norm": 5.716145038604736, "learning_rate": 0.00010475908080059304, "loss": 1.1154, "step": 3221 }, { "epoch": 0.47733333333333333, "grad_norm": 3.8252673149108887, "learning_rate": 0.00010472942920681987, "loss": 1.0692, "step": 3222 }, { "epoch": 0.4774814814814815, "grad_norm": 6.230849266052246, "learning_rate": 0.0001046997776130467, "loss": 1.1221, "step": 3223 }, { "epoch": 0.4776296296296296, "grad_norm": 8.367166519165039, "learning_rate": 0.00010467012601927355, "loss": 1.1213, "step": 3224 }, { "epoch": 0.4777777777777778, "grad_norm": 4.575971603393555, "learning_rate": 0.00010464047442550038, "loss": 0.983, "step": 3225 }, { "epoch": 0.4779259259259259, "grad_norm": 11.973864555358887, "learning_rate": 0.00010461082283172721, "loss": 1.1238, "step": 3226 }, { "epoch": 0.4780740740740741, "grad_norm": 3.2397303581237793, "learning_rate": 0.00010458117123795404, "loss": 0.9948, "step": 3227 }, { "epoch": 0.4782222222222222, "grad_norm": 4.968510627746582, "learning_rate": 0.00010455151964418088, "loss": 1.119, "step": 3228 }, { "epoch": 0.4783703703703704, "grad_norm": 5.719479084014893, "learning_rate": 0.00010452186805040772, "loss": 1.3429, "step": 3229 }, { "epoch": 0.4785185185185185, "grad_norm": 7.569019317626953, "learning_rate": 0.00010449221645663455, "loss": 1.024, "step": 3230 }, { "epoch": 0.4786666666666667, "grad_norm": 7.236438274383545, "learning_rate": 0.00010446256486286139, "loss": 0.9336, "step": 3231 }, { "epoch": 0.4788148148148148, "grad_norm": 3.0502982139587402, "learning_rate": 0.00010443291326908822, "loss": 1.2291, "step": 3232 }, { "epoch": 0.478962962962963, "grad_norm": 6.6787872314453125, "learning_rate": 0.00010440326167531504, "loss": 0.9153, "step": 3233 }, { "epoch": 0.4791111111111111, "grad_norm": 8.486490249633789, "learning_rate": 0.0001043736100815419, "loss": 1.3001, "step": 3234 }, { "epoch": 0.4792592592592593, 
"grad_norm": 10.042091369628906, "learning_rate": 0.00010434395848776873, "loss": 0.9567, "step": 3235 }, { "epoch": 0.4794074074074074, "grad_norm": 3.748504638671875, "learning_rate": 0.00010431430689399555, "loss": 1.0477, "step": 3236 }, { "epoch": 0.47955555555555557, "grad_norm": 5.855503082275391, "learning_rate": 0.0001042846553002224, "loss": 1.0012, "step": 3237 }, { "epoch": 0.4797037037037037, "grad_norm": 4.62381649017334, "learning_rate": 0.00010425500370644923, "loss": 1.1407, "step": 3238 }, { "epoch": 0.47985185185185186, "grad_norm": 7.165449142456055, "learning_rate": 0.00010422535211267605, "loss": 1.3546, "step": 3239 }, { "epoch": 0.48, "grad_norm": 5.236550331115723, "learning_rate": 0.00010419570051890291, "loss": 1.0317, "step": 3240 }, { "epoch": 0.48014814814814816, "grad_norm": 7.2281975746154785, "learning_rate": 0.00010416604892512973, "loss": 0.9371, "step": 3241 }, { "epoch": 0.4802962962962963, "grad_norm": 4.920871734619141, "learning_rate": 0.00010413639733135656, "loss": 1.012, "step": 3242 }, { "epoch": 0.48044444444444445, "grad_norm": 3.736154317855835, "learning_rate": 0.00010410674573758342, "loss": 0.9538, "step": 3243 }, { "epoch": 0.48059259259259257, "grad_norm": 6.0349555015563965, "learning_rate": 0.00010407709414381023, "loss": 1.2737, "step": 3244 }, { "epoch": 0.48074074074074075, "grad_norm": 3.131864070892334, "learning_rate": 0.00010404744255003706, "loss": 1.0383, "step": 3245 }, { "epoch": 0.48088888888888887, "grad_norm": 6.332339286804199, "learning_rate": 0.0001040177909562639, "loss": 1.0275, "step": 3246 }, { "epoch": 0.48103703703703704, "grad_norm": 3.2845075130462646, "learning_rate": 0.00010398813936249074, "loss": 1.1353, "step": 3247 }, { "epoch": 0.48118518518518516, "grad_norm": 5.595439434051514, "learning_rate": 0.00010395848776871757, "loss": 1.0353, "step": 3248 }, { "epoch": 0.48133333333333334, "grad_norm": 2.96281099319458, "learning_rate": 0.0001039288361749444, "loss": 0.965, "step": 3249 
}, { "epoch": 0.48148148148148145, "grad_norm": 9.507851600646973, "learning_rate": 0.00010389918458117125, "loss": 1.1361, "step": 3250 }, { "epoch": 0.48162962962962963, "grad_norm": 5.7583465576171875, "learning_rate": 0.00010386953298739808, "loss": 1.0297, "step": 3251 }, { "epoch": 0.4817777777777778, "grad_norm": 17.506040573120117, "learning_rate": 0.00010383988139362491, "loss": 0.9779, "step": 3252 }, { "epoch": 0.4819259259259259, "grad_norm": 18.134029388427734, "learning_rate": 0.00010381022979985175, "loss": 1.1237, "step": 3253 }, { "epoch": 0.4820740740740741, "grad_norm": 8.527471542358398, "learning_rate": 0.00010378057820607858, "loss": 1.0244, "step": 3254 }, { "epoch": 0.4822222222222222, "grad_norm": 2.8711700439453125, "learning_rate": 0.00010375092661230541, "loss": 1.2051, "step": 3255 }, { "epoch": 0.4823703703703704, "grad_norm": 9.874549865722656, "learning_rate": 0.00010372127501853226, "loss": 0.9903, "step": 3256 }, { "epoch": 0.4825185185185185, "grad_norm": 3.829430103302002, "learning_rate": 0.00010369162342475909, "loss": 0.9314, "step": 3257 }, { "epoch": 0.4826666666666667, "grad_norm": 20.969573974609375, "learning_rate": 0.00010366197183098592, "loss": 0.9503, "step": 3258 }, { "epoch": 0.4828148148148148, "grad_norm": 6.133211612701416, "learning_rate": 0.00010363232023721276, "loss": 1.0567, "step": 3259 }, { "epoch": 0.482962962962963, "grad_norm": 5.0121541023254395, "learning_rate": 0.0001036026686434396, "loss": 0.9237, "step": 3260 }, { "epoch": 0.4831111111111111, "grad_norm": 3.7844817638397217, "learning_rate": 0.00010357301704966643, "loss": 1.1164, "step": 3261 }, { "epoch": 0.4832592592592593, "grad_norm": 29.42932891845703, "learning_rate": 0.00010354336545589324, "loss": 1.0503, "step": 3262 }, { "epoch": 0.4834074074074074, "grad_norm": 7.4309401512146, "learning_rate": 0.0001035137138621201, "loss": 1.2604, "step": 3263 }, { "epoch": 0.48355555555555557, "grad_norm": 4.521541118621826, "learning_rate": 
0.00010348406226834693, "loss": 0.8875, "step": 3264 }, { "epoch": 0.4837037037037037, "grad_norm": 4.30009126663208, "learning_rate": 0.00010345441067457375, "loss": 1.0131, "step": 3265 }, { "epoch": 0.48385185185185187, "grad_norm": 4.7270402908325195, "learning_rate": 0.00010342475908080061, "loss": 0.9544, "step": 3266 }, { "epoch": 0.484, "grad_norm": 7.024489879608154, "learning_rate": 0.00010339510748702743, "loss": 1.0964, "step": 3267 }, { "epoch": 0.48414814814814816, "grad_norm": 7.917887210845947, "learning_rate": 0.00010336545589325426, "loss": 1.1018, "step": 3268 }, { "epoch": 0.4842962962962963, "grad_norm": 4.285741806030273, "learning_rate": 0.00010333580429948111, "loss": 0.9772, "step": 3269 }, { "epoch": 0.48444444444444446, "grad_norm": 6.478012561798096, "learning_rate": 0.00010330615270570793, "loss": 0.9533, "step": 3270 }, { "epoch": 0.4845925925925926, "grad_norm": 2.709798574447632, "learning_rate": 0.00010327650111193476, "loss": 1.0569, "step": 3271 }, { "epoch": 0.48474074074074075, "grad_norm": 9.730769157409668, "learning_rate": 0.00010324684951816162, "loss": 1.0849, "step": 3272 }, { "epoch": 0.48488888888888887, "grad_norm": 3.158259391784668, "learning_rate": 0.00010321719792438844, "loss": 1.2448, "step": 3273 }, { "epoch": 0.48503703703703704, "grad_norm": 16.45296287536621, "learning_rate": 0.00010318754633061527, "loss": 0.9933, "step": 3274 }, { "epoch": 0.48518518518518516, "grad_norm": 15.388961791992188, "learning_rate": 0.00010315789473684211, "loss": 0.9935, "step": 3275 }, { "epoch": 0.48533333333333334, "grad_norm": 3.80056095123291, "learning_rate": 0.00010312824314306894, "loss": 1.0549, "step": 3276 }, { "epoch": 0.48548148148148146, "grad_norm": 4.19111442565918, "learning_rate": 0.00010309859154929578, "loss": 1.1943, "step": 3277 }, { "epoch": 0.48562962962962963, "grad_norm": 3.5380570888519287, "learning_rate": 0.0001030689399555226, "loss": 1.0885, "step": 3278 }, { "epoch": 0.48577777777777775, 
"grad_norm": 27.214094161987305, "learning_rate": 0.00010303928836174945, "loss": 0.9228, "step": 3279 }, { "epoch": 0.48592592592592593, "grad_norm": 2.5952913761138916, "learning_rate": 0.00010300963676797628, "loss": 1.1284, "step": 3280 }, { "epoch": 0.48607407407407405, "grad_norm": 3.481954574584961, "learning_rate": 0.00010297998517420311, "loss": 1.1537, "step": 3281 }, { "epoch": 0.4862222222222222, "grad_norm": 9.62716007232666, "learning_rate": 0.00010295033358042996, "loss": 1.0798, "step": 3282 }, { "epoch": 0.4863703703703704, "grad_norm": 3.729365348815918, "learning_rate": 0.00010292068198665679, "loss": 0.9712, "step": 3283 }, { "epoch": 0.4865185185185185, "grad_norm": 6.008885383605957, "learning_rate": 0.00010289103039288362, "loss": 1.0435, "step": 3284 }, { "epoch": 0.4866666666666667, "grad_norm": 6.04400110244751, "learning_rate": 0.00010286137879911046, "loss": 1.0902, "step": 3285 }, { "epoch": 0.4868148148148148, "grad_norm": 8.919031143188477, "learning_rate": 0.0001028317272053373, "loss": 1.0534, "step": 3286 }, { "epoch": 0.486962962962963, "grad_norm": null, "learning_rate": 0.0001028317272053373, "loss": 1.3228, "step": 3287 }, { "epoch": 0.4871111111111111, "grad_norm": 2.824831485748291, "learning_rate": 0.00010280207561156412, "loss": 1.0485, "step": 3288 }, { "epoch": 0.4872592592592593, "grad_norm": 4.498315334320068, "learning_rate": 0.00010277242401779097, "loss": 1.0745, "step": 3289 }, { "epoch": 0.4874074074074074, "grad_norm": 3.279341220855713, "learning_rate": 0.0001027427724240178, "loss": 1.0256, "step": 3290 }, { "epoch": 0.4875555555555556, "grad_norm": 3.054405927658081, "learning_rate": 0.00010271312083024463, "loss": 1.1039, "step": 3291 }, { "epoch": 0.4877037037037037, "grad_norm": 4.371935844421387, "learning_rate": 0.00010268346923647148, "loss": 1.0527, "step": 3292 }, { "epoch": 0.48785185185185187, "grad_norm": 7.293436050415039, "learning_rate": 0.0001026538176426983, "loss": 0.9356, "step": 3293 }, { 
"epoch": 0.488, "grad_norm": 4.384091854095459, "learning_rate": 0.00010262416604892512, "loss": 1.1196, "step": 3294 }, { "epoch": 0.48814814814814816, "grad_norm": 5.955717086791992, "learning_rate": 0.00010259451445515198, "loss": 0.8373, "step": 3295 }, { "epoch": 0.4882962962962963, "grad_norm": 2.367436170578003, "learning_rate": 0.00010256486286137881, "loss": 0.9994, "step": 3296 }, { "epoch": 0.48844444444444446, "grad_norm": 3.521695375442505, "learning_rate": 0.00010253521126760563, "loss": 1.0349, "step": 3297 }, { "epoch": 0.4885925925925926, "grad_norm": 10.636849403381348, "learning_rate": 0.00010250555967383246, "loss": 0.9018, "step": 3298 }, { "epoch": 0.48874074074074075, "grad_norm": 2.3019065856933594, "learning_rate": 0.00010247590808005932, "loss": 0.9242, "step": 3299 }, { "epoch": 0.4888888888888889, "grad_norm": 6.471230983734131, "learning_rate": 0.00010244625648628614, "loss": 1.0473, "step": 3300 }, { "epoch": 0.48903703703703705, "grad_norm": 4.0714592933654785, "learning_rate": 0.00010241660489251297, "loss": 1.1151, "step": 3301 }, { "epoch": 0.48918518518518517, "grad_norm": 4.892930507659912, "learning_rate": 0.00010238695329873981, "loss": 1.1018, "step": 3302 }, { "epoch": 0.48933333333333334, "grad_norm": 3.136021614074707, "learning_rate": 0.00010235730170496664, "loss": 0.9623, "step": 3303 }, { "epoch": 0.48948148148148146, "grad_norm": 3.2672665119171143, "learning_rate": 0.00010232765011119347, "loss": 1.0466, "step": 3304 }, { "epoch": 0.48962962962962964, "grad_norm": 3.7321572303771973, "learning_rate": 0.00010229799851742032, "loss": 1.0502, "step": 3305 }, { "epoch": 0.48977777777777776, "grad_norm": 3.520411491394043, "learning_rate": 0.00010226834692364715, "loss": 0.9759, "step": 3306 }, { "epoch": 0.48992592592592593, "grad_norm": 3.4311301708221436, "learning_rate": 0.00010223869532987398, "loss": 0.913, "step": 3307 }, { "epoch": 0.49007407407407405, "grad_norm": 5.491833686828613, "learning_rate": 
0.00010220904373610082, "loss": 1.1901, "step": 3308 }, { "epoch": 0.4902222222222222, "grad_norm": 4.865017414093018, "learning_rate": 0.00010217939214232766, "loss": 0.9807, "step": 3309 }, { "epoch": 0.49037037037037035, "grad_norm": 5.523568630218506, "learning_rate": 0.00010214974054855449, "loss": 1.0945, "step": 3310 }, { "epoch": 0.4905185185185185, "grad_norm": 7.591002464294434, "learning_rate": 0.00010212008895478133, "loss": 1.1448, "step": 3311 }, { "epoch": 0.49066666666666664, "grad_norm": 2.7854530811309814, "learning_rate": 0.00010209043736100816, "loss": 1.0046, "step": 3312 }, { "epoch": 0.4908148148148148, "grad_norm": 3.1816959381103516, "learning_rate": 0.00010206078576723499, "loss": 1.0797, "step": 3313 }, { "epoch": 0.490962962962963, "grad_norm": 2.2549257278442383, "learning_rate": 0.00010203113417346182, "loss": 1.012, "step": 3314 }, { "epoch": 0.4911111111111111, "grad_norm": 4.814846992492676, "learning_rate": 0.00010200148257968867, "loss": 1.1395, "step": 3315 }, { "epoch": 0.4912592592592593, "grad_norm": 8.303160667419434, "learning_rate": 0.0001019718309859155, "loss": 0.9523, "step": 3316 }, { "epoch": 0.4914074074074074, "grad_norm": 7.3843464851379395, "learning_rate": 0.00010194217939214233, "loss": 0.9284, "step": 3317 }, { "epoch": 0.4915555555555556, "grad_norm": 3.921623706817627, "learning_rate": 0.00010191252779836917, "loss": 1.0163, "step": 3318 }, { "epoch": 0.4917037037037037, "grad_norm": 10.940088272094727, "learning_rate": 0.000101882876204596, "loss": 1.1748, "step": 3319 }, { "epoch": 0.4918518518518519, "grad_norm": 38.7925910949707, "learning_rate": 0.00010185322461082282, "loss": 1.1016, "step": 3320 }, { "epoch": 0.492, "grad_norm": 7.665594577789307, "learning_rate": 0.00010182357301704968, "loss": 1.0122, "step": 3321 }, { "epoch": 0.49214814814814817, "grad_norm": 2.942826747894287, "learning_rate": 0.00010179392142327651, "loss": 0.9124, "step": 3322 }, { "epoch": 0.4922962962962963, "grad_norm": 
6.7701239585876465, "learning_rate": 0.00010176426982950333, "loss": 1.1139, "step": 3323 }, { "epoch": 0.49244444444444446, "grad_norm": 5.877721786499023, "learning_rate": 0.00010173461823573019, "loss": 1.0566, "step": 3324 }, { "epoch": 0.4925925925925926, "grad_norm": 6.935197830200195, "learning_rate": 0.00010170496664195702, "loss": 1.2815, "step": 3325 }, { "epoch": 0.49274074074074076, "grad_norm": 2.7543389797210693, "learning_rate": 0.00010167531504818383, "loss": 1.0665, "step": 3326 }, { "epoch": 0.4928888888888889, "grad_norm": 2.9426321983337402, "learning_rate": 0.00010164566345441069, "loss": 1.0679, "step": 3327 }, { "epoch": 0.49303703703703705, "grad_norm": 4.462264537811279, "learning_rate": 0.00010161601186063751, "loss": 1.1118, "step": 3328 }, { "epoch": 0.49318518518518517, "grad_norm": 5.7362060546875, "learning_rate": 0.00010158636026686434, "loss": 0.9373, "step": 3329 }, { "epoch": 0.49333333333333335, "grad_norm": 11.453153610229492, "learning_rate": 0.0001015567086730912, "loss": 0.9115, "step": 3330 }, { "epoch": 0.49348148148148147, "grad_norm": 3.6631534099578857, "learning_rate": 0.00010152705707931802, "loss": 0.8081, "step": 3331 }, { "epoch": 0.49362962962962964, "grad_norm": 4.551608562469482, "learning_rate": 0.00010149740548554485, "loss": 1.0346, "step": 3332 }, { "epoch": 0.49377777777777776, "grad_norm": 19.665569305419922, "learning_rate": 0.00010146775389177168, "loss": 1.1854, "step": 3333 }, { "epoch": 0.49392592592592593, "grad_norm": 3.854532480239868, "learning_rate": 0.00010143810229799852, "loss": 0.9575, "step": 3334 }, { "epoch": 0.49407407407407405, "grad_norm": 7.537748336791992, "learning_rate": 0.00010140845070422535, "loss": 1.1637, "step": 3335 }, { "epoch": 0.49422222222222223, "grad_norm": 5.545541763305664, "learning_rate": 0.00010137879911045218, "loss": 1.0542, "step": 3336 }, { "epoch": 0.49437037037037035, "grad_norm": 5.337022304534912, "learning_rate": 0.00010134914751667903, "loss": 1.1365, 
"step": 3337 }, { "epoch": 0.4945185185185185, "grad_norm": 3.3391358852386475, "learning_rate": 0.00010131949592290586, "loss": 1.0307, "step": 3338 }, { "epoch": 0.49466666666666664, "grad_norm": 3.6873624324798584, "learning_rate": 0.00010128984432913269, "loss": 1.0978, "step": 3339 }, { "epoch": 0.4948148148148148, "grad_norm": 9.957468032836914, "learning_rate": 0.00010126019273535954, "loss": 1.0894, "step": 3340 }, { "epoch": 0.49496296296296294, "grad_norm": 3.154163122177124, "learning_rate": 0.00010123054114158637, "loss": 1.1681, "step": 3341 }, { "epoch": 0.4951111111111111, "grad_norm": 2.4883615970611572, "learning_rate": 0.0001012008895478132, "loss": 0.9564, "step": 3342 }, { "epoch": 0.49525925925925923, "grad_norm": 2.372100353240967, "learning_rate": 0.00010117123795404004, "loss": 1.1688, "step": 3343 }, { "epoch": 0.4954074074074074, "grad_norm": 6.041884899139404, "learning_rate": 0.00010114158636026687, "loss": 1.0489, "step": 3344 }, { "epoch": 0.4955555555555556, "grad_norm": 3.130751132965088, "learning_rate": 0.0001011119347664937, "loss": 1.461, "step": 3345 }, { "epoch": 0.4957037037037037, "grad_norm": 4.726568222045898, "learning_rate": 0.00010108228317272055, "loss": 1.0306, "step": 3346 }, { "epoch": 0.4958518518518519, "grad_norm": 9.536551475524902, "learning_rate": 0.00010105263157894738, "loss": 1.0686, "step": 3347 }, { "epoch": 0.496, "grad_norm": 4.3145551681518555, "learning_rate": 0.00010102297998517421, "loss": 0.9891, "step": 3348 }, { "epoch": 0.49614814814814817, "grad_norm": 2.656224012374878, "learning_rate": 0.00010099332839140103, "loss": 1.4133, "step": 3349 }, { "epoch": 0.4962962962962963, "grad_norm": 2.4819960594177246, "learning_rate": 0.00010096367679762789, "loss": 1.1467, "step": 3350 }, { "epoch": 0.49644444444444447, "grad_norm": 22.741085052490234, "learning_rate": 0.00010093402520385472, "loss": 1.1741, "step": 3351 }, { "epoch": 0.4965925925925926, "grad_norm": 5.024966716766357, "learning_rate": 
0.00010090437361008153, "loss": 1.2479, "step": 3352 }, { "epoch": 0.49674074074074076, "grad_norm": 4.4860382080078125, "learning_rate": 0.00010087472201630839, "loss": 1.0149, "step": 3353 }, { "epoch": 0.4968888888888889, "grad_norm": 18.099023818969727, "learning_rate": 0.00010084507042253521, "loss": 0.9571, "step": 3354 }, { "epoch": 0.49703703703703705, "grad_norm": 2.42246675491333, "learning_rate": 0.00010081541882876204, "loss": 1.1217, "step": 3355 }, { "epoch": 0.4971851851851852, "grad_norm": 12.542469024658203, "learning_rate": 0.0001007857672349889, "loss": 1.1278, "step": 3356 }, { "epoch": 0.49733333333333335, "grad_norm": 3.75882887840271, "learning_rate": 0.00010075611564121572, "loss": 1.3366, "step": 3357 }, { "epoch": 0.49748148148148147, "grad_norm": 2.7175066471099854, "learning_rate": 0.00010072646404744255, "loss": 1.0177, "step": 3358 }, { "epoch": 0.49762962962962964, "grad_norm": 9.794149398803711, "learning_rate": 0.0001006968124536694, "loss": 1.1153, "step": 3359 }, { "epoch": 0.49777777777777776, "grad_norm": 5.885090351104736, "learning_rate": 0.00010066716085989622, "loss": 1.0297, "step": 3360 }, { "epoch": 0.49792592592592594, "grad_norm": 2.174469470977783, "learning_rate": 0.00010063750926612305, "loss": 0.8401, "step": 3361 }, { "epoch": 0.49807407407407406, "grad_norm": 16.41845703125, "learning_rate": 0.0001006078576723499, "loss": 1.0287, "step": 3362 }, { "epoch": 0.49822222222222223, "grad_norm": 3.114132881164551, "learning_rate": 0.00010057820607857673, "loss": 0.9149, "step": 3363 }, { "epoch": 0.49837037037037035, "grad_norm": 10.348255157470703, "learning_rate": 0.00010054855448480356, "loss": 0.9482, "step": 3364 }, { "epoch": 0.4985185185185185, "grad_norm": 2.6116645336151123, "learning_rate": 0.0001005189028910304, "loss": 1.0156, "step": 3365 }, { "epoch": 0.49866666666666665, "grad_norm": 8.228729248046875, "learning_rate": 0.00010048925129725723, "loss": 1.0018, "step": 3366 }, { "epoch": 0.4988148148148148, 
"grad_norm": 5.662245273590088, "learning_rate": 0.00010045959970348406, "loss": 1.2499, "step": 3367 }, { "epoch": 0.49896296296296294, "grad_norm": 2.135146379470825, "learning_rate": 0.0001004299481097109, "loss": 1.5001, "step": 3368 }, { "epoch": 0.4991111111111111, "grad_norm": 3.819507122039795, "learning_rate": 0.00010040029651593774, "loss": 1.0538, "step": 3369 }, { "epoch": 0.49925925925925924, "grad_norm": 3.6428349018096924, "learning_rate": 0.00010037064492216457, "loss": 0.8779, "step": 3370 }, { "epoch": 0.4994074074074074, "grad_norm": 1.6564476490020752, "learning_rate": 0.0001003409933283914, "loss": 1.2154, "step": 3371 }, { "epoch": 0.49955555555555553, "grad_norm": 2.1467227935791016, "learning_rate": 0.00010031134173461825, "loss": 0.927, "step": 3372 }, { "epoch": 0.4997037037037037, "grad_norm": 3.4069087505340576, "learning_rate": 0.00010028169014084508, "loss": 1.1846, "step": 3373 }, { "epoch": 0.4998518518518518, "grad_norm": 1.6431936025619507, "learning_rate": 0.00010025203854707191, "loss": 0.9053, "step": 3374 }, { "epoch": 0.5, "grad_norm": 2.8949391841888428, "learning_rate": 0.00010022238695329875, "loss": 1.1747, "step": 3375 }, { "epoch": 0.5001481481481481, "grad_norm": 1.8398103713989258, "learning_rate": 0.00010019273535952558, "loss": 0.8153, "step": 3376 }, { "epoch": 0.5002962962962964, "grad_norm": 2.2975878715515137, "learning_rate": 0.00010016308376575241, "loss": 1.0039, "step": 3377 }, { "epoch": 0.5004444444444445, "grad_norm": 4.093273639678955, "learning_rate": 0.00010013343217197926, "loss": 1.2182, "step": 3378 }, { "epoch": 0.5005925925925926, "grad_norm": 3.4979758262634277, "learning_rate": 0.00010010378057820609, "loss": 1.115, "step": 3379 }, { "epoch": 0.5007407407407407, "grad_norm": 2.741710662841797, "learning_rate": 0.00010007412898443291, "loss": 0.9741, "step": 3380 }, { "epoch": 0.5008888888888889, "grad_norm": 3.284590482711792, "learning_rate": 0.00010004447739065977, "loss": 0.9585, "step": 3381 
}, { "epoch": 0.5010370370370371, "grad_norm": 6.5590081214904785, "learning_rate": 0.0001000148257968866, "loss": 1.0111, "step": 3382 }, { "epoch": 0.5011851851851852, "grad_norm": 1.995739221572876, "learning_rate": 9.998517420311341e-05, "loss": 1.2119, "step": 3383 }, { "epoch": 0.5013333333333333, "grad_norm": 5.810882568359375, "learning_rate": 9.995552260934026e-05, "loss": 0.8591, "step": 3384 }, { "epoch": 0.5014814814814815, "grad_norm": 1.9267522096633911, "learning_rate": 9.99258710155671e-05, "loss": 1.2118, "step": 3385 }, { "epoch": 0.5016296296296296, "grad_norm": 2.457502603530884, "learning_rate": 9.989621942179392e-05, "loss": 1.194, "step": 3386 }, { "epoch": 0.5017777777777778, "grad_norm": 2.373825788497925, "learning_rate": 9.986656782802076e-05, "loss": 1.1115, "step": 3387 }, { "epoch": 0.5019259259259259, "grad_norm": 2.949768304824829, "learning_rate": 9.98369162342476e-05, "loss": 1.1156, "step": 3388 }, { "epoch": 0.5020740740740741, "grad_norm": 2.7182395458221436, "learning_rate": 9.980726464047443e-05, "loss": 1.155, "step": 3389 }, { "epoch": 0.5022222222222222, "grad_norm": 3.5946409702301025, "learning_rate": 9.977761304670127e-05, "loss": 1.2636, "step": 3390 }, { "epoch": 0.5023703703703704, "grad_norm": 3.0228304862976074, "learning_rate": 9.97479614529281e-05, "loss": 1.1162, "step": 3391 }, { "epoch": 0.5025185185185185, "grad_norm": 2.010730028152466, "learning_rate": 9.971830985915493e-05, "loss": 0.9778, "step": 3392 }, { "epoch": 0.5026666666666667, "grad_norm": 2.3101558685302734, "learning_rate": 9.968865826538178e-05, "loss": 0.9486, "step": 3393 }, { "epoch": 0.5028148148148148, "grad_norm": 4.203605651855469, "learning_rate": 9.965900667160861e-05, "loss": 1.4041, "step": 3394 }, { "epoch": 0.502962962962963, "grad_norm": 2.7500808238983154, "learning_rate": 9.962935507783544e-05, "loss": 1.0569, "step": 3395 }, { "epoch": 0.5031111111111111, "grad_norm": 2.914515972137451, "learning_rate": 9.959970348406227e-05, 
"loss": 1.0734, "step": 3396 }, { "epoch": 0.5032592592592593, "grad_norm": 4.110001087188721, "learning_rate": 9.95700518902891e-05, "loss": 1.1533, "step": 3397 }, { "epoch": 0.5034074074074074, "grad_norm": 1.5626189708709717, "learning_rate": 9.954040029651594e-05, "loss": 1.1873, "step": 3398 }, { "epoch": 0.5035555555555555, "grad_norm": 2.6834592819213867, "learning_rate": 9.951074870274278e-05, "loss": 1.1417, "step": 3399 }, { "epoch": 0.5037037037037037, "grad_norm": 1.8137449026107788, "learning_rate": 9.94810971089696e-05, "loss": 1.0368, "step": 3400 }, { "epoch": 0.5038518518518519, "grad_norm": 1.611829161643982, "learning_rate": 9.945144551519645e-05, "loss": 0.8365, "step": 3401 }, { "epoch": 0.504, "grad_norm": 2.8950698375701904, "learning_rate": 9.942179392142328e-05, "loss": 1.103, "step": 3402 }, { "epoch": 0.5041481481481481, "grad_norm": 3.5890421867370605, "learning_rate": 9.939214232765011e-05, "loss": 1.0627, "step": 3403 }, { "epoch": 0.5042962962962962, "grad_norm": 1.892509937286377, "learning_rate": 9.936249073387696e-05, "loss": 1.0033, "step": 3404 }, { "epoch": 0.5044444444444445, "grad_norm": 2.3791167736053467, "learning_rate": 9.933283914010379e-05, "loss": 1.243, "step": 3405 }, { "epoch": 0.5045925925925926, "grad_norm": 1.5246496200561523, "learning_rate": 9.930318754633062e-05, "loss": 1.0807, "step": 3406 }, { "epoch": 0.5047407407407407, "grad_norm": 1.6053105592727661, "learning_rate": 9.927353595255745e-05, "loss": 1.0406, "step": 3407 }, { "epoch": 0.5048888888888889, "grad_norm": 2.311598539352417, "learning_rate": 9.92438843587843e-05, "loss": 1.0438, "step": 3408 }, { "epoch": 0.5050370370370371, "grad_norm": 5.0231781005859375, "learning_rate": 9.921423276501113e-05, "loss": 0.9384, "step": 3409 }, { "epoch": 0.5051851851851852, "grad_norm": 1.5837359428405762, "learning_rate": 9.918458117123796e-05, "loss": 1.1756, "step": 3410 }, { "epoch": 0.5053333333333333, "grad_norm": 1.6837211847305298, "learning_rate": 
9.91549295774648e-05, "loss": 0.8449, "step": 3411 }, { "epoch": 0.5054814814814815, "grad_norm": 2.0716450214385986, "learning_rate": 9.912527798369163e-05, "loss": 1.2139, "step": 3412 }, { "epoch": 0.5056296296296297, "grad_norm": 1.12191641330719, "learning_rate": 9.909562638991846e-05, "loss": 0.8494, "step": 3413 }, { "epoch": 0.5057777777777778, "grad_norm": 1.795994520187378, "learning_rate": 9.90659747961453e-05, "loss": 1.3463, "step": 3414 }, { "epoch": 0.5059259259259259, "grad_norm": 1.747239112854004, "learning_rate": 9.903632320237212e-05, "loss": 1.0093, "step": 3415 }, { "epoch": 0.5060740740740741, "grad_norm": 1.6786613464355469, "learning_rate": 9.900667160859897e-05, "loss": 1.0296, "step": 3416 }, { "epoch": 0.5062222222222222, "grad_norm": 1.5573384761810303, "learning_rate": 9.89770200148258e-05, "loss": 0.7602, "step": 3417 }, { "epoch": 0.5063703703703704, "grad_norm": 1.5690990686416626, "learning_rate": 9.894736842105263e-05, "loss": 1.0206, "step": 3418 }, { "epoch": 0.5065185185185185, "grad_norm": 2.2538905143737793, "learning_rate": 9.891771682727948e-05, "loss": 1.2526, "step": 3419 }, { "epoch": 0.5066666666666667, "grad_norm": 2.059178590774536, "learning_rate": 9.88880652335063e-05, "loss": 0.9341, "step": 3420 }, { "epoch": 0.5068148148148148, "grad_norm": 3.653055429458618, "learning_rate": 9.885841363973314e-05, "loss": 1.1119, "step": 3421 }, { "epoch": 0.506962962962963, "grad_norm": 2.135084629058838, "learning_rate": 9.882876204595998e-05, "loss": 0.9503, "step": 3422 }, { "epoch": 0.5071111111111111, "grad_norm": 1.3559598922729492, "learning_rate": 9.87991104521868e-05, "loss": 1.0422, "step": 3423 }, { "epoch": 0.5072592592592593, "grad_norm": 3.151815414428711, "learning_rate": 9.876945885841364e-05, "loss": 0.8861, "step": 3424 }, { "epoch": 0.5074074074074074, "grad_norm": 1.5202436447143555, "learning_rate": 9.873980726464049e-05, "loss": 1.2197, "step": 3425 }, { "epoch": 0.5075555555555555, "grad_norm": 
2.9413371086120605, "learning_rate": 9.87101556708673e-05, "loss": 1.0491, "step": 3426 }, { "epoch": 0.5077037037037037, "grad_norm": 1.1613496541976929, "learning_rate": 9.868050407709415e-05, "loss": 1.1598, "step": 3427 }, { "epoch": 0.5078518518518519, "grad_norm": 2.011124849319458, "learning_rate": 9.8650852483321e-05, "loss": 1.1567, "step": 3428 }, { "epoch": 0.508, "grad_norm": 2.108271360397339, "learning_rate": 9.862120088954781e-05, "loss": 1.0974, "step": 3429 }, { "epoch": 0.5081481481481481, "grad_norm": 3.502854585647583, "learning_rate": 9.859154929577466e-05, "loss": 1.1057, "step": 3430 }, { "epoch": 0.5082962962962962, "grad_norm": 1.2759106159210205, "learning_rate": 9.856189770200149e-05, "loss": 1.0656, "step": 3431 }, { "epoch": 0.5084444444444445, "grad_norm": 2.211440086364746, "learning_rate": 9.853224610822832e-05, "loss": 0.9594, "step": 3432 }, { "epoch": 0.5085925925925926, "grad_norm": 1.1375516653060913, "learning_rate": 9.850259451445516e-05, "loss": 1.166, "step": 3433 }, { "epoch": 0.5087407407407407, "grad_norm": 1.9250344038009644, "learning_rate": 9.847294292068199e-05, "loss": 0.9071, "step": 3434 }, { "epoch": 0.5088888888888888, "grad_norm": 1.8056732416152954, "learning_rate": 9.844329132690882e-05, "loss": 1.1854, "step": 3435 }, { "epoch": 0.5090370370370371, "grad_norm": 1.496208667755127, "learning_rate": 9.841363973313567e-05, "loss": 0.9071, "step": 3436 }, { "epoch": 0.5091851851851852, "grad_norm": 2.020498037338257, "learning_rate": 9.83839881393625e-05, "loss": 1.02, "step": 3437 }, { "epoch": 0.5093333333333333, "grad_norm": 1.7645292282104492, "learning_rate": 9.835433654558933e-05, "loss": 1.0352, "step": 3438 }, { "epoch": 0.5094814814814815, "grad_norm": 3.5330545902252197, "learning_rate": 9.832468495181616e-05, "loss": 0.8771, "step": 3439 }, { "epoch": 0.5096296296296297, "grad_norm": 1.1161566972732544, "learning_rate": 9.829503335804299e-05, "loss": 1.3086, "step": 3440 }, { "epoch": 
0.5097777777777778, "grad_norm": 1.4570153951644897, "learning_rate": 9.826538176426984e-05, "loss": 1.1426, "step": 3441 }, { "epoch": 0.5099259259259259, "grad_norm": 2.8942079544067383, "learning_rate": 9.823573017049667e-05, "loss": 0.8561, "step": 3442 }, { "epoch": 0.5100740740740741, "grad_norm": 2.263978958129883, "learning_rate": 9.82060785767235e-05, "loss": 1.1924, "step": 3443 }, { "epoch": 0.5102222222222222, "grad_norm": 1.3524726629257202, "learning_rate": 9.817642698295034e-05, "loss": 1.1621, "step": 3444 }, { "epoch": 0.5103703703703704, "grad_norm": 1.2318508625030518, "learning_rate": 9.814677538917717e-05, "loss": 1.1966, "step": 3445 }, { "epoch": 0.5105185185185185, "grad_norm": 2.823561191558838, "learning_rate": 9.8117123795404e-05, "loss": 0.9551, "step": 3446 }, { "epoch": 0.5106666666666667, "grad_norm": 1.9692493677139282, "learning_rate": 9.808747220163085e-05, "loss": 1.0933, "step": 3447 }, { "epoch": 0.5108148148148148, "grad_norm": 2.315603017807007, "learning_rate": 9.805782060785768e-05, "loss": 1.1432, "step": 3448 }, { "epoch": 0.510962962962963, "grad_norm": 1.3484950065612793, "learning_rate": 9.802816901408451e-05, "loss": 0.9704, "step": 3449 }, { "epoch": 0.5111111111111111, "grad_norm": 4.833320140838623, "learning_rate": 9.799851742031134e-05, "loss": 0.9685, "step": 3450 }, { "epoch": 0.5112592592592593, "grad_norm": 1.722293734550476, "learning_rate": 9.796886582653819e-05, "loss": 1.0885, "step": 3451 }, { "epoch": 0.5114074074074074, "grad_norm": 1.3373132944107056, "learning_rate": 9.793921423276502e-05, "loss": 0.9328, "step": 3452 }, { "epoch": 0.5115555555555555, "grad_norm": 2.0415165424346924, "learning_rate": 9.790956263899185e-05, "loss": 1.0558, "step": 3453 }, { "epoch": 0.5117037037037037, "grad_norm": 1.8608633279800415, "learning_rate": 9.787991104521869e-05, "loss": 1.1091, "step": 3454 }, { "epoch": 0.5118518518518519, "grad_norm": 1.510023832321167, "learning_rate": 9.785025945144552e-05, "loss": 
0.9175, "step": 3455 }, { "epoch": 0.512, "grad_norm": 1.702152967453003, "learning_rate": 9.782060785767235e-05, "loss": 1.1188, "step": 3456 }, { "epoch": 0.5121481481481481, "grad_norm": 1.7295708656311035, "learning_rate": 9.779095626389919e-05, "loss": 1.1258, "step": 3457 }, { "epoch": 0.5122962962962962, "grad_norm": 1.4218422174453735, "learning_rate": 9.776130467012602e-05, "loss": 1.1501, "step": 3458 }, { "epoch": 0.5124444444444445, "grad_norm": 2.1711299419403076, "learning_rate": 9.773165307635286e-05, "loss": 1.0482, "step": 3459 }, { "epoch": 0.5125925925925926, "grad_norm": 2.0415642261505127, "learning_rate": 9.770200148257969e-05, "loss": 1.2222, "step": 3460 }, { "epoch": 0.5127407407407407, "grad_norm": 1.3829699754714966, "learning_rate": 9.767234988880652e-05, "loss": 0.9807, "step": 3461 }, { "epoch": 0.5128888888888888, "grad_norm": 2.3946306705474854, "learning_rate": 9.764269829503337e-05, "loss": 1.1625, "step": 3462 }, { "epoch": 0.5130370370370371, "grad_norm": 1.7022687196731567, "learning_rate": 9.76130467012602e-05, "loss": 1.0249, "step": 3463 }, { "epoch": 0.5131851851851852, "grad_norm": 2.9826409816741943, "learning_rate": 9.758339510748703e-05, "loss": 1.1052, "step": 3464 }, { "epoch": 0.5133333333333333, "grad_norm": 3.7996156215667725, "learning_rate": 9.755374351371387e-05, "loss": 1.0881, "step": 3465 }, { "epoch": 0.5134814814814814, "grad_norm": 7.144508361816406, "learning_rate": 9.752409191994069e-05, "loss": 1.2364, "step": 3466 }, { "epoch": 0.5136296296296297, "grad_norm": 1.8672593832015991, "learning_rate": 9.749444032616753e-05, "loss": 1.0515, "step": 3467 }, { "epoch": 0.5137777777777778, "grad_norm": 1.5496947765350342, "learning_rate": 9.746478873239438e-05, "loss": 1.0754, "step": 3468 }, { "epoch": 0.5139259259259259, "grad_norm": 1.7912988662719727, "learning_rate": 9.74351371386212e-05, "loss": 1.054, "step": 3469 }, { "epoch": 0.5140740740740741, "grad_norm": 1.3586891889572144, "learning_rate": 
9.740548554484804e-05, "loss": 1.1254, "step": 3470 }, { "epoch": 0.5142222222222222, "grad_norm": 2.686096429824829, "learning_rate": 9.737583395107489e-05, "loss": 1.1777, "step": 3471 }, { "epoch": 0.5143703703703704, "grad_norm": 1.231162190437317, "learning_rate": 9.73461823573017e-05, "loss": 1.097, "step": 3472 }, { "epoch": 0.5145185185185185, "grad_norm": 1.7783644199371338, "learning_rate": 9.731653076352855e-05, "loss": 0.9981, "step": 3473 }, { "epoch": 0.5146666666666667, "grad_norm": 2.513786554336548, "learning_rate": 9.728687916975538e-05, "loss": 0.9621, "step": 3474 }, { "epoch": 0.5148148148148148, "grad_norm": 7.590953350067139, "learning_rate": 9.725722757598221e-05, "loss": 1.263, "step": 3475 }, { "epoch": 0.514962962962963, "grad_norm": 1.2946828603744507, "learning_rate": 9.722757598220905e-05, "loss": 1.0425, "step": 3476 }, { "epoch": 0.5151111111111111, "grad_norm": 1.7475824356079102, "learning_rate": 9.719792438843588e-05, "loss": 1.0472, "step": 3477 }, { "epoch": 0.5152592592592593, "grad_norm": 1.539388656616211, "learning_rate": 9.716827279466272e-05, "loss": 1.0123, "step": 3478 }, { "epoch": 0.5154074074074074, "grad_norm": 2.2849812507629395, "learning_rate": 9.713862120088956e-05, "loss": 1.3275, "step": 3479 }, { "epoch": 0.5155555555555555, "grad_norm": 2.3221042156219482, "learning_rate": 9.710896960711639e-05, "loss": 0.8895, "step": 3480 }, { "epoch": 0.5157037037037037, "grad_norm": 2.655463933944702, "learning_rate": 9.707931801334322e-05, "loss": 0.9543, "step": 3481 }, { "epoch": 0.5158518518518519, "grad_norm": 1.56253182888031, "learning_rate": 9.704966641957005e-05, "loss": 1.0628, "step": 3482 }, { "epoch": 0.516, "grad_norm": 1.8649121522903442, "learning_rate": 9.702001482579688e-05, "loss": 0.8793, "step": 3483 }, { "epoch": 0.5161481481481481, "grad_norm": 2.0733768939971924, "learning_rate": 9.699036323202373e-05, "loss": 1.2105, "step": 3484 }, { "epoch": 0.5162962962962963, "grad_norm": 2.5004401206970215, 
"learning_rate": 9.696071163825056e-05, "loss": 1.2187, "step": 3485 }, { "epoch": 0.5164444444444445, "grad_norm": 2.358077049255371, "learning_rate": 9.693106004447739e-05, "loss": 1.0586, "step": 3486 }, { "epoch": 0.5165925925925926, "grad_norm": 1.6531083583831787, "learning_rate": 9.690140845070423e-05, "loss": 0.8671, "step": 3487 }, { "epoch": 0.5167407407407407, "grad_norm": 1.7980809211730957, "learning_rate": 9.687175685693107e-05, "loss": 1.0647, "step": 3488 }, { "epoch": 0.5168888888888888, "grad_norm": 2.022136926651001, "learning_rate": 9.68421052631579e-05, "loss": 0.9466, "step": 3489 }, { "epoch": 0.5170370370370371, "grad_norm": 1.33211350440979, "learning_rate": 9.681245366938474e-05, "loss": 1.1837, "step": 3490 }, { "epoch": 0.5171851851851852, "grad_norm": 2.676328182220459, "learning_rate": 9.678280207561157e-05, "loss": 1.0544, "step": 3491 }, { "epoch": 0.5173333333333333, "grad_norm": 1.818198800086975, "learning_rate": 9.67531504818384e-05, "loss": 1.1062, "step": 3492 }, { "epoch": 0.5174814814814814, "grad_norm": 1.833338975906372, "learning_rate": 9.672349888806523e-05, "loss": 1.0738, "step": 3493 }, { "epoch": 0.5176296296296297, "grad_norm": 2.3086183071136475, "learning_rate": 9.669384729429208e-05, "loss": 1.0157, "step": 3494 }, { "epoch": 0.5177777777777778, "grad_norm": 1.5982645750045776, "learning_rate": 9.666419570051891e-05, "loss": 1.0132, "step": 3495 }, { "epoch": 0.5179259259259259, "grad_norm": 1.5193531513214111, "learning_rate": 9.663454410674574e-05, "loss": 1.0794, "step": 3496 }, { "epoch": 0.518074074074074, "grad_norm": 7.415847301483154, "learning_rate": 9.660489251297258e-05, "loss": 0.8472, "step": 3497 }, { "epoch": 0.5182222222222223, "grad_norm": 1.256219506263733, "learning_rate": 9.657524091919942e-05, "loss": 1.0972, "step": 3498 }, { "epoch": 0.5183703703703704, "grad_norm": 4.069087028503418, "learning_rate": 9.654558932542625e-05, "loss": 1.1666, "step": 3499 }, { "epoch": 0.5185185185185185, 
"grad_norm": 2.4944255352020264, "learning_rate": 9.651593773165308e-05, "loss": 0.8349, "step": 3500 }, { "epoch": 0.5186666666666667, "grad_norm": 2.0719282627105713, "learning_rate": 9.648628613787991e-05, "loss": 1.1829, "step": 3501 }, { "epoch": 0.5188148148148148, "grad_norm": 3.4375545978546143, "learning_rate": 9.645663454410675e-05, "loss": 1.0, "step": 3502 }, { "epoch": 0.518962962962963, "grad_norm": 2.382495641708374, "learning_rate": 9.642698295033358e-05, "loss": 1.275, "step": 3503 }, { "epoch": 0.5191111111111111, "grad_norm": 2.8072335720062256, "learning_rate": 9.639733135656041e-05, "loss": 1.3857, "step": 3504 }, { "epoch": 0.5192592592592593, "grad_norm": 2.0779919624328613, "learning_rate": 9.636767976278726e-05, "loss": 1.1554, "step": 3505 }, { "epoch": 0.5194074074074074, "grad_norm": 2.170212507247925, "learning_rate": 9.633802816901409e-05, "loss": 0.9806, "step": 3506 }, { "epoch": 0.5195555555555555, "grad_norm": 1.8128262758255005, "learning_rate": 9.630837657524092e-05, "loss": 1.0177, "step": 3507 }, { "epoch": 0.5197037037037037, "grad_norm": 1.5690702199935913, "learning_rate": 9.627872498146776e-05, "loss": 0.9887, "step": 3508 }, { "epoch": 0.5198518518518519, "grad_norm": 2.0018036365509033, "learning_rate": 9.624907338769458e-05, "loss": 0.9906, "step": 3509 }, { "epoch": 0.52, "grad_norm": 2.020087718963623, "learning_rate": 9.621942179392143e-05, "loss": 0.9797, "step": 3510 }, { "epoch": 0.5201481481481481, "grad_norm": 3.4835586547851562, "learning_rate": 9.618977020014827e-05, "loss": 0.8488, "step": 3511 }, { "epoch": 0.5202962962962963, "grad_norm": 1.7970818281173706, "learning_rate": 9.616011860637509e-05, "loss": 1.145, "step": 3512 }, { "epoch": 0.5204444444444445, "grad_norm": 2.0925204753875732, "learning_rate": 9.613046701260193e-05, "loss": 1.3318, "step": 3513 }, { "epoch": 0.5205925925925926, "grad_norm": 2.5351264476776123, "learning_rate": 9.610081541882878e-05, "loss": 0.9796, "step": 3514 }, { "epoch": 
0.5207407407407407, "grad_norm": 1.360851764678955, "learning_rate": 9.60711638250556e-05, "loss": 1.0119, "step": 3515 }, { "epoch": 0.5208888888888888, "grad_norm": 2.807292938232422, "learning_rate": 9.604151223128244e-05, "loss": 0.9417, "step": 3516 }, { "epoch": 0.5210370370370371, "grad_norm": 2.252411127090454, "learning_rate": 9.601186063750927e-05, "loss": 1.3305, "step": 3517 }, { "epoch": 0.5211851851851852, "grad_norm": 1.8232671022415161, "learning_rate": 9.59822090437361e-05, "loss": 1.0784, "step": 3518 }, { "epoch": 0.5213333333333333, "grad_norm": 2.167621612548828, "learning_rate": 9.595255744996295e-05, "loss": 0.9162, "step": 3519 }, { "epoch": 0.5214814814814814, "grad_norm": 2.444425106048584, "learning_rate": 9.592290585618978e-05, "loss": 1.021, "step": 3520 }, { "epoch": 0.5216296296296297, "grad_norm": 1.9010529518127441, "learning_rate": 9.589325426241661e-05, "loss": 0.9694, "step": 3521 }, { "epoch": 0.5217777777777778, "grad_norm": 1.6443415880203247, "learning_rate": 9.586360266864345e-05, "loss": 1.0086, "step": 3522 }, { "epoch": 0.5219259259259259, "grad_norm": 2.088010311126709, "learning_rate": 9.583395107487028e-05, "loss": 1.1408, "step": 3523 }, { "epoch": 0.522074074074074, "grad_norm": 1.4378952980041504, "learning_rate": 9.580429948109711e-05, "loss": 0.9919, "step": 3524 }, { "epoch": 0.5222222222222223, "grad_norm": 2.5560710430145264, "learning_rate": 9.577464788732394e-05, "loss": 0.9739, "step": 3525 }, { "epoch": 0.5223703703703704, "grad_norm": 1.7038018703460693, "learning_rate": 9.574499629355078e-05, "loss": 1.1463, "step": 3526 }, { "epoch": 0.5225185185185185, "grad_norm": 2.424886703491211, "learning_rate": 9.571534469977762e-05, "loss": 1.2281, "step": 3527 }, { "epoch": 0.5226666666666666, "grad_norm": 2.7288095951080322, "learning_rate": 9.568569310600445e-05, "loss": 1.1308, "step": 3528 }, { "epoch": 0.5228148148148148, "grad_norm": 2.1019182205200195, "learning_rate": 9.565604151223128e-05, "loss": 
0.9406, "step": 3529 }, { "epoch": 0.522962962962963, "grad_norm": 4.319130897521973, "learning_rate": 9.562638991845813e-05, "loss": 1.015, "step": 3530 }, { "epoch": 0.5231111111111111, "grad_norm": 1.8898524045944214, "learning_rate": 9.559673832468496e-05, "loss": 0.9298, "step": 3531 }, { "epoch": 0.5232592592592593, "grad_norm": 3.966099500656128, "learning_rate": 9.556708673091179e-05, "loss": 0.919, "step": 3532 }, { "epoch": 0.5234074074074074, "grad_norm": 2.4514873027801514, "learning_rate": 9.553743513713863e-05, "loss": 1.2715, "step": 3533 }, { "epoch": 0.5235555555555556, "grad_norm": 1.3653898239135742, "learning_rate": 9.550778354336546e-05, "loss": 1.0814, "step": 3534 }, { "epoch": 0.5237037037037037, "grad_norm": 1.8816627264022827, "learning_rate": 9.54781319495923e-05, "loss": 0.9403, "step": 3535 }, { "epoch": 0.5238518518518519, "grad_norm": 4.248720169067383, "learning_rate": 9.544848035581913e-05, "loss": 1.0868, "step": 3536 }, { "epoch": 0.524, "grad_norm": 2.754378318786621, "learning_rate": 9.541882876204597e-05, "loss": 1.1859, "step": 3537 }, { "epoch": 0.5241481481481481, "grad_norm": 2.7608413696289062, "learning_rate": 9.53891771682728e-05, "loss": 0.9651, "step": 3538 }, { "epoch": 0.5242962962962963, "grad_norm": 3.119210720062256, "learning_rate": 9.535952557449963e-05, "loss": 1.2463, "step": 3539 }, { "epoch": 0.5244444444444445, "grad_norm": 4.5977654457092285, "learning_rate": 9.532987398072648e-05, "loss": 0.8846, "step": 3540 }, { "epoch": 0.5245925925925926, "grad_norm": 1.846290946006775, "learning_rate": 9.53002223869533e-05, "loss": 1.1083, "step": 3541 }, { "epoch": 0.5247407407407407, "grad_norm": 3.4465091228485107, "learning_rate": 9.527057079318014e-05, "loss": 0.9773, "step": 3542 }, { "epoch": 0.5248888888888888, "grad_norm": 4.273923873901367, "learning_rate": 9.524091919940697e-05, "loss": 1.175, "step": 3543 }, { "epoch": 0.5250370370370371, "grad_norm": 2.4471707344055176, "learning_rate": 
9.52112676056338e-05, "loss": 0.9078, "step": 3544 }, { "epoch": 0.5251851851851852, "grad_norm": 1.573663592338562, "learning_rate": 9.518161601186064e-05, "loss": 1.0537, "step": 3545 }, { "epoch": 0.5253333333333333, "grad_norm": 3.942758321762085, "learning_rate": 9.515196441808747e-05, "loss": 1.2459, "step": 3546 }, { "epoch": 0.5254814814814814, "grad_norm": 2.88981032371521, "learning_rate": 9.51223128243143e-05, "loss": 1.0162, "step": 3547 }, { "epoch": 0.5256296296296297, "grad_norm": 6.503210544586182, "learning_rate": 9.509266123054115e-05, "loss": 1.2065, "step": 3548 }, { "epoch": 0.5257777777777778, "grad_norm": 3.7239830493927, "learning_rate": 9.506300963676798e-05, "loss": 1.2433, "step": 3549 }, { "epoch": 0.5259259259259259, "grad_norm": 3.274492025375366, "learning_rate": 9.503335804299481e-05, "loss": 1.2985, "step": 3550 }, { "epoch": 0.526074074074074, "grad_norm": 5.178545951843262, "learning_rate": 9.500370644922166e-05, "loss": 1.1406, "step": 3551 }, { "epoch": 0.5262222222222223, "grad_norm": 4.6761393547058105, "learning_rate": 9.497405485544847e-05, "loss": 1.2129, "step": 3552 }, { "epoch": 0.5263703703703704, "grad_norm": 2.7359654903411865, "learning_rate": 9.494440326167532e-05, "loss": 1.2082, "step": 3553 }, { "epoch": 0.5265185185185185, "grad_norm": 7.111677169799805, "learning_rate": 9.491475166790216e-05, "loss": 0.9609, "step": 3554 }, { "epoch": 0.5266666666666666, "grad_norm": 1.8699086904525757, "learning_rate": 9.488510007412898e-05, "loss": 0.8755, "step": 3555 }, { "epoch": 0.5268148148148148, "grad_norm": 2.1888856887817383, "learning_rate": 9.485544848035582e-05, "loss": 0.9932, "step": 3556 }, { "epoch": 0.526962962962963, "grad_norm": 2.197059154510498, "learning_rate": 9.482579688658267e-05, "loss": 1.0316, "step": 3557 }, { "epoch": 0.5271111111111111, "grad_norm": 3.5654101371765137, "learning_rate": 9.479614529280949e-05, "loss": 1.1914, "step": 3558 }, { "epoch": 0.5272592592592592, "grad_norm": 
4.099245548248291, "learning_rate": 9.476649369903633e-05, "loss": 1.0678, "step": 3559 }, { "epoch": 0.5274074074074074, "grad_norm": 2.1959383487701416, "learning_rate": 9.473684210526316e-05, "loss": 1.2129, "step": 3560 }, { "epoch": 0.5275555555555556, "grad_norm": 3.4108617305755615, "learning_rate": 9.470719051148999e-05, "loss": 0.9935, "step": 3561 }, { "epoch": 0.5277037037037037, "grad_norm": 4.734465599060059, "learning_rate": 9.467753891771684e-05, "loss": 0.7632, "step": 3562 }, { "epoch": 0.5278518518518519, "grad_norm": 1.6672844886779785, "learning_rate": 9.464788732394367e-05, "loss": 0.931, "step": 3563 }, { "epoch": 0.528, "grad_norm": 3.7147114276885986, "learning_rate": 9.46182357301705e-05, "loss": 0.9767, "step": 3564 }, { "epoch": 0.5281481481481481, "grad_norm": 2.0520565509796143, "learning_rate": 9.458858413639734e-05, "loss": 0.7752, "step": 3565 }, { "epoch": 0.5282962962962963, "grad_norm": 2.508117914199829, "learning_rate": 9.455893254262417e-05, "loss": 1.0447, "step": 3566 }, { "epoch": 0.5284444444444445, "grad_norm": 2.8419182300567627, "learning_rate": 9.4529280948851e-05, "loss": 0.7909, "step": 3567 }, { "epoch": 0.5285925925925926, "grad_norm": 2.5379035472869873, "learning_rate": 9.449962935507784e-05, "loss": 1.2331, "step": 3568 }, { "epoch": 0.5287407407407407, "grad_norm": 4.583249092102051, "learning_rate": 9.446997776130467e-05, "loss": 1.1977, "step": 3569 }, { "epoch": 0.5288888888888889, "grad_norm": 2.7971770763397217, "learning_rate": 9.444032616753151e-05, "loss": 1.168, "step": 3570 }, { "epoch": 0.5290370370370371, "grad_norm": 7.165088653564453, "learning_rate": 9.441067457375834e-05, "loss": 1.0675, "step": 3571 }, { "epoch": 0.5291851851851852, "grad_norm": 3.349947690963745, "learning_rate": 9.438102297998517e-05, "loss": 0.9698, "step": 3572 }, { "epoch": 0.5293333333333333, "grad_norm": 2.9816622734069824, "learning_rate": 9.435137138621202e-05, "loss": 1.1645, "step": 3573 }, { "epoch": 
0.5294814814814814, "grad_norm": 6.285075664520264, "learning_rate": 9.432171979243885e-05, "loss": 1.2269, "step": 3574 }, { "epoch": 0.5296296296296297, "grad_norm": 2.5549850463867188, "learning_rate": 9.429206819866568e-05, "loss": 1.0708, "step": 3575 }, { "epoch": 0.5297777777777778, "grad_norm": 5.181352615356445, "learning_rate": 9.426241660489252e-05, "loss": 1.0041, "step": 3576 }, { "epoch": 0.5299259259259259, "grad_norm": 3.1909756660461426, "learning_rate": 9.423276501111935e-05, "loss": 0.9387, "step": 3577 }, { "epoch": 0.530074074074074, "grad_norm": 3.0633323192596436, "learning_rate": 9.420311341734619e-05, "loss": 1.1736, "step": 3578 }, { "epoch": 0.5302222222222223, "grad_norm": 3.279362916946411, "learning_rate": 9.417346182357302e-05, "loss": 0.8721, "step": 3579 }, { "epoch": 0.5303703703703704, "grad_norm": 3.5336806774139404, "learning_rate": 9.414381022979986e-05, "loss": 1.3407, "step": 3580 }, { "epoch": 0.5305185185185185, "grad_norm": 2.430006504058838, "learning_rate": 9.411415863602669e-05, "loss": 0.9274, "step": 3581 }, { "epoch": 0.5306666666666666, "grad_norm": 2.469728946685791, "learning_rate": 9.408450704225352e-05, "loss": 1.1849, "step": 3582 }, { "epoch": 0.5308148148148149, "grad_norm": 8.362934112548828, "learning_rate": 9.405485544848037e-05, "loss": 1.0888, "step": 3583 }, { "epoch": 0.530962962962963, "grad_norm": 5.342774391174316, "learning_rate": 9.40252038547072e-05, "loss": 1.2708, "step": 3584 }, { "epoch": 0.5311111111111111, "grad_norm": 11.297273635864258, "learning_rate": 9.399555226093403e-05, "loss": 1.0547, "step": 3585 }, { "epoch": 0.5312592592592592, "grad_norm": 3.0192387104034424, "learning_rate": 9.396590066716086e-05, "loss": 1.0153, "step": 3586 }, { "epoch": 0.5314074074074074, "grad_norm": 3.046156883239746, "learning_rate": 9.393624907338769e-05, "loss": 1.0377, "step": 3587 }, { "epoch": 0.5315555555555556, "grad_norm": 2.5736989974975586, "learning_rate": 9.390659747961454e-05, "loss": 
0.9452, "step": 3588 }, { "epoch": 0.5317037037037037, "grad_norm": 4.893460750579834, "learning_rate": 9.387694588584137e-05, "loss": 1.1708, "step": 3589 }, { "epoch": 0.5318518518518518, "grad_norm": 4.664818286895752, "learning_rate": 9.38472942920682e-05, "loss": 1.1881, "step": 3590 }, { "epoch": 0.532, "grad_norm": 6.313598155975342, "learning_rate": 9.381764269829504e-05, "loss": 1.0385, "step": 3591 }, { "epoch": 0.5321481481481481, "grad_norm": 2.325152635574341, "learning_rate": 9.378799110452187e-05, "loss": 1.2152, "step": 3592 }, { "epoch": 0.5322962962962963, "grad_norm": 3.308443546295166, "learning_rate": 9.37583395107487e-05, "loss": 0.8925, "step": 3593 }, { "epoch": 0.5324444444444445, "grad_norm": 4.343135356903076, "learning_rate": 9.372868791697555e-05, "loss": 1.1041, "step": 3594 }, { "epoch": 0.5325925925925926, "grad_norm": 5.608693599700928, "learning_rate": 9.369903632320237e-05, "loss": 1.0201, "step": 3595 }, { "epoch": 0.5327407407407407, "grad_norm": 3.607280969619751, "learning_rate": 9.366938472942921e-05, "loss": 1.1627, "step": 3596 }, { "epoch": 0.5328888888888889, "grad_norm": 4.711460590362549, "learning_rate": 9.363973313565605e-05, "loss": 1.1231, "step": 3597 }, { "epoch": 0.5330370370370371, "grad_norm": 1.5293848514556885, "learning_rate": 9.361008154188287e-05, "loss": 1.3455, "step": 3598 }, { "epoch": 0.5331851851851852, "grad_norm": 2.6837525367736816, "learning_rate": 9.358042994810972e-05, "loss": 1.0517, "step": 3599 }, { "epoch": 0.5333333333333333, "grad_norm": 2.2800650596618652, "learning_rate": 9.355077835433656e-05, "loss": 1.2113, "step": 3600 }, { "epoch": 0.5334814814814814, "grad_norm": 7.9449944496154785, "learning_rate": 9.352112676056338e-05, "loss": 1.0929, "step": 3601 }, { "epoch": 0.5336296296296297, "grad_norm": 2.1599514484405518, "learning_rate": 9.349147516679022e-05, "loss": 1.1329, "step": 3602 }, { "epoch": 0.5337777777777778, "grad_norm": 8.514766693115234, "learning_rate": 
9.346182357301705e-05, "loss": 0.9507, "step": 3603 }, { "epoch": 0.5339259259259259, "grad_norm": 1.950701117515564, "learning_rate": 9.343217197924388e-05, "loss": 1.0046, "step": 3604 }, { "epoch": 0.534074074074074, "grad_norm": 2.9352715015411377, "learning_rate": 9.340252038547073e-05, "loss": 0.9885, "step": 3605 }, { "epoch": 0.5342222222222223, "grad_norm": 3.379699230194092, "learning_rate": 9.337286879169756e-05, "loss": 1.0122, "step": 3606 }, { "epoch": 0.5343703703703704, "grad_norm": 2.198274850845337, "learning_rate": 9.334321719792439e-05, "loss": 1.1297, "step": 3607 }, { "epoch": 0.5345185185185185, "grad_norm": 2.858485460281372, "learning_rate": 9.331356560415124e-05, "loss": 1.0752, "step": 3608 }, { "epoch": 0.5346666666666666, "grad_norm": 2.4457948207855225, "learning_rate": 9.328391401037807e-05, "loss": 1.1746, "step": 3609 }, { "epoch": 0.5348148148148149, "grad_norm": 2.0533862113952637, "learning_rate": 9.32542624166049e-05, "loss": 1.1646, "step": 3610 }, { "epoch": 0.534962962962963, "grad_norm": 4.793309211730957, "learning_rate": 9.322461082283174e-05, "loss": 1.1582, "step": 3611 }, { "epoch": 0.5351111111111111, "grad_norm": 3.439361810684204, "learning_rate": 9.319495922905856e-05, "loss": 1.0282, "step": 3612 }, { "epoch": 0.5352592592592592, "grad_norm": 4.565910339355469, "learning_rate": 9.31653076352854e-05, "loss": 1.2051, "step": 3613 }, { "epoch": 0.5354074074074074, "grad_norm": 4.375285625457764, "learning_rate": 9.313565604151223e-05, "loss": 0.7596, "step": 3614 }, { "epoch": 0.5355555555555556, "grad_norm": 2.1360411643981934, "learning_rate": 9.310600444773906e-05, "loss": 0.8976, "step": 3615 }, { "epoch": 0.5357037037037037, "grad_norm": 1.7414849996566772, "learning_rate": 9.307635285396591e-05, "loss": 0.9914, "step": 3616 }, { "epoch": 0.5358518518518518, "grad_norm": 1.7469319105148315, "learning_rate": 9.304670126019274e-05, "loss": 1.1223, "step": 3617 }, { "epoch": 0.536, "grad_norm": 2.1778478622436523, 
"learning_rate": 9.301704966641957e-05, "loss": 1.0785, "step": 3618 }, { "epoch": 0.5361481481481482, "grad_norm": 2.8595550060272217, "learning_rate": 9.298739807264642e-05, "loss": 1.0983, "step": 3619 }, { "epoch": 0.5362962962962963, "grad_norm": 5.059350967407227, "learning_rate": 9.295774647887325e-05, "loss": 1.2499, "step": 3620 }, { "epoch": 0.5364444444444444, "grad_norm": 1.920396089553833, "learning_rate": 9.292809488510008e-05, "loss": 0.8308, "step": 3621 }, { "epoch": 0.5365925925925926, "grad_norm": 3.148193836212158, "learning_rate": 9.289844329132691e-05, "loss": 0.8431, "step": 3622 }, { "epoch": 0.5367407407407407, "grad_norm": 4.7985405921936035, "learning_rate": 9.286879169755375e-05, "loss": 1.0488, "step": 3623 }, { "epoch": 0.5368888888888889, "grad_norm": 1.8743398189544678, "learning_rate": 9.283914010378058e-05, "loss": 1.1173, "step": 3624 }, { "epoch": 0.5370370370370371, "grad_norm": 3.4600822925567627, "learning_rate": 9.280948851000741e-05, "loss": 0.9806, "step": 3625 }, { "epoch": 0.5371851851851852, "grad_norm": 2.07930850982666, "learning_rate": 9.277983691623426e-05, "loss": 0.9602, "step": 3626 }, { "epoch": 0.5373333333333333, "grad_norm": 2.4907588958740234, "learning_rate": 9.275018532246109e-05, "loss": 1.1242, "step": 3627 }, { "epoch": 0.5374814814814814, "grad_norm": 1.5953439474105835, "learning_rate": 9.272053372868792e-05, "loss": 0.739, "step": 3628 }, { "epoch": 0.5376296296296297, "grad_norm": 3.6003265380859375, "learning_rate": 9.269088213491475e-05, "loss": 1.0478, "step": 3629 }, { "epoch": 0.5377777777777778, "grad_norm": 1.954933524131775, "learning_rate": 9.266123054114158e-05, "loss": 1.0159, "step": 3630 }, { "epoch": 0.5379259259259259, "grad_norm": 4.888095378875732, "learning_rate": 9.263157894736843e-05, "loss": 1.1897, "step": 3631 }, { "epoch": 0.538074074074074, "grad_norm": 2.864197015762329, "learning_rate": 9.260192735359526e-05, "loss": 0.8302, "step": 3632 }, { "epoch": 0.5382222222222223, 
"grad_norm": 1.9595667123794556, "learning_rate": 9.257227575982209e-05, "loss": 1.0346, "step": 3633 }, { "epoch": 0.5383703703703704, "grad_norm": 2.2659835815429688, "learning_rate": 9.254262416604893e-05, "loss": 0.9851, "step": 3634 }, { "epoch": 0.5385185185185185, "grad_norm": 1.8228520154953003, "learning_rate": 9.251297257227576e-05, "loss": 0.9439, "step": 3635 }, { "epoch": 0.5386666666666666, "grad_norm": 2.0427815914154053, "learning_rate": 9.24833209785026e-05, "loss": 0.8628, "step": 3636 }, { "epoch": 0.5388148148148149, "grad_norm": 3.836122751235962, "learning_rate": 9.245366938472944e-05, "loss": 0.9758, "step": 3637 }, { "epoch": 0.538962962962963, "grad_norm": 1.4492243528366089, "learning_rate": 9.242401779095626e-05, "loss": 1.1122, "step": 3638 }, { "epoch": 0.5391111111111111, "grad_norm": 2.1065256595611572, "learning_rate": 9.23943661971831e-05, "loss": 0.966, "step": 3639 }, { "epoch": 0.5392592592592592, "grad_norm": 1.8754328489303589, "learning_rate": 9.236471460340995e-05, "loss": 0.9189, "step": 3640 }, { "epoch": 0.5394074074074074, "grad_norm": 4.311052322387695, "learning_rate": 9.233506300963676e-05, "loss": 1.1817, "step": 3641 }, { "epoch": 0.5395555555555556, "grad_norm": 3.1392056941986084, "learning_rate": 9.230541141586361e-05, "loss": 0.9071, "step": 3642 }, { "epoch": 0.5397037037037037, "grad_norm": 2.574453353881836, "learning_rate": 9.227575982209045e-05, "loss": 1.26, "step": 3643 }, { "epoch": 0.5398518518518518, "grad_norm": 2.1404294967651367, "learning_rate": 9.224610822831727e-05, "loss": 1.3647, "step": 3644 }, { "epoch": 0.54, "grad_norm": 3.1237967014312744, "learning_rate": 9.221645663454411e-05, "loss": 1.0603, "step": 3645 }, { "epoch": 0.5401481481481482, "grad_norm": 4.297988414764404, "learning_rate": 9.218680504077095e-05, "loss": 0.9979, "step": 3646 }, { "epoch": 0.5402962962962963, "grad_norm": 1.8716181516647339, "learning_rate": 9.215715344699778e-05, "loss": 0.9667, "step": 3647 }, { "epoch": 
0.5404444444444444, "grad_norm": 2.383779287338257, "learning_rate": 9.212750185322462e-05, "loss": 0.9201, "step": 3648 }, { "epoch": 0.5405925925925926, "grad_norm": 2.4546163082122803, "learning_rate": 9.209785025945145e-05, "loss": 0.9959, "step": 3649 }, { "epoch": 0.5407407407407407, "grad_norm": 2.4224185943603516, "learning_rate": 9.206819866567828e-05, "loss": 1.1507, "step": 3650 }, { "epoch": 0.5408888888888889, "grad_norm": 1.896492600440979, "learning_rate": 9.203854707190513e-05, "loss": 1.1051, "step": 3651 }, { "epoch": 0.541037037037037, "grad_norm": 2.1329288482666016, "learning_rate": 9.200889547813196e-05, "loss": 1.1851, "step": 3652 }, { "epoch": 0.5411851851851852, "grad_norm": 2.4231183528900146, "learning_rate": 9.197924388435879e-05, "loss": 1.1228, "step": 3653 }, { "epoch": 0.5413333333333333, "grad_norm": 1.6991504430770874, "learning_rate": 9.194959229058563e-05, "loss": 1.1305, "step": 3654 }, { "epoch": 0.5414814814814815, "grad_norm": 1.461769938468933, "learning_rate": 9.191994069681245e-05, "loss": 0.9971, "step": 3655 }, { "epoch": 0.5416296296296297, "grad_norm": 2.796705961227417, "learning_rate": 9.18902891030393e-05, "loss": 1.0234, "step": 3656 }, { "epoch": 0.5417777777777778, "grad_norm": 3.8761069774627686, "learning_rate": 9.186063750926613e-05, "loss": 1.2682, "step": 3657 }, { "epoch": 0.5419259259259259, "grad_norm": 2.3537259101867676, "learning_rate": 9.183098591549296e-05, "loss": 1.1272, "step": 3658 }, { "epoch": 0.542074074074074, "grad_norm": 2.1708521842956543, "learning_rate": 9.18013343217198e-05, "loss": 0.9924, "step": 3659 }, { "epoch": 0.5422222222222223, "grad_norm": 2.080087184906006, "learning_rate": 9.177168272794663e-05, "loss": 1.2023, "step": 3660 }, { "epoch": 0.5423703703703704, "grad_norm": 1.725771427154541, "learning_rate": 9.174203113417346e-05, "loss": 1.0793, "step": 3661 }, { "epoch": 0.5425185185185185, "grad_norm": 1.996046781539917, "learning_rate": 9.171237954040031e-05, "loss": 
0.9576, "step": 3662 }, { "epoch": 0.5426666666666666, "grad_norm": 2.237187147140503, "learning_rate": 9.168272794662714e-05, "loss": 1.059, "step": 3663 }, { "epoch": 0.5428148148148149, "grad_norm": 1.372070550918579, "learning_rate": 9.165307635285397e-05, "loss": 1.032, "step": 3664 }, { "epoch": 0.542962962962963, "grad_norm": 1.853932499885559, "learning_rate": 9.16234247590808e-05, "loss": 0.9676, "step": 3665 }, { "epoch": 0.5431111111111111, "grad_norm": 2.8059439659118652, "learning_rate": 9.159377316530764e-05, "loss": 1.0134, "step": 3666 }, { "epoch": 0.5432592592592592, "grad_norm": 1.68291437625885, "learning_rate": 9.156412157153448e-05, "loss": 1.1153, "step": 3667 }, { "epoch": 0.5434074074074075, "grad_norm": 4.564503192901611, "learning_rate": 9.15344699777613e-05, "loss": 1.0873, "step": 3668 }, { "epoch": 0.5435555555555556, "grad_norm": 3.0195443630218506, "learning_rate": 9.150481838398815e-05, "loss": 0.8743, "step": 3669 }, { "epoch": 0.5437037037037037, "grad_norm": 1.9659086465835571, "learning_rate": 9.147516679021498e-05, "loss": 0.9922, "step": 3670 }, { "epoch": 0.5438518518518518, "grad_norm": 1.9579589366912842, "learning_rate": 9.144551519644181e-05, "loss": 1.0172, "step": 3671 }, { "epoch": 0.544, "grad_norm": 3.0811991691589355, "learning_rate": 9.141586360266864e-05, "loss": 1.0922, "step": 3672 }, { "epoch": 0.5441481481481482, "grad_norm": 2.1591901779174805, "learning_rate": 9.138621200889547e-05, "loss": 0.9586, "step": 3673 }, { "epoch": 0.5442962962962963, "grad_norm": 1.5976217985153198, "learning_rate": 9.135656041512232e-05, "loss": 1.086, "step": 3674 }, { "epoch": 0.5444444444444444, "grad_norm": 4.451835632324219, "learning_rate": 9.132690882134915e-05, "loss": 1.2514, "step": 3675 }, { "epoch": 0.5445925925925926, "grad_norm": 1.4515033960342407, "learning_rate": 9.129725722757598e-05, "loss": 1.0563, "step": 3676 }, { "epoch": 0.5447407407407407, "grad_norm": 1.9500216245651245, "learning_rate": 
9.126760563380283e-05, "loss": 0.7786, "step": 3677 }, { "epoch": 0.5448888888888889, "grad_norm": 2.538944721221924, "learning_rate": 9.123795404002966e-05, "loss": 1.0119, "step": 3678 }, { "epoch": 0.545037037037037, "grad_norm": 1.7618365287780762, "learning_rate": 9.120830244625649e-05, "loss": 0.9883, "step": 3679 }, { "epoch": 0.5451851851851852, "grad_norm": 1.7227729558944702, "learning_rate": 9.117865085248333e-05, "loss": 1.0522, "step": 3680 }, { "epoch": 0.5453333333333333, "grad_norm": 2.5352048873901367, "learning_rate": 9.114899925871015e-05, "loss": 1.2212, "step": 3681 }, { "epoch": 0.5454814814814815, "grad_norm": 2.4464914798736572, "learning_rate": 9.111934766493699e-05, "loss": 0.939, "step": 3682 }, { "epoch": 0.5456296296296296, "grad_norm": 2.1241180896759033, "learning_rate": 9.108969607116384e-05, "loss": 0.9484, "step": 3683 }, { "epoch": 0.5457777777777778, "grad_norm": 1.5858129262924194, "learning_rate": 9.106004447739066e-05, "loss": 1.106, "step": 3684 }, { "epoch": 0.5459259259259259, "grad_norm": 2.267939567565918, "learning_rate": 9.10303928836175e-05, "loss": 1.1996, "step": 3685 }, { "epoch": 0.546074074074074, "grad_norm": 2.072701930999756, "learning_rate": 9.100074128984434e-05, "loss": 1.3082, "step": 3686 }, { "epoch": 0.5462222222222223, "grad_norm": 4.704676151275635, "learning_rate": 9.097108969607116e-05, "loss": 0.8774, "step": 3687 }, { "epoch": 0.5463703703703704, "grad_norm": 2.327634334564209, "learning_rate": 9.0941438102298e-05, "loss": 1.3083, "step": 3688 }, { "epoch": 0.5465185185185185, "grad_norm": 1.9294790029525757, "learning_rate": 9.091178650852484e-05, "loss": 1.0257, "step": 3689 }, { "epoch": 0.5466666666666666, "grad_norm": 2.1735949516296387, "learning_rate": 9.088213491475167e-05, "loss": 1.0953, "step": 3690 }, { "epoch": 0.5468148148148149, "grad_norm": 2.482743501663208, "learning_rate": 9.085248332097851e-05, "loss": 1.0466, "step": 3691 }, { "epoch": 0.546962962962963, "grad_norm": 
1.8486324548721313, "learning_rate": 9.082283172720534e-05, "loss": 1.1685, "step": 3692 }, { "epoch": 0.5471111111111111, "grad_norm": 1.4710181951522827, "learning_rate": 9.079318013343217e-05, "loss": 0.9895, "step": 3693 }, { "epoch": 0.5472592592592592, "grad_norm": 1.2480143308639526, "learning_rate": 9.076352853965902e-05, "loss": 0.8952, "step": 3694 }, { "epoch": 0.5474074074074075, "grad_norm": 1.9864888191223145, "learning_rate": 9.073387694588585e-05, "loss": 1.1691, "step": 3695 }, { "epoch": 0.5475555555555556, "grad_norm": 1.6673661470413208, "learning_rate": 9.070422535211268e-05, "loss": 1.1285, "step": 3696 }, { "epoch": 0.5477037037037037, "grad_norm": 2.3615503311157227, "learning_rate": 9.067457375833952e-05, "loss": 1.1099, "step": 3697 }, { "epoch": 0.5478518518518518, "grad_norm": 1.8084917068481445, "learning_rate": 9.064492216456634e-05, "loss": 1.0073, "step": 3698 }, { "epoch": 0.548, "grad_norm": 1.7501620054244995, "learning_rate": 9.061527057079319e-05, "loss": 1.1668, "step": 3699 }, { "epoch": 0.5481481481481482, "grad_norm": 2.0538253784179688, "learning_rate": 9.058561897702002e-05, "loss": 0.875, "step": 3700 }, { "epoch": 0.5482962962962963, "grad_norm": 2.4856984615325928, "learning_rate": 9.055596738324685e-05, "loss": 1.0177, "step": 3701 }, { "epoch": 0.5484444444444444, "grad_norm": 1.54976224899292, "learning_rate": 9.052631578947369e-05, "loss": 1.1514, "step": 3702 }, { "epoch": 0.5485925925925926, "grad_norm": 1.382947564125061, "learning_rate": 9.049666419570052e-05, "loss": 0.8129, "step": 3703 }, { "epoch": 0.5487407407407408, "grad_norm": 2.57087779045105, "learning_rate": 9.046701260192735e-05, "loss": 1.0847, "step": 3704 }, { "epoch": 0.5488888888888889, "grad_norm": 1.8630986213684082, "learning_rate": 9.04373610081542e-05, "loss": 0.9154, "step": 3705 }, { "epoch": 0.549037037037037, "grad_norm": 2.1322314739227295, "learning_rate": 9.040770941438103e-05, "loss": 0.9647, "step": 3706 }, { "epoch": 
0.5491851851851852, "grad_norm": 2.444566488265991, "learning_rate": 9.037805782060786e-05, "loss": 0.9911, "step": 3707 }, { "epoch": 0.5493333333333333, "grad_norm": 1.566116452217102, "learning_rate": 9.034840622683469e-05, "loss": 0.9378, "step": 3708 }, { "epoch": 0.5494814814814815, "grad_norm": 15.759946823120117, "learning_rate": 9.031875463306154e-05, "loss": 0.924, "step": 3709 }, { "epoch": 0.5496296296296296, "grad_norm": 1.5855779647827148, "learning_rate": 9.028910303928837e-05, "loss": 1.2671, "step": 3710 }, { "epoch": 0.5497777777777778, "grad_norm": 2.2595438957214355, "learning_rate": 9.02594514455152e-05, "loss": 0.9608, "step": 3711 }, { "epoch": 0.5499259259259259, "grad_norm": 2.2142670154571533, "learning_rate": 9.022979985174204e-05, "loss": 0.9906, "step": 3712 }, { "epoch": 0.550074074074074, "grad_norm": 1.7206697463989258, "learning_rate": 9.020014825796887e-05, "loss": 1.1186, "step": 3713 }, { "epoch": 0.5502222222222222, "grad_norm": 1.3970435857772827, "learning_rate": 9.01704966641957e-05, "loss": 1.1455, "step": 3714 }, { "epoch": 0.5503703703703704, "grad_norm": 2.5814034938812256, "learning_rate": 9.014084507042254e-05, "loss": 1.3222, "step": 3715 }, { "epoch": 0.5505185185185185, "grad_norm": 2.862255096435547, "learning_rate": 9.011119347664937e-05, "loss": 0.9744, "step": 3716 }, { "epoch": 0.5506666666666666, "grad_norm": 2.0777993202209473, "learning_rate": 9.008154188287621e-05, "loss": 1.1848, "step": 3717 }, { "epoch": 0.5508148148148149, "grad_norm": 1.3220739364624023, "learning_rate": 9.005189028910304e-05, "loss": 1.0493, "step": 3718 }, { "epoch": 0.550962962962963, "grad_norm": 1.9476999044418335, "learning_rate": 9.002223869532987e-05, "loss": 0.7834, "step": 3719 }, { "epoch": 0.5511111111111111, "grad_norm": 1.7757718563079834, "learning_rate": 8.999258710155672e-05, "loss": 1.152, "step": 3720 }, { "epoch": 0.5512592592592592, "grad_norm": 1.1972419023513794, "learning_rate": 8.996293550778355e-05, "loss": 
0.9338, "step": 3721 }, { "epoch": 0.5514074074074075, "grad_norm": 1.3252272605895996, "learning_rate": 8.993328391401038e-05, "loss": 1.1488, "step": 3722 }, { "epoch": 0.5515555555555556, "grad_norm": 1.7388628721237183, "learning_rate": 8.990363232023722e-05, "loss": 0.988, "step": 3723 }, { "epoch": 0.5517037037037037, "grad_norm": 1.9051963090896606, "learning_rate": 8.987398072646404e-05, "loss": 1.315, "step": 3724 }, { "epoch": 0.5518518518518518, "grad_norm": 1.2011630535125732, "learning_rate": 8.984432913269088e-05, "loss": 0.9855, "step": 3725 }, { "epoch": 0.552, "grad_norm": 1.7776042222976685, "learning_rate": 8.981467753891773e-05, "loss": 1.1439, "step": 3726 }, { "epoch": 0.5521481481481482, "grad_norm": 1.6855988502502441, "learning_rate": 8.978502594514455e-05, "loss": 0.9103, "step": 3727 }, { "epoch": 0.5522962962962963, "grad_norm": 1.8196964263916016, "learning_rate": 8.975537435137139e-05, "loss": 1.1186, "step": 3728 }, { "epoch": 0.5524444444444444, "grad_norm": 2.096670150756836, "learning_rate": 8.972572275759824e-05, "loss": 0.9407, "step": 3729 }, { "epoch": 0.5525925925925926, "grad_norm": 2.373717784881592, "learning_rate": 8.969607116382505e-05, "loss": 1.3133, "step": 3730 }, { "epoch": 0.5527407407407408, "grad_norm": 1.496888279914856, "learning_rate": 8.96664195700519e-05, "loss": 0.9431, "step": 3731 }, { "epoch": 0.5528888888888889, "grad_norm": 1.5524799823760986, "learning_rate": 8.963676797627873e-05, "loss": 0.8583, "step": 3732 }, { "epoch": 0.553037037037037, "grad_norm": 1.5310463905334473, "learning_rate": 8.960711638250556e-05, "loss": 1.4312, "step": 3733 }, { "epoch": 0.5531851851851852, "grad_norm": 1.8233987092971802, "learning_rate": 8.95774647887324e-05, "loss": 1.0469, "step": 3734 }, { "epoch": 0.5533333333333333, "grad_norm": 1.8349663019180298, "learning_rate": 8.954781319495923e-05, "loss": 1.0884, "step": 3735 }, { "epoch": 0.5534814814814815, "grad_norm": 1.9651541709899902, "learning_rate": 
8.951816160118607e-05, "loss": 1.2182, "step": 3736 }, { "epoch": 0.5536296296296296, "grad_norm": 4.574952602386475, "learning_rate": 8.948851000741291e-05, "loss": 1.073, "step": 3737 }, { "epoch": 0.5537777777777778, "grad_norm": 1.901864767074585, "learning_rate": 8.945885841363974e-05, "loss": 1.3331, "step": 3738 }, { "epoch": 0.5539259259259259, "grad_norm": 1.2893147468566895, "learning_rate": 8.942920681986657e-05, "loss": 1.0266, "step": 3739 }, { "epoch": 0.554074074074074, "grad_norm": 1.4766526222229004, "learning_rate": 8.939955522609342e-05, "loss": 1.0866, "step": 3740 }, { "epoch": 0.5542222222222222, "grad_norm": 1.5488594770431519, "learning_rate": 8.936990363232023e-05, "loss": 0.9393, "step": 3741 }, { "epoch": 0.5543703703703704, "grad_norm": 1.9072670936584473, "learning_rate": 8.934025203854708e-05, "loss": 1.027, "step": 3742 }, { "epoch": 0.5545185185185185, "grad_norm": 1.2895652055740356, "learning_rate": 8.931060044477391e-05, "loss": 0.9981, "step": 3743 }, { "epoch": 0.5546666666666666, "grad_norm": 3.711061477661133, "learning_rate": 8.928094885100074e-05, "loss": 1.0616, "step": 3744 }, { "epoch": 0.5548148148148148, "grad_norm": 2.482109546661377, "learning_rate": 8.925129725722758e-05, "loss": 0.8922, "step": 3745 }, { "epoch": 0.554962962962963, "grad_norm": 2.232032060623169, "learning_rate": 8.922164566345442e-05, "loss": 1.2899, "step": 3746 }, { "epoch": 0.5551111111111111, "grad_norm": 1.6179907321929932, "learning_rate": 8.919199406968125e-05, "loss": 1.1324, "step": 3747 }, { "epoch": 0.5552592592592592, "grad_norm": 2.103853940963745, "learning_rate": 8.916234247590809e-05, "loss": 1.2801, "step": 3748 }, { "epoch": 0.5554074074074075, "grad_norm": 1.3915637731552124, "learning_rate": 8.913269088213492e-05, "loss": 0.9872, "step": 3749 }, { "epoch": 0.5555555555555556, "grad_norm": 1.6203278303146362, "learning_rate": 8.910303928836175e-05, "loss": 0.8849, "step": 3750 }, { "epoch": 0.5557037037037037, "grad_norm": 
1.936846375465393, "learning_rate": 8.907338769458858e-05, "loss": 1.2005, "step": 3751 }, { "epoch": 0.5558518518518518, "grad_norm": 2.231778144836426, "learning_rate": 8.904373610081543e-05, "loss": 1.1457, "step": 3752 }, { "epoch": 0.556, "grad_norm": 1.567258358001709, "learning_rate": 8.901408450704226e-05, "loss": 1.2589, "step": 3753 }, { "epoch": 0.5561481481481482, "grad_norm": 1.4013599157333374, "learning_rate": 8.898443291326909e-05, "loss": 1.1576, "step": 3754 }, { "epoch": 0.5562962962962963, "grad_norm": 1.2259019613265991, "learning_rate": 8.895478131949593e-05, "loss": 1.0385, "step": 3755 }, { "epoch": 0.5564444444444444, "grad_norm": 2.0349345207214355, "learning_rate": 8.892512972572276e-05, "loss": 1.1771, "step": 3756 }, { "epoch": 0.5565925925925926, "grad_norm": 1.4776968955993652, "learning_rate": 8.88954781319496e-05, "loss": 0.9647, "step": 3757 }, { "epoch": 0.5567407407407408, "grad_norm": 2.031043529510498, "learning_rate": 8.886582653817643e-05, "loss": 0.99, "step": 3758 }, { "epoch": 0.5568888888888889, "grad_norm": 1.9026827812194824, "learning_rate": 8.883617494440326e-05, "loss": 1.049, "step": 3759 }, { "epoch": 0.557037037037037, "grad_norm": 2.278316020965576, "learning_rate": 8.88065233506301e-05, "loss": 1.2736, "step": 3760 }, { "epoch": 0.5571851851851852, "grad_norm": 1.2471530437469482, "learning_rate": 8.877687175685693e-05, "loss": 0.9425, "step": 3761 }, { "epoch": 0.5573333333333333, "grad_norm": 1.2254701852798462, "learning_rate": 8.874722016308376e-05, "loss": 1.0904, "step": 3762 }, { "epoch": 0.5574814814814815, "grad_norm": 1.9684616327285767, "learning_rate": 8.871756856931061e-05, "loss": 1.022, "step": 3763 }, { "epoch": 0.5576296296296296, "grad_norm": 1.5332367420196533, "learning_rate": 8.868791697553744e-05, "loss": 1.0997, "step": 3764 }, { "epoch": 0.5577777777777778, "grad_norm": 1.2924193143844604, "learning_rate": 8.865826538176427e-05, "loss": 1.0744, "step": 3765 }, { "epoch": 
0.5579259259259259, "grad_norm": 1.6238971948623657, "learning_rate": 8.862861378799111e-05, "loss": 1.0373, "step": 3766 }, { "epoch": 0.5580740740740741, "grad_norm": 1.2670892477035522, "learning_rate": 8.859896219421793e-05, "loss": 1.0113, "step": 3767 }, { "epoch": 0.5582222222222222, "grad_norm": 1.2489657402038574, "learning_rate": 8.856931060044478e-05, "loss": 0.9363, "step": 3768 }, { "epoch": 0.5583703703703704, "grad_norm": 1.6227606534957886, "learning_rate": 8.853965900667162e-05, "loss": 0.7972, "step": 3769 }, { "epoch": 0.5585185185185185, "grad_norm": 2.044673204421997, "learning_rate": 8.851000741289844e-05, "loss": 0.9603, "step": 3770 }, { "epoch": 0.5586666666666666, "grad_norm": 2.5743000507354736, "learning_rate": 8.848035581912528e-05, "loss": 0.9594, "step": 3771 }, { "epoch": 0.5588148148148148, "grad_norm": 1.3743785619735718, "learning_rate": 8.845070422535213e-05, "loss": 1.0438, "step": 3772 }, { "epoch": 0.558962962962963, "grad_norm": 1.4109739065170288, "learning_rate": 8.842105263157894e-05, "loss": 0.9083, "step": 3773 }, { "epoch": 0.5591111111111111, "grad_norm": 1.4330799579620361, "learning_rate": 8.839140103780579e-05, "loss": 1.0214, "step": 3774 }, { "epoch": 0.5592592592592592, "grad_norm": 2.8556342124938965, "learning_rate": 8.836174944403262e-05, "loss": 1.1462, "step": 3775 }, { "epoch": 0.5594074074074074, "grad_norm": 2.1466240882873535, "learning_rate": 8.833209785025945e-05, "loss": 0.9762, "step": 3776 }, { "epoch": 0.5595555555555556, "grad_norm": 1.436170220375061, "learning_rate": 8.83024462564863e-05, "loss": 1.0515, "step": 3777 }, { "epoch": 0.5597037037037037, "grad_norm": 1.9817818403244019, "learning_rate": 8.827279466271313e-05, "loss": 0.9498, "step": 3778 }, { "epoch": 0.5598518518518518, "grad_norm": 1.9754718542099, "learning_rate": 8.824314306893996e-05, "loss": 1.1527, "step": 3779 }, { "epoch": 0.56, "grad_norm": 1.9548002481460571, "learning_rate": 8.82134914751668e-05, "loss": 1.0014, "step": 
3780 }, { "epoch": 0.5601481481481482, "grad_norm": 1.8182411193847656, "learning_rate": 8.818383988139363e-05, "loss": 1.2531, "step": 3781 }, { "epoch": 0.5602962962962963, "grad_norm": 4.865880966186523, "learning_rate": 8.815418828762046e-05, "loss": 1.1013, "step": 3782 }, { "epoch": 0.5604444444444444, "grad_norm": 2.1615490913391113, "learning_rate": 8.812453669384731e-05, "loss": 1.2701, "step": 3783 }, { "epoch": 0.5605925925925926, "grad_norm": 1.86410391330719, "learning_rate": 8.809488510007413e-05, "loss": 1.1001, "step": 3784 }, { "epoch": 0.5607407407407408, "grad_norm": 1.4472167491912842, "learning_rate": 8.806523350630097e-05, "loss": 0.9705, "step": 3785 }, { "epoch": 0.5608888888888889, "grad_norm": 2.6446444988250732, "learning_rate": 8.80355819125278e-05, "loss": 1.1533, "step": 3786 }, { "epoch": 0.561037037037037, "grad_norm": 1.4465818405151367, "learning_rate": 8.800593031875463e-05, "loss": 0.9612, "step": 3787 }, { "epoch": 0.5611851851851852, "grad_norm": 2.375319480895996, "learning_rate": 8.797627872498148e-05, "loss": 1.1046, "step": 3788 }, { "epoch": 0.5613333333333334, "grad_norm": 1.54768967628479, "learning_rate": 8.794662713120831e-05, "loss": 0.7747, "step": 3789 }, { "epoch": 0.5614814814814815, "grad_norm": 1.7208143472671509, "learning_rate": 8.791697553743514e-05, "loss": 1.1109, "step": 3790 }, { "epoch": 0.5616296296296296, "grad_norm": 1.2667361497879028, "learning_rate": 8.788732394366198e-05, "loss": 1.1482, "step": 3791 }, { "epoch": 0.5617777777777778, "grad_norm": 1.4096691608428955, "learning_rate": 8.785767234988881e-05, "loss": 1.0901, "step": 3792 }, { "epoch": 0.5619259259259259, "grad_norm": 1.3970277309417725, "learning_rate": 8.782802075611564e-05, "loss": 1.1533, "step": 3793 }, { "epoch": 0.5620740740740741, "grad_norm": 1.7509400844573975, "learning_rate": 8.779836916234247e-05, "loss": 1.0374, "step": 3794 }, { "epoch": 0.5622222222222222, "grad_norm": 1.8194208145141602, "learning_rate": 
8.776871756856932e-05, "loss": 1.141, "step": 3795 }, { "epoch": 0.5623703703703704, "grad_norm": 1.4984582662582397, "learning_rate": 8.773906597479615e-05, "loss": 1.0448, "step": 3796 }, { "epoch": 0.5625185185185185, "grad_norm": 2.3701395988464355, "learning_rate": 8.770941438102298e-05, "loss": 0.8618, "step": 3797 }, { "epoch": 0.5626666666666666, "grad_norm": 1.72593092918396, "learning_rate": 8.767976278724983e-05, "loss": 1.2991, "step": 3798 }, { "epoch": 0.5628148148148148, "grad_norm": 3.1371865272521973, "learning_rate": 8.765011119347666e-05, "loss": 0.9859, "step": 3799 }, { "epoch": 0.562962962962963, "grad_norm": 1.9553066492080688, "learning_rate": 8.762045959970349e-05, "loss": 1.1226, "step": 3800 }, { "epoch": 0.5631111111111111, "grad_norm": 1.190905213356018, "learning_rate": 8.759080800593032e-05, "loss": 1.0534, "step": 3801 }, { "epoch": 0.5632592592592592, "grad_norm": 1.4159561395645142, "learning_rate": 8.756115641215715e-05, "loss": 1.0524, "step": 3802 }, { "epoch": 0.5634074074074074, "grad_norm": 1.7796388864517212, "learning_rate": 8.7531504818384e-05, "loss": 1.4437, "step": 3803 }, { "epoch": 0.5635555555555556, "grad_norm": 3.057600259780884, "learning_rate": 8.750185322461082e-05, "loss": 1.1243, "step": 3804 }, { "epoch": 0.5637037037037037, "grad_norm": 1.2936995029449463, "learning_rate": 8.747220163083766e-05, "loss": 1.2014, "step": 3805 }, { "epoch": 0.5638518518518518, "grad_norm": 1.568647861480713, "learning_rate": 8.74425500370645e-05, "loss": 1.1087, "step": 3806 }, { "epoch": 0.564, "grad_norm": 2.1068265438079834, "learning_rate": 8.741289844329133e-05, "loss": 1.0517, "step": 3807 }, { "epoch": 0.5641481481481482, "grad_norm": 1.7895489931106567, "learning_rate": 8.738324684951816e-05, "loss": 1.0477, "step": 3808 }, { "epoch": 0.5642962962962963, "grad_norm": 1.199803352355957, "learning_rate": 8.7353595255745e-05, "loss": 1.1233, "step": 3809 }, { "epoch": 0.5644444444444444, "grad_norm": 1.5444388389587402, 
"learning_rate": 8.732394366197182e-05, "loss": 1.2986, "step": 3810 }, { "epoch": 0.5645925925925926, "grad_norm": 3.2322614192962646, "learning_rate": 8.729429206819867e-05, "loss": 1.0613, "step": 3811 }, { "epoch": 0.5647407407407408, "grad_norm": 2.483393430709839, "learning_rate": 8.726464047442551e-05, "loss": 1.043, "step": 3812 }, { "epoch": 0.5648888888888889, "grad_norm": 2.9713010787963867, "learning_rate": 8.723498888065233e-05, "loss": 1.0973, "step": 3813 }, { "epoch": 0.565037037037037, "grad_norm": 1.9532005786895752, "learning_rate": 8.720533728687917e-05, "loss": 1.1491, "step": 3814 }, { "epoch": 0.5651851851851852, "grad_norm": 1.2219641208648682, "learning_rate": 8.717568569310602e-05, "loss": 0.9145, "step": 3815 }, { "epoch": 0.5653333333333334, "grad_norm": 1.3188670873641968, "learning_rate": 8.714603409933284e-05, "loss": 1.0009, "step": 3816 }, { "epoch": 0.5654814814814815, "grad_norm": 2.6909098625183105, "learning_rate": 8.711638250555968e-05, "loss": 1.1565, "step": 3817 }, { "epoch": 0.5656296296296296, "grad_norm": 2.079143762588501, "learning_rate": 8.708673091178651e-05, "loss": 1.0825, "step": 3818 }, { "epoch": 0.5657777777777778, "grad_norm": 4.6160054206848145, "learning_rate": 8.705707931801334e-05, "loss": 0.9953, "step": 3819 }, { "epoch": 0.5659259259259259, "grad_norm": 1.2882026433944702, "learning_rate": 8.702742772424019e-05, "loss": 0.8762, "step": 3820 }, { "epoch": 0.5660740740740741, "grad_norm": 1.4951224327087402, "learning_rate": 8.699777613046702e-05, "loss": 1.1549, "step": 3821 }, { "epoch": 0.5662222222222222, "grad_norm": 1.5525470972061157, "learning_rate": 8.696812453669385e-05, "loss": 1.0891, "step": 3822 }, { "epoch": 0.5663703703703704, "grad_norm": 1.595550775527954, "learning_rate": 8.69384729429207e-05, "loss": 1.0471, "step": 3823 }, { "epoch": 0.5665185185185185, "grad_norm": 1.8549158573150635, "learning_rate": 8.690882134914752e-05, "loss": 1.1468, "step": 3824 }, { "epoch": 
0.5666666666666667, "grad_norm": 1.8177645206451416, "learning_rate": 8.687916975537436e-05, "loss": 1.2461, "step": 3825 }, { "epoch": 0.5668148148148148, "grad_norm": 3.8642890453338623, "learning_rate": 8.68495181616012e-05, "loss": 1.2282, "step": 3826 }, { "epoch": 0.566962962962963, "grad_norm": 2.238409996032715, "learning_rate": 8.681986656782802e-05, "loss": 0.9814, "step": 3827 }, { "epoch": 0.5671111111111111, "grad_norm": 2.2041618824005127, "learning_rate": 8.679021497405486e-05, "loss": 1.1013, "step": 3828 }, { "epoch": 0.5672592592592592, "grad_norm": 2.5299365520477295, "learning_rate": 8.676056338028169e-05, "loss": 1.1499, "step": 3829 }, { "epoch": 0.5674074074074074, "grad_norm": 1.562060832977295, "learning_rate": 8.673091178650852e-05, "loss": 0.8048, "step": 3830 }, { "epoch": 0.5675555555555556, "grad_norm": 1.3858586549758911, "learning_rate": 8.670126019273537e-05, "loss": 1.1815, "step": 3831 }, { "epoch": 0.5677037037037037, "grad_norm": 1.5052539110183716, "learning_rate": 8.66716085989622e-05, "loss": 1.0912, "step": 3832 }, { "epoch": 0.5678518518518518, "grad_norm": 1.2242339849472046, "learning_rate": 8.664195700518903e-05, "loss": 1.014, "step": 3833 }, { "epoch": 0.568, "grad_norm": 2.0522894859313965, "learning_rate": 8.661230541141587e-05, "loss": 0.8479, "step": 3834 }, { "epoch": 0.5681481481481482, "grad_norm": 2.4743332862854004, "learning_rate": 8.65826538176427e-05, "loss": 0.8606, "step": 3835 }, { "epoch": 0.5682962962962963, "grad_norm": 1.8897267580032349, "learning_rate": 8.655300222386954e-05, "loss": 1.1797, "step": 3836 }, { "epoch": 0.5684444444444444, "grad_norm": 1.2706682682037354, "learning_rate": 8.652335063009637e-05, "loss": 1.2249, "step": 3837 }, { "epoch": 0.5685925925925925, "grad_norm": 1.5865893363952637, "learning_rate": 8.649369903632321e-05, "loss": 1.3758, "step": 3838 }, { "epoch": 0.5687407407407408, "grad_norm": 2.2144436836242676, "learning_rate": 8.646404744255004e-05, "loss": 1.1577, 
"step": 3839 }, { "epoch": 0.5688888888888889, "grad_norm": 1.9299513101577759, "learning_rate": 8.643439584877687e-05, "loss": 1.1502, "step": 3840 }, { "epoch": 0.569037037037037, "grad_norm": 2.1713242530822754, "learning_rate": 8.640474425500372e-05, "loss": 0.8978, "step": 3841 }, { "epoch": 0.5691851851851852, "grad_norm": 1.302879810333252, "learning_rate": 8.637509266123055e-05, "loss": 0.8991, "step": 3842 }, { "epoch": 0.5693333333333334, "grad_norm": 1.569977045059204, "learning_rate": 8.634544106745738e-05, "loss": 0.9357, "step": 3843 }, { "epoch": 0.5694814814814815, "grad_norm": 1.9835530519485474, "learning_rate": 8.631578947368421e-05, "loss": 1.0916, "step": 3844 }, { "epoch": 0.5696296296296296, "grad_norm": 1.7564622163772583, "learning_rate": 8.628613787991104e-05, "loss": 1.1367, "step": 3845 }, { "epoch": 0.5697777777777778, "grad_norm": 1.8343626260757446, "learning_rate": 8.625648628613789e-05, "loss": 1.1506, "step": 3846 }, { "epoch": 0.569925925925926, "grad_norm": 1.1550328731536865, "learning_rate": 8.622683469236472e-05, "loss": 0.7496, "step": 3847 }, { "epoch": 0.5700740740740741, "grad_norm": 1.8206971883773804, "learning_rate": 8.619718309859155e-05, "loss": 1.334, "step": 3848 }, { "epoch": 0.5702222222222222, "grad_norm": 1.7791658639907837, "learning_rate": 8.616753150481839e-05, "loss": 1.1134, "step": 3849 }, { "epoch": 0.5703703703703704, "grad_norm": 1.7074779272079468, "learning_rate": 8.613787991104522e-05, "loss": 1.218, "step": 3850 }, { "epoch": 0.5705185185185185, "grad_norm": 1.5398327112197876, "learning_rate": 8.610822831727205e-05, "loss": 1.0318, "step": 3851 }, { "epoch": 0.5706666666666667, "grad_norm": 1.385349154472351, "learning_rate": 8.60785767234989e-05, "loss": 1.0344, "step": 3852 }, { "epoch": 0.5708148148148148, "grad_norm": 3.941965103149414, "learning_rate": 8.604892512972572e-05, "loss": 1.204, "step": 3853 }, { "epoch": 0.570962962962963, "grad_norm": 2.1788978576660156, "learning_rate": 
8.601927353595256e-05, "loss": 1.0896, "step": 3854 }, { "epoch": 0.5711111111111111, "grad_norm": 1.6998543739318848, "learning_rate": 8.59896219421794e-05, "loss": 0.907, "step": 3855 }, { "epoch": 0.5712592592592592, "grad_norm": 1.5642954111099243, "learning_rate": 8.595997034840622e-05, "loss": 1.1863, "step": 3856 }, { "epoch": 0.5714074074074074, "grad_norm": 1.456916332244873, "learning_rate": 8.593031875463307e-05, "loss": 0.9201, "step": 3857 }, { "epoch": 0.5715555555555556, "grad_norm": 1.270564317703247, "learning_rate": 8.590066716085991e-05, "loss": 0.982, "step": 3858 }, { "epoch": 0.5717037037037037, "grad_norm": 1.182762622833252, "learning_rate": 8.587101556708673e-05, "loss": 1.0951, "step": 3859 }, { "epoch": 0.5718518518518518, "grad_norm": 1.4090367555618286, "learning_rate": 8.584136397331357e-05, "loss": 0.9046, "step": 3860 }, { "epoch": 0.572, "grad_norm": 1.2275861501693726, "learning_rate": 8.58117123795404e-05, "loss": 0.993, "step": 3861 }, { "epoch": 0.5721481481481482, "grad_norm": 3.029972791671753, "learning_rate": 8.578206078576723e-05, "loss": 1.0086, "step": 3862 }, { "epoch": 0.5722962962962963, "grad_norm": 3.029181957244873, "learning_rate": 8.575240919199408e-05, "loss": 1.0326, "step": 3863 }, { "epoch": 0.5724444444444444, "grad_norm": 1.5321046113967896, "learning_rate": 8.572275759822091e-05, "loss": 0.9353, "step": 3864 }, { "epoch": 0.5725925925925925, "grad_norm": 4.442076206207275, "learning_rate": 8.569310600444774e-05, "loss": 1.0312, "step": 3865 }, { "epoch": 0.5727407407407408, "grad_norm": 1.3870460987091064, "learning_rate": 8.566345441067458e-05, "loss": 1.0636, "step": 3866 }, { "epoch": 0.5728888888888889, "grad_norm": 1.3915530443191528, "learning_rate": 8.563380281690142e-05, "loss": 0.9028, "step": 3867 }, { "epoch": 0.573037037037037, "grad_norm": 5.211930274963379, "learning_rate": 8.560415122312825e-05, "loss": 1.0612, "step": 3868 }, { "epoch": 0.5731851851851851, "grad_norm": 1.4853953123092651, 
"learning_rate": 8.557449962935509e-05, "loss": 0.9738, "step": 3869 }, { "epoch": 0.5733333333333334, "grad_norm": 1.6683987379074097, "learning_rate": 8.554484803558191e-05, "loss": 1.0143, "step": 3870 }, { "epoch": 0.5734814814814815, "grad_norm": 1.8787882328033447, "learning_rate": 8.551519644180875e-05, "loss": 1.3646, "step": 3871 }, { "epoch": 0.5736296296296296, "grad_norm": 2.6080405712127686, "learning_rate": 8.548554484803558e-05, "loss": 1.2406, "step": 3872 }, { "epoch": 0.5737777777777778, "grad_norm": 1.8387919664382935, "learning_rate": 8.545589325426241e-05, "loss": 1.009, "step": 3873 }, { "epoch": 0.573925925925926, "grad_norm": 1.450384497642517, "learning_rate": 8.542624166048926e-05, "loss": 1.0718, "step": 3874 }, { "epoch": 0.5740740740740741, "grad_norm": 1.6187493801116943, "learning_rate": 8.539659006671609e-05, "loss": 1.2854, "step": 3875 }, { "epoch": 0.5742222222222222, "grad_norm": 1.410619854927063, "learning_rate": 8.536693847294292e-05, "loss": 1.0601, "step": 3876 }, { "epoch": 0.5743703703703704, "grad_norm": 1.3554319143295288, "learning_rate": 8.533728687916977e-05, "loss": 0.8737, "step": 3877 }, { "epoch": 0.5745185185185185, "grad_norm": 1.593678593635559, "learning_rate": 8.53076352853966e-05, "loss": 1.1269, "step": 3878 }, { "epoch": 0.5746666666666667, "grad_norm": 2.516368865966797, "learning_rate": 8.527798369162343e-05, "loss": 0.9533, "step": 3879 }, { "epoch": 0.5748148148148148, "grad_norm": 1.5936317443847656, "learning_rate": 8.524833209785026e-05, "loss": 1.1332, "step": 3880 }, { "epoch": 0.574962962962963, "grad_norm": 2.9010207653045654, "learning_rate": 8.52186805040771e-05, "loss": 0.9723, "step": 3881 }, { "epoch": 0.5751111111111111, "grad_norm": 1.9528433084487915, "learning_rate": 8.518902891030393e-05, "loss": 1.0244, "step": 3882 }, { "epoch": 0.5752592592592592, "grad_norm": 3.864577293395996, "learning_rate": 8.515937731653076e-05, "loss": 1.1317, "step": 3883 }, { "epoch": 0.5754074074074074, 
"grad_norm": 1.400748610496521, "learning_rate": 8.512972572275761e-05, "loss": 1.1082, "step": 3884 }, { "epoch": 0.5755555555555556, "grad_norm": 1.3284187316894531, "learning_rate": 8.510007412898444e-05, "loss": 1.1752, "step": 3885 }, { "epoch": 0.5757037037037037, "grad_norm": 3.269716739654541, "learning_rate": 8.507042253521127e-05, "loss": 1.0402, "step": 3886 }, { "epoch": 0.5758518518518518, "grad_norm": 1.8288670778274536, "learning_rate": 8.50407709414381e-05, "loss": 1.2304, "step": 3887 }, { "epoch": 0.576, "grad_norm": 1.7066706418991089, "learning_rate": 8.501111934766493e-05, "loss": 0.8752, "step": 3888 }, { "epoch": 0.5761481481481482, "grad_norm": 1.2042165994644165, "learning_rate": 8.498146775389178e-05, "loss": 0.9482, "step": 3889 }, { "epoch": 0.5762962962962963, "grad_norm": 1.4405831098556519, "learning_rate": 8.495181616011861e-05, "loss": 0.9983, "step": 3890 }, { "epoch": 0.5764444444444444, "grad_norm": 1.7180677652359009, "learning_rate": 8.492216456634544e-05, "loss": 1.1633, "step": 3891 }, { "epoch": 0.5765925925925925, "grad_norm": 1.9741321802139282, "learning_rate": 8.489251297257228e-05, "loss": 1.0343, "step": 3892 }, { "epoch": 0.5767407407407408, "grad_norm": 1.2920258045196533, "learning_rate": 8.486286137879911e-05, "loss": 0.7823, "step": 3893 }, { "epoch": 0.5768888888888889, "grad_norm": 2.000192642211914, "learning_rate": 8.483320978502595e-05, "loss": 1.1409, "step": 3894 }, { "epoch": 0.577037037037037, "grad_norm": 2.24792742729187, "learning_rate": 8.480355819125279e-05, "loss": 1.3058, "step": 3895 }, { "epoch": 0.5771851851851851, "grad_norm": 1.621907114982605, "learning_rate": 8.477390659747961e-05, "loss": 1.0469, "step": 3896 }, { "epoch": 0.5773333333333334, "grad_norm": 2.199373722076416, "learning_rate": 8.474425500370645e-05, "loss": 1.047, "step": 3897 }, { "epoch": 0.5774814814814815, "grad_norm": 1.3308420181274414, "learning_rate": 8.47146034099333e-05, "loss": 0.9544, "step": 3898 }, { "epoch": 
0.5776296296296296, "grad_norm": 1.9536174535751343, "learning_rate": 8.468495181616011e-05, "loss": 0.9893, "step": 3899 }, { "epoch": 0.5777777777777777, "grad_norm": 1.3404901027679443, "learning_rate": 8.465530022238696e-05, "loss": 1.3704, "step": 3900 }, { "epoch": 0.577925925925926, "grad_norm": 1.9785478115081787, "learning_rate": 8.46256486286138e-05, "loss": 0.6877, "step": 3901 }, { "epoch": 0.5780740740740741, "grad_norm": 1.2857595682144165, "learning_rate": 8.459599703484062e-05, "loss": 0.8752, "step": 3902 }, { "epoch": 0.5782222222222222, "grad_norm": 2.6999502182006836, "learning_rate": 8.456634544106746e-05, "loss": 1.2352, "step": 3903 }, { "epoch": 0.5783703703703704, "grad_norm": 1.5243096351623535, "learning_rate": 8.45366938472943e-05, "loss": 0.8517, "step": 3904 }, { "epoch": 0.5785185185185185, "grad_norm": 26.335729598999023, "learning_rate": 8.450704225352113e-05, "loss": 0.9115, "step": 3905 }, { "epoch": 0.5786666666666667, "grad_norm": 3.49088454246521, "learning_rate": 8.447739065974797e-05, "loss": 1.0258, "step": 3906 }, { "epoch": 0.5788148148148148, "grad_norm": 1.7637948989868164, "learning_rate": 8.44477390659748e-05, "loss": 0.9831, "step": 3907 }, { "epoch": 0.578962962962963, "grad_norm": 1.4845067262649536, "learning_rate": 8.441808747220163e-05, "loss": 0.9553, "step": 3908 }, { "epoch": 0.5791111111111111, "grad_norm": 1.3211807012557983, "learning_rate": 8.438843587842848e-05, "loss": 0.9891, "step": 3909 }, { "epoch": 0.5792592592592593, "grad_norm": 1.7925214767456055, "learning_rate": 8.435878428465531e-05, "loss": 1.1427, "step": 3910 }, { "epoch": 0.5794074074074074, "grad_norm": 1.4540693759918213, "learning_rate": 8.432913269088214e-05, "loss": 1.1146, "step": 3911 }, { "epoch": 0.5795555555555556, "grad_norm": 1.6134746074676514, "learning_rate": 8.429948109710898e-05, "loss": 1.0095, "step": 3912 }, { "epoch": 0.5797037037037037, "grad_norm": 1.8240565061569214, "learning_rate": 8.42698295033358e-05, "loss": 
1.0438, "step": 3913 }, { "epoch": 0.5798518518518518, "grad_norm": 2.772273063659668, "learning_rate": 8.424017790956264e-05, "loss": 1.051, "step": 3914 }, { "epoch": 0.58, "grad_norm": 1.9138096570968628, "learning_rate": 8.421052631578948e-05, "loss": 1.0445, "step": 3915 }, { "epoch": 0.5801481481481482, "grad_norm": 1.457571029663086, "learning_rate": 8.41808747220163e-05, "loss": 1.1765, "step": 3916 }, { "epoch": 0.5802962962962963, "grad_norm": 1.4449666738510132, "learning_rate": 8.415122312824315e-05, "loss": 0.9397, "step": 3917 }, { "epoch": 0.5804444444444444, "grad_norm": 2.546419143676758, "learning_rate": 8.412157153446998e-05, "loss": 1.2447, "step": 3918 }, { "epoch": 0.5805925925925925, "grad_norm": 2.1319639682769775, "learning_rate": 8.409191994069681e-05, "loss": 1.0725, "step": 3919 }, { "epoch": 0.5807407407407408, "grad_norm": 1.8370797634124756, "learning_rate": 8.406226834692366e-05, "loss": 1.1535, "step": 3920 }, { "epoch": 0.5808888888888889, "grad_norm": 1.722662091255188, "learning_rate": 8.403261675315049e-05, "loss": 1.1676, "step": 3921 }, { "epoch": 0.581037037037037, "grad_norm": 2.8595473766326904, "learning_rate": 8.400296515937732e-05, "loss": 1.167, "step": 3922 }, { "epoch": 0.5811851851851851, "grad_norm": 5.709082126617432, "learning_rate": 8.397331356560415e-05, "loss": 1.0609, "step": 3923 }, { "epoch": 0.5813333333333334, "grad_norm": 2.37556791305542, "learning_rate": 8.3943661971831e-05, "loss": 1.0483, "step": 3924 }, { "epoch": 0.5814814814814815, "grad_norm": 2.6315367221832275, "learning_rate": 8.391401037805783e-05, "loss": 0.9119, "step": 3925 }, { "epoch": 0.5816296296296296, "grad_norm": 2.009979009628296, "learning_rate": 8.388435878428466e-05, "loss": 1.2372, "step": 3926 }, { "epoch": 0.5817777777777777, "grad_norm": 2.6860642433166504, "learning_rate": 8.38547071905115e-05, "loss": 1.2327, "step": 3927 }, { "epoch": 0.581925925925926, "grad_norm": 1.9737948179244995, "learning_rate": 
8.382505559673833e-05, "loss": 0.9675, "step": 3928 }, { "epoch": 0.5820740740740741, "grad_norm": 1.1909997463226318, "learning_rate": 8.379540400296516e-05, "loss": 0.9533, "step": 3929 }, { "epoch": 0.5822222222222222, "grad_norm": 1.620888590812683, "learning_rate": 8.3765752409192e-05, "loss": 1.0316, "step": 3930 }, { "epoch": 0.5823703703703703, "grad_norm": 1.3121371269226074, "learning_rate": 8.373610081541882e-05, "loss": 1.0043, "step": 3931 }, { "epoch": 0.5825185185185185, "grad_norm": 1.4426884651184082, "learning_rate": 8.370644922164567e-05, "loss": 0.9521, "step": 3932 }, { "epoch": 0.5826666666666667, "grad_norm": 1.5646693706512451, "learning_rate": 8.36767976278725e-05, "loss": 1.1229, "step": 3933 }, { "epoch": 0.5828148148148148, "grad_norm": 1.7909319400787354, "learning_rate": 8.364714603409933e-05, "loss": 1.0374, "step": 3934 }, { "epoch": 0.582962962962963, "grad_norm": 1.7373473644256592, "learning_rate": 8.361749444032618e-05, "loss": 1.1421, "step": 3935 }, { "epoch": 0.5831111111111111, "grad_norm": 2.039731740951538, "learning_rate": 8.3587842846553e-05, "loss": 0.8892, "step": 3936 }, { "epoch": 0.5832592592592593, "grad_norm": 2.041718006134033, "learning_rate": 8.355819125277984e-05, "loss": 0.9515, "step": 3937 }, { "epoch": 0.5834074074074074, "grad_norm": 1.4242746829986572, "learning_rate": 8.352853965900668e-05, "loss": 0.9712, "step": 3938 }, { "epoch": 0.5835555555555556, "grad_norm": 3.0257763862609863, "learning_rate": 8.34988880652335e-05, "loss": 0.8978, "step": 3939 }, { "epoch": 0.5837037037037037, "grad_norm": 1.5052884817123413, "learning_rate": 8.346923647146034e-05, "loss": 1.1112, "step": 3940 }, { "epoch": 0.5838518518518518, "grad_norm": 1.7193881273269653, "learning_rate": 8.343958487768719e-05, "loss": 0.9639, "step": 3941 }, { "epoch": 0.584, "grad_norm": 1.8135850429534912, "learning_rate": 8.3409933283914e-05, "loss": 1.0764, "step": 3942 }, { "epoch": 0.5841481481481482, "grad_norm": 1.574263572692871, 
"learning_rate": 8.338028169014085e-05, "loss": 1.1113, "step": 3943 }, { "epoch": 0.5842962962962963, "grad_norm": 1.1261448860168457, "learning_rate": 8.33506300963677e-05, "loss": 1.2041, "step": 3944 }, { "epoch": 0.5844444444444444, "grad_norm": 1.75725519657135, "learning_rate": 8.332097850259451e-05, "loss": 1.1305, "step": 3945 }, { "epoch": 0.5845925925925926, "grad_norm": 1.8887760639190674, "learning_rate": 8.329132690882136e-05, "loss": 1.041, "step": 3946 }, { "epoch": 0.5847407407407408, "grad_norm": 1.9972128868103027, "learning_rate": 8.326167531504819e-05, "loss": 1.1726, "step": 3947 }, { "epoch": 0.5848888888888889, "grad_norm": 2.7304162979125977, "learning_rate": 8.323202372127502e-05, "loss": 1.0078, "step": 3948 }, { "epoch": 0.585037037037037, "grad_norm": 1.2695363759994507, "learning_rate": 8.320237212750186e-05, "loss": 1.2488, "step": 3949 }, { "epoch": 0.5851851851851851, "grad_norm": 1.4677480459213257, "learning_rate": 8.317272053372869e-05, "loss": 0.9995, "step": 3950 }, { "epoch": 0.5853333333333334, "grad_norm": 1.2333437204360962, "learning_rate": 8.314306893995552e-05, "loss": 0.9881, "step": 3951 }, { "epoch": 0.5854814814814815, "grad_norm": 1.8984434604644775, "learning_rate": 8.311341734618237e-05, "loss": 0.8546, "step": 3952 }, { "epoch": 0.5856296296296296, "grad_norm": 1.391685962677002, "learning_rate": 8.30837657524092e-05, "loss": 1.0106, "step": 3953 }, { "epoch": 0.5857777777777777, "grad_norm": 1.926734209060669, "learning_rate": 8.305411415863603e-05, "loss": 1.1263, "step": 3954 }, { "epoch": 0.585925925925926, "grad_norm": 1.428473949432373, "learning_rate": 8.302446256486287e-05, "loss": 1.0882, "step": 3955 }, { "epoch": 0.5860740740740741, "grad_norm": 2.3048830032348633, "learning_rate": 8.299481097108969e-05, "loss": 0.6306, "step": 3956 }, { "epoch": 0.5862222222222222, "grad_norm": 1.389608383178711, "learning_rate": 8.296515937731654e-05, "loss": 0.9708, "step": 3957 }, { "epoch": 0.5863703703703703, 
"grad_norm": 1.9436511993408203, "learning_rate": 8.293550778354337e-05, "loss": 0.9615, "step": 3958 }, { "epoch": 0.5865185185185186, "grad_norm": 2.1171786785125732, "learning_rate": 8.29058561897702e-05, "loss": 1.2669, "step": 3959 }, { "epoch": 0.5866666666666667, "grad_norm": 1.4744733572006226, "learning_rate": 8.287620459599704e-05, "loss": 1.185, "step": 3960 }, { "epoch": 0.5868148148148148, "grad_norm": 1.9858527183532715, "learning_rate": 8.284655300222387e-05, "loss": 1.0073, "step": 3961 }, { "epoch": 0.5869629629629629, "grad_norm": 1.8568344116210938, "learning_rate": 8.28169014084507e-05, "loss": 1.0161, "step": 3962 }, { "epoch": 0.5871111111111111, "grad_norm": 1.805935025215149, "learning_rate": 8.278724981467755e-05, "loss": 1.0971, "step": 3963 }, { "epoch": 0.5872592592592593, "grad_norm": 1.8495519161224365, "learning_rate": 8.275759822090438e-05, "loss": 1.1824, "step": 3964 }, { "epoch": 0.5874074074074074, "grad_norm": 1.497774362564087, "learning_rate": 8.272794662713121e-05, "loss": 1.063, "step": 3965 }, { "epoch": 0.5875555555555556, "grad_norm": 1.5078151226043701, "learning_rate": 8.269829503335804e-05, "loss": 1.0814, "step": 3966 }, { "epoch": 0.5877037037037037, "grad_norm": 1.7053334712982178, "learning_rate": 8.266864343958489e-05, "loss": 1.0877, "step": 3967 }, { "epoch": 0.5878518518518518, "grad_norm": 1.7991783618927002, "learning_rate": 8.263899184581172e-05, "loss": 0.8419, "step": 3968 }, { "epoch": 0.588, "grad_norm": 1.92278254032135, "learning_rate": 8.260934025203855e-05, "loss": 1.2756, "step": 3969 }, { "epoch": 0.5881481481481482, "grad_norm": 1.3542948961257935, "learning_rate": 8.257968865826539e-05, "loss": 0.8694, "step": 3970 }, { "epoch": 0.5882962962962963, "grad_norm": 1.9901429414749146, "learning_rate": 8.255003706449222e-05, "loss": 0.931, "step": 3971 }, { "epoch": 0.5884444444444444, "grad_norm": 1.8600529432296753, "learning_rate": 8.252038547071905e-05, "loss": 1.0207, "step": 3972 }, { "epoch": 
0.5885925925925926, "grad_norm": 1.4098716974258423, "learning_rate": 8.249073387694589e-05, "loss": 1.0243, "step": 3973 }, { "epoch": 0.5887407407407408, "grad_norm": 1.8596630096435547, "learning_rate": 8.246108228317272e-05, "loss": 1.0433, "step": 3974 }, { "epoch": 0.5888888888888889, "grad_norm": 1.580466628074646, "learning_rate": 8.243143068939956e-05, "loss": 1.1893, "step": 3975 }, { "epoch": 0.589037037037037, "grad_norm": 1.5999795198440552, "learning_rate": 8.240177909562639e-05, "loss": 1.0266, "step": 3976 }, { "epoch": 0.5891851851851851, "grad_norm": 2.5227270126342773, "learning_rate": 8.237212750185322e-05, "loss": 1.1009, "step": 3977 }, { "epoch": 0.5893333333333334, "grad_norm": 1.55117666721344, "learning_rate": 8.234247590808007e-05, "loss": 1.0404, "step": 3978 }, { "epoch": 0.5894814814814815, "grad_norm": 1.7022956609725952, "learning_rate": 8.23128243143069e-05, "loss": 1.0096, "step": 3979 }, { "epoch": 0.5896296296296296, "grad_norm": 1.8657565116882324, "learning_rate": 8.228317272053373e-05, "loss": 1.0103, "step": 3980 }, { "epoch": 0.5897777777777777, "grad_norm": 2.14446759223938, "learning_rate": 8.225352112676057e-05, "loss": 1.0509, "step": 3981 }, { "epoch": 0.589925925925926, "grad_norm": 1.3459311723709106, "learning_rate": 8.222386953298739e-05, "loss": 0.9706, "step": 3982 }, { "epoch": 0.5900740740740741, "grad_norm": 1.5558202266693115, "learning_rate": 8.219421793921423e-05, "loss": 1.0645, "step": 3983 }, { "epoch": 0.5902222222222222, "grad_norm": 1.6945942640304565, "learning_rate": 8.216456634544108e-05, "loss": 1.3269, "step": 3984 }, { "epoch": 0.5903703703703703, "grad_norm": 1.5880628824234009, "learning_rate": 8.21349147516679e-05, "loss": 0.9565, "step": 3985 }, { "epoch": 0.5905185185185186, "grad_norm": 1.7825582027435303, "learning_rate": 8.210526315789474e-05, "loss": 0.8963, "step": 3986 }, { "epoch": 0.5906666666666667, "grad_norm": 1.5919945240020752, "learning_rate": 8.207561156412159e-05, "loss": 
0.9176, "step": 3987 }, { "epoch": 0.5908148148148148, "grad_norm": 1.5249149799346924, "learning_rate": 8.20459599703484e-05, "loss": 0.9807, "step": 3988 }, { "epoch": 0.5909629629629629, "grad_norm": 5.834096908569336, "learning_rate": 8.201630837657525e-05, "loss": 0.9669, "step": 3989 }, { "epoch": 0.5911111111111111, "grad_norm": 1.2772576808929443, "learning_rate": 8.198665678280208e-05, "loss": 1.0903, "step": 3990 }, { "epoch": 0.5912592592592593, "grad_norm": 1.7056981325149536, "learning_rate": 8.195700518902891e-05, "loss": 0.9943, "step": 3991 }, { "epoch": 0.5914074074074074, "grad_norm": 2.801191806793213, "learning_rate": 8.192735359525575e-05, "loss": 1.1217, "step": 3992 }, { "epoch": 0.5915555555555555, "grad_norm": 1.6783663034439087, "learning_rate": 8.189770200148258e-05, "loss": 1.0719, "step": 3993 }, { "epoch": 0.5917037037037037, "grad_norm": 2.6965856552124023, "learning_rate": 8.186805040770942e-05, "loss": 0.9692, "step": 3994 }, { "epoch": 0.5918518518518519, "grad_norm": 2.0375962257385254, "learning_rate": 8.183839881393626e-05, "loss": 1.2247, "step": 3995 }, { "epoch": 0.592, "grad_norm": 1.5867644548416138, "learning_rate": 8.180874722016309e-05, "loss": 1.1221, "step": 3996 }, { "epoch": 0.5921481481481482, "grad_norm": 1.8249834775924683, "learning_rate": 8.177909562638992e-05, "loss": 1.0135, "step": 3997 }, { "epoch": 0.5922962962962963, "grad_norm": 2.1309573650360107, "learning_rate": 8.174944403261677e-05, "loss": 1.1323, "step": 3998 }, { "epoch": 0.5924444444444444, "grad_norm": 2.425473690032959, "learning_rate": 8.171979243884358e-05, "loss": 0.971, "step": 3999 }, { "epoch": 0.5925925925925926, "grad_norm": 1.6478471755981445, "learning_rate": 8.169014084507043e-05, "loss": 0.9494, "step": 4000 }, { "epoch": 0.5927407407407408, "grad_norm": 1.8858792781829834, "learning_rate": 8.166048925129726e-05, "loss": 1.0459, "step": 4001 }, { "epoch": 0.5928888888888889, "grad_norm": 1.6564253568649292, "learning_rate": 
8.163083765752409e-05, "loss": 0.9786, "step": 4002 }, { "epoch": 0.593037037037037, "grad_norm": 1.7090998888015747, "learning_rate": 8.160118606375093e-05, "loss": 0.924, "step": 4003 }, { "epoch": 0.5931851851851851, "grad_norm": 1.7010623216629028, "learning_rate": 8.157153446997777e-05, "loss": 0.966, "step": 4004 }, { "epoch": 0.5933333333333334, "grad_norm": 1.4552000761032104, "learning_rate": 8.15418828762046e-05, "loss": 0.8709, "step": 4005 }, { "epoch": 0.5934814814814815, "grad_norm": 2.0067336559295654, "learning_rate": 8.151223128243144e-05, "loss": 1.0272, "step": 4006 }, { "epoch": 0.5936296296296296, "grad_norm": 1.1080560684204102, "learning_rate": 8.148257968865827e-05, "loss": 1.0738, "step": 4007 }, { "epoch": 0.5937777777777777, "grad_norm": 1.604054570198059, "learning_rate": 8.14529280948851e-05, "loss": 1.0267, "step": 4008 }, { "epoch": 0.593925925925926, "grad_norm": 3.8693463802337646, "learning_rate": 8.142327650111193e-05, "loss": 1.1424, "step": 4009 }, { "epoch": 0.5940740740740741, "grad_norm": 1.270259141921997, "learning_rate": 8.139362490733878e-05, "loss": 1.0009, "step": 4010 }, { "epoch": 0.5942222222222222, "grad_norm": 1.2867693901062012, "learning_rate": 8.136397331356561e-05, "loss": 0.7596, "step": 4011 }, { "epoch": 0.5943703703703703, "grad_norm": 2.2901933193206787, "learning_rate": 8.133432171979244e-05, "loss": 1.0978, "step": 4012 }, { "epoch": 0.5945185185185186, "grad_norm": 1.6102063655853271, "learning_rate": 8.130467012601928e-05, "loss": 1.1288, "step": 4013 }, { "epoch": 0.5946666666666667, "grad_norm": 1.5397627353668213, "learning_rate": 8.127501853224611e-05, "loss": 1.0938, "step": 4014 }, { "epoch": 0.5948148148148148, "grad_norm": 1.4628108739852905, "learning_rate": 8.124536693847295e-05, "loss": 0.9546, "step": 4015 }, { "epoch": 0.5949629629629629, "grad_norm": 1.4136143922805786, "learning_rate": 8.121571534469978e-05, "loss": 0.9113, "step": 4016 }, { "epoch": 0.5951111111111111, "grad_norm": 
1.436536431312561, "learning_rate": 8.118606375092661e-05, "loss": 1.1298, "step": 4017 }, { "epoch": 0.5952592592592593, "grad_norm": 1.4656097888946533, "learning_rate": 8.115641215715345e-05, "loss": 1.1187, "step": 4018 }, { "epoch": 0.5954074074074074, "grad_norm": 1.8365551233291626, "learning_rate": 8.112676056338028e-05, "loss": 1.0314, "step": 4019 }, { "epoch": 0.5955555555555555, "grad_norm": 1.4153543710708618, "learning_rate": 8.109710896960711e-05, "loss": 1.1519, "step": 4020 }, { "epoch": 0.5957037037037037, "grad_norm": 1.3734936714172363, "learning_rate": 8.106745737583396e-05, "loss": 1.3338, "step": 4021 }, { "epoch": 0.5958518518518519, "grad_norm": 2.5461981296539307, "learning_rate": 8.103780578206079e-05, "loss": 1.2022, "step": 4022 }, { "epoch": 0.596, "grad_norm": 2.483335256576538, "learning_rate": 8.100815418828762e-05, "loss": 1.0438, "step": 4023 }, { "epoch": 0.5961481481481481, "grad_norm": 1.7527753114700317, "learning_rate": 8.097850259451446e-05, "loss": 1.0306, "step": 4024 }, { "epoch": 0.5962962962962963, "grad_norm": 1.9566283226013184, "learning_rate": 8.09488510007413e-05, "loss": 0.8303, "step": 4025 }, { "epoch": 0.5964444444444444, "grad_norm": 2.9948341846466064, "learning_rate": 8.091919940696813e-05, "loss": 0.9456, "step": 4026 }, { "epoch": 0.5965925925925926, "grad_norm": 1.8734164237976074, "learning_rate": 8.088954781319497e-05, "loss": 1.0004, "step": 4027 }, { "epoch": 0.5967407407407408, "grad_norm": 2.0065784454345703, "learning_rate": 8.085989621942179e-05, "loss": 0.9237, "step": 4028 }, { "epoch": 0.5968888888888889, "grad_norm": 1.6082143783569336, "learning_rate": 8.083024462564863e-05, "loss": 0.9663, "step": 4029 }, { "epoch": 0.597037037037037, "grad_norm": 1.461788296699524, "learning_rate": 8.080059303187548e-05, "loss": 1.0964, "step": 4030 }, { "epoch": 0.5971851851851852, "grad_norm": 1.3981989622116089, "learning_rate": 8.07709414381023e-05, "loss": 0.9772, "step": 4031 }, { "epoch": 
0.5973333333333334, "grad_norm": 2.0816566944122314, "learning_rate": 8.074128984432914e-05, "loss": 0.8227, "step": 4032 }, { "epoch": 0.5974814814814815, "grad_norm": 2.549114465713501, "learning_rate": 8.071163825055597e-05, "loss": 1.1723, "step": 4033 }, { "epoch": 0.5976296296296296, "grad_norm": 2.281337022781372, "learning_rate": 8.06819866567828e-05, "loss": 1.1551, "step": 4034 }, { "epoch": 0.5977777777777777, "grad_norm": 1.365309238433838, "learning_rate": 8.065233506300965e-05, "loss": 1.0515, "step": 4035 }, { "epoch": 0.597925925925926, "grad_norm": 1.4237347841262817, "learning_rate": 8.062268346923648e-05, "loss": 1.0027, "step": 4036 }, { "epoch": 0.5980740740740741, "grad_norm": 1.2399948835372925, "learning_rate": 8.059303187546331e-05, "loss": 0.9247, "step": 4037 }, { "epoch": 0.5982222222222222, "grad_norm": 1.2645602226257324, "learning_rate": 8.056338028169015e-05, "loss": 0.7925, "step": 4038 }, { "epoch": 0.5983703703703703, "grad_norm": 1.2598143815994263, "learning_rate": 8.053372868791698e-05, "loss": 0.8836, "step": 4039 }, { "epoch": 0.5985185185185186, "grad_norm": 1.4593044519424438, "learning_rate": 8.050407709414381e-05, "loss": 1.1674, "step": 4040 }, { "epoch": 0.5986666666666667, "grad_norm": 2.0067574977874756, "learning_rate": 8.047442550037066e-05, "loss": 1.3531, "step": 4041 }, { "epoch": 0.5988148148148148, "grad_norm": 1.540021538734436, "learning_rate": 8.044477390659748e-05, "loss": 0.8199, "step": 4042 }, { "epoch": 0.5989629629629629, "grad_norm": 1.3369797468185425, "learning_rate": 8.041512231282432e-05, "loss": 0.9617, "step": 4043 }, { "epoch": 0.5991111111111111, "grad_norm": 2.357513666152954, "learning_rate": 8.038547071905115e-05, "loss": 1.0855, "step": 4044 }, { "epoch": 0.5992592592592593, "grad_norm": 3.003789186477661, "learning_rate": 8.035581912527798e-05, "loss": 1.1802, "step": 4045 }, { "epoch": 0.5994074074074074, "grad_norm": 1.4910309314727783, "learning_rate": 8.032616753150483e-05, "loss": 
1.0519, "step": 4046 }, { "epoch": 0.5995555555555555, "grad_norm": 1.4305469989776611, "learning_rate": 8.029651593773166e-05, "loss": 1.0129, "step": 4047 }, { "epoch": 0.5997037037037037, "grad_norm": 1.8170634508132935, "learning_rate": 8.026686434395849e-05, "loss": 0.8642, "step": 4048 }, { "epoch": 0.5998518518518519, "grad_norm": 1.469193935394287, "learning_rate": 8.023721275018533e-05, "loss": 1.0446, "step": 4049 }, { "epoch": 0.6, "grad_norm": 1.5894759893417358, "learning_rate": 8.020756115641216e-05, "loss": 0.9202, "step": 4050 }, { "epoch": 0.6001481481481481, "grad_norm": 1.668437123298645, "learning_rate": 8.0177909562639e-05, "loss": 0.8982, "step": 4051 }, { "epoch": 0.6002962962962963, "grad_norm": 1.35999596118927, "learning_rate": 8.014825796886582e-05, "loss": 0.9149, "step": 4052 }, { "epoch": 0.6004444444444444, "grad_norm": 1.5806087255477905, "learning_rate": 8.011860637509267e-05, "loss": 1.1388, "step": 4053 }, { "epoch": 0.6005925925925926, "grad_norm": 1.7249394655227661, "learning_rate": 8.00889547813195e-05, "loss": 1.1687, "step": 4054 }, { "epoch": 0.6007407407407407, "grad_norm": 1.5930378437042236, "learning_rate": 8.005930318754633e-05, "loss": 1.2331, "step": 4055 }, { "epoch": 0.6008888888888889, "grad_norm": 1.7165197134017944, "learning_rate": 8.002965159377318e-05, "loss": 1.1119, "step": 4056 }, { "epoch": 0.601037037037037, "grad_norm": 2.0387492179870605, "learning_rate": 8e-05, "loss": 0.9951, "step": 4057 }, { "epoch": 0.6011851851851852, "grad_norm": 2.3698105812072754, "learning_rate": 7.997034840622684e-05, "loss": 1.0545, "step": 4058 }, { "epoch": 0.6013333333333334, "grad_norm": 1.5965845584869385, "learning_rate": 7.994069681245367e-05, "loss": 0.9012, "step": 4059 }, { "epoch": 0.6014814814814815, "grad_norm": 2.0365452766418457, "learning_rate": 7.99110452186805e-05, "loss": 1.1274, "step": 4060 }, { "epoch": 0.6016296296296296, "grad_norm": 1.958509087562561, "learning_rate": 7.988139362490734e-05, "loss": 
1.1167, "step": 4061 }, { "epoch": 0.6017777777777777, "grad_norm": 2.1451406478881836, "learning_rate": 7.985174203113417e-05, "loss": 1.3345, "step": 4062 }, { "epoch": 0.601925925925926, "grad_norm": 1.3491348028182983, "learning_rate": 7.9822090437361e-05, "loss": 1.0724, "step": 4063 }, { "epoch": 0.6020740740740741, "grad_norm": 2.01298451423645, "learning_rate": 7.979243884358785e-05, "loss": 0.8657, "step": 4064 }, { "epoch": 0.6022222222222222, "grad_norm": 1.8076258897781372, "learning_rate": 7.976278724981468e-05, "loss": 0.8764, "step": 4065 }, { "epoch": 0.6023703703703703, "grad_norm": 2.361900806427002, "learning_rate": 7.973313565604151e-05, "loss": 1.2356, "step": 4066 }, { "epoch": 0.6025185185185186, "grad_norm": 1.8414537906646729, "learning_rate": 7.970348406226836e-05, "loss": 1.0375, "step": 4067 }, { "epoch": 0.6026666666666667, "grad_norm": 2.9812514781951904, "learning_rate": 7.967383246849519e-05, "loss": 1.0114, "step": 4068 }, { "epoch": 0.6028148148148148, "grad_norm": 2.112748861312866, "learning_rate": 7.964418087472202e-05, "loss": 1.0682, "step": 4069 }, { "epoch": 0.6029629629629629, "grad_norm": 2.281156063079834, "learning_rate": 7.961452928094886e-05, "loss": 1.0197, "step": 4070 }, { "epoch": 0.6031111111111112, "grad_norm": 1.7563687562942505, "learning_rate": 7.958487768717568e-05, "loss": 1.1047, "step": 4071 }, { "epoch": 0.6032592592592593, "grad_norm": 1.8988922834396362, "learning_rate": 7.955522609340252e-05, "loss": 1.1074, "step": 4072 }, { "epoch": 0.6034074074074074, "grad_norm": 1.6233725547790527, "learning_rate": 7.952557449962937e-05, "loss": 1.1455, "step": 4073 }, { "epoch": 0.6035555555555555, "grad_norm": 5.494024276733398, "learning_rate": 7.949592290585619e-05, "loss": 1.0763, "step": 4074 }, { "epoch": 0.6037037037037037, "grad_norm": 2.2812435626983643, "learning_rate": 7.946627131208303e-05, "loss": 1.0217, "step": 4075 }, { "epoch": 0.6038518518518519, "grad_norm": 1.5280718803405762, "learning_rate": 
7.943661971830986e-05, "loss": 1.1622, "step": 4076 }, { "epoch": 0.604, "grad_norm": 3.2437915802001953, "learning_rate": 7.940696812453669e-05, "loss": 0.8585, "step": 4077 }, { "epoch": 0.6041481481481481, "grad_norm": 1.639431357383728, "learning_rate": 7.937731653076354e-05, "loss": 1.2248, "step": 4078 }, { "epoch": 0.6042962962962963, "grad_norm": 2.29898738861084, "learning_rate": 7.934766493699037e-05, "loss": 1.0092, "step": 4079 }, { "epoch": 0.6044444444444445, "grad_norm": 1.4584858417510986, "learning_rate": 7.93180133432172e-05, "loss": 0.9846, "step": 4080 }, { "epoch": 0.6045925925925926, "grad_norm": 1.757988691329956, "learning_rate": 7.928836174944404e-05, "loss": 1.1713, "step": 4081 }, { "epoch": 0.6047407407407407, "grad_norm": 1.628948450088501, "learning_rate": 7.925871015567087e-05, "loss": 1.0576, "step": 4082 }, { "epoch": 0.6048888888888889, "grad_norm": 1.3002852201461792, "learning_rate": 7.92290585618977e-05, "loss": 1.0973, "step": 4083 }, { "epoch": 0.605037037037037, "grad_norm": 1.8399114608764648, "learning_rate": 7.919940696812455e-05, "loss": 0.8469, "step": 4084 }, { "epoch": 0.6051851851851852, "grad_norm": 2.028707504272461, "learning_rate": 7.916975537435137e-05, "loss": 1.1424, "step": 4085 }, { "epoch": 0.6053333333333333, "grad_norm": 1.6397368907928467, "learning_rate": 7.914010378057821e-05, "loss": 1.3036, "step": 4086 }, { "epoch": 0.6054814814814815, "grad_norm": 2.5899932384490967, "learning_rate": 7.911045218680504e-05, "loss": 0.8892, "step": 4087 }, { "epoch": 0.6056296296296296, "grad_norm": 4.704237461090088, "learning_rate": 7.908080059303187e-05, "loss": 1.246, "step": 4088 }, { "epoch": 0.6057777777777777, "grad_norm": 1.4413715600967407, "learning_rate": 7.905114899925872e-05, "loss": 0.9217, "step": 4089 }, { "epoch": 0.605925925925926, "grad_norm": 2.4111835956573486, "learning_rate": 7.902149740548555e-05, "loss": 1.1111, "step": 4090 }, { "epoch": 0.6060740740740741, "grad_norm": 1.3525553941726685, 
"learning_rate": 7.899184581171238e-05, "loss": 0.866, "step": 4091 }, { "epoch": 0.6062222222222222, "grad_norm": 1.4736638069152832, "learning_rate": 7.896219421793922e-05, "loss": 1.0034, "step": 4092 }, { "epoch": 0.6063703703703703, "grad_norm": 3.423008680343628, "learning_rate": 7.893254262416605e-05, "loss": 1.1446, "step": 4093 }, { "epoch": 0.6065185185185186, "grad_norm": 2.5786850452423096, "learning_rate": 7.890289103039289e-05, "loss": 1.1736, "step": 4094 }, { "epoch": 0.6066666666666667, "grad_norm": 1.7646931409835815, "learning_rate": 7.887323943661972e-05, "loss": 1.1437, "step": 4095 }, { "epoch": 0.6068148148148148, "grad_norm": 1.339115858078003, "learning_rate": 7.884358784284656e-05, "loss": 1.0239, "step": 4096 }, { "epoch": 0.6069629629629629, "grad_norm": 1.8531190156936646, "learning_rate": 7.881393624907339e-05, "loss": 1.0883, "step": 4097 }, { "epoch": 0.6071111111111112, "grad_norm": 1.5783220529556274, "learning_rate": 7.878428465530022e-05, "loss": 0.9258, "step": 4098 }, { "epoch": 0.6072592592592593, "grad_norm": 1.628716230392456, "learning_rate": 7.875463306152707e-05, "loss": 1.0058, "step": 4099 }, { "epoch": 0.6074074074074074, "grad_norm": 1.6141797304153442, "learning_rate": 7.87249814677539e-05, "loss": 0.9564, "step": 4100 }, { "epoch": 0.6075555555555555, "grad_norm": 2.645317554473877, "learning_rate": 7.869532987398073e-05, "loss": 0.9276, "step": 4101 }, { "epoch": 0.6077037037037037, "grad_norm": 1.5307976007461548, "learning_rate": 7.866567828020756e-05, "loss": 0.9949, "step": 4102 }, { "epoch": 0.6078518518518519, "grad_norm": 1.6276675462722778, "learning_rate": 7.863602668643439e-05, "loss": 1.0947, "step": 4103 }, { "epoch": 0.608, "grad_norm": 2.62444806098938, "learning_rate": 7.860637509266124e-05, "loss": 1.1976, "step": 4104 }, { "epoch": 0.6081481481481481, "grad_norm": 1.280867099761963, "learning_rate": 7.857672349888807e-05, "loss": 0.9916, "step": 4105 }, { "epoch": 0.6082962962962963, "grad_norm": 
1.7362924814224243, "learning_rate": 7.85470719051149e-05, "loss": 1.379, "step": 4106 }, { "epoch": 0.6084444444444445, "grad_norm": 1.80226731300354, "learning_rate": 7.851742031134174e-05, "loss": 1.1435, "step": 4107 }, { "epoch": 0.6085925925925926, "grad_norm": 9.848043441772461, "learning_rate": 7.848776871756857e-05, "loss": 1.0301, "step": 4108 }, { "epoch": 0.6087407407407407, "grad_norm": 1.266819953918457, "learning_rate": 7.84581171237954e-05, "loss": 0.917, "step": 4109 }, { "epoch": 0.6088888888888889, "grad_norm": 1.533785104751587, "learning_rate": 7.842846553002225e-05, "loss": 0.9731, "step": 4110 }, { "epoch": 0.609037037037037, "grad_norm": 3.039918899536133, "learning_rate": 7.839881393624908e-05, "loss": 1.0057, "step": 4111 }, { "epoch": 0.6091851851851852, "grad_norm": 1.6496689319610596, "learning_rate": 7.836916234247591e-05, "loss": 0.9676, "step": 4112 }, { "epoch": 0.6093333333333333, "grad_norm": 2.629190444946289, "learning_rate": 7.833951074870275e-05, "loss": 0.9329, "step": 4113 }, { "epoch": 0.6094814814814815, "grad_norm": 1.5872153043746948, "learning_rate": 7.830985915492957e-05, "loss": 1.3053, "step": 4114 }, { "epoch": 0.6096296296296296, "grad_norm": 2.0701448917388916, "learning_rate": 7.828020756115642e-05, "loss": 1.0366, "step": 4115 }, { "epoch": 0.6097777777777778, "grad_norm": 2.4702115058898926, "learning_rate": 7.825055596738326e-05, "loss": 1.2577, "step": 4116 }, { "epoch": 0.6099259259259259, "grad_norm": 1.9070110321044922, "learning_rate": 7.822090437361008e-05, "loss": 0.8908, "step": 4117 }, { "epoch": 0.6100740740740741, "grad_norm": 1.2122715711593628, "learning_rate": 7.819125277983692e-05, "loss": 0.9514, "step": 4118 }, { "epoch": 0.6102222222222222, "grad_norm": 2.643510580062866, "learning_rate": 7.816160118606375e-05, "loss": 1.1475, "step": 4119 }, { "epoch": 0.6103703703703703, "grad_norm": 1.505334734916687, "learning_rate": 7.813194959229058e-05, "loss": 1.0381, "step": 4120 }, { "epoch": 
0.6105185185185186, "grad_norm": 1.9772828817367554, "learning_rate": 7.810229799851743e-05, "loss": 1.0905, "step": 4121 }, { "epoch": 0.6106666666666667, "grad_norm": 1.8512394428253174, "learning_rate": 7.807264640474426e-05, "loss": 0.8946, "step": 4122 }, { "epoch": 0.6108148148148148, "grad_norm": 1.8181568384170532, "learning_rate": 7.804299481097109e-05, "loss": 1.1931, "step": 4123 }, { "epoch": 0.6109629629629629, "grad_norm": 2.4302375316619873, "learning_rate": 7.801334321719793e-05, "loss": 1.0536, "step": 4124 }, { "epoch": 0.6111111111111112, "grad_norm": 2.5360796451568604, "learning_rate": 7.798369162342477e-05, "loss": 1.1119, "step": 4125 }, { "epoch": 0.6112592592592593, "grad_norm": 2.764610767364502, "learning_rate": 7.79540400296516e-05, "loss": 1.0809, "step": 4126 }, { "epoch": 0.6114074074074074, "grad_norm": 8.232843399047852, "learning_rate": 7.792438843587844e-05, "loss": 1.0302, "step": 4127 }, { "epoch": 0.6115555555555555, "grad_norm": 2.0853981971740723, "learning_rate": 7.789473684210526e-05, "loss": 1.1032, "step": 4128 }, { "epoch": 0.6117037037037037, "grad_norm": 4.052962779998779, "learning_rate": 7.78650852483321e-05, "loss": 0.9787, "step": 4129 }, { "epoch": 0.6118518518518519, "grad_norm": 2.480470895767212, "learning_rate": 7.783543365455893e-05, "loss": 0.9696, "step": 4130 }, { "epoch": 0.612, "grad_norm": 4.445143699645996, "learning_rate": 7.780578206078576e-05, "loss": 1.2104, "step": 4131 }, { "epoch": 0.6121481481481481, "grad_norm": 2.423997402191162, "learning_rate": 7.777613046701261e-05, "loss": 0.8823, "step": 4132 }, { "epoch": 0.6122962962962963, "grad_norm": 1.5796433687210083, "learning_rate": 7.774647887323944e-05, "loss": 1.1221, "step": 4133 }, { "epoch": 0.6124444444444445, "grad_norm": 1.7948166131973267, "learning_rate": 7.771682727946627e-05, "loss": 0.9861, "step": 4134 }, { "epoch": 0.6125925925925926, "grad_norm": 2.499213933944702, "learning_rate": 7.768717568569312e-05, "loss": 0.9916, "step": 
4135 }, { "epoch": 0.6127407407407407, "grad_norm": 2.29291033744812, "learning_rate": 7.765752409191995e-05, "loss": 1.0726, "step": 4136 }, { "epoch": 0.6128888888888889, "grad_norm": 2.7643020153045654, "learning_rate": 7.762787249814678e-05, "loss": 0.9617, "step": 4137 }, { "epoch": 0.613037037037037, "grad_norm": 1.8919548988342285, "learning_rate": 7.759822090437361e-05, "loss": 0.9031, "step": 4138 }, { "epoch": 0.6131851851851852, "grad_norm": 1.8989598751068115, "learning_rate": 7.756856931060045e-05, "loss": 1.0372, "step": 4139 }, { "epoch": 0.6133333333333333, "grad_norm": 1.5658788681030273, "learning_rate": 7.753891771682728e-05, "loss": 1.273, "step": 4140 }, { "epoch": 0.6134814814814815, "grad_norm": 2.625537872314453, "learning_rate": 7.750926612305411e-05, "loss": 1.0783, "step": 4141 }, { "epoch": 0.6136296296296296, "grad_norm": 2.4267075061798096, "learning_rate": 7.747961452928096e-05, "loss": 1.094, "step": 4142 }, { "epoch": 0.6137777777777778, "grad_norm": 4.063164710998535, "learning_rate": 7.744996293550779e-05, "loss": 0.9473, "step": 4143 }, { "epoch": 0.6139259259259259, "grad_norm": 2.0295581817626953, "learning_rate": 7.742031134173462e-05, "loss": 0.7895, "step": 4144 }, { "epoch": 0.6140740740740741, "grad_norm": 1.3276896476745605, "learning_rate": 7.739065974796145e-05, "loss": 0.8924, "step": 4145 }, { "epoch": 0.6142222222222222, "grad_norm": 1.562353491783142, "learning_rate": 7.736100815418828e-05, "loss": 0.9593, "step": 4146 }, { "epoch": 0.6143703703703703, "grad_norm": 1.0941139459609985, "learning_rate": 7.733135656041513e-05, "loss": 1.0222, "step": 4147 }, { "epoch": 0.6145185185185185, "grad_norm": 2.2305283546447754, "learning_rate": 7.730170496664196e-05, "loss": 1.1955, "step": 4148 }, { "epoch": 0.6146666666666667, "grad_norm": 1.459812045097351, "learning_rate": 7.727205337286879e-05, "loss": 1.198, "step": 4149 }, { "epoch": 0.6148148148148148, "grad_norm": 1.284201979637146, "learning_rate": 
7.724240177909563e-05, "loss": 1.1772, "step": 4150 }, { "epoch": 0.6149629629629629, "grad_norm": 1.9952270984649658, "learning_rate": 7.721275018532246e-05, "loss": 1.0394, "step": 4151 }, { "epoch": 0.6151111111111112, "grad_norm": 2.0768494606018066, "learning_rate": 7.71830985915493e-05, "loss": 1.1319, "step": 4152 }, { "epoch": 0.6152592592592593, "grad_norm": 2.1895029544830322, "learning_rate": 7.715344699777614e-05, "loss": 1.1949, "step": 4153 }, { "epoch": 0.6154074074074074, "grad_norm": 1.666520118713379, "learning_rate": 7.712379540400297e-05, "loss": 0.9457, "step": 4154 }, { "epoch": 0.6155555555555555, "grad_norm": 2.864825963973999, "learning_rate": 7.70941438102298e-05, "loss": 1.1301, "step": 4155 }, { "epoch": 0.6157037037037038, "grad_norm": 1.4113929271697998, "learning_rate": 7.706449221645665e-05, "loss": 1.0233, "step": 4156 }, { "epoch": 0.6158518518518519, "grad_norm": 1.3558247089385986, "learning_rate": 7.703484062268346e-05, "loss": 0.836, "step": 4157 }, { "epoch": 0.616, "grad_norm": 2.3334364891052246, "learning_rate": 7.700518902891031e-05, "loss": 0.9674, "step": 4158 }, { "epoch": 0.6161481481481481, "grad_norm": 1.3023676872253418, "learning_rate": 7.697553743513715e-05, "loss": 0.9478, "step": 4159 }, { "epoch": 0.6162962962962963, "grad_norm": 2.030163049697876, "learning_rate": 7.694588584136397e-05, "loss": 1.3565, "step": 4160 }, { "epoch": 0.6164444444444445, "grad_norm": 1.7704468965530396, "learning_rate": 7.691623424759081e-05, "loss": 1.2324, "step": 4161 }, { "epoch": 0.6165925925925926, "grad_norm": 1.984651803970337, "learning_rate": 7.688658265381764e-05, "loss": 0.9833, "step": 4162 }, { "epoch": 0.6167407407407407, "grad_norm": 4.376496315002441, "learning_rate": 7.685693106004448e-05, "loss": 1.0987, "step": 4163 }, { "epoch": 0.6168888888888889, "grad_norm": 1.5003368854522705, "learning_rate": 7.682727946627132e-05, "loss": 0.9752, "step": 4164 }, { "epoch": 0.617037037037037, "grad_norm": 2.00019907951355, 
"learning_rate": 7.679762787249815e-05, "loss": 1.0655, "step": 4165 }, { "epoch": 0.6171851851851852, "grad_norm": 2.5613484382629395, "learning_rate": 7.676797627872498e-05, "loss": 0.9663, "step": 4166 }, { "epoch": 0.6173333333333333, "grad_norm": 1.5503530502319336, "learning_rate": 7.673832468495183e-05, "loss": 0.9448, "step": 4167 }, { "epoch": 0.6174814814814815, "grad_norm": 2.6386940479278564, "learning_rate": 7.670867309117866e-05, "loss": 1.0828, "step": 4168 }, { "epoch": 0.6176296296296296, "grad_norm": 1.0050711631774902, "learning_rate": 7.667902149740549e-05, "loss": 0.7964, "step": 4169 }, { "epoch": 0.6177777777777778, "grad_norm": 1.5181121826171875, "learning_rate": 7.664936990363233e-05, "loss": 0.6968, "step": 4170 }, { "epoch": 0.6179259259259259, "grad_norm": 1.0668538808822632, "learning_rate": 7.661971830985915e-05, "loss": 1.2148, "step": 4171 }, { "epoch": 0.6180740740740741, "grad_norm": 2.245246410369873, "learning_rate": 7.6590066716086e-05, "loss": 0.9375, "step": 4172 }, { "epoch": 0.6182222222222222, "grad_norm": 1.1597541570663452, "learning_rate": 7.656041512231283e-05, "loss": 0.7565, "step": 4173 }, { "epoch": 0.6183703703703703, "grad_norm": 2.1912178993225098, "learning_rate": 7.653076352853966e-05, "loss": 1.1889, "step": 4174 }, { "epoch": 0.6185185185185185, "grad_norm": 1.9651681184768677, "learning_rate": 7.65011119347665e-05, "loss": 0.99, "step": 4175 }, { "epoch": 0.6186666666666667, "grad_norm": 1.5237642526626587, "learning_rate": 7.647146034099333e-05, "loss": 0.9575, "step": 4176 }, { "epoch": 0.6188148148148148, "grad_norm": 1.6119823455810547, "learning_rate": 7.644180874722016e-05, "loss": 0.9707, "step": 4177 }, { "epoch": 0.6189629629629629, "grad_norm": 1.219313621520996, "learning_rate": 7.641215715344701e-05, "loss": 0.7978, "step": 4178 }, { "epoch": 0.6191111111111111, "grad_norm": 2.2975165843963623, "learning_rate": 7.638250555967384e-05, "loss": 1.1831, "step": 4179 }, { "epoch": 0.6192592592592593, 
"grad_norm": 1.8074194192886353, "learning_rate": 7.635285396590067e-05, "loss": 1.0527, "step": 4180 }, { "epoch": 0.6194074074074074, "grad_norm": 2.2357099056243896, "learning_rate": 7.63232023721275e-05, "loss": 1.0034, "step": 4181 }, { "epoch": 0.6195555555555555, "grad_norm": 1.9494400024414062, "learning_rate": 7.629355077835434e-05, "loss": 0.9735, "step": 4182 }, { "epoch": 0.6197037037037038, "grad_norm": 1.4775364398956299, "learning_rate": 7.626389918458118e-05, "loss": 1.0355, "step": 4183 }, { "epoch": 0.6198518518518519, "grad_norm": 1.3309204578399658, "learning_rate": 7.6234247590808e-05, "loss": 1.2182, "step": 4184 }, { "epoch": 0.62, "grad_norm": 2.209710121154785, "learning_rate": 7.620459599703485e-05, "loss": 1.266, "step": 4185 }, { "epoch": 0.6201481481481481, "grad_norm": 3.240086078643799, "learning_rate": 7.617494440326168e-05, "loss": 0.8897, "step": 4186 }, { "epoch": 0.6202962962962963, "grad_norm": 2.5816378593444824, "learning_rate": 7.614529280948851e-05, "loss": 0.9752, "step": 4187 }, { "epoch": 0.6204444444444445, "grad_norm": 2.542442560195923, "learning_rate": 7.611564121571534e-05, "loss": 0.9243, "step": 4188 }, { "epoch": 0.6205925925925926, "grad_norm": 2.0120692253112793, "learning_rate": 7.608598962194217e-05, "loss": 1.1543, "step": 4189 }, { "epoch": 0.6207407407407407, "grad_norm": 1.7393430471420288, "learning_rate": 7.605633802816902e-05, "loss": 1.0426, "step": 4190 }, { "epoch": 0.6208888888888889, "grad_norm": 1.8278281688690186, "learning_rate": 7.602668643439585e-05, "loss": 0.929, "step": 4191 }, { "epoch": 0.621037037037037, "grad_norm": 1.3061013221740723, "learning_rate": 7.599703484062268e-05, "loss": 1.0186, "step": 4192 }, { "epoch": 0.6211851851851852, "grad_norm": 2.045588493347168, "learning_rate": 7.596738324684952e-05, "loss": 0.7341, "step": 4193 }, { "epoch": 0.6213333333333333, "grad_norm": 1.4220703840255737, "learning_rate": 7.593773165307636e-05, "loss": 0.9295, "step": 4194 }, { "epoch": 
0.6214814814814815, "grad_norm": 3.4312973022460938, "learning_rate": 7.590808005930319e-05, "loss": 1.0083, "step": 4195 }, { "epoch": 0.6216296296296296, "grad_norm": 3.1611483097076416, "learning_rate": 7.587842846553003e-05, "loss": 1.0974, "step": 4196 }, { "epoch": 0.6217777777777778, "grad_norm": 2.1379802227020264, "learning_rate": 7.584877687175686e-05, "loss": 1.2359, "step": 4197 }, { "epoch": 0.6219259259259259, "grad_norm": 1.9825153350830078, "learning_rate": 7.581912527798369e-05, "loss": 1.1976, "step": 4198 }, { "epoch": 0.6220740740740741, "grad_norm": 1.353455901145935, "learning_rate": 7.578947368421054e-05, "loss": 0.9957, "step": 4199 }, { "epoch": 0.6222222222222222, "grad_norm": 1.3882861137390137, "learning_rate": 7.575982209043735e-05, "loss": 1.0771, "step": 4200 }, { "epoch": 0.6223703703703704, "grad_norm": 1.9619619846343994, "learning_rate": 7.57301704966642e-05, "loss": 0.7962, "step": 4201 }, { "epoch": 0.6225185185185185, "grad_norm": 1.5832520723342896, "learning_rate": 7.570051890289104e-05, "loss": 0.9227, "step": 4202 }, { "epoch": 0.6226666666666667, "grad_norm": 2.1912996768951416, "learning_rate": 7.567086730911786e-05, "loss": 1.1511, "step": 4203 }, { "epoch": 0.6228148148148148, "grad_norm": 1.8055832386016846, "learning_rate": 7.56412157153447e-05, "loss": 0.9385, "step": 4204 }, { "epoch": 0.6229629629629629, "grad_norm": 2.094910144805908, "learning_rate": 7.561156412157154e-05, "loss": 1.0901, "step": 4205 }, { "epoch": 0.6231111111111111, "grad_norm": 1.8955321311950684, "learning_rate": 7.558191252779837e-05, "loss": 1.0497, "step": 4206 }, { "epoch": 0.6232592592592593, "grad_norm": 12.515061378479004, "learning_rate": 7.555226093402521e-05, "loss": 1.021, "step": 4207 }, { "epoch": 0.6234074074074074, "grad_norm": 1.5139273405075073, "learning_rate": 7.552260934025204e-05, "loss": 1.0022, "step": 4208 }, { "epoch": 0.6235555555555555, "grad_norm": 1.5754523277282715, "learning_rate": 7.549295774647887e-05, "loss": 
1.2412, "step": 4209 }, { "epoch": 0.6237037037037036, "grad_norm": 1.48861563205719, "learning_rate": 7.546330615270572e-05, "loss": 0.9615, "step": 4210 }, { "epoch": 0.6238518518518519, "grad_norm": 1.5839914083480835, "learning_rate": 7.543365455893255e-05, "loss": 1.0142, "step": 4211 }, { "epoch": 0.624, "grad_norm": 2.623595714569092, "learning_rate": 7.540400296515938e-05, "loss": 1.0416, "step": 4212 }, { "epoch": 0.6241481481481481, "grad_norm": 2.2146899700164795, "learning_rate": 7.537435137138622e-05, "loss": 1.267, "step": 4213 }, { "epoch": 0.6242962962962963, "grad_norm": 2.8189847469329834, "learning_rate": 7.534469977761304e-05, "loss": 1.1923, "step": 4214 }, { "epoch": 0.6244444444444445, "grad_norm": 1.65372633934021, "learning_rate": 7.531504818383989e-05, "loss": 0.9996, "step": 4215 }, { "epoch": 0.6245925925925926, "grad_norm": 1.4494138956069946, "learning_rate": 7.528539659006672e-05, "loss": 0.9434, "step": 4216 }, { "epoch": 0.6247407407407407, "grad_norm": 2.095656156539917, "learning_rate": 7.525574499629355e-05, "loss": 1.0989, "step": 4217 }, { "epoch": 0.6248888888888889, "grad_norm": 1.9197618961334229, "learning_rate": 7.522609340252039e-05, "loss": 1.2848, "step": 4218 }, { "epoch": 0.6250370370370371, "grad_norm": 2.0667176246643066, "learning_rate": 7.519644180874722e-05, "loss": 0.8713, "step": 4219 }, { "epoch": 0.6251851851851852, "grad_norm": 1.7209436893463135, "learning_rate": 7.516679021497405e-05, "loss": 1.1994, "step": 4220 }, { "epoch": 0.6253333333333333, "grad_norm": 1.2906612157821655, "learning_rate": 7.51371386212009e-05, "loss": 1.1263, "step": 4221 }, { "epoch": 0.6254814814814815, "grad_norm": 2.0552279949188232, "learning_rate": 7.510748702742773e-05, "loss": 1.0431, "step": 4222 }, { "epoch": 0.6256296296296296, "grad_norm": 1.662018895149231, "learning_rate": 7.507783543365456e-05, "loss": 1.1519, "step": 4223 }, { "epoch": 0.6257777777777778, "grad_norm": 2.582836866378784, "learning_rate": 
7.504818383988139e-05, "loss": 0.9639, "step": 4224 }, { "epoch": 0.6259259259259259, "grad_norm": 1.4937838315963745, "learning_rate": 7.501853224610824e-05, "loss": 1.0501, "step": 4225 }, { "epoch": 0.6260740740740741, "grad_norm": 1.4058259725570679, "learning_rate": 7.498888065233507e-05, "loss": 0.8024, "step": 4226 }, { "epoch": 0.6262222222222222, "grad_norm": 1.589437484741211, "learning_rate": 7.49592290585619e-05, "loss": 1.1069, "step": 4227 }, { "epoch": 0.6263703703703704, "grad_norm": 1.9938610792160034, "learning_rate": 7.492957746478874e-05, "loss": 1.169, "step": 4228 }, { "epoch": 0.6265185185185185, "grad_norm": 1.3336560726165771, "learning_rate": 7.489992587101557e-05, "loss": 1.2742, "step": 4229 }, { "epoch": 0.6266666666666667, "grad_norm": 2.6536037921905518, "learning_rate": 7.48702742772424e-05, "loss": 1.2188, "step": 4230 }, { "epoch": 0.6268148148148148, "grad_norm": 1.3016421794891357, "learning_rate": 7.484062268346923e-05, "loss": 0.9154, "step": 4231 }, { "epoch": 0.6269629629629629, "grad_norm": 1.5644383430480957, "learning_rate": 7.481097108969608e-05, "loss": 1.0773, "step": 4232 }, { "epoch": 0.6271111111111111, "grad_norm": 1.9621175527572632, "learning_rate": 7.478131949592291e-05, "loss": 0.9856, "step": 4233 }, { "epoch": 0.6272592592592593, "grad_norm": 1.5468896627426147, "learning_rate": 7.475166790214974e-05, "loss": 0.9565, "step": 4234 }, { "epoch": 0.6274074074074074, "grad_norm": 1.7012988328933716, "learning_rate": 7.472201630837657e-05, "loss": 0.9858, "step": 4235 }, { "epoch": 0.6275555555555555, "grad_norm": 1.4528248310089111, "learning_rate": 7.469236471460342e-05, "loss": 0.854, "step": 4236 }, { "epoch": 0.6277037037037037, "grad_norm": 2.4127585887908936, "learning_rate": 7.466271312083025e-05, "loss": 1.1117, "step": 4237 }, { "epoch": 0.6278518518518519, "grad_norm": 2.762421131134033, "learning_rate": 7.463306152705708e-05, "loss": 0.9908, "step": 4238 }, { "epoch": 0.628, "grad_norm": 
1.190571665763855, "learning_rate": 7.460340993328392e-05, "loss": 0.83, "step": 4239 }, { "epoch": 0.6281481481481481, "grad_norm": 1.5424587726593018, "learning_rate": 7.457375833951075e-05, "loss": 1.1369, "step": 4240 }, { "epoch": 0.6282962962962962, "grad_norm": 2.1614809036254883, "learning_rate": 7.454410674573758e-05, "loss": 1.0667, "step": 4241 }, { "epoch": 0.6284444444444445, "grad_norm": 1.5766479969024658, "learning_rate": 7.451445515196443e-05, "loss": 0.934, "step": 4242 }, { "epoch": 0.6285925925925926, "grad_norm": 1.3466310501098633, "learning_rate": 7.448480355819125e-05, "loss": 1.0371, "step": 4243 }, { "epoch": 0.6287407407407407, "grad_norm": 2.342902183532715, "learning_rate": 7.445515196441809e-05, "loss": 1.1869, "step": 4244 }, { "epoch": 0.6288888888888889, "grad_norm": 1.3603520393371582, "learning_rate": 7.442550037064494e-05, "loss": 1.2303, "step": 4245 }, { "epoch": 0.6290370370370371, "grad_norm": 1.6758443117141724, "learning_rate": 7.439584877687175e-05, "loss": 1.1124, "step": 4246 }, { "epoch": 0.6291851851851852, "grad_norm": 2.4252185821533203, "learning_rate": 7.43661971830986e-05, "loss": 1.0878, "step": 4247 }, { "epoch": 0.6293333333333333, "grad_norm": 2.861835241317749, "learning_rate": 7.433654558932543e-05, "loss": 1.0192, "step": 4248 }, { "epoch": 0.6294814814814815, "grad_norm": 1.5305482149124146, "learning_rate": 7.430689399555226e-05, "loss": 1.0219, "step": 4249 }, { "epoch": 0.6296296296296297, "grad_norm": 1.8694067001342773, "learning_rate": 7.42772424017791e-05, "loss": 1.1271, "step": 4250 }, { "epoch": 0.6297777777777778, "grad_norm": 4.762666702270508, "learning_rate": 7.424759080800593e-05, "loss": 0.9223, "step": 4251 }, { "epoch": 0.6299259259259259, "grad_norm": 1.920195460319519, "learning_rate": 7.421793921423277e-05, "loss": 1.1276, "step": 4252 }, { "epoch": 0.6300740740740741, "grad_norm": 2.142634153366089, "learning_rate": 7.418828762045961e-05, "loss": 0.878, "step": 4253 }, { "epoch": 
0.6302222222222222, "grad_norm": 1.4534026384353638, "learning_rate": 7.415863602668644e-05, "loss": 1.0016, "step": 4254 }, { "epoch": 0.6303703703703704, "grad_norm": 1.5006121397018433, "learning_rate": 7.412898443291327e-05, "loss": 0.6872, "step": 4255 }, { "epoch": 0.6305185185185185, "grad_norm": 1.8782285451889038, "learning_rate": 7.409933283914012e-05, "loss": 1.0539, "step": 4256 }, { "epoch": 0.6306666666666667, "grad_norm": 1.6077821254730225, "learning_rate": 7.406968124536693e-05, "loss": 0.9447, "step": 4257 }, { "epoch": 0.6308148148148148, "grad_norm": 1.6158119440078735, "learning_rate": 7.404002965159378e-05, "loss": 1.2271, "step": 4258 }, { "epoch": 0.630962962962963, "grad_norm": 1.2205272912979126, "learning_rate": 7.401037805782061e-05, "loss": 0.9001, "step": 4259 }, { "epoch": 0.6311111111111111, "grad_norm": 6.948575019836426, "learning_rate": 7.398072646404744e-05, "loss": 1.2105, "step": 4260 }, { "epoch": 0.6312592592592593, "grad_norm": 1.7600739002227783, "learning_rate": 7.395107487027428e-05, "loss": 1.3201, "step": 4261 }, { "epoch": 0.6314074074074074, "grad_norm": 1.9330729246139526, "learning_rate": 7.392142327650112e-05, "loss": 0.8782, "step": 4262 }, { "epoch": 0.6315555555555555, "grad_norm": 1.58470618724823, "learning_rate": 7.389177168272795e-05, "loss": 0.9092, "step": 4263 }, { "epoch": 0.6317037037037037, "grad_norm": 1.2753225564956665, "learning_rate": 7.386212008895479e-05, "loss": 0.8799, "step": 4264 }, { "epoch": 0.6318518518518519, "grad_norm": 1.2597649097442627, "learning_rate": 7.383246849518162e-05, "loss": 0.9713, "step": 4265 }, { "epoch": 0.632, "grad_norm": 1.6237164735794067, "learning_rate": 7.380281690140845e-05, "loss": 1.3855, "step": 4266 }, { "epoch": 0.6321481481481481, "grad_norm": 1.4116196632385254, "learning_rate": 7.377316530763528e-05, "loss": 0.8775, "step": 4267 }, { "epoch": 0.6322962962962962, "grad_norm": 1.9042555093765259, "learning_rate": 7.374351371386213e-05, "loss": 0.9302, 
"step": 4268 }, { "epoch": 0.6324444444444445, "grad_norm": 1.2561272382736206, "learning_rate": 7.371386212008896e-05, "loss": 1.0513, "step": 4269 }, { "epoch": 0.6325925925925926, "grad_norm": 1.7997751235961914, "learning_rate": 7.368421052631579e-05, "loss": 1.2709, "step": 4270 }, { "epoch": 0.6327407407407407, "grad_norm": 1.4882794618606567, "learning_rate": 7.365455893254263e-05, "loss": 0.9939, "step": 4271 }, { "epoch": 0.6328888888888888, "grad_norm": 1.924720048904419, "learning_rate": 7.362490733876946e-05, "loss": 0.9963, "step": 4272 }, { "epoch": 0.6330370370370371, "grad_norm": 1.4113523960113525, "learning_rate": 7.35952557449963e-05, "loss": 0.9659, "step": 4273 }, { "epoch": 0.6331851851851852, "grad_norm": 1.4288746118545532, "learning_rate": 7.356560415122313e-05, "loss": 0.9953, "step": 4274 }, { "epoch": 0.6333333333333333, "grad_norm": 2.565685272216797, "learning_rate": 7.353595255744997e-05, "loss": 0.9741, "step": 4275 }, { "epoch": 0.6334814814814815, "grad_norm": 1.199468731880188, "learning_rate": 7.35063009636768e-05, "loss": 1.2029, "step": 4276 }, { "epoch": 0.6336296296296297, "grad_norm": 1.4904115200042725, "learning_rate": 7.347664936990363e-05, "loss": 1.0986, "step": 4277 }, { "epoch": 0.6337777777777778, "grad_norm": 1.748367428779602, "learning_rate": 7.344699777613046e-05, "loss": 0.882, "step": 4278 }, { "epoch": 0.6339259259259259, "grad_norm": 1.7592281103134155, "learning_rate": 7.341734618235731e-05, "loss": 1.0141, "step": 4279 }, { "epoch": 0.6340740740740741, "grad_norm": 1.6296261548995972, "learning_rate": 7.338769458858414e-05, "loss": 0.9284, "step": 4280 }, { "epoch": 0.6342222222222222, "grad_norm": 6.17998743057251, "learning_rate": 7.335804299481097e-05, "loss": 0.9908, "step": 4281 }, { "epoch": 0.6343703703703704, "grad_norm": 1.2737665176391602, "learning_rate": 7.332839140103781e-05, "loss": 0.9491, "step": 4282 }, { "epoch": 0.6345185185185185, "grad_norm": 1.1072090864181519, "learning_rate": 
7.329873980726465e-05, "loss": 1.0856, "step": 4283 }, { "epoch": 0.6346666666666667, "grad_norm": 1.6702488660812378, "learning_rate": 7.326908821349148e-05, "loss": 1.187, "step": 4284 }, { "epoch": 0.6348148148148148, "grad_norm": 1.4215946197509766, "learning_rate": 7.323943661971832e-05, "loss": 0.8082, "step": 4285 }, { "epoch": 0.634962962962963, "grad_norm": 1.2191710472106934, "learning_rate": 7.320978502594514e-05, "loss": 0.9704, "step": 4286 }, { "epoch": 0.6351111111111111, "grad_norm": 1.2213389873504639, "learning_rate": 7.318013343217198e-05, "loss": 0.9641, "step": 4287 }, { "epoch": 0.6352592592592593, "grad_norm": 4.540380001068115, "learning_rate": 7.315048183839883e-05, "loss": 0.9253, "step": 4288 }, { "epoch": 0.6354074074074074, "grad_norm": 1.5974304676055908, "learning_rate": 7.312083024462564e-05, "loss": 1.3511, "step": 4289 }, { "epoch": 0.6355555555555555, "grad_norm": 1.6083300113677979, "learning_rate": 7.309117865085249e-05, "loss": 0.958, "step": 4290 }, { "epoch": 0.6357037037037037, "grad_norm": 1.281067967414856, "learning_rate": 7.306152705707932e-05, "loss": 0.8817, "step": 4291 }, { "epoch": 0.6358518518518519, "grad_norm": 1.8055102825164795, "learning_rate": 7.303187546330615e-05, "loss": 1.3341, "step": 4292 }, { "epoch": 0.636, "grad_norm": 1.7039903402328491, "learning_rate": 7.3002223869533e-05, "loss": 1.0581, "step": 4293 }, { "epoch": 0.6361481481481481, "grad_norm": 1.5735193490982056, "learning_rate": 7.297257227575983e-05, "loss": 1.0945, "step": 4294 }, { "epoch": 0.6362962962962962, "grad_norm": 2.5691006183624268, "learning_rate": 7.294292068198666e-05, "loss": 0.9692, "step": 4295 }, { "epoch": 0.6364444444444445, "grad_norm": 1.5633037090301514, "learning_rate": 7.29132690882135e-05, "loss": 0.9985, "step": 4296 }, { "epoch": 0.6365925925925926, "grad_norm": 1.8769036531448364, "learning_rate": 7.288361749444033e-05, "loss": 1.1071, "step": 4297 }, { "epoch": 0.6367407407407407, "grad_norm": 
1.3452436923980713, "learning_rate": 7.285396590066716e-05, "loss": 1.0296, "step": 4298 }, { "epoch": 0.6368888888888888, "grad_norm": 1.531742811203003, "learning_rate": 7.282431430689401e-05, "loss": 1.1471, "step": 4299 }, { "epoch": 0.6370370370370371, "grad_norm": 1.4082554578781128, "learning_rate": 7.279466271312083e-05, "loss": 1.0521, "step": 4300 }, { "epoch": 0.6371851851851852, "grad_norm": 1.5034687519073486, "learning_rate": 7.276501111934767e-05, "loss": 1.1396, "step": 4301 }, { "epoch": 0.6373333333333333, "grad_norm": 4.933014392852783, "learning_rate": 7.27353595255745e-05, "loss": 1.2253, "step": 4302 }, { "epoch": 0.6374814814814814, "grad_norm": 1.5926105976104736, "learning_rate": 7.270570793180133e-05, "loss": 1.2105, "step": 4303 }, { "epoch": 0.6376296296296297, "grad_norm": 3.1412436962127686, "learning_rate": 7.267605633802818e-05, "loss": 1.0453, "step": 4304 }, { "epoch": 0.6377777777777778, "grad_norm": 1.523639440536499, "learning_rate": 7.2646404744255e-05, "loss": 0.7762, "step": 4305 }, { "epoch": 0.6379259259259259, "grad_norm": 1.5867841243743896, "learning_rate": 7.261675315048184e-05, "loss": 0.8973, "step": 4306 }, { "epoch": 0.6380740740740741, "grad_norm": 1.5310838222503662, "learning_rate": 7.258710155670868e-05, "loss": 1.4496, "step": 4307 }, { "epoch": 0.6382222222222222, "grad_norm": 1.4425774812698364, "learning_rate": 7.255744996293551e-05, "loss": 1.047, "step": 4308 }, { "epoch": 0.6383703703703704, "grad_norm": 1.5306428670883179, "learning_rate": 7.252779836916234e-05, "loss": 0.8203, "step": 4309 }, { "epoch": 0.6385185185185185, "grad_norm": 1.9451457262039185, "learning_rate": 7.249814677538917e-05, "loss": 1.117, "step": 4310 }, { "epoch": 0.6386666666666667, "grad_norm": 0.9834615588188171, "learning_rate": 7.246849518161602e-05, "loss": 0.9603, "step": 4311 }, { "epoch": 0.6388148148148148, "grad_norm": 1.9894959926605225, "learning_rate": 7.243884358784285e-05, "loss": 1.0404, "step": 4312 }, { "epoch": 
0.638962962962963, "grad_norm": 1.2576085329055786, "learning_rate": 7.240919199406968e-05, "loss": 0.8318, "step": 4313 }, { "epoch": 0.6391111111111111, "grad_norm": 2.3639659881591797, "learning_rate": 7.237954040029653e-05, "loss": 0.9326, "step": 4314 }, { "epoch": 0.6392592592592593, "grad_norm": 1.5994187593460083, "learning_rate": 7.234988880652336e-05, "loss": 0.9818, "step": 4315 }, { "epoch": 0.6394074074074074, "grad_norm": 1.3508126735687256, "learning_rate": 7.232023721275019e-05, "loss": 1.0248, "step": 4316 }, { "epoch": 0.6395555555555555, "grad_norm": 2.3110697269439697, "learning_rate": 7.229058561897702e-05, "loss": 0.9616, "step": 4317 }, { "epoch": 0.6397037037037037, "grad_norm": 1.4802743196487427, "learning_rate": 7.226093402520386e-05, "loss": 1.0018, "step": 4318 }, { "epoch": 0.6398518518518519, "grad_norm": 1.8502402305603027, "learning_rate": 7.22312824314307e-05, "loss": 1.1073, "step": 4319 }, { "epoch": 0.64, "grad_norm": 2.272400140762329, "learning_rate": 7.220163083765752e-05, "loss": 1.0234, "step": 4320 }, { "epoch": 0.6401481481481481, "grad_norm": 1.1387325525283813, "learning_rate": 7.217197924388436e-05, "loss": 0.8343, "step": 4321 }, { "epoch": 0.6402962962962963, "grad_norm": 1.1921565532684326, "learning_rate": 7.21423276501112e-05, "loss": 1.0573, "step": 4322 }, { "epoch": 0.6404444444444445, "grad_norm": 1.639599084854126, "learning_rate": 7.211267605633803e-05, "loss": 0.7863, "step": 4323 }, { "epoch": 0.6405925925925926, "grad_norm": 1.4308897256851196, "learning_rate": 7.208302446256486e-05, "loss": 0.7823, "step": 4324 }, { "epoch": 0.6407407407407407, "grad_norm": 3.084224224090576, "learning_rate": 7.20533728687917e-05, "loss": 1.2159, "step": 4325 }, { "epoch": 0.6408888888888888, "grad_norm": 1.5659797191619873, "learning_rate": 7.202372127501854e-05, "loss": 1.1887, "step": 4326 }, { "epoch": 0.6410370370370371, "grad_norm": 1.1393101215362549, "learning_rate": 7.199406968124537e-05, "loss": 0.8358, "step": 
4327 }, { "epoch": 0.6411851851851852, "grad_norm": 1.5044102668762207, "learning_rate": 7.196441808747221e-05, "loss": 0.942, "step": 4328 }, { "epoch": 0.6413333333333333, "grad_norm": 1.4487009048461914, "learning_rate": 7.193476649369903e-05, "loss": 0.9936, "step": 4329 }, { "epoch": 0.6414814814814814, "grad_norm": 1.853836178779602, "learning_rate": 7.190511489992587e-05, "loss": 1.1098, "step": 4330 }, { "epoch": 0.6416296296296297, "grad_norm": 3.4412965774536133, "learning_rate": 7.187546330615272e-05, "loss": 0.8773, "step": 4331 }, { "epoch": 0.6417777777777778, "grad_norm": 3.3973379135131836, "learning_rate": 7.184581171237954e-05, "loss": 1.0711, "step": 4332 }, { "epoch": 0.6419259259259259, "grad_norm": 1.1628870964050293, "learning_rate": 7.181616011860638e-05, "loss": 0.9314, "step": 4333 }, { "epoch": 0.642074074074074, "grad_norm": 1.3469637632369995, "learning_rate": 7.178650852483321e-05, "loss": 0.9951, "step": 4334 }, { "epoch": 0.6422222222222222, "grad_norm": 1.5088274478912354, "learning_rate": 7.175685693106004e-05, "loss": 0.8278, "step": 4335 }, { "epoch": 0.6423703703703704, "grad_norm": 1.4323192834854126, "learning_rate": 7.172720533728689e-05, "loss": 0.9538, "step": 4336 }, { "epoch": 0.6425185185185185, "grad_norm": 1.4325075149536133, "learning_rate": 7.169755374351372e-05, "loss": 1.0913, "step": 4337 }, { "epoch": 0.6426666666666667, "grad_norm": 1.6242316961288452, "learning_rate": 7.166790214974055e-05, "loss": 1.1175, "step": 4338 }, { "epoch": 0.6428148148148148, "grad_norm": 2.5923986434936523, "learning_rate": 7.163825055596739e-05, "loss": 1.1285, "step": 4339 }, { "epoch": 0.642962962962963, "grad_norm": 1.162093162536621, "learning_rate": 7.160859896219422e-05, "loss": 0.9083, "step": 4340 }, { "epoch": 0.6431111111111111, "grad_norm": 1.2753523588180542, "learning_rate": 7.157894736842105e-05, "loss": 0.9541, "step": 4341 }, { "epoch": 0.6432592592592593, "grad_norm": 1.8385745286941528, "learning_rate": 
7.15492957746479e-05, "loss": 0.9997, "step": 4342 }, { "epoch": 0.6434074074074074, "grad_norm": 1.579655647277832, "learning_rate": 7.151964418087472e-05, "loss": 1.3064, "step": 4343 }, { "epoch": 0.6435555555555555, "grad_norm": 1.538081169128418, "learning_rate": 7.148999258710156e-05, "loss": 1.0038, "step": 4344 }, { "epoch": 0.6437037037037037, "grad_norm": 1.4161475896835327, "learning_rate": 7.146034099332839e-05, "loss": 0.8822, "step": 4345 }, { "epoch": 0.6438518518518519, "grad_norm": 1.350731611251831, "learning_rate": 7.143068939955522e-05, "loss": 1.0213, "step": 4346 }, { "epoch": 0.644, "grad_norm": 2.6670544147491455, "learning_rate": 7.140103780578207e-05, "loss": 1.2181, "step": 4347 }, { "epoch": 0.6441481481481481, "grad_norm": 1.6661410331726074, "learning_rate": 7.13713862120089e-05, "loss": 0.9935, "step": 4348 }, { "epoch": 0.6442962962962963, "grad_norm": 1.722327709197998, "learning_rate": 7.134173461823573e-05, "loss": 0.996, "step": 4349 }, { "epoch": 0.6444444444444445, "grad_norm": 1.233064889907837, "learning_rate": 7.131208302446257e-05, "loss": 1.0661, "step": 4350 }, { "epoch": 0.6445925925925926, "grad_norm": 1.1755728721618652, "learning_rate": 7.12824314306894e-05, "loss": 0.9878, "step": 4351 }, { "epoch": 0.6447407407407407, "grad_norm": 2.2716925144195557, "learning_rate": 7.125277983691624e-05, "loss": 0.8776, "step": 4352 }, { "epoch": 0.6448888888888888, "grad_norm": 1.3941679000854492, "learning_rate": 7.122312824314307e-05, "loss": 1.3136, "step": 4353 }, { "epoch": 0.6450370370370371, "grad_norm": 1.9948437213897705, "learning_rate": 7.119347664936991e-05, "loss": 1.3365, "step": 4354 }, { "epoch": 0.6451851851851852, "grad_norm": 1.6200815439224243, "learning_rate": 7.116382505559674e-05, "loss": 1.1891, "step": 4355 }, { "epoch": 0.6453333333333333, "grad_norm": 1.497673511505127, "learning_rate": 7.113417346182357e-05, "loss": 0.9411, "step": 4356 }, { "epoch": 0.6454814814814814, "grad_norm": 1.9665746688842773, 
"learning_rate": 7.110452186805042e-05, "loss": 0.7485, "step": 4357 }, { "epoch": 0.6456296296296297, "grad_norm": 1.8524481058120728, "learning_rate": 7.107487027427725e-05, "loss": 0.9909, "step": 4358 }, { "epoch": 0.6457777777777778, "grad_norm": 1.7892533540725708, "learning_rate": 7.104521868050408e-05, "loss": 1.0559, "step": 4359 }, { "epoch": 0.6459259259259259, "grad_norm": 2.46882963180542, "learning_rate": 7.101556708673091e-05, "loss": 0.8663, "step": 4360 }, { "epoch": 0.646074074074074, "grad_norm": 1.3626261949539185, "learning_rate": 7.098591549295775e-05, "loss": 1.0529, "step": 4361 }, { "epoch": 0.6462222222222223, "grad_norm": 1.4369004964828491, "learning_rate": 7.095626389918459e-05, "loss": 1.3747, "step": 4362 }, { "epoch": 0.6463703703703704, "grad_norm": 1.6594363451004028, "learning_rate": 7.092661230541142e-05, "loss": 1.0476, "step": 4363 }, { "epoch": 0.6465185185185185, "grad_norm": 1.728981614112854, "learning_rate": 7.089696071163825e-05, "loss": 0.9783, "step": 4364 }, { "epoch": 0.6466666666666666, "grad_norm": 1.7096563577651978, "learning_rate": 7.086730911786509e-05, "loss": 1.0713, "step": 4365 }, { "epoch": 0.6468148148148148, "grad_norm": 2.27201509475708, "learning_rate": 7.083765752409192e-05, "loss": 0.9459, "step": 4366 }, { "epoch": 0.646962962962963, "grad_norm": 3.5282950401306152, "learning_rate": 7.080800593031875e-05, "loss": 1.1179, "step": 4367 }, { "epoch": 0.6471111111111111, "grad_norm": 6.547924518585205, "learning_rate": 7.07783543365456e-05, "loss": 1.1707, "step": 4368 }, { "epoch": 0.6472592592592593, "grad_norm": 1.124451994895935, "learning_rate": 7.074870274277243e-05, "loss": 0.9589, "step": 4369 }, { "epoch": 0.6474074074074074, "grad_norm": 1.4242360591888428, "learning_rate": 7.071905114899926e-05, "loss": 1.1477, "step": 4370 }, { "epoch": 0.6475555555555556, "grad_norm": 1.1883344650268555, "learning_rate": 7.06893995552261e-05, "loss": 1.0449, "step": 4371 }, { "epoch": 0.6477037037037037, 
"grad_norm": 1.572464942932129, "learning_rate": 7.065974796145292e-05, "loss": 1.1263, "step": 4372 }, { "epoch": 0.6478518518518519, "grad_norm": 1.5645495653152466, "learning_rate": 7.063009636767977e-05, "loss": 1.0772, "step": 4373 }, { "epoch": 0.648, "grad_norm": 1.2831021547317505, "learning_rate": 7.060044477390661e-05, "loss": 1.0004, "step": 4374 }, { "epoch": 0.6481481481481481, "grad_norm": 2.3488590717315674, "learning_rate": 7.057079318013343e-05, "loss": 1.2111, "step": 4375 }, { "epoch": 0.6482962962962963, "grad_norm": 2.050532341003418, "learning_rate": 7.054114158636027e-05, "loss": 1.2414, "step": 4376 }, { "epoch": 0.6484444444444445, "grad_norm": 1.1811741590499878, "learning_rate": 7.05114899925871e-05, "loss": 1.1716, "step": 4377 }, { "epoch": 0.6485925925925926, "grad_norm": 1.6432982683181763, "learning_rate": 7.048183839881393e-05, "loss": 0.9345, "step": 4378 }, { "epoch": 0.6487407407407407, "grad_norm": 1.1864477396011353, "learning_rate": 7.045218680504078e-05, "loss": 0.8852, "step": 4379 }, { "epoch": 0.6488888888888888, "grad_norm": 1.747707724571228, "learning_rate": 7.042253521126761e-05, "loss": 1.1688, "step": 4380 }, { "epoch": 0.6490370370370371, "grad_norm": 1.7994520664215088, "learning_rate": 7.039288361749444e-05, "loss": 1.3091, "step": 4381 }, { "epoch": 0.6491851851851852, "grad_norm": 2.1519625186920166, "learning_rate": 7.036323202372128e-05, "loss": 1.2301, "step": 4382 }, { "epoch": 0.6493333333333333, "grad_norm": 2.0501434803009033, "learning_rate": 7.033358042994812e-05, "loss": 1.1329, "step": 4383 }, { "epoch": 0.6494814814814814, "grad_norm": 1.7140653133392334, "learning_rate": 7.030392883617495e-05, "loss": 1.1757, "step": 4384 }, { "epoch": 0.6496296296296297, "grad_norm": 1.445603370666504, "learning_rate": 7.027427724240179e-05, "loss": 1.0283, "step": 4385 }, { "epoch": 0.6497777777777778, "grad_norm": 1.7610565423965454, "learning_rate": 7.024462564862861e-05, "loss": 1.0102, "step": 4386 }, { 
"epoch": 0.6499259259259259, "grad_norm": 1.5968871116638184, "learning_rate": 7.021497405485545e-05, "loss": 0.9477, "step": 4387 }, { "epoch": 0.650074074074074, "grad_norm": 1.4317080974578857, "learning_rate": 7.018532246108228e-05, "loss": 0.9569, "step": 4388 }, { "epoch": 0.6502222222222223, "grad_norm": 1.1420197486877441, "learning_rate": 7.015567086730911e-05, "loss": 1.0412, "step": 4389 }, { "epoch": 0.6503703703703704, "grad_norm": 1.4308433532714844, "learning_rate": 7.012601927353596e-05, "loss": 1.0494, "step": 4390 }, { "epoch": 0.6505185185185185, "grad_norm": 1.580278754234314, "learning_rate": 7.009636767976279e-05, "loss": 0.856, "step": 4391 }, { "epoch": 0.6506666666666666, "grad_norm": 3.086088180541992, "learning_rate": 7.006671608598962e-05, "loss": 1.1996, "step": 4392 }, { "epoch": 0.6508148148148148, "grad_norm": 1.5802485942840576, "learning_rate": 7.003706449221647e-05, "loss": 1.1041, "step": 4393 }, { "epoch": 0.650962962962963, "grad_norm": 1.2996973991394043, "learning_rate": 7.00074128984433e-05, "loss": 1.0198, "step": 4394 }, { "epoch": 0.6511111111111111, "grad_norm": 1.907769799232483, "learning_rate": 6.997776130467013e-05, "loss": 0.9092, "step": 4395 }, { "epoch": 0.6512592592592592, "grad_norm": 2.091052770614624, "learning_rate": 6.994810971089696e-05, "loss": 1.0609, "step": 4396 }, { "epoch": 0.6514074074074074, "grad_norm": 1.3710469007492065, "learning_rate": 6.99184581171238e-05, "loss": 1.237, "step": 4397 }, { "epoch": 0.6515555555555556, "grad_norm": 1.3312311172485352, "learning_rate": 6.988880652335063e-05, "loss": 0.9948, "step": 4398 }, { "epoch": 0.6517037037037037, "grad_norm": 1.4814784526824951, "learning_rate": 6.985915492957746e-05, "loss": 1.0974, "step": 4399 }, { "epoch": 0.6518518518518519, "grad_norm": 1.5356966257095337, "learning_rate": 6.982950333580431e-05, "loss": 1.1478, "step": 4400 }, { "epoch": 0.652, "grad_norm": 6.357550621032715, "learning_rate": 6.979985174203114e-05, "loss": 1.1715, 
"step": 4401 }, { "epoch": 0.6521481481481481, "grad_norm": 1.4539903402328491, "learning_rate": 6.977020014825797e-05, "loss": 0.952, "step": 4402 }, { "epoch": 0.6522962962962963, "grad_norm": 1.4701181650161743, "learning_rate": 6.97405485544848e-05, "loss": 1.1111, "step": 4403 }, { "epoch": 0.6524444444444445, "grad_norm": 1.5456289052963257, "learning_rate": 6.971089696071165e-05, "loss": 0.8952, "step": 4404 }, { "epoch": 0.6525925925925926, "grad_norm": 1.3559447526931763, "learning_rate": 6.968124536693848e-05, "loss": 0.8807, "step": 4405 }, { "epoch": 0.6527407407407407, "grad_norm": 1.323038935661316, "learning_rate": 6.965159377316531e-05, "loss": 0.8733, "step": 4406 }, { "epoch": 0.6528888888888889, "grad_norm": 1.5306757688522339, "learning_rate": 6.962194217939214e-05, "loss": 1.1169, "step": 4407 }, { "epoch": 0.6530370370370371, "grad_norm": 2.25394344329834, "learning_rate": 6.959229058561898e-05, "loss": 0.9831, "step": 4408 }, { "epoch": 0.6531851851851852, "grad_norm": 1.4161977767944336, "learning_rate": 6.956263899184581e-05, "loss": 1.0598, "step": 4409 }, { "epoch": 0.6533333333333333, "grad_norm": 1.2248963117599487, "learning_rate": 6.953298739807265e-05, "loss": 0.9447, "step": 4410 }, { "epoch": 0.6534814814814814, "grad_norm": 2.0185866355895996, "learning_rate": 6.950333580429949e-05, "loss": 1.0465, "step": 4411 }, { "epoch": 0.6536296296296297, "grad_norm": 1.0591628551483154, "learning_rate": 6.947368421052632e-05, "loss": 1.4689, "step": 4412 }, { "epoch": 0.6537777777777778, "grad_norm": 1.6865726709365845, "learning_rate": 6.944403261675315e-05, "loss": 1.1569, "step": 4413 }, { "epoch": 0.6539259259259259, "grad_norm": 3.0567121505737305, "learning_rate": 6.941438102298e-05, "loss": 0.8923, "step": 4414 }, { "epoch": 0.654074074074074, "grad_norm": 1.4168965816497803, "learning_rate": 6.938472942920681e-05, "loss": 0.9829, "step": 4415 }, { "epoch": 0.6542222222222223, "grad_norm": 1.2122267484664917, "learning_rate": 
6.935507783543366e-05, "loss": 0.7592, "step": 4416 }, { "epoch": 0.6543703703703704, "grad_norm": 1.550054907798767, "learning_rate": 6.93254262416605e-05, "loss": 1.2356, "step": 4417 }, { "epoch": 0.6545185185185185, "grad_norm": 1.6436558961868286, "learning_rate": 6.929577464788732e-05, "loss": 0.9523, "step": 4418 }, { "epoch": 0.6546666666666666, "grad_norm": 1.7433936595916748, "learning_rate": 6.926612305411416e-05, "loss": 1.1111, "step": 4419 }, { "epoch": 0.6548148148148148, "grad_norm": 1.400778889656067, "learning_rate": 6.9236471460341e-05, "loss": 1.0042, "step": 4420 }, { "epoch": 0.654962962962963, "grad_norm": 1.8073093891143799, "learning_rate": 6.920681986656783e-05, "loss": 1.0994, "step": 4421 }, { "epoch": 0.6551111111111111, "grad_norm": 3.2208495140075684, "learning_rate": 6.917716827279467e-05, "loss": 0.8091, "step": 4422 }, { "epoch": 0.6552592592592592, "grad_norm": 1.918202519416809, "learning_rate": 6.91475166790215e-05, "loss": 1.1474, "step": 4423 }, { "epoch": 0.6554074074074074, "grad_norm": 2.4938154220581055, "learning_rate": 6.911786508524833e-05, "loss": 1.0839, "step": 4424 }, { "epoch": 0.6555555555555556, "grad_norm": 1.207390308380127, "learning_rate": 6.908821349147518e-05, "loss": 0.9566, "step": 4425 }, { "epoch": 0.6557037037037037, "grad_norm": 1.9529876708984375, "learning_rate": 6.905856189770201e-05, "loss": 1.0909, "step": 4426 }, { "epoch": 0.6558518518518518, "grad_norm": 1.7125343084335327, "learning_rate": 6.902891030392884e-05, "loss": 1.1568, "step": 4427 }, { "epoch": 0.656, "grad_norm": 1.4207946062088013, "learning_rate": 6.899925871015568e-05, "loss": 1.1674, "step": 4428 }, { "epoch": 0.6561481481481481, "grad_norm": 1.2349700927734375, "learning_rate": 6.89696071163825e-05, "loss": 0.8894, "step": 4429 }, { "epoch": 0.6562962962962963, "grad_norm": 1.6173549890518188, "learning_rate": 6.893995552260934e-05, "loss": 0.9113, "step": 4430 }, { "epoch": 0.6564444444444445, "grad_norm": 1.610507845878601, 
"learning_rate": 6.891030392883618e-05, "loss": 1.1148, "step": 4431 }, { "epoch": 0.6565925925925926, "grad_norm": 1.5079268217086792, "learning_rate": 6.8880652335063e-05, "loss": 0.8821, "step": 4432 }, { "epoch": 0.6567407407407407, "grad_norm": 1.8289453983306885, "learning_rate": 6.885100074128985e-05, "loss": 1.2582, "step": 4433 }, { "epoch": 0.6568888888888889, "grad_norm": 2.1192467212677, "learning_rate": 6.882134914751668e-05, "loss": 1.1065, "step": 4434 }, { "epoch": 0.6570370370370371, "grad_norm": 1.39765465259552, "learning_rate": 6.879169755374351e-05, "loss": 0.8152, "step": 4435 }, { "epoch": 0.6571851851851852, "grad_norm": 2.078397274017334, "learning_rate": 6.876204595997036e-05, "loss": 1.059, "step": 4436 }, { "epoch": 0.6573333333333333, "grad_norm": 1.189782977104187, "learning_rate": 6.873239436619719e-05, "loss": 0.838, "step": 4437 }, { "epoch": 0.6574814814814814, "grad_norm": 1.6565886735916138, "learning_rate": 6.870274277242402e-05, "loss": 1.0258, "step": 4438 }, { "epoch": 0.6576296296296297, "grad_norm": 1.6193196773529053, "learning_rate": 6.867309117865086e-05, "loss": 0.9524, "step": 4439 }, { "epoch": 0.6577777777777778, "grad_norm": 2.2543153762817383, "learning_rate": 6.86434395848777e-05, "loss": 1.1232, "step": 4440 }, { "epoch": 0.6579259259259259, "grad_norm": 2.2773020267486572, "learning_rate": 6.861378799110453e-05, "loss": 1.2689, "step": 4441 }, { "epoch": 0.658074074074074, "grad_norm": 1.8485618829727173, "learning_rate": 6.858413639733136e-05, "loss": 1.0171, "step": 4442 }, { "epoch": 0.6582222222222223, "grad_norm": 1.9104846715927124, "learning_rate": 6.85544848035582e-05, "loss": 1.1316, "step": 4443 }, { "epoch": 0.6583703703703704, "grad_norm": 1.614223599433899, "learning_rate": 6.852483320978503e-05, "loss": 0.9565, "step": 4444 }, { "epoch": 0.6585185185185185, "grad_norm": 1.0929920673370361, "learning_rate": 6.849518161601186e-05, "loss": 1.3088, "step": 4445 }, { "epoch": 0.6586666666666666, 
"grad_norm": 1.3854314088821411, "learning_rate": 6.84655300222387e-05, "loss": 1.0783, "step": 4446 }, { "epoch": 0.6588148148148149, "grad_norm": 1.7415268421173096, "learning_rate": 6.843587842846554e-05, "loss": 1.0249, "step": 4447 }, { "epoch": 0.658962962962963, "grad_norm": 1.6687849760055542, "learning_rate": 6.840622683469237e-05, "loss": 1.0307, "step": 4448 }, { "epoch": 0.6591111111111111, "grad_norm": 1.1237823963165283, "learning_rate": 6.83765752409192e-05, "loss": 0.8642, "step": 4449 }, { "epoch": 0.6592592592592592, "grad_norm": 1.1712908744812012, "learning_rate": 6.834692364714603e-05, "loss": 0.8663, "step": 4450 }, { "epoch": 0.6594074074074074, "grad_norm": 2.365463972091675, "learning_rate": 6.831727205337287e-05, "loss": 1.0883, "step": 4451 }, { "epoch": 0.6595555555555556, "grad_norm": 1.5662684440612793, "learning_rate": 6.82876204595997e-05, "loss": 1.2927, "step": 4452 }, { "epoch": 0.6597037037037037, "grad_norm": 1.570806622505188, "learning_rate": 6.825796886582654e-05, "loss": 1.1432, "step": 4453 }, { "epoch": 0.6598518518518518, "grad_norm": 1.8160992860794067, "learning_rate": 6.822831727205338e-05, "loss": 1.1425, "step": 4454 }, { "epoch": 0.66, "grad_norm": 7.27062463760376, "learning_rate": 6.819866567828021e-05, "loss": 1.063, "step": 4455 }, { "epoch": 0.6601481481481482, "grad_norm": 1.5494304895401, "learning_rate": 6.816901408450704e-05, "loss": 1.1038, "step": 4456 }, { "epoch": 0.6602962962962963, "grad_norm": 1.6469160318374634, "learning_rate": 6.813936249073389e-05, "loss": 1.0867, "step": 4457 }, { "epoch": 0.6604444444444444, "grad_norm": 1.5185201168060303, "learning_rate": 6.81097108969607e-05, "loss": 0.8585, "step": 4458 }, { "epoch": 0.6605925925925926, "grad_norm": 1.7621744871139526, "learning_rate": 6.808005930318755e-05, "loss": 0.9402, "step": 4459 }, { "epoch": 0.6607407407407407, "grad_norm": 1.990752935409546, "learning_rate": 6.80504077094144e-05, "loss": 1.2131, "step": 4460 }, { "epoch": 
0.6608888888888889, "grad_norm": 4.080032825469971, "learning_rate": 6.802075611564121e-05, "loss": 1.1293, "step": 4461 }, { "epoch": 0.6610370370370371, "grad_norm": 1.4148558378219604, "learning_rate": 6.799110452186806e-05, "loss": 1.1308, "step": 4462 }, { "epoch": 0.6611851851851852, "grad_norm": 3.500199317932129, "learning_rate": 6.796145292809489e-05, "loss": 1.2161, "step": 4463 }, { "epoch": 0.6613333333333333, "grad_norm": 1.847669243812561, "learning_rate": 6.793180133432172e-05, "loss": 1.1194, "step": 4464 }, { "epoch": 0.6614814814814814, "grad_norm": 1.549932599067688, "learning_rate": 6.790214974054856e-05, "loss": 1.097, "step": 4465 }, { "epoch": 0.6616296296296297, "grad_norm": 1.6146255731582642, "learning_rate": 6.787249814677539e-05, "loss": 1.1995, "step": 4466 }, { "epoch": 0.6617777777777778, "grad_norm": 1.697512149810791, "learning_rate": 6.784284655300222e-05, "loss": 1.2261, "step": 4467 }, { "epoch": 0.6619259259259259, "grad_norm": 2.989250421524048, "learning_rate": 6.781319495922907e-05, "loss": 0.976, "step": 4468 }, { "epoch": 0.662074074074074, "grad_norm": 2.1078250408172607, "learning_rate": 6.77835433654559e-05, "loss": 0.9773, "step": 4469 }, { "epoch": 0.6622222222222223, "grad_norm": 1.2377699613571167, "learning_rate": 6.775389177168273e-05, "loss": 0.9623, "step": 4470 }, { "epoch": 0.6623703703703704, "grad_norm": 2.2702996730804443, "learning_rate": 6.772424017790957e-05, "loss": 1.196, "step": 4471 }, { "epoch": 0.6625185185185185, "grad_norm": 2.383726119995117, "learning_rate": 6.769458858413639e-05, "loss": 1.0343, "step": 4472 }, { "epoch": 0.6626666666666666, "grad_norm": 1.5570571422576904, "learning_rate": 6.766493699036324e-05, "loss": 1.1076, "step": 4473 }, { "epoch": 0.6628148148148149, "grad_norm": 1.4325950145721436, "learning_rate": 6.763528539659007e-05, "loss": 1.2037, "step": 4474 }, { "epoch": 0.662962962962963, "grad_norm": 1.4265539646148682, "learning_rate": 6.76056338028169e-05, "loss": 1.0731, 
"step": 4475 }, { "epoch": 0.6631111111111111, "grad_norm": 1.4804054498672485, "learning_rate": 6.757598220904374e-05, "loss": 1.049, "step": 4476 }, { "epoch": 0.6632592592592592, "grad_norm": 1.302612543106079, "learning_rate": 6.754633061527057e-05, "loss": 0.8392, "step": 4477 }, { "epoch": 0.6634074074074074, "grad_norm": 2.4564685821533203, "learning_rate": 6.75166790214974e-05, "loss": 1.0384, "step": 4478 }, { "epoch": 0.6635555555555556, "grad_norm": 1.816359281539917, "learning_rate": 6.748702742772425e-05, "loss": 1.0906, "step": 4479 }, { "epoch": 0.6637037037037037, "grad_norm": 1.585599660873413, "learning_rate": 6.745737583395108e-05, "loss": 0.9097, "step": 4480 }, { "epoch": 0.6638518518518518, "grad_norm": 1.2226183414459229, "learning_rate": 6.742772424017791e-05, "loss": 0.8926, "step": 4481 }, { "epoch": 0.664, "grad_norm": 1.1785897016525269, "learning_rate": 6.739807264640475e-05, "loss": 0.8586, "step": 4482 }, { "epoch": 0.6641481481481482, "grad_norm": 1.2020103931427002, "learning_rate": 6.736842105263159e-05, "loss": 0.8763, "step": 4483 }, { "epoch": 0.6642962962962963, "grad_norm": 1.3404290676116943, "learning_rate": 6.733876945885842e-05, "loss": 1.0221, "step": 4484 }, { "epoch": 0.6644444444444444, "grad_norm": 1.8340681791305542, "learning_rate": 6.730911786508525e-05, "loss": 1.1126, "step": 4485 }, { "epoch": 0.6645925925925926, "grad_norm": 1.6338748931884766, "learning_rate": 6.727946627131209e-05, "loss": 1.1481, "step": 4486 }, { "epoch": 0.6647407407407407, "grad_norm": 1.3059214353561401, "learning_rate": 6.724981467753892e-05, "loss": 0.8895, "step": 4487 }, { "epoch": 0.6648888888888889, "grad_norm": 4.59125280380249, "learning_rate": 6.722016308376575e-05, "loss": 1.0238, "step": 4488 }, { "epoch": 0.665037037037037, "grad_norm": 2.042271614074707, "learning_rate": 6.719051148999258e-05, "loss": 1.2073, "step": 4489 }, { "epoch": 0.6651851851851852, "grad_norm": 1.6275269985198975, "learning_rate": 
6.716085989621943e-05, "loss": 0.9926, "step": 4490 }, { "epoch": 0.6653333333333333, "grad_norm": 3.977426290512085, "learning_rate": 6.713120830244626e-05, "loss": 1.2023, "step": 4491 }, { "epoch": 0.6654814814814815, "grad_norm": 1.3853273391723633, "learning_rate": 6.710155670867309e-05, "loss": 0.9428, "step": 4492 }, { "epoch": 0.6656296296296297, "grad_norm": 6.533312797546387, "learning_rate": 6.707190511489992e-05, "loss": 0.8783, "step": 4493 }, { "epoch": 0.6657777777777778, "grad_norm": 1.368784785270691, "learning_rate": 6.704225352112677e-05, "loss": 0.8937, "step": 4494 }, { "epoch": 0.6659259259259259, "grad_norm": 1.5556150674819946, "learning_rate": 6.70126019273536e-05, "loss": 1.0887, "step": 4495 }, { "epoch": 0.666074074074074, "grad_norm": 1.5616638660430908, "learning_rate": 6.698295033358043e-05, "loss": 0.9505, "step": 4496 }, { "epoch": 0.6662222222222223, "grad_norm": 1.4048346281051636, "learning_rate": 6.695329873980727e-05, "loss": 1.1939, "step": 4497 }, { "epoch": 0.6663703703703704, "grad_norm": 2.11753511428833, "learning_rate": 6.69236471460341e-05, "loss": 1.0441, "step": 4498 }, { "epoch": 0.6665185185185185, "grad_norm": 1.3496278524398804, "learning_rate": 6.689399555226093e-05, "loss": 1.0327, "step": 4499 }, { "epoch": 0.6666666666666666, "grad_norm": 1.2425817251205444, "learning_rate": 6.686434395848778e-05, "loss": 0.8351, "step": 4500 }, { "epoch": 0.6668148148148149, "grad_norm": 1.1833040714263916, "learning_rate": 6.68346923647146e-05, "loss": 1.1542, "step": 4501 }, { "epoch": 0.666962962962963, "grad_norm": 3.7058610916137695, "learning_rate": 6.680504077094144e-05, "loss": 0.8921, "step": 4502 }, { "epoch": 0.6671111111111111, "grad_norm": 2.4060218334198, "learning_rate": 6.677538917716829e-05, "loss": 1.2563, "step": 4503 }, { "epoch": 0.6672592592592592, "grad_norm": 1.6111152172088623, "learning_rate": 6.67457375833951e-05, "loss": 0.9096, "step": 4504 }, { "epoch": 0.6674074074074074, "grad_norm": 
1.828894853591919, "learning_rate": 6.671608598962195e-05, "loss": 0.9255, "step": 4505 }, { "epoch": 0.6675555555555556, "grad_norm": 1.4932504892349243, "learning_rate": 6.668643439584878e-05, "loss": 1.1503, "step": 4506 }, { "epoch": 0.6677037037037037, "grad_norm": 2.4620230197906494, "learning_rate": 6.665678280207561e-05, "loss": 1.3892, "step": 4507 }, { "epoch": 0.6678518518518518, "grad_norm": 1.4247181415557861, "learning_rate": 6.662713120830245e-05, "loss": 1.1199, "step": 4508 }, { "epoch": 0.668, "grad_norm": 1.8170108795166016, "learning_rate": 6.659747961452928e-05, "loss": 0.9776, "step": 4509 }, { "epoch": 0.6681481481481482, "grad_norm": 1.342509388923645, "learning_rate": 6.656782802075612e-05, "loss": 1.0256, "step": 4510 }, { "epoch": 0.6682962962962963, "grad_norm": 1.2939727306365967, "learning_rate": 6.653817642698296e-05, "loss": 1.1221, "step": 4511 }, { "epoch": 0.6684444444444444, "grad_norm": 2.5898356437683105, "learning_rate": 6.650852483320979e-05, "loss": 0.9475, "step": 4512 }, { "epoch": 0.6685925925925926, "grad_norm": 2.077467203140259, "learning_rate": 6.647887323943662e-05, "loss": 0.9947, "step": 4513 }, { "epoch": 0.6687407407407407, "grad_norm": 2.0203447341918945, "learning_rate": 6.644922164566347e-05, "loss": 1.1886, "step": 4514 }, { "epoch": 0.6688888888888889, "grad_norm": 1.8688184022903442, "learning_rate": 6.641957005189028e-05, "loss": 1.0013, "step": 4515 }, { "epoch": 0.669037037037037, "grad_norm": 2.0238397121429443, "learning_rate": 6.638991845811713e-05, "loss": 1.2235, "step": 4516 }, { "epoch": 0.6691851851851852, "grad_norm": 1.3287385702133179, "learning_rate": 6.636026686434396e-05, "loss": 0.8432, "step": 4517 }, { "epoch": 0.6693333333333333, "grad_norm": 1.7164368629455566, "learning_rate": 6.633061527057079e-05, "loss": 1.0692, "step": 4518 }, { "epoch": 0.6694814814814815, "grad_norm": 1.6082435846328735, "learning_rate": 6.630096367679763e-05, "loss": 1.1004, "step": 4519 }, { "epoch": 
0.6696296296296296, "grad_norm": 2.6166815757751465, "learning_rate": 6.627131208302446e-05, "loss": 0.9445, "step": 4520 }, { "epoch": 0.6697777777777778, "grad_norm": 1.7008702754974365, "learning_rate": 6.62416604892513e-05, "loss": 0.8787, "step": 4521 }, { "epoch": 0.6699259259259259, "grad_norm": 1.8740018606185913, "learning_rate": 6.621200889547814e-05, "loss": 0.8673, "step": 4522 }, { "epoch": 0.670074074074074, "grad_norm": 2.3854541778564453, "learning_rate": 6.618235730170497e-05, "loss": 1.1032, "step": 4523 }, { "epoch": 0.6702222222222223, "grad_norm": 1.7728660106658936, "learning_rate": 6.61527057079318e-05, "loss": 0.9903, "step": 4524 }, { "epoch": 0.6703703703703704, "grad_norm": 1.9628788232803345, "learning_rate": 6.612305411415865e-05, "loss": 1.004, "step": 4525 }, { "epoch": 0.6705185185185185, "grad_norm": 1.3167074918746948, "learning_rate": 6.609340252038548e-05, "loss": 1.1517, "step": 4526 }, { "epoch": 0.6706666666666666, "grad_norm": 1.9610397815704346, "learning_rate": 6.606375092661231e-05, "loss": 0.9651, "step": 4527 }, { "epoch": 0.6708148148148149, "grad_norm": 1.2325596809387207, "learning_rate": 6.603409933283914e-05, "loss": 0.8683, "step": 4528 }, { "epoch": 0.670962962962963, "grad_norm": 1.4824199676513672, "learning_rate": 6.600444773906598e-05, "loss": 0.9708, "step": 4529 }, { "epoch": 0.6711111111111111, "grad_norm": 1.3262386322021484, "learning_rate": 6.597479614529281e-05, "loss": 1.1023, "step": 4530 }, { "epoch": 0.6712592592592592, "grad_norm": 1.7843201160430908, "learning_rate": 6.594514455151965e-05, "loss": 0.9397, "step": 4531 }, { "epoch": 0.6714074074074075, "grad_norm": 1.5266560316085815, "learning_rate": 6.591549295774648e-05, "loss": 1.059, "step": 4532 }, { "epoch": 0.6715555555555556, "grad_norm": 1.3263297080993652, "learning_rate": 6.588584136397332e-05, "loss": 0.9717, "step": 4533 }, { "epoch": 0.6717037037037037, "grad_norm": 1.3532426357269287, "learning_rate": 6.585618977020015e-05, "loss": 
0.9635, "step": 4534 }, { "epoch": 0.6718518518518518, "grad_norm": 1.5721492767333984, "learning_rate": 6.582653817642698e-05, "loss": 0.9163, "step": 4535 }, { "epoch": 0.672, "grad_norm": 3.239018678665161, "learning_rate": 6.579688658265381e-05, "loss": 0.939, "step": 4536 }, { "epoch": 0.6721481481481482, "grad_norm": 2.1135377883911133, "learning_rate": 6.576723498888066e-05, "loss": 1.1707, "step": 4537 }, { "epoch": 0.6722962962962963, "grad_norm": 1.684714436531067, "learning_rate": 6.573758339510749e-05, "loss": 0.9886, "step": 4538 }, { "epoch": 0.6724444444444444, "grad_norm": 1.624194860458374, "learning_rate": 6.570793180133432e-05, "loss": 0.9391, "step": 4539 }, { "epoch": 0.6725925925925926, "grad_norm": 1.5050755739212036, "learning_rate": 6.567828020756116e-05, "loss": 0.9477, "step": 4540 }, { "epoch": 0.6727407407407408, "grad_norm": 2.0678727626800537, "learning_rate": 6.5648628613788e-05, "loss": 0.9755, "step": 4541 }, { "epoch": 0.6728888888888889, "grad_norm": 5.3233561515808105, "learning_rate": 6.561897702001483e-05, "loss": 1.2563, "step": 4542 }, { "epoch": 0.673037037037037, "grad_norm": 1.804870843887329, "learning_rate": 6.558932542624167e-05, "loss": 1.1061, "step": 4543 }, { "epoch": 0.6731851851851852, "grad_norm": 1.8689392805099487, "learning_rate": 6.555967383246849e-05, "loss": 1.0492, "step": 4544 }, { "epoch": 0.6733333333333333, "grad_norm": 2.034116268157959, "learning_rate": 6.553002223869533e-05, "loss": 1.0905, "step": 4545 }, { "epoch": 0.6734814814814815, "grad_norm": 2.48861026763916, "learning_rate": 6.550037064492218e-05, "loss": 1.1014, "step": 4546 }, { "epoch": 0.6736296296296296, "grad_norm": 1.8998236656188965, "learning_rate": 6.5470719051149e-05, "loss": 1.1355, "step": 4547 }, { "epoch": 0.6737777777777778, "grad_norm": 3.2076668739318848, "learning_rate": 6.544106745737584e-05, "loss": 0.9577, "step": 4548 }, { "epoch": 0.6739259259259259, "grad_norm": 2.001138925552368, "learning_rate": 
6.541141586360267e-05, "loss": 0.988, "step": 4549 }, { "epoch": 0.674074074074074, "grad_norm": 1.9601798057556152, "learning_rate": 6.53817642698295e-05, "loss": 1.1651, "step": 4550 }, { "epoch": 0.6742222222222222, "grad_norm": 2.0755982398986816, "learning_rate": 6.535211267605635e-05, "loss": 0.9835, "step": 4551 }, { "epoch": 0.6743703703703704, "grad_norm": 2.2885663509368896, "learning_rate": 6.532246108228318e-05, "loss": 0.9954, "step": 4552 }, { "epoch": 0.6745185185185185, "grad_norm": 2.1896133422851562, "learning_rate": 6.529280948851001e-05, "loss": 1.0637, "step": 4553 }, { "epoch": 0.6746666666666666, "grad_norm": 1.3759965896606445, "learning_rate": 6.526315789473685e-05, "loss": 1.0231, "step": 4554 }, { "epoch": 0.6748148148148149, "grad_norm": 3.140455484390259, "learning_rate": 6.523350630096368e-05, "loss": 1.2912, "step": 4555 }, { "epoch": 0.674962962962963, "grad_norm": 1.1157242059707642, "learning_rate": 6.520385470719051e-05, "loss": 0.8042, "step": 4556 }, { "epoch": 0.6751111111111111, "grad_norm": 1.8724523782730103, "learning_rate": 6.517420311341736e-05, "loss": 1.0487, "step": 4557 }, { "epoch": 0.6752592592592592, "grad_norm": 1.4933273792266846, "learning_rate": 6.514455151964417e-05, "loss": 1.0804, "step": 4558 }, { "epoch": 0.6754074074074075, "grad_norm": 1.7172576189041138, "learning_rate": 6.511489992587102e-05, "loss": 1.4236, "step": 4559 }, { "epoch": 0.6755555555555556, "grad_norm": 2.238006591796875, "learning_rate": 6.508524833209785e-05, "loss": 1.1543, "step": 4560 }, { "epoch": 0.6757037037037037, "grad_norm": 2.3506276607513428, "learning_rate": 6.505559673832468e-05, "loss": 1.0662, "step": 4561 }, { "epoch": 0.6758518518518518, "grad_norm": 4.046957015991211, "learning_rate": 6.502594514455153e-05, "loss": 1.2876, "step": 4562 }, { "epoch": 0.676, "grad_norm": 1.5363638401031494, "learning_rate": 6.499629355077836e-05, "loss": 0.8477, "step": 4563 }, { "epoch": 0.6761481481481482, "grad_norm": 
1.6767934560775757, "learning_rate": 6.496664195700519e-05, "loss": 0.9757, "step": 4564 }, { "epoch": 0.6762962962962963, "grad_norm": 1.6577056646347046, "learning_rate": 6.493699036323203e-05, "loss": 1.0816, "step": 4565 }, { "epoch": 0.6764444444444444, "grad_norm": 1.40839684009552, "learning_rate": 6.490733876945886e-05, "loss": 1.245, "step": 4566 }, { "epoch": 0.6765925925925926, "grad_norm": 1.8399194478988647, "learning_rate": 6.48776871756857e-05, "loss": 1.1084, "step": 4567 }, { "epoch": 0.6767407407407408, "grad_norm": 1.5465725660324097, "learning_rate": 6.484803558191254e-05, "loss": 1.1317, "step": 4568 }, { "epoch": 0.6768888888888889, "grad_norm": 1.1534277200698853, "learning_rate": 6.481838398813937e-05, "loss": 0.9297, "step": 4569 }, { "epoch": 0.677037037037037, "grad_norm": 1.9321751594543457, "learning_rate": 6.47887323943662e-05, "loss": 1.0722, "step": 4570 }, { "epoch": 0.6771851851851852, "grad_norm": 2.9229111671447754, "learning_rate": 6.475908080059303e-05, "loss": 1.0186, "step": 4571 }, { "epoch": 0.6773333333333333, "grad_norm": 1.6638622283935547, "learning_rate": 6.472942920681988e-05, "loss": 1.314, "step": 4572 }, { "epoch": 0.6774814814814815, "grad_norm": 1.4501103162765503, "learning_rate": 6.46997776130467e-05, "loss": 1.2855, "step": 4573 }, { "epoch": 0.6776296296296296, "grad_norm": 1.2768062353134155, "learning_rate": 6.467012601927354e-05, "loss": 0.8762, "step": 4574 }, { "epoch": 0.6777777777777778, "grad_norm": 5.492815017700195, "learning_rate": 6.464047442550037e-05, "loss": 1.169, "step": 4575 }, { "epoch": 0.6779259259259259, "grad_norm": 1.2650996446609497, "learning_rate": 6.461082283172721e-05, "loss": 1.0619, "step": 4576 }, { "epoch": 0.678074074074074, "grad_norm": 1.293104887008667, "learning_rate": 6.458117123795404e-05, "loss": 0.9211, "step": 4577 }, { "epoch": 0.6782222222222222, "grad_norm": 1.4094767570495605, "learning_rate": 6.455151964418087e-05, "loss": 1.0138, "step": 4578 }, { "epoch": 
0.6783703703703704, "grad_norm": 1.4086834192276, "learning_rate": 6.45218680504077e-05, "loss": 0.9488, "step": 4579 }, { "epoch": 0.6785185185185185, "grad_norm": 1.2480393648147583, "learning_rate": 6.449221645663455e-05, "loss": 1.0364, "step": 4580 }, { "epoch": 0.6786666666666666, "grad_norm": 2.318099021911621, "learning_rate": 6.446256486286138e-05, "loss": 0.8435, "step": 4581 }, { "epoch": 0.6788148148148148, "grad_norm": 1.66366708278656, "learning_rate": 6.443291326908821e-05, "loss": 0.9257, "step": 4582 }, { "epoch": 0.678962962962963, "grad_norm": 1.1964306831359863, "learning_rate": 6.440326167531506e-05, "loss": 1.1364, "step": 4583 }, { "epoch": 0.6791111111111111, "grad_norm": 2.636220932006836, "learning_rate": 6.437361008154189e-05, "loss": 1.0545, "step": 4584 }, { "epoch": 0.6792592592592592, "grad_norm": 1.8049631118774414, "learning_rate": 6.434395848776872e-05, "loss": 0.8934, "step": 4585 }, { "epoch": 0.6794074074074075, "grad_norm": 2.333420753479004, "learning_rate": 6.431430689399556e-05, "loss": 0.8483, "step": 4586 }, { "epoch": 0.6795555555555556, "grad_norm": 1.6943825483322144, "learning_rate": 6.428465530022238e-05, "loss": 1.3099, "step": 4587 }, { "epoch": 0.6797037037037037, "grad_norm": 1.1508939266204834, "learning_rate": 6.425500370644922e-05, "loss": 0.7506, "step": 4588 }, { "epoch": 0.6798518518518518, "grad_norm": 1.2238248586654663, "learning_rate": 6.422535211267607e-05, "loss": 1.012, "step": 4589 }, { "epoch": 0.68, "grad_norm": 1.4984291791915894, "learning_rate": 6.419570051890289e-05, "loss": 1.0019, "step": 4590 }, { "epoch": 0.6801481481481482, "grad_norm": 1.367092251777649, "learning_rate": 6.416604892512973e-05, "loss": 0.9964, "step": 4591 }, { "epoch": 0.6802962962962963, "grad_norm": 1.9505276679992676, "learning_rate": 6.413639733135656e-05, "loss": 0.8053, "step": 4592 }, { "epoch": 0.6804444444444444, "grad_norm": 1.4009976387023926, "learning_rate": 6.410674573758339e-05, "loss": 0.7596, "step": 4593 
}, { "epoch": 0.6805925925925926, "grad_norm": 2.9519119262695312, "learning_rate": 6.407709414381024e-05, "loss": 1.1529, "step": 4594 }, { "epoch": 0.6807407407407408, "grad_norm": 1.8711851835250854, "learning_rate": 6.404744255003707e-05, "loss": 1.017, "step": 4595 }, { "epoch": 0.6808888888888889, "grad_norm": 1.149286150932312, "learning_rate": 6.40177909562639e-05, "loss": 0.9867, "step": 4596 }, { "epoch": 0.681037037037037, "grad_norm": 1.6351492404937744, "learning_rate": 6.398813936249074e-05, "loss": 1.0733, "step": 4597 }, { "epoch": 0.6811851851851852, "grad_norm": 3.9957165718078613, "learning_rate": 6.395848776871757e-05, "loss": 1.2609, "step": 4598 }, { "epoch": 0.6813333333333333, "grad_norm": 3.0349299907684326, "learning_rate": 6.39288361749444e-05, "loss": 1.0371, "step": 4599 }, { "epoch": 0.6814814814814815, "grad_norm": 1.1951899528503418, "learning_rate": 6.389918458117125e-05, "loss": 0.8061, "step": 4600 }, { "epoch": 0.6816296296296296, "grad_norm": 2.5305910110473633, "learning_rate": 6.386953298739807e-05, "loss": 1.0257, "step": 4601 }, { "epoch": 0.6817777777777778, "grad_norm": 1.5263696908950806, "learning_rate": 6.383988139362491e-05, "loss": 0.9928, "step": 4602 }, { "epoch": 0.6819259259259259, "grad_norm": 1.5052536725997925, "learning_rate": 6.381022979985174e-05, "loss": 1.0276, "step": 4603 }, { "epoch": 0.682074074074074, "grad_norm": 2.024991989135742, "learning_rate": 6.378057820607857e-05, "loss": 0.8347, "step": 4604 }, { "epoch": 0.6822222222222222, "grad_norm": 1.2899211645126343, "learning_rate": 6.375092661230542e-05, "loss": 0.9684, "step": 4605 }, { "epoch": 0.6823703703703704, "grad_norm": 1.3688863515853882, "learning_rate": 6.372127501853225e-05, "loss": 1.0025, "step": 4606 }, { "epoch": 0.6825185185185185, "grad_norm": 1.3318511247634888, "learning_rate": 6.369162342475908e-05, "loss": 0.8014, "step": 4607 }, { "epoch": 0.6826666666666666, "grad_norm": 1.8962894678115845, "learning_rate": 
6.366197183098592e-05, "loss": 1.1632, "step": 4608 }, { "epoch": 0.6828148148148148, "grad_norm": 1.9377597570419312, "learning_rate": 6.363232023721275e-05, "loss": 0.9583, "step": 4609 }, { "epoch": 0.682962962962963, "grad_norm": 1.8377279043197632, "learning_rate": 6.360266864343959e-05, "loss": 0.8932, "step": 4610 }, { "epoch": 0.6831111111111111, "grad_norm": 1.5697270631790161, "learning_rate": 6.357301704966643e-05, "loss": 1.1786, "step": 4611 }, { "epoch": 0.6832592592592592, "grad_norm": 1.098049283027649, "learning_rate": 6.354336545589326e-05, "loss": 1.4777, "step": 4612 }, { "epoch": 0.6834074074074074, "grad_norm": 2.8057503700256348, "learning_rate": 6.351371386212009e-05, "loss": 1.1235, "step": 4613 }, { "epoch": 0.6835555555555556, "grad_norm": 1.5012096166610718, "learning_rate": 6.348406226834692e-05, "loss": 1.0603, "step": 4614 }, { "epoch": 0.6837037037037037, "grad_norm": 1.646206021308899, "learning_rate": 6.345441067457377e-05, "loss": 0.7903, "step": 4615 }, { "epoch": 0.6838518518518518, "grad_norm": 6.089558124542236, "learning_rate": 6.34247590808006e-05, "loss": 0.7942, "step": 4616 }, { "epoch": 0.684, "grad_norm": 2.203416585922241, "learning_rate": 6.339510748702743e-05, "loss": 1.0105, "step": 4617 }, { "epoch": 0.6841481481481482, "grad_norm": 1.3859854936599731, "learning_rate": 6.336545589325426e-05, "loss": 0.9016, "step": 4618 }, { "epoch": 0.6842962962962963, "grad_norm": 1.4855002164840698, "learning_rate": 6.33358042994811e-05, "loss": 0.7378, "step": 4619 }, { "epoch": 0.6844444444444444, "grad_norm": 1.8707270622253418, "learning_rate": 6.330615270570794e-05, "loss": 1.0435, "step": 4620 }, { "epoch": 0.6845925925925926, "grad_norm": 1.3705289363861084, "learning_rate": 6.327650111193477e-05, "loss": 1.2175, "step": 4621 }, { "epoch": 0.6847407407407408, "grad_norm": 6.896151542663574, "learning_rate": 6.32468495181616e-05, "loss": 1.0735, "step": 4622 }, { "epoch": 0.6848888888888889, "grad_norm": 2.116827964782715, 
"learning_rate": 6.321719792438844e-05, "loss": 1.0728, "step": 4623 }, { "epoch": 0.685037037037037, "grad_norm": 2.238344669342041, "learning_rate": 6.318754633061527e-05, "loss": 0.8672, "step": 4624 }, { "epoch": 0.6851851851851852, "grad_norm": 2.1863036155700684, "learning_rate": 6.31578947368421e-05, "loss": 1.0841, "step": 4625 }, { "epoch": 0.6853333333333333, "grad_norm": 1.78851318359375, "learning_rate": 6.312824314306895e-05, "loss": 1.0287, "step": 4626 }, { "epoch": 0.6854814814814815, "grad_norm": 5.450416088104248, "learning_rate": 6.309859154929578e-05, "loss": 1.1345, "step": 4627 }, { "epoch": 0.6856296296296296, "grad_norm": 1.4445854425430298, "learning_rate": 6.306893995552261e-05, "loss": 1.2755, "step": 4628 }, { "epoch": 0.6857777777777778, "grad_norm": 1.173293113708496, "learning_rate": 6.303928836174945e-05, "loss": 0.8193, "step": 4629 }, { "epoch": 0.6859259259259259, "grad_norm": 1.6796793937683105, "learning_rate": 6.300963676797627e-05, "loss": 1.0992, "step": 4630 }, { "epoch": 0.6860740740740741, "grad_norm": 5.284728050231934, "learning_rate": 6.297998517420312e-05, "loss": 1.0728, "step": 4631 }, { "epoch": 0.6862222222222222, "grad_norm": 1.464169979095459, "learning_rate": 6.295033358042996e-05, "loss": 1.0381, "step": 4632 }, { "epoch": 0.6863703703703704, "grad_norm": 2.6172666549682617, "learning_rate": 6.292068198665678e-05, "loss": 1.0066, "step": 4633 }, { "epoch": 0.6865185185185185, "grad_norm": 1.7775756120681763, "learning_rate": 6.289103039288362e-05, "loss": 1.1639, "step": 4634 }, { "epoch": 0.6866666666666666, "grad_norm": 3.5914671421051025, "learning_rate": 6.286137879911045e-05, "loss": 1.0721, "step": 4635 }, { "epoch": 0.6868148148148148, "grad_norm": 2.1291871070861816, "learning_rate": 6.283172720533728e-05, "loss": 0.9818, "step": 4636 }, { "epoch": 0.686962962962963, "grad_norm": 3.232025384902954, "learning_rate": 6.280207561156413e-05, "loss": 1.1499, "step": 4637 }, { "epoch": 0.6871111111111111, 
"grad_norm": 1.3152642250061035, "learning_rate": 6.277242401779096e-05, "loss": 0.8339, "step": 4638 }, { "epoch": 0.6872592592592592, "grad_norm": 2.176630973815918, "learning_rate": 6.274277242401779e-05, "loss": 1.1671, "step": 4639 }, { "epoch": 0.6874074074074074, "grad_norm": 1.4894682168960571, "learning_rate": 6.271312083024463e-05, "loss": 1.3326, "step": 4640 }, { "epoch": 0.6875555555555556, "grad_norm": 5.215499401092529, "learning_rate": 6.268346923647147e-05, "loss": 0.8718, "step": 4641 }, { "epoch": 0.6877037037037037, "grad_norm": 1.5449154376983643, "learning_rate": 6.26538176426983e-05, "loss": 1.0189, "step": 4642 }, { "epoch": 0.6878518518518518, "grad_norm": 1.9991947412490845, "learning_rate": 6.262416604892514e-05, "loss": 0.9451, "step": 4643 }, { "epoch": 0.688, "grad_norm": 1.1996254920959473, "learning_rate": 6.259451445515196e-05, "loss": 1.0544, "step": 4644 }, { "epoch": 0.6881481481481482, "grad_norm": 1.4383140802383423, "learning_rate": 6.25648628613788e-05, "loss": 0.8975, "step": 4645 }, { "epoch": 0.6882962962962963, "grad_norm": 4.084737300872803, "learning_rate": 6.253521126760565e-05, "loss": 1.143, "step": 4646 }, { "epoch": 0.6884444444444444, "grad_norm": 1.930940866470337, "learning_rate": 6.250555967383246e-05, "loss": 1.2551, "step": 4647 }, { "epoch": 0.6885925925925926, "grad_norm": 1.4857474565505981, "learning_rate": 6.247590808005931e-05, "loss": 1.1241, "step": 4648 }, { "epoch": 0.6887407407407408, "grad_norm": 1.0609443187713623, "learning_rate": 6.244625648628614e-05, "loss": 0.7137, "step": 4649 }, { "epoch": 0.6888888888888889, "grad_norm": 2.1343209743499756, "learning_rate": 6.241660489251297e-05, "loss": 1.1902, "step": 4650 }, { "epoch": 0.689037037037037, "grad_norm": 5.095562934875488, "learning_rate": 6.238695329873982e-05, "loss": 1.007, "step": 4651 }, { "epoch": 0.6891851851851852, "grad_norm": 1.7792041301727295, "learning_rate": 6.235730170496665e-05, "loss": 1.1408, "step": 4652 }, { "epoch": 
0.6893333333333334, "grad_norm": 1.6858946084976196, "learning_rate": 6.232765011119348e-05, "loss": 1.1224, "step": 4653 }, { "epoch": 0.6894814814814815, "grad_norm": 1.2887392044067383, "learning_rate": 6.229799851742032e-05, "loss": 1.0686, "step": 4654 }, { "epoch": 0.6896296296296296, "grad_norm": 1.3962105512619019, "learning_rate": 6.226834692364715e-05, "loss": 1.0521, "step": 4655 }, { "epoch": 0.6897777777777778, "grad_norm": 1.8462930917739868, "learning_rate": 6.223869532987398e-05, "loss": 0.9927, "step": 4656 }, { "epoch": 0.6899259259259259, "grad_norm": 1.4207310676574707, "learning_rate": 6.220904373610081e-05, "loss": 1.1223, "step": 4657 }, { "epoch": 0.6900740740740741, "grad_norm": 1.438154935836792, "learning_rate": 6.217939214232766e-05, "loss": 0.988, "step": 4658 }, { "epoch": 0.6902222222222222, "grad_norm": 1.3789966106414795, "learning_rate": 6.214974054855449e-05, "loss": 0.9999, "step": 4659 }, { "epoch": 0.6903703703703704, "grad_norm": 1.7231265306472778, "learning_rate": 6.212008895478132e-05, "loss": 0.8048, "step": 4660 }, { "epoch": 0.6905185185185185, "grad_norm": 2.3367037773132324, "learning_rate": 6.209043736100815e-05, "loss": 0.7982, "step": 4661 }, { "epoch": 0.6906666666666667, "grad_norm": 1.5115360021591187, "learning_rate": 6.2060785767235e-05, "loss": 0.8287, "step": 4662 }, { "epoch": 0.6908148148148148, "grad_norm": 2.3880302906036377, "learning_rate": 6.203113417346183e-05, "loss": 1.1945, "step": 4663 }, { "epoch": 0.690962962962963, "grad_norm": 1.965254306793213, "learning_rate": 6.200148257968866e-05, "loss": 1.0172, "step": 4664 }, { "epoch": 0.6911111111111111, "grad_norm": 1.6692672967910767, "learning_rate": 6.197183098591549e-05, "loss": 0.801, "step": 4665 }, { "epoch": 0.6912592592592592, "grad_norm": 1.7301688194274902, "learning_rate": 6.194217939214233e-05, "loss": 1.1828, "step": 4666 }, { "epoch": 0.6914074074074074, "grad_norm": 2.8348965644836426, "learning_rate": 6.191252779836916e-05, "loss": 
1.0651, "step": 4667 }, { "epoch": 0.6915555555555556, "grad_norm": 1.343113899230957, "learning_rate": 6.1882876204596e-05, "loss": 0.9749, "step": 4668 }, { "epoch": 0.6917037037037037, "grad_norm": 1.9785374402999878, "learning_rate": 6.185322461082284e-05, "loss": 1.1086, "step": 4669 }, { "epoch": 0.6918518518518518, "grad_norm": 1.3531380891799927, "learning_rate": 6.182357301704967e-05, "loss": 1.1721, "step": 4670 }, { "epoch": 0.692, "grad_norm": 1.2428358793258667, "learning_rate": 6.17939214232765e-05, "loss": 1.0404, "step": 4671 }, { "epoch": 0.6921481481481482, "grad_norm": 3.1928300857543945, "learning_rate": 6.176426982950335e-05, "loss": 1.0914, "step": 4672 }, { "epoch": 0.6922962962962963, "grad_norm": 1.8552119731903076, "learning_rate": 6.173461823573016e-05, "loss": 0.9968, "step": 4673 }, { "epoch": 0.6924444444444444, "grad_norm": 2.0166265964508057, "learning_rate": 6.170496664195701e-05, "loss": 1.1317, "step": 4674 }, { "epoch": 0.6925925925925925, "grad_norm": 1.3955466747283936, "learning_rate": 6.167531504818385e-05, "loss": 0.9586, "step": 4675 }, { "epoch": 0.6927407407407408, "grad_norm": 1.3913319110870361, "learning_rate": 6.164566345441067e-05, "loss": 1.2034, "step": 4676 }, { "epoch": 0.6928888888888889, "grad_norm": 2.064141273498535, "learning_rate": 6.161601186063751e-05, "loss": 0.9239, "step": 4677 }, { "epoch": 0.693037037037037, "grad_norm": 1.7810102701187134, "learning_rate": 6.158636026686434e-05, "loss": 0.9786, "step": 4678 }, { "epoch": 0.6931851851851852, "grad_norm": 1.4174787998199463, "learning_rate": 6.155670867309118e-05, "loss": 0.934, "step": 4679 }, { "epoch": 0.6933333333333334, "grad_norm": 1.6481225490570068, "learning_rate": 6.152705707931802e-05, "loss": 1.0113, "step": 4680 }, { "epoch": 0.6934814814814815, "grad_norm": 1.7207773923873901, "learning_rate": 6.149740548554485e-05, "loss": 0.9592, "step": 4681 }, { "epoch": 0.6936296296296296, "grad_norm": 1.2530018091201782, "learning_rate": 
6.146775389177168e-05, "loss": 0.97, "step": 4682 }, { "epoch": 0.6937777777777778, "grad_norm": 2.6475136280059814, "learning_rate": 6.143810229799853e-05, "loss": 1.1792, "step": 4683 }, { "epoch": 0.693925925925926, "grad_norm": 2.30336856842041, "learning_rate": 6.140845070422536e-05, "loss": 1.0311, "step": 4684 }, { "epoch": 0.6940740740740741, "grad_norm": 1.6658436059951782, "learning_rate": 6.137879911045219e-05, "loss": 1.0753, "step": 4685 }, { "epoch": 0.6942222222222222, "grad_norm": 2.4629809856414795, "learning_rate": 6.134914751667903e-05, "loss": 1.0303, "step": 4686 }, { "epoch": 0.6943703703703704, "grad_norm": 1.2104218006134033, "learning_rate": 6.131949592290585e-05, "loss": 0.9383, "step": 4687 }, { "epoch": 0.6945185185185185, "grad_norm": 1.5628855228424072, "learning_rate": 6.12898443291327e-05, "loss": 0.9355, "step": 4688 }, { "epoch": 0.6946666666666667, "grad_norm": 1.9757931232452393, "learning_rate": 6.126019273535954e-05, "loss": 0.9127, "step": 4689 }, { "epoch": 0.6948148148148148, "grad_norm": 1.963261604309082, "learning_rate": 6.123054114158636e-05, "loss": 1.0436, "step": 4690 }, { "epoch": 0.694962962962963, "grad_norm": 1.6282535791397095, "learning_rate": 6.12008895478132e-05, "loss": 0.8477, "step": 4691 }, { "epoch": 0.6951111111111111, "grad_norm": 1.7320218086242676, "learning_rate": 6.117123795404003e-05, "loss": 1.2204, "step": 4692 }, { "epoch": 0.6952592592592592, "grad_norm": 2.105339765548706, "learning_rate": 6.114158636026686e-05, "loss": 0.9144, "step": 4693 }, { "epoch": 0.6954074074074074, "grad_norm": 2.56491756439209, "learning_rate": 6.111193476649371e-05, "loss": 1.0554, "step": 4694 }, { "epoch": 0.6955555555555556, "grad_norm": 1.3379398584365845, "learning_rate": 6.108228317272054e-05, "loss": 1.0433, "step": 4695 }, { "epoch": 0.6957037037037037, "grad_norm": 1.3718205690383911, "learning_rate": 6.105263157894737e-05, "loss": 0.9201, "step": 4696 }, { "epoch": 0.6958518518518518, "grad_norm": 
2.1932365894317627, "learning_rate": 6.102297998517421e-05, "loss": 1.3829, "step": 4697 }, { "epoch": 0.696, "grad_norm": 2.6814064979553223, "learning_rate": 6.099332839140104e-05, "loss": 1.3149, "step": 4698 }, { "epoch": 0.6961481481481482, "grad_norm": 2.553849935531616, "learning_rate": 6.0963676797627875e-05, "loss": 1.0724, "step": 4699 }, { "epoch": 0.6962962962962963, "grad_norm": 1.985586166381836, "learning_rate": 6.0934025203854706e-05, "loss": 1.1211, "step": 4700 }, { "epoch": 0.6964444444444444, "grad_norm": 1.577874779701233, "learning_rate": 6.0904373610081544e-05, "loss": 0.9315, "step": 4701 }, { "epoch": 0.6965925925925925, "grad_norm": 2.6683530807495117, "learning_rate": 6.087472201630838e-05, "loss": 0.8294, "step": 4702 }, { "epoch": 0.6967407407407408, "grad_norm": 2.568171262741089, "learning_rate": 6.084507042253521e-05, "loss": 0.9812, "step": 4703 }, { "epoch": 0.6968888888888889, "grad_norm": 1.4571040868759155, "learning_rate": 6.081541882876205e-05, "loss": 1.2184, "step": 4704 }, { "epoch": 0.697037037037037, "grad_norm": 3.402040481567383, "learning_rate": 6.078576723498889e-05, "loss": 0.9914, "step": 4705 }, { "epoch": 0.6971851851851851, "grad_norm": 1.8167369365692139, "learning_rate": 6.075611564121572e-05, "loss": 0.96, "step": 4706 }, { "epoch": 0.6973333333333334, "grad_norm": 1.9412450790405273, "learning_rate": 6.0726464047442556e-05, "loss": 1.1004, "step": 4707 }, { "epoch": 0.6974814814814815, "grad_norm": 1.4077459573745728, "learning_rate": 6.069681245366938e-05, "loss": 0.8931, "step": 4708 }, { "epoch": 0.6976296296296296, "grad_norm": 2.860391139984131, "learning_rate": 6.0667160859896225e-05, "loss": 1.0071, "step": 4709 }, { "epoch": 0.6977777777777778, "grad_norm": 2.3669731616973877, "learning_rate": 6.063750926612306e-05, "loss": 0.9619, "step": 4710 }, { "epoch": 0.697925925925926, "grad_norm": 1.5092662572860718, "learning_rate": 6.0607857672349887e-05, "loss": 1.1466, "step": 4711 }, { "epoch": 
0.6980740740740741, "grad_norm": 1.7964667081832886, "learning_rate": 6.0578206078576724e-05, "loss": 0.9678, "step": 4712 }, { "epoch": 0.6982222222222222, "grad_norm": 1.487083077430725, "learning_rate": 6.054855448480357e-05, "loss": 0.8466, "step": 4713 }, { "epoch": 0.6983703703703704, "grad_norm": 1.8605631589889526, "learning_rate": 6.051890289103039e-05, "loss": 0.9773, "step": 4714 }, { "epoch": 0.6985185185185185, "grad_norm": 1.8182073831558228, "learning_rate": 6.048925129725723e-05, "loss": 1.0576, "step": 4715 }, { "epoch": 0.6986666666666667, "grad_norm": 1.7778788805007935, "learning_rate": 6.045959970348406e-05, "loss": 0.9264, "step": 4716 }, { "epoch": 0.6988148148148148, "grad_norm": 1.9893543720245361, "learning_rate": 6.04299481097109e-05, "loss": 0.9176, "step": 4717 }, { "epoch": 0.698962962962963, "grad_norm": 1.6655364036560059, "learning_rate": 6.040029651593774e-05, "loss": 1.0479, "step": 4718 }, { "epoch": 0.6991111111111111, "grad_norm": 1.5168418884277344, "learning_rate": 6.037064492216457e-05, "loss": 0.983, "step": 4719 }, { "epoch": 0.6992592592592592, "grad_norm": 1.7691303491592407, "learning_rate": 6.0340993328391405e-05, "loss": 1.0607, "step": 4720 }, { "epoch": 0.6994074074074074, "grad_norm": 1.6258023977279663, "learning_rate": 6.031134173461824e-05, "loss": 0.8576, "step": 4721 }, { "epoch": 0.6995555555555556, "grad_norm": 1.5737048387527466, "learning_rate": 6.0281690140845074e-05, "loss": 0.9638, "step": 4722 }, { "epoch": 0.6997037037037037, "grad_norm": 2.3338394165039062, "learning_rate": 6.025203854707191e-05, "loss": 1.0194, "step": 4723 }, { "epoch": 0.6998518518518518, "grad_norm": 1.656455159187317, "learning_rate": 6.0222386953298736e-05, "loss": 1.0185, "step": 4724 }, { "epoch": 0.7, "grad_norm": 2.012742519378662, "learning_rate": 6.0192735359525574e-05, "loss": 1.0691, "step": 4725 }, { "epoch": 0.7001481481481482, "grad_norm": 1.199476718902588, "learning_rate": 6.016308376575242e-05, "loss": 0.9874, 
"step": 4726 }, { "epoch": 0.7002962962962963, "grad_norm": 1.5636026859283447, "learning_rate": 6.013343217197924e-05, "loss": 1.1817, "step": 4727 }, { "epoch": 0.7004444444444444, "grad_norm": 1.44326913356781, "learning_rate": 6.010378057820608e-05, "loss": 1.0238, "step": 4728 }, { "epoch": 0.7005925925925925, "grad_norm": 1.7215958833694458, "learning_rate": 6.007412898443292e-05, "loss": 1.1073, "step": 4729 }, { "epoch": 0.7007407407407408, "grad_norm": 2.1584391593933105, "learning_rate": 6.004447739065975e-05, "loss": 1.1707, "step": 4730 }, { "epoch": 0.7008888888888889, "grad_norm": 1.508112907409668, "learning_rate": 6.0014825796886586e-05, "loss": 0.9277, "step": 4731 }, { "epoch": 0.701037037037037, "grad_norm": 1.450149416923523, "learning_rate": 5.9985174203113424e-05, "loss": 1.1159, "step": 4732 }, { "epoch": 0.7011851851851851, "grad_norm": 1.7273179292678833, "learning_rate": 5.9955522609340255e-05, "loss": 0.9167, "step": 4733 }, { "epoch": 0.7013333333333334, "grad_norm": 1.7336523532867432, "learning_rate": 5.992587101556709e-05, "loss": 0.9558, "step": 4734 }, { "epoch": 0.7014814814814815, "grad_norm": 1.4574651718139648, "learning_rate": 5.989621942179392e-05, "loss": 0.8977, "step": 4735 }, { "epoch": 0.7016296296296296, "grad_norm": 2.9354634284973145, "learning_rate": 5.986656782802076e-05, "loss": 1.0963, "step": 4736 }, { "epoch": 0.7017777777777777, "grad_norm": 3.131915807723999, "learning_rate": 5.98369162342476e-05, "loss": 0.8543, "step": 4737 }, { "epoch": 0.701925925925926, "grad_norm": 1.915164828300476, "learning_rate": 5.980726464047442e-05, "loss": 1.021, "step": 4738 }, { "epoch": 0.7020740740740741, "grad_norm": 2.250742197036743, "learning_rate": 5.977761304670127e-05, "loss": 1.1014, "step": 4739 }, { "epoch": 0.7022222222222222, "grad_norm": 1.5038580894470215, "learning_rate": 5.9747961452928105e-05, "loss": 1.1808, "step": 4740 }, { "epoch": 0.7023703703703704, "grad_norm": 1.679661512374878, "learning_rate": 
5.971830985915493e-05, "loss": 0.996, "step": 4741 }, { "epoch": 0.7025185185185185, "grad_norm": 1.9651271104812622, "learning_rate": 5.968865826538177e-05, "loss": 1.1116, "step": 4742 }, { "epoch": 0.7026666666666667, "grad_norm": 2.024658203125, "learning_rate": 5.96590066716086e-05, "loss": 0.8722, "step": 4743 }, { "epoch": 0.7028148148148148, "grad_norm": 4.580418586730957, "learning_rate": 5.9629355077835435e-05, "loss": 1.0557, "step": 4744 }, { "epoch": 0.702962962962963, "grad_norm": 2.757688045501709, "learning_rate": 5.959970348406227e-05, "loss": 1.3364, "step": 4745 }, { "epoch": 0.7031111111111111, "grad_norm": 1.519632339477539, "learning_rate": 5.9570051890289104e-05, "loss": 0.932, "step": 4746 }, { "epoch": 0.7032592592592593, "grad_norm": 1.3939028978347778, "learning_rate": 5.954040029651594e-05, "loss": 0.968, "step": 4747 }, { "epoch": 0.7034074074074074, "grad_norm": 1.3090217113494873, "learning_rate": 5.951074870274278e-05, "loss": 1.1631, "step": 4748 }, { "epoch": 0.7035555555555556, "grad_norm": 1.3829586505889893, "learning_rate": 5.948109710896961e-05, "loss": 0.7478, "step": 4749 }, { "epoch": 0.7037037037037037, "grad_norm": 1.5770014524459839, "learning_rate": 5.945144551519645e-05, "loss": 0.7757, "step": 4750 }, { "epoch": 0.7038518518518518, "grad_norm": 1.3989028930664062, "learning_rate": 5.942179392142327e-05, "loss": 0.7791, "step": 4751 }, { "epoch": 0.704, "grad_norm": 2.3870842456817627, "learning_rate": 5.9392142327650116e-05, "loss": 1.2263, "step": 4752 }, { "epoch": 0.7041481481481482, "grad_norm": 1.4271196126937866, "learning_rate": 5.9362490733876954e-05, "loss": 0.858, "step": 4753 }, { "epoch": 0.7042962962962963, "grad_norm": 1.4456055164337158, "learning_rate": 5.933283914010378e-05, "loss": 0.9463, "step": 4754 }, { "epoch": 0.7044444444444444, "grad_norm": 1.2765156030654907, "learning_rate": 5.9303187546330616e-05, "loss": 1.0271, "step": 4755 }, { "epoch": 0.7045925925925925, "grad_norm": 
1.2719509601593018, "learning_rate": 5.927353595255746e-05, "loss": 1.1208, "step": 4756 }, { "epoch": 0.7047407407407408, "grad_norm": 2.141507148742676, "learning_rate": 5.9243884358784285e-05, "loss": 1.0317, "step": 4757 }, { "epoch": 0.7048888888888889, "grad_norm": 2.1003317832946777, "learning_rate": 5.921423276501112e-05, "loss": 1.1045, "step": 4758 }, { "epoch": 0.705037037037037, "grad_norm": 1.7553337812423706, "learning_rate": 5.918458117123795e-05, "loss": 0.8747, "step": 4759 }, { "epoch": 0.7051851851851851, "grad_norm": 1.8500460386276245, "learning_rate": 5.915492957746479e-05, "loss": 0.8808, "step": 4760 }, { "epoch": 0.7053333333333334, "grad_norm": 2.0247552394866943, "learning_rate": 5.912527798369163e-05, "loss": 1.1156, "step": 4761 }, { "epoch": 0.7054814814814815, "grad_norm": 1.9151968955993652, "learning_rate": 5.909562638991846e-05, "loss": 0.9452, "step": 4762 }, { "epoch": 0.7056296296296296, "grad_norm": 1.699159026145935, "learning_rate": 5.90659747961453e-05, "loss": 0.9527, "step": 4763 }, { "epoch": 0.7057777777777777, "grad_norm": 1.6331653594970703, "learning_rate": 5.9036323202372135e-05, "loss": 0.89, "step": 4764 }, { "epoch": 0.705925925925926, "grad_norm": 2.28598690032959, "learning_rate": 5.9006671608598966e-05, "loss": 1.108, "step": 4765 }, { "epoch": 0.7060740740740741, "grad_norm": 2.5625808238983154, "learning_rate": 5.89770200148258e-05, "loss": 1.0557, "step": 4766 }, { "epoch": 0.7062222222222222, "grad_norm": 3.2963948249816895, "learning_rate": 5.894736842105263e-05, "loss": 1.1277, "step": 4767 }, { "epoch": 0.7063703703703703, "grad_norm": 1.650059461593628, "learning_rate": 5.8917716827279465e-05, "loss": 1.0941, "step": 4768 }, { "epoch": 0.7065185185185185, "grad_norm": 1.6791961193084717, "learning_rate": 5.888806523350631e-05, "loss": 1.2284, "step": 4769 }, { "epoch": 0.7066666666666667, "grad_norm": 1.6996461153030396, "learning_rate": 5.8858413639733134e-05, "loss": 0.8496, "step": 4770 }, { "epoch": 
0.7068148148148148, "grad_norm": 1.7521129846572876, "learning_rate": 5.882876204595997e-05, "loss": 0.9027, "step": 4771 }, { "epoch": 0.706962962962963, "grad_norm": 1.7088426351547241, "learning_rate": 5.879911045218681e-05, "loss": 1.1444, "step": 4772 }, { "epoch": 0.7071111111111111, "grad_norm": 3.168114423751831, "learning_rate": 5.876945885841364e-05, "loss": 1.1774, "step": 4773 }, { "epoch": 0.7072592592592593, "grad_norm": 1.2813169956207275, "learning_rate": 5.873980726464048e-05, "loss": 0.9264, "step": 4774 }, { "epoch": 0.7074074074074074, "grad_norm": 1.6186448335647583, "learning_rate": 5.8710155670867315e-05, "loss": 1.1366, "step": 4775 }, { "epoch": 0.7075555555555556, "grad_norm": 1.6248599290847778, "learning_rate": 5.8680504077094146e-05, "loss": 1.295, "step": 4776 }, { "epoch": 0.7077037037037037, "grad_norm": 1.871125340461731, "learning_rate": 5.8650852483320984e-05, "loss": 1.3185, "step": 4777 }, { "epoch": 0.7078518518518518, "grad_norm": 1.6283950805664062, "learning_rate": 5.8621200889547815e-05, "loss": 1.1021, "step": 4778 }, { "epoch": 0.708, "grad_norm": 2.1870861053466797, "learning_rate": 5.859154929577465e-05, "loss": 0.9308, "step": 4779 }, { "epoch": 0.7081481481481482, "grad_norm": 1.734559416770935, "learning_rate": 5.856189770200149e-05, "loss": 1.3067, "step": 4780 }, { "epoch": 0.7082962962962963, "grad_norm": 1.5416643619537354, "learning_rate": 5.8532246108228314e-05, "loss": 0.8081, "step": 4781 }, { "epoch": 0.7084444444444444, "grad_norm": 2.0428013801574707, "learning_rate": 5.850259451445516e-05, "loss": 0.9032, "step": 4782 }, { "epoch": 0.7085925925925926, "grad_norm": 1.3764290809631348, "learning_rate": 5.8472942920681997e-05, "loss": 0.8764, "step": 4783 }, { "epoch": 0.7087407407407408, "grad_norm": 1.7580024003982544, "learning_rate": 5.844329132690882e-05, "loss": 1.0703, "step": 4784 }, { "epoch": 0.7088888888888889, "grad_norm": 1.3402817249298096, "learning_rate": 5.841363973313566e-05, "loss": 
0.7716, "step": 4785 }, { "epoch": 0.709037037037037, "grad_norm": 1.4462758302688599, "learning_rate": 5.838398813936249e-05, "loss": 0.8883, "step": 4786 }, { "epoch": 0.7091851851851851, "grad_norm": 1.529545783996582, "learning_rate": 5.835433654558933e-05, "loss": 0.7961, "step": 4787 }, { "epoch": 0.7093333333333334, "grad_norm": 1.3867297172546387, "learning_rate": 5.8324684951816165e-05, "loss": 0.9369, "step": 4788 }, { "epoch": 0.7094814814814815, "grad_norm": 1.3662264347076416, "learning_rate": 5.8295033358042996e-05, "loss": 0.9777, "step": 4789 }, { "epoch": 0.7096296296296296, "grad_norm": 1.5619665384292603, "learning_rate": 5.826538176426983e-05, "loss": 0.9971, "step": 4790 }, { "epoch": 0.7097777777777777, "grad_norm": 1.6321423053741455, "learning_rate": 5.823573017049667e-05, "loss": 1.0788, "step": 4791 }, { "epoch": 0.709925925925926, "grad_norm": 1.869868278503418, "learning_rate": 5.82060785767235e-05, "loss": 1.0693, "step": 4792 }, { "epoch": 0.7100740740740741, "grad_norm": 3.2114758491516113, "learning_rate": 5.817642698295034e-05, "loss": 1.0596, "step": 4793 }, { "epoch": 0.7102222222222222, "grad_norm": 1.7647440433502197, "learning_rate": 5.8146775389177164e-05, "loss": 1.1664, "step": 4794 }, { "epoch": 0.7103703703703703, "grad_norm": 1.788858413696289, "learning_rate": 5.811712379540401e-05, "loss": 0.7981, "step": 4795 }, { "epoch": 0.7105185185185185, "grad_norm": 1.276236891746521, "learning_rate": 5.8087472201630846e-05, "loss": 0.952, "step": 4796 }, { "epoch": 0.7106666666666667, "grad_norm": 1.2361873388290405, "learning_rate": 5.805782060785767e-05, "loss": 0.8385, "step": 4797 }, { "epoch": 0.7108148148148148, "grad_norm": 1.7445262670516968, "learning_rate": 5.802816901408451e-05, "loss": 1.2577, "step": 4798 }, { "epoch": 0.7109629629629629, "grad_norm": 3.5595784187316895, "learning_rate": 5.799851742031135e-05, "loss": 1.1013, "step": 4799 }, { "epoch": 0.7111111111111111, "grad_norm": 1.8771253824234009, 
"learning_rate": 5.7968865826538176e-05, "loss": 1.3519, "step": 4800 }, { "epoch": 0.7112592592592593, "grad_norm": 2.0168356895446777, "learning_rate": 5.7939214232765014e-05, "loss": 0.8908, "step": 4801 }, { "epoch": 0.7114074074074074, "grad_norm": 1.7869441509246826, "learning_rate": 5.7909562638991845e-05, "loss": 0.9439, "step": 4802 }, { "epoch": 0.7115555555555556, "grad_norm": 1.5168068408966064, "learning_rate": 5.787991104521868e-05, "loss": 1.1086, "step": 4803 }, { "epoch": 0.7117037037037037, "grad_norm": 1.6673247814178467, "learning_rate": 5.785025945144552e-05, "loss": 1.1323, "step": 4804 }, { "epoch": 0.7118518518518518, "grad_norm": 2.1463546752929688, "learning_rate": 5.782060785767235e-05, "loss": 0.9427, "step": 4805 }, { "epoch": 0.712, "grad_norm": 2.2018017768859863, "learning_rate": 5.779095626389919e-05, "loss": 0.7993, "step": 4806 }, { "epoch": 0.7121481481481482, "grad_norm": 2.1906869411468506, "learning_rate": 5.7761304670126026e-05, "loss": 0.9726, "step": 4807 }, { "epoch": 0.7122962962962963, "grad_norm": 1.5149343013763428, "learning_rate": 5.773165307635286e-05, "loss": 0.8789, "step": 4808 }, { "epoch": 0.7124444444444444, "grad_norm": 2.22306489944458, "learning_rate": 5.7702001482579695e-05, "loss": 1.1814, "step": 4809 }, { "epoch": 0.7125925925925926, "grad_norm": 1.7001208066940308, "learning_rate": 5.767234988880652e-05, "loss": 1.3092, "step": 4810 }, { "epoch": 0.7127407407407408, "grad_norm": 1.7391821146011353, "learning_rate": 5.764269829503336e-05, "loss": 1.0056, "step": 4811 }, { "epoch": 0.7128888888888889, "grad_norm": 1.6737895011901855, "learning_rate": 5.76130467012602e-05, "loss": 1.1267, "step": 4812 }, { "epoch": 0.713037037037037, "grad_norm": 1.665744662284851, "learning_rate": 5.7583395107487025e-05, "loss": 1.0543, "step": 4813 }, { "epoch": 0.7131851851851851, "grad_norm": 1.5450365543365479, "learning_rate": 5.755374351371386e-05, "loss": 0.9675, "step": 4814 }, { "epoch": 0.7133333333333334, 
"grad_norm": 2.015901565551758, "learning_rate": 5.75240919199407e-05, "loss": 1.0007, "step": 4815 }, { "epoch": 0.7134814814814815, "grad_norm": 1.0717864036560059, "learning_rate": 5.749444032616753e-05, "loss": 1.3662, "step": 4816 }, { "epoch": 0.7136296296296296, "grad_norm": 1.3023358583450317, "learning_rate": 5.746478873239437e-05, "loss": 0.8571, "step": 4817 }, { "epoch": 0.7137777777777777, "grad_norm": 2.133415460586548, "learning_rate": 5.743513713862121e-05, "loss": 1.063, "step": 4818 }, { "epoch": 0.713925925925926, "grad_norm": 3.2209153175354004, "learning_rate": 5.740548554484804e-05, "loss": 0.9373, "step": 4819 }, { "epoch": 0.7140740740740741, "grad_norm": 1.4295662641525269, "learning_rate": 5.7375833951074876e-05, "loss": 0.9626, "step": 4820 }, { "epoch": 0.7142222222222222, "grad_norm": 1.9351325035095215, "learning_rate": 5.7346182357301707e-05, "loss": 0.9861, "step": 4821 }, { "epoch": 0.7143703703703703, "grad_norm": 1.9480867385864258, "learning_rate": 5.7316530763528544e-05, "loss": 0.8906, "step": 4822 }, { "epoch": 0.7145185185185186, "grad_norm": 2.4652979373931885, "learning_rate": 5.728687916975538e-05, "loss": 1.1842, "step": 4823 }, { "epoch": 0.7146666666666667, "grad_norm": 1.3195927143096924, "learning_rate": 5.7257227575982206e-05, "loss": 1.0483, "step": 4824 }, { "epoch": 0.7148148148148148, "grad_norm": 2.3855903148651123, "learning_rate": 5.722757598220905e-05, "loss": 1.0235, "step": 4825 }, { "epoch": 0.7149629629629629, "grad_norm": 2.136852979660034, "learning_rate": 5.719792438843589e-05, "loss": 0.9468, "step": 4826 }, { "epoch": 0.7151111111111111, "grad_norm": 2.072896718978882, "learning_rate": 5.716827279466271e-05, "loss": 0.9382, "step": 4827 }, { "epoch": 0.7152592592592593, "grad_norm": 1.2670269012451172, "learning_rate": 5.713862120088955e-05, "loss": 0.927, "step": 4828 }, { "epoch": 0.7154074074074074, "grad_norm": 3.4418578147888184, "learning_rate": 5.710896960711638e-05, "loss": 0.8586, "step": 
4829 }, { "epoch": 0.7155555555555555, "grad_norm": 2.2086219787597656, "learning_rate": 5.707931801334322e-05, "loss": 1.1675, "step": 4830 }, { "epoch": 0.7157037037037037, "grad_norm": 1.7778724431991577, "learning_rate": 5.7049666419570056e-05, "loss": 1.0195, "step": 4831 }, { "epoch": 0.7158518518518519, "grad_norm": 1.8800742626190186, "learning_rate": 5.702001482579689e-05, "loss": 1.103, "step": 4832 }, { "epoch": 0.716, "grad_norm": 1.665619969367981, "learning_rate": 5.6990363232023725e-05, "loss": 1.0806, "step": 4833 }, { "epoch": 0.7161481481481482, "grad_norm": 1.8878470659255981, "learning_rate": 5.696071163825056e-05, "loss": 0.9141, "step": 4834 }, { "epoch": 0.7162962962962963, "grad_norm": 2.0529701709747314, "learning_rate": 5.6931060044477393e-05, "loss": 0.9595, "step": 4835 }, { "epoch": 0.7164444444444444, "grad_norm": 1.6207361221313477, "learning_rate": 5.690140845070423e-05, "loss": 0.8366, "step": 4836 }, { "epoch": 0.7165925925925926, "grad_norm": 2.2214808464050293, "learning_rate": 5.6871756856931055e-05, "loss": 1.1933, "step": 4837 }, { "epoch": 0.7167407407407408, "grad_norm": 1.9758164882659912, "learning_rate": 5.68421052631579e-05, "loss": 1.0578, "step": 4838 }, { "epoch": 0.7168888888888889, "grad_norm": 2.119447946548462, "learning_rate": 5.681245366938474e-05, "loss": 0.7454, "step": 4839 }, { "epoch": 0.717037037037037, "grad_norm": 3.2046563625335693, "learning_rate": 5.678280207561156e-05, "loss": 1.031, "step": 4840 }, { "epoch": 0.7171851851851851, "grad_norm": 1.210294246673584, "learning_rate": 5.67531504818384e-05, "loss": 1.1647, "step": 4841 }, { "epoch": 0.7173333333333334, "grad_norm": 1.4997482299804688, "learning_rate": 5.6723498888065244e-05, "loss": 1.0107, "step": 4842 }, { "epoch": 0.7174814814814815, "grad_norm": 1.4815231561660767, "learning_rate": 5.669384729429207e-05, "loss": 0.9828, "step": 4843 }, { "epoch": 0.7176296296296296, "grad_norm": 7.08305549621582, "learning_rate": 5.6664195700518906e-05, 
"loss": 1.1005, "step": 4844 }, { "epoch": 0.7177777777777777, "grad_norm": 1.646203875541687, "learning_rate": 5.6634544106745736e-05, "loss": 0.9126, "step": 4845 }, { "epoch": 0.717925925925926, "grad_norm": 2.0197436809539795, "learning_rate": 5.6604892512972574e-05, "loss": 0.8148, "step": 4846 }, { "epoch": 0.7180740740740741, "grad_norm": 2.079904794692993, "learning_rate": 5.657524091919941e-05, "loss": 1.1894, "step": 4847 }, { "epoch": 0.7182222222222222, "grad_norm": 2.2810299396514893, "learning_rate": 5.654558932542624e-05, "loss": 0.9773, "step": 4848 }, { "epoch": 0.7183703703703703, "grad_norm": 1.8330743312835693, "learning_rate": 5.651593773165308e-05, "loss": 1.3854, "step": 4849 }, { "epoch": 0.7185185185185186, "grad_norm": 1.503127098083496, "learning_rate": 5.648628613787992e-05, "loss": 0.8601, "step": 4850 }, { "epoch": 0.7186666666666667, "grad_norm": 2.749389410018921, "learning_rate": 5.645663454410675e-05, "loss": 0.9774, "step": 4851 }, { "epoch": 0.7188148148148148, "grad_norm": 1.6506550312042236, "learning_rate": 5.642698295033359e-05, "loss": 0.8976, "step": 4852 }, { "epoch": 0.7189629629629629, "grad_norm": 1.4076423645019531, "learning_rate": 5.6397331356560424e-05, "loss": 1.0072, "step": 4853 }, { "epoch": 0.7191111111111111, "grad_norm": 4.1458892822265625, "learning_rate": 5.636767976278725e-05, "loss": 0.7843, "step": 4854 }, { "epoch": 0.7192592592592593, "grad_norm": 1.7543416023254395, "learning_rate": 5.633802816901409e-05, "loss": 0.8265, "step": 4855 }, { "epoch": 0.7194074074074074, "grad_norm": 3.1243369579315186, "learning_rate": 5.630837657524092e-05, "loss": 1.2424, "step": 4856 }, { "epoch": 0.7195555555555555, "grad_norm": 1.600679636001587, "learning_rate": 5.6278724981467755e-05, "loss": 1.0629, "step": 4857 }, { "epoch": 0.7197037037037037, "grad_norm": 1.5085700750350952, "learning_rate": 5.624907338769459e-05, "loss": 1.0591, "step": 4858 }, { "epoch": 0.7198518518518519, "grad_norm": 1.312741994857788, 
"learning_rate": 5.621942179392142e-05, "loss": 0.9902, "step": 4859 }, { "epoch": 0.72, "grad_norm": 1.5295848846435547, "learning_rate": 5.618977020014826e-05, "loss": 1.2123, "step": 4860 }, { "epoch": 0.7201481481481481, "grad_norm": 1.4847466945648193, "learning_rate": 5.61601186063751e-05, "loss": 1.0278, "step": 4861 }, { "epoch": 0.7202962962962963, "grad_norm": 2.524733781814575, "learning_rate": 5.613046701260193e-05, "loss": 0.8715, "step": 4862 }, { "epoch": 0.7204444444444444, "grad_norm": 2.0416817665100098, "learning_rate": 5.610081541882877e-05, "loss": 0.8527, "step": 4863 }, { "epoch": 0.7205925925925926, "grad_norm": 1.7584664821624756, "learning_rate": 5.60711638250556e-05, "loss": 1.1136, "step": 4864 }, { "epoch": 0.7207407407407408, "grad_norm": 1.4379719495773315, "learning_rate": 5.6041512231282436e-05, "loss": 0.9294, "step": 4865 }, { "epoch": 0.7208888888888889, "grad_norm": 1.8781688213348389, "learning_rate": 5.6011860637509274e-05, "loss": 0.9797, "step": 4866 }, { "epoch": 0.721037037037037, "grad_norm": 1.8690986633300781, "learning_rate": 5.59822090437361e-05, "loss": 1.0365, "step": 4867 }, { "epoch": 0.7211851851851852, "grad_norm": 1.4605828523635864, "learning_rate": 5.595255744996294e-05, "loss": 0.9485, "step": 4868 }, { "epoch": 0.7213333333333334, "grad_norm": 2.2668371200561523, "learning_rate": 5.592290585618978e-05, "loss": 1.24, "step": 4869 }, { "epoch": 0.7214814814814815, "grad_norm": 1.3561054468154907, "learning_rate": 5.5893254262416604e-05, "loss": 1.0381, "step": 4870 }, { "epoch": 0.7216296296296296, "grad_norm": 1.8758090734481812, "learning_rate": 5.586360266864344e-05, "loss": 0.8211, "step": 4871 }, { "epoch": 0.7217777777777777, "grad_norm": 1.7644171714782715, "learning_rate": 5.583395107487027e-05, "loss": 0.8471, "step": 4872 }, { "epoch": 0.721925925925926, "grad_norm": 1.2087984085083008, "learning_rate": 5.580429948109711e-05, "loss": 1.0755, "step": 4873 }, { "epoch": 0.7220740740740741, 
"grad_norm": 2.2735400199890137, "learning_rate": 5.577464788732395e-05, "loss": 1.2577, "step": 4874 }, { "epoch": 0.7222222222222222, "grad_norm": 1.866197109222412, "learning_rate": 5.574499629355078e-05, "loss": 1.0508, "step": 4875 }, { "epoch": 0.7223703703703703, "grad_norm": 2.100440502166748, "learning_rate": 5.5715344699777617e-05, "loss": 1.067, "step": 4876 }, { "epoch": 0.7225185185185186, "grad_norm": 1.6745151281356812, "learning_rate": 5.5685693106004454e-05, "loss": 1.0073, "step": 4877 }, { "epoch": 0.7226666666666667, "grad_norm": 2.121823787689209, "learning_rate": 5.5656041512231285e-05, "loss": 0.9013, "step": 4878 }, { "epoch": 0.7228148148148148, "grad_norm": 1.6000807285308838, "learning_rate": 5.562638991845812e-05, "loss": 0.971, "step": 4879 }, { "epoch": 0.7229629629629629, "grad_norm": 2.799863576889038, "learning_rate": 5.559673832468495e-05, "loss": 1.2896, "step": 4880 }, { "epoch": 0.7231111111111111, "grad_norm": 1.652538776397705, "learning_rate": 5.556708673091179e-05, "loss": 1.116, "step": 4881 }, { "epoch": 0.7232592592592593, "grad_norm": 2.3919286727905273, "learning_rate": 5.553743513713863e-05, "loss": 1.0388, "step": 4882 }, { "epoch": 0.7234074074074074, "grad_norm": 2.370417594909668, "learning_rate": 5.550778354336545e-05, "loss": 0.9581, "step": 4883 }, { "epoch": 0.7235555555555555, "grad_norm": 1.2086527347564697, "learning_rate": 5.547813194959229e-05, "loss": 0.8202, "step": 4884 }, { "epoch": 0.7237037037037037, "grad_norm": 1.8177130222320557, "learning_rate": 5.5448480355819135e-05, "loss": 1.1192, "step": 4885 }, { "epoch": 0.7238518518518519, "grad_norm": 6.591041088104248, "learning_rate": 5.541882876204596e-05, "loss": 1.0015, "step": 4886 }, { "epoch": 0.724, "grad_norm": 2.110539436340332, "learning_rate": 5.53891771682728e-05, "loss": 0.9016, "step": 4887 }, { "epoch": 0.7241481481481481, "grad_norm": 3.109020948410034, "learning_rate": 5.535952557449963e-05, "loss": 1.0737, "step": 4888 }, { "epoch": 
0.7242962962962963, "grad_norm": 1.3220057487487793, "learning_rate": 5.5329873980726466e-05, "loss": 0.973, "step": 4889 }, { "epoch": 0.7244444444444444, "grad_norm": 1.1820323467254639, "learning_rate": 5.5300222386953303e-05, "loss": 1.0008, "step": 4890 }, { "epoch": 0.7245925925925926, "grad_norm": 2.1216866970062256, "learning_rate": 5.5270570793180134e-05, "loss": 0.7249, "step": 4891 }, { "epoch": 0.7247407407407407, "grad_norm": 1.6888575553894043, "learning_rate": 5.524091919940697e-05, "loss": 1.4341, "step": 4892 }, { "epoch": 0.7248888888888889, "grad_norm": 2.9108896255493164, "learning_rate": 5.521126760563381e-05, "loss": 0.9926, "step": 4893 }, { "epoch": 0.725037037037037, "grad_norm": 1.7559731006622314, "learning_rate": 5.518161601186064e-05, "loss": 1.1029, "step": 4894 }, { "epoch": 0.7251851851851852, "grad_norm": 1.4509018659591675, "learning_rate": 5.515196441808748e-05, "loss": 1.0668, "step": 4895 }, { "epoch": 0.7253333333333334, "grad_norm": 1.8988780975341797, "learning_rate": 5.5122312824314316e-05, "loss": 1.0963, "step": 4896 }, { "epoch": 0.7254814814814815, "grad_norm": 3.0174896717071533, "learning_rate": 5.509266123054114e-05, "loss": 1.0151, "step": 4897 }, { "epoch": 0.7256296296296296, "grad_norm": 1.4957520961761475, "learning_rate": 5.5063009636767985e-05, "loss": 0.9085, "step": 4898 }, { "epoch": 0.7257777777777777, "grad_norm": 2.035078525543213, "learning_rate": 5.503335804299481e-05, "loss": 0.8713, "step": 4899 }, { "epoch": 0.725925925925926, "grad_norm": 0.9935551285743713, "learning_rate": 5.5003706449221646e-05, "loss": 1.0032, "step": 4900 }, { "epoch": 0.7260740740740741, "grad_norm": 2.0780887603759766, "learning_rate": 5.4974054855448484e-05, "loss": 1.0751, "step": 4901 }, { "epoch": 0.7262222222222222, "grad_norm": 1.6350312232971191, "learning_rate": 5.4944403261675315e-05, "loss": 1.1934, "step": 4902 }, { "epoch": 0.7263703703703703, "grad_norm": 2.609414577484131, "learning_rate": 5.491475166790215e-05, 
"loss": 1.1323, "step": 4903 }, { "epoch": 0.7265185185185186, "grad_norm": 1.2709413766860962, "learning_rate": 5.488510007412899e-05, "loss": 0.8645, "step": 4904 }, { "epoch": 0.7266666666666667, "grad_norm": 1.2477498054504395, "learning_rate": 5.485544848035582e-05, "loss": 1.012, "step": 4905 }, { "epoch": 0.7268148148148148, "grad_norm": 1.6356146335601807, "learning_rate": 5.482579688658266e-05, "loss": 1.3078, "step": 4906 }, { "epoch": 0.7269629629629629, "grad_norm": 1.3068426847457886, "learning_rate": 5.479614529280949e-05, "loss": 1.3083, "step": 4907 }, { "epoch": 0.7271111111111112, "grad_norm": 1.8441948890686035, "learning_rate": 5.476649369903633e-05, "loss": 1.1332, "step": 4908 }, { "epoch": 0.7272592592592593, "grad_norm": 2.111903429031372, "learning_rate": 5.4736842105263165e-05, "loss": 1.0196, "step": 4909 }, { "epoch": 0.7274074074074074, "grad_norm": 1.3523560762405396, "learning_rate": 5.470719051148999e-05, "loss": 1.0159, "step": 4910 }, { "epoch": 0.7275555555555555, "grad_norm": 2.855959892272949, "learning_rate": 5.4677538917716834e-05, "loss": 1.2127, "step": 4911 }, { "epoch": 0.7277037037037037, "grad_norm": 1.5005217790603638, "learning_rate": 5.464788732394367e-05, "loss": 1.0609, "step": 4912 }, { "epoch": 0.7278518518518519, "grad_norm": 1.4840768575668335, "learning_rate": 5.4618235730170496e-05, "loss": 0.9953, "step": 4913 }, { "epoch": 0.728, "grad_norm": 1.0789657831192017, "learning_rate": 5.458858413639733e-05, "loss": 1.1345, "step": 4914 }, { "epoch": 0.7281481481481481, "grad_norm": 2.3142013549804688, "learning_rate": 5.4558932542624164e-05, "loss": 0.9493, "step": 4915 }, { "epoch": 0.7282962962962963, "grad_norm": 4.36397647857666, "learning_rate": 5.4529280948851e-05, "loss": 0.88, "step": 4916 }, { "epoch": 0.7284444444444444, "grad_norm": 1.8086189031600952, "learning_rate": 5.449962935507784e-05, "loss": 1.2032, "step": 4917 }, { "epoch": 0.7285925925925926, "grad_norm": 1.501092553138733, "learning_rate": 
5.446997776130467e-05, "loss": 0.7264, "step": 4918 }, { "epoch": 0.7287407407407407, "grad_norm": 1.809766411781311, "learning_rate": 5.444032616753151e-05, "loss": 1.1109, "step": 4919 }, { "epoch": 0.7288888888888889, "grad_norm": 1.3491125106811523, "learning_rate": 5.4410674573758346e-05, "loss": 0.7823, "step": 4920 }, { "epoch": 0.729037037037037, "grad_norm": 1.5708109140396118, "learning_rate": 5.438102297998518e-05, "loss": 0.829, "step": 4921 }, { "epoch": 0.7291851851851852, "grad_norm": 2.049161911010742, "learning_rate": 5.4351371386212014e-05, "loss": 0.8991, "step": 4922 }, { "epoch": 0.7293333333333333, "grad_norm": 1.0794745683670044, "learning_rate": 5.432171979243884e-05, "loss": 0.7913, "step": 4923 }, { "epoch": 0.7294814814814815, "grad_norm": 2.5652902126312256, "learning_rate": 5.429206819866568e-05, "loss": 1.0493, "step": 4924 }, { "epoch": 0.7296296296296296, "grad_norm": 2.240591287612915, "learning_rate": 5.426241660489252e-05, "loss": 1.0168, "step": 4925 }, { "epoch": 0.7297777777777777, "grad_norm": 1.9119607210159302, "learning_rate": 5.4232765011119345e-05, "loss": 1.2101, "step": 4926 }, { "epoch": 0.729925925925926, "grad_norm": 1.4905898571014404, "learning_rate": 5.420311341734618e-05, "loss": 0.9353, "step": 4927 }, { "epoch": 0.7300740740740741, "grad_norm": 2.004554510116577, "learning_rate": 5.417346182357303e-05, "loss": 1.2032, "step": 4928 }, { "epoch": 0.7302222222222222, "grad_norm": 1.0485835075378418, "learning_rate": 5.414381022979985e-05, "loss": 0.8296, "step": 4929 }, { "epoch": 0.7303703703703703, "grad_norm": 2.900482654571533, "learning_rate": 5.411415863602669e-05, "loss": 0.9702, "step": 4930 }, { "epoch": 0.7305185185185186, "grad_norm": 1.6167924404144287, "learning_rate": 5.408450704225352e-05, "loss": 1.0082, "step": 4931 }, { "epoch": 0.7306666666666667, "grad_norm": 2.0141680240631104, "learning_rate": 5.405485544848036e-05, "loss": 1.0995, "step": 4932 }, { "epoch": 0.7308148148148148, "grad_norm": 
2.588238000869751, "learning_rate": 5.4025203854707195e-05, "loss": 1.202, "step": 4933 }, { "epoch": 0.7309629629629629, "grad_norm": 1.3934060335159302, "learning_rate": 5.3995552260934026e-05, "loss": 1.0188, "step": 4934 }, { "epoch": 0.7311111111111112, "grad_norm": 1.9247220754623413, "learning_rate": 5.3965900667160864e-05, "loss": 0.8479, "step": 4935 }, { "epoch": 0.7312592592592593, "grad_norm": 1.7972164154052734, "learning_rate": 5.39362490733877e-05, "loss": 1.2798, "step": 4936 }, { "epoch": 0.7314074074074074, "grad_norm": 1.7413814067840576, "learning_rate": 5.390659747961453e-05, "loss": 1.172, "step": 4937 }, { "epoch": 0.7315555555555555, "grad_norm": 1.755491852760315, "learning_rate": 5.387694588584137e-05, "loss": 0.814, "step": 4938 }, { "epoch": 0.7317037037037037, "grad_norm": 1.4138622283935547, "learning_rate": 5.384729429206821e-05, "loss": 1.023, "step": 4939 }, { "epoch": 0.7318518518518519, "grad_norm": 2.6964871883392334, "learning_rate": 5.381764269829503e-05, "loss": 1.2712, "step": 4940 }, { "epoch": 0.732, "grad_norm": 1.6311568021774292, "learning_rate": 5.3787991104521876e-05, "loss": 0.9323, "step": 4941 }, { "epoch": 0.7321481481481481, "grad_norm": 2.1600868701934814, "learning_rate": 5.37583395107487e-05, "loss": 0.9687, "step": 4942 }, { "epoch": 0.7322962962962963, "grad_norm": 2.4418132305145264, "learning_rate": 5.372868791697554e-05, "loss": 0.9305, "step": 4943 }, { "epoch": 0.7324444444444445, "grad_norm": 2.5369813442230225, "learning_rate": 5.3699036323202376e-05, "loss": 0.8592, "step": 4944 }, { "epoch": 0.7325925925925926, "grad_norm": 2.060513734817505, "learning_rate": 5.366938472942921e-05, "loss": 0.8562, "step": 4945 }, { "epoch": 0.7327407407407407, "grad_norm": 1.7541512250900269, "learning_rate": 5.3639733135656044e-05, "loss": 1.1362, "step": 4946 }, { "epoch": 0.7328888888888889, "grad_norm": 1.4201487302780151, "learning_rate": 5.361008154188288e-05, "loss": 1.021, "step": 4947 }, { "epoch": 
0.733037037037037, "grad_norm": 1.720343828201294, "learning_rate": 5.358042994810971e-05, "loss": 0.8417, "step": 4948 }, { "epoch": 0.7331851851851852, "grad_norm": 2.402940034866333, "learning_rate": 5.355077835433655e-05, "loss": 0.8907, "step": 4949 }, { "epoch": 0.7333333333333333, "grad_norm": 1.6902525424957275, "learning_rate": 5.352112676056338e-05, "loss": 1.0181, "step": 4950 }, { "epoch": 0.7334814814814815, "grad_norm": 1.8172622919082642, "learning_rate": 5.349147516679022e-05, "loss": 1.1631, "step": 4951 }, { "epoch": 0.7336296296296296, "grad_norm": 1.879783272743225, "learning_rate": 5.346182357301706e-05, "loss": 0.9498, "step": 4952 }, { "epoch": 0.7337777777777778, "grad_norm": 1.7294676303863525, "learning_rate": 5.343217197924388e-05, "loss": 1.0644, "step": 4953 }, { "epoch": 0.7339259259259259, "grad_norm": 1.4138739109039307, "learning_rate": 5.3402520385470725e-05, "loss": 0.9301, "step": 4954 }, { "epoch": 0.7340740740740741, "grad_norm": 1.3349261283874512, "learning_rate": 5.337286879169756e-05, "loss": 0.9506, "step": 4955 }, { "epoch": 0.7342222222222222, "grad_norm": 2.168971061706543, "learning_rate": 5.334321719792439e-05, "loss": 0.9874, "step": 4956 }, { "epoch": 0.7343703703703703, "grad_norm": 1.384792685508728, "learning_rate": 5.3313565604151225e-05, "loss": 1.1012, "step": 4957 }, { "epoch": 0.7345185185185186, "grad_norm": 2.2706778049468994, "learning_rate": 5.3283914010378056e-05, "loss": 1.1663, "step": 4958 }, { "epoch": 0.7346666666666667, "grad_norm": 1.9906309843063354, "learning_rate": 5.3254262416604894e-05, "loss": 0.9603, "step": 4959 }, { "epoch": 0.7348148148148148, "grad_norm": 1.9914016723632812, "learning_rate": 5.322461082283173e-05, "loss": 1.12, "step": 4960 }, { "epoch": 0.7349629629629629, "grad_norm": 2.031702995300293, "learning_rate": 5.319495922905856e-05, "loss": 0.9065, "step": 4961 }, { "epoch": 0.7351111111111112, "grad_norm": 2.2742061614990234, "learning_rate": 5.31653076352854e-05, "loss": 
1.196, "step": 4962 }, { "epoch": 0.7352592592592593, "grad_norm": 1.26792573928833, "learning_rate": 5.313565604151224e-05, "loss": 1.3298, "step": 4963 }, { "epoch": 0.7354074074074074, "grad_norm": 1.838092565536499, "learning_rate": 5.310600444773907e-05, "loss": 1.0595, "step": 4964 }, { "epoch": 0.7355555555555555, "grad_norm": 1.5691438913345337, "learning_rate": 5.3076352853965906e-05, "loss": 1.1053, "step": 4965 }, { "epoch": 0.7357037037037037, "grad_norm": 1.9591392278671265, "learning_rate": 5.304670126019273e-05, "loss": 1.0963, "step": 4966 }, { "epoch": 0.7358518518518519, "grad_norm": 2.119932174682617, "learning_rate": 5.3017049666419575e-05, "loss": 0.9933, "step": 4967 }, { "epoch": 0.736, "grad_norm": 1.7089087963104248, "learning_rate": 5.298739807264641e-05, "loss": 0.8262, "step": 4968 }, { "epoch": 0.7361481481481481, "grad_norm": 1.9580737352371216, "learning_rate": 5.2957746478873237e-05, "loss": 0.9954, "step": 4969 }, { "epoch": 0.7362962962962963, "grad_norm": 2.169576406478882, "learning_rate": 5.2928094885100074e-05, "loss": 0.9911, "step": 4970 }, { "epoch": 0.7364444444444445, "grad_norm": 1.5147404670715332, "learning_rate": 5.289844329132692e-05, "loss": 1.0696, "step": 4971 }, { "epoch": 0.7365925925925926, "grad_norm": 1.6818712949752808, "learning_rate": 5.286879169755374e-05, "loss": 0.8472, "step": 4972 }, { "epoch": 0.7367407407407407, "grad_norm": 1.5663869380950928, "learning_rate": 5.283914010378058e-05, "loss": 0.9923, "step": 4973 }, { "epoch": 0.7368888888888889, "grad_norm": 2.421029806137085, "learning_rate": 5.280948851000741e-05, "loss": 1.0343, "step": 4974 }, { "epoch": 0.737037037037037, "grad_norm": 3.366656541824341, "learning_rate": 5.277983691623425e-05, "loss": 1.1295, "step": 4975 }, { "epoch": 0.7371851851851852, "grad_norm": 1.3032071590423584, "learning_rate": 5.275018532246109e-05, "loss": 0.9873, "step": 4976 }, { "epoch": 0.7373333333333333, "grad_norm": 3.0822577476501465, "learning_rate": 
5.272053372868792e-05, "loss": 0.773, "step": 4977 }, { "epoch": 0.7374814814814815, "grad_norm": 1.7527302503585815, "learning_rate": 5.2690882134914755e-05, "loss": 1.1594, "step": 4978 }, { "epoch": 0.7376296296296296, "grad_norm": 1.6602507829666138, "learning_rate": 5.266123054114159e-05, "loss": 0.9835, "step": 4979 }, { "epoch": 0.7377777777777778, "grad_norm": 1.6826786994934082, "learning_rate": 5.2631578947368424e-05, "loss": 1.1101, "step": 4980 }, { "epoch": 0.7379259259259259, "grad_norm": 1.423226237297058, "learning_rate": 5.260192735359526e-05, "loss": 1.0795, "step": 4981 }, { "epoch": 0.7380740740740741, "grad_norm": 1.8139948844909668, "learning_rate": 5.25722757598221e-05, "loss": 0.901, "step": 4982 }, { "epoch": 0.7382222222222222, "grad_norm": 1.323270320892334, "learning_rate": 5.2542624166048923e-05, "loss": 0.9716, "step": 4983 }, { "epoch": 0.7383703703703703, "grad_norm": 1.5968650579452515, "learning_rate": 5.251297257227577e-05, "loss": 1.0551, "step": 4984 }, { "epoch": 0.7385185185185185, "grad_norm": 4.135926723480225, "learning_rate": 5.248332097850259e-05, "loss": 0.9754, "step": 4985 }, { "epoch": 0.7386666666666667, "grad_norm": 1.9808924198150635, "learning_rate": 5.245366938472943e-05, "loss": 1.1565, "step": 4986 }, { "epoch": 0.7388148148148148, "grad_norm": 3.052340030670166, "learning_rate": 5.242401779095627e-05, "loss": 1.1365, "step": 4987 }, { "epoch": 0.7389629629629629, "grad_norm": 1.6291524171829224, "learning_rate": 5.23943661971831e-05, "loss": 0.9795, "step": 4988 }, { "epoch": 0.7391111111111112, "grad_norm": 2.475485324859619, "learning_rate": 5.2364714603409936e-05, "loss": 1.048, "step": 4989 }, { "epoch": 0.7392592592592593, "grad_norm": 1.5522637367248535, "learning_rate": 5.2335063009636774e-05, "loss": 1.0012, "step": 4990 }, { "epoch": 0.7394074074074074, "grad_norm": 1.6295976638793945, "learning_rate": 5.2305411415863605e-05, "loss": 1.1546, "step": 4991 }, { "epoch": 0.7395555555555555, "grad_norm": 
1.7080334424972534, "learning_rate": 5.227575982209044e-05, "loss": 1.04, "step": 4992 }, { "epoch": 0.7397037037037038, "grad_norm": 2.2442939281463623, "learning_rate": 5.224610822831727e-05, "loss": 0.9557, "step": 4993 }, { "epoch": 0.7398518518518519, "grad_norm": 1.4062635898590088, "learning_rate": 5.221645663454411e-05, "loss": 0.9776, "step": 4994 }, { "epoch": 0.74, "grad_norm": 1.5093679428100586, "learning_rate": 5.218680504077095e-05, "loss": 1.0569, "step": 4995 }, { "epoch": 0.7401481481481481, "grad_norm": 5.3266496658325195, "learning_rate": 5.215715344699777e-05, "loss": 1.0382, "step": 4996 }, { "epoch": 0.7402962962962963, "grad_norm": 1.4864649772644043, "learning_rate": 5.212750185322462e-05, "loss": 1.3798, "step": 4997 }, { "epoch": 0.7404444444444445, "grad_norm": 2.1344094276428223, "learning_rate": 5.2097850259451455e-05, "loss": 0.8893, "step": 4998 }, { "epoch": 0.7405925925925926, "grad_norm": 2.4697370529174805, "learning_rate": 5.206819866567828e-05, "loss": 0.9593, "step": 4999 }, { "epoch": 0.7407407407407407, "grad_norm": 3.9260923862457275, "learning_rate": 5.203854707190512e-05, "loss": 1.1272, "step": 5000 }, { "epoch": 0.7408888888888889, "grad_norm": 1.388735055923462, "learning_rate": 5.200889547813195e-05, "loss": 0.6663, "step": 5001 }, { "epoch": 0.741037037037037, "grad_norm": 1.714282512664795, "learning_rate": 5.1979243884358785e-05, "loss": 1.2451, "step": 5002 }, { "epoch": 0.7411851851851852, "grad_norm": 2.0393123626708984, "learning_rate": 5.194959229058562e-05, "loss": 0.9522, "step": 5003 }, { "epoch": 0.7413333333333333, "grad_norm": 1.7348084449768066, "learning_rate": 5.1919940696812454e-05, "loss": 1.0637, "step": 5004 }, { "epoch": 0.7414814814814815, "grad_norm": 3.50278639793396, "learning_rate": 5.189028910303929e-05, "loss": 1.1513, "step": 5005 }, { "epoch": 0.7416296296296296, "grad_norm": 3.855315923690796, "learning_rate": 5.186063750926613e-05, "loss": 0.9822, "step": 5006 }, { "epoch": 
0.7417777777777778, "grad_norm": 4.46833610534668, "learning_rate": 5.183098591549296e-05, "loss": 0.9115, "step": 5007 }, { "epoch": 0.7419259259259259, "grad_norm": 1.7555344104766846, "learning_rate": 5.18013343217198e-05, "loss": 1.2394, "step": 5008 }, { "epoch": 0.7420740740740741, "grad_norm": 2.125119924545288, "learning_rate": 5.177168272794662e-05, "loss": 1.2223, "step": 5009 }, { "epoch": 0.7422222222222222, "grad_norm": 3.150367021560669, "learning_rate": 5.1742031134173466e-05, "loss": 0.9966, "step": 5010 }, { "epoch": 0.7423703703703703, "grad_norm": 1.2428815364837646, "learning_rate": 5.1712379540400304e-05, "loss": 1.015, "step": 5011 }, { "epoch": 0.7425185185185185, "grad_norm": 1.4209166765213013, "learning_rate": 5.168272794662713e-05, "loss": 0.9191, "step": 5012 }, { "epoch": 0.7426666666666667, "grad_norm": 2.137342691421509, "learning_rate": 5.1653076352853966e-05, "loss": 1.1639, "step": 5013 }, { "epoch": 0.7428148148148148, "grad_norm": 1.7810817956924438, "learning_rate": 5.162342475908081e-05, "loss": 1.041, "step": 5014 }, { "epoch": 0.7429629629629629, "grad_norm": 1.5036296844482422, "learning_rate": 5.1593773165307634e-05, "loss": 1.0331, "step": 5015 }, { "epoch": 0.7431111111111111, "grad_norm": 2.7676284313201904, "learning_rate": 5.156412157153447e-05, "loss": 0.9642, "step": 5016 }, { "epoch": 0.7432592592592593, "grad_norm": 1.8751683235168457, "learning_rate": 5.15344699777613e-05, "loss": 1.1439, "step": 5017 }, { "epoch": 0.7434074074074074, "grad_norm": 1.4921735525131226, "learning_rate": 5.150481838398814e-05, "loss": 1.1156, "step": 5018 }, { "epoch": 0.7435555555555555, "grad_norm": 2.4699594974517822, "learning_rate": 5.147516679021498e-05, "loss": 1.0008, "step": 5019 }, { "epoch": 0.7437037037037038, "grad_norm": 2.1117660999298096, "learning_rate": 5.144551519644181e-05, "loss": 1.1055, "step": 5020 }, { "epoch": 0.7438518518518519, "grad_norm": 1.7587753534317017, "learning_rate": 5.141586360266865e-05, "loss": 
1.1196, "step": 5021 }, { "epoch": 0.744, "grad_norm": 3.3482227325439453, "learning_rate": 5.1386212008895485e-05, "loss": 1.0338, "step": 5022 }, { "epoch": 0.7441481481481481, "grad_norm": 1.7586042881011963, "learning_rate": 5.1356560415122316e-05, "loss": 1.1843, "step": 5023 }, { "epoch": 0.7442962962962963, "grad_norm": 1.3716119527816772, "learning_rate": 5.132690882134915e-05, "loss": 0.7945, "step": 5024 }, { "epoch": 0.7444444444444445, "grad_norm": 2.298292636871338, "learning_rate": 5.129725722757599e-05, "loss": 1.1102, "step": 5025 }, { "epoch": 0.7445925925925926, "grad_norm": 3.0868923664093018, "learning_rate": 5.1267605633802815e-05, "loss": 1.0216, "step": 5026 }, { "epoch": 0.7447407407407407, "grad_norm": 6.296951770782471, "learning_rate": 5.123795404002966e-05, "loss": 0.9318, "step": 5027 }, { "epoch": 0.7448888888888889, "grad_norm": 1.2797033786773682, "learning_rate": 5.1208302446256484e-05, "loss": 0.7388, "step": 5028 }, { "epoch": 0.745037037037037, "grad_norm": 2.2225050926208496, "learning_rate": 5.117865085248332e-05, "loss": 1.038, "step": 5029 }, { "epoch": 0.7451851851851852, "grad_norm": 3.8540756702423096, "learning_rate": 5.114899925871016e-05, "loss": 1.445, "step": 5030 }, { "epoch": 0.7453333333333333, "grad_norm": 1.381791114807129, "learning_rate": 5.111934766493699e-05, "loss": 1.0029, "step": 5031 }, { "epoch": 0.7454814814814815, "grad_norm": 1.36771821975708, "learning_rate": 5.108969607116383e-05, "loss": 0.7636, "step": 5032 }, { "epoch": 0.7456296296296296, "grad_norm": 2.076235771179199, "learning_rate": 5.1060044477390665e-05, "loss": 0.7626, "step": 5033 }, { "epoch": 0.7457777777777778, "grad_norm": 1.1504572629928589, "learning_rate": 5.1030392883617496e-05, "loss": 0.925, "step": 5034 }, { "epoch": 0.7459259259259259, "grad_norm": 1.3281841278076172, "learning_rate": 5.1000741289844334e-05, "loss": 1.2024, "step": 5035 }, { "epoch": 0.7460740740740741, "grad_norm": 1.8246177434921265, "learning_rate": 
5.0971089696071165e-05, "loss": 1.3113, "step": 5036 }, { "epoch": 0.7462222222222222, "grad_norm": 2.447537660598755, "learning_rate": 5.0941438102298e-05, "loss": 1.0149, "step": 5037 }, { "epoch": 0.7463703703703704, "grad_norm": 1.6020809412002563, "learning_rate": 5.091178650852484e-05, "loss": 0.8735, "step": 5038 }, { "epoch": 0.7465185185185185, "grad_norm": 1.535174012184143, "learning_rate": 5.0882134914751664e-05, "loss": 1.0452, "step": 5039 }, { "epoch": 0.7466666666666667, "grad_norm": 1.951108694076538, "learning_rate": 5.085248332097851e-05, "loss": 1.1868, "step": 5040 }, { "epoch": 0.7468148148148148, "grad_norm": 3.4173529148101807, "learning_rate": 5.0822831727205346e-05, "loss": 0.9471, "step": 5041 }, { "epoch": 0.7469629629629629, "grad_norm": 1.3767805099487305, "learning_rate": 5.079318013343217e-05, "loss": 0.9748, "step": 5042 }, { "epoch": 0.7471111111111111, "grad_norm": 1.9710142612457275, "learning_rate": 5.076352853965901e-05, "loss": 0.9426, "step": 5043 }, { "epoch": 0.7472592592592593, "grad_norm": 1.3733680248260498, "learning_rate": 5.073387694588584e-05, "loss": 0.8834, "step": 5044 }, { "epoch": 0.7474074074074074, "grad_norm": 1.5866197347640991, "learning_rate": 5.070422535211268e-05, "loss": 1.1177, "step": 5045 }, { "epoch": 0.7475555555555555, "grad_norm": 1.3883211612701416, "learning_rate": 5.0674573758339515e-05, "loss": 1.0757, "step": 5046 }, { "epoch": 0.7477037037037036, "grad_norm": 1.562939167022705, "learning_rate": 5.0644922164566345e-05, "loss": 0.9694, "step": 5047 }, { "epoch": 0.7478518518518519, "grad_norm": 1.792899250984192, "learning_rate": 5.061527057079318e-05, "loss": 0.9668, "step": 5048 }, { "epoch": 0.748, "grad_norm": 1.1375911235809326, "learning_rate": 5.058561897702002e-05, "loss": 1.0665, "step": 5049 }, { "epoch": 0.7481481481481481, "grad_norm": 1.2301177978515625, "learning_rate": 5.055596738324685e-05, "loss": 1.1568, "step": 5050 }, { "epoch": 0.7482962962962963, "grad_norm": 
2.1505887508392334, "learning_rate": 5.052631578947369e-05, "loss": 1.0903, "step": 5051 }, { "epoch": 0.7484444444444445, "grad_norm": 1.9880132675170898, "learning_rate": 5.0496664195700514e-05, "loss": 0.9612, "step": 5052 }, { "epoch": 0.7485925925925926, "grad_norm": 1.5665372610092163, "learning_rate": 5.046701260192736e-05, "loss": 1.0377, "step": 5053 }, { "epoch": 0.7487407407407407, "grad_norm": 2.266237258911133, "learning_rate": 5.0437361008154196e-05, "loss": 1.2228, "step": 5054 }, { "epoch": 0.7488888888888889, "grad_norm": 2.041421890258789, "learning_rate": 5.040770941438102e-05, "loss": 1.0174, "step": 5055 }, { "epoch": 0.7490370370370371, "grad_norm": 1.5641827583312988, "learning_rate": 5.037805782060786e-05, "loss": 0.8587, "step": 5056 }, { "epoch": 0.7491851851851852, "grad_norm": 2.854626417160034, "learning_rate": 5.03484062268347e-05, "loss": 1.1644, "step": 5057 }, { "epoch": 0.7493333333333333, "grad_norm": 3.9217967987060547, "learning_rate": 5.0318754633061526e-05, "loss": 0.7622, "step": 5058 }, { "epoch": 0.7494814814814815, "grad_norm": 2.209564208984375, "learning_rate": 5.0289103039288364e-05, "loss": 0.9619, "step": 5059 }, { "epoch": 0.7496296296296296, "grad_norm": 1.823884129524231, "learning_rate": 5.02594514455152e-05, "loss": 1.0101, "step": 5060 }, { "epoch": 0.7497777777777778, "grad_norm": 2.5315563678741455, "learning_rate": 5.022979985174203e-05, "loss": 1.1403, "step": 5061 }, { "epoch": 0.7499259259259259, "grad_norm": 1.169373869895935, "learning_rate": 5.020014825796887e-05, "loss": 0.9472, "step": 5062 }, { "epoch": 0.7500740740740741, "grad_norm": 2.5863282680511475, "learning_rate": 5.01704966641957e-05, "loss": 1.276, "step": 5063 }, { "epoch": 0.7502222222222222, "grad_norm": 3.2997329235076904, "learning_rate": 5.014084507042254e-05, "loss": 0.9122, "step": 5064 }, { "epoch": 0.7503703703703704, "grad_norm": 1.424874186515808, "learning_rate": 5.0111193476649376e-05, "loss": 1.0295, "step": 5065 }, { 
"epoch": 0.7505185185185185, "grad_norm": 2.382889747619629, "learning_rate": 5.008154188287621e-05, "loss": 1.1297, "step": 5066 }, { "epoch": 0.7506666666666667, "grad_norm": 1.185091257095337, "learning_rate": 5.0051890289103045e-05, "loss": 0.8786, "step": 5067 }, { "epoch": 0.7508148148148148, "grad_norm": 2.00325083732605, "learning_rate": 5.002223869532988e-05, "loss": 1.042, "step": 5068 }, { "epoch": 0.7509629629629629, "grad_norm": 3.6229701042175293, "learning_rate": 4.999258710155671e-05, "loss": 0.9377, "step": 5069 }, { "epoch": 0.7511111111111111, "grad_norm": 1.0859620571136475, "learning_rate": 4.996293550778355e-05, "loss": 1.1823, "step": 5070 }, { "epoch": 0.7512592592592593, "grad_norm": 1.863409399986267, "learning_rate": 4.993328391401038e-05, "loss": 1.1228, "step": 5071 }, { "epoch": 0.7514074074074074, "grad_norm": 1.8272866010665894, "learning_rate": 4.990363232023721e-05, "loss": 1.0643, "step": 5072 }, { "epoch": 0.7515555555555555, "grad_norm": 1.7983300685882568, "learning_rate": 4.987398072646405e-05, "loss": 1.169, "step": 5073 }, { "epoch": 0.7517037037037037, "grad_norm": 2.3321971893310547, "learning_rate": 4.984432913269089e-05, "loss": 1.075, "step": 5074 }, { "epoch": 0.7518518518518519, "grad_norm": 1.8782232999801636, "learning_rate": 4.981467753891772e-05, "loss": 1.0492, "step": 5075 }, { "epoch": 0.752, "grad_norm": 2.8510990142822266, "learning_rate": 4.978502594514455e-05, "loss": 1.2358, "step": 5076 }, { "epoch": 0.7521481481481481, "grad_norm": 2.659990072250366, "learning_rate": 4.975537435137139e-05, "loss": 1.0693, "step": 5077 }, { "epoch": 0.7522962962962964, "grad_norm": 1.8440874814987183, "learning_rate": 4.9725722757598226e-05, "loss": 1.1364, "step": 5078 }, { "epoch": 0.7524444444444445, "grad_norm": 1.1493724584579468, "learning_rate": 4.9696071163825056e-05, "loss": 1.1714, "step": 5079 }, { "epoch": 0.7525925925925926, "grad_norm": 1.7375032901763916, "learning_rate": 4.9666419570051894e-05, "loss": 
1.1045, "step": 5080 }, { "epoch": 0.7527407407407407, "grad_norm": 2.885653257369995, "learning_rate": 4.9636767976278725e-05, "loss": 1.0792, "step": 5081 }, { "epoch": 0.7528888888888889, "grad_norm": 1.5603786706924438, "learning_rate": 4.960711638250556e-05, "loss": 0.9486, "step": 5082 }, { "epoch": 0.7530370370370371, "grad_norm": 1.6088101863861084, "learning_rate": 4.95774647887324e-05, "loss": 1.081, "step": 5083 }, { "epoch": 0.7531851851851852, "grad_norm": 2.091627359390259, "learning_rate": 4.954781319495923e-05, "loss": 0.9934, "step": 5084 }, { "epoch": 0.7533333333333333, "grad_norm": 1.691460371017456, "learning_rate": 4.951816160118606e-05, "loss": 1.2726, "step": 5085 }, { "epoch": 0.7534814814814815, "grad_norm": 1.6092997789382935, "learning_rate": 4.94885100074129e-05, "loss": 1.0323, "step": 5086 }, { "epoch": 0.7536296296296296, "grad_norm": 1.2982960939407349, "learning_rate": 4.945885841363974e-05, "loss": 0.7403, "step": 5087 }, { "epoch": 0.7537777777777778, "grad_norm": 1.68495774269104, "learning_rate": 4.942920681986657e-05, "loss": 0.9302, "step": 5088 }, { "epoch": 0.7539259259259259, "grad_norm": 1.6897817850112915, "learning_rate": 4.93995552260934e-05, "loss": 0.9577, "step": 5089 }, { "epoch": 0.7540740740740741, "grad_norm": 1.0711270570755005, "learning_rate": 4.9369903632320244e-05, "loss": 0.8916, "step": 5090 }, { "epoch": 0.7542222222222222, "grad_norm": 4.649448394775391, "learning_rate": 4.9340252038547075e-05, "loss": 0.9868, "step": 5091 }, { "epoch": 0.7543703703703704, "grad_norm": 1.4439083337783813, "learning_rate": 4.9310600444773906e-05, "loss": 0.8525, "step": 5092 }, { "epoch": 0.7545185185185185, "grad_norm": 2.0443906784057617, "learning_rate": 4.928094885100074e-05, "loss": 1.2524, "step": 5093 }, { "epoch": 0.7546666666666667, "grad_norm": 1.8707892894744873, "learning_rate": 4.925129725722758e-05, "loss": 1.1176, "step": 5094 }, { "epoch": 0.7548148148148148, "grad_norm": 2.2765345573425293, 
"learning_rate": 4.922164566345441e-05, "loss": 1.0497, "step": 5095 }, { "epoch": 0.754962962962963, "grad_norm": 3.6682441234588623, "learning_rate": 4.919199406968125e-05, "loss": 1.0128, "step": 5096 }, { "epoch": 0.7551111111111111, "grad_norm": 2.4716155529022217, "learning_rate": 4.916234247590808e-05, "loss": 1.0322, "step": 5097 }, { "epoch": 0.7552592592592593, "grad_norm": 2.391040563583374, "learning_rate": 4.913269088213492e-05, "loss": 0.7442, "step": 5098 }, { "epoch": 0.7554074074074074, "grad_norm": 3.0249578952789307, "learning_rate": 4.910303928836175e-05, "loss": 1.1544, "step": 5099 }, { "epoch": 0.7555555555555555, "grad_norm": 1.9137699604034424, "learning_rate": 4.907338769458859e-05, "loss": 1.298, "step": 5100 }, { "epoch": 0.7557037037037037, "grad_norm": 1.8166754245758057, "learning_rate": 4.9043736100815425e-05, "loss": 1.0571, "step": 5101 }, { "epoch": 0.7558518518518519, "grad_norm": 5.292375087738037, "learning_rate": 4.9014084507042255e-05, "loss": 1.0935, "step": 5102 }, { "epoch": 0.756, "grad_norm": 1.5703907012939453, "learning_rate": 4.898443291326909e-05, "loss": 0.9633, "step": 5103 }, { "epoch": 0.7561481481481481, "grad_norm": 2.736743450164795, "learning_rate": 4.8954781319495924e-05, "loss": 0.9147, "step": 5104 }, { "epoch": 0.7562962962962962, "grad_norm": 5.090847969055176, "learning_rate": 4.892512972572276e-05, "loss": 0.8958, "step": 5105 }, { "epoch": 0.7564444444444445, "grad_norm": 1.9331525564193726, "learning_rate": 4.889547813194959e-05, "loss": 0.8684, "step": 5106 }, { "epoch": 0.7565925925925926, "grad_norm": 1.776873230934143, "learning_rate": 4.886582653817643e-05, "loss": 1.1317, "step": 5107 }, { "epoch": 0.7567407407407407, "grad_norm": 2.454066276550293, "learning_rate": 4.883617494440326e-05, "loss": 1.0814, "step": 5108 }, { "epoch": 0.7568888888888889, "grad_norm": 1.4756650924682617, "learning_rate": 4.88065233506301e-05, "loss": 0.8534, "step": 5109 }, { "epoch": 0.7570370370370371, 
"grad_norm": 1.317214846611023, "learning_rate": 4.8776871756856937e-05, "loss": 0.8166, "step": 5110 }, { "epoch": 0.7571851851851852, "grad_norm": 2.033379077911377, "learning_rate": 4.874722016308377e-05, "loss": 1.0837, "step": 5111 }, { "epoch": 0.7573333333333333, "grad_norm": 2.4382426738739014, "learning_rate": 4.87175685693106e-05, "loss": 0.8166, "step": 5112 }, { "epoch": 0.7574814814814815, "grad_norm": 1.879291296005249, "learning_rate": 4.868791697553744e-05, "loss": 1.1031, "step": 5113 }, { "epoch": 0.7576296296296297, "grad_norm": 1.4954347610473633, "learning_rate": 4.8658265381764274e-05, "loss": 0.9674, "step": 5114 }, { "epoch": 0.7577777777777778, "grad_norm": 2.026719570159912, "learning_rate": 4.8628613787991105e-05, "loss": 1.0438, "step": 5115 }, { "epoch": 0.7579259259259259, "grad_norm": 2.028395652770996, "learning_rate": 4.859896219421794e-05, "loss": 0.9114, "step": 5116 }, { "epoch": 0.7580740740740741, "grad_norm": 2.310157060623169, "learning_rate": 4.856931060044478e-05, "loss": 0.9937, "step": 5117 }, { "epoch": 0.7582222222222222, "grad_norm": 1.5251635313034058, "learning_rate": 4.853965900667161e-05, "loss": 1.048, "step": 5118 }, { "epoch": 0.7583703703703704, "grad_norm": 1.486611008644104, "learning_rate": 4.851000741289844e-05, "loss": 0.9944, "step": 5119 }, { "epoch": 0.7585185185185185, "grad_norm": 2.200636148452759, "learning_rate": 4.848035581912528e-05, "loss": 1.0271, "step": 5120 }, { "epoch": 0.7586666666666667, "grad_norm": 2.43915057182312, "learning_rate": 4.845070422535212e-05, "loss": 0.9229, "step": 5121 }, { "epoch": 0.7588148148148148, "grad_norm": 1.2409038543701172, "learning_rate": 4.842105263157895e-05, "loss": 0.8342, "step": 5122 }, { "epoch": 0.758962962962963, "grad_norm": 2.0774729251861572, "learning_rate": 4.8391401037805786e-05, "loss": 1.0365, "step": 5123 }, { "epoch": 0.7591111111111111, "grad_norm": 2.0544731616973877, "learning_rate": 4.836174944403262e-05, "loss": 0.9629, "step": 5124 }, 
{ "epoch": 0.7592592592592593, "grad_norm": 1.7318483591079712, "learning_rate": 4.8332097850259454e-05, "loss": 1.2772, "step": 5125 }, { "epoch": 0.7594074074074074, "grad_norm": 8.01085090637207, "learning_rate": 4.830244625648629e-05, "loss": 1.1203, "step": 5126 }, { "epoch": 0.7595555555555555, "grad_norm": 3.245711088180542, "learning_rate": 4.827279466271312e-05, "loss": 1.0204, "step": 5127 }, { "epoch": 0.7597037037037037, "grad_norm": 1.6004467010498047, "learning_rate": 4.8243143068939954e-05, "loss": 0.8611, "step": 5128 }, { "epoch": 0.7598518518518519, "grad_norm": 1.5613590478897095, "learning_rate": 4.821349147516679e-05, "loss": 0.9004, "step": 5129 }, { "epoch": 0.76, "grad_norm": 1.1147972345352173, "learning_rate": 4.818383988139363e-05, "loss": 1.1883, "step": 5130 }, { "epoch": 0.7601481481481481, "grad_norm": 1.9375214576721191, "learning_rate": 4.815418828762046e-05, "loss": 1.055, "step": 5131 }, { "epoch": 0.7602962962962962, "grad_norm": 1.498977780342102, "learning_rate": 4.812453669384729e-05, "loss": 0.9044, "step": 5132 }, { "epoch": 0.7604444444444445, "grad_norm": 1.9106816053390503, "learning_rate": 4.8094885100074136e-05, "loss": 1.0633, "step": 5133 }, { "epoch": 0.7605925925925926, "grad_norm": 2.038706064224243, "learning_rate": 4.8065233506300966e-05, "loss": 1.1961, "step": 5134 }, { "epoch": 0.7607407407407407, "grad_norm": 3.4296905994415283, "learning_rate": 4.80355819125278e-05, "loss": 0.913, "step": 5135 }, { "epoch": 0.7608888888888888, "grad_norm": 4.157522678375244, "learning_rate": 4.8005930318754635e-05, "loss": 1.1286, "step": 5136 }, { "epoch": 0.7610370370370371, "grad_norm": 2.291372060775757, "learning_rate": 4.797627872498147e-05, "loss": 0.8568, "step": 5137 }, { "epoch": 0.7611851851851852, "grad_norm": 1.864700436592102, "learning_rate": 4.7946627131208304e-05, "loss": 0.8264, "step": 5138 }, { "epoch": 0.7613333333333333, "grad_norm": 1.610752820968628, "learning_rate": 4.791697553743514e-05, "loss": 
1.0348, "step": 5139 }, { "epoch": 0.7614814814814815, "grad_norm": 4.244185447692871, "learning_rate": 4.788732394366197e-05, "loss": 1.1272, "step": 5140 }, { "epoch": 0.7616296296296297, "grad_norm": 1.9835176467895508, "learning_rate": 4.785767234988881e-05, "loss": 0.8225, "step": 5141 }, { "epoch": 0.7617777777777778, "grad_norm": 1.8490839004516602, "learning_rate": 4.782802075611564e-05, "loss": 1.0496, "step": 5142 }, { "epoch": 0.7619259259259259, "grad_norm": 3.377185583114624, "learning_rate": 4.779836916234248e-05, "loss": 0.9867, "step": 5143 }, { "epoch": 0.7620740740740741, "grad_norm": 1.957334280014038, "learning_rate": 4.7768717568569316e-05, "loss": 0.9881, "step": 5144 }, { "epoch": 0.7622222222222222, "grad_norm": 1.9648994207382202, "learning_rate": 4.773906597479615e-05, "loss": 1.0724, "step": 5145 }, { "epoch": 0.7623703703703704, "grad_norm": 2.500462055206299, "learning_rate": 4.7709414381022985e-05, "loss": 0.9337, "step": 5146 }, { "epoch": 0.7625185185185185, "grad_norm": 2.3736724853515625, "learning_rate": 4.7679762787249816e-05, "loss": 1.1224, "step": 5147 }, { "epoch": 0.7626666666666667, "grad_norm": 1.6987513303756714, "learning_rate": 4.765011119347665e-05, "loss": 0.9495, "step": 5148 }, { "epoch": 0.7628148148148148, "grad_norm": 1.900909423828125, "learning_rate": 4.7620459599703484e-05, "loss": 0.9714, "step": 5149 }, { "epoch": 0.762962962962963, "grad_norm": 2.466947317123413, "learning_rate": 4.759080800593032e-05, "loss": 1.1185, "step": 5150 }, { "epoch": 0.7631111111111111, "grad_norm": 1.8361475467681885, "learning_rate": 4.756115641215715e-05, "loss": 1.0755, "step": 5151 }, { "epoch": 0.7632592592592593, "grad_norm": 1.4487415552139282, "learning_rate": 4.753150481838399e-05, "loss": 0.9984, "step": 5152 }, { "epoch": 0.7634074074074074, "grad_norm": 1.4057559967041016, "learning_rate": 4.750185322461083e-05, "loss": 0.6848, "step": 5153 }, { "epoch": 0.7635555555555555, "grad_norm": 2.509324789047241, 
"learning_rate": 4.747220163083766e-05, "loss": 1.1268, "step": 5154 }, { "epoch": 0.7637037037037037, "grad_norm": 2.1801083087921143, "learning_rate": 4.744255003706449e-05, "loss": 1.0042, "step": 5155 }, { "epoch": 0.7638518518518519, "grad_norm": 1.8679866790771484, "learning_rate": 4.7412898443291334e-05, "loss": 0.8824, "step": 5156 }, { "epoch": 0.764, "grad_norm": 2.1970086097717285, "learning_rate": 4.7383246849518165e-05, "loss": 1.0381, "step": 5157 }, { "epoch": 0.7641481481481481, "grad_norm": 1.4073742628097534, "learning_rate": 4.7353595255744996e-05, "loss": 0.8238, "step": 5158 }, { "epoch": 0.7642962962962963, "grad_norm": 1.780596137046814, "learning_rate": 4.7323943661971834e-05, "loss": 1.1141, "step": 5159 }, { "epoch": 0.7644444444444445, "grad_norm": 1.9689271450042725, "learning_rate": 4.729429206819867e-05, "loss": 1.0593, "step": 5160 }, { "epoch": 0.7645925925925926, "grad_norm": 2.2552754878997803, "learning_rate": 4.72646404744255e-05, "loss": 0.9337, "step": 5161 }, { "epoch": 0.7647407407407407, "grad_norm": 1.968701958656311, "learning_rate": 4.7234988880652333e-05, "loss": 0.9357, "step": 5162 }, { "epoch": 0.7648888888888888, "grad_norm": 1.7369651794433594, "learning_rate": 4.720533728687917e-05, "loss": 1.2335, "step": 5163 }, { "epoch": 0.7650370370370371, "grad_norm": 1.7453725337982178, "learning_rate": 4.717568569310601e-05, "loss": 0.8022, "step": 5164 }, { "epoch": 0.7651851851851852, "grad_norm": 1.5386406183242798, "learning_rate": 4.714603409933284e-05, "loss": 1.0569, "step": 5165 }, { "epoch": 0.7653333333333333, "grad_norm": 2.168109178543091, "learning_rate": 4.711638250555968e-05, "loss": 0.9496, "step": 5166 }, { "epoch": 0.7654814814814814, "grad_norm": 1.2597980499267578, "learning_rate": 4.708673091178651e-05, "loss": 0.8692, "step": 5167 }, { "epoch": 0.7656296296296297, "grad_norm": 1.890368938446045, "learning_rate": 4.7057079318013346e-05, "loss": 1.074, "step": 5168 }, { "epoch": 0.7657777777777778, 
"grad_norm": 1.8125121593475342, "learning_rate": 4.7027427724240184e-05, "loss": 0.8905, "step": 5169 }, { "epoch": 0.7659259259259259, "grad_norm": 1.288940668106079, "learning_rate": 4.6997776130467015e-05, "loss": 1.0225, "step": 5170 }, { "epoch": 0.7660740740740741, "grad_norm": 1.973691463470459, "learning_rate": 4.6968124536693846e-05, "loss": 1.0235, "step": 5171 }, { "epoch": 0.7662222222222222, "grad_norm": 2.004398822784424, "learning_rate": 4.693847294292068e-05, "loss": 1.1091, "step": 5172 }, { "epoch": 0.7663703703703704, "grad_norm": 1.7750011682510376, "learning_rate": 4.690882134914752e-05, "loss": 1.0812, "step": 5173 }, { "epoch": 0.7665185185185185, "grad_norm": 1.1326333284378052, "learning_rate": 4.687916975537435e-05, "loss": 1.0171, "step": 5174 }, { "epoch": 0.7666666666666667, "grad_norm": 1.296269416809082, "learning_rate": 4.684951816160118e-05, "loss": 1.0817, "step": 5175 }, { "epoch": 0.7668148148148148, "grad_norm": 1.1539825201034546, "learning_rate": 4.681986656782803e-05, "loss": 0.8657, "step": 5176 }, { "epoch": 0.766962962962963, "grad_norm": 1.676647663116455, "learning_rate": 4.679021497405486e-05, "loss": 0.9272, "step": 5177 }, { "epoch": 0.7671111111111111, "grad_norm": 4.465587139129639, "learning_rate": 4.676056338028169e-05, "loss": 0.8286, "step": 5178 }, { "epoch": 0.7672592592592593, "grad_norm": 7.314817428588867, "learning_rate": 4.673091178650853e-05, "loss": 0.9234, "step": 5179 }, { "epoch": 0.7674074074074074, "grad_norm": 1.8242179155349731, "learning_rate": 4.6701260192735364e-05, "loss": 1.2321, "step": 5180 }, { "epoch": 0.7675555555555555, "grad_norm": 1.985378384590149, "learning_rate": 4.6671608598962195e-05, "loss": 1.0672, "step": 5181 }, { "epoch": 0.7677037037037037, "grad_norm": 1.7461580038070679, "learning_rate": 4.664195700518903e-05, "loss": 1.1942, "step": 5182 }, { "epoch": 0.7678518518518519, "grad_norm": 2.392956018447876, "learning_rate": 4.661230541141587e-05, "loss": 0.9936, "step": 
5183 }, { "epoch": 0.768, "grad_norm": 1.471274971961975, "learning_rate": 4.65826538176427e-05, "loss": 0.8899, "step": 5184 }, { "epoch": 0.7681481481481481, "grad_norm": 1.7198207378387451, "learning_rate": 4.655300222386953e-05, "loss": 1.1507, "step": 5185 }, { "epoch": 0.7682962962962963, "grad_norm": 1.4129610061645508, "learning_rate": 4.652335063009637e-05, "loss": 0.8701, "step": 5186 }, { "epoch": 0.7684444444444445, "grad_norm": 1.2829008102416992, "learning_rate": 4.649369903632321e-05, "loss": 0.9653, "step": 5187 }, { "epoch": 0.7685925925925926, "grad_norm": 2.027888298034668, "learning_rate": 4.646404744255004e-05, "loss": 1.0169, "step": 5188 }, { "epoch": 0.7687407407407407, "grad_norm": 1.609615683555603, "learning_rate": 4.6434395848776876e-05, "loss": 1.2845, "step": 5189 }, { "epoch": 0.7688888888888888, "grad_norm": 1.939034342765808, "learning_rate": 4.640474425500371e-05, "loss": 1.0692, "step": 5190 }, { "epoch": 0.7690370370370371, "grad_norm": 1.922580599784851, "learning_rate": 4.6375092661230545e-05, "loss": 0.7777, "step": 5191 }, { "epoch": 0.7691851851851852, "grad_norm": 2.872612476348877, "learning_rate": 4.6345441067457376e-05, "loss": 1.0708, "step": 5192 }, { "epoch": 0.7693333333333333, "grad_norm": 1.4507546424865723, "learning_rate": 4.6315789473684214e-05, "loss": 1.0895, "step": 5193 }, { "epoch": 0.7694814814814814, "grad_norm": 1.2573904991149902, "learning_rate": 4.6286137879911045e-05, "loss": 0.8701, "step": 5194 }, { "epoch": 0.7696296296296297, "grad_norm": 1.58791184425354, "learning_rate": 4.625648628613788e-05, "loss": 1.0827, "step": 5195 }, { "epoch": 0.7697777777777778, "grad_norm": 2.590338945388794, "learning_rate": 4.622683469236472e-05, "loss": 1.3173, "step": 5196 }, { "epoch": 0.7699259259259259, "grad_norm": 2.0621609687805176, "learning_rate": 4.619718309859155e-05, "loss": 0.9686, "step": 5197 }, { "epoch": 0.770074074074074, "grad_norm": 3.1093332767486572, "learning_rate": 4.616753150481838e-05, 
"loss": 0.8499, "step": 5198 }, { "epoch": 0.7702222222222223, "grad_norm": 3.72360897064209, "learning_rate": 4.6137879911045226e-05, "loss": 0.9188, "step": 5199 }, { "epoch": 0.7703703703703704, "grad_norm": 1.468069314956665, "learning_rate": 4.610822831727206e-05, "loss": 0.6625, "step": 5200 }, { "epoch": 0.7705185185185185, "grad_norm": 1.898732304573059, "learning_rate": 4.607857672349889e-05, "loss": 0.9545, "step": 5201 }, { "epoch": 0.7706666666666667, "grad_norm": 1.5424686670303345, "learning_rate": 4.6048925129725726e-05, "loss": 0.9572, "step": 5202 }, { "epoch": 0.7708148148148148, "grad_norm": 1.6728442907333374, "learning_rate": 4.601927353595256e-05, "loss": 0.9951, "step": 5203 }, { "epoch": 0.770962962962963, "grad_norm": 1.8453505039215088, "learning_rate": 4.5989621942179394e-05, "loss": 1.1395, "step": 5204 }, { "epoch": 0.7711111111111111, "grad_norm": 1.8295652866363525, "learning_rate": 4.5959970348406225e-05, "loss": 0.9264, "step": 5205 }, { "epoch": 0.7712592592592593, "grad_norm": 2.741732597351074, "learning_rate": 4.593031875463306e-05, "loss": 0.9526, "step": 5206 }, { "epoch": 0.7714074074074074, "grad_norm": 1.5087363719940186, "learning_rate": 4.59006671608599e-05, "loss": 1.1452, "step": 5207 }, { "epoch": 0.7715555555555556, "grad_norm": 1.7406902313232422, "learning_rate": 4.587101556708673e-05, "loss": 0.9727, "step": 5208 }, { "epoch": 0.7717037037037037, "grad_norm": 2.977414846420288, "learning_rate": 4.584136397331357e-05, "loss": 1.2625, "step": 5209 }, { "epoch": 0.7718518518518519, "grad_norm": 1.4112823009490967, "learning_rate": 4.58117123795404e-05, "loss": 0.7912, "step": 5210 }, { "epoch": 0.772, "grad_norm": 2.0663907527923584, "learning_rate": 4.578206078576724e-05, "loss": 1.0525, "step": 5211 }, { "epoch": 0.7721481481481481, "grad_norm": 2.6327972412109375, "learning_rate": 4.5752409191994075e-05, "loss": 1.111, "step": 5212 }, { "epoch": 0.7722962962962963, "grad_norm": 1.619528889656067, "learning_rate": 
4.5722757598220906e-05, "loss": 0.9059, "step": 5213 }, { "epoch": 0.7724444444444445, "grad_norm": 2.2156741619110107, "learning_rate": 4.569310600444774e-05, "loss": 1.0346, "step": 5214 }, { "epoch": 0.7725925925925926, "grad_norm": 1.7784485816955566, "learning_rate": 4.5663454410674575e-05, "loss": 0.9527, "step": 5215 }, { "epoch": 0.7727407407407407, "grad_norm": 1.4141141176223755, "learning_rate": 4.563380281690141e-05, "loss": 0.9136, "step": 5216 }, { "epoch": 0.7728888888888888, "grad_norm": 3.6373491287231445, "learning_rate": 4.5604151223128243e-05, "loss": 1.0312, "step": 5217 }, { "epoch": 0.7730370370370371, "grad_norm": 5.496160507202148, "learning_rate": 4.5574499629355074e-05, "loss": 1.1849, "step": 5218 }, { "epoch": 0.7731851851851852, "grad_norm": 1.7929624319076538, "learning_rate": 4.554484803558192e-05, "loss": 1.132, "step": 5219 }, { "epoch": 0.7733333333333333, "grad_norm": 2.1400399208068848, "learning_rate": 4.551519644180875e-05, "loss": 0.5993, "step": 5220 }, { "epoch": 0.7734814814814814, "grad_norm": 1.5654683113098145, "learning_rate": 4.548554484803558e-05, "loss": 0.8757, "step": 5221 }, { "epoch": 0.7736296296296297, "grad_norm": 2.1138947010040283, "learning_rate": 4.545589325426242e-05, "loss": 0.8961, "step": 5222 }, { "epoch": 0.7737777777777778, "grad_norm": 1.4625893831253052, "learning_rate": 4.5426241660489256e-05, "loss": 1.0103, "step": 5223 }, { "epoch": 0.7739259259259259, "grad_norm": 1.2665773630142212, "learning_rate": 4.539659006671609e-05, "loss": 1.0971, "step": 5224 }, { "epoch": 0.774074074074074, "grad_norm": 1.2460824251174927, "learning_rate": 4.5366938472942925e-05, "loss": 0.9117, "step": 5225 }, { "epoch": 0.7742222222222223, "grad_norm": 2.2900710105895996, "learning_rate": 4.533728687916976e-05, "loss": 0.9117, "step": 5226 }, { "epoch": 0.7743703703703704, "grad_norm": 2.224123001098633, "learning_rate": 4.530763528539659e-05, "loss": 1.0584, "step": 5227 }, { "epoch": 0.7745185185185185, 
"grad_norm": 1.5715810060501099, "learning_rate": 4.5277983691623424e-05, "loss": 1.1295, "step": 5228 }, { "epoch": 0.7746666666666666, "grad_norm": 2.7693135738372803, "learning_rate": 4.524833209785026e-05, "loss": 1.0575, "step": 5229 }, { "epoch": 0.7748148148148148, "grad_norm": 1.9409888982772827, "learning_rate": 4.52186805040771e-05, "loss": 1.0668, "step": 5230 }, { "epoch": 0.774962962962963, "grad_norm": 1.4197139739990234, "learning_rate": 4.518902891030393e-05, "loss": 1.1552, "step": 5231 }, { "epoch": 0.7751111111111111, "grad_norm": 1.9229507446289062, "learning_rate": 4.515937731653077e-05, "loss": 1.0031, "step": 5232 }, { "epoch": 0.7752592592592593, "grad_norm": 1.7542647123336792, "learning_rate": 4.51297257227576e-05, "loss": 1.0647, "step": 5233 }, { "epoch": 0.7754074074074074, "grad_norm": 1.3256491422653198, "learning_rate": 4.510007412898444e-05, "loss": 0.8055, "step": 5234 }, { "epoch": 0.7755555555555556, "grad_norm": 1.463727355003357, "learning_rate": 4.507042253521127e-05, "loss": 1.0222, "step": 5235 }, { "epoch": 0.7757037037037037, "grad_norm": 1.7977197170257568, "learning_rate": 4.5040770941438105e-05, "loss": 0.9904, "step": 5236 }, { "epoch": 0.7758518518518519, "grad_norm": 1.1428083181381226, "learning_rate": 4.5011119347664936e-05, "loss": 1.0521, "step": 5237 }, { "epoch": 0.776, "grad_norm": 1.4261960983276367, "learning_rate": 4.4981467753891774e-05, "loss": 0.8669, "step": 5238 }, { "epoch": 0.7761481481481481, "grad_norm": 2.0403456687927246, "learning_rate": 4.495181616011861e-05, "loss": 1.1753, "step": 5239 }, { "epoch": 0.7762962962962963, "grad_norm": 2.254314422607422, "learning_rate": 4.492216456634544e-05, "loss": 1.1025, "step": 5240 }, { "epoch": 0.7764444444444445, "grad_norm": 1.2379026412963867, "learning_rate": 4.489251297257227e-05, "loss": 1.2099, "step": 5241 }, { "epoch": 0.7765925925925926, "grad_norm": 1.7444937229156494, "learning_rate": 4.486286137879912e-05, "loss": 1.0509, "step": 5242 }, { 
"epoch": 0.7767407407407407, "grad_norm": 2.0687057971954346, "learning_rate": 4.483320978502595e-05, "loss": 0.6284, "step": 5243 }, { "epoch": 0.7768888888888889, "grad_norm": 2.0481202602386475, "learning_rate": 4.480355819125278e-05, "loss": 0.9988, "step": 5244 }, { "epoch": 0.7770370370370371, "grad_norm": 2.4136269092559814, "learning_rate": 4.477390659747962e-05, "loss": 1.2496, "step": 5245 }, { "epoch": 0.7771851851851852, "grad_norm": 1.8915942907333374, "learning_rate": 4.4744255003706455e-05, "loss": 1.0732, "step": 5246 }, { "epoch": 0.7773333333333333, "grad_norm": 1.8578381538391113, "learning_rate": 4.4714603409933286e-05, "loss": 0.9857, "step": 5247 }, { "epoch": 0.7774814814814814, "grad_norm": 2.6110923290252686, "learning_rate": 4.468495181616012e-05, "loss": 1.0259, "step": 5248 }, { "epoch": 0.7776296296296297, "grad_norm": 1.603898048400879, "learning_rate": 4.4655300222386954e-05, "loss": 0.8033, "step": 5249 }, { "epoch": 0.7777777777777778, "grad_norm": 1.5251612663269043, "learning_rate": 4.462564862861379e-05, "loss": 1.0749, "step": 5250 }, { "epoch": 0.7779259259259259, "grad_norm": 3.3131825923919678, "learning_rate": 4.459599703484062e-05, "loss": 1.0672, "step": 5251 }, { "epoch": 0.778074074074074, "grad_norm": 2.589365005493164, "learning_rate": 4.456634544106746e-05, "loss": 1.011, "step": 5252 }, { "epoch": 0.7782222222222223, "grad_norm": 2.332076072692871, "learning_rate": 4.453669384729429e-05, "loss": 1.2628, "step": 5253 }, { "epoch": 0.7783703703703704, "grad_norm": 1.7138726711273193, "learning_rate": 4.450704225352113e-05, "loss": 1.3278, "step": 5254 }, { "epoch": 0.7785185185185185, "grad_norm": 2.4788360595703125, "learning_rate": 4.447739065974797e-05, "loss": 1.4061, "step": 5255 }, { "epoch": 0.7786666666666666, "grad_norm": 1.6783311367034912, "learning_rate": 4.44477390659748e-05, "loss": 0.9061, "step": 5256 }, { "epoch": 0.7788148148148148, "grad_norm": 1.3091166019439697, "learning_rate": 
4.441808747220163e-05, "loss": 1.2006, "step": 5257 }, { "epoch": 0.778962962962963, "grad_norm": 1.4785174131393433, "learning_rate": 4.4388435878428467e-05, "loss": 1.1297, "step": 5258 }, { "epoch": 0.7791111111111111, "grad_norm": 2.0996644496917725, "learning_rate": 4.4358784284655304e-05, "loss": 1.217, "step": 5259 }, { "epoch": 0.7792592592592592, "grad_norm": 1.4377645254135132, "learning_rate": 4.4329132690882135e-05, "loss": 0.8621, "step": 5260 }, { "epoch": 0.7794074074074074, "grad_norm": 1.9455523490905762, "learning_rate": 4.4299481097108966e-05, "loss": 1.0878, "step": 5261 }, { "epoch": 0.7795555555555556, "grad_norm": 1.466994285583496, "learning_rate": 4.426982950333581e-05, "loss": 1.1907, "step": 5262 }, { "epoch": 0.7797037037037037, "grad_norm": 1.3471636772155762, "learning_rate": 4.424017790956264e-05, "loss": 1.0218, "step": 5263 }, { "epoch": 0.7798518518518519, "grad_norm": 1.427681565284729, "learning_rate": 4.421052631578947e-05, "loss": 1.0729, "step": 5264 }, { "epoch": 0.78, "grad_norm": 1.1461193561553955, "learning_rate": 4.418087472201631e-05, "loss": 1.1489, "step": 5265 }, { "epoch": 0.7801481481481481, "grad_norm": 1.0958290100097656, "learning_rate": 4.415122312824315e-05, "loss": 1.0562, "step": 5266 }, { "epoch": 0.7802962962962963, "grad_norm": 2.9291634559631348, "learning_rate": 4.412157153446998e-05, "loss": 1.2814, "step": 5267 }, { "epoch": 0.7804444444444445, "grad_norm": 2.461829423904419, "learning_rate": 4.4091919940696816e-05, "loss": 0.9668, "step": 5268 }, { "epoch": 0.7805925925925926, "grad_norm": 1.934011459350586, "learning_rate": 4.4062268346923654e-05, "loss": 1.0245, "step": 5269 }, { "epoch": 0.7807407407407407, "grad_norm": 1.38313889503479, "learning_rate": 4.4032616753150485e-05, "loss": 0.9068, "step": 5270 }, { "epoch": 0.7808888888888889, "grad_norm": 1.3687999248504639, "learning_rate": 4.4002965159377316e-05, "loss": 1.0288, "step": 5271 }, { "epoch": 0.7810370370370371, "grad_norm": 
1.9752943515777588, "learning_rate": 4.3973313565604153e-05, "loss": 0.8695, "step": 5272 }, { "epoch": 0.7811851851851852, "grad_norm": 1.7185872793197632, "learning_rate": 4.394366197183099e-05, "loss": 1.0999, "step": 5273 }, { "epoch": 0.7813333333333333, "grad_norm": 1.1522598266601562, "learning_rate": 4.391401037805782e-05, "loss": 0.9312, "step": 5274 }, { "epoch": 0.7814814814814814, "grad_norm": 1.209738850593567, "learning_rate": 4.388435878428466e-05, "loss": 1.2068, "step": 5275 }, { "epoch": 0.7816296296296297, "grad_norm": 1.4488725662231445, "learning_rate": 4.385470719051149e-05, "loss": 1.1119, "step": 5276 }, { "epoch": 0.7817777777777778, "grad_norm": 1.1559313535690308, "learning_rate": 4.382505559673833e-05, "loss": 1.0124, "step": 5277 }, { "epoch": 0.7819259259259259, "grad_norm": 2.1997835636138916, "learning_rate": 4.379540400296516e-05, "loss": 0.9524, "step": 5278 }, { "epoch": 0.782074074074074, "grad_norm": 4.906120777130127, "learning_rate": 4.3765752409192e-05, "loss": 0.9424, "step": 5279 }, { "epoch": 0.7822222222222223, "grad_norm": 2.4310436248779297, "learning_rate": 4.373610081541883e-05, "loss": 1.1148, "step": 5280 }, { "epoch": 0.7823703703703704, "grad_norm": 1.2778759002685547, "learning_rate": 4.3706449221645665e-05, "loss": 0.9099, "step": 5281 }, { "epoch": 0.7825185185185185, "grad_norm": 5.306490421295166, "learning_rate": 4.36767976278725e-05, "loss": 1.1062, "step": 5282 }, { "epoch": 0.7826666666666666, "grad_norm": 2.4609696865081787, "learning_rate": 4.3647146034099334e-05, "loss": 0.8321, "step": 5283 }, { "epoch": 0.7828148148148149, "grad_norm": 1.3912984132766724, "learning_rate": 4.3617494440326165e-05, "loss": 0.9438, "step": 5284 }, { "epoch": 0.782962962962963, "grad_norm": 1.6022518873214722, "learning_rate": 4.358784284655301e-05, "loss": 0.9162, "step": 5285 }, { "epoch": 0.7831111111111111, "grad_norm": 1.3555049896240234, "learning_rate": 4.355819125277984e-05, "loss": 0.8939, "step": 5286 }, { 
"epoch": 0.7832592592592592, "grad_norm": 1.3263565301895142, "learning_rate": 4.352853965900667e-05, "loss": 1.1853, "step": 5287 }, { "epoch": 0.7834074074074074, "grad_norm": 2.257020950317383, "learning_rate": 4.349888806523351e-05, "loss": 0.967, "step": 5288 }, { "epoch": 0.7835555555555556, "grad_norm": 1.8871673345565796, "learning_rate": 4.346923647146035e-05, "loss": 0.9221, "step": 5289 }, { "epoch": 0.7837037037037037, "grad_norm": 3.959899663925171, "learning_rate": 4.343958487768718e-05, "loss": 0.8704, "step": 5290 }, { "epoch": 0.7838518518518518, "grad_norm": 1.3531171083450317, "learning_rate": 4.340993328391401e-05, "loss": 1.0575, "step": 5291 }, { "epoch": 0.784, "grad_norm": 2.5105092525482178, "learning_rate": 4.3380281690140846e-05, "loss": 1.2141, "step": 5292 }, { "epoch": 0.7841481481481481, "grad_norm": 4.388306617736816, "learning_rate": 4.3350630096367684e-05, "loss": 1.1923, "step": 5293 }, { "epoch": 0.7842962962962963, "grad_norm": 1.7040736675262451, "learning_rate": 4.3320978502594515e-05, "loss": 0.9041, "step": 5294 }, { "epoch": 0.7844444444444445, "grad_norm": 1.3910716772079468, "learning_rate": 4.329132690882135e-05, "loss": 0.9643, "step": 5295 }, { "epoch": 0.7845925925925926, "grad_norm": 1.3138338327407837, "learning_rate": 4.326167531504818e-05, "loss": 1.0421, "step": 5296 }, { "epoch": 0.7847407407407407, "grad_norm": 1.597644329071045, "learning_rate": 4.323202372127502e-05, "loss": 1.064, "step": 5297 }, { "epoch": 0.7848888888888889, "grad_norm": 3.17818021774292, "learning_rate": 4.320237212750186e-05, "loss": 1.0256, "step": 5298 }, { "epoch": 0.7850370370370371, "grad_norm": 3.4625067710876465, "learning_rate": 4.317272053372869e-05, "loss": 1.111, "step": 5299 }, { "epoch": 0.7851851851851852, "grad_norm": 1.9711130857467651, "learning_rate": 4.314306893995552e-05, "loss": 1.065, "step": 5300 }, { "epoch": 0.7853333333333333, "grad_norm": 1.9203976392745972, "learning_rate": 4.311341734618236e-05, "loss": 
1.0593, "step": 5301 }, { "epoch": 0.7854814814814814, "grad_norm": 1.304276943206787, "learning_rate": 4.3083765752409196e-05, "loss": 0.9218, "step": 5302 }, { "epoch": 0.7856296296296297, "grad_norm": 1.7462506294250488, "learning_rate": 4.305411415863603e-05, "loss": 1.2338, "step": 5303 }, { "epoch": 0.7857777777777778, "grad_norm": 1.6708916425704956, "learning_rate": 4.302446256486286e-05, "loss": 1.1153, "step": 5304 }, { "epoch": 0.7859259259259259, "grad_norm": 1.3915983438491821, "learning_rate": 4.29948109710897e-05, "loss": 1.0623, "step": 5305 }, { "epoch": 0.786074074074074, "grad_norm": 2.6342339515686035, "learning_rate": 4.296515937731653e-05, "loss": 0.8774, "step": 5306 }, { "epoch": 0.7862222222222223, "grad_norm": 1.4199789762496948, "learning_rate": 4.2935507783543364e-05, "loss": 1.0367, "step": 5307 }, { "epoch": 0.7863703703703704, "grad_norm": 1.8340078592300415, "learning_rate": 4.29058561897702e-05, "loss": 0.784, "step": 5308 }, { "epoch": 0.7865185185185185, "grad_norm": 2.203857660293579, "learning_rate": 4.287620459599704e-05, "loss": 1.0225, "step": 5309 }, { "epoch": 0.7866666666666666, "grad_norm": 2.1654696464538574, "learning_rate": 4.284655300222387e-05, "loss": 1.0628, "step": 5310 }, { "epoch": 0.7868148148148149, "grad_norm": 1.800679087638855, "learning_rate": 4.281690140845071e-05, "loss": 1.1444, "step": 5311 }, { "epoch": 0.786962962962963, "grad_norm": 1.3590725660324097, "learning_rate": 4.2787249814677546e-05, "loss": 1.074, "step": 5312 }, { "epoch": 0.7871111111111111, "grad_norm": 1.4780471324920654, "learning_rate": 4.2757598220904377e-05, "loss": 1.0316, "step": 5313 }, { "epoch": 0.7872592592592592, "grad_norm": 1.3855935335159302, "learning_rate": 4.272794662713121e-05, "loss": 0.933, "step": 5314 }, { "epoch": 0.7874074074074074, "grad_norm": 2.180665969848633, "learning_rate": 4.2698295033358045e-05, "loss": 1.3261, "step": 5315 }, { "epoch": 0.7875555555555556, "grad_norm": 2.058462142944336, 
"learning_rate": 4.266864343958488e-05, "loss": 1.0398, "step": 5316 }, { "epoch": 0.7877037037037037, "grad_norm": 1.1582828760147095, "learning_rate": 4.2638991845811714e-05, "loss": 0.9359, "step": 5317 }, { "epoch": 0.7878518518518518, "grad_norm": 1.454142451286316, "learning_rate": 4.260934025203855e-05, "loss": 1.0378, "step": 5318 }, { "epoch": 0.788, "grad_norm": 1.7095205783843994, "learning_rate": 4.257968865826538e-05, "loss": 0.8838, "step": 5319 }, { "epoch": 0.7881481481481482, "grad_norm": 1.327888011932373, "learning_rate": 4.255003706449222e-05, "loss": 1.0082, "step": 5320 }, { "epoch": 0.7882962962962963, "grad_norm": 1.7250101566314697, "learning_rate": 4.252038547071905e-05, "loss": 1.0791, "step": 5321 }, { "epoch": 0.7884444444444444, "grad_norm": 3.891110420227051, "learning_rate": 4.249073387694589e-05, "loss": 1.0821, "step": 5322 }, { "epoch": 0.7885925925925926, "grad_norm": 2.3465235233306885, "learning_rate": 4.246108228317272e-05, "loss": 0.8379, "step": 5323 }, { "epoch": 0.7887407407407407, "grad_norm": 1.1320641040802002, "learning_rate": 4.243143068939956e-05, "loss": 0.9779, "step": 5324 }, { "epoch": 0.7888888888888889, "grad_norm": 1.3223503828048706, "learning_rate": 4.2401779095626395e-05, "loss": 0.7022, "step": 5325 }, { "epoch": 0.7890370370370371, "grad_norm": 1.7182470560073853, "learning_rate": 4.2372127501853226e-05, "loss": 1.0736, "step": 5326 }, { "epoch": 0.7891851851851852, "grad_norm": 1.766697645187378, "learning_rate": 4.234247590808006e-05, "loss": 0.936, "step": 5327 }, { "epoch": 0.7893333333333333, "grad_norm": 1.171360731124878, "learning_rate": 4.23128243143069e-05, "loss": 0.962, "step": 5328 }, { "epoch": 0.7894814814814815, "grad_norm": 2.025193452835083, "learning_rate": 4.228317272053373e-05, "loss": 1.0565, "step": 5329 }, { "epoch": 0.7896296296296297, "grad_norm": 1.9869000911712646, "learning_rate": 4.225352112676056e-05, "loss": 1.1233, "step": 5330 }, { "epoch": 0.7897777777777778, 
"grad_norm": 5.825942516326904, "learning_rate": 4.22238695329874e-05, "loss": 0.867, "step": 5331 }, { "epoch": 0.7899259259259259, "grad_norm": 1.7098500728607178, "learning_rate": 4.219421793921424e-05, "loss": 0.8757, "step": 5332 }, { "epoch": 0.790074074074074, "grad_norm": 2.2363123893737793, "learning_rate": 4.216456634544107e-05, "loss": 0.9139, "step": 5333 }, { "epoch": 0.7902222222222223, "grad_norm": 1.6317927837371826, "learning_rate": 4.21349147516679e-05, "loss": 0.9388, "step": 5334 }, { "epoch": 0.7903703703703704, "grad_norm": 1.8629355430603027, "learning_rate": 4.210526315789474e-05, "loss": 0.9761, "step": 5335 }, { "epoch": 0.7905185185185185, "grad_norm": 2.1115009784698486, "learning_rate": 4.2075611564121575e-05, "loss": 1.0465, "step": 5336 }, { "epoch": 0.7906666666666666, "grad_norm": 1.4415923357009888, "learning_rate": 4.2045959970348406e-05, "loss": 0.9209, "step": 5337 }, { "epoch": 0.7908148148148149, "grad_norm": 1.5301687717437744, "learning_rate": 4.2016308376575244e-05, "loss": 1.1599, "step": 5338 }, { "epoch": 0.790962962962963, "grad_norm": 2.5030784606933594, "learning_rate": 4.1986656782802075e-05, "loss": 0.9027, "step": 5339 }, { "epoch": 0.7911111111111111, "grad_norm": 1.829357385635376, "learning_rate": 4.195700518902891e-05, "loss": 0.9675, "step": 5340 }, { "epoch": 0.7912592592592592, "grad_norm": 2.309213638305664, "learning_rate": 4.192735359525575e-05, "loss": 1.0948, "step": 5341 }, { "epoch": 0.7914074074074074, "grad_norm": 1.3838952779769897, "learning_rate": 4.189770200148258e-05, "loss": 0.9765, "step": 5342 }, { "epoch": 0.7915555555555556, "grad_norm": 1.4355117082595825, "learning_rate": 4.186805040770941e-05, "loss": 1.0487, "step": 5343 }, { "epoch": 0.7917037037037037, "grad_norm": 2.5563371181488037, "learning_rate": 4.183839881393625e-05, "loss": 1.1888, "step": 5344 }, { "epoch": 0.7918518518518518, "grad_norm": 2.01492977142334, "learning_rate": 4.180874722016309e-05, "loss": 0.9264, "step": 5345 
}, { "epoch": 0.792, "grad_norm": 1.8180270195007324, "learning_rate": 4.177909562638992e-05, "loss": 0.924, "step": 5346 }, { "epoch": 0.7921481481481482, "grad_norm": 1.8011095523834229, "learning_rate": 4.174944403261675e-05, "loss": 1.1091, "step": 5347 }, { "epoch": 0.7922962962962963, "grad_norm": 1.3663522005081177, "learning_rate": 4.1719792438843594e-05, "loss": 1.04, "step": 5348 }, { "epoch": 0.7924444444444444, "grad_norm": 1.7330594062805176, "learning_rate": 4.1690140845070425e-05, "loss": 1.0136, "step": 5349 }, { "epoch": 0.7925925925925926, "grad_norm": 1.355904221534729, "learning_rate": 4.1660489251297256e-05, "loss": 0.9136, "step": 5350 }, { "epoch": 0.7927407407407407, "grad_norm": 1.5312200784683228, "learning_rate": 4.163083765752409e-05, "loss": 0.7906, "step": 5351 }, { "epoch": 0.7928888888888889, "grad_norm": 1.8225631713867188, "learning_rate": 4.160118606375093e-05, "loss": 1.2035, "step": 5352 }, { "epoch": 0.793037037037037, "grad_norm": 1.6775257587432861, "learning_rate": 4.157153446997776e-05, "loss": 0.9928, "step": 5353 }, { "epoch": 0.7931851851851852, "grad_norm": 1.468770980834961, "learning_rate": 4.15418828762046e-05, "loss": 1.1287, "step": 5354 }, { "epoch": 0.7933333333333333, "grad_norm": 1.8372886180877686, "learning_rate": 4.151223128243144e-05, "loss": 0.9269, "step": 5355 }, { "epoch": 0.7934814814814815, "grad_norm": 1.3729956150054932, "learning_rate": 4.148257968865827e-05, "loss": 0.9081, "step": 5356 }, { "epoch": 0.7936296296296297, "grad_norm": 3.273314952850342, "learning_rate": 4.14529280948851e-05, "loss": 1.2108, "step": 5357 }, { "epoch": 0.7937777777777778, "grad_norm": 1.243962287902832, "learning_rate": 4.142327650111194e-05, "loss": 0.9835, "step": 5358 }, { "epoch": 0.7939259259259259, "grad_norm": 1.414330244064331, "learning_rate": 4.1393624907338774e-05, "loss": 0.9602, "step": 5359 }, { "epoch": 0.794074074074074, "grad_norm": 3.1392345428466797, "learning_rate": 4.1363973313565605e-05, "loss": 
1.1034, "step": 5360 }, { "epoch": 0.7942222222222223, "grad_norm": 1.39045250415802, "learning_rate": 4.133432171979244e-05, "loss": 1.0339, "step": 5361 }, { "epoch": 0.7943703703703704, "grad_norm": 2.301767349243164, "learning_rate": 4.1304670126019274e-05, "loss": 0.9389, "step": 5362 }, { "epoch": 0.7945185185185185, "grad_norm": 1.390378713607788, "learning_rate": 4.127501853224611e-05, "loss": 0.8781, "step": 5363 }, { "epoch": 0.7946666666666666, "grad_norm": 3.492128849029541, "learning_rate": 4.124536693847294e-05, "loss": 0.9743, "step": 5364 }, { "epoch": 0.7948148148148149, "grad_norm": 2.7725274562835693, "learning_rate": 4.121571534469978e-05, "loss": 0.8988, "step": 5365 }, { "epoch": 0.794962962962963, "grad_norm": 1.6059094667434692, "learning_rate": 4.118606375092661e-05, "loss": 0.8354, "step": 5366 }, { "epoch": 0.7951111111111111, "grad_norm": 1.6514225006103516, "learning_rate": 4.115641215715345e-05, "loss": 1.2253, "step": 5367 }, { "epoch": 0.7952592592592592, "grad_norm": 1.6473922729492188, "learning_rate": 4.1126760563380286e-05, "loss": 1.0169, "step": 5368 }, { "epoch": 0.7954074074074075, "grad_norm": 1.531722903251648, "learning_rate": 4.109710896960712e-05, "loss": 1.1131, "step": 5369 }, { "epoch": 0.7955555555555556, "grad_norm": 4.171046257019043, "learning_rate": 4.106745737583395e-05, "loss": 0.9861, "step": 5370 }, { "epoch": 0.7957037037037037, "grad_norm": 10.06808090209961, "learning_rate": 4.103780578206079e-05, "loss": 1.0757, "step": 5371 }, { "epoch": 0.7958518518518518, "grad_norm": 1.311013102531433, "learning_rate": 4.1008154188287624e-05, "loss": 0.9478, "step": 5372 }, { "epoch": 0.796, "grad_norm": 1.3929322957992554, "learning_rate": 4.0978502594514455e-05, "loss": 0.7379, "step": 5373 }, { "epoch": 0.7961481481481482, "grad_norm": 1.1322258710861206, "learning_rate": 4.094885100074129e-05, "loss": 0.8733, "step": 5374 }, { "epoch": 0.7962962962962963, "grad_norm": 1.8504239320755005, "learning_rate": 
4.091919940696813e-05, "loss": 1.0918, "step": 5375 }, { "epoch": 0.7964444444444444, "grad_norm": 1.780629277229309, "learning_rate": 4.088954781319496e-05, "loss": 1.0471, "step": 5376 }, { "epoch": 0.7965925925925926, "grad_norm": 3.333953619003296, "learning_rate": 4.085989621942179e-05, "loss": 1.2132, "step": 5377 }, { "epoch": 0.7967407407407407, "grad_norm": 2.2474868297576904, "learning_rate": 4.083024462564863e-05, "loss": 0.9178, "step": 5378 }, { "epoch": 0.7968888888888889, "grad_norm": 1.3540128469467163, "learning_rate": 4.080059303187547e-05, "loss": 1.519, "step": 5379 }, { "epoch": 0.797037037037037, "grad_norm": 1.1776254177093506, "learning_rate": 4.07709414381023e-05, "loss": 1.0202, "step": 5380 }, { "epoch": 0.7971851851851852, "grad_norm": 1.7784807682037354, "learning_rate": 4.0741289844329136e-05, "loss": 0.9476, "step": 5381 }, { "epoch": 0.7973333333333333, "grad_norm": 1.7054754495620728, "learning_rate": 4.071163825055597e-05, "loss": 0.9141, "step": 5382 }, { "epoch": 0.7974814814814815, "grad_norm": 1.2991890907287598, "learning_rate": 4.0681986656782804e-05, "loss": 0.8789, "step": 5383 }, { "epoch": 0.7976296296296296, "grad_norm": 1.6967389583587646, "learning_rate": 4.065233506300964e-05, "loss": 1.1697, "step": 5384 }, { "epoch": 0.7977777777777778, "grad_norm": 1.8050689697265625, "learning_rate": 4.062268346923647e-05, "loss": 1.0018, "step": 5385 }, { "epoch": 0.7979259259259259, "grad_norm": 5.1262431144714355, "learning_rate": 4.0593031875463304e-05, "loss": 0.8984, "step": 5386 }, { "epoch": 0.798074074074074, "grad_norm": 1.3281807899475098, "learning_rate": 4.056338028169014e-05, "loss": 1.0359, "step": 5387 }, { "epoch": 0.7982222222222223, "grad_norm": 2.2558517456054688, "learning_rate": 4.053372868791698e-05, "loss": 1.1214, "step": 5388 }, { "epoch": 0.7983703703703704, "grad_norm": 2.250946044921875, "learning_rate": 4.050407709414381e-05, "loss": 0.9764, "step": 5389 }, { "epoch": 0.7985185185185185, "grad_norm": 
1.5796802043914795, "learning_rate": 4.047442550037065e-05, "loss": 0.9587, "step": 5390 }, { "epoch": 0.7986666666666666, "grad_norm": 1.8032429218292236, "learning_rate": 4.0444773906597485e-05, "loss": 0.9545, "step": 5391 }, { "epoch": 0.7988148148148149, "grad_norm": 2.0286922454833984, "learning_rate": 4.0415122312824316e-05, "loss": 1.2246, "step": 5392 }, { "epoch": 0.798962962962963, "grad_norm": 1.4904719591140747, "learning_rate": 4.038547071905115e-05, "loss": 0.9967, "step": 5393 }, { "epoch": 0.7991111111111111, "grad_norm": 2.3682022094726562, "learning_rate": 4.0355819125277985e-05, "loss": 1.2159, "step": 5394 }, { "epoch": 0.7992592592592592, "grad_norm": 1.8080886602401733, "learning_rate": 4.032616753150482e-05, "loss": 1.0301, "step": 5395 }, { "epoch": 0.7994074074074075, "grad_norm": 2.6648478507995605, "learning_rate": 4.0296515937731654e-05, "loss": 0.9757, "step": 5396 }, { "epoch": 0.7995555555555556, "grad_norm": 2.8013088703155518, "learning_rate": 4.026686434395849e-05, "loss": 1.1824, "step": 5397 }, { "epoch": 0.7997037037037037, "grad_norm": 1.606271743774414, "learning_rate": 4.023721275018533e-05, "loss": 0.8274, "step": 5398 }, { "epoch": 0.7998518518518518, "grad_norm": 2.0751864910125732, "learning_rate": 4.020756115641216e-05, "loss": 1.0437, "step": 5399 }, { "epoch": 0.8, "grad_norm": 1.7119338512420654, "learning_rate": 4.017790956263899e-05, "loss": 0.8679, "step": 5400 }, { "epoch": 0.8001481481481482, "grad_norm": 4.252305030822754, "learning_rate": 4.014825796886583e-05, "loss": 1.0427, "step": 5401 }, { "epoch": 0.8002962962962963, "grad_norm": 1.4674803018569946, "learning_rate": 4.0118606375092666e-05, "loss": 0.8695, "step": 5402 }, { "epoch": 0.8004444444444444, "grad_norm": 1.4978564977645874, "learning_rate": 4.00889547813195e-05, "loss": 1.0836, "step": 5403 }, { "epoch": 0.8005925925925926, "grad_norm": 1.1871086359024048, "learning_rate": 4.0059303187546335e-05, "loss": 0.8869, "step": 5404 }, { "epoch": 
0.8007407407407408, "grad_norm": 1.090904951095581, "learning_rate": 4.0029651593773166e-05, "loss": 1.0713, "step": 5405 }, { "epoch": 0.8008888888888889, "grad_norm": 1.7516885995864868, "learning_rate": 4e-05, "loss": 1.0477, "step": 5406 }, { "epoch": 0.801037037037037, "grad_norm": 1.5126169919967651, "learning_rate": 3.9970348406226834e-05, "loss": 0.8397, "step": 5407 }, { "epoch": 0.8011851851851852, "grad_norm": 1.9686980247497559, "learning_rate": 3.994069681245367e-05, "loss": 0.9915, "step": 5408 }, { "epoch": 0.8013333333333333, "grad_norm": 1.5845431089401245, "learning_rate": 3.99110452186805e-05, "loss": 1.0025, "step": 5409 }, { "epoch": 0.8014814814814815, "grad_norm": 2.3498854637145996, "learning_rate": 3.988139362490734e-05, "loss": 1.2884, "step": 5410 }, { "epoch": 0.8016296296296296, "grad_norm": 1.541824460029602, "learning_rate": 3.985174203113418e-05, "loss": 0.9402, "step": 5411 }, { "epoch": 0.8017777777777778, "grad_norm": 1.6967843770980835, "learning_rate": 3.982209043736101e-05, "loss": 0.9696, "step": 5412 }, { "epoch": 0.8019259259259259, "grad_norm": 1.4138332605361938, "learning_rate": 3.979243884358784e-05, "loss": 1.0194, "step": 5413 }, { "epoch": 0.802074074074074, "grad_norm": 1.6247764825820923, "learning_rate": 3.9762787249814684e-05, "loss": 0.9082, "step": 5414 }, { "epoch": 0.8022222222222222, "grad_norm": 2.2672598361968994, "learning_rate": 3.9733135656041515e-05, "loss": 1.1271, "step": 5415 }, { "epoch": 0.8023703703703704, "grad_norm": 1.7656632661819458, "learning_rate": 3.9703484062268346e-05, "loss": 1.0607, "step": 5416 }, { "epoch": 0.8025185185185185, "grad_norm": 1.7334779500961304, "learning_rate": 3.9673832468495184e-05, "loss": 1.3095, "step": 5417 }, { "epoch": 0.8026666666666666, "grad_norm": 1.2259182929992676, "learning_rate": 3.964418087472202e-05, "loss": 0.824, "step": 5418 }, { "epoch": 0.8028148148148149, "grad_norm": 1.4198298454284668, "learning_rate": 3.961452928094885e-05, "loss": 0.9926, 
"step": 5419 }, { "epoch": 0.802962962962963, "grad_norm": 1.5182987451553345, "learning_rate": 3.9584877687175683e-05, "loss": 1.0254, "step": 5420 }, { "epoch": 0.8031111111111111, "grad_norm": 2.0433640480041504, "learning_rate": 3.955522609340252e-05, "loss": 1.0753, "step": 5421 }, { "epoch": 0.8032592592592592, "grad_norm": 1.9819378852844238, "learning_rate": 3.952557449962936e-05, "loss": 0.9146, "step": 5422 }, { "epoch": 0.8034074074074075, "grad_norm": 1.3722615242004395, "learning_rate": 3.949592290585619e-05, "loss": 1.079, "step": 5423 }, { "epoch": 0.8035555555555556, "grad_norm": 1.2907549142837524, "learning_rate": 3.946627131208303e-05, "loss": 0.916, "step": 5424 }, { "epoch": 0.8037037037037037, "grad_norm": 2.0529794692993164, "learning_rate": 3.943661971830986e-05, "loss": 1.0675, "step": 5425 }, { "epoch": 0.8038518518518518, "grad_norm": 1.6150699853897095, "learning_rate": 3.9406968124536696e-05, "loss": 0.8458, "step": 5426 }, { "epoch": 0.804, "grad_norm": 1.9195685386657715, "learning_rate": 3.9377316530763534e-05, "loss": 0.9998, "step": 5427 }, { "epoch": 0.8041481481481482, "grad_norm": 1.518667221069336, "learning_rate": 3.9347664936990365e-05, "loss": 1.0308, "step": 5428 }, { "epoch": 0.8042962962962963, "grad_norm": 1.4057791233062744, "learning_rate": 3.9318013343217195e-05, "loss": 1.1398, "step": 5429 }, { "epoch": 0.8044444444444444, "grad_norm": 1.8205337524414062, "learning_rate": 3.928836174944403e-05, "loss": 0.8375, "step": 5430 }, { "epoch": 0.8045925925925926, "grad_norm": 1.9404771327972412, "learning_rate": 3.925871015567087e-05, "loss": 0.864, "step": 5431 }, { "epoch": 0.8047407407407408, "grad_norm": 1.4498867988586426, "learning_rate": 3.92290585618977e-05, "loss": 0.9772, "step": 5432 }, { "epoch": 0.8048888888888889, "grad_norm": 1.7233918905258179, "learning_rate": 3.919940696812454e-05, "loss": 0.9626, "step": 5433 }, { "epoch": 0.805037037037037, "grad_norm": 2.1639041900634766, "learning_rate": 
3.916975537435138e-05, "loss": 0.8474, "step": 5434 }, { "epoch": 0.8051851851851852, "grad_norm": 2.4917683601379395, "learning_rate": 3.914010378057821e-05, "loss": 1.0292, "step": 5435 }, { "epoch": 0.8053333333333333, "grad_norm": 2.330807685852051, "learning_rate": 3.911045218680504e-05, "loss": 1.1002, "step": 5436 }, { "epoch": 0.8054814814814815, "grad_norm": 1.7886220216751099, "learning_rate": 3.9080800593031877e-05, "loss": 1.2997, "step": 5437 }, { "epoch": 0.8056296296296296, "grad_norm": 2.5931851863861084, "learning_rate": 3.9051148999258714e-05, "loss": 0.7829, "step": 5438 }, { "epoch": 0.8057777777777778, "grad_norm": 2.1554958820343018, "learning_rate": 3.9021497405485545e-05, "loss": 1.044, "step": 5439 }, { "epoch": 0.8059259259259259, "grad_norm": 1.3633235692977905, "learning_rate": 3.899184581171238e-05, "loss": 0.8883, "step": 5440 }, { "epoch": 0.806074074074074, "grad_norm": 1.4236547946929932, "learning_rate": 3.896219421793922e-05, "loss": 1.1795, "step": 5441 }, { "epoch": 0.8062222222222222, "grad_norm": 1.8070131540298462, "learning_rate": 3.893254262416605e-05, "loss": 1.0315, "step": 5442 }, { "epoch": 0.8063703703703704, "grad_norm": 1.392898678779602, "learning_rate": 3.890289103039288e-05, "loss": 0.8974, "step": 5443 }, { "epoch": 0.8065185185185185, "grad_norm": 2.12119197845459, "learning_rate": 3.887323943661972e-05, "loss": 0.9822, "step": 5444 }, { "epoch": 0.8066666666666666, "grad_norm": 2.1227033138275146, "learning_rate": 3.884358784284656e-05, "loss": 1.1702, "step": 5445 }, { "epoch": 0.8068148148148148, "grad_norm": 1.5095551013946533, "learning_rate": 3.881393624907339e-05, "loss": 0.9813, "step": 5446 }, { "epoch": 0.806962962962963, "grad_norm": 1.4496660232543945, "learning_rate": 3.8784284655300226e-05, "loss": 0.8225, "step": 5447 }, { "epoch": 0.8071111111111111, "grad_norm": 1.5028917789459229, "learning_rate": 3.875463306152706e-05, "loss": 0.8998, "step": 5448 }, { "epoch": 0.8072592592592592, "grad_norm": 
1.9945577383041382, "learning_rate": 3.8724981467753895e-05, "loss": 0.9351, "step": 5449 }, { "epoch": 0.8074074074074075, "grad_norm": 1.7238104343414307, "learning_rate": 3.8695329873980726e-05, "loss": 1.0828, "step": 5450 }, { "epoch": 0.8075555555555556, "grad_norm": 1.9189610481262207, "learning_rate": 3.8665678280207564e-05, "loss": 1.192, "step": 5451 }, { "epoch": 0.8077037037037037, "grad_norm": 1.6241904497146606, "learning_rate": 3.8636026686434394e-05, "loss": 1.0557, "step": 5452 }, { "epoch": 0.8078518518518518, "grad_norm": 1.2917290925979614, "learning_rate": 3.860637509266123e-05, "loss": 1.1501, "step": 5453 }, { "epoch": 0.808, "grad_norm": 1.5646109580993652, "learning_rate": 3.857672349888807e-05, "loss": 0.787, "step": 5454 }, { "epoch": 0.8081481481481482, "grad_norm": 2.36950421333313, "learning_rate": 3.85470719051149e-05, "loss": 1.2892, "step": 5455 }, { "epoch": 0.8082962962962963, "grad_norm": 1.3422205448150635, "learning_rate": 3.851742031134173e-05, "loss": 1.1428, "step": 5456 }, { "epoch": 0.8084444444444444, "grad_norm": 1.5275359153747559, "learning_rate": 3.8487768717568576e-05, "loss": 1.2389, "step": 5457 }, { "epoch": 0.8085925925925926, "grad_norm": 1.90117609500885, "learning_rate": 3.845811712379541e-05, "loss": 1.161, "step": 5458 }, { "epoch": 0.8087407407407408, "grad_norm": 2.4937429428100586, "learning_rate": 3.842846553002224e-05, "loss": 1.1525, "step": 5459 }, { "epoch": 0.8088888888888889, "grad_norm": 2.7077555656433105, "learning_rate": 3.8398813936249076e-05, "loss": 1.2007, "step": 5460 }, { "epoch": 0.809037037037037, "grad_norm": 2.1760613918304443, "learning_rate": 3.836916234247591e-05, "loss": 1.022, "step": 5461 }, { "epoch": 0.8091851851851852, "grad_norm": 1.5264531373977661, "learning_rate": 3.8339510748702744e-05, "loss": 1.05, "step": 5462 }, { "epoch": 0.8093333333333333, "grad_norm": 1.820239543914795, "learning_rate": 3.8309859154929575e-05, "loss": 0.8786, "step": 5463 }, { "epoch": 
0.8094814814814815, "grad_norm": 1.18095064163208, "learning_rate": 3.828020756115641e-05, "loss": 1.2188, "step": 5464 }, { "epoch": 0.8096296296296296, "grad_norm": 1.9990864992141724, "learning_rate": 3.825055596738325e-05, "loss": 1.0489, "step": 5465 }, { "epoch": 0.8097777777777778, "grad_norm": 1.3547680377960205, "learning_rate": 3.822090437361008e-05, "loss": 1.2582, "step": 5466 }, { "epoch": 0.8099259259259259, "grad_norm": 1.4557368755340576, "learning_rate": 3.819125277983692e-05, "loss": 1.0094, "step": 5467 }, { "epoch": 0.8100740740740741, "grad_norm": 1.4652191400527954, "learning_rate": 3.816160118606375e-05, "loss": 1.101, "step": 5468 }, { "epoch": 0.8102222222222222, "grad_norm": 1.6693017482757568, "learning_rate": 3.813194959229059e-05, "loss": 1.0293, "step": 5469 }, { "epoch": 0.8103703703703704, "grad_norm": 1.4987443685531616, "learning_rate": 3.8102297998517425e-05, "loss": 0.9499, "step": 5470 }, { "epoch": 0.8105185185185185, "grad_norm": 2.044036388397217, "learning_rate": 3.8072646404744256e-05, "loss": 1.0191, "step": 5471 }, { "epoch": 0.8106666666666666, "grad_norm": 1.643293857574463, "learning_rate": 3.804299481097109e-05, "loss": 1.036, "step": 5472 }, { "epoch": 0.8108148148148148, "grad_norm": 1.6891175508499146, "learning_rate": 3.8013343217197925e-05, "loss": 0.938, "step": 5473 }, { "epoch": 0.810962962962963, "grad_norm": 1.3681107759475708, "learning_rate": 3.798369162342476e-05, "loss": 0.947, "step": 5474 }, { "epoch": 0.8111111111111111, "grad_norm": 1.179011344909668, "learning_rate": 3.795404002965159e-05, "loss": 0.8874, "step": 5475 }, { "epoch": 0.8112592592592592, "grad_norm": 1.443042516708374, "learning_rate": 3.792438843587843e-05, "loss": 1.1634, "step": 5476 }, { "epoch": 0.8114074074074074, "grad_norm": 1.5776867866516113, "learning_rate": 3.789473684210527e-05, "loss": 0.8601, "step": 5477 }, { "epoch": 0.8115555555555556, "grad_norm": 2.202869176864624, "learning_rate": 3.78650852483321e-05, "loss": 
0.9741, "step": 5478 }, { "epoch": 0.8117037037037037, "grad_norm": 1.7886111736297607, "learning_rate": 3.783543365455893e-05, "loss": 0.9143, "step": 5479 }, { "epoch": 0.8118518518518518, "grad_norm": 1.6400786638259888, "learning_rate": 3.780578206078577e-05, "loss": 0.9915, "step": 5480 }, { "epoch": 0.812, "grad_norm": 1.8931728601455688, "learning_rate": 3.7776130467012606e-05, "loss": 1.1405, "step": 5481 }, { "epoch": 0.8121481481481482, "grad_norm": 1.3592950105667114, "learning_rate": 3.774647887323944e-05, "loss": 0.8755, "step": 5482 }, { "epoch": 0.8122962962962963, "grad_norm": 3.140119791030884, "learning_rate": 3.7716827279466275e-05, "loss": 1.1823, "step": 5483 }, { "epoch": 0.8124444444444444, "grad_norm": 1.6572941541671753, "learning_rate": 3.768717568569311e-05, "loss": 1.0694, "step": 5484 }, { "epoch": 0.8125925925925926, "grad_norm": 2.7264232635498047, "learning_rate": 3.765752409191994e-05, "loss": 0.8261, "step": 5485 }, { "epoch": 0.8127407407407408, "grad_norm": 1.2844897508621216, "learning_rate": 3.7627872498146774e-05, "loss": 1.0877, "step": 5486 }, { "epoch": 0.8128888888888889, "grad_norm": 2.3487370014190674, "learning_rate": 3.759822090437361e-05, "loss": 1.0698, "step": 5487 }, { "epoch": 0.813037037037037, "grad_norm": 2.817286491394043, "learning_rate": 3.756856931060045e-05, "loss": 1.1748, "step": 5488 }, { "epoch": 0.8131851851851852, "grad_norm": 9.444596290588379, "learning_rate": 3.753891771682728e-05, "loss": 0.7979, "step": 5489 }, { "epoch": 0.8133333333333334, "grad_norm": 1.417769193649292, "learning_rate": 3.750926612305412e-05, "loss": 1.0666, "step": 5490 }, { "epoch": 0.8134814814814815, "grad_norm": 1.8905470371246338, "learning_rate": 3.747961452928095e-05, "loss": 0.8707, "step": 5491 }, { "epoch": 0.8136296296296296, "grad_norm": 3.3869268894195557, "learning_rate": 3.7449962935507787e-05, "loss": 1.081, "step": 5492 }, { "epoch": 0.8137777777777778, "grad_norm": 2.0902247428894043, "learning_rate": 
3.742031134173462e-05, "loss": 0.9779, "step": 5493 }, { "epoch": 0.8139259259259259, "grad_norm": 1.871923565864563, "learning_rate": 3.7390659747961455e-05, "loss": 1.0492, "step": 5494 }, { "epoch": 0.8140740740740741, "grad_norm": 1.3906363248825073, "learning_rate": 3.7361008154188286e-05, "loss": 1.0126, "step": 5495 }, { "epoch": 0.8142222222222222, "grad_norm": 1.8805983066558838, "learning_rate": 3.7331356560415124e-05, "loss": 0.9645, "step": 5496 }, { "epoch": 0.8143703703703704, "grad_norm": 1.46416437625885, "learning_rate": 3.730170496664196e-05, "loss": 0.9676, "step": 5497 }, { "epoch": 0.8145185185185185, "grad_norm": 1.4247856140136719, "learning_rate": 3.727205337286879e-05, "loss": 0.9509, "step": 5498 }, { "epoch": 0.8146666666666667, "grad_norm": 1.200612187385559, "learning_rate": 3.724240177909562e-05, "loss": 0.8609, "step": 5499 }, { "epoch": 0.8148148148148148, "grad_norm": 1.1644580364227295, "learning_rate": 3.721275018532247e-05, "loss": 0.9188, "step": 5500 }, { "epoch": 0.814962962962963, "grad_norm": 1.4518518447875977, "learning_rate": 3.71830985915493e-05, "loss": 0.8037, "step": 5501 }, { "epoch": 0.8151111111111111, "grad_norm": 2.069011926651001, "learning_rate": 3.715344699777613e-05, "loss": 1.1069, "step": 5502 }, { "epoch": 0.8152592592592592, "grad_norm": 1.531945824623108, "learning_rate": 3.712379540400297e-05, "loss": 1.172, "step": 5503 }, { "epoch": 0.8154074074074074, "grad_norm": 1.9932469129562378, "learning_rate": 3.7094143810229805e-05, "loss": 0.7403, "step": 5504 }, { "epoch": 0.8155555555555556, "grad_norm": 1.6678223609924316, "learning_rate": 3.7064492216456636e-05, "loss": 1.1614, "step": 5505 }, { "epoch": 0.8157037037037037, "grad_norm": 1.8684024810791016, "learning_rate": 3.703484062268347e-05, "loss": 0.9379, "step": 5506 }, { "epoch": 0.8158518518518518, "grad_norm": 3.2789533138275146, "learning_rate": 3.7005189028910304e-05, "loss": 0.853, "step": 5507 }, { "epoch": 0.816, "grad_norm": 
2.372007369995117, "learning_rate": 3.697553743513714e-05, "loss": 0.9628, "step": 5508 }, { "epoch": 0.8161481481481482, "grad_norm": 1.2470200061798096, "learning_rate": 3.694588584136397e-05, "loss": 1.1039, "step": 5509 }, { "epoch": 0.8162962962962963, "grad_norm": 1.9533486366271973, "learning_rate": 3.691623424759081e-05, "loss": 1.2456, "step": 5510 }, { "epoch": 0.8164444444444444, "grad_norm": 1.8412435054779053, "learning_rate": 3.688658265381764e-05, "loss": 1.1617, "step": 5511 }, { "epoch": 0.8165925925925926, "grad_norm": 2.08192777633667, "learning_rate": 3.685693106004448e-05, "loss": 1.027, "step": 5512 }, { "epoch": 0.8167407407407408, "grad_norm": 1.6966136693954468, "learning_rate": 3.682727946627132e-05, "loss": 1.2062, "step": 5513 }, { "epoch": 0.8168888888888889, "grad_norm": 1.702400803565979, "learning_rate": 3.679762787249815e-05, "loss": 1.1495, "step": 5514 }, { "epoch": 0.817037037037037, "grad_norm": 1.328120231628418, "learning_rate": 3.6767976278724986e-05, "loss": 0.865, "step": 5515 }, { "epoch": 0.8171851851851852, "grad_norm": 2.2423410415649414, "learning_rate": 3.6738324684951816e-05, "loss": 0.7895, "step": 5516 }, { "epoch": 0.8173333333333334, "grad_norm": 1.627179503440857, "learning_rate": 3.6708673091178654e-05, "loss": 1.073, "step": 5517 }, { "epoch": 0.8174814814814815, "grad_norm": 1.8343318700790405, "learning_rate": 3.6679021497405485e-05, "loss": 0.9913, "step": 5518 }, { "epoch": 0.8176296296296296, "grad_norm": 2.3681259155273438, "learning_rate": 3.664936990363232e-05, "loss": 1.0483, "step": 5519 }, { "epoch": 0.8177777777777778, "grad_norm": 1.7481133937835693, "learning_rate": 3.661971830985916e-05, "loss": 1.1003, "step": 5520 }, { "epoch": 0.817925925925926, "grad_norm": 1.617566466331482, "learning_rate": 3.659006671608599e-05, "loss": 1.1357, "step": 5521 }, { "epoch": 0.8180740740740741, "grad_norm": 1.6094902753829956, "learning_rate": 3.656041512231282e-05, "loss": 0.9548, "step": 5522 }, { "epoch": 
0.8182222222222222, "grad_norm": 2.6640186309814453, "learning_rate": 3.653076352853966e-05, "loss": 1.1334, "step": 5523 }, { "epoch": 0.8183703703703704, "grad_norm": 2.049234390258789, "learning_rate": 3.65011119347665e-05, "loss": 1.0645, "step": 5524 }, { "epoch": 0.8185185185185185, "grad_norm": 1.8995856046676636, "learning_rate": 3.647146034099333e-05, "loss": 1.1411, "step": 5525 }, { "epoch": 0.8186666666666667, "grad_norm": 3.3620994091033936, "learning_rate": 3.6441808747220166e-05, "loss": 0.9616, "step": 5526 }, { "epoch": 0.8188148148148148, "grad_norm": 1.9591317176818848, "learning_rate": 3.6412157153447004e-05, "loss": 1.0467, "step": 5527 }, { "epoch": 0.818962962962963, "grad_norm": 3.109315872192383, "learning_rate": 3.6382505559673835e-05, "loss": 1.1829, "step": 5528 }, { "epoch": 0.8191111111111111, "grad_norm": 2.5587923526763916, "learning_rate": 3.6352853965900666e-05, "loss": 0.8522, "step": 5529 }, { "epoch": 0.8192592592592592, "grad_norm": 1.3240567445755005, "learning_rate": 3.63232023721275e-05, "loss": 0.9416, "step": 5530 }, { "epoch": 0.8194074074074074, "grad_norm": 1.5940901041030884, "learning_rate": 3.629355077835434e-05, "loss": 1.0305, "step": 5531 }, { "epoch": 0.8195555555555556, "grad_norm": 4.036149501800537, "learning_rate": 3.626389918458117e-05, "loss": 0.7898, "step": 5532 }, { "epoch": 0.8197037037037037, "grad_norm": 1.9516942501068115, "learning_rate": 3.623424759080801e-05, "loss": 0.9664, "step": 5533 }, { "epoch": 0.8198518518518518, "grad_norm": 1.7062143087387085, "learning_rate": 3.620459599703484e-05, "loss": 0.7753, "step": 5534 }, { "epoch": 0.82, "grad_norm": 2.762611150741577, "learning_rate": 3.617494440326168e-05, "loss": 0.9052, "step": 5535 }, { "epoch": 0.8201481481481482, "grad_norm": 1.630911111831665, "learning_rate": 3.614529280948851e-05, "loss": 1.1295, "step": 5536 }, { "epoch": 0.8202962962962963, "grad_norm": 2.24279522895813, "learning_rate": 3.611564121571535e-05, "loss": 0.791, "step": 
5537 }, { "epoch": 0.8204444444444444, "grad_norm": 1.067190170288086, "learning_rate": 3.608598962194218e-05, "loss": 0.9947, "step": 5538 }, { "epoch": 0.8205925925925925, "grad_norm": 1.8940973281860352, "learning_rate": 3.6056338028169015e-05, "loss": 1.1676, "step": 5539 }, { "epoch": 0.8207407407407408, "grad_norm": 3.7762491703033447, "learning_rate": 3.602668643439585e-05, "loss": 1.045, "step": 5540 }, { "epoch": 0.8208888888888889, "grad_norm": 1.4663925170898438, "learning_rate": 3.5997034840622684e-05, "loss": 1.0359, "step": 5541 }, { "epoch": 0.821037037037037, "grad_norm": 2.0131566524505615, "learning_rate": 3.5967383246849515e-05, "loss": 0.9535, "step": 5542 }, { "epoch": 0.8211851851851852, "grad_norm": 3.2547378540039062, "learning_rate": 3.593773165307636e-05, "loss": 1.099, "step": 5543 }, { "epoch": 0.8213333333333334, "grad_norm": 1.8051725625991821, "learning_rate": 3.590808005930319e-05, "loss": 1.1414, "step": 5544 }, { "epoch": 0.8214814814814815, "grad_norm": 1.619515299797058, "learning_rate": 3.587842846553002e-05, "loss": 1.141, "step": 5545 }, { "epoch": 0.8216296296296296, "grad_norm": 3.431809425354004, "learning_rate": 3.584877687175686e-05, "loss": 1.1316, "step": 5546 }, { "epoch": 0.8217777777777778, "grad_norm": 1.6146504878997803, "learning_rate": 3.5819125277983697e-05, "loss": 1.1646, "step": 5547 }, { "epoch": 0.821925925925926, "grad_norm": 1.579978346824646, "learning_rate": 3.578947368421053e-05, "loss": 0.9278, "step": 5548 }, { "epoch": 0.8220740740740741, "grad_norm": 1.1909756660461426, "learning_rate": 3.575982209043736e-05, "loss": 0.8999, "step": 5549 }, { "epoch": 0.8222222222222222, "grad_norm": 2.0035927295684814, "learning_rate": 3.5730170496664196e-05, "loss": 0.8428, "step": 5550 }, { "epoch": 0.8223703703703704, "grad_norm": 2.4635682106018066, "learning_rate": 3.5700518902891034e-05, "loss": 0.8511, "step": 5551 }, { "epoch": 0.8225185185185185, "grad_norm": 1.6868245601654053, "learning_rate": 
3.5670867309117865e-05, "loss": 0.9326, "step": 5552 }, { "epoch": 0.8226666666666667, "grad_norm": 1.6198513507843018, "learning_rate": 3.56412157153447e-05, "loss": 1.1399, "step": 5553 }, { "epoch": 0.8228148148148148, "grad_norm": 1.3706104755401611, "learning_rate": 3.561156412157153e-05, "loss": 0.8828, "step": 5554 }, { "epoch": 0.822962962962963, "grad_norm": 1.4087867736816406, "learning_rate": 3.558191252779837e-05, "loss": 1.1209, "step": 5555 }, { "epoch": 0.8231111111111111, "grad_norm": 1.3651924133300781, "learning_rate": 3.555226093402521e-05, "loss": 0.9406, "step": 5556 }, { "epoch": 0.8232592592592592, "grad_norm": 1.256922960281372, "learning_rate": 3.552260934025204e-05, "loss": 0.8475, "step": 5557 }, { "epoch": 0.8234074074074074, "grad_norm": 2.030674695968628, "learning_rate": 3.549295774647888e-05, "loss": 0.9721, "step": 5558 }, { "epoch": 0.8235555555555556, "grad_norm": 2.2394938468933105, "learning_rate": 3.546330615270571e-05, "loss": 0.7677, "step": 5559 }, { "epoch": 0.8237037037037037, "grad_norm": 6.911998748779297, "learning_rate": 3.5433654558932546e-05, "loss": 0.9324, "step": 5560 }, { "epoch": 0.8238518518518518, "grad_norm": 1.8028950691223145, "learning_rate": 3.540400296515938e-05, "loss": 0.8779, "step": 5561 }, { "epoch": 0.824, "grad_norm": 1.6942514181137085, "learning_rate": 3.5374351371386214e-05, "loss": 1.045, "step": 5562 }, { "epoch": 0.8241481481481482, "grad_norm": 1.9035639762878418, "learning_rate": 3.534469977761305e-05, "loss": 1.0837, "step": 5563 }, { "epoch": 0.8242962962962963, "grad_norm": 1.3376104831695557, "learning_rate": 3.531504818383988e-05, "loss": 0.9566, "step": 5564 }, { "epoch": 0.8244444444444444, "grad_norm": 1.4366275072097778, "learning_rate": 3.5285396590066714e-05, "loss": 0.8443, "step": 5565 }, { "epoch": 0.8245925925925925, "grad_norm": 1.142834186553955, "learning_rate": 3.525574499629355e-05, "loss": 1.1526, "step": 5566 }, { "epoch": 0.8247407407407408, "grad_norm": 
1.9914871454238892, "learning_rate": 3.522609340252039e-05, "loss": 1.0796, "step": 5567 }, { "epoch": 0.8248888888888889, "grad_norm": 1.3143497705459595, "learning_rate": 3.519644180874722e-05, "loss": 1.0918, "step": 5568 }, { "epoch": 0.825037037037037, "grad_norm": 2.031128406524658, "learning_rate": 3.516679021497406e-05, "loss": 1.2729, "step": 5569 }, { "epoch": 0.8251851851851851, "grad_norm": 2.464761972427368, "learning_rate": 3.5137138621200896e-05, "loss": 0.9552, "step": 5570 }, { "epoch": 0.8253333333333334, "grad_norm": 1.7345706224441528, "learning_rate": 3.5107487027427726e-05, "loss": 0.848, "step": 5571 }, { "epoch": 0.8254814814814815, "grad_norm": 1.703432559967041, "learning_rate": 3.507783543365456e-05, "loss": 1.0549, "step": 5572 }, { "epoch": 0.8256296296296296, "grad_norm": 1.3836984634399414, "learning_rate": 3.5048183839881395e-05, "loss": 0.9967, "step": 5573 }, { "epoch": 0.8257777777777778, "grad_norm": 1.334790825843811, "learning_rate": 3.501853224610823e-05, "loss": 1.0518, "step": 5574 }, { "epoch": 0.825925925925926, "grad_norm": 4.566990852355957, "learning_rate": 3.4988880652335064e-05, "loss": 0.9782, "step": 5575 }, { "epoch": 0.8260740740740741, "grad_norm": 2.2013661861419678, "learning_rate": 3.49592290585619e-05, "loss": 1.0255, "step": 5576 }, { "epoch": 0.8262222222222222, "grad_norm": 2.3910906314849854, "learning_rate": 3.492957746478873e-05, "loss": 1.0668, "step": 5577 }, { "epoch": 0.8263703703703704, "grad_norm": 1.7936630249023438, "learning_rate": 3.489992587101557e-05, "loss": 0.9094, "step": 5578 }, { "epoch": 0.8265185185185185, "grad_norm": 1.5428414344787598, "learning_rate": 3.48702742772424e-05, "loss": 1.031, "step": 5579 }, { "epoch": 0.8266666666666667, "grad_norm": 1.2280282974243164, "learning_rate": 3.484062268346924e-05, "loss": 0.8246, "step": 5580 }, { "epoch": 0.8268148148148148, "grad_norm": 2.28139328956604, "learning_rate": 3.481097108969607e-05, "loss": 0.8762, "step": 5581 }, { "epoch": 
0.826962962962963, "grad_norm": 2.3486006259918213, "learning_rate": 3.478131949592291e-05, "loss": 1.0251, "step": 5582 }, { "epoch": 0.8271111111111111, "grad_norm": 1.8135404586791992, "learning_rate": 3.4751667902149745e-05, "loss": 1.0686, "step": 5583 }, { "epoch": 0.8272592592592592, "grad_norm": 2.6683990955352783, "learning_rate": 3.4722016308376576e-05, "loss": 1.0136, "step": 5584 }, { "epoch": 0.8274074074074074, "grad_norm": 1.5761295557022095, "learning_rate": 3.4692364714603407e-05, "loss": 1.1173, "step": 5585 }, { "epoch": 0.8275555555555556, "grad_norm": 17.536306381225586, "learning_rate": 3.466271312083025e-05, "loss": 1.2514, "step": 5586 }, { "epoch": 0.8277037037037037, "grad_norm": 2.289813756942749, "learning_rate": 3.463306152705708e-05, "loss": 0.9197, "step": 5587 }, { "epoch": 0.8278518518518518, "grad_norm": 1.5273847579956055, "learning_rate": 3.460340993328391e-05, "loss": 0.8554, "step": 5588 }, { "epoch": 0.828, "grad_norm": 1.2904846668243408, "learning_rate": 3.457375833951075e-05, "loss": 1.046, "step": 5589 }, { "epoch": 0.8281481481481482, "grad_norm": 2.67862606048584, "learning_rate": 3.454410674573759e-05, "loss": 0.9925, "step": 5590 }, { "epoch": 0.8282962962962963, "grad_norm": 1.4206843376159668, "learning_rate": 3.451445515196442e-05, "loss": 0.9479, "step": 5591 }, { "epoch": 0.8284444444444444, "grad_norm": 1.2101153135299683, "learning_rate": 3.448480355819125e-05, "loss": 1.1607, "step": 5592 }, { "epoch": 0.8285925925925925, "grad_norm": 1.2494651079177856, "learning_rate": 3.445515196441809e-05, "loss": 0.8645, "step": 5593 }, { "epoch": 0.8287407407407408, "grad_norm": 2.1751744747161865, "learning_rate": 3.4425500370644925e-05, "loss": 1.1682, "step": 5594 }, { "epoch": 0.8288888888888889, "grad_norm": 1.934248685836792, "learning_rate": 3.4395848776871756e-05, "loss": 0.9544, "step": 5595 }, { "epoch": 0.829037037037037, "grad_norm": 8.274798393249512, "learning_rate": 3.4366197183098594e-05, "loss": 1.1841, 
"step": 5596 }, { "epoch": 0.8291851851851851, "grad_norm": 2.040048599243164, "learning_rate": 3.433654558932543e-05, "loss": 0.9006, "step": 5597 }, { "epoch": 0.8293333333333334, "grad_norm": 2.306729316711426, "learning_rate": 3.430689399555226e-05, "loss": 1.1027, "step": 5598 }, { "epoch": 0.8294814814814815, "grad_norm": 1.608792781829834, "learning_rate": 3.42772424017791e-05, "loss": 0.9009, "step": 5599 }, { "epoch": 0.8296296296296296, "grad_norm": 2.4428153038024902, "learning_rate": 3.424759080800593e-05, "loss": 1.1927, "step": 5600 }, { "epoch": 0.8297777777777777, "grad_norm": 1.4491654634475708, "learning_rate": 3.421793921423277e-05, "loss": 0.9686, "step": 5601 }, { "epoch": 0.829925925925926, "grad_norm": 2.759342670440674, "learning_rate": 3.41882876204596e-05, "loss": 1.0876, "step": 5602 }, { "epoch": 0.8300740740740741, "grad_norm": 1.6981993913650513, "learning_rate": 3.415863602668644e-05, "loss": 1.1029, "step": 5603 }, { "epoch": 0.8302222222222222, "grad_norm": 1.3702118396759033, "learning_rate": 3.412898443291327e-05, "loss": 0.9498, "step": 5604 }, { "epoch": 0.8303703703703704, "grad_norm": 1.777405023574829, "learning_rate": 3.4099332839140106e-05, "loss": 0.9104, "step": 5605 }, { "epoch": 0.8305185185185185, "grad_norm": 1.8026494979858398, "learning_rate": 3.4069681245366944e-05, "loss": 1.0487, "step": 5606 }, { "epoch": 0.8306666666666667, "grad_norm": 2.668658971786499, "learning_rate": 3.4040029651593775e-05, "loss": 1.1804, "step": 5607 }, { "epoch": 0.8308148148148148, "grad_norm": 1.5562937259674072, "learning_rate": 3.4010378057820606e-05, "loss": 0.9062, "step": 5608 }, { "epoch": 0.830962962962963, "grad_norm": 1.4961464405059814, "learning_rate": 3.398072646404744e-05, "loss": 1.0791, "step": 5609 }, { "epoch": 0.8311111111111111, "grad_norm": 3.3875136375427246, "learning_rate": 3.395107487027428e-05, "loss": 1.0595, "step": 5610 }, { "epoch": 0.8312592592592593, "grad_norm": 1.272215723991394, "learning_rate": 
3.392142327650111e-05, "loss": 1.0394, "step": 5611 }, { "epoch": 0.8314074074074074, "grad_norm": 14.303399085998535, "learning_rate": 3.389177168272795e-05, "loss": 0.9272, "step": 5612 }, { "epoch": 0.8315555555555556, "grad_norm": 3.60304856300354, "learning_rate": 3.386212008895479e-05, "loss": 1.0269, "step": 5613 }, { "epoch": 0.8317037037037037, "grad_norm": 1.3587753772735596, "learning_rate": 3.383246849518162e-05, "loss": 1.4079, "step": 5614 }, { "epoch": 0.8318518518518518, "grad_norm": 2.1113405227661133, "learning_rate": 3.380281690140845e-05, "loss": 1.1314, "step": 5615 }, { "epoch": 0.832, "grad_norm": 1.4930996894836426, "learning_rate": 3.377316530763529e-05, "loss": 1.156, "step": 5616 }, { "epoch": 0.8321481481481482, "grad_norm": 1.3228946924209595, "learning_rate": 3.3743513713862124e-05, "loss": 1.1645, "step": 5617 }, { "epoch": 0.8322962962962963, "grad_norm": 1.929006814956665, "learning_rate": 3.3713862120088955e-05, "loss": 1.0037, "step": 5618 }, { "epoch": 0.8324444444444444, "grad_norm": 1.6608818769454956, "learning_rate": 3.368421052631579e-05, "loss": 1.1946, "step": 5619 }, { "epoch": 0.8325925925925926, "grad_norm": 2.0891196727752686, "learning_rate": 3.3654558932542624e-05, "loss": 1.1041, "step": 5620 }, { "epoch": 0.8327407407407408, "grad_norm": 1.4596067667007446, "learning_rate": 3.362490733876946e-05, "loss": 1.1719, "step": 5621 }, { "epoch": 0.8328888888888889, "grad_norm": 3.2699577808380127, "learning_rate": 3.359525574499629e-05, "loss": 1.3013, "step": 5622 }, { "epoch": 0.833037037037037, "grad_norm": 1.3315544128417969, "learning_rate": 3.356560415122313e-05, "loss": 0.9505, "step": 5623 }, { "epoch": 0.8331851851851851, "grad_norm": 1.5666136741638184, "learning_rate": 3.353595255744996e-05, "loss": 0.9916, "step": 5624 }, { "epoch": 0.8333333333333334, "grad_norm": 1.4195020198822021, "learning_rate": 3.35063009636768e-05, "loss": 0.8547, "step": 5625 }, { "epoch": 0.8334814814814815, "grad_norm": 
1.1384927034378052, "learning_rate": 3.3476649369903636e-05, "loss": 0.9563, "step": 5626 }, { "epoch": 0.8336296296296296, "grad_norm": 2.0354249477386475, "learning_rate": 3.344699777613047e-05, "loss": 0.9357, "step": 5627 }, { "epoch": 0.8337777777777777, "grad_norm": 1.5699747800827026, "learning_rate": 3.34173461823573e-05, "loss": 1.2679, "step": 5628 }, { "epoch": 0.833925925925926, "grad_norm": 1.2100545167922974, "learning_rate": 3.338769458858414e-05, "loss": 0.9108, "step": 5629 }, { "epoch": 0.8340740740740741, "grad_norm": 1.406457781791687, "learning_rate": 3.3358042994810974e-05, "loss": 0.8079, "step": 5630 }, { "epoch": 0.8342222222222222, "grad_norm": 1.5774122476577759, "learning_rate": 3.3328391401037804e-05, "loss": 1.158, "step": 5631 }, { "epoch": 0.8343703703703703, "grad_norm": 1.589264988899231, "learning_rate": 3.329873980726464e-05, "loss": 1.0718, "step": 5632 }, { "epoch": 0.8345185185185185, "grad_norm": 1.7401427030563354, "learning_rate": 3.326908821349148e-05, "loss": 1.12, "step": 5633 }, { "epoch": 0.8346666666666667, "grad_norm": 1.2324284315109253, "learning_rate": 3.323943661971831e-05, "loss": 1.0212, "step": 5634 }, { "epoch": 0.8348148148148148, "grad_norm": 2.1774914264678955, "learning_rate": 3.320978502594514e-05, "loss": 1.0251, "step": 5635 }, { "epoch": 0.834962962962963, "grad_norm": 1.6775509119033813, "learning_rate": 3.318013343217198e-05, "loss": 1.1762, "step": 5636 }, { "epoch": 0.8351111111111111, "grad_norm": 1.5039470195770264, "learning_rate": 3.315048183839882e-05, "loss": 1.084, "step": 5637 }, { "epoch": 0.8352592592592593, "grad_norm": 1.2579706907272339, "learning_rate": 3.312083024462565e-05, "loss": 0.851, "step": 5638 }, { "epoch": 0.8354074074074074, "grad_norm": 1.8096531629562378, "learning_rate": 3.3091178650852486e-05, "loss": 0.8298, "step": 5639 }, { "epoch": 0.8355555555555556, "grad_norm": 1.6695431470870972, "learning_rate": 3.306152705707932e-05, "loss": 1.0724, "step": 5640 }, { 
"epoch": 0.8357037037037037, "grad_norm": 0.89354407787323, "learning_rate": 3.3031875463306154e-05, "loss": 0.6487, "step": 5641 }, { "epoch": 0.8358518518518518, "grad_norm": 1.5002517700195312, "learning_rate": 3.300222386953299e-05, "loss": 0.8566, "step": 5642 }, { "epoch": 0.836, "grad_norm": 1.8203537464141846, "learning_rate": 3.297257227575982e-05, "loss": 1.044, "step": 5643 }, { "epoch": 0.8361481481481482, "grad_norm": 2.73797607421875, "learning_rate": 3.294292068198666e-05, "loss": 0.9902, "step": 5644 }, { "epoch": 0.8362962962962963, "grad_norm": 1.4074082374572754, "learning_rate": 3.291326908821349e-05, "loss": 0.967, "step": 5645 }, { "epoch": 0.8364444444444444, "grad_norm": 1.300226092338562, "learning_rate": 3.288361749444033e-05, "loss": 0.9087, "step": 5646 }, { "epoch": 0.8365925925925926, "grad_norm": 2.106065273284912, "learning_rate": 3.285396590066716e-05, "loss": 1.0165, "step": 5647 }, { "epoch": 0.8367407407407408, "grad_norm": 2.0137596130371094, "learning_rate": 3.2824314306894e-05, "loss": 0.9612, "step": 5648 }, { "epoch": 0.8368888888888889, "grad_norm": 1.530579924583435, "learning_rate": 3.2794662713120835e-05, "loss": 1.0016, "step": 5649 }, { "epoch": 0.837037037037037, "grad_norm": 1.4735548496246338, "learning_rate": 3.2765011119347666e-05, "loss": 0.8761, "step": 5650 }, { "epoch": 0.8371851851851851, "grad_norm": 2.0071825981140137, "learning_rate": 3.27353595255745e-05, "loss": 1.1396, "step": 5651 }, { "epoch": 0.8373333333333334, "grad_norm": 1.5885924100875854, "learning_rate": 3.2705707931801335e-05, "loss": 0.9224, "step": 5652 }, { "epoch": 0.8374814814814815, "grad_norm": 1.7556943893432617, "learning_rate": 3.267605633802817e-05, "loss": 1.0015, "step": 5653 }, { "epoch": 0.8376296296296296, "grad_norm": 1.3980185985565186, "learning_rate": 3.2646404744255003e-05, "loss": 0.887, "step": 5654 }, { "epoch": 0.8377777777777777, "grad_norm": 2.194582223892212, "learning_rate": 3.261675315048184e-05, "loss": 0.9047, 
"step": 5655 }, { "epoch": 0.837925925925926, "grad_norm": 1.9248734712600708, "learning_rate": 3.258710155670868e-05, "loss": 0.9987, "step": 5656 }, { "epoch": 0.8380740740740741, "grad_norm": 1.244874358177185, "learning_rate": 3.255744996293551e-05, "loss": 0.7646, "step": 5657 }, { "epoch": 0.8382222222222222, "grad_norm": 1.1763979196548462, "learning_rate": 3.252779836916234e-05, "loss": 0.9621, "step": 5658 }, { "epoch": 0.8383703703703703, "grad_norm": 2.883955955505371, "learning_rate": 3.249814677538918e-05, "loss": 1.2885, "step": 5659 }, { "epoch": 0.8385185185185186, "grad_norm": 1.3345526456832886, "learning_rate": 3.2468495181616016e-05, "loss": 1.2323, "step": 5660 }, { "epoch": 0.8386666666666667, "grad_norm": 1.8527706861495972, "learning_rate": 3.243884358784285e-05, "loss": 0.8831, "step": 5661 }, { "epoch": 0.8388148148148148, "grad_norm": 1.5353264808654785, "learning_rate": 3.2409191994069685e-05, "loss": 1.1162, "step": 5662 }, { "epoch": 0.8389629629629629, "grad_norm": 2.0129382610321045, "learning_rate": 3.2379540400296516e-05, "loss": 0.9302, "step": 5663 }, { "epoch": 0.8391111111111111, "grad_norm": 2.3754312992095947, "learning_rate": 3.234988880652335e-05, "loss": 1.0311, "step": 5664 }, { "epoch": 0.8392592592592593, "grad_norm": 2.907285451889038, "learning_rate": 3.2320237212750184e-05, "loss": 0.9833, "step": 5665 }, { "epoch": 0.8394074074074074, "grad_norm": 1.2558186054229736, "learning_rate": 3.229058561897702e-05, "loss": 0.8794, "step": 5666 }, { "epoch": 0.8395555555555556, "grad_norm": 2.4155330657958984, "learning_rate": 3.226093402520385e-05, "loss": 0.9888, "step": 5667 }, { "epoch": 0.8397037037037037, "grad_norm": 1.5688879489898682, "learning_rate": 3.223128243143069e-05, "loss": 1.0269, "step": 5668 }, { "epoch": 0.8398518518518518, "grad_norm": 3.373100757598877, "learning_rate": 3.220163083765753e-05, "loss": 1.1238, "step": 5669 }, { "epoch": 0.84, "grad_norm": 1.8652637004852295, "learning_rate": 
3.217197924388436e-05, "loss": 0.9321, "step": 5670 }, { "epoch": 0.8401481481481482, "grad_norm": 1.1480289697647095, "learning_rate": 3.214232765011119e-05, "loss": 0.9629, "step": 5671 }, { "epoch": 0.8402962962962963, "grad_norm": 1.8617579936981201, "learning_rate": 3.2112676056338034e-05, "loss": 1.0839, "step": 5672 }, { "epoch": 0.8404444444444444, "grad_norm": 1.8071740865707397, "learning_rate": 3.2083024462564865e-05, "loss": 1.2537, "step": 5673 }, { "epoch": 0.8405925925925926, "grad_norm": 2.2916259765625, "learning_rate": 3.2053372868791696e-05, "loss": 1.0515, "step": 5674 }, { "epoch": 0.8407407407407408, "grad_norm": 2.1010496616363525, "learning_rate": 3.2023721275018534e-05, "loss": 1.1199, "step": 5675 }, { "epoch": 0.8408888888888889, "grad_norm": 1.3914610147476196, "learning_rate": 3.199406968124537e-05, "loss": 1.0279, "step": 5676 }, { "epoch": 0.841037037037037, "grad_norm": 1.1546776294708252, "learning_rate": 3.19644180874722e-05, "loss": 0.8512, "step": 5677 }, { "epoch": 0.8411851851851851, "grad_norm": 1.5754714012145996, "learning_rate": 3.193476649369903e-05, "loss": 1.0605, "step": 5678 }, { "epoch": 0.8413333333333334, "grad_norm": 1.2197717428207397, "learning_rate": 3.190511489992587e-05, "loss": 0.8575, "step": 5679 }, { "epoch": 0.8414814814814815, "grad_norm": 1.5923357009887695, "learning_rate": 3.187546330615271e-05, "loss": 0.8947, "step": 5680 }, { "epoch": 0.8416296296296296, "grad_norm": 1.3048105239868164, "learning_rate": 3.184581171237954e-05, "loss": 1.2803, "step": 5681 }, { "epoch": 0.8417777777777777, "grad_norm": 4.217629909515381, "learning_rate": 3.181616011860638e-05, "loss": 1.2179, "step": 5682 }, { "epoch": 0.841925925925926, "grad_norm": 1.3836694955825806, "learning_rate": 3.1786508524833215e-05, "loss": 0.7614, "step": 5683 }, { "epoch": 0.8420740740740741, "grad_norm": 1.9527873992919922, "learning_rate": 3.1756856931060046e-05, "loss": 1.1067, "step": 5684 }, { "epoch": 0.8422222222222222, 
"grad_norm": 1.5900622606277466, "learning_rate": 3.1727205337286884e-05, "loss": 1.0342, "step": 5685 }, { "epoch": 0.8423703703703703, "grad_norm": 1.4586294889450073, "learning_rate": 3.1697553743513714e-05, "loss": 0.9812, "step": 5686 }, { "epoch": 0.8425185185185186, "grad_norm": 1.678809642791748, "learning_rate": 3.166790214974055e-05, "loss": 1.04, "step": 5687 }, { "epoch": 0.8426666666666667, "grad_norm": 1.2695871591567993, "learning_rate": 3.163825055596738e-05, "loss": 0.836, "step": 5688 }, { "epoch": 0.8428148148148148, "grad_norm": 1.399864912033081, "learning_rate": 3.160859896219422e-05, "loss": 0.8723, "step": 5689 }, { "epoch": 0.8429629629629629, "grad_norm": 1.1795293092727661, "learning_rate": 3.157894736842105e-05, "loss": 1.058, "step": 5690 }, { "epoch": 0.8431111111111111, "grad_norm": 1.6496189832687378, "learning_rate": 3.154929577464789e-05, "loss": 1.133, "step": 5691 }, { "epoch": 0.8432592592592593, "grad_norm": 1.6981966495513916, "learning_rate": 3.151964418087473e-05, "loss": 0.8794, "step": 5692 }, { "epoch": 0.8434074074074074, "grad_norm": 1.2521898746490479, "learning_rate": 3.148999258710156e-05, "loss": 0.9448, "step": 5693 }, { "epoch": 0.8435555555555555, "grad_norm": 1.4220592975616455, "learning_rate": 3.146034099332839e-05, "loss": 1.217, "step": 5694 }, { "epoch": 0.8437037037037037, "grad_norm": 4.723443508148193, "learning_rate": 3.1430689399555227e-05, "loss": 0.9995, "step": 5695 }, { "epoch": 0.8438518518518519, "grad_norm": 2.358628988265991, "learning_rate": 3.1401037805782064e-05, "loss": 1.1478, "step": 5696 }, { "epoch": 0.844, "grad_norm": 1.4681414365768433, "learning_rate": 3.1371386212008895e-05, "loss": 1.104, "step": 5697 }, { "epoch": 0.8441481481481482, "grad_norm": 1.7765322923660278, "learning_rate": 3.134173461823573e-05, "loss": 1.0113, "step": 5698 }, { "epoch": 0.8442962962962963, "grad_norm": 1.6960357427597046, "learning_rate": 3.131208302446257e-05, "loss": 1.0569, "step": 5699 }, { 
"epoch": 0.8444444444444444, "grad_norm": 3.481264591217041, "learning_rate": 3.12824314306894e-05, "loss": 0.9691, "step": 5700 }, { "epoch": 0.8445925925925926, "grad_norm": 1.8465306758880615, "learning_rate": 3.125277983691623e-05, "loss": 0.9778, "step": 5701 }, { "epoch": 0.8447407407407408, "grad_norm": 1.3541700839996338, "learning_rate": 3.122312824314307e-05, "loss": 1.0824, "step": 5702 }, { "epoch": 0.8448888888888889, "grad_norm": 4.360558986663818, "learning_rate": 3.119347664936991e-05, "loss": 0.8039, "step": 5703 }, { "epoch": 0.845037037037037, "grad_norm": 1.89080810546875, "learning_rate": 3.116382505559674e-05, "loss": 0.9669, "step": 5704 }, { "epoch": 0.8451851851851852, "grad_norm": 1.776655673980713, "learning_rate": 3.1134173461823576e-05, "loss": 1.0633, "step": 5705 }, { "epoch": 0.8453333333333334, "grad_norm": 1.923953890800476, "learning_rate": 3.110452186805041e-05, "loss": 1.054, "step": 5706 }, { "epoch": 0.8454814814814815, "grad_norm": 3.0366568565368652, "learning_rate": 3.1074870274277245e-05, "loss": 1.03, "step": 5707 }, { "epoch": 0.8456296296296296, "grad_norm": 1.6078895330429077, "learning_rate": 3.1045218680504076e-05, "loss": 1.0269, "step": 5708 }, { "epoch": 0.8457777777777777, "grad_norm": 3.041707992553711, "learning_rate": 3.1015567086730913e-05, "loss": 1.0908, "step": 5709 }, { "epoch": 0.845925925925926, "grad_norm": 9.95754623413086, "learning_rate": 3.0985915492957744e-05, "loss": 1.151, "step": 5710 }, { "epoch": 0.8460740740740741, "grad_norm": 1.799899935722351, "learning_rate": 3.095626389918458e-05, "loss": 1.1803, "step": 5711 }, { "epoch": 0.8462222222222222, "grad_norm": 1.8792227506637573, "learning_rate": 3.092661230541142e-05, "loss": 0.8073, "step": 5712 }, { "epoch": 0.8463703703703703, "grad_norm": 1.6597542762756348, "learning_rate": 3.089696071163825e-05, "loss": 1.1491, "step": 5713 }, { "epoch": 0.8465185185185186, "grad_norm": 2.065408945083618, "learning_rate": 3.086730911786508e-05, 
"loss": 0.9161, "step": 5714 }, { "epoch": 0.8466666666666667, "grad_norm": 2.674891948699951, "learning_rate": 3.0837657524091926e-05, "loss": 1.0146, "step": 5715 }, { "epoch": 0.8468148148148148, "grad_norm": 1.1968002319335938, "learning_rate": 3.080800593031876e-05, "loss": 0.9344, "step": 5716 }, { "epoch": 0.8469629629629629, "grad_norm": 1.9078673124313354, "learning_rate": 3.077835433654559e-05, "loss": 1.2448, "step": 5717 }, { "epoch": 0.8471111111111111, "grad_norm": 1.4666813611984253, "learning_rate": 3.0748702742772425e-05, "loss": 1.085, "step": 5718 }, { "epoch": 0.8472592592592593, "grad_norm": 1.6281707286834717, "learning_rate": 3.071905114899926e-05, "loss": 1.2932, "step": 5719 }, { "epoch": 0.8474074074074074, "grad_norm": 2.0884618759155273, "learning_rate": 3.0689399555226094e-05, "loss": 1.0515, "step": 5720 }, { "epoch": 0.8475555555555555, "grad_norm": 1.8514748811721802, "learning_rate": 3.0659747961452925e-05, "loss": 0.9292, "step": 5721 }, { "epoch": 0.8477037037037037, "grad_norm": 1.4120756387710571, "learning_rate": 3.063009636767977e-05, "loss": 0.9051, "step": 5722 }, { "epoch": 0.8478518518518519, "grad_norm": 1.574057936668396, "learning_rate": 3.06004447739066e-05, "loss": 0.9985, "step": 5723 }, { "epoch": 0.848, "grad_norm": 1.3220384120941162, "learning_rate": 3.057079318013343e-05, "loss": 0.9563, "step": 5724 }, { "epoch": 0.8481481481481481, "grad_norm": 1.1498291492462158, "learning_rate": 3.054114158636027e-05, "loss": 0.8015, "step": 5725 }, { "epoch": 0.8482962962962963, "grad_norm": 1.3494188785552979, "learning_rate": 3.0511489992587107e-05, "loss": 1.2001, "step": 5726 }, { "epoch": 0.8484444444444444, "grad_norm": 2.747594118118286, "learning_rate": 3.0481838398813938e-05, "loss": 1.2624, "step": 5727 }, { "epoch": 0.8485925925925926, "grad_norm": 2.4447944164276123, "learning_rate": 3.0452186805040772e-05, "loss": 1.0625, "step": 5728 }, { "epoch": 0.8487407407407408, "grad_norm": 1.318755865097046, 
"learning_rate": 3.0422535211267606e-05, "loss": 0.952, "step": 5729 }, { "epoch": 0.8488888888888889, "grad_norm": 1.4050874710083008, "learning_rate": 3.0392883617494444e-05, "loss": 1.0785, "step": 5730 }, { "epoch": 0.849037037037037, "grad_norm": 1.4720213413238525, "learning_rate": 3.0363232023721278e-05, "loss": 0.8125, "step": 5731 }, { "epoch": 0.8491851851851852, "grad_norm": 1.9184999465942383, "learning_rate": 3.0333580429948112e-05, "loss": 0.934, "step": 5732 }, { "epoch": 0.8493333333333334, "grad_norm": 1.6016595363616943, "learning_rate": 3.0303928836174943e-05, "loss": 1.0236, "step": 5733 }, { "epoch": 0.8494814814814815, "grad_norm": 1.7323766946792603, "learning_rate": 3.0274277242401784e-05, "loss": 0.8992, "step": 5734 }, { "epoch": 0.8496296296296296, "grad_norm": 1.5857726335525513, "learning_rate": 3.0244625648628615e-05, "loss": 0.9172, "step": 5735 }, { "epoch": 0.8497777777777777, "grad_norm": 1.6110647916793823, "learning_rate": 3.021497405485545e-05, "loss": 1.0478, "step": 5736 }, { "epoch": 0.849925925925926, "grad_norm": 1.5153177976608276, "learning_rate": 3.0185322461082284e-05, "loss": 0.8538, "step": 5737 }, { "epoch": 0.8500740740740741, "grad_norm": 1.643136739730835, "learning_rate": 3.015567086730912e-05, "loss": 1.0162, "step": 5738 }, { "epoch": 0.8502222222222222, "grad_norm": 1.9467730522155762, "learning_rate": 3.0126019273535956e-05, "loss": 1.2033, "step": 5739 }, { "epoch": 0.8503703703703703, "grad_norm": 1.8516819477081299, "learning_rate": 3.0096367679762787e-05, "loss": 1.0439, "step": 5740 }, { "epoch": 0.8505185185185186, "grad_norm": 1.1998674869537354, "learning_rate": 3.006671608598962e-05, "loss": 0.8788, "step": 5741 }, { "epoch": 0.8506666666666667, "grad_norm": 1.9696849584579468, "learning_rate": 3.003706449221646e-05, "loss": 0.9554, "step": 5742 }, { "epoch": 0.8508148148148148, "grad_norm": 1.493667483329773, "learning_rate": 3.0007412898443293e-05, "loss": 1.047, "step": 5743 }, { "epoch": 
0.8509629629629629, "grad_norm": 1.3543988466262817, "learning_rate": 2.9977761304670127e-05, "loss": 0.9608, "step": 5744 }, { "epoch": 0.8511111111111112, "grad_norm": 1.2830764055252075, "learning_rate": 2.994810971089696e-05, "loss": 0.9455, "step": 5745 }, { "epoch": 0.8512592592592593, "grad_norm": 2.551621675491333, "learning_rate": 2.99184581171238e-05, "loss": 1.0836, "step": 5746 }, { "epoch": 0.8514074074074074, "grad_norm": 1.251689076423645, "learning_rate": 2.9888806523350634e-05, "loss": 1.2618, "step": 5747 }, { "epoch": 0.8515555555555555, "grad_norm": 2.0711042881011963, "learning_rate": 2.9859154929577465e-05, "loss": 1.0205, "step": 5748 }, { "epoch": 0.8517037037037037, "grad_norm": 9.725003242492676, "learning_rate": 2.98295033358043e-05, "loss": 0.9207, "step": 5749 }, { "epoch": 0.8518518518518519, "grad_norm": 1.4738051891326904, "learning_rate": 2.9799851742031136e-05, "loss": 0.9769, "step": 5750 }, { "epoch": 0.852, "grad_norm": 1.4541064500808716, "learning_rate": 2.977020014825797e-05, "loss": 1.1133, "step": 5751 }, { "epoch": 0.8521481481481481, "grad_norm": 1.4564684629440308, "learning_rate": 2.9740548554484805e-05, "loss": 1.2501, "step": 5752 }, { "epoch": 0.8522962962962963, "grad_norm": 2.8018648624420166, "learning_rate": 2.9710896960711636e-05, "loss": 0.8952, "step": 5753 }, { "epoch": 0.8524444444444444, "grad_norm": 4.907220840454102, "learning_rate": 2.9681245366938477e-05, "loss": 0.8738, "step": 5754 }, { "epoch": 0.8525925925925926, "grad_norm": 1.850071907043457, "learning_rate": 2.9651593773165308e-05, "loss": 1.0036, "step": 5755 }, { "epoch": 0.8527407407407407, "grad_norm": 1.1384518146514893, "learning_rate": 2.9621942179392142e-05, "loss": 1.1831, "step": 5756 }, { "epoch": 0.8528888888888889, "grad_norm": 1.965689778327942, "learning_rate": 2.9592290585618977e-05, "loss": 1.1209, "step": 5757 }, { "epoch": 0.853037037037037, "grad_norm": 1.3774926662445068, "learning_rate": 2.9562638991845814e-05, "loss": 
0.8957, "step": 5758 }, { "epoch": 0.8531851851851852, "grad_norm": 2.0560662746429443, "learning_rate": 2.953298739807265e-05, "loss": 0.8915, "step": 5759 }, { "epoch": 0.8533333333333334, "grad_norm": 1.6404497623443604, "learning_rate": 2.9503335804299483e-05, "loss": 1.0045, "step": 5760 }, { "epoch": 0.8534814814814815, "grad_norm": 2.8091065883636475, "learning_rate": 2.9473684210526314e-05, "loss": 1.231, "step": 5761 }, { "epoch": 0.8536296296296296, "grad_norm": 1.8522920608520508, "learning_rate": 2.9444032616753155e-05, "loss": 0.8325, "step": 5762 }, { "epoch": 0.8537777777777777, "grad_norm": 1.9119120836257935, "learning_rate": 2.9414381022979986e-05, "loss": 1.1189, "step": 5763 }, { "epoch": 0.853925925925926, "grad_norm": 2.7033612728118896, "learning_rate": 2.938472942920682e-05, "loss": 0.9057, "step": 5764 }, { "epoch": 0.8540740740740741, "grad_norm": 2.591933012008667, "learning_rate": 2.9355077835433658e-05, "loss": 0.9156, "step": 5765 }, { "epoch": 0.8542222222222222, "grad_norm": 1.5084139108657837, "learning_rate": 2.9325426241660492e-05, "loss": 0.7997, "step": 5766 }, { "epoch": 0.8543703703703703, "grad_norm": 1.986857295036316, "learning_rate": 2.9295774647887326e-05, "loss": 1.1218, "step": 5767 }, { "epoch": 0.8545185185185186, "grad_norm": 1.9872719049453735, "learning_rate": 2.9266123054114157e-05, "loss": 0.936, "step": 5768 }, { "epoch": 0.8546666666666667, "grad_norm": 1.8185993432998657, "learning_rate": 2.9236471460340998e-05, "loss": 0.9352, "step": 5769 }, { "epoch": 0.8548148148148148, "grad_norm": 1.3699370622634888, "learning_rate": 2.920681986656783e-05, "loss": 1.0854, "step": 5770 }, { "epoch": 0.8549629629629629, "grad_norm": 1.5002169609069824, "learning_rate": 2.9177168272794663e-05, "loss": 1.1115, "step": 5771 }, { "epoch": 0.8551111111111112, "grad_norm": 1.9244588613510132, "learning_rate": 2.9147516679021498e-05, "loss": 0.9015, "step": 5772 }, { "epoch": 0.8552592592592593, "grad_norm": 1.6107194423675537, 
"learning_rate": 2.9117865085248335e-05, "loss": 1.0424, "step": 5773 }, { "epoch": 0.8554074074074074, "grad_norm": 1.9779789447784424, "learning_rate": 2.908821349147517e-05, "loss": 1.0823, "step": 5774 }, { "epoch": 0.8555555555555555, "grad_norm": 1.4399076700210571, "learning_rate": 2.9058561897702004e-05, "loss": 1.0582, "step": 5775 }, { "epoch": 0.8557037037037037, "grad_norm": 1.1391663551330566, "learning_rate": 2.9028910303928835e-05, "loss": 0.9975, "step": 5776 }, { "epoch": 0.8558518518518519, "grad_norm": 1.6162272691726685, "learning_rate": 2.8999258710155676e-05, "loss": 0.6114, "step": 5777 }, { "epoch": 0.856, "grad_norm": 1.2094829082489014, "learning_rate": 2.8969607116382507e-05, "loss": 1.0004, "step": 5778 }, { "epoch": 0.8561481481481481, "grad_norm": 1.7080848217010498, "learning_rate": 2.893995552260934e-05, "loss": 1.1552, "step": 5779 }, { "epoch": 0.8562962962962963, "grad_norm": 3.8012313842773438, "learning_rate": 2.8910303928836176e-05, "loss": 0.909, "step": 5780 }, { "epoch": 0.8564444444444445, "grad_norm": 2.1891396045684814, "learning_rate": 2.8880652335063013e-05, "loss": 0.7945, "step": 5781 }, { "epoch": 0.8565925925925926, "grad_norm": 3.9642629623413086, "learning_rate": 2.8851000741289848e-05, "loss": 0.9749, "step": 5782 }, { "epoch": 0.8567407407407407, "grad_norm": 1.8733007907867432, "learning_rate": 2.882134914751668e-05, "loss": 1.1285, "step": 5783 }, { "epoch": 0.8568888888888889, "grad_norm": 3.807276964187622, "learning_rate": 2.8791697553743513e-05, "loss": 1.1222, "step": 5784 }, { "epoch": 0.857037037037037, "grad_norm": 2.5071189403533936, "learning_rate": 2.876204595997035e-05, "loss": 1.2052, "step": 5785 }, { "epoch": 0.8571851851851852, "grad_norm": 1.5023587942123413, "learning_rate": 2.8732394366197185e-05, "loss": 0.981, "step": 5786 }, { "epoch": 0.8573333333333333, "grad_norm": 1.3831145763397217, "learning_rate": 2.870274277242402e-05, "loss": 0.9075, "step": 5787 }, { "epoch": 0.8574814814814815, 
"grad_norm": 1.405256748199463, "learning_rate": 2.8673091178650853e-05, "loss": 1.1884, "step": 5788 }, { "epoch": 0.8576296296296296, "grad_norm": 1.0629494190216064, "learning_rate": 2.864343958487769e-05, "loss": 0.8273, "step": 5789 }, { "epoch": 0.8577777777777778, "grad_norm": 1.526007056236267, "learning_rate": 2.8613787991104525e-05, "loss": 0.8813, "step": 5790 }, { "epoch": 0.857925925925926, "grad_norm": 2.1729397773742676, "learning_rate": 2.8584136397331356e-05, "loss": 1.1614, "step": 5791 }, { "epoch": 0.8580740740740741, "grad_norm": 1.637261152267456, "learning_rate": 2.855448480355819e-05, "loss": 0.9632, "step": 5792 }, { "epoch": 0.8582222222222222, "grad_norm": 1.7835174798965454, "learning_rate": 2.8524833209785028e-05, "loss": 0.7737, "step": 5793 }, { "epoch": 0.8583703703703703, "grad_norm": 1.6379073858261108, "learning_rate": 2.8495181616011862e-05, "loss": 1.1138, "step": 5794 }, { "epoch": 0.8585185185185186, "grad_norm": 1.29437255859375, "learning_rate": 2.8465530022238697e-05, "loss": 0.9225, "step": 5795 }, { "epoch": 0.8586666666666667, "grad_norm": 1.3875452280044556, "learning_rate": 2.8435878428465528e-05, "loss": 1.1082, "step": 5796 }, { "epoch": 0.8588148148148148, "grad_norm": 2.2187299728393555, "learning_rate": 2.840622683469237e-05, "loss": 1.4328, "step": 5797 }, { "epoch": 0.8589629629629629, "grad_norm": 1.1596548557281494, "learning_rate": 2.83765752409192e-05, "loss": 0.9621, "step": 5798 }, { "epoch": 0.8591111111111112, "grad_norm": 1.5259015560150146, "learning_rate": 2.8346923647146034e-05, "loss": 0.9985, "step": 5799 }, { "epoch": 0.8592592592592593, "grad_norm": 1.3542827367782593, "learning_rate": 2.8317272053372868e-05, "loss": 0.8894, "step": 5800 }, { "epoch": 0.8594074074074074, "grad_norm": 3.5634617805480957, "learning_rate": 2.8287620459599706e-05, "loss": 1.1314, "step": 5801 }, { "epoch": 0.8595555555555555, "grad_norm": 1.1892551183700562, "learning_rate": 2.825796886582654e-05, "loss": 0.994, 
"step": 5802 }, { "epoch": 0.8597037037037037, "grad_norm": 2.7257864475250244, "learning_rate": 2.8228317272053374e-05, "loss": 1.0517, "step": 5803 }, { "epoch": 0.8598518518518519, "grad_norm": 1.1691776514053345, "learning_rate": 2.8198665678280212e-05, "loss": 1.0076, "step": 5804 }, { "epoch": 0.86, "grad_norm": 2.0199103355407715, "learning_rate": 2.8169014084507046e-05, "loss": 0.8931, "step": 5805 }, { "epoch": 0.8601481481481481, "grad_norm": 2.6088061332702637, "learning_rate": 2.8139362490733877e-05, "loss": 1.0654, "step": 5806 }, { "epoch": 0.8602962962962963, "grad_norm": 2.0341060161590576, "learning_rate": 2.810971089696071e-05, "loss": 1.0152, "step": 5807 }, { "epoch": 0.8604444444444445, "grad_norm": 1.8954989910125732, "learning_rate": 2.808005930318755e-05, "loss": 1.0351, "step": 5808 }, { "epoch": 0.8605925925925926, "grad_norm": 1.435793161392212, "learning_rate": 2.8050407709414384e-05, "loss": 1.1166, "step": 5809 }, { "epoch": 0.8607407407407407, "grad_norm": 1.5432322025299072, "learning_rate": 2.8020756115641218e-05, "loss": 0.9407, "step": 5810 }, { "epoch": 0.8608888888888889, "grad_norm": 1.739850640296936, "learning_rate": 2.799110452186805e-05, "loss": 0.9118, "step": 5811 }, { "epoch": 0.861037037037037, "grad_norm": 2.085766553878784, "learning_rate": 2.796145292809489e-05, "loss": 1.0833, "step": 5812 }, { "epoch": 0.8611851851851852, "grad_norm": 1.909110188484192, "learning_rate": 2.793180133432172e-05, "loss": 1.1908, "step": 5813 }, { "epoch": 0.8613333333333333, "grad_norm": 1.3825736045837402, "learning_rate": 2.7902149740548555e-05, "loss": 0.9794, "step": 5814 }, { "epoch": 0.8614814814814815, "grad_norm": 1.6004115343093872, "learning_rate": 2.787249814677539e-05, "loss": 0.8425, "step": 5815 }, { "epoch": 0.8616296296296296, "grad_norm": 1.5785696506500244, "learning_rate": 2.7842846553002227e-05, "loss": 0.7433, "step": 5816 }, { "epoch": 0.8617777777777778, "grad_norm": 2.8138880729675293, "learning_rate": 
2.781319495922906e-05, "loss": 1.0487, "step": 5817 }, { "epoch": 0.8619259259259259, "grad_norm": 6.772069931030273, "learning_rate": 2.7783543365455896e-05, "loss": 1.1717, "step": 5818 }, { "epoch": 0.8620740740740741, "grad_norm": 1.6086390018463135, "learning_rate": 2.7753891771682727e-05, "loss": 1.0606, "step": 5819 }, { "epoch": 0.8622222222222222, "grad_norm": 3.6303298473358154, "learning_rate": 2.7724240177909568e-05, "loss": 0.8553, "step": 5820 }, { "epoch": 0.8623703703703703, "grad_norm": 1.7301232814788818, "learning_rate": 2.76945885841364e-05, "loss": 0.9192, "step": 5821 }, { "epoch": 0.8625185185185186, "grad_norm": 1.4979515075683594, "learning_rate": 2.7664936990363233e-05, "loss": 1.1056, "step": 5822 }, { "epoch": 0.8626666666666667, "grad_norm": 3.1390674114227295, "learning_rate": 2.7635285396590067e-05, "loss": 0.9098, "step": 5823 }, { "epoch": 0.8628148148148148, "grad_norm": 1.3511881828308105, "learning_rate": 2.7605633802816905e-05, "loss": 0.7656, "step": 5824 }, { "epoch": 0.8629629629629629, "grad_norm": 1.104941964149475, "learning_rate": 2.757598220904374e-05, "loss": 0.9358, "step": 5825 }, { "epoch": 0.8631111111111112, "grad_norm": 2.2065224647521973, "learning_rate": 2.754633061527057e-05, "loss": 1.312, "step": 5826 }, { "epoch": 0.8632592592592593, "grad_norm": 1.4837945699691772, "learning_rate": 2.7516679021497404e-05, "loss": 1.1161, "step": 5827 }, { "epoch": 0.8634074074074074, "grad_norm": 1.9617424011230469, "learning_rate": 2.7487027427724242e-05, "loss": 0.8527, "step": 5828 }, { "epoch": 0.8635555555555555, "grad_norm": 1.9595869779586792, "learning_rate": 2.7457375833951076e-05, "loss": 0.9336, "step": 5829 }, { "epoch": 0.8637037037037038, "grad_norm": 2.233665704727173, "learning_rate": 2.742772424017791e-05, "loss": 0.8778, "step": 5830 }, { "epoch": 0.8638518518518519, "grad_norm": 2.3654582500457764, "learning_rate": 2.7398072646404745e-05, "loss": 0.753, "step": 5831 }, { "epoch": 0.864, "grad_norm": 
1.868675708770752, "learning_rate": 2.7368421052631583e-05, "loss": 0.9666, "step": 5832 }, { "epoch": 0.8641481481481481, "grad_norm": 2.269033193588257, "learning_rate": 2.7338769458858417e-05, "loss": 1.0268, "step": 5833 }, { "epoch": 0.8642962962962963, "grad_norm": 1.8759225606918335, "learning_rate": 2.7309117865085248e-05, "loss": 0.8583, "step": 5834 }, { "epoch": 0.8644444444444445, "grad_norm": 1.870804786682129, "learning_rate": 2.7279466271312082e-05, "loss": 1.0527, "step": 5835 }, { "epoch": 0.8645925925925926, "grad_norm": 1.6852681636810303, "learning_rate": 2.724981467753892e-05, "loss": 0.9133, "step": 5836 }, { "epoch": 0.8647407407407407, "grad_norm": 2.536195755004883, "learning_rate": 2.7220163083765754e-05, "loss": 1.1145, "step": 5837 }, { "epoch": 0.8648888888888889, "grad_norm": 2.8536055088043213, "learning_rate": 2.719051148999259e-05, "loss": 1.2185, "step": 5838 }, { "epoch": 0.865037037037037, "grad_norm": 2.1045289039611816, "learning_rate": 2.716085989621942e-05, "loss": 1.0956, "step": 5839 }, { "epoch": 0.8651851851851852, "grad_norm": 1.3994905948638916, "learning_rate": 2.713120830244626e-05, "loss": 0.9142, "step": 5840 }, { "epoch": 0.8653333333333333, "grad_norm": 1.5658845901489258, "learning_rate": 2.710155670867309e-05, "loss": 0.8722, "step": 5841 }, { "epoch": 0.8654814814814815, "grad_norm": 1.4372714757919312, "learning_rate": 2.7071905114899926e-05, "loss": 1.0431, "step": 5842 }, { "epoch": 0.8656296296296296, "grad_norm": 1.67061185836792, "learning_rate": 2.704225352112676e-05, "loss": 1.0053, "step": 5843 }, { "epoch": 0.8657777777777778, "grad_norm": 1.4704614877700806, "learning_rate": 2.7012601927353598e-05, "loss": 1.0393, "step": 5844 }, { "epoch": 0.8659259259259259, "grad_norm": 1.3923258781433105, "learning_rate": 2.6982950333580432e-05, "loss": 0.6711, "step": 5845 }, { "epoch": 0.8660740740740741, "grad_norm": 1.9425890445709229, "learning_rate": 2.6953298739807266e-05, "loss": 0.9177, "step": 5846 }, { 
"epoch": 0.8662222222222222, "grad_norm": 1.7985904216766357, "learning_rate": 2.6923647146034104e-05, "loss": 1.1153, "step": 5847 }, { "epoch": 0.8663703703703703, "grad_norm": 2.1547863483428955, "learning_rate": 2.6893995552260938e-05, "loss": 1.1189, "step": 5848 }, { "epoch": 0.8665185185185185, "grad_norm": 2.150320291519165, "learning_rate": 2.686434395848777e-05, "loss": 0.8659, "step": 5849 }, { "epoch": 0.8666666666666667, "grad_norm": 1.5663039684295654, "learning_rate": 2.6834692364714603e-05, "loss": 0.9349, "step": 5850 }, { "epoch": 0.8668148148148148, "grad_norm": 1.7574503421783447, "learning_rate": 2.680504077094144e-05, "loss": 1.018, "step": 5851 }, { "epoch": 0.8669629629629629, "grad_norm": 1.6053471565246582, "learning_rate": 2.6775389177168275e-05, "loss": 0.9127, "step": 5852 }, { "epoch": 0.8671111111111112, "grad_norm": 1.1548551321029663, "learning_rate": 2.674573758339511e-05, "loss": 0.9632, "step": 5853 }, { "epoch": 0.8672592592592593, "grad_norm": 2.2477433681488037, "learning_rate": 2.671608598962194e-05, "loss": 1.0174, "step": 5854 }, { "epoch": 0.8674074074074074, "grad_norm": 1.5156891345977783, "learning_rate": 2.668643439584878e-05, "loss": 1.2504, "step": 5855 }, { "epoch": 0.8675555555555555, "grad_norm": 1.7786145210266113, "learning_rate": 2.6656782802075612e-05, "loss": 0.9207, "step": 5856 }, { "epoch": 0.8677037037037038, "grad_norm": 1.5490436553955078, "learning_rate": 2.6627131208302447e-05, "loss": 1.1374, "step": 5857 }, { "epoch": 0.8678518518518519, "grad_norm": 2.0994009971618652, "learning_rate": 2.659747961452928e-05, "loss": 0.818, "step": 5858 }, { "epoch": 0.868, "grad_norm": 1.4977202415466309, "learning_rate": 2.656782802075612e-05, "loss": 0.89, "step": 5859 }, { "epoch": 0.8681481481481481, "grad_norm": 2.5819857120513916, "learning_rate": 2.6538176426982953e-05, "loss": 1.037, "step": 5860 }, { "epoch": 0.8682962962962963, "grad_norm": 1.7890233993530273, "learning_rate": 2.6508524833209787e-05, 
"loss": 0.8482, "step": 5861 }, { "epoch": 0.8684444444444445, "grad_norm": 1.3251336812973022, "learning_rate": 2.6478873239436618e-05, "loss": 1.1135, "step": 5862 }, { "epoch": 0.8685925925925926, "grad_norm": 1.270087718963623, "learning_rate": 2.644922164566346e-05, "loss": 0.9451, "step": 5863 }, { "epoch": 0.8687407407407407, "grad_norm": 2.885849714279175, "learning_rate": 2.641957005189029e-05, "loss": 0.9745, "step": 5864 }, { "epoch": 0.8688888888888889, "grad_norm": 1.460054874420166, "learning_rate": 2.6389918458117125e-05, "loss": 0.8953, "step": 5865 }, { "epoch": 0.869037037037037, "grad_norm": 1.209641695022583, "learning_rate": 2.636026686434396e-05, "loss": 1.2016, "step": 5866 }, { "epoch": 0.8691851851851852, "grad_norm": 1.343348741531372, "learning_rate": 2.6330615270570797e-05, "loss": 0.9878, "step": 5867 }, { "epoch": 0.8693333333333333, "grad_norm": 1.8933838605880737, "learning_rate": 2.630096367679763e-05, "loss": 0.7703, "step": 5868 }, { "epoch": 0.8694814814814815, "grad_norm": 1.4754409790039062, "learning_rate": 2.6271312083024462e-05, "loss": 1.235, "step": 5869 }, { "epoch": 0.8696296296296296, "grad_norm": 1.4059656858444214, "learning_rate": 2.6241660489251296e-05, "loss": 1.0932, "step": 5870 }, { "epoch": 0.8697777777777778, "grad_norm": 1.4099831581115723, "learning_rate": 2.6212008895478134e-05, "loss": 0.8421, "step": 5871 }, { "epoch": 0.8699259259259259, "grad_norm": 1.7192801237106323, "learning_rate": 2.6182357301704968e-05, "loss": 1.0589, "step": 5872 }, { "epoch": 0.8700740740740741, "grad_norm": 1.7932615280151367, "learning_rate": 2.6152705707931802e-05, "loss": 0.9618, "step": 5873 }, { "epoch": 0.8702222222222222, "grad_norm": 1.4482512474060059, "learning_rate": 2.6123054114158637e-05, "loss": 0.9546, "step": 5874 }, { "epoch": 0.8703703703703703, "grad_norm": 1.6058628559112549, "learning_rate": 2.6093402520385474e-05, "loss": 1.1576, "step": 5875 }, { "epoch": 0.8705185185185185, "grad_norm": 
1.2644472122192383, "learning_rate": 2.606375092661231e-05, "loss": 1.1664, "step": 5876 }, { "epoch": 0.8706666666666667, "grad_norm": 2.2129993438720703, "learning_rate": 2.603409933283914e-05, "loss": 0.8426, "step": 5877 }, { "epoch": 0.8708148148148148, "grad_norm": 1.7647929191589355, "learning_rate": 2.6004447739065974e-05, "loss": 0.8964, "step": 5878 }, { "epoch": 0.8709629629629629, "grad_norm": 2.011066436767578, "learning_rate": 2.597479614529281e-05, "loss": 0.9673, "step": 5879 }, { "epoch": 0.8711111111111111, "grad_norm": 2.7519032955169678, "learning_rate": 2.5945144551519646e-05, "loss": 0.8212, "step": 5880 }, { "epoch": 0.8712592592592593, "grad_norm": 1.8849031925201416, "learning_rate": 2.591549295774648e-05, "loss": 1.0938, "step": 5881 }, { "epoch": 0.8714074074074074, "grad_norm": 2.9380102157592773, "learning_rate": 2.588584136397331e-05, "loss": 1.1205, "step": 5882 }, { "epoch": 0.8715555555555555, "grad_norm": 1.3827793598175049, "learning_rate": 2.5856189770200152e-05, "loss": 0.8481, "step": 5883 }, { "epoch": 0.8717037037037038, "grad_norm": 1.1526957750320435, "learning_rate": 2.5826538176426983e-05, "loss": 0.828, "step": 5884 }, { "epoch": 0.8718518518518519, "grad_norm": 2.1674742698669434, "learning_rate": 2.5796886582653817e-05, "loss": 0.9764, "step": 5885 }, { "epoch": 0.872, "grad_norm": 3.5550127029418945, "learning_rate": 2.576723498888065e-05, "loss": 0.9491, "step": 5886 }, { "epoch": 0.8721481481481481, "grad_norm": 1.5558884143829346, "learning_rate": 2.573758339510749e-05, "loss": 1.2718, "step": 5887 }, { "epoch": 0.8722962962962963, "grad_norm": 1.6453651189804077, "learning_rate": 2.5707931801334323e-05, "loss": 0.8926, "step": 5888 }, { "epoch": 0.8724444444444445, "grad_norm": 2.0549097061157227, "learning_rate": 2.5678280207561158e-05, "loss": 1.0412, "step": 5889 }, { "epoch": 0.8725925925925926, "grad_norm": 1.3418998718261719, "learning_rate": 2.5648628613787995e-05, "loss": 0.9795, "step": 5890 }, { "epoch": 
0.8727407407407407, "grad_norm": 1.3054873943328857, "learning_rate": 2.561897702001483e-05, "loss": 1.0158, "step": 5891 }, { "epoch": 0.8728888888888889, "grad_norm": 1.6010377407073975, "learning_rate": 2.558932542624166e-05, "loss": 1.0346, "step": 5892 }, { "epoch": 0.8730370370370371, "grad_norm": 1.4372097253799438, "learning_rate": 2.5559673832468495e-05, "loss": 1.0819, "step": 5893 }, { "epoch": 0.8731851851851852, "grad_norm": 1.422444462776184, "learning_rate": 2.5530022238695333e-05, "loss": 0.9573, "step": 5894 }, { "epoch": 0.8733333333333333, "grad_norm": 1.6464009284973145, "learning_rate": 2.5500370644922167e-05, "loss": 1.107, "step": 5895 }, { "epoch": 0.8734814814814815, "grad_norm": 1.4371873140335083, "learning_rate": 2.5470719051149e-05, "loss": 0.9562, "step": 5896 }, { "epoch": 0.8736296296296296, "grad_norm": 1.4094585180282593, "learning_rate": 2.5441067457375832e-05, "loss": 0.9187, "step": 5897 }, { "epoch": 0.8737777777777778, "grad_norm": 1.658033013343811, "learning_rate": 2.5411415863602673e-05, "loss": 1.1806, "step": 5898 }, { "epoch": 0.8739259259259259, "grad_norm": 1.6226048469543457, "learning_rate": 2.5381764269829504e-05, "loss": 0.93, "step": 5899 }, { "epoch": 0.8740740740740741, "grad_norm": 1.6565712690353394, "learning_rate": 2.535211267605634e-05, "loss": 1.1747, "step": 5900 }, { "epoch": 0.8742222222222222, "grad_norm": 3.0640833377838135, "learning_rate": 2.5322461082283173e-05, "loss": 1.0731, "step": 5901 }, { "epoch": 0.8743703703703704, "grad_norm": 1.7662317752838135, "learning_rate": 2.529280948851001e-05, "loss": 1.0931, "step": 5902 }, { "epoch": 0.8745185185185185, "grad_norm": 1.674347996711731, "learning_rate": 2.5263157894736845e-05, "loss": 1.0951, "step": 5903 }, { "epoch": 0.8746666666666667, "grad_norm": 1.3502625226974487, "learning_rate": 2.523350630096368e-05, "loss": 1.0802, "step": 5904 }, { "epoch": 0.8748148148148148, "grad_norm": 3.889312267303467, "learning_rate": 2.520385470719051e-05, 
"loss": 0.9884, "step": 5905 }, { "epoch": 0.8749629629629629, "grad_norm": 1.6618272066116333, "learning_rate": 2.517420311341735e-05, "loss": 0.9486, "step": 5906 }, { "epoch": 0.8751111111111111, "grad_norm": 2.9989893436431885, "learning_rate": 2.5144551519644182e-05, "loss": 0.9962, "step": 5907 }, { "epoch": 0.8752592592592593, "grad_norm": 1.588752269744873, "learning_rate": 2.5114899925871016e-05, "loss": 0.9569, "step": 5908 }, { "epoch": 0.8754074074074074, "grad_norm": 1.2568234205245972, "learning_rate": 2.508524833209785e-05, "loss": 0.886, "step": 5909 }, { "epoch": 0.8755555555555555, "grad_norm": 2.087700128555298, "learning_rate": 2.5055596738324688e-05, "loss": 0.9015, "step": 5910 }, { "epoch": 0.8757037037037037, "grad_norm": 1.795215129852295, "learning_rate": 2.5025945144551522e-05, "loss": 0.8777, "step": 5911 }, { "epoch": 0.8758518518518519, "grad_norm": 1.0963869094848633, "learning_rate": 2.4996293550778353e-05, "loss": 1.1008, "step": 5912 }, { "epoch": 0.876, "grad_norm": 1.1991289854049683, "learning_rate": 2.496664195700519e-05, "loss": 0.9306, "step": 5913 }, { "epoch": 0.8761481481481481, "grad_norm": 1.3601399660110474, "learning_rate": 2.4936990363232025e-05, "loss": 1.1353, "step": 5914 }, { "epoch": 0.8762962962962964, "grad_norm": 1.3399142026901245, "learning_rate": 2.490733876945886e-05, "loss": 0.8172, "step": 5915 }, { "epoch": 0.8764444444444445, "grad_norm": 2.9210901260375977, "learning_rate": 2.4877687175685694e-05, "loss": 0.868, "step": 5916 }, { "epoch": 0.8765925925925926, "grad_norm": 1.5540348291397095, "learning_rate": 2.4848035581912528e-05, "loss": 1.0308, "step": 5917 }, { "epoch": 0.8767407407407407, "grad_norm": 1.3015390634536743, "learning_rate": 2.4818383988139363e-05, "loss": 0.9215, "step": 5918 }, { "epoch": 0.8768888888888889, "grad_norm": 2.2629032135009766, "learning_rate": 2.47887323943662e-05, "loss": 1.2229, "step": 5919 }, { "epoch": 0.8770370370370371, "grad_norm": 2.0165510177612305, 
"learning_rate": 2.475908080059303e-05, "loss": 1.0342, "step": 5920 }, { "epoch": 0.8771851851851852, "grad_norm": 1.203102946281433, "learning_rate": 2.472942920681987e-05, "loss": 0.7615, "step": 5921 }, { "epoch": 0.8773333333333333, "grad_norm": 1.7544171810150146, "learning_rate": 2.46997776130467e-05, "loss": 0.9569, "step": 5922 }, { "epoch": 0.8774814814814815, "grad_norm": 1.2334192991256714, "learning_rate": 2.4670126019273537e-05, "loss": 1.0773, "step": 5923 }, { "epoch": 0.8776296296296296, "grad_norm": 1.4766578674316406, "learning_rate": 2.464047442550037e-05, "loss": 0.9939, "step": 5924 }, { "epoch": 0.8777777777777778, "grad_norm": 1.7966846227645874, "learning_rate": 2.4610822831727206e-05, "loss": 1.0282, "step": 5925 }, { "epoch": 0.8779259259259259, "grad_norm": 4.310170650482178, "learning_rate": 2.458117123795404e-05, "loss": 1.1213, "step": 5926 }, { "epoch": 0.8780740740740741, "grad_norm": 2.1789815425872803, "learning_rate": 2.4551519644180875e-05, "loss": 0.927, "step": 5927 }, { "epoch": 0.8782222222222222, "grad_norm": 1.471701741218567, "learning_rate": 2.4521868050407712e-05, "loss": 0.9364, "step": 5928 }, { "epoch": 0.8783703703703704, "grad_norm": 1.8487812280654907, "learning_rate": 2.4492216456634547e-05, "loss": 1.0671, "step": 5929 }, { "epoch": 0.8785185185185185, "grad_norm": 1.4639313220977783, "learning_rate": 2.446256486286138e-05, "loss": 1.2213, "step": 5930 }, { "epoch": 0.8786666666666667, "grad_norm": 1.207892656326294, "learning_rate": 2.4432913269088215e-05, "loss": 0.8711, "step": 5931 }, { "epoch": 0.8788148148148148, "grad_norm": 1.275417447090149, "learning_rate": 2.440326167531505e-05, "loss": 0.8091, "step": 5932 }, { "epoch": 0.878962962962963, "grad_norm": 1.6400730609893799, "learning_rate": 2.4373610081541884e-05, "loss": 0.9387, "step": 5933 }, { "epoch": 0.8791111111111111, "grad_norm": 2.544860363006592, "learning_rate": 2.434395848776872e-05, "loss": 0.7733, "step": 5934 }, { "epoch": 
0.8792592592592593, "grad_norm": 2.9975552558898926, "learning_rate": 2.4314306893995552e-05, "loss": 0.992, "step": 5935 }, { "epoch": 0.8794074074074074, "grad_norm": 4.002200603485107, "learning_rate": 2.428465530022239e-05, "loss": 0.9922, "step": 5936 }, { "epoch": 0.8795555555555555, "grad_norm": 1.1059911251068115, "learning_rate": 2.425500370644922e-05, "loss": 0.9983, "step": 5937 }, { "epoch": 0.8797037037037037, "grad_norm": 1.6267253160476685, "learning_rate": 2.422535211267606e-05, "loss": 0.8535, "step": 5938 }, { "epoch": 0.8798518518518519, "grad_norm": 1.4666725397109985, "learning_rate": 2.4195700518902893e-05, "loss": 1.1576, "step": 5939 }, { "epoch": 0.88, "grad_norm": 1.4765816926956177, "learning_rate": 2.4166048925129727e-05, "loss": 0.882, "step": 5940 }, { "epoch": 0.8801481481481481, "grad_norm": 1.4435561895370483, "learning_rate": 2.413639733135656e-05, "loss": 1.1641, "step": 5941 }, { "epoch": 0.8802962962962962, "grad_norm": 2.0997135639190674, "learning_rate": 2.4106745737583396e-05, "loss": 0.8, "step": 5942 }, { "epoch": 0.8804444444444445, "grad_norm": 2.8202171325683594, "learning_rate": 2.407709414381023e-05, "loss": 0.8823, "step": 5943 }, { "epoch": 0.8805925925925926, "grad_norm": 2.149346113204956, "learning_rate": 2.4047442550037068e-05, "loss": 0.9647, "step": 5944 }, { "epoch": 0.8807407407407407, "grad_norm": 1.2742054462432861, "learning_rate": 2.40177909562639e-05, "loss": 0.8836, "step": 5945 }, { "epoch": 0.8808888888888889, "grad_norm": 1.5382885932922363, "learning_rate": 2.3988139362490736e-05, "loss": 1.3162, "step": 5946 }, { "epoch": 0.8810370370370371, "grad_norm": 1.5108246803283691, "learning_rate": 2.395848776871757e-05, "loss": 1.0424, "step": 5947 }, { "epoch": 0.8811851851851852, "grad_norm": 1.398380994796753, "learning_rate": 2.3928836174944405e-05, "loss": 0.8191, "step": 5948 }, { "epoch": 0.8813333333333333, "grad_norm": 1.1411432027816772, "learning_rate": 2.389918458117124e-05, "loss": 0.9616, 
"step": 5949 }, { "epoch": 0.8814814814814815, "grad_norm": 2.4727354049682617, "learning_rate": 2.3869532987398074e-05, "loss": 0.9446, "step": 5950 }, { "epoch": 0.8816296296296297, "grad_norm": 1.8627889156341553, "learning_rate": 2.3839881393624908e-05, "loss": 1.0192, "step": 5951 }, { "epoch": 0.8817777777777778, "grad_norm": 1.4378087520599365, "learning_rate": 2.3810229799851742e-05, "loss": 0.9181, "step": 5952 }, { "epoch": 0.8819259259259259, "grad_norm": 1.4933042526245117, "learning_rate": 2.3780578206078576e-05, "loss": 1.2257, "step": 5953 }, { "epoch": 0.8820740740740741, "grad_norm": 1.9550085067749023, "learning_rate": 2.3750926612305414e-05, "loss": 0.8789, "step": 5954 }, { "epoch": 0.8822222222222222, "grad_norm": 1.4890142679214478, "learning_rate": 2.3721275018532245e-05, "loss": 0.998, "step": 5955 }, { "epoch": 0.8823703703703704, "grad_norm": 2.3651130199432373, "learning_rate": 2.3691623424759083e-05, "loss": 1.2468, "step": 5956 }, { "epoch": 0.8825185185185185, "grad_norm": 4.208375930786133, "learning_rate": 2.3661971830985917e-05, "loss": 1.0645, "step": 5957 }, { "epoch": 0.8826666666666667, "grad_norm": 1.6968998908996582, "learning_rate": 2.363232023721275e-05, "loss": 0.8437, "step": 5958 }, { "epoch": 0.8828148148148148, "grad_norm": 2.545179843902588, "learning_rate": 2.3602668643439586e-05, "loss": 1.2186, "step": 5959 }, { "epoch": 0.882962962962963, "grad_norm": 1.222191572189331, "learning_rate": 2.357301704966642e-05, "loss": 1.1235, "step": 5960 }, { "epoch": 0.8831111111111111, "grad_norm": 1.57589852809906, "learning_rate": 2.3543365455893254e-05, "loss": 1.2609, "step": 5961 }, { "epoch": 0.8832592592592593, "grad_norm": 1.8529330492019653, "learning_rate": 2.3513713862120092e-05, "loss": 1.0748, "step": 5962 }, { "epoch": 0.8834074074074074, "grad_norm": 1.7854204177856445, "learning_rate": 2.3484062268346923e-05, "loss": 1.2563, "step": 5963 }, { "epoch": 0.8835555555555555, "grad_norm": 3.024566173553467, 
"learning_rate": 2.345441067457376e-05, "loss": 0.9598, "step": 5964 }, { "epoch": 0.8837037037037037, "grad_norm": 1.2156152725219727, "learning_rate": 2.342475908080059e-05, "loss": 1.0619, "step": 5965 }, { "epoch": 0.8838518518518519, "grad_norm": 1.3999546766281128, "learning_rate": 2.339510748702743e-05, "loss": 0.8424, "step": 5966 }, { "epoch": 0.884, "grad_norm": 1.7890654802322388, "learning_rate": 2.3365455893254263e-05, "loss": 0.9101, "step": 5967 }, { "epoch": 0.8841481481481481, "grad_norm": 2.8123888969421387, "learning_rate": 2.3335804299481098e-05, "loss": 1.2402, "step": 5968 }, { "epoch": 0.8842962962962962, "grad_norm": 1.3883906602859497, "learning_rate": 2.3306152705707935e-05, "loss": 0.8954, "step": 5969 }, { "epoch": 0.8844444444444445, "grad_norm": 1.5781482458114624, "learning_rate": 2.3276501111934766e-05, "loss": 0.9034, "step": 5970 }, { "epoch": 0.8845925925925926, "grad_norm": 1.5325167179107666, "learning_rate": 2.3246849518161604e-05, "loss": 0.9061, "step": 5971 }, { "epoch": 0.8847407407407407, "grad_norm": 1.437687873840332, "learning_rate": 2.3217197924388438e-05, "loss": 1.0718, "step": 5972 }, { "epoch": 0.8848888888888888, "grad_norm": 1.120303988456726, "learning_rate": 2.3187546330615273e-05, "loss": 0.955, "step": 5973 }, { "epoch": 0.8850370370370371, "grad_norm": 2.4920289516448975, "learning_rate": 2.3157894736842107e-05, "loss": 1.0021, "step": 5974 }, { "epoch": 0.8851851851851852, "grad_norm": 1.659559726715088, "learning_rate": 2.312824314306894e-05, "loss": 0.9747, "step": 5975 }, { "epoch": 0.8853333333333333, "grad_norm": 2.555039405822754, "learning_rate": 2.3098591549295775e-05, "loss": 0.788, "step": 5976 }, { "epoch": 0.8854814814814815, "grad_norm": 1.2507604360580444, "learning_rate": 2.3068939955522613e-05, "loss": 1.0211, "step": 5977 }, { "epoch": 0.8856296296296297, "grad_norm": 1.7848315238952637, "learning_rate": 2.3039288361749444e-05, "loss": 0.9813, "step": 5978 }, { "epoch": 0.8857777777777778, 
"grad_norm": 1.4910756349563599, "learning_rate": 2.300963676797628e-05, "loss": 0.9126, "step": 5979 }, { "epoch": 0.8859259259259259, "grad_norm": 2.5473949909210205, "learning_rate": 2.2979985174203113e-05, "loss": 1.2652, "step": 5980 }, { "epoch": 0.8860740740740741, "grad_norm": 1.586308479309082, "learning_rate": 2.295033358042995e-05, "loss": 0.8096, "step": 5981 }, { "epoch": 0.8862222222222222, "grad_norm": 1.499227523803711, "learning_rate": 2.2920681986656785e-05, "loss": 1.1573, "step": 5982 }, { "epoch": 0.8863703703703704, "grad_norm": 1.1961390972137451, "learning_rate": 2.289103039288362e-05, "loss": 1.1098, "step": 5983 }, { "epoch": 0.8865185185185185, "grad_norm": 2.57843279838562, "learning_rate": 2.2861378799110453e-05, "loss": 0.9151, "step": 5984 }, { "epoch": 0.8866666666666667, "grad_norm": 1.743828296661377, "learning_rate": 2.2831727205337287e-05, "loss": 1.0712, "step": 5985 }, { "epoch": 0.8868148148148148, "grad_norm": 1.6296634674072266, "learning_rate": 2.2802075611564122e-05, "loss": 0.8652, "step": 5986 }, { "epoch": 0.886962962962963, "grad_norm": 1.3595842123031616, "learning_rate": 2.277242401779096e-05, "loss": 1.0706, "step": 5987 }, { "epoch": 0.8871111111111111, "grad_norm": 1.528507947921753, "learning_rate": 2.274277242401779e-05, "loss": 0.9773, "step": 5988 }, { "epoch": 0.8872592592592593, "grad_norm": 2.0428545475006104, "learning_rate": 2.2713120830244628e-05, "loss": 0.9653, "step": 5989 }, { "epoch": 0.8874074074074074, "grad_norm": 1.4239282608032227, "learning_rate": 2.2683469236471462e-05, "loss": 1.0398, "step": 5990 }, { "epoch": 0.8875555555555555, "grad_norm": 1.3365000486373901, "learning_rate": 2.2653817642698297e-05, "loss": 0.8188, "step": 5991 }, { "epoch": 0.8877037037037037, "grad_norm": 2.038303852081299, "learning_rate": 2.262416604892513e-05, "loss": 1.1364, "step": 5992 }, { "epoch": 0.8878518518518519, "grad_norm": 1.3759441375732422, "learning_rate": 2.2594514455151965e-05, "loss": 1.0479, 
"step": 5993 }, { "epoch": 0.888, "grad_norm": 1.4477378129959106, "learning_rate": 2.25648628613788e-05, "loss": 1.0599, "step": 5994 }, { "epoch": 0.8881481481481481, "grad_norm": 2.6113638877868652, "learning_rate": 2.2535211267605634e-05, "loss": 0.9383, "step": 5995 }, { "epoch": 0.8882962962962963, "grad_norm": 1.367432951927185, "learning_rate": 2.2505559673832468e-05, "loss": 0.9804, "step": 5996 }, { "epoch": 0.8884444444444445, "grad_norm": 1.3314729928970337, "learning_rate": 2.2475908080059306e-05, "loss": 0.9013, "step": 5997 }, { "epoch": 0.8885925925925926, "grad_norm": 2.2478277683258057, "learning_rate": 2.2446256486286137e-05, "loss": 1.0514, "step": 5998 }, { "epoch": 0.8887407407407407, "grad_norm": 1.8422318696975708, "learning_rate": 2.2416604892512974e-05, "loss": 0.7727, "step": 5999 }, { "epoch": 0.8888888888888888, "grad_norm": 2.4531776905059814, "learning_rate": 2.238695329873981e-05, "loss": 0.9298, "step": 6000 }, { "epoch": 0.8890370370370371, "grad_norm": 2.0126442909240723, "learning_rate": 2.2357301704966643e-05, "loss": 1.0887, "step": 6001 }, { "epoch": 0.8891851851851852, "grad_norm": 1.7374687194824219, "learning_rate": 2.2327650111193477e-05, "loss": 1.0529, "step": 6002 }, { "epoch": 0.8893333333333333, "grad_norm": 1.3899040222167969, "learning_rate": 2.229799851742031e-05, "loss": 0.9656, "step": 6003 }, { "epoch": 0.8894814814814814, "grad_norm": 2.106459617614746, "learning_rate": 2.2268346923647146e-05, "loss": 0.965, "step": 6004 }, { "epoch": 0.8896296296296297, "grad_norm": 1.8286856412887573, "learning_rate": 2.2238695329873984e-05, "loss": 1.2328, "step": 6005 }, { "epoch": 0.8897777777777778, "grad_norm": 1.7465448379516602, "learning_rate": 2.2209043736100814e-05, "loss": 0.8868, "step": 6006 }, { "epoch": 0.8899259259259259, "grad_norm": 1.3104788064956665, "learning_rate": 2.2179392142327652e-05, "loss": 0.9981, "step": 6007 }, { "epoch": 0.8900740740740741, "grad_norm": 1.680923342704773, "learning_rate": 
2.2149740548554483e-05, "loss": 1.0375, "step": 6008 }, { "epoch": 0.8902222222222222, "grad_norm": 1.623653769493103, "learning_rate": 2.212008895478132e-05, "loss": 0.9158, "step": 6009 }, { "epoch": 0.8903703703703704, "grad_norm": 1.3549065589904785, "learning_rate": 2.2090437361008155e-05, "loss": 0.9938, "step": 6010 }, { "epoch": 0.8905185185185185, "grad_norm": 1.6504497528076172, "learning_rate": 2.206078576723499e-05, "loss": 1.1917, "step": 6011 }, { "epoch": 0.8906666666666667, "grad_norm": 1.3264446258544922, "learning_rate": 2.2031134173461827e-05, "loss": 1.1057, "step": 6012 }, { "epoch": 0.8908148148148148, "grad_norm": 1.9469859600067139, "learning_rate": 2.2001482579688658e-05, "loss": 1.2426, "step": 6013 }, { "epoch": 0.890962962962963, "grad_norm": 1.6104439496994019, "learning_rate": 2.1971830985915496e-05, "loss": 0.9436, "step": 6014 }, { "epoch": 0.8911111111111111, "grad_norm": 2.565323829650879, "learning_rate": 2.194217939214233e-05, "loss": 0.8797, "step": 6015 }, { "epoch": 0.8912592592592593, "grad_norm": 1.6997771263122559, "learning_rate": 2.1912527798369164e-05, "loss": 0.8972, "step": 6016 }, { "epoch": 0.8914074074074074, "grad_norm": 1.7999343872070312, "learning_rate": 2.1882876204596e-05, "loss": 1.093, "step": 6017 }, { "epoch": 0.8915555555555555, "grad_norm": 2.178159236907959, "learning_rate": 2.1853224610822833e-05, "loss": 1.004, "step": 6018 }, { "epoch": 0.8917037037037037, "grad_norm": 2.5334293842315674, "learning_rate": 2.1823573017049667e-05, "loss": 1.084, "step": 6019 }, { "epoch": 0.8918518518518519, "grad_norm": 1.747208595275879, "learning_rate": 2.1793921423276505e-05, "loss": 1.0382, "step": 6020 }, { "epoch": 0.892, "grad_norm": 1.4712276458740234, "learning_rate": 2.1764269829503336e-05, "loss": 0.9342, "step": 6021 }, { "epoch": 0.8921481481481481, "grad_norm": 2.3379294872283936, "learning_rate": 2.1734618235730173e-05, "loss": 1.0192, "step": 6022 }, { "epoch": 0.8922962962962963, "grad_norm": 
1.1035223007202148, "learning_rate": 2.1704966641957004e-05, "loss": 0.775, "step": 6023 }, { "epoch": 0.8924444444444445, "grad_norm": 2.642238140106201, "learning_rate": 2.1675315048183842e-05, "loss": 1.1903, "step": 6024 }, { "epoch": 0.8925925925925926, "grad_norm": 1.4049618244171143, "learning_rate": 2.1645663454410676e-05, "loss": 0.8349, "step": 6025 }, { "epoch": 0.8927407407407407, "grad_norm": 1.7419692277908325, "learning_rate": 2.161601186063751e-05, "loss": 0.8475, "step": 6026 }, { "epoch": 0.8928888888888888, "grad_norm": 1.5502527952194214, "learning_rate": 2.1586360266864345e-05, "loss": 0.9261, "step": 6027 }, { "epoch": 0.8930370370370371, "grad_norm": 1.383769154548645, "learning_rate": 2.155670867309118e-05, "loss": 0.9649, "step": 6028 }, { "epoch": 0.8931851851851852, "grad_norm": 1.8883402347564697, "learning_rate": 2.1527057079318013e-05, "loss": 1.1961, "step": 6029 }, { "epoch": 0.8933333333333333, "grad_norm": 2.3370683193206787, "learning_rate": 2.149740548554485e-05, "loss": 0.9132, "step": 6030 }, { "epoch": 0.8934814814814814, "grad_norm": 1.52423095703125, "learning_rate": 2.1467753891771682e-05, "loss": 1.336, "step": 6031 }, { "epoch": 0.8936296296296297, "grad_norm": 1.5249754190444946, "learning_rate": 2.143810229799852e-05, "loss": 1.1204, "step": 6032 }, { "epoch": 0.8937777777777778, "grad_norm": 1.9380327463150024, "learning_rate": 2.1408450704225354e-05, "loss": 1.0009, "step": 6033 }, { "epoch": 0.8939259259259259, "grad_norm": 1.273807168006897, "learning_rate": 2.1378799110452188e-05, "loss": 1.0166, "step": 6034 }, { "epoch": 0.894074074074074, "grad_norm": 4.784523010253906, "learning_rate": 2.1349147516679023e-05, "loss": 1.0915, "step": 6035 }, { "epoch": 0.8942222222222223, "grad_norm": 1.5389429330825806, "learning_rate": 2.1319495922905857e-05, "loss": 0.9364, "step": 6036 }, { "epoch": 0.8943703703703704, "grad_norm": 2.0165131092071533, "learning_rate": 2.128984432913269e-05, "loss": 0.8758, "step": 6037 }, { 
"epoch": 0.8945185185185185, "grad_norm": 3.4345901012420654, "learning_rate": 2.1260192735359525e-05, "loss": 1.1543, "step": 6038 }, { "epoch": 0.8946666666666667, "grad_norm": 1.4130337238311768, "learning_rate": 2.123054114158636e-05, "loss": 1.0309, "step": 6039 }, { "epoch": 0.8948148148148148, "grad_norm": 2.438339948654175, "learning_rate": 2.1200889547813197e-05, "loss": 0.6411, "step": 6040 }, { "epoch": 0.894962962962963, "grad_norm": 1.5909713506698608, "learning_rate": 2.117123795404003e-05, "loss": 0.9461, "step": 6041 }, { "epoch": 0.8951111111111111, "grad_norm": 1.5332297086715698, "learning_rate": 2.1141586360266866e-05, "loss": 1.2626, "step": 6042 }, { "epoch": 0.8952592592592593, "grad_norm": 1.4497228860855103, "learning_rate": 2.11119347664937e-05, "loss": 1.1115, "step": 6043 }, { "epoch": 0.8954074074074074, "grad_norm": 1.4441673755645752, "learning_rate": 2.1082283172720535e-05, "loss": 1.0415, "step": 6044 }, { "epoch": 0.8955555555555555, "grad_norm": 1.511989951133728, "learning_rate": 2.105263157894737e-05, "loss": 1.0469, "step": 6045 }, { "epoch": 0.8957037037037037, "grad_norm": 1.5368791818618774, "learning_rate": 2.1022979985174203e-05, "loss": 0.8377, "step": 6046 }, { "epoch": 0.8958518518518519, "grad_norm": 1.3114882707595825, "learning_rate": 2.0993328391401037e-05, "loss": 0.8663, "step": 6047 }, { "epoch": 0.896, "grad_norm": 1.5364413261413574, "learning_rate": 2.0963676797627875e-05, "loss": 0.9263, "step": 6048 }, { "epoch": 0.8961481481481481, "grad_norm": 1.2767763137817383, "learning_rate": 2.0934025203854706e-05, "loss": 0.7743, "step": 6049 }, { "epoch": 0.8962962962962963, "grad_norm": 1.5056242942810059, "learning_rate": 2.0904373610081544e-05, "loss": 0.915, "step": 6050 }, { "epoch": 0.8964444444444445, "grad_norm": 1.2498618364334106, "learning_rate": 2.0874722016308375e-05, "loss": 0.8222, "step": 6051 }, { "epoch": 0.8965925925925926, "grad_norm": 1.6661354303359985, "learning_rate": 2.0845070422535212e-05, 
"loss": 0.997, "step": 6052 }, { "epoch": 0.8967407407407407, "grad_norm": 11.182560920715332, "learning_rate": 2.0815418828762047e-05, "loss": 1.1014, "step": 6053 }, { "epoch": 0.8968888888888888, "grad_norm": 1.9369115829467773, "learning_rate": 2.078576723498888e-05, "loss": 1.2052, "step": 6054 }, { "epoch": 0.8970370370370371, "grad_norm": 1.5227136611938477, "learning_rate": 2.075611564121572e-05, "loss": 1.2471, "step": 6055 }, { "epoch": 0.8971851851851852, "grad_norm": 1.391202449798584, "learning_rate": 2.072646404744255e-05, "loss": 0.8846, "step": 6056 }, { "epoch": 0.8973333333333333, "grad_norm": 1.3359544277191162, "learning_rate": 2.0696812453669387e-05, "loss": 1.0103, "step": 6057 }, { "epoch": 0.8974814814814814, "grad_norm": 2.266838550567627, "learning_rate": 2.066716085989622e-05, "loss": 1.0017, "step": 6058 }, { "epoch": 0.8976296296296297, "grad_norm": 1.3277329206466675, "learning_rate": 2.0637509266123056e-05, "loss": 1.0146, "step": 6059 }, { "epoch": 0.8977777777777778, "grad_norm": 3.2451281547546387, "learning_rate": 2.060785767234989e-05, "loss": 1.2078, "step": 6060 }, { "epoch": 0.8979259259259259, "grad_norm": 1.8383342027664185, "learning_rate": 2.0578206078576724e-05, "loss": 1.0267, "step": 6061 }, { "epoch": 0.898074074074074, "grad_norm": 1.8881727457046509, "learning_rate": 2.054855448480356e-05, "loss": 0.9216, "step": 6062 }, { "epoch": 0.8982222222222223, "grad_norm": 1.3337632417678833, "learning_rate": 2.0518902891030396e-05, "loss": 1.0229, "step": 6063 }, { "epoch": 0.8983703703703704, "grad_norm": 2.845303773880005, "learning_rate": 2.0489251297257227e-05, "loss": 0.766, "step": 6064 }, { "epoch": 0.8985185185185185, "grad_norm": 3.4624359607696533, "learning_rate": 2.0459599703484065e-05, "loss": 0.9487, "step": 6065 }, { "epoch": 0.8986666666666666, "grad_norm": 2.0220963954925537, "learning_rate": 2.0429948109710896e-05, "loss": 0.7419, "step": 6066 }, { "epoch": 0.8988148148148148, "grad_norm": 
2.0012857913970947, "learning_rate": 2.0400296515937734e-05, "loss": 1.1056, "step": 6067 }, { "epoch": 0.898962962962963, "grad_norm": 2.571730375289917, "learning_rate": 2.0370644922164568e-05, "loss": 1.0651, "step": 6068 }, { "epoch": 0.8991111111111111, "grad_norm": 2.7319865226745605, "learning_rate": 2.0340993328391402e-05, "loss": 1.0422, "step": 6069 }, { "epoch": 0.8992592592592593, "grad_norm": 2.19328236579895, "learning_rate": 2.0311341734618236e-05, "loss": 0.8098, "step": 6070 }, { "epoch": 0.8994074074074074, "grad_norm": 3.327632427215576, "learning_rate": 2.028169014084507e-05, "loss": 0.8891, "step": 6071 }, { "epoch": 0.8995555555555556, "grad_norm": 1.7550420761108398, "learning_rate": 2.0252038547071905e-05, "loss": 0.9928, "step": 6072 }, { "epoch": 0.8997037037037037, "grad_norm": 1.1450306177139282, "learning_rate": 2.0222386953298743e-05, "loss": 1.0184, "step": 6073 }, { "epoch": 0.8998518518518519, "grad_norm": 2.0636534690856934, "learning_rate": 2.0192735359525574e-05, "loss": 0.9704, "step": 6074 }, { "epoch": 0.9, "grad_norm": 2.0672996044158936, "learning_rate": 2.016308376575241e-05, "loss": 0.925, "step": 6075 }, { "epoch": 0.9001481481481481, "grad_norm": 2.060155153274536, "learning_rate": 2.0133432171979246e-05, "loss": 1.0096, "step": 6076 }, { "epoch": 0.9002962962962963, "grad_norm": 1.3767861127853394, "learning_rate": 2.010378057820608e-05, "loss": 0.8819, "step": 6077 }, { "epoch": 0.9004444444444445, "grad_norm": 1.256442666053772, "learning_rate": 2.0074128984432914e-05, "loss": 1.0233, "step": 6078 }, { "epoch": 0.9005925925925926, "grad_norm": 2.882657289505005, "learning_rate": 2.004447739065975e-05, "loss": 1.1429, "step": 6079 }, { "epoch": 0.9007407407407407, "grad_norm": 1.7089165449142456, "learning_rate": 2.0014825796886583e-05, "loss": 0.923, "step": 6080 }, { "epoch": 0.9008888888888889, "grad_norm": 1.8851364850997925, "learning_rate": 1.9985174203113417e-05, "loss": 0.8472, "step": 6081 }, { "epoch": 
0.9010370370370371, "grad_norm": 3.659064292907715, "learning_rate": 1.995552260934025e-05, "loss": 1.1521, "step": 6082 }, { "epoch": 0.9011851851851852, "grad_norm": 1.409136414527893, "learning_rate": 1.992587101556709e-05, "loss": 0.8831, "step": 6083 }, { "epoch": 0.9013333333333333, "grad_norm": 1.80681574344635, "learning_rate": 1.989621942179392e-05, "loss": 1.0433, "step": 6084 }, { "epoch": 0.9014814814814814, "grad_norm": 1.6147675514221191, "learning_rate": 1.9866567828020758e-05, "loss": 1.0592, "step": 6085 }, { "epoch": 0.9016296296296297, "grad_norm": 2.0292410850524902, "learning_rate": 1.9836916234247592e-05, "loss": 0.8812, "step": 6086 }, { "epoch": 0.9017777777777778, "grad_norm": 1.399593472480774, "learning_rate": 1.9807264640474426e-05, "loss": 0.8079, "step": 6087 }, { "epoch": 0.9019259259259259, "grad_norm": 1.9720758199691772, "learning_rate": 1.977761304670126e-05, "loss": 0.9161, "step": 6088 }, { "epoch": 0.902074074074074, "grad_norm": 2.0885941982269287, "learning_rate": 1.9747961452928095e-05, "loss": 1.1102, "step": 6089 }, { "epoch": 0.9022222222222223, "grad_norm": 1.5641653537750244, "learning_rate": 1.971830985915493e-05, "loss": 0.7803, "step": 6090 }, { "epoch": 0.9023703703703704, "grad_norm": 1.7609938383102417, "learning_rate": 1.9688658265381767e-05, "loss": 1.0168, "step": 6091 }, { "epoch": 0.9025185185185185, "grad_norm": 1.8712331056594849, "learning_rate": 1.9659006671608598e-05, "loss": 0.9302, "step": 6092 }, { "epoch": 0.9026666666666666, "grad_norm": 1.8043031692504883, "learning_rate": 1.9629355077835435e-05, "loss": 1.2285, "step": 6093 }, { "epoch": 0.9028148148148148, "grad_norm": 1.5237916707992554, "learning_rate": 1.959970348406227e-05, "loss": 1.1949, "step": 6094 }, { "epoch": 0.902962962962963, "grad_norm": 2.2680342197418213, "learning_rate": 1.9570051890289104e-05, "loss": 0.938, "step": 6095 }, { "epoch": 0.9031111111111111, "grad_norm": 1.5735719203948975, "learning_rate": 1.9540400296515938e-05, 
"loss": 0.8851, "step": 6096 }, { "epoch": 0.9032592592592592, "grad_norm": 1.5011521577835083, "learning_rate": 1.9510748702742773e-05, "loss": 0.9529, "step": 6097 }, { "epoch": 0.9034074074074074, "grad_norm": 1.7351727485656738, "learning_rate": 1.948109710896961e-05, "loss": 1.1232, "step": 6098 }, { "epoch": 0.9035555555555556, "grad_norm": 1.602634310722351, "learning_rate": 1.945144551519644e-05, "loss": 1.0819, "step": 6099 }, { "epoch": 0.9037037037037037, "grad_norm": 1.820095419883728, "learning_rate": 1.942179392142328e-05, "loss": 1.0187, "step": 6100 }, { "epoch": 0.9038518518518519, "grad_norm": 1.6920205354690552, "learning_rate": 1.9392142327650113e-05, "loss": 1.3522, "step": 6101 }, { "epoch": 0.904, "grad_norm": 1.2265843152999878, "learning_rate": 1.9362490733876947e-05, "loss": 1.071, "step": 6102 }, { "epoch": 0.9041481481481481, "grad_norm": 2.0982909202575684, "learning_rate": 1.9332839140103782e-05, "loss": 1.1262, "step": 6103 }, { "epoch": 0.9042962962962963, "grad_norm": 2.1813714504241943, "learning_rate": 1.9303187546330616e-05, "loss": 0.9635, "step": 6104 }, { "epoch": 0.9044444444444445, "grad_norm": 1.8678935766220093, "learning_rate": 1.927353595255745e-05, "loss": 0.9363, "step": 6105 }, { "epoch": 0.9045925925925926, "grad_norm": 1.7868329286575317, "learning_rate": 1.9243884358784288e-05, "loss": 1.2062, "step": 6106 }, { "epoch": 0.9047407407407407, "grad_norm": 1.2214263677597046, "learning_rate": 1.921423276501112e-05, "loss": 0.8393, "step": 6107 }, { "epoch": 0.9048888888888889, "grad_norm": 1.5382243394851685, "learning_rate": 1.9184581171237957e-05, "loss": 1.1659, "step": 6108 }, { "epoch": 0.9050370370370371, "grad_norm": 3.6227638721466064, "learning_rate": 1.9154929577464788e-05, "loss": 1.1544, "step": 6109 }, { "epoch": 0.9051851851851852, "grad_norm": 1.6020253896713257, "learning_rate": 1.9125277983691625e-05, "loss": 1.0659, "step": 6110 }, { "epoch": 0.9053333333333333, "grad_norm": 1.2939000129699707, 
"learning_rate": 1.909562638991846e-05, "loss": 0.7358, "step": 6111 }, { "epoch": 0.9054814814814814, "grad_norm": 2.0559980869293213, "learning_rate": 1.9065974796145294e-05, "loss": 1.007, "step": 6112 }, { "epoch": 0.9056296296296297, "grad_norm": 1.1219966411590576, "learning_rate": 1.9036323202372128e-05, "loss": 0.7504, "step": 6113 }, { "epoch": 0.9057777777777778, "grad_norm": 2.00435733795166, "learning_rate": 1.9006671608598962e-05, "loss": 0.8884, "step": 6114 }, { "epoch": 0.9059259259259259, "grad_norm": 1.2631276845932007, "learning_rate": 1.8977020014825797e-05, "loss": 1.11, "step": 6115 }, { "epoch": 0.906074074074074, "grad_norm": 1.498734474182129, "learning_rate": 1.8947368421052634e-05, "loss": 1.1082, "step": 6116 }, { "epoch": 0.9062222222222223, "grad_norm": 2.8208577632904053, "learning_rate": 1.8917716827279465e-05, "loss": 0.8463, "step": 6117 }, { "epoch": 0.9063703703703704, "grad_norm": 1.2041140794754028, "learning_rate": 1.8888065233506303e-05, "loss": 0.8351, "step": 6118 }, { "epoch": 0.9065185185185185, "grad_norm": 1.9682738780975342, "learning_rate": 1.8858413639733137e-05, "loss": 0.9793, "step": 6119 }, { "epoch": 0.9066666666666666, "grad_norm": 1.3063067197799683, "learning_rate": 1.882876204595997e-05, "loss": 1.034, "step": 6120 }, { "epoch": 0.9068148148148149, "grad_norm": 1.426026701927185, "learning_rate": 1.8799110452186806e-05, "loss": 1.1258, "step": 6121 }, { "epoch": 0.906962962962963, "grad_norm": 1.9268131256103516, "learning_rate": 1.876945885841364e-05, "loss": 1.0602, "step": 6122 }, { "epoch": 0.9071111111111111, "grad_norm": 1.168556571006775, "learning_rate": 1.8739807264640474e-05, "loss": 0.9043, "step": 6123 }, { "epoch": 0.9072592592592592, "grad_norm": 1.3348885774612427, "learning_rate": 1.871015567086731e-05, "loss": 0.9788, "step": 6124 }, { "epoch": 0.9074074074074074, "grad_norm": 2.003084182739258, "learning_rate": 1.8680504077094143e-05, "loss": 1.0414, "step": 6125 }, { "epoch": 
0.9075555555555556, "grad_norm": 1.5825577974319458, "learning_rate": 1.865085248332098e-05, "loss": 0.898, "step": 6126 }, { "epoch": 0.9077037037037037, "grad_norm": 1.6305758953094482, "learning_rate": 1.862120088954781e-05, "loss": 1.076, "step": 6127 }, { "epoch": 0.9078518518518518, "grad_norm": 1.466801643371582, "learning_rate": 1.859154929577465e-05, "loss": 1.0831, "step": 6128 }, { "epoch": 0.908, "grad_norm": 1.4597145318984985, "learning_rate": 1.8561897702001484e-05, "loss": 0.8688, "step": 6129 }, { "epoch": 0.9081481481481481, "grad_norm": 1.4790699481964111, "learning_rate": 1.8532246108228318e-05, "loss": 0.9855, "step": 6130 }, { "epoch": 0.9082962962962963, "grad_norm": 3.157147169113159, "learning_rate": 1.8502594514455152e-05, "loss": 1.111, "step": 6131 }, { "epoch": 0.9084444444444445, "grad_norm": 1.674553394317627, "learning_rate": 1.8472942920681986e-05, "loss": 1.0364, "step": 6132 }, { "epoch": 0.9085925925925926, "grad_norm": 1.1325260400772095, "learning_rate": 1.844329132690882e-05, "loss": 0.8898, "step": 6133 }, { "epoch": 0.9087407407407407, "grad_norm": 1.7265822887420654, "learning_rate": 1.841363973313566e-05, "loss": 1.1837, "step": 6134 }, { "epoch": 0.9088888888888889, "grad_norm": 1.8278820514678955, "learning_rate": 1.8383988139362493e-05, "loss": 1.0625, "step": 6135 }, { "epoch": 0.9090370370370371, "grad_norm": 1.437045931816101, "learning_rate": 1.8354336545589327e-05, "loss": 0.9, "step": 6136 }, { "epoch": 0.9091851851851852, "grad_norm": 2.230379819869995, "learning_rate": 1.832468495181616e-05, "loss": 0.9827, "step": 6137 }, { "epoch": 0.9093333333333333, "grad_norm": 2.0429258346557617, "learning_rate": 1.8295033358042996e-05, "loss": 0.7465, "step": 6138 }, { "epoch": 0.9094814814814814, "grad_norm": 2.3619439601898193, "learning_rate": 1.826538176426983e-05, "loss": 0.9752, "step": 6139 }, { "epoch": 0.9096296296296297, "grad_norm": 1.613619327545166, "learning_rate": 1.8235730170496664e-05, "loss": 0.9935, 
"step": 6140 }, { "epoch": 0.9097777777777778, "grad_norm": 1.7696385383605957, "learning_rate": 1.8206078576723502e-05, "loss": 0.8633, "step": 6141 }, { "epoch": 0.9099259259259259, "grad_norm": 1.6603771448135376, "learning_rate": 1.8176426982950333e-05, "loss": 0.9566, "step": 6142 }, { "epoch": 0.910074074074074, "grad_norm": 2.8760082721710205, "learning_rate": 1.814677538917717e-05, "loss": 1.0243, "step": 6143 }, { "epoch": 0.9102222222222223, "grad_norm": 1.4774494171142578, "learning_rate": 1.8117123795404005e-05, "loss": 1.1372, "step": 6144 }, { "epoch": 0.9103703703703704, "grad_norm": 2.1504828929901123, "learning_rate": 1.808747220163084e-05, "loss": 1.1725, "step": 6145 }, { "epoch": 0.9105185185185185, "grad_norm": 1.656188726425171, "learning_rate": 1.8057820607857673e-05, "loss": 1.1643, "step": 6146 }, { "epoch": 0.9106666666666666, "grad_norm": 1.4865316152572632, "learning_rate": 1.8028169014084508e-05, "loss": 1.1018, "step": 6147 }, { "epoch": 0.9108148148148149, "grad_norm": 1.9200849533081055, "learning_rate": 1.7998517420311342e-05, "loss": 0.932, "step": 6148 }, { "epoch": 0.910962962962963, "grad_norm": 2.5810506343841553, "learning_rate": 1.796886582653818e-05, "loss": 1.149, "step": 6149 }, { "epoch": 0.9111111111111111, "grad_norm": 1.8590203523635864, "learning_rate": 1.793921423276501e-05, "loss": 0.9868, "step": 6150 }, { "epoch": 0.9112592592592592, "grad_norm": 1.795326828956604, "learning_rate": 1.7909562638991848e-05, "loss": 1.1829, "step": 6151 }, { "epoch": 0.9114074074074074, "grad_norm": 1.2344616651535034, "learning_rate": 1.787991104521868e-05, "loss": 0.9823, "step": 6152 }, { "epoch": 0.9115555555555556, "grad_norm": 1.5617588758468628, "learning_rate": 1.7850259451445517e-05, "loss": 1.1079, "step": 6153 }, { "epoch": 0.9117037037037037, "grad_norm": 1.4923650026321411, "learning_rate": 1.782060785767235e-05, "loss": 1.062, "step": 6154 }, { "epoch": 0.9118518518518518, "grad_norm": 4.258939266204834, 
"learning_rate": 1.7790956263899185e-05, "loss": 1.036, "step": 6155 }, { "epoch": 0.912, "grad_norm": 3.625718593597412, "learning_rate": 1.776130467012602e-05, "loss": 1.1777, "step": 6156 }, { "epoch": 0.9121481481481482, "grad_norm": 1.7118535041809082, "learning_rate": 1.7731653076352854e-05, "loss": 0.9751, "step": 6157 }, { "epoch": 0.9122962962962963, "grad_norm": 1.1163208484649658, "learning_rate": 1.770200148257969e-05, "loss": 0.867, "step": 6158 }, { "epoch": 0.9124444444444444, "grad_norm": 1.6679993867874146, "learning_rate": 1.7672349888806526e-05, "loss": 1.1601, "step": 6159 }, { "epoch": 0.9125925925925926, "grad_norm": 1.2561055421829224, "learning_rate": 1.7642698295033357e-05, "loss": 0.9182, "step": 6160 }, { "epoch": 0.9127407407407407, "grad_norm": 3.2386274337768555, "learning_rate": 1.7613046701260195e-05, "loss": 1.125, "step": 6161 }, { "epoch": 0.9128888888888889, "grad_norm": 1.2529643774032593, "learning_rate": 1.758339510748703e-05, "loss": 0.7451, "step": 6162 }, { "epoch": 0.9130370370370371, "grad_norm": 1.0242217779159546, "learning_rate": 1.7553743513713863e-05, "loss": 0.7875, "step": 6163 }, { "epoch": 0.9131851851851852, "grad_norm": 1.5616753101348877, "learning_rate": 1.7524091919940698e-05, "loss": 0.9641, "step": 6164 }, { "epoch": 0.9133333333333333, "grad_norm": 3.3702902793884277, "learning_rate": 1.7494440326167532e-05, "loss": 0.8851, "step": 6165 }, { "epoch": 0.9134814814814814, "grad_norm": 2.8215017318725586, "learning_rate": 1.7464788732394366e-05, "loss": 1.1532, "step": 6166 }, { "epoch": 0.9136296296296297, "grad_norm": 1.7007137537002563, "learning_rate": 1.74351371386212e-05, "loss": 0.9779, "step": 6167 }, { "epoch": 0.9137777777777778, "grad_norm": 3.317284107208252, "learning_rate": 1.7405485544848035e-05, "loss": 0.8305, "step": 6168 }, { "epoch": 0.9139259259259259, "grad_norm": 1.1211053133010864, "learning_rate": 1.7375833951074872e-05, "loss": 0.9122, "step": 6169 }, { "epoch": 0.914074074074074, 
"grad_norm": 1.5618693828582764, "learning_rate": 1.7346182357301703e-05, "loss": 1.0456, "step": 6170 }, { "epoch": 0.9142222222222223, "grad_norm": 1.43927800655365, "learning_rate": 1.731653076352854e-05, "loss": 0.8483, "step": 6171 }, { "epoch": 0.9143703703703704, "grad_norm": 1.8553197383880615, "learning_rate": 1.7286879169755375e-05, "loss": 0.8967, "step": 6172 }, { "epoch": 0.9145185185185185, "grad_norm": 2.838083505630493, "learning_rate": 1.725722757598221e-05, "loss": 1.0936, "step": 6173 }, { "epoch": 0.9146666666666666, "grad_norm": 1.672043800354004, "learning_rate": 1.7227575982209044e-05, "loss": 1.0261, "step": 6174 }, { "epoch": 0.9148148148148149, "grad_norm": 1.1843780279159546, "learning_rate": 1.7197924388435878e-05, "loss": 0.893, "step": 6175 }, { "epoch": 0.914962962962963, "grad_norm": 2.937354326248169, "learning_rate": 1.7168272794662716e-05, "loss": 0.9888, "step": 6176 }, { "epoch": 0.9151111111111111, "grad_norm": 2.701634168624878, "learning_rate": 1.713862120088955e-05, "loss": 1.089, "step": 6177 }, { "epoch": 0.9152592592592592, "grad_norm": 1.9226313829421997, "learning_rate": 1.7108969607116384e-05, "loss": 1.1289, "step": 6178 }, { "epoch": 0.9154074074074074, "grad_norm": 1.6943758726119995, "learning_rate": 1.707931801334322e-05, "loss": 0.7995, "step": 6179 }, { "epoch": 0.9155555555555556, "grad_norm": 1.6026709079742432, "learning_rate": 1.7049666419570053e-05, "loss": 0.8349, "step": 6180 }, { "epoch": 0.9157037037037037, "grad_norm": 3.4536123275756836, "learning_rate": 1.7020014825796887e-05, "loss": 1.1635, "step": 6181 }, { "epoch": 0.9158518518518518, "grad_norm": 1.6723133325576782, "learning_rate": 1.699036323202372e-05, "loss": 0.9406, "step": 6182 }, { "epoch": 0.916, "grad_norm": 1.5767154693603516, "learning_rate": 1.6960711638250556e-05, "loss": 0.9219, "step": 6183 }, { "epoch": 0.9161481481481482, "grad_norm": 1.8430118560791016, "learning_rate": 1.6931060044477394e-05, "loss": 1.1754, "step": 6184 }, { 
"epoch": 0.9162962962962963, "grad_norm": 1.6995733976364136, "learning_rate": 1.6901408450704224e-05, "loss": 1.5034, "step": 6185 }, { "epoch": 0.9164444444444444, "grad_norm": 1.59422767162323, "learning_rate": 1.6871756856931062e-05, "loss": 1.2148, "step": 6186 }, { "epoch": 0.9165925925925926, "grad_norm": 1.5773909091949463, "learning_rate": 1.6842105263157896e-05, "loss": 0.9517, "step": 6187 }, { "epoch": 0.9167407407407407, "grad_norm": 1.9356043338775635, "learning_rate": 1.681245366938473e-05, "loss": 0.8084, "step": 6188 }, { "epoch": 0.9168888888888889, "grad_norm": 2.1239495277404785, "learning_rate": 1.6782802075611565e-05, "loss": 0.9879, "step": 6189 }, { "epoch": 0.917037037037037, "grad_norm": 1.4792206287384033, "learning_rate": 1.67531504818384e-05, "loss": 0.8791, "step": 6190 }, { "epoch": 0.9171851851851852, "grad_norm": 1.8909317255020142, "learning_rate": 1.6723498888065234e-05, "loss": 1.0974, "step": 6191 }, { "epoch": 0.9173333333333333, "grad_norm": 1.6568989753723145, "learning_rate": 1.669384729429207e-05, "loss": 1.0819, "step": 6192 }, { "epoch": 0.9174814814814815, "grad_norm": 1.7282850742340088, "learning_rate": 1.6664195700518902e-05, "loss": 1.0477, "step": 6193 }, { "epoch": 0.9176296296296297, "grad_norm": 3.181680679321289, "learning_rate": 1.663454410674574e-05, "loss": 0.9178, "step": 6194 }, { "epoch": 0.9177777777777778, "grad_norm": 1.6595582962036133, "learning_rate": 1.660489251297257e-05, "loss": 0.9918, "step": 6195 }, { "epoch": 0.9179259259259259, "grad_norm": 2.2920784950256348, "learning_rate": 1.657524091919941e-05, "loss": 0.932, "step": 6196 }, { "epoch": 0.918074074074074, "grad_norm": 1.3185635805130005, "learning_rate": 1.6545589325426243e-05, "loss": 0.7316, "step": 6197 }, { "epoch": 0.9182222222222223, "grad_norm": 1.3961563110351562, "learning_rate": 1.6515937731653077e-05, "loss": 0.7957, "step": 6198 }, { "epoch": 0.9183703703703704, "grad_norm": 5.240330219268799, "learning_rate": 
1.648628613787991e-05, "loss": 0.9162, "step": 6199 }, { "epoch": 0.9185185185185185, "grad_norm": 2.126986265182495, "learning_rate": 1.6456634544106746e-05, "loss": 1.1555, "step": 6200 }, { "epoch": 0.9186666666666666, "grad_norm": 1.5535329580307007, "learning_rate": 1.642698295033358e-05, "loss": 1.0565, "step": 6201 }, { "epoch": 0.9188148148148149, "grad_norm": 1.3964463472366333, "learning_rate": 1.6397331356560418e-05, "loss": 0.8956, "step": 6202 }, { "epoch": 0.918962962962963, "grad_norm": 1.6732627153396606, "learning_rate": 1.636767976278725e-05, "loss": 1.0568, "step": 6203 }, { "epoch": 0.9191111111111111, "grad_norm": 1.8250062465667725, "learning_rate": 1.6338028169014086e-05, "loss": 0.9854, "step": 6204 }, { "epoch": 0.9192592592592592, "grad_norm": 1.4552170038223267, "learning_rate": 1.630837657524092e-05, "loss": 0.9542, "step": 6205 }, { "epoch": 0.9194074074074075, "grad_norm": 4.43630313873291, "learning_rate": 1.6278724981467755e-05, "loss": 0.9096, "step": 6206 }, { "epoch": 0.9195555555555556, "grad_norm": 2.3064446449279785, "learning_rate": 1.624907338769459e-05, "loss": 0.9531, "step": 6207 }, { "epoch": 0.9197037037037037, "grad_norm": 1.996294617652893, "learning_rate": 1.6219421793921423e-05, "loss": 0.9177, "step": 6208 }, { "epoch": 0.9198518518518518, "grad_norm": 1.2575515508651733, "learning_rate": 1.6189770200148258e-05, "loss": 1.0533, "step": 6209 }, { "epoch": 0.92, "grad_norm": 1.8964482545852661, "learning_rate": 1.6160118606375092e-05, "loss": 1.0478, "step": 6210 }, { "epoch": 0.9201481481481482, "grad_norm": 1.3098604679107666, "learning_rate": 1.6130467012601926e-05, "loss": 0.8478, "step": 6211 }, { "epoch": 0.9202962962962963, "grad_norm": 1.1962435245513916, "learning_rate": 1.6100815418828764e-05, "loss": 1.0145, "step": 6212 }, { "epoch": 0.9204444444444444, "grad_norm": 1.5967687368392944, "learning_rate": 1.6071163825055595e-05, "loss": 0.98, "step": 6213 }, { "epoch": 0.9205925925925926, "grad_norm": 
1.894650936126709, "learning_rate": 1.6041512231282433e-05, "loss": 0.9342, "step": 6214 }, { "epoch": 0.9207407407407407, "grad_norm": 2.727250337600708, "learning_rate": 1.6011860637509267e-05, "loss": 1.2005, "step": 6215 }, { "epoch": 0.9208888888888889, "grad_norm": 1.2794644832611084, "learning_rate": 1.59822090437361e-05, "loss": 0.9428, "step": 6216 }, { "epoch": 0.921037037037037, "grad_norm": 1.8853224515914917, "learning_rate": 1.5952557449962936e-05, "loss": 1.0673, "step": 6217 }, { "epoch": 0.9211851851851852, "grad_norm": 1.2796276807785034, "learning_rate": 1.592290585618977e-05, "loss": 1.0564, "step": 6218 }, { "epoch": 0.9213333333333333, "grad_norm": 2.350651264190674, "learning_rate": 1.5893254262416607e-05, "loss": 1.097, "step": 6219 }, { "epoch": 0.9214814814814815, "grad_norm": 1.7367689609527588, "learning_rate": 1.5863602668643442e-05, "loss": 1.0272, "step": 6220 }, { "epoch": 0.9216296296296296, "grad_norm": 1.8030822277069092, "learning_rate": 1.5833951074870276e-05, "loss": 1.0805, "step": 6221 }, { "epoch": 0.9217777777777778, "grad_norm": 1.5849909782409668, "learning_rate": 1.580429948109711e-05, "loss": 1.0965, "step": 6222 }, { "epoch": 0.9219259259259259, "grad_norm": 7.2515668869018555, "learning_rate": 1.5774647887323945e-05, "loss": 1.2613, "step": 6223 }, { "epoch": 0.922074074074074, "grad_norm": 1.6564565896987915, "learning_rate": 1.574499629355078e-05, "loss": 0.9734, "step": 6224 }, { "epoch": 0.9222222222222223, "grad_norm": 2.6008694171905518, "learning_rate": 1.5715344699777613e-05, "loss": 1.0323, "step": 6225 }, { "epoch": 0.9223703703703704, "grad_norm": 1.9936848878860474, "learning_rate": 1.5685693106004448e-05, "loss": 0.8073, "step": 6226 }, { "epoch": 0.9225185185185185, "grad_norm": 2.0086851119995117, "learning_rate": 1.5656041512231285e-05, "loss": 1.1109, "step": 6227 }, { "epoch": 0.9226666666666666, "grad_norm": 2.9048655033111572, "learning_rate": 1.5626389918458116e-05, "loss": 1.0379, "step": 6228 }, 
{ "epoch": 0.9228148148148149, "grad_norm": 2.1338531970977783, "learning_rate": 1.5596738324684954e-05, "loss": 1.0836, "step": 6229 }, { "epoch": 0.922962962962963, "grad_norm": 1.5965696573257446, "learning_rate": 1.5567086730911788e-05, "loss": 1.143, "step": 6230 }, { "epoch": 0.9231111111111111, "grad_norm": 2.2106244564056396, "learning_rate": 1.5537435137138622e-05, "loss": 1.0348, "step": 6231 }, { "epoch": 0.9232592592592592, "grad_norm": 1.9293761253356934, "learning_rate": 1.5507783543365457e-05, "loss": 1.2752, "step": 6232 }, { "epoch": 0.9234074074074075, "grad_norm": 1.8146687746047974, "learning_rate": 1.547813194959229e-05, "loss": 0.8558, "step": 6233 }, { "epoch": 0.9235555555555556, "grad_norm": 1.446638584136963, "learning_rate": 1.5448480355819125e-05, "loss": 1.0929, "step": 6234 }, { "epoch": 0.9237037037037037, "grad_norm": 2.2936630249023438, "learning_rate": 1.5418828762045963e-05, "loss": 0.9336, "step": 6235 }, { "epoch": 0.9238518518518518, "grad_norm": 2.1772422790527344, "learning_rate": 1.5389177168272794e-05, "loss": 0.9225, "step": 6236 }, { "epoch": 0.924, "grad_norm": 2.055823802947998, "learning_rate": 1.535952557449963e-05, "loss": 1.1602, "step": 6237 }, { "epoch": 0.9241481481481482, "grad_norm": 1.4553918838500977, "learning_rate": 1.5329873980726462e-05, "loss": 0.9859, "step": 6238 }, { "epoch": 0.9242962962962963, "grad_norm": 3.1852595806121826, "learning_rate": 1.53002223869533e-05, "loss": 0.8281, "step": 6239 }, { "epoch": 0.9244444444444444, "grad_norm": 1.612755298614502, "learning_rate": 1.5270570793180134e-05, "loss": 0.7845, "step": 6240 }, { "epoch": 0.9245925925925926, "grad_norm": 1.8736701011657715, "learning_rate": 1.5240919199406969e-05, "loss": 0.8355, "step": 6241 }, { "epoch": 0.9247407407407408, "grad_norm": 1.7704135179519653, "learning_rate": 1.5211267605633803e-05, "loss": 1.0393, "step": 6242 }, { "epoch": 0.9248888888888889, "grad_norm": 1.5719643831253052, "learning_rate": 
1.5181616011860639e-05, "loss": 1.0683, "step": 6243 }, { "epoch": 0.925037037037037, "grad_norm": 1.3279207944869995, "learning_rate": 1.5151964418087472e-05, "loss": 0.8243, "step": 6244 }, { "epoch": 0.9251851851851852, "grad_norm": 1.4003965854644775, "learning_rate": 1.5122312824314308e-05, "loss": 0.9751, "step": 6245 }, { "epoch": 0.9253333333333333, "grad_norm": 3.054779529571533, "learning_rate": 1.5092661230541142e-05, "loss": 1.2106, "step": 6246 }, { "epoch": 0.9254814814814815, "grad_norm": 1.6408637762069702, "learning_rate": 1.5063009636767978e-05, "loss": 1.0453, "step": 6247 }, { "epoch": 0.9256296296296296, "grad_norm": 1.2181177139282227, "learning_rate": 1.503335804299481e-05, "loss": 1.0361, "step": 6248 }, { "epoch": 0.9257777777777778, "grad_norm": 1.3258213996887207, "learning_rate": 1.5003706449221647e-05, "loss": 1.1213, "step": 6249 }, { "epoch": 0.9259259259259259, "grad_norm": 2.0105180740356445, "learning_rate": 1.497405485544848e-05, "loss": 1.0037, "step": 6250 }, { "epoch": 0.926074074074074, "grad_norm": 1.7296967506408691, "learning_rate": 1.4944403261675317e-05, "loss": 0.9131, "step": 6251 }, { "epoch": 0.9262222222222222, "grad_norm": 1.6838256120681763, "learning_rate": 1.491475166790215e-05, "loss": 1.1037, "step": 6252 }, { "epoch": 0.9263703703703704, "grad_norm": 2.1008238792419434, "learning_rate": 1.4885100074128985e-05, "loss": 1.1618, "step": 6253 }, { "epoch": 0.9265185185185185, "grad_norm": 2.8668837547302246, "learning_rate": 1.4855448480355818e-05, "loss": 1.0182, "step": 6254 }, { "epoch": 0.9266666666666666, "grad_norm": 1.3735758066177368, "learning_rate": 1.4825796886582654e-05, "loss": 0.7997, "step": 6255 }, { "epoch": 0.9268148148148149, "grad_norm": 3.844618797302246, "learning_rate": 1.4796145292809488e-05, "loss": 0.8231, "step": 6256 }, { "epoch": 0.926962962962963, "grad_norm": 1.8267629146575928, "learning_rate": 1.4766493699036324e-05, "loss": 0.9104, "step": 6257 }, { "epoch": 0.9271111111111111, 
"grad_norm": 2.0057625770568848, "learning_rate": 1.4736842105263157e-05, "loss": 0.8926, "step": 6258 }, { "epoch": 0.9272592592592592, "grad_norm": 2.0294721126556396, "learning_rate": 1.4707190511489993e-05, "loss": 0.9732, "step": 6259 }, { "epoch": 0.9274074074074075, "grad_norm": 1.9177600145339966, "learning_rate": 1.4677538917716829e-05, "loss": 0.8258, "step": 6260 }, { "epoch": 0.9275555555555556, "grad_norm": 3.415085792541504, "learning_rate": 1.4647887323943663e-05, "loss": 1.1299, "step": 6261 }, { "epoch": 0.9277037037037037, "grad_norm": 2.0719778537750244, "learning_rate": 1.4618235730170499e-05, "loss": 1.1079, "step": 6262 }, { "epoch": 0.9278518518518518, "grad_norm": 1.4281821250915527, "learning_rate": 1.4588584136397332e-05, "loss": 0.9103, "step": 6263 }, { "epoch": 0.928, "grad_norm": 3.8981099128723145, "learning_rate": 1.4558932542624168e-05, "loss": 1.0281, "step": 6264 }, { "epoch": 0.9281481481481482, "grad_norm": 1.7960991859436035, "learning_rate": 1.4529280948851002e-05, "loss": 1.0856, "step": 6265 }, { "epoch": 0.9282962962962963, "grad_norm": 1.6068756580352783, "learning_rate": 1.4499629355077838e-05, "loss": 1.0139, "step": 6266 }, { "epoch": 0.9284444444444444, "grad_norm": 2.6951966285705566, "learning_rate": 1.446997776130467e-05, "loss": 0.8411, "step": 6267 }, { "epoch": 0.9285925925925926, "grad_norm": 1.224673867225647, "learning_rate": 1.4440326167531507e-05, "loss": 1.0133, "step": 6268 }, { "epoch": 0.9287407407407408, "grad_norm": 1.793215036392212, "learning_rate": 1.441067457375834e-05, "loss": 1.0426, "step": 6269 }, { "epoch": 0.9288888888888889, "grad_norm": 1.3081490993499756, "learning_rate": 1.4381022979985175e-05, "loss": 0.8096, "step": 6270 }, { "epoch": 0.929037037037037, "grad_norm": 1.283671259880066, "learning_rate": 1.435137138621201e-05, "loss": 0.8562, "step": 6271 }, { "epoch": 0.9291851851851852, "grad_norm": 1.2067022323608398, "learning_rate": 1.4321719792438845e-05, "loss": 1.0622, "step": 6272 
}, { "epoch": 0.9293333333333333, "grad_norm": 2.639707326889038, "learning_rate": 1.4292068198665678e-05, "loss": 1.0466, "step": 6273 }, { "epoch": 0.9294814814814815, "grad_norm": 1.2013615369796753, "learning_rate": 1.4262416604892514e-05, "loss": 1.2571, "step": 6274 }, { "epoch": 0.9296296296296296, "grad_norm": 1.916764259338379, "learning_rate": 1.4232765011119348e-05, "loss": 0.9469, "step": 6275 }, { "epoch": 0.9297777777777778, "grad_norm": 1.9235259294509888, "learning_rate": 1.4203113417346184e-05, "loss": 1.121, "step": 6276 }, { "epoch": 0.9299259259259259, "grad_norm": 1.2695482969284058, "learning_rate": 1.4173461823573017e-05, "loss": 0.937, "step": 6277 }, { "epoch": 0.930074074074074, "grad_norm": 1.1932200193405151, "learning_rate": 1.4143810229799853e-05, "loss": 0.8635, "step": 6278 }, { "epoch": 0.9302222222222222, "grad_norm": 1.7852343320846558, "learning_rate": 1.4114158636026687e-05, "loss": 1.2502, "step": 6279 }, { "epoch": 0.9303703703703704, "grad_norm": 1.09291672706604, "learning_rate": 1.4084507042253523e-05, "loss": 0.9238, "step": 6280 }, { "epoch": 0.9305185185185185, "grad_norm": 1.2748316526412964, "learning_rate": 1.4054855448480356e-05, "loss": 1.2238, "step": 6281 }, { "epoch": 0.9306666666666666, "grad_norm": 1.7004750967025757, "learning_rate": 1.4025203854707192e-05, "loss": 1.1988, "step": 6282 }, { "epoch": 0.9308148148148148, "grad_norm": 1.8087915182113647, "learning_rate": 1.3995552260934024e-05, "loss": 1.1623, "step": 6283 }, { "epoch": 0.930962962962963, "grad_norm": 3.6730096340179443, "learning_rate": 1.396590066716086e-05, "loss": 1.2439, "step": 6284 }, { "epoch": 0.9311111111111111, "grad_norm": 1.895340919494629, "learning_rate": 1.3936249073387695e-05, "loss": 1.0449, "step": 6285 }, { "epoch": 0.9312592592592592, "grad_norm": 2.183992624282837, "learning_rate": 1.390659747961453e-05, "loss": 1.0405, "step": 6286 }, { "epoch": 0.9314074074074075, "grad_norm": 1.5544954538345337, "learning_rate": 
1.3876945885841363e-05, "loss": 0.7898, "step": 6287 }, { "epoch": 0.9315555555555556, "grad_norm": 1.2439019680023193, "learning_rate": 1.38472942920682e-05, "loss": 1.0442, "step": 6288 }, { "epoch": 0.9317037037037037, "grad_norm": 1.3818901777267456, "learning_rate": 1.3817642698295034e-05, "loss": 1.1079, "step": 6289 }, { "epoch": 0.9318518518518518, "grad_norm": 1.4408797025680542, "learning_rate": 1.378799110452187e-05, "loss": 1.0344, "step": 6290 }, { "epoch": 0.932, "grad_norm": 1.2716048955917358, "learning_rate": 1.3758339510748702e-05, "loss": 1.0635, "step": 6291 }, { "epoch": 0.9321481481481482, "grad_norm": 1.6171166896820068, "learning_rate": 1.3728687916975538e-05, "loss": 1.0549, "step": 6292 }, { "epoch": 0.9322962962962963, "grad_norm": 1.4237630367279053, "learning_rate": 1.3699036323202372e-05, "loss": 0.8729, "step": 6293 }, { "epoch": 0.9324444444444444, "grad_norm": 1.8324460983276367, "learning_rate": 1.3669384729429208e-05, "loss": 0.882, "step": 6294 }, { "epoch": 0.9325925925925926, "grad_norm": 1.6489802598953247, "learning_rate": 1.3639733135656041e-05, "loss": 1.0537, "step": 6295 }, { "epoch": 0.9327407407407408, "grad_norm": 1.4294698238372803, "learning_rate": 1.3610081541882877e-05, "loss": 0.9593, "step": 6296 }, { "epoch": 0.9328888888888889, "grad_norm": 1.6796156167984009, "learning_rate": 1.358042994810971e-05, "loss": 1.1417, "step": 6297 }, { "epoch": 0.933037037037037, "grad_norm": 2.3616726398468018, "learning_rate": 1.3550778354336546e-05, "loss": 1.104, "step": 6298 }, { "epoch": 0.9331851851851852, "grad_norm": 1.9400267601013184, "learning_rate": 1.352112676056338e-05, "loss": 1.1002, "step": 6299 }, { "epoch": 0.9333333333333333, "grad_norm": 1.6310405731201172, "learning_rate": 1.3491475166790216e-05, "loss": 0.9469, "step": 6300 }, { "epoch": 0.9334814814814815, "grad_norm": 1.4283984899520874, "learning_rate": 1.3461823573017052e-05, "loss": 1.0257, "step": 6301 }, { "epoch": 0.9336296296296296, "grad_norm": 
1.5619159936904907, "learning_rate": 1.3432171979243885e-05, "loss": 1.0525, "step": 6302 }, { "epoch": 0.9337777777777778, "grad_norm": 1.9005515575408936, "learning_rate": 1.340252038547072e-05, "loss": 1.2217, "step": 6303 }, { "epoch": 0.9339259259259259, "grad_norm": 2.2778842449188232, "learning_rate": 1.3372868791697555e-05, "loss": 1.0038, "step": 6304 }, { "epoch": 0.9340740740740741, "grad_norm": 2.3246541023254395, "learning_rate": 1.334321719792439e-05, "loss": 0.8599, "step": 6305 }, { "epoch": 0.9342222222222222, "grad_norm": 1.6047136783599854, "learning_rate": 1.3313565604151223e-05, "loss": 0.995, "step": 6306 }, { "epoch": 0.9343703703703704, "grad_norm": 1.4049116373062134, "learning_rate": 1.328391401037806e-05, "loss": 1.2271, "step": 6307 }, { "epoch": 0.9345185185185185, "grad_norm": 2.139193058013916, "learning_rate": 1.3254262416604894e-05, "loss": 1.2563, "step": 6308 }, { "epoch": 0.9346666666666666, "grad_norm": 2.2409770488739014, "learning_rate": 1.322461082283173e-05, "loss": 1.1317, "step": 6309 }, { "epoch": 0.9348148148148148, "grad_norm": 1.7860808372497559, "learning_rate": 1.3194959229058562e-05, "loss": 0.943, "step": 6310 }, { "epoch": 0.934962962962963, "grad_norm": 1.4653562307357788, "learning_rate": 1.3165307635285398e-05, "loss": 1.0892, "step": 6311 }, { "epoch": 0.9351111111111111, "grad_norm": 2.659821033477783, "learning_rate": 1.3135656041512231e-05, "loss": 1.0764, "step": 6312 }, { "epoch": 0.9352592592592592, "grad_norm": 6.066284656524658, "learning_rate": 1.3106004447739067e-05, "loss": 1.1168, "step": 6313 }, { "epoch": 0.9354074074074074, "grad_norm": 1.884269118309021, "learning_rate": 1.3076352853965901e-05, "loss": 0.9515, "step": 6314 }, { "epoch": 0.9355555555555556, "grad_norm": 2.44565486907959, "learning_rate": 1.3046701260192737e-05, "loss": 0.9311, "step": 6315 }, { "epoch": 0.9357037037037037, "grad_norm": 2.0083768367767334, "learning_rate": 1.301704966641957e-05, "loss": 0.9074, "step": 6316 }, { 
"epoch": 0.9358518518518518, "grad_norm": 1.552828311920166, "learning_rate": 1.2987398072646406e-05, "loss": 1.0611, "step": 6317 }, { "epoch": 0.936, "grad_norm": 1.6066118478775024, "learning_rate": 1.295774647887324e-05, "loss": 0.9488, "step": 6318 }, { "epoch": 0.9361481481481482, "grad_norm": 2.312607526779175, "learning_rate": 1.2928094885100076e-05, "loss": 1.0095, "step": 6319 }, { "epoch": 0.9362962962962963, "grad_norm": 1.71018648147583, "learning_rate": 1.2898443291326909e-05, "loss": 1.0762, "step": 6320 }, { "epoch": 0.9364444444444444, "grad_norm": 1.7650734186172485, "learning_rate": 1.2868791697553745e-05, "loss": 1.0212, "step": 6321 }, { "epoch": 0.9365925925925926, "grad_norm": 2.5213658809661865, "learning_rate": 1.2839140103780579e-05, "loss": 0.7357, "step": 6322 }, { "epoch": 0.9367407407407408, "grad_norm": 1.4773727655410767, "learning_rate": 1.2809488510007415e-05, "loss": 1.0724, "step": 6323 }, { "epoch": 0.9368888888888889, "grad_norm": 3.468384027481079, "learning_rate": 1.2779836916234247e-05, "loss": 1.1406, "step": 6324 }, { "epoch": 0.937037037037037, "grad_norm": 0.9928172826766968, "learning_rate": 1.2750185322461083e-05, "loss": 0.8181, "step": 6325 }, { "epoch": 0.9371851851851852, "grad_norm": 1.7566633224487305, "learning_rate": 1.2720533728687916e-05, "loss": 1.0395, "step": 6326 }, { "epoch": 0.9373333333333334, "grad_norm": 2.067307233810425, "learning_rate": 1.2690882134914752e-05, "loss": 1.0468, "step": 6327 }, { "epoch": 0.9374814814814815, "grad_norm": 1.3327713012695312, "learning_rate": 1.2661230541141586e-05, "loss": 0.8396, "step": 6328 }, { "epoch": 0.9376296296296296, "grad_norm": 2.0988290309906006, "learning_rate": 1.2631578947368422e-05, "loss": 0.7821, "step": 6329 }, { "epoch": 0.9377777777777778, "grad_norm": 2.2182228565216064, "learning_rate": 1.2601927353595255e-05, "loss": 0.8587, "step": 6330 }, { "epoch": 0.9379259259259259, "grad_norm": 2.2985942363739014, "learning_rate": 1.2572275759822091e-05, 
"loss": 0.9524, "step": 6331 }, { "epoch": 0.9380740740740741, "grad_norm": 1.6669297218322754, "learning_rate": 1.2542624166048925e-05, "loss": 1.1466, "step": 6332 }, { "epoch": 0.9382222222222222, "grad_norm": 1.1697335243225098, "learning_rate": 1.2512972572275761e-05, "loss": 0.8193, "step": 6333 }, { "epoch": 0.9383703703703704, "grad_norm": 1.3751897811889648, "learning_rate": 1.2483320978502596e-05, "loss": 1.1782, "step": 6334 }, { "epoch": 0.9385185185185185, "grad_norm": 1.8882319927215576, "learning_rate": 1.245366938472943e-05, "loss": 1.0286, "step": 6335 }, { "epoch": 0.9386666666666666, "grad_norm": 1.8155957460403442, "learning_rate": 1.2424017790956264e-05, "loss": 1.4968, "step": 6336 }, { "epoch": 0.9388148148148148, "grad_norm": 1.2545281648635864, "learning_rate": 1.23943661971831e-05, "loss": 0.9662, "step": 6337 }, { "epoch": 0.938962962962963, "grad_norm": 1.09188973903656, "learning_rate": 1.2364714603409934e-05, "loss": 1.0145, "step": 6338 }, { "epoch": 0.9391111111111111, "grad_norm": 1.5590113401412964, "learning_rate": 1.2335063009636769e-05, "loss": 0.9294, "step": 6339 }, { "epoch": 0.9392592592592592, "grad_norm": 1.930653691291809, "learning_rate": 1.2305411415863603e-05, "loss": 1.0176, "step": 6340 }, { "epoch": 0.9394074074074074, "grad_norm": 1.7895466089248657, "learning_rate": 1.2275759822090437e-05, "loss": 1.1676, "step": 6341 }, { "epoch": 0.9395555555555556, "grad_norm": 2.135303020477295, "learning_rate": 1.2246108228317273e-05, "loss": 1.0209, "step": 6342 }, { "epoch": 0.9397037037037037, "grad_norm": 1.345557689666748, "learning_rate": 1.2216456634544108e-05, "loss": 0.8773, "step": 6343 }, { "epoch": 0.9398518518518518, "grad_norm": 1.3942506313323975, "learning_rate": 1.2186805040770942e-05, "loss": 0.8741, "step": 6344 }, { "epoch": 0.94, "grad_norm": 1.980754017829895, "learning_rate": 1.2157153446997776e-05, "loss": 0.9258, "step": 6345 }, { "epoch": 0.9401481481481482, "grad_norm": 1.8694645166397095, 
"learning_rate": 1.212750185322461e-05, "loss": 0.9663, "step": 6346 }, { "epoch": 0.9402962962962963, "grad_norm": 1.7359546422958374, "learning_rate": 1.2097850259451446e-05, "loss": 1.0104, "step": 6347 }, { "epoch": 0.9404444444444444, "grad_norm": 1.3269741535186768, "learning_rate": 1.206819866567828e-05, "loss": 0.8816, "step": 6348 }, { "epoch": 0.9405925925925926, "grad_norm": 2.214254140853882, "learning_rate": 1.2038547071905115e-05, "loss": 1.0563, "step": 6349 }, { "epoch": 0.9407407407407408, "grad_norm": 1.2962080240249634, "learning_rate": 1.200889547813195e-05, "loss": 0.9694, "step": 6350 }, { "epoch": 0.9408888888888889, "grad_norm": 2.1471807956695557, "learning_rate": 1.1979243884358785e-05, "loss": 0.9636, "step": 6351 }, { "epoch": 0.941037037037037, "grad_norm": 1.5202158689498901, "learning_rate": 1.194959229058562e-05, "loss": 0.9733, "step": 6352 }, { "epoch": 0.9411851851851852, "grad_norm": 2.029224395751953, "learning_rate": 1.1919940696812454e-05, "loss": 0.8564, "step": 6353 }, { "epoch": 0.9413333333333334, "grad_norm": 1.591579556465149, "learning_rate": 1.1890289103039288e-05, "loss": 0.9636, "step": 6354 }, { "epoch": 0.9414814814814815, "grad_norm": 1.2714380025863647, "learning_rate": 1.1860637509266123e-05, "loss": 0.8864, "step": 6355 }, { "epoch": 0.9416296296296296, "grad_norm": 2.172243356704712, "learning_rate": 1.1830985915492958e-05, "loss": 1.2668, "step": 6356 }, { "epoch": 0.9417777777777778, "grad_norm": 1.522011160850525, "learning_rate": 1.1801334321719793e-05, "loss": 0.9845, "step": 6357 }, { "epoch": 0.9419259259259259, "grad_norm": 1.4941858053207397, "learning_rate": 1.1771682727946627e-05, "loss": 1.0767, "step": 6358 }, { "epoch": 0.9420740740740741, "grad_norm": 1.603853464126587, "learning_rate": 1.1742031134173461e-05, "loss": 1.0759, "step": 6359 }, { "epoch": 0.9422222222222222, "grad_norm": 3.413689613342285, "learning_rate": 1.1712379540400296e-05, "loss": 1.0716, "step": 6360 }, { "epoch": 
0.9423703703703704, "grad_norm": 2.329671859741211, "learning_rate": 1.1682727946627132e-05, "loss": 0.7877, "step": 6361 }, { "epoch": 0.9425185185185185, "grad_norm": 1.9882824420928955, "learning_rate": 1.1653076352853968e-05, "loss": 0.8958, "step": 6362 }, { "epoch": 0.9426666666666667, "grad_norm": 2.55271577835083, "learning_rate": 1.1623424759080802e-05, "loss": 1.2704, "step": 6363 }, { "epoch": 0.9428148148148148, "grad_norm": 1.7175090312957764, "learning_rate": 1.1593773165307636e-05, "loss": 0.8766, "step": 6364 }, { "epoch": 0.942962962962963, "grad_norm": 1.5329500436782837, "learning_rate": 1.156412157153447e-05, "loss": 1.2447, "step": 6365 }, { "epoch": 0.9431111111111111, "grad_norm": 2.170001745223999, "learning_rate": 1.1534469977761307e-05, "loss": 0.9401, "step": 6366 }, { "epoch": 0.9432592592592592, "grad_norm": 1.2377965450286865, "learning_rate": 1.150481838398814e-05, "loss": 0.9982, "step": 6367 }, { "epoch": 0.9434074074074074, "grad_norm": 2.794631242752075, "learning_rate": 1.1475166790214975e-05, "loss": 0.8746, "step": 6368 }, { "epoch": 0.9435555555555556, "grad_norm": 1.6014835834503174, "learning_rate": 1.144551519644181e-05, "loss": 0.9898, "step": 6369 }, { "epoch": 0.9437037037037037, "grad_norm": 1.6563359498977661, "learning_rate": 1.1415863602668644e-05, "loss": 1.2244, "step": 6370 }, { "epoch": 0.9438518518518518, "grad_norm": 1.6768234968185425, "learning_rate": 1.138621200889548e-05, "loss": 0.9442, "step": 6371 }, { "epoch": 0.944, "grad_norm": 1.246340274810791, "learning_rate": 1.1356560415122314e-05, "loss": 1.1951, "step": 6372 }, { "epoch": 0.9441481481481482, "grad_norm": 1.406167984008789, "learning_rate": 1.1326908821349148e-05, "loss": 0.6989, "step": 6373 }, { "epoch": 0.9442962962962963, "grad_norm": 5.012592315673828, "learning_rate": 1.1297257227575983e-05, "loss": 0.9495, "step": 6374 }, { "epoch": 0.9444444444444444, "grad_norm": 1.3904874324798584, "learning_rate": 1.1267605633802817e-05, "loss": 
1.0901, "step": 6375 }, { "epoch": 0.9445925925925925, "grad_norm": 1.3849149942398071, "learning_rate": 1.1237954040029653e-05, "loss": 1.1584, "step": 6376 }, { "epoch": 0.9447407407407408, "grad_norm": 1.42518150806427, "learning_rate": 1.1208302446256487e-05, "loss": 1.0418, "step": 6377 }, { "epoch": 0.9448888888888889, "grad_norm": 2.9414753913879395, "learning_rate": 1.1178650852483321e-05, "loss": 1.0448, "step": 6378 }, { "epoch": 0.945037037037037, "grad_norm": 1.3405108451843262, "learning_rate": 1.1148999258710156e-05, "loss": 1.059, "step": 6379 }, { "epoch": 0.9451851851851852, "grad_norm": 2.4102418422698975, "learning_rate": 1.1119347664936992e-05, "loss": 0.9772, "step": 6380 }, { "epoch": 0.9453333333333334, "grad_norm": 2.4661507606506348, "learning_rate": 1.1089696071163826e-05, "loss": 1.1724, "step": 6381 }, { "epoch": 0.9454814814814815, "grad_norm": 1.4191091060638428, "learning_rate": 1.106004447739066e-05, "loss": 0.8019, "step": 6382 }, { "epoch": 0.9456296296296296, "grad_norm": 1.429807186126709, "learning_rate": 1.1030392883617495e-05, "loss": 1.0792, "step": 6383 }, { "epoch": 0.9457777777777778, "grad_norm": 1.7942218780517578, "learning_rate": 1.1000741289844329e-05, "loss": 0.9689, "step": 6384 }, { "epoch": 0.945925925925926, "grad_norm": 2.545766592025757, "learning_rate": 1.0971089696071165e-05, "loss": 1.0841, "step": 6385 }, { "epoch": 0.9460740740740741, "grad_norm": 1.7510018348693848, "learning_rate": 1.0941438102298e-05, "loss": 1.0998, "step": 6386 }, { "epoch": 0.9462222222222222, "grad_norm": 1.6062695980072021, "learning_rate": 1.0911786508524834e-05, "loss": 0.9178, "step": 6387 }, { "epoch": 0.9463703703703704, "grad_norm": 1.8828139305114746, "learning_rate": 1.0882134914751668e-05, "loss": 1.0608, "step": 6388 }, { "epoch": 0.9465185185185185, "grad_norm": 1.3656530380249023, "learning_rate": 1.0852483320978502e-05, "loss": 0.8773, "step": 6389 }, { "epoch": 0.9466666666666667, "grad_norm": 2.1898579597473145, 
"learning_rate": 1.0822831727205338e-05, "loss": 0.9648, "step": 6390 }, { "epoch": 0.9468148148148148, "grad_norm": 1.3358759880065918, "learning_rate": 1.0793180133432172e-05, "loss": 1.0784, "step": 6391 }, { "epoch": 0.946962962962963, "grad_norm": 2.2030200958251953, "learning_rate": 1.0763528539659007e-05, "loss": 1.0226, "step": 6392 }, { "epoch": 0.9471111111111111, "grad_norm": 1.8959722518920898, "learning_rate": 1.0733876945885841e-05, "loss": 1.0063, "step": 6393 }, { "epoch": 0.9472592592592592, "grad_norm": 1.3742667436599731, "learning_rate": 1.0704225352112677e-05, "loss": 1.0494, "step": 6394 }, { "epoch": 0.9474074074074074, "grad_norm": 1.5060292482376099, "learning_rate": 1.0674573758339511e-05, "loss": 0.9241, "step": 6395 }, { "epoch": 0.9475555555555556, "grad_norm": 1.7580757141113281, "learning_rate": 1.0644922164566346e-05, "loss": 1.1803, "step": 6396 }, { "epoch": 0.9477037037037037, "grad_norm": 2.092602252960205, "learning_rate": 1.061527057079318e-05, "loss": 1.1633, "step": 6397 }, { "epoch": 0.9478518518518518, "grad_norm": 2.8467867374420166, "learning_rate": 1.0585618977020014e-05, "loss": 1.1381, "step": 6398 }, { "epoch": 0.948, "grad_norm": 1.915920376777649, "learning_rate": 1.055596738324685e-05, "loss": 0.9824, "step": 6399 }, { "epoch": 0.9481481481481482, "grad_norm": 1.6160054206848145, "learning_rate": 1.0526315789473684e-05, "loss": 0.9829, "step": 6400 }, { "epoch": 0.9482962962962963, "grad_norm": 1.2259273529052734, "learning_rate": 1.0496664195700519e-05, "loss": 1.0014, "step": 6401 }, { "epoch": 0.9484444444444444, "grad_norm": 1.5422176122665405, "learning_rate": 1.0467012601927353e-05, "loss": 0.8151, "step": 6402 }, { "epoch": 0.9485925925925925, "grad_norm": 2.2120344638824463, "learning_rate": 1.0437361008154187e-05, "loss": 1.3064, "step": 6403 }, { "epoch": 0.9487407407407408, "grad_norm": 1.8899518251419067, "learning_rate": 1.0407709414381023e-05, "loss": 1.0727, "step": 6404 }, { "epoch": 
0.9488888888888889, "grad_norm": 2.15838885307312, "learning_rate": 1.037805782060786e-05, "loss": 1.0804, "step": 6405 }, { "epoch": 0.949037037037037, "grad_norm": 2.040877342224121, "learning_rate": 1.0348406226834694e-05, "loss": 0.8583, "step": 6406 }, { "epoch": 0.9491851851851851, "grad_norm": 1.3041592836380005, "learning_rate": 1.0318754633061528e-05, "loss": 1.0614, "step": 6407 }, { "epoch": 0.9493333333333334, "grad_norm": 3.521601676940918, "learning_rate": 1.0289103039288362e-05, "loss": 0.7696, "step": 6408 }, { "epoch": 0.9494814814814815, "grad_norm": 2.875967264175415, "learning_rate": 1.0259451445515198e-05, "loss": 0.9629, "step": 6409 }, { "epoch": 0.9496296296296296, "grad_norm": 3.0974297523498535, "learning_rate": 1.0229799851742032e-05, "loss": 0.7914, "step": 6410 }, { "epoch": 0.9497777777777778, "grad_norm": 3.428769588470459, "learning_rate": 1.0200148257968867e-05, "loss": 0.8564, "step": 6411 }, { "epoch": 0.949925925925926, "grad_norm": 1.9870706796646118, "learning_rate": 1.0170496664195701e-05, "loss": 1.0374, "step": 6412 }, { "epoch": 0.9500740740740741, "grad_norm": 1.303742527961731, "learning_rate": 1.0140845070422535e-05, "loss": 0.8516, "step": 6413 }, { "epoch": 0.9502222222222222, "grad_norm": 1.4214708805084229, "learning_rate": 1.0111193476649371e-05, "loss": 0.8222, "step": 6414 }, { "epoch": 0.9503703703703704, "grad_norm": 3.44465708732605, "learning_rate": 1.0081541882876206e-05, "loss": 1.1527, "step": 6415 }, { "epoch": 0.9505185185185185, "grad_norm": 1.6411240100860596, "learning_rate": 1.005189028910304e-05, "loss": 1.0337, "step": 6416 }, { "epoch": 0.9506666666666667, "grad_norm": 1.5941029787063599, "learning_rate": 1.0022238695329874e-05, "loss": 0.9292, "step": 6417 }, { "epoch": 0.9508148148148148, "grad_norm": 1.6715425252914429, "learning_rate": 9.992587101556709e-06, "loss": 0.9052, "step": 6418 }, { "epoch": 0.950962962962963, "grad_norm": 1.9611213207244873, "learning_rate": 9.962935507783545e-06, 
"loss": 1.0738, "step": 6419 }, { "epoch": 0.9511111111111111, "grad_norm": 1.359175682067871, "learning_rate": 9.933283914010379e-06, "loss": 1.0809, "step": 6420 }, { "epoch": 0.9512592592592592, "grad_norm": 31.894559860229492, "learning_rate": 9.903632320237213e-06, "loss": 0.8927, "step": 6421 }, { "epoch": 0.9514074074074074, "grad_norm": 2.469003915786743, "learning_rate": 9.873980726464047e-06, "loss": 1.1294, "step": 6422 }, { "epoch": 0.9515555555555556, "grad_norm": 1.718002200126648, "learning_rate": 9.844329132690883e-06, "loss": 1.1614, "step": 6423 }, { "epoch": 0.9517037037037037, "grad_norm": 1.6662921905517578, "learning_rate": 9.814677538917718e-06, "loss": 0.952, "step": 6424 }, { "epoch": 0.9518518518518518, "grad_norm": 1.274985909461975, "learning_rate": 9.785025945144552e-06, "loss": 0.9689, "step": 6425 }, { "epoch": 0.952, "grad_norm": 1.6987491846084595, "learning_rate": 9.755374351371386e-06, "loss": 0.9865, "step": 6426 }, { "epoch": 0.9521481481481482, "grad_norm": 1.5471701622009277, "learning_rate": 9.72572275759822e-06, "loss": 1.0109, "step": 6427 }, { "epoch": 0.9522962962962963, "grad_norm": 1.410084843635559, "learning_rate": 9.696071163825057e-06, "loss": 1.0172, "step": 6428 }, { "epoch": 0.9524444444444444, "grad_norm": 2.285048246383667, "learning_rate": 9.666419570051891e-06, "loss": 0.8758, "step": 6429 }, { "epoch": 0.9525925925925925, "grad_norm": 1.4201016426086426, "learning_rate": 9.636767976278725e-06, "loss": 1.2978, "step": 6430 }, { "epoch": 0.9527407407407408, "grad_norm": 1.587431788444519, "learning_rate": 9.60711638250556e-06, "loss": 1.215, "step": 6431 }, { "epoch": 0.9528888888888889, "grad_norm": 1.606855034828186, "learning_rate": 9.577464788732394e-06, "loss": 1.0183, "step": 6432 }, { "epoch": 0.953037037037037, "grad_norm": 2.1990363597869873, "learning_rate": 9.54781319495923e-06, "loss": 0.9638, "step": 6433 }, { "epoch": 0.9531851851851851, "grad_norm": 2.6111652851104736, "learning_rate": 
9.518161601186064e-06, "loss": 0.7105, "step": 6434 }, { "epoch": 0.9533333333333334, "grad_norm": 1.7862600088119507, "learning_rate": 9.488510007412898e-06, "loss": 0.9702, "step": 6435 }, { "epoch": 0.9534814814814815, "grad_norm": 1.4127826690673828, "learning_rate": 9.458858413639733e-06, "loss": 0.9044, "step": 6436 }, { "epoch": 0.9536296296296296, "grad_norm": 2.47554087638855, "learning_rate": 9.429206819866569e-06, "loss": 1.1324, "step": 6437 }, { "epoch": 0.9537777777777777, "grad_norm": 1.462254524230957, "learning_rate": 9.399555226093403e-06, "loss": 0.9682, "step": 6438 }, { "epoch": 0.953925925925926, "grad_norm": 1.5132200717926025, "learning_rate": 9.369903632320237e-06, "loss": 1.1639, "step": 6439 }, { "epoch": 0.9540740740740741, "grad_norm": 1.4542734622955322, "learning_rate": 9.340252038547072e-06, "loss": 1.045, "step": 6440 }, { "epoch": 0.9542222222222222, "grad_norm": 1.4217482805252075, "learning_rate": 9.310600444773906e-06, "loss": 0.9905, "step": 6441 }, { "epoch": 0.9543703703703704, "grad_norm": 2.0644896030426025, "learning_rate": 9.280948851000742e-06, "loss": 0.958, "step": 6442 }, { "epoch": 0.9545185185185185, "grad_norm": 1.2777775526046753, "learning_rate": 9.251297257227576e-06, "loss": 0.9854, "step": 6443 }, { "epoch": 0.9546666666666667, "grad_norm": 1.7243410348892212, "learning_rate": 9.22164566345441e-06, "loss": 1.0892, "step": 6444 }, { "epoch": 0.9548148148148148, "grad_norm": 1.822312593460083, "learning_rate": 9.191994069681246e-06, "loss": 0.9043, "step": 6445 }, { "epoch": 0.954962962962963, "grad_norm": 2.3563039302825928, "learning_rate": 9.16234247590808e-06, "loss": 1.0392, "step": 6446 }, { "epoch": 0.9551111111111111, "grad_norm": 1.7175642251968384, "learning_rate": 9.132690882134915e-06, "loss": 1.0496, "step": 6447 }, { "epoch": 0.9552592592592593, "grad_norm": 2.303910970687866, "learning_rate": 9.103039288361751e-06, "loss": 1.0964, "step": 6448 }, { "epoch": 0.9554074074074074, "grad_norm": 
1.6031699180603027, "learning_rate": 9.073387694588585e-06, "loss": 0.9436, "step": 6449 }, { "epoch": 0.9555555555555556, "grad_norm": 1.137147307395935, "learning_rate": 9.04373610081542e-06, "loss": 0.7867, "step": 6450 }, { "epoch": 0.9557037037037037, "grad_norm": 1.563591480255127, "learning_rate": 9.014084507042254e-06, "loss": 0.8269, "step": 6451 }, { "epoch": 0.9558518518518518, "grad_norm": 2.2050209045410156, "learning_rate": 8.98443291326909e-06, "loss": 1.0443, "step": 6452 }, { "epoch": 0.956, "grad_norm": 2.460078001022339, "learning_rate": 8.954781319495924e-06, "loss": 0.7234, "step": 6453 }, { "epoch": 0.9561481481481482, "grad_norm": 2.6189513206481934, "learning_rate": 8.925129725722758e-06, "loss": 0.9367, "step": 6454 }, { "epoch": 0.9562962962962963, "grad_norm": 1.8325825929641724, "learning_rate": 8.895478131949593e-06, "loss": 0.8708, "step": 6455 }, { "epoch": 0.9564444444444444, "grad_norm": 1.6920523643493652, "learning_rate": 8.865826538176427e-06, "loss": 0.8271, "step": 6456 }, { "epoch": 0.9565925925925925, "grad_norm": 5.583585262298584, "learning_rate": 8.836174944403263e-06, "loss": 1.0079, "step": 6457 }, { "epoch": 0.9567407407407408, "grad_norm": 2.426647901535034, "learning_rate": 8.806523350630097e-06, "loss": 0.9096, "step": 6458 }, { "epoch": 0.9568888888888889, "grad_norm": 3.149855852127075, "learning_rate": 8.776871756856932e-06, "loss": 0.8714, "step": 6459 }, { "epoch": 0.957037037037037, "grad_norm": 1.0600141286849976, "learning_rate": 8.747220163083766e-06, "loss": 0.8498, "step": 6460 }, { "epoch": 0.9571851851851851, "grad_norm": 1.5460138320922852, "learning_rate": 8.7175685693106e-06, "loss": 1.2422, "step": 6461 }, { "epoch": 0.9573333333333334, "grad_norm": 2.473261833190918, "learning_rate": 8.687916975537436e-06, "loss": 0.9405, "step": 6462 }, { "epoch": 0.9574814814814815, "grad_norm": 1.4824777841567993, "learning_rate": 8.65826538176427e-06, "loss": 0.8999, "step": 6463 }, { "epoch": 
0.9576296296296296, "grad_norm": 2.313084602355957, "learning_rate": 8.628613787991105e-06, "loss": 1.0752, "step": 6464 }, { "epoch": 0.9577777777777777, "grad_norm": 4.357772350311279, "learning_rate": 8.598962194217939e-06, "loss": 0.7528, "step": 6465 }, { "epoch": 0.957925925925926, "grad_norm": 1.278823733329773, "learning_rate": 8.569310600444775e-06, "loss": 0.9011, "step": 6466 }, { "epoch": 0.9580740740740741, "grad_norm": 1.3825360536575317, "learning_rate": 8.53965900667161e-06, "loss": 1.2506, "step": 6467 }, { "epoch": 0.9582222222222222, "grad_norm": 1.978281021118164, "learning_rate": 8.510007412898444e-06, "loss": 0.9997, "step": 6468 }, { "epoch": 0.9583703703703703, "grad_norm": 1.4772392511367798, "learning_rate": 8.480355819125278e-06, "loss": 0.9156, "step": 6469 }, { "epoch": 0.9585185185185185, "grad_norm": 1.7920863628387451, "learning_rate": 8.450704225352112e-06, "loss": 0.8526, "step": 6470 }, { "epoch": 0.9586666666666667, "grad_norm": 1.5702614784240723, "learning_rate": 8.421052631578948e-06, "loss": 0.8537, "step": 6471 }, { "epoch": 0.9588148148148148, "grad_norm": 2.078935146331787, "learning_rate": 8.391401037805783e-06, "loss": 0.877, "step": 6472 }, { "epoch": 0.958962962962963, "grad_norm": 1.4247301816940308, "learning_rate": 8.361749444032617e-06, "loss": 0.9224, "step": 6473 }, { "epoch": 0.9591111111111111, "grad_norm": 2.6739819049835205, "learning_rate": 8.332097850259451e-06, "loss": 1.047, "step": 6474 }, { "epoch": 0.9592592592592593, "grad_norm": 7.315777778625488, "learning_rate": 8.302446256486285e-06, "loss": 0.881, "step": 6475 }, { "epoch": 0.9594074074074074, "grad_norm": 1.5592920780181885, "learning_rate": 8.272794662713121e-06, "loss": 1.1478, "step": 6476 }, { "epoch": 0.9595555555555556, "grad_norm": 1.735410451889038, "learning_rate": 8.243143068939956e-06, "loss": 1.0698, "step": 6477 }, { "epoch": 0.9597037037037037, "grad_norm": 1.1631207466125488, "learning_rate": 8.21349147516679e-06, "loss": 0.9357, 
"step": 6478 }, { "epoch": 0.9598518518518518, "grad_norm": 1.8847887516021729, "learning_rate": 8.183839881393624e-06, "loss": 1.1412, "step": 6479 }, { "epoch": 0.96, "grad_norm": 2.4948129653930664, "learning_rate": 8.15418828762046e-06, "loss": 1.012, "step": 6480 }, { "epoch": 0.9601481481481482, "grad_norm": 1.49558424949646, "learning_rate": 8.124536693847295e-06, "loss": 0.9518, "step": 6481 }, { "epoch": 0.9602962962962963, "grad_norm": 1.9432427883148193, "learning_rate": 8.094885100074129e-06, "loss": 0.8645, "step": 6482 }, { "epoch": 0.9604444444444444, "grad_norm": 1.3539835214614868, "learning_rate": 8.065233506300963e-06, "loss": 0.814, "step": 6483 }, { "epoch": 0.9605925925925926, "grad_norm": 2.094672918319702, "learning_rate": 8.035581912527797e-06, "loss": 0.9301, "step": 6484 }, { "epoch": 0.9607407407407408, "grad_norm": 2.508594512939453, "learning_rate": 8.005930318754633e-06, "loss": 1.0625, "step": 6485 }, { "epoch": 0.9608888888888889, "grad_norm": 1.4947229623794556, "learning_rate": 7.976278724981468e-06, "loss": 0.9313, "step": 6486 }, { "epoch": 0.961037037037037, "grad_norm": 1.365053415298462, "learning_rate": 7.946627131208304e-06, "loss": 0.9352, "step": 6487 }, { "epoch": 0.9611851851851851, "grad_norm": 1.3837653398513794, "learning_rate": 7.916975537435138e-06, "loss": 0.7481, "step": 6488 }, { "epoch": 0.9613333333333334, "grad_norm": 2.1290810108184814, "learning_rate": 7.887323943661972e-06, "loss": 1.0015, "step": 6489 }, { "epoch": 0.9614814814814815, "grad_norm": 1.3843375444412231, "learning_rate": 7.857672349888807e-06, "loss": 0.8474, "step": 6490 }, { "epoch": 0.9616296296296296, "grad_norm": 1.7854416370391846, "learning_rate": 7.828020756115643e-06, "loss": 0.819, "step": 6491 }, { "epoch": 0.9617777777777777, "grad_norm": 1.7904064655303955, "learning_rate": 7.798369162342477e-06, "loss": 0.8276, "step": 6492 }, { "epoch": 0.961925925925926, "grad_norm": 8.385926246643066, "learning_rate": 7.768717568569311e-06, 
"loss": 0.6376, "step": 6493 }, { "epoch": 0.9620740740740741, "grad_norm": 1.8393787145614624, "learning_rate": 7.739065974796146e-06, "loss": 0.8887, "step": 6494 }, { "epoch": 0.9622222222222222, "grad_norm": 1.6400352716445923, "learning_rate": 7.709414381022981e-06, "loss": 0.9952, "step": 6495 }, { "epoch": 0.9623703703703703, "grad_norm": 1.658842921257019, "learning_rate": 7.679762787249816e-06, "loss": 1.0377, "step": 6496 }, { "epoch": 0.9625185185185186, "grad_norm": 2.5278728008270264, "learning_rate": 7.65011119347665e-06, "loss": 0.8351, "step": 6497 }, { "epoch": 0.9626666666666667, "grad_norm": 1.5374990701675415, "learning_rate": 7.620459599703484e-06, "loss": 0.8171, "step": 6498 }, { "epoch": 0.9628148148148148, "grad_norm": 2.5066142082214355, "learning_rate": 7.5908080059303195e-06, "loss": 1.0571, "step": 6499 }, { "epoch": 0.9629629629629629, "grad_norm": 1.5100359916687012, "learning_rate": 7.561156412157154e-06, "loss": 1.1588, "step": 6500 }, { "epoch": 0.9631111111111111, "grad_norm": 2.0300960540771484, "learning_rate": 7.531504818383989e-06, "loss": 1.0068, "step": 6501 }, { "epoch": 0.9632592592592593, "grad_norm": 1.3035870790481567, "learning_rate": 7.501853224610823e-06, "loss": 0.9973, "step": 6502 }, { "epoch": 0.9634074074074074, "grad_norm": 2.154204845428467, "learning_rate": 7.472201630837658e-06, "loss": 0.7465, "step": 6503 }, { "epoch": 0.9635555555555556, "grad_norm": 1.118698000907898, "learning_rate": 7.442550037064493e-06, "loss": 1.0132, "step": 6504 }, { "epoch": 0.9637037037037037, "grad_norm": 1.9799890518188477, "learning_rate": 7.412898443291327e-06, "loss": 0.9759, "step": 6505 }, { "epoch": 0.9638518518518518, "grad_norm": 1.299014925956726, "learning_rate": 7.383246849518162e-06, "loss": 0.9128, "step": 6506 }, { "epoch": 0.964, "grad_norm": 1.2748594284057617, "learning_rate": 7.3535952557449964e-06, "loss": 0.9606, "step": 6507 }, { "epoch": 0.9641481481481482, "grad_norm": 1.5896930694580078, 
"learning_rate": 7.3239436619718316e-06, "loss": 0.7978, "step": 6508 }, { "epoch": 0.9642962962962963, "grad_norm": 1.6602082252502441, "learning_rate": 7.294292068198666e-06, "loss": 0.8388, "step": 6509 }, { "epoch": 0.9644444444444444, "grad_norm": 4.57015323638916, "learning_rate": 7.264640474425501e-06, "loss": 0.8386, "step": 6510 }, { "epoch": 0.9645925925925926, "grad_norm": 1.690658688545227, "learning_rate": 7.234988880652335e-06, "loss": 0.9881, "step": 6511 }, { "epoch": 0.9647407407407408, "grad_norm": 2.0589194297790527, "learning_rate": 7.20533728687917e-06, "loss": 0.7598, "step": 6512 }, { "epoch": 0.9648888888888889, "grad_norm": 1.7922649383544922, "learning_rate": 7.175685693106005e-06, "loss": 0.9423, "step": 6513 }, { "epoch": 0.965037037037037, "grad_norm": 1.9171276092529297, "learning_rate": 7.146034099332839e-06, "loss": 0.9521, "step": 6514 }, { "epoch": 0.9651851851851851, "grad_norm": 1.361095666885376, "learning_rate": 7.116382505559674e-06, "loss": 0.9834, "step": 6515 }, { "epoch": 0.9653333333333334, "grad_norm": 2.308819532394409, "learning_rate": 7.0867309117865085e-06, "loss": 1.1278, "step": 6516 }, { "epoch": 0.9654814814814815, "grad_norm": 1.7899411916732788, "learning_rate": 7.057079318013344e-06, "loss": 1.1066, "step": 6517 }, { "epoch": 0.9656296296296296, "grad_norm": 1.8420344591140747, "learning_rate": 7.027427724240178e-06, "loss": 1.0559, "step": 6518 }, { "epoch": 0.9657777777777777, "grad_norm": 2.107468605041504, "learning_rate": 6.997776130467012e-06, "loss": 0.8807, "step": 6519 }, { "epoch": 0.965925925925926, "grad_norm": 2.2763946056365967, "learning_rate": 6.968124536693847e-06, "loss": 0.9905, "step": 6520 }, { "epoch": 0.9660740740740741, "grad_norm": 2.3293261528015137, "learning_rate": 6.938472942920682e-06, "loss": 1.0107, "step": 6521 }, { "epoch": 0.9662222222222222, "grad_norm": 2.9471616744995117, "learning_rate": 6.908821349147517e-06, "loss": 0.9852, "step": 6522 }, { "epoch": 0.9663703703703703, 
"grad_norm": 1.65193772315979, "learning_rate": 6.879169755374351e-06, "loss": 0.9008, "step": 6523 }, { "epoch": 0.9665185185185186, "grad_norm": 1.3680131435394287, "learning_rate": 6.849518161601186e-06, "loss": 0.8921, "step": 6524 }, { "epoch": 0.9666666666666667, "grad_norm": 1.1072235107421875, "learning_rate": 6.8198665678280205e-06, "loss": 1.0373, "step": 6525 }, { "epoch": 0.9668148148148148, "grad_norm": 1.5350090265274048, "learning_rate": 6.790214974054855e-06, "loss": 0.8268, "step": 6526 }, { "epoch": 0.9669629629629629, "grad_norm": 1.4053906202316284, "learning_rate": 6.76056338028169e-06, "loss": 0.8095, "step": 6527 }, { "epoch": 0.9671111111111111, "grad_norm": 2.2893593311309814, "learning_rate": 6.730911786508526e-06, "loss": 1.1104, "step": 6528 }, { "epoch": 0.9672592592592593, "grad_norm": 1.2980014085769653, "learning_rate": 6.70126019273536e-06, "loss": 0.8806, "step": 6529 }, { "epoch": 0.9674074074074074, "grad_norm": 2.058032989501953, "learning_rate": 6.671608598962195e-06, "loss": 1.0578, "step": 6530 }, { "epoch": 0.9675555555555555, "grad_norm": 2.80718994140625, "learning_rate": 6.64195700518903e-06, "loss": 0.8415, "step": 6531 }, { "epoch": 0.9677037037037037, "grad_norm": 2.3966293334960938, "learning_rate": 6.612305411415865e-06, "loss": 0.9529, "step": 6532 }, { "epoch": 0.9678518518518519, "grad_norm": 1.762076735496521, "learning_rate": 6.582653817642699e-06, "loss": 1.025, "step": 6533 }, { "epoch": 0.968, "grad_norm": 1.6157422065734863, "learning_rate": 6.5530022238695334e-06, "loss": 0.8749, "step": 6534 }, { "epoch": 0.9681481481481482, "grad_norm": 1.8800804615020752, "learning_rate": 6.5233506300963686e-06, "loss": 1.1532, "step": 6535 }, { "epoch": 0.9682962962962963, "grad_norm": 3.605458974838257, "learning_rate": 6.493699036323203e-06, "loss": 0.9089, "step": 6536 }, { "epoch": 0.9684444444444444, "grad_norm": 1.5120714902877808, "learning_rate": 6.464047442550038e-06, "loss": 1.0531, "step": 6537 }, { "epoch": 
0.9685925925925926, "grad_norm": 2.1298978328704834, "learning_rate": 6.434395848776872e-06, "loss": 1.0819, "step": 6538 }, { "epoch": 0.9687407407407408, "grad_norm": 1.5408835411071777, "learning_rate": 6.4047442550037074e-06, "loss": 1.3253, "step": 6539 }, { "epoch": 0.9688888888888889, "grad_norm": 3.161810874938965, "learning_rate": 6.375092661230542e-06, "loss": 1.0546, "step": 6540 }, { "epoch": 0.969037037037037, "grad_norm": 5.96724271774292, "learning_rate": 6.345441067457376e-06, "loss": 0.8363, "step": 6541 }, { "epoch": 0.9691851851851851, "grad_norm": 1.551337718963623, "learning_rate": 6.315789473684211e-06, "loss": 0.7753, "step": 6542 }, { "epoch": 0.9693333333333334, "grad_norm": 2.6459968090057373, "learning_rate": 6.2861378799110455e-06, "loss": 0.8285, "step": 6543 }, { "epoch": 0.9694814814814815, "grad_norm": 1.5539460182189941, "learning_rate": 6.256486286137881e-06, "loss": 1.2449, "step": 6544 }, { "epoch": 0.9696296296296296, "grad_norm": 2.9508183002471924, "learning_rate": 6.226834692364715e-06, "loss": 0.9232, "step": 6545 }, { "epoch": 0.9697777777777777, "grad_norm": 1.4745116233825684, "learning_rate": 6.19718309859155e-06, "loss": 0.8183, "step": 6546 }, { "epoch": 0.969925925925926, "grad_norm": 1.5062710046768188, "learning_rate": 6.167531504818384e-06, "loss": 0.8961, "step": 6547 }, { "epoch": 0.9700740740740741, "grad_norm": 1.6228721141815186, "learning_rate": 6.137879911045219e-06, "loss": 1.0116, "step": 6548 }, { "epoch": 0.9702222222222222, "grad_norm": 1.65943443775177, "learning_rate": 6.108228317272054e-06, "loss": 0.8478, "step": 6549 }, { "epoch": 0.9703703703703703, "grad_norm": 3.417999744415283, "learning_rate": 6.078576723498888e-06, "loss": 0.925, "step": 6550 }, { "epoch": 0.9705185185185186, "grad_norm": 1.63390052318573, "learning_rate": 6.048925129725723e-06, "loss": 1.072, "step": 6551 }, { "epoch": 0.9706666666666667, "grad_norm": 1.3300889730453491, "learning_rate": 6.0192735359525575e-06, "loss": 
0.9626, "step": 6552 }, { "epoch": 0.9708148148148148, "grad_norm": 1.8411054611206055, "learning_rate": 5.989621942179393e-06, "loss": 1.0189, "step": 6553 }, { "epoch": 0.9709629629629629, "grad_norm": 2.7753472328186035, "learning_rate": 5.959970348406227e-06, "loss": 0.9686, "step": 6554 }, { "epoch": 0.9711111111111111, "grad_norm": 2.478764295578003, "learning_rate": 5.930318754633061e-06, "loss": 0.8573, "step": 6555 }, { "epoch": 0.9712592592592593, "grad_norm": 1.5853458642959595, "learning_rate": 5.900667160859896e-06, "loss": 0.7584, "step": 6556 }, { "epoch": 0.9714074074074074, "grad_norm": 1.3515368700027466, "learning_rate": 5.871015567086731e-06, "loss": 1.1072, "step": 6557 }, { "epoch": 0.9715555555555555, "grad_norm": 1.7907657623291016, "learning_rate": 5.841363973313566e-06, "loss": 0.9754, "step": 6558 }, { "epoch": 0.9717037037037037, "grad_norm": 3.0642828941345215, "learning_rate": 5.811712379540401e-06, "loss": 0.8858, "step": 6559 }, { "epoch": 0.9718518518518519, "grad_norm": 1.4035335779190063, "learning_rate": 5.782060785767235e-06, "loss": 1.2295, "step": 6560 }, { "epoch": 0.972, "grad_norm": 1.6747664213180542, "learning_rate": 5.75240919199407e-06, "loss": 0.9296, "step": 6561 }, { "epoch": 0.9721481481481481, "grad_norm": 2.323160409927368, "learning_rate": 5.722757598220905e-06, "loss": 1.0252, "step": 6562 }, { "epoch": 0.9722962962962963, "grad_norm": 1.5858911275863647, "learning_rate": 5.69310600444774e-06, "loss": 0.7889, "step": 6563 }, { "epoch": 0.9724444444444444, "grad_norm": 1.511087417602539, "learning_rate": 5.663454410674574e-06, "loss": 1.0915, "step": 6564 }, { "epoch": 0.9725925925925926, "grad_norm": 1.9369533061981201, "learning_rate": 5.6338028169014084e-06, "loss": 0.9701, "step": 6565 }, { "epoch": 0.9727407407407408, "grad_norm": 1.9651539325714111, "learning_rate": 5.604151223128244e-06, "loss": 1.0313, "step": 6566 }, { "epoch": 0.9728888888888889, "grad_norm": 1.8601475954055786, "learning_rate": 
5.574499629355078e-06, "loss": 0.8278, "step": 6567 }, { "epoch": 0.973037037037037, "grad_norm": 2.7122087478637695, "learning_rate": 5.544848035581913e-06, "loss": 0.9094, "step": 6568 }, { "epoch": 0.9731851851851852, "grad_norm": 1.3325860500335693, "learning_rate": 5.515196441808747e-06, "loss": 1.1443, "step": 6569 }, { "epoch": 0.9733333333333334, "grad_norm": 3.21024489402771, "learning_rate": 5.4855448480355825e-06, "loss": 0.8182, "step": 6570 }, { "epoch": 0.9734814814814815, "grad_norm": 1.379136562347412, "learning_rate": 5.455893254262417e-06, "loss": 1.0857, "step": 6571 }, { "epoch": 0.9736296296296296, "grad_norm": 1.7300907373428345, "learning_rate": 5.426241660489251e-06, "loss": 1.0925, "step": 6572 }, { "epoch": 0.9737777777777777, "grad_norm": 1.1760673522949219, "learning_rate": 5.396590066716086e-06, "loss": 0.8552, "step": 6573 }, { "epoch": 0.973925925925926, "grad_norm": 1.8063416481018066, "learning_rate": 5.3669384729429205e-06, "loss": 0.8869, "step": 6574 }, { "epoch": 0.9740740740740741, "grad_norm": 1.8884493112564087, "learning_rate": 5.337286879169756e-06, "loss": 0.8914, "step": 6575 }, { "epoch": 0.9742222222222222, "grad_norm": 2.1271440982818604, "learning_rate": 5.30763528539659e-06, "loss": 0.852, "step": 6576 }, { "epoch": 0.9743703703703703, "grad_norm": 1.5887964963912964, "learning_rate": 5.277983691623425e-06, "loss": 0.8127, "step": 6577 }, { "epoch": 0.9745185185185186, "grad_norm": 1.3158174753189087, "learning_rate": 5.248332097850259e-06, "loss": 0.7721, "step": 6578 }, { "epoch": 0.9746666666666667, "grad_norm": 2.63740611076355, "learning_rate": 5.218680504077094e-06, "loss": 0.7826, "step": 6579 }, { "epoch": 0.9748148148148148, "grad_norm": 1.204813003540039, "learning_rate": 5.18902891030393e-06, "loss": 0.7437, "step": 6580 }, { "epoch": 0.9749629629629629, "grad_norm": 1.3193559646606445, "learning_rate": 5.159377316530764e-06, "loss": 1.0686, "step": 6581 }, { "epoch": 0.9751111111111112, "grad_norm": 
1.195284128189087, "learning_rate": 5.129725722757599e-06, "loss": 1.1198, "step": 6582 }, { "epoch": 0.9752592592592593, "grad_norm": 1.8917425870895386, "learning_rate": 5.100074128984433e-06, "loss": 1.034, "step": 6583 }, { "epoch": 0.9754074074074074, "grad_norm": 2.3464300632476807, "learning_rate": 5.070422535211268e-06, "loss": 1.0442, "step": 6584 }, { "epoch": 0.9755555555555555, "grad_norm": 2.2270100116729736, "learning_rate": 5.040770941438103e-06, "loss": 1.0523, "step": 6585 }, { "epoch": 0.9757037037037037, "grad_norm": 1.4755874872207642, "learning_rate": 5.011119347664937e-06, "loss": 0.959, "step": 6586 }, { "epoch": 0.9758518518518519, "grad_norm": 3.1815688610076904, "learning_rate": 4.981467753891772e-06, "loss": 0.7292, "step": 6587 }, { "epoch": 0.976, "grad_norm": 1.4934961795806885, "learning_rate": 4.9518161601186066e-06, "loss": 0.9943, "step": 6588 }, { "epoch": 0.9761481481481481, "grad_norm": 1.389280080795288, "learning_rate": 4.922164566345442e-06, "loss": 0.9042, "step": 6589 }, { "epoch": 0.9762962962962963, "grad_norm": 1.9162555932998657, "learning_rate": 4.892512972572276e-06, "loss": 0.9948, "step": 6590 }, { "epoch": 0.9764444444444444, "grad_norm": 1.6919996738433838, "learning_rate": 4.86286137879911e-06, "loss": 0.842, "step": 6591 }, { "epoch": 0.9765925925925926, "grad_norm": 1.2007420063018799, "learning_rate": 4.8332097850259454e-06, "loss": 0.6764, "step": 6592 }, { "epoch": 0.9767407407407407, "grad_norm": 2.5459723472595215, "learning_rate": 4.80355819125278e-06, "loss": 0.8209, "step": 6593 }, { "epoch": 0.9768888888888889, "grad_norm": 2.35369873046875, "learning_rate": 4.773906597479615e-06, "loss": 0.8723, "step": 6594 }, { "epoch": 0.977037037037037, "grad_norm": 1.2594969272613525, "learning_rate": 4.744255003706449e-06, "loss": 0.9956, "step": 6595 }, { "epoch": 0.9771851851851852, "grad_norm": 1.4785923957824707, "learning_rate": 4.714603409933284e-06, "loss": 1.1136, "step": 6596 }, { "epoch": 
0.9773333333333334, "grad_norm": 1.6019283533096313, "learning_rate": 4.684951816160119e-06, "loss": 0.9231, "step": 6597 }, { "epoch": 0.9774814814814815, "grad_norm": 1.6392041444778442, "learning_rate": 4.655300222386953e-06, "loss": 0.9903, "step": 6598 }, { "epoch": 0.9776296296296296, "grad_norm": 2.194157361984253, "learning_rate": 4.625648628613788e-06, "loss": 1.0814, "step": 6599 }, { "epoch": 0.9777777777777777, "grad_norm": 1.9493076801300049, "learning_rate": 4.595997034840623e-06, "loss": 1.0672, "step": 6600 }, { "epoch": 0.977925925925926, "grad_norm": 1.8517451286315918, "learning_rate": 4.5663454410674575e-06, "loss": 0.7373, "step": 6601 }, { "epoch": 0.9780740740740741, "grad_norm": 1.6022025346755981, "learning_rate": 4.536693847294293e-06, "loss": 0.8975, "step": 6602 }, { "epoch": 0.9782222222222222, "grad_norm": 1.5256716012954712, "learning_rate": 4.507042253521127e-06, "loss": 1.0167, "step": 6603 }, { "epoch": 0.9783703703703703, "grad_norm": 1.8543671369552612, "learning_rate": 4.477390659747962e-06, "loss": 0.8798, "step": 6604 }, { "epoch": 0.9785185185185186, "grad_norm": 1.1587783098220825, "learning_rate": 4.447739065974796e-06, "loss": 0.939, "step": 6605 }, { "epoch": 0.9786666666666667, "grad_norm": 1.7228477001190186, "learning_rate": 4.4180874722016315e-06, "loss": 0.8886, "step": 6606 }, { "epoch": 0.9788148148148148, "grad_norm": 1.776406168937683, "learning_rate": 4.388435878428466e-06, "loss": 1.0077, "step": 6607 }, { "epoch": 0.9789629629629629, "grad_norm": 2.580406427383423, "learning_rate": 4.3587842846553e-06, "loss": 1.0536, "step": 6608 }, { "epoch": 0.9791111111111112, "grad_norm": 1.3938255310058594, "learning_rate": 4.329132690882135e-06, "loss": 0.8188, "step": 6609 }, { "epoch": 0.9792592592592593, "grad_norm": 1.5968995094299316, "learning_rate": 4.2994810971089695e-06, "loss": 1.1257, "step": 6610 }, { "epoch": 0.9794074074074074, "grad_norm": 1.3835958242416382, "learning_rate": 4.269829503335805e-06, 
"loss": 1.1758, "step": 6611 }, { "epoch": 0.9795555555555555, "grad_norm": 2.4501984119415283, "learning_rate": 4.240177909562639e-06, "loss": 1.0919, "step": 6612 }, { "epoch": 0.9797037037037037, "grad_norm": 1.728827714920044, "learning_rate": 4.210526315789474e-06, "loss": 0.9941, "step": 6613 }, { "epoch": 0.9798518518518519, "grad_norm": 1.5663871765136719, "learning_rate": 4.180874722016308e-06, "loss": 0.7862, "step": 6614 }, { "epoch": 0.98, "grad_norm": 1.8939645290374756, "learning_rate": 4.151223128243143e-06, "loss": 0.9582, "step": 6615 }, { "epoch": 0.9801481481481481, "grad_norm": 1.8436479568481445, "learning_rate": 4.121571534469978e-06, "loss": 1.1677, "step": 6616 }, { "epoch": 0.9802962962962963, "grad_norm": 2.3548879623413086, "learning_rate": 4.091919940696812e-06, "loss": 0.9571, "step": 6617 }, { "epoch": 0.9804444444444445, "grad_norm": 1.6790826320648193, "learning_rate": 4.062268346923647e-06, "loss": 1.2313, "step": 6618 }, { "epoch": 0.9805925925925926, "grad_norm": 1.4376988410949707, "learning_rate": 4.032616753150482e-06, "loss": 0.926, "step": 6619 }, { "epoch": 0.9807407407407407, "grad_norm": 2.4382681846618652, "learning_rate": 4.002965159377317e-06, "loss": 1.1405, "step": 6620 }, { "epoch": 0.9808888888888889, "grad_norm": 1.5281683206558228, "learning_rate": 3.973313565604152e-06, "loss": 0.981, "step": 6621 }, { "epoch": 0.981037037037037, "grad_norm": 1.5083342790603638, "learning_rate": 3.943661971830986e-06, "loss": 0.9166, "step": 6622 }, { "epoch": 0.9811851851851852, "grad_norm": 1.549145221710205, "learning_rate": 3.914010378057821e-06, "loss": 0.9484, "step": 6623 }, { "epoch": 0.9813333333333333, "grad_norm": 1.4674689769744873, "learning_rate": 3.884358784284656e-06, "loss": 0.8851, "step": 6624 }, { "epoch": 0.9814814814814815, "grad_norm": 1.2259571552276611, "learning_rate": 3.854707190511491e-06, "loss": 1.0165, "step": 6625 }, { "epoch": 0.9816296296296296, "grad_norm": 1.5182334184646606, "learning_rate": 
3.825055596738325e-06, "loss": 0.9455, "step": 6626 }, { "epoch": 0.9817777777777777, "grad_norm": 1.8458360433578491, "learning_rate": 3.7954040029651598e-06, "loss": 1.0414, "step": 6627 }, { "epoch": 0.981925925925926, "grad_norm": 1.4810439348220825, "learning_rate": 3.7657524091919945e-06, "loss": 0.9363, "step": 6628 }, { "epoch": 0.9820740740740741, "grad_norm": 1.8782624006271362, "learning_rate": 3.736100815418829e-06, "loss": 0.9122, "step": 6629 }, { "epoch": 0.9822222222222222, "grad_norm": 2.1159908771514893, "learning_rate": 3.7064492216456635e-06, "loss": 1.0393, "step": 6630 }, { "epoch": 0.9823703703703703, "grad_norm": 1.7776083946228027, "learning_rate": 3.6767976278724982e-06, "loss": 1.1439, "step": 6631 }, { "epoch": 0.9825185185185186, "grad_norm": 1.437274694442749, "learning_rate": 3.647146034099333e-06, "loss": 1.2951, "step": 6632 }, { "epoch": 0.9826666666666667, "grad_norm": 2.729619264602661, "learning_rate": 3.6174944403261677e-06, "loss": 0.834, "step": 6633 }, { "epoch": 0.9828148148148148, "grad_norm": 1.2838209867477417, "learning_rate": 3.5878428465530024e-06, "loss": 0.9613, "step": 6634 }, { "epoch": 0.9829629629629629, "grad_norm": 1.4120662212371826, "learning_rate": 3.558191252779837e-06, "loss": 1.2446, "step": 6635 }, { "epoch": 0.9831111111111112, "grad_norm": 1.354019045829773, "learning_rate": 3.528539659006672e-06, "loss": 0.9115, "step": 6636 }, { "epoch": 0.9832592592592593, "grad_norm": 1.6061333417892456, "learning_rate": 3.498888065233506e-06, "loss": 0.784, "step": 6637 }, { "epoch": 0.9834074074074074, "grad_norm": 1.5644516944885254, "learning_rate": 3.469236471460341e-06, "loss": 1.0192, "step": 6638 }, { "epoch": 0.9835555555555555, "grad_norm": 1.7661339044570923, "learning_rate": 3.4395848776871755e-06, "loss": 1.0909, "step": 6639 }, { "epoch": 0.9837037037037037, "grad_norm": 1.819481372833252, "learning_rate": 3.4099332839140103e-06, "loss": 1.147, "step": 6640 }, { "epoch": 0.9838518518518519, 
"grad_norm": 2.0585787296295166, "learning_rate": 3.380281690140845e-06, "loss": 0.8059, "step": 6641 }, { "epoch": 0.984, "grad_norm": 1.4306613206863403, "learning_rate": 3.35063009636768e-06, "loss": 0.9699, "step": 6642 }, { "epoch": 0.9841481481481481, "grad_norm": 1.5439941883087158, "learning_rate": 3.320978502594515e-06, "loss": 1.0914, "step": 6643 }, { "epoch": 0.9842962962962963, "grad_norm": 1.494676947593689, "learning_rate": 3.2913269088213496e-06, "loss": 0.9988, "step": 6644 }, { "epoch": 0.9844444444444445, "grad_norm": 1.6927095651626587, "learning_rate": 3.2616753150481843e-06, "loss": 1.0446, "step": 6645 }, { "epoch": 0.9845925925925926, "grad_norm": 1.642819881439209, "learning_rate": 3.232023721275019e-06, "loss": 0.929, "step": 6646 }, { "epoch": 0.9847407407407407, "grad_norm": 1.5157071352005005, "learning_rate": 3.2023721275018537e-06, "loss": 0.9346, "step": 6647 }, { "epoch": 0.9848888888888889, "grad_norm": 1.68966805934906, "learning_rate": 3.172720533728688e-06, "loss": 1.0874, "step": 6648 }, { "epoch": 0.985037037037037, "grad_norm": 1.5840626955032349, "learning_rate": 3.1430689399555227e-06, "loss": 0.7515, "step": 6649 }, { "epoch": 0.9851851851851852, "grad_norm": 2.2057268619537354, "learning_rate": 3.1134173461823575e-06, "loss": 1.1616, "step": 6650 }, { "epoch": 0.9853333333333333, "grad_norm": 1.8586816787719727, "learning_rate": 3.083765752409192e-06, "loss": 0.9756, "step": 6651 }, { "epoch": 0.9854814814814815, "grad_norm": 3.2284040451049805, "learning_rate": 3.054114158636027e-06, "loss": 1.0309, "step": 6652 }, { "epoch": 0.9856296296296296, "grad_norm": 1.3896958827972412, "learning_rate": 3.0244625648628616e-06, "loss": 0.9007, "step": 6653 }, { "epoch": 0.9857777777777778, "grad_norm": 2.136592388153076, "learning_rate": 2.9948109710896963e-06, "loss": 1.14, "step": 6654 }, { "epoch": 0.9859259259259259, "grad_norm": 2.3692097663879395, "learning_rate": 2.9651593773165306e-06, "loss": 1.0577, "step": 6655 }, { 
"epoch": 0.9860740740740741, "grad_norm": 3.259517192840576, "learning_rate": 2.9355077835433653e-06, "loss": 1.1438, "step": 6656 }, { "epoch": 0.9862222222222222, "grad_norm": 1.794457197189331, "learning_rate": 2.9058561897702005e-06, "loss": 0.947, "step": 6657 }, { "epoch": 0.9863703703703703, "grad_norm": 1.563719630241394, "learning_rate": 2.876204595997035e-06, "loss": 0.8938, "step": 6658 }, { "epoch": 0.9865185185185186, "grad_norm": 1.9052386283874512, "learning_rate": 2.84655300222387e-06, "loss": 1.0833, "step": 6659 }, { "epoch": 0.9866666666666667, "grad_norm": 1.1962292194366455, "learning_rate": 2.8169014084507042e-06, "loss": 1.1096, "step": 6660 }, { "epoch": 0.9868148148148148, "grad_norm": 2.1645631790161133, "learning_rate": 2.787249814677539e-06, "loss": 0.9058, "step": 6661 }, { "epoch": 0.9869629629629629, "grad_norm": 1.8755302429199219, "learning_rate": 2.7575982209043737e-06, "loss": 1.1832, "step": 6662 }, { "epoch": 0.9871111111111112, "grad_norm": 7.888797760009766, "learning_rate": 2.7279466271312084e-06, "loss": 0.9308, "step": 6663 }, { "epoch": 0.9872592592592593, "grad_norm": 1.6029328107833862, "learning_rate": 2.698295033358043e-06, "loss": 1.0354, "step": 6664 }, { "epoch": 0.9874074074074074, "grad_norm": 1.4510563611984253, "learning_rate": 2.668643439584878e-06, "loss": 1.0924, "step": 6665 }, { "epoch": 0.9875555555555555, "grad_norm": 2.7548041343688965, "learning_rate": 2.6389918458117125e-06, "loss": 1.4003, "step": 6666 }, { "epoch": 0.9877037037037038, "grad_norm": 1.3703808784484863, "learning_rate": 2.609340252038547e-06, "loss": 1.0794, "step": 6667 }, { "epoch": 0.9878518518518519, "grad_norm": 1.6302189826965332, "learning_rate": 2.579688658265382e-06, "loss": 0.9665, "step": 6668 }, { "epoch": 0.988, "grad_norm": 1.29432213306427, "learning_rate": 2.5500370644922167e-06, "loss": 0.8653, "step": 6669 }, { "epoch": 0.9881481481481481, "grad_norm": 1.2217215299606323, "learning_rate": 2.5203854707190514e-06, 
"loss": 0.8904, "step": 6670 }, { "epoch": 0.9882962962962963, "grad_norm": 2.8260674476623535, "learning_rate": 2.490733876945886e-06, "loss": 1.0907, "step": 6671 }, { "epoch": 0.9884444444444445, "grad_norm": 2.121968984603882, "learning_rate": 2.461082283172721e-06, "loss": 0.9767, "step": 6672 }, { "epoch": 0.9885925925925926, "grad_norm": 2.9519248008728027, "learning_rate": 2.431430689399555e-06, "loss": 0.9438, "step": 6673 }, { "epoch": 0.9887407407407407, "grad_norm": 2.851862907409668, "learning_rate": 2.40177909562639e-06, "loss": 0.872, "step": 6674 }, { "epoch": 0.9888888888888889, "grad_norm": 1.1649541854858398, "learning_rate": 2.3721275018532246e-06, "loss": 0.8041, "step": 6675 }, { "epoch": 0.989037037037037, "grad_norm": 1.4583702087402344, "learning_rate": 2.3424759080800593e-06, "loss": 1.0451, "step": 6676 }, { "epoch": 0.9891851851851852, "grad_norm": 2.0155556201934814, "learning_rate": 2.312824314306894e-06, "loss": 0.9207, "step": 6677 }, { "epoch": 0.9893333333333333, "grad_norm": 1.766683578491211, "learning_rate": 2.2831727205337287e-06, "loss": 0.9521, "step": 6678 }, { "epoch": 0.9894814814814815, "grad_norm": 1.982771635055542, "learning_rate": 2.2535211267605635e-06, "loss": 0.9774, "step": 6679 }, { "epoch": 0.9896296296296296, "grad_norm": 1.9274686574935913, "learning_rate": 2.223869532987398e-06, "loss": 1.0067, "step": 6680 }, { "epoch": 0.9897777777777778, "grad_norm": 1.6137382984161377, "learning_rate": 2.194217939214233e-06, "loss": 0.977, "step": 6681 }, { "epoch": 0.9899259259259259, "grad_norm": 1.5147463083267212, "learning_rate": 2.1645663454410676e-06, "loss": 0.87, "step": 6682 }, { "epoch": 0.9900740740740741, "grad_norm": 3.287322759628296, "learning_rate": 2.1349147516679023e-06, "loss": 0.803, "step": 6683 }, { "epoch": 0.9902222222222222, "grad_norm": 1.6876987218856812, "learning_rate": 2.105263157894737e-06, "loss": 1.152, "step": 6684 }, { "epoch": 0.9903703703703703, "grad_norm": 1.5785335302352905, 
"learning_rate": 2.0756115641215714e-06, "loss": 0.8979, "step": 6685 }, { "epoch": 0.9905185185185185, "grad_norm": 1.839627742767334, "learning_rate": 2.045959970348406e-06, "loss": 0.8454, "step": 6686 }, { "epoch": 0.9906666666666667, "grad_norm": 2.166228771209717, "learning_rate": 2.016308376575241e-06, "loss": 1.0755, "step": 6687 }, { "epoch": 0.9908148148148148, "grad_norm": 1.7218915224075317, "learning_rate": 1.986656782802076e-06, "loss": 1.122, "step": 6688 }, { "epoch": 0.9909629629629629, "grad_norm": 2.0172159671783447, "learning_rate": 1.9570051890289107e-06, "loss": 1.0773, "step": 6689 }, { "epoch": 0.9911111111111112, "grad_norm": 2.1524360179901123, "learning_rate": 1.9273535952557454e-06, "loss": 0.8959, "step": 6690 }, { "epoch": 0.9912592592592593, "grad_norm": 1.6936663389205933, "learning_rate": 1.8977020014825799e-06, "loss": 1.1442, "step": 6691 }, { "epoch": 0.9914074074074074, "grad_norm": 3.5624890327453613, "learning_rate": 1.8680504077094146e-06, "loss": 0.7474, "step": 6692 }, { "epoch": 0.9915555555555555, "grad_norm": 2.378261089324951, "learning_rate": 1.8383988139362491e-06, "loss": 0.8932, "step": 6693 }, { "epoch": 0.9917037037037038, "grad_norm": 1.7613540887832642, "learning_rate": 1.8087472201630838e-06, "loss": 0.6714, "step": 6694 }, { "epoch": 0.9918518518518519, "grad_norm": 1.6235287189483643, "learning_rate": 1.7790956263899185e-06, "loss": 0.9776, "step": 6695 }, { "epoch": 0.992, "grad_norm": 1.8661762475967407, "learning_rate": 1.749444032616753e-06, "loss": 1.0605, "step": 6696 }, { "epoch": 0.9921481481481481, "grad_norm": 2.032745599746704, "learning_rate": 1.7197924388435878e-06, "loss": 0.8629, "step": 6697 }, { "epoch": 0.9922962962962963, "grad_norm": 1.286075234413147, "learning_rate": 1.6901408450704225e-06, "loss": 0.9903, "step": 6698 }, { "epoch": 0.9924444444444445, "grad_norm": 1.348991870880127, "learning_rate": 1.6604892512972574e-06, "loss": 0.9778, "step": 6699 }, { "epoch": 0.9925925925925926, 
"grad_norm": 1.556408405303955, "learning_rate": 1.6308376575240921e-06, "loss": 0.9447, "step": 6700 }, { "epoch": 0.9927407407407407, "grad_norm": 1.4265048503875732, "learning_rate": 1.6011860637509269e-06, "loss": 1.0695, "step": 6701 }, { "epoch": 0.9928888888888889, "grad_norm": 1.8670324087142944, "learning_rate": 1.5715344699777614e-06, "loss": 0.9521, "step": 6702 }, { "epoch": 0.993037037037037, "grad_norm": 1.4540780782699585, "learning_rate": 1.541882876204596e-06, "loss": 0.8364, "step": 6703 }, { "epoch": 0.9931851851851852, "grad_norm": 1.2747162580490112, "learning_rate": 1.5122312824314308e-06, "loss": 0.8357, "step": 6704 }, { "epoch": 0.9933333333333333, "grad_norm": 1.5093507766723633, "learning_rate": 1.4825796886582653e-06, "loss": 1.0274, "step": 6705 }, { "epoch": 0.9934814814814815, "grad_norm": 1.8258951902389526, "learning_rate": 1.4529280948851002e-06, "loss": 1.1714, "step": 6706 }, { "epoch": 0.9936296296296296, "grad_norm": 1.4696855545043945, "learning_rate": 1.423276501111935e-06, "loss": 1.0053, "step": 6707 }, { "epoch": 0.9937777777777778, "grad_norm": 1.2955344915390015, "learning_rate": 1.3936249073387695e-06, "loss": 0.8922, "step": 6708 }, { "epoch": 0.9939259259259259, "grad_norm": 2.6339271068573, "learning_rate": 1.3639733135656042e-06, "loss": 0.7903, "step": 6709 }, { "epoch": 0.9940740740740741, "grad_norm": 1.6920212507247925, "learning_rate": 1.334321719792439e-06, "loss": 1.0618, "step": 6710 }, { "epoch": 0.9942222222222222, "grad_norm": 1.0905203819274902, "learning_rate": 1.3046701260192734e-06, "loss": 1.0253, "step": 6711 }, { "epoch": 0.9943703703703703, "grad_norm": 1.664962887763977, "learning_rate": 1.2750185322461083e-06, "loss": 1.1039, "step": 6712 }, { "epoch": 0.9945185185185185, "grad_norm": 1.530849814414978, "learning_rate": 1.245366938472943e-06, "loss": 0.983, "step": 6713 }, { "epoch": 0.9946666666666667, "grad_norm": 1.5666024684906006, "learning_rate": 1.2157153446997776e-06, "loss": 1.078, 
"step": 6714 }, { "epoch": 0.9948148148148148, "grad_norm": 1.3971589803695679, "learning_rate": 1.1860637509266123e-06, "loss": 0.9032, "step": 6715 }, { "epoch": 0.9949629629629629, "grad_norm": 2.2599244117736816, "learning_rate": 1.156412157153447e-06, "loss": 1.2584, "step": 6716 }, { "epoch": 0.9951111111111111, "grad_norm": 1.9005279541015625, "learning_rate": 1.1267605633802817e-06, "loss": 1.1237, "step": 6717 }, { "epoch": 0.9952592592592593, "grad_norm": 1.5285006761550903, "learning_rate": 1.0971089696071165e-06, "loss": 1.0576, "step": 6718 }, { "epoch": 0.9954074074074074, "grad_norm": 1.7799162864685059, "learning_rate": 1.0674573758339512e-06, "loss": 1.0083, "step": 6719 }, { "epoch": 0.9955555555555555, "grad_norm": 1.1048457622528076, "learning_rate": 1.0378057820607857e-06, "loss": 0.9721, "step": 6720 }, { "epoch": 0.9957037037037038, "grad_norm": 2.5665197372436523, "learning_rate": 1.0081541882876204e-06, "loss": 1.2598, "step": 6721 }, { "epoch": 0.9958518518518519, "grad_norm": 1.661734700202942, "learning_rate": 9.785025945144553e-07, "loss": 1.0126, "step": 6722 }, { "epoch": 0.996, "grad_norm": 1.7528083324432373, "learning_rate": 9.488510007412899e-07, "loss": 0.9573, "step": 6723 }, { "epoch": 0.9961481481481481, "grad_norm": 1.5361084938049316, "learning_rate": 9.191994069681246e-07, "loss": 0.9741, "step": 6724 }, { "epoch": 0.9962962962962963, "grad_norm": 1.998482584953308, "learning_rate": 8.895478131949593e-07, "loss": 0.9462, "step": 6725 }, { "epoch": 0.9964444444444445, "grad_norm": 2.735476493835449, "learning_rate": 8.598962194217939e-07, "loss": 0.9576, "step": 6726 }, { "epoch": 0.9965925925925926, "grad_norm": 2.3294827938079834, "learning_rate": 8.302446256486287e-07, "loss": 0.9671, "step": 6727 }, { "epoch": 0.9967407407407407, "grad_norm": 2.1672589778900146, "learning_rate": 8.005930318754634e-07, "loss": 1.0485, "step": 6728 }, { "epoch": 0.9968888888888889, "grad_norm": 1.595106840133667, "learning_rate": 
7.70941438102298e-07, "loss": 1.1496, "step": 6729 }, { "epoch": 0.997037037037037, "grad_norm": 2.001743793487549, "learning_rate": 7.412898443291327e-07, "loss": 1.1263, "step": 6730 }, { "epoch": 0.9971851851851852, "grad_norm": 2.020989418029785, "learning_rate": 7.116382505559675e-07, "loss": 0.9639, "step": 6731 }, { "epoch": 0.9973333333333333, "grad_norm": 2.517296552658081, "learning_rate": 6.819866567828021e-07, "loss": 0.9608, "step": 6732 }, { "epoch": 0.9974814814814815, "grad_norm": 1.6524677276611328, "learning_rate": 6.523350630096367e-07, "loss": 1.1922, "step": 6733 }, { "epoch": 0.9976296296296296, "grad_norm": 1.6622039079666138, "learning_rate": 6.226834692364715e-07, "loss": 0.9946, "step": 6734 }, { "epoch": 0.9977777777777778, "grad_norm": 1.4304887056350708, "learning_rate": 5.930318754633061e-07, "loss": 1.0696, "step": 6735 }, { "epoch": 0.9979259259259259, "grad_norm": 1.105725884437561, "learning_rate": 5.633802816901409e-07, "loss": 0.9014, "step": 6736 }, { "epoch": 0.9980740740740741, "grad_norm": 1.4582500457763672, "learning_rate": 5.337286879169756e-07, "loss": 1.0234, "step": 6737 }, { "epoch": 0.9982222222222222, "grad_norm": 2.0556626319885254, "learning_rate": 5.040770941438102e-07, "loss": 0.9039, "step": 6738 }, { "epoch": 0.9983703703703704, "grad_norm": 2.3461997509002686, "learning_rate": 4.7442550037064497e-07, "loss": 1.0611, "step": 6739 }, { "epoch": 0.9985185185185185, "grad_norm": 1.3852459192276, "learning_rate": 4.4477390659747964e-07, "loss": 1.1235, "step": 6740 }, { "epoch": 0.9986666666666667, "grad_norm": 1.3906238079071045, "learning_rate": 4.1512231282431436e-07, "loss": 0.9771, "step": 6741 }, { "epoch": 0.9988148148148148, "grad_norm": 1.7508108615875244, "learning_rate": 3.85470719051149e-07, "loss": 0.974, "step": 6742 }, { "epoch": 0.9989629629629629, "grad_norm": 2.0732643604278564, "learning_rate": 3.5581912527798374e-07, "loss": 1.0118, "step": 6743 }, { "epoch": 0.9991111111111111, "grad_norm": 
1.8446046113967896, "learning_rate": 3.2616753150481835e-07, "loss": 1.1361, "step": 6744 }, { "epoch": 0.9992592592592593, "grad_norm": 1.2646979093551636, "learning_rate": 2.9651593773165307e-07, "loss": 1.0286, "step": 6745 }, { "epoch": 0.9994074074074074, "grad_norm": 1.2417453527450562, "learning_rate": 2.668643439584878e-07, "loss": 1.1493, "step": 6746 }, { "epoch": 0.9995555555555555, "grad_norm": 1.6472728252410889, "learning_rate": 2.3721275018532249e-07, "loss": 0.9548, "step": 6747 }, { "epoch": 0.9997037037037036, "grad_norm": 2.7328872680664062, "learning_rate": 2.0756115641215718e-07, "loss": 1.014, "step": 6748 }, { "epoch": 0.9998518518518519, "grad_norm": 1.9749536514282227, "learning_rate": 1.7790956263899187e-07, "loss": 0.7927, "step": 6749 }, { "epoch": 1.0, "grad_norm": 2.27304744720459, "learning_rate": 1.4825796886582654e-07, "loss": 1.1388, "step": 6750 } ], "logging_steps": 1, "max_steps": 6750, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.089575766795232e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }