{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 6267,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "grad_norm": 0.2775368392467499,
      "learning_rate": 1.0582010582010582e-06,
      "loss": 1.6413,
      "step": 1
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.22058266401290894,
      "learning_rate": 2.1164021164021164e-06,
      "loss": 1.4002,
      "step": 2
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.24383942782878876,
      "learning_rate": 3.1746031746031746e-06,
      "loss": 1.1712,
      "step": 3
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7188221216201782,
      "learning_rate": 4.232804232804233e-06,
      "loss": 1.8836,
      "step": 4
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.4612157940864563,
      "learning_rate": 5.291005291005291e-06,
      "loss": 1.8155,
      "step": 5
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.18907752633094788,
      "learning_rate": 6.349206349206349e-06,
      "loss": 1.3096,
      "step": 6
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6804981827735901,
      "learning_rate": 7.4074074074074075e-06,
      "loss": 2.1407,
      "step": 7
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.3221743702888489,
      "learning_rate": 8.465608465608466e-06,
      "loss": 1.2664,
      "step": 8
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.3426727056503296,
      "learning_rate": 9.523809523809523e-06,
      "loss": 1.3062,
      "step": 9
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.20028634369373322,
      "learning_rate": 1.0582010582010582e-05,
      "loss": 1.0689,
      "step": 10
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.30557742714881897,
      "learning_rate": 1.164021164021164e-05,
      "loss": 1.277,
      "step": 11
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.2677030563354492,
      "learning_rate": 1.2698412698412699e-05,
      "loss": 1.408,
      "step": 12
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.1790410280227661,
      "learning_rate": 1.3756613756613756e-05,
      "loss": 1.1303,
      "step": 13
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.46371275186538696,
      "learning_rate": 1.4814814814814815e-05,
      "loss": 1.4194,
      "step": 14
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9294260144233704,
      "learning_rate": 1.5873015873015872e-05,
      "loss": 1.9689,
      "step": 15
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.23375417292118073,
      "learning_rate": 1.693121693121693e-05,
      "loss": 1.3985,
      "step": 16
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.2636398375034332,
      "learning_rate": 1.798941798941799e-05,
      "loss": 1.1655,
      "step": 17
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.3833554685115814,
      "learning_rate": 1.9047619047619046e-05,
      "loss": 1.4089,
      "step": 18
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8202497959136963,
      "learning_rate": 2.0105820105820105e-05,
      "loss": 2.5243,
      "step": 19
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.15685799717903137,
      "learning_rate": 2.1164021164021164e-05,
      "loss": 2.0789,
      "step": 20
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7239970564842224,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 2.4303,
      "step": 21
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.27701154351234436,
      "learning_rate": 2.328042328042328e-05,
      "loss": 1.2762,
      "step": 22
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.31271129846572876,
      "learning_rate": 2.4338624338624338e-05,
      "loss": 1.3217,
      "step": 23
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.2485959827899933,
      "learning_rate": 2.5396825396825397e-05,
      "loss": 1.1332,
      "step": 24
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.4106338620185852,
      "learning_rate": 2.6455026455026456e-05,
      "loss": 1.7543,
      "step": 25
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.22196777164936066,
      "learning_rate": 2.7513227513227512e-05,
      "loss": 1.1995,
      "step": 26
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7972785830497742,
      "learning_rate": 2.857142857142857e-05,
      "loss": 2.3348,
      "step": 27
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.27511030435562134,
      "learning_rate": 2.962962962962963e-05,
      "loss": 1.313,
      "step": 28
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.39403605461120605,
      "learning_rate": 3.068783068783069e-05,
      "loss": 1.3756,
      "step": 29
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.24426384270191193,
      "learning_rate": 3.1746031746031745e-05,
      "loss": 1.5965,
      "step": 30
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.31007620692253113,
      "learning_rate": 3.280423280423281e-05,
      "loss": 1.4204,
      "step": 31
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6058558225631714,
      "learning_rate": 3.386243386243386e-05,
      "loss": 2.1047,
      "step": 32
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.3730120360851288,
      "learning_rate": 3.492063492063492e-05,
      "loss": 1.6638,
      "step": 33
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.2913112938404083,
      "learning_rate": 3.597883597883598e-05,
      "loss": 1.1682,
      "step": 34
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.21642601490020752,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 1.7017,
      "step": 35
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.18489326536655426,
      "learning_rate": 3.809523809523809e-05,
      "loss": 1.2163,
      "step": 36
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.2250489592552185,
      "learning_rate": 3.9153439153439155e-05,
      "loss": 1.418,
      "step": 37
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.29317358136177063,
      "learning_rate": 4.021164021164021e-05,
      "loss": 1.4218,
      "step": 38
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.21897652745246887,
      "learning_rate": 4.126984126984127e-05,
      "loss": 1.6453,
      "step": 39
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.31583139300346375,
      "learning_rate": 4.232804232804233e-05,
      "loss": 1.4359,
      "step": 40
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.35319212079048157,
      "learning_rate": 4.3386243386243384e-05,
      "loss": 1.5103,
      "step": 41
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.47440919280052185,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 1.564,
      "step": 42
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.26996803283691406,
      "learning_rate": 4.55026455026455e-05,
      "loss": 1.327,
      "step": 43
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.11083919554948807,
      "learning_rate": 4.656084656084656e-05,
      "loss": 1.1011,
      "step": 44
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.3413199484348297,
      "learning_rate": 4.761904761904762e-05,
      "loss": 1.4704,
      "step": 45
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.37313517928123474,
      "learning_rate": 4.8677248677248676e-05,
      "loss": 1.6284,
      "step": 46
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.3506244421005249,
      "learning_rate": 4.973544973544973e-05,
      "loss": 1.3227,
      "step": 47
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.4703328013420105,
      "learning_rate": 5.0793650793650794e-05,
      "loss": 1.7639,
      "step": 48
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.3501473069190979,
      "learning_rate": 5.185185185185185e-05,
      "loss": 1.3659,
      "step": 49
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7742831110954285,
      "learning_rate": 5.291005291005291e-05,
      "loss": 2.0478,
      "step": 50
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8281198740005493,
      "learning_rate": 5.396825396825397e-05,
      "loss": 2.3573,
      "step": 51
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.1962369829416275,
      "learning_rate": 5.5026455026455024e-05,
      "loss": 1.2145,
      "step": 52
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.42654427886009216,
      "learning_rate": 5.6084656084656086e-05,
      "loss": 1.5013,
      "step": 53
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.2224654257297516,
      "learning_rate": 5.714285714285714e-05,
      "loss": 1.1307,
      "step": 54
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.2636048197746277,
      "learning_rate": 5.82010582010582e-05,
      "loss": 1.1898,
      "step": 55
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8690416812896729,
      "learning_rate": 5.925925925925926e-05,
      "loss": 2.6946,
      "step": 56
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.21315345168113708,
      "learning_rate": 6.0317460317460316e-05,
      "loss": 1.3311,
      "step": 57
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.3089248836040497,
      "learning_rate": 6.137566137566138e-05,
      "loss": 1.3172,
      "step": 58
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.46613290905952454,
      "learning_rate": 6.243386243386243e-05,
      "loss": 1.5051,
      "step": 59
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.22203129529953003,
      "learning_rate": 6.349206349206349e-05,
      "loss": 1.0269,
      "step": 60
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.40593740344047546,
      "learning_rate": 6.455026455026454e-05,
      "loss": 1.5404,
      "step": 61
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.20608435571193695,
      "learning_rate": 6.560846560846561e-05,
      "loss": 1.3082,
      "step": 62
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.4350586533546448,
      "learning_rate": 6.666666666666667e-05,
      "loss": 1.4284,
      "step": 63
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.19673950970172882,
      "learning_rate": 6.772486772486773e-05,
      "loss": 1.3163,
      "step": 64
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.1913604736328125,
      "learning_rate": 6.878306878306878e-05,
      "loss": 1.5404,
      "step": 65
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.24638070166110992,
      "learning_rate": 6.984126984126984e-05,
      "loss": 1.1844,
      "step": 66
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.2841491997241974,
      "learning_rate": 7.089947089947089e-05,
      "loss": 1.859,
      "step": 67
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.2263411432504654,
      "learning_rate": 7.195767195767196e-05,
      "loss": 1.8898,
      "step": 68
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.36326220631599426,
      "learning_rate": 7.301587301587302e-05,
      "loss": 1.6953,
      "step": 69
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.32789310812950134,
      "learning_rate": 7.407407407407407e-05,
      "loss": 1.4027,
      "step": 70
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.19388175010681152,
      "learning_rate": 7.513227513227513e-05,
      "loss": 1.0815,
      "step": 71
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.3386622369289398,
      "learning_rate": 7.619047619047618e-05,
      "loss": 1.2046,
      "step": 72
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.2703385353088379,
      "learning_rate": 7.724867724867725e-05,
      "loss": 1.176,
      "step": 73
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0655968189239502,
      "learning_rate": 7.830687830687831e-05,
      "loss": 2.1573,
      "step": 74
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.4094426929950714,
      "learning_rate": 7.936507936507937e-05,
      "loss": 1.5299,
      "step": 75
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.24328134953975677,
      "learning_rate": 8.042328042328042e-05,
      "loss": 1.1641,
      "step": 76
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.2085375040769577,
      "learning_rate": 8.148148148148148e-05,
      "loss": 1.063,
      "step": 77
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.22432726621627808,
      "learning_rate": 8.253968253968255e-05,
      "loss": 1.2994,
      "step": 78
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.3053766191005707,
      "learning_rate": 8.35978835978836e-05,
      "loss": 1.3523,
      "step": 79
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7692846655845642,
      "learning_rate": 8.465608465608466e-05,
      "loss": 1.6329,
      "step": 80
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.3396718204021454,
      "learning_rate": 8.571428571428571e-05,
      "loss": 1.1088,
      "step": 81
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.3611130118370056,
      "learning_rate": 8.677248677248677e-05,
      "loss": 1.6538,
      "step": 82
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5153508186340332,
      "learning_rate": 8.783068783068782e-05,
      "loss": 1.4637,
      "step": 83
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5658602118492126,
      "learning_rate": 8.888888888888889e-05,
      "loss": 1.4004,
      "step": 84
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5707806944847107,
      "learning_rate": 8.994708994708995e-05,
      "loss": 1.5071,
      "step": 85
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.3738985061645508,
      "learning_rate": 9.1005291005291e-05,
      "loss": 1.2137,
      "step": 86
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.31264883279800415,
      "learning_rate": 9.206349206349206e-05,
      "loss": 1.1283,
      "step": 87
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.30848148465156555,
      "learning_rate": 9.312169312169312e-05,
      "loss": 1.2487,
      "step": 88
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.27727821469306946,
      "learning_rate": 9.417989417989419e-05,
      "loss": 1.231,
      "step": 89
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.23825979232788086,
      "learning_rate": 9.523809523809524e-05,
      "loss": 0.96,
      "step": 90
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.30639296770095825,
      "learning_rate": 9.62962962962963e-05,
      "loss": 1.0695,
      "step": 91
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.4166839122772217,
      "learning_rate": 9.735449735449735e-05,
      "loss": 1.3187,
      "step": 92
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5817872881889343,
      "learning_rate": 9.841269841269841e-05,
      "loss": 1.6071,
      "step": 93
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.24909548461437225,
      "learning_rate": 9.947089947089946e-05,
      "loss": 1.9703,
      "step": 94
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9310131669044495,
      "learning_rate": 0.00010052910052910055,
      "loss": 1.8554,
      "step": 95
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.3705301284790039,
      "learning_rate": 0.00010158730158730159,
      "loss": 1.5383,
      "step": 96
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.39980438351631165,
      "learning_rate": 0.00010264550264550266,
      "loss": 1.1086,
      "step": 97
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.4157795310020447,
      "learning_rate": 0.0001037037037037037,
      "loss": 1.375,
      "step": 98
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.4891487658023834,
      "learning_rate": 0.00010476190476190477,
      "loss": 1.6422,
      "step": 99
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5578123927116394,
      "learning_rate": 0.00010582010582010582,
      "loss": 1.5312,
      "step": 100
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.927128255367279,
      "learning_rate": 0.0001068783068783069,
      "loss": 1.9311,
      "step": 101
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.40842747688293457,
      "learning_rate": 0.00010793650793650794,
      "loss": 1.1909,
      "step": 102
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.18715190887451172,
      "learning_rate": 0.000108994708994709,
      "loss": 1.4756,
      "step": 103
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.49563390016555786,
      "learning_rate": 0.00011005291005291005,
      "loss": 1.3842,
      "step": 104
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.4268365502357483,
      "learning_rate": 0.00011111111111111112,
      "loss": 1.1911,
      "step": 105
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1524708271026611,
      "learning_rate": 0.00011216931216931217,
      "loss": 1.8636,
      "step": 106
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5523980855941772,
      "learning_rate": 0.00011322751322751324,
      "loss": 1.4107,
      "step": 107
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.7130999565124512,
      "learning_rate": 0.00011428571428571428,
      "loss": 1.4601,
      "step": 108
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5138590335845947,
      "learning_rate": 0.00011534391534391535,
      "loss": 1.269,
      "step": 109
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.32723110914230347,
      "learning_rate": 0.0001164021164021164,
      "loss": 0.9714,
      "step": 110
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.31494349241256714,
      "learning_rate": 0.00011746031746031746,
      "loss": 1.1442,
      "step": 111
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.54449063539505,
      "learning_rate": 0.00011851851851851852,
      "loss": 1.3801,
      "step": 112
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.494110643863678,
      "learning_rate": 0.00011957671957671959,
      "loss": 0.9425,
      "step": 113
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.33389729261398315,
      "learning_rate": 0.00012063492063492063,
      "loss": 1.1143,
      "step": 114
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5894310474395752,
      "learning_rate": 0.0001216931216931217,
      "loss": 1.6302,
      "step": 115
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.4765518009662628,
      "learning_rate": 0.00012275132275132276,
      "loss": 1.2424,
      "step": 116
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.502227783203125,
      "learning_rate": 0.0001238095238095238,
      "loss": 0.9898,
      "step": 117
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.25684934854507446,
      "learning_rate": 0.00012486772486772487,
      "loss": 1.0542,
      "step": 118
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5765454173088074,
      "learning_rate": 0.00012592592592592592,
      "loss": 1.0751,
      "step": 119
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.4511030912399292,
      "learning_rate": 0.00012698412698412698,
      "loss": 1.2657,
      "step": 120
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.4885489344596863,
      "learning_rate": 0.00012804232804232806,
      "loss": 1.1263,
      "step": 121
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.8949145078659058,
      "learning_rate": 0.0001291005291005291,
      "loss": 1.2125,
      "step": 122
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8059971332550049,
      "learning_rate": 0.00013015873015873017,
      "loss": 1.885,
      "step": 123
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.766596794128418,
      "learning_rate": 0.00013121693121693123,
      "loss": 1.5713,
      "step": 124
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.6581116318702698,
      "learning_rate": 0.00013227513227513228,
      "loss": 1.11,
      "step": 125
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1053041219711304,
      "learning_rate": 0.00013333333333333334,
      "loss": 1.169,
      "step": 126
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.2617335319519043,
      "learning_rate": 0.0001343915343915344,
      "loss": 1.4276,
      "step": 127
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.6285470128059387,
      "learning_rate": 0.00013544973544973545,
      "loss": 1.0308,
      "step": 128
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.3883614540100098,
      "learning_rate": 0.0001365079365079365,
      "loss": 1.61,
      "step": 129
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.8833580017089844,
      "learning_rate": 0.00013756613756613756,
      "loss": 1.2524,
      "step": 130
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.6177710890769958,
      "learning_rate": 0.00013862433862433865,
      "loss": 0.9757,
      "step": 131
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5023919343948364,
      "learning_rate": 0.00013968253968253967,
      "loss": 0.8927,
      "step": 132
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.2681211829185486,
      "learning_rate": 0.00014074074074074076,
      "loss": 1.0838,
      "step": 133
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.34934189915657043,
      "learning_rate": 0.00014179894179894179,
      "loss": 1.1382,
      "step": 134
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.27239319682121277,
      "learning_rate": 0.00014285714285714287,
      "loss": 0.8342,
      "step": 135
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.6889602541923523,
      "learning_rate": 0.00014391534391534392,
      "loss": 0.9997,
      "step": 136
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.8749409317970276,
      "learning_rate": 0.00014497354497354498,
      "loss": 1.0456,
      "step": 137
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9397290349006653,
      "learning_rate": 0.00014603174603174603,
      "loss": 1.1635,
      "step": 138
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5522536039352417,
      "learning_rate": 0.0001470899470899471,
      "loss": 0.9798,
      "step": 139
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5815756916999817,
      "learning_rate": 0.00014814814814814815,
      "loss": 1.09,
      "step": 140
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5852010846138,
      "learning_rate": 0.00014920634920634923,
      "loss": 0.8882,
      "step": 141
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6863272190093994,
      "learning_rate": 0.00015026455026455026,
      "loss": 0.9023,
      "step": 142
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.7377687096595764,
      "learning_rate": 0.00015132275132275134,
      "loss": 1.0003,
      "step": 143
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.7904374003410339,
      "learning_rate": 0.00015238095238095237,
      "loss": 1.0136,
      "step": 144
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.8264901638031006,
      "learning_rate": 0.00015343915343915345,
      "loss": 0.9731,
      "step": 145
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5391393899917603,
      "learning_rate": 0.0001544973544973545,
      "loss": 1.121,
      "step": 146
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5472258925437927,
      "learning_rate": 0.00015555555555555556,
      "loss": 0.7722,
      "step": 147
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.7301981449127197,
      "learning_rate": 0.00015661375661375662,
      "loss": 0.9882,
      "step": 148
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7645363807678223,
      "learning_rate": 0.00015767195767195767,
      "loss": 1.0223,
      "step": 149
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.4810018539428711,
      "learning_rate": 0.00015873015873015873,
      "loss": 1.0026,
      "step": 150
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0327802896499634,
      "learning_rate": 0.00015978835978835979,
      "loss": 0.999,
      "step": 151
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.380346655845642,
      "learning_rate": 0.00016084656084656084,
      "loss": 0.9712,
      "step": 152
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.27255693078041077,
      "learning_rate": 0.00016190476190476192,
      "loss": 0.7719,
      "step": 153
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1240710020065308,
      "learning_rate": 0.00016296296296296295,
      "loss": 1.1055,
      "step": 154
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9442301392555237,
      "learning_rate": 0.00016402116402116404,
      "loss": 1.0152,
      "step": 155
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.7351832389831543,
      "learning_rate": 0.0001650793650793651,
      "loss": 1.1163,
      "step": 156
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.25930097699165344,
      "learning_rate": 0.00016613756613756615,
      "loss": 1.209,
      "step": 157
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.25930097699165344,
      "learning_rate": 0.00016613756613756615,
      "loss": 1.0077,
      "step": 158
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.2431420236825943,
      "learning_rate": 0.0001671957671957672,
      "loss": 0.8146,
      "step": 159
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.3339853882789612,
      "learning_rate": 0.00016825396825396826,
      "loss": 0.9908,
      "step": 160
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.49051934480667114,
      "learning_rate": 0.00016931216931216931,
      "loss": 1.0184,
      "step": 161
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.3301498591899872,
      "learning_rate": 0.00017037037037037037,
      "loss": 0.8457,
      "step": 162
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.7784838080406189,
      "learning_rate": 0.00017142857142857143,
      "loss": 0.7144,
      "step": 163
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.20318245887756348,
      "learning_rate": 0.0001724867724867725,
      "loss": 0.7589,
      "step": 164
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.3654181957244873,
      "learning_rate": 0.00017354497354497354,
      "loss": 0.8199,
      "step": 165
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.38021329045295715,
      "learning_rate": 0.00017460317460317462,
      "loss": 0.8481,
      "step": 166
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.2728421986103058,
      "learning_rate": 0.00017566137566137565,
      "loss": 1.0832,
      "step": 167
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.33730068802833557,
      "learning_rate": 0.00017671957671957673,
      "loss": 0.9061,
      "step": 168
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.2410222887992859,
      "learning_rate": 0.00017777777777777779,
      "loss": 1.0368,
      "step": 169
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.6788235306739807,
      "learning_rate": 0.00017883597883597884,
      "loss": 0.9986,
      "step": 170
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.7442582249641418,
      "learning_rate": 0.0001798941798941799,
      "loss": 1.0387,
      "step": 171
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.7751567363739014,
      "learning_rate": 0.00018095238095238095,
      "loss": 0.759,
      "step": 172
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.6972407698631287,
      "learning_rate": 0.000182010582010582,
      "loss": 0.8493,
      "step": 173
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.17545567452907562,
      "learning_rate": 0.0001830687830687831,
      "loss": 0.7595,
      "step": 174
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.3156915605068207,
      "learning_rate": 0.00018412698412698412,
      "loss": 0.8855,
      "step": 175
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9245812296867371,
      "learning_rate": 0.0001851851851851852,
      "loss": 0.8849,
      "step": 176
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.3211194574832916,
      "learning_rate": 0.00018624338624338623,
      "loss": 0.9716,
      "step": 177
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.2655186057090759,
      "learning_rate": 0.00018730158730158731,
      "loss": 0.9234,
      "step": 178
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.31071433424949646,
      "learning_rate": 0.00018835978835978837,
      "loss": 0.5733,
      "step": 179
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.18987324833869934,
      "learning_rate": 0.00018941798941798943,
      "loss": 0.8174,
      "step": 180
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.1964818835258484,
      "learning_rate": 0.00019047619047619048,
      "loss": 0.9092,
      "step": 181
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.3890428841114044,
      "learning_rate": 0.00019153439153439154,
      "loss": 0.9841,
      "step": 182
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.6426774263381958,
      "learning_rate": 0.0001925925925925926,
      "loss": 0.7967,
      "step": 183
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9267591834068298,
      "learning_rate": 0.00019365079365079365,
      "loss": 0.8779,
      "step": 184
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.44163382053375244,
      "learning_rate": 0.0001947089947089947,
      "loss": 0.9989,
      "step": 185
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.7246573567390442,
      "learning_rate": 0.0001957671957671958,
      "loss": 0.8589,
      "step": 186
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.3188604712486267,
      "learning_rate": 0.00019682539682539682,
      "loss": 0.9207,
      "step": 187
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.20971353352069855,
      "learning_rate": 0.0001978835978835979,
      "loss": 0.9086,
      "step": 188
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.3979068696498871,
      "learning_rate": 0.00019894179894179893,
      "loss": 1.0745,
      "step": 189
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.2954675555229187,
      "learning_rate": 0.0002,
      "loss": 0.6749,
      "step": 190
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.29796090722084045,
      "learning_rate": 0.00019999998664178747,
      "loss": 0.8014,
      "step": 191
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.4901796877384186,
      "learning_rate": 0.0001999999465671535,
      "loss": 0.8588,
      "step": 192
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.38625389337539673,
      "learning_rate": 0.00019999987977610873,
      "loss": 0.9558,
      "step": 193
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.2693013846874237,
      "learning_rate": 0.00019999978626867104,
      "loss": 0.6862,
      "step": 194
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.13912534713745117,
      "learning_rate": 0.00019999966604486539,
      "loss": 0.564,
      "step": 195
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.28337788581848145,
      "learning_rate": 0.00019999951910472396,
      "loss": 0.7268,
      "step": 196
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5211120843887329,
      "learning_rate": 0.00019999934544828594,
      "loss": 1.0206,
      "step": 197
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.18158501386642456,
      "learning_rate": 0.00019999914507559777,
      "loss": 0.8074,
      "step": 198
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.25880226492881775,
      "learning_rate": 0.0001999989179867129,
      "loss": 0.919,
      "step": 199
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.35540369153022766,
      "learning_rate": 0.00019999866418169213,
      "loss": 0.9799,
      "step": 200
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.34719356894493103,
      "learning_rate": 0.00019999838366060318,
      "loss": 0.901,
      "step": 201
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.28923800587654114,
      "learning_rate": 0.000199998076423521,
      "loss": 0.6951,
      "step": 202
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.267736554145813,
      "learning_rate": 0.0001999977424705277,
      "loss": 0.8304,
      "step": 203
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.2910594344139099,
      "learning_rate": 0.00019999738180171247,
      "loss": 0.8976,
      "step": 204
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.2393970489501953,
      "learning_rate": 0.0001999969944171717,
      "loss": 0.7856,
      "step": 205
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.15290570259094238,
      "learning_rate": 0.00019999658031700888,
      "loss": 0.7575,
      "step": 206
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.2662679851055145,
      "learning_rate": 0.0001999961395013346,
      "loss": 0.8107,
      "step": 207
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.054207444190979,
      "learning_rate": 0.0001999956719702667,
      "loss": 0.8378,
      "step": 208
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.23891401290893555,
      "learning_rate": 0.00019999517772393004,
      "loss": 0.7107,
      "step": 209
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.49826744198799133,
      "learning_rate": 0.00019999465676245667,
      "loss": 0.7184,
      "step": 210
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.2390405535697937,
      "learning_rate": 0.0001999941090859858,
      "loss": 0.8258,
      "step": 211
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.32522526383399963,
      "learning_rate": 0.00019999353469466372,
      "loss": 0.9029,
      "step": 212
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.1994389295578003,
      "learning_rate": 0.00019999293358864386,
      "loss": 0.6377,
      "step": 213
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.23954011499881744,
      "learning_rate": 0.0001999923057680869,
      "loss": 0.8565,
      "step": 214
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.2731366753578186,
      "learning_rate": 0.00019999165123316047,
      "loss": 0.7491,
      "step": 215
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.6676952242851257,
      "learning_rate": 0.00019999096998403953,
      "loss": 1.1711,
      "step": 216
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.26935842633247375,
      "learning_rate": 0.00019999026202090602,
      "loss": 0.7137,
      "step": 217
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.21712985634803772,
      "learning_rate": 0.0001999895273439491,
      "loss": 0.8663,
      "step": 218
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.7272562980651855,
      "learning_rate": 0.0001999887659533651,
      "loss": 0.7846,
      "step": 219
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.376611590385437,
      "learning_rate": 0.00019998797784935736,
      "loss": 0.8055,
      "step": 220
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.8714967966079712,
      "learning_rate": 0.00019998716303213648,
      "loss": 0.6407,
      "step": 221
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5537949204444885,
      "learning_rate": 0.00019998632150192012,
      "loss": 1.1161,
      "step": 222
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8118828535079956,
      "learning_rate": 0.0001999854532589331,
      "loss": 1.0083,
      "step": 223
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.7679688930511475,
      "learning_rate": 0.00019998455830340747,
      "loss": 1.0754,
      "step": 224
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.40060147643089294,
      "learning_rate": 0.00019998363663558218,
      "loss": 1.0733,
      "step": 225
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.4393571615219116,
      "learning_rate": 0.00019998268825570362,
      "loss": 1.2786,
      "step": 226
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.6882356405258179,
      "learning_rate": 0.00019998171316402508,
      "loss": 1.2752,
      "step": 227
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5426409840583801,
      "learning_rate": 0.00019998071136080706,
      "loss": 0.9722,
      "step": 228
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5275511145591736,
      "learning_rate": 0.00019997968284631728,
      "loss": 0.6577,
      "step": 229
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.20731140673160553,
      "learning_rate": 0.0001999786276208304,
      "loss": 0.8529,
      "step": 230
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.749111533164978,
      "learning_rate": 0.0001999775456846285,
      "loss": 0.8275,
      "step": 231
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.44946861267089844,
      "learning_rate": 0.00019997643703800047,
      "loss": 0.7831,
      "step": 232
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.44324296712875366,
      "learning_rate": 0.00019997530168124265,
      "loss": 0.883,
      "step": 233
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.6493027806282043,
      "learning_rate": 0.00019997413961465825,
      "loss": 0.5507,
      "step": 234
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.19852375984191895,
      "learning_rate": 0.0001999729508385578,
      "loss": 0.8027,
      "step": 235
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.29649004340171814,
      "learning_rate": 0.00019997173535325885,
      "loss": 0.8652,
      "step": 236
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.4001787006855011,
      "learning_rate": 0.00019997049315908616,
      "loss": 1.0044,
      "step": 237
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.19646108150482178,
      "learning_rate": 0.00019996922425637162,
      "loss": 0.6956,
      "step": 238
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.26843035221099854,
      "learning_rate": 0.0001999679286454542,
      "loss": 1.0716,
      "step": 239
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.2966955900192261,
      "learning_rate": 0.00019996660632668004,
      "loss": 0.73,
      "step": 240
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.2508254051208496,
      "learning_rate": 0.00019996525730040245,
      "loss": 1.0509,
      "step": 241
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5097018480300903,
      "learning_rate": 0.00019996388156698185,
      "loss": 0.7171,
      "step": 242
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.22723321616649628,
      "learning_rate": 0.00019996247912678575,
      "loss": 0.8952,
      "step": 243
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5013904571533203,
      "learning_rate": 0.00019996104998018882,
      "loss": 0.6528,
      "step": 244
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.39770281314849854,
      "learning_rate": 0.0001999595941275729,
      "loss": 0.732,
      "step": 245
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.7324321866035461,
      "learning_rate": 0.00019995811156932694,
      "loss": 0.8666,
      "step": 246
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.2386011779308319,
      "learning_rate": 0.00019995660230584706,
      "loss": 0.9825,
      "step": 247
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.3447628617286682,
      "learning_rate": 0.0001999550663375364,
      "loss": 0.762,
      "step": 248
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.2638325095176697,
      "learning_rate": 0.0001999535036648054,
      "loss": 0.8654,
      "step": 249
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.2760397791862488,
      "learning_rate": 0.0001999519142880715,
      "loss": 1.1158,
      "step": 250
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5826970934867859,
      "learning_rate": 0.00019995029820775936,
      "loss": 1.1495,
      "step": 251
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5516299605369568,
      "learning_rate": 0.00019994865542430067,
      "loss": 0.9159,
      "step": 252
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.33351531624794006,
      "learning_rate": 0.00019994698593813444,
      "loss": 0.9773,
      "step": 253
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.46574312448501587,
      "learning_rate": 0.00019994528974970658,
      "loss": 0.9427,
      "step": 254
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.4421521723270416,
      "learning_rate": 0.0001999435668594703,
      "loss": 0.8494,
      "step": 255
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.3428579270839691,
      "learning_rate": 0.0001999418172678859,
      "loss": 1.1604,
      "step": 256
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.3589499294757843,
      "learning_rate": 0.00019994004097542082,
      "loss": 0.7487,
      "step": 257
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.566631555557251,
      "learning_rate": 0.0001999382379825496,
      "loss": 1.0395,
      "step": 258
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0758161544799805,
      "learning_rate": 0.00019993640828975395,
      "loss": 0.7811,
      "step": 259
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.33492913842201233,
      "learning_rate": 0.0001999345518975227,
      "loss": 0.7806,
      "step": 260
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5101330876350403,
      "learning_rate": 0.00019993266880635174,
      "loss": 0.7704,
      "step": 261
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.2982483506202698,
      "learning_rate": 0.00019993075901674425,
      "loss": 0.9235,
      "step": 262
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.3243740797042847,
      "learning_rate": 0.00019992882252921045,
      "loss": 1.1089,
      "step": 263
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.45989927649497986,
      "learning_rate": 0.00019992685934426766,
      "loss": 0.8256,
      "step": 264
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.3876826763153076,
      "learning_rate": 0.0001999248694624404,
      "loss": 0.7278,
      "step": 265
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.24214334785938263,
      "learning_rate": 0.00019992285288426031,
      "loss": 0.8419,
      "step": 266
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.27799177169799805,
      "learning_rate": 0.0001999208096102661,
      "loss": 1.1497,
      "step": 267
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.31843364238739014,
      "learning_rate": 0.0001999187396410037,
      "loss": 1.0246,
      "step": 268
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.3395389914512634,
      "learning_rate": 0.00019991664297702616,
      "loss": 0.9571,
      "step": 269
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.3005499243736267,
      "learning_rate": 0.00019991451961889352,
      "loss": 0.83,
      "step": 270
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.2673066556453705,
      "learning_rate": 0.00019991236956717318,
      "loss": 0.8448,
      "step": 271
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.39509445428848267,
      "learning_rate": 0.00019991019282243952,
      "loss": 0.975,
      "step": 272
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.20044808089733124,
      "learning_rate": 0.00019990798938527408,
      "loss": 0.7568,
      "step": 273
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.3208981454372406,
      "learning_rate": 0.0001999057592562655,
      "loss": 0.9086,
      "step": 274
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5373703241348267,
      "learning_rate": 0.00019990350243600968,
      "loss": 0.5706,
      "step": 275
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0277663469314575,
      "learning_rate": 0.0001999012189251095,
      "loss": 0.7407,
      "step": 276
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.22999215126037598,
      "learning_rate": 0.00019989890872417507,
      "loss": 1.2958,
      "step": 277
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.43420982360839844,
      "learning_rate": 0.00019989657183382356,
      "loss": 0.973,
      "step": 278
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.8198911547660828,
      "learning_rate": 0.0001998942082546793,
      "loss": 1.1517,
      "step": 279
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.2566721737384796,
      "learning_rate": 0.0001998918179873738,
      "loss": 0.7728,
      "step": 280
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.4823971092700958,
      "learning_rate": 0.00019988940103254557,
      "loss": 1.071,
      "step": 281
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.26075923442840576,
      "learning_rate": 0.00019988695739084044,
      "loss": 1.0033,
      "step": 282
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.3437795042991638,
      "learning_rate": 0.0001998844870629112,
      "loss": 0.9092,
      "step": 283
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.34666430950164795,
      "learning_rate": 0.0001998819900494178,
      "loss": 0.9921,
      "step": 284
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.31166741251945496,
      "learning_rate": 0.00019987946635102746,
      "loss": 0.7524,
      "step": 285
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.43736791610717773,
      "learning_rate": 0.00019987691596841433,
      "loss": 0.835,
      "step": 286
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.22120977938175201,
      "learning_rate": 0.0001998743389022598,
      "loss": 0.8776,
      "step": 287
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.5275918841362,
      "learning_rate": 0.00019987173515325236,
      "loss": 0.8588,
      "step": 288
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.44380664825439453,
      "learning_rate": 0.0001998691047220877,
      "loss": 0.8253,
      "step": 289
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.17241142690181732,
      "learning_rate": 0.00019986644760946852,
      "loss": 0.6867,
      "step": 290
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.32836341857910156,
      "learning_rate": 0.00019986376381610473,
      "loss": 0.9168,
      "step": 291
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.36636587977409363,
      "learning_rate": 0.00019986105334271332,
      "loss": 1.0858,
      "step": 292
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.21369561553001404,
      "learning_rate": 0.00019985831619001845,
      "loss": 0.7514,
      "step": 293
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.23572002351284027,
      "learning_rate": 0.0001998555523587514,
      "loss": 0.7763,
      "step": 294
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.3044251501560211,
      "learning_rate": 0.00019985276184965054,
      "loss": 0.6,
      "step": 295
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.7884005308151245,
      "learning_rate": 0.0001998499446634614,
      "loss": 0.8689,
      "step": 296
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.3632194995880127,
      "learning_rate": 0.00019984710080093665,
      "loss": 1.1035,
      "step": 297
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.27811557054519653,
      "learning_rate": 0.00019984423026283605,
      "loss": 0.9714,
      "step": 298
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.3212566077709198,
      "learning_rate": 0.0001998413330499265,
      "loss": 0.7382,
      "step": 299
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.2864648401737213,
      "learning_rate": 0.00019983840916298207,
      "loss": 0.6487,
      "step": 300
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.2655390501022339,
      "learning_rate": 0.00019983545860278388,
      "loss": 0.8726,
      "step": 301
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.2431739717721939,
      "learning_rate": 0.00019983248137012022,
      "loss": 0.9582,
      "step": 302
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.5975880026817322,
      "learning_rate": 0.0001998294774657865,
      "loss": 1.0422,
      "step": 303
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.4394955337047577,
      "learning_rate": 0.00019982644689058528,
      "loss": 0.9504,
      "step": 304
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.3223266005516052,
      "learning_rate": 0.0001998233896453262,
      "loss": 0.9447,
      "step": 305
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.24571087956428528,
      "learning_rate": 0.000199820305730826,
      "loss": 0.7722,
      "step": 306
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.3075926601886749,
      "learning_rate": 0.00019981719514790874,
      "loss": 0.6645,
      "step": 307
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.30381521582603455,
      "learning_rate": 0.00019981405789740528,
      "loss": 1.0321,
      "step": 308
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.5468243360519409,
      "learning_rate": 0.00019981089398015387,
      "loss": 0.8401,
      "step": 309
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.32920846343040466,
      "learning_rate": 0.00019980770339699978,
      "loss": 0.9278,
      "step": 310
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.24359580874443054,
      "learning_rate": 0.00019980448614879547,
      "loss": 0.922,
      "step": 311
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.415422648191452,
      "learning_rate": 0.0001998012422364004,
      "loss": 0.9797,
      "step": 312
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.1806984394788742,
      "learning_rate": 0.00019979797166068126,
      "loss": 1.0568,
      "step": 313
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.376783549785614,
      "learning_rate": 0.0001997946744225118,
      "loss": 0.9759,
      "step": 314
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.23442836105823517,
      "learning_rate": 0.00019979135052277297,
      "loss": 0.8228,
      "step": 315
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.24465732276439667,
      "learning_rate": 0.00019978799996235277,
      "loss": 0.7166,
      "step": 316
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.2892625331878662,
      "learning_rate": 0.00019978462274214637,
      "loss": 0.9764,
      "step": 317
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.273517370223999,
      "learning_rate": 0.00019978121886305602,
      "loss": 0.7791,
      "step": 318
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.26664257049560547,
      "learning_rate": 0.00019977778832599115,
      "loss": 0.8269,
      "step": 319
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.255893349647522,
      "learning_rate": 0.00019977433113186824,
      "loss": 0.9295,
      "step": 320
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.6840054392814636,
      "learning_rate": 0.00019977084728161094,
      "loss": 0.9902,
      "step": 321
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.3878416121006012,
      "learning_rate": 0.00019976733677615,
      "loss": 0.7936,
      "step": 322
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0721887350082397,
      "learning_rate": 0.0001997637996164233,
      "loss": 1.1855,
      "step": 323
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.2939502000808716,
      "learning_rate": 0.00019976023580337587,
      "loss": 0.6914,
      "step": 324
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.20568883419036865,
      "learning_rate": 0.00019975664533795984,
      "loss": 0.9188,
      "step": 325
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.3231680989265442,
      "learning_rate": 0.0001997530282211344,
      "loss": 0.8105,
      "step": 326
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.3447208106517792,
      "learning_rate": 0.00019974938445386595,
      "loss": 0.9645,
      "step": 327
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.7631486654281616,
      "learning_rate": 0.000199745714037128,
      "loss": 0.9503,
      "step": 328
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.25843098759651184,
      "learning_rate": 0.00019974201697190108,
      "loss": 0.7998,
      "step": 329
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.28338584303855896,
      "learning_rate": 0.000199738293259173,
      "loss": 0.8323,
      "step": 330
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.34728825092315674,
      "learning_rate": 0.00019973454289993854,
      "loss": 0.6201,
      "step": 331
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.6377072334289551,
      "learning_rate": 0.00019973076589519968,
      "loss": 1.0723,
      "step": 332
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.25263649225234985,
      "learning_rate": 0.00019972696224596553,
      "loss": 0.8019,
      "step": 333
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.30477574467658997,
      "learning_rate": 0.00019972313195325226,
      "loss": 0.9455,
      "step": 334
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.4002573788166046,
      "learning_rate": 0.00019971927501808315,
      "loss": 0.8355,
      "step": 335
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.281225323677063,
      "learning_rate": 0.0001997153914414887,
      "loss": 0.9064,
      "step": 336
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.2492245435714722,
      "learning_rate": 0.00019971148122450644,
      "loss": 1.0373,
      "step": 337
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.4577679932117462,
      "learning_rate": 0.00019970754436818107,
      "loss": 0.8483,
      "step": 338
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.4345574975013733,
      "learning_rate": 0.00019970358087356428,
      "loss": 0.9571,
      "step": 339
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.8129209876060486,
      "learning_rate": 0.00019969959074171508,
      "loss": 0.7216,
      "step": 340
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.1909695565700531,
      "learning_rate": 0.00019969557397369947,
      "loss": 0.8619,
      "step": 341
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.295391708612442,
      "learning_rate": 0.00019969153057059055,
      "loss": 0.8744,
      "step": 342
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.18926553428173065,
      "learning_rate": 0.00019968746053346858,
      "loss": 0.7234,
      "step": 343
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.2071923166513443,
      "learning_rate": 0.00019968336386342095,
      "loss": 0.7584,
      "step": 344
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.2709278464317322,
      "learning_rate": 0.00019967924056154212,
      "loss": 0.9874,
      "step": 345
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.1878633350133896,
      "learning_rate": 0.0001996750906289337,
      "loss": 0.7492,
      "step": 346
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.35984358191490173,
      "learning_rate": 0.00019967091406670445,
      "loss": 1.0436,
      "step": 347
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.3239493668079376,
      "learning_rate": 0.00019966671087597008,
      "loss": 0.8453,
      "step": 348
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.7825733423233032,
      "learning_rate": 0.00019966248105785365,
      "loss": 1.1038,
      "step": 349
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.7784348130226135,
      "learning_rate": 0.0001996582246134852,
      "loss": 0.7636,
      "step": 350
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5633595585823059,
      "learning_rate": 0.0001996539415440018,
      "loss": 1.2002,
      "step": 351
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.131707787513733,
      "learning_rate": 0.00019964963185054786,
      "loss": 0.8989,
      "step": 352
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.24750792980194092,
      "learning_rate": 0.00019964529553427468,
      "loss": 0.7905,
      "step": 353
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.3838002681732178,
      "learning_rate": 0.00019964093259634084,
      "loss": 0.9134,
      "step": 354
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.4333482086658478,
      "learning_rate": 0.00019963654303791192,
      "loss": 0.8565,
      "step": 355
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.3473266065120697,
      "learning_rate": 0.00019963212686016068,
      "loss": 0.919,
      "step": 356
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.3055814802646637,
      "learning_rate": 0.00019962768406426692,
      "loss": 0.8033,
      "step": 357
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.3203541934490204,
      "learning_rate": 0.00019962321465141764,
      "loss": 0.6881,
      "step": 358
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.324602335691452,
      "learning_rate": 0.0001996187186228069,
      "loss": 0.9467,
      "step": 359
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.2655504047870636,
      "learning_rate": 0.00019961419597963587,
      "loss": 0.8673,
      "step": 360
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.8781147599220276,
      "learning_rate": 0.0001996096467231128,
      "loss": 0.6375,
      "step": 361
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.4599803388118744,
      "learning_rate": 0.00019960507085445313,
      "loss": 0.783,
      "step": 362
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.3022764027118683,
      "learning_rate": 0.00019960046837487944,
      "loss": 0.7458,
      "step": 363
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.2959281802177429,
      "learning_rate": 0.0001995958392856212,
      "loss": 0.8575,
      "step": 364
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.3197516202926636,
      "learning_rate": 0.00019959118358791524,
      "loss": 1.0848,
      "step": 365
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.19173048436641693,
      "learning_rate": 0.00019958650128300536,
      "loss": 0.6159,
      "step": 366
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.4743506610393524,
      "learning_rate": 0.0001995817923721425,
      "loss": 0.798,
      "step": 367
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.4389582872390747,
      "learning_rate": 0.00019957705685658478,
      "loss": 1.0644,
      "step": 368
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.3231479227542877,
      "learning_rate": 0.00019957229473759722,
      "loss": 0.866,
      "step": 369
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.23190565407276154,
      "learning_rate": 0.00019956750601645223,
      "loss": 0.965,
      "step": 370
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0113565921783447,
      "learning_rate": 0.0001995626906944291,
      "loss": 1.0415,
      "step": 371
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.32693520188331604,
      "learning_rate": 0.00019955784877281435,
      "loss": 0.8557,
      "step": 372
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.2559601962566376,
      "learning_rate": 0.00019955298025290156,
      "loss": 0.8694,
      "step": 373
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.3325576186180115,
      "learning_rate": 0.00019954808513599143,
      "loss": 0.9585,
      "step": 374
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5195139646530151,
      "learning_rate": 0.00019954316342339173,
      "loss": 0.8194,
      "step": 375
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.4150591194629669,
      "learning_rate": 0.0001995382151164174,
      "loss": 0.7388,
      "step": 376
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.2889802157878876,
      "learning_rate": 0.00019953324021639043,
      "loss": 0.7292,
      "step": 377
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.4832020103931427,
      "learning_rate": 0.00019952823872463997,
      "loss": 0.5889,
      "step": 378
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.3505914509296417,
      "learning_rate": 0.0001995232106425022,
      "loss": 0.8631,
      "step": 379
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.3356497287750244,
      "learning_rate": 0.00019951815597132045,
      "loss": 0.9304,
      "step": 380
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.24190665781497955,
      "learning_rate": 0.00019951307471244517,
      "loss": 0.7301,
      "step": 381
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.34134232997894287,
      "learning_rate": 0.00019950796686723384,
      "loss": 0.6697,
      "step": 382
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.21698357164859772,
      "learning_rate": 0.00019950283243705118,
      "loss": 0.6725,
      "step": 383
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.27923089265823364,
      "learning_rate": 0.00019949767142326885,
      "loss": 0.7611,
      "step": 384
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5498031973838806,
      "learning_rate": 0.00019949248382726572,
      "loss": 0.8269,
      "step": 385
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.6905609369277954,
      "learning_rate": 0.00019948726965042773,
      "loss": 0.9711,
      "step": 386
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.2936451733112335,
      "learning_rate": 0.0001994820288941479,
      "loss": 0.7453,
      "step": 387
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.30254217982292175,
      "learning_rate": 0.0001994767615598264,
      "loss": 1.0102,
      "step": 388
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.3230556845664978,
      "learning_rate": 0.00019947146764887045,
      "loss": 0.9345,
      "step": 389
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.37075215578079224,
      "learning_rate": 0.00019946614716269442,
      "loss": 0.8846,
      "step": 390
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.4866223931312561,
      "learning_rate": 0.00019946080010271975,
      "loss": 0.9579,
      "step": 391
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.23618194460868835,
      "learning_rate": 0.00019945542647037493,
      "loss": 0.9147,
      "step": 392
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.36483821272850037,
      "learning_rate": 0.00019945002626709567,
      "loss": 0.8546,
      "step": 393
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.3424234390258789,
      "learning_rate": 0.0001994445994943247,
      "loss": 1.0143,
      "step": 394
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.6068972945213318,
      "learning_rate": 0.00019943914615351186,
      "loss": 0.8736,
      "step": 395
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.4368737041950226,
      "learning_rate": 0.00019943366624611405,
      "loss": 0.8577,
      "step": 396
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.27408549189567566,
      "learning_rate": 0.0001994281597735953,
      "loss": 0.7274,
      "step": 397
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.3031626045703888,
      "learning_rate": 0.0001994226267374268,
      "loss": 0.9368,
      "step": 398
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.2520623505115509,
      "learning_rate": 0.00019941706713908674,
      "loss": 0.939,
      "step": 399
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5956240296363831,
      "learning_rate": 0.00019941148098006047,
      "loss": 0.6711,
      "step": 400
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.20520710945129395,
      "learning_rate": 0.0001994058682618404,
      "loss": 0.9791,
      "step": 401
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.34039878845214844,
      "learning_rate": 0.00019940022898592608,
      "loss": 0.8624,
      "step": 402
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.28212276101112366,
      "learning_rate": 0.00019939456315382404,
      "loss": 0.9609,
      "step": 403
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.19385021924972534,
      "learning_rate": 0.00019938887076704804,
      "loss": 0.9171,
      "step": 404
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.2684202194213867,
      "learning_rate": 0.00019938315182711888,
      "loss": 0.7616,
      "step": 405
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.24253050982952118,
      "learning_rate": 0.0001993774063355645,
      "loss": 0.6804,
      "step": 406
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.225099578499794,
      "learning_rate": 0.0001993716342939198,
      "loss": 0.8261,
      "step": 407
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.38843679428100586,
      "learning_rate": 0.00019936583570372694,
      "loss": 0.817,
      "step": 408
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.21813204884529114,
      "learning_rate": 0.00019936001056653505,
      "loss": 0.7235,
      "step": 409
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5496176481246948,
      "learning_rate": 0.00019935415888390042,
      "loss": 0.9643,
      "step": 410
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.2421552836894989,
      "learning_rate": 0.0001993482806573864,
      "loss": 0.7752,
      "step": 411
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.24622325599193573,
      "learning_rate": 0.00019934237588856344,
      "loss": 0.7766,
      "step": 412
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5839800238609314,
      "learning_rate": 0.0001993364445790091,
      "loss": 0.7993,
      "step": 413
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.34820014238357544,
      "learning_rate": 0.000199330486730308,
      "loss": 0.6962,
      "step": 414
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.23068787157535553,
      "learning_rate": 0.00019932450234405184,
      "loss": 0.8614,
      "step": 415
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5485920310020447,
      "learning_rate": 0.0001993184914218395,
      "loss": 1.1007,
      "step": 416
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.3509712517261505,
      "learning_rate": 0.00019931245396527682,
      "loss": 0.6957,
      "step": 417
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.281520277261734,
      "learning_rate": 0.00019930638997597684,
      "loss": 0.9312,
      "step": 418
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.2522992491722107,
      "learning_rate": 0.0001993002994555596,
      "loss": 0.7548,
      "step": 419
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.3342667520046234,
      "learning_rate": 0.0001992941824056523,
      "loss": 0.9112,
      "step": 420
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.6142451763153076,
      "learning_rate": 0.00019928803882788917,
      "loss": 0.8499,
      "step": 421
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.16736458241939545,
      "learning_rate": 0.00019928186872391156,
      "loss": 0.8329,
      "step": 422
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.20739677548408508,
      "learning_rate": 0.00019927567209536794,
      "loss": 0.716,
      "step": 423
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.3511585295200348,
      "learning_rate": 0.0001992694489439138,
      "loss": 0.7743,
      "step": 424
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.4328821003437042,
      "learning_rate": 0.00019926319927121173,
      "loss": 1.1595,
      "step": 425
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.7415485382080078,
      "learning_rate": 0.00019925692307893144,
      "loss": 0.6069,
      "step": 426
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.2508932054042816,
      "learning_rate": 0.0001992506203687497,
      "loss": 0.7701,
      "step": 427
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.23741789162158966,
      "learning_rate": 0.00019924429114235036,
      "loss": 0.8512,
      "step": 428
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.36572086811065674,
      "learning_rate": 0.00019923793540142432,
      "loss": 0.8726,
      "step": 429
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.3228726387023926,
      "learning_rate": 0.0001992315531476697,
      "loss": 0.8695,
      "step": 430
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.3223040699958801,
      "learning_rate": 0.00019922514438279156,
      "loss": 1.0544,
      "step": 431
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.264818012714386,
      "learning_rate": 0.0001992187091085021,
      "loss": 0.884,
      "step": 432
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.3359452188014984,
      "learning_rate": 0.00019921224732652058,
      "loss": 0.9571,
      "step": 433
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.23115573823451996,
      "learning_rate": 0.00019920575903857338,
      "loss": 0.9327,
      "step": 434
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.4078423082828522,
      "learning_rate": 0.00019919924424639392,
      "loss": 0.9099,
      "step": 435
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.19606636464595795,
      "learning_rate": 0.0001991927029517227,
      "loss": 0.7028,
      "step": 436
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.6013062596321106,
      "learning_rate": 0.00019918613515630738,
      "loss": 1.001,
      "step": 437
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.29109832644462585,
      "learning_rate": 0.0001991795408619026,
      "loss": 0.6469,
      "step": 438
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5194069147109985,
      "learning_rate": 0.00019917292007027014,
      "loss": 1.0308,
      "step": 439
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.22000356018543243,
      "learning_rate": 0.0001991662727831788,
      "loss": 0.9656,
      "step": 440
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.16392017900943756,
      "learning_rate": 0.00019915959900240458,
      "loss": 0.8198,
      "step": 441
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.26826927065849304,
      "learning_rate": 0.00019915289872973036,
      "loss": 0.9022,
      "step": 442
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.3523117005825043,
      "learning_rate": 0.0001991461719669463,
      "loss": 1.1356,
      "step": 443
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.30283281207084656,
      "learning_rate": 0.00019913941871584951,
      "loss": 0.8929,
      "step": 444
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5101399421691895,
      "learning_rate": 0.00019913263897824423,
      "loss": 0.8356,
      "step": 445
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.20601943135261536,
      "learning_rate": 0.00019912583275594176,
      "loss": 0.9271,
      "step": 446
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.642131507396698,
      "learning_rate": 0.0001991190000507605,
      "loss": 0.9992,
      "step": 447
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.33205416798591614,
      "learning_rate": 0.00019911214086452587,
      "loss": 0.6991,
      "step": 448
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.23991380631923676,
      "learning_rate": 0.0001991052551990704,
      "loss": 0.8101,
      "step": 449
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.27827972173690796,
      "learning_rate": 0.00019909834305623377,
      "loss": 0.9567,
      "step": 450
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.6650862097740173,
      "learning_rate": 0.00019909140443786255,
      "loss": 0.9207,
      "step": 451
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.28517311811447144,
      "learning_rate": 0.00019908443934581055,
      "loss": 0.7937,
      "step": 452
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.24322494864463806,
      "learning_rate": 0.0001990774477819386,
      "loss": 0.8252,
      "step": 453
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.3665752112865448,
      "learning_rate": 0.00019907042974811457,
      "loss": 0.8895,
      "step": 454
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.409489244222641,
      "learning_rate": 0.00019906338524621343,
      "loss": 0.6918,
      "step": 455
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.3869452476501465,
      "learning_rate": 0.00019905631427811723,
      "loss": 0.7486,
      "step": 456
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.3032418191432953,
      "learning_rate": 0.00019904921684571507,
      "loss": 0.9128,
      "step": 457
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.23837944865226746,
      "learning_rate": 0.00019904209295090313,
      "loss": 0.8163,
      "step": 458
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.3232380449771881,
      "learning_rate": 0.00019903494259558465,
      "loss": 0.7346,
      "step": 459
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.14098459482192993,
      "learning_rate": 0.00019902776578166999,
      "loss": 0.7695,
      "step": 460
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.176029235124588,
      "learning_rate": 0.0001990205625110765,
      "loss": 0.6217,
      "step": 461
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.3075037896633148,
      "learning_rate": 0.00019901333278572866,
      "loss": 0.8998,
      "step": 462
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.3151698112487793,
      "learning_rate": 0.000199006076607558,
      "loss": 0.9585,
      "step": 463
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5474414825439453,
      "learning_rate": 0.00019899879397850304,
      "loss": 0.6781,
      "step": 464
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.28878042101860046,
      "learning_rate": 0.00019899148490050954,
      "loss": 0.8126,
      "step": 465
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.3002655804157257,
      "learning_rate": 0.00019898414937553018,
      "loss": 0.8713,
      "step": 466
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.3542482256889343,
      "learning_rate": 0.00019897678740552475,
      "loss": 0.8531,
      "step": 467
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.3790711760520935,
      "learning_rate": 0.00019896939899246004,
      "loss": 0.7718,
      "step": 468
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.6146113872528076,
      "learning_rate": 0.0001989619841383101,
      "loss": 0.9488,
      "step": 469
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.2547500729560852,
      "learning_rate": 0.00019895454284505585,
      "loss": 1.0822,
      "step": 470
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.302065908908844,
      "learning_rate": 0.00019894707511468527,
      "loss": 0.9361,
      "step": 471
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.29273101687431335,
      "learning_rate": 0.0001989395809491936,
      "loss": 0.9395,
      "step": 472
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.13730944693088531,
      "learning_rate": 0.00019893206035058293,
      "loss": 0.8903,
      "step": 473
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.2760636806488037,
      "learning_rate": 0.00019892451332086247,
      "loss": 0.7882,
      "step": 474
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.21855312585830688,
      "learning_rate": 0.0001989169398620486,
      "loss": 0.8297,
      "step": 475
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.15980499982833862,
      "learning_rate": 0.00019890933997616461,
      "loss": 0.7469,
      "step": 476
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.3620885908603668,
      "learning_rate": 0.00019890171366524094,
      "loss": 1.073,
      "step": 477
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.21858996152877808,
      "learning_rate": 0.0001988940609313151,
      "loss": 0.954,
      "step": 478
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.4542141556739807,
      "learning_rate": 0.00019888638177643163,
      "loss": 0.8967,
      "step": 479
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.23320822417736053,
      "learning_rate": 0.00019887867620264205,
      "loss": 0.7744,
      "step": 480
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.25438180565834045,
      "learning_rate": 0.00019887094421200505,
      "loss": 0.6999,
      "step": 481
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.23381321132183075,
      "learning_rate": 0.00019886318580658637,
      "loss": 0.6491,
      "step": 482
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.23931169509887695,
      "learning_rate": 0.00019885540098845875,
      "loss": 0.895,
      "step": 483
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.48242002725601196,
      "learning_rate": 0.000198847589759702,
      "loss": 0.9889,
      "step": 484
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9053157567977905,
      "learning_rate": 0.00019883975212240307,
      "loss": 0.894,
      "step": 485
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.3876802921295166,
      "learning_rate": 0.00019883188807865584,
      "loss": 0.867,
      "step": 486
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.680789589881897,
      "learning_rate": 0.0001988239976305613,
      "loss": 1.0813,
      "step": 487
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.3351377546787262,
      "learning_rate": 0.00019881608078022752,
      "loss": 0.7457,
      "step": 488
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.4244190752506256,
      "learning_rate": 0.00019880813752976958,
      "loss": 0.8426,
      "step": 489
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.2202087640762329,
      "learning_rate": 0.0001988001678813096,
      "loss": 0.7002,
      "step": 490
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.33611464500427246,
      "learning_rate": 0.0001987921718369769,
      "loss": 0.9751,
      "step": 491
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.3452919125556946,
      "learning_rate": 0.0001987841493989076,
      "loss": 0.8381,
      "step": 492
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.2808714210987091,
      "learning_rate": 0.0001987761005692451,
      "loss": 0.7768,
      "step": 493
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.17545920610427856,
      "learning_rate": 0.00019876802535013973,
      "loss": 0.8052,
      "step": 494
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.16643303632736206,
      "learning_rate": 0.00019875992374374893,
      "loss": 0.9906,
      "step": 495
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.2630583941936493,
      "learning_rate": 0.00019875179575223708,
      "loss": 0.7817,
      "step": 496
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.6224496364593506,
      "learning_rate": 0.00019874364137777576,
      "loss": 0.9929,
      "step": 497
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.3863585889339447,
      "learning_rate": 0.0001987354606225435,
      "loss": 0.8815,
      "step": 498
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.2530553638935089,
      "learning_rate": 0.0001987272534887259,
      "loss": 0.7842,
      "step": 499
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.22032374143600464,
      "learning_rate": 0.00019871901997851565,
      "loss": 0.8864,
      "step": 500
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.22702191770076752,
      "learning_rate": 0.00019871076009411243,
      "loss": 0.8017,
      "step": 501
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.2029409408569336,
      "learning_rate": 0.00019870247383772295,
      "loss": 0.6832,
      "step": 502
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.31609609723091125,
      "learning_rate": 0.00019869416121156105,
      "loss": 0.86,
      "step": 503
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.26578351855278015,
      "learning_rate": 0.00019868582221784756,
      "loss": 0.8073,
      "step": 504
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.30495136976242065,
      "learning_rate": 0.00019867745685881033,
      "loss": 1.1443,
      "step": 505
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.19871757924556732,
      "learning_rate": 0.00019866906513668427,
      "loss": 0.6328,
      "step": 506
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.3829546570777893,
      "learning_rate": 0.00019866064705371145,
      "loss": 1.0069,
      "step": 507
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.18352967500686646,
      "learning_rate": 0.00019865220261214078,
      "loss": 0.6333,
      "step": 508
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.2389804720878601,
      "learning_rate": 0.00019864373181422833,
      "loss": 1.1208,
      "step": 509
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.33123496174812317,
      "learning_rate": 0.00019863523466223722,
      "loss": 0.8887,
      "step": 510
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.42547816038131714,
      "learning_rate": 0.0001986267111584376,
      "loss": 0.9142,
      "step": 511
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.3454482853412628,
      "learning_rate": 0.00019861816130510658,
      "loss": 0.8372,
      "step": 512
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.16060225665569305,
      "learning_rate": 0.00019860958510452842,
      "loss": 0.7147,
      "step": 513
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.30430546402931213,
      "learning_rate": 0.00019860098255899437,
      "loss": 0.9266,
      "step": 514
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.3987911343574524,
      "learning_rate": 0.00019859235367080275,
      "loss": 0.592,
      "step": 515
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.19711381196975708,
      "learning_rate": 0.00019858369844225887,
      "loss": 0.8414,
      "step": 516
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.6430870294570923,
      "learning_rate": 0.00019857501687567505,
      "loss": 0.8116,
      "step": 517
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.22628845274448395,
      "learning_rate": 0.00019856630897337077,
      "loss": 0.8685,
      "step": 518
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.5383879542350769,
      "learning_rate": 0.00019855757473767242,
      "loss": 0.7886,
      "step": 519
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.7530208230018616,
      "learning_rate": 0.0001985488141709135,
      "loss": 0.8535,
      "step": 520
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.2319243997335434,
      "learning_rate": 0.0001985400272754345,
      "loss": 0.7798,
      "step": 521
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.2137853354215622,
      "learning_rate": 0.00019853121405358296,
      "loss": 0.9592,
      "step": 522
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.23711815476417542,
      "learning_rate": 0.0001985223745077135,
      "loss": 0.8726,
      "step": 523
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.17506934702396393,
      "learning_rate": 0.00019851350864018768,
      "loss": 0.6394,
      "step": 524
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.27189749479293823,
      "learning_rate": 0.0001985046164533742,
      "loss": 0.803,
      "step": 525
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.2796277701854706,
      "learning_rate": 0.00019849569794964865,
      "loss": 0.8093,
      "step": 526
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.1571151465177536,
      "learning_rate": 0.00019848675313139383,
      "loss": 0.9923,
      "step": 527
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.21927769482135773,
      "learning_rate": 0.0001984777820009994,
      "loss": 0.8925,
      "step": 528
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.29762548208236694,
      "learning_rate": 0.0001984687845608622,
      "loss": 1.0314,
      "step": 529
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.37927791476249695,
      "learning_rate": 0.00019845976081338596,
      "loss": 1.1409,
      "step": 530
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.21579086780548096,
      "learning_rate": 0.0001984507107609815,
      "loss": 0.8629,
      "step": 531
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.6288403868675232,
      "learning_rate": 0.00019844163440606673,
      "loss": 0.8129,
      "step": 532
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5173250436782837,
      "learning_rate": 0.00019843253175106645,
      "loss": 1.1368,
      "step": 533
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.29715070128440857,
      "learning_rate": 0.00019842340279841266,
      "loss": 0.8643,
      "step": 534
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.2756355106830597,
      "learning_rate": 0.00019841424755054422,
      "loss": 0.8521,
      "step": 535
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.21964915096759796,
      "learning_rate": 0.0001984050660099071,
      "loss": 0.7107,
      "step": 536
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.31793975830078125,
      "learning_rate": 0.00019839585817895428,
      "loss": 0.9049,
      "step": 537
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.36198487877845764,
      "learning_rate": 0.00019838662406014573,
      "loss": 1.0296,
      "step": 538
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.4106085002422333,
      "learning_rate": 0.00019837736365594855,
      "loss": 1.0453,
      "step": 539
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.3647058308124542,
      "learning_rate": 0.00019836807696883672,
      "loss": 1.0681,
      "step": 540
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.7292600870132446,
      "learning_rate": 0.00019835876400129136,
      "loss": 0.7589,
      "step": 541
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.304271936416626,
      "learning_rate": 0.00019834942475580053,
      "loss": 1.066,
      "step": 542
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.19680587947368622,
      "learning_rate": 0.0001983400592348594,
      "loss": 0.9787,
      "step": 543
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.2291305512189865,
      "learning_rate": 0.00019833066744096999,
      "loss": 0.9619,
      "step": 544
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.7099002003669739,
      "learning_rate": 0.00019832124937664154,
      "loss": 0.8222,
      "step": 545
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.2601619064807892,
      "learning_rate": 0.00019831180504439023,
      "loss": 0.8164,
      "step": 546
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.15867426991462708,
      "learning_rate": 0.00019830233444673918,
      "loss": 0.8959,
      "step": 547
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.2409762740135193,
      "learning_rate": 0.00019829283758621865,
      "loss": 0.7033,
      "step": 548
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.2666078805923462,
      "learning_rate": 0.00019828331446536583,
      "loss": 0.778,
      "step": 549
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.3715464174747467,
      "learning_rate": 0.00019827376508672496,
      "loss": 0.8958,
      "step": 550
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.28523632884025574,
      "learning_rate": 0.00019826418945284732,
      "loss": 1.0133,
      "step": 551
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.2699063718318939,
      "learning_rate": 0.00019825458756629117,
      "loss": 0.7878,
      "step": 552
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.3194313943386078,
      "learning_rate": 0.00019824495942962178,
      "loss": 0.7112,
      "step": 553
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.30332082509994507,
      "learning_rate": 0.00019823530504541143,
      "loss": 0.7371,
      "step": 554
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.1819697916507721,
      "learning_rate": 0.00019822562441623945,
      "loss": 1.0358,
      "step": 555
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.30387768149375916,
      "learning_rate": 0.00019821591754469215,
      "loss": 0.867,
      "step": 556
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.3026760518550873,
      "learning_rate": 0.00019820618443336288,
      "loss": 0.9247,
      "step": 557
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.2734585702419281,
      "learning_rate": 0.0001981964250848519,
      "loss": 0.6572,
      "step": 558
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.23254548013210297,
      "learning_rate": 0.0001981866395017667,
      "loss": 0.8952,
      "step": 559
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.30720534920692444,
      "learning_rate": 0.00019817682768672148,
      "loss": 1.1244,
      "step": 560
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.3204452693462372,
      "learning_rate": 0.0001981669896423377,
      "loss": 0.8329,
      "step": 561
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.25828391313552856,
      "learning_rate": 0.0001981571253712437,
      "loss": 0.8917,
      "step": 562
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.4552439749240875,
      "learning_rate": 0.00019814723487607491,
      "loss": 0.9066,
      "step": 563
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.21315927803516388,
      "learning_rate": 0.00019813731815947368,
      "loss": 0.8737,
      "step": 564
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.406791627407074,
      "learning_rate": 0.00019812737522408938,
      "loss": 0.9563,
      "step": 565
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.26249173283576965,
      "learning_rate": 0.00019811740607257844,
      "loss": 0.9687,
      "step": 566
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.27975335717201233,
      "learning_rate": 0.00019810741070760428,
      "loss": 0.9066,
      "step": 567
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.32463571429252625,
      "learning_rate": 0.00019809738913183724,
      "loss": 0.9384,
      "step": 568
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5084578990936279,
      "learning_rate": 0.0001980873413479548,
      "loss": 0.8945,
      "step": 569
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.19104281067848206,
      "learning_rate": 0.00019807726735864128,
      "loss": 0.8497,
      "step": 570
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.25283902883529663,
      "learning_rate": 0.00019806716716658818,
      "loss": 0.7327,
      "step": 571
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.258681058883667,
      "learning_rate": 0.00019805704077449385,
      "loss": 0.8061,
      "step": 572
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.24622702598571777,
      "learning_rate": 0.00019804688818506373,
      "loss": 0.7862,
      "step": 573
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.3366948962211609,
      "learning_rate": 0.00019803670940101022,
      "loss": 0.8849,
      "step": 574
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.2544059753417969,
      "learning_rate": 0.00019802650442505274,
      "loss": 1.0661,
      "step": 575
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.2133902609348297,
      "learning_rate": 0.00019801627325991767,
      "loss": 0.8273,
      "step": 576
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.23675177991390228,
      "learning_rate": 0.00019800601590833842,
      "loss": 0.8054,
      "step": 577
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.2449563890695572,
      "learning_rate": 0.00019799573237305542,
      "loss": 0.8601,
      "step": 578
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.3292624056339264,
      "learning_rate": 0.00019798542265681598,
      "loss": 0.8119,
      "step": 579
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.28089290857315063,
      "learning_rate": 0.0001979750867623746,
      "loss": 0.9379,
      "step": 580
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.21688401699066162,
      "learning_rate": 0.00019796472469249258,
      "loss": 0.9747,
      "step": 581
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.13079896569252014,
      "learning_rate": 0.00019795433644993833,
      "loss": 0.7827,
      "step": 582
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.33304542303085327,
      "learning_rate": 0.00019794392203748717,
      "loss": 0.9521,
      "step": 583
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.3335334360599518,
      "learning_rate": 0.0001979334814579215,
      "loss": 1.0039,
      "step": 584
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.17485477030277252,
      "learning_rate": 0.0001979230147140307,
      "loss": 0.9186,
      "step": 585
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.29629865288734436,
      "learning_rate": 0.00019791252180861106,
      "loss": 0.726,
      "step": 586
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.18266913294792175,
      "learning_rate": 0.00019790200274446594,
      "loss": 0.6346,
      "step": 587
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.3097255825996399,
      "learning_rate": 0.0001978914575244056,
      "loss": 0.77,
      "step": 588
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.1989104449748993,
      "learning_rate": 0.0001978808861512474,
      "loss": 0.8049,
      "step": 589
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.38421693444252014,
      "learning_rate": 0.00019787028862781563,
      "loss": 0.9297,
      "step": 590
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.3715411126613617,
      "learning_rate": 0.00019785966495694155,
      "loss": 0.9717,
      "step": 591
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.4899900257587433,
      "learning_rate": 0.00019784901514146346,
      "loss": 0.7599,
      "step": 592
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.4391191899776459,
      "learning_rate": 0.00019783833918422653,
      "loss": 0.9347,
      "step": 593
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.24889209866523743,
      "learning_rate": 0.00019782763708808308,
      "loss": 0.7771,
      "step": 594
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.7339361906051636,
      "learning_rate": 0.0001978169088558923,
      "loss": 1.0477,
      "step": 595
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.5719285607337952,
      "learning_rate": 0.0001978061544905204,
      "loss": 0.9634,
      "step": 596
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.45090556144714355,
      "learning_rate": 0.0001977953739948405,
      "loss": 1.0967,
      "step": 597
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.2973722517490387,
      "learning_rate": 0.00019778456737173285,
      "loss": 0.9584,
      "step": 598
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.24452774226665497,
      "learning_rate": 0.00019777373462408454,
      "loss": 0.6098,
      "step": 599
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.2937648594379425,
      "learning_rate": 0.0001977628757547897,
      "loss": 0.8879,
      "step": 600
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.28402718901634216,
      "learning_rate": 0.00019775199076674946,
      "loss": 0.8106,
      "step": 601
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.19483642280101776,
      "learning_rate": 0.00019774107966287187,
      "loss": 0.9534,
      "step": 602
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.2060997635126114,
      "learning_rate": 0.000197730142446072,
      "loss": 0.7828,
      "step": 603
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.23113122582435608,
      "learning_rate": 0.00019771917911927186,
      "loss": 0.7797,
      "step": 604
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.3063499629497528,
      "learning_rate": 0.0001977081896854005,
      "loss": 0.8016,
      "step": 605
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.17358066141605377,
      "learning_rate": 0.00019769717414739387,
      "loss": 0.7705,
      "step": 606
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.20933988690376282,
      "learning_rate": 0.00019768613250819493,
      "loss": 0.6777,
      "step": 607
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.612690269947052,
      "learning_rate": 0.00019767506477075364,
      "loss": 0.8936,
      "step": 608
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.23404499888420105,
      "learning_rate": 0.00019766397093802689,
      "loss": 0.859,
      "step": 609
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.24961723387241364,
      "learning_rate": 0.00019765285101297852,
      "loss": 0.8709,
      "step": 610
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.27530789375305176,
      "learning_rate": 0.00019764170499857943,
      "loss": 0.8073,
      "step": 611
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.30722400546073914,
      "learning_rate": 0.0001976305328978074,
      "loss": 0.757,
      "step": 612
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.3290161192417145,
      "learning_rate": 0.0001976193347136472,
      "loss": 0.8576,
      "step": 613
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.20819233357906342,
      "learning_rate": 0.00019760811044909068,
      "loss": 0.6937,
      "step": 614
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.2544260621070862,
      "learning_rate": 0.00019759686010713644,
      "loss": 0.8065,
      "step": 615
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.34453725814819336,
      "learning_rate": 0.00019758558369079027,
      "loss": 0.6451,
      "step": 616
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.3149903118610382,
      "learning_rate": 0.00019757428120306474,
      "loss": 0.6027,
      "step": 617
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.3551630973815918,
      "learning_rate": 0.00019756295264697953,
      "loss": 0.9087,
      "step": 618
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.13562460243701935,
      "learning_rate": 0.0001975515980255612,
      "loss": 0.6385,
      "step": 619
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.308493971824646,
      "learning_rate": 0.0001975402173418433,
      "loss": 0.8709,
      "step": 620
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.30189716815948486,
      "learning_rate": 0.00019752881059886636,
      "loss": 0.9049,
      "step": 621
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.6003822088241577,
      "learning_rate": 0.00019751737779967785,
      "loss": 0.8668,
      "step": 622
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.31589025259017944,
      "learning_rate": 0.00019750591894733216,
      "loss": 1.0033,
      "step": 623
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.21087029576301575,
      "learning_rate": 0.00019749443404489073,
      "loss": 0.6385,
      "step": 624
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.27787595987319946,
      "learning_rate": 0.00019748292309542192,
      "loss": 0.8394,
      "step": 625
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.3085406422615051,
      "learning_rate": 0.00019747138610200105,
      "loss": 0.7467,
      "step": 626
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.28744107484817505,
      "learning_rate": 0.00019745982306771035,
      "loss": 0.9469,
      "step": 627
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.32608941197395325,
      "learning_rate": 0.00019744823399563908,
      "loss": 0.9665,
      "step": 628
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.3879786729812622,
      "learning_rate": 0.00019743661888888342,
      "loss": 0.6539,
      "step": 629
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.3207158148288727,
      "learning_rate": 0.0001974249777505465,
      "loss": 0.7487,
      "step": 630
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.6371877789497375,
      "learning_rate": 0.00019741331058373843,
      "loss": 0.7253,
      "step": 631
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.43657711148262024,
      "learning_rate": 0.00019740161739157625,
      "loss": 0.8976,
      "step": 632
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.6421412229537964,
      "learning_rate": 0.00019738989817718396,
      "loss": 0.9073,
      "step": 633
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.22149471938610077,
      "learning_rate": 0.00019737815294369252,
      "loss": 0.8223,
      "step": 634
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.22275887429714203,
      "learning_rate": 0.0001973663816942399,
      "loss": 0.9863,
      "step": 635
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.17939285933971405,
      "learning_rate": 0.00019735458443197084,
      "loss": 0.7958,
      "step": 636
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.30789220333099365,
      "learning_rate": 0.00019734276116003722,
      "loss": 0.724,
      "step": 637
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.18272221088409424,
      "learning_rate": 0.00019733091188159775,
      "loss": 0.7833,
      "step": 638
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.22234411537647247,
      "learning_rate": 0.0001973190365998182,
      "loss": 0.8893,
      "step": 639
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.27738189697265625,
      "learning_rate": 0.00019730713531787117,
      "loss": 0.6628,
      "step": 640
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.4232870936393738,
      "learning_rate": 0.00019729520803893628,
      "loss": 0.9815,
      "step": 641
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.3298378586769104,
      "learning_rate": 0.00019728325476620005,
      "loss": 0.7425,
      "step": 642
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.2554842233657837,
      "learning_rate": 0.000197271275502856,
      "loss": 0.667,
      "step": 643
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.3036528527736664,
      "learning_rate": 0.00019725927025210453,
      "loss": 0.8874,
      "step": 644
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.294475793838501,
      "learning_rate": 0.00019724723901715302,
      "loss": 0.7962,
      "step": 645
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.20145389437675476,
      "learning_rate": 0.0001972351818012158,
      "loss": 0.8041,
      "step": 646
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.2790697515010834,
      "learning_rate": 0.00019722309860751414,
      "loss": 0.8363,
      "step": 647
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.4564383029937744,
      "learning_rate": 0.0001972109894392762,
      "loss": 0.8934,
      "step": 648
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.17316775023937225,
      "learning_rate": 0.0001971988542997371,
      "loss": 0.5991,
      "step": 649
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.23875190317630768,
      "learning_rate": 0.00019718669319213896,
      "loss": 1.0229,
      "step": 650
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.4984225928783417,
      "learning_rate": 0.0001971745061197308,
      "loss": 0.9761,
      "step": 651
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.2893241047859192,
      "learning_rate": 0.0001971622930857685,
      "loss": 0.7501,
      "step": 652
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.43513554334640503,
      "learning_rate": 0.0001971500540935151,
      "loss": 1.0151,
      "step": 653
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.18287834525108337,
      "learning_rate": 0.00019713778914624025,
      "loss": 0.8679,
      "step": 654
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.19029662013053894,
      "learning_rate": 0.00019712549824722078,
      "loss": 0.7553,
      "step": 655
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.29287001490592957,
      "learning_rate": 0.00019711318139974034,
      "loss": 0.9139,
      "step": 656
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.35108351707458496,
      "learning_rate": 0.00019710083860708966,
      "loss": 0.9842,
      "step": 657
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.4431273937225342,
      "learning_rate": 0.00019708846987256617,
      "loss": 1.0183,
      "step": 658
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.2510368824005127,
      "learning_rate": 0.0001970760751994744,
      "loss": 1.042,
      "step": 659
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.2667982578277588,
      "learning_rate": 0.00019706365459112578,
      "loss": 0.8941,
      "step": 660
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.44644761085510254,
      "learning_rate": 0.0001970512080508386,
      "loss": 0.6078,
      "step": 661
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.2782925069332123,
      "learning_rate": 0.0001970387355819382,
      "loss": 1.0445,
      "step": 662
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.432697057723999,
      "learning_rate": 0.00019702623718775673,
      "loss": 0.8746,
      "step": 663
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.3165985345840454,
      "learning_rate": 0.00019701371287163334,
      "loss": 1.0954,
      "step": 664
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.3341020345687866,
      "learning_rate": 0.0001970011626369141,
      "loss": 0.8527,
      "step": 665
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.23376207053661346,
      "learning_rate": 0.0001969885864869519,
      "loss": 0.7778,
      "step": 666
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.16384026408195496,
      "learning_rate": 0.00019697598442510673,
      "loss": 0.5716,
      "step": 667
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.198348268866539,
      "learning_rate": 0.00019696335645474534,
      "loss": 0.6335,
      "step": 668
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.4664691984653473,
      "learning_rate": 0.00019695070257924152,
      "loss": 0.9169,
      "step": 669
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.1697290539741516,
      "learning_rate": 0.0001969380228019759,
      "loss": 1.2062,
      "step": 670
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.23597468435764313,
      "learning_rate": 0.0001969253171263361,
      "loss": 0.748,
      "step": 671
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.1845516562461853,
      "learning_rate": 0.0001969125855557166,
      "loss": 0.723,
      "step": 672
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.280588835477829,
      "learning_rate": 0.0001968998280935188,
      "loss": 0.8159,
      "step": 673
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.3032468855381012,
      "learning_rate": 0.0001968870447431511,
      "loss": 1.0595,
      "step": 674
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.21268771588802338,
      "learning_rate": 0.00019687423550802867,
      "loss": 0.8959,
      "step": 675
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.296229749917984,
      "learning_rate": 0.00019686140039157373,
      "loss": 0.7606,
      "step": 676
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.29086047410964966,
      "learning_rate": 0.00019684853939721538,
      "loss": 0.9115,
      "step": 677
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.41116562485694885,
      "learning_rate": 0.0001968356525283896,
      "loss": 0.9799,
      "step": 678
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.3512682616710663,
      "learning_rate": 0.0001968227397885393,
      "loss": 0.6919,
      "step": 679
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.5280768275260925,
      "learning_rate": 0.00019680980118111428,
      "loss": 1.0,
      "step": 680
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9142493009567261,
      "learning_rate": 0.0001967968367095713,
      "loss": 0.7081,
      "step": 681
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.6862466335296631,
      "learning_rate": 0.00019678384637737396,
      "loss": 0.947,
      "step": 682
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.19189248979091644,
      "learning_rate": 0.0001967708301879929,
      "loss": 0.7219,
      "step": 683
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.3244698941707611,
      "learning_rate": 0.0001967577881449055,
      "loss": 0.7564,
      "step": 684
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.14882375299930573,
      "learning_rate": 0.00019674472025159618,
      "loss": 0.5731,
      "step": 685
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.6527371406555176,
      "learning_rate": 0.0001967316265115562,
      "loss": 1.3952,
      "step": 686
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.232822448015213,
      "learning_rate": 0.00019671850692828366,
      "loss": 0.7352,
      "step": 687
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.33648520708084106,
      "learning_rate": 0.00019670536150528378,
      "loss": 0.7456,
      "step": 688
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.2611759305000305,
      "learning_rate": 0.00019669219024606846,
      "loss": 0.8323,
      "step": 689
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.25862768292427063,
      "learning_rate": 0.00019667899315415661,
      "loss": 1.0484,
      "step": 690
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.19622787833213806,
      "learning_rate": 0.00019666577023307402,
      "loss": 0.9436,
      "step": 691
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.27477800846099854,
      "learning_rate": 0.0001966525214863534,
      "loss": 1.1326,
      "step": 692
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0392472743988037,
      "learning_rate": 0.0001966392469175343,
      "loss": 1.2459,
      "step": 693
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.16916941106319427,
      "learning_rate": 0.00019662594653016324,
      "loss": 0.8615,
      "step": 694
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.622750461101532,
      "learning_rate": 0.0001966126203277936,
      "loss": 1.0336,
      "step": 695
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.332845538854599,
      "learning_rate": 0.0001965992683139857,
      "loss": 0.9851,
      "step": 696
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.30918970704078674,
      "learning_rate": 0.00019658589049230665,
      "loss": 1.038,
      "step": 697
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.443524569272995,
      "learning_rate": 0.00019657248686633056,
      "loss": 0.8648,
      "step": 698
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.2746441960334778,
      "learning_rate": 0.00019655905743963845,
      "loss": 0.6546,
      "step": 699
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.2594236731529236,
      "learning_rate": 0.00019654560221581808,
      "loss": 0.8905,
      "step": 700
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.28061559796333313,
      "learning_rate": 0.00019653212119846432,
      "loss": 0.8346,
      "step": 701
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.3070833086967468,
      "learning_rate": 0.0001965186143911787,
      "loss": 0.7041,
      "step": 702
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.41409793496131897,
      "learning_rate": 0.00019650508179756986,
      "loss": 0.7933,
      "step": 703
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.32812660932540894,
      "learning_rate": 0.00019649152342125314,
      "loss": 0.7854,
      "step": 704
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.8520134091377258,
      "learning_rate": 0.0001964779392658509,
      "loss": 0.7766,
      "step": 705
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.22475507855415344,
      "learning_rate": 0.00019646432933499236,
      "loss": 0.856,
      "step": 706
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9238573908805847,
      "learning_rate": 0.00019645069363231356,
      "loss": 0.7297,
      "step": 707
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.16270437836647034,
      "learning_rate": 0.0001964370321614575,
      "loss": 0.8162,
      "step": 708
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.28890758752822876,
      "learning_rate": 0.00019642334492607402,
      "loss": 0.9496,
      "step": 709
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.4181707501411438,
      "learning_rate": 0.00019640963192981987,
      "loss": 0.8733,
      "step": 710
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.5579323768615723,
      "learning_rate": 0.00019639589317635867,
      "loss": 0.7953,
      "step": 711
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.1842830330133438,
      "learning_rate": 0.0001963821286693609,
      "loss": 0.754,
      "step": 712
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.3866645395755768,
      "learning_rate": 0.000196368338412504,
      "loss": 0.9445,
      "step": 713
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.17049235105514526,
      "learning_rate": 0.00019635452240947222,
      "loss": 0.8374,
      "step": 714
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.21679052710533142,
      "learning_rate": 0.00019634068066395666,
      "loss": 0.7376,
      "step": 715
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.2520493268966675,
      "learning_rate": 0.00019632681317965534,
      "loss": 0.9977,
      "step": 716
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.541283905506134,
      "learning_rate": 0.00019631291996027322,
      "loss": 0.892,
      "step": 717
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.44707801938056946,
      "learning_rate": 0.00019629900100952204,
      "loss": 0.9542,
      "step": 718
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.3005651831626892,
      "learning_rate": 0.0001962850563311204,
      "loss": 0.7032,
      "step": 719
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.28198087215423584,
      "learning_rate": 0.00019627108592879387,
      "loss": 0.8207,
      "step": 720
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.25587958097457886,
      "learning_rate": 0.00019625708980627483,
      "loss": 1.2006,
      "step": 721
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.7685407400131226,
      "learning_rate": 0.00019624306796730255,
      "loss": 0.923,
      "step": 722
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.21695607900619507,
      "learning_rate": 0.00019622902041562315,
      "loss": 0.882,
      "step": 723
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.2887914776802063,
      "learning_rate": 0.00019621494715498961,
      "loss": 0.8204,
      "step": 724
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.30578678846359253,
      "learning_rate": 0.0001962008481891619,
      "loss": 0.9269,
      "step": 725
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.42802101373672485,
      "learning_rate": 0.00019618672352190663,
      "loss": 1.1895,
      "step": 726
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.5210464596748352,
      "learning_rate": 0.0001961725731569975,
      "loss": 0.9733,
      "step": 727
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.24264337122440338,
      "learning_rate": 0.00019615839709821495,
      "loss": 0.6359,
      "step": 728
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.25561484694480896,
      "learning_rate": 0.0001961441953493463,
      "loss": 0.8863,
      "step": 729
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.5776144862174988,
      "learning_rate": 0.00019612996791418578,
      "loss": 0.7534,
      "step": 730
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.30542808771133423,
      "learning_rate": 0.00019611571479653445,
      "loss": 0.8944,
      "step": 731
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.31194302439689636,
      "learning_rate": 0.0001961014360002002,
      "loss": 0.7999,
      "step": 732
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.3261597752571106,
      "learning_rate": 0.00019608713152899785,
      "loss": 1.0405,
      "step": 733
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.2659723162651062,
      "learning_rate": 0.00019607280138674902,
      "loss": 0.8085,
      "step": 734
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.4894388020038605,
      "learning_rate": 0.00019605844557728222,
      "loss": 0.8971,
      "step": 735
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.33349087834358215,
      "learning_rate": 0.00019604406410443282,
      "loss": 0.7623,
      "step": 736
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.2983339726924896,
      "learning_rate": 0.000196029656972043,
      "loss": 0.8479,
      "step": 737
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.7887246012687683,
      "learning_rate": 0.00019601522418396188,
      "loss": 1.2056,
      "step": 738
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.239938423037529,
      "learning_rate": 0.00019600076574404534,
      "loss": 0.8251,
      "step": 739
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.2408219575881958,
      "learning_rate": 0.00019598628165615618,
      "loss": 0.8103,
      "step": 740
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.28342387080192566,
      "learning_rate": 0.00019597177192416405,
      "loss": 0.8734,
      "step": 741
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.2703987956047058,
      "learning_rate": 0.0001959572365519454,
      "loss": 0.9708,
      "step": 742
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.23595187067985535,
      "learning_rate": 0.00019594267554338358,
      "loss": 1.017,
      "step": 743
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.6412906646728516,
      "learning_rate": 0.00019592808890236876,
      "loss": 0.7593,
      "step": 744
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.3015212118625641,
      "learning_rate": 0.00019591347663279796,
      "loss": 0.898,
      "step": 745
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.2386510968208313,
      "learning_rate": 0.00019589883873857508,
      "loss": 0.8836,
      "step": 746
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.2580740451812744,
      "learning_rate": 0.00019588417522361083,
      "loss": 0.9092,
      "step": 747
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.2683041989803314,
      "learning_rate": 0.0001958694860918228,
      "loss": 1.1412,
      "step": 748
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.22159399092197418,
      "learning_rate": 0.00019585477134713533,
      "loss": 0.9553,
      "step": 749
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.33559438586235046,
      "learning_rate": 0.00019584003099347975,
      "loss": 0.9839,
      "step": 750
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.32426536083221436,
      "learning_rate": 0.00019582526503479414,
      "loss": 0.9544,
      "step": 751
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.3072356879711151,
      "learning_rate": 0.0001958104734750234,
      "loss": 0.9957,
      "step": 752
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.1648864597082138,
      "learning_rate": 0.00019579565631811934,
      "loss": 0.617,
      "step": 753
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.2835381329059601,
      "learning_rate": 0.00019578081356804057,
      "loss": 0.8948,
      "step": 754
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.19143809378147125,
      "learning_rate": 0.00019576594522875254,
      "loss": 0.6705,
      "step": 755
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.1943771243095398,
      "learning_rate": 0.0001957510513042275,
      "loss": 0.9597,
      "step": 756
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.26383498311042786,
      "learning_rate": 0.00019573613179844465,
      "loss": 0.7922,
      "step": 757
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.25964462757110596,
      "learning_rate": 0.0001957211867153899,
      "loss": 0.8715,
      "step": 758
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.2345961332321167,
      "learning_rate": 0.00019570621605905606,
      "loss": 0.7996,
      "step": 759
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.3017592132091522,
      "learning_rate": 0.00019569121983344272,
      "loss": 0.6729,
      "step": 760
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.24985986948013306,
      "learning_rate": 0.00019567619804255638,
      "loss": 0.5101,
      "step": 761
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.18803907930850983,
      "learning_rate": 0.0001956611506904103,
      "loss": 0.8821,
      "step": 762
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.31696879863739014,
      "learning_rate": 0.0001956460777810246,
      "loss": 1.0118,
      "step": 763
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.24734877049922943,
      "learning_rate": 0.0001956309793184262,
      "loss": 0.9622,
      "step": 764
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.3079148232936859,
      "learning_rate": 0.00019561585530664891,
      "loss": 1.1113,
      "step": 765
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.2714509069919586,
      "learning_rate": 0.00019560070574973332,
      "loss": 0.6825,
      "step": 766
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.1949394941329956,
      "learning_rate": 0.00019558553065172682,
      "loss": 0.7069,
      "step": 767
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.3857214152812958,
      "learning_rate": 0.0001955703300166837,
      "loss": 0.788,
      "step": 768
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.31633174419403076,
      "learning_rate": 0.00019555510384866497,
      "loss": 0.7379,
      "step": 769
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.2149718701839447,
      "learning_rate": 0.00019553985215173855,
      "loss": 0.9018,
      "step": 770
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.18761509656906128,
      "learning_rate": 0.00019552457492997912,
      "loss": 0.7462,
      "step": 771
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.7492721676826477,
      "learning_rate": 0.00019550927218746827,
      "loss": 0.8096,
      "step": 772
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.2799462378025055,
      "learning_rate": 0.00019549394392829429,
      "loss": 0.7501,
      "step": 773
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.2780233323574066,
      "learning_rate": 0.00019547859015655236,
      "loss": 0.9254,
      "step": 774
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.18686938285827637,
      "learning_rate": 0.00019546321087634448,
      "loss": 0.9319,
      "step": 775
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.24776963889598846,
      "learning_rate": 0.0001954478060917794,
      "loss": 0.8693,
      "step": 776
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.19436436891555786,
      "learning_rate": 0.00019543237580697272,
      "loss": 0.9062,
      "step": 777
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.2722555696964264,
      "learning_rate": 0.00019541692002604695,
      "loss": 0.9173,
      "step": 778
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.17914269864559174,
      "learning_rate": 0.0001954014387531312,
      "loss": 0.7698,
      "step": 779
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.5123549103736877,
      "learning_rate": 0.0001953859319923616,
      "loss": 0.7869,
      "step": 780
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.2640467584133148,
      "learning_rate": 0.00019537039974788103,
      "loss": 0.8652,
      "step": 781
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.3591148853302002,
      "learning_rate": 0.00019535484202383904,
      "loss": 0.6479,
      "step": 782
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1489627361297607,
      "learning_rate": 0.00019533925882439217,
      "loss": 0.6799,
      "step": 783
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.24518133699893951,
      "learning_rate": 0.00019532365015370367,
      "loss": 0.8502,
      "step": 784
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.2969750165939331,
      "learning_rate": 0.00019530801601594364,
      "loss": 0.8347,
      "step": 785
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.3002925217151642,
      "learning_rate": 0.00019529235641528895,
      "loss": 0.8408,
      "step": 786
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.29589855670928955,
      "learning_rate": 0.00019527667135592328,
      "loss": 0.8135,
      "step": 787
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.3221976161003113,
      "learning_rate": 0.00019526096084203714,
      "loss": 0.9175,
      "step": 788
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.3412923812866211,
      "learning_rate": 0.00019524522487782776,
      "loss": 0.9202,
      "step": 789
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.47191300988197327,
      "learning_rate": 0.00019522946346749932,
      "loss": 0.674,
      "step": 790
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.1609720140695572,
      "learning_rate": 0.00019521367661526261,
      "loss": 0.8381,
      "step": 791
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.4755758047103882,
      "learning_rate": 0.00019519786432533538,
      "loss": 0.8649,
      "step": 792
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.3697628974914551,
      "learning_rate": 0.0001951820266019421,
      "loss": 0.9786,
      "step": 793
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.24545419216156006,
      "learning_rate": 0.000195166163449314,
      "loss": 0.836,
      "step": 794
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.3044380843639374,
      "learning_rate": 0.00019515027487168918,
      "loss": 0.8722,
      "step": 795
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.2601337432861328,
      "learning_rate": 0.0001951343608733125,
      "loss": 0.8177,
      "step": 796
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.6629306077957153,
      "learning_rate": 0.0001951184214584356,
      "loss": 1.1486,
      "step": 797
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.4887841045856476,
      "learning_rate": 0.0001951024566313169,
      "loss": 0.8153,
      "step": 798
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.20148508250713348,
      "learning_rate": 0.0001950864663962217,
      "loss": 0.8038,
      "step": 799
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.21694251894950867,
      "learning_rate": 0.000195070450757422,
      "loss": 0.9135,
      "step": 800
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.27514439821243286,
      "learning_rate": 0.00019505440971919656,
      "loss": 0.9625,
      "step": 801
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.26139718294143677,
      "learning_rate": 0.00019503834328583097,
      "loss": 0.7105,
      "step": 802
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.25874051451683044,
      "learning_rate": 0.00019502225146161766,
      "loss": 0.8299,
      "step": 803
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.26085996627807617,
      "learning_rate": 0.00019500613425085578,
      "loss": 0.7632,
      "step": 804
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.28978681564331055,
      "learning_rate": 0.00019498999165785123,
      "loss": 1.0095,
      "step": 805
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.2858978509902954,
      "learning_rate": 0.00019497382368691675,
      "loss": 0.9647,
      "step": 806
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.2679208517074585,
      "learning_rate": 0.00019495763034237186,
      "loss": 0.8522,
      "step": 807
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.6977773904800415,
      "learning_rate": 0.00019494141162854285,
      "loss": 0.889,
      "step": 808
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.19119936227798462,
      "learning_rate": 0.00019492516754976278,
      "loss": 0.9753,
      "step": 809
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.3105207085609436,
      "learning_rate": 0.00019490889811037146,
      "loss": 0.7692,
      "step": 810
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.21160364151000977,
      "learning_rate": 0.00019489260331471552,
      "loss": 0.7105,
      "step": 811
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.32964128255844116,
      "learning_rate": 0.0001948762831671483,
      "loss": 0.8411,
      "step": 812
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.33347025513648987,
      "learning_rate": 0.00019485993767203005,
      "loss": 0.9861,
      "step": 813
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.41201573610305786,
      "learning_rate": 0.00019484356683372765,
      "loss": 0.8817,
      "step": 814
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.1888434737920761,
      "learning_rate": 0.00019482717065661483,
      "loss": 0.6682,
      "step": 815
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.1959923505783081,
      "learning_rate": 0.000194810749145072,
      "loss": 0.8466,
      "step": 816
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.20411331951618195,
      "learning_rate": 0.00019479430230348648,
      "loss": 0.7982,
      "step": 817
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.217714324593544,
      "learning_rate": 0.00019477783013625223,
      "loss": 0.9104,
      "step": 818
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.287471204996109,
      "learning_rate": 0.00019476133264777,
      "loss": 1.029,
      "step": 819
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.48051464557647705,
      "learning_rate": 0.0001947448098424474,
      "loss": 0.8392,
      "step": 820
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.3538849949836731,
      "learning_rate": 0.00019472826172469866,
      "loss": 0.9625,
      "step": 821
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.22774779796600342,
      "learning_rate": 0.0001947116882989449,
      "loss": 0.8054,
      "step": 822
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.3177339434623718,
      "learning_rate": 0.00019469508956961392,
      "loss": 1.0342,
      "step": 823
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.44478583335876465,
      "learning_rate": 0.00019467846554114033,
      "loss": 0.7831,
      "step": 824
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.27781563997268677,
      "learning_rate": 0.00019466181621796547,
      "loss": 0.8033,
      "step": 825
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.2497037649154663,
      "learning_rate": 0.0001946451416045374,
      "loss": 0.6719,
      "step": 826
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.3760283887386322,
      "learning_rate": 0.00019462844170531105,
      "loss": 0.7898,
      "step": 827
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.6879208087921143,
      "learning_rate": 0.00019461171652474798,
      "loss": 0.8411,
      "step": 828
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.19389097392559052,
      "learning_rate": 0.0001945949660673166,
      "loss": 0.797,
      "step": 829
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.8208065629005432,
      "learning_rate": 0.00019457819033749202,
      "loss": 0.9908,
      "step": 830
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.8188951015472412,
      "learning_rate": 0.00019456138933975607,
      "loss": 0.7043,
      "step": 831
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.3953312337398529,
      "learning_rate": 0.00019454456307859745,
      "loss": 0.8333,
      "step": 832
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.2894149720668793,
      "learning_rate": 0.00019452771155851152,
      "loss": 0.8795,
      "step": 833
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.2022722363471985,
      "learning_rate": 0.00019451083478400037,
      "loss": 0.8627,
      "step": 834
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.2401445060968399,
      "learning_rate": 0.00019449393275957285,
      "loss": 0.7568,
      "step": 835
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.32160642743110657,
      "learning_rate": 0.00019447700548974467,
      "loss": 0.7308,
      "step": 836
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.4588199257850647,
      "learning_rate": 0.0001944600529790381,
      "loss": 0.9333,
      "step": 837
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.29307830333709717,
      "learning_rate": 0.0001944430752319823,
      "loss": 0.8038,
      "step": 838
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.14261336624622345,
      "learning_rate": 0.0001944260722531131,
      "loss": 0.9009,
      "step": 839
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.38845115900039673,
      "learning_rate": 0.00019440904404697306,
      "loss": 0.7139,
      "step": 840
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.2693929374217987,
      "learning_rate": 0.00019439199061811152,
      "loss": 0.9556,
      "step": 841
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.49033308029174805,
      "learning_rate": 0.0001943749119710846,
      "loss": 0.9816,
      "step": 842
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.5428810119628906,
      "learning_rate": 0.00019435780811045505,
      "loss": 0.9353,
      "step": 843
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.20457781851291656,
      "learning_rate": 0.0001943406790407924,
      "loss": 0.8736,
      "step": 844
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.266250878572464,
      "learning_rate": 0.000194323524766673,
      "loss": 0.7874,
      "step": 845
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.20878466963768005,
      "learning_rate": 0.00019430634529267978,
      "loss": 0.7165,
      "step": 846
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.2328757792711258,
      "learning_rate": 0.00019428914062340249,
      "loss": 0.7501,
      "step": 847
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.2325037270784378,
      "learning_rate": 0.00019427191076343766,
      "loss": 0.8752,
      "step": 848
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.2710583806037903,
      "learning_rate": 0.00019425465571738841,
      "loss": 0.7091,
      "step": 849
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.20924465358257294,
      "learning_rate": 0.00019423737548986475,
      "loss": 0.6896,
      "step": 850
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.348136305809021,
      "learning_rate": 0.00019422007008548325,
      "loss": 1.0549,
      "step": 851
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.5531610250473022,
      "learning_rate": 0.00019420273950886743,
      "loss": 0.8164,
      "step": 852
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.26058313250541687,
      "learning_rate": 0.00019418538376464727,
      "loss": 0.8838,
      "step": 853
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.4385451674461365,
      "learning_rate": 0.00019416800285745965,
      "loss": 0.8061,
      "step": 854
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.5138673186302185,
      "learning_rate": 0.00019415059679194817,
      "loss": 0.9601,
      "step": 855
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.2790364623069763,
      "learning_rate": 0.00019413316557276305,
      "loss": 0.9231,
      "step": 856
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.27886369824409485,
      "learning_rate": 0.0001941157092045613,
      "loss": 0.8135,
      "step": 857
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.3289061486721039,
      "learning_rate": 0.00019409822769200666,
      "loss": 0.7915,
      "step": 858
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.6807961463928223,
      "learning_rate": 0.00019408072103976954,
      "loss": 0.7698,
      "step": 859
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.335909903049469,
      "learning_rate": 0.00019406318925252708,
      "loss": 0.8377,
      "step": 860
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.5578577518463135,
      "learning_rate": 0.0001940456323349632,
      "loss": 0.6839,
      "step": 861
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.3685588240623474,
      "learning_rate": 0.00019402805029176845,
      "loss": 0.6981,
      "step": 862
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.6020337343215942,
      "learning_rate": 0.00019401044312764013,
      "loss": 0.7573,
      "step": 863
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.26686668395996094,
      "learning_rate": 0.0001939928108472822,
      "loss": 0.8358,
      "step": 864
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.8579389452934265,
      "learning_rate": 0.00019397515345540546,
      "loss": 0.9274,
      "step": 865
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.23140394687652588,
      "learning_rate": 0.00019395747095672728,
      "loss": 0.8938,
      "step": 866
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.292155921459198,
      "learning_rate": 0.00019393976335597176,
      "loss": 0.8946,
      "step": 867
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.19520768523216248,
      "learning_rate": 0.00019392203065786982,
      "loss": 0.8632,
      "step": 868
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.21907122433185577,
      "learning_rate": 0.00019390427286715894,
      "loss": 0.7656,
      "step": 869
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.2973809540271759,
      "learning_rate": 0.00019388648998858342,
      "loss": 0.9873,
      "step": 870
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.38084641098976135,
      "learning_rate": 0.00019386868202689414,
      "loss": 1.0616,
      "step": 871
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.15988749265670776,
      "learning_rate": 0.00019385084898684878,
      "loss": 0.9715,
      "step": 872
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.26595744490623474,
      "learning_rate": 0.00019383299087321173,
      "loss": 0.8253,
      "step": 873
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.28502145409584045,
      "learning_rate": 0.000193815107690754,
      "loss": 0.7205,
      "step": 874
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.17905955016613007,
      "learning_rate": 0.00019379719944425335,
      "loss": 0.7411,
      "step": 875
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.2815902829170227,
      "learning_rate": 0.00019377926613849418,
      "loss": 0.7971,
      "step": 876
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.2756863534450531,
      "learning_rate": 0.0001937613077782677,
      "loss": 0.8856,
      "step": 877
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.18730495870113373,
      "learning_rate": 0.00019374332436837167,
      "loss": 0.6911,
      "step": 878
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.3976004123687744,
      "learning_rate": 0.0001937253159136107,
      "loss": 1.0122,
      "step": 879
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.35557159781455994,
      "learning_rate": 0.00019370728241879594,
      "loss": 0.903,
      "step": 880
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.33049342036247253,
      "learning_rate": 0.00019368922388874528,
      "loss": 0.9531,
      "step": 881
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.22908909618854523,
      "learning_rate": 0.00019367114032828339,
      "loss": 0.7942,
      "step": 882
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.399886816740036,
      "learning_rate": 0.0001936530317422415,
      "loss": 0.7595,
      "step": 883
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.23952382802963257,
      "learning_rate": 0.00019363489813545756,
      "loss": 0.8543,
      "step": 884
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.25874897837638855,
      "learning_rate": 0.0001936167395127763,
      "loss": 0.6456,
      "step": 885
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.49352556467056274,
      "learning_rate": 0.0001935985558790489,
      "loss": 0.835,
      "step": 886
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.25842025876045227,
      "learning_rate": 0.00019358034723913355,
      "loss": 1.013,
      "step": 887
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.29468581080436707,
      "learning_rate": 0.00019356211359789489,
      "loss": 0.7744,
      "step": 888
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.012279987335205,
      "learning_rate": 0.00019354385496020424,
      "loss": 0.8421,
      "step": 889
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.32381418347358704,
      "learning_rate": 0.00019352557133093973,
      "loss": 0.9706,
      "step": 890
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.20730027556419373,
      "learning_rate": 0.000193507262714986,
      "loss": 0.6535,
      "step": 891
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.1983049064874649,
      "learning_rate": 0.00019348892911723458,
      "loss": 0.6762,
      "step": 892
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.44346365332603455,
      "learning_rate": 0.00019347057054258345,
      "loss": 0.9347,
      "step": 893
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.21107949316501617,
      "learning_rate": 0.00019345218699593742,
      "loss": 1.0877,
      "step": 894
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.5879597663879395,
      "learning_rate": 0.00019343377848220789,
      "loss": 0.8957,
      "step": 895
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.18560382723808289,
      "learning_rate": 0.00019341534500631296,
      "loss": 0.657,
      "step": 896
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.42214372754096985,
      "learning_rate": 0.00019339688657317745,
      "loss": 0.8931,
      "step": 897
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.623537540435791,
      "learning_rate": 0.00019337840318773268,
      "loss": 0.8182,
      "step": 898
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.13332770764827728,
      "learning_rate": 0.00019335989485491685,
      "loss": 0.7587,
      "step": 899
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.2539914846420288,
      "learning_rate": 0.00019334136157967468,
      "loss": 0.6419,
      "step": 900
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.28273123502731323,
      "learning_rate": 0.00019332280336695762,
      "loss": 0.7573,
      "step": 901
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.24439308047294617,
      "learning_rate": 0.00019330422022172377,
      "loss": 0.8002,
      "step": 902
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.3049640655517578,
      "learning_rate": 0.00019328561214893784,
      "loss": 0.7597,
      "step": 903
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.21107251942157745,
      "learning_rate": 0.00019326697915357123,
      "loss": 0.8509,
      "step": 904
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.4330517649650574,
      "learning_rate": 0.00019324832124060208,
      "loss": 0.7628,
      "step": 905
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.6358018517494202,
      "learning_rate": 0.00019322963841501508,
      "loss": 0.9835,
      "step": 906
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.36925750970840454,
      "learning_rate": 0.00019321093068180162,
      "loss": 1.0206,
      "step": 907
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.7827594876289368,
      "learning_rate": 0.00019319219804595972,
      "loss": 0.7593,
      "step": 908
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.22128506004810333,
      "learning_rate": 0.0001931734405124941,
      "loss": 0.6088,
      "step": 909
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.664861261844635,
      "learning_rate": 0.00019315465808641605,
      "loss": 0.7026,
      "step": 910
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.22341856360435486,
      "learning_rate": 0.00019313585077274366,
      "loss": 0.8873,
      "step": 911
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.18514485657215118,
      "learning_rate": 0.00019311701857650145,
      "loss": 0.6917,
      "step": 912
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.198794886469841,
      "learning_rate": 0.0001930981615027208,
      "loss": 0.7435,
      "step": 913
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.3058580160140991,
      "learning_rate": 0.00019307927955643957,
      "loss": 0.8343,
      "step": 914
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.15016454458236694,
      "learning_rate": 0.00019306037274270245,
      "loss": 0.7289,
      "step": 915
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.38836386799812317,
      "learning_rate": 0.00019304144106656055,
      "loss": 0.9906,
      "step": 916
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5395264625549316,
      "learning_rate": 0.00019302248453307184,
      "loss": 0.7528,
      "step": 917
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.2181786745786667,
      "learning_rate": 0.0001930035031473007,
      "loss": 1.014,
      "step": 918
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.34989795088768005,
      "learning_rate": 0.00019298449691431842,
      "loss": 0.7893,
      "step": 919
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.7719791531562805,
      "learning_rate": 0.0001929654658392027,
      "loss": 0.9944,
      "step": 920
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.24061189591884613,
      "learning_rate": 0.000192946409927038,
      "loss": 0.8197,
      "step": 921
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.4062196612358093,
      "learning_rate": 0.00019292732918291532,
      "loss": 0.6822,
      "step": 922
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5636285543441772,
      "learning_rate": 0.00019290822361193243,
      "loss": 0.7711,
      "step": 923
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.21471378207206726,
      "learning_rate": 0.0001928890932191936,
      "loss": 0.7946,
      "step": 924
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.29061082005500793,
      "learning_rate": 0.00019286993800980983,
      "loss": 0.8225,
      "step": 925
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.21015271544456482,
      "learning_rate": 0.00019285075798889864,
      "loss": 0.8249,
      "step": 926
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.3022051155567169,
      "learning_rate": 0.00019283155316158435,
      "loss": 0.8864,
      "step": 927
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.8421458601951599,
      "learning_rate": 0.00019281232353299767,
      "loss": 1.0071,
      "step": 928
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.2658364772796631,
      "learning_rate": 0.00019279306910827623,
      "loss": 1.0238,
      "step": 929
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5162569284439087,
      "learning_rate": 0.00019277378989256396,
      "loss": 0.8506,
      "step": 930
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.6807587742805481,
      "learning_rate": 0.00019275448589101168,
      "loss": 0.7844,
      "step": 931
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.2817138433456421,
      "learning_rate": 0.00019273515710877673,
      "loss": 0.9105,
      "step": 932
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5229153037071228,
      "learning_rate": 0.000192715803551023,
      "loss": 0.8123,
      "step": 933
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.4661097228527069,
      "learning_rate": 0.00019269642522292112,
      "loss": 1.1763,
      "step": 934
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.33970582485198975,
      "learning_rate": 0.00019267702212964828,
      "loss": 1.0972,
      "step": 935
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.34213748574256897,
      "learning_rate": 0.0001926575942763883,
      "loss": 0.6876,
      "step": 936
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.28069937229156494,
      "learning_rate": 0.00019263814166833157,
      "loss": 0.7806,
      "step": 937
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.23675139248371124,
      "learning_rate": 0.00019261866431067519,
      "loss": 0.7577,
      "step": 938
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.32118964195251465,
      "learning_rate": 0.00019259916220862279,
      "loss": 0.8858,
      "step": 939
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.3090830445289612,
      "learning_rate": 0.0001925796353673846,
      "loss": 1.1024,
      "step": 940
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.16338643431663513,
      "learning_rate": 0.00019256008379217753,
      "loss": 0.7684,
      "step": 941
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.3082687556743622,
      "learning_rate": 0.00019254050748822508,
      "loss": 0.9414,
      "step": 942
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.17537552118301392,
      "learning_rate": 0.0001925209064607573,
      "loss": 0.6067,
      "step": 943
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.23660019040107727,
      "learning_rate": 0.00019250128071501087,
      "loss": 0.9472,
      "step": 944
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.18759702146053314,
      "learning_rate": 0.0001924816302562291,
      "loss": 0.7523,
      "step": 945
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5778675079345703,
      "learning_rate": 0.00019246195508966193,
      "loss": 0.7438,
      "step": 946
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.2965249717235565,
      "learning_rate": 0.00019244225522056584,
      "loss": 0.8128,
      "step": 947
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.23428566753864288,
      "learning_rate": 0.00019242253065420394,
      "loss": 0.7447,
      "step": 948
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.28626713156700134,
      "learning_rate": 0.00019240278139584591,
      "loss": 1.1012,
      "step": 949
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.30381494760513306,
      "learning_rate": 0.00019238300745076802,
      "loss": 0.8253,
      "step": 950
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.8470340967178345,
      "learning_rate": 0.0001923632088242532,
      "loss": 1.0173,
      "step": 951
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.2048945426940918,
      "learning_rate": 0.00019234338552159095,
      "loss": 0.8577,
      "step": 952
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.22546835243701935,
      "learning_rate": 0.00019232353754807733,
      "loss": 0.9094,
      "step": 953
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.23830343782901764,
      "learning_rate": 0.00019230366490901498,
      "loss": 0.8518,
      "step": 954
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.272129088640213,
      "learning_rate": 0.00019228376760971317,
      "loss": 0.8772,
      "step": 955
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.6877616047859192,
      "learning_rate": 0.00019226384565548777,
      "loss": 0.7665,
      "step": 956
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.26193270087242126,
      "learning_rate": 0.0001922438990516612,
      "loss": 0.831,
      "step": 957
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.351654976606369,
      "learning_rate": 0.0001922239278035625,
      "loss": 0.7577,
      "step": 958
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.16100390255451202,
      "learning_rate": 0.00019220393191652727,
      "loss": 0.8597,
      "step": 959
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.21040727198123932,
      "learning_rate": 0.00019218391139589765,
      "loss": 0.8991,
      "step": 960
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.11867040395736694,
      "learning_rate": 0.00019216386624702246,
      "loss": 0.8838,
      "step": 961
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.23232026398181915,
      "learning_rate": 0.000192143796475257,
      "loss": 0.8415,
      "step": 962
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.2609661817550659,
      "learning_rate": 0.00019212370208596325,
      "loss": 0.8011,
      "step": 963
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.19939571619033813,
      "learning_rate": 0.00019210358308450966,
      "loss": 0.7839,
      "step": 964
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.23382341861724854,
      "learning_rate": 0.00019208343947627133,
      "loss": 0.8619,
      "step": 965
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.20894773304462433,
      "learning_rate": 0.00019206327126662995,
      "loss": 1.0657,
      "step": 966
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.23454752564430237,
      "learning_rate": 0.0001920430784609737,
      "loss": 0.7949,
      "step": 967
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.38111311197280884,
      "learning_rate": 0.00019202286106469738,
      "loss": 0.6508,
      "step": 968
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.19819191098213196,
      "learning_rate": 0.00019200261908320235,
      "loss": 0.7666,
      "step": 969
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.2534272074699402,
      "learning_rate": 0.00019198235252189657,
      "loss": 0.7457,
      "step": 970
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.32723283767700195,
      "learning_rate": 0.0001919620613861945,
      "loss": 0.9535,
      "step": 971
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.20166471600532532,
      "learning_rate": 0.00019194174568151726,
      "loss": 0.915,
      "step": 972
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.3077252507209778,
      "learning_rate": 0.0001919214054132924,
      "loss": 0.8382,
      "step": 973
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.3985017240047455,
      "learning_rate": 0.00019190104058695423,
      "loss": 0.8882,
      "step": 974
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.2567432224750519,
      "learning_rate": 0.0001918806512079434,
      "loss": 0.8122,
      "step": 975
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.7353218793869019,
      "learning_rate": 0.00019186023728170727,
      "loss": 0.8273,
      "step": 976
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.2252037525177002,
      "learning_rate": 0.00019183979881369972,
      "loss": 0.9646,
      "step": 977
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.11739931255578995,
      "learning_rate": 0.00019181933580938117,
      "loss": 0.9687,
      "step": 978
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.2984813451766968,
      "learning_rate": 0.00019179884827421855,
      "loss": 0.8879,
      "step": 979
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.23513995110988617,
      "learning_rate": 0.00019177833621368545,
      "loss": 0.7333,
      "step": 980
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.14504918456077576,
      "learning_rate": 0.00019175779963326198,
      "loss": 0.8622,
      "step": 981
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.14871276915073395,
      "learning_rate": 0.0001917372385384347,
      "loss": 0.9987,
      "step": 982
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.18769913911819458,
      "learning_rate": 0.00019171665293469688,
      "loss": 0.7971,
      "step": 983
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.19840289652347565,
      "learning_rate": 0.00019169604282754822,
      "loss": 0.8059,
      "step": 984
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.2708349823951721,
      "learning_rate": 0.00019167540822249502,
      "loss": 0.7078,
      "step": 985
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.2311522364616394,
      "learning_rate": 0.00019165474912505008,
      "loss": 0.9548,
      "step": 986
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.22134897112846375,
      "learning_rate": 0.0001916340655407328,
      "loss": 0.7402,
      "step": 987
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.40411102771759033,
      "learning_rate": 0.0001916133574750691,
      "loss": 0.9001,
      "step": 988
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.2519533634185791,
      "learning_rate": 0.0001915926249335914,
      "loss": 0.8855,
      "step": 989
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.20765411853790283,
      "learning_rate": 0.00019157186792183873,
      "loss": 0.8252,
      "step": 990
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.3994400203227997,
      "learning_rate": 0.00019155108644535658,
      "loss": 1.0869,
      "step": 991
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.38394269347190857,
      "learning_rate": 0.00019153028050969704,
      "loss": 0.8548,
      "step": 992
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.6710423827171326,
      "learning_rate": 0.00019150945012041875,
      "loss": 0.703,
      "step": 993
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.5963103771209717,
      "learning_rate": 0.00019148859528308681,
      "loss": 1.0438,
      "step": 994
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.3468247950077057,
      "learning_rate": 0.00019146771600327288,
      "loss": 0.8274,
      "step": 995
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.23043733835220337,
      "learning_rate": 0.00019144681228655514,
      "loss": 0.7067,
      "step": 996
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.22569577395915985,
      "learning_rate": 0.00019142588413851837,
      "loss": 0.651,
      "step": 997
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.2809253931045532,
      "learning_rate": 0.00019140493156475378,
      "loss": 0.712,
      "step": 998
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.1875516027212143,
      "learning_rate": 0.00019138395457085915,
      "loss": 0.945,
      "step": 999
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.21867170929908752,
      "learning_rate": 0.00019136295316243883,
      "loss": 0.7883,
      "step": 1000
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.1538011133670807,
      "learning_rate": 0.00019134192734510358,
      "loss": 0.7364,
      "step": 1001
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.2741049826145172,
      "learning_rate": 0.00019132087712447082,
      "loss": 0.7919,
      "step": 1002
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.32010617852211,
      "learning_rate": 0.00019129980250616433,
      "loss": 0.8062,
      "step": 1003
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.21860773861408234,
      "learning_rate": 0.00019127870349581455,
      "loss": 0.737,
      "step": 1004
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.26266205310821533,
      "learning_rate": 0.00019125758009905838,
      "loss": 0.8282,
      "step": 1005
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.26649531722068787,
      "learning_rate": 0.0001912364323215392,
      "loss": 1.0913,
      "step": 1006
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.2591760754585266,
      "learning_rate": 0.000191215260168907,
      "loss": 0.6572,
      "step": 1007
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.2854015827178955,
      "learning_rate": 0.00019119406364681818,
      "loss": 0.6728,
      "step": 1008
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.18169474601745605,
      "learning_rate": 0.0001911728427609357,
      "loss": 0.9388,
      "step": 1009
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.20006553828716278,
      "learning_rate": 0.00019115159751692903,
      "loss": 0.7061,
      "step": 1010
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.31902146339416504,
      "learning_rate": 0.0001911303279204741,
      "loss": 0.95,
      "step": 1011
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.255718857049942,
      "learning_rate": 0.00019110903397725343,
      "loss": 0.9578,
      "step": 1012
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.3744613826274872,
      "learning_rate": 0.00019108771569295604,
      "loss": 0.9626,
      "step": 1013
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.29579198360443115,
      "learning_rate": 0.0001910663730732773,
      "loss": 0.7014,
      "step": 1014
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.3531471788883209,
      "learning_rate": 0.00019104500612391928,
      "loss": 0.9051,
      "step": 1015
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.14332455396652222,
      "learning_rate": 0.00019102361485059043,
      "loss": 1.0741,
      "step": 1016
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.1680322289466858,
      "learning_rate": 0.00019100219925900576,
      "loss": 0.8135,
      "step": 1017
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.3054114282131195,
      "learning_rate": 0.00019098075935488677,
      "loss": 0.7547,
      "step": 1018
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.20826712250709534,
      "learning_rate": 0.0001909592951439614,
      "loss": 0.8084,
      "step": 1019
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.21223577857017517,
      "learning_rate": 0.00019093780663196407,
      "loss": 0.9322,
      "step": 1020
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.23022077977657318,
      "learning_rate": 0.00019091629382463583,
      "loss": 0.7525,
      "step": 1021
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.6217515468597412,
      "learning_rate": 0.00019089475672772407,
      "loss": 0.8379,
      "step": 1022
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.4647139012813568,
      "learning_rate": 0.0001908731953469828,
      "loss": 0.8082,
      "step": 1023
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.24199485778808594,
      "learning_rate": 0.00019085160968817236,
      "loss": 0.8924,
      "step": 1024
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.4484071731567383,
      "learning_rate": 0.00019082999975705977,
      "loss": 0.7477,
      "step": 1025
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.19897854328155518,
      "learning_rate": 0.00019080836555941834,
      "loss": 0.8584,
      "step": 1026
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.27579039335250854,
      "learning_rate": 0.000190786707101028,
      "loss": 1.0717,
      "step": 1027
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.24339130520820618,
      "learning_rate": 0.00019076502438767511,
      "loss": 0.7919,
      "step": 1028
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.4454144537448883,
      "learning_rate": 0.00019074331742515254,
      "loss": 0.7881,
      "step": 1029
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.2995893955230713,
      "learning_rate": 0.00019072158621925956,
      "loss": 0.8875,
      "step": 1030
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.20693597197532654,
      "learning_rate": 0.00019069983077580203,
      "loss": 0.9256,
      "step": 1031
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.27615296840667725,
      "learning_rate": 0.00019067805110059216,
      "loss": 0.8717,
      "step": 1032
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.44306960701942444,
      "learning_rate": 0.00019065624719944875,
      "loss": 0.6323,
      "step": 1033
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.16065919399261475,
      "learning_rate": 0.00019063441907819702,
      "loss": 0.6836,
      "step": 1034
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0550715923309326,
      "learning_rate": 0.00019061256674266863,
      "loss": 0.8406,
      "step": 1035
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.2484932690858841,
      "learning_rate": 0.00019059069019870177,
      "loss": 0.8368,
      "step": 1036
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.173093780875206,
      "learning_rate": 0.00019056878945214106,
      "loss": 0.7585,
      "step": 1037
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.23025664687156677,
      "learning_rate": 0.00019054686450883763,
      "loss": 1.0376,
      "step": 1038
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.18077290058135986,
      "learning_rate": 0.00019052491537464904,
      "loss": 0.8303,
      "step": 1039
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.2596956789493561,
      "learning_rate": 0.00019050294205543922,
      "loss": 0.8238,
      "step": 1040
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.3479515314102173,
      "learning_rate": 0.00019048094455707877,
      "loss": 0.8372,
      "step": 1041
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.35513171553611755,
      "learning_rate": 0.00019045892288544456,
      "loss": 0.9938,
      "step": 1042
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.3165166974067688,
      "learning_rate": 0.00019043687704642004,
      "loss": 0.7412,
      "step": 1043
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.2651920020580292,
      "learning_rate": 0.00019041480704589506,
      "loss": 0.8284,
      "step": 1044
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.3876506984233856,
      "learning_rate": 0.00019039271288976589,
      "loss": 0.9918,
      "step": 1045
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.3763922154903412,
      "learning_rate": 0.00019037059458393535,
      "loss": 0.7589,
      "step": 1046
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.39976516366004944,
      "learning_rate": 0.00019034845213431267,
      "loss": 0.8273,
      "step": 1047
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.4543869197368622,
      "learning_rate": 0.00019032628554681348,
      "loss": 0.6831,
      "step": 1048
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.2223670929670334,
      "learning_rate": 0.00019030409482735994,
      "loss": 0.9307,
      "step": 1049
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.28508689999580383,
      "learning_rate": 0.0001902818799818806,
      "loss": 0.5968,
      "step": 1050
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.26035186648368835,
      "learning_rate": 0.00019025964101631042,
      "loss": 0.884,
      "step": 1051
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.5029306411743164,
      "learning_rate": 0.0001902373779365909,
      "loss": 0.7385,
      "step": 1052
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.7093461155891418,
      "learning_rate": 0.00019021509074866998,
      "loss": 0.6646,
      "step": 1053
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.19238990545272827,
      "learning_rate": 0.00019019277945850197,
      "loss": 0.9171,
      "step": 1054
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.20672662556171417,
      "learning_rate": 0.00019017044407204764,
      "loss": 0.7484,
      "step": 1055
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.4247112572193146,
      "learning_rate": 0.00019014808459527415,
      "loss": 1.0303,
      "step": 1056
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.1856595128774643,
      "learning_rate": 0.0001901257010341553,
      "loss": 0.7335,
      "step": 1057
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.39102309942245483,
      "learning_rate": 0.000190103293394671,
      "loss": 0.8088,
      "step": 1058
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.21595853567123413,
      "learning_rate": 0.0001900808616828079,
      "loss": 0.8375,
      "step": 1059
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.24174493551254272,
      "learning_rate": 0.00019005840590455894,
      "loss": 1.0698,
      "step": 1060
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.4919494688510895,
      "learning_rate": 0.00019003592606592343,
      "loss": 0.5647,
      "step": 1061
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.2154160887002945,
      "learning_rate": 0.0001900134221729072,
      "loss": 0.8686,
      "step": 1062
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.2681412398815155,
      "learning_rate": 0.00018999089423152256,
      "loss": 0.7957,
      "step": 1063
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.298284649848938,
      "learning_rate": 0.00018996834224778807,
      "loss": 0.778,
      "step": 1064
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.3402464985847473,
      "learning_rate": 0.0001899457662277289,
      "loss": 1.1693,
      "step": 1065
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.248704731464386,
      "learning_rate": 0.0001899231661773765,
      "loss": 0.8189,
      "step": 1066
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.35822662711143494,
      "learning_rate": 0.00018990054210276882,
      "loss": 1.0108,
      "step": 1067
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.18482597172260284,
      "learning_rate": 0.0001898778940099502,
      "loss": 0.5381,
      "step": 1068
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.23591448366641998,
      "learning_rate": 0.00018985522190497137,
      "loss": 0.7369,
      "step": 1069
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.13761313259601593,
      "learning_rate": 0.00018983252579388954,
      "loss": 1.038,
      "step": 1070
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.15450407564640045,
      "learning_rate": 0.00018980980568276832,
      "loss": 0.7229,
      "step": 1071
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.3404417932033539,
      "learning_rate": 0.00018978706157767765,
      "loss": 0.6621,
      "step": 1072
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.4018527567386627,
      "learning_rate": 0.00018976429348469397,
      "loss": 0.8191,
      "step": 1073
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.24961969256401062,
      "learning_rate": 0.00018974150140990012,
      "loss": 0.9133,
      "step": 1074
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.2606128752231598,
      "learning_rate": 0.0001897186853593853,
      "loss": 0.795,
      "step": 1075
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.535144567489624,
      "learning_rate": 0.00018969584533924517,
      "loss": 0.9594,
      "step": 1076
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.2101634442806244,
      "learning_rate": 0.00018967298135558175,
      "loss": 0.6919,
      "step": 1077
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.3233322501182556,
      "learning_rate": 0.0001896500934145035,
      "loss": 1.0402,
      "step": 1078
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.1752619743347168,
      "learning_rate": 0.00018962718152212523,
      "loss": 0.751,
      "step": 1079
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.2598724067211151,
      "learning_rate": 0.00018960424568456819,
      "loss": 0.7841,
      "step": 1080
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.2573759853839874,
      "learning_rate": 0.00018958128590796,
      "loss": 0.8741,
      "step": 1081
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.14989545941352844,
      "learning_rate": 0.00018955830219843472,
      "loss": 0.8096,
      "step": 1082
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.28993165493011475,
      "learning_rate": 0.00018953529456213278,
      "loss": 0.7086,
      "step": 1083
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.17926256358623505,
      "learning_rate": 0.00018951226300520095,
      "loss": 0.9499,
      "step": 1084
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.2273615002632141,
      "learning_rate": 0.0001894892075337925,
      "loss": 0.8481,
      "step": 1085
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.18489021062850952,
      "learning_rate": 0.00018946612815406698,
      "loss": 0.8782,
      "step": 1086
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.29478004574775696,
      "learning_rate": 0.00018944302487219038,
      "loss": 0.8433,
      "step": 1087
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.22429277002811432,
      "learning_rate": 0.0001894198976943351,
      "loss": 0.789,
      "step": 1088
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.2684773802757263,
      "learning_rate": 0.00018939674662667985,
      "loss": 0.9234,
      "step": 1089
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.21232715249061584,
      "learning_rate": 0.00018937357167540984,
      "loss": 0.7034,
      "step": 1090
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.22438961267471313,
      "learning_rate": 0.00018935037284671652,
      "loss": 0.8416,
      "step": 1091
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.3543095886707306,
      "learning_rate": 0.00018932715014679784,
      "loss": 0.6806,
      "step": 1092
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.31099840998649597,
      "learning_rate": 0.000189303903581858,
      "loss": 1.0885,
      "step": 1093
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.11157713085412979,
      "learning_rate": 0.0001892806331581077,
      "loss": 0.8145,
      "step": 1094
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.7377251982688904,
      "learning_rate": 0.000189257338881764,
      "loss": 0.8334,
      "step": 1095
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.2041141241788864,
      "learning_rate": 0.00018923402075905025,
      "loss": 0.6849,
      "step": 1096
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.1736309975385666,
      "learning_rate": 0.00018921067879619624,
      "loss": 0.7828,
      "step": 1097
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.28412100672721863,
      "learning_rate": 0.00018918731299943808,
      "loss": 0.763,
      "step": 1098
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.3564109802246094,
      "learning_rate": 0.0001891639233750183,
      "loss": 0.96,
      "step": 1099
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.5255376100540161,
      "learning_rate": 0.00018914050992918576,
      "loss": 0.864,
      "step": 1100
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.3375678062438965,
      "learning_rate": 0.00018911707266819572,
      "loss": 1.0047,
      "step": 1101
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.2355327010154724,
      "learning_rate": 0.00018909361159830974,
      "loss": 0.7834,
      "step": 1102
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0795366764068604,
      "learning_rate": 0.00018907012672579582,
      "loss": 0.8112,
      "step": 1103
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.3689994513988495,
      "learning_rate": 0.00018904661805692826,
      "loss": 0.7688,
      "step": 1104
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.33006709814071655,
      "learning_rate": 0.0001890230855979877,
      "loss": 0.8764,
      "step": 1105
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.2522560656070709,
      "learning_rate": 0.00018899952935526123,
      "loss": 0.7764,
      "step": 1106
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.4814196825027466,
      "learning_rate": 0.00018897594933504222,
      "loss": 0.7906,
      "step": 1107
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.34535813331604004,
      "learning_rate": 0.00018895234554363035,
      "loss": 0.8592,
      "step": 1108
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.246543750166893,
      "learning_rate": 0.0001889287179873318,
      "loss": 0.7306,
      "step": 1109
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.2273987978696823,
      "learning_rate": 0.00018890506667245896,
      "loss": 0.7954,
      "step": 1110
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.406931608915329,
      "learning_rate": 0.00018888139160533064,
      "loss": 1.0396,
      "step": 1111
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.228530615568161,
      "learning_rate": 0.0001888576927922719,
      "loss": 0.8793,
      "step": 1112
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.1820293664932251,
      "learning_rate": 0.00018883397023961433,
      "loss": 0.975,
      "step": 1113
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.4519634246826172,
      "learning_rate": 0.00018881022395369565,
      "loss": 0.8677,
      "step": 1114
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.2594951391220093,
      "learning_rate": 0.00018878645394086009,
      "loss": 0.9982,
      "step": 1115
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.23033367097377777,
      "learning_rate": 0.00018876266020745807,
      "loss": 0.7171,
      "step": 1116
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.2581356465816498,
      "learning_rate": 0.0001887388427598465,
      "loss": 0.847,
      "step": 1117
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.2031250149011612,
      "learning_rate": 0.0001887150016043885,
      "loss": 0.8277,
      "step": 1118
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.3103141188621521,
      "learning_rate": 0.0001886911367474536,
      "loss": 0.7276,
      "step": 1119
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.1729084998369217,
      "learning_rate": 0.00018866724819541764,
      "loss": 0.8189,
      "step": 1120
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.3196006417274475,
      "learning_rate": 0.0001886433359546628,
      "loss": 0.9325,
      "step": 1121
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.4802674353122711,
      "learning_rate": 0.00018861940003157753,
      "loss": 0.81,
      "step": 1122
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.46137452125549316,
      "learning_rate": 0.00018859544043255667,
      "loss": 0.766,
      "step": 1123
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.20410282909870148,
      "learning_rate": 0.00018857145716400138,
      "loss": 0.8344,
      "step": 1124
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.25158706307411194,
      "learning_rate": 0.00018854745023231913,
      "loss": 0.7579,
      "step": 1125
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.313598096370697,
      "learning_rate": 0.0001885234196439237,
      "loss": 0.9063,
      "step": 1126
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.33015546202659607,
      "learning_rate": 0.00018849936540523522,
      "loss": 0.9395,
      "step": 1127
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.15990813076496124,
      "learning_rate": 0.0001884752875226801,
      "loss": 1.0875,
      "step": 1128
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.22864307463169098,
      "learning_rate": 0.00018845118600269113,
      "loss": 1.015,
      "step": 1129
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.24785065650939941,
      "learning_rate": 0.00018842706085170737,
      "loss": 0.8947,
      "step": 1130
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.3726942241191864,
      "learning_rate": 0.00018840291207617417,
      "loss": 0.8086,
      "step": 1131
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.3088562786579132,
      "learning_rate": 0.0001883787396825432,
      "loss": 0.709,
      "step": 1132
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.33833760023117065,
      "learning_rate": 0.0001883545436772725,
      "loss": 0.7494,
      "step": 1133
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.3669144809246063,
      "learning_rate": 0.0001883303240668264,
      "loss": 0.8907,
      "step": 1134
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.5386047959327698,
      "learning_rate": 0.00018830608085767544,
      "loss": 0.9667,
      "step": 1135
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.25248920917510986,
      "learning_rate": 0.00018828181405629657,
      "loss": 0.7404,
      "step": 1136
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.20727990567684174,
      "learning_rate": 0.00018825752366917304,
      "loss": 1.0218,
      "step": 1137
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.25378039479255676,
      "learning_rate": 0.00018823320970279435,
      "loss": 0.7046,
      "step": 1138
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.5099999308586121,
      "learning_rate": 0.00018820887216365636,
      "loss": 0.8539,
      "step": 1139
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.5168853998184204,
      "learning_rate": 0.0001881845110582611,
      "loss": 0.8954,
      "step": 1140
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.4527966380119324,
      "learning_rate": 0.0001881601263931171,
      "loss": 0.7951,
      "step": 1141
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.22066934406757355,
      "learning_rate": 0.00018813571817473897,
      "loss": 0.7302,
      "step": 1142
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.23836524784564972,
      "learning_rate": 0.00018811128640964776,
      "loss": 0.9513,
      "step": 1143
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.39327695965766907,
      "learning_rate": 0.00018808683110437078,
      "loss": 1.05,
      "step": 1144
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.14937368035316467,
      "learning_rate": 0.00018806235226544158,
      "loss": 0.9068,
      "step": 1145
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.11650221794843674,
      "learning_rate": 0.00018803784989940007,
      "loss": 0.6454,
      "step": 1146
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.2836046516895294,
      "learning_rate": 0.00018801332401279238,
      "loss": 0.7975,
      "step": 1147
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.1992424726486206,
      "learning_rate": 0.0001879887746121709,
      "loss": 0.8733,
      "step": 1148
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.19758780300617218,
      "learning_rate": 0.00018796420170409444,
      "loss": 0.8587,
      "step": 1149
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.22536468505859375,
      "learning_rate": 0.00018793960529512796,
      "loss": 0.6749,
      "step": 1150
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.24685421586036682,
      "learning_rate": 0.0001879149853918428,
      "loss": 0.7273,
      "step": 1151
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.2080535888671875,
      "learning_rate": 0.0001878903420008164,
      "loss": 0.8009,
      "step": 1152
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.38759148120880127,
      "learning_rate": 0.0001878656751286327,
      "loss": 0.999,
      "step": 1153
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.21051958203315735,
      "learning_rate": 0.00018784098478188174,
      "loss": 0.684,
      "step": 1154
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.327419638633728,
      "learning_rate": 0.0001878162709671599,
      "loss": 0.8893,
      "step": 1155
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.2629655599594116,
      "learning_rate": 0.0001877915336910699,
      "loss": 0.7581,
      "step": 1156
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.1923925280570984,
      "learning_rate": 0.0001877667729602206,
      "loss": 0.9014,
      "step": 1157
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.26210466027259827,
      "learning_rate": 0.00018774198878122715,
      "loss": 0.7855,
      "step": 1158
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.5602946877479553,
      "learning_rate": 0.00018771718116071106,
      "loss": 0.8284,
      "step": 1159
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.21428391337394714,
      "learning_rate": 0.00018769235010530002,
      "loss": 0.7908,
      "step": 1160
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.21038730442523956,
      "learning_rate": 0.000187667495621628,
      "loss": 0.7437,
      "step": 1161
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.2694574296474457,
      "learning_rate": 0.0001876426177163352,
      "loss": 0.6402,
      "step": 1162
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.25531479716300964,
      "learning_rate": 0.00018761771639606818,
      "loss": 0.8402,
      "step": 1163
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.22102557122707367,
      "learning_rate": 0.00018759279166747958,
      "loss": 0.7065,
      "step": 1164
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.3082064092159271,
      "learning_rate": 0.00018756784353722846,
      "loss": 0.862,
      "step": 1165
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.8016282320022583,
      "learning_rate": 0.0001875428720119801,
      "loss": 0.6733,
      "step": 1166
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.7817736268043518,
      "learning_rate": 0.00018751787709840595,
      "loss": 0.8391,
      "step": 1167
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.23919065296649933,
      "learning_rate": 0.00018749285880318372,
      "loss": 0.8,
      "step": 1168
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.17521372437477112,
      "learning_rate": 0.00018746781713299747,
      "loss": 0.8903,
      "step": 1169
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.42719289660453796,
      "learning_rate": 0.00018744275209453743,
      "loss": 0.9432,
      "step": 1170
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.30626386404037476,
      "learning_rate": 0.00018741766369450007,
      "loss": 0.743,
      "step": 1171
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.2774352431297302,
      "learning_rate": 0.0001873925519395881,
      "loss": 0.9018,
      "step": 1172
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.5977029204368591,
      "learning_rate": 0.00018736741683651048,
      "loss": 0.8012,
      "step": 1173
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.22164009511470795,
      "learning_rate": 0.00018734225839198246,
      "loss": 0.7889,
      "step": 1174
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.25048279762268066,
      "learning_rate": 0.00018731707661272546,
      "loss": 1.0803,
      "step": 1175
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.2292318344116211,
      "learning_rate": 0.0001872918715054671,
      "loss": 0.8962,
      "step": 1176
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.2251380980014801,
      "learning_rate": 0.00018726664307694134,
      "loss": 0.66,
      "step": 1177
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.1499244123697281,
      "learning_rate": 0.00018724139133388833,
      "loss": 0.9603,
      "step": 1178
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.1870645135641098,
      "learning_rate": 0.00018721611628305434,
      "loss": 0.6546,
      "step": 1179
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.20187652111053467,
      "learning_rate": 0.00018719081793119204,
      "loss": 0.565,
      "step": 1180
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.24145860970020294,
      "learning_rate": 0.00018716549628506018,
      "loss": 0.6302,
      "step": 1181
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.21748077869415283,
      "learning_rate": 0.0001871401513514239,
      "loss": 0.8558,
      "step": 1182
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.30236274003982544,
      "learning_rate": 0.0001871147831370544,
      "loss": 0.6389,
      "step": 1183
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.15527497231960297,
      "learning_rate": 0.0001870893916487291,
      "loss": 0.6485,
      "step": 1184
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.3679155707359314,
      "learning_rate": 0.00018706397689323182,
      "loss": 0.8004,
      "step": 1185
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.2061629742383957,
      "learning_rate": 0.0001870385388773524,
      "loss": 0.7831,
      "step": 1186
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.35365793108940125,
      "learning_rate": 0.00018701307760788697,
      "loss": 0.9043,
      "step": 1187
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.30352234840393066,
      "learning_rate": 0.00018698759309163793,
      "loss": 1.041,
      "step": 1188
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.25917765498161316,
      "learning_rate": 0.00018696208533541372,
      "loss": 0.7814,
      "step": 1189
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.20076321065425873,
      "learning_rate": 0.0001869365543460292,
      "loss": 0.8422,
      "step": 1190
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.3489948511123657,
      "learning_rate": 0.0001869110001303053,
      "loss": 0.9162,
      "step": 1191
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.2501094341278076,
      "learning_rate": 0.00018688542269506916,
      "loss": 0.8636,
      "step": 1192
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.3312455713748932,
      "learning_rate": 0.00018685982204715425,
      "loss": 0.8327,
      "step": 1193
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.3867807686328888,
      "learning_rate": 0.00018683419819340004,
      "loss": 0.6535,
      "step": 1194
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.47496938705444336,
      "learning_rate": 0.00018680855114065235,
      "loss": 0.9398,
      "step": 1195
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.22182364761829376,
      "learning_rate": 0.00018678288089576324,
      "loss": 1.0212,
      "step": 1196
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.2567947208881378,
      "learning_rate": 0.00018675718746559073,
      "loss": 0.9915,
      "step": 1197
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.1721343845129013,
      "learning_rate": 0.0001867314708569993,
      "loss": 0.8092,
      "step": 1198
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.2532687187194824,
      "learning_rate": 0.00018670573107685946,
      "loss": 0.6685,
      "step": 1199
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.55368572473526,
      "learning_rate": 0.00018667996813204795,
      "loss": 0.9189,
      "step": 1200
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.21356233954429626,
      "learning_rate": 0.00018665418202944777,
      "loss": 0.5717,
      "step": 1201
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.30151352286338806,
      "learning_rate": 0.00018662837277594796,
      "loss": 1.0336,
      "step": 1202
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.1935695856809616,
      "learning_rate": 0.00018660254037844388,
      "loss": 0.8629,
      "step": 1203
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.6064620018005371,
      "learning_rate": 0.000186576684843837,
      "loss": 0.736,
      "step": 1204
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.29021430015563965,
      "learning_rate": 0.00018655080617903505,
      "loss": 0.8756,
      "step": 1205
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.23779910802841187,
      "learning_rate": 0.0001865249043909518,
      "loss": 0.9419,
      "step": 1206
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.32474130392074585,
      "learning_rate": 0.00018649897948650734,
      "loss": 0.7939,
      "step": 1207
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.6059696674346924,
      "learning_rate": 0.00018647303147262788,
      "loss": 0.8177,
      "step": 1208
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.2602784037590027,
      "learning_rate": 0.00018644706035624578,
      "loss": 0.7045,
      "step": 1209
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.663666844367981,
      "learning_rate": 0.00018642106614429957,
      "loss": 0.911,
      "step": 1210
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.20528872311115265,
      "learning_rate": 0.00018639504884373402,
      "loss": 0.8361,
      "step": 1211
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.20827889442443848,
      "learning_rate": 0.0001863690084615,
      "loss": 0.7377,
      "step": 1212
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.3677816390991211,
      "learning_rate": 0.00018634294500455457,
      "loss": 0.6222,
      "step": 1213
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.26547887921333313,
      "learning_rate": 0.00018631685847986097,
      "loss": 0.9535,
      "step": 1214
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.6669391393661499,
      "learning_rate": 0.00018629074889438857,
      "loss": 0.7971,
      "step": 1215
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.2662108242511749,
      "learning_rate": 0.00018626461625511294,
      "loss": 0.8608,
      "step": 1216
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.3008791208267212,
      "learning_rate": 0.00018623846056901573,
      "loss": 0.7155,
      "step": 1217
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.5115108489990234,
      "learning_rate": 0.00018621228184308487,
      "loss": 0.8209,
      "step": 1218
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.16667547821998596,
      "learning_rate": 0.00018618608008431434,
      "loss": 0.7058,
      "step": 1219
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.38122615218162537,
      "learning_rate": 0.00018615985529970433,
      "loss": 0.917,
      "step": 1220
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.8017314076423645,
      "learning_rate": 0.00018613360749626117,
      "loss": 0.6635,
      "step": 1221
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.395334929227829,
      "learning_rate": 0.00018610733668099732,
      "loss": 0.5073,
      "step": 1222
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.22808025777339935,
      "learning_rate": 0.0001860810428609314,
      "loss": 1.0783,
      "step": 1223
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.2738450765609741,
      "learning_rate": 0.00018605472604308818,
      "loss": 0.9905,
      "step": 1224
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.27419793605804443,
      "learning_rate": 0.0001860283862344986,
      "loss": 0.7313,
      "step": 1225
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.13464461266994476,
      "learning_rate": 0.0001860020234421997,
      "loss": 0.6269,
      "step": 1226
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9208712577819824,
      "learning_rate": 0.00018597563767323467,
      "loss": 1.1998,
      "step": 1227
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.26451417803764343,
      "learning_rate": 0.00018594922893465284,
      "loss": 0.918,
      "step": 1228
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.7765925526618958,
      "learning_rate": 0.00018592279723350966,
      "loss": 1.1744,
      "step": 1229
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.6769363880157471,
      "learning_rate": 0.00018589634257686681,
      "loss": 0.9272,
      "step": 1230
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.2322641760110855,
      "learning_rate": 0.00018586986497179196,
      "loss": 0.9449,
      "step": 1231
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.2243955433368683,
      "learning_rate": 0.000185843364425359,
      "loss": 0.8718,
      "step": 1232
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.40436694025993347,
      "learning_rate": 0.0001858168409446479,
      "loss": 1.0949,
      "step": 1233
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.29602012038230896,
      "learning_rate": 0.00018579029453674487,
      "loss": 0.8806,
      "step": 1234
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.4474785625934601,
      "learning_rate": 0.00018576372520874205,
      "loss": 0.9423,
      "step": 1235
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.1821964979171753,
      "learning_rate": 0.0001857371329677379,
      "loss": 0.6899,
      "step": 1236
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.22496560215950012,
      "learning_rate": 0.0001857105178208369,
      "loss": 0.8844,
      "step": 1237
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.36220496892929077,
      "learning_rate": 0.00018568387977514964,
      "loss": 0.9544,
      "step": 1238
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.26034975051879883,
      "learning_rate": 0.00018565721883779285,
      "loss": 0.7088,
      "step": 1239
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.277192085981369,
      "learning_rate": 0.0001856305350158894,
      "loss": 0.646,
      "step": 1240
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.3131934404373169,
      "learning_rate": 0.00018560382831656823,
      "loss": 0.9111,
      "step": 1241
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.280513733625412,
      "learning_rate": 0.00018557709874696446,
      "loss": 0.6319,
      "step": 1242
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.42607536911964417,
      "learning_rate": 0.00018555034631421923,
      "loss": 0.9845,
      "step": 1243
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.27389755845069885,
      "learning_rate": 0.0001855235710254799,
      "loss": 0.7945,
      "step": 1244
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.2901134192943573,
      "learning_rate": 0.00018549677288789979,
      "loss": 0.7453,
      "step": 1245
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.27094566822052,
      "learning_rate": 0.00018546995190863843,
      "loss": 0.9585,
      "step": 1246
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.20099607110023499,
      "learning_rate": 0.00018544310809486144,
      "loss": 0.9362,
      "step": 1247
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.22246307134628296,
      "learning_rate": 0.0001854162414537405,
      "loss": 1.0875,
      "step": 1248
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.32179200649261475,
      "learning_rate": 0.0001853893519924535,
      "loss": 1.085,
      "step": 1249
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.24386368691921234,
      "learning_rate": 0.0001853624397181842,
      "loss": 0.9447,
      "step": 1250
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.19231253862380981,
      "learning_rate": 0.00018533550463812273,
      "loss": 0.7845,
      "step": 1251
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.28372621536254883,
      "learning_rate": 0.00018530854675946512,
      "loss": 0.8776,
      "step": 1252
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.35820212960243225,
      "learning_rate": 0.00018528156608941355,
      "loss": 0.59,
      "step": 1253
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.3504108488559723,
      "learning_rate": 0.00018525456263517628,
      "loss": 0.7328,
      "step": 1254
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.397708535194397,
      "learning_rate": 0.00018522753640396773,
      "loss": 0.7582,
      "step": 1255
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.2075684368610382,
      "learning_rate": 0.00018520048740300826,
      "loss": 0.7796,
      "step": 1256
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.3807012438774109,
      "learning_rate": 0.0001851734156395245,
      "loss": 0.5311,
      "step": 1257
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.15833884477615356,
      "learning_rate": 0.00018514632112074893,
      "loss": 0.8073,
      "step": 1258
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.3195546865463257,
      "learning_rate": 0.00018511920385392032,
      "loss": 0.9423,
      "step": 1259
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.1675804704427719,
      "learning_rate": 0.0001850920638462834,
      "loss": 0.7088,
      "step": 1260
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.2965960204601288,
      "learning_rate": 0.00018506490110508904,
      "loss": 1.1227,
      "step": 1261
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.5766452550888062,
      "learning_rate": 0.00018503771563759414,
      "loss": 0.7545,
      "step": 1262
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.43841901421546936,
      "learning_rate": 0.00018501050745106169,
      "loss": 0.7801,
      "step": 1263
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.21959801018238068,
      "learning_rate": 0.00018498327655276073,
      "loss": 0.8556,
      "step": 1264
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.23792685568332672,
      "learning_rate": 0.00018495602294996637,
      "loss": 0.7981,
      "step": 1265
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.30011269450187683,
      "learning_rate": 0.00018492874664995985,
      "loss": 0.9105,
      "step": 1266
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.26169201731681824,
      "learning_rate": 0.00018490144766002836,
      "loss": 0.7543,
      "step": 1267
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.2623223066329956,
      "learning_rate": 0.00018487412598746527,
      "loss": 1.0779,
      "step": 1268
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.12741823494434357,
      "learning_rate": 0.00018484678163956995,
      "loss": 0.8861,
      "step": 1269
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.3696349263191223,
      "learning_rate": 0.00018481941462364775,
      "loss": 0.9993,
      "step": 1270
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.19559913873672485,
      "learning_rate": 0.00018479202494701028,
      "loss": 0.6336,
      "step": 1271
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.20094084739685059,
      "learning_rate": 0.000184764612616975,
      "loss": 0.6728,
      "step": 1272
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.22532349824905396,
      "learning_rate": 0.0001847371776408655,
      "loss": 0.7423,
      "step": 1273
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.3638726472854614,
      "learning_rate": 0.00018470972002601147,
      "loss": 0.8742,
      "step": 1274
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.23848213255405426,
      "learning_rate": 0.0001846822397797486,
      "loss": 0.8858,
      "step": 1275
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.23806573450565338,
      "learning_rate": 0.0001846547369094186,
      "loss": 0.8342,
      "step": 1276
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.18467192351818085,
      "learning_rate": 0.00018462721142236928,
      "loss": 0.7343,
      "step": 1277
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.22790953516960144,
      "learning_rate": 0.00018459966332595444,
      "loss": 0.7892,
      "step": 1278
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.42479977011680603,
      "learning_rate": 0.00018457209262753396,
      "loss": 0.7929,
      "step": 1279
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.3905547559261322,
      "learning_rate": 0.00018454449933447375,
      "loss": 1.0007,
      "step": 1280
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.20737090706825256,
      "learning_rate": 0.00018451688345414574,
      "loss": 0.8178,
      "step": 1281
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.5382395386695862,
      "learning_rate": 0.00018448924499392795,
      "loss": 1.0132,
      "step": 1282
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.17233455181121826,
      "learning_rate": 0.0001844615839612043,
      "loss": 0.8808,
      "step": 1283
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.24220000207424164,
      "learning_rate": 0.00018443390036336493,
      "loss": 0.7672,
      "step": 1284
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.22899314761161804,
      "learning_rate": 0.00018440619420780584,
      "loss": 0.7801,
      "step": 1285
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.29535096883773804,
      "learning_rate": 0.00018437846550192912,
      "loss": 0.9208,
      "step": 1286
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.20247384905815125,
      "learning_rate": 0.0001843507142531429,
      "loss": 0.8187,
      "step": 1287
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.47021177411079407,
      "learning_rate": 0.00018432294046886137,
      "loss": 0.8073,
      "step": 1288
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.1782267540693283,
      "learning_rate": 0.00018429514415650464,
      "loss": 0.9286,
      "step": 1289
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.1798563301563263,
      "learning_rate": 0.0001842673253234989,
      "loss": 0.6604,
      "step": 1290
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.30468717217445374,
      "learning_rate": 0.00018423948397727638,
      "loss": 0.9134,
      "step": 1291
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.3026343584060669,
      "learning_rate": 0.00018421162012527523,
      "loss": 1.0581,
      "step": 1292
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.2644207775592804,
      "learning_rate": 0.00018418373377493974,
      "loss": 0.8008,
      "step": 1293
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.25246068835258484,
      "learning_rate": 0.00018415582493372013,
      "loss": 0.8571,
      "step": 1294
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.14873483777046204,
      "learning_rate": 0.00018412789360907258,
      "loss": 0.8808,
      "step": 1295
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.38219353556632996,
      "learning_rate": 0.00018409993980845942,
      "loss": 0.8496,
      "step": 1296
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.23008137941360474,
      "learning_rate": 0.0001840719635393489,
      "loss": 0.8552,
      "step": 1297
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.4629540741443634,
      "learning_rate": 0.00018404396480921524,
      "loss": 0.8456,
      "step": 1298
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.19923873245716095,
      "learning_rate": 0.0001840159436255387,
      "loss": 1.0161,
      "step": 1299
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.1906134933233261,
      "learning_rate": 0.0001839878999958056,
      "loss": 0.792,
      "step": 1300
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.3002176582813263,
      "learning_rate": 0.0001839598339275081,
      "loss": 0.6616,
      "step": 1301
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.17941942811012268,
      "learning_rate": 0.00018393174542814453,
      "loss": 0.9409,
      "step": 1302
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.23496879637241364,
      "learning_rate": 0.00018390363450521914,
      "loss": 0.7604,
      "step": 1303
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.1679767519235611,
      "learning_rate": 0.0001838755011662421,
      "loss": 0.7354,
      "step": 1304
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.41490668058395386,
      "learning_rate": 0.00018384734541872962,
      "loss": 0.8846,
      "step": 1305
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.16063648462295532,
      "learning_rate": 0.000183819167270204,
      "loss": 0.7276,
      "step": 1306
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.2164960503578186,
      "learning_rate": 0.00018379096672819335,
      "loss": 0.78,
      "step": 1307
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.25044992566108704,
      "learning_rate": 0.00018376274380023193,
      "loss": 0.7799,
      "step": 1308
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.3465784788131714,
      "learning_rate": 0.00018373449849385978,
      "loss": 0.933,
      "step": 1309
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.29289570450782776,
      "learning_rate": 0.00018370623081662317,
      "loss": 0.7959,
      "step": 1310
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.22164900600910187,
      "learning_rate": 0.00018367794077607413,
      "loss": 0.6426,
      "step": 1311
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.1742102950811386,
      "learning_rate": 0.00018364962837977075,
      "loss": 0.8825,
      "step": 1312
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.2760159969329834,
      "learning_rate": 0.00018362129363527709,
      "loss": 0.941,
      "step": 1313
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.3095329701900482,
      "learning_rate": 0.00018359293655016324,
      "loss": 0.9483,
      "step": 1314
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.2230674922466278,
      "learning_rate": 0.0001835645571320051,
      "loss": 0.7099,
      "step": 1315
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.18878720700740814,
      "learning_rate": 0.00018353615538838474,
      "loss": 0.8718,
      "step": 1316
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.22312194108963013,
      "learning_rate": 0.00018350773132689001,
      "loss": 0.9073,
      "step": 1317
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.28193020820617676,
      "learning_rate": 0.00018347928495511483,
      "loss": 0.8024,
      "step": 1318
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.17051461338996887,
      "learning_rate": 0.0001834508162806591,
      "loss": 0.7898,
      "step": 1319
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.21895575523376465,
      "learning_rate": 0.00018342232531112855,
      "loss": 0.7854,
      "step": 1320
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.2620944380760193,
      "learning_rate": 0.00018339381205413502,
      "loss": 0.7291,
      "step": 1321
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.7799584865570068,
      "learning_rate": 0.00018336527651729618,
      "loss": 0.8015,
      "step": 1322
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.1495790332555771,
      "learning_rate": 0.00018333671870823573,
      "loss": 0.6712,
      "step": 1323
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.23111292719841003,
      "learning_rate": 0.0001833081386345833,
      "loss": 0.7757,
      "step": 1324
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.37781789898872375,
      "learning_rate": 0.00018327953630397446,
      "loss": 0.7163,
      "step": 1325
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.26320987939834595,
      "learning_rate": 0.0001832509117240507,
      "loss": 0.7546,
      "step": 1326
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.7526392340660095,
      "learning_rate": 0.00018322226490245953,
      "loss": 1.0132,
      "step": 1327
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.3171197772026062,
      "learning_rate": 0.00018319359584685434,
      "loss": 0.857,
      "step": 1328
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.2927488088607788,
      "learning_rate": 0.0001831649045648945,
      "loss": 0.9924,
      "step": 1329
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.26311907172203064,
      "learning_rate": 0.0001831361910642452,
      "loss": 0.8412,
      "step": 1330
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.346422016620636,
      "learning_rate": 0.00018310745535257778,
      "loss": 0.7963,
      "step": 1331
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.346422016620636,
      "learning_rate": 0.00018310745535257778,
      "loss": 0.7492,
      "step": 1332
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.20978941023349762,
      "learning_rate": 0.00018307869743756932,
      "loss": 0.9445,
      "step": 1333
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.3129740059375763,
      "learning_rate": 0.00018304991732690296,
      "loss": 0.9124,
      "step": 1334
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.13040007650852203,
      "learning_rate": 0.00018302111502826768,
      "loss": 0.752,
      "step": 1335
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.2200053483247757,
      "learning_rate": 0.00018299229054935846,
      "loss": 0.8563,
      "step": 1336
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.19205571711063385,
      "learning_rate": 0.0001829634438978761,
      "loss": 0.787,
      "step": 1337
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.187309131026268,
      "learning_rate": 0.00018293457508152745,
      "loss": 0.9726,
      "step": 1338
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.28417152166366577,
      "learning_rate": 0.00018290568410802522,
      "loss": 0.8511,
      "step": 1339
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.27811944484710693,
      "learning_rate": 0.00018287677098508803,
      "loss": 0.8483,
      "step": 1340
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.21617572009563446,
      "learning_rate": 0.00018284783572044045,
      "loss": 0.9702,
      "step": 1341
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.3353249728679657,
      "learning_rate": 0.00018281887832181291,
      "loss": 0.9463,
      "step": 1342
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.4694057106971741,
      "learning_rate": 0.00018278989879694186,
      "loss": 0.7039,
      "step": 1343
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.2948024570941925,
      "learning_rate": 0.00018276089715356953,
      "loss": 0.7672,
      "step": 1344
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.7137619256973267,
      "learning_rate": 0.00018273187339944418,
      "loss": 0.8234,
      "step": 1345
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.2216477245092392,
      "learning_rate": 0.00018270282754231985,
      "loss": 0.8056,
      "step": 1346
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.3933102488517761,
      "learning_rate": 0.00018267375958995658,
      "loss": 0.8404,
      "step": 1347
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.6004481315612793,
      "learning_rate": 0.00018264466955012035,
      "loss": 0.751,
      "step": 1348
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.1968058943748474,
      "learning_rate": 0.0001826155574305829,
      "loss": 0.4603,
      "step": 1349
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.26292043924331665,
      "learning_rate": 0.00018258642323912196,
      "loss": 0.8532,
      "step": 1350
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.5416619777679443,
      "learning_rate": 0.00018255726698352117,
      "loss": 0.9237,
      "step": 1351
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.18001894652843475,
      "learning_rate": 0.00018252808867157003,
      "loss": 0.6665,
      "step": 1352
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.16467057168483734,
      "learning_rate": 0.00018249888831106396,
      "loss": 0.7123,
      "step": 1353
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.2759355902671814,
      "learning_rate": 0.0001824696659098042,
      "loss": 0.882,
      "step": 1354
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.26624593138694763,
      "learning_rate": 0.000182440421475598,
      "loss": 0.7935,
      "step": 1355
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.2187303602695465,
      "learning_rate": 0.00018241115501625837,
      "loss": 0.7474,
      "step": 1356
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.3029341995716095,
      "learning_rate": 0.00018238186653960427,
      "loss": 0.4875,
      "step": 1357
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.18986043334007263,
      "learning_rate": 0.00018235255605346057,
      "loss": 0.7783,
      "step": 1358
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.4439036250114441,
      "learning_rate": 0.00018232322356565795,
      "loss": 0.8205,
      "step": 1359
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.3778383433818817,
      "learning_rate": 0.00018229386908403302,
      "loss": 0.8531,
      "step": 1360
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.23622940480709076,
      "learning_rate": 0.00018226449261642821,
      "loss": 0.6568,
      "step": 1361
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.27074432373046875,
      "learning_rate": 0.0001822350941706919,
      "loss": 0.7214,
      "step": 1362
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.2887376844882965,
      "learning_rate": 0.00018220567375467833,
      "loss": 0.9489,
      "step": 1363
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.5016897916793823,
      "learning_rate": 0.00018217623137624752,
      "loss": 0.7702,
      "step": 1364
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.41379132866859436,
      "learning_rate": 0.00018214676704326547,
      "loss": 0.9489,
      "step": 1365
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.2968260943889618,
      "learning_rate": 0.00018211728076360397,
      "loss": 0.899,
      "step": 1366
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.5838313698768616,
      "learning_rate": 0.0001820877725451407,
      "loss": 0.8364,
      "step": 1367
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.4110313057899475,
      "learning_rate": 0.00018205824239575922,
      "loss": 0.9617,
      "step": 1368
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.5777244567871094,
      "learning_rate": 0.00018202869032334893,
      "loss": 0.7726,
      "step": 1369
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.3175279498100281,
      "learning_rate": 0.00018199911633580505,
      "loss": 0.8707,
      "step": 1370
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.3990786671638489,
      "learning_rate": 0.00018196952044102874,
      "loss": 0.6367,
      "step": 1371
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.33783575892448425,
      "learning_rate": 0.00018193990264692692,
      "loss": 0.8598,
      "step": 1372
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.19958442449569702,
      "learning_rate": 0.00018191026296141244,
      "loss": 0.7595,
      "step": 1373
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.3777536451816559,
      "learning_rate": 0.00018188060139240393,
      "loss": 0.6139,
      "step": 1374
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.27622705698013306,
      "learning_rate": 0.00018185091794782596,
      "loss": 0.9466,
      "step": 1375
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.6872371435165405,
      "learning_rate": 0.00018182121263560883,
      "loss": 1.1432,
      "step": 1376
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.3103821873664856,
      "learning_rate": 0.00018179148546368875,
      "loss": 0.5681,
      "step": 1377
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.24877259135246277,
      "learning_rate": 0.0001817617364400078,
      "loss": 0.771,
      "step": 1378
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.31606051325798035,
      "learning_rate": 0.00018173196557251376,
      "loss": 1.0534,
      "step": 1379
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1742808818817139,
      "learning_rate": 0.00018170217286916045,
      "loss": 0.9475,
      "step": 1380
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.30190354585647583,
      "learning_rate": 0.00018167235833790738,
      "loss": 0.9696,
      "step": 1381
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.21082483232021332,
      "learning_rate": 0.00018164252198671986,
      "loss": 0.9524,
      "step": 1382
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.784009575843811,
      "learning_rate": 0.0001816126638235692,
      "loss": 0.9032,
      "step": 1383
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.2571183741092682,
      "learning_rate": 0.00018158278385643236,
      "loss": 0.7814,
      "step": 1384
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.18946896493434906,
      "learning_rate": 0.0001815528820932922,
      "loss": 0.7257,
      "step": 1385
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.3494076132774353,
      "learning_rate": 0.00018152295854213744,
      "loss": 1.0784,
      "step": 1386
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.5868812799453735,
      "learning_rate": 0.0001814930132109626,
      "loss": 0.8009,
      "step": 1387
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.2626528739929199,
      "learning_rate": 0.00018146304610776795,
      "loss": 0.9139,
      "step": 1388
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.2868996560573578,
      "learning_rate": 0.00018143305724055965,
      "loss": 0.771,
      "step": 1389
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.3073597252368927,
      "learning_rate": 0.0001814030466173496,
      "loss": 0.9491,
      "step": 1390
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.3477679491043091,
      "learning_rate": 0.0001813730142461557,
      "loss": 1.0223,
      "step": 1391
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.3506452441215515,
      "learning_rate": 0.00018134296013500137,
      "loss": 0.8539,
      "step": 1392
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.30835357308387756,
      "learning_rate": 0.0001813128842919161,
      "loss": 0.7147,
      "step": 1393
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.26949194073677063,
      "learning_rate": 0.00018128278672493507,
      "loss": 0.9746,
      "step": 1394
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.21776187419891357,
      "learning_rate": 0.00018125266744209922,
      "loss": 0.9089,
      "step": 1395
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.234989732503891,
      "learning_rate": 0.0001812225264514554,
      "loss": 0.8028,
      "step": 1396
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.5252193808555603,
      "learning_rate": 0.00018119236376105618,
      "loss": 1.1446,
      "step": 1397
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.3817419707775116,
      "learning_rate": 0.00018116217937895994,
      "loss": 0.5953,
      "step": 1398
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.30079299211502075,
      "learning_rate": 0.00018113197331323089,
      "loss": 0.864,
      "step": 1399
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.23942260444164276,
      "learning_rate": 0.00018110174557193898,
      "loss": 0.9298,
      "step": 1400
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.26664260029792786,
      "learning_rate": 0.00018107149616316005,
      "loss": 0.9203,
      "step": 1401
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.518056333065033,
      "learning_rate": 0.0001810412250949756,
      "loss": 0.7461,
      "step": 1402
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.36129578948020935,
      "learning_rate": 0.00018101093237547297,
      "loss": 0.9946,
      "step": 1403
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.3740208148956299,
      "learning_rate": 0.00018098061801274533,
      "loss": 0.8016,
      "step": 1404
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.5597389340400696,
      "learning_rate": 0.0001809502820148916,
      "loss": 0.6848,
      "step": 1405
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.6893675923347473,
      "learning_rate": 0.00018091992439001642,
      "loss": 1.0044,
      "step": 1406
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.1611628234386444,
      "learning_rate": 0.00018088954514623032,
      "loss": 0.7775,
      "step": 1407
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.23692892491817474,
      "learning_rate": 0.0001808591442916495,
      "loss": 1.0294,
      "step": 1408
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.29078051447868347,
      "learning_rate": 0.000180828721834396,
      "loss": 0.7322,
      "step": 1409
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.21175149083137512,
      "learning_rate": 0.00018079827778259765,
      "loss": 0.7557,
      "step": 1410
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.32306915521621704,
      "learning_rate": 0.00018076781214438795,
      "loss": 0.5505,
      "step": 1411
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.19697335362434387,
      "learning_rate": 0.0001807373249279063,
      "loss": 0.6537,
      "step": 1412
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.3707191050052643,
      "learning_rate": 0.0001807068161412977,
      "loss": 0.8456,
      "step": 1413
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.1613295078277588,
      "learning_rate": 0.0001806762857927131,
      "loss": 0.5848,
      "step": 1414
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.489471971988678,
      "learning_rate": 0.00018064573389030907,
      "loss": 0.7439,
      "step": 1415
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.22256731986999512,
      "learning_rate": 0.00018061516044224797,
      "loss": 0.9741,
      "step": 1416
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.14221753180027008,
      "learning_rate": 0.000180584565456698,
      "loss": 0.7845,
      "step": 1417
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.3335963785648346,
      "learning_rate": 0.00018055394894183295,
      "loss": 0.5861,
      "step": 1418
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.24843798577785492,
      "learning_rate": 0.00018052331090583255,
      "loss": 0.8948,
      "step": 1419
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.2655305564403534,
      "learning_rate": 0.00018049265135688215,
      "loss": 1.0254,
      "step": 1420
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.21898262202739716,
      "learning_rate": 0.00018046197030317286,
      "loss": 0.7829,
      "step": 1421
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.3877382278442383,
      "learning_rate": 0.0001804312677529016,
      "loss": 1.0725,
      "step": 1422
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.3082130253314972,
      "learning_rate": 0.00018040054371427097,
      "loss": 0.6704,
      "step": 1423
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.21518997848033905,
      "learning_rate": 0.00018036979819548935,
      "loss": 0.7436,
      "step": 1424
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.16237996518611908,
      "learning_rate": 0.00018033903120477085,
      "loss": 0.8283,
      "step": 1425
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.21138867735862732,
      "learning_rate": 0.0001803082427503353,
      "loss": 0.8027,
      "step": 1426
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.18223772943019867,
      "learning_rate": 0.00018027743284040825,
      "loss": 0.7664,
      "step": 1427
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.19396072626113892,
      "learning_rate": 0.00018024660148322107,
      "loss": 0.7117,
      "step": 1428
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.29922300577163696,
      "learning_rate": 0.0001802157486870107,
      "loss": 0.7594,
      "step": 1429
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.17520874738693237,
      "learning_rate": 0.00018018487446002,
      "loss": 0.8262,
      "step": 1430
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.5202134847640991,
      "learning_rate": 0.00018015397881049737,
      "loss": 0.8832,
      "step": 1431
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.28229352831840515,
      "learning_rate": 0.0001801230617466971,
      "loss": 0.853,
      "step": 1432
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.33451545238494873,
      "learning_rate": 0.00018009212327687913,
      "loss": 1.0867,
      "step": 1433
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.261625736951828,
      "learning_rate": 0.000180061163409309,
      "loss": 0.7703,
      "step": 1434
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.1610385924577713,
      "learning_rate": 0.00018003018215225822,
      "loss": 0.837,
      "step": 1435
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.2683800756931305,
      "learning_rate": 0.00017999917951400384,
      "loss": 0.758,
      "step": 1436
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.5349836349487305,
      "learning_rate": 0.00017996815550282857,
      "loss": 0.6823,
      "step": 1437
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.2543414533138275,
      "learning_rate": 0.000179937110127021,
      "loss": 0.8395,
      "step": 1438
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.27316519618034363,
      "learning_rate": 0.0001799060433948753,
      "loss": 0.7487,
      "step": 1439
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.1614943891763687,
      "learning_rate": 0.00017987495531469145,
      "loss": 0.7902,
      "step": 1440
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.23774538934230804,
      "learning_rate": 0.00017984384589477502,
      "loss": 0.6941,
      "step": 1441
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.2857237756252289,
      "learning_rate": 0.0001798127151434373,
      "loss": 0.7068,
      "step": 1442
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.24900290369987488,
      "learning_rate": 0.0001797815630689954,
      "loss": 0.747,
      "step": 1443
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.15391062200069427,
      "learning_rate": 0.00017975038967977204,
      "loss": 0.954,
      "step": 1444
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.16260461509227753,
      "learning_rate": 0.00017971919498409555,
      "loss": 0.8089,
      "step": 1445
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.26611584424972534,
      "learning_rate": 0.0001796879789903001,
      "loss": 0.9554,
      "step": 1446
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.21356000006198883,
      "learning_rate": 0.0001796567417067255,
      "loss": 1.1011,
      "step": 1447
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.616613507270813,
      "learning_rate": 0.0001796254831417172,
      "loss": 0.5674,
      "step": 1448
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.18683765828609467,
      "learning_rate": 0.00017959420330362636,
      "loss": 0.6465,
      "step": 1449
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.2209913581609726,
      "learning_rate": 0.00017956290220080986,
      "loss": 1.1348,
      "step": 1450
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.22686399519443512,
      "learning_rate": 0.00017953157984163025,
      "loss": 1.0059,
      "step": 1451
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.2739703059196472,
      "learning_rate": 0.00017950023623445572,
      "loss": 0.7295,
      "step": 1452
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.2368312031030655,
      "learning_rate": 0.00017946887138766017,
      "loss": 0.6364,
      "step": 1453
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.30560457706451416,
      "learning_rate": 0.00017943748530962315,
      "loss": 0.7825,
      "step": 1454
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.2307925820350647,
      "learning_rate": 0.0001794060780087299,
      "loss": 0.6759,
      "step": 1455
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.12132783979177475,
      "learning_rate": 0.00017937464949337138,
      "loss": 0.8616,
      "step": 1456
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.4608183801174164,
      "learning_rate": 0.00017934319977194407,
      "loss": 0.7619,
      "step": 1457
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.2990238666534424,
      "learning_rate": 0.0001793117288528503,
      "loss": 1.0292,
      "step": 1458
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.35019806027412415,
      "learning_rate": 0.00017928023674449795,
      "loss": 0.7815,
      "step": 1459
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.2607220709323883,
      "learning_rate": 0.00017924872345530054,
      "loss": 0.7841,
      "step": 1460
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.5722203254699707,
      "learning_rate": 0.00017921718899367733,
      "loss": 0.9411,
      "step": 1461
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.6743856072425842,
      "learning_rate": 0.00017918563336805324,
      "loss": 0.818,
      "step": 1462
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.2650226652622223,
      "learning_rate": 0.00017915405658685876,
      "loss": 0.7176,
      "step": 1463
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.26520097255706787,
      "learning_rate": 0.00017912245865853006,
      "loss": 0.831,
      "step": 1464
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.21871626377105713,
      "learning_rate": 0.000179090839591509,
      "loss": 0.7474,
      "step": 1465
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.20392687618732452,
      "learning_rate": 0.00017905919939424308,
      "loss": 0.9447,
      "step": 1466
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.396558552980423,
      "learning_rate": 0.0001790275380751854,
      "loss": 1.1281,
      "step": 1467
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.31426236033439636,
      "learning_rate": 0.00017899585564279478,
      "loss": 0.8034,
      "step": 1468
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.1955685168504715,
      "learning_rate": 0.00017896415210553557,
      "loss": 0.9179,
      "step": 1469
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.4477945864200592,
      "learning_rate": 0.00017893242747187786,
      "loss": 0.9838,
      "step": 1470
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.2637459337711334,
      "learning_rate": 0.0001789006817502973,
      "loss": 0.8489,
      "step": 1471
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.21175040304660797,
      "learning_rate": 0.00017886891494927527,
      "loss": 0.8042,
      "step": 1472
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.26170504093170166,
      "learning_rate": 0.00017883712707729868,
      "loss": 0.8279,
      "step": 1473
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.2226918637752533,
      "learning_rate": 0.00017880531814286018,
      "loss": 0.9518,
      "step": 1474
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.5094869136810303,
      "learning_rate": 0.00017877348815445787,
      "loss": 0.9154,
      "step": 1475
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.28257447481155396,
      "learning_rate": 0.00017874163712059565,
      "loss": 0.822,
      "step": 1476
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.18957120180130005,
      "learning_rate": 0.000178709765049783,
      "loss": 0.878,
      "step": 1477
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.24895405769348145,
      "learning_rate": 0.00017867787195053497,
      "loss": 0.9535,
      "step": 1478
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.3885946273803711,
      "learning_rate": 0.00017864595783137222,
      "loss": 0.8345,
      "step": 1479
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.4865983724594116,
      "learning_rate": 0.0001786140227008211,
      "loss": 0.9232,
      "step": 1480
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.28830742835998535,
      "learning_rate": 0.00017858206656741355,
      "loss": 0.8399,
      "step": 1481
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.14477528631687164,
      "learning_rate": 0.00017855008943968708,
      "loss": 0.7155,
      "step": 1482
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.40859749913215637,
      "learning_rate": 0.00017851809132618486,
      "loss": 0.6515,
      "step": 1483
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.2415543645620346,
      "learning_rate": 0.0001784860722354556,
      "loss": 0.8837,
      "step": 1484
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.3486257493495941,
      "learning_rate": 0.0001784540321760537,
      "loss": 0.7181,
      "step": 1485
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.28804826736450195,
      "learning_rate": 0.00017842197115653906,
      "loss": 0.88,
      "step": 1486
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.19074603915214539,
      "learning_rate": 0.00017838988918547733,
      "loss": 0.8171,
      "step": 1487
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.23491066694259644,
      "learning_rate": 0.00017835778627143959,
      "loss": 0.6738,
      "step": 1488
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.46287068724632263,
      "learning_rate": 0.0001783256624230026,
      "loss": 0.8804,
      "step": 1489
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.289186030626297,
      "learning_rate": 0.00017829351764874876,
      "loss": 0.8928,
      "step": 1490
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.4435836970806122,
      "learning_rate": 0.0001782613519572659,
      "loss": 0.6741,
      "step": 1491
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.5243536233901978,
      "learning_rate": 0.0001782291653571477,
      "loss": 0.7307,
      "step": 1492
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.35576456785202026,
      "learning_rate": 0.0001781969578569931,
      "loss": 0.6827,
      "step": 1493
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.26859918236732483,
      "learning_rate": 0.00017816472946540689,
      "loss": 0.7342,
      "step": 1494
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.3591601550579071,
      "learning_rate": 0.00017813248019099933,
      "loss": 0.9456,
      "step": 1495
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.4271202087402344,
      "learning_rate": 0.00017810021004238623,
      "loss": 0.9536,
      "step": 1496
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.40169036388397217,
      "learning_rate": 0.0001780679190281891,
      "loss": 1.0058,
      "step": 1497
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.19124965369701385,
      "learning_rate": 0.00017803560715703488,
      "loss": 0.9372,
      "step": 1498
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.2788808345794678,
      "learning_rate": 0.00017800327443755616,
      "loss": 0.735,
      "step": 1499
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.24428604543209076,
      "learning_rate": 0.00017797092087839113,
      "loss": 0.8197,
      "step": 1500
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.15267661213874817,
      "learning_rate": 0.00017793854648818342,
      "loss": 0.9315,
      "step": 1501
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.2636547386646271,
      "learning_rate": 0.00017790615127558237,
      "loss": 0.7977,
      "step": 1502
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.27806106209754944,
      "learning_rate": 0.00017787373524924283,
      "loss": 0.6796,
      "step": 1503
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.3373166024684906,
      "learning_rate": 0.00017784129841782518,
      "loss": 0.7883,
      "step": 1504
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.5739229321479797,
      "learning_rate": 0.00017780884078999538,
      "loss": 0.8663,
      "step": 1505
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.23296967148780823,
      "learning_rate": 0.00017777636237442494,
      "loss": 0.7254,
      "step": 1506
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.41894176602363586,
      "learning_rate": 0.00017774386317979095,
      "loss": 0.7821,
      "step": 1507
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.3254104256629944,
      "learning_rate": 0.00017771134321477604,
      "loss": 0.8374,
      "step": 1508
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.17000238597393036,
      "learning_rate": 0.00017767880248806836,
      "loss": 0.8449,
      "step": 1509
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.22714409232139587,
      "learning_rate": 0.00017764624100836165,
      "loss": 0.8068,
      "step": 1510
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.4006255269050598,
      "learning_rate": 0.00017761365878435513,
      "loss": 0.934,
      "step": 1511
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.22497497498989105,
      "learning_rate": 0.00017758105582475365,
      "loss": 0.8261,
      "step": 1512
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.21548563241958618,
      "learning_rate": 0.00017754843213826758,
      "loss": 0.6891,
      "step": 1513
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.23665224015712738,
      "learning_rate": 0.00017751578773361274,
      "loss": 0.8431,
      "step": 1514
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.2575601637363434,
      "learning_rate": 0.00017748312261951055,
      "loss": 0.7588,
      "step": 1515
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.1440887153148651,
      "learning_rate": 0.000177450436804688,
      "loss": 0.8603,
      "step": 1516
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.27003243565559387,
      "learning_rate": 0.0001774177302978776,
      "loss": 0.827,
      "step": 1517
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.38702884316444397,
      "learning_rate": 0.00017738500310781724,
      "loss": 0.8951,
      "step": 1518
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.27399712800979614,
      "learning_rate": 0.00017735225524325059,
      "loss": 0.8876,
      "step": 1519
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.8054304718971252,
      "learning_rate": 0.0001773194867129266,
      "loss": 0.7625,
      "step": 1520
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.2865411639213562,
      "learning_rate": 0.0001772866975255999,
      "loss": 1.0267,
      "step": 1521
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.18720750510692596,
      "learning_rate": 0.00017725388769003063,
      "loss": 0.7931,
      "step": 1522
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.5725424289703369,
      "learning_rate": 0.00017722105721498435,
      "loss": 0.9264,
      "step": 1523
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.3187529444694519,
      "learning_rate": 0.00017718820610923222,
      "loss": 0.9663,
      "step": 1524
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.21964608132839203,
      "learning_rate": 0.00017715533438155082,
      "loss": 0.8182,
      "step": 1525
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.3448609709739685,
      "learning_rate": 0.00017712244204072235,
      "loss": 1.048,
      "step": 1526
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.2784580886363983,
      "learning_rate": 0.00017708952909553446,
      "loss": 0.8748,
      "step": 1527
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.2772420644760132,
      "learning_rate": 0.0001770565955547803,
      "loss": 0.7591,
      "step": 1528
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.5506030917167664,
      "learning_rate": 0.0001770236414272586,
      "loss": 0.8765,
      "step": 1529
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.3156081736087799,
      "learning_rate": 0.00017699066672177344,
      "loss": 0.8571,
      "step": 1530
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.17868655920028687,
      "learning_rate": 0.0001769576714471345,
      "loss": 0.7481,
      "step": 1531
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.5359590649604797,
      "learning_rate": 0.000176924655612157,
      "loss": 0.6767,
      "step": 1532
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.16367490589618683,
      "learning_rate": 0.0001768916192256615,
      "loss": 0.7406,
      "step": 1533
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.1854495257139206,
      "learning_rate": 0.0001768585622964742,
      "loss": 0.825,
      "step": 1534
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.32731950283050537,
      "learning_rate": 0.00017682548483342672,
      "loss": 1.0238,
      "step": 1535
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.2850567400455475,
      "learning_rate": 0.00017679238684535615,
      "loss": 0.9435,
      "step": 1536
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.1999683380126953,
      "learning_rate": 0.00017675926834110513,
      "loss": 0.726,
      "step": 1537
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0328675508499146,
      "learning_rate": 0.00017672612932952172,
      "loss": 0.7472,
      "step": 1538
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.3019428253173828,
      "learning_rate": 0.00017669296981945944,
      "loss": 0.7435,
      "step": 1539
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.2063782811164856,
      "learning_rate": 0.0001766597898197774,
      "loss": 0.6915,
      "step": 1540
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.29344290494918823,
      "learning_rate": 0.0001766265893393401,
      "loss": 0.8177,
      "step": 1541
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.18409617245197296,
      "learning_rate": 0.00017659336838701743,
      "loss": 0.6889,
      "step": 1542
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.30702999234199524,
      "learning_rate": 0.00017656012697168496,
      "loss": 0.6829,
      "step": 1543
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.2943236231803894,
      "learning_rate": 0.0001765268651022235,
      "loss": 0.6758,
      "step": 1544
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.16815511882305145,
      "learning_rate": 0.00017649358278751956,
      "loss": 0.9671,
      "step": 1545
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.17826154828071594,
      "learning_rate": 0.00017646028003646483,
      "loss": 0.5978,
      "step": 1546
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.18078076839447021,
      "learning_rate": 0.00017642695685795675,
      "loss": 0.8703,
      "step": 1547
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.3055894374847412,
      "learning_rate": 0.00017639361326089804,
      "loss": 0.977,
      "step": 1548
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.38437625765800476,
      "learning_rate": 0.00017636024925419687,
      "loss": 0.7789,
      "step": 1549
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.2766260802745819,
      "learning_rate": 0.00017632686484676696,
      "loss": 0.8743,
      "step": 1550
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.2676248550415039,
      "learning_rate": 0.0001762934600475274,
      "loss": 0.8438,
      "step": 1551
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.3467411696910858,
      "learning_rate": 0.0001762600348654028,
      "loss": 0.9547,
      "step": 1552
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.20186319947242737,
      "learning_rate": 0.00017622658930932313,
      "loss": 0.8457,
      "step": 1553
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.7073321342468262,
      "learning_rate": 0.00017619312338822387,
      "loss": 0.9841,
      "step": 1554
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.23274162411689758,
      "learning_rate": 0.00017615963711104592,
      "loss": 0.7751,
      "step": 1555
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.4295259416103363,
      "learning_rate": 0.00017612613048673562,
      "loss": 0.7392,
      "step": 1556
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.2736862897872925,
      "learning_rate": 0.0001760926035242447,
      "loss": 0.9091,
      "step": 1557
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.24258770048618317,
      "learning_rate": 0.00017605905623253038,
      "loss": 0.8015,
      "step": 1558
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.393074631690979,
      "learning_rate": 0.00017602548862055532,
      "loss": 1.0475,
      "step": 1559
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.6377884745597839,
      "learning_rate": 0.0001759919006972876,
      "loss": 0.827,
      "step": 1560
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.3583137094974518,
      "learning_rate": 0.0001759582924717007,
      "loss": 0.9314,
      "step": 1561
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.21434138715267181,
      "learning_rate": 0.0001759246639527735,
      "loss": 0.8247,
      "step": 1562
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0627541542053223,
      "learning_rate": 0.0001758910151494904,
      "loss": 0.8871,
      "step": 1563
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.36841386556625366,
      "learning_rate": 0.00017585734607084109,
      "loss": 0.8232,
      "step": 1564
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.3272690773010254,
      "learning_rate": 0.00017582365672582078,
      "loss": 0.764,
      "step": 1565
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.2006182074546814,
      "learning_rate": 0.00017578994712343007,
      "loss": 0.7557,
      "step": 1566
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.17187121510505676,
      "learning_rate": 0.00017575621727267495,
      "loss": 0.9247,
      "step": 1567
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.2438923865556717,
      "learning_rate": 0.00017572246718256678,
      "loss": 0.7477,
      "step": 1568
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.29861173033714294,
      "learning_rate": 0.0001756886968621225,
      "loss": 0.7332,
      "step": 1569
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.19892726838588715,
      "learning_rate": 0.0001756549063203642,
      "loss": 1.0266,
      "step": 1570
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.22800874710083008,
      "learning_rate": 0.00017562109556631958,
      "loss": 0.8365,
      "step": 1571
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.24853608012199402,
      "learning_rate": 0.00017558726460902165,
      "loss": 0.8033,
      "step": 1572
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.25365573167800903,
      "learning_rate": 0.00017555341345750885,
      "loss": 0.7368,
      "step": 1573
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.3670751750469208,
      "learning_rate": 0.00017551954212082494,
      "loss": 0.9938,
      "step": 1574
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.2783086895942688,
      "learning_rate": 0.00017548565060801916,
      "loss": 0.9174,
      "step": 1575
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.34819093346595764,
      "learning_rate": 0.00017545173892814613,
      "loss": 0.8753,
      "step": 1576
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.20864978432655334,
      "learning_rate": 0.00017541780709026583,
      "loss": 1.1213,
      "step": 1577
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.17735376954078674,
      "learning_rate": 0.00017538385510344363,
      "loss": 0.7863,
      "step": 1578
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.19554801285266876,
      "learning_rate": 0.00017534988297675027,
      "loss": 0.7272,
      "step": 1579
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.22674700617790222,
      "learning_rate": 0.00017531589071926194,
      "loss": 0.7461,
      "step": 1580
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.850347101688385,
      "learning_rate": 0.00017528187834006009,
      "loss": 0.7806,
      "step": 1581
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.23882180452346802,
      "learning_rate": 0.00017524784584823164,
      "loss": 0.8971,
      "step": 1582
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.2634866237640381,
      "learning_rate": 0.00017521379325286887,
      "loss": 0.9759,
      "step": 1583
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.15619447827339172,
      "learning_rate": 0.0001751797205630694,
      "loss": 0.6745,
      "step": 1584
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.25713348388671875,
      "learning_rate": 0.00017514562778793625,
      "loss": 0.9913,
      "step": 1585
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.2080388069152832,
      "learning_rate": 0.00017511151493657776,
      "loss": 0.8154,
      "step": 1586
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.1759488433599472,
      "learning_rate": 0.00017507738201810765,
      "loss": 0.6675,
      "step": 1587
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.40627485513687134,
      "learning_rate": 0.00017504322904164513,
      "loss": 0.838,
      "step": 1588
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.319382905960083,
      "learning_rate": 0.00017500905601631453,
      "loss": 1.0024,
      "step": 1589
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.18773455917835236,
      "learning_rate": 0.00017497486295124567,
      "loss": 0.7699,
      "step": 1590
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.26610812544822693,
      "learning_rate": 0.00017494064985557382,
      "loss": 0.7398,
      "step": 1591
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.16111573576927185,
      "learning_rate": 0.00017490641673843937,
      "loss": 0.8024,
      "step": 1592
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.6271716952323914,
      "learning_rate": 0.00017487216360898827,
      "loss": 0.9109,
      "step": 1593
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.5977947115898132,
      "learning_rate": 0.00017483789047637166,
      "loss": 0.6317,
      "step": 1594
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.4218549132347107,
      "learning_rate": 0.00017480359734974615,
      "loss": 0.7302,
      "step": 1595
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.23908548057079315,
      "learning_rate": 0.00017476928423827364,
      "loss": 0.9922,
      "step": 1596
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.35037559270858765,
      "learning_rate": 0.00017473495115112136,
      "loss": 1.0334,
      "step": 1597
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.2665032744407654,
      "learning_rate": 0.00017470059809746187,
      "loss": 0.7964,
      "step": 1598
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.20136184990406036,
      "learning_rate": 0.00017466622508647306,
      "loss": 1.0813,
      "step": 1599
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.2682799994945526,
      "learning_rate": 0.00017463183212733822,
      "loss": 0.5763,
      "step": 1600
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.25692257285118103,
      "learning_rate": 0.00017459741922924588,
      "loss": 0.9134,
      "step": 1601
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.19802285730838776,
      "learning_rate": 0.00017456298640138994,
      "loss": 0.675,
      "step": 1602
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.27711498737335205,
      "learning_rate": 0.00017452853365296963,
      "loss": 0.7136,
      "step": 1603
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.31814801692962646,
      "learning_rate": 0.0001744940609931895,
      "loss": 0.7205,
      "step": 1604
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.1949274241924286,
      "learning_rate": 0.0001744595684312594,
      "loss": 1.0077,
      "step": 1605
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.31189051270484924,
      "learning_rate": 0.00017442505597639452,
      "loss": 0.815,
      "step": 1606
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.4471169710159302,
      "learning_rate": 0.00017439052363781533,
      "loss": 0.789,
      "step": 1607
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.2889825999736786,
      "learning_rate": 0.00017435597142474767,
      "loss": 0.8839,
      "step": 1608
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.28603583574295044,
      "learning_rate": 0.00017432139934642262,
      "loss": 0.8535,
      "step": 1609
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.3628825843334198,
      "learning_rate": 0.0001742868074120766,
      "loss": 0.7887,
      "step": 1610
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.20414689183235168,
      "learning_rate": 0.00017425219563095142,
      "loss": 0.9167,
      "step": 1611
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.17191140353679657,
      "learning_rate": 0.000174217564012294,
      "loss": 0.8629,
      "step": 1612
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.20936930179595947,
      "learning_rate": 0.00017418291256535677,
      "loss": 0.7916,
      "step": 1613
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.1528608202934265,
      "learning_rate": 0.0001741482412993973,
      "loss": 0.8015,
      "step": 1614
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.32540881633758545,
      "learning_rate": 0.0001741135502236785,
      "loss": 0.9978,
      "step": 1615
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.3117353618144989,
      "learning_rate": 0.0001740788393474686,
      "loss": 0.5785,
      "step": 1616
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.4292644262313843,
      "learning_rate": 0.0001740441086800411,
      "loss": 0.8915,
      "step": 1617
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.2185625433921814,
      "learning_rate": 0.00017400935823067487,
      "loss": 0.7857,
      "step": 1618
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.23778553307056427,
      "learning_rate": 0.00017397458800865384,
      "loss": 0.8445,
      "step": 1619
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.6814897656440735,
      "learning_rate": 0.00017393979802326752,
      "loss": 0.6429,
      "step": 1620
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.1943795531988144,
      "learning_rate": 0.00017390498828381045,
      "loss": 0.7675,
      "step": 1621
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.10445375740528107,
      "learning_rate": 0.00017387015879958263,
      "loss": 0.58,
      "step": 1622
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.3312947452068329,
      "learning_rate": 0.00017383530957988913,
      "loss": 0.8265,
      "step": 1623
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.2737201154232025,
      "learning_rate": 0.00017380044063404054,
      "loss": 0.858,
      "step": 1624
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.3172123432159424,
      "learning_rate": 0.00017376555197135254,
      "loss": 0.7819,
      "step": 1625
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.30435431003570557,
      "learning_rate": 0.00017373064360114612,
      "loss": 0.6717,
      "step": 1626
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.30859825015068054,
      "learning_rate": 0.00017369571553274758,
      "loss": 0.9076,
      "step": 1627
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.30620330572128296,
      "learning_rate": 0.00017366076777548846,
      "loss": 0.9709,
      "step": 1628
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.2307305932044983,
      "learning_rate": 0.0001736258003387055,
      "loss": 0.8968,
      "step": 1629
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.25108081102371216,
      "learning_rate": 0.0001735908132317408,
      "loss": 0.6945,
      "step": 1630
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.1398221254348755,
      "learning_rate": 0.00017355580646394162,
      "loss": 0.8047,
      "step": 1631
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.25688430666923523,
      "learning_rate": 0.00017352078004466057,
      "loss": 0.8598,
      "step": 1632
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.5340328216552734,
      "learning_rate": 0.0001734857339832554,
      "loss": 0.8958,
      "step": 1633
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.28087329864501953,
      "learning_rate": 0.00017345066828908923,
      "loss": 1.2056,
      "step": 1634
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.2652375400066376,
      "learning_rate": 0.0001734155829715303,
      "loss": 0.8458,
      "step": 1635
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.23788996040821075,
      "learning_rate": 0.00017338047803995216,
      "loss": 0.8082,
      "step": 1636
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.3429185748100281,
      "learning_rate": 0.00017334535350373362,
      "loss": 0.8256,
      "step": 1637
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.17832320928573608,
      "learning_rate": 0.0001733102093722587,
      "loss": 0.738,
      "step": 1638
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.3827884793281555,
      "learning_rate": 0.00017327504565491664,
      "loss": 0.9421,
      "step": 1639
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.23601853847503662,
      "learning_rate": 0.00017323986236110193,
      "loss": 0.7448,
      "step": 1640
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.31423842906951904,
      "learning_rate": 0.00017320465950021428,
      "loss": 0.9469,
      "step": 1641
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.21806161105632782,
      "learning_rate": 0.00017316943708165864,
      "loss": 0.7323,
      "step": 1642
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.12115266174077988,
      "learning_rate": 0.0001731341951148452,
      "loss": 0.9379,
      "step": 1643
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.29731667041778564,
      "learning_rate": 0.00017309893360918936,
      "loss": 0.809,
      "step": 1644
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.605288028717041,
      "learning_rate": 0.0001730636525741117,
      "loss": 0.752,
      "step": 1645
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.23421341180801392,
      "learning_rate": 0.00017302835201903806,
      "loss": 0.7726,
      "step": 1646
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.31794747710227966,
      "learning_rate": 0.00017299303195339948,
      "loss": 0.8255,
      "step": 1647
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.1733589470386505,
      "learning_rate": 0.00017295769238663227,
      "loss": 0.6787,
      "step": 1648
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.414106160402298,
      "learning_rate": 0.00017292233332817785,
      "loss": 0.6953,
      "step": 1649
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.24939322471618652,
      "learning_rate": 0.00017288695478748288,
      "loss": 0.9149,
      "step": 1650
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.489632248878479,
      "learning_rate": 0.00017285155677399926,
      "loss": 0.7621,
      "step": 1651
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.1942123919725418,
      "learning_rate": 0.00017281613929718412,
      "loss": 0.6856,
      "step": 1652
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.29461556673049927,
      "learning_rate": 0.00017278070236649971,
      "loss": 0.9926,
      "step": 1653
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.28858160972595215,
      "learning_rate": 0.00017274524599141346,
      "loss": 0.8213,
      "step": 1654
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.2498042732477188,
      "learning_rate": 0.00017270977018139813,
      "loss": 0.688,
      "step": 1655
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.30806758999824524,
      "learning_rate": 0.00017267427494593155,
      "loss": 0.899,
      "step": 1656
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.21764585375785828,
      "learning_rate": 0.00017263876029449674,
      "loss": 0.9944,
      "step": 1657
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.25922656059265137,
      "learning_rate": 0.00017260322623658203,
      "loss": 0.7616,
      "step": 1658
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.15801186859607697,
      "learning_rate": 0.00017256767278168075,
      "loss": 0.743,
      "step": 1659
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.31557855010032654,
      "learning_rate": 0.0001725320999392916,
      "loss": 0.7879,
      "step": 1660
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.22714978456497192,
      "learning_rate": 0.00017249650771891835,
      "loss": 0.9219,
      "step": 1661
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.6309428215026855,
      "learning_rate": 0.00017246089613006996,
      "loss": 0.8514,
      "step": 1662
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.24420128762722015,
      "learning_rate": 0.0001724252651822605,
      "loss": 0.7373,
      "step": 1663
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.4495256543159485,
      "learning_rate": 0.00017238961488500945,
      "loss": 0.6454,
      "step": 1664
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.20104923844337463,
      "learning_rate": 0.00017235394524784114,
      "loss": 0.4675,
      "step": 1665
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.43304798007011414,
      "learning_rate": 0.0001723182562802853,
      "loss": 0.6421,
      "step": 1666
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.43304798007011414,
      "learning_rate": 0.0001723182562802853,
      "loss": 0.9384,
      "step": 1667
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.5639911890029907,
      "learning_rate": 0.0001722825479918767,
      "loss": 0.7972,
      "step": 1668
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.1867443025112152,
      "learning_rate": 0.0001722468203921554,
      "loss": 0.6789,
      "step": 1669
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.3841310143470764,
      "learning_rate": 0.00017221107349066643,
      "loss": 0.9927,
      "step": 1670
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.2908351421356201,
      "learning_rate": 0.00017217530729696017,
      "loss": 0.6874,
      "step": 1671
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.641609251499176,
      "learning_rate": 0.00017213952182059203,
      "loss": 1.1147,
      "step": 1672
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.3135237693786621,
      "learning_rate": 0.00017210371707112262,
      "loss": 0.8083,
      "step": 1673
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.2697260081768036,
      "learning_rate": 0.00017206789305811767,
      "loss": 0.6821,
      "step": 1674
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.2828006446361542,
      "learning_rate": 0.00017203204979114812,
      "loss": 0.8195,
      "step": 1675
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.21109169721603394,
      "learning_rate": 0.00017199618727978995,
      "loss": 0.6449,
      "step": 1676
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.2875900864601135,
      "learning_rate": 0.0001719603055336244,
      "loss": 0.8761,
      "step": 1677
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.16623644530773163,
      "learning_rate": 0.00017192440456223772,
      "loss": 0.861,
      "step": 1678
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.20505350828170776,
      "learning_rate": 0.00017188848437522144,
      "loss": 0.6805,
      "step": 1679
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.25906652212142944,
      "learning_rate": 0.00017185254498217208,
      "loss": 0.7257,
      "step": 1680
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.18688900768756866,
      "learning_rate": 0.00017181658639269144,
      "loss": 0.8108,
      "step": 1681
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.3584699034690857,
      "learning_rate": 0.00017178060861638633,
      "loss": 0.9781,
      "step": 1682
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.23528505861759186,
      "learning_rate": 0.0001717446116628687,
      "loss": 0.9619,
      "step": 1683
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.23185642063617706,
      "learning_rate": 0.00017170859554175566,
      "loss": 0.9138,
      "step": 1684
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.355165958404541,
      "learning_rate": 0.00017167256026266947,
      "loss": 1.0393,
      "step": 1685
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.2716372311115265,
      "learning_rate": 0.00017163650583523743,
      "loss": 0.6844,
      "step": 1686
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.24319560825824738,
      "learning_rate": 0.00017160043226909202,
      "loss": 0.6289,
      "step": 1687
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.25922998785972595,
      "learning_rate": 0.00017156433957387076,
      "loss": 0.8348,
      "step": 1688
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.7295400500297546,
      "learning_rate": 0.00017152822775921638,
      "loss": 0.99,
      "step": 1689
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.11386293917894363,
      "learning_rate": 0.00017149209683477664,
      "loss": 0.9483,
      "step": 1690
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.29602164030075073,
      "learning_rate": 0.00017145594681020445,
      "loss": 0.7156,
      "step": 1691
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.25077611207962036,
      "learning_rate": 0.00017141977769515778,
      "loss": 0.8309,
      "step": 1692
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.37415802478790283,
      "learning_rate": 0.00017138358949929977,
      "loss": 0.8257,
      "step": 1693
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.31985601782798767,
      "learning_rate": 0.00017134738223229852,
      "loss": 0.8023,
      "step": 1694
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.2598036825656891,
      "learning_rate": 0.0001713111559038274,
      "loss": 0.8475,
      "step": 1695
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.4003340005874634,
      "learning_rate": 0.00017127491052356476,
      "loss": 0.8141,
      "step": 1696
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.21808406710624695,
      "learning_rate": 0.0001712386461011941,
      "loss": 0.6025,
      "step": 1697
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.2857385277748108,
      "learning_rate": 0.00017120236264640392,
      "loss": 0.6376,
      "step": 1698
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.30357617139816284,
      "learning_rate": 0.0001711660601688879,
      "loss": 1.136,
      "step": 1699
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.280524343252182,
      "learning_rate": 0.00017112973867834476,
      "loss": 1.0672,
      "step": 1700
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.23640190064907074,
      "learning_rate": 0.00017109339818447832,
      "loss": 0.6883,
      "step": 1701
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.28205323219299316,
      "learning_rate": 0.00017105703869699744,
      "loss": 0.7466,
      "step": 1702
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.38354039192199707,
      "learning_rate": 0.0001710206602256161,
      "loss": 0.8663,
      "step": 1703
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.17591306567192078,
      "learning_rate": 0.00017098426278005325,
      "loss": 0.7125,
      "step": 1704
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.1890038698911667,
      "learning_rate": 0.00017094784637003307,
      "loss": 0.7897,
      "step": 1705
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.23284617066383362,
      "learning_rate": 0.0001709114110052847,
      "loss": 0.866,
      "step": 1706
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.43929174542427063,
      "learning_rate": 0.00017087495669554237,
      "loss": 0.8521,
      "step": 1707
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.39994093775749207,
      "learning_rate": 0.00017083848345054534,
      "loss": 0.952,
      "step": 1708
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.2285773605108261,
      "learning_rate": 0.00017080199128003795,
      "loss": 0.8768,
      "step": 1709
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.23592692613601685,
      "learning_rate": 0.00017076548019376967,
      "loss": 0.8971,
      "step": 1710
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.6050797700881958,
      "learning_rate": 0.00017072895020149494,
      "loss": 0.9988,
      "step": 1711
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.3047102093696594,
      "learning_rate": 0.00017069240131297318,
      "loss": 0.9679,
      "step": 1712
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.18338657915592194,
      "learning_rate": 0.00017065583353796906,
      "loss": 0.8155,
      "step": 1713
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.49684223532676697,
      "learning_rate": 0.0001706192468862521,
      "loss": 0.8872,
      "step": 1714
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.8854992389678955,
      "learning_rate": 0.000170582641367597,
      "loss": 1.1946,
      "step": 1715
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.3052016496658325,
      "learning_rate": 0.00017054601699178346,
      "loss": 0.9774,
      "step": 1716
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.7482655048370361,
      "learning_rate": 0.00017050937376859613,
      "loss": 0.5991,
      "step": 1717
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.2638699412345886,
      "learning_rate": 0.00017047271170782483,
      "loss": 0.9555,
      "step": 1718
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.41627296805381775,
      "learning_rate": 0.0001704360308192643,
      "loss": 0.9827,
      "step": 1719
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.2845969796180725,
      "learning_rate": 0.0001703993311127144,
      "loss": 0.9341,
      "step": 1720
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.17573435604572296,
      "learning_rate": 0.00017036261259797999,
      "loss": 0.8024,
      "step": 1721
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.26212993264198303,
      "learning_rate": 0.0001703258752848709,
      "loss": 0.831,
      "step": 1722
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.2136053442955017,
      "learning_rate": 0.00017028911918320208,
      "loss": 0.9215,
      "step": 1723
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.39071953296661377,
      "learning_rate": 0.0001702523443027934,
      "loss": 1.0917,
      "step": 1724
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.24572721123695374,
      "learning_rate": 0.00017021555065346983,
      "loss": 0.847,
      "step": 1725
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.22487430274486542,
      "learning_rate": 0.00017017873824506127,
      "loss": 0.7248,
      "step": 1726
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.2439170777797699,
      "learning_rate": 0.00017014190708740273,
      "loss": 1.0164,
      "step": 1727
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.2301466166973114,
      "learning_rate": 0.00017010505719033419,
      "loss": 0.8844,
      "step": 1728
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.574055552482605,
      "learning_rate": 0.00017006818856370054,
      "loss": 0.6541,
      "step": 1729
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.574516773223877,
      "learning_rate": 0.00017003130121735185,
      "loss": 0.9728,
      "step": 1730
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.34638893604278564,
      "learning_rate": 0.00016999439516114304,
      "loss": 0.9759,
      "step": 1731
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.19384965300559998,
      "learning_rate": 0.00016995747040493412,
      "loss": 0.6164,
      "step": 1732
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.3656960129737854,
      "learning_rate": 0.0001699205269585901,
      "loss": 0.7926,
      "step": 1733
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.19287893176078796,
      "learning_rate": 0.00016988356483198084,
      "loss": 0.6165,
      "step": 1734
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.2912212908267975,
      "learning_rate": 0.0001698465840349814,
      "loss": 0.7368,
      "step": 1735
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.2148180902004242,
      "learning_rate": 0.0001698095845774717,
      "loss": 0.7581,
      "step": 1736
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.4335290789604187,
      "learning_rate": 0.0001697725664693366,
      "loss": 1.1884,
      "step": 1737
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.28351953625679016,
      "learning_rate": 0.00016973552972046613,
      "loss": 0.8988,
      "step": 1738
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.3505178391933441,
      "learning_rate": 0.00016969847434075512,
      "loss": 0.7791,
      "step": 1739
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.11304440349340439,
      "learning_rate": 0.00016966140034010348,
      "loss": 0.7118,
      "step": 1740
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.3362009823322296,
      "learning_rate": 0.00016962430772841602,
      "loss": 0.7357,
      "step": 1741
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.6304141879081726,
      "learning_rate": 0.00016958719651560258,
      "loss": 0.7933,
      "step": 1742
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.7677857279777527,
      "learning_rate": 0.0001695500667115779,
      "loss": 0.8722,
      "step": 1743
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.22440387308597565,
      "learning_rate": 0.00016951291832626182,
      "loss": 0.8754,
      "step": 1744
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.19403578341007233,
      "learning_rate": 0.000169475751369579,
      "loss": 0.931,
      "step": 1745
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.1581544429063797,
      "learning_rate": 0.00016943856585145917,
      "loss": 0.6199,
      "step": 1746
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.19015011191368103,
      "learning_rate": 0.00016940136178183695,
      "loss": 0.6733,
      "step": 1747
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.39804041385650635,
      "learning_rate": 0.0001693641391706519,
      "loss": 0.7065,
      "step": 1748
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.23891817033290863,
      "learning_rate": 0.00016932689802784861,
      "loss": 0.6898,
      "step": 1749
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.38879886269569397,
      "learning_rate": 0.00016928963836337655,
      "loss": 0.8755,
      "step": 1750
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.19689933955669403,
      "learning_rate": 0.0001692523601871902,
      "loss": 0.947,
      "step": 1751
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.2616431713104248,
      "learning_rate": 0.000169215063509249,
      "loss": 0.8962,
      "step": 1752
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.2544642984867096,
      "learning_rate": 0.0001691777483395172,
      "loss": 0.7603,
      "step": 1753
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.34580859541893005,
      "learning_rate": 0.0001691404146879641,
      "loss": 0.7608,
      "step": 1754
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.350591242313385,
      "learning_rate": 0.00016910306256456397,
      "loss": 0.9732,
      "step": 1755
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.5270747542381287,
      "learning_rate": 0.00016906569197929592,
      "loss": 0.8635,
      "step": 1756
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.2009015679359436,
      "learning_rate": 0.00016902830294214405,
      "loss": 0.5781,
      "step": 1757
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.21809059381484985,
      "learning_rate": 0.00016899089546309736,
      "loss": 0.9331,
      "step": 1758
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.2439567595720291,
      "learning_rate": 0.00016895346955214977,
      "loss": 0.6446,
      "step": 1759
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.1974611133337021,
      "learning_rate": 0.0001689160252193002,
      "loss": 0.7927,
      "step": 1760
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.34590595960617065,
      "learning_rate": 0.0001688785624745524,
      "loss": 0.7826,
      "step": 1761
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.24978412687778473,
      "learning_rate": 0.00016884108132791506,
      "loss": 0.7785,
      "step": 1762
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.20006275177001953,
      "learning_rate": 0.00016880358178940184,
      "loss": 0.7558,
      "step": 1763
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.0942302718758583,
      "learning_rate": 0.00016876606386903128,
      "loss": 0.7538,
      "step": 1764
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.2582448422908783,
      "learning_rate": 0.00016872852757682683,
      "loss": 0.6967,
      "step": 1765
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.30698561668395996,
      "learning_rate": 0.00016869097292281681,
      "loss": 0.9491,
      "step": 1766
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.7550094723701477,
      "learning_rate": 0.0001686533999170345,
      "loss": 0.8798,
      "step": 1767
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.6384320259094238,
      "learning_rate": 0.00016861580856951806,
      "loss": 1.0092,
      "step": 1768
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.29378488659858704,
      "learning_rate": 0.00016857819889031054,
      "loss": 1.1602,
      "step": 1769
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.23345646262168884,
      "learning_rate": 0.00016854057088945993,
      "loss": 0.8678,
      "step": 1770
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.30828142166137695,
      "learning_rate": 0.00016850292457701907,
      "loss": 0.8447,
      "step": 1771
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.21960309147834778,
      "learning_rate": 0.0001684652599630457,
      "loss": 0.8745,
      "step": 1772
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.20053298771381378,
      "learning_rate": 0.0001684275770576025,
      "loss": 0.9018,
      "step": 1773
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.4030926823616028,
      "learning_rate": 0.00016838987587075693,
      "loss": 0.7881,
      "step": 1774
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.41260650753974915,
      "learning_rate": 0.00016835215641258149,
      "loss": 0.9906,
      "step": 1775
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.4041730761528015,
      "learning_rate": 0.0001683144186931534,
      "loss": 1.0068,
      "step": 1776
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.812812864780426,
      "learning_rate": 0.0001682766627225548,
      "loss": 0.87,
      "step": 1777
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.32380855083465576,
      "learning_rate": 0.0001682388885108728,
      "loss": 0.9481,
      "step": 1778
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.5127311944961548,
      "learning_rate": 0.0001682010960681993,
      "loss": 0.9604,
      "step": 1779
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.38640087842941284,
      "learning_rate": 0.00016816328540463112,
      "loss": 0.7999,
      "step": 1780
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.32559773325920105,
      "learning_rate": 0.00016812545653026987,
      "loss": 0.674,
      "step": 1781
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.31453338265419006,
      "learning_rate": 0.00016808760945522208,
      "loss": 0.7229,
      "step": 1782
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9802420139312744,
      "learning_rate": 0.00016804974418959916,
      "loss": 0.8605,
      "step": 1783
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.24503205716609955,
      "learning_rate": 0.00016801186074351737,
      "loss": 0.9968,
      "step": 1784
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.4189132750034332,
      "learning_rate": 0.00016797395912709773,
      "loss": 0.9493,
      "step": 1785
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.25158950686454773,
      "learning_rate": 0.00016793603935046626,
      "loss": 0.8962,
      "step": 1786
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.15598557889461517,
      "learning_rate": 0.00016789810142375377,
      "loss": 0.8114,
      "step": 1787
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.30159738659858704,
      "learning_rate": 0.00016786014535709592,
      "loss": 0.9084,
      "step": 1788
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.3639749586582184,
      "learning_rate": 0.0001678221711606332,
      "loss": 0.9518,
      "step": 1789
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.3325149416923523,
      "learning_rate": 0.00016778417884451093,
      "loss": 0.5292,
      "step": 1790
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6813601851463318,
      "learning_rate": 0.00016774616841887932,
      "loss": 0.7723,
      "step": 1791
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.3778146505355835,
      "learning_rate": 0.00016770813989389348,
      "loss": 0.8425,
      "step": 1792
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.266071617603302,
      "learning_rate": 0.00016767009327971314,
      "loss": 0.9173,
      "step": 1793
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.21721693873405457,
      "learning_rate": 0.00016763202858650303,
      "loss": 0.763,
      "step": 1794
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.2244548797607422,
      "learning_rate": 0.00016759394582443275,
      "loss": 0.9132,
      "step": 1795
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.4136221408843994,
      "learning_rate": 0.00016755584500367657,
      "loss": 0.7917,
      "step": 1796
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6172612905502319,
      "learning_rate": 0.00016751772613441372,
      "loss": 0.8425,
      "step": 1797
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.369940847158432,
      "learning_rate": 0.00016747958922682816,
      "loss": 0.8119,
      "step": 1798
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.4470018148422241,
      "learning_rate": 0.0001674414342911087,
      "loss": 0.9517,
      "step": 1799
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.20936986804008484,
      "learning_rate": 0.00016740326133744905,
      "loss": 0.7773,
      "step": 1800
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.23710954189300537,
      "learning_rate": 0.00016736507037604757,
      "loss": 0.838,
      "step": 1801
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.22652025520801544,
      "learning_rate": 0.00016732686141710757,
      "loss": 0.8359,
      "step": 1802
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.2561778128147125,
      "learning_rate": 0.00016728863447083712,
      "loss": 0.815,
      "step": 1803
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6061865091323853,
      "learning_rate": 0.00016725038954744904,
      "loss": 0.6953,
      "step": 1804
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.3124062716960907,
      "learning_rate": 0.00016721212665716108,
      "loss": 0.8297,
      "step": 1805
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.3163222074508667,
      "learning_rate": 0.00016717384581019565,
      "loss": 0.8642,
      "step": 1806
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.7174985408782959,
      "learning_rate": 0.0001671355470167801,
      "loss": 0.8551,
      "step": 1807
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.2897241413593292,
      "learning_rate": 0.00016709723028714642,
      "loss": 0.7958,
      "step": 1808
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.1815556138753891,
      "learning_rate": 0.00016705889563153152,
      "loss": 0.8104,
      "step": 1809
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.3861762583255768,
      "learning_rate": 0.000167020543060177,
      "loss": 0.8608,
      "step": 1810
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.31567642092704773,
      "learning_rate": 0.0001669821725833294,
      "loss": 0.6068,
      "step": 1811
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.3347039520740509,
      "learning_rate": 0.0001669437842112398,
      "loss": 0.7812,
      "step": 1812
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.24487841129302979,
      "learning_rate": 0.00016690537795416432,
      "loss": 0.7986,
      "step": 1813
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.3192692697048187,
      "learning_rate": 0.00016686695382236365,
      "loss": 0.8456,
      "step": 1814
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.21610698103904724,
      "learning_rate": 0.0001668285118261034,
      "loss": 0.8027,
      "step": 1815
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.21458716690540314,
      "learning_rate": 0.00016679005197565386,
      "loss": 0.7817,
      "step": 1816
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6158285737037659,
      "learning_rate": 0.00016675157428129017,
      "loss": 0.9644,
      "step": 1817
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.29051896929740906,
      "learning_rate": 0.0001667130787532922,
      "loss": 0.7908,
      "step": 1818
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.31979262828826904,
      "learning_rate": 0.0001666745654019445,
      "loss": 0.9512,
      "step": 1819
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.22584842145442963,
      "learning_rate": 0.0001666360342375365,
      "loss": 0.7106,
      "step": 1820
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.21338775753974915,
      "learning_rate": 0.00016659748527036243,
      "loss": 0.8466,
      "step": 1821
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.2165154665708542,
      "learning_rate": 0.00016655891851072107,
      "loss": 0.7265,
      "step": 1822
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.21490995585918427,
      "learning_rate": 0.00016652033396891616,
      "loss": 0.7732,
      "step": 1823
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.28183990716934204,
      "learning_rate": 0.00016648173165525608,
      "loss": 0.9743,
      "step": 1824
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.1855984628200531,
      "learning_rate": 0.00016644311158005399,
      "loss": 0.7914,
      "step": 1825
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.3823080360889435,
      "learning_rate": 0.0001664044737536278,
      "loss": 0.9391,
      "step": 1826
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.3937717080116272,
      "learning_rate": 0.00016636581818630018,
      "loss": 0.9951,
      "step": 1827
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.2522265911102295,
      "learning_rate": 0.00016632714488839847,
      "loss": 0.7064,
      "step": 1828
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.1951160728931427,
      "learning_rate": 0.0001662884538702548,
      "loss": 0.6881,
      "step": 1829
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.2369173765182495,
      "learning_rate": 0.00016624974514220604,
      "loss": 0.912,
      "step": 1830
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.26009976863861084,
      "learning_rate": 0.00016621101871459377,
      "loss": 0.9171,
      "step": 1831
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.4654848575592041,
      "learning_rate": 0.00016617227459776433,
      "loss": 0.7897,
      "step": 1832
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.259420782327652,
      "learning_rate": 0.00016613351280206877,
      "loss": 0.8764,
      "step": 1833
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.386909157037735,
      "learning_rate": 0.0001660947333378628,
      "loss": 1.0877,
      "step": 1834
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6151875853538513,
      "learning_rate": 0.00016605593621550697,
      "loss": 0.8005,
      "step": 1835
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.2510405480861664,
      "learning_rate": 0.00016601712144536642,
      "loss": 0.7856,
      "step": 1836
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.29514390230178833,
      "learning_rate": 0.00016597828903781113,
      "loss": 0.9023,
      "step": 1837
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.19742892682552338,
      "learning_rate": 0.00016593943900321568,
      "loss": 0.6459,
      "step": 1838
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.4554835855960846,
      "learning_rate": 0.00016590057135195947,
      "loss": 0.8129,
      "step": 1839
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.722804844379425,
      "learning_rate": 0.0001658616860944265,
      "loss": 0.8167,
      "step": 1840
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.48084038496017456,
      "learning_rate": 0.0001658227832410055,
      "loss": 0.8816,
      "step": 1841
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.7869875431060791,
      "learning_rate": 0.00016578386280208997,
      "loss": 0.8646,
      "step": 1842
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.46422451734542847,
      "learning_rate": 0.00016574492478807807,
      "loss": 0.6573,
      "step": 1843
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.24159488081932068,
      "learning_rate": 0.00016570596920937258,
      "loss": 0.6872,
      "step": 1844
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.38870128989219666,
      "learning_rate": 0.00016566699607638112,
      "loss": 0.9062,
      "step": 1845
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.5875572562217712,
      "learning_rate": 0.00016562800539951584,
      "loss": 0.8951,
      "step": 1846
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.25492221117019653,
      "learning_rate": 0.0001655889971891937,
      "loss": 0.8369,
      "step": 1847
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.18284286558628082,
      "learning_rate": 0.00016554997145583632,
      "loss": 0.7758,
      "step": 1848
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.4645591378211975,
      "learning_rate": 0.00016551092820986992,
      "loss": 0.8839,
      "step": 1849
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.2791902422904968,
      "learning_rate": 0.00016547186746172546,
      "loss": 0.4457,
      "step": 1850
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.18003609776496887,
      "learning_rate": 0.00016543278922183865,
      "loss": 0.7752,
      "step": 1851
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6851456165313721,
      "learning_rate": 0.00016539369350064974,
      "loss": 0.7038,
      "step": 1852
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.2570846974849701,
      "learning_rate": 0.0001653545803086037,
      "loss": 0.6117,
      "step": 1853
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.17789384722709656,
      "learning_rate": 0.00016531544965615026,
      "loss": 0.8914,
      "step": 1854
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.5770279169082642,
      "learning_rate": 0.0001652763015537436,
      "loss": 0.8993,
      "step": 1855
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.3697173297405243,
      "learning_rate": 0.0001652371360118428,
      "loss": 0.7599,
      "step": 1856
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.2326582968235016,
      "learning_rate": 0.00016519795304091145,
      "loss": 1.0564,
      "step": 1857
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.3972550332546234,
      "learning_rate": 0.00016515875265141788,
      "loss": 1.0548,
      "step": 1858
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.3021339476108551,
      "learning_rate": 0.00016511953485383494,
      "loss": 1.0061,
      "step": 1859
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.5671452283859253,
      "learning_rate": 0.00016508029965864028,
      "loss": 0.7824,
      "step": 1860
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.3202767074108124,
      "learning_rate": 0.0001650410470763162,
      "loss": 0.5442,
      "step": 1861
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.24216052889823914,
      "learning_rate": 0.0001650017771173495,
      "loss": 0.9217,
      "step": 1862
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.28568825125694275,
      "learning_rate": 0.00016496248979223175,
      "loss": 0.8529,
      "step": 1863
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.2116692215204239,
      "learning_rate": 0.0001649231851114591,
      "loss": 0.796,
      "step": 1864
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.36652758717536926,
      "learning_rate": 0.00016488386308553235,
      "loss": 0.8393,
      "step": 1865
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.5487049221992493,
      "learning_rate": 0.000164844523724957,
      "loss": 0.776,
      "step": 1866
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.3448236882686615,
      "learning_rate": 0.000164805167040243,
      "loss": 0.7796,
      "step": 1867
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.23860444128513336,
      "learning_rate": 0.0001647657930419052,
      "loss": 0.9079,
      "step": 1868
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.3331494629383087,
      "learning_rate": 0.0001647264017404628,
      "loss": 1.0652,
      "step": 1869
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.2440977692604065,
      "learning_rate": 0.00016468699314643983,
      "loss": 0.9768,
      "step": 1870
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.37202200293540955,
      "learning_rate": 0.0001646475672703648,
      "loss": 0.7229,
      "step": 1871
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.38763338327407837,
      "learning_rate": 0.0001646081241227709,
      "loss": 1.0584,
      "step": 1872
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.3433094918727875,
      "learning_rate": 0.00016456866371419596,
      "loss": 0.8619,
      "step": 1873
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.37084463238716125,
      "learning_rate": 0.00016452918605518242,
      "loss": 0.8546,
      "step": 1874
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.2794240713119507,
      "learning_rate": 0.0001644896911562772,
      "loss": 1.0499,
      "step": 1875
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.2485450953245163,
      "learning_rate": 0.000164450179028032,
      "loss": 0.7282,
      "step": 1876
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.27913281321525574,
      "learning_rate": 0.00016441064968100304,
      "loss": 0.9732,
      "step": 1877
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.303756982088089,
      "learning_rate": 0.00016437110312575115,
      "loss": 0.6773,
      "step": 1878
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.3068351447582245,
      "learning_rate": 0.0001643315393728417,
      "loss": 0.8852,
      "step": 1879
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.2977913022041321,
      "learning_rate": 0.00016429195843284478,
      "loss": 0.9126,
      "step": 1880
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.30494996905326843,
      "learning_rate": 0.00016425236031633497,
      "loss": 1.0221,
      "step": 1881
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.5354574918746948,
      "learning_rate": 0.0001642127450338915,
      "loss": 0.947,
      "step": 1882
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.2258252650499344,
      "learning_rate": 0.00016417311259609812,
      "loss": 0.9197,
      "step": 1883
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6146717667579651,
      "learning_rate": 0.0001641334630135432,
      "loss": 1.0509,
      "step": 1884
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.2761136293411255,
      "learning_rate": 0.00016409379629681975,
      "loss": 0.9152,
      "step": 1885
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.16111519932746887,
      "learning_rate": 0.00016405411245652517,
      "loss": 0.6575,
      "step": 1886
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.4018690288066864,
      "learning_rate": 0.0001640144115032617,
      "loss": 0.877,
      "step": 1887
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.33016252517700195,
      "learning_rate": 0.00016397469344763598,
      "loss": 0.9159,
      "step": 1888
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0553048849105835,
      "learning_rate": 0.00016393495830025923,
      "loss": 0.896,
      "step": 1889
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6978235244750977,
      "learning_rate": 0.00016389520607174726,
      "loss": 0.8586,
      "step": 1890
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.24933616816997528,
      "learning_rate": 0.00016385543677272044,
      "loss": 0.8406,
      "step": 1891
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.2359410673379898,
      "learning_rate": 0.00016381565041380375,
      "loss": 0.8834,
      "step": 1892
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.3785106837749481,
      "learning_rate": 0.00016377584700562662,
      "loss": 0.8139,
      "step": 1893
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.17098911106586456,
      "learning_rate": 0.00016373602655882317,
      "loss": 0.7615,
      "step": 1894
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6830399632453918,
      "learning_rate": 0.00016369618908403192,
      "loss": 0.9051,
      "step": 1895
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.31641480326652527,
      "learning_rate": 0.00016365633459189608,
      "loss": 0.9553,
      "step": 1896
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.2535792887210846,
      "learning_rate": 0.0001636164630930633,
      "loss": 0.7976,
      "step": 1897
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.28335633873939514,
      "learning_rate": 0.00016357657459818587,
      "loss": 1.0478,
      "step": 1898
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.20126113295555115,
      "learning_rate": 0.00016353666911792054,
      "loss": 0.8101,
      "step": 1899
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.2698211073875427,
      "learning_rate": 0.0001634967466629286,
      "loss": 0.9287,
      "step": 1900
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.41340917348861694,
      "learning_rate": 0.00016345680724387597,
      "loss": 0.6772,
      "step": 1901
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.31977373361587524,
      "learning_rate": 0.00016341685087143296,
      "loss": 0.8429,
      "step": 1902
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.28010302782058716,
      "learning_rate": 0.00016337687755627454,
      "loss": 0.9196,
      "step": 1903
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.22561940550804138,
      "learning_rate": 0.00016333688730908014,
      "loss": 0.8121,
      "step": 1904
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.31293985247612,
      "learning_rate": 0.0001632968801405337,
      "loss": 0.6785,
      "step": 1905
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.3640248477458954,
      "learning_rate": 0.00016325685606132377,
      "loss": 0.7587,
      "step": 1906
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.25914663076400757,
      "learning_rate": 0.00016321681508214324,
      "loss": 1.0408,
      "step": 1907
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.7298555374145508,
      "learning_rate": 0.0001631767572136897,
      "loss": 0.9589,
      "step": 1908
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.2544383406639099,
      "learning_rate": 0.00016313668246666522,
      "loss": 0.8445,
      "step": 1909
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.28795456886291504,
      "learning_rate": 0.00016309659085177628,
      "loss": 0.9328,
      "step": 1910
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.26721513271331787,
      "learning_rate": 0.00016305648237973391,
      "loss": 0.8798,
      "step": 1911
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.715302050113678,
      "learning_rate": 0.00016301635706125373,
      "loss": 0.7529,
      "step": 1912
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.24304591119289398,
      "learning_rate": 0.00016297621490705573,
      "loss": 0.7849,
      "step": 1913
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.27507901191711426,
      "learning_rate": 0.0001629360559278645,
      "loss": 0.6039,
      "step": 1914
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.4540608525276184,
      "learning_rate": 0.00016289588013440902,
      "loss": 0.9543,
      "step": 1915
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.501755952835083,
      "learning_rate": 0.00016285568753742288,
      "loss": 0.8229,
      "step": 1916
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.3552852272987366,
      "learning_rate": 0.00016281547814764414,
      "loss": 0.6602,
      "step": 1917
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.3383658826351166,
      "learning_rate": 0.00016277525197581523,
      "loss": 0.7319,
      "step": 1918
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.5767070055007935,
      "learning_rate": 0.00016273500903268316,
      "loss": 0.5283,
      "step": 1919
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.31833505630493164,
      "learning_rate": 0.00016269474932899947,
      "loss": 0.882,
      "step": 1920
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.2445637732744217,
      "learning_rate": 0.00016265447287552002,
      "loss": 0.7332,
      "step": 1921
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.2533565163612366,
      "learning_rate": 0.00016261417968300532,
      "loss": 0.8545,
      "step": 1922
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.21311502158641815,
      "learning_rate": 0.00016257386976222023,
      "loss": 0.9817,
      "step": 1923
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.6569021940231323,
      "learning_rate": 0.00016253354312393414,
      "loss": 0.773,
      "step": 1924
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.23222459852695465,
      "learning_rate": 0.00016249319977892085,
      "loss": 0.8677,
      "step": 1925
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.2877637445926666,
      "learning_rate": 0.00016245283973795872,
      "loss": 0.7545,
      "step": 1926
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.38038066029548645,
      "learning_rate": 0.00016241246301183044,
      "loss": 0.9714,
      "step": 1927
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.37013116478919983,
      "learning_rate": 0.00016237206961132325,
      "loss": 0.7311,
      "step": 1928
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.4019503593444824,
      "learning_rate": 0.00016233165954722889,
      "loss": 0.7611,
      "step": 1929
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.378101646900177,
      "learning_rate": 0.00016229123283034337,
      "loss": 1.0353,
      "step": 1930
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.32486405968666077,
      "learning_rate": 0.00016225078947146738,
      "loss": 0.8417,
      "step": 1931
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.2318248599767685,
      "learning_rate": 0.00016221032948140582,
      "loss": 0.7065,
      "step": 1932
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.34295061230659485,
      "learning_rate": 0.00016216985287096825,
      "loss": 0.7989,
      "step": 1933
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.7234256863594055,
      "learning_rate": 0.00016212935965096854,
      "loss": 0.9447,
      "step": 1934
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.26989808678627014,
      "learning_rate": 0.000162088849832225,
      "loss": 0.9508,
      "step": 1935
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.19867677986621857,
      "learning_rate": 0.00016204832342556048,
      "loss": 0.8683,
      "step": 1936
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.2698214650154114,
      "learning_rate": 0.00016200778044180212,
      "loss": 0.629,
      "step": 1937
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.2492637187242508,
      "learning_rate": 0.00016196722089178157,
      "loss": 0.8262,
      "step": 1938
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.2436245083808899,
      "learning_rate": 0.0001619266447863349,
      "loss": 0.7677,
      "step": 1939
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.3496896028518677,
      "learning_rate": 0.0001618860521363026,
      "loss": 0.6701,
      "step": 1940
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.31939077377319336,
      "learning_rate": 0.00016184544295252956,
      "loss": 0.9867,
      "step": 1941
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.5988732576370239,
      "learning_rate": 0.00016180481724586515,
      "loss": 0.9954,
      "step": 1942
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.16380834579467773,
      "learning_rate": 0.00016176417502716302,
      "loss": 0.713,
      "step": 1943
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.22185321152210236,
      "learning_rate": 0.00016172351630728136,
      "loss": 0.7954,
      "step": 1944
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.49391040205955505,
      "learning_rate": 0.00016168284109708276,
      "loss": 0.9717,
      "step": 1945
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.7922899127006531,
      "learning_rate": 0.00016164214940743413,
      "loss": 0.8596,
      "step": 1946
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.5446658730506897,
      "learning_rate": 0.0001616014412492069,
      "loss": 0.6029,
      "step": 1947
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.3668834865093231,
      "learning_rate": 0.00016156071663327675,
      "loss": 0.9347,
      "step": 1948
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.32287222146987915,
      "learning_rate": 0.00016151997557052388,
      "loss": 0.878,
      "step": 1949
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.17133712768554688,
      "learning_rate": 0.00016147921807183288,
      "loss": 0.503,
      "step": 1950
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.25685396790504456,
      "learning_rate": 0.00016143844414809268,
      "loss": 0.6828,
      "step": 1951
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.23520877957344055,
      "learning_rate": 0.00016139765381019657,
      "loss": 0.6865,
      "step": 1952
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.1775226593017578,
      "learning_rate": 0.0001613568470690423,
      "loss": 0.8415,
      "step": 1953
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.416981965303421,
      "learning_rate": 0.00016131602393553202,
      "loss": 0.912,
      "step": 1954
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.26442253589630127,
      "learning_rate": 0.0001612751844205721,
      "loss": 0.8702,
      "step": 1955
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.21701273322105408,
      "learning_rate": 0.0001612343285350735,
      "loss": 0.8887,
      "step": 1956
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.22595073282718658,
      "learning_rate": 0.00016119345628995142,
      "loss": 0.8016,
      "step": 1957
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.2700255215167999,
      "learning_rate": 0.00016115256769612545,
      "loss": 0.8541,
      "step": 1958
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.6187791228294373,
      "learning_rate": 0.00016111166276451953,
      "loss": 0.6419,
      "step": 1959
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.33984801173210144,
      "learning_rate": 0.0001610707415060621,
      "loss": 0.8846,
      "step": 1960
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.25121966004371643,
      "learning_rate": 0.00016102980393168577,
      "loss": 0.7152,
      "step": 1961
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.4538581371307373,
      "learning_rate": 0.0001609888500523276,
      "loss": 0.8723,
      "step": 1962
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.1552920639514923,
      "learning_rate": 0.00016094787987892906,
      "loss": 0.7039,
      "step": 1963
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.22496716678142548,
      "learning_rate": 0.0001609068934224358,
      "loss": 0.7417,
      "step": 1964
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.2626790404319763,
      "learning_rate": 0.0001608658906937981,
      "loss": 0.8827,
      "step": 1965
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.5865355134010315,
      "learning_rate": 0.0001608248717039703,
      "loss": 0.5941,
      "step": 1966
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.38321590423583984,
      "learning_rate": 0.00016078383646391125,
      "loss": 0.7903,
      "step": 1967
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.1973310112953186,
      "learning_rate": 0.00016074278498458408,
      "loss": 0.931,
      "step": 1968
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.15618622303009033,
      "learning_rate": 0.00016070171727695627,
      "loss": 0.666,
      "step": 1969
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.2882949411869049,
      "learning_rate": 0.0001606606333519997,
      "loss": 0.8571,
      "step": 1970
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.3609826862812042,
      "learning_rate": 0.00016061953322069048,
      "loss": 0.9025,
      "step": 1971
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.25698286294937134,
      "learning_rate": 0.0001605784168940091,
      "loss": 1.1049,
      "step": 1972
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.3016101121902466,
      "learning_rate": 0.00016053728438294039,
      "loss": 0.6373,
      "step": 1973
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.29702526330947876,
      "learning_rate": 0.00016049613569847347,
      "loss": 0.8107,
      "step": 1974
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5830485820770264,
      "learning_rate": 0.00016045497085160178,
      "loss": 0.8261,
      "step": 1975
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.5765150189399719,
      "learning_rate": 0.00016041378985332314,
      "loss": 0.6773,
      "step": 1976
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.22989165782928467,
      "learning_rate": 0.0001603725927146396,
      "loss": 0.8999,
      "step": 1977
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.3171211779117584,
      "learning_rate": 0.0001603313794465576,
      "loss": 0.8783,
      "step": 1978
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.2628138065338135,
      "learning_rate": 0.00016029015006008784,
      "loss": 0.7636,
      "step": 1979
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.3302731513977051,
      "learning_rate": 0.00016024890456624527,
      "loss": 1.1672,
      "step": 1980
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.32240986824035645,
      "learning_rate": 0.00016020764297604935,
      "loss": 0.974,
      "step": 1981
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.23174597322940826,
      "learning_rate": 0.0001601663653005236,
      "loss": 0.7411,
      "step": 1982
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.18740549683570862,
      "learning_rate": 0.00016012507155069593,
      "loss": 0.8234,
      "step": 1983
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.2476632297039032,
      "learning_rate": 0.00016008376173759864,
      "loss": 0.8128,
      "step": 1984
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.2685929238796234,
      "learning_rate": 0.00016004243587226813,
      "loss": 0.8771,
      "step": 1985
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.2928779721260071,
      "learning_rate": 0.0001600010939657453,
      "loss": 0.9399,
      "step": 1986
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.47961151599884033,
      "learning_rate": 0.00015995973602907514,
      "loss": 0.7156,
      "step": 1987
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.18103371560573578,
      "learning_rate": 0.00015991836207330704,
      "loss": 0.7295,
      "step": 1988
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.22255033254623413,
      "learning_rate": 0.0001598769721094947,
      "loss": 0.7649,
      "step": 1989
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.531714141368866,
      "learning_rate": 0.00015983556614869592,
      "loss": 0.6898,
      "step": 1990
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.25997689366340637,
      "learning_rate": 0.00015979414420197298,
      "loss": 0.8724,
      "step": 1991
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.197952538728714,
      "learning_rate": 0.00015975270628039234,
      "loss": 0.8077,
      "step": 1992
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.18375234305858612,
      "learning_rate": 0.0001597112523950247,
      "loss": 0.978,
      "step": 1993
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.3343925178050995,
      "learning_rate": 0.00015966978255694509,
      "loss": 0.6604,
      "step": 1994
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.40551847219467163,
      "learning_rate": 0.00015962829677723276,
      "loss": 0.9411,
      "step": 1995
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.3830109238624573,
      "learning_rate": 0.00015958679506697116,
      "loss": 0.7386,
      "step": 1996
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.34672972559928894,
      "learning_rate": 0.00015954527743724817,
      "loss": 1.0881,
      "step": 1997
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.278710275888443,
      "learning_rate": 0.00015950374389915576,
      "loss": 0.8052,
      "step": 1998
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.21230430901050568,
      "learning_rate": 0.00015946219446379023,
      "loss": 0.8197,
      "step": 1999
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.13555367290973663,
      "learning_rate": 0.00015942062914225206,
      "loss": 0.8051,
      "step": 2000
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.1964871883392334,
      "learning_rate": 0.000159379047945646,
      "loss": 0.5207,
      "step": 2001
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.49655377864837646,
      "learning_rate": 0.0001593374508850812,
      "loss": 0.8002,
      "step": 2002
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.26363328099250793,
      "learning_rate": 0.00015929583797167076,
      "loss": 0.858,
      "step": 2003
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.23067046701908112,
      "learning_rate": 0.0001592542092165322,
      "loss": 0.8708,
      "step": 2004
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.26086172461509705,
      "learning_rate": 0.0001592125646307873,
      "loss": 0.8302,
      "step": 2005
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.10624253004789352,
      "learning_rate": 0.00015917090422556192,
      "loss": 0.793,
      "step": 2006
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.27799704670906067,
      "learning_rate": 0.00015912922801198626,
      "loss": 0.9949,
      "step": 2007
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.23432980477809906,
      "learning_rate": 0.0001590875360011947,
      "loss": 0.6329,
      "step": 2008
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.2147177904844284,
      "learning_rate": 0.0001590458282043259,
      "loss": 0.5804,
      "step": 2009
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.2899872064590454,
      "learning_rate": 0.00015900410463252272,
      "loss": 0.7362,
      "step": 2010
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.27376696467399597,
      "learning_rate": 0.0001589623652969321,
      "loss": 0.9959,
      "step": 2011
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.219391867518425,
      "learning_rate": 0.00015892061020870532,
      "loss": 0.7706,
      "step": 2012
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.3187768757343292,
      "learning_rate": 0.00015887883937899791,
      "loss": 0.7395,
      "step": 2013
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.21588650345802307,
      "learning_rate": 0.00015883705281896952,
      "loss": 0.6036,
      "step": 2014
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.21571403741836548,
      "learning_rate": 0.000158795250539784,
      "loss": 0.7776,
      "step": 2015
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.7199824452400208,
      "learning_rate": 0.00015875343255260946,
      "loss": 0.8745,
      "step": 2016
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.21415495872497559,
      "learning_rate": 0.00015871159886861813,
      "loss": 0.8612,
      "step": 2017
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.301607608795166,
      "learning_rate": 0.0001586697494989865,
      "loss": 0.7656,
      "step": 2018
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.22800493240356445,
      "learning_rate": 0.00015862788445489525,
      "loss": 0.7674,
      "step": 2019
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.21776083111763,
      "learning_rate": 0.00015858600374752917,
      "loss": 0.8125,
      "step": 2020
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.3044271469116211,
      "learning_rate": 0.00015854410738807732,
      "loss": 0.9071,
      "step": 2021
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.20620226860046387,
      "learning_rate": 0.0001585021953877329,
      "loss": 0.8736,
      "step": 2022
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.26858842372894287,
      "learning_rate": 0.0001584602677576933,
      "loss": 0.6848,
      "step": 2023
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.2056419551372528,
      "learning_rate": 0.0001584183245091601,
      "loss": 0.818,
      "step": 2024
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.2840169072151184,
      "learning_rate": 0.00015837636565333896,
      "loss": 0.8583,
      "step": 2025
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.5631041526794434,
      "learning_rate": 0.00015833439120143994,
      "loss": 0.7512,
      "step": 2026
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.32401081919670105,
      "learning_rate": 0.00015829240116467695,
      "loss": 0.7879,
      "step": 2027
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.39345479011535645,
      "learning_rate": 0.0001582503955542683,
      "loss": 0.6927,
      "step": 2028
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.20103907585144043,
      "learning_rate": 0.00015820837438143643,
      "loss": 0.8029,
      "step": 2029
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.3016130328178406,
      "learning_rate": 0.00015816633765740782,
      "loss": 1.0819,
      "step": 2030
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.28219079971313477,
      "learning_rate": 0.00015812428539341319,
      "loss": 0.7934,
      "step": 2031
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.2706296443939209,
      "learning_rate": 0.00015808221760068745,
      "loss": 0.6942,
      "step": 2032
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.24023142457008362,
      "learning_rate": 0.00015804013429046956,
      "loss": 0.7994,
      "step": 2033
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.23293974995613098,
      "learning_rate": 0.00015799803547400274,
      "loss": 0.9476,
      "step": 2034
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.1800355464220047,
      "learning_rate": 0.0001579559211625342,
      "loss": 0.6606,
      "step": 2035
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.31703847646713257,
      "learning_rate": 0.00015791379136731543,
      "loss": 0.9671,
      "step": 2036
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.2546432316303253,
      "learning_rate": 0.00015787164609960204,
      "loss": 0.6558,
      "step": 2037
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.29772818088531494,
      "learning_rate": 0.00015782948537065364,
      "loss": 0.9204,
      "step": 2038
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.22962643206119537,
      "learning_rate": 0.00015778730919173417,
      "loss": 0.7252,
      "step": 2039
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.41308265924453735,
      "learning_rate": 0.00015774511757411154,
      "loss": 0.593,
      "step": 2040
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.5931591987609863,
      "learning_rate": 0.00015770291052905785,
      "loss": 0.8569,
      "step": 2041
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.28199124336242676,
      "learning_rate": 0.00015766068806784934,
      "loss": 0.9026,
      "step": 2042
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.311128169298172,
      "learning_rate": 0.00015761845020176632,
      "loss": 1.0579,
      "step": 2043
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.22488833963871002,
      "learning_rate": 0.0001575761969420932,
      "loss": 0.7913,
      "step": 2044
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.6087571382522583,
      "learning_rate": 0.00015753392830011864,
      "loss": 0.924,
      "step": 2045
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.6081514358520508,
      "learning_rate": 0.0001574916442871352,
      "loss": 0.8858,
      "step": 2046
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.43764635920524597,
      "learning_rate": 0.00015744934491443977,
      "loss": 0.7166,
      "step": 2047
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.3117217421531677,
      "learning_rate": 0.0001574070301933331,
      "loss": 0.8417,
      "step": 2048
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.23788221180438995,
      "learning_rate": 0.00015736470013512027,
      "loss": 0.7695,
      "step": 2049
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.25738468766212463,
      "learning_rate": 0.00015732235475111037,
      "loss": 0.819,
      "step": 2050
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.31086447834968567,
      "learning_rate": 0.00015727999405261645,
      "loss": 0.787,
      "step": 2051
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.4072301983833313,
      "learning_rate": 0.00015723761805095589,
      "loss": 0.9007,
      "step": 2052
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.2205123007297516,
      "learning_rate": 0.00015719522675745003,
      "loss": 0.8941,
      "step": 2053
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.20794333517551422,
      "learning_rate": 0.00015715282018342424,
      "loss": 0.8802,
      "step": 2054
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.3440935015678406,
      "learning_rate": 0.00015711039834020811,
      "loss": 0.8369,
      "step": 2055
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.2681485712528229,
      "learning_rate": 0.0001570679612391352,
      "loss": 0.7714,
      "step": 2056
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.30423247814178467,
      "learning_rate": 0.00015702550889154323,
      "loss": 0.7197,
      "step": 2057
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.2776646316051483,
      "learning_rate": 0.00015698304130877392,
      "loss": 0.8791,
      "step": 2058
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.6829207539558411,
      "learning_rate": 0.00015694055850217308,
      "loss": 0.7809,
      "step": 2059
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.24260927736759186,
      "learning_rate": 0.00015689806048309058,
      "loss": 0.8441,
      "step": 2060
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.2623765468597412,
      "learning_rate": 0.00015685554726288043,
      "loss": 0.7333,
      "step": 2061
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.25162744522094727,
      "learning_rate": 0.00015681301885290064,
      "loss": 0.5508,
      "step": 2062
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.23372451961040497,
      "learning_rate": 0.00015677047526451321,
      "loss": 0.7138,
      "step": 2063
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.2009749412536621,
      "learning_rate": 0.0001567279165090843,
      "loss": 0.9869,
      "step": 2064
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.2395859956741333,
      "learning_rate": 0.00015668534259798413,
      "loss": 0.7419,
      "step": 2065
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.2368599772453308,
      "learning_rate": 0.00015664275354258689,
      "loss": 0.7562,
      "step": 2066
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.4292595088481903,
      "learning_rate": 0.00015660014935427082,
      "loss": 1.017,
      "step": 2067
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.15665949881076813,
      "learning_rate": 0.0001565575300444183,
      "loss": 0.9956,
      "step": 2068
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.20844484865665436,
      "learning_rate": 0.00015651489562441568,
      "loss": 0.6399,
      "step": 2069
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.29294008016586304,
      "learning_rate": 0.00015647224610565333,
      "loss": 0.534,
      "step": 2070
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.30617883801460266,
      "learning_rate": 0.00015642958149952562,
      "loss": 0.7361,
      "step": 2071
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.2988731861114502,
      "learning_rate": 0.00015638690181743115,
      "loss": 0.579,
      "step": 2072
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.20115520060062408,
      "learning_rate": 0.00015634420707077225,
      "loss": 0.6549,
      "step": 2073
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.32511571049690247,
      "learning_rate": 0.00015630149727095555,
      "loss": 0.7232,
      "step": 2074
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.22209735214710236,
      "learning_rate": 0.00015625877242939152,
      "loss": 0.7718,
      "step": 2075
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.6000066995620728,
      "learning_rate": 0.00015621603255749471,
      "loss": 1.098,
      "step": 2076
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.2800965905189514,
      "learning_rate": 0.0001561732776666837,
      "loss": 0.9676,
      "step": 2077
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.19016119837760925,
      "learning_rate": 0.00015613050776838106,
      "loss": 0.8858,
      "step": 2078
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.1821461319923401,
      "learning_rate": 0.0001560877228740134,
      "loss": 1.0826,
      "step": 2079
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.25593870878219604,
      "learning_rate": 0.00015604492299501133,
      "loss": 0.8473,
      "step": 2080
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.37762877345085144,
      "learning_rate": 0.00015600210814280935,
      "loss": 0.8127,
      "step": 2081
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.3765249252319336,
      "learning_rate": 0.0001559592783288462,
      "loss": 0.9339,
      "step": 2082
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.17075449228286743,
      "learning_rate": 0.00015591643356456435,
      "loss": 0.6735,
      "step": 2083
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.25100177526474,
      "learning_rate": 0.00015587357386141046,
      "loss": 0.6233,
      "step": 2084
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.41989508271217346,
      "learning_rate": 0.00015583069923083507,
      "loss": 0.69,
      "step": 2085
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.25075921416282654,
      "learning_rate": 0.0001557878096842928,
      "loss": 0.6488,
      "step": 2086
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.25773364305496216,
      "learning_rate": 0.00015574490523324216,
      "loss": 1.0172,
      "step": 2087
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.2558109760284424,
      "learning_rate": 0.0001557019858891457,
      "loss": 0.7493,
      "step": 2088
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.29498666524887085,
      "learning_rate": 0.0001556590516634699,
      "loss": 0.6777,
      "step": 2089
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.20416900515556335,
      "learning_rate": 0.0001556161025676853,
      "loss": 0.8435,
      "step": 2090
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.32168203592300415,
      "learning_rate": 0.00015557313861326637,
      "loss": 0.964,
      "step": 2091
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.20101258158683777,
      "learning_rate": 0.00015553015981169146,
      "loss": 0.7576,
      "step": 2092
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.25699371099472046,
      "learning_rate": 0.00015548716617444313,
      "loss": 0.9695,
      "step": 2093
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.32784831523895264,
      "learning_rate": 0.00015544415771300755,
      "loss": 0.9729,
      "step": 2094
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.28661221265792847,
      "learning_rate": 0.00015540113443887515,
      "loss": 0.9811,
      "step": 2095
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.6823977828025818,
      "learning_rate": 0.00015535809636354025,
      "loss": 0.7574,
      "step": 2096
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.274081826210022,
      "learning_rate": 0.00015531504349850094,
      "loss": 0.9256,
      "step": 2097
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.6006103754043579,
      "learning_rate": 0.00015527197585525957,
      "loss": 0.9672,
      "step": 2098
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.4342872202396393,
      "learning_rate": 0.00015522889344532216,
      "loss": 0.5987,
      "step": 2099
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.7237433195114136,
      "learning_rate": 0.00015518579628019885,
      "loss": 1.0061,
      "step": 2100
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.7370263934135437,
      "learning_rate": 0.00015514268437140364,
      "loss": 0.7392,
      "step": 2101
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.3420640528202057,
      "learning_rate": 0.00015509955773045447,
      "loss": 0.9737,
      "step": 2102
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.25041189789772034,
      "learning_rate": 0.00015505641636887329,
      "loss": 0.8227,
      "step": 2103
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.3036976158618927,
      "learning_rate": 0.00015501326029818588,
      "loss": 0.9289,
      "step": 2104
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9138036370277405,
      "learning_rate": 0.000154970089529922,
      "loss": 1.1771,
      "step": 2105
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.23002009093761444,
      "learning_rate": 0.00015492690407561542,
      "loss": 0.8486,
      "step": 2106
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.30184486508369446,
      "learning_rate": 0.00015488370394680363,
      "loss": 0.9501,
      "step": 2107
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.2453727275133133,
      "learning_rate": 0.00015484048915502822,
      "loss": 0.8455,
      "step": 2108
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.4158976972103119,
      "learning_rate": 0.00015479725971183464,
      "loss": 1.0171,
      "step": 2109
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.3900267779827118,
      "learning_rate": 0.00015475401562877226,
      "loss": 0.8219,
      "step": 2110
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.27392056584358215,
      "learning_rate": 0.0001547107569173943,
      "loss": 0.9138,
      "step": 2111
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.49295827746391296,
      "learning_rate": 0.000154667483589258,
      "loss": 0.7703,
      "step": 2112
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.3213893473148346,
      "learning_rate": 0.00015462419565592442,
      "loss": 0.8996,
      "step": 2113
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.16135667264461517,
      "learning_rate": 0.00015458089312895857,
      "loss": 0.9286,
      "step": 2114
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.16983944177627563,
      "learning_rate": 0.0001545375760199293,
      "loss": 0.8268,
      "step": 2115
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.21800711750984192,
      "learning_rate": 0.0001544942443404094,
      "loss": 0.873,
      "step": 2116
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.20324386656284332,
      "learning_rate": 0.00015445089810197556,
      "loss": 0.9785,
      "step": 2117
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.3037072718143463,
      "learning_rate": 0.00015440753731620836,
      "loss": 0.5984,
      "step": 2118
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.264620304107666,
      "learning_rate": 0.00015436416199469223,
      "loss": 0.8613,
      "step": 2119
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.6980445981025696,
      "learning_rate": 0.0001543207721490155,
      "loss": 0.7702,
      "step": 2120
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.23524296283721924,
      "learning_rate": 0.00015427736779077041,
      "loss": 0.7547,
      "step": 2121
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.46193110942840576,
      "learning_rate": 0.00015423394893155306,
      "loss": 0.8493,
      "step": 2122
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.6434472799301147,
      "learning_rate": 0.00015419051558296335,
      "loss": 0.6381,
      "step": 2123
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.23082335293293,
      "learning_rate": 0.00015414706775660516,
      "loss": 0.8381,
      "step": 2124
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.15079569816589355,
      "learning_rate": 0.00015410360546408625,
      "loss": 0.7021,
      "step": 2125
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.2648935914039612,
      "learning_rate": 0.0001540601287170181,
      "loss": 0.7945,
      "step": 2126
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9902966022491455,
      "learning_rate": 0.00015401663752701622,
      "loss": 0.7404,
      "step": 2127
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.21096131205558777,
      "learning_rate": 0.0001539731319056998,
      "loss": 0.7708,
      "step": 2128
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.27751708030700684,
      "learning_rate": 0.0001539296118646921,
      "loss": 0.6083,
      "step": 2129
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.2629346549510956,
      "learning_rate": 0.00015388607741562008,
      "loss": 0.8211,
      "step": 2130
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.3443010449409485,
      "learning_rate": 0.00015384252857011455,
      "loss": 0.6637,
      "step": 2131
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.23641394078731537,
      "learning_rate": 0.00015379896533981025,
      "loss": 0.7874,
      "step": 2132
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.351389616727829,
      "learning_rate": 0.00015375538773634568,
      "loss": 0.9282,
      "step": 2133
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.29489538073539734,
      "learning_rate": 0.00015371179577136328,
      "loss": 0.7995,
      "step": 2134
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.2040664404630661,
      "learning_rate": 0.0001536681894565092,
      "loss": 0.7915,
      "step": 2135
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.23851576447486877,
      "learning_rate": 0.0001536245688034335,
      "loss": 0.7698,
      "step": 2136
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.18818636238574982,
      "learning_rate": 0.00015358093382379005,
      "loss": 0.7678,
      "step": 2137
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.17893993854522705,
      "learning_rate": 0.0001535372845292366,
      "loss": 0.6773,
      "step": 2138
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.2002429962158203,
      "learning_rate": 0.00015349362093143468,
      "loss": 0.7918,
      "step": 2139
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.44135144352912903,
      "learning_rate": 0.00015344994304204962,
      "loss": 0.9135,
      "step": 2140
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.6936823725700378,
      "learning_rate": 0.00015340625087275055,
      "loss": 1.0444,
      "step": 2141
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.17222295701503754,
      "learning_rate": 0.00015336254443521052,
      "loss": 0.7334,
      "step": 2142
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.2480802834033966,
      "learning_rate": 0.00015331882374110633,
      "loss": 0.9509,
      "step": 2143
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.48803412914276123,
      "learning_rate": 0.00015327508880211852,
      "loss": 0.6763,
      "step": 2144
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.19439196586608887,
      "learning_rate": 0.00015323133962993156,
      "loss": 0.7686,
      "step": 2145
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.1873595118522644,
      "learning_rate": 0.00015318757623623367,
      "loss": 0.7111,
      "step": 2146
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.2280067801475525,
      "learning_rate": 0.0001531437986327168,
      "loss": 0.7403,
      "step": 2147
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.2551144063472748,
      "learning_rate": 0.00015310000683107684,
      "loss": 0.9995,
      "step": 2148
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.3126496374607086,
      "learning_rate": 0.00015305620084301333,
      "loss": 0.7817,
      "step": 2149
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.22924333810806274,
      "learning_rate": 0.0001530123806802297,
      "loss": 0.7238,
      "step": 2150
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.4616803228855133,
      "learning_rate": 0.00015296854635443312,
      "loss": 1.1211,
      "step": 2151
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.2343638837337494,
      "learning_rate": 0.00015292469787733456,
      "loss": 0.7279,
      "step": 2152
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.44465604424476624,
      "learning_rate": 0.00015288083526064877,
      "loss": 0.8664,
      "step": 2153
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.34150752425193787,
      "learning_rate": 0.00015283695851609424,
      "loss": 0.9166,
      "step": 2154
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.34402966499328613,
      "learning_rate": 0.00015279306765539333,
      "loss": 0.9361,
      "step": 2155
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.20321963727474213,
      "learning_rate": 0.00015274916269027205,
      "loss": 0.7213,
      "step": 2156
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.2988041937351227,
      "learning_rate": 0.00015270524363246026,
      "loss": 0.7113,
      "step": 2157
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.23971694707870483,
      "learning_rate": 0.00015266131049369156,
      "loss": 0.8111,
      "step": 2158
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.6295557618141174,
      "learning_rate": 0.00015261736328570332,
      "loss": 0.8728,
      "step": 2159
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.2572064995765686,
      "learning_rate": 0.00015257340202023663,
      "loss": 0.7874,
      "step": 2160
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.23432360589504242,
      "learning_rate": 0.00015252942670903645,
      "loss": 0.7703,
      "step": 2161
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.12636031210422516,
      "learning_rate": 0.00015248543736385135,
      "loss": 0.8525,
      "step": 2162
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.25600889325141907,
      "learning_rate": 0.00015244143399643367,
      "loss": 0.8453,
      "step": 2163
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.2648664712905884,
      "learning_rate": 0.0001523974166185396,
      "loss": 0.7074,
      "step": 2164
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.281293660402298,
      "learning_rate": 0.00015235338524192902,
      "loss": 0.8242,
      "step": 2165
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.3131871223449707,
      "learning_rate": 0.00015230933987836552,
      "loss": 0.8682,
      "step": 2166
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.1938846856355667,
      "learning_rate": 0.00015226528053961642,
      "loss": 0.7327,
      "step": 2167
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.21962210536003113,
      "learning_rate": 0.0001522212072374528,
      "loss": 0.9451,
      "step": 2168
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.19290728867053986,
      "learning_rate": 0.0001521771199836495,
      "loss": 0.9219,
      "step": 2169
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.40701889991760254,
      "learning_rate": 0.00015213301878998507,
      "loss": 0.8571,
      "step": 2170
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.5146868228912354,
      "learning_rate": 0.00015208890366824176,
      "loss": 1.0412,
      "step": 2171
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.3476948142051697,
      "learning_rate": 0.00015204477463020552,
      "loss": 0.8234,
      "step": 2172
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.36139971017837524,
      "learning_rate": 0.0001520006316876661,
      "loss": 0.9306,
      "step": 2173
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.19076380133628845,
      "learning_rate": 0.00015195647485241687,
      "loss": 0.7687,
      "step": 2174
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.14012634754180908,
      "learning_rate": 0.000151912304136255,
      "loss": 0.7095,
      "step": 2175
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.6375357508659363,
      "learning_rate": 0.0001518681195509813,
      "loss": 0.9258,
      "step": 2176
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.32928261160850525,
      "learning_rate": 0.00015182392110840031,
      "loss": 0.9184,
      "step": 2177
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.24936726689338684,
      "learning_rate": 0.00015177970882032028,
      "loss": 0.8801,
      "step": 2178
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.5882261395454407,
      "learning_rate": 0.00015173548269855318,
      "loss": 0.6644,
      "step": 2179
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.17585380375385284,
      "learning_rate": 0.0001516912427549146,
      "loss": 0.696,
      "step": 2180
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.12774454057216644,
      "learning_rate": 0.00015164698900122393,
      "loss": 0.8165,
      "step": 2181
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.23623374104499817,
      "learning_rate": 0.0001516027214493041,
      "loss": 0.7306,
      "step": 2182
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.3922203779220581,
      "learning_rate": 0.00015155844011098193,
      "loss": 1.0445,
      "step": 2183
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.17798852920532227,
      "learning_rate": 0.00015151414499808773,
      "loss": 0.8046,
      "step": 2184
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.2336069941520691,
      "learning_rate": 0.00015146983612245558,
      "loss": 0.9543,
      "step": 2185
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.22221563756465912,
      "learning_rate": 0.00015142551349592325,
      "loss": 0.8405,
      "step": 2186
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.3135877251625061,
      "learning_rate": 0.00015138117713033218,
      "loss": 0.7592,
      "step": 2187
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.2221650630235672,
      "learning_rate": 0.0001513368270375274,
      "loss": 0.869,
      "step": 2188
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.2371613085269928,
      "learning_rate": 0.00015129246322935773,
      "loss": 0.8481,
      "step": 2189
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.26214438676834106,
      "learning_rate": 0.00015124808571767554,
      "loss": 1.1361,
      "step": 2190
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.3168241083621979,
      "learning_rate": 0.00015120369451433695,
      "loss": 0.8497,
      "step": 2191
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.3272588551044464,
      "learning_rate": 0.00015115928963120172,
      "loss": 0.849,
      "step": 2192
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.34469494223594666,
      "learning_rate": 0.0001511148710801332,
      "loss": 1.0454,
      "step": 2193
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.4145812392234802,
      "learning_rate": 0.0001510704388729985,
      "loss": 0.9171,
      "step": 2194
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.17291028797626495,
      "learning_rate": 0.00015102599302166826,
      "loss": 0.8564,
      "step": 2195
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.31677716970443726,
      "learning_rate": 0.00015098153353801678,
      "loss": 0.783,
      "step": 2196
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.24707141518592834,
      "learning_rate": 0.00015093706043392218,
      "loss": 0.9846,
      "step": 2197
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.27919650077819824,
      "learning_rate": 0.000150892573721266,
      "loss": 0.8004,
      "step": 2198
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.35723745822906494,
      "learning_rate": 0.0001508480734119335,
      "loss": 0.7668,
      "step": 2199
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.2784711420536041,
      "learning_rate": 0.00015080355951781356,
      "loss": 0.9809,
      "step": 2200
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.19506320357322693,
      "learning_rate": 0.00015075903205079874,
      "loss": 0.8987,
      "step": 2201
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.1466917246580124,
      "learning_rate": 0.00015071449102278516,
      "loss": 0.9041,
      "step": 2202
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.23503902554512024,
      "learning_rate": 0.00015066993644567264,
      "loss": 0.8708,
      "step": 2203
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.33096936345100403,
      "learning_rate": 0.0001506253683313645,
      "loss": 0.5755,
      "step": 2204
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.23562504351139069,
      "learning_rate": 0.00015058078669176776,
      "loss": 1.0086,
      "step": 2205
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.3190794885158539,
      "learning_rate": 0.00015053619153879307,
      "loss": 0.9248,
      "step": 2206
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.25667598843574524,
      "learning_rate": 0.00015049158288435468,
      "loss": 0.8314,
      "step": 2207
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.3396112620830536,
      "learning_rate": 0.00015044696074037036,
      "loss": 0.8209,
      "step": 2208
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.4094826281070709,
      "learning_rate": 0.00015040232511876158,
      "loss": 0.7572,
      "step": 2209
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.4081026613712311,
      "learning_rate": 0.0001503576760314534,
      "loss": 0.86,
      "step": 2210
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0965120792388916,
      "learning_rate": 0.00015031301349037444,
      "loss": 0.9839,
      "step": 2211
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.24655155837535858,
      "learning_rate": 0.00015026833750745696,
      "loss": 0.7043,
      "step": 2212
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.7752378582954407,
      "learning_rate": 0.00015022364809463677,
      "loss": 0.8201,
      "step": 2213
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.2605553865432739,
      "learning_rate": 0.00015017894526385327,
      "loss": 0.8359,
      "step": 2214
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.2230912148952484,
      "learning_rate": 0.0001501342290270495,
      "loss": 0.6175,
      "step": 2215
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.5111907124519348,
      "learning_rate": 0.000150089499396172,
      "loss": 0.9224,
      "step": 2216
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.2843204736709595,
      "learning_rate": 0.00015004475638317092,
      "loss": 0.9406,
      "step": 2217
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.2527127265930176,
      "learning_rate": 0.00015000000000000001,
      "loss": 0.8046,
      "step": 2218
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.29503771662712097,
      "learning_rate": 0.0001499552302586166,
      "loss": 0.5527,
      "step": 2219
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.3523508310317993,
      "learning_rate": 0.00014991044717098154,
      "loss": 0.7182,
      "step": 2220
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.15166299045085907,
      "learning_rate": 0.00014986565074905927,
      "loss": 0.6818,
      "step": 2221
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.2280823290348053,
      "learning_rate": 0.00014982084100481776,
      "loss": 0.7235,
      "step": 2222
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.30657607316970825,
      "learning_rate": 0.00014977601795022864,
      "loss": 0.9444,
      "step": 2223
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.7670952677726746,
      "learning_rate": 0.000149731181597267,
      "loss": 0.8111,
      "step": 2224
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.3040211796760559,
      "learning_rate": 0.00014968633195791152,
      "loss": 0.7258,
      "step": 2225
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.322078138589859,
      "learning_rate": 0.00014964146904414437,
      "loss": 0.8915,
      "step": 2226
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.3068115711212158,
      "learning_rate": 0.00014959659286795137,
      "loss": 0.8895,
      "step": 2227
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.33009687066078186,
      "learning_rate": 0.00014955170344132176,
      "loss": 0.8182,
      "step": 2228
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.22680240869522095,
      "learning_rate": 0.00014950680077624852,
      "loss": 0.8413,
      "step": 2229
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.23320814967155457,
      "learning_rate": 0.00014946188488472795,
      "loss": 0.8162,
      "step": 2230
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.5696733593940735,
      "learning_rate": 0.00014941695577875996,
      "loss": 0.7826,
      "step": 2231
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.29366880655288696,
      "learning_rate": 0.00014937201347034798,
      "loss": 0.9398,
      "step": 2232
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.20396320521831512,
      "learning_rate": 0.0001493270579714991,
      "loss": 0.6428,
      "step": 2233
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.17580462992191315,
      "learning_rate": 0.00014928208929422372,
      "loss": 0.7187,
      "step": 2234
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.2487381100654602,
      "learning_rate": 0.00014923710745053586,
      "loss": 0.8875,
      "step": 2235
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.37087106704711914,
      "learning_rate": 0.00014919211245245314,
      "loss": 0.9038,
      "step": 2236
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.23902627825737,
      "learning_rate": 0.00014914710431199657,
      "loss": 0.7337,
      "step": 2237
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.30767562985420227,
      "learning_rate": 0.0001491020830411907,
      "loss": 0.9896,
      "step": 2238
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.28240329027175903,
      "learning_rate": 0.00014905704865206363,
      "loss": 0.8741,
      "step": 2239
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.19530229270458221,
      "learning_rate": 0.0001490120011566469,
      "loss": 0.9201,
      "step": 2240
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.37120991945266724,
      "learning_rate": 0.00014896694056697565,
      "loss": 0.8244,
      "step": 2241
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.367313027381897,
      "learning_rate": 0.00014892186689508842,
      "loss": 0.7793,
      "step": 2242
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.2339111864566803,
      "learning_rate": 0.00014887678015302727,
      "loss": 0.6433,
      "step": 2243
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.18959471583366394,
      "learning_rate": 0.0001488316803528378,
      "loss": 0.9033,
      "step": 2244
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.6274024844169617,
      "learning_rate": 0.00014878656750656906,
      "loss": 0.9788,
      "step": 2245
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.18269146978855133,
      "learning_rate": 0.00014874144162627356,
      "loss": 0.7444,
      "step": 2246
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.30227795243263245,
      "learning_rate": 0.00014869630272400737,
      "loss": 0.8946,
      "step": 2247
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.31143397092819214,
      "learning_rate": 0.00014865115081182995,
      "loss": 0.9133,
      "step": 2248
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.3277219831943512,
      "learning_rate": 0.0001486059859018043,
      "loss": 0.989,
      "step": 2249
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.41539251804351807,
      "learning_rate": 0.00014856080800599686,
      "loss": 1.0185,
      "step": 2250
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.17602631449699402,
      "learning_rate": 0.00014851561713647752,
      "loss": 0.8324,
      "step": 2251
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.19294938445091248,
      "learning_rate": 0.00014847041330531976,
      "loss": 0.7226,
      "step": 2252
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.25870266556739807,
      "learning_rate": 0.00014842519652460032,
      "loss": 0.77,
      "step": 2253
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.27007877826690674,
      "learning_rate": 0.00014837996680639953,
      "loss": 0.8533,
      "step": 2254
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.2776242792606354,
      "learning_rate": 0.0001483347241628012,
      "loss": 0.7815,
      "step": 2255
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.2914296090602875,
      "learning_rate": 0.00014828946860589247,
      "loss": 0.9414,
      "step": 2256
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.6659608483314514,
      "learning_rate": 0.0001482442001477641,
      "loss": 0.8493,
      "step": 2257
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.2668604552745819,
      "learning_rate": 0.00014819891880051017,
      "loss": 0.6759,
      "step": 2258
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.28930962085723877,
      "learning_rate": 0.00014815362457622818,
      "loss": 0.9109,
      "step": 2259
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.26073694229125977,
      "learning_rate": 0.00014810831748701922,
      "loss": 0.8248,
      "step": 2260
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.22363312542438507,
      "learning_rate": 0.00014806299754498766,
      "loss": 0.7724,
      "step": 2261
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.23291516304016113,
      "learning_rate": 0.0001480176647622414,
      "loss": 0.7228,
      "step": 2262
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.4719015657901764,
      "learning_rate": 0.0001479723191508917,
      "loss": 0.9817,
      "step": 2263
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.45693278312683105,
      "learning_rate": 0.00014792696072305332,
      "loss": 0.7172,
      "step": 2264
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.26544687151908875,
      "learning_rate": 0.00014788158949084442,
      "loss": 0.6879,
      "step": 2265
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.6262943148612976,
      "learning_rate": 0.0001478362054663865,
      "loss": 1.1078,
      "step": 2266
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.2581386864185333,
      "learning_rate": 0.0001477908086618047,
      "loss": 0.7104,
      "step": 2267
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.1935689002275467,
      "learning_rate": 0.00014774539908922723,
      "loss": 0.8266,
      "step": 2268
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.30870693922042847,
      "learning_rate": 0.00014769997676078607,
      "loss": 0.8177,
      "step": 2269
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.22931811213493347,
      "learning_rate": 0.00014765454168861635,
      "loss": 0.8005,
      "step": 2270
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.3609732985496521,
      "learning_rate": 0.00014760909388485672,
      "loss": 0.5337,
      "step": 2271
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.2658248841762543,
      "learning_rate": 0.0001475636333616492,
      "loss": 0.8386,
      "step": 2272
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.2518165409564972,
      "learning_rate": 0.00014751816013113924,
      "loss": 0.8194,
      "step": 2273
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.329321950674057,
      "learning_rate": 0.00014747267420547563,
      "loss": 0.8169,
      "step": 2274
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.2283117026090622,
      "learning_rate": 0.00014742717559681058,
      "loss": 0.9225,
      "step": 2275
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.10379299521446228,
      "learning_rate": 0.00014738166431729977,
      "loss": 0.8026,
      "step": 2276
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.29956740140914917,
      "learning_rate": 0.0001473361403791021,
      "loss": 0.8539,
      "step": 2277
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.2453261762857437,
      "learning_rate": 0.00014729060379437993,
      "loss": 0.6845,
      "step": 2278
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.20133328437805176,
      "learning_rate": 0.00014724505457529912,
      "loss": 0.7366,
      "step": 2279
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.15443076193332672,
      "learning_rate": 0.00014719949273402867,
      "loss": 0.8521,
      "step": 2280
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.36080214381217957,
      "learning_rate": 0.0001471539182827411,
      "loss": 0.7773,
      "step": 2281
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.22351616621017456,
      "learning_rate": 0.00014710833123361233,
      "loss": 0.8973,
      "step": 2282
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.45762133598327637,
      "learning_rate": 0.00014706273159882156,
      "loss": 0.8794,
      "step": 2283
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.23240937292575836,
      "learning_rate": 0.00014701711939055137,
      "loss": 0.6199,
      "step": 2284
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.7578893899917603,
      "learning_rate": 0.00014697149462098775,
      "loss": 0.7331,
      "step": 2285
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.41265544295310974,
      "learning_rate": 0.0001469258573023199,
      "loss": 0.9798,
      "step": 2286
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.2724035382270813,
      "learning_rate": 0.00014688020744674062,
      "loss": 0.8444,
      "step": 2287
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.4656124413013458,
      "learning_rate": 0.00014683454506644585,
      "loss": 0.6674,
      "step": 2288
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.2930022180080414,
      "learning_rate": 0.00014678887017363496,
      "loss": 0.8011,
      "step": 2289
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.21969109773635864,
      "learning_rate": 0.00014674318278051063,
      "loss": 0.7964,
      "step": 2290
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.3908613324165344,
      "learning_rate": 0.00014669748289927888,
      "loss": 0.7463,
      "step": 2291
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.2935437262058258,
      "learning_rate": 0.00014665177054214916,
      "loss": 0.9016,
      "step": 2292
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.31562182307243347,
      "learning_rate": 0.0001466060457213341,
      "loss": 0.7558,
      "step": 2293
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.6572291851043701,
      "learning_rate": 0.0001465603084490498,
      "loss": 0.7766,
      "step": 2294
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.3264436721801758,
      "learning_rate": 0.00014651455873751558,
      "loss": 0.8149,
      "step": 2295
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.5017528533935547,
      "learning_rate": 0.00014646879659895414,
      "loss": 0.6972,
      "step": 2296
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.17868098616600037,
      "learning_rate": 0.00014642302204559147,
      "loss": 0.7001,
      "step": 2297
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.3462487459182739,
      "learning_rate": 0.00014637723508965694,
      "loss": 0.9991,
      "step": 2298
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.2151951789855957,
      "learning_rate": 0.00014633143574338314,
      "loss": 0.8626,
      "step": 2299
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.26452693343162537,
      "learning_rate": 0.00014628562401900602,
      "loss": 0.7324,
      "step": 2300
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.3622521758079529,
      "learning_rate": 0.0001462397999287649,
      "loss": 0.8702,
      "step": 2301
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.25818872451782227,
      "learning_rate": 0.00014619396348490227,
      "loss": 0.8661,
      "step": 2302
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.23265741765499115,
      "learning_rate": 0.00014614811469966402,
      "loss": 0.8419,
      "step": 2303
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.26200997829437256,
      "learning_rate": 0.0001461022535852993,
      "loss": 0.8746,
      "step": 2304
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.29928159713745117,
      "learning_rate": 0.0001460563801540605,
      "loss": 0.8421,
      "step": 2305
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.3369646668434143,
      "learning_rate": 0.0001460104944182035,
      "loss": 0.8806,
      "step": 2306
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.7505955696105957,
      "learning_rate": 0.00014596459638998717,
      "loss": 1.1059,
      "step": 2307
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.2387053370475769,
      "learning_rate": 0.00014591868608167396,
      "loss": 0.7861,
      "step": 2308
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.24735663831233978,
      "learning_rate": 0.00014587276350552938,
      "loss": 0.8387,
      "step": 2309
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.4209224283695221,
      "learning_rate": 0.00014582682867382235,
      "loss": 0.5215,
      "step": 2310
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.29308608174324036,
      "learning_rate": 0.00014578088159882495,
      "loss": 0.7242,
      "step": 2311
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.3042390048503876,
      "learning_rate": 0.00014573492229281264,
      "loss": 0.8008,
      "step": 2312
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.27277347445487976,
      "learning_rate": 0.0001456889507680641,
      "loss": 0.8077,
      "step": 2313
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.22850143909454346,
      "learning_rate": 0.00014564296703686129,
      "loss": 0.8408,
      "step": 2314
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.6971317529678345,
      "learning_rate": 0.00014559697111148938,
      "loss": 0.7991,
      "step": 2315
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.20666393637657166,
      "learning_rate": 0.00014555096300423686,
      "loss": 0.9613,
      "step": 2316
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.33734381198883057,
      "learning_rate": 0.0001455049427273955,
      "loss": 0.6889,
      "step": 2317
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.249893918633461,
      "learning_rate": 0.00014545891029326018,
      "loss": 0.874,
      "step": 2318
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.21141619980335236,
      "learning_rate": 0.00014541286571412916,
      "loss": 0.917,
      "step": 2319
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.264023095369339,
      "learning_rate": 0.00014536680900230394,
      "loss": 0.9798,
      "step": 2320
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.2060088962316513,
      "learning_rate": 0.0001453207401700892,
      "loss": 0.6575,
      "step": 2321
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.20655620098114014,
      "learning_rate": 0.00014527465922979286,
      "loss": 0.9406,
      "step": 2322
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.3334447145462036,
      "learning_rate": 0.0001452285661937261,
      "loss": 0.8371,
      "step": 2323
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.1910451054573059,
      "learning_rate": 0.0001451824610742034,
      "loss": 0.8155,
      "step": 2324
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.2348099946975708,
      "learning_rate": 0.00014513634388354233,
      "loss": 0.7807,
      "step": 2325
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.3381849229335785,
      "learning_rate": 0.0001450902146340638,
      "loss": 0.9455,
      "step": 2326
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.2901449501514435,
      "learning_rate": 0.00014504407333809188,
      "loss": 0.7178,
      "step": 2327
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.22970819473266602,
      "learning_rate": 0.00014499792000795383,
      "loss": 0.61,
      "step": 2328
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.40742355585098267,
      "learning_rate": 0.00014495175465598025,
      "loss": 0.6216,
      "step": 2329
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.18442103266716003,
      "learning_rate": 0.0001449055772945048,
      "loss": 0.6374,
      "step": 2330
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.3024638891220093,
      "learning_rate": 0.00014485938793586448,
      "loss": 0.8465,
      "step": 2331
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.12403620034456253,
      "learning_rate": 0.0001448131865923994,
      "loss": 0.8034,
      "step": 2332
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.4964429438114166,
      "learning_rate": 0.00014476697327645292,
      "loss": 0.584,
      "step": 2333
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.3109918534755707,
      "learning_rate": 0.00014472074800037156,
      "loss": 0.8617,
      "step": 2334
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.5827199220657349,
      "learning_rate": 0.0001446745107765051,
      "loss": 0.7692,
      "step": 2335
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.19524209201335907,
      "learning_rate": 0.00014462826161720647,
      "loss": 0.8,
      "step": 2336
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.6199813485145569,
      "learning_rate": 0.00014458200053483173,
      "loss": 0.795,
      "step": 2337
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.5727781653404236,
      "learning_rate": 0.00014453572754174026,
      "loss": 0.7104,
      "step": 2338
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.3366560935974121,
      "learning_rate": 0.00014448944265029452,
      "loss": 0.9258,
      "step": 2339
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.22985489666461945,
      "learning_rate": 0.0001444431458728602,
      "loss": 0.9177,
      "step": 2340
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.39314815402030945,
      "learning_rate": 0.0001443968372218061,
      "loss": 1.0018,
      "step": 2341
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.4022800326347351,
      "learning_rate": 0.00014435051670950427,
      "loss": 0.9102,
      "step": 2342
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.267549604177475,
      "learning_rate": 0.0001443041843483299,
      "loss": 0.734,
      "step": 2343
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.28219860792160034,
      "learning_rate": 0.0001442578401506613,
      "loss": 0.846,
      "step": 2344
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.18731406331062317,
      "learning_rate": 0.00014421148412888002,
      "loss": 1.036,
      "step": 2345
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.35142630338668823,
      "learning_rate": 0.00014416511629537074,
      "loss": 0.9185,
      "step": 2346
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.28891563415527344,
      "learning_rate": 0.00014411873666252126,
      "loss": 0.6519,
      "step": 2347
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.2202114760875702,
      "learning_rate": 0.00014407234524272254,
      "loss": 0.803,
      "step": 2348
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.2719172239303589,
      "learning_rate": 0.0001440259420483688,
      "loss": 0.6851,
      "step": 2349
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.2714228928089142,
      "learning_rate": 0.0001439795270918572,
      "loss": 0.7252,
      "step": 2350
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.22782635688781738,
      "learning_rate": 0.00014393310038558825,
      "loss": 0.6969,
      "step": 2351
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.40645134449005127,
      "learning_rate": 0.00014388666194196543,
      "loss": 0.8424,
      "step": 2352
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9075053334236145,
      "learning_rate": 0.00014384021177339548,
      "loss": 0.8643,
      "step": 2353
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.17239877581596375,
      "learning_rate": 0.0001437937498922882,
      "loss": 0.7572,
      "step": 2354
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.1730787754058838,
      "learning_rate": 0.0001437472763110566,
      "loss": 0.7946,
      "step": 2355
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.33066266775131226,
      "learning_rate": 0.00014370079104211665,
      "loss": 0.8267,
      "step": 2356
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.23244698345661163,
      "learning_rate": 0.00014365429409788768,
      "loss": 0.7211,
      "step": 2357
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.20850831270217896,
      "learning_rate": 0.00014360778549079193,
      "loss": 0.7994,
      "step": 2358
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.3952830135822296,
      "learning_rate": 0.00014356126523325485,
      "loss": 0.7193,
      "step": 2359
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.2335541993379593,
      "learning_rate": 0.000143514733337705,
      "loss": 0.7499,
      "step": 2360
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.22221410274505615,
      "learning_rate": 0.00014346818981657403,
      "loss": 0.8703,
      "step": 2361
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.1346714198589325,
      "learning_rate": 0.0001434216346822967,
      "loss": 0.6947,
      "step": 2362
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.19459573924541473,
      "learning_rate": 0.00014337506794731092,
      "loss": 0.7817,
      "step": 2363
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.2718961536884308,
      "learning_rate": 0.00014332848962405757,
      "loss": 1.0009,
      "step": 2364
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.7205201387405396,
      "learning_rate": 0.00014328189972498084,
      "loss": 0.8693,
      "step": 2365
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.6589094400405884,
      "learning_rate": 0.00014323529826252776,
      "loss": 0.8066,
      "step": 2366
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.13576307892799377,
      "learning_rate": 0.00014318868524914864,
      "loss": 0.8135,
      "step": 2367
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.2232765257358551,
      "learning_rate": 0.00014314206069729682,
      "loss": 0.8393,
      "step": 2368
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.33462584018707275,
      "learning_rate": 0.00014309542461942863,
      "loss": 0.6388,
      "step": 2369
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.17280414700508118,
      "learning_rate": 0.00014304877702800368,
      "loss": 0.7438,
      "step": 2370
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.264430969953537,
      "learning_rate": 0.00014300211793548447,
      "loss": 0.82,
      "step": 2371
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.3498428761959076,
      "learning_rate": 0.00014295544735433666,
      "loss": 0.846,
      "step": 2372
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.3036356568336487,
      "learning_rate": 0.00014290876529702895,
      "loss": 0.7811,
      "step": 2373
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.18760105967521667,
      "learning_rate": 0.0001428620717760331,
      "loss": 0.8943,
      "step": 2374
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.22531965374946594,
      "learning_rate": 0.00014281536680382402,
      "loss": 0.6404,
      "step": 2375
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2335814237594604,
      "learning_rate": 0.0001427686503928795,
      "loss": 0.9886,
      "step": 2376
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.18346960842609406,
      "learning_rate": 0.0001427219225556806,
      "loss": 0.6696,
      "step": 2377
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.6730534434318542,
      "learning_rate": 0.0001426751833047113,
      "loss": 0.6263,
      "step": 2378
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.31292176246643066,
      "learning_rate": 0.0001426284326524586,
      "loss": 1.0101,
      "step": 2379
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.33519408106803894,
      "learning_rate": 0.00014258167061141264,
      "loss": 0.8298,
      "step": 2380
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.38257989287376404,
      "learning_rate": 0.0001425348971940666,
      "loss": 0.9376,
      "step": 2381
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.2259543538093567,
      "learning_rate": 0.00014248811241291662,
      "loss": 0.6972,
      "step": 2382
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.26336562633514404,
      "learning_rate": 0.00014244131628046193,
      "loss": 0.8997,
      "step": 2383
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.3073304295539856,
      "learning_rate": 0.00014239450880920476,
      "loss": 0.8647,
      "step": 2384
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.34438472986221313,
      "learning_rate": 0.0001423476900116505,
      "loss": 0.9661,
      "step": 2385
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.6708562970161438,
      "learning_rate": 0.00014230085990030733,
      "loss": 0.6008,
      "step": 2386
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.41450992226600647,
      "learning_rate": 0.0001422540184876866,
      "loss": 0.7004,
      "step": 2387
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.3428523540496826,
      "learning_rate": 0.00014220716578630272,
      "loss": 1.0304,
      "step": 2388
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.26867061853408813,
      "learning_rate": 0.00014216030180867306,
      "loss": 0.8293,
      "step": 2389
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.17142240703105927,
      "learning_rate": 0.00014211342656731795,
      "loss": 0.6804,
      "step": 2390
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.26140740513801575,
      "learning_rate": 0.0001420665400747608,
      "loss": 0.8912,
      "step": 2391
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.31380993127822876,
      "learning_rate": 0.00014201964234352801,
      "loss": 0.7718,
      "step": 2392
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.7480127215385437,
      "learning_rate": 0.00014197273338614893,
      "loss": 1.0456,
      "step": 2393
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.36928078532218933,
      "learning_rate": 0.00014192581321515604,
      "loss": 0.7434,
      "step": 2394
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.1755344718694687,
      "learning_rate": 0.00014187888184308468,
      "loss": 0.8754,
      "step": 2395
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.20409660041332245,
      "learning_rate": 0.00014183193928247323,
      "loss": 0.8096,
      "step": 2396
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.259235143661499,
      "learning_rate": 0.00014178498554586307,
      "loss": 0.8969,
      "step": 2397
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.2653603255748749,
      "learning_rate": 0.00014173802064579858,
      "loss": 0.9366,
      "step": 2398
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.44053441286087036,
      "learning_rate": 0.0001416910445948271,
      "loss": 0.9061,
      "step": 2399
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.24442823231220245,
      "learning_rate": 0.00014164405740549893,
      "loss": 1.0312,
      "step": 2400
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.19490085542201996,
      "learning_rate": 0.00014159705909036737,
      "loss": 0.6951,
      "step": 2401
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.1960410624742508,
      "learning_rate": 0.00014155004966198874,
      "loss": 0.7197,
      "step": 2402
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.21545018255710602,
      "learning_rate": 0.00014150302913292217,
      "loss": 0.7661,
      "step": 2403
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.3831214904785156,
      "learning_rate": 0.00014145599751572995,
      "loss": 0.9038,
      "step": 2404
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.21963761746883392,
      "learning_rate": 0.00014140895482297726,
      "loss": 0.7439,
      "step": 2405
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.2508026659488678,
      "learning_rate": 0.00014136190106723217,
      "loss": 0.7974,
      "step": 2406
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.1762431412935257,
      "learning_rate": 0.00014131483626106582,
      "loss": 0.8143,
      "step": 2407
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.4845079481601715,
      "learning_rate": 0.00014126776041705215,
      "loss": 0.8031,
      "step": 2408
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.18585649132728577,
      "learning_rate": 0.0001412206735477682,
      "loss": 0.745,
      "step": 2409
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.33426254987716675,
      "learning_rate": 0.00014117357566579398,
      "loss": 0.9427,
      "step": 2410
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.2650124430656433,
      "learning_rate": 0.00014112646678371223,
      "loss": 0.8027,
      "step": 2411
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.23448063433170319,
      "learning_rate": 0.00014107934691410878,
      "loss": 0.724,
      "step": 2412
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.32412686944007874,
      "learning_rate": 0.00014103221606957245,
      "loss": 0.5521,
      "step": 2413
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.17298898100852966,
      "learning_rate": 0.00014098507426269484,
      "loss": 0.6739,
      "step": 2414
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.2445252388715744,
      "learning_rate": 0.0001409379215060706,
      "loss": 0.9245,
      "step": 2415
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.6318814158439636,
      "learning_rate": 0.00014089075781229725,
      "loss": 0.8558,
      "step": 2416
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.24891601502895355,
      "learning_rate": 0.00014084358319397522,
      "loss": 0.8274,
      "step": 2417
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.25888147950172424,
      "learning_rate": 0.00014079639766370792,
      "loss": 0.8496,
      "step": 2418
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.36969587206840515,
      "learning_rate": 0.0001407492012341016,
      "loss": 0.7718,
      "step": 2419
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.29243382811546326,
      "learning_rate": 0.0001407019939177655,
      "loss": 0.9107,
      "step": 2420
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.35134416818618774,
      "learning_rate": 0.00014065477572731166,
      "loss": 0.7707,
      "step": 2421
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.3067546486854553,
      "learning_rate": 0.0001406075466753552,
      "loss": 0.7619,
      "step": 2422
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.21858395636081696,
      "learning_rate": 0.00014056030677451394,
      "loss": 0.6977,
      "step": 2423
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.6878756880760193,
      "learning_rate": 0.0001405130560374087,
      "loss": 0.7352,
      "step": 2424
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.2085597962141037,
      "learning_rate": 0.00014046579447666324,
      "loss": 0.8833,
      "step": 2425
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.6031900644302368,
      "learning_rate": 0.0001404185221049041,
      "loss": 0.7448,
      "step": 2426
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.18520382046699524,
      "learning_rate": 0.00014037123893476084,
      "loss": 0.8147,
      "step": 2427
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.8229082226753235,
      "learning_rate": 0.00014032394497886578,
      "loss": 0.7584,
      "step": 2428
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.3067129850387573,
      "learning_rate": 0.00014027664024985416,
      "loss": 0.7222,
      "step": 2429
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.22335094213485718,
      "learning_rate": 0.00014022932476036415,
      "loss": 0.9498,
      "step": 2430
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.25801679491996765,
      "learning_rate": 0.00014018199852303676,
      "loss": 0.7564,
      "step": 2431
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.23923251032829285,
      "learning_rate": 0.00014013466155051585,
      "loss": 0.7426,
      "step": 2432
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.22420883178710938,
      "learning_rate": 0.00014008731385544814,
      "loss": 0.8355,
      "step": 2433
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.2280421406030655,
      "learning_rate": 0.00014003995545048333,
      "loss": 0.6883,
      "step": 2434
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.25262755155563354,
      "learning_rate": 0.00013999258634827378,
      "loss": 0.791,
      "step": 2435
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.22796697914600372,
      "learning_rate": 0.0001399452065614749,
      "loss": 0.6642,
      "step": 2436
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.24146917462348938,
      "learning_rate": 0.00013989781610274484,
      "loss": 0.7829,
      "step": 2437
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.26179951429367065,
      "learning_rate": 0.00013985041498474466,
      "loss": 0.8488,
      "step": 2438
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.2895072102546692,
      "learning_rate": 0.00013980300322013822,
      "loss": 0.6614,
      "step": 2439
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.29915228486061096,
      "learning_rate": 0.00013975558082159224,
      "loss": 0.8519,
      "step": 2440
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.3813111484050751,
      "learning_rate": 0.00013970814780177635,
      "loss": 0.5923,
      "step": 2441
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.37766972184181213,
      "learning_rate": 0.0001396607041733629,
      "loss": 0.7857,
      "step": 2442
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.20488446950912476,
      "learning_rate": 0.00013961324994902713,
      "loss": 0.8112,
      "step": 2443
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.16040094196796417,
      "learning_rate": 0.00013956578514144716,
      "loss": 0.7503,
      "step": 2444
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.24290111660957336,
      "learning_rate": 0.00013951830976330384,
      "loss": 0.7855,
      "step": 2445
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.20249225199222565,
      "learning_rate": 0.0001394708238272809,
      "loss": 0.7798,
      "step": 2446
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.17435458302497864,
      "learning_rate": 0.00013942332734606495,
      "loss": 0.7244,
      "step": 2447
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.3728397786617279,
      "learning_rate": 0.00013937582033234525,
      "loss": 0.9302,
      "step": 2448
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.17183688282966614,
      "learning_rate": 0.00013932830279881405,
      "loss": 0.6659,
      "step": 2449
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.5527690052986145,
      "learning_rate": 0.00013928077475816632,
      "loss": 0.7576,
      "step": 2450
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.5611159801483154,
      "learning_rate": 0.00013923323622309984,
      "loss": 0.9903,
      "step": 2451
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.3619031608104706,
      "learning_rate": 0.00013918568720631519,
      "loss": 0.7126,
      "step": 2452
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.490081787109375,
      "learning_rate": 0.0001391381277205158,
      "loss": 0.8047,
      "step": 2453
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.2431175410747528,
      "learning_rate": 0.00013909055777840785,
      "loss": 0.6737,
      "step": 2454
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.1982722133398056,
      "learning_rate": 0.00013904297739270036,
      "loss": 0.7193,
      "step": 2455
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.1585005521774292,
      "learning_rate": 0.00013899538657610505,
      "loss": 0.8953,
      "step": 2456
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.592008650302887,
      "learning_rate": 0.0001389477853413365,
      "loss": 0.8113,
      "step": 2457
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.21411806344985962,
      "learning_rate": 0.0001389001737011121,
      "loss": 0.8835,
      "step": 2458
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.3751707077026367,
      "learning_rate": 0.00013885255166815196,
      "loss": 0.7201,
      "step": 2459
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.5617505311965942,
      "learning_rate": 0.00013880491925517897,
      "loss": 0.8153,
      "step": 2460
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.4925346374511719,
      "learning_rate": 0.00013875727647491882,
      "loss": 0.6202,
      "step": 2461
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.15960943698883057,
      "learning_rate": 0.0001387096233400999,
      "loss": 0.5943,
      "step": 2462
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.26329782605171204,
      "learning_rate": 0.00013866195986345355,
      "loss": 0.7791,
      "step": 2463
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.2895709276199341,
      "learning_rate": 0.00013861428605771363,
      "loss": 0.8492,
      "step": 2464
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.28053221106529236,
      "learning_rate": 0.00013856660193561693,
      "loss": 0.8302,
      "step": 2465
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.24614492058753967,
      "learning_rate": 0.00013851890750990294,
      "loss": 0.9272,
      "step": 2466
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.22474117577075958,
      "learning_rate": 0.00013847120279331386,
      "loss": 0.7535,
      "step": 2467
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.2505946457386017,
      "learning_rate": 0.00013842348779859477,
      "loss": 0.7389,
      "step": 2468
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.6555995941162109,
      "learning_rate": 0.00013837576253849333,
      "loss": 0.7011,
      "step": 2469
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.15317730605602264,
      "learning_rate": 0.00013832802702576008,
      "loss": 0.8827,
      "step": 2470
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.5859485864639282,
      "learning_rate": 0.0001382802812731482,
      "loss": 0.8883,
      "step": 2471
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.21897970139980316,
      "learning_rate": 0.00013823252529341368,
      "loss": 0.771,
      "step": 2472
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.2638358175754547,
      "learning_rate": 0.0001381847590993152,
      "loss": 0.693,
      "step": 2473
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.2552714943885803,
      "learning_rate": 0.00013813698270361417,
      "loss": 0.7329,
      "step": 2474
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.5519471764564514,
      "learning_rate": 0.00013808919611907474,
      "loss": 0.3193,
      "step": 2475
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.45425018668174744,
      "learning_rate": 0.0001380413993584638,
      "loss": 0.8186,
      "step": 2476
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.2722291052341461,
      "learning_rate": 0.00013799359243455087,
      "loss": 0.8016,
      "step": 2477
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.20911331474781036,
      "learning_rate": 0.00013794577536010833,
      "loss": 0.6057,
      "step": 2478
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.2433968037366867,
      "learning_rate": 0.00013789794814791117,
      "loss": 0.8722,
      "step": 2479
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.35349470376968384,
      "learning_rate": 0.00013785011081073707,
      "loss": 0.9373,
      "step": 2480
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.20419621467590332,
      "learning_rate": 0.00013780226336136648,
      "loss": 0.8671,
      "step": 2481
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.5143230557441711,
      "learning_rate": 0.0001377544058125826,
      "loss": 0.7028,
      "step": 2482
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.22853386402130127,
      "learning_rate": 0.00013770653817717112,
      "loss": 1.0099,
      "step": 2483
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.2610328793525696,
      "learning_rate": 0.00013765866046792067,
      "loss": 0.9547,
      "step": 2484
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.2947998344898224,
      "learning_rate": 0.0001376107726976224,
      "loss": 0.916,
      "step": 2485
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.33650705218315125,
      "learning_rate": 0.0001375628748790702,
      "loss": 0.946,
      "step": 2486
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.300165057182312,
      "learning_rate": 0.00013751496702506076,
      "loss": 0.6569,
      "step": 2487
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6593897938728333,
      "learning_rate": 0.00013746704914839326,
      "loss": 1.0375,
      "step": 2488
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.3071247339248657,
      "learning_rate": 0.0001374191212618696,
      "loss": 0.7592,
      "step": 2489
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.5557536482810974,
      "learning_rate": 0.00013737118337829452,
      "loss": 0.6665,
      "step": 2490
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.2220776081085205,
      "learning_rate": 0.00013732323551047526,
      "loss": 0.8651,
      "step": 2491
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.4346526563167572,
      "learning_rate": 0.00013727527767122173,
      "loss": 1.0067,
      "step": 2492
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.2225855439901352,
      "learning_rate": 0.00013722730987334657,
      "loss": 0.7512,
      "step": 2493
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.2791966199874878,
      "learning_rate": 0.00013717933212966505,
      "loss": 0.9228,
      "step": 2494
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.36873072385787964,
      "learning_rate": 0.00013713134445299518,
      "loss": 0.7451,
      "step": 2495
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.24754348397254944,
      "learning_rate": 0.00013708334685615746,
      "loss": 0.7258,
      "step": 2496
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.24046561121940613,
      "learning_rate": 0.0001370353393519752,
      "loss": 0.9166,
      "step": 2497
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.270344078540802,
      "learning_rate": 0.00013698732195327427,
      "loss": 0.8688,
      "step": 2498
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.19683033227920532,
      "learning_rate": 0.00013693929467288317,
      "loss": 0.8235,
      "step": 2499
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.26374948024749756,
      "learning_rate": 0.00013689125752363313,
      "loss": 1.0262,
      "step": 2500
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.20777520537376404,
      "learning_rate": 0.0001368432105183579,
      "loss": 0.702,
      "step": 2501
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.2278452068567276,
      "learning_rate": 0.00013679515366989392,
      "loss": 0.9988,
      "step": 2502
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.23963822424411774,
      "learning_rate": 0.00013674708699108035,
      "loss": 0.6265,
      "step": 2503
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.18451426923274994,
      "learning_rate": 0.0001366990104947588,
      "loss": 0.6787,
      "step": 2504
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.13328655064105988,
      "learning_rate": 0.0001366509241937736,
      "loss": 0.7862,
      "step": 2505
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.3637969493865967,
      "learning_rate": 0.00013660282810097176,
      "loss": 0.8252,
      "step": 2506
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.2118663787841797,
      "learning_rate": 0.00013655472222920273,
      "loss": 0.8167,
      "step": 2507
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.1763051450252533,
      "learning_rate": 0.00013650660659131878,
      "loss": 0.7382,
      "step": 2508
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.8949601650238037,
      "learning_rate": 0.00013645848120017462,
      "loss": 0.6831,
      "step": 2509
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.3358483910560608,
      "learning_rate": 0.00013641034606862767,
      "loss": 0.7433,
      "step": 2510
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.22388960421085358,
      "learning_rate": 0.00013636220120953792,
      "loss": 0.7458,
      "step": 2511
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.24921855330467224,
      "learning_rate": 0.0001363140466357679,
      "loss": 0.5556,
      "step": 2512
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.2782127857208252,
      "learning_rate": 0.00013626588236018283,
      "loss": 0.6111,
      "step": 2513
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6115017533302307,
      "learning_rate": 0.00013621770839565054,
      "loss": 0.9977,
      "step": 2514
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.32366254925727844,
      "learning_rate": 0.0001361695247550413,
      "loss": 0.8793,
      "step": 2515
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.2686774730682373,
      "learning_rate": 0.0001361213314512281,
      "loss": 0.878,
      "step": 2516
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.7019500732421875,
      "learning_rate": 0.00013607312849708643,
      "loss": 0.5997,
      "step": 2517
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.45619505643844604,
      "learning_rate": 0.00013602491590549443,
      "loss": 0.7864,
      "step": 2518
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.2911899983882904,
      "learning_rate": 0.00013597669368933278,
      "loss": 0.7609,
      "step": 2519
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.2662532329559326,
      "learning_rate": 0.00013592846186148474,
      "loss": 0.8864,
      "step": 2520
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.32398173213005066,
      "learning_rate": 0.0001358802204348361,
      "loss": 0.9085,
      "step": 2521
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.2905527949333191,
      "learning_rate": 0.00013583196942227528,
      "loss": 0.7839,
      "step": 2522
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.24412643909454346,
      "learning_rate": 0.0001357837088366932,
      "loss": 0.9288,
      "step": 2523
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.20930448174476624,
      "learning_rate": 0.0001357354386909834,
      "loss": 0.8082,
      "step": 2524
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.2749195992946625,
      "learning_rate": 0.00013568715899804184,
      "loss": 0.9044,
      "step": 2525
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.24593819677829742,
      "learning_rate": 0.00013563886977076723,
      "loss": 0.8541,
      "step": 2526
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.22085259854793549,
      "learning_rate": 0.0001355905710220607,
      "loss": 0.7365,
      "step": 2527
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.29604002833366394,
      "learning_rate": 0.00013554226276482595,
      "loss": 0.8516,
      "step": 2528
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.2848421335220337,
      "learning_rate": 0.0001354939450119692,
      "loss": 0.9316,
      "step": 2529
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.2342422604560852,
      "learning_rate": 0.00013544561777639922,
      "loss": 1.0122,
      "step": 2530
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.2696267068386078,
      "learning_rate": 0.00013539728107102734,
      "loss": 0.6644,
      "step": 2531
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.3422604203224182,
      "learning_rate": 0.0001353489349087674,
      "loss": 0.8854,
      "step": 2532
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6250295042991638,
      "learning_rate": 0.0001353005793025358,
      "loss": 0.9247,
      "step": 2533
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.8491381406784058,
      "learning_rate": 0.00013525221426525133,
      "loss": 0.7872,
      "step": 2534
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.2412242591381073,
      "learning_rate": 0.0001352038398098355,
      "loss": 0.7656,
      "step": 2535
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.2593570351600647,
      "learning_rate": 0.00013515545594921217,
      "loss": 0.6996,
      "step": 2536
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.33324339985847473,
      "learning_rate": 0.00013510706269630781,
      "loss": 0.8714,
      "step": 2537
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.36670222878456116,
      "learning_rate": 0.00013505866006405137,
      "loss": 1.0139,
      "step": 2538
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.11512383073568344,
      "learning_rate": 0.00013501024806537428,
      "loss": 0.6002,
      "step": 2539
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.41225117444992065,
      "learning_rate": 0.00013496182671321051,
      "loss": 0.9084,
      "step": 2540
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.5973315834999084,
      "learning_rate": 0.00013491339602049652,
      "loss": 0.6606,
      "step": 2541
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.19857776165008545,
      "learning_rate": 0.00013486495600017123,
      "loss": 0.8105,
      "step": 2542
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.1411210298538208,
      "learning_rate": 0.00013481650666517613,
      "loss": 0.8862,
      "step": 2543
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.25305792689323425,
      "learning_rate": 0.0001347680480284551,
      "loss": 0.822,
      "step": 2544
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.17716503143310547,
      "learning_rate": 0.0001347195801029546,
      "loss": 0.6284,
      "step": 2545
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.3129384219646454,
      "learning_rate": 0.00013467110290162353,
      "loss": 0.8176,
      "step": 2546
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.27314427495002747,
      "learning_rate": 0.00013462261643741317,
      "loss": 0.606,
      "step": 2547
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.34890732169151306,
      "learning_rate": 0.00013457412072327753,
      "loss": 0.9196,
      "step": 2548
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.27647095918655396,
      "learning_rate": 0.00013452561577217278,
      "loss": 0.9974,
      "step": 2549
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.2888062596321106,
      "learning_rate": 0.00013447710159705779,
      "loss": 0.9809,
      "step": 2550
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.22298206388950348,
      "learning_rate": 0.00013442857821089382,
      "loss": 0.9471,
      "step": 2551
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.35916417837142944,
      "learning_rate": 0.0001343800456266445,
      "loss": 1.1199,
      "step": 2552
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.32025644183158875,
      "learning_rate": 0.00013433150385727606,
      "loss": 0.799,
      "step": 2553
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.2834935784339905,
      "learning_rate": 0.00013428295291575718,
      "loss": 0.8648,
      "step": 2554
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.21756276488304138,
      "learning_rate": 0.00013423439281505888,
      "loss": 0.8423,
      "step": 2555
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.6932559013366699,
      "learning_rate": 0.00013418582356815466,
      "loss": 0.7543,
      "step": 2556
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.13268855214118958,
      "learning_rate": 0.00013413724518802051,
      "loss": 0.9726,
      "step": 2557
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.3180486261844635,
      "learning_rate": 0.00013408865768763483,
      "loss": 0.7944,
      "step": 2558
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.3005668520927429,
      "learning_rate": 0.0001340400610799785,
      "loss": 0.855,
      "step": 2559
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.26104316115379333,
      "learning_rate": 0.00013399145537803476,
      "loss": 0.9216,
      "step": 2560
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.30152270197868347,
      "learning_rate": 0.00013394284059478926,
      "loss": 0.8867,
      "step": 2561
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.30901801586151123,
      "learning_rate": 0.00013389421674323029,
      "loss": 0.823,
      "step": 2562
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.3084751069545746,
      "learning_rate": 0.00013384558383634825,
      "loss": 0.8757,
      "step": 2563
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.20911960303783417,
      "learning_rate": 0.00013379694188713617,
      "loss": 0.7061,
      "step": 2564
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.6684141159057617,
      "learning_rate": 0.0001337482909085895,
      "loss": 0.8759,
      "step": 2565
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.3992511034011841,
      "learning_rate": 0.00013369963091370592,
      "loss": 0.6928,
      "step": 2566
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.25013288855552673,
      "learning_rate": 0.00013365096191548576,
      "loss": 0.7358,
      "step": 2567
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.22034944593906403,
      "learning_rate": 0.00013360228392693153,
      "loss": 0.6347,
      "step": 2568
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.24480809271335602,
      "learning_rate": 0.00013355359696104834,
      "loss": 0.744,
      "step": 2569
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.1468706578016281,
      "learning_rate": 0.00013350490103084358,
      "loss": 1.0013,
      "step": 2570
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.4098435640335083,
      "learning_rate": 0.00013345619614932704,
      "loss": 0.7924,
      "step": 2571
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.1271502822637558,
      "learning_rate": 0.0001334074823295109,
      "loss": 0.5252,
      "step": 2572
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.2587434649467468,
      "learning_rate": 0.0001333587595844098,
      "loss": 0.9328,
      "step": 2573
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.7546692490577698,
      "learning_rate": 0.00013331002792704073,
      "loss": 1.0504,
      "step": 2574
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.4783627986907959,
      "learning_rate": 0.000133261287370423,
      "loss": 0.8325,
      "step": 2575
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.5284131765365601,
      "learning_rate": 0.00013321253792757833,
      "loss": 0.8723,
      "step": 2576
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.3306172788143158,
      "learning_rate": 0.00013316377961153088,
      "loss": 0.8942,
      "step": 2577
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.6719715595245361,
      "learning_rate": 0.00013311501243530715,
      "loss": 0.866,
      "step": 2578
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.5380599498748779,
      "learning_rate": 0.0001330662364119359,
      "loss": 0.806,
      "step": 2579
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.23865732550621033,
      "learning_rate": 0.0001330174515544484,
      "loss": 0.7301,
      "step": 2580
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.21573418378829956,
      "learning_rate": 0.00013296865787587817,
      "loss": 0.9061,
      "step": 2581
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.7700544595718384,
      "learning_rate": 0.0001329198553892612,
      "loss": 0.9145,
      "step": 2582
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.18527525663375854,
      "learning_rate": 0.00013287104410763577,
      "loss": 0.7618,
      "step": 2583
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.1899913102388382,
      "learning_rate": 0.00013282222404404243,
      "loss": 0.9417,
      "step": 2584
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.20477886497974396,
      "learning_rate": 0.00013277339521152422,
      "loss": 0.7307,
      "step": 2585
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.19189465045928955,
      "learning_rate": 0.00013272455762312646,
      "loss": 0.722,
      "step": 2586
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.26532554626464844,
      "learning_rate": 0.0001326757112918968,
      "loss": 0.7774,
      "step": 2587
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.6619186401367188,
      "learning_rate": 0.0001326268562308852,
      "loss": 1.1871,
      "step": 2588
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.5123942494392395,
      "learning_rate": 0.00013257799245314401,
      "loss": 0.5195,
      "step": 2589
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.31979256868362427,
      "learning_rate": 0.00013252911997172788,
      "loss": 0.9951,
      "step": 2590
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.24918489158153534,
      "learning_rate": 0.0001324802387996938,
      "loss": 0.7581,
      "step": 2591
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.3030394911766052,
      "learning_rate": 0.0001324313489501011,
      "loss": 0.874,
      "step": 2592
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.26233333349227905,
      "learning_rate": 0.00013238245043601133,
      "loss": 0.8951,
      "step": 2593
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.22790443897247314,
      "learning_rate": 0.00013233354327048849,
      "loss": 0.7604,
      "step": 2594
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.17712560296058655,
      "learning_rate": 0.00013228462746659876,
      "loss": 0.8339,
      "step": 2595
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.24723349511623383,
      "learning_rate": 0.00013223570303741076,
      "loss": 0.6188,
      "step": 2596
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.18535009026527405,
      "learning_rate": 0.00013218676999599533,
      "loss": 0.6026,
      "step": 2597
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.24560999870300293,
      "learning_rate": 0.0001321378283554256,
      "loss": 0.7065,
      "step": 2598
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.5964983105659485,
      "learning_rate": 0.00013208887812877706,
      "loss": 0.933,
      "step": 2599
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.35692712664604187,
      "learning_rate": 0.00013203991932912742,
      "loss": 0.9433,
      "step": 2600
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.21907857060432434,
      "learning_rate": 0.00013199095196955677,
      "loss": 0.6736,
      "step": 2601
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.287105917930603,
      "learning_rate": 0.0001319419760631474,
      "loss": 0.7843,
      "step": 2602
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.3928597569465637,
      "learning_rate": 0.00013189299162298397,
      "loss": 0.8719,
      "step": 2603
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.6624888181686401,
      "learning_rate": 0.00013184399866215333,
      "loss": 0.5549,
      "step": 2604
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.343715637922287,
      "learning_rate": 0.00013179499719374464,
      "loss": 0.6609,
      "step": 2605
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.2766316831111908,
      "learning_rate": 0.00013174598723084938,
      "loss": 0.6563,
      "step": 2606
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.528509795665741,
      "learning_rate": 0.00013169696878656122,
      "loss": 0.8826,
      "step": 2607
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.5374410152435303,
      "learning_rate": 0.00013164794187397612,
      "loss": 0.8959,
      "step": 2608
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.23552843928337097,
      "learning_rate": 0.0001315989065061924,
      "loss": 0.8577,
      "step": 2609
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.5874251127243042,
      "learning_rate": 0.0001315498626963105,
      "loss": 0.5803,
      "step": 2610
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.8098704814910889,
      "learning_rate": 0.00013150081045743318,
      "loss": 0.7362,
      "step": 2611
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.20316267013549805,
      "learning_rate": 0.00013145174980266547,
      "loss": 0.6753,
      "step": 2612
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.24296440184116364,
      "learning_rate": 0.00013140268074511455,
      "loss": 0.7095,
      "step": 2613
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.22348105907440186,
      "learning_rate": 0.00013135360329788996,
      "loss": 0.7903,
      "step": 2614
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.1378195881843567,
      "learning_rate": 0.00013130451747410353,
      "loss": 0.9101,
      "step": 2615
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.4112924635410309,
      "learning_rate": 0.0001312554232868691,
      "loss": 0.8112,
      "step": 2616
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.390364408493042,
      "learning_rate": 0.0001312063207493029,
      "loss": 0.7577,
      "step": 2617
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.40361547470092773,
      "learning_rate": 0.00013115720987452346,
      "loss": 0.7694,
      "step": 2618
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.2007908821105957,
      "learning_rate": 0.0001311080906756514,
      "loss": 0.8005,
      "step": 2619
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.25833454728126526,
      "learning_rate": 0.0001310589631658096,
      "loss": 0.9815,
      "step": 2620
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.26599374413490295,
      "learning_rate": 0.00013100982735812314,
      "loss": 0.6908,
      "step": 2621
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.38140809535980225,
      "learning_rate": 0.00013096068326571945,
      "loss": 0.8134,
      "step": 2622
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.5566459894180298,
      "learning_rate": 0.000130911530901728,
      "loss": 0.7492,
      "step": 2623
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.30602318048477173,
      "learning_rate": 0.0001308623702792806,
      "loss": 0.756,
      "step": 2624
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.2592381238937378,
      "learning_rate": 0.00013081320141151114,
      "loss": 0.7257,
      "step": 2625
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.19362841546535492,
      "learning_rate": 0.00013076402431155584,
      "loss": 0.7222,
      "step": 2626
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.15879711508750916,
      "learning_rate": 0.00013071483899255304,
      "loss": 0.7957,
      "step": 2627
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.22558781504631042,
      "learning_rate": 0.0001306656454676433,
      "loss": 0.8031,
      "step": 2628
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.30509060621261597,
      "learning_rate": 0.0001306164437499694,
      "loss": 0.832,
      "step": 2629
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.21914581954479218,
      "learning_rate": 0.0001305672338526762,
      "loss": 0.8985,
      "step": 2630
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.6996191740036011,
      "learning_rate": 0.00013051801578891094,
      "loss": 0.9129,
      "step": 2631
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.23742246627807617,
      "learning_rate": 0.0001304687895718228,
      "loss": 0.8481,
      "step": 2632
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.3202550411224365,
      "learning_rate": 0.00013041955521456342,
      "loss": 0.8827,
      "step": 2633
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.23232974112033844,
      "learning_rate": 0.00013037031273028632,
      "loss": 0.725,
      "step": 2634
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.34326687455177307,
      "learning_rate": 0.00013032106213214738,
      "loss": 0.9493,
      "step": 2635
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.2463419884443283,
      "learning_rate": 0.00013027180343330464,
      "loss": 0.7267,
      "step": 2636
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.1777982860803604,
      "learning_rate": 0.00013022253664691816,
      "loss": 0.8777,
      "step": 2637
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.32104960083961487,
      "learning_rate": 0.00013017326178615038,
      "loss": 1.0273,
      "step": 2638
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.1958327442407608,
      "learning_rate": 0.00013012397886416572,
      "loss": 0.7205,
      "step": 2639
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.27673137187957764,
      "learning_rate": 0.00013007468789413083,
      "loss": 0.7339,
      "step": 2640
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.24782483279705048,
      "learning_rate": 0.00013002538888921448,
      "loss": 0.7023,
      "step": 2641
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.30220499634742737,
      "learning_rate": 0.00012997608186258764,
      "loss": 0.7674,
      "step": 2642
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.13424432277679443,
      "learning_rate": 0.00012992676682742333,
      "loss": 0.8283,
      "step": 2643
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.24385321140289307,
      "learning_rate": 0.0001298774437968968,
      "loss": 0.6438,
      "step": 2644
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.2476421445608139,
      "learning_rate": 0.0001298281127841854,
      "loss": 0.5942,
      "step": 2645
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.1701427847146988,
      "learning_rate": 0.0001297787738024686,
      "loss": 0.7192,
      "step": 2646
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.2866537272930145,
      "learning_rate": 0.00012972942686492804,
      "loss": 0.9135,
      "step": 2647
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.2777150869369507,
      "learning_rate": 0.00012968007198474742,
      "loss": 0.8977,
      "step": 2648
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.2608729600906372,
      "learning_rate": 0.00012963070917511259,
      "loss": 1.007,
      "step": 2649
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.2668079137802124,
      "learning_rate": 0.0001295813384492116,
      "loss": 0.9007,
      "step": 2650
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.24040113389492035,
      "learning_rate": 0.00012953195982023447,
      "loss": 0.783,
      "step": 2651
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.35466083884239197,
      "learning_rate": 0.00012948257330137343,
      "loss": 0.7581,
      "step": 2652
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.24119719862937927,
      "learning_rate": 0.00012943317890582278,
      "loss": 0.8406,
      "step": 2653
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.30426695942878723,
      "learning_rate": 0.00012938377664677896,
      "loss": 0.772,
      "step": 2654
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.21760883927345276,
      "learning_rate": 0.0001293343665374405,
      "loss": 0.8008,
      "step": 2655
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.7034767866134644,
      "learning_rate": 0.00012928494859100798,
      "loss": 0.72,
      "step": 2656
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.29552626609802246,
      "learning_rate": 0.0001292355228206841,
      "loss": 0.7546,
      "step": 2657
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.5785254836082458,
      "learning_rate": 0.0001291860892396737,
      "loss": 1.1476,
      "step": 2658
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.1969612091779709,
      "learning_rate": 0.00012913664786118364,
      "loss": 0.7586,
      "step": 2659
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.29816529154777527,
      "learning_rate": 0.00012908719869842287,
      "loss": 0.7766,
      "step": 2660
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.23593780398368835,
      "learning_rate": 0.0001290377417646025,
      "loss": 0.9615,
      "step": 2661
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.27615031599998474,
      "learning_rate": 0.0001289882770729356,
      "loss": 0.991,
      "step": 2662
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.4548010528087616,
      "learning_rate": 0.00012893880463663742,
      "loss": 0.8247,
      "step": 2663
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.1919942945241928,
      "learning_rate": 0.00012888932446892512,
      "loss": 1.0554,
      "step": 2664
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.411422997713089,
      "learning_rate": 0.00012883983658301814,
      "loss": 0.7967,
      "step": 2665
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.3756856918334961,
      "learning_rate": 0.00012879034099213787,
      "loss": 0.8334,
      "step": 2666
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.745652973651886,
      "learning_rate": 0.00012874083770950765,
      "loss": 0.7435,
      "step": 2667
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.17381466925144196,
      "learning_rate": 0.00012869132674835315,
      "loss": 0.7212,
      "step": 2668
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.7017350792884827,
      "learning_rate": 0.0001286418081219018,
      "loss": 0.8539,
      "step": 2669
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.34659647941589355,
      "learning_rate": 0.00012859228184338326,
      "loss": 0.7192,
      "step": 2670
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.545827329158783,
      "learning_rate": 0.00012854274792602918,
      "loss": 0.7824,
      "step": 2671
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.28148093819618225,
      "learning_rate": 0.00012849320638307323,
      "loss": 1.0332,
      "step": 2672
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.4562681019306183,
      "learning_rate": 0.00012844365722775115,
      "loss": 0.8591,
      "step": 2673
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.26684796810150146,
      "learning_rate": 0.00012839410047330073,
      "loss": 0.7097,
      "step": 2674
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.7883116006851196,
      "learning_rate": 0.00012834453613296175,
      "loss": 0.7978,
      "step": 2675
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.3525182008743286,
      "learning_rate": 0.000128294964219976,
      "loss": 0.955,
      "step": 2676
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.16311372816562653,
      "learning_rate": 0.00012824538474758734,
      "loss": 0.7487,
      "step": 2677
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.2668920159339905,
      "learning_rate": 0.00012819579772904163,
      "loss": 1.0336,
      "step": 2678
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.1646813154220581,
      "learning_rate": 0.00012814620317758678,
      "loss": 0.818,
      "step": 2679
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.16381505131721497,
      "learning_rate": 0.00012809660110647264,
      "loss": 0.7854,
      "step": 2680
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.1665990799665451,
      "learning_rate": 0.00012804699152895113,
      "loss": 0.7586,
      "step": 2681
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9570996165275574,
      "learning_rate": 0.00012799737445827613,
      "loss": 0.6823,
      "step": 2682
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.32168474793434143,
      "learning_rate": 0.0001279477499077036,
      "loss": 0.92,
      "step": 2683
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.31895914673805237,
      "learning_rate": 0.0001278981178904914,
      "loss": 1.0133,
      "step": 2684
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.2798295319080353,
      "learning_rate": 0.00012784847841989944,
      "loss": 0.9676,
      "step": 2685
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.4091574251651764,
      "learning_rate": 0.0001277988315091896,
      "loss": 0.6613,
      "step": 2686
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.26380500197410583,
      "learning_rate": 0.0001277491771716258,
      "loss": 0.9418,
      "step": 2687
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.15686991810798645,
      "learning_rate": 0.0001276995154204739,
      "loss": 0.8504,
      "step": 2688
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.5846171975135803,
      "learning_rate": 0.0001276498462690017,
      "loss": 0.9653,
      "step": 2689
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.1923055499792099,
      "learning_rate": 0.00012760016973047906,
      "loss": 0.9253,
      "step": 2690
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.7520831823348999,
      "learning_rate": 0.00012755048581817774,
      "loss": 1.0208,
      "step": 2691
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.3040758967399597,
      "learning_rate": 0.00012750079454537154,
      "loss": 0.7856,
      "step": 2692
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.2922755181789398,
      "learning_rate": 0.0001274510959253362,
      "loss": 0.8698,
      "step": 2693
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.23313137888908386,
      "learning_rate": 0.00012740138997134937,
      "loss": 0.7401,
      "step": 2694
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.23591724038124084,
      "learning_rate": 0.00012735167669669073,
      "loss": 0.8553,
      "step": 2695
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.28797677159309387,
      "learning_rate": 0.00012730195611464185,
      "loss": 0.6259,
      "step": 2696
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.2848646938800812,
      "learning_rate": 0.00012725222823848639,
      "loss": 0.7529,
      "step": 2697
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.3490495979785919,
      "learning_rate": 0.0001272024930815098,
      "loss": 0.5293,
      "step": 2698
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.26761218905448914,
      "learning_rate": 0.00012715275065699948,
      "loss": 0.849,
      "step": 2699
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.27042627334594727,
      "learning_rate": 0.00012710300097824493,
      "loss": 0.6654,
      "step": 2700
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.1715688556432724,
      "learning_rate": 0.00012705324405853742,
      "loss": 0.9448,
      "step": 2701
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.16265550255775452,
      "learning_rate": 0.00012700347991117026,
      "loss": 0.6186,
      "step": 2702
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.21469958126544952,
      "learning_rate": 0.00012695370854943862,
      "loss": 0.829,
      "step": 2703
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.34254416823387146,
      "learning_rate": 0.00012690392998663963,
      "loss": 0.9886,
      "step": 2704
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2580490112304688,
      "learning_rate": 0.00012685414423607236,
      "loss": 0.9718,
      "step": 2705
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.4295068681240082,
      "learning_rate": 0.00012680435131103774,
      "loss": 0.8052,
      "step": 2706
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.5582977533340454,
      "learning_rate": 0.00012675455122483875,
      "loss": 0.7139,
      "step": 2707
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.1781761646270752,
      "learning_rate": 0.00012670474399078011,
      "loss": 0.802,
      "step": 2708
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.20831026136875153,
      "learning_rate": 0.00012665492962216855,
      "loss": 0.7353,
      "step": 2709
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.322296679019928,
      "learning_rate": 0.0001266051081323127,
      "loss": 0.7723,
      "step": 2710
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.29225438833236694,
      "learning_rate": 0.00012655527953452312,
      "loss": 0.8147,
      "step": 2711
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.3305177092552185,
      "learning_rate": 0.0001265054438421121,
      "loss": 0.7981,
      "step": 2712
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.5630453824996948,
      "learning_rate": 0.00012645560106839412,
      "loss": 0.7446,
      "step": 2713
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.20010988414287567,
      "learning_rate": 0.00012640575122668528,
      "loss": 0.7424,
      "step": 2714
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.3016258776187897,
      "learning_rate": 0.0001263558943303037,
      "loss": 0.632,
      "step": 2715
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.3568258583545685,
      "learning_rate": 0.0001263060303925694,
      "loss": 0.6854,
      "step": 2716
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.2389720380306244,
      "learning_rate": 0.0001262561594268042,
      "loss": 0.901,
      "step": 2717
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.22215446829795837,
      "learning_rate": 0.0001262062814463318,
      "loss": 0.7019,
      "step": 2718
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.2553802728652954,
      "learning_rate": 0.0001261563964644779,
      "loss": 0.7412,
      "step": 2719
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.2178622931241989,
      "learning_rate": 0.00012610650449456994,
      "loss": 0.7178,
      "step": 2720
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.2448093146085739,
      "learning_rate": 0.0001260566055499373,
      "loss": 0.7733,
      "step": 2721
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.47148218750953674,
      "learning_rate": 0.00012600669964391115,
      "loss": 0.7166,
      "step": 2722
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.35892632603645325,
      "learning_rate": 0.00012595678678982455,
      "loss": 0.9414,
      "step": 2723
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.222209632396698,
      "learning_rate": 0.0001259068670010125,
      "loss": 0.7411,
      "step": 2724
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.2818649411201477,
      "learning_rate": 0.00012585694029081175,
      "loss": 0.9029,
      "step": 2725
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.3162371814250946,
      "learning_rate": 0.0001258070066725609,
      "loss": 0.8457,
      "step": 2726
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.33754298090934753,
      "learning_rate": 0.00012575706615960047,
      "loss": 0.9194,
      "step": 2727
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.30378687381744385,
      "learning_rate": 0.00012570711876527276,
      "loss": 0.833,
      "step": 2728
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.2933448255062103,
      "learning_rate": 0.00012565716450292197,
      "loss": 0.7867,
      "step": 2729
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.29068028926849365,
      "learning_rate": 0.00012560720338589403,
      "loss": 0.7355,
      "step": 2730
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.5635267496109009,
      "learning_rate": 0.00012555723542753678,
      "loss": 0.698,
      "step": 2731
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.2469870001077652,
      "learning_rate": 0.0001255072606411999,
      "loss": 0.8115,
      "step": 2732
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.17138689756393433,
      "learning_rate": 0.00012545727904023486,
      "loss": 0.8585,
      "step": 2733
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.3618875741958618,
      "learning_rate": 0.0001254072906379949,
      "loss": 0.9,
      "step": 2734
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.3264915347099304,
      "learning_rate": 0.00012535729544783526,
      "loss": 0.9966,
      "step": 2735
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.22727404534816742,
      "learning_rate": 0.00012530729348311272,
      "loss": 0.7019,
      "step": 2736
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.29045358300209045,
      "learning_rate": 0.00012525728475718613,
      "loss": 0.8398,
      "step": 2737
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.19224710762500763,
      "learning_rate": 0.00012520726928341594,
      "loss": 0.6864,
      "step": 2738
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.34145426750183105,
      "learning_rate": 0.0001251572470751646,
      "loss": 0.8447,
      "step": 2739
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.15700864791870117,
      "learning_rate": 0.00012510721814579617,
      "loss": 0.8349,
      "step": 2740
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.761971116065979,
      "learning_rate": 0.0001250571825086766,
      "loss": 0.8618,
      "step": 2741
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.2251320630311966,
      "learning_rate": 0.0001250071401771737,
      "loss": 0.9981,
      "step": 2742
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.29741573333740234,
      "learning_rate": 0.00012495709116465694,
      "loss": 0.9481,
      "step": 2743
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.4602788984775543,
      "learning_rate": 0.00012490703548449759,
      "loss": 1.0129,
      "step": 2744
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.3991004228591919,
      "learning_rate": 0.0001248569731500688,
      "loss": 0.9,
      "step": 2745
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.3099501430988312,
      "learning_rate": 0.00012480690417474537,
      "loss": 0.9561,
      "step": 2746
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.230595201253891,
      "learning_rate": 0.000124756828571904,
      "loss": 0.7109,
      "step": 2747
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.22713708877563477,
      "learning_rate": 0.00012470674635492313,
      "loss": 0.6992,
      "step": 2748
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.2754499316215515,
      "learning_rate": 0.00012465665753718283,
      "loss": 0.8948,
      "step": 2749
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.15765821933746338,
      "learning_rate": 0.00012460656213206513,
      "loss": 0.8132,
      "step": 2750
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.606368899345398,
      "learning_rate": 0.00012455646015295367,
      "loss": 0.7547,
      "step": 2751
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.20028746128082275,
      "learning_rate": 0.00012450635161323398,
      "loss": 0.8035,
      "step": 2752
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.507400631904602,
      "learning_rate": 0.0001244562365262932,
      "loss": 0.8107,
      "step": 2753
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.3135436475276947,
      "learning_rate": 0.00012440611490552035,
      "loss": 0.8728,
      "step": 2754
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.3351019322872162,
      "learning_rate": 0.00012435598676430607,
      "loss": 0.7678,
      "step": 2755
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.32608741521835327,
      "learning_rate": 0.00012430585211604286,
      "loss": 0.5941,
      "step": 2756
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.5083513259887695,
      "learning_rate": 0.0001242557109741249,
      "loss": 0.6319,
      "step": 2757
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.4586973488330841,
      "learning_rate": 0.00012420556335194808,
      "loss": 0.6888,
      "step": 2758
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.3009372055530548,
      "learning_rate": 0.00012415540926291008,
      "loss": 0.9468,
      "step": 2759
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.18859051167964935,
      "learning_rate": 0.00012410524872041027,
      "loss": 0.9377,
      "step": 2760
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.3056480586528778,
      "learning_rate": 0.00012405508173784976,
      "loss": 0.8742,
      "step": 2761
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.15370525419712067,
      "learning_rate": 0.00012400490832863137,
      "loss": 0.8675,
      "step": 2762
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.17773784697055817,
      "learning_rate": 0.0001239547285061596,
      "loss": 0.7289,
      "step": 2763
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.23719114065170288,
      "learning_rate": 0.00012390454228384078,
      "loss": 0.7505,
      "step": 2764
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.33759114146232605,
      "learning_rate": 0.00012385434967508282,
      "loss": 0.9681,
      "step": 2765
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.14454372227191925,
      "learning_rate": 0.00012380415069329544,
      "loss": 0.7688,
      "step": 2766
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.20066282153129578,
      "learning_rate": 0.00012375394535188997,
      "loss": 0.8938,
      "step": 2767
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.26931342482566833,
      "learning_rate": 0.0001237037336642795,
      "loss": 0.8851,
      "step": 2768
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.23670002818107605,
      "learning_rate": 0.00012365351564387877,
      "loss": 0.7759,
      "step": 2769
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.22979260981082916,
      "learning_rate": 0.00012360329130410428,
      "loss": 0.827,
      "step": 2770
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.6519129276275635,
      "learning_rate": 0.00012355306065837417,
      "loss": 0.8032,
      "step": 2771
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.23859651386737823,
      "learning_rate": 0.00012350282372010826,
      "loss": 0.7119,
      "step": 2772
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.31523820757865906,
      "learning_rate": 0.00012345258050272802,
      "loss": 0.9134,
      "step": 2773
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.4447263479232788,
      "learning_rate": 0.00012340233101965672,
      "loss": 0.6705,
      "step": 2774
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.24364620447158813,
      "learning_rate": 0.00012335207528431922,
      "loss": 0.7646,
      "step": 2775
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.253420352935791,
      "learning_rate": 0.00012330181331014203,
      "loss": 0.7273,
      "step": 2776
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.23012614250183105,
      "learning_rate": 0.00012325154511055335,
      "loss": 1.0431,
      "step": 2777
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.2785976231098175,
      "learning_rate": 0.000123201270698983,
      "loss": 0.9978,
      "step": 2778
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.3932701051235199,
      "learning_rate": 0.0001231509900888626,
      "loss": 0.7049,
      "step": 2779
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.29818853735923767,
      "learning_rate": 0.0001231007032936253,
      "loss": 0.7481,
      "step": 2780
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.20394080877304077,
      "learning_rate": 0.0001230504103267059,
      "loss": 0.8264,
      "step": 2781
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.26857689023017883,
      "learning_rate": 0.00012300011120154088,
      "loss": 0.9013,
      "step": 2782
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.3505883812904358,
      "learning_rate": 0.0001229498059315684,
      "loss": 0.6783,
      "step": 2783
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.4002523124217987,
      "learning_rate": 0.00012289949453022825,
      "loss": 0.945,
      "step": 2784
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.31659215688705444,
      "learning_rate": 0.0001228491770109618,
      "loss": 0.6957,
      "step": 2785
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.3339032530784607,
      "learning_rate": 0.00012279885338721208,
      "loss": 0.6774,
      "step": 2786
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.6391658186912537,
      "learning_rate": 0.00012274852367242378,
      "loss": 0.5644,
      "step": 2787
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.24784164130687714,
      "learning_rate": 0.00012269818788004323,
      "loss": 0.8574,
      "step": 2788
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.25425785779953003,
      "learning_rate": 0.00012264784602351834,
      "loss": 0.8088,
      "step": 2789
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.12110455334186554,
      "learning_rate": 0.0001225974981162986,
      "loss": 0.8185,
      "step": 2790
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.5111344456672668,
      "learning_rate": 0.00012254714417183523,
      "loss": 0.7938,
      "step": 2791
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.328177273273468,
      "learning_rate": 0.000122496784203581,
      "loss": 0.8696,
      "step": 2792
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.6007897853851318,
      "learning_rate": 0.00012244641822499027,
      "loss": 0.8971,
      "step": 2793
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.28746896982192993,
      "learning_rate": 0.00012239604624951906,
      "loss": 0.5524,
      "step": 2794
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.2513626217842102,
      "learning_rate": 0.0001223456682906249,
      "loss": 0.8888,
      "step": 2795
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.12351474165916443,
      "learning_rate": 0.00012229528436176706,
      "loss": 0.6272,
      "step": 2796
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.4468541741371155,
      "learning_rate": 0.00012224489447640625,
      "loss": 0.7967,
      "step": 2797
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.3289395570755005,
      "learning_rate": 0.00012219449864800494,
      "loss": 0.6988,
      "step": 2798
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.2507520318031311,
      "learning_rate": 0.00012214409689002698,
      "loss": 0.8004,
      "step": 2799
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.4741341173648834,
      "learning_rate": 0.000122093689215938,
      "loss": 0.7295,
      "step": 2800
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.3320819139480591,
      "learning_rate": 0.0001220432756392051,
      "loss": 0.9112,
      "step": 2801
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.20856952667236328,
      "learning_rate": 0.00012199285617329697,
      "loss": 0.7673,
      "step": 2802
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.3112906515598297,
      "learning_rate": 0.0001219424308316839,
      "loss": 0.9247,
      "step": 2803
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.29492273926734924,
      "learning_rate": 0.00012189199962783777,
      "loss": 0.9294,
      "step": 2804
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.3749832808971405,
      "learning_rate": 0.00012184156257523197,
      "loss": 0.7994,
      "step": 2805
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.2801509499549866,
      "learning_rate": 0.00012179111968734143,
      "loss": 0.9273,
      "step": 2806
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.25400999188423157,
      "learning_rate": 0.00012174067097764277,
      "loss": 0.8918,
      "step": 2807
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.3803490400314331,
      "learning_rate": 0.00012169021645961405,
      "loss": 0.9513,
      "step": 2808
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.2550119161605835,
      "learning_rate": 0.00012163975614673491,
      "loss": 0.7388,
      "step": 2809
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.4008060395717621,
      "learning_rate": 0.00012158929005248651,
      "loss": 0.9331,
      "step": 2810
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.28235289454460144,
      "learning_rate": 0.00012153881819035163,
      "loss": 0.7331,
      "step": 2811
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.24501274526119232,
      "learning_rate": 0.00012148834057381458,
      "loss": 0.6376,
      "step": 2812
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.20345444977283478,
      "learning_rate": 0.00012143785721636106,
      "loss": 0.7859,
      "step": 2813
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.2213176041841507,
      "learning_rate": 0.00012138736813147849,
      "loss": 0.8177,
      "step": 2814
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.5887667536735535,
      "learning_rate": 0.00012133687333265578,
      "loss": 0.8191,
      "step": 2815
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.2139681726694107,
      "learning_rate": 0.00012128637283338328,
      "loss": 0.6566,
      "step": 2816
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.40240582823753357,
      "learning_rate": 0.00012123586664715297,
      "loss": 0.9645,
      "step": 2817
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.2867717146873474,
      "learning_rate": 0.0001211853547874582,
      "loss": 0.7184,
      "step": 2818
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.35351359844207764,
      "learning_rate": 0.00012113483726779402,
      "loss": 0.776,
      "step": 2819
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.33210304379463196,
      "learning_rate": 0.00012108431410165692,
      "loss": 0.7035,
      "step": 2820
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.2609618008136749,
      "learning_rate": 0.00012103378530254484,
      "loss": 0.773,
      "step": 2821
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.34427520632743835,
      "learning_rate": 0.00012098325088395724,
      "loss": 0.9142,
      "step": 2822
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.1863582283258438,
      "learning_rate": 0.00012093271085939517,
      "loss": 0.7137,
      "step": 2823
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.29520121216773987,
      "learning_rate": 0.00012088216524236107,
      "loss": 0.9438,
      "step": 2824
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.30039235949516296,
      "learning_rate": 0.00012083161404635899,
      "loss": 0.8137,
      "step": 2825
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.20210203528404236,
      "learning_rate": 0.00012078105728489432,
      "loss": 0.8536,
      "step": 2826
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.1650468409061432,
      "learning_rate": 0.00012073049497147406,
      "loss": 0.8333,
      "step": 2827
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.22511161863803864,
      "learning_rate": 0.00012067992711960667,
      "loss": 0.9372,
      "step": 2828
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.29080668091773987,
      "learning_rate": 0.00012062935374280205,
      "loss": 0.9913,
      "step": 2829
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.2787846326828003,
      "learning_rate": 0.00012057877485457159,
      "loss": 0.8493,
      "step": 2830
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.2420526146888733,
      "learning_rate": 0.00012052819046842817,
      "loss": 0.8062,
      "step": 2831
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.2501155436038971,
      "learning_rate": 0.00012047760059788613,
      "loss": 0.7805,
      "step": 2832
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.6571948528289795,
      "learning_rate": 0.00012042700525646129,
      "loss": 0.8115,
      "step": 2833
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.2598873972892761,
      "learning_rate": 0.00012037640445767089,
      "loss": 0.8077,
      "step": 2834
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.15727347135543823,
      "learning_rate": 0.00012032579821503367,
      "loss": 0.9508,
      "step": 2835
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.32881858944892883,
      "learning_rate": 0.00012027518654206982,
      "loss": 0.9754,
      "step": 2836
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.27262672781944275,
      "learning_rate": 0.00012022456945230091,
      "loss": 0.8807,
      "step": 2837
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.2836672365665436,
      "learning_rate": 0.00012017394695925009,
      "loss": 0.7743,
      "step": 2838
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.29948413372039795,
      "learning_rate": 0.00012012331907644185,
      "loss": 1.0062,
      "step": 2839
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.279293030500412,
      "learning_rate": 0.00012007268581740216,
      "loss": 0.9956,
      "step": 2840
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.2919675409793854,
      "learning_rate": 0.00012002204719565842,
      "loss": 0.8447,
      "step": 2841
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.15367579460144043,
      "learning_rate": 0.00011997140322473943,
      "loss": 0.9016,
      "step": 2842
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.29397839307785034,
      "learning_rate": 0.00011992075391817545,
      "loss": 0.944,
      "step": 2843
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.2782045006752014,
      "learning_rate": 0.00011987009928949824,
      "loss": 0.8752,
      "step": 2844
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.15950992703437805,
      "learning_rate": 0.0001198194393522408,
      "loss": 0.8639,
      "step": 2845
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.3650744557380676,
      "learning_rate": 0.0001197687741199377,
      "loss": 0.9426,
      "step": 2846
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.2587478458881378,
      "learning_rate": 0.0001197181036061249,
      "loss": 0.7352,
      "step": 2847
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.25001344084739685,
      "learning_rate": 0.00011966742782433971,
      "loss": 0.8061,
      "step": 2848
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.4707080125808716,
      "learning_rate": 0.00011961674678812094,
      "loss": 0.769,
      "step": 2849
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.23608317971229553,
      "learning_rate": 0.00011956606051100869,
      "loss": 0.7557,
      "step": 2850
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.39204782247543335,
      "learning_rate": 0.00011951536900654454,
      "loss": 0.8485,
      "step": 2851
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.24483025074005127,
      "learning_rate": 0.00011946467228827147,
      "loss": 0.7624,
      "step": 2852
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.2752787172794342,
      "learning_rate": 0.00011941397036973386,
      "loss": 0.7204,
      "step": 2853
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.20159444212913513,
      "learning_rate": 0.00011936326326447733,
      "loss": 0.7585,
      "step": 2854
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.19199872016906738,
      "learning_rate": 0.00011931255098604914,
      "loss": 0.8348,
      "step": 2855
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.2938099801540375,
      "learning_rate": 0.00011926183354799771,
      "loss": 0.6457,
      "step": 2856
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.5249415636062622,
      "learning_rate": 0.00011921111096387299,
      "loss": 0.642,
      "step": 2857
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.3275289237499237,
      "learning_rate": 0.00011916038324722621,
      "loss": 0.8635,
      "step": 2858
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.16368712484836578,
      "learning_rate": 0.00011910965041160997,
      "loss": 0.8123,
      "step": 2859
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.3538644015789032,
      "learning_rate": 0.00011905891247057831,
      "loss": 0.9179,
      "step": 2860
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.24111630022525787,
      "learning_rate": 0.0001190081694376866,
      "loss": 0.8661,
      "step": 2861
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.2602483034133911,
      "learning_rate": 0.00011895742132649155,
      "loss": 0.789,
      "step": 2862
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.33216917514801025,
      "learning_rate": 0.00011890666815055125,
      "loss": 0.808,
      "step": 2863
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.8436432480812073,
      "learning_rate": 0.00011885590992342507,
      "loss": 0.763,
      "step": 2864
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.25700926780700684,
      "learning_rate": 0.0001188051466586739,
      "loss": 0.8414,
      "step": 2865
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.6185002326965332,
      "learning_rate": 0.00011875437836985981,
      "loss": 0.8188,
      "step": 2866
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.25269466638565063,
      "learning_rate": 0.00011870360507054627,
      "loss": 0.5694,
      "step": 2867
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.1524447202682495,
      "learning_rate": 0.00011865282677429811,
      "loss": 0.6578,
      "step": 2868
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.28343790769577026,
      "learning_rate": 0.00011860204349468148,
      "loss": 0.7221,
      "step": 2869
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.3156312108039856,
      "learning_rate": 0.0001185512552452638,
      "loss": 0.723,
      "step": 2870
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.38277754187583923,
      "learning_rate": 0.00011850046203961396,
      "loss": 0.8893,
      "step": 2871
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.28510260581970215,
      "learning_rate": 0.00011844966389130206,
      "loss": 0.8879,
      "step": 2872
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.430117666721344,
      "learning_rate": 0.0001183988608138995,
      "loss": 0.6103,
      "step": 2873
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.23068290948867798,
      "learning_rate": 0.00011834805282097908,
      "loss": 0.6551,
      "step": 2874
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.38811200857162476,
      "learning_rate": 0.00011829723992611487,
      "loss": 0.899,
      "step": 2875
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.22605323791503906,
      "learning_rate": 0.0001182464221428823,
      "loss": 0.7027,
      "step": 2876
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.3099002540111542,
      "learning_rate": 0.00011819559948485804,
      "loss": 0.8198,
      "step": 2877
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.20012839138507843,
      "learning_rate": 0.00011814477196562006,
      "loss": 0.7708,
      "step": 2878
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.2538854777812958,
      "learning_rate": 0.0001180939395987477,
      "loss": 0.8581,
      "step": 2879
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.20556369423866272,
      "learning_rate": 0.0001180431023978215,
      "loss": 0.4932,
      "step": 2880
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.25464022159576416,
      "learning_rate": 0.00011799226037642342,
      "loss": 0.764,
      "step": 2881
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.23722560703754425,
      "learning_rate": 0.00011794141354813653,
      "loss": 0.9732,
      "step": 2882
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.2618451416492462,
      "learning_rate": 0.00011789056192654535,
      "loss": 0.9414,
      "step": 2883
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.8428218364715576,
      "learning_rate": 0.00011783970552523563,
      "loss": 1.0611,
      "step": 2884
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.6774807572364807,
      "learning_rate": 0.00011778884435779434,
      "loss": 0.7513,
      "step": 2885
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.35672393441200256,
      "learning_rate": 0.00011773797843780976,
      "loss": 0.6985,
      "step": 2886
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.3602862060070038,
      "learning_rate": 0.00011768710777887151,
      "loss": 0.7485,
      "step": 2887
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.2501998543739319,
      "learning_rate": 0.00011763623239457034,
      "loss": 0.7649,
      "step": 2888
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.3051433563232422,
      "learning_rate": 0.00011758535229849836,
      "loss": 0.8277,
      "step": 2889
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.2736133635044098,
      "learning_rate": 0.00011753446750424893,
      "loss": 0.967,
      "step": 2890
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.3794856667518616,
      "learning_rate": 0.00011748357802541661,
      "loss": 1.2178,
      "step": 2891
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.22887402772903442,
      "learning_rate": 0.00011743268387559729,
      "loss": 0.7063,
      "step": 2892
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.19468370079994202,
      "learning_rate": 0.00011738178506838804,
      "loss": 0.7834,
      "step": 2893
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.304461807012558,
      "learning_rate": 0.00011733088161738721,
      "loss": 0.8056,
      "step": 2894
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.34238144755363464,
      "learning_rate": 0.0001172799735361944,
      "loss": 0.8468,
      "step": 2895
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.3344690501689911,
      "learning_rate": 0.00011722906083841039,
      "loss": 0.8686,
      "step": 2896
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.21526101231575012,
      "learning_rate": 0.00011717814353763727,
      "loss": 0.9386,
      "step": 2897
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.27831119298934937,
      "learning_rate": 0.00011712722164747829,
      "loss": 0.757,
      "step": 2898
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.310785710811615,
      "learning_rate": 0.000117076295181538,
      "loss": 0.7667,
      "step": 2899
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.33931058645248413,
      "learning_rate": 0.00011702536415342211,
      "loss": 0.7291,
      "step": 2900
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.34936341643333435,
      "learning_rate": 0.00011697442857673754,
      "loss": 0.9118,
      "step": 2901
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.3087674677371979,
      "learning_rate": 0.00011692348846509249,
      "loss": 0.5758,
      "step": 2902
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.3184913098812103,
      "learning_rate": 0.00011687254383209634,
      "loss": 0.9713,
      "step": 2903
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.28290876746177673,
      "learning_rate": 0.00011682159469135967,
      "loss": 0.82,
      "step": 2904
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.20247219502925873,
      "learning_rate": 0.00011677064105649427,
      "loss": 0.8217,
      "step": 2905
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.23463624715805054,
      "learning_rate": 0.0001167196829411131,
      "loss": 0.7919,
      "step": 2906
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.5645097494125366,
      "learning_rate": 0.00011666872035883037,
      "loss": 0.7802,
      "step": 2907
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.5663992762565613,
      "learning_rate": 0.00011661775332326146,
      "loss": 0.7357,
      "step": 2908
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.2915682792663574,
      "learning_rate": 0.00011656678184802296,
      "loss": 0.8345,
      "step": 2909
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.28361037373542786,
      "learning_rate": 0.00011651580594673257,
      "loss": 0.9258,
      "step": 2910
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.5438295006752014,
      "learning_rate": 0.00011646482563300931,
      "loss": 0.5585,
      "step": 2911
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.25191977620124817,
      "learning_rate": 0.0001164138409204732,
      "loss": 0.7659,
      "step": 2912
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.14153023064136505,
      "learning_rate": 0.00011636285182274565,
      "loss": 0.99,
      "step": 2913
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.4238814115524292,
      "learning_rate": 0.000116311858353449,
      "loss": 0.6626,
      "step": 2914
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.22717685997486115,
      "learning_rate": 0.00011626086052620695,
      "loss": 0.7276,
      "step": 2915
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.6911187171936035,
      "learning_rate": 0.0001162098583546443,
      "loss": 0.845,
      "step": 2916
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.2243594378232956,
      "learning_rate": 0.00011615885185238699,
      "loss": 0.6553,
      "step": 2917
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.24141964316368103,
      "learning_rate": 0.00011610784103306214,
      "loss": 0.8001,
      "step": 2918
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.3252140283584595,
      "learning_rate": 0.00011605682591029801,
      "loss": 1.1121,
      "step": 2919
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.21953104436397552,
      "learning_rate": 0.00011600580649772402,
      "loss": 0.8293,
      "step": 2920
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.6482974290847778,
      "learning_rate": 0.00011595478280897074,
      "loss": 1.0384,
      "step": 2921
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.3188464045524597,
      "learning_rate": 0.00011590375485766987,
      "loss": 0.6722,
      "step": 2922
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.23666055500507355,
      "learning_rate": 0.00011585272265745424,
      "loss": 0.8918,
      "step": 2923
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.2551921010017395,
      "learning_rate": 0.00011580168622195786,
      "loss": 0.8687,
      "step": 2924
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.31006503105163574,
      "learning_rate": 0.0001157506455648158,
      "loss": 0.6821,
      "step": 2925
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.2675802707672119,
      "learning_rate": 0.00011569960069966435,
      "loss": 0.826,
      "step": 2926
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.2611114978790283,
      "learning_rate": 0.00011564855164014086,
      "loss": 0.7706,
      "step": 2927
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.13137148320674896,
      "learning_rate": 0.00011559749839988377,
      "loss": 0.8819,
      "step": 2928
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.2638119161128998,
      "learning_rate": 0.00011554644099253271,
      "loss": 0.7962,
      "step": 2929
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.3025442063808441,
      "learning_rate": 0.00011549537943172839,
      "loss": 0.9462,
      "step": 2930
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.31293827295303345,
      "learning_rate": 0.00011544431373111266,
      "loss": 0.6149,
      "step": 2931
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.20748302340507507,
      "learning_rate": 0.00011539324390432844,
      "loss": 0.801,
      "step": 2932
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.2551407217979431,
      "learning_rate": 0.00011534216996501971,
      "loss": 0.9631,
      "step": 2933
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.3880159556865692,
      "learning_rate": 0.00011529109192683167,
      "loss": 0.9676,
      "step": 2934
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.2753511965274811,
      "learning_rate": 0.00011524000980341053,
      "loss": 0.9127,
      "step": 2935
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.1861070841550827,
      "learning_rate": 0.00011518892360840357,
      "loss": 0.6856,
      "step": 2936
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.2137385606765747,
      "learning_rate": 0.00011513783335545924,
      "loss": 0.7177,
      "step": 2937
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.2192721962928772,
      "learning_rate": 0.00011508673905822697,
      "loss": 0.7676,
      "step": 2938
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.15600326657295227,
      "learning_rate": 0.0001150356407303574,
      "loss": 0.8457,
      "step": 2939
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.26970720291137695,
      "learning_rate": 0.00011498453838550214,
      "loss": 0.9817,
      "step": 2940
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.2849070727825165,
      "learning_rate": 0.00011493343203731393,
      "loss": 0.7809,
      "step": 2941
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.5406070947647095,
      "learning_rate": 0.0001148823216994465,
      "loss": 0.9383,
      "step": 2942
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.18953873217105865,
      "learning_rate": 0.00011483120738555477,
      "loss": 0.694,
      "step": 2943
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.2882836163043976,
      "learning_rate": 0.00011478008910929463,
      "loss": 0.7764,
      "step": 2944
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.14012649655342102,
      "learning_rate": 0.00011472896688432311,
      "loss": 0.7914,
      "step": 2945
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.31917744874954224,
      "learning_rate": 0.00011467784072429812,
      "loss": 1.1275,
      "step": 2946
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.253580927848816,
      "learning_rate": 0.00011462671064287885,
      "loss": 0.8914,
      "step": 2947
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.21436072885990143,
      "learning_rate": 0.0001145755766537254,
      "loss": 0.8846,
      "step": 2948
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.2436280995607376,
      "learning_rate": 0.00011452443877049894,
      "loss": 0.8584,
      "step": 2949
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.26440420746803284,
      "learning_rate": 0.00011447329700686166,
      "loss": 0.7748,
      "step": 2950
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.38116562366485596,
      "learning_rate": 0.00011442215137647685,
      "loss": 0.7559,
      "step": 2951
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.27446216344833374,
      "learning_rate": 0.00011437100189300877,
      "loss": 0.8062,
      "step": 2952
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.3763505518436432,
      "learning_rate": 0.00011431984857012277,
      "loss": 0.9029,
      "step": 2953
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.1566392183303833,
      "learning_rate": 0.00011426869142148513,
      "loss": 0.8182,
      "step": 2954
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.40910205245018005,
      "learning_rate": 0.00011421753046076324,
      "loss": 0.9845,
      "step": 2955
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.5238561630249023,
      "learning_rate": 0.00011416636570162551,
      "loss": 0.9733,
      "step": 2956
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.2565077543258667,
      "learning_rate": 0.00011411519715774127,
      "loss": 0.8363,
      "step": 2957
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.3126351535320282,
      "learning_rate": 0.00011406402484278099,
      "loss": 0.8522,
      "step": 2958
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.20104169845581055,
      "learning_rate": 0.00011401284877041604,
      "loss": 0.9855,
      "step": 2959
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.20426620543003082,
      "learning_rate": 0.00011396166895431883,
      "loss": 0.689,
      "step": 2960
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.2309906929731369,
      "learning_rate": 0.00011391048540816284,
      "loss": 0.8012,
      "step": 2961
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.25707361102104187,
      "learning_rate": 0.00011385929814562242,
      "loss": 0.9651,
      "step": 2962
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.28649580478668213,
      "learning_rate": 0.00011380810718037302,
      "loss": 0.8303,
      "step": 2963
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.49125391244888306,
      "learning_rate": 0.000113756912526091,
      "loss": 0.5998,
      "step": 2964
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.3253740072250366,
      "learning_rate": 0.00011370571419645375,
      "loss": 0.7549,
      "step": 2965
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.4104686677455902,
      "learning_rate": 0.00011365451220513965,
      "loss": 0.9559,
      "step": 2966
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.34757351875305176,
      "learning_rate": 0.00011360330656582802,
      "loss": 0.6099,
      "step": 2967
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.36981186270713806,
      "learning_rate": 0.0001135520972921992,
      "loss": 1.0467,
      "step": 2968
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.23969601094722748,
      "learning_rate": 0.00011350088439793446,
      "loss": 0.8415,
      "step": 2969
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.2463625967502594,
      "learning_rate": 0.00011344966789671607,
      "loss": 0.6764,
      "step": 2970
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.6394909024238586,
      "learning_rate": 0.00011339844780222721,
      "loss": 0.9576,
      "step": 2971
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.4191812574863434,
      "learning_rate": 0.00011334722412815212,
      "loss": 0.7763,
      "step": 2972
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.18503649532794952,
      "learning_rate": 0.0001132959968881759,
      "loss": 0.8063,
      "step": 2973
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.2193889021873474,
      "learning_rate": 0.00011324476609598463,
      "loss": 0.8178,
      "step": 2974
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.24221470952033997,
      "learning_rate": 0.00011319353176526532,
      "loss": 0.8504,
      "step": 2975
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.46993014216423035,
      "learning_rate": 0.00011314229390970602,
      "loss": 0.8703,
      "step": 2976
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.46993014216423035,
      "learning_rate": 0.00011314229390970602,
      "loss": 1.0864,
      "step": 2977
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.24646145105361938,
      "learning_rate": 0.00011309105254299564,
      "loss": 0.6906,
      "step": 2978
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.23572929203510284,
      "learning_rate": 0.00011303980767882395,
      "loss": 0.6959,
      "step": 2979
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.5177665948867798,
      "learning_rate": 0.00011298855933088186,
      "loss": 0.7328,
      "step": 2980
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.4777488112449646,
      "learning_rate": 0.00011293730751286107,
      "loss": 0.91,
      "step": 2981
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.21504434943199158,
      "learning_rate": 0.00011288605223845417,
      "loss": 0.7151,
      "step": 2982
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.6811528205871582,
      "learning_rate": 0.00011283479352135484,
      "loss": 0.8826,
      "step": 2983
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.31702136993408203,
      "learning_rate": 0.00011278353137525748,
      "loss": 0.7527,
      "step": 2984
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.18919575214385986,
      "learning_rate": 0.00011273226581385754,
      "loss": 0.7656,
      "step": 2985
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.19558565318584442,
      "learning_rate": 0.00011268099685085136,
      "loss": 0.9314,
      "step": 2986
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.213118314743042,
      "learning_rate": 0.00011262972449993617,
      "loss": 0.8198,
      "step": 2987
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.5781983733177185,
      "learning_rate": 0.00011257844877481007,
      "loss": 0.7061,
      "step": 2988
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.19280453026294708,
      "learning_rate": 0.00011252716968917217,
      "loss": 0.6833,
      "step": 2989
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.6630820631980896,
      "learning_rate": 0.00011247588725672234,
      "loss": 0.8483,
      "step": 2990
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.23312024772167206,
      "learning_rate": 0.00011242460149116145,
      "loss": 0.6914,
      "step": 2991
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.20992204546928406,
      "learning_rate": 0.00011237331240619122,
      "loss": 0.7397,
      "step": 2992
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.31641626358032227,
      "learning_rate": 0.00011232202001551424,
      "loss": 0.9583,
      "step": 2993
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.428783118724823,
      "learning_rate": 0.00011227072433283405,
      "loss": 0.7608,
      "step": 2994
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.39955952763557434,
      "learning_rate": 0.00011221942537185497,
      "loss": 0.9481,
      "step": 2995
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.25795599818229675,
      "learning_rate": 0.00011216812314628226,
      "loss": 1.0631,
      "step": 2996
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.11658056825399399,
      "learning_rate": 0.00011211681766982209,
      "loss": 0.5627,
      "step": 2997
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.3014621138572693,
      "learning_rate": 0.00011206550895618138,
      "loss": 0.7509,
      "step": 2998
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.25997376441955566,
      "learning_rate": 0.00011201419701906801,
      "loss": 0.9112,
      "step": 2999
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.24402517080307007,
      "learning_rate": 0.0001119628818721907,
      "loss": 0.8297,
      "step": 3000
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.36498403549194336,
      "learning_rate": 0.00011191156352925904,
      "loss": 0.9079,
      "step": 3001
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.36745402216911316,
      "learning_rate": 0.00011186024200398344,
      "loss": 1.0138,
      "step": 3002
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.20281651616096497,
      "learning_rate": 0.00011180891731007515,
      "loss": 0.761,
      "step": 3003
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.2746065557003021,
      "learning_rate": 0.0001117575894612463,
      "loss": 0.788,
      "step": 3004
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.3279140293598175,
      "learning_rate": 0.00011170625847120992,
      "loss": 0.4687,
      "step": 3005
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.30366605520248413,
      "learning_rate": 0.00011165492435367977,
      "loss": 0.6936,
      "step": 3006
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.667969822883606,
      "learning_rate": 0.00011160358712237046,
      "loss": 0.6875,
      "step": 3007
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.20602869987487793,
      "learning_rate": 0.00011155224679099751,
      "loss": 0.8245,
      "step": 3008
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.505750834941864,
      "learning_rate": 0.00011150090337327718,
      "loss": 0.6649,
      "step": 3009
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.4608457684516907,
      "learning_rate": 0.00011144955688292666,
      "loss": 1.0338,
      "step": 3010
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.2310003787279129,
      "learning_rate": 0.00011139820733366386,
      "loss": 0.7697,
      "step": 3011
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.17346063256263733,
      "learning_rate": 0.00011134685473920751,
      "loss": 0.8856,
      "step": 3012
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.1920306235551834,
      "learning_rate": 0.00011129549911327726,
      "loss": 0.8234,
      "step": 3013
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.22443240880966187,
      "learning_rate": 0.00011124414046959342,
      "loss": 0.8225,
      "step": 3014
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.7639800310134888,
      "learning_rate": 0.00011119277882187724,
      "loss": 0.8093,
      "step": 3015
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.2603866457939148,
      "learning_rate": 0.00011114141418385067,
      "loss": 0.785,
      "step": 3016
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.6800035834312439,
      "learning_rate": 0.00011109004656923654,
      "loss": 0.8531,
      "step": 3017
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.3070164620876312,
      "learning_rate": 0.00011103867599175845,
      "loss": 0.9649,
      "step": 3018
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.31887298822402954,
      "learning_rate": 0.00011098730246514076,
      "loss": 0.9947,
      "step": 3019
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.2591553032398224,
      "learning_rate": 0.00011093592600310863,
      "loss": 0.6678,
      "step": 3020
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.19275972247123718,
      "learning_rate": 0.00011088454661938804,
      "loss": 0.8039,
      "step": 3021
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.3510432541370392,
      "learning_rate": 0.00011083316432770567,
      "loss": 0.9897,
      "step": 3022
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.36339014768600464,
      "learning_rate": 0.00011078177914178911,
      "loss": 1.0611,
      "step": 3023
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.34073972702026367,
      "learning_rate": 0.00011073039107536661,
      "loss": 0.9762,
      "step": 3024
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.28092342615127563,
      "learning_rate": 0.00011067900014216719,
      "loss": 0.7998,
      "step": 3025
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.098958969116211,
      "learning_rate": 0.00011062760635592074,
      "loss": 1.0578,
      "step": 3026
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.3307187557220459,
      "learning_rate": 0.00011057620973035779,
      "loss": 0.9259,
      "step": 3027
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.1988876461982727,
      "learning_rate": 0.00011052481027920968,
      "loss": 0.798,
      "step": 3028
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.31107577681541443,
      "learning_rate": 0.00011047340801620856,
      "loss": 0.8512,
      "step": 3029
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.28430792689323425,
      "learning_rate": 0.00011042200295508718,
      "loss": 0.7765,
      "step": 3030
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.2665393054485321,
      "learning_rate": 0.00011037059510957924,
      "loss": 0.6677,
      "step": 3031
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.34004950523376465,
      "learning_rate": 0.00011031918449341901,
      "loss": 0.8506,
      "step": 3032
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.395911306142807,
      "learning_rate": 0.00011026777112034159,
      "loss": 0.9551,
      "step": 3033
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.21070830523967743,
      "learning_rate": 0.0001102163550040828,
      "loss": 0.7434,
      "step": 3034
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.38135987520217896,
      "learning_rate": 0.00011016493615837917,
      "loss": 0.9828,
      "step": 3035
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.1824963390827179,
      "learning_rate": 0.00011011351459696799,
      "loss": 0.6611,
      "step": 3036
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9863038063049316,
      "learning_rate": 0.0001100620903335873,
      "loss": 0.8253,
      "step": 3037
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.27112463116645813,
      "learning_rate": 0.00011001066338197576,
      "loss": 0.7137,
      "step": 3038
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.29607725143432617,
      "learning_rate": 0.00010995923375587288,
      "loss": 0.7714,
      "step": 3039
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.2999114692211151,
      "learning_rate": 0.0001099078014690187,
      "loss": 0.6781,
      "step": 3040
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.29796820878982544,
      "learning_rate": 0.00010985636653515421,
      "loss": 0.9861,
      "step": 3041
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.2616356313228607,
      "learning_rate": 0.00010980492896802095,
      "loss": 0.9102,
      "step": 3042
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.8920146822929382,
      "learning_rate": 0.0001097534887813612,
      "loss": 0.6515,
      "step": 3043
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.8073492646217346,
      "learning_rate": 0.00010970204598891792,
      "loss": 0.839,
      "step": 3044
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.1262246072292328,
      "learning_rate": 0.00010965060060443479,
      "loss": 0.6686,
      "step": 3045
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.3267408609390259,
      "learning_rate": 0.00010959915264165617,
      "loss": 0.7513,
      "step": 3046
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.18884705007076263,
      "learning_rate": 0.00010954770211432717,
      "loss": 0.7614,
      "step": 3047
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.33308109641075134,
      "learning_rate": 0.00010949624903619344,
      "loss": 0.8391,
      "step": 3048
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.18657535314559937,
      "learning_rate": 0.00010944479342100148,
      "loss": 0.8047,
      "step": 3049
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.30085593461990356,
      "learning_rate": 0.00010939333528249838,
      "loss": 0.849,
      "step": 3050
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.3514222204685211,
      "learning_rate": 0.00010934187463443188,
      "loss": 0.878,
      "step": 3051
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.5810489654541016,
      "learning_rate": 0.00010929041149055046,
      "loss": 0.7551,
      "step": 3052
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.41675877571105957,
      "learning_rate": 0.00010923894586460322,
      "loss": 0.9413,
      "step": 3053
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.23648503422737122,
      "learning_rate": 0.00010918747777033989,
      "loss": 0.778,
      "step": 3054
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.41874760389328003,
      "learning_rate": 0.00010913600722151101,
      "loss": 0.7113,
      "step": 3055
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.4575808346271515,
      "learning_rate": 0.00010908453423186758,
      "loss": 0.8888,
      "step": 3056
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.2462533712387085,
      "learning_rate": 0.00010903305881516134,
      "loss": 0.839,
      "step": 3057
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.31322476267814636,
      "learning_rate": 0.00010898158098514476,
      "loss": 0.8137,
      "step": 3058
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.340364009141922,
      "learning_rate": 0.0001089301007555708,
      "loss": 0.8542,
      "step": 3059
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.23703595995903015,
      "learning_rate": 0.0001088786181401932,
      "loss": 0.7988,
      "step": 3060
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.21414496004581451,
      "learning_rate": 0.0001088271331527662,
      "loss": 0.6632,
      "step": 3061
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.42735469341278076,
      "learning_rate": 0.0001087756458070448,
      "loss": 0.9779,
      "step": 3062
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.36254066228866577,
      "learning_rate": 0.00010872415611678459,
      "loss": 0.8493,
      "step": 3063
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.3355332314968109,
      "learning_rate": 0.0001086726640957417,
      "loss": 0.7913,
      "step": 3064
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.22927650809288025,
      "learning_rate": 0.00010862116975767306,
      "loss": 0.8805,
      "step": 3065
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.29326000809669495,
      "learning_rate": 0.00010856967311633606,
      "loss": 0.7719,
      "step": 3066
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.4005374610424042,
      "learning_rate": 0.00010851817418548873,
      "loss": 0.5268,
      "step": 3067
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.46557849645614624,
      "learning_rate": 0.00010846667297888977,
      "loss": 0.7532,
      "step": 3068
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.246930330991745,
      "learning_rate": 0.00010841516951029851,
      "loss": 0.8949,
      "step": 3069
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.36515599489212036,
      "learning_rate": 0.0001083636637934748,
      "loss": 0.853,
      "step": 3070
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.7391675710678101,
      "learning_rate": 0.0001083121558421791,
      "loss": 0.8569,
      "step": 3071
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.20786352455615997,
      "learning_rate": 0.00010826064567017252,
      "loss": 0.9305,
      "step": 3072
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.3507573902606964,
      "learning_rate": 0.00010820913329121671,
      "loss": 0.9109,
      "step": 3073
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.45345205068588257,
      "learning_rate": 0.000108157618719074,
      "loss": 1.0981,
      "step": 3074
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.1516105979681015,
      "learning_rate": 0.00010810610196750716,
      "loss": 0.7575,
      "step": 3075
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.5163320302963257,
      "learning_rate": 0.00010805458305027967,
      "loss": 0.8685,
      "step": 3076
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.34852561354637146,
      "learning_rate": 0.00010800306198115558,
      "loss": 0.9418,
      "step": 3077
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.3732437789440155,
      "learning_rate": 0.00010795153877389937,
      "loss": 0.8334,
      "step": 3078
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.2697422504425049,
      "learning_rate": 0.00010790001344227634,
      "loss": 0.7015,
      "step": 3079
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.1299266219139099,
      "learning_rate": 0.00010784848600005207,
      "loss": 0.742,
      "step": 3080
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.2921143174171448,
      "learning_rate": 0.00010779695646099295,
      "loss": 1.0333,
      "step": 3081
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.3723088502883911,
      "learning_rate": 0.00010774542483886581,
      "loss": 0.8292,
      "step": 3082
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.23153838515281677,
      "learning_rate": 0.00010769389114743802,
      "loss": 0.7805,
      "step": 3083
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.3332394063472748,
      "learning_rate": 0.00010764235540047759,
      "loss": 0.7258,
      "step": 3084
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.7492542266845703,
      "learning_rate": 0.000107590817611753,
      "loss": 0.7606,
      "step": 3085
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.3788118064403534,
      "learning_rate": 0.0001075392777950333,
      "loss": 0.9302,
      "step": 3086
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.3751322329044342,
      "learning_rate": 0.00010748773596408814,
      "loss": 0.8218,
      "step": 3087
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.273887574672699,
      "learning_rate": 0.00010743619213268759,
      "loss": 0.8832,
      "step": 3088
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.3119448125362396,
      "learning_rate": 0.00010738464631460232,
      "loss": 0.7666,
      "step": 3089
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.2176056206226349,
      "learning_rate": 0.00010733309852360358,
      "loss": 0.8716,
      "step": 3090
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.26532769203186035,
      "learning_rate": 0.00010728154877346306,
      "loss": 0.7451,
      "step": 3091
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.2239433228969574,
      "learning_rate": 0.00010722999707795302,
      "loss": 0.7719,
      "step": 3092
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.622904360294342,
      "learning_rate": 0.00010717844345084627,
      "loss": 0.5826,
      "step": 3093
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.3516522943973541,
      "learning_rate": 0.00010712688790591599,
      "loss": 0.7429,
      "step": 3094
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.20540213584899902,
      "learning_rate": 0.0001070753304569361,
      "loss": 0.7371,
      "step": 3095
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.3183198571205139,
      "learning_rate": 0.0001070237711176808,
      "loss": 0.7696,
      "step": 3096
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.3106946647167206,
      "learning_rate": 0.00010697220990192498,
      "loss": 0.9104,
      "step": 3097
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.43778422474861145,
      "learning_rate": 0.00010692064682344394,
      "loss": 0.7332,
      "step": 3098
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.19792649149894714,
      "learning_rate": 0.00010686908189601346,
      "loss": 0.7435,
      "step": 3099
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.25175124406814575,
      "learning_rate": 0.00010681751513340985,
      "loss": 0.8348,
      "step": 3100
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.526390552520752,
      "learning_rate": 0.00010676594654940996,
      "loss": 0.6063,
      "step": 3101
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.311633825302124,
      "learning_rate": 0.00010671437615779103,
      "loss": 0.8958,
      "step": 3102
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.47043606638908386,
      "learning_rate": 0.00010666280397233081,
      "loss": 0.8191,
      "step": 3103
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.20610438287258148,
      "learning_rate": 0.00010661123000680754,
      "loss": 0.7715,
      "step": 3104
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.3449203073978424,
      "learning_rate": 0.00010655965427499996,
      "loss": 0.9018,
      "step": 3105
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.416167289018631,
      "learning_rate": 0.00010650807679068731,
      "loss": 1.002,
      "step": 3106
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.21004118025302887,
      "learning_rate": 0.00010645649756764918,
      "loss": 0.5207,
      "step": 3107
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.290141224861145,
      "learning_rate": 0.0001064049166196657,
      "loss": 0.9879,
      "step": 3108
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.28439775109291077,
      "learning_rate": 0.0001063533339605175,
      "loss": 0.6992,
      "step": 3109
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.4623754620552063,
      "learning_rate": 0.00010630174960398556,
      "loss": 0.8189,
      "step": 3110
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.3442874848842621,
      "learning_rate": 0.00010625016356385146,
      "loss": 0.9057,
      "step": 3111
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.6388236880302429,
      "learning_rate": 0.00010619857585389705,
      "loss": 0.6175,
      "step": 3112
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.1962784379720688,
      "learning_rate": 0.00010614698648790477,
      "loss": 0.6844,
      "step": 3113
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.21710877120494843,
      "learning_rate": 0.00010609539547965748,
      "loss": 0.7617,
      "step": 3114
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.2841540575027466,
      "learning_rate": 0.0001060438028429384,
      "loss": 0.759,
      "step": 3115
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.21852611005306244,
      "learning_rate": 0.00010599220859153129,
      "loss": 0.8221,
      "step": 3116
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.3717159330844879,
      "learning_rate": 0.00010594061273922025,
      "loss": 1.0366,
      "step": 3117
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.3717159330844879,
      "learning_rate": 0.00010594061273922025,
      "loss": 1.0717,
      "step": 3118
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.15747883915901184,
      "learning_rate": 0.00010588901529978985,
      "loss": 0.644,
      "step": 3119
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.24505460262298584,
      "learning_rate": 0.0001058374162870251,
      "loss": 0.7192,
      "step": 3120
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.6812542676925659,
      "learning_rate": 0.00010578581571471143,
      "loss": 0.7557,
      "step": 3121
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.3011525869369507,
      "learning_rate": 0.0001057342135966346,
      "loss": 0.7447,
      "step": 3122
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.1746830940246582,
      "learning_rate": 0.00010568260994658092,
      "loss": 0.7348,
      "step": 3123
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.17373815178871155,
      "learning_rate": 0.00010563100477833698,
      "loss": 0.7917,
      "step": 3124
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.22639842331409454,
      "learning_rate": 0.00010557939810568991,
      "loss": 0.5175,
      "step": 3125
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.196741983294487,
      "learning_rate": 0.00010552778994242711,
      "loss": 0.815,
      "step": 3126
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.19835391640663147,
      "learning_rate": 0.00010547618030233643,
      "loss": 0.8216,
      "step": 3127
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.35692158341407776,
      "learning_rate": 0.00010542456919920619,
      "loss": 1.0555,
      "step": 3128
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.22457629442214966,
      "learning_rate": 0.00010537295664682494,
      "loss": 0.6934,
      "step": 3129
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.24273531138896942,
      "learning_rate": 0.00010532134265898179,
      "loss": 0.9149,
      "step": 3130
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.23614177107810974,
      "learning_rate": 0.0001052697272494661,
      "loss": 0.7465,
      "step": 3131
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.6459603309631348,
      "learning_rate": 0.00010521811043206769,
      "loss": 0.5147,
      "step": 3132
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.28488796949386597,
      "learning_rate": 0.0001051664922205767,
      "loss": 0.6451,
      "step": 3133
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.22585193812847137,
      "learning_rate": 0.0001051148726287837,
      "loss": 0.9833,
      "step": 3134
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.22649653255939484,
      "learning_rate": 0.00010506325167047962,
      "loss": 0.8465,
      "step": 3135
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.5283639430999756,
      "learning_rate": 0.00010501162935945565,
      "loss": 0.5171,
      "step": 3136
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.306403249502182,
      "learning_rate": 0.0001049600057095035,
      "loss": 0.9474,
      "step": 3137
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.26546573638916016,
      "learning_rate": 0.00010490838073441514,
      "loss": 0.6774,
      "step": 3138
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.3034471273422241,
      "learning_rate": 0.00010485675444798293,
      "loss": 0.7105,
      "step": 3139
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.2220892757177353,
      "learning_rate": 0.00010480512686399955,
      "loss": 0.9161,
      "step": 3140
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.24971811473369598,
      "learning_rate": 0.00010475349799625805,
      "loss": 0.917,
      "step": 3141
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.2358618527650833,
      "learning_rate": 0.00010470186785855183,
      "loss": 0.7316,
      "step": 3142
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.20283910632133484,
      "learning_rate": 0.00010465023646467458,
      "loss": 0.802,
      "step": 3143
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.6491701602935791,
      "learning_rate": 0.00010459860382842041,
      "loss": 0.9327,
      "step": 3144
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.26686128973960876,
      "learning_rate": 0.00010454696996358373,
      "loss": 0.9033,
      "step": 3145
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.20684444904327393,
      "learning_rate": 0.00010449533488395917,
      "loss": 0.6059,
      "step": 3146
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.3677475154399872,
      "learning_rate": 0.00010444369860334187,
      "loss": 0.9294,
      "step": 3147
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.24735304713249207,
      "learning_rate": 0.00010439206113552715,
      "loss": 0.9195,
      "step": 3148
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.33339959383010864,
      "learning_rate": 0.00010434042249431073,
      "loss": 0.7229,
      "step": 3149
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.2718801200389862,
      "learning_rate": 0.00010428878269348857,
      "loss": 0.84,
      "step": 3150
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.47579002380371094,
      "learning_rate": 0.000104237141746857,
      "loss": 0.9574,
      "step": 3151
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.4502701461315155,
      "learning_rate": 0.00010418549966821263,
      "loss": 1.1644,
      "step": 3152
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.2888058125972748,
      "learning_rate": 0.00010413385647135239,
      "loss": 0.692,
      "step": 3153
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.2874656021595001,
      "learning_rate": 0.00010408221217007346,
      "loss": 0.8432,
      "step": 3154
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.26786521077156067,
      "learning_rate": 0.00010403056677817338,
      "loss": 0.8938,
      "step": 3155
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.2772689759731293,
      "learning_rate": 0.00010397892030944996,
      "loss": 0.7099,
      "step": 3156
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.22561649978160858,
      "learning_rate": 0.00010392727277770126,
      "loss": 0.7448,
      "step": 3157
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.7231593132019043,
      "learning_rate": 0.0001038756241967257,
      "loss": 0.7477,
      "step": 3158
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.4519484043121338,
      "learning_rate": 0.00010382397458032189,
      "loss": 0.5556,
      "step": 3159
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.4556506872177124,
      "learning_rate": 0.00010377232394228877,
      "loss": 0.982,
      "step": 3160
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.35334891080856323,
      "learning_rate": 0.00010372067229642555,
      "loss": 1.0938,
      "step": 3161
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.25475257635116577,
      "learning_rate": 0.00010366901965653173,
      "loss": 0.7326,
      "step": 3162
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.3150332272052765,
      "learning_rate": 0.00010361736603640702,
      "loss": 0.9823,
      "step": 3163
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.19032412767410278,
      "learning_rate": 0.00010356571144985141,
      "loss": 0.7807,
      "step": 3164
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.2799344062805176,
      "learning_rate": 0.00010351405591066516,
      "loss": 0.768,
      "step": 3165
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.38938450813293457,
      "learning_rate": 0.00010346239943264881,
      "loss": 1.0232,
      "step": 3166
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.3641057312488556,
      "learning_rate": 0.00010341074202960313,
      "loss": 0.8673,
      "step": 3167
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.23009689152240753,
      "learning_rate": 0.0001033590837153291,
      "loss": 0.6411,
      "step": 3168
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.21581460535526276,
      "learning_rate": 0.00010330742450362797,
      "loss": 0.7839,
      "step": 3169
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.2749083340167999,
      "learning_rate": 0.00010325576440830126,
      "loss": 0.9101,
      "step": 3170
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.6485013961791992,
      "learning_rate": 0.0001032041034431507,
      "loss": 0.8713,
      "step": 3171
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.4369968771934509,
      "learning_rate": 0.00010315244162197826,
      "loss": 0.771,
      "step": 3172
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.2947264611721039,
      "learning_rate": 0.0001031007789585861,
      "loss": 0.9066,
      "step": 3173
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.7055186033248901,
      "learning_rate": 0.00010304911546677665,
      "loss": 0.8834,
      "step": 3174
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.3607613742351532,
      "learning_rate": 0.00010299745116035253,
      "loss": 0.885,
      "step": 3175
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.2375394105911255,
      "learning_rate": 0.00010294578605311666,
      "loss": 0.6846,
      "step": 3176
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.29870152473449707,
      "learning_rate": 0.00010289412015887205,
      "loss": 0.955,
      "step": 3177
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.18760500848293304,
      "learning_rate": 0.000102842453491422,
      "loss": 0.5977,
      "step": 3178
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.3026891052722931,
      "learning_rate": 0.00010279078606457,
      "loss": 0.7645,
      "step": 3179
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.6108086109161377,
      "learning_rate": 0.00010273911789211973,
      "loss": 1.0617,
      "step": 3180
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.18460464477539062,
      "learning_rate": 0.00010268744898787505,
      "loss": 0.882,
      "step": 3181
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.28730642795562744,
      "learning_rate": 0.00010263577936564012,
      "loss": 0.7298,
      "step": 3182
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.39684516191482544,
      "learning_rate": 0.00010258410903921913,
      "loss": 0.9043,
      "step": 3183
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.24682961404323578,
      "learning_rate": 0.00010253243802241664,
      "loss": 0.717,
      "step": 3184
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.3538999557495117,
      "learning_rate": 0.00010248076632903721,
      "loss": 0.7172,
      "step": 3185
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.23744706809520721,
      "learning_rate": 0.0001024290939728857,
      "loss": 0.6766,
      "step": 3186
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.2755926549434662,
      "learning_rate": 0.00010237742096776713,
      "loss": 0.7003,
      "step": 3187
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.634932816028595,
      "learning_rate": 0.00010232574732748666,
      "loss": 0.6842,
      "step": 3188
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.175834059715271,
      "learning_rate": 0.00010227407306584964,
      "loss": 0.6834,
      "step": 3189
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.22007973492145538,
      "learning_rate": 0.00010222239819666162,
      "loss": 0.5176,
      "step": 3190
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.5797334909439087,
      "learning_rate": 0.00010217072273372823,
      "loss": 0.887,
      "step": 3191
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.23916390538215637,
      "learning_rate": 0.00010211904669085534,
      "loss": 0.8032,
      "step": 3192
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.38923925161361694,
      "learning_rate": 0.00010206737008184893,
      "loss": 0.9872,
      "step": 3193
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.27395859360694885,
      "learning_rate": 0.00010201569292051513,
      "loss": 0.8412,
      "step": 3194
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.28214922547340393,
      "learning_rate": 0.00010196401522066026,
      "loss": 0.6051,
      "step": 3195
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.20467564463615417,
      "learning_rate": 0.00010191233699609071,
      "loss": 0.7094,
      "step": 3196
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.2819322943687439,
      "learning_rate": 0.00010186065826061308,
      "loss": 0.8992,
      "step": 3197
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.23108454048633575,
      "learning_rate": 0.0001018089790280341,
      "loss": 0.9379,
      "step": 3198
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.3470602333545685,
      "learning_rate": 0.00010175729931216061,
      "loss": 0.9685,
      "step": 3199
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.2665245234966278,
      "learning_rate": 0.00010170561912679954,
      "loss": 0.7844,
      "step": 3200
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.41944262385368347,
      "learning_rate": 0.00010165393848575802,
      "loss": 0.9156,
      "step": 3201
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.2366163730621338,
      "learning_rate": 0.00010160225740284324,
      "loss": 0.8906,
      "step": 3202
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.23420272767543793,
      "learning_rate": 0.00010155057589186259,
      "loss": 0.6525,
      "step": 3203
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.3132653832435608,
      "learning_rate": 0.00010149889396662352,
      "loss": 1.0493,
      "step": 3204
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.5364794135093689,
      "learning_rate": 0.00010144721164093352,
      "loss": 0.956,
      "step": 3205
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.1543562114238739,
      "learning_rate": 0.00010139552892860031,
      "loss": 0.6239,
      "step": 3206
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.18535557389259338,
      "learning_rate": 0.00010134384584343167,
      "loss": 0.8921,
      "step": 3207
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.27767637372016907,
      "learning_rate": 0.00010129216239923546,
      "loss": 0.8203,
      "step": 3208
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.4932328462600708,
      "learning_rate": 0.00010124047860981969,
      "loss": 0.9512,
      "step": 3209
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.29586851596832275,
      "learning_rate": 0.00010118879448899232,
      "loss": 0.8963,
      "step": 3210
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.4779726266860962,
      "learning_rate": 0.00010113711005056162,
      "loss": 0.4972,
      "step": 3211
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.3479253351688385,
      "learning_rate": 0.00010108542530833571,
      "loss": 0.7064,
      "step": 3212
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.2772933840751648,
      "learning_rate": 0.000101033740276123,
      "loss": 0.7241,
      "step": 3213
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.3919409513473511,
      "learning_rate": 0.00010098205496773183,
      "loss": 1.0314,
      "step": 3214
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.1637454777956009,
      "learning_rate": 0.00010093036939697066,
      "loss": 0.7863,
      "step": 3215
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.254162073135376,
      "learning_rate": 0.00010087868357764809,
      "loss": 0.8243,
      "step": 3216
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.3214544355869293,
      "learning_rate": 0.00010082699752357268,
      "loss": 0.9644,
      "step": 3217
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.40076744556427,
      "learning_rate": 0.00010077531124855306,
      "loss": 0.7771,
      "step": 3218
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.22720611095428467,
      "learning_rate": 0.000100723624766398,
      "loss": 0.7346,
      "step": 3219
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.24646024405956268,
      "learning_rate": 0.00010067193809091628,
      "loss": 0.6452,
      "step": 3220
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.21785488724708557,
      "learning_rate": 0.00010062025123591672,
      "loss": 0.8118,
      "step": 3221
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.2427283376455307,
      "learning_rate": 0.00010056856421520824,
      "loss": 0.7999,
      "step": 3222
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.2753644585609436,
      "learning_rate": 0.00010051687704259966,
      "loss": 0.7443,
      "step": 3223
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.35600045323371887,
      "learning_rate": 0.00010046518973190007,
      "loss": 0.9955,
      "step": 3224
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.6324539184570312,
      "learning_rate": 0.00010041350229691838,
      "loss": 0.7726,
      "step": 3225
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.23245535790920258,
      "learning_rate": 0.00010036181475146367,
      "loss": 0.9419,
      "step": 3226
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.28296005725860596,
      "learning_rate": 0.000100310127109345,
      "loss": 0.8153,
      "step": 3227
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.630694568157196,
      "learning_rate": 0.00010025843938437143,
      "loss": 0.6646,
      "step": 3228
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.7314017415046692,
      "learning_rate": 0.00010020675159035211,
      "loss": 0.7877,
      "step": 3229
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.26228249073028564,
      "learning_rate": 0.00010015506374109616,
      "loss": 0.873,
      "step": 3230
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.2954980432987213,
      "learning_rate": 0.00010010337585041272,
      "loss": 0.7706,
      "step": 3231
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.2500395178794861,
      "learning_rate": 0.00010005168793211097,
      "loss": 0.7419,
      "step": 3232
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.32623225450515747,
      "learning_rate": 0.0001,
      "loss": 0.6691,
      "step": 3233
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.23068250715732574,
      "learning_rate": 9.994831206788908e-05,
      "loss": 0.8459,
      "step": 3234
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.258783757686615,
      "learning_rate": 9.989662414958729e-05,
      "loss": 0.736,
      "step": 3235
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.16538068652153015,
      "learning_rate": 9.984493625890386e-05,
      "loss": 0.8818,
      "step": 3236
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.6218604445457458,
      "learning_rate": 9.979324840964793e-05,
      "loss": 0.9253,
      "step": 3237
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.25533154606819153,
      "learning_rate": 9.974156061562857e-05,
      "loss": 0.8175,
      "step": 3238
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.4421786069869995,
      "learning_rate": 9.968987289065502e-05,
      "loss": 0.6886,
      "step": 3239
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.22418828308582306,
      "learning_rate": 9.963818524853636e-05,
      "loss": 0.9014,
      "step": 3240
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.2449357658624649,
      "learning_rate": 9.958649770308167e-05,
      "loss": 0.5146,
      "step": 3241
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.8105624318122864,
      "learning_rate": 9.953481026809996e-05,
      "loss": 0.9013,
      "step": 3242
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.294261634349823,
      "learning_rate": 9.948312295740036e-05,
      "loss": 0.6099,
      "step": 3243
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.2817838191986084,
      "learning_rate": 9.943143578479181e-05,
      "loss": 0.8916,
      "step": 3244
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.16446493566036224,
      "learning_rate": 9.937974876408329e-05,
      "loss": 0.7483,
      "step": 3245
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.3871713876724243,
      "learning_rate": 9.932806190908374e-05,
      "loss": 0.9691,
      "step": 3246
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.40038394927978516,
      "learning_rate": 9.927637523360202e-05,
      "loss": 0.8479,
      "step": 3247
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.504966139793396,
      "learning_rate": 9.922468875144695e-05,
      "loss": 0.5126,
      "step": 3248
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.2688159942626953,
      "learning_rate": 9.917300247642734e-05,
      "loss": 0.7321,
      "step": 3249
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.32948148250579834,
      "learning_rate": 9.912131642235195e-05,
      "loss": 0.8968,
      "step": 3250
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.2551119923591614,
      "learning_rate": 9.906963060302933e-05,
      "loss": 0.7723,
      "step": 3251
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.24546629190444946,
      "learning_rate": 9.901794503226818e-05,
      "loss": 0.8027,
      "step": 3252
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.21390439569950104,
      "learning_rate": 9.896625972387702e-05,
      "loss": 0.8349,
      "step": 3253
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.4997202754020691,
      "learning_rate": 9.891457469166429e-05,
      "loss": 0.7299,
      "step": 3254
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.2807074785232544,
      "learning_rate": 9.88628899494384e-05,
      "loss": 0.8528,
      "step": 3255
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.15749545395374298,
      "learning_rate": 9.881120551100769e-05,
      "loss": 0.7802,
      "step": 3256
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.22528019547462463,
      "learning_rate": 9.875952139018036e-05,
      "loss": 0.4995,
      "step": 3257
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.34052178263664246,
      "learning_rate": 9.870783760076455e-05,
      "loss": 0.8513,
      "step": 3258
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.18752895295619965,
      "learning_rate": 9.865615415656834e-05,
      "loss": 0.7996,
      "step": 3259
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.2969367206096649,
      "learning_rate": 9.860447107139972e-05,
      "loss": 1.1495,
      "step": 3260
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.8937594294548035,
      "learning_rate": 9.855278835906649e-05,
      "loss": 1.0303,
      "step": 3261
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.2295820415019989,
      "learning_rate": 9.850110603337651e-05,
      "loss": 0.7888,
      "step": 3262
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.1408931016921997,
      "learning_rate": 9.844942410813742e-05,
      "loss": 0.7598,
      "step": 3263
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.232910618185997,
      "learning_rate": 9.839774259715677e-05,
      "loss": 0.9503,
      "step": 3264
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.3356793224811554,
      "learning_rate": 9.8346061514242e-05,
      "loss": 0.6535,
      "step": 3265
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.2920728325843811,
      "learning_rate": 9.82943808732005e-05,
      "loss": 0.8258,
      "step": 3266
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.2502315640449524,
      "learning_rate": 9.824270068783941e-05,
      "loss": 0.6968,
      "step": 3267
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.20423553884029388,
      "learning_rate": 9.819102097196591e-05,
      "loss": 0.7773,
      "step": 3268
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.2589471638202667,
      "learning_rate": 9.813934173938694e-05,
      "loss": 0.8537,
      "step": 3269
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.411310613155365,
      "learning_rate": 9.808766300390929e-05,
      "loss": 0.8959,
      "step": 3270
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.2334451973438263,
      "learning_rate": 9.803598477933976e-05,
      "loss": 0.8493,
      "step": 3271
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.28303515911102295,
      "learning_rate": 9.798430707948489e-05,
      "loss": 0.8382,
      "step": 3272
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.4037236273288727,
      "learning_rate": 9.793262991815113e-05,
      "loss": 0.9196,
      "step": 3273
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.38257288932800293,
      "learning_rate": 9.788095330914467e-05,
      "loss": 0.8256,
      "step": 3274
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.5805515050888062,
      "learning_rate": 9.782927726627179e-05,
      "loss": 0.6843,
      "step": 3275
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.865494430065155,
      "learning_rate": 9.777760180333843e-05,
      "loss": 1.1591,
      "step": 3276
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.6177706718444824,
      "learning_rate": 9.772592693415037e-05,
      "loss": 0.7516,
      "step": 3277
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.30479851365089417,
      "learning_rate": 9.767425267251338e-05,
      "loss": 0.9377,
      "step": 3278
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.28255948424339294,
      "learning_rate": 9.762257903223292e-05,
      "loss": 0.8561,
      "step": 3279
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.30989718437194824,
      "learning_rate": 9.757090602711431e-05,
      "loss": 0.9575,
      "step": 3280
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.2530398368835449,
      "learning_rate": 9.75192336709628e-05,
      "loss": 0.9224,
      "step": 3281
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.3672225773334503,
      "learning_rate": 9.746756197758341e-05,
      "loss": 0.6418,
      "step": 3282
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.33920198678970337,
      "learning_rate": 9.741589096078085e-05,
      "loss": 0.859,
      "step": 3283
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.2343384176492691,
      "learning_rate": 9.73642206343599e-05,
      "loss": 0.6794,
      "step": 3284
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.20391607284545898,
      "learning_rate": 9.731255101212496e-05,
      "loss": 0.8947,
      "step": 3285
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.3624059855937958,
      "learning_rate": 9.726088210788028e-05,
      "loss": 0.8551,
      "step": 3286
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.18005740642547607,
      "learning_rate": 9.720921393543002e-05,
      "loss": 0.7345,
      "step": 3287
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.69180828332901,
      "learning_rate": 9.715754650857802e-05,
      "loss": 0.8543,
      "step": 3288
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.28390592336654663,
      "learning_rate": 9.710587984112797e-05,
      "loss": 0.8679,
      "step": 3289
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.28254181146621704,
      "learning_rate": 9.705421394688336e-05,
      "loss": 0.8413,
      "step": 3290
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.29257941246032715,
      "learning_rate": 9.700254883964748e-05,
      "loss": 1.0576,
      "step": 3291
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.7199087738990784,
      "learning_rate": 9.69508845332234e-05,
      "loss": 0.779,
      "step": 3292
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.3297320604324341,
      "learning_rate": 9.689922104141391e-05,
      "loss": 0.975,
      "step": 3293
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.26131799817085266,
      "learning_rate": 9.684755837802176e-05,
      "loss": 0.7464,
      "step": 3294
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.2577565908432007,
      "learning_rate": 9.679589655684931e-05,
      "loss": 0.7368,
      "step": 3295
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.3181924521923065,
      "learning_rate": 9.674423559169874e-05,
      "loss": 0.9707,
      "step": 3296
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.42512068152427673,
      "learning_rate": 9.669257549637204e-05,
      "loss": 1.0401,
      "step": 3297
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.248337522149086,
      "learning_rate": 9.664091628467094e-05,
      "loss": 0.775,
      "step": 3298
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.1850123554468155,
      "learning_rate": 9.658925797039688e-05,
      "loss": 0.6475,
      "step": 3299
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.18750035762786865,
      "learning_rate": 9.653760056735121e-05,
      "loss": 0.7703,
      "step": 3300
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.3099617660045624,
      "learning_rate": 9.648594408933486e-05,
      "loss": 0.8906,
      "step": 3301
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.13829462230205536,
      "learning_rate": 9.64342885501486e-05,
      "loss": 0.7638,
      "step": 3302
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.25076496601104736,
      "learning_rate": 9.6382633963593e-05,
      "loss": 0.9312,
      "step": 3303
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.42542362213134766,
      "learning_rate": 9.633098034346829e-05,
      "loss": 0.6378,
      "step": 3304
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.3663657605648041,
      "learning_rate": 9.627932770357449e-05,
      "loss": 0.9412,
      "step": 3305
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.3823757469654083,
      "learning_rate": 9.622767605771124e-05,
      "loss": 0.8551,
      "step": 3306
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.22222024202346802,
      "learning_rate": 9.617602541967814e-05,
      "loss": 0.5557,
      "step": 3307
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.39970070123672485,
      "learning_rate": 9.612437580327434e-05,
      "loss": 0.8625,
      "step": 3308
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.38432052731513977,
      "learning_rate": 9.607272722229875e-05,
      "loss": 0.9433,
      "step": 3309
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.4429771304130554,
      "learning_rate": 9.602107969055008e-05,
      "loss": 0.8761,
      "step": 3310
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.23614515364170074,
      "learning_rate": 9.596943322182666e-05,
      "loss": 0.865,
      "step": 3311
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.3215290307998657,
      "learning_rate": 9.591778782992655e-05,
      "loss": 0.7789,
      "step": 3312
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.5765694975852966,
      "learning_rate": 9.586614352864765e-05,
      "loss": 0.77,
      "step": 3313
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.19592560827732086,
      "learning_rate": 9.581450033178742e-05,
      "loss": 0.989,
      "step": 3314
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.2387695610523224,
      "learning_rate": 9.576285825314302e-05,
      "loss": 0.6459,
      "step": 3315
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.29308128356933594,
      "learning_rate": 9.571121730651144e-05,
      "loss": 0.6673,
      "step": 3316
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.2647705376148224,
      "learning_rate": 9.56595775056893e-05,
      "loss": 0.7931,
      "step": 3317
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.2996886074542999,
      "learning_rate": 9.560793886447285e-05,
      "loss": 0.8745,
      "step": 3318
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.27186229825019836,
      "learning_rate": 9.555630139665814e-05,
      "loss": 0.7642,
      "step": 3319
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.37688979506492615,
      "learning_rate": 9.550466511604084e-05,
      "loss": 0.8502,
      "step": 3320
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.47079357504844666,
      "learning_rate": 9.545303003641633e-05,
      "loss": 1.0951,
      "step": 3321
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.21746276319026947,
      "learning_rate": 9.54013961715796e-05,
      "loss": 0.8257,
      "step": 3322
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.235815167427063,
      "learning_rate": 9.534976353532544e-05,
      "loss": 0.6272,
      "step": 3323
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.13907839357852936,
      "learning_rate": 9.529813214144822e-05,
      "loss": 0.833,
      "step": 3324
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.3250106871128082,
      "learning_rate": 9.524650200374195e-05,
      "loss": 0.7479,
      "step": 3325
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.1799558848142624,
      "learning_rate": 9.519487313600047e-05,
      "loss": 0.7193,
      "step": 3326
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.28121218085289,
      "learning_rate": 9.514324555201711e-05,
      "loss": 0.9401,
      "step": 3327
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.3599085807800293,
      "learning_rate": 9.509161926558487e-05,
      "loss": 1.0407,
      "step": 3328
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.28580212593078613,
      "learning_rate": 9.503999429049653e-05,
      "loss": 0.8003,
      "step": 3329
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.33252623677253723,
      "learning_rate": 9.498837064054437e-05,
      "loss": 0.6751,
      "step": 3330
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.257826030254364,
      "learning_rate": 9.493674832952042e-05,
      "loss": 0.8186,
      "step": 3331
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.2241230010986328,
      "learning_rate": 9.488512737121631e-05,
      "loss": 0.7391,
      "step": 3332
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.5352124571800232,
      "learning_rate": 9.483350777942333e-05,
      "loss": 0.9821,
      "step": 3333
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.33843955397605896,
      "learning_rate": 9.478188956793231e-05,
      "loss": 0.8781,
      "step": 3334
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.18267802894115448,
      "learning_rate": 9.47302727505339e-05,
      "loss": 0.7802,
      "step": 3335
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.5874035358428955,
      "learning_rate": 9.467865734101822e-05,
      "loss": 0.6614,
      "step": 3336
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.2415754795074463,
      "learning_rate": 9.462704335317507e-05,
      "loss": 0.7501,
      "step": 3337
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.49921202659606934,
      "learning_rate": 9.457543080079382e-05,
      "loss": 0.7245,
      "step": 3338
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.26906466484069824,
      "learning_rate": 9.452381969766358e-05,
      "loss": 0.9481,
      "step": 3339
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.3482654094696045,
      "learning_rate": 9.447221005757292e-05,
      "loss": 0.7465,
      "step": 3340
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.3697846233844757,
      "learning_rate": 9.442060189431012e-05,
      "loss": 0.8152,
      "step": 3341
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.17288118600845337,
      "learning_rate": 9.436899522166303e-05,
      "loss": 0.8648,
      "step": 3342
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.19033671915531158,
      "learning_rate": 9.431739005341913e-05,
      "loss": 0.9534,
      "step": 3343
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.17398661375045776,
      "learning_rate": 9.42657864033654e-05,
      "loss": 1.151,
      "step": 3344
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.19255024194717407,
      "learning_rate": 9.42141842852886e-05,
      "loss": 0.8493,
      "step": 3345
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.1830521821975708,
      "learning_rate": 9.416258371297493e-05,
      "loss": 0.8332,
      "step": 3346
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.2607920169830322,
      "learning_rate": 9.411098470021014e-05,
      "loss": 0.929,
      "step": 3347
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.3028251528739929,
      "learning_rate": 9.405938726077976e-05,
      "loss": 0.9378,
      "step": 3348
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.18208809196949005,
      "learning_rate": 9.400779140846874e-05,
      "loss": 0.5881,
      "step": 3349
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.32315412163734436,
      "learning_rate": 9.39561971570616e-05,
      "loss": 0.961,
      "step": 3350
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.32520976662635803,
      "learning_rate": 9.390460452034254e-05,
      "loss": 1.0171,
      "step": 3351
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.34931373596191406,
      "learning_rate": 9.385301351209525e-05,
      "loss": 0.9404,
      "step": 3352
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.3779484033584595,
      "learning_rate": 9.380142414610298e-05,
      "loss": 0.8784,
      "step": 3353
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.2796642780303955,
      "learning_rate": 9.374983643614856e-05,
      "loss": 0.8105,
      "step": 3354
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.18315735459327698,
      "learning_rate": 9.369825039601447e-05,
      "loss": 0.9027,
      "step": 3355
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.18146878480911255,
      "learning_rate": 9.364666603948255e-05,
      "loss": 0.777,
      "step": 3356
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.36429864168167114,
      "learning_rate": 9.359508338033431e-05,
      "loss": 0.7731,
      "step": 3357
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.5104894042015076,
      "learning_rate": 9.354350243235083e-05,
      "loss": 0.749,
      "step": 3358
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.6026068925857544,
      "learning_rate": 9.349192320931271e-05,
      "loss": 0.7988,
      "step": 3359
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.2762569785118103,
      "learning_rate": 9.344034572500002e-05,
      "loss": 0.8632,
      "step": 3360
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.7598171234130859,
      "learning_rate": 9.338876999319248e-05,
      "loss": 0.7717,
      "step": 3361
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.23177488148212433,
      "learning_rate": 9.333719602766924e-05,
      "loss": 0.8978,
      "step": 3362
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.20303896069526672,
      "learning_rate": 9.3285623842209e-05,
      "loss": 0.5954,
      "step": 3363
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.13323166966438293,
      "learning_rate": 9.323405345059006e-05,
      "loss": 0.6453,
      "step": 3364
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.17389382421970367,
      "learning_rate": 9.318248486659016e-05,
      "loss": 0.587,
      "step": 3365
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.4265141189098358,
      "learning_rate": 9.313091810398654e-05,
      "loss": 0.8591,
      "step": 3366
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.1891917884349823,
      "learning_rate": 9.307935317655607e-05,
      "loss": 0.8162,
      "step": 3367
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.2899455428123474,
      "learning_rate": 9.302779009807503e-05,
      "loss": 0.6799,
      "step": 3368
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.24124981462955475,
      "learning_rate": 9.297622888231922e-05,
      "loss": 0.8079,
      "step": 3369
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.2932463586330414,
      "learning_rate": 9.292466954306394e-05,
      "loss": 0.8946,
      "step": 3370
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.31478774547576904,
      "learning_rate": 9.287311209408402e-05,
      "loss": 0.8902,
      "step": 3371
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.22006294131278992,
      "learning_rate": 9.282155654915379e-05,
      "loss": 0.7454,
      "step": 3372
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.15693537890911102,
      "learning_rate": 9.277000292204698e-05,
      "loss": 0.8119,
      "step": 3373
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.5797706246376038,
      "learning_rate": 9.271845122653696e-05,
      "loss": 0.7589,
      "step": 3374
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.19110794365406036,
      "learning_rate": 9.266690147639644e-05,
      "loss": 0.9329,
      "step": 3375
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.36945492029190063,
      "learning_rate": 9.261535368539768e-05,
      "loss": 1.021,
      "step": 3376
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.21095068752765656,
      "learning_rate": 9.256380786731244e-05,
      "loss": 0.7838,
      "step": 3377
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.22701327502727509,
      "learning_rate": 9.25122640359119e-05,
      "loss": 0.8786,
      "step": 3378
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.2241186946630478,
      "learning_rate": 9.24607222049667e-05,
      "loss": 0.7792,
      "step": 3379
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.1613563895225525,
      "learning_rate": 9.240918238824702e-05,
      "loss": 0.8223,
      "step": 3380
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.16215018928050995,
      "learning_rate": 9.235764459952244e-05,
      "loss": 0.7529,
      "step": 3381
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.2311634123325348,
      "learning_rate": 9.2306108852562e-05,
      "loss": 0.796,
      "step": 3382
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.3621109426021576,
      "learning_rate": 9.225457516113423e-05,
      "loss": 0.4924,
      "step": 3383
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.23731426894664764,
      "learning_rate": 9.220304353900709e-05,
      "loss": 0.7444,
      "step": 3384
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.2317153364419937,
      "learning_rate": 9.215151399994797e-05,
      "loss": 0.875,
      "step": 3385
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.25925251841545105,
      "learning_rate": 9.20999865577237e-05,
      "loss": 0.8414,
      "step": 3386
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.48262491822242737,
      "learning_rate": 9.204846122610064e-05,
      "loss": 0.6711,
      "step": 3387
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.26672598719596863,
      "learning_rate": 9.199693801884447e-05,
      "loss": 0.696,
      "step": 3388
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.562762439250946,
      "learning_rate": 9.194541694972032e-05,
      "loss": 0.8328,
      "step": 3389
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.8946965336799622,
      "learning_rate": 9.189389803249285e-05,
      "loss": 1.0465,
      "step": 3390
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.18916642665863037,
      "learning_rate": 9.184238128092604e-05,
      "loss": 0.7285,
      "step": 3391
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.6412101984024048,
      "learning_rate": 9.17908667087833e-05,
      "loss": 1.1441,
      "step": 3392
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.3311481773853302,
      "learning_rate": 9.17393543298275e-05,
      "loss": 0.9043,
      "step": 3393
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.3236232101917267,
      "learning_rate": 9.168784415782093e-05,
      "loss": 0.8218,
      "step": 3394
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.17546917498111725,
      "learning_rate": 9.163633620652523e-05,
      "loss": 0.8234,
      "step": 3395
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.17687375843524933,
      "learning_rate": 9.158483048970151e-05,
      "loss": 0.8598,
      "step": 3396
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.8654428124427795,
      "learning_rate": 9.153332702111024e-05,
      "loss": 1.0232,
      "step": 3397
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.29717695713043213,
      "learning_rate": 9.148182581451128e-05,
      "loss": 0.6933,
      "step": 3398
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.19331632554531097,
      "learning_rate": 9.143032688366397e-05,
      "loss": 0.8283,
      "step": 3399
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.6250726580619812,
      "learning_rate": 9.137883024232696e-05,
      "loss": 0.802,
      "step": 3400
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.3796162009239197,
      "learning_rate": 9.132733590425831e-05,
      "loss": 0.8946,
      "step": 3401
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.2700478434562683,
      "learning_rate": 9.127584388321545e-05,
      "loss": 0.8549,
      "step": 3402
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.31517523527145386,
      "learning_rate": 9.122435419295522e-05,
      "loss": 0.7486,
      "step": 3403
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.1895739585161209,
      "learning_rate": 9.117286684723383e-05,
      "loss": 0.8335,
      "step": 3404
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.1992524117231369,
      "learning_rate": 9.112138185980683e-05,
      "loss": 0.7574,
      "step": 3405
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.2752724289894104,
      "learning_rate": 9.106989924442921e-05,
      "loss": 0.9455,
      "step": 3406
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.38053247332572937,
      "learning_rate": 9.101841901485529e-05,
      "loss": 0.8357,
      "step": 3407
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.25411343574523926,
      "learning_rate": 9.096694118483865e-05,
      "loss": 0.9633,
      "step": 3408
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.6492912173271179,
      "learning_rate": 9.091546576813244e-05,
      "loss": 0.894,
      "step": 3409
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.6349780559539795,
      "learning_rate": 9.086399277848903e-05,
      "loss": 0.8487,
      "step": 3410
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.26337340474128723,
      "learning_rate": 9.081252222966011e-05,
      "loss": 0.8125,
      "step": 3411
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.7807018756866455,
      "learning_rate": 9.076105413539682e-05,
      "loss": 1.0868,
      "step": 3412
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.2801019847393036,
      "learning_rate": 9.070958850944958e-05,
      "loss": 0.7713,
      "step": 3413
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.2429162710905075,
      "learning_rate": 9.065812536556813e-05,
      "loss": 0.9324,
      "step": 3414
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.20099805295467377,
      "learning_rate": 9.060666471750164e-05,
      "loss": 0.6407,
      "step": 3415
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.2414027899503708,
      "learning_rate": 9.055520657899854e-05,
      "loss": 0.8566,
      "step": 3416
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.24077177047729492,
      "learning_rate": 9.050375096380659e-05,
      "loss": 0.8394,
      "step": 3417
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.22064082324504852,
      "learning_rate": 9.045229788567286e-05,
      "loss": 0.7545,
      "step": 3418
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.32519295811653137,
      "learning_rate": 9.040084735834385e-05,
      "loss": 0.8227,
      "step": 3419
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.5261947512626648,
      "learning_rate": 9.034939939556526e-05,
      "loss": 0.958,
      "step": 3420
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.6319401264190674,
      "learning_rate": 9.02979540110821e-05,
      "loss": 0.8243,
      "step": 3421
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.4229465126991272,
      "learning_rate": 9.024651121863882e-05,
      "loss": 1.0323,
      "step": 3422
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.31984156370162964,
      "learning_rate": 9.019507103197906e-05,
      "loss": 0.7561,
      "step": 3423
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.3159458637237549,
      "learning_rate": 9.014363346484579e-05,
      "loss": 0.9633,
      "step": 3424
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.2075386494398117,
      "learning_rate": 9.00921985309813e-05,
      "loss": 0.6927,
      "step": 3425
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.5825222134590149,
      "learning_rate": 9.004076624412719e-05,
      "loss": 1.1374,
      "step": 3426
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.21645493805408478,
      "learning_rate": 8.998933661802426e-05,
      "loss": 0.7781,
      "step": 3427
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.17651572823524475,
      "learning_rate": 8.993790966641273e-05,
      "loss": 0.8157,
      "step": 3428
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.28000134229660034,
      "learning_rate": 8.988648540303202e-05,
      "loss": 0.8096,
      "step": 3429
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.6042351722717285,
      "learning_rate": 8.983506384162082e-05,
      "loss": 0.7766,
      "step": 3430
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.3575747013092041,
      "learning_rate": 8.978364499591722e-05,
      "loss": 0.9441,
      "step": 3431
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.2189517468214035,
      "learning_rate": 8.973222887965843e-05,
      "loss": 0.973,
      "step": 3432
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.3156614899635315,
      "learning_rate": 8.968081550658102e-05,
      "loss": 0.8043,
      "step": 3433
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.26958945393562317,
      "learning_rate": 8.962940489042078e-05,
      "loss": 0.5941,
      "step": 3434
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.2565614879131317,
      "learning_rate": 8.957799704491283e-05,
      "loss": 0.9501,
      "step": 3435
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.23515087366104126,
      "learning_rate": 8.952659198379149e-05,
      "loss": 0.7635,
      "step": 3436
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.3099784553050995,
      "learning_rate": 8.947518972079033e-05,
      "loss": 1.0243,
      "step": 3437
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.23775552213191986,
      "learning_rate": 8.942379026964225e-05,
      "loss": 0.987,
      "step": 3438
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.2384675145149231,
      "learning_rate": 8.93723936440793e-05,
      "loss": 1.0358,
      "step": 3439
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.24011027812957764,
      "learning_rate": 8.93209998578328e-05,
      "loss": 0.7784,
      "step": 3440
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.10255476832389832,
      "learning_rate": 8.92696089246334e-05,
      "loss": 0.753,
      "step": 3441
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.4042394161224365,
      "learning_rate": 8.921822085821091e-05,
      "loss": 0.8342,
      "step": 3442
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.17701035737991333,
      "learning_rate": 8.916683567229432e-05,
      "loss": 0.8263,
      "step": 3443
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.250009685754776,
      "learning_rate": 8.9115453380612e-05,
      "loss": 0.5839,
      "step": 3444
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.20312044024467468,
      "learning_rate": 8.90640739968914e-05,
      "loss": 0.6772,
      "step": 3445
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.18705005943775177,
      "learning_rate": 8.901269753485927e-05,
      "loss": 0.8894,
      "step": 3446
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.1978457272052765,
      "learning_rate": 8.896132400824157e-05,
      "loss": 0.7763,
      "step": 3447
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.34571903944015503,
      "learning_rate": 8.890995343076348e-05,
      "loss": 0.6664,
      "step": 3448
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.34850504994392395,
      "learning_rate": 8.885858581614937e-05,
      "loss": 0.9487,
      "step": 3449
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.31478437781333923,
      "learning_rate": 8.880722117812278e-05,
      "loss": 0.8372,
      "step": 3450
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.3036271631717682,
      "learning_rate": 8.875585953040662e-05,
      "loss": 0.8998,
      "step": 3451
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.3454534709453583,
      "learning_rate": 8.87045008867228e-05,
      "loss": 0.7696,
      "step": 3452
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.25336772203445435,
      "learning_rate": 8.865314526079248e-05,
      "loss": 0.7844,
      "step": 3453
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.23396167159080505,
      "learning_rate": 8.860179266633616e-05,
      "loss": 0.7477,
      "step": 3454
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.16184920072555542,
      "learning_rate": 8.855044311707335e-05,
      "loss": 0.772,
      "step": 3455
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.4001740515232086,
      "learning_rate": 8.84990966267228e-05,
      "loss": 0.7582,
      "step": 3456
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.4018576145172119,
      "learning_rate": 8.844775320900251e-05,
      "loss": 0.6779,
      "step": 3457
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.5941338539123535,
      "learning_rate": 8.839641287762956e-05,
      "loss": 0.7812,
      "step": 3458
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.2977406680583954,
      "learning_rate": 8.834507564632024e-05,
      "loss": 0.7986,
      "step": 3459
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.3027874529361725,
      "learning_rate": 8.82937415287901e-05,
      "loss": 0.9331,
      "step": 3460
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.31721165776252747,
      "learning_rate": 8.824241053875372e-05,
      "loss": 0.9489,
      "step": 3461
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.37721696496009827,
      "learning_rate": 8.819108268992486e-05,
      "loss": 0.9597,
      "step": 3462
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.20293939113616943,
      "learning_rate": 8.81397579960166e-05,
      "loss": 0.8098,
      "step": 3463
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.4017479419708252,
      "learning_rate": 8.8088436470741e-05,
      "loss": 0.9518,
      "step": 3464
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.3023255467414856,
      "learning_rate": 8.803711812780933e-05,
      "loss": 0.7498,
      "step": 3465
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.34705787897109985,
      "learning_rate": 8.798580298093201e-05,
      "loss": 0.9556,
      "step": 3466
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.1859462410211563,
      "learning_rate": 8.793449104381866e-05,
      "loss": 0.8839,
      "step": 3467
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.3435666859149933,
      "learning_rate": 8.788318233017796e-05,
      "loss": 0.9186,
      "step": 3468
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.21478068828582764,
      "learning_rate": 8.783187685371775e-05,
      "loss": 0.9003,
      "step": 3469
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.24412669241428375,
      "learning_rate": 8.778057462814506e-05,
      "loss": 0.6248,
      "step": 3470
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.3917315900325775,
      "learning_rate": 8.772927566716599e-05,
      "loss": 0.6264,
      "step": 3471
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.3993481993675232,
      "learning_rate": 8.767797998448575e-05,
      "loss": 1.0889,
      "step": 3472
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.3212924003601074,
      "learning_rate": 8.76266875938088e-05,
      "loss": 0.7456,
      "step": 3473
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.2906345725059509,
      "learning_rate": 8.757539850883857e-05,
      "loss": 0.6469,
      "step": 3474
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.39229050278663635,
      "learning_rate": 8.752411274327767e-05,
      "loss": 0.9732,
      "step": 3475
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.26272574067115784,
      "learning_rate": 8.747283031082786e-05,
      "loss": 0.7701,
      "step": 3476
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.4748488962650299,
      "learning_rate": 8.742155122518995e-05,
      "loss": 0.646,
      "step": 3477
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.20268911123275757,
      "learning_rate": 8.737027550006386e-05,
      "loss": 0.7869,
      "step": 3478
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.2029736340045929,
      "learning_rate": 8.731900314914866e-05,
      "loss": 1.0214,
      "step": 3479
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.12157048285007477,
      "learning_rate": 8.72677341861425e-05,
      "loss": 1.1894,
      "step": 3480
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.2607593238353729,
      "learning_rate": 8.721646862474257e-05,
      "loss": 0.8267,
      "step": 3481
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.2330280840396881,
      "learning_rate": 8.716520647864517e-05,
      "loss": 0.641,
      "step": 3482
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.36220982670783997,
      "learning_rate": 8.711394776154584e-05,
      "loss": 0.9645,
      "step": 3483
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.21472930908203125,
      "learning_rate": 8.706269248713899e-05,
      "loss": 0.8379,
      "step": 3484
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.24238654971122742,
      "learning_rate": 8.701144066911814e-05,
      "loss": 0.8658,
      "step": 3485
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.5446374416351318,
      "learning_rate": 8.696019232117606e-05,
      "loss": 0.5972,
      "step": 3486
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.4641260504722595,
      "learning_rate": 8.69089474570044e-05,
      "loss": 0.7455,
      "step": 3487
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.5437308549880981,
      "learning_rate": 8.6857706090294e-05,
      "loss": 1.0138,
      "step": 3488
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.13316710293293,
      "learning_rate": 8.68064682347347e-05,
      "loss": 0.8092,
      "step": 3489
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.6203511953353882,
      "learning_rate": 8.675523390401542e-05,
      "loss": 0.6591,
      "step": 3490
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.5807340145111084,
      "learning_rate": 8.67040031118241e-05,
      "loss": 1.018,
      "step": 3491
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.27749523520469666,
      "learning_rate": 8.66527758718479e-05,
      "loss": 1.0551,
      "step": 3492
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.42085978388786316,
      "learning_rate": 8.660155219777281e-05,
      "loss": 0.8999,
      "step": 3493
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.2669888436794281,
      "learning_rate": 8.655033210328394e-05,
      "loss": 0.968,
      "step": 3494
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.2669888436794281,
      "learning_rate": 8.655033210328394e-05,
      "loss": 1.1542,
      "step": 3495
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.30691951513290405,
      "learning_rate": 8.649911560206554e-05,
      "loss": 0.8657,
      "step": 3496
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.23126909136772156,
      "learning_rate": 8.644790270780081e-05,
      "loss": 0.7987,
      "step": 3497
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.33935704827308655,
      "learning_rate": 8.6396693434172e-05,
      "loss": 0.7142,
      "step": 3498
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.29585349559783936,
      "learning_rate": 8.634548779486037e-05,
      "loss": 0.8443,
      "step": 3499
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.5097432136535645,
      "learning_rate": 8.629428580354627e-05,
      "loss": 0.6715,
      "step": 3500
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.41990330815315247,
      "learning_rate": 8.624308747390904e-05,
      "loss": 0.6632,
      "step": 3501
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.5460381507873535,
      "learning_rate": 8.6191892819627e-05,
      "loss": 0.9376,
      "step": 3502
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.18902303278446198,
      "learning_rate": 8.61407018543776e-05,
      "loss": 0.6965,
      "step": 3503
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.1841539442539215,
      "learning_rate": 8.608951459183721e-05,
      "loss": 0.8934,
      "step": 3504
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.6098721027374268,
      "learning_rate": 8.603833104568115e-05,
      "loss": 0.8603,
      "step": 3505
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.4759826064109802,
      "learning_rate": 8.598715122958398e-05,
      "loss": 1.0755,
      "step": 3506
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.3535871207714081,
      "learning_rate": 8.593597515721904e-05,
      "loss": 0.7065,
      "step": 3507
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.48229438066482544,
      "learning_rate": 8.588480284225874e-05,
      "loss": 0.7378,
      "step": 3508
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.1744450479745865,
      "learning_rate": 8.583363429837452e-05,
      "loss": 0.8502,
      "step": 3509
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.2804935872554779,
      "learning_rate": 8.578246953923678e-05,
      "loss": 0.946,
      "step": 3510
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.3067629635334015,
      "learning_rate": 8.573130857851491e-05,
      "loss": 0.85,
      "step": 3511
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.6566327214241028,
      "learning_rate": 8.568015142987727e-05,
      "loss": 0.8315,
      "step": 3512
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.4008902311325073,
      "learning_rate": 8.562899810699125e-05,
      "loss": 0.6985,
      "step": 3513
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.25233715772628784,
      "learning_rate": 8.557784862352318e-05,
      "loss": 0.8878,
      "step": 3514
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.3577536940574646,
      "learning_rate": 8.552670299313835e-05,
      "loss": 0.8486,
      "step": 3515
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.26415157318115234,
      "learning_rate": 8.547556122950108e-05,
      "loss": 0.7633,
      "step": 3516
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.27621328830718994,
      "learning_rate": 8.542442334627464e-05,
      "loss": 0.8404,
      "step": 3517
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.2868078649044037,
      "learning_rate": 8.537328935712115e-05,
      "loss": 0.9263,
      "step": 3518
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.23169109225273132,
      "learning_rate": 8.532215927570189e-05,
      "loss": 0.7439,
      "step": 3519
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.3079158365726471,
      "learning_rate": 8.527103311567695e-05,
      "loss": 0.9068,
      "step": 3520
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.33855438232421875,
      "learning_rate": 8.521991089070536e-05,
      "loss": 0.7426,
      "step": 3521
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.21075603365898132,
      "learning_rate": 8.516879261444525e-05,
      "loss": 0.7627,
      "step": 3522
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.35234466195106506,
      "learning_rate": 8.511767830055353e-05,
      "loss": 1.0305,
      "step": 3523
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.32044607400894165,
      "learning_rate": 8.506656796268608e-05,
      "loss": 0.7495,
      "step": 3524
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.3379153311252594,
      "learning_rate": 8.501546161449789e-05,
      "loss": 0.8584,
      "step": 3525
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.22242042422294617,
      "learning_rate": 8.496435926964264e-05,
      "loss": 0.8267,
      "step": 3526
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.24070413410663605,
      "learning_rate": 8.491326094177306e-05,
      "loss": 0.8094,
      "step": 3527
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.7633682489395142,
      "learning_rate": 8.486216664454079e-05,
      "loss": 0.8189,
      "step": 3528
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.3195512294769287,
      "learning_rate": 8.481107639159644e-05,
      "loss": 1.0251,
      "step": 3529
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.2019522339105606,
      "learning_rate": 8.47599901965895e-05,
      "loss": 0.5938,
      "step": 3530
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.20741626620292664,
      "learning_rate": 8.470890807316834e-05,
      "loss": 0.869,
      "step": 3531
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.17086432874202728,
      "learning_rate": 8.46578300349803e-05,
      "loss": 0.77,
      "step": 3532
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.2641015648841858,
      "learning_rate": 8.46067560956716e-05,
      "loss": 0.8644,
      "step": 3533
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.2118988186120987,
      "learning_rate": 8.455568626888735e-05,
      "loss": 0.7517,
      "step": 3534
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.24817737936973572,
      "learning_rate": 8.450462056827162e-05,
      "loss": 0.926,
      "step": 3535
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.16256362199783325,
      "learning_rate": 8.445355900746734e-05,
      "loss": 0.7567,
      "step": 3536
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.31760144233703613,
      "learning_rate": 8.440250160011624e-05,
      "loss": 0.8034,
      "step": 3537
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.3039175271987915,
      "learning_rate": 8.435144835985917e-05,
      "loss": 0.6456,
      "step": 3538
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.260654479265213,
      "learning_rate": 8.430039930033566e-05,
      "loss": 0.8312,
      "step": 3539
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.4028107523918152,
      "learning_rate": 8.42493544351842e-05,
      "loss": 0.7503,
      "step": 3540
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.25897669792175293,
      "learning_rate": 8.419831377804217e-05,
      "loss": 1.095,
      "step": 3541
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.22579391300678253,
      "learning_rate": 8.414727734254579e-05,
      "loss": 0.9464,
      "step": 3542
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.24206534028053284,
      "learning_rate": 8.409624514233017e-05,
      "loss": 0.7135,
      "step": 3543
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.26322901248931885,
      "learning_rate": 8.40452171910293e-05,
      "loss": 0.72,
      "step": 3544
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.2864113450050354,
      "learning_rate": 8.3994193502276e-05,
      "loss": 0.6434,
      "step": 3545
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.2614036500453949,
      "learning_rate": 8.394317408970202e-05,
      "loss": 0.8305,
      "step": 3546
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.48615798354148865,
      "learning_rate": 8.389215896693786e-05,
      "loss": 1.0492,
      "step": 3547
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.23334015905857086,
      "learning_rate": 8.384114814761302e-05,
      "loss": 0.7994,
      "step": 3548
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.23241518437862396,
      "learning_rate": 8.379014164535573e-05,
      "loss": 0.6128,
      "step": 3549
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.20276771485805511,
      "learning_rate": 8.373913947379305e-05,
      "loss": 0.9258,
      "step": 3550
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.24536068737506866,
      "learning_rate": 8.3688141646551e-05,
      "loss": 0.8764,
      "step": 3551
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.59693843126297,
      "learning_rate": 8.363714817725439e-05,
      "loss": 0.9167,
      "step": 3552
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.24988503754138947,
      "learning_rate": 8.358615907952678e-05,
      "loss": 0.8804,
      "step": 3553
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.6244630217552185,
      "learning_rate": 8.353517436699071e-05,
      "loss": 0.9207,
      "step": 3554
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.40762951970100403,
      "learning_rate": 8.348419405326744e-05,
      "loss": 0.6715,
      "step": 3555
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.36586785316467285,
      "learning_rate": 8.343321815197705e-05,
      "loss": 0.8791,
      "step": 3556
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.30793434381484985,
      "learning_rate": 8.338224667673855e-05,
      "loss": 0.74,
      "step": 3557
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.3355123698711395,
      "learning_rate": 8.333127964116966e-05,
      "loss": 0.8592,
      "step": 3558
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.16082628071308136,
      "learning_rate": 8.328031705888695e-05,
      "loss": 0.7213,
      "step": 3559
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.14365428686141968,
      "learning_rate": 8.322935894350575e-05,
      "loss": 0.6804,
      "step": 3560
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.2697550356388092,
      "learning_rate": 8.317840530864034e-05,
      "loss": 0.6205,
      "step": 3561
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.17566998302936554,
      "learning_rate": 8.312745616790367e-05,
      "loss": 0.7392,
      "step": 3562
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.16218890249729156,
      "learning_rate": 8.30765115349075e-05,
      "loss": 0.81,
      "step": 3563
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.20406030118465424,
      "learning_rate": 8.302557142326248e-05,
      "loss": 0.9187,
      "step": 3564
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.6163858771324158,
      "learning_rate": 8.297463584657793e-05,
      "loss": 0.8238,
      "step": 3565
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.37404578924179077,
      "learning_rate": 8.292370481846201e-05,
      "loss": 0.6664,
      "step": 3566
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.629665195941925,
      "learning_rate": 8.287277835252173e-05,
      "loss": 0.6846,
      "step": 3567
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.32722219824790955,
      "learning_rate": 8.282185646236277e-05,
      "loss": 0.689,
      "step": 3568
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.28282421827316284,
      "learning_rate": 8.277093916158961e-05,
      "loss": 1.0998,
      "step": 3569
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.22258232533931732,
      "learning_rate": 8.272002646380563e-05,
      "loss": 0.8062,
      "step": 3570
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.24694077670574188,
      "learning_rate": 8.266911838261281e-05,
      "loss": 0.9042,
      "step": 3571
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.2435636818408966,
      "learning_rate": 8.261821493161196e-05,
      "loss": 0.7796,
      "step": 3572
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.2762112319469452,
      "learning_rate": 8.256731612440273e-05,
      "loss": 0.6191,
      "step": 3573
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.3745580017566681,
      "learning_rate": 8.251642197458342e-05,
      "loss": 0.952,
      "step": 3574
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.5830767154693604,
      "learning_rate": 8.246553249575112e-05,
      "loss": 0.8935,
      "step": 3575
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.2860445976257324,
      "learning_rate": 8.241464770150167e-05,
      "loss": 1.2358,
      "step": 3576
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.35226061940193176,
      "learning_rate": 8.23637676054297e-05,
      "loss": 0.9389,
      "step": 3577
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.22287555038928986,
      "learning_rate": 8.231289222112854e-05,
      "loss": 0.6381,
      "step": 3578
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.23113809525966644,
      "learning_rate": 8.226202156219023e-05,
      "loss": 0.5194,
      "step": 3579
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.2996826469898224,
      "learning_rate": 8.221115564220568e-05,
      "loss": 0.8931,
      "step": 3580
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.2962035834789276,
      "learning_rate": 8.216029447476442e-05,
      "loss": 0.991,
      "step": 3581
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.3329136371612549,
      "learning_rate": 8.210943807345465e-05,
      "loss": 0.7447,
      "step": 3582
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.3098863959312439,
      "learning_rate": 8.205858645186348e-05,
      "loss": 0.6186,
      "step": 3583
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.2665243446826935,
      "learning_rate": 8.200773962357663e-05,
      "loss": 0.7596,
      "step": 3584
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.2528529465198517,
      "learning_rate": 8.195689760217851e-05,
      "loss": 0.7729,
      "step": 3585
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.2665480673313141,
      "learning_rate": 8.190606040125233e-05,
      "loss": 0.7818,
      "step": 3586
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.5527331233024597,
      "learning_rate": 8.185522803437997e-05,
      "loss": 0.8886,
      "step": 3587
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.26597529649734497,
      "learning_rate": 8.180440051514196e-05,
      "loss": 0.8471,
      "step": 3588
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.5609765648841858,
      "learning_rate": 8.175357785711771e-05,
      "loss": 0.4937,
      "step": 3589
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.23363211750984192,
      "learning_rate": 8.170276007388515e-05,
      "loss": 0.8199,
      "step": 3590
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.26402410864830017,
      "learning_rate": 8.165194717902096e-05,
      "loss": 0.8743,
      "step": 3591
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.3533754050731659,
      "learning_rate": 8.160113918610053e-05,
      "loss": 0.9263,
      "step": 3592
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.29713428020477295,
      "learning_rate": 8.155033610869798e-05,
      "loss": 0.7628,
      "step": 3593
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.19664283096790314,
      "learning_rate": 8.149953796038606e-05,
      "loss": 0.5968,
      "step": 3594
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.274223268032074,
      "learning_rate": 8.144874475473619e-05,
      "loss": 0.696,
      "step": 3595
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.23804841935634613,
      "learning_rate": 8.139795650531855e-05,
      "loss": 0.8024,
      "step": 3596
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.2671147286891937,
      "learning_rate": 8.134717322570191e-05,
      "loss": 0.8693,
      "step": 3597
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.27503013610839844,
      "learning_rate": 8.129639492945374e-05,
      "loss": 0.8382,
      "step": 3598
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.14594988524913788,
      "learning_rate": 8.124562163014021e-05,
      "loss": 0.8127,
      "step": 3599
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.23974335193634033,
      "learning_rate": 8.119485334132613e-05,
      "loss": 0.7107,
      "step": 3600
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.24285054206848145,
      "learning_rate": 8.114409007657492e-05,
      "loss": 0.7002,
      "step": 3601
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.33097460865974426,
      "learning_rate": 8.109333184944879e-05,
      "loss": 0.8438,
      "step": 3602
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.15078885853290558,
      "learning_rate": 8.104257867350847e-05,
      "loss": 0.8093,
      "step": 3603
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.19186648726463318,
      "learning_rate": 8.099183056231341e-05,
      "loss": 1.0097,
      "step": 3604
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.31917253136634827,
      "learning_rate": 8.09410875294217e-05,
      "loss": 0.8891,
      "step": 3605
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.686836838722229,
      "learning_rate": 8.089034958839006e-05,
      "loss": 0.9584,
      "step": 3606
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.2558533549308777,
      "learning_rate": 8.083961675277384e-05,
      "loss": 0.8486,
      "step": 3607
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.22637665271759033,
      "learning_rate": 8.078888903612702e-05,
      "loss": 0.8619,
      "step": 3608
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.2155427783727646,
      "learning_rate": 8.073816645200231e-05,
      "loss": 0.8442,
      "step": 3609
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.16365505754947662,
      "learning_rate": 8.06874490139509e-05,
      "loss": 0.7518,
      "step": 3610
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.42298975586891174,
      "learning_rate": 8.063673673552265e-05,
      "loss": 0.8319,
      "step": 3611
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.31491410732269287,
      "learning_rate": 8.058602963026618e-05,
      "loss": 0.843,
      "step": 3612
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.33955639600753784,
      "learning_rate": 8.053532771172856e-05,
      "loss": 0.9358,
      "step": 3613
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.20290808379650116,
      "learning_rate": 8.048463099345547e-05,
      "loss": 0.8052,
      "step": 3614
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.2707533538341522,
      "learning_rate": 8.043393948899134e-05,
      "loss": 0.5893,
      "step": 3615
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.27678757905960083,
      "learning_rate": 8.038325321187911e-05,
      "loss": 0.8582,
      "step": 3616
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.2689156234264374,
      "learning_rate": 8.03325721756603e-05,
      "loss": 0.9426,
      "step": 3617
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.17710788547992706,
      "learning_rate": 8.028189639387513e-05,
      "loss": 0.9121,
      "step": 3618
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9828022122383118,
      "learning_rate": 8.023122588006233e-05,
      "loss": 0.8181,
      "step": 3619
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.1715444177389145,
      "learning_rate": 8.018056064775921e-05,
      "loss": 0.7208,
      "step": 3620
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.250889390707016,
      "learning_rate": 8.01299007105018e-05,
      "loss": 0.7599,
      "step": 3621
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.2750486433506012,
      "learning_rate": 8.007924608182457e-05,
      "loss": 0.7924,
      "step": 3622
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.362855464220047,
      "learning_rate": 8.002859677526062e-05,
      "loss": 1.0417,
      "step": 3623
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.38414567708969116,
      "learning_rate": 7.99779528043416e-05,
      "loss": 1.0812,
      "step": 3624
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.4035875201225281,
      "learning_rate": 7.992731418259786e-05,
      "loss": 0.9645,
      "step": 3625
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.3247416913509369,
      "learning_rate": 7.987668092355817e-05,
      "loss": 0.747,
      "step": 3626
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.3393847644329071,
      "learning_rate": 7.982605304074991e-05,
      "loss": 0.8111,
      "step": 3627
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.28265994787216187,
      "learning_rate": 7.97754305476991e-05,
      "loss": 0.792,
      "step": 3628
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.31937089562416077,
      "learning_rate": 7.972481345793023e-05,
      "loss": 0.9113,
      "step": 3629
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.26805582642555237,
      "learning_rate": 7.967420178496636e-05,
      "loss": 0.7734,
      "step": 3630
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.6287921667098999,
      "learning_rate": 7.962359554232915e-05,
      "loss": 0.7149,
      "step": 3631
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.2465207576751709,
      "learning_rate": 7.957299474353875e-05,
      "loss": 0.9209,
      "step": 3632
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.4023100733757019,
      "learning_rate": 7.952239940211387e-05,
      "loss": 0.9486,
      "step": 3633
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.2678241729736328,
      "learning_rate": 7.947180953157183e-05,
      "loss": 0.7418,
      "step": 3634
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.35018664598464966,
      "learning_rate": 7.942122514542843e-05,
      "loss": 1.0213,
      "step": 3635
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.28843042254447937,
      "learning_rate": 7.937064625719796e-05,
      "loss": 0.9356,
      "step": 3636
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.26309099793434143,
      "learning_rate": 7.932007288039335e-05,
      "loss": 0.8347,
      "step": 3637
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.7326022386550903,
      "learning_rate": 7.926950502852595e-05,
      "loss": 0.7466,
      "step": 3638
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.32131484150886536,
      "learning_rate": 7.921894271510571e-05,
      "loss": 0.9329,
      "step": 3639
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.2932816445827484,
      "learning_rate": 7.916838595364105e-05,
      "loss": 0.9242,
      "step": 3640
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.20790307223796844,
      "learning_rate": 7.911783475763894e-05,
      "loss": 0.9981,
      "step": 3641
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.3375793993473053,
      "learning_rate": 7.906728914060487e-05,
      "loss": 0.7295,
      "step": 3642
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.30968397855758667,
      "learning_rate": 7.901674911604276e-05,
      "loss": 0.4522,
      "step": 3643
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.24491457641124725,
      "learning_rate": 7.89662146974552e-05,
      "loss": 0.8632,
      "step": 3644
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.20792970061302185,
      "learning_rate": 7.891568589834313e-05,
      "loss": 0.7842,
      "step": 3645
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.3204215466976166,
      "learning_rate": 7.886516273220596e-05,
      "loss": 0.7762,
      "step": 3646
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.3298042416572571,
      "learning_rate": 7.881464521254181e-05,
      "loss": 0.7983,
      "step": 3647
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.18950971961021423,
      "learning_rate": 7.87641333528471e-05,
      "loss": 0.7659,
      "step": 3648
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.2929334342479706,
      "learning_rate": 7.871362716661673e-05,
      "loss": 0.9543,
      "step": 3649
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.1941991150379181,
      "learning_rate": 7.866312666734425e-05,
      "loss": 0.9194,
      "step": 3650
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.24367371201515198,
      "learning_rate": 7.861263186852152e-05,
      "loss": 0.79,
      "step": 3651
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.2230703979730606,
      "learning_rate": 7.856214278363895e-05,
      "loss": 0.6788,
      "step": 3652
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.23171640932559967,
      "learning_rate": 7.851165942618546e-05,
      "loss": 0.7292,
      "step": 3653
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.6131773591041565,
      "learning_rate": 7.846118180964838e-05,
      "loss": 0.8903,
      "step": 3654
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.284241646528244,
      "learning_rate": 7.841070994751353e-05,
      "loss": 0.9834,
      "step": 3655
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.37130478024482727,
      "learning_rate": 7.83602438532651e-05,
      "loss": 0.8493,
      "step": 3656
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.21721598505973816,
      "learning_rate": 7.830978354038596e-05,
      "loss": 0.7203,
      "step": 3657
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.2199954390525818,
      "learning_rate": 7.825932902235724e-05,
      "loss": 0.782,
      "step": 3658
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.30023813247680664,
      "learning_rate": 7.820888031265856e-05,
      "loss": 0.7322,
      "step": 3659
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.3048880994319916,
      "learning_rate": 7.815843742476807e-05,
      "loss": 0.8428,
      "step": 3660
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.6393557786941528,
      "learning_rate": 7.810800037216225e-05,
      "loss": 0.8576,
      "step": 3661
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.19184164702892303,
      "learning_rate": 7.805756916831612e-05,
      "loss": 0.6652,
      "step": 3662
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.22698064148426056,
      "learning_rate": 7.800714382670307e-05,
      "loss": 0.6889,
      "step": 3663
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.34284526109695435,
      "learning_rate": 7.795672436079495e-05,
      "loss": 0.8974,
      "step": 3664
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.21293388307094574,
      "learning_rate": 7.790631078406201e-05,
      "loss": 1.0245,
      "step": 3665
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.22449050843715668,
      "learning_rate": 7.785590310997303e-05,
      "loss": 0.9136,
      "step": 3666
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.15255959331989288,
      "learning_rate": 7.78055013519951e-05,
      "loss": 0.6946,
      "step": 3667
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.23793147504329681,
      "learning_rate": 7.775510552359373e-05,
      "loss": 0.812,
      "step": 3668
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.5826781988143921,
      "learning_rate": 7.770471563823295e-05,
      "loss": 0.9001,
      "step": 3669
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.27923306822776794,
      "learning_rate": 7.765433170937511e-05,
      "loss": 0.6743,
      "step": 3670
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.2950364649295807,
      "learning_rate": 7.760395375048099e-05,
      "loss": 0.9316,
      "step": 3671
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.3044540286064148,
      "learning_rate": 7.755358177500974e-05,
      "loss": 0.8699,
      "step": 3672
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.7982708811759949,
      "learning_rate": 7.750321579641903e-05,
      "loss": 0.7339,
      "step": 3673
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.2882750332355499,
      "learning_rate": 7.74528558281648e-05,
      "loss": 0.7455,
      "step": 3674
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.29735371470451355,
      "learning_rate": 7.740250188370139e-05,
      "loss": 0.8591,
      "step": 3675
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.20147384703159332,
      "learning_rate": 7.735215397648168e-05,
      "loss": 0.8982,
      "step": 3676
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.3180200457572937,
      "learning_rate": 7.730181211995681e-05,
      "loss": 0.7673,
      "step": 3677
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.23797696828842163,
      "learning_rate": 7.725147632757621e-05,
      "loss": 0.8407,
      "step": 3678
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.347979873418808,
      "learning_rate": 7.720114661278793e-05,
      "loss": 0.8297,
      "step": 3679
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.20475134253501892,
      "learning_rate": 7.715082298903824e-05,
      "loss": 0.8184,
      "step": 3680
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.23639480769634247,
      "learning_rate": 7.710050546977177e-05,
      "loss": 0.7023,
      "step": 3681
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.25020480155944824,
      "learning_rate": 7.70501940684316e-05,
      "loss": 0.7543,
      "step": 3682
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.3676780164241791,
      "learning_rate": 7.699988879845915e-05,
      "loss": 1.0781,
      "step": 3683
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.41441571712493896,
      "learning_rate": 7.69495896732941e-05,
      "loss": 0.9264,
      "step": 3684
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.2700017988681793,
      "learning_rate": 7.689929670637472e-05,
      "loss": 0.6866,
      "step": 3685
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.22158853709697723,
      "learning_rate": 7.684900991113742e-05,
      "loss": 0.826,
      "step": 3686
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.2700606882572174,
      "learning_rate": 7.679872930101703e-05,
      "loss": 1.0761,
      "step": 3687
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.29719963669776917,
      "learning_rate": 7.674845488944667e-05,
      "loss": 0.8786,
      "step": 3688
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.30270716547966003,
      "learning_rate": 7.6698186689858e-05,
      "loss": 0.852,
      "step": 3689
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.26039204001426697,
      "learning_rate": 7.664792471568079e-05,
      "loss": 0.6833,
      "step": 3690
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.37027671933174133,
      "learning_rate": 7.659766898034326e-05,
      "loss": 1.1543,
      "step": 3691
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.28825563192367554,
      "learning_rate": 7.654741949727197e-05,
      "loss": 0.7228,
      "step": 3692
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.18098191916942596,
      "learning_rate": 7.649717627989179e-05,
      "loss": 0.9005,
      "step": 3693
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.2769218385219574,
      "learning_rate": 7.644693934162585e-05,
      "loss": 0.85,
      "step": 3694
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.22790785133838654,
      "learning_rate": 7.639670869589574e-05,
      "loss": 0.861,
      "step": 3695
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.27237024903297424,
      "learning_rate": 7.634648435612127e-05,
      "loss": 0.8319,
      "step": 3696
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.2686496376991272,
      "learning_rate": 7.629626633572052e-05,
      "loss": 0.8972,
      "step": 3697
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.1731705516576767,
      "learning_rate": 7.624605464811004e-05,
      "loss": 0.6825,
      "step": 3698
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.19021005928516388,
      "learning_rate": 7.619584930670458e-05,
      "loss": 0.9925,
      "step": 3699
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.4810575246810913,
      "learning_rate": 7.614565032491716e-05,
      "loss": 0.9754,
      "step": 3700
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.3342438042163849,
      "learning_rate": 7.609545771615923e-05,
      "loss": 0.7637,
      "step": 3701
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.2360439896583557,
      "learning_rate": 7.604527149384041e-05,
      "loss": 0.7893,
      "step": 3702
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.6034780144691467,
      "learning_rate": 7.599509167136868e-05,
      "loss": 0.8405,
      "step": 3703
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.3682830035686493,
      "learning_rate": 7.594491826215027e-05,
      "loss": 0.8884,
      "step": 3704
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.2506824731826782,
      "learning_rate": 7.589475127958976e-05,
      "loss": 0.7834,
      "step": 3705
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.2269362509250641,
      "learning_rate": 7.584459073708996e-05,
      "loss": 0.8508,
      "step": 3706
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.34417396783828735,
      "learning_rate": 7.579443664805193e-05,
      "loss": 0.7534,
      "step": 3707
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.6508209705352783,
      "learning_rate": 7.574428902587512e-05,
      "loss": 0.8274,
      "step": 3708
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.2135089635848999,
      "learning_rate": 7.569414788395715e-05,
      "loss": 0.6598,
      "step": 3709
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.23617863655090332,
      "learning_rate": 7.564401323569394e-05,
      "loss": 0.7168,
      "step": 3710
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.14462809264659882,
      "learning_rate": 7.559388509447967e-05,
      "loss": 0.893,
      "step": 3711
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.381893128156662,
      "learning_rate": 7.554376347370681e-05,
      "loss": 0.9874,
      "step": 3712
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.6441928744316101,
      "learning_rate": 7.549364838676605e-05,
      "loss": 0.8859,
      "step": 3713
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.2430260330438614,
      "learning_rate": 7.544353984704634e-05,
      "loss": 0.6084,
      "step": 3714
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.23202848434448242,
      "learning_rate": 7.539343786793492e-05,
      "loss": 0.8555,
      "step": 3715
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.18756122887134552,
      "learning_rate": 7.534334246281716e-05,
      "loss": 0.862,
      "step": 3716
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.25607573986053467,
      "learning_rate": 7.52932536450769e-05,
      "loss": 0.7923,
      "step": 3717
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.2067583203315735,
      "learning_rate": 7.524317142809601e-05,
      "loss": 0.7993,
      "step": 3718
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.23022283613681793,
      "learning_rate": 7.519309582525467e-05,
      "loss": 0.9326,
      "step": 3719
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.46201831102371216,
      "learning_rate": 7.514302684993124e-05,
      "loss": 0.6961,
      "step": 3720
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.26114845275878906,
      "learning_rate": 7.509296451550244e-05,
      "loss": 0.861,
      "step": 3721
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.5427724719047546,
      "learning_rate": 7.50429088353431e-05,
      "loss": 0.7255,
      "step": 3722
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.20400777459144592,
      "learning_rate": 7.499285982282632e-05,
      "loss": 0.8546,
      "step": 3723
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.5036421418190002,
      "learning_rate": 7.49428174913234e-05,
      "loss": 0.9781,
      "step": 3724
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.35625967383384705,
      "learning_rate": 7.489278185420387e-05,
      "loss": 0.8063,
      "step": 3725
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.2714517414569855,
      "learning_rate": 7.48427529248354e-05,
      "loss": 0.8569,
      "step": 3726
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.36167654395103455,
      "learning_rate": 7.479273071658408e-05,
      "loss": 0.8528,
      "step": 3727
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.16807863116264343,
      "learning_rate": 7.474271524281393e-05,
      "loss": 0.6572,
      "step": 3728
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.3160095512866974,
      "learning_rate": 7.46927065168873e-05,
      "loss": 0.8393,
      "step": 3729
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.25635039806365967,
      "learning_rate": 7.464270455216477e-05,
      "loss": 0.6885,
      "step": 3730
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.5290404558181763,
      "learning_rate": 7.45927093620051e-05,
      "loss": 1.036,
      "step": 3731
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.5656370520591736,
      "learning_rate": 7.454272095976516e-05,
      "loss": 1.0424,
      "step": 3732
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.19415780901908875,
      "learning_rate": 7.449273935880011e-05,
      "loss": 0.7438,
      "step": 3733
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.45716094970703125,
      "learning_rate": 7.444276457246325e-05,
      "loss": 0.9117,
      "step": 3734
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.2684631645679474,
      "learning_rate": 7.439279661410601e-05,
      "loss": 0.6852,
      "step": 3735
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.28176355361938477,
      "learning_rate": 7.434283549707806e-05,
      "loss": 0.9716,
      "step": 3736
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.24824665486812592,
      "learning_rate": 7.429288123472725e-05,
      "loss": 1.0074,
      "step": 3737
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.30908632278442383,
      "learning_rate": 7.424293384039955e-05,
      "loss": 0.6808,
      "step": 3738
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.21367241442203522,
      "learning_rate": 7.41929933274391e-05,
      "loss": 0.8775,
      "step": 3739
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.3530590832233429,
      "learning_rate": 7.414305970918826e-05,
      "loss": 0.8142,
      "step": 3740
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.272773414850235,
      "learning_rate": 7.409313299898753e-05,
      "loss": 0.7801,
      "step": 3741
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.1910013109445572,
      "learning_rate": 7.404321321017545e-05,
      "loss": 0.9438,
      "step": 3742
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.21415868401527405,
      "learning_rate": 7.399330035608889e-05,
      "loss": 0.7444,
      "step": 3743
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.23306553065776825,
      "learning_rate": 7.394339445006275e-05,
      "loss": 0.8476,
      "step": 3744
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.4693785309791565,
      "learning_rate": 7.389349550543005e-05,
      "loss": 0.961,
      "step": 3745
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.24129042029380798,
      "learning_rate": 7.384360353552211e-05,
      "loss": 0.8186,
      "step": 3746
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.1773339956998825,
      "learning_rate": 7.379371855366822e-05,
      "loss": 0.8326,
      "step": 3747
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.2912137508392334,
      "learning_rate": 7.374384057319581e-05,
      "loss": 0.8431,
      "step": 3748
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.3045090138912201,
      "learning_rate": 7.369396960743061e-05,
      "loss": 0.7181,
      "step": 3749
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.2972191572189331,
      "learning_rate": 7.364410566969633e-05,
      "loss": 0.9193,
      "step": 3750
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.19280138611793518,
      "learning_rate": 7.359424877331476e-05,
      "loss": 0.9055,
      "step": 3751
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.13157141208648682,
      "learning_rate": 7.35443989316059e-05,
      "loss": 0.7321,
      "step": 3752
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.26884710788726807,
      "learning_rate": 7.349455615788789e-05,
      "loss": 0.8319,
      "step": 3753
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.32709574699401855,
      "learning_rate": 7.344472046547695e-05,
      "loss": 0.7421,
      "step": 3754
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.25467219948768616,
      "learning_rate": 7.339489186768731e-05,
      "loss": 1.0176,
      "step": 3755
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.3376185894012451,
      "learning_rate": 7.334507037783148e-05,
      "loss": 0.8411,
      "step": 3756
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.2981029152870178,
      "learning_rate": 7.329525600921994e-05,
      "loss": 0.7995,
      "step": 3757
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.2811482846736908,
      "learning_rate": 7.324544877516126e-05,
      "loss": 0.9122,
      "step": 3758
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.2620615065097809,
      "learning_rate": 7.319564868896227e-05,
      "loss": 0.9733,
      "step": 3759
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.2740532159805298,
      "learning_rate": 7.314585576392769e-05,
      "loss": 0.8535,
      "step": 3760
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.2525557279586792,
      "learning_rate": 7.309607001336039e-05,
      "loss": 0.8454,
      "step": 3761
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.26148101687431335,
      "learning_rate": 7.304629145056142e-05,
      "loss": 0.8411,
      "step": 3762
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.2513936758041382,
      "learning_rate": 7.299652008882977e-05,
      "loss": 0.7608,
      "step": 3763
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.19183214008808136,
      "learning_rate": 7.294675594146259e-05,
      "loss": 0.4808,
      "step": 3764
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.513472318649292,
      "learning_rate": 7.289699902175508e-05,
      "loss": 0.7904,
      "step": 3765
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.24641059339046478,
      "learning_rate": 7.284724934300054e-05,
      "loss": 0.9207,
      "step": 3766
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.23240695893764496,
      "learning_rate": 7.279750691849026e-05,
      "loss": 0.6968,
      "step": 3767
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.2504221796989441,
      "learning_rate": 7.274777176151362e-05,
      "loss": 0.7123,
      "step": 3768
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.24177145957946777,
      "learning_rate": 7.269804388535816e-05,
      "loss": 0.737,
      "step": 3769
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.6192745566368103,
      "learning_rate": 7.264832330330934e-05,
      "loss": 0.8577,
      "step": 3770
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.8224341869354248,
      "learning_rate": 7.259861002865065e-05,
      "loss": 1.0813,
      "step": 3771
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.6520782113075256,
      "learning_rate": 7.254890407466384e-05,
      "loss": 0.6051,
      "step": 3772
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.21469005942344666,
      "learning_rate": 7.249920545462849e-05,
      "loss": 0.7427,
      "step": 3773
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.2495570182800293,
      "learning_rate": 7.244951418182227e-05,
      "loss": 0.8522,
      "step": 3774
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.1647931933403015,
      "learning_rate": 7.239983026952098e-05,
      "loss": 0.8287,
      "step": 3775
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.18790623545646667,
      "learning_rate": 7.235015373099833e-05,
      "loss": 0.9051,
      "step": 3776
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.32002827525138855,
      "learning_rate": 7.230048457952612e-05,
      "loss": 0.8705,
      "step": 3777
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.2851047217845917,
      "learning_rate": 7.225082282837421e-05,
      "loss": 0.7454,
      "step": 3778
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.3064095079898834,
      "learning_rate": 7.220116849081042e-05,
      "loss": 0.8488,
      "step": 3779
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.3701287508010864,
      "learning_rate": 7.215152158010057e-05,
      "loss": 0.979,
      "step": 3780
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.5519550442695618,
      "learning_rate": 7.210188210950863e-05,
      "loss": 1.0337,
      "step": 3781
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.19093874096870422,
      "learning_rate": 7.205225009229643e-05,
      "loss": 0.6567,
      "step": 3782
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.369732528924942,
      "learning_rate": 7.200262554172391e-05,
      "loss": 0.8996,
      "step": 3783
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.26537129282951355,
      "learning_rate": 7.195300847104889e-05,
      "loss": 0.832,
      "step": 3784
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.1551806777715683,
      "learning_rate": 7.19033988935274e-05,
      "loss": 0.7415,
      "step": 3785
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.3838285505771637,
      "learning_rate": 7.185379682241326e-05,
      "loss": 0.9884,
      "step": 3786
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.2603355348110199,
      "learning_rate": 7.180420227095838e-05,
      "loss": 0.9476,
      "step": 3787
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.22275947034358978,
      "learning_rate": 7.175461525241269e-05,
      "loss": 0.6131,
      "step": 3788
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.35998770594596863,
      "learning_rate": 7.170503578002404e-05,
      "loss": 1.0663,
      "step": 3789
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.3041115403175354,
      "learning_rate": 7.165546386703825e-05,
      "loss": 0.659,
      "step": 3790
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.32579299807548523,
      "learning_rate": 7.160589952669929e-05,
      "loss": 0.4808,
      "step": 3791
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.36508041620254517,
      "learning_rate": 7.155634277224888e-05,
      "loss": 0.669,
      "step": 3792
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.2603738605976105,
      "learning_rate": 7.150679361692676e-05,
      "loss": 0.6536,
      "step": 3793
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.16027578711509705,
      "learning_rate": 7.145725207397083e-05,
      "loss": 0.7767,
      "step": 3794
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.21198205649852753,
      "learning_rate": 7.140771815661675e-05,
      "loss": 0.882,
      "step": 3795
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.29186588525772095,
      "learning_rate": 7.135819187809823e-05,
      "loss": 0.9911,
      "step": 3796
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.20322749018669128,
      "learning_rate": 7.130867325164687e-05,
      "loss": 0.6254,
      "step": 3797
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.2731444835662842,
      "learning_rate": 7.125916229049234e-05,
      "loss": 0.9305,
      "step": 3798
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.6098154783248901,
      "learning_rate": 7.120965900786218e-05,
      "loss": 0.8302,
      "step": 3799
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.2856026887893677,
      "learning_rate": 7.116016341698187e-05,
      "loss": 0.7672,
      "step": 3800
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.24528084695339203,
      "learning_rate": 7.111067553107489e-05,
      "loss": 0.9223,
      "step": 3801
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.3031716048717499,
      "learning_rate": 7.106119536336264e-05,
      "loss": 0.6581,
      "step": 3802
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.21375523507595062,
      "learning_rate": 7.101172292706439e-05,
      "loss": 0.9437,
      "step": 3803
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.18022772669792175,
      "learning_rate": 7.09622582353975e-05,
      "loss": 0.8407,
      "step": 3804
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.26830729842185974,
      "learning_rate": 7.091280130157714e-05,
      "loss": 0.8304,
      "step": 3805
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.22640228271484375,
      "learning_rate": 7.086335213881637e-05,
      "loss": 0.9247,
      "step": 3806
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.24893923103809357,
      "learning_rate": 7.081391076032633e-05,
      "loss": 0.9355,
      "step": 3807
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.23287874460220337,
      "learning_rate": 7.076447717931593e-05,
      "loss": 0.7512,
      "step": 3808
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.20863811671733856,
      "learning_rate": 7.071505140899206e-05,
      "loss": 0.7918,
      "step": 3809
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.2676553726196289,
      "learning_rate": 7.066563346255954e-05,
      "loss": 0.9782,
      "step": 3810
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.4030774235725403,
      "learning_rate": 7.061622335322107e-05,
      "loss": 0.9062,
      "step": 3811
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.18341948091983795,
      "learning_rate": 7.056682109417726e-05,
      "loss": 0.7364,
      "step": 3812
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.3616332411766052,
      "learning_rate": 7.05174266986266e-05,
      "loss": 0.7802,
      "step": 3813
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.14271366596221924,
      "learning_rate": 7.046804017976556e-05,
      "loss": 0.8323,
      "step": 3814
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.2345820516347885,
      "learning_rate": 7.041866155078846e-05,
      "loss": 1.0357,
      "step": 3815
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.8417219519615173,
      "learning_rate": 7.036929082488741e-05,
      "loss": 1.1649,
      "step": 3816
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.3233186602592468,
      "learning_rate": 7.031992801525262e-05,
      "loss": 0.9767,
      "step": 3817
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.3528916835784912,
      "learning_rate": 7.0270573135072e-05,
      "loss": 0.7631,
      "step": 3818
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.2694389522075653,
      "learning_rate": 7.022122619753141e-05,
      "loss": 0.8577,
      "step": 3819
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.7067842483520508,
      "learning_rate": 7.017188721581463e-05,
      "loss": 0.8583,
      "step": 3820
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.2683485448360443,
      "learning_rate": 7.012255620310323e-05,
      "loss": 0.7776,
      "step": 3821
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.26298925280570984,
      "learning_rate": 7.007323317257667e-05,
      "loss": 0.7124,
      "step": 3822
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.23579417169094086,
      "learning_rate": 7.00239181374124e-05,
      "loss": 0.8302,
      "step": 3823
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.3643285036087036,
      "learning_rate": 6.997461111078554e-05,
      "loss": 0.6449,
      "step": 3824
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.185590922832489,
      "learning_rate": 6.992531210586916e-05,
      "loss": 0.9533,
      "step": 3825
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.32594117522239685,
      "learning_rate": 6.987602113583429e-05,
      "loss": 0.5993,
      "step": 3826
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.25991091132164,
      "learning_rate": 6.982673821384965e-05,
      "loss": 0.8411,
      "step": 3827
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.24951641261577606,
      "learning_rate": 6.977746335308186e-05,
      "loss": 1.0384,
      "step": 3828
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.319113552570343,
      "learning_rate": 6.972819656669541e-05,
      "loss": 0.963,
      "step": 3829
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.18980862200260162,
      "learning_rate": 6.967893786785264e-05,
      "loss": 0.8348,
      "step": 3830
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.24187059700489044,
      "learning_rate": 6.962968726971372e-05,
      "loss": 0.6544,
      "step": 3831
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.3019846975803375,
      "learning_rate": 6.958044478543661e-05,
      "loss": 0.8871,
      "step": 3832
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.2794528603553772,
      "learning_rate": 6.95312104281772e-05,
      "loss": 0.9036,
      "step": 3833
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.22706493735313416,
      "learning_rate": 6.948198421108911e-05,
      "loss": 0.6498,
      "step": 3834
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.6879016757011414,
      "learning_rate": 6.943276614732379e-05,
      "loss": 0.8549,
      "step": 3835
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.268943727016449,
      "learning_rate": 6.938355625003062e-05,
      "loss": 1.001,
      "step": 3836
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.2992998957633972,
      "learning_rate": 6.933435453235672e-05,
      "loss": 0.7857,
      "step": 3837
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.24061523377895355,
      "learning_rate": 6.928516100744697e-05,
      "loss": 0.7783,
      "step": 3838
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.2669731676578522,
      "learning_rate": 6.923597568844418e-05,
      "loss": 0.8425,
      "step": 3839
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.2750985324382782,
      "learning_rate": 6.918679858848889e-05,
      "loss": 0.7823,
      "step": 3840
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.2966645658016205,
      "learning_rate": 6.913762972071942e-05,
      "loss": 0.8738,
      "step": 3841
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.25347593426704407,
      "learning_rate": 6.9088469098272e-05,
      "loss": 0.9387,
      "step": 3842
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.246694877743721,
      "learning_rate": 6.903931673428058e-05,
      "loss": 0.9344,
      "step": 3843
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.1883782297372818,
      "learning_rate": 6.899017264187688e-05,
      "loss": 0.5318,
      "step": 3844
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.262813001871109,
      "learning_rate": 6.894103683419043e-05,
      "loss": 0.638,
      "step": 3845
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.6050893068313599,
      "learning_rate": 6.889190932434864e-05,
      "loss": 0.6757,
      "step": 3846
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.21670116484165192,
      "learning_rate": 6.884279012547659e-05,
      "loss": 0.8881,
      "step": 3847
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.1875879019498825,
      "learning_rate": 6.87936792506971e-05,
      "loss": 0.6826,
      "step": 3848
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.3562796711921692,
      "learning_rate": 6.874457671313094e-05,
      "loss": 0.586,
      "step": 3849
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.5544823408126831,
      "learning_rate": 6.869548252589651e-05,
      "loss": 0.7207,
      "step": 3850
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.7641035914421082,
      "learning_rate": 6.864639670211001e-05,
      "loss": 0.9867,
      "step": 3851
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.316299170255661,
      "learning_rate": 6.859731925488546e-05,
      "loss": 0.8369,
      "step": 3852
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.7670835852622986,
      "learning_rate": 6.854825019733458e-05,
      "loss": 0.889,
      "step": 3853
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.41483622789382935,
      "learning_rate": 6.84991895425668e-05,
      "loss": 0.8514,
      "step": 3854
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.6553211808204651,
      "learning_rate": 6.845013730368952e-05,
      "loss": 0.6879,
      "step": 3855
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.8252565860748291,
      "learning_rate": 6.840109349380762e-05,
      "loss": 0.7342,
      "step": 3856
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.24866171181201935,
      "learning_rate": 6.835205812602385e-05,
      "loss": 0.5588,
      "step": 3857
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.4080507755279541,
      "learning_rate": 6.830303121343879e-05,
      "loss": 0.777,
      "step": 3858
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.3307512402534485,
      "learning_rate": 6.825401276915065e-05,
      "loss": 0.8043,
      "step": 3859
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.24729691445827484,
      "learning_rate": 6.820500280625539e-05,
      "loss": 0.6825,
      "step": 3860
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.2080582082271576,
      "learning_rate": 6.815600133784669e-05,
      "loss": 0.8144,
      "step": 3861
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.22700761258602142,
      "learning_rate": 6.810700837701604e-05,
      "loss": 0.9607,
      "step": 3862
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.5322756171226501,
      "learning_rate": 6.805802393685261e-05,
      "loss": 0.4985,
      "step": 3863
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.2310459464788437,
      "learning_rate": 6.800904803044325e-05,
      "loss": 0.6911,
      "step": 3864
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.2270941138267517,
      "learning_rate": 6.79600806708726e-05,
      "loss": 0.756,
      "step": 3865
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.3009992241859436,
      "learning_rate": 6.791112187122299e-05,
      "loss": 0.7268,
      "step": 3866
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.18685293197631836,
      "learning_rate": 6.78621716445744e-05,
      "loss": 0.8946,
      "step": 3867
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.34943029284477234,
      "learning_rate": 6.781323000400469e-05,
      "loss": 0.7166,
      "step": 3868
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.38100889325141907,
      "learning_rate": 6.776429696258925e-05,
      "loss": 1.0664,
      "step": 3869
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.3178374171257019,
      "learning_rate": 6.771537253340123e-05,
      "loss": 0.852,
      "step": 3870
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.2892356812953949,
      "learning_rate": 6.766645672951154e-05,
      "loss": 0.9539,
      "step": 3871
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.15866976976394653,
      "learning_rate": 6.761754956398869e-05,
      "loss": 0.8247,
      "step": 3872
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.3999134600162506,
      "learning_rate": 6.756865104989893e-05,
      "loss": 0.6884,
      "step": 3873
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.37811407446861267,
      "learning_rate": 6.751976120030621e-05,
      "loss": 0.8357,
      "step": 3874
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.23819167912006378,
      "learning_rate": 6.747088002827214e-05,
      "loss": 0.798,
      "step": 3875
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.28469663858413696,
      "learning_rate": 6.742200754685604e-05,
      "loss": 0.9015,
      "step": 3876
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.24030141532421112,
      "learning_rate": 6.737314376911482e-05,
      "loss": 0.6561,
      "step": 3877
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.2485043853521347,
      "learning_rate": 6.732428870810324e-05,
      "loss": 0.8768,
      "step": 3878
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.37858328223228455,
      "learning_rate": 6.727544237687359e-05,
      "loss": 0.7675,
      "step": 3879
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.2948249578475952,
      "learning_rate": 6.722660478847578e-05,
      "loss": 0.8949,
      "step": 3880
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.2681567370891571,
      "learning_rate": 6.717777595595758e-05,
      "loss": 0.8023,
      "step": 3881
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.24003350734710693,
      "learning_rate": 6.712895589236427e-05,
      "loss": 1.0383,
      "step": 3882
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.306548535823822,
      "learning_rate": 6.708014461073879e-05,
      "loss": 0.839,
      "step": 3883
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.2525723874568939,
      "learning_rate": 6.703134212412183e-05,
      "loss": 0.7746,
      "step": 3884
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.845737636089325,
      "learning_rate": 6.698254844555165e-05,
      "loss": 0.8913,
      "step": 3885
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.2640727162361145,
      "learning_rate": 6.693376358806411e-05,
      "loss": 0.7622,
      "step": 3886
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.7206928730010986,
      "learning_rate": 6.688498756469287e-05,
      "loss": 0.7239,
      "step": 3887
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.18988463282585144,
      "learning_rate": 6.683622038846913e-05,
      "loss": 0.9354,
      "step": 3888
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.5465232133865356,
      "learning_rate": 6.678746207242166e-05,
      "loss": 0.7566,
      "step": 3889
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.2228272706270218,
      "learning_rate": 6.673871262957702e-05,
      "loss": 0.9622,
      "step": 3890
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.2609547972679138,
      "learning_rate": 6.66899720729593e-05,
      "loss": 0.9458,
      "step": 3891
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.3472975790500641,
      "learning_rate": 6.664124041559021e-05,
      "loss": 0.9998,
      "step": 3892
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.20388509333133698,
      "learning_rate": 6.65925176704891e-05,
      "loss": 0.7272,
      "step": 3893
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.30286917090415955,
      "learning_rate": 6.6543803850673e-05,
      "loss": 0.7688,
      "step": 3894
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.2978511452674866,
      "learning_rate": 6.649509896915647e-05,
      "loss": 0.7339,
      "step": 3895
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.25139838457107544,
      "learning_rate": 6.644640303895167e-05,
      "loss": 0.6716,
      "step": 3896
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.2266472429037094,
      "learning_rate": 6.639771607306848e-05,
      "loss": 0.9166,
      "step": 3897
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.2885359227657318,
      "learning_rate": 6.634903808451429e-05,
      "loss": 0.846,
      "step": 3898
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.25512605905532837,
      "learning_rate": 6.630036908629406e-05,
      "loss": 0.7766,
      "step": 3899
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.3385232090950012,
      "learning_rate": 6.625170909141053e-05,
      "loss": 0.7688,
      "step": 3900
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.8244383931159973,
      "learning_rate": 6.620305811286383e-05,
      "loss": 0.7576,
      "step": 3901
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.26123934984207153,
      "learning_rate": 6.615441616365176e-05,
      "loss": 0.7306,
      "step": 3902
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.14591921865940094,
      "learning_rate": 6.610578325676975e-05,
      "loss": 0.8209,
      "step": 3903
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.37763461470603943,
      "learning_rate": 6.605715940521073e-05,
      "loss": 1.1007,
      "step": 3904
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.30200648307800293,
      "learning_rate": 6.600854462196528e-05,
      "loss": 0.8487,
      "step": 3905
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.15400712192058563,
      "learning_rate": 6.595993892002153e-05,
      "loss": 0.8072,
      "step": 3906
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.2742244005203247,
      "learning_rate": 6.59113423123652e-05,
      "loss": 0.8492,
      "step": 3907
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.8656650185585022,
      "learning_rate": 6.586275481197955e-05,
      "loss": 0.903,
      "step": 3908
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.3317274749279022,
      "learning_rate": 6.581417643184535e-05,
      "loss": 0.7447,
      "step": 3909
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.19193516671657562,
      "learning_rate": 6.576560718494115e-05,
      "loss": 0.834,
      "step": 3910
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.24692288041114807,
      "learning_rate": 6.571704708424287e-05,
      "loss": 0.9084,
      "step": 3911
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.4376998245716095,
      "learning_rate": 6.566849614272392e-05,
      "loss": 1.0156,
      "step": 3912
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.22029420733451843,
      "learning_rate": 6.561995437335553e-05,
      "loss": 0.7836,
      "step": 3913
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.23739366233348846,
      "learning_rate": 6.557142178910625e-05,
      "loss": 0.7549,
      "step": 3914
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.2916111350059509,
      "learning_rate": 6.552289840294224e-05,
      "loss": 0.9082,
      "step": 3915
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.2385418862104416,
      "learning_rate": 6.547438422782725e-05,
      "loss": 0.7902,
      "step": 3916
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.23745447397232056,
      "learning_rate": 6.542587927672252e-05,
      "loss": 0.8205,
      "step": 3917
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.30324792861938477,
      "learning_rate": 6.53773835625868e-05,
      "loss": 0.8868,
      "step": 3918
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.2086676061153412,
      "learning_rate": 6.53288970983765e-05,
      "loss": 1.2361,
      "step": 3919
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.21571053564548492,
      "learning_rate": 6.528041989704544e-05,
      "loss": 0.9059,
      "step": 3920
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.27371087670326233,
      "learning_rate": 6.523195197154487e-05,
      "loss": 0.8081,
      "step": 3921
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.4192798137664795,
      "learning_rate": 6.518349333482388e-05,
      "loss": 0.619,
      "step": 3922
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.3178955912590027,
      "learning_rate": 6.513504399982877e-05,
      "loss": 0.9064,
      "step": 3923
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.2587558329105377,
      "learning_rate": 6.508660397950352e-05,
      "loss": 0.7124,
      "step": 3924
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.22807304561138153,
      "learning_rate": 6.503817328678951e-05,
      "loss": 0.872,
      "step": 3925
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.8630792498588562,
      "learning_rate": 6.498975193462575e-05,
      "loss": 0.8944,
      "step": 3926
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.8673673868179321,
      "learning_rate": 6.494133993594867e-05,
      "loss": 1.0751,
      "step": 3927
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.32105445861816406,
      "learning_rate": 6.489293730369221e-05,
      "loss": 0.7605,
      "step": 3928
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.24141687154769897,
      "learning_rate": 6.484454405078786e-05,
      "loss": 0.9479,
      "step": 3929
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.23380063474178314,
      "learning_rate": 6.479616019016454e-05,
      "loss": 1.0964,
      "step": 3930
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.2751850187778473,
      "learning_rate": 6.474778573474866e-05,
      "loss": 0.818,
      "step": 3931
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.24877730011940002,
      "learning_rate": 6.469942069746424e-05,
      "loss": 0.9961,
      "step": 3932
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.3235902190208435,
      "learning_rate": 6.46510650912326e-05,
      "loss": 0.5002,
      "step": 3933
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.3359324336051941,
      "learning_rate": 6.460271892897266e-05,
      "loss": 0.6988,
      "step": 3934
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.20933832228183746,
      "learning_rate": 6.45543822236008e-05,
      "loss": 0.8861,
      "step": 3935
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.2468889206647873,
      "learning_rate": 6.450605498803083e-05,
      "loss": 0.8202,
      "step": 3936
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.2740335464477539,
      "learning_rate": 6.445773723517408e-05,
      "loss": 0.9052,
      "step": 3937
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.40982773900032043,
      "learning_rate": 6.440942897793931e-05,
      "loss": 0.9438,
      "step": 3938
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.4587453305721283,
      "learning_rate": 6.43611302292328e-05,
      "loss": 0.8359,
      "step": 3939
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.36911025643348694,
      "learning_rate": 6.43128410019582e-05,
      "loss": 0.6352,
      "step": 3940
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.21754038333892822,
      "learning_rate": 6.426456130901663e-05,
      "loss": 0.7697,
      "step": 3941
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.22502949833869934,
      "learning_rate": 6.421629116330681e-05,
      "loss": 0.7989,
      "step": 3942
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.27638939023017883,
      "learning_rate": 6.416803057772476e-05,
      "loss": 0.9273,
      "step": 3943
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.1744547337293625,
      "learning_rate": 6.41197795651639e-05,
      "loss": 0.7097,
      "step": 3944
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.285791277885437,
      "learning_rate": 6.407153813851528e-05,
      "loss": 0.9402,
      "step": 3945
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.372709721326828,
      "learning_rate": 6.402330631066724e-05,
      "loss": 0.781,
      "step": 3946
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.30496031045913696,
      "learning_rate": 6.39750840945056e-05,
      "loss": 0.6208,
      "step": 3947
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.5585252642631531,
      "learning_rate": 6.392687150291362e-05,
      "loss": 0.8424,
      "step": 3948
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.2334444373846054,
      "learning_rate": 6.387866854877195e-05,
      "loss": 0.7647,
      "step": 3949
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.25874316692352295,
      "learning_rate": 6.38304752449587e-05,
      "loss": 0.5621,
      "step": 3950
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.24477849900722504,
      "learning_rate": 6.378229160434948e-05,
      "loss": 0.8412,
      "step": 3951
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.25109004974365234,
      "learning_rate": 6.373411763981718e-05,
      "loss": 0.8601,
      "step": 3952
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.24226081371307373,
      "learning_rate": 6.36859533642321e-05,
      "loss": 0.7436,
      "step": 3953
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.47266262769699097,
      "learning_rate": 6.36377987904621e-05,
      "loss": 0.8269,
      "step": 3954
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.298447847366333,
      "learning_rate": 6.358965393137235e-05,
      "loss": 0.7279,
      "step": 3955
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.29383233189582825,
      "learning_rate": 6.35415187998254e-05,
      "loss": 0.7572,
      "step": 3956
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.22171670198440552,
      "learning_rate": 6.349339340868124e-05,
      "loss": 0.7347,
      "step": 3957
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.20744653046131134,
      "learning_rate": 6.344527777079728e-05,
      "loss": 0.7289,
      "step": 3958
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.34100157022476196,
      "learning_rate": 6.339717189902829e-05,
      "loss": 0.7679,
      "step": 3959
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.7129939794540405,
      "learning_rate": 6.33490758062264e-05,
      "loss": 0.5054,
      "step": 3960
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.23883605003356934,
      "learning_rate": 6.330098950524123e-05,
      "loss": 0.7562,
      "step": 3961
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.2938968241214752,
      "learning_rate": 6.325291300891968e-05,
      "loss": 0.7712,
      "step": 3962
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.36177298426628113,
      "learning_rate": 6.320484633010605e-05,
      "loss": 0.7424,
      "step": 3963
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.28184351325035095,
      "learning_rate": 6.315678948164212e-05,
      "loss": 0.8765,
      "step": 3964
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.37220871448516846,
      "learning_rate": 6.310874247636691e-05,
      "loss": 0.7446,
      "step": 3965
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.23320020735263824,
      "learning_rate": 6.306070532711683e-05,
      "loss": 0.7581,
      "step": 3966
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.29401832818984985,
      "learning_rate": 6.301267804672575e-05,
      "loss": 0.8257,
      "step": 3967
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.7202320694923401,
      "learning_rate": 6.296466064802481e-05,
      "loss": 0.8006,
      "step": 3968
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.5560463070869446,
      "learning_rate": 6.291665314384254e-05,
      "loss": 0.763,
      "step": 3969
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.26739680767059326,
      "learning_rate": 6.286865554700484e-05,
      "loss": 0.933,
      "step": 3970
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.2184048444032669,
      "learning_rate": 6.282066787033498e-05,
      "loss": 0.8682,
      "step": 3971
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.36902666091918945,
      "learning_rate": 6.277269012665348e-05,
      "loss": 0.7421,
      "step": 3972
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.3194674551486969,
      "learning_rate": 6.272472232877831e-05,
      "loss": 0.9866,
      "step": 3973
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.2747090756893158,
      "learning_rate": 6.267676448952478e-05,
      "loss": 0.8039,
      "step": 3974
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.3643084168434143,
      "learning_rate": 6.262881662170549e-05,
      "loss": 0.9166,
      "step": 3975
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.2691226899623871,
      "learning_rate": 6.258087873813037e-05,
      "loss": 0.9719,
      "step": 3976
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.3628131151199341,
      "learning_rate": 6.253295085160678e-05,
      "loss": 0.8438,
      "step": 3977
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.32972055673599243,
      "learning_rate": 6.248503297493926e-05,
      "loss": 0.9496,
      "step": 3978
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.2766328752040863,
      "learning_rate": 6.243712512092978e-05,
      "loss": 0.872,
      "step": 3979
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.23591575026512146,
      "learning_rate": 6.238922730237765e-05,
      "loss": 0.9022,
      "step": 3980
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.3026307225227356,
      "learning_rate": 6.234133953207938e-05,
      "loss": 0.9598,
      "step": 3981
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.2313053458929062,
      "learning_rate": 6.229346182282887e-05,
      "loss": 0.809,
      "step": 3982
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.20703400671482086,
      "learning_rate": 6.224559418741743e-05,
      "loss": 0.8235,
      "step": 3983
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.2239978313446045,
      "learning_rate": 6.219773663863353e-05,
      "loss": 0.7676,
      "step": 3984
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.25676923990249634,
      "learning_rate": 6.214988918926293e-05,
      "loss": 0.9934,
      "step": 3985
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.26996150612831116,
      "learning_rate": 6.210205185208886e-05,
      "loss": 0.7009,
      "step": 3986
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.31108358502388,
      "learning_rate": 6.205422463989168e-05,
      "loss": 0.9522,
      "step": 3987
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.2636515498161316,
      "learning_rate": 6.200640756544914e-05,
      "loss": 0.7586,
      "step": 3988
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.20587576925754547,
      "learning_rate": 6.195860064153623e-05,
      "loss": 0.7261,
      "step": 3989
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.2286793440580368,
      "learning_rate": 6.191080388092528e-05,
      "loss": 1.0844,
      "step": 3990
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.6223757266998291,
      "learning_rate": 6.186301729638585e-05,
      "loss": 0.9426,
      "step": 3991
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.20390725135803223,
      "learning_rate": 6.181524090068481e-05,
      "loss": 0.7528,
      "step": 3992
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.2703605890274048,
      "learning_rate": 6.176747470658634e-05,
      "loss": 0.8194,
      "step": 3993
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.4282436966896057,
      "learning_rate": 6.171971872685183e-05,
      "loss": 0.997,
      "step": 3994
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.2685796022415161,
      "learning_rate": 6.167197297423993e-05,
      "loss": 0.9144,
      "step": 3995
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.20093873143196106,
      "learning_rate": 6.162423746150667e-05,
      "loss": 0.8971,
      "step": 3996
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.5535579323768616,
      "learning_rate": 6.157651220140525e-05,
      "loss": 0.8612,
      "step": 3997
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.463090717792511,
      "learning_rate": 6.152879720668613e-05,
      "loss": 0.6905,
      "step": 3998
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.6819261312484741,
      "learning_rate": 6.148109249009709e-05,
      "loss": 0.5772,
      "step": 3999
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.25899404287338257,
      "learning_rate": 6.14333980643831e-05,
      "loss": 0.6032,
      "step": 4000
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.27206936478614807,
      "learning_rate": 6.138571394228638e-05,
      "loss": 1.0671,
      "step": 4001
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.6838433146476746,
      "learning_rate": 6.133804013654649e-05,
      "loss": 0.8571,
      "step": 4002
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.29474860429763794,
      "learning_rate": 6.12903766599001e-05,
      "loss": 0.6456,
      "step": 4003
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.2755157947540283,
      "learning_rate": 6.124272352508123e-05,
      "loss": 0.837,
      "step": 4004
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.2599579393863678,
      "learning_rate": 6.119508074482104e-05,
      "loss": 0.7483,
      "step": 4005
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.3118239939212799,
      "learning_rate": 6.114744833184805e-05,
      "loss": 0.8874,
      "step": 4006
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.6170270442962646,
      "learning_rate": 6.10998262988879e-05,
      "loss": 0.9994,
      "step": 4007
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.3004859983921051,
      "learning_rate": 6.10522146586635e-05,
      "loss": 0.9465,
      "step": 4008
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.3182370364665985,
      "learning_rate": 6.1004613423894986e-05,
      "loss": 0.8204,
      "step": 4009
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.15147000551223755,
      "learning_rate": 6.0957022607299685e-05,
      "loss": 0.8787,
      "step": 4010
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.2356518805027008,
      "learning_rate": 6.090944222159216e-05,
      "loss": 0.7405,
      "step": 4011
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.23813313245773315,
      "learning_rate": 6.086187227948423e-05,
      "loss": 0.9347,
      "step": 4012
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.6162638664245605,
      "learning_rate": 6.0814312793684845e-05,
      "loss": 1.0671,
      "step": 4013
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.33928486704826355,
      "learning_rate": 6.076676377690018e-05,
      "loss": 0.7277,
      "step": 4014
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.37881162762641907,
      "learning_rate": 6.07192252418337e-05,
      "loss": 0.7968,
      "step": 4015
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.6941760182380676,
      "learning_rate": 6.067169720118599e-05,
      "loss": 0.6791,
      "step": 4016
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.34211960434913635,
      "learning_rate": 6.0624179667654744e-05,
      "loss": 0.9131,
      "step": 4017
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.27731549739837646,
      "learning_rate": 6.057667265393507e-05,
      "loss": 0.7419,
      "step": 4018
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.20166859030723572,
      "learning_rate": 6.0529176172719096e-05,
      "loss": 0.8785,
      "step": 4019
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.2909664511680603,
      "learning_rate": 6.048169023669619e-05,
      "loss": 0.8916,
      "step": 4020
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.6597419381141663,
      "learning_rate": 6.043421485855285e-05,
      "loss": 0.7781,
      "step": 4021
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.3405303359031677,
      "learning_rate": 6.038675005097288e-05,
      "loss": 0.627,
      "step": 4022
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.1727607697248459,
      "learning_rate": 6.033929582663713e-05,
      "loss": 0.7781,
      "step": 4023
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.3000878691673279,
      "learning_rate": 6.029185219822365e-05,
      "loss": 0.9851,
      "step": 4024
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.3083297908306122,
      "learning_rate": 6.024441917840777e-05,
      "loss": 0.8463,
      "step": 4025
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.19505088031291962,
      "learning_rate": 6.019699677986183e-05,
      "loss": 0.8265,
      "step": 4026
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.8195051550865173,
      "learning_rate": 6.014958501525536e-05,
      "loss": 0.5639,
      "step": 4027
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.22176559269428253,
      "learning_rate": 6.010218389725517e-05,
      "loss": 1.0032,
      "step": 4028
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.2057928740978241,
      "learning_rate": 6.005479343852514e-05,
      "loss": 0.7508,
      "step": 4029
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.3294934034347534,
      "learning_rate": 6.000741365172623e-05,
      "loss": 0.6284,
      "step": 4030
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.24764509499073029,
      "learning_rate": 5.996004454951671e-05,
      "loss": 0.8216,
      "step": 4031
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.4326554834842682,
      "learning_rate": 5.991268614455188e-05,
      "loss": 1.02,
      "step": 4032
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.2466733157634735,
      "learning_rate": 5.986533844948417e-05,
      "loss": 0.8866,
      "step": 4033
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.24722789227962494,
      "learning_rate": 5.981800147696326e-05,
      "loss": 0.9084,
      "step": 4034
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.24393397569656372,
      "learning_rate": 5.9770675239635865e-05,
      "loss": 1.0512,
      "step": 4035
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.6667054295539856,
      "learning_rate": 5.972335975014587e-05,
      "loss": 0.9209,
      "step": 4036
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.32211753726005554,
      "learning_rate": 5.967605502113424e-05,
      "loss": 1.0695,
      "step": 4037
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.3254261612892151,
      "learning_rate": 5.962876106523918e-05,
      "loss": 0.781,
      "step": 4038
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.234885573387146,
      "learning_rate": 5.95814778950959e-05,
      "loss": 0.9091,
      "step": 4039
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.2396087944507599,
      "learning_rate": 5.953420552333677e-05,
      "loss": 0.838,
      "step": 4040
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.26911213994026184,
      "learning_rate": 5.948694396259131e-05,
      "loss": 0.8588,
      "step": 4041
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.6544930338859558,
      "learning_rate": 5.943969322548611e-05,
      "loss": 0.8038,
      "step": 4042
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.4144744575023651,
      "learning_rate": 5.939245332464483e-05,
      "loss": 0.768,
      "step": 4043
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.7635195851325989,
      "learning_rate": 5.934522427268834e-05,
      "loss": 1.0689,
      "step": 4044
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.6413111090660095,
      "learning_rate": 5.929800608223455e-05,
      "loss": 0.7614,
      "step": 4045
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.1935018002986908,
      "learning_rate": 5.92507987658984e-05,
      "loss": 0.9566,
      "step": 4046
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.46392643451690674,
      "learning_rate": 5.92036023362921e-05,
      "loss": 0.8846,
      "step": 4047
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.18582437932491302,
      "learning_rate": 5.9156416806024815e-05,
      "loss": 0.8599,
      "step": 4048
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.21591676771640778,
      "learning_rate": 5.9109242187702774e-05,
      "loss": 0.866,
      "step": 4049
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.4153083860874176,
      "learning_rate": 5.906207849392942e-05,
      "loss": 0.8247,
      "step": 4050
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.26094648241996765,
      "learning_rate": 5.901492573730518e-05,
      "loss": 0.7256,
      "step": 4051
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.6555805802345276,
      "learning_rate": 5.896778393042759e-05,
      "loss": 0.7522,
      "step": 4052
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.22007329761981964,
      "learning_rate": 5.892065308589123e-05,
      "loss": 0.699,
      "step": 4053
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.27047204971313477,
      "learning_rate": 5.887353321628781e-05,
      "loss": 1.0376,
      "step": 4054
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.27908855676651,
      "learning_rate": 5.882642433420607e-05,
      "loss": 0.8222,
      "step": 4055
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.2456049770116806,
      "learning_rate": 5.8779326452231764e-05,
      "loss": 0.7334,
      "step": 4056
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.25722262263298035,
      "learning_rate": 5.873223958294788e-05,
      "loss": 0.8487,
      "step": 4057
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.1414240300655365,
      "learning_rate": 5.868516373893425e-05,
      "loss": 0.6235,
      "step": 4058
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.21642108261585236,
      "learning_rate": 5.863809893276784e-05,
      "loss": 0.8572,
      "step": 4059
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.45147332549095154,
      "learning_rate": 5.859104517702275e-05,
      "loss": 0.9904,
      "step": 4060
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.3151394724845886,
      "learning_rate": 5.8544002484270053e-05,
      "loss": 0.8753,
      "step": 4061
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.21197330951690674,
      "learning_rate": 5.849697086707784e-05,
      "loss": 0.8644,
      "step": 4062
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.22877268493175507,
      "learning_rate": 5.8449950338011304e-05,
      "loss": 0.6226,
      "step": 4063
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.2358504831790924,
      "learning_rate": 5.840294090963265e-05,
      "loss": 0.7924,
      "step": 4064
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.5427731275558472,
      "learning_rate": 5.835594259450111e-05,
      "loss": 0.8494,
      "step": 4065
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.2495565265417099,
      "learning_rate": 5.830895540517293e-05,
      "loss": 0.8059,
      "step": 4066
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.2606455385684967,
      "learning_rate": 5.826197935420144e-05,
      "loss": 0.7791,
      "step": 4067
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.3173256814479828,
      "learning_rate": 5.821501445413696e-05,
      "loss": 0.8282,
      "step": 4068
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.5587453246116638,
      "learning_rate": 5.816806071752681e-05,
      "loss": 0.9487,
      "step": 4069
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.3182958960533142,
      "learning_rate": 5.8121118156915345e-05,
      "loss": 0.8368,
      "step": 4070
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.3105509877204895,
      "learning_rate": 5.807418678484401e-05,
      "loss": 0.6504,
      "step": 4071
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.3699150085449219,
      "learning_rate": 5.802726661385105e-05,
      "loss": 0.7642,
      "step": 4072
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.29781290888786316,
      "learning_rate": 5.798035765647203e-05,
      "loss": 0.7791,
      "step": 4073
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.19952437281608582,
      "learning_rate": 5.793345992523925e-05,
      "loss": 0.7138,
      "step": 4074
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.4969084560871124,
      "learning_rate": 5.788657343268204e-05,
      "loss": 0.6976,
      "step": 4075
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.21556001901626587,
      "learning_rate": 5.7839698191326964e-05,
      "loss": 0.7421,
      "step": 4076
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.6113178133964539,
      "learning_rate": 5.7792834213697264e-05,
      "loss": 0.6354,
      "step": 4077
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.26326626539230347,
      "learning_rate": 5.7745981512313384e-05,
      "loss": 0.9888,
      "step": 4078
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.3199230134487152,
      "learning_rate": 5.7699140099692716e-05,
      "loss": 0.8152,
      "step": 4079
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.23014333844184875,
      "learning_rate": 5.765230998834954e-05,
      "loss": 0.9155,
      "step": 4080
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.16836725175380707,
      "learning_rate": 5.760549119079526e-05,
      "loss": 0.6491,
      "step": 4081
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.4880889356136322,
      "learning_rate": 5.755868371953811e-05,
      "loss": 0.9876,
      "step": 4082
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.33616480231285095,
      "learning_rate": 5.751188758708341e-05,
      "loss": 1.0676,
      "step": 4083
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.4693872928619385,
      "learning_rate": 5.746510280593346e-05,
      "loss": 1.0341,
      "step": 4084
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.21355023980140686,
      "learning_rate": 5.7418329388587354e-05,
      "loss": 0.9458,
      "step": 4085
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.2880993187427521,
      "learning_rate": 5.7371567347541434e-05,
      "loss": 0.8328,
      "step": 4086
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.2172928899526596,
      "learning_rate": 5.732481669528873e-05,
      "loss": 0.6555,
      "step": 4087
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.28741151094436646,
      "learning_rate": 5.7278077444319386e-05,
      "loss": 0.7913,
      "step": 4088
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.2784458100795746,
      "learning_rate": 5.723134960712051e-05,
      "loss": 0.7759,
      "step": 4089
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.3404279351234436,
      "learning_rate": 5.718463319617602e-05,
      "loss": 0.8577,
      "step": 4090
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.5133073329925537,
      "learning_rate": 5.71379282239669e-05,
      "loss": 0.8254,
      "step": 4091
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.22553999722003937,
      "learning_rate": 5.709123470297109e-05,
      "loss": 0.8369,
      "step": 4092
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.20280010998249054,
      "learning_rate": 5.7044552645663374e-05,
      "loss": 0.7861,
      "step": 4093
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.19585241377353668,
      "learning_rate": 5.699788206451554e-05,
      "loss": 0.7166,
      "step": 4094
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.4761788249015808,
      "learning_rate": 5.695122297199631e-05,
      "loss": 0.6306,
      "step": 4095
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.31439292430877686,
      "learning_rate": 5.690457538057138e-05,
      "loss": 0.9605,
      "step": 4096
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.5113157033920288,
      "learning_rate": 5.6857939302703266e-05,
      "loss": 0.7406,
      "step": 4097
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.7382208704948425,
      "learning_rate": 5.6811314750851375e-05,
      "loss": 1.065,
      "step": 4098
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.28330889344215393,
      "learning_rate": 5.676470173747228e-05,
      "loss": 0.7079,
      "step": 4099
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.26815515756607056,
      "learning_rate": 5.6718100275019206e-05,
      "loss": 0.8647,
      "step": 4100
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.8107288479804993,
      "learning_rate": 5.6671510375942416e-05,
      "loss": 0.5659,
      "step": 4101
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.2466038465499878,
      "learning_rate": 5.662493205268913e-05,
      "loss": 0.6888,
      "step": 4102
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.2647465169429779,
      "learning_rate": 5.6578365317703306e-05,
      "loss": 0.7524,
      "step": 4103
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.6462384462356567,
      "learning_rate": 5.6531810183425995e-05,
      "loss": 0.7512,
      "step": 4104
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.7597174644470215,
      "learning_rate": 5.648526666229505e-05,
      "loss": 0.6447,
      "step": 4105
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.23056048154830933,
      "learning_rate": 5.643873476674518e-05,
      "loss": 0.7752,
      "step": 4106
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.2582421600818634,
      "learning_rate": 5.639221450920808e-05,
      "loss": 0.7699,
      "step": 4107
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.34718504548072815,
      "learning_rate": 5.634570590211232e-05,
      "loss": 0.7359,
      "step": 4108
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.4332895874977112,
      "learning_rate": 5.629920895788336e-05,
      "loss": 0.671,
      "step": 4109
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.24173830449581146,
      "learning_rate": 5.625272368894343e-05,
      "loss": 0.7809,
      "step": 4110
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.3948962390422821,
      "learning_rate": 5.620625010771179e-05,
      "loss": 0.815,
      "step": 4111
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.47855687141418457,
      "learning_rate": 5.615978822660456e-05,
      "loss": 1.0406,
      "step": 4112
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.5970498919487,
      "learning_rate": 5.6113338058034606e-05,
      "loss": 0.7548,
      "step": 4113
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.27235737442970276,
      "learning_rate": 5.606689961441178e-05,
      "loss": 0.741,
      "step": 4114
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.22072279453277588,
      "learning_rate": 5.602047290814284e-05,
      "loss": 0.6368,
      "step": 4115
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.2556578814983368,
      "learning_rate": 5.597405795163124e-05,
      "loss": 0.9318,
      "step": 4116
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.18247638642787933,
      "learning_rate": 5.592765475727746e-05,
      "loss": 0.7325,
      "step": 4117
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.33222126960754395,
      "learning_rate": 5.5881263337478765e-05,
      "loss": 0.9939,
      "step": 4118
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.40681982040405273,
      "learning_rate": 5.5834883704629304e-05,
      "loss": 0.7668,
      "step": 4119
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.3239881694316864,
      "learning_rate": 5.578851587111999e-05,
      "loss": 0.8723,
      "step": 4120
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.23930558562278748,
      "learning_rate": 5.574215984933872e-05,
      "loss": 0.8042,
      "step": 4121
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.3260376453399658,
      "learning_rate": 5.5695815651670155e-05,
      "loss": 0.9353,
      "step": 4122
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.37386515736579895,
      "learning_rate": 5.564948329049576e-05,
      "loss": 0.8925,
      "step": 4123
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.7141849994659424,
      "learning_rate": 5.560316277819393e-05,
      "loss": 1.0077,
      "step": 4124
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.3627719581127167,
      "learning_rate": 5.5556854127139866e-05,
      "loss": 0.6853,
      "step": 4125
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.2446010410785675,
      "learning_rate": 5.551055734970547e-05,
      "loss": 0.7275,
      "step": 4126
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.16557231545448303,
      "learning_rate": 5.546427245825976e-05,
      "loss": 0.9282,
      "step": 4127
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.6423966884613037,
      "learning_rate": 5.541799946516828e-05,
      "loss": 0.844,
      "step": 4128
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.8084217309951782,
      "learning_rate": 5.537173838279359e-05,
      "loss": 0.8129,
      "step": 4129
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.25423675775527954,
      "learning_rate": 5.532548922349492e-05,
      "loss": 0.9674,
      "step": 4130
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.2879215180873871,
      "learning_rate": 5.5279251999628444e-05,
      "loss": 0.772,
      "step": 4131
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.2398567944765091,
      "learning_rate": 5.523302672354713e-05,
      "loss": 0.6721,
      "step": 4132
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.42828935384750366,
      "learning_rate": 5.518681340760062e-05,
      "loss": 0.6139,
      "step": 4133
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.20084281265735626,
      "learning_rate": 5.5140612064135524e-05,
      "loss": 0.7031,
      "step": 4134
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.31216832995414734,
      "learning_rate": 5.509442270549523e-05,
      "loss": 0.6643,
      "step": 4135
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.579359769821167,
      "learning_rate": 5.504824534401973e-05,
      "loss": 1.0832,
      "step": 4136
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.711586058139801,
      "learning_rate": 5.5002079992046176e-05,
      "loss": 0.8292,
      "step": 4137
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.3291469216346741,
      "learning_rate": 5.4955926661908175e-05,
      "loss": 0.8396,
      "step": 4138
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.42390984296798706,
      "learning_rate": 5.490978536593618e-05,
      "loss": 0.8048,
      "step": 4139
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.310301810503006,
      "learning_rate": 5.486365611645767e-05,
      "loss": 0.9302,
      "step": 4140
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.22858868539333344,
      "learning_rate": 5.481753892579661e-05,
      "loss": 0.715,
      "step": 4141
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.4671165645122528,
      "learning_rate": 5.477143380627388e-05,
      "loss": 0.9979,
      "step": 4142
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.2890144884586334,
      "learning_rate": 5.472534077020718e-05,
      "loss": 1.0753,
      "step": 4143
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.2457607090473175,
      "learning_rate": 5.4679259829910845e-05,
      "loss": 0.7041,
      "step": 4144
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.39217570424079895,
      "learning_rate": 5.4633190997696104e-05,
      "loss": 0.9062,
      "step": 4145
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.46314483880996704,
      "learning_rate": 5.4587134285870866e-05,
      "loss": 0.8163,
      "step": 4146
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.43835821747779846,
      "learning_rate": 5.454108970673986e-05,
      "loss": 0.9738,
      "step": 4147
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.35036709904670715,
      "learning_rate": 5.4495057272604566e-05,
      "loss": 0.553,
      "step": 4148
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.5558268427848816,
      "learning_rate": 5.4449036995763115e-05,
      "loss": 1.0019,
      "step": 4149
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.22857408225536346,
      "learning_rate": 5.440302888851063e-05,
      "loss": 0.8961,
      "step": 4150
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.2939273416996002,
      "learning_rate": 5.435703296313873e-05,
      "loss": 0.9376,
      "step": 4151
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.21123920381069183,
      "learning_rate": 5.431104923193589e-05,
      "loss": 0.8397,
      "step": 4152
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.7354604005813599,
      "learning_rate": 5.426507770718738e-05,
      "loss": 0.8988,
      "step": 4153
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.36991623044013977,
      "learning_rate": 5.4219118401175065e-05,
      "loss": 0.9922,
      "step": 4154
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.24939191341400146,
      "learning_rate": 5.4173171326177676e-05,
      "loss": 0.8199,
      "step": 4155
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.7154248356819153,
      "learning_rate": 5.4127236494470646e-05,
      "loss": 0.7048,
      "step": 4156
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.2851681113243103,
      "learning_rate": 5.4081313918326046e-05,
      "loss": 0.7628,
      "step": 4157
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.2429962456226349,
      "learning_rate": 5.40354036100128e-05,
      "loss": 0.7942,
      "step": 4158
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.2258974015712738,
      "learning_rate": 5.398950558179651e-05,
      "loss": 0.7215,
      "step": 4159
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.2320895791053772,
      "learning_rate": 5.39436198459395e-05,
      "loss": 0.8286,
      "step": 4160
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.2512860596179962,
      "learning_rate": 5.389774641470078e-05,
      "loss": 0.9042,
      "step": 4161
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.3277638256549835,
      "learning_rate": 5.385188530033599e-05,
      "loss": 0.918,
      "step": 4162
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.3846217691898346,
      "learning_rate": 5.3806036515097755e-05,
      "loss": 0.7992,
      "step": 4163
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.3636234700679779,
      "learning_rate": 5.3760200071235126e-05,
      "loss": 0.8092,
      "step": 4164
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.2671559751033783,
      "learning_rate": 5.371437598099396e-05,
      "loss": 0.7172,
      "step": 4165
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.3457612097263336,
      "learning_rate": 5.36685642566169e-05,
      "loss": 0.7559,
      "step": 4166
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.5199432373046875,
      "learning_rate": 5.3622764910343096e-05,
      "loss": 0.8636,
      "step": 4167
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.4978496730327606,
      "learning_rate": 5.357697795440854e-05,
      "loss": 0.8284,
      "step": 4168
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.22441260516643524,
      "learning_rate": 5.353120340104587e-05,
      "loss": 0.6551,
      "step": 4169
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.38077062368392944,
      "learning_rate": 5.3485441262484445e-05,
      "loss": 0.8327,
      "step": 4170
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.3221091032028198,
      "learning_rate": 5.343969155095022e-05,
      "loss": 0.896,
      "step": 4171
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.32531362771987915,
      "learning_rate": 5.339395427866589e-05,
      "loss": 0.7037,
      "step": 4172
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.356445848941803,
      "learning_rate": 5.334822945785087e-05,
      "loss": 0.7537,
      "step": 4173
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.2604730427265167,
      "learning_rate": 5.3302517100721114e-05,
      "loss": 1.0154,
      "step": 4174
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.2661752998828888,
      "learning_rate": 5.3256817219489386e-05,
      "loss": 0.7894,
      "step": 4175
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.31379157304763794,
      "learning_rate": 5.3211129826365095e-05,
      "loss": 0.7614,
      "step": 4176
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.22697992622852325,
      "learning_rate": 5.316545493355417e-05,
      "loss": 0.6827,
      "step": 4177
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.3567211329936981,
      "learning_rate": 5.311979255325939e-05,
      "loss": 0.7898,
      "step": 4178
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.37452077865600586,
      "learning_rate": 5.3074142697680116e-05,
      "loss": 0.6372,
      "step": 4179
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.39821335673332214,
      "learning_rate": 5.302850537901231e-05,
      "loss": 0.9095,
      "step": 4180
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.4298562705516815,
      "learning_rate": 5.298288060944865e-05,
      "loss": 0.5668,
      "step": 4181
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.3814087212085724,
      "learning_rate": 5.293726840117845e-05,
      "loss": 0.7783,
      "step": 4182
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.5430545806884766,
      "learning_rate": 5.289166876638769e-05,
      "loss": 0.9629,
      "step": 4183
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.22793619334697723,
      "learning_rate": 5.284608171725891e-05,
      "loss": 0.7581,
      "step": 4184
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.22525840997695923,
      "learning_rate": 5.280050726597136e-05,
      "loss": 0.8664,
      "step": 4185
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.20548568665981293,
      "learning_rate": 5.275494542470094e-05,
      "loss": 0.7859,
      "step": 4186
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.558893620967865,
      "learning_rate": 5.270939620562008e-05,
      "loss": 0.6581,
      "step": 4187
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.3220410645008087,
      "learning_rate": 5.266385962089793e-05,
      "loss": 0.8038,
      "step": 4188
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.4154021143913269,
      "learning_rate": 5.261833568270028e-05,
      "loss": 1.1033,
      "step": 4189
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.1613432765007019,
      "learning_rate": 5.257282440318938e-05,
      "loss": 0.6309,
      "step": 4190
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.32182979583740234,
      "learning_rate": 5.2527325794524395e-05,
      "loss": 0.9533,
      "step": 4191
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.28332188725471497,
      "learning_rate": 5.248183986886077e-05,
      "loss": 0.7208,
      "step": 4192
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.1569688320159912,
      "learning_rate": 5.2436366638350835e-05,
      "loss": 0.785,
      "step": 4193
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.18006978929042816,
      "learning_rate": 5.2390906115143304e-05,
      "loss": 0.7697,
      "step": 4194
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.41148483753204346,
      "learning_rate": 5.2345458311383664e-05,
      "loss": 1.1051,
      "step": 4195
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.3156595826148987,
      "learning_rate": 5.230002323921397e-05,
      "loss": 0.8317,
      "step": 4196
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.3538270890712738,
      "learning_rate": 5.2254600910772765e-05,
      "loss": 0.8599,
      "step": 4197
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.23998455703258514,
      "learning_rate": 5.220919133819533e-05,
      "loss": 0.8543,
      "step": 4198
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.19193167984485626,
      "learning_rate": 5.2163794533613505e-05,
      "loss": 0.824,
      "step": 4199
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.26187488436698914,
      "learning_rate": 5.211841050915558e-05,
      "loss": 0.8659,
      "step": 4200
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.30071157217025757,
      "learning_rate": 5.2073039276946686e-05,
      "loss": 0.7564,
      "step": 4201
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.32727426290512085,
      "learning_rate": 5.202768084910835e-05,
      "loss": 0.7667,
      "step": 4202
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.17962981760501862,
      "learning_rate": 5.198233523775862e-05,
      "loss": 0.7922,
      "step": 4203
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.1944834142923355,
      "learning_rate": 5.1937002455012364e-05,
      "loss": 0.753,
      "step": 4204
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.26295727491378784,
      "learning_rate": 5.18916825129808e-05,
      "loss": 0.7682,
      "step": 4205
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.27583184838294983,
      "learning_rate": 5.184637542377181e-05,
      "loss": 0.8266,
      "step": 4206
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.2522880733013153,
      "learning_rate": 5.1801081199489875e-05,
      "loss": 0.7571,
      "step": 4207
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.7668541073799133,
      "learning_rate": 5.175579985223591e-05,
      "loss": 1.152,
      "step": 4208
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.35723504424095154,
      "learning_rate": 5.171053139410756e-05,
      "loss": 0.9066,
      "step": 4209
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.1768179088830948,
      "learning_rate": 5.1665275837198854e-05,
      "loss": 0.8371,
      "step": 4210
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.3357088565826416,
      "learning_rate": 5.16200331936005e-05,
      "loss": 0.9287,
      "step": 4211
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.4925815165042877,
      "learning_rate": 5.157480347539976e-05,
      "loss": 0.7408,
      "step": 4212
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.3169197738170624,
      "learning_rate": 5.1529586694680266e-05,
      "loss": 0.7421,
      "step": 4213
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.26066315174102783,
      "learning_rate": 5.1484382863522485e-05,
      "loss": 0.8656,
      "step": 4214
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.2969996929168701,
      "learning_rate": 5.1439191994003154e-05,
      "loss": 0.9069,
      "step": 4215
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.2851215898990631,
      "learning_rate": 5.1394014098195684e-05,
      "loss": 0.7164,
      "step": 4216
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.2966230809688568,
      "learning_rate": 5.134884918817007e-05,
      "loss": 0.769,
      "step": 4217
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.27428382635116577,
      "learning_rate": 5.1303697275992635e-05,
      "loss": 0.7731,
      "step": 4218
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.29327622056007385,
      "learning_rate": 5.1258558373726416e-05,
      "loss": 0.8728,
      "step": 4219
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.27025410532951355,
      "learning_rate": 5.121343249343096e-05,
      "loss": 0.812,
      "step": 4220
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.21637509763240814,
      "learning_rate": 5.116831964716221e-05,
      "loss": 0.743,
      "step": 4221
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.24336297810077667,
      "learning_rate": 5.1123219846972724e-05,
      "loss": 0.9358,
      "step": 4222
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.6937596201896667,
      "learning_rate": 5.107813310491159e-05,
      "loss": 0.8002,
      "step": 4223
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.30901771783828735,
      "learning_rate": 5.103305943302438e-05,
      "loss": 0.6629,
      "step": 4224
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.3803512454032898,
      "learning_rate": 5.0987998843353146e-05,
      "loss": 0.742,
      "step": 4225
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.23431655764579773,
      "learning_rate": 5.0942951347936384e-05,
      "loss": 0.8384,
      "step": 4226
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.21362198889255524,
      "learning_rate": 5.0897916958809336e-05,
      "loss": 0.7253,
      "step": 4227
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.2594153583049774,
      "learning_rate": 5.0852895688003455e-05,
      "loss": 0.6674,
      "step": 4228
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.24913837015628815,
      "learning_rate": 5.080788754754686e-05,
      "loss": 0.886,
      "step": 4229
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.3388136625289917,
      "learning_rate": 5.076289254946416e-05,
      "loss": 0.975,
      "step": 4230
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.23769497871398926,
      "learning_rate": 5.071791070577632e-05,
      "loss": 0.683,
      "step": 4231
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.4123585820198059,
      "learning_rate": 5.0672942028500926e-05,
      "loss": 1.0111,
      "step": 4232
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.356183797121048,
      "learning_rate": 5.062798652965199e-05,
      "loss": 0.8216,
      "step": 4233
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.690639853477478,
      "learning_rate": 5.0583044221240093e-05,
      "loss": 1.0079,
      "step": 4234
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.48869022727012634,
      "learning_rate": 5.053811511527209e-05,
      "loss": 0.6673,
      "step": 4235
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.26527687907218933,
      "learning_rate": 5.049319922375149e-05,
      "loss": 0.865,
      "step": 4236
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.29890358448028564,
      "learning_rate": 5.0448296558678245e-05,
      "loss": 0.9126,
      "step": 4237
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.3245985507965088,
      "learning_rate": 5.0403407132048674e-05,
      "loss": 0.8604,
      "step": 4238
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.6402094960212708,
      "learning_rate": 5.035853095585565e-05,
      "loss": 0.826,
      "step": 4239
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.7699466943740845,
      "learning_rate": 5.0313668042088544e-05,
      "loss": 0.8496,
      "step": 4240
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.32381412386894226,
      "learning_rate": 5.0268818402733034e-05,
      "loss": 0.6867,
      "step": 4241
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.4325183928012848,
      "learning_rate": 5.022398204977137e-05,
      "loss": 0.9268,
      "step": 4242
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.39395418763160706,
      "learning_rate": 5.017915899518228e-05,
      "loss": 0.784,
      "step": 4243
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.208694189786911,
      "learning_rate": 5.013434925094078e-05,
      "loss": 0.8194,
      "step": 4244
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.40231314301490784,
      "learning_rate": 5.008955282901849e-05,
      "loss": 1.1298,
      "step": 4245
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.154954195022583,
      "learning_rate": 5.004476974138341e-05,
      "loss": 0.5695,
      "step": 4246
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.32948702573776245,
      "learning_rate": 5.000000000000002e-05,
      "loss": 0.8579,
      "step": 4247
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.22772106528282166,
      "learning_rate": 4.9955243616829115e-05,
      "loss": 0.8711,
      "step": 4248
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.6144508123397827,
      "learning_rate": 4.9910500603828025e-05,
      "loss": 1.0715,
      "step": 4249
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.225535050034523,
      "learning_rate": 4.9865770972950545e-05,
      "loss": 0.8993,
      "step": 4250
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.3389943540096283,
      "learning_rate": 4.982105473614674e-05,
      "loss": 0.7839,
      "step": 4251
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.8307225108146667,
      "learning_rate": 4.977635190536324e-05,
      "loss": 0.8433,
      "step": 4252
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.2999875545501709,
      "learning_rate": 4.973166249254307e-05,
      "loss": 0.7934,
      "step": 4253
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.376631498336792,
      "learning_rate": 4.968698650962555e-05,
      "loss": 1.1543,
      "step": 4254
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.20710250735282898,
      "learning_rate": 4.964232396854662e-05,
      "loss": 0.9634,
      "step": 4255
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.5132454037666321,
      "learning_rate": 4.959767488123843e-05,
      "loss": 0.7581,
      "step": 4256
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.630998969078064,
      "learning_rate": 4.9553039259629684e-05,
      "loss": 0.8649,
      "step": 4257
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.5534392595291138,
      "learning_rate": 4.950841711564537e-05,
      "loss": 0.9752,
      "step": 4258
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.3097231090068817,
      "learning_rate": 4.946380846120694e-05,
      "loss": 0.9359,
      "step": 4259
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.2460332065820694,
      "learning_rate": 4.941921330823227e-05,
      "loss": 0.7748,
      "step": 4260
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.20422282814979553,
      "learning_rate": 4.937463166863554e-05,
      "loss": 0.8363,
      "step": 4261
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.39863163232803345,
      "learning_rate": 4.9330063554327386e-05,
      "loss": 0.9982,
      "step": 4262
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.14202122390270233,
      "learning_rate": 4.928550897721487e-05,
      "loss": 0.7406,
      "step": 4263
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.36101749539375305,
      "learning_rate": 4.924096794920124e-05,
      "loss": 0.863,
      "step": 4264
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.3549804985523224,
      "learning_rate": 4.9196440482186446e-05,
      "loss": 0.743,
      "step": 4265
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.37585797905921936,
      "learning_rate": 4.915192658806655e-05,
      "loss": 0.9326,
      "step": 4266
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.6149414777755737,
      "learning_rate": 4.9107426278734e-05,
      "loss": 0.8401,
      "step": 4267
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.3552855849266052,
      "learning_rate": 4.906293956607784e-05,
      "loss": 0.7518,
      "step": 4268
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.2193557173013687,
      "learning_rate": 4.9018466461983206e-05,
      "loss": 0.7871,
      "step": 4269
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.23937192559242249,
      "learning_rate": 4.897400697833177e-05,
      "loss": 0.8002,
      "step": 4270
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.1824451982975006,
      "learning_rate": 4.8929561127001545e-05,
      "loss": 0.8843,
      "step": 4271
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.18278813362121582,
      "learning_rate": 4.888512891986681e-05,
      "loss": 0.6732,
      "step": 4272
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.2440076470375061,
      "learning_rate": 4.884071036879832e-05,
      "loss": 0.8402,
      "step": 4273
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.20316950976848602,
      "learning_rate": 4.879630548566303e-05,
      "loss": 0.7688,
      "step": 4274
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.41974887251853943,
      "learning_rate": 4.875191428232447e-05,
      "loss": 0.8293,
      "step": 4275
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.2569236755371094,
      "learning_rate": 4.8707536770642325e-05,
      "loss": 0.9317,
      "step": 4276
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.35570669174194336,
      "learning_rate": 4.86631729624726e-05,
      "loss": 1.0308,
      "step": 4277
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.2882157564163208,
      "learning_rate": 4.861882286966786e-05,
      "loss": 0.8052,
      "step": 4278
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.276541531085968,
      "learning_rate": 4.8574486504076756e-05,
      "loss": 0.9604,
      "step": 4279
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.22613422572612762,
      "learning_rate": 4.853016387754442e-05,
      "loss": 0.8643,
      "step": 4280
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.18079356849193573,
      "learning_rate": 4.8485855001912315e-05,
      "loss": 0.9484,
      "step": 4281
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.3007704019546509,
      "learning_rate": 4.844155988901811e-05,
      "loss": 0.6706,
      "step": 4282
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.17155326902866364,
      "learning_rate": 4.839727855069589e-05,
      "loss": 0.9302,
      "step": 4283
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.3091491758823395,
      "learning_rate": 4.8353010998776125e-05,
      "loss": 0.8156,
      "step": 4284
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.321935772895813,
      "learning_rate": 4.8308757245085415e-05,
      "loss": 0.6292,
      "step": 4285
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.33669766783714294,
      "learning_rate": 4.8264517301446834e-05,
      "loss": 1.0056,
      "step": 4286
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.16559946537017822,
      "learning_rate": 4.822029117967971e-05,
      "loss": 0.9713,
      "step": 4287
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.3968130946159363,
      "learning_rate": 4.8176078891599715e-05,
      "loss": 1.0385,
      "step": 4288
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.2238253653049469,
      "learning_rate": 4.8131880449018716e-05,
      "loss": 0.668,
      "step": 4289
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.22527475655078888,
      "learning_rate": 4.8087695863745006e-05,
      "loss": 0.6805,
      "step": 4290
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.8260080218315125,
      "learning_rate": 4.8043525147583155e-05,
      "loss": 0.8846,
      "step": 4291
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.48435506224632263,
      "learning_rate": 4.7999368312333925e-05,
      "loss": 0.902,
      "step": 4292
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.24469290673732758,
      "learning_rate": 4.795522536979448e-05,
      "loss": 0.7957,
      "step": 4293
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.26128217577934265,
      "learning_rate": 4.7911096331758274e-05,
      "loss": 0.7911,
      "step": 4294
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.2982015311717987,
      "learning_rate": 4.786698121001494e-05,
      "loss": 0.751,
      "step": 4295
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.28361865878105164,
      "learning_rate": 4.782288001635049e-05,
      "loss": 0.8353,
      "step": 4296
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.33002039790153503,
      "learning_rate": 4.77787927625472e-05,
      "loss": 0.9148,
      "step": 4297
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.6466007828712463,
      "learning_rate": 4.7734719460383624e-05,
      "loss": 0.727,
      "step": 4298
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.38619258999824524,
      "learning_rate": 4.769066012163451e-05,
      "loss": 0.7979,
      "step": 4299
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.3237338960170746,
      "learning_rate": 4.7646614758070985e-05,
      "loss": 0.9419,
      "step": 4300
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.20936734974384308,
      "learning_rate": 4.760258338146042e-05,
      "loss": 0.7523,
      "step": 4301
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.2595241367816925,
      "learning_rate": 4.755856600356635e-05,
      "loss": 0.6383,
      "step": 4302
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.3286050856113434,
      "learning_rate": 4.751456263614868e-05,
      "loss": 0.8405,
      "step": 4303
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.22950194776058197,
      "learning_rate": 4.7470573290963595e-05,
      "loss": 0.8177,
      "step": 4304
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.20488418638706207,
      "learning_rate": 4.7426597979763365e-05,
      "loss": 0.9042,
      "step": 4305
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.24498295783996582,
      "learning_rate": 4.738263671429669e-05,
      "loss": 0.7683,
      "step": 4306
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.8313513994216919,
      "learning_rate": 4.7338689506308474e-05,
      "loss": 0.8074,
      "step": 4307
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.3037697970867157,
      "learning_rate": 4.729475636753977e-05,
      "loss": 0.7422,
      "step": 4308
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.514603316783905,
      "learning_rate": 4.725083730972797e-05,
      "loss": 0.7389,
      "step": 4309
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.271735817193985,
      "learning_rate": 4.720693234460668e-05,
      "loss": 0.8904,
      "step": 4310
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.23939330875873566,
      "learning_rate": 4.716304148390578e-05,
      "loss": 0.9912,
      "step": 4311
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.28345486521720886,
      "learning_rate": 4.711916473935125e-05,
      "loss": 0.8663,
      "step": 4312
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.26284071803092957,
      "learning_rate": 4.7075302122665446e-05,
      "loss": 0.7978,
      "step": 4313
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.20736615359783173,
      "learning_rate": 4.7031453645566916e-05,
      "loss": 0.6331,
      "step": 4314
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.3900993764400482,
      "learning_rate": 4.698761931977033e-05,
      "loss": 0.8739,
      "step": 4315
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.3339424431324005,
      "learning_rate": 4.694379915698669e-05,
      "loss": 0.7312,
      "step": 4316
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.18665319681167603,
      "learning_rate": 4.689999316892322e-05,
      "loss": 0.7474,
      "step": 4317
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.2827455401420593,
      "learning_rate": 4.685620136728319e-05,
      "loss": 0.709,
      "step": 4318
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.2884094715118408,
      "learning_rate": 4.6812423763766365e-05,
      "loss": 0.6783,
      "step": 4319
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.23569366335868835,
      "learning_rate": 4.676866037006845e-05,
      "loss": 0.7089,
      "step": 4320
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.41897743940353394,
      "learning_rate": 4.6724911197881513e-05,
      "loss": 0.9827,
      "step": 4321
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.360949844121933,
      "learning_rate": 4.668117625889371e-05,
      "loss": 0.5385,
      "step": 4322
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.3847789764404297,
      "learning_rate": 4.663745556478949e-05,
      "loss": 0.8174,
      "step": 4323
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.24856224656105042,
      "learning_rate": 4.659374912724948e-05,
      "loss": 0.8957,
      "step": 4324
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.5518739223480225,
      "learning_rate": 4.655005695795043e-05,
      "loss": 0.7457,
      "step": 4325
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.19536860287189484,
      "learning_rate": 4.650637906856534e-05,
      "loss": 0.64,
      "step": 4326
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.6196130514144897,
      "learning_rate": 4.646271547076343e-05,
      "loss": 0.7311,
      "step": 4327
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.2137523591518402,
      "learning_rate": 4.6419066176209936e-05,
      "loss": 0.8544,
      "step": 4328
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.22473621368408203,
      "learning_rate": 4.6375431196566546e-05,
      "loss": 0.7723,
      "step": 4329
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.2543317973613739,
      "learning_rate": 4.633181054349084e-05,
      "loss": 1.0288,
      "step": 4330
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.24277882277965546,
      "learning_rate": 4.6288204228636736e-05,
      "loss": 0.7744,
      "step": 4331
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.6351627111434937,
      "learning_rate": 4.624461226365433e-05,
      "loss": 0.6242,
      "step": 4332
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.22702395915985107,
      "learning_rate": 4.620103466018977e-05,
      "loss": 0.7519,
      "step": 4333
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.27601251006126404,
      "learning_rate": 4.6157471429885444e-05,
      "loss": 0.7133,
      "step": 4334
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.22205771505832672,
      "learning_rate": 4.6113922584379956e-05,
      "loss": 0.797,
      "step": 4335
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.2551232874393463,
      "learning_rate": 4.6070388135307895e-05,
      "loss": 0.7901,
      "step": 4336
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.2569882273674011,
      "learning_rate": 4.6026868094300216e-05,
      "loss": 0.9944,
      "step": 4337
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.5227881669998169,
      "learning_rate": 4.5983362472983794e-05,
      "loss": 0.8936,
      "step": 4338
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.24032269418239594,
      "learning_rate": 4.593987128298191e-05,
      "loss": 0.9128,
      "step": 4339
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.3208746016025543,
      "learning_rate": 4.58963945359138e-05,
      "loss": 0.8784,
      "step": 4340
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.2640138864517212,
      "learning_rate": 4.5852932243394806e-05,
      "loss": 0.7734,
      "step": 4341
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.29612886905670166,
      "learning_rate": 4.580948441703668e-05,
      "loss": 0.9913,
      "step": 4342
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.1871592402458191,
      "learning_rate": 4.576605106844697e-05,
      "loss": 0.739,
      "step": 4343
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.3504692316055298,
      "learning_rate": 4.5722632209229575e-05,
      "loss": 0.8245,
      "step": 4344
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.18437530100345612,
      "learning_rate": 4.567922785098451e-05,
      "loss": 0.8587,
      "step": 4345
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.7533274292945862,
      "learning_rate": 4.5635838005307785e-05,
      "loss": 0.9712,
      "step": 4346
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.6710000038146973,
      "learning_rate": 4.5592462683791637e-05,
      "loss": 0.8745,
      "step": 4347
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.31242454051971436,
      "learning_rate": 4.554910189802446e-05,
      "loss": 0.7869,
      "step": 4348
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.26156139373779297,
      "learning_rate": 4.550575565959062e-05,
      "loss": 0.842,
      "step": 4349
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.2732463777065277,
      "learning_rate": 4.546242398007076e-05,
      "loss": 0.7511,
      "step": 4350
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.7880022525787354,
      "learning_rate": 4.541910687104144e-05,
      "loss": 0.9262,
      "step": 4351
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.21546944975852966,
      "learning_rate": 4.53758043440756e-05,
      "loss": 0.7021,
      "step": 4352
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.6861681342124939,
      "learning_rate": 4.533251641074201e-05,
      "loss": 0.9308,
      "step": 4353
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.28117552399635315,
      "learning_rate": 4.528924308260569e-05,
      "loss": 0.7569,
      "step": 4354
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.16954384744167328,
      "learning_rate": 4.524598437122778e-05,
      "loss": 0.5962,
      "step": 4355
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.14818193018436432,
      "learning_rate": 4.520274028816537e-05,
      "loss": 0.9105,
      "step": 4356
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.2684759497642517,
      "learning_rate": 4.515951084497178e-05,
      "loss": 0.8691,
      "step": 4357
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.38128095865249634,
      "learning_rate": 4.5116296053196396e-05,
      "loss": 0.9043,
      "step": 4358
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.5465126633644104,
      "learning_rate": 4.507309592438461e-05,
      "loss": 0.9173,
      "step": 4359
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9592300057411194,
      "learning_rate": 4.5029910470077984e-05,
      "loss": 0.9523,
      "step": 4360
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.3051108419895172,
      "learning_rate": 4.4986739701814116e-05,
      "loss": 0.8482,
      "step": 4361
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.1027570515871048,
      "learning_rate": 4.494358363112674e-05,
      "loss": 0.9138,
      "step": 4362
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.38161712884902954,
      "learning_rate": 4.490044226954554e-05,
      "loss": 1.0257,
      "step": 4363
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.2537340223789215,
      "learning_rate": 4.485731562859637e-05,
      "loss": 0.8311,
      "step": 4364
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.24759165942668915,
      "learning_rate": 4.481420371980118e-05,
      "loss": 0.7811,
      "step": 4365
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.270857036113739,
      "learning_rate": 4.477110655467786e-05,
      "loss": 0.9978,
      "step": 4366
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.2787322700023651,
      "learning_rate": 4.472802414474044e-05,
      "loss": 0.8937,
      "step": 4367
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.34769684076309204,
      "learning_rate": 4.468495650149907e-05,
      "loss": 0.7346,
      "step": 4368
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.2037825733423233,
      "learning_rate": 4.46419036364598e-05,
      "loss": 0.9256,
      "step": 4369
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.20783400535583496,
      "learning_rate": 4.4598865561124845e-05,
      "loss": 0.7618,
      "step": 4370
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.20036154985427856,
      "learning_rate": 4.455584228699249e-05,
      "loss": 0.7914,
      "step": 4371
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.2599415183067322,
      "learning_rate": 4.4512833825556924e-05,
      "loss": 0.9963,
      "step": 4372
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.46929824352264404,
      "learning_rate": 4.446984018830852e-05,
      "loss": 0.7245,
      "step": 4373
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.24620534479618073,
      "learning_rate": 4.442686138673364e-05,
      "loss": 0.7774,
      "step": 4374
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.308645635843277,
      "learning_rate": 4.438389743231471e-05,
      "loss": 0.6483,
      "step": 4375
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.6188127398490906,
      "learning_rate": 4.4340948336530106e-05,
      "loss": 0.7823,
      "step": 4376
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.13882774114608765,
      "learning_rate": 4.4298014110854326e-05,
      "loss": 0.7829,
      "step": 4377
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.29455122351646423,
      "learning_rate": 4.4255094766757886e-05,
      "loss": 0.9112,
      "step": 4378
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.28110471367836,
      "learning_rate": 4.4212190315707193e-05,
      "loss": 0.7467,
      "step": 4379
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.15005125105381012,
      "learning_rate": 4.4169300769164935e-05,
      "loss": 0.693,
      "step": 4380
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.26637640595436096,
      "learning_rate": 4.412642613858958e-05,
      "loss": 0.7759,
      "step": 4381
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.3234366178512573,
      "learning_rate": 4.408356643543568e-05,
      "loss": 0.7557,
      "step": 4382
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.3308541476726532,
      "learning_rate": 4.404072167115383e-05,
      "loss": 0.5331,
      "step": 4383
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.26636624336242676,
      "learning_rate": 4.399789185719063e-05,
      "loss": 0.7734,
      "step": 4384
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.22503875195980072,
      "learning_rate": 4.395507700498871e-05,
      "loss": 0.8136,
      "step": 4385
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.3371555209159851,
      "learning_rate": 4.39122771259866e-05,
      "loss": 0.7097,
      "step": 4386
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.37700775265693665,
      "learning_rate": 4.386949223161894e-05,
      "loss": 0.8722,
      "step": 4387
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.26427754759788513,
      "learning_rate": 4.382672233331634e-05,
      "loss": 0.7512,
      "step": 4388
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.26618510484695435,
      "learning_rate": 4.378396744250532e-05,
      "loss": 0.8048,
      "step": 4389
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.24284526705741882,
      "learning_rate": 4.374122757060851e-05,
      "loss": 0.7352,
      "step": 4390
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.5542339086532593,
      "learning_rate": 4.36985027290445e-05,
      "loss": 0.8519,
      "step": 4391
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.3303951919078827,
      "learning_rate": 4.365579292922773e-05,
      "loss": 1.0338,
      "step": 4392
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.14012368023395538,
      "learning_rate": 4.361309818256889e-05,
      "loss": 0.7014,
      "step": 4393
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.37716466188430786,
      "learning_rate": 4.3570418500474365e-05,
      "loss": 0.7189,
      "step": 4394
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.246959388256073,
      "learning_rate": 4.352775389434669e-05,
      "loss": 0.6038,
      "step": 4395
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.5008018612861633,
      "learning_rate": 4.348510437558435e-05,
      "loss": 0.9119,
      "step": 4396
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.2855418622493744,
      "learning_rate": 4.34424699555817e-05,
      "loss": 1.0264,
      "step": 4397
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.29874488711357117,
      "learning_rate": 4.33998506457292e-05,
      "loss": 0.8434,
      "step": 4398
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.2802024781703949,
      "learning_rate": 4.335724645741316e-05,
      "loss": 0.8133,
      "step": 4399
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.6846092939376831,
      "learning_rate": 4.331465740201589e-05,
      "loss": 0.8767,
      "step": 4400
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.2104833573102951,
      "learning_rate": 4.327208349091574e-05,
      "loss": 0.8589,
      "step": 4401
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.1786210834980011,
      "learning_rate": 4.32295247354868e-05,
      "loss": 0.7453,
      "step": 4402
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.2505505383014679,
      "learning_rate": 4.31869811470994e-05,
      "loss": 0.8994,
      "step": 4403
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.6121141910552979,
      "learning_rate": 4.314445273711961e-05,
      "loss": 0.7615,
      "step": 4404
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.24849817156791687,
      "learning_rate": 4.3101939516909406e-05,
      "loss": 0.7417,
      "step": 4405
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.2764970064163208,
      "learning_rate": 4.305944149782696e-05,
      "loss": 0.9592,
      "step": 4406
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.15302175283432007,
      "learning_rate": 4.3016958691226105e-05,
      "loss": 0.5652,
      "step": 4407
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.22586224973201752,
      "learning_rate": 4.297449110845677e-05,
      "loss": 0.6777,
      "step": 4408
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.27725842595100403,
      "learning_rate": 4.293203876086481e-05,
      "loss": 0.6564,
      "step": 4409
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.1667337715625763,
      "learning_rate": 4.28896016597919e-05,
      "loss": 0.9255,
      "step": 4410
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.2883830964565277,
      "learning_rate": 4.284717981657576e-05,
      "loss": 0.8795,
      "step": 4411
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.6263657212257385,
      "learning_rate": 4.280477324255001e-05,
      "loss": 0.7084,
      "step": 4412
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.46378251910209656,
      "learning_rate": 4.276238194904413e-05,
      "loss": 0.8124,
      "step": 4413
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.23902778327465057,
      "learning_rate": 4.272000594738359e-05,
      "loss": 0.788,
      "step": 4414
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.31290802359580994,
      "learning_rate": 4.267764524888965e-05,
      "loss": 0.9172,
      "step": 4415
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.34496957063674927,
      "learning_rate": 4.263529986487974e-05,
      "loss": 0.9867,
      "step": 4416
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.14247600734233856,
      "learning_rate": 4.259296980666689e-05,
      "loss": 0.7696,
      "step": 4417
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.5779356360435486,
      "learning_rate": 4.255065508556025e-05,
      "loss": 1.0043,
      "step": 4418
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.1786877065896988,
      "learning_rate": 4.250835571286481e-05,
      "loss": 0.981,
      "step": 4419
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.5563228130340576,
      "learning_rate": 4.246607169988138e-05,
      "loss": 0.927,
      "step": 4420
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.3838962912559509,
      "learning_rate": 4.2423803057906784e-05,
      "loss": 0.9286,
      "step": 4421
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.2804698050022125,
      "learning_rate": 4.238154979823372e-05,
      "loss": 0.8552,
      "step": 4422
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.28373488783836365,
      "learning_rate": 4.2339311932150685e-05,
      "loss": 0.9749,
      "step": 4423
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.1892809271812439,
      "learning_rate": 4.2297089470942155e-05,
      "loss": 0.7826,
      "step": 4424
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.6844810247421265,
      "learning_rate": 4.225488242588846e-05,
      "loss": 0.6937,
      "step": 4425
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.21712863445281982,
      "learning_rate": 4.221269080826585e-05,
      "loss": 0.7461,
      "step": 4426
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.42185577750205994,
      "learning_rate": 4.217051462934636e-05,
      "loss": 0.8932,
      "step": 4427
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.26198118925094604,
      "learning_rate": 4.2128353900397974e-05,
      "loss": 0.8524,
      "step": 4428
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.21789827942848206,
      "learning_rate": 4.2086208632684584e-05,
      "loss": 0.685,
      "step": 4429
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.308938205242157,
      "learning_rate": 4.204407883746582e-05,
      "loss": 0.6876,
      "step": 4430
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.2693031430244446,
      "learning_rate": 4.2001964525997286e-05,
      "loss": 0.8873,
      "step": 4431
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.22552450001239777,
      "learning_rate": 4.195986570953045e-05,
      "loss": 0.8562,
      "step": 4432
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.26139208674430847,
      "learning_rate": 4.1917782399312566e-05,
      "loss": 0.8167,
      "step": 4433
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.3183025121688843,
      "learning_rate": 4.187571460658681e-05,
      "loss": 0.9133,
      "step": 4434
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.6507330536842346,
      "learning_rate": 4.1833662342592194e-05,
      "loss": 0.9567,
      "step": 4435
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.26627808809280396,
      "learning_rate": 4.1791625618563614e-05,
      "loss": 0.932,
      "step": 4436
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.27183997631073,
      "learning_rate": 4.1749604445731703e-05,
      "loss": 0.8541,
      "step": 4437
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.2592218816280365,
      "learning_rate": 4.170759883532306e-05,
      "loss": 0.8729,
      "step": 4438
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.29264160990715027,
      "learning_rate": 4.1665608798560116e-05,
      "loss": 0.7016,
      "step": 4439
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.20685291290283203,
      "learning_rate": 4.162363434666103e-05,
      "loss": 0.6005,
      "step": 4440
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.3032693862915039,
      "learning_rate": 4.158167549083993e-05,
      "loss": 0.9116,
      "step": 4441
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.17263077199459076,
      "learning_rate": 4.1539732242306736e-05,
      "loss": 0.8071,
      "step": 4442
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.162874236702919,
      "learning_rate": 4.1497804612267085e-05,
      "loss": 0.8735,
      "step": 4443
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.3876219391822815,
      "learning_rate": 4.14558926119227e-05,
      "loss": 0.7322,
      "step": 4444
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.17363746464252472,
      "learning_rate": 4.1413996252470865e-05,
      "loss": 0.8157,
      "step": 4445
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.2548193633556366,
      "learning_rate": 4.1372115545104785e-05,
      "loss": 0.7369,
      "step": 4446
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.19335028529167175,
      "learning_rate": 4.13302505010135e-05,
      "loss": 0.6995,
      "step": 4447
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.18290607631206512,
      "learning_rate": 4.128840113138187e-05,
      "loss": 0.8053,
      "step": 4448
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.16323068737983704,
      "learning_rate": 4.1246567447390574e-05,
      "loss": 0.8951,
      "step": 4449
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.7203754782676697,
      "learning_rate": 4.120474946021601e-05,
      "loss": 0.8327,
      "step": 4450
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.2921849489212036,
      "learning_rate": 4.1162947181030484e-05,
      "loss": 0.7209,
      "step": 4451
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.18172062933444977,
      "learning_rate": 4.1121160621002116e-05,
      "loss": 0.7452,
      "step": 4452
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.3836020827293396,
      "learning_rate": 4.10793897912947e-05,
      "loss": 0.7073,
      "step": 4453
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.24935294687747955,
      "learning_rate": 4.103763470306794e-05,
      "loss": 0.8943,
      "step": 4454
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.19359563291072845,
      "learning_rate": 4.099589536747734e-05,
      "loss": 0.7167,
      "step": 4455
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.35461413860321045,
      "learning_rate": 4.095417179567407e-05,
      "loss": 0.6937,
      "step": 4456
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.47900861501693726,
      "learning_rate": 4.09124639988053e-05,
      "loss": 0.9001,
      "step": 4457
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.23338261246681213,
      "learning_rate": 4.087077198801376e-05,
      "loss": 0.8226,
      "step": 4458
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.445262610912323,
      "learning_rate": 4.082909577443809e-05,
      "loss": 0.8997,
      "step": 4459
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.18441632390022278,
      "learning_rate": 4.0787435369212735e-05,
      "loss": 0.7563,
      "step": 4460
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.2901599109172821,
      "learning_rate": 4.07457907834678e-05,
      "loss": 0.7951,
      "step": 4461
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.24579158425331116,
      "learning_rate": 4.0704162028329286e-05,
      "loss": 0.851,
      "step": 4462
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.675704836845398,
      "learning_rate": 4.066254911491884e-05,
      "loss": 0.6224,
      "step": 4463
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.35842421650886536,
      "learning_rate": 4.062095205435398e-05,
      "loss": 0.818,
      "step": 4464
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.30727455019950867,
      "learning_rate": 4.0579370857747986e-05,
      "loss": 0.8711,
      "step": 4465
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.39469146728515625,
      "learning_rate": 4.0537805536209786e-05,
      "loss": 1.043,
      "step": 4466
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.688217282295227,
      "learning_rate": 4.049625610084425e-05,
      "loss": 0.8609,
      "step": 4467
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.4053383767604828,
      "learning_rate": 4.045472256275187e-05,
      "loss": 0.9773,
      "step": 4468
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.2855646312236786,
      "learning_rate": 4.041320493302881e-05,
      "loss": 0.746,
      "step": 4469
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.446613073348999,
      "learning_rate": 4.037170322276728e-05,
      "loss": 0.9173,
      "step": 4470
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.1550166755914688,
      "learning_rate": 4.033021744305492e-05,
      "loss": 0.7732,
      "step": 4471
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.40922701358795166,
      "learning_rate": 4.0288747604975286e-05,
      "loss": 0.6734,
      "step": 4472
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.2542587220668793,
      "learning_rate": 4.024729371960768e-05,
      "loss": 0.6619,
      "step": 4473
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.1805696189403534,
      "learning_rate": 4.020585579802703e-05,
      "loss": 0.7415,
      "step": 4474
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.35462263226509094,
      "learning_rate": 4.0164433851304095e-05,
      "loss": 1.0508,
      "step": 4475
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.3049841821193695,
      "learning_rate": 4.012302789050537e-05,
      "loss": 0.8095,
      "step": 4476
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.25094351172447205,
      "learning_rate": 4.008163792669298e-05,
      "loss": 0.8391,
      "step": 4477
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.30251166224479675,
      "learning_rate": 4.004026397092492e-05,
      "loss": 0.8254,
      "step": 4478
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.16224196553230286,
      "learning_rate": 3.9998906034254714e-05,
      "loss": 0.6162,
      "step": 4479
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.21979716420173645,
      "learning_rate": 3.9957564127731884e-05,
      "loss": 0.8078,
      "step": 4480
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.23419252038002014,
      "learning_rate": 3.991623826240138e-05,
      "loss": 0.9165,
      "step": 4481
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.555534839630127,
      "learning_rate": 3.987492844930406e-05,
      "loss": 0.9092,
      "step": 4482
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.26997387409210205,
      "learning_rate": 3.9833634699476444e-05,
      "loss": 0.9005,
      "step": 4483
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.3149716556072235,
      "learning_rate": 3.979235702395067e-05,
      "loss": 1.0102,
      "step": 4484
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.23779307305812836,
      "learning_rate": 3.97510954337547e-05,
      "loss": 0.7537,
      "step": 4485
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.16554966568946838,
      "learning_rate": 3.97098499399122e-05,
      "loss": 0.6817,
      "step": 4486
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.30277830362319946,
      "learning_rate": 3.966862055344243e-05,
      "loss": 1.1096,
      "step": 4487
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.31810757517814636,
      "learning_rate": 3.9627407285360404e-05,
      "loss": 0.8559,
      "step": 4488
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.2822582721710205,
      "learning_rate": 3.958621014667687e-05,
      "loss": 0.8906,
      "step": 4489
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.5330873131752014,
      "learning_rate": 3.954502914839825e-05,
      "loss": 0.903,
      "step": 4490
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1689883470535278,
      "learning_rate": 3.950386430152656e-05,
      "loss": 1.0628,
      "step": 4491
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.19735664129257202,
      "learning_rate": 3.946271561705963e-05,
      "loss": 0.5113,
      "step": 4492
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.8230045437812805,
      "learning_rate": 3.9421583105990936e-05,
      "loss": 0.8753,
      "step": 4493
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.5603688955307007,
      "learning_rate": 3.9380466779309547e-05,
      "loss": 0.9912,
      "step": 4494
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.3911040127277374,
      "learning_rate": 3.933936664800032e-05,
      "loss": 0.8185,
      "step": 4495
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.35133838653564453,
      "learning_rate": 3.9298282723043756e-05,
      "loss": 0.5983,
      "step": 4496
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.27051016688346863,
      "learning_rate": 3.925721501541596e-05,
      "loss": 0.8067,
      "step": 4497
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.19642719626426697,
      "learning_rate": 3.921616353608879e-05,
      "loss": 0.9033,
      "step": 4498
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.27097806334495544,
      "learning_rate": 3.9175128296029714e-05,
      "loss": 0.8288,
      "step": 4499
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.34060534834861755,
      "learning_rate": 3.9134109306201936e-05,
      "loss": 0.8516,
      "step": 4500
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.33515045046806335,
      "learning_rate": 3.9093106577564184e-05,
      "loss": 0.8094,
      "step": 4501
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.3245089054107666,
      "learning_rate": 3.9052120121070966e-05,
      "loss": 0.6557,
      "step": 4502
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.2659807503223419,
      "learning_rate": 3.901114994767243e-05,
      "loss": 0.666,
      "step": 4503
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.37467795610427856,
      "learning_rate": 3.8970196068314255e-05,
      "loss": 0.7975,
      "step": 4504
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.6878629922866821,
      "learning_rate": 3.892925849393792e-05,
      "loss": 0.7376,
      "step": 4505
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.3328764736652374,
      "learning_rate": 3.888833723548048e-05,
      "loss": 0.8659,
      "step": 4506
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.13620281219482422,
      "learning_rate": 3.884743230387455e-05,
      "loss": 0.8105,
      "step": 4507
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.30484992265701294,
      "learning_rate": 3.88065437100486e-05,
      "loss": 0.7731,
      "step": 4508
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.1510440558195114,
      "learning_rate": 3.876567146492653e-05,
      "loss": 0.8593,
      "step": 4509
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.23748542368412018,
      "learning_rate": 3.872481557942792e-05,
      "loss": 0.8349,
      "step": 4510
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.28554439544677734,
      "learning_rate": 3.868397606446802e-05,
      "loss": 0.605,
      "step": 4511
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.1776222288608551,
      "learning_rate": 3.8643152930957695e-05,
      "loss": 0.6286,
      "step": 4512
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.2620657682418823,
      "learning_rate": 3.860234618980346e-05,
      "loss": 0.5653,
      "step": 4513
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.19150225818157196,
      "learning_rate": 3.856155585190735e-05,
      "loss": 0.5199,
      "step": 4514
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.20140615105628967,
      "learning_rate": 3.8520781928167116e-05,
      "loss": 0.9389,
      "step": 4515
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.2871261537075043,
      "learning_rate": 3.8480024429476126e-05,
      "loss": 0.8507,
      "step": 4516
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.277062326669693,
      "learning_rate": 3.8439283366723263e-05,
      "loss": 1.0637,
      "step": 4517
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.35318058729171753,
      "learning_rate": 3.8398558750793125e-05,
      "loss": 0.703,
      "step": 4518
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.19093801081180573,
      "learning_rate": 3.835785059256589e-05,
      "loss": 0.7341,
      "step": 4519
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.3242324888706207,
      "learning_rate": 3.8317158902917225e-05,
      "loss": 0.9158,
      "step": 4520
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.22896796464920044,
      "learning_rate": 3.827648369271865e-05,
      "loss": 0.7607,
      "step": 4521
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.3022713363170624,
      "learning_rate": 3.8235824972837e-05,
      "loss": 0.8209,
      "step": 4522
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.224375918507576,
      "learning_rate": 3.8195182754134874e-05,
      "loss": 0.9309,
      "step": 4523
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.3027117848396301,
      "learning_rate": 3.815455704747045e-05,
      "loss": 0.8188,
      "step": 4524
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.21042537689208984,
      "learning_rate": 3.811394786369741e-05,
      "loss": 0.968,
      "step": 4525
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.22731754183769226,
      "learning_rate": 3.807335521366513e-05,
      "loss": 0.9018,
      "step": 4526
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.2771616578102112,
      "learning_rate": 3.803277910821845e-05,
      "loss": 0.8129,
      "step": 4527
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.20325949788093567,
      "learning_rate": 3.7992219558197894e-05,
      "loss": 1.1372,
      "step": 4528
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.24906358122825623,
      "learning_rate": 3.795167657443956e-05,
      "loss": 0.8293,
      "step": 4529
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.3173636794090271,
      "learning_rate": 3.791115016777498e-05,
      "loss": 0.6668,
      "step": 4530
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.3515484631061554,
      "learning_rate": 3.7870640349031485e-05,
      "loss": 0.9198,
      "step": 4531
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.20763634145259857,
      "learning_rate": 3.783014712903179e-05,
      "loss": 0.786,
      "step": 4532
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.2531219720840454,
      "learning_rate": 3.7789670518594167e-05,
      "loss": 0.9119,
      "step": 4533
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.21987563371658325,
      "learning_rate": 3.7749210528532664e-05,
      "loss": 0.7129,
      "step": 4534
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.23649710416793823,
      "learning_rate": 3.770876716965663e-05,
      "loss": 1.1489,
      "step": 4535
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.26641491055488586,
      "learning_rate": 3.7668340452771124e-05,
      "loss": 0.7999,
      "step": 4536
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.11765255033969879,
      "learning_rate": 3.7627930388676756e-05,
      "loss": 0.7261,
      "step": 4537
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.28294190764427185,
      "learning_rate": 3.758753698816958e-05,
      "loss": 0.6595,
      "step": 4538
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.36893731355667114,
      "learning_rate": 3.75471602620413e-05,
      "loss": 0.9417,
      "step": 4539
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.2928343713283539,
      "learning_rate": 3.750680022107914e-05,
      "loss": 0.8239,
      "step": 4540
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.16671894490718842,
      "learning_rate": 3.7466456876065893e-05,
      "loss": 0.6879,
      "step": 4541
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.20277711749076843,
      "learning_rate": 3.742613023777982e-05,
      "loss": 0.9217,
      "step": 4542
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.2519170343875885,
      "learning_rate": 3.738582031699468e-05,
      "loss": 0.6214,
      "step": 4543
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.299410343170166,
      "learning_rate": 3.734552712448001e-05,
      "loss": 0.7234,
      "step": 4544
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.3773120939731598,
      "learning_rate": 3.730525067100057e-05,
      "loss": 0.754,
      "step": 4545
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.583653450012207,
      "learning_rate": 3.726499096731684e-05,
      "loss": 0.7892,
      "step": 4546
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.3812181055545807,
      "learning_rate": 3.722474802418482e-05,
      "loss": 0.8684,
      "step": 4547
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.3066314458847046,
      "learning_rate": 3.71845218523559e-05,
      "loss": 0.6041,
      "step": 4548
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.33436739444732666,
      "learning_rate": 3.7144312462577116e-05,
      "loss": 0.6622,
      "step": 4549
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.2874692976474762,
      "learning_rate": 3.7104119865591014e-05,
      "loss": 0.7274,
      "step": 4550
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.5862867832183838,
      "learning_rate": 3.7063944072135545e-05,
      "loss": 0.8885,
      "step": 4551
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.2685791552066803,
      "learning_rate": 3.702378509294428e-05,
      "loss": 0.7241,
      "step": 4552
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.20305220782756805,
      "learning_rate": 3.698364293874628e-05,
      "loss": 0.8096,
      "step": 4553
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.23305381834506989,
      "learning_rate": 3.69435176202661e-05,
      "loss": 0.8758,
      "step": 4554
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.2764829397201538,
      "learning_rate": 3.690340914822375e-05,
      "loss": 0.933,
      "step": 4555
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.1577948033809662,
      "learning_rate": 3.6863317533334786e-05,
      "loss": 0.6852,
      "step": 4556
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.177603080868721,
      "learning_rate": 3.6823242786310306e-05,
      "loss": 0.8882,
      "step": 4557
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.3583468496799469,
      "learning_rate": 3.6783184917856774e-05,
      "loss": 1.0692,
      "step": 4558
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.2614285945892334,
      "learning_rate": 3.674314393867626e-05,
      "loss": 0.6994,
      "step": 4559
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.19457054138183594,
      "learning_rate": 3.6703119859466317e-05,
      "loss": 1.0026,
      "step": 4560
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.4096197485923767,
      "learning_rate": 3.666311269091989e-05,
      "loss": 0.8702,
      "step": 4561
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.2836320698261261,
      "learning_rate": 3.6623122443725465e-05,
      "loss": 0.8985,
      "step": 4562
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.22273896634578705,
      "learning_rate": 3.658314912856704e-05,
      "loss": 0.8338,
      "step": 4563
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.2819596529006958,
      "learning_rate": 3.6543192756124077e-05,
      "loss": 0.677,
      "step": 4564
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.3928001821041107,
      "learning_rate": 3.650325333707142e-05,
      "loss": 0.9595,
      "step": 4565
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.3915267586708069,
      "learning_rate": 3.646333088207948e-05,
      "loss": 0.7855,
      "step": 4566
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.8915837407112122,
      "learning_rate": 3.642342540181417e-05,
      "loss": 0.813,
      "step": 4567
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.265074759721756,
      "learning_rate": 3.638353690693671e-05,
      "loss": 0.6416,
      "step": 4568
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.13688349723815918,
      "learning_rate": 3.634366540810393e-05,
      "loss": 0.6893,
      "step": 4569
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.36881524324417114,
      "learning_rate": 3.630381091596812e-05,
      "loss": 0.7582,
      "step": 4570
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.19749091565608978,
      "learning_rate": 3.6263973441176836e-05,
      "loss": 1.0207,
      "step": 4571
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.17092281579971313,
      "learning_rate": 3.6224152994373386e-05,
      "loss": 0.7238,
      "step": 4572
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.39532819390296936,
      "learning_rate": 3.6184349586196286e-05,
      "loss": 0.9195,
      "step": 4573
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.3522305488586426,
      "learning_rate": 3.614456322727957e-05,
      "loss": 0.8267,
      "step": 4574
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.31142210960388184,
      "learning_rate": 3.6104793928252756e-05,
      "loss": 1.0648,
      "step": 4575
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.20832806825637817,
      "learning_rate": 3.6065041699740775e-05,
      "loss": 0.756,
      "step": 4576
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.19928774237632751,
      "learning_rate": 3.602530655236405e-05,
      "loss": 0.8153,
      "step": 4577
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.2889464199542999,
      "learning_rate": 3.59855884967383e-05,
      "loss": 0.9344,
      "step": 4578
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.4125659763813019,
      "learning_rate": 3.594588754347482e-05,
      "loss": 0.8952,
      "step": 4579
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.28066498041152954,
      "learning_rate": 3.590620370318032e-05,
      "loss": 0.7725,
      "step": 4580
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.2981148362159729,
      "learning_rate": 3.586653698645683e-05,
      "loss": 0.8211,
      "step": 4581
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.5305972695350647,
      "learning_rate": 3.5826887403901906e-05,
      "loss": 0.9409,
      "step": 4582
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.3235059678554535,
      "learning_rate": 3.578725496610855e-05,
      "loss": 1.0813,
      "step": 4583
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.2555611729621887,
      "learning_rate": 3.574763968366502e-05,
      "loss": 0.9489,
      "step": 4584
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.24973739683628082,
      "learning_rate": 3.570804156715524e-05,
      "loss": 0.7556,
      "step": 4585
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.8617566227912903,
      "learning_rate": 3.566846062715831e-05,
      "loss": 0.6213,
      "step": 4586
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.23312601447105408,
      "learning_rate": 3.562889687424887e-05,
      "loss": 1.1606,
      "step": 4587
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.2623741626739502,
      "learning_rate": 3.5589350318996984e-05,
      "loss": 0.9125,
      "step": 4588
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.27298474311828613,
      "learning_rate": 3.554982097196801e-05,
      "loss": 0.814,
      "step": 4589
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.17249925434589386,
      "learning_rate": 3.551030884372283e-05,
      "loss": 0.915,
      "step": 4590
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.33270779252052307,
      "learning_rate": 3.5470813944817624e-05,
      "loss": 1.0296,
      "step": 4591
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.31252533197402954,
      "learning_rate": 3.543133628580404e-05,
      "loss": 0.7891,
      "step": 4592
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.2809332013130188,
      "learning_rate": 3.539187587722913e-05,
      "loss": 0.8876,
      "step": 4593
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.2353806048631668,
      "learning_rate": 3.535243272963521e-05,
      "loss": 0.9479,
      "step": 4594
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.3781234920024872,
      "learning_rate": 3.5313006853560205e-05,
      "loss": 0.9566,
      "step": 4595
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.1920173317193985,
      "learning_rate": 3.5273598259537246e-05,
      "loss": 0.7282,
      "step": 4596
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.23443494737148285,
      "learning_rate": 3.523420695809481e-05,
      "loss": 0.7413,
      "step": 4597
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.2494598627090454,
      "learning_rate": 3.5194832959757e-05,
      "loss": 0.6615,
      "step": 4598
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.2059793323278427,
      "learning_rate": 3.515547627504303e-05,
      "loss": 0.7737,
      "step": 4599
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.6304756999015808,
      "learning_rate": 3.5116136914467645e-05,
      "loss": 0.9268,
      "step": 4600
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.2454749047756195,
      "learning_rate": 3.507681488854093e-05,
      "loss": 0.8068,
      "step": 4601
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.19011259078979492,
      "learning_rate": 3.5037510207768276e-05,
      "loss": 1.0175,
      "step": 4602
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.3735576570034027,
      "learning_rate": 3.49982228826505e-05,
      "loss": 0.7991,
      "step": 4603
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.23589608073234558,
      "learning_rate": 3.4958952923683795e-05,
      "loss": 0.811,
      "step": 4604
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.38703447580337524,
      "learning_rate": 3.4919700341359716e-05,
      "loss": 0.8805,
      "step": 4605
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.39999502897262573,
      "learning_rate": 3.488046514616511e-05,
      "loss": 0.9628,
      "step": 4606
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.19067473709583282,
      "learning_rate": 3.484124734858215e-05,
      "loss": 0.8769,
      "step": 4607
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.27870818972587585,
      "learning_rate": 3.480204695908857e-05,
      "loss": 0.886,
      "step": 4608
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.7379229068756104,
      "learning_rate": 3.476286398815721e-05,
      "loss": 0.6602,
      "step": 4609
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.32156315445899963,
      "learning_rate": 3.4723698446256403e-05,
      "loss": 0.7445,
      "step": 4610
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.3131166994571686,
      "learning_rate": 3.46845503438498e-05,
      "loss": 0.9905,
      "step": 4611
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.25000566244125366,
      "learning_rate": 3.4645419691396305e-05,
      "loss": 0.815,
      "step": 4612
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.28058674931526184,
      "learning_rate": 3.460630649935028e-05,
      "loss": 0.8805,
      "step": 4613
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.21204347908496857,
      "learning_rate": 3.4567210778161394e-05,
      "loss": 0.7367,
      "step": 4614
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.2624492049217224,
      "learning_rate": 3.452813253827456e-05,
      "loss": 0.4987,
      "step": 4615
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.655121922492981,
      "learning_rate": 3.448907179013011e-05,
      "loss": 0.7327,
      "step": 4616
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.39079710841178894,
      "learning_rate": 3.445002854416371e-05,
      "loss": 0.9801,
      "step": 4617
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.33233723044395447,
      "learning_rate": 3.441100281080632e-05,
      "loss": 0.5477,
      "step": 4618
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.23656104505062103,
      "learning_rate": 3.437199460048417e-05,
      "loss": 0.747,
      "step": 4619
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.24628596007823944,
      "learning_rate": 3.433300392361889e-05,
      "loss": 0.6444,
      "step": 4620
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0402120351791382,
      "learning_rate": 3.429403079062743e-05,
      "loss": 1.0736,
      "step": 4621
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.3066161870956421,
      "learning_rate": 3.425507521192195e-05,
      "loss": 0.8354,
      "step": 4622
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.181759312748909,
      "learning_rate": 3.421613719791003e-05,
      "loss": 0.6935,
      "step": 4623
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.4315882623195648,
      "learning_rate": 3.4177216758994524e-05,
      "loss": 0.7226,
      "step": 4624
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.7900176644325256,
      "learning_rate": 3.4138313905573536e-05,
      "loss": 0.5803,
      "step": 4625
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.3971518278121948,
      "learning_rate": 3.4099428648040545e-05,
      "loss": 0.9618,
      "step": 4626
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.21714463829994202,
      "learning_rate": 3.406056099678431e-05,
      "loss": 0.7434,
      "step": 4627
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.5441672801971436,
      "learning_rate": 3.402171096218889e-05,
      "loss": 0.8523,
      "step": 4628
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.2738534212112427,
      "learning_rate": 3.398287855463358e-05,
      "loss": 0.4285,
      "step": 4629
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.1857019066810608,
      "learning_rate": 3.394406378449304e-05,
      "loss": 0.9256,
      "step": 4630
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.19451111555099487,
      "learning_rate": 3.390526666213721e-05,
      "loss": 0.7048,
      "step": 4631
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.2007787823677063,
      "learning_rate": 3.3866487197931254e-05,
      "loss": 1.0108,
      "step": 4632
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.33989080786705017,
      "learning_rate": 3.3827725402235655e-05,
      "loss": 0.9102,
      "step": 4633
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.292184442281723,
      "learning_rate": 3.378898128540624e-05,
      "loss": 0.8184,
      "step": 4634
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.5842868685722351,
      "learning_rate": 3.375025485779398e-05,
      "loss": 0.9235,
      "step": 4635
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.3528236448764801,
      "learning_rate": 3.371154612974522e-05,
      "loss": 0.9401,
      "step": 4636
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.282728374004364,
      "learning_rate": 3.367285511160159e-05,
      "loss": 1.1048,
      "step": 4637
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.26302170753479004,
      "learning_rate": 3.363418181369986e-05,
      "loss": 0.937,
      "step": 4638
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.514236569404602,
      "learning_rate": 3.359552624637221e-05,
      "loss": 0.9104,
      "step": 4639
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.25385135412216187,
      "learning_rate": 3.355688841994601e-05,
      "loss": 0.823,
      "step": 4640
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.25326621532440186,
      "learning_rate": 3.3518268344743954e-05,
      "loss": 0.703,
      "step": 4641
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.23930396139621735,
      "learning_rate": 3.347966603108386e-05,
      "loss": 0.8502,
      "step": 4642
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.2837264835834503,
      "learning_rate": 3.3441081489278935e-05,
      "loss": 1.0259,
      "step": 4643
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.5963236093521118,
      "learning_rate": 3.340251472963761e-05,
      "loss": 0.9199,
      "step": 4644
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.28836458921432495,
      "learning_rate": 3.336396576246347e-05,
      "loss": 0.9289,
      "step": 4645
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.2570215165615082,
      "learning_rate": 3.332543459805552e-05,
      "loss": 0.9192,
      "step": 4646
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.313434898853302,
      "learning_rate": 3.328692124670786e-05,
      "loss": 0.813,
      "step": 4647
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.3055402338504791,
      "learning_rate": 3.324842571870981e-05,
      "loss": 0.8003,
      "step": 4648
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.30971163511276245,
      "learning_rate": 3.320994802434614e-05,
      "loss": 0.9047,
      "step": 4649
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.15418551862239838,
      "learning_rate": 3.3171488173896616e-05,
      "loss": 0.6377,
      "step": 4650
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.30614471435546875,
      "learning_rate": 3.3133046177636384e-05,
      "loss": 0.8554,
      "step": 4651
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.2961150109767914,
      "learning_rate": 3.3094622045835724e-05,
      "loss": 0.9407,
      "step": 4652
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.24373136460781097,
      "learning_rate": 3.30562157887602e-05,
      "loss": 0.6545,
      "step": 4653
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.30184438824653625,
      "learning_rate": 3.301782741667065e-05,
      "loss": 0.859,
      "step": 4654
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.3382834196090698,
      "learning_rate": 3.2979456939823006e-05,
      "loss": 0.8621,
      "step": 4655
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.2575562596321106,
      "learning_rate": 3.29411043684685e-05,
      "loss": 0.8544,
      "step": 4656
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.24544212222099304,
      "learning_rate": 3.290276971285362e-05,
      "loss": 0.7983,
      "step": 4657
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.28448861837387085,
      "learning_rate": 3.2864452983219906e-05,
      "loss": 0.9217,
      "step": 4658
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.2632863223552704,
      "learning_rate": 3.282615418980435e-05,
      "loss": 0.6209,
      "step": 4659
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.3821103870868683,
      "learning_rate": 3.2787873342838934e-05,
      "loss": 0.773,
      "step": 4660
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.23859158158302307,
      "learning_rate": 3.274961045255095e-05,
      "loss": 0.8954,
      "step": 4661
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.27219393849372864,
      "learning_rate": 3.271136552916292e-05,
      "loss": 0.6601,
      "step": 4662
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.17756272852420807,
      "learning_rate": 3.2673138582892446e-05,
      "loss": 0.9336,
      "step": 4663
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.2710960805416107,
      "learning_rate": 3.2634929623952435e-05,
      "loss": 0.7811,
      "step": 4664
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.2622879147529602,
      "learning_rate": 3.2596738662550984e-05,
      "loss": 0.9114,
      "step": 4665
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.27030491828918457,
      "learning_rate": 3.255856570889131e-05,
      "loss": 0.7056,
      "step": 4666
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.2271953523159027,
      "learning_rate": 3.252041077317189e-05,
      "loss": 0.8488,
      "step": 4667
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.29415062069892883,
      "learning_rate": 3.248227386558629e-05,
      "loss": 0.653,
      "step": 4668
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.5155461430549622,
      "learning_rate": 3.244415499632345e-05,
      "loss": 0.7612,
      "step": 4669
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.2127319723367691,
      "learning_rate": 3.24060541755673e-05,
      "loss": 0.8076,
      "step": 4670
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.24126912653446198,
      "learning_rate": 3.2367971413496955e-05,
      "loss": 0.7591,
      "step": 4671
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.24484559893608093,
      "learning_rate": 3.2329906720286894e-05,
      "loss": 0.8807,
      "step": 4672
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.3254683017730713,
      "learning_rate": 3.2291860106106556e-05,
      "loss": 0.9955,
      "step": 4673
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.7908711433410645,
      "learning_rate": 3.225383158112065e-05,
      "loss": 1.057,
      "step": 4674
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.6096619367599487,
      "learning_rate": 3.221582115548909e-05,
      "loss": 0.7435,
      "step": 4675
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.3767704963684082,
      "learning_rate": 3.217782883936683e-05,
      "loss": 0.7277,
      "step": 4676
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.215582013130188,
      "learning_rate": 3.2139854642904087e-05,
      "loss": 0.9596,
      "step": 4677
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.28406983613967896,
      "learning_rate": 3.210189857624626e-05,
      "loss": 0.9734,
      "step": 4678
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.2865844666957855,
      "learning_rate": 3.206396064953375e-05,
      "loss": 0.7688,
      "step": 4679
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.24019736051559448,
      "learning_rate": 3.2026040872902286e-05,
      "loss": 0.915,
      "step": 4680
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.24575117230415344,
      "learning_rate": 3.198813925648266e-05,
      "loss": 0.7839,
      "step": 4681
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.8632822632789612,
      "learning_rate": 3.195025581040086e-05,
      "loss": 0.7249,
      "step": 4682
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.2168959379196167,
      "learning_rate": 3.191239054477792e-05,
      "loss": 0.9534,
      "step": 4683
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.2572563588619232,
      "learning_rate": 3.1874543469730136e-05,
      "loss": 0.8392,
      "step": 4684
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.37020352482795715,
      "learning_rate": 3.183671459536891e-05,
      "loss": 0.8603,
      "step": 4685
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.4354654848575592,
      "learning_rate": 3.1798903931800704e-05,
      "loss": 0.921,
      "step": 4686
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.2878931164741516,
      "learning_rate": 3.1761111489127205e-05,
      "loss": 0.9064,
      "step": 4687
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.4349796772003174,
      "learning_rate": 3.172333727744523e-05,
      "loss": 0.8908,
      "step": 4688
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.3044115900993347,
      "learning_rate": 3.168558130684666e-05,
      "loss": 0.6004,
      "step": 4689
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.2088109850883484,
      "learning_rate": 3.164784358741854e-05,
      "loss": 0.6594,
      "step": 4690
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.27509236335754395,
      "learning_rate": 3.1610124129243055e-05,
      "loss": 0.8018,
      "step": 4691
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.2842583656311035,
      "learning_rate": 3.157242294239753e-05,
      "loss": 0.9458,
      "step": 4692
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.3050662577152252,
      "learning_rate": 3.1534740036954304e-05,
      "loss": 0.8897,
      "step": 4693
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.3819757103919983,
      "learning_rate": 3.149707542298094e-05,
      "loss": 0.9281,
      "step": 4694
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.20187415182590485,
      "learning_rate": 3.145942911054011e-05,
      "loss": 0.9764,
      "step": 4695
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.21473610401153564,
      "learning_rate": 3.142180110968949e-05,
      "loss": 0.7544,
      "step": 4696
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.22957782447338104,
      "learning_rate": 3.138419143048197e-05,
      "loss": 0.6514,
      "step": 4697
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.1726054847240448,
      "learning_rate": 3.134660008296554e-05,
      "loss": 0.8265,
      "step": 4698
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.24490420520305634,
      "learning_rate": 3.1309027077183216e-05,
      "loss": 0.8063,
      "step": 4699
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.34121569991111755,
      "learning_rate": 3.127147242317318e-05,
      "loss": 0.9226,
      "step": 4700
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.33278706669807434,
      "learning_rate": 3.1233936130968733e-05,
      "loss": 0.7872,
      "step": 4701
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.432324081659317,
      "learning_rate": 3.1196418210598155e-05,
      "loss": 1.0493,
      "step": 4702
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.4810118079185486,
      "learning_rate": 3.1158918672084946e-05,
      "loss": 0.8862,
      "step": 4703
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.6155079007148743,
      "learning_rate": 3.112143752544762e-05,
      "loss": 0.7988,
      "step": 4704
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.7888616323471069,
      "learning_rate": 3.1083974780699844e-05,
      "loss": 0.7805,
      "step": 4705
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.26677390933036804,
      "learning_rate": 3.104653044785025e-05,
      "loss": 0.7964,
      "step": 4706
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.30944544076919556,
      "learning_rate": 3.100910453690268e-05,
      "loss": 0.6811,
      "step": 4707
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.36280253529548645,
      "learning_rate": 3.0971697057855995e-05,
      "loss": 0.9696,
      "step": 4708
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.1674014925956726,
      "learning_rate": 3.0934308020704075e-05,
      "loss": 0.5637,
      "step": 4709
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.27842605113983154,
      "learning_rate": 3.089693743543604e-05,
      "loss": 0.8548,
      "step": 4710
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.19122232496738434,
      "learning_rate": 3.0859585312035924e-05,
      "loss": 0.8622,
      "step": 4711
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.3396204113960266,
      "learning_rate": 3.08222516604828e-05,
      "loss": 0.7361,
      "step": 4712
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.2358868271112442,
      "learning_rate": 3.0784936490751024e-05,
      "loss": 0.9147,
      "step": 4713
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.26149269938468933,
      "learning_rate": 3.074763981280979e-05,
      "loss": 0.7626,
      "step": 4714
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.23766492307186127,
      "learning_rate": 3.0710361636623475e-05,
      "loss": 0.7362,
      "step": 4715
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.24850454926490784,
      "learning_rate": 3.067310197215143e-05,
      "loss": 0.6427,
      "step": 4716
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.2235437035560608,
      "learning_rate": 3.0635860829348126e-05,
      "loss": 0.9276,
      "step": 4717
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.2102113515138626,
      "learning_rate": 3.059863821816311e-05,
      "loss": 0.7283,
      "step": 4718
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.24382275342941284,
      "learning_rate": 3.0561434148540856e-05,
      "loss": 0.8523,
      "step": 4719
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.5490009188652039,
      "learning_rate": 3.0524248630421e-05,
      "loss": 0.5716,
      "step": 4720
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.41867783665657043,
      "learning_rate": 3.0487081673738215e-05,
      "loss": 0.852,
      "step": 4721
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.20515824854373932,
      "learning_rate": 3.0449933288422083e-05,
      "loss": 0.8965,
      "step": 4722
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.37190648913383484,
      "learning_rate": 3.0412803484397457e-05,
      "loss": 0.7663,
      "step": 4723
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.23701338469982147,
      "learning_rate": 3.0375692271584e-05,
      "loss": 0.8169,
      "step": 4724
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.33983492851257324,
      "learning_rate": 3.0338599659896527e-05,
      "loss": 0.9582,
      "step": 4725
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.7721033096313477,
      "learning_rate": 3.030152565924489e-05,
      "loss": 0.7066,
      "step": 4726
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.204746812582016,
      "learning_rate": 3.0264470279533876e-05,
      "loss": 0.7235,
      "step": 4727
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.3593306839466095,
      "learning_rate": 3.0227433530663385e-05,
      "loss": 0.9132,
      "step": 4728
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.29784974455833435,
      "learning_rate": 3.019041542252835e-05,
      "loss": 0.7607,
      "step": 4729
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.27797555923461914,
      "learning_rate": 3.015341596501863e-05,
      "loss": 0.7404,
      "step": 4730
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.22730402648448944,
      "learning_rate": 3.0116435168019198e-05,
      "loss": 0.7663,
      "step": 4731
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.2813575863838196,
      "learning_rate": 3.007947304140992e-05,
      "loss": 0.7773,
      "step": 4732
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.18879544734954834,
      "learning_rate": 3.0042529595065882e-05,
      "loss": 0.7083,
      "step": 4733
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.4553906321525574,
      "learning_rate": 3.0005604838856993e-05,
      "loss": 0.8244,
      "step": 4734
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.1792357861995697,
      "learning_rate": 2.996869878264815e-05,
      "loss": 0.9296,
      "step": 4735
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.20993784070014954,
      "learning_rate": 2.9931811436299472e-05,
      "loss": 0.9117,
      "step": 4736
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.25078171491622925,
      "learning_rate": 2.9894942809665837e-05,
      "loss": 0.7732,
      "step": 4737
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.14015573263168335,
      "learning_rate": 2.9858092912597258e-05,
      "loss": 0.9303,
      "step": 4738
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.25783610343933105,
      "learning_rate": 2.9821261754938747e-05,
      "loss": 0.7576,
      "step": 4739
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.39233338832855225,
      "learning_rate": 2.9784449346530198e-05,
      "loss": 0.7215,
      "step": 4740
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.2972393035888672,
      "learning_rate": 2.9747655697206612e-05,
      "loss": 0.7904,
      "step": 4741
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.22824980318546295,
      "learning_rate": 2.971088081679796e-05,
      "loss": 0.9983,
      "step": 4742
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.23309144377708435,
      "learning_rate": 2.9674124715129124e-05,
      "loss": 0.871,
      "step": 4743
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.2669467329978943,
      "learning_rate": 2.9637387402020034e-05,
      "loss": 0.8074,
      "step": 4744
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.2514881491661072,
      "learning_rate": 2.9600668887285608e-05,
      "loss": 0.8757,
      "step": 4745
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.4424818158149719,
      "learning_rate": 2.956396918073573e-05,
      "loss": 1.0551,
      "step": 4746
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.40262800455093384,
      "learning_rate": 2.9527288292175204e-05,
      "loss": 0.7088,
      "step": 4747
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.44785988330841064,
      "learning_rate": 2.9490626231403885e-05,
      "loss": 0.9021,
      "step": 4748
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.29211872816085815,
      "learning_rate": 2.945398300821658e-05,
      "loss": 0.811,
      "step": 4749
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.20798338949680328,
      "learning_rate": 2.941735863240299e-05,
      "loss": 0.8251,
      "step": 4750
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.22216399013996124,
      "learning_rate": 2.938075311374788e-05,
      "loss": 0.6088,
      "step": 4751
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.20437604188919067,
      "learning_rate": 2.9344166462030963e-05,
      "loss": 0.9106,
      "step": 4752
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.20589418709278107,
      "learning_rate": 2.9307598687026826e-05,
      "loss": 0.7919,
      "step": 4753
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0641462802886963,
      "learning_rate": 2.927104979850509e-05,
      "loss": 0.8712,
      "step": 4754
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.20385371148586273,
      "learning_rate": 2.923451980623032e-05,
      "loss": 0.8803,
      "step": 4755
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.192649707198143,
      "learning_rate": 2.9198008719962056e-05,
      "loss": 0.8954,
      "step": 4756
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.2113293558359146,
      "learning_rate": 2.9161516549454693e-05,
      "loss": 0.6624,
      "step": 4757
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.35768285393714905,
      "learning_rate": 2.9125043304457655e-05,
      "loss": 1.0387,
      "step": 4758
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.42982685565948486,
      "learning_rate": 2.908858899471534e-05,
      "loss": 0.7028,
      "step": 4759
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.8041394352912903,
      "learning_rate": 2.905215362996695e-05,
      "loss": 1.0473,
      "step": 4760
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.41722947359085083,
      "learning_rate": 2.901573721994676e-05,
      "loss": 0.7684,
      "step": 4761
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.2369544804096222,
      "learning_rate": 2.897933977438395e-05,
      "loss": 0.7369,
      "step": 4762
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.8277308940887451,
      "learning_rate": 2.894296130300258e-05,
      "loss": 0.757,
      "step": 4763
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.3050149977207184,
      "learning_rate": 2.8906601815521683e-05,
      "loss": 0.7487,
      "step": 4764
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.8084016442298889,
      "learning_rate": 2.8870261321655222e-05,
      "loss": 0.7495,
      "step": 4765
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.5294256210327148,
      "learning_rate": 2.883393983111211e-05,
      "loss": 0.8434,
      "step": 4766
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.2465563714504242,
      "learning_rate": 2.8797637353596097e-05,
      "loss": 0.8052,
      "step": 4767
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.21224452555179596,
      "learning_rate": 2.8761353898805922e-05,
      "loss": 0.9001,
      "step": 4768
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.072237491607666,
      "learning_rate": 2.8725089476435264e-05,
      "loss": 0.734,
      "step": 4769
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.5156484246253967,
      "learning_rate": 2.8688844096172618e-05,
      "loss": 0.8237,
      "step": 4770
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.350093811750412,
      "learning_rate": 2.8652617767701495e-05,
      "loss": 0.9663,
      "step": 4771
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.679699718952179,
      "learning_rate": 2.8616410500700298e-05,
      "loss": 0.8505,
      "step": 4772
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.19703854620456696,
      "learning_rate": 2.858022230484221e-05,
      "loss": 0.99,
      "step": 4773
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.21031031012535095,
      "learning_rate": 2.854405318979556e-05,
      "loss": 0.8986,
      "step": 4774
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.16097688674926758,
      "learning_rate": 2.8507903165223382e-05,
      "loss": 0.997,
      "step": 4775
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.24625171720981598,
      "learning_rate": 2.847177224078361e-05,
      "loss": 0.9044,
      "step": 4776
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.18539179861545563,
      "learning_rate": 2.8435660426129252e-05,
      "loss": 0.6812,
      "step": 4777
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.20722956955432892,
      "learning_rate": 2.8399567730908004e-05,
      "loss": 0.854,
      "step": 4778
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.22125521302223206,
      "learning_rate": 2.8363494164762593e-05,
      "loss": 0.7822,
      "step": 4779
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.36615416407585144,
      "learning_rate": 2.8327439737330552e-05,
      "loss": 1.0351,
      "step": 4780
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.1457345336675644,
      "learning_rate": 2.8291404458244342e-05,
      "loss": 0.6804,
      "step": 4781
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.3960024416446686,
      "learning_rate": 2.825538833713134e-05,
      "loss": 0.7624,
      "step": 4782
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.34577852487564087,
      "learning_rate": 2.8219391383613712e-05,
      "loss": 0.8036,
      "step": 4783
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.3829205334186554,
      "learning_rate": 2.8183413607308573e-05,
      "loss": 0.9514,
      "step": 4784
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.19809655845165253,
      "learning_rate": 2.814745501782794e-05,
      "loss": 0.9396,
      "step": 4785
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.23011839389801025,
      "learning_rate": 2.8111515624778574e-05,
      "loss": 0.9786,
      "step": 4786
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.14013026654720306,
      "learning_rate": 2.8075595437762303e-05,
      "loss": 0.5962,
      "step": 4787
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.3031526505947113,
      "learning_rate": 2.803969446637563e-05,
      "loss": 0.8329,
      "step": 4788
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.2427252233028412,
      "learning_rate": 2.8003812720210056e-05,
      "loss": 0.7604,
      "step": 4789
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.4419197142124176,
      "learning_rate": 2.796795020885192e-05,
      "loss": 0.8398,
      "step": 4790
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.7284539341926575,
      "learning_rate": 2.793210694188234e-05,
      "loss": 0.8182,
      "step": 4791
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.22998452186584473,
      "learning_rate": 2.7896282928877394e-05,
      "loss": 0.8101,
      "step": 4792
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.19162248075008392,
      "learning_rate": 2.7860478179408e-05,
      "loss": 0.6691,
      "step": 4793
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.3028649091720581,
      "learning_rate": 2.7824692703039846e-05,
      "loss": 0.6318,
      "step": 4794
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.19503532350063324,
      "learning_rate": 2.7788926509333602e-05,
      "loss": 0.6407,
      "step": 4795
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.41432902216911316,
      "learning_rate": 2.7753179607844615e-05,
      "loss": 0.8908,
      "step": 4796
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.19783782958984375,
      "learning_rate": 2.7717452008123312e-05,
      "loss": 0.8014,
      "step": 4797
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.4828290641307831,
      "learning_rate": 2.7681743719714758e-05,
      "loss": 0.7711,
      "step": 4798
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.28546661138534546,
      "learning_rate": 2.7646054752158867e-05,
      "loss": 1.0031,
      "step": 4799
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.2724970579147339,
      "learning_rate": 2.76103851149906e-05,
      "loss": 0.7638,
      "step": 4800
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.323687344789505,
      "learning_rate": 2.757473481773949e-05,
      "loss": 0.8247,
      "step": 4801
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.40148380398750305,
      "learning_rate": 2.753910386993007e-05,
      "loss": 0.9718,
      "step": 4802
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.2580116391181946,
      "learning_rate": 2.750349228108169e-05,
      "loss": 0.816,
      "step": 4803
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.23252621293067932,
      "learning_rate": 2.7467900060708408e-05,
      "loss": 0.7888,
      "step": 4804
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.3282400667667389,
      "learning_rate": 2.7432327218319255e-05,
      "loss": 0.7868,
      "step": 4805
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.23357081413269043,
      "learning_rate": 2.7396773763417993e-05,
      "loss": 0.5993,
      "step": 4806
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.2972813844680786,
      "learning_rate": 2.7361239705503284e-05,
      "loss": 0.8349,
      "step": 4807
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.3984099328517914,
      "learning_rate": 2.7325725054068485e-05,
      "loss": 0.7466,
      "step": 4808
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.27172935009002686,
      "learning_rate": 2.7290229818601888e-05,
      "loss": 0.8757,
      "step": 4809
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.20691372454166412,
      "learning_rate": 2.725475400858656e-05,
      "loss": 0.7278,
      "step": 4810
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.44887009263038635,
      "learning_rate": 2.721929763350033e-05,
      "loss": 0.7511,
      "step": 4811
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.19931426644325256,
      "learning_rate": 2.7183860702815887e-05,
      "loss": 0.6545,
      "step": 4812
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.5099042654037476,
      "learning_rate": 2.7148443226000754e-05,
      "loss": 0.9943,
      "step": 4813
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.46185335516929626,
      "learning_rate": 2.711304521251714e-05,
      "loss": 0.7787,
      "step": 4814
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.24495212733745575,
      "learning_rate": 2.7077666671822177e-05,
      "loss": 0.9914,
      "step": 4815
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.403389573097229,
      "learning_rate": 2.7042307613367768e-05,
      "loss": 1.2849,
      "step": 4816
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.4958042800426483,
      "learning_rate": 2.7006968046600524e-05,
      "loss": 0.3734,
      "step": 4817
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.23940140008926392,
      "learning_rate": 2.6971647980961954e-05,
      "loss": 0.847,
      "step": 4818
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.16577467322349548,
      "learning_rate": 2.693634742588831e-05,
      "loss": 1.0028,
      "step": 4819
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.2781061828136444,
      "learning_rate": 2.6901066390810657e-05,
      "loss": 0.8045,
      "step": 4820
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.21678194403648376,
      "learning_rate": 2.68658048851548e-05,
      "loss": 0.851,
      "step": 4821
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.334401935338974,
      "learning_rate": 2.683056291834135e-05,
      "loss": 0.8427,
      "step": 4822
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.3041546046733856,
      "learning_rate": 2.679534049978575e-05,
      "loss": 0.8487,
      "step": 4823
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.42840883135795593,
      "learning_rate": 2.6760137638898097e-05,
      "loss": 0.8928,
      "step": 4824
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.15826478600502014,
      "learning_rate": 2.6724954345083374e-05,
      "loss": 0.7624,
      "step": 4825
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.34925106167793274,
      "learning_rate": 2.668979062774133e-05,
      "loss": 0.8285,
      "step": 4826
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.22690312564373016,
      "learning_rate": 2.66546464962664e-05,
      "loss": 0.8672,
      "step": 4827
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.6308243870735168,
      "learning_rate": 2.6619521960047843e-05,
      "loss": 0.7718,
      "step": 4828
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.6654287576675415,
      "learning_rate": 2.658441702846972e-05,
      "loss": 0.8266,
      "step": 4829
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.44702476263046265,
      "learning_rate": 2.6549331710910807e-05,
      "loss": 0.7885,
      "step": 4830
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.26460057497024536,
      "learning_rate": 2.6514266016744603e-05,
      "loss": 0.8977,
      "step": 4831
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.32930684089660645,
      "learning_rate": 2.647921995533944e-05,
      "loss": 0.9498,
      "step": 4832
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.27887675166130066,
      "learning_rate": 2.6444193536058405e-05,
      "loss": 0.7102,
      "step": 4833
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.20157885551452637,
      "learning_rate": 2.640918676825923e-05,
      "loss": 0.893,
      "step": 4834
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.24987010657787323,
      "learning_rate": 2.637419966129451e-05,
      "loss": 0.7593,
      "step": 4835
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.4944954812526703,
      "learning_rate": 2.633923222451159e-05,
      "loss": 0.7333,
      "step": 4836
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.3016544282436371,
      "learning_rate": 2.6304284467252404e-05,
      "loss": 0.8818,
      "step": 4837
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.19347722828388214,
      "learning_rate": 2.6269356398853896e-05,
      "loss": 0.6965,
      "step": 4838
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.28468623757362366,
      "learning_rate": 2.6234448028647507e-05,
      "loss": 0.7187,
      "step": 4839
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.24250733852386475,
      "learning_rate": 2.6199559365959457e-05,
      "loss": 0.8045,
      "step": 4840
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.17163050174713135,
      "learning_rate": 2.6164690420110882e-05,
      "loss": 0.8581,
      "step": 4841
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.33449673652648926,
      "learning_rate": 2.6129841200417405e-05,
      "loss": 0.9124,
      "step": 4842
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.15420810878276825,
      "learning_rate": 2.6095011716189576e-05,
      "loss": 0.6514,
      "step": 4843
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.23029889166355133,
      "learning_rate": 2.60602019767325e-05,
      "loss": 0.8912,
      "step": 4844
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.25070905685424805,
      "learning_rate": 2.602541199134615e-05,
      "loss": 0.7807,
      "step": 4845
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.24483850598335266,
      "learning_rate": 2.5990641769325186e-05,
      "loss": 0.7264,
      "step": 4846
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.30475154519081116,
      "learning_rate": 2.5955891319958915e-05,
      "loss": 0.8722,
      "step": 4847
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.2361190766096115,
      "learning_rate": 2.592116065253143e-05,
      "loss": 0.7578,
      "step": 4848
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.28781819343566895,
      "learning_rate": 2.5886449776321564e-05,
      "loss": 0.705,
      "step": 4849
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.3787461221218109,
      "learning_rate": 2.5851758700602723e-05,
      "loss": 0.9392,
      "step": 4850
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.2494814395904541,
      "learning_rate": 2.5817087434643263e-05,
      "loss": 0.8094,
      "step": 4851
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.27389588952064514,
      "learning_rate": 2.578243598770599e-05,
      "loss": 0.7922,
      "step": 4852
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.3341312110424042,
      "learning_rate": 2.5747804369048588e-05,
      "loss": 1.0037,
      "step": 4853
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.6617529988288879,
      "learning_rate": 2.5713192587923395e-05,
      "loss": 0.7263,
      "step": 4854
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.29203036427497864,
      "learning_rate": 2.56786006535774e-05,
      "loss": 0.7948,
      "step": 4855
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.17208309471607208,
      "learning_rate": 2.5644028575252343e-05,
      "loss": 0.902,
      "step": 4856
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.2880607545375824,
      "learning_rate": 2.5609476362184692e-05,
      "loss": 0.9848,
      "step": 4857
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.4411762058734894,
      "learning_rate": 2.5574944023605506e-05,
      "loss": 0.6999,
      "step": 4858
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.2028767317533493,
      "learning_rate": 2.554043156874063e-05,
      "loss": 0.6946,
      "step": 4859
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.29598602652549744,
      "learning_rate": 2.5505939006810496e-05,
      "loss": 0.8855,
      "step": 4860
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.21271507441997528,
      "learning_rate": 2.5471466347030383e-05,
      "loss": 0.7109,
      "step": 4861
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.267887145280838,
      "learning_rate": 2.5437013598610093e-05,
      "loss": 0.7377,
      "step": 4862
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.44656801223754883,
      "learning_rate": 2.5402580770754125e-05,
      "loss": 0.6077,
      "step": 4863
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.26102375984191895,
      "learning_rate": 2.5368167872661798e-05,
      "loss": 0.7801,
      "step": 4864
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.23119939863681793,
      "learning_rate": 2.533377491352694e-05,
      "loss": 0.8151,
      "step": 4865
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.19783087074756622,
      "learning_rate": 2.5299401902538135e-05,
      "loss": 0.871,
      "step": 4866
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.2312706559896469,
      "learning_rate": 2.526504884887866e-05,
      "loss": 0.7777,
      "step": 4867
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.2240922898054123,
      "learning_rate": 2.5230715761726355e-05,
      "loss": 0.7889,
      "step": 4868
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.29063838720321655,
      "learning_rate": 2.5196402650253838e-05,
      "loss": 0.7633,
      "step": 4869
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.5405299663543701,
      "learning_rate": 2.516210952362833e-05,
      "loss": 0.9188,
      "step": 4870
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.2176663875579834,
      "learning_rate": 2.5127836391011773e-05,
      "loss": 0.5614,
      "step": 4871
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.20128217339515686,
      "learning_rate": 2.509358326156065e-05,
      "loss": 0.5086,
      "step": 4872
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.32327646017074585,
      "learning_rate": 2.5059350144426208e-05,
      "loss": 0.9033,
      "step": 4873
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.30935975909233093,
      "learning_rate": 2.5025137048754332e-05,
      "loss": 0.7314,
      "step": 4874
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.338731974363327,
      "learning_rate": 2.49909439836855e-05,
      "loss": 0.6519,
      "step": 4875
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.6088739037513733,
      "learning_rate": 2.4956770958354892e-05,
      "loss": 0.6775,
      "step": 4876
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.39501577615737915,
      "learning_rate": 2.492261798189235e-05,
      "loss": 0.8085,
      "step": 4877
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.3535667955875397,
      "learning_rate": 2.4888485063422273e-05,
      "loss": 0.9658,
      "step": 4878
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.44169124960899353,
      "learning_rate": 2.4854372212063782e-05,
      "loss": 0.8691,
      "step": 4879
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.32973748445510864,
      "learning_rate": 2.482027943693064e-05,
      "loss": 0.8313,
      "step": 4880
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.2824920117855072,
      "learning_rate": 2.4786206747131157e-05,
      "loss": 0.8182,
      "step": 4881
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.3475906550884247,
      "learning_rate": 2.475215415176837e-05,
      "loss": 0.8874,
      "step": 4882
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.20139411091804504,
      "learning_rate": 2.4718121659939917e-05,
      "loss": 0.9901,
      "step": 4883
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.24892795085906982,
      "learning_rate": 2.46841092807381e-05,
      "loss": 0.999,
      "step": 4884
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.45212966203689575,
      "learning_rate": 2.4650117023249743e-05,
      "loss": 0.6704,
      "step": 4885
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.29619300365448,
      "learning_rate": 2.4616144896556382e-05,
      "loss": 0.6859,
      "step": 4886
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.16620661318302155,
      "learning_rate": 2.4582192909734203e-05,
      "loss": 0.7435,
      "step": 4887
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.30989912152290344,
      "learning_rate": 2.4548261071853883e-05,
      "loss": 0.8111,
      "step": 4888
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.16960376501083374,
      "learning_rate": 2.451434939198085e-05,
      "loss": 0.6803,
      "step": 4889
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.2814215123653412,
      "learning_rate": 2.4480457879175113e-05,
      "loss": 0.9125,
      "step": 4890
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.30610135197639465,
      "learning_rate": 2.44465865424912e-05,
      "loss": 1.0453,
      "step": 4891
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.46074044704437256,
      "learning_rate": 2.4412735390978357e-05,
      "loss": 1.0929,
      "step": 4892
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.17609046399593353,
      "learning_rate": 2.4378904433680417e-05,
      "loss": 0.9217,
      "step": 4893
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.22652891278266907,
      "learning_rate": 2.434509367963582e-05,
      "loss": 0.7829,
      "step": 4894
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.24449104070663452,
      "learning_rate": 2.4311303137877526e-05,
      "loss": 0.8513,
      "step": 4895
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.1220296248793602,
      "learning_rate": 2.42775328174332e-05,
      "loss": 0.63,
      "step": 4896
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.28876954317092896,
      "learning_rate": 2.424378272732508e-05,
      "loss": 0.8734,
      "step": 4897
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.23534038662910461,
      "learning_rate": 2.4210052876569944e-05,
      "loss": 0.7724,
      "step": 4898
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.19929039478302002,
      "learning_rate": 2.4176343274179224e-05,
      "loss": 0.9847,
      "step": 4899
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.21964481472969055,
      "learning_rate": 2.4142653929158944e-05,
      "loss": 1.054,
      "step": 4900
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.8311096429824829,
      "learning_rate": 2.410898485050961e-05,
      "loss": 0.9413,
      "step": 4901
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.3009783625602722,
      "learning_rate": 2.4075336047226503e-05,
      "loss": 0.609,
      "step": 4902
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.30480673909187317,
      "learning_rate": 2.404170752829934e-05,
      "loss": 0.6362,
      "step": 4903
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.32882997393608093,
      "learning_rate": 2.4008099302712416e-05,
      "loss": 0.7362,
      "step": 4904
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.2084866464138031,
      "learning_rate": 2.3974511379444688e-05,
      "loss": 0.6719,
      "step": 4905
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.3838040828704834,
      "learning_rate": 2.3940943767469625e-05,
      "loss": 0.8982,
      "step": 4906
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.3786190152168274,
      "learning_rate": 2.390739647575535e-05,
      "loss": 0.8606,
      "step": 4907
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.27102532982826233,
      "learning_rate": 2.3873869513264436e-05,
      "loss": 0.8972,
      "step": 4908
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.2777271866798401,
      "learning_rate": 2.3840362888954104e-05,
      "loss": 0.7466,
      "step": 4909
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.28201982378959656,
      "learning_rate": 2.3806876611776165e-05,
      "loss": 0.7989,
      "step": 4910
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.21426619589328766,
      "learning_rate": 2.3773410690676858e-05,
      "loss": 0.766,
      "step": 4911
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.3116530478000641,
      "learning_rate": 2.373996513459721e-05,
      "loss": 0.9556,
      "step": 4912
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.39805909991264343,
      "learning_rate": 2.3706539952472616e-05,
      "loss": 0.8901,
      "step": 4913
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.213626429438591,
      "learning_rate": 2.367313515323304e-05,
      "loss": 0.7786,
      "step": 4914
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.5389255285263062,
      "learning_rate": 2.3639750745803145e-05,
      "loss": 0.8349,
      "step": 4915
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.2179786115884781,
      "learning_rate": 2.3606386739101983e-05,
      "loss": 0.6596,
      "step": 4916
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.27120259404182434,
      "learning_rate": 2.3573043142043238e-05,
      "loss": 0.9497,
      "step": 4917
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.39120447635650635,
      "learning_rate": 2.3539719963535166e-05,
      "loss": 0.7494,
      "step": 4918
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.5490342974662781,
      "learning_rate": 2.350641721248047e-05,
      "loss": 0.7411,
      "step": 4919
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.6808369159698486,
      "learning_rate": 2.3473134897776516e-05,
      "loss": 0.8507,
      "step": 4920
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.3501843214035034,
      "learning_rate": 2.3439873028315083e-05,
      "loss": 0.5904,
      "step": 4921
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.20296482741832733,
      "learning_rate": 2.3406631612982587e-05,
      "loss": 0.8097,
      "step": 4922
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.3078536093235016,
      "learning_rate": 2.3373410660659966e-05,
      "loss": 0.8384,
      "step": 4923
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.20288079977035522,
      "learning_rate": 2.33402101802226e-05,
      "loss": 0.7705,
      "step": 4924
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.3199770152568817,
      "learning_rate": 2.3307030180540568e-05,
      "loss": 0.9161,
      "step": 4925
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.2673113942146301,
      "learning_rate": 2.3273870670478313e-05,
      "loss": 0.7575,
      "step": 4926
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.5413228869438171,
      "learning_rate": 2.324073165889489e-05,
      "loss": 0.8184,
      "step": 4927
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.22991852462291718,
      "learning_rate": 2.3207613154643883e-05,
      "loss": 0.9286,
      "step": 4928
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.4097607731819153,
      "learning_rate": 2.3174515166573306e-05,
      "loss": 0.7867,
      "step": 4929
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.34780561923980713,
      "learning_rate": 2.3141437703525816e-05,
      "loss": 0.9345,
      "step": 4930
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.24358166754245758,
      "learning_rate": 2.3108380774338532e-05,
      "loss": 0.8204,
      "step": 4931
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.6811206936836243,
      "learning_rate": 2.3075344387843033e-05,
      "loss": 0.8259,
      "step": 4932
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.6162370443344116,
      "learning_rate": 2.3042328552865487e-05,
      "loss": 0.774,
      "step": 4933
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.8103727698326111,
      "learning_rate": 2.3009333278226563e-05,
      "loss": 0.7762,
      "step": 4934
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.38823363184928894,
      "learning_rate": 2.297635857274142e-05,
      "loss": 0.8533,
      "step": 4935
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.32520803809165955,
      "learning_rate": 2.2943404445219706e-05,
      "loss": 0.7438,
      "step": 4936
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.21482089161872864,
      "learning_rate": 2.291047090446553e-05,
      "loss": 0.6252,
      "step": 4937
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.2549033761024475,
      "learning_rate": 2.2877557959277672e-05,
      "loss": 0.7479,
      "step": 4938
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.22055624425411224,
      "learning_rate": 2.28446656184492e-05,
      "loss": 0.8759,
      "step": 4939
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.3036395311355591,
      "learning_rate": 2.2811793890767808e-05,
      "loss": 0.8662,
      "step": 4940
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.42443665862083435,
      "learning_rate": 2.2778942785015676e-05,
      "loss": 0.9083,
      "step": 4941
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.4703480005264282,
      "learning_rate": 2.274611230996937e-05,
      "loss": 0.6395,
      "step": 4942
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.2653079628944397,
      "learning_rate": 2.2713302474400078e-05,
      "loss": 0.7368,
      "step": 4943
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.3980872929096222,
      "learning_rate": 2.268051328707341e-05,
      "loss": 0.9998,
      "step": 4944
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.20599886775016785,
      "learning_rate": 2.2647744756749444e-05,
      "loss": 0.7596,
      "step": 4945
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.62261563539505,
      "learning_rate": 2.261499689218276e-05,
      "loss": 0.7597,
      "step": 4946
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.20055076479911804,
      "learning_rate": 2.2582269702122426e-05,
      "loss": 0.8942,
      "step": 4947
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.2848547101020813,
      "learning_rate": 2.254956319531201e-05,
      "loss": 0.8231,
      "step": 4948
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.3058386445045471,
      "learning_rate": 2.2516877380489455e-05,
      "loss": 0.8491,
      "step": 4949
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.4913463592529297,
      "learning_rate": 2.248421226638727e-05,
      "loss": 0.8427,
      "step": 4950
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.3010297119617462,
      "learning_rate": 2.245156786173246e-05,
      "loss": 0.8618,
      "step": 4951
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.18361248075962067,
      "learning_rate": 2.2418944175246346e-05,
      "loss": 0.8668,
      "step": 4952
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.30002784729003906,
      "learning_rate": 2.238634121564487e-05,
      "loss": 0.914,
      "step": 4953
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.3734686076641083,
      "learning_rate": 2.2353758991638396e-05,
      "loss": 0.8363,
      "step": 4954
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.20711128413677216,
      "learning_rate": 2.2321197511931667e-05,
      "loss": 0.7977,
      "step": 4955
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.1996450424194336,
      "learning_rate": 2.2288656785223983e-05,
      "loss": 1.1004,
      "step": 4956
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.5606786012649536,
      "learning_rate": 2.225613682020905e-05,
      "loss": 0.8136,
      "step": 4957
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.30555376410484314,
      "learning_rate": 2.222363762557509e-05,
      "loss": 0.8303,
      "step": 4958
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.3634406328201294,
      "learning_rate": 2.219115921000465e-05,
      "loss": 0.9651,
      "step": 4959
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.28387004137039185,
      "learning_rate": 2.2158701582174833e-05,
      "loss": 0.8235,
      "step": 4960
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.3161092698574066,
      "learning_rate": 2.2126264750757197e-05,
      "loss": 0.6288,
      "step": 4961
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.23776930570602417,
      "learning_rate": 2.2093848724417643e-05,
      "loss": 0.9078,
      "step": 4962
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.17578555643558502,
      "learning_rate": 2.206145351181659e-05,
      "loss": 0.7417,
      "step": 4963
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.3820595145225525,
      "learning_rate": 2.202907912160892e-05,
      "loss": 0.8648,
      "step": 4964
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.36584293842315674,
      "learning_rate": 2.1996725562443833e-05,
      "loss": 0.6386,
      "step": 4965
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.29274094104766846,
      "learning_rate": 2.196439284296513e-05,
      "loss": 0.8945,
      "step": 4966
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.2646094560623169,
      "learning_rate": 2.193208097181094e-05,
      "loss": 1.0698,
      "step": 4967
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.3005093038082123,
      "learning_rate": 2.189978995761378e-05,
      "loss": 0.6324,
      "step": 4968
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.2242826372385025,
      "learning_rate": 2.1867519809000703e-05,
      "loss": 0.5297,
      "step": 4969
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.26500260829925537,
      "learning_rate": 2.183527053459312e-05,
      "loss": 0.8543,
      "step": 4970
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.12846976518630981,
      "learning_rate": 2.1803042143006937e-05,
      "loss": 0.6575,
      "step": 4971
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.25097209215164185,
      "learning_rate": 2.1770834642852343e-05,
      "loss": 0.8603,
      "step": 4972
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.30343249440193176,
      "learning_rate": 2.173864804273409e-05,
      "loss": 0.8073,
      "step": 4973
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.7238214612007141,
      "learning_rate": 2.170648235125129e-05,
      "loss": 0.7691,
      "step": 4974
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.2963162958621979,
      "learning_rate": 2.1674337576997385e-05,
      "loss": 0.8677,
      "step": 4975
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.26743581891059875,
      "learning_rate": 2.1642213728560434e-05,
      "loss": 0.8516,
      "step": 4976
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.37149950861930847,
      "learning_rate": 2.161011081452272e-05,
      "loss": 0.965,
      "step": 4977
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.3321375250816345,
      "learning_rate": 2.157802884346093e-05,
      "loss": 0.819,
      "step": 4978
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.2704627811908722,
      "learning_rate": 2.1545967823946346e-05,
      "loss": 0.7291,
      "step": 4979
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.30249401926994324,
      "learning_rate": 2.1513927764544428e-05,
      "loss": 0.9539,
      "step": 4980
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.24757900834083557,
      "learning_rate": 2.1481908673815164e-05,
      "loss": 0.7704,
      "step": 4981
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.42187440395355225,
      "learning_rate": 2.1449910560312945e-05,
      "loss": 0.8455,
      "step": 4982
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.39759165048599243,
      "learning_rate": 2.141793343258647e-05,
      "loss": 1.0338,
      "step": 4983
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.33515068888664246,
      "learning_rate": 2.1385977299178927e-05,
      "loss": 1.0465,
      "step": 4984
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.49531129002571106,
      "learning_rate": 2.1354042168627818e-05,
      "loss": 0.544,
      "step": 4985
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.1694357991218567,
      "learning_rate": 2.1322128049465074e-05,
      "loss": 0.7776,
      "step": 4986
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.24304035305976868,
      "learning_rate": 2.129023495021705e-05,
      "loss": 0.9103,
      "step": 4987
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.6999689936637878,
      "learning_rate": 2.1258362879404336e-05,
      "loss": 0.5688,
      "step": 4988
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.21464447677135468,
      "learning_rate": 2.1226511845542153e-05,
      "loss": 1.0228,
      "step": 4989
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.21609891951084137,
      "learning_rate": 2.1194681857139853e-05,
      "loss": 1.0188,
      "step": 4990
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.37093672156333923,
      "learning_rate": 2.1162872922701304e-05,
      "loss": 0.7982,
      "step": 4991
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.6092144846916199,
      "learning_rate": 2.113108505072474e-05,
      "loss": 0.7862,
      "step": 4992
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.21090777218341827,
      "learning_rate": 2.1099318249702703e-05,
      "loss": 0.8326,
      "step": 4993
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.23470479249954224,
      "learning_rate": 2.106757252812216e-05,
      "loss": 0.7784,
      "step": 4994
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.29048773646354675,
      "learning_rate": 2.1035847894464466e-05,
      "loss": 0.6359,
      "step": 4995
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.431904137134552,
      "learning_rate": 2.1004144357205247e-05,
      "loss": 1.1477,
      "step": 4996
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.39068084955215454,
      "learning_rate": 2.09724619248146e-05,
      "loss": 0.8746,
      "step": 4997
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.330130010843277,
      "learning_rate": 2.0940800605756918e-05,
      "loss": 0.8785,
      "step": 4998
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.22416876256465912,
      "learning_rate": 2.0909160408491014e-05,
      "loss": 0.7706,
      "step": 4999
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.18705667555332184,
      "learning_rate": 2.0877541341469976e-05,
      "loss": 0.5548,
      "step": 5000
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.16707345843315125,
      "learning_rate": 2.0845943413141256e-05,
      "loss": 0.8446,
      "step": 5001
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.24798786640167236,
      "learning_rate": 2.081436663194678e-05,
      "loss": 0.8282,
      "step": 5002
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.26623404026031494,
      "learning_rate": 2.0782811006322668e-05,
      "loss": 0.7481,
      "step": 5003
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.24649417400360107,
      "learning_rate": 2.075127654469946e-05,
      "loss": 0.7717,
      "step": 5004
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.2743172347545624,
      "learning_rate": 2.07197632555021e-05,
      "loss": 0.7278,
      "step": 5005
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.40982210636138916,
      "learning_rate": 2.068827114714972e-05,
      "loss": 0.8927,
      "step": 5006
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.39964497089385986,
      "learning_rate": 2.065680022805594e-05,
      "loss": 0.604,
      "step": 5007
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.4288814663887024,
      "learning_rate": 2.0625350506628672e-05,
      "loss": 0.9657,
      "step": 5008
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.21206751465797424,
      "learning_rate": 2.0593921991270116e-05,
      "loss": 0.8546,
      "step": 5009
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.17321732640266418,
      "learning_rate": 2.0562514690376877e-05,
      "loss": 0.7824,
      "step": 5010
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.3138575255870819,
      "learning_rate": 2.0531128612339857e-05,
      "loss": 0.9156,
      "step": 5011
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.2841512858867645,
      "learning_rate": 2.049976376554431e-05,
      "loss": 0.7392,
      "step": 5012
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.28130924701690674,
      "learning_rate": 2.046842015836977e-05,
      "loss": 0.8165,
      "step": 5013
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.6424183249473572,
      "learning_rate": 2.043709779919014e-05,
      "loss": 0.8755,
      "step": 5014
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.23349623382091522,
      "learning_rate": 2.040579669637366e-05,
      "loss": 0.8647,
      "step": 5015
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.26198917627334595,
      "learning_rate": 2.0374516858282822e-05,
      "loss": 0.9488,
      "step": 5016
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.3003259301185608,
      "learning_rate": 2.0343258293274513e-05,
      "loss": 1.0909,
      "step": 5017
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.15631741285324097,
      "learning_rate": 2.0312021009699912e-05,
      "loss": 0.742,
      "step": 5018
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.17593587934970856,
      "learning_rate": 2.0280805015904457e-05,
      "loss": 0.998,
      "step": 5019
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.2918497622013092,
      "learning_rate": 2.0249610320227976e-05,
      "loss": 0.8905,
      "step": 5020
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.23410841822624207,
      "learning_rate": 2.021843693100458e-05,
      "loss": 0.8981,
      "step": 5021
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.33630844950675964,
      "learning_rate": 2.0187284856562703e-05,
      "loss": 0.9519,
      "step": 5022
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.3214268684387207,
      "learning_rate": 2.0156154105225022e-05,
      "loss": 1.0171,
      "step": 5023
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.26216381788253784,
      "learning_rate": 2.012504468530857e-05,
      "loss": 0.7322,
      "step": 5024
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.6180487275123596,
      "learning_rate": 2.009395660512472e-05,
      "loss": 0.6741,
      "step": 5025
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.19734065234661102,
      "learning_rate": 2.006288987297903e-05,
      "loss": 0.7685,
      "step": 5026
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.2557995319366455,
      "learning_rate": 2.0031844497171447e-05,
      "loss": 0.9541,
      "step": 5027
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.2683420181274414,
      "learning_rate": 2.000082048599622e-05,
      "loss": 0.715,
      "step": 5028
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.16220325231552124,
      "learning_rate": 1.9969817847741758e-05,
      "loss": 0.7848,
      "step": 5029
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.25103017687797546,
      "learning_rate": 1.993883659069099e-05,
      "loss": 0.761,
      "step": 5030
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.18549734354019165,
      "learning_rate": 1.99078767231209e-05,
      "loss": 0.8482,
      "step": 5031
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.26904168725013733,
      "learning_rate": 1.9876938253302913e-05,
      "loss": 0.697,
      "step": 5032
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.24009130895137787,
      "learning_rate": 1.984602118950264e-05,
      "loss": 0.8785,
      "step": 5033
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.2161477953195572,
      "learning_rate": 1.981512553998003e-05,
      "loss": 0.9451,
      "step": 5034
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.628824770450592,
      "learning_rate": 1.9784251312989332e-05,
      "loss": 0.854,
      "step": 5035
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.280764102935791,
      "learning_rate": 1.9753398516778976e-05,
      "loss": 0.8973,
      "step": 5036
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.27525192499160767,
      "learning_rate": 1.9722567159591754e-05,
      "loss": 0.9688,
      "step": 5037
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.40597212314605713,
      "learning_rate": 1.9691757249664732e-05,
      "loss": 0.8355,
      "step": 5038
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.33017396926879883,
      "learning_rate": 1.966096879522914e-05,
      "loss": 1.0814,
      "step": 5039
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.32181036472320557,
      "learning_rate": 1.963020180451065e-05,
      "loss": 0.7585,
      "step": 5040
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.47278255224227905,
      "learning_rate": 1.9599456285729056e-05,
      "loss": 0.5547,
      "step": 5041
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.3082839548587799,
      "learning_rate": 1.9568732247098398e-05,
      "loss": 0.5534,
      "step": 5042
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.380717396736145,
      "learning_rate": 1.9538029696827152e-05,
      "loss": 1.0334,
      "step": 5043
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.23470205068588257,
      "learning_rate": 1.9507348643117873e-05,
      "loss": 0.8695,
      "step": 5044
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.34301063418388367,
      "learning_rate": 1.9476689094167454e-05,
      "loss": 1.0783,
      "step": 5045
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.28543248772621155,
      "learning_rate": 1.944605105816706e-05,
      "loss": 0.9805,
      "step": 5046
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.21298320591449738,
      "learning_rate": 1.9415434543302037e-05,
      "loss": 0.9683,
      "step": 5047
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.6692608594894409,
      "learning_rate": 1.9384839557752054e-05,
      "loss": 1.1004,
      "step": 5048
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.20520475506782532,
      "learning_rate": 1.9354266109690966e-05,
      "loss": 0.75,
      "step": 5049
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.2505548298358917,
      "learning_rate": 1.9323714207286924e-05,
      "loss": 0.8025,
      "step": 5050
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.256397008895874,
      "learning_rate": 1.9293183858702334e-05,
      "loss": 0.7316,
      "step": 5051
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.21006616950035095,
      "learning_rate": 1.9262675072093718e-05,
      "loss": 0.6114,
      "step": 5052
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.4538816213607788,
      "learning_rate": 1.923218785561206e-05,
      "loss": 0.8478,
      "step": 5053
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.3021281361579895,
      "learning_rate": 1.9201722217402363e-05,
      "loss": 0.9132,
      "step": 5054
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.2892903983592987,
      "learning_rate": 1.917127816560399e-05,
      "loss": 0.7833,
      "step": 5055
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.23720002174377441,
      "learning_rate": 1.9140855708350525e-05,
      "loss": 1.0643,
      "step": 5056
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.3889181911945343,
      "learning_rate": 1.911045485376971e-05,
      "loss": 0.952,
      "step": 5057
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.30867862701416016,
      "learning_rate": 1.9080075609983593e-05,
      "loss": 0.7293,
      "step": 5058
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.6594305038452148,
      "learning_rate": 1.904971798510844e-05,
      "loss": 0.5898,
      "step": 5059
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.29457706212997437,
      "learning_rate": 1.9019381987254682e-05,
      "loss": 0.7263,
      "step": 5060
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.2698060870170593,
      "learning_rate": 1.8989067624527035e-05,
      "loss": 0.7383,
      "step": 5061
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.390116810798645,
      "learning_rate": 1.8958774905024412e-05,
      "loss": 0.7985,
      "step": 5062
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.186747208237648,
      "learning_rate": 1.892850383683997e-05,
      "loss": 0.7792,
      "step": 5063
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.2199368178844452,
      "learning_rate": 1.8898254428061045e-05,
      "loss": 0.6454,
      "step": 5064
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.5085387229919434,
      "learning_rate": 1.8868026686769124e-05,
      "loss": 0.6813,
      "step": 5065
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.47460806369781494,
      "learning_rate": 1.883782062104008e-05,
      "loss": 0.8038,
      "step": 5066
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.1798868179321289,
      "learning_rate": 1.880763623894385e-05,
      "loss": 0.6926,
      "step": 5067
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.18087169528007507,
      "learning_rate": 1.8777473548544612e-05,
      "loss": 0.5482,
      "step": 5068
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.30587464570999146,
      "learning_rate": 1.87473325579008e-05,
      "loss": 0.7857,
      "step": 5069
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.2280321568250656,
      "learning_rate": 1.8717213275064947e-05,
      "loss": 0.8239,
      "step": 5070
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.33124950528144836,
      "learning_rate": 1.868711570808389e-05,
      "loss": 0.6514,
      "step": 5071
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.39556461572647095,
      "learning_rate": 1.865703986499864e-05,
      "loss": 0.7542,
      "step": 5072
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.17900477349758148,
      "learning_rate": 1.862698575384434e-05,
      "loss": 0.7444,
      "step": 5073
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.19522689282894135,
      "learning_rate": 1.8596953382650396e-05,
      "loss": 0.7005,
      "step": 5074
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.1757408082485199,
      "learning_rate": 1.8566942759440374e-05,
      "loss": 0.8472,
      "step": 5075
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.38555464148521423,
      "learning_rate": 1.853695389223209e-05,
      "loss": 0.8267,
      "step": 5076
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.18089336156845093,
      "learning_rate": 1.8506986789037427e-05,
      "loss": 0.5758,
      "step": 5077
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.4506857097148895,
      "learning_rate": 1.8477041457862553e-05,
      "loss": 0.917,
      "step": 5078
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.5829901099205017,
      "learning_rate": 1.8447117906707823e-05,
      "loss": 0.8766,
      "step": 5079
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.3557930588722229,
      "learning_rate": 1.8417216143567674e-05,
      "loss": 0.7954,
      "step": 5080
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.3130500912666321,
      "learning_rate": 1.8387336176430825e-05,
      "loss": 0.7899,
      "step": 5081
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.3188861608505249,
      "learning_rate": 1.835747801328016e-05,
      "loss": 0.642,
      "step": 5082
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.2584910988807678,
      "learning_rate": 1.8327641662092654e-05,
      "loss": 0.9181,
      "step": 5083
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.15433207154273987,
      "learning_rate": 1.829782713083955e-05,
      "loss": 0.7621,
      "step": 5084
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.26217547059059143,
      "learning_rate": 1.8268034427486224e-05,
      "loss": 1.0669,
      "step": 5085
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.23407568037509918,
      "learning_rate": 1.823826355999223e-05,
      "loss": 0.938,
      "step": 5086
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.7899132370948792,
      "learning_rate": 1.820851453631125e-05,
      "loss": 0.8144,
      "step": 5087
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.13802136480808258,
      "learning_rate": 1.817878736439117e-05,
      "loss": 0.6648,
      "step": 5088
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.20150184631347656,
      "learning_rate": 1.814908205217407e-05,
      "loss": 0.6495,
      "step": 5089
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.35145652294158936,
      "learning_rate": 1.8119398607596072e-05,
      "loss": 0.8283,
      "step": 5090
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.6208770871162415,
      "learning_rate": 1.8089737038587584e-05,
      "loss": 0.8434,
      "step": 5091
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.6849430799484253,
      "learning_rate": 1.8060097353073113e-05,
      "loss": 1.257,
      "step": 5092
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.22470468282699585,
      "learning_rate": 1.803047955897127e-05,
      "loss": 0.8868,
      "step": 5093
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.15812711417675018,
      "learning_rate": 1.8000883664194966e-05,
      "loss": 0.6837,
      "step": 5094
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.24952611327171326,
      "learning_rate": 1.7971309676651094e-05,
      "loss": 0.7655,
      "step": 5095
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.23189619183540344,
      "learning_rate": 1.7941757604240793e-05,
      "loss": 0.8003,
      "step": 5096
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.33880001306533813,
      "learning_rate": 1.791222745485931e-05,
      "loss": 0.872,
      "step": 5097
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.2410232275724411,
      "learning_rate": 1.788271923639604e-05,
      "loss": 0.8948,
      "step": 5098
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.19986771047115326,
      "learning_rate": 1.7853232956734557e-05,
      "loss": 0.8965,
      "step": 5099
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.15458135306835175,
      "learning_rate": 1.782376862375249e-05,
      "loss": 0.9087,
      "step": 5100
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.20558030903339386,
      "learning_rate": 1.779432624532168e-05,
      "loss": 0.9442,
      "step": 5101
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.38633596897125244,
      "learning_rate": 1.776490582930811e-05,
      "loss": 0.9861,
      "step": 5102
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.24852165579795837,
      "learning_rate": 1.773550738357178e-05,
      "loss": 0.6497,
      "step": 5103
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.20859134197235107,
      "learning_rate": 1.770613091596701e-05,
      "loss": 0.6139,
      "step": 5104
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.33115339279174805,
      "learning_rate": 1.767677643434209e-05,
      "loss": 0.7463,
      "step": 5105
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.25865206122398376,
      "learning_rate": 1.7647443946539445e-05,
      "loss": 0.9087,
      "step": 5106
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.2364329844713211,
      "learning_rate": 1.7618133460395746e-05,
      "loss": 0.8767,
      "step": 5107
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.6218249201774597,
      "learning_rate": 1.758884498374165e-05,
      "loss": 0.7174,
      "step": 5108
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.3181113302707672,
      "learning_rate": 1.755957852440202e-05,
      "loss": 1.0652,
      "step": 5109
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.3535342514514923,
      "learning_rate": 1.753033409019581e-05,
      "loss": 0.9926,
      "step": 5110
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.3418084681034088,
      "learning_rate": 1.7501111688936054e-05,
      "loss": 0.8646,
      "step": 5111
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.7035610675811768,
      "learning_rate": 1.747191132842999e-05,
      "loss": 0.8854,
      "step": 5112
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.30006933212280273,
      "learning_rate": 1.744273301647884e-05,
      "loss": 0.9429,
      "step": 5113
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.2306012213230133,
      "learning_rate": 1.7413576760878048e-05,
      "loss": 0.7919,
      "step": 5114
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.2587260901927948,
      "learning_rate": 1.7384442569417137e-05,
      "loss": 0.6568,
      "step": 5115
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.2760818898677826,
      "learning_rate": 1.7355330449879658e-05,
      "loss": 0.7889,
      "step": 5116
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.15494491159915924,
      "learning_rate": 1.7326240410043415e-05,
      "loss": 0.8293,
      "step": 5117
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.2246229499578476,
      "learning_rate": 1.7297172457680166e-05,
      "loss": 0.7833,
      "step": 5118
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.1764804869890213,
      "learning_rate": 1.7268126600555846e-05,
      "loss": 0.8559,
      "step": 5119
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.452332466840744,
      "learning_rate": 1.7239102846430488e-05,
      "loss": 0.7585,
      "step": 5120
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.3988686800003052,
      "learning_rate": 1.721010120305816e-05,
      "loss": 0.9592,
      "step": 5121
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.22855833172798157,
      "learning_rate": 1.7181121678187085e-05,
      "loss": 0.7209,
      "step": 5122
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.3252435326576233,
      "learning_rate": 1.7152164279559592e-05,
      "loss": 0.6988,
      "step": 5123
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.2965250015258789,
      "learning_rate": 1.712322901491199e-05,
      "loss": 0.7322,
      "step": 5124
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.3628522753715515,
      "learning_rate": 1.70943158919748e-05,
      "loss": 0.7638,
      "step": 5125
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.43177416920661926,
      "learning_rate": 1.706542491847256e-05,
      "loss": 0.8831,
      "step": 5126
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.46616894006729126,
      "learning_rate": 1.7036556102123925e-05,
      "loss": 0.8282,
      "step": 5127
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.3727424740791321,
      "learning_rate": 1.7007709450641597e-05,
      "loss": 0.8538,
      "step": 5128
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.3345264792442322,
      "learning_rate": 1.6978884971732322e-05,
      "loss": 0.6186,
      "step": 5129
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.48204872012138367,
      "learning_rate": 1.6950082673097055e-05,
      "loss": 0.9295,
      "step": 5130
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.1561022251844406,
      "learning_rate": 1.6921302562430675e-05,
      "loss": 0.7885,
      "step": 5131
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.5595120191574097,
      "learning_rate": 1.6892544647422225e-05,
      "loss": 0.7188,
      "step": 5132
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.2694258987903595,
      "learning_rate": 1.686380893575481e-05,
      "loss": 0.822,
      "step": 5133
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.5490002632141113,
      "learning_rate": 1.6835095435105552e-05,
      "loss": 0.6117,
      "step": 5134
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.3212796449661255,
      "learning_rate": 1.680640415314567e-05,
      "loss": 0.8802,
      "step": 5135
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.5267684459686279,
      "learning_rate": 1.677773509754047e-05,
      "loss": 1.0784,
      "step": 5136
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.25281262397766113,
      "learning_rate": 1.674908827594932e-05,
      "loss": 0.7933,
      "step": 5137
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.5043409466743469,
      "learning_rate": 1.672046369602557e-05,
      "loss": 0.7076,
      "step": 5138
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.24716724455356598,
      "learning_rate": 1.6691861365416717e-05,
      "loss": 0.6887,
      "step": 5139
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.2598564922809601,
      "learning_rate": 1.66632812917643e-05,
      "loss": 0.8798,
      "step": 5140
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.2866201400756836,
      "learning_rate": 1.663472348270384e-05,
      "loss": 0.6739,
      "step": 5141
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.6892355680465698,
      "learning_rate": 1.6606187945864994e-05,
      "loss": 1.0021,
      "step": 5142
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.2381112277507782,
      "learning_rate": 1.657767468887147e-05,
      "loss": 0.7383,
      "step": 5143
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.1261584460735321,
      "learning_rate": 1.6549183719340922e-05,
      "loss": 0.626,
      "step": 5144
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.8526644706726074,
      "learning_rate": 1.652071504488516e-05,
      "loss": 0.8163,
      "step": 5145
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.19807153940200806,
      "learning_rate": 1.6492268673110012e-05,
      "loss": 0.6548,
      "step": 5146
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.244464710354805,
      "learning_rate": 1.6463844611615288e-05,
      "loss": 0.7667,
      "step": 5147
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.3387959897518158,
      "learning_rate": 1.643544286799491e-05,
      "loss": 0.7346,
      "step": 5148
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.19720929861068726,
      "learning_rate": 1.640706344983679e-05,
      "loss": 0.8929,
      "step": 5149
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.28531181812286377,
      "learning_rate": 1.637870636472293e-05,
      "loss": 0.8877,
      "step": 5150
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.47768262028694153,
      "learning_rate": 1.6350371620229277e-05,
      "loss": 0.8845,
      "step": 5151
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.6641738414764404,
      "learning_rate": 1.63220592239259e-05,
      "loss": 0.5565,
      "step": 5152
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.28065842390060425,
      "learning_rate": 1.629376918337686e-05,
      "loss": 0.7992,
      "step": 5153
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.24852287769317627,
      "learning_rate": 1.6265501506140223e-05,
      "loss": 0.5451,
      "step": 5154
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.23612752556800842,
      "learning_rate": 1.6237256199768103e-05,
      "loss": 0.6684,
      "step": 5155
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.19906209409236908,
      "learning_rate": 1.6209033271806663e-05,
      "loss": 0.6119,
      "step": 5156
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.29422566294670105,
      "learning_rate": 1.6180832729796e-05,
      "loss": 0.8964,
      "step": 5157
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.2765655219554901,
      "learning_rate": 1.6152654581270386e-05,
      "loss": 0.8527,
      "step": 5158
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.3452732264995575,
      "learning_rate": 1.6124498833757927e-05,
      "loss": 0.7626,
      "step": 5159
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.31826111674308777,
      "learning_rate": 1.6096365494780908e-05,
      "loss": 0.8603,
      "step": 5160
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.2559737265110016,
      "learning_rate": 1.6068254571855467e-05,
      "loss": 0.8892,
      "step": 5161
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.22198861837387085,
      "learning_rate": 1.604016607249189e-05,
      "loss": 1.0093,
      "step": 5162
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.20783297717571259,
      "learning_rate": 1.601210000419444e-05,
      "loss": 0.9479,
      "step": 5163
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.3006274700164795,
      "learning_rate": 1.598405637446131e-05,
      "loss": 0.7846,
      "step": 5164
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.4943687915802002,
      "learning_rate": 1.595603519078478e-05,
      "loss": 0.8251,
      "step": 5165
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.3049621284008026,
      "learning_rate": 1.592803646065113e-05,
      "loss": 0.7636,
      "step": 5166
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.27919602394104004,
      "learning_rate": 1.5900060191540568e-05,
      "loss": 0.7019,
      "step": 5167
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.2514853775501251,
      "learning_rate": 1.5872106390927423e-05,
      "loss": 0.8855,
      "step": 5168
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.2664444148540497,
      "learning_rate": 1.584417506627992e-05,
      "loss": 0.7538,
      "step": 5169
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.2924547791481018,
      "learning_rate": 1.5816266225060262e-05,
      "loss": 0.9114,
      "step": 5170
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.32161474227905273,
      "learning_rate": 1.578837987472478e-05,
      "loss": 0.8798,
      "step": 5171
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.34656086564064026,
      "learning_rate": 1.5760516022723637e-05,
      "loss": 0.6094,
      "step": 5172
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.2457994967699051,
      "learning_rate": 1.5732674676501113e-05,
      "loss": 0.8833,
      "step": 5173
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.47021135687828064,
      "learning_rate": 1.570485584349538e-05,
      "loss": 0.742,
      "step": 5174
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.40836793184280396,
      "learning_rate": 1.567705953113865e-05,
      "loss": 0.8074,
      "step": 5175
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.24938540160655975,
      "learning_rate": 1.5649285746857124e-05,
      "loss": 0.8525,
      "step": 5176
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.1884380728006363,
      "learning_rate": 1.5621534498070924e-05,
      "loss": 0.6111,
      "step": 5177
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.2567780315876007,
      "learning_rate": 1.5593805792194205e-05,
      "loss": 0.754,
      "step": 5178
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.5479152202606201,
      "learning_rate": 1.5566099636635124e-05,
      "loss": 0.7967,
      "step": 5179
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.29188427329063416,
      "learning_rate": 1.553841603879569e-05,
      "loss": 0.6771,
      "step": 5180
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.13336582481861115,
      "learning_rate": 1.5510755006072085e-05,
      "loss": 0.9383,
      "step": 5181
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.30523961782455444,
      "learning_rate": 1.548311654585426e-05,
      "loss": 0.7028,
      "step": 5182
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.7601490616798401,
      "learning_rate": 1.5455500665526246e-05,
      "loss": 1.0146,
      "step": 5183
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.3864254057407379,
      "learning_rate": 1.5427907372466056e-05,
      "loss": 0.806,
      "step": 5184
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.28673696517944336,
      "learning_rate": 1.5400336674045567e-05,
      "loss": 0.8217,
      "step": 5185
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.23444518446922302,
      "learning_rate": 1.5372788577630727e-05,
      "loss": 0.935,
      "step": 5186
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.4126487076282501,
      "learning_rate": 1.5345263090581418e-05,
      "loss": 0.7896,
      "step": 5187
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.3266940712928772,
      "learning_rate": 1.531776022025142e-05,
      "loss": 0.809,
      "step": 5188
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.30416855216026306,
      "learning_rate": 1.5290279973988554e-05,
      "loss": 0.7868,
      "step": 5189
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.867695689201355,
      "learning_rate": 1.5262822359134498e-05,
      "loss": 0.8384,
      "step": 5190
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.18515881896018982,
      "learning_rate": 1.5235387383025035e-05,
      "loss": 0.7279,
      "step": 5191
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.34868085384368896,
      "learning_rate": 1.5207975052989743e-05,
      "loss": 0.8285,
      "step": 5192
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.21266944706439972,
      "learning_rate": 1.5180585376352241e-05,
      "loss": 0.8982,
      "step": 5193
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.3715643584728241,
      "learning_rate": 1.5153218360430088e-05,
      "loss": 1.1988,
      "step": 5194
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.5008381605148315,
      "learning_rate": 1.5125874012534735e-05,
      "loss": 0.7936,
      "step": 5195
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.18841645121574402,
      "learning_rate": 1.5098552339971627e-05,
      "loss": 0.7263,
      "step": 5196
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.27988243103027344,
      "learning_rate": 1.5071253350040183e-05,
      "loss": 0.7407,
      "step": 5197
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.3949677050113678,
      "learning_rate": 1.5043977050033642e-05,
      "loss": 0.7838,
      "step": 5198
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.2513878047466278,
      "learning_rate": 1.5016723447239289e-05,
      "loss": 0.9083,
      "step": 5199
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.47620487213134766,
      "learning_rate": 1.4989492548938322e-05,
      "loss": 0.9304,
      "step": 5200
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.19346459209918976,
      "learning_rate": 1.4962284362405876e-05,
      "loss": 0.7962,
      "step": 5201
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.3277750611305237,
      "learning_rate": 1.4935098894910971e-05,
      "loss": 0.966,
      "step": 5202
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.3197561502456665,
      "learning_rate": 1.4907936153716607e-05,
      "loss": 0.8415,
      "step": 5203
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.20881159603595734,
      "learning_rate": 1.4880796146079713e-05,
      "loss": 0.6725,
      "step": 5204
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.34348779916763306,
      "learning_rate": 1.4853678879251099e-05,
      "loss": 1.0298,
      "step": 5205
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.2757747769355774,
      "learning_rate": 1.4826584360475538e-05,
      "loss": 0.8969,
      "step": 5206
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.2311715930700302,
      "learning_rate": 1.479951259699175e-05,
      "loss": 1.0652,
      "step": 5207
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.23977698385715485,
      "learning_rate": 1.4772463596032294e-05,
      "loss": 0.8063,
      "step": 5208
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.6050183773040771,
      "learning_rate": 1.4745437364823722e-05,
      "loss": 0.7543,
      "step": 5209
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.31780630350112915,
      "learning_rate": 1.4718433910586481e-05,
      "loss": 0.668,
      "step": 5210
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.31753793358802795,
      "learning_rate": 1.4691453240534914e-05,
      "loss": 0.801,
      "step": 5211
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.4529934525489807,
      "learning_rate": 1.4664495361877284e-05,
      "loss": 0.7973,
      "step": 5212
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.2372206449508667,
      "learning_rate": 1.4637560281815799e-05,
      "loss": 0.7304,
      "step": 5213
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.2868511378765106,
      "learning_rate": 1.4610648007546545e-05,
      "loss": 0.678,
      "step": 5214
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.24409839510917664,
      "learning_rate": 1.4583758546259496e-05,
      "loss": 0.8857,
      "step": 5215
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.20283648371696472,
      "learning_rate": 1.455689190513857e-05,
      "loss": 0.8218,
      "step": 5216
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.27644628286361694,
      "learning_rate": 1.4530048091361603e-05,
      "loss": 0.729,
      "step": 5217
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.6460586190223694,
      "learning_rate": 1.4503227112100237e-05,
      "loss": 0.7732,
      "step": 5218
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.36772215366363525,
      "learning_rate": 1.4476428974520117e-05,
      "loss": 0.8525,
      "step": 5219
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.2388809770345688,
      "learning_rate": 1.4449653685780772e-05,
      "loss": 1.2209,
      "step": 5220
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.22696538269519806,
      "learning_rate": 1.4422901253035548e-05,
      "loss": 0.9214,
      "step": 5221
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.25695472955703735,
      "learning_rate": 1.439617168343177e-05,
      "loss": 0.7001,
      "step": 5222
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.3694773018360138,
      "learning_rate": 1.4369464984110615e-05,
      "loss": 1.081,
      "step": 5223
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.21869488060474396,
      "learning_rate": 1.4342781162207186e-05,
      "loss": 0.7976,
      "step": 5224
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.2852613627910614,
      "learning_rate": 1.4316120224850393e-05,
      "loss": 0.8541,
      "step": 5225
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.5830526351928711,
      "learning_rate": 1.4289482179163127e-05,
      "loss": 0.9466,
      "step": 5226
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.2802093029022217,
      "learning_rate": 1.4262867032262118e-05,
      "loss": 0.8673,
      "step": 5227
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.3433513045310974,
      "learning_rate": 1.4236274791257964e-05,
      "loss": 0.717,
      "step": 5228
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.4059070944786072,
      "learning_rate": 1.4209705463255162e-05,
      "loss": 0.8244,
      "step": 5229
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.35097166895866394,
      "learning_rate": 1.4183159055352114e-05,
      "loss": 1.0121,
      "step": 5230
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.36312493681907654,
      "learning_rate": 1.4156635574641009e-05,
      "loss": 0.8325,
      "step": 5231
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.2078256607055664,
      "learning_rate": 1.4130135028208058e-05,
      "loss": 0.7709,
      "step": 5232
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.2139301598072052,
      "learning_rate": 1.4103657423133221e-05,
      "loss": 0.8448,
      "step": 5233
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.3309754729270935,
      "learning_rate": 1.4077202766490328e-05,
      "loss": 0.7856,
      "step": 5234
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.1942647248506546,
      "learning_rate": 1.4050771065347179e-05,
      "loss": 0.8961,
      "step": 5235
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.256453275680542,
      "learning_rate": 1.4024362326765338e-05,
      "loss": 0.7346,
      "step": 5236
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.26036423444747925,
      "learning_rate": 1.3997976557800318e-05,
      "loss": 0.8338,
      "step": 5237
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.20879383385181427,
      "learning_rate": 1.3971613765501412e-05,
      "loss": 0.8738,
      "step": 5238
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.5554811358451843,
      "learning_rate": 1.394527395691182e-05,
      "loss": 0.765,
      "step": 5239
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.3930339217185974,
      "learning_rate": 1.3918957139068633e-05,
      "loss": 0.8994,
      "step": 5240
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.366325706243515,
      "learning_rate": 1.3892663319002686e-05,
      "loss": 0.911,
      "step": 5241
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.22498919069766998,
      "learning_rate": 1.3866392503738857e-05,
      "loss": 1.0751,
      "step": 5242
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.2640080749988556,
      "learning_rate": 1.3840144700295699e-05,
      "loss": 0.7526,
      "step": 5243
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.2596164047718048,
      "learning_rate": 1.381391991568567e-05,
      "loss": 0.7021,
      "step": 5244
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.19258880615234375,
      "learning_rate": 1.3787718156915164e-05,
      "loss": 0.8263,
      "step": 5245
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.23817722499370575,
      "learning_rate": 1.3761539430984282e-05,
      "loss": 0.7623,
      "step": 5246
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.3003074526786804,
      "learning_rate": 1.3735383744887087e-05,
      "loss": 0.7108,
      "step": 5247
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.3966521620750427,
      "learning_rate": 1.3709251105611453e-05,
      "loss": 0.7179,
      "step": 5248
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.4924822449684143,
      "learning_rate": 1.3683141520139041e-05,
      "loss": 0.7558,
      "step": 5249
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.25907841324806213,
      "learning_rate": 1.3657054995445428e-05,
      "loss": 0.8084,
      "step": 5250
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.12904129922389984,
      "learning_rate": 1.3630991538500026e-05,
      "loss": 0.9257,
      "step": 5251
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.2319835126399994,
      "learning_rate": 1.3604951156265998e-05,
      "loss": 0.8396,
      "step": 5252
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.1447056084871292,
      "learning_rate": 1.357893385570046e-05,
      "loss": 0.6457,
      "step": 5253
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.28192755579948425,
      "learning_rate": 1.3552939643754237e-05,
      "loss": 0.9893,
      "step": 5254
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.23622360825538635,
      "learning_rate": 1.352696852737214e-05,
      "loss": 0.7926,
      "step": 5255
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.24374669790267944,
      "learning_rate": 1.3501020513492668e-05,
      "loss": 0.9553,
      "step": 5256
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.24048137664794922,
      "learning_rate": 1.3475095609048204e-05,
      "loss": 0.6979,
      "step": 5257
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.28218674659729004,
      "learning_rate": 1.3449193820964989e-05,
      "loss": 0.8575,
      "step": 5258
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.2788163721561432,
      "learning_rate": 1.3423315156163007e-05,
      "loss": 0.6848,
      "step": 5259
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.22404855489730835,
      "learning_rate": 1.339745962155613e-05,
      "loss": 0.8845,
      "step": 5260
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.6954174637794495,
      "learning_rate": 1.3371627224052074e-05,
      "loss": 0.6903,
      "step": 5261
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.2708692252635956,
      "learning_rate": 1.3345817970552265e-05,
      "loss": 0.9767,
      "step": 5262
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.5872067213058472,
      "learning_rate": 1.3320031867952055e-05,
      "loss": 0.8075,
      "step": 5263
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.3320034444332123,
      "learning_rate": 1.3294268923140552e-05,
      "loss": 0.8604,
      "step": 5264
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.24356430768966675,
      "learning_rate": 1.3268529143000718e-05,
      "loss": 0.9854,
      "step": 5265
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.2628888487815857,
      "learning_rate": 1.3242812534409277e-05,
      "loss": 0.8845,
      "step": 5266
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.21012075245380402,
      "learning_rate": 1.3217119104236785e-05,
      "loss": 0.6474,
      "step": 5267
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.19318343698978424,
      "learning_rate": 1.3191448859347644e-05,
      "loss": 0.5575,
      "step": 5268
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.3870421350002289,
      "learning_rate": 1.3165801806599975e-05,
      "loss": 1.0473,
      "step": 5269
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.3012453019618988,
      "learning_rate": 1.3140177952845777e-05,
      "loss": 0.5301,
      "step": 5270
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.25753065943717957,
      "learning_rate": 1.3114577304930853e-05,
      "loss": 0.9991,
      "step": 5271
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.3320007920265198,
      "learning_rate": 1.308899986969473e-05,
      "loss": 0.9766,
      "step": 5272
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.23815910518169403,
      "learning_rate": 1.3063445653970819e-05,
      "loss": 0.7841,
      "step": 5273
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.3054928779602051,
      "learning_rate": 1.3037914664586303e-05,
      "loss": 0.5243,
      "step": 5274
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.2512281835079193,
      "learning_rate": 1.3012406908362108e-05,
      "loss": 0.9347,
      "step": 5275
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.17689451575279236,
      "learning_rate": 1.2986922392113021e-05,
      "loss": 0.6443,
      "step": 5276
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.17744000256061554,
      "learning_rate": 1.29614611226476e-05,
      "loss": 0.6718,
      "step": 5277
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.4693145751953125,
      "learning_rate": 1.2936023106768192e-05,
      "loss": 0.795,
      "step": 5278
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.2674323618412018,
      "learning_rate": 1.291060835127088e-05,
      "loss": 0.6943,
      "step": 5279
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.3321923315525055,
      "learning_rate": 1.2885216862945615e-05,
      "loss": 0.9472,
      "step": 5280
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.43088120222091675,
      "learning_rate": 1.2859848648576112e-05,
      "loss": 0.6036,
      "step": 5281
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.6984078884124756,
      "learning_rate": 1.2834503714939794e-05,
      "loss": 0.9551,
      "step": 5282
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.3128681182861328,
      "learning_rate": 1.280918206880798e-05,
      "loss": 0.854,
      "step": 5283
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.3274467885494232,
      "learning_rate": 1.27838837169457e-05,
      "loss": 0.6728,
      "step": 5284
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.32977035641670227,
      "learning_rate": 1.2758608666111716e-05,
      "loss": 0.6756,
      "step": 5285
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.18490932881832123,
      "learning_rate": 1.273335692305866e-05,
      "loss": 0.7234,
      "step": 5286
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.7123404145240784,
      "learning_rate": 1.270812849453289e-05,
      "loss": 0.847,
      "step": 5287
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.46946394443511963,
      "learning_rate": 1.268292338727456e-05,
      "loss": 1.0068,
      "step": 5288
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.20769204199314117,
      "learning_rate": 1.2657741608017537e-05,
      "loss": 0.812,
      "step": 5289
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.24338330328464508,
      "learning_rate": 1.2632583163489509e-05,
      "loss": 0.6977,
      "step": 5290
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.2646169364452362,
      "learning_rate": 1.260744806041193e-05,
      "loss": 0.6643,
      "step": 5291
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.2627338171005249,
      "learning_rate": 1.2582336305499964e-05,
      "loss": 0.9854,
      "step": 5292
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.31532543897628784,
      "learning_rate": 1.2557247905462589e-05,
      "loss": 0.8497,
      "step": 5293
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.3365263044834137,
      "learning_rate": 1.2532182867002551e-05,
      "loss": 0.71,
      "step": 5294
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.4534619450569153,
      "learning_rate": 1.2507141196816286e-05,
      "loss": 0.7367,
      "step": 5295
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.26748600602149963,
      "learning_rate": 1.2482122901594096e-05,
      "loss": 0.8148,
      "step": 5296
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.33925941586494446,
      "learning_rate": 1.2457127988019923e-05,
      "loss": 0.684,
      "step": 5297
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.2086637020111084,
      "learning_rate": 1.2432156462771527e-05,
      "loss": 0.7394,
      "step": 5298
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.2624996304512024,
      "learning_rate": 1.2407208332520436e-05,
      "loss": 0.8658,
      "step": 5299
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.2482568323612213,
      "learning_rate": 1.2382283603931844e-05,
      "loss": 1.0058,
      "step": 5300
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.30769598484039307,
      "learning_rate": 1.2357382283664809e-05,
      "loss": 0.8767,
      "step": 5301
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.5423340201377869,
      "learning_rate": 1.2332504378372023e-05,
      "loss": 0.5714,
      "step": 5302
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.21141280233860016,
      "learning_rate": 1.230764989469999e-05,
      "loss": 0.6969,
      "step": 5303
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.3975145220756531,
      "learning_rate": 1.2282818839288957e-05,
      "loss": 0.9819,
      "step": 5304
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.2981437146663666,
      "learning_rate": 1.2258011218772847e-05,
      "loss": 0.9789,
      "step": 5305
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.26101917028427124,
      "learning_rate": 1.2233227039779438e-05,
      "loss": 0.909,
      "step": 5306
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.2767777144908905,
      "learning_rate": 1.2208466308930133e-05,
      "loss": 0.6688,
      "step": 5307
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.3838098347187042,
      "learning_rate": 1.2183729032840085e-05,
      "loss": 1.1167,
      "step": 5308
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.3666408360004425,
      "learning_rate": 1.2159015218118297e-05,
      "loss": 0.7245,
      "step": 5309
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.27144503593444824,
      "learning_rate": 1.213432487136733e-05,
      "loss": 0.8075,
      "step": 5310
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.5768581628799438,
      "learning_rate": 1.210965799918361e-05,
      "loss": 0.825,
      "step": 5311
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.1906348317861557,
      "learning_rate": 1.208501460815724e-05,
      "loss": 0.7063,
      "step": 5312
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.24960729479789734,
      "learning_rate": 1.2060394704872036e-05,
      "loss": 0.7605,
      "step": 5313
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.34096482396125793,
      "learning_rate": 1.2035798295905553e-05,
      "loss": 0.8769,
      "step": 5314
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.13168393075466156,
      "learning_rate": 1.2011225387829107e-05,
      "loss": 0.9165,
      "step": 5315
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.30755436420440674,
      "learning_rate": 1.1986675987207662e-05,
      "loss": 0.7379,
      "step": 5316
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.5963638424873352,
      "learning_rate": 1.1962150100599967e-05,
      "loss": 0.7844,
      "step": 5317
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.22173595428466797,
      "learning_rate": 1.1937647734558411e-05,
      "loss": 0.8505,
      "step": 5318
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.33042365312576294,
      "learning_rate": 1.1913168895629234e-05,
      "loss": 1.0659,
      "step": 5319
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.2043309509754181,
      "learning_rate": 1.1888713590352241e-05,
      "loss": 0.6857,
      "step": 5320
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.2407718300819397,
      "learning_rate": 1.1864281825261037e-05,
      "loss": 0.8396,
      "step": 5321
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.28031590580940247,
      "learning_rate": 1.1839873606882935e-05,
      "loss": 0.9278,
      "step": 5322
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.29472842812538147,
      "learning_rate": 1.1815488941738906e-05,
      "loss": 0.7402,
      "step": 5323
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.37504512071609497,
      "learning_rate": 1.1791127836343663e-05,
      "loss": 0.8294,
      "step": 5324
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.18520741164684296,
      "learning_rate": 1.1766790297205655e-05,
      "loss": 0.7161,
      "step": 5325
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.4716947674751282,
      "learning_rate": 1.1742476330826969e-05,
      "loss": 0.6483,
      "step": 5326
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.33269068598747253,
      "learning_rate": 1.171818594370343e-05,
      "loss": 0.6871,
      "step": 5327
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.2758256793022156,
      "learning_rate": 1.1693919142324571e-05,
      "loss": 0.9016,
      "step": 5328
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.2213318794965744,
      "learning_rate": 1.1669675933173641e-05,
      "loss": 0.7813,
      "step": 5329
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.3317961096763611,
      "learning_rate": 1.164545632272751e-05,
      "loss": 0.6513,
      "step": 5330
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.3951967656612396,
      "learning_rate": 1.1621260317456806e-05,
      "loss": 0.6724,
      "step": 5331
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.38621950149536133,
      "learning_rate": 1.1597087923825866e-05,
      "loss": 0.8798,
      "step": 5332
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.2236240804195404,
      "learning_rate": 1.1572939148292649e-05,
      "loss": 0.9019,
      "step": 5333
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.22623153030872345,
      "learning_rate": 1.154881399730886e-05,
      "loss": 0.769,
      "step": 5334
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.39331960678100586,
      "learning_rate": 1.15247124773199e-05,
      "loss": 0.9139,
      "step": 5335
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.27840253710746765,
      "learning_rate": 1.1500634594764792e-05,
      "loss": 0.8494,
      "step": 5336
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.2759804129600525,
      "learning_rate": 1.1476580356076317e-05,
      "loss": 0.8025,
      "step": 5337
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.2551261782646179,
      "learning_rate": 1.1452549767680898e-05,
      "loss": 0.9253,
      "step": 5338
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.2298641800880432,
      "learning_rate": 1.1428542835998646e-05,
      "loss": 1.0836,
      "step": 5339
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.502751886844635,
      "learning_rate": 1.1404559567443352e-05,
      "loss": 0.7725,
      "step": 5340
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.20915134251117706,
      "learning_rate": 1.1380599968422489e-05,
      "loss": 0.634,
      "step": 5341
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.18219879269599915,
      "learning_rate": 1.135666404533723e-05,
      "loss": 0.8766,
      "step": 5342
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.2626306116580963,
      "learning_rate": 1.1332751804582353e-05,
      "loss": 0.8845,
      "step": 5343
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.2845267653465271,
      "learning_rate": 1.1308863252546387e-05,
      "loss": 1.028,
      "step": 5344
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.21452324092388153,
      "learning_rate": 1.1284998395611513e-05,
      "loss": 0.7843,
      "step": 5345
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.39808887243270874,
      "learning_rate": 1.1261157240153497e-05,
      "loss": 0.8375,
      "step": 5346
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.40340688824653625,
      "learning_rate": 1.1237339792541934e-05,
      "loss": 0.7905,
      "step": 5347
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.32074031233787537,
      "learning_rate": 1.1213546059139945e-05,
      "loss": 0.8181,
      "step": 5348
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.2292618602514267,
      "learning_rate": 1.1189776046304357e-05,
      "loss": 0.8536,
      "step": 5349
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.3355071246623993,
      "learning_rate": 1.1166029760385687e-05,
      "loss": 1.0329,
      "step": 5350
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.2211468517780304,
      "learning_rate": 1.1142307207728087e-05,
      "loss": 0.8689,
      "step": 5351
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.4152647852897644,
      "learning_rate": 1.1118608394669394e-05,
      "loss": 0.8642,
      "step": 5352
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.45360198616981506,
      "learning_rate": 1.1094933327541045e-05,
      "loss": 1.0546,
      "step": 5353
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.1813628226518631,
      "learning_rate": 1.1071282012668205e-05,
      "loss": 0.6707,
      "step": 5354
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.8772658705711365,
      "learning_rate": 1.104765445636966e-05,
      "loss": 0.8995,
      "step": 5355
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.1827099472284317,
      "learning_rate": 1.102405066495782e-05,
      "loss": 0.8076,
      "step": 5356
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.2800893783569336,
      "learning_rate": 1.1000470644738781e-05,
      "loss": 0.7494,
      "step": 5357
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.20711493492126465,
      "learning_rate": 1.0976914402012318e-05,
      "loss": 0.5795,
      "step": 5358
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.2223018854856491,
      "learning_rate": 1.095338194307175e-05,
      "loss": 0.8181,
      "step": 5359
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.31360167264938354,
      "learning_rate": 1.0929873274204194e-05,
      "loss": 0.8682,
      "step": 5360
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.6433094143867493,
      "learning_rate": 1.0906388401690259e-05,
      "loss": 0.5326,
      "step": 5361
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.6344923377037048,
      "learning_rate": 1.0882927331804282e-05,
      "loss": 0.92,
      "step": 5362
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.20100882649421692,
      "learning_rate": 1.0859490070814237e-05,
      "loss": 0.8763,
      "step": 5363
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.4890459179878235,
      "learning_rate": 1.0836076624981705e-05,
      "loss": 0.8123,
      "step": 5364
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.2278340756893158,
      "learning_rate": 1.0812687000561939e-05,
      "loss": 0.7038,
      "step": 5365
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.18811222910881042,
      "learning_rate": 1.0789321203803782e-05,
      "loss": 0.747,
      "step": 5366
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.27613118290901184,
      "learning_rate": 1.0765979240949753e-05,
      "loss": 0.7307,
      "step": 5367
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.3723534345626831,
      "learning_rate": 1.074266111823602e-05,
      "loss": 0.5939,
      "step": 5368
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.7304198145866394,
      "learning_rate": 1.0719366841892287e-05,
      "loss": 0.6314,
      "step": 5369
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.15103799104690552,
      "learning_rate": 1.069609641814202e-05,
      "loss": 0.8736,
      "step": 5370
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.5207322239875793,
      "learning_rate": 1.0672849853202216e-05,
      "loss": 0.818,
      "step": 5371
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.3695005476474762,
      "learning_rate": 1.0649627153283493e-05,
      "loss": 0.9434,
      "step": 5372
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.18975605070590973,
      "learning_rate": 1.0626428324590188e-05,
      "loss": 0.845,
      "step": 5373
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.403379887342453,
      "learning_rate": 1.0603253373320155e-05,
      "loss": 0.857,
      "step": 5374
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.4586794078350067,
      "learning_rate": 1.0580102305664918e-05,
      "loss": 0.6851,
      "step": 5375
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.2872927188873291,
      "learning_rate": 1.0556975127809642e-05,
      "loss": 1.0297,
      "step": 5376
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.23494884371757507,
      "learning_rate": 1.0533871845933053e-05,
      "loss": 0.7567,
      "step": 5377
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.3141128122806549,
      "learning_rate": 1.0510792466207531e-05,
      "loss": 1.0262,
      "step": 5378
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.23163321614265442,
      "learning_rate": 1.048773699479907e-05,
      "loss": 0.8399,
      "step": 5379
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.36350494623184204,
      "learning_rate": 1.046470543786725e-05,
      "loss": 0.8162,
      "step": 5380
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.7822958827018738,
      "learning_rate": 1.0441697801565308e-05,
      "loss": 0.6918,
      "step": 5381
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.2778388261795044,
      "learning_rate": 1.0418714092039993e-05,
      "loss": 0.822,
      "step": 5382
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.3899458944797516,
      "learning_rate": 1.0395754315431838e-05,
      "loss": 0.9249,
      "step": 5383
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.23222902417182922,
      "learning_rate": 1.037281847787478e-05,
      "loss": 0.9389,
      "step": 5384
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.40883636474609375,
      "learning_rate": 1.0349906585496506e-05,
      "loss": 0.9088,
      "step": 5385
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.25152891874313354,
      "learning_rate": 1.0327018644418252e-05,
      "loss": 1.0241,
      "step": 5386
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.1388360559940338,
      "learning_rate": 1.0304154660754828e-05,
      "loss": 0.7055,
      "step": 5387
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.25939181447029114,
      "learning_rate": 1.0281314640614692e-05,
      "loss": 0.7574,
      "step": 5388
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.331466406583786,
      "learning_rate": 1.0258498590099897e-05,
      "loss": 0.8577,
      "step": 5389
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.28887268900871277,
      "learning_rate": 1.0235706515306043e-05,
      "loss": 0.8105,
      "step": 5390
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.2771788239479065,
      "learning_rate": 1.0212938422322371e-05,
      "loss": 0.8511,
      "step": 5391
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.37625956535339355,
      "learning_rate": 1.0190194317231706e-05,
      "loss": 0.9343,
      "step": 5392
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.30966660380363464,
      "learning_rate": 1.0167474206110473e-05,
      "loss": 0.7515,
      "step": 5393
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.3218461871147156,
      "learning_rate": 1.0144778095028651e-05,
      "loss": 0.9536,
      "step": 5394
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.2043931484222412,
      "learning_rate": 1.0122105990049823e-05,
      "loss": 0.8146,
      "step": 5395
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.231362447142601,
      "learning_rate": 1.00994578972312e-05,
      "loss": 0.7144,
      "step": 5396
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.19252467155456543,
      "learning_rate": 1.0076833822623511e-05,
      "loss": 0.7385,
      "step": 5397
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.19032225012779236,
      "learning_rate": 1.0054233772271105e-05,
      "loss": 0.7584,
      "step": 5398
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.8616867661476135,
      "learning_rate": 1.0031657752211932e-05,
      "loss": 0.6315,
      "step": 5399
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.19415231049060822,
      "learning_rate": 1.0009105768477456e-05,
      "loss": 0.955,
      "step": 5400
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.6436915993690491,
      "learning_rate": 9.986577827092791e-06,
      "loss": 0.6116,
      "step": 5401
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.4399048686027527,
      "learning_rate": 9.96407393407659e-06,
      "loss": 1.231,
      "step": 5402
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.23517221212387085,
      "learning_rate": 9.941594095441098e-06,
      "loss": 1.0301,
      "step": 5403
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.3621818423271179,
      "learning_rate": 9.9191383171921e-06,
      "loss": 0.7711,
      "step": 5404
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.2540998160839081,
      "learning_rate": 9.896706605328998e-06,
      "loss": 0.9331,
      "step": 5405
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.16933466494083405,
      "learning_rate": 9.874298965844752e-06,
      "loss": 0.8432,
      "step": 5406
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.2451665997505188,
      "learning_rate": 9.851915404725843e-06,
      "loss": 0.8646,
      "step": 5407
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.2650547921657562,
      "learning_rate": 9.829555927952393e-06,
      "loss": 0.831,
      "step": 5408
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.23421478271484375,
      "learning_rate": 9.807220541498063e-06,
      "loss": 0.8445,
      "step": 5409
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.31643182039260864,
      "learning_rate": 9.78490925133001e-06,
      "loss": 0.8634,
      "step": 5410
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.40097254514694214,
      "learning_rate": 9.762622063409089e-06,
      "loss": 1.0442,
      "step": 5411
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.6745843291282654,
      "learning_rate": 9.740358983689612e-06,
      "loss": 0.5733,
      "step": 5412
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.21455241739749908,
      "learning_rate": 9.718120018119448e-06,
      "loss": 1.1358,
      "step": 5413
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.24572868645191193,
      "learning_rate": 9.695905172640085e-06,
      "loss": 0.7972,
      "step": 5414
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.41716280579566956,
      "learning_rate": 9.673714453186522e-06,
      "loss": 0.7883,
      "step": 5415
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.32033878564834595,
      "learning_rate": 9.65154786568735e-06,
      "loss": 1.0027,
      "step": 5416
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.6851819753646851,
      "learning_rate": 9.629405416064652e-06,
      "loss": 0.8109,
      "step": 5417
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.1714022159576416,
      "learning_rate": 9.60728711023412e-06,
      "loss": 0.6957,
      "step": 5418
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.3570953607559204,
      "learning_rate": 9.585192954104982e-06,
      "loss": 0.7587,
      "step": 5419
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.3962930142879486,
      "learning_rate": 9.563122953579983e-06,
      "loss": 0.9274,
      "step": 5420
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.3917682468891144,
      "learning_rate": 9.541077114555453e-06,
      "loss": 0.9858,
      "step": 5421
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.19101986289024353,
      "learning_rate": 9.519055442921277e-06,
      "loss": 0.6963,
      "step": 5422
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.32521313428878784,
      "learning_rate": 9.497057944560783e-06,
      "loss": 0.7864,
      "step": 5423
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.2500469982624054,
      "learning_rate": 9.475084625351004e-06,
      "loss": 0.7304,
      "step": 5424
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.4489002525806427,
      "learning_rate": 9.45313549116238e-06,
      "loss": 1.0134,
      "step": 5425
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.28515881299972534,
      "learning_rate": 9.431210547858926e-06,
      "loss": 1.0243,
      "step": 5426
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.19018016755580902,
      "learning_rate": 9.40930980129825e-06,
      "loss": 0.8657,
      "step": 5427
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.43402591347694397,
      "learning_rate": 9.38743325733139e-06,
      "loss": 0.8351,
      "step": 5428
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.2124609798192978,
      "learning_rate": 9.365580921803018e-06,
      "loss": 0.706,
      "step": 5429
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.250235915184021,
      "learning_rate": 9.343752800551275e-06,
      "loss": 0.8655,
      "step": 5430
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.20288130640983582,
      "learning_rate": 9.321948899407862e-06,
      "loss": 0.8767,
      "step": 5431
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.2657746374607086,
      "learning_rate": 9.300169224198018e-06,
      "loss": 0.8259,
      "step": 5432
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.2907962203025818,
      "learning_rate": 9.278413780740435e-06,
      "loss": 0.898,
      "step": 5433
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.5493448376655579,
      "learning_rate": 9.256682574847475e-06,
      "loss": 0.9666,
      "step": 5434
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.17407216131687164,
      "learning_rate": 9.234975612324904e-06,
      "loss": 0.7988,
      "step": 5435
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.36483585834503174,
      "learning_rate": 9.213292898971993e-06,
      "loss": 0.9318,
      "step": 5436
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.28752732276916504,
      "learning_rate": 9.191634440581675e-06,
      "loss": 0.8747,
      "step": 5437
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.14174549281597137,
      "learning_rate": 9.170000242940247e-06,
      "loss": 0.6348,
      "step": 5438
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.2514326870441437,
      "learning_rate": 9.148390311827625e-06,
      "loss": 0.7869,
      "step": 5439
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.20489424467086792,
      "learning_rate": 9.12680465301723e-06,
      "loss": 0.6364,
      "step": 5440
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.3148215711116791,
      "learning_rate": 9.105243272275942e-06,
      "loss": 0.7898,
      "step": 5441
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.24551069736480713,
      "learning_rate": 9.083706175364193e-06,
      "loss": 0.8384,
      "step": 5442
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.31662458181381226,
      "learning_rate": 9.062193368035953e-06,
      "loss": 0.8628,
      "step": 5443
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.22828732430934906,
      "learning_rate": 9.04070485603864e-06,
      "loss": 0.7578,
      "step": 5444
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.25691694021224976,
      "learning_rate": 9.019240645113258e-06,
      "loss": 0.7123,
      "step": 5445
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.22179467976093292,
      "learning_rate": 8.997800740994223e-06,
      "loss": 0.8239,
      "step": 5446
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.8269292116165161,
      "learning_rate": 8.976385149409561e-06,
      "loss": 0.988,
      "step": 5447
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.2988883852958679,
      "learning_rate": 8.95499387608072e-06,
      "loss": 0.8624,
      "step": 5448
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.43137404322624207,
      "learning_rate": 8.933626926722705e-06,
      "loss": 0.7185,
      "step": 5449
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.6643619537353516,
      "learning_rate": 8.912284307044006e-06,
      "loss": 0.5992,
      "step": 5450
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.2581159472465515,
      "learning_rate": 8.890966022746572e-06,
      "loss": 0.6843,
      "step": 5451
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.46295398473739624,
      "learning_rate": 8.869672079525903e-06,
      "loss": 0.6302,
      "step": 5452
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.32323768734931946,
      "learning_rate": 8.848402483071017e-06,
      "loss": 0.7498,
      "step": 5453
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.24633827805519104,
      "learning_rate": 8.82715723906432e-06,
      "loss": 0.9875,
      "step": 5454
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.3437221050262451,
      "learning_rate": 8.805936353181832e-06,
      "loss": 0.8899,
      "step": 5455
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.18521437048912048,
      "learning_rate": 8.784739831093003e-06,
      "loss": 0.8174,
      "step": 5456
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.362385630607605,
      "learning_rate": 8.763567678460804e-06,
      "loss": 1.0657,
      "step": 5457
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.21816487610340118,
      "learning_rate": 8.742419900941634e-06,
      "loss": 0.6194,
      "step": 5458
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.33175718784332275,
      "learning_rate": 8.72129650418545e-06,
      "loss": 0.83,
      "step": 5459
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.2699546217918396,
      "learning_rate": 8.700197493835694e-06,
      "loss": 0.6636,
      "step": 5460
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.2981574535369873,
      "learning_rate": 8.67912287552921e-06,
      "loss": 0.8316,
      "step": 5461
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.23604276776313782,
      "learning_rate": 8.65807265489641e-06,
      "loss": 0.7442,
      "step": 5462
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.38264212012290955,
      "learning_rate": 8.63704683756119e-06,
      "loss": 0.7393,
      "step": 5463
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.2216813713312149,
      "learning_rate": 8.61604542914085e-06,
      "loss": 0.9116,
      "step": 5464
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.7237443327903748,
      "learning_rate": 8.595068435246222e-06,
      "loss": 0.6427,
      "step": 5465
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.38282090425491333,
      "learning_rate": 8.574115861481636e-06,
      "loss": 1.1425,
      "step": 5466
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.25279107689857483,
      "learning_rate": 8.553187713444866e-06,
      "loss": 1.0494,
      "step": 5467
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.5931234359741211,
      "learning_rate": 8.532283996727142e-06,
      "loss": 0.7875,
      "step": 5468
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.21334946155548096,
      "learning_rate": 8.511404716913207e-06,
      "loss": 0.7856,
      "step": 5469
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.3871012032032013,
      "learning_rate": 8.490549879581266e-06,
      "loss": 0.9775,
      "step": 5470
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.2293872982263565,
      "learning_rate": 8.469719490302951e-06,
      "loss": 0.716,
      "step": 5471
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.25139331817626953,
      "learning_rate": 8.448913554643424e-06,
      "loss": 0.9388,
      "step": 5472
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.278812050819397,
      "learning_rate": 8.428132078161311e-06,
      "loss": 0.7682,
      "step": 5473
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.23589928448200226,
      "learning_rate": 8.407375066408618e-06,
      "loss": 0.6343,
      "step": 5474
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.214908629655838,
      "learning_rate": 8.38664252493092e-06,
      "loss": 0.9448,
      "step": 5475
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.27407586574554443,
      "learning_rate": 8.365934459267222e-06,
      "loss": 0.8085,
      "step": 5476
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.6388877034187317,
      "learning_rate": 8.34525087494994e-06,
      "loss": 0.8254,
      "step": 5477
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.32728561758995056,
      "learning_rate": 8.324591777505009e-06,
      "loss": 0.7417,
      "step": 5478
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.3100265860557556,
      "learning_rate": 8.303957172451793e-06,
      "loss": 0.6661,
      "step": 5479
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.2476683109998703,
      "learning_rate": 8.283347065303138e-06,
      "loss": 0.7537,
      "step": 5480
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.2740892767906189,
      "learning_rate": 8.262761461565315e-06,
      "loss": 0.7417,
      "step": 5481
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.22573719918727875,
      "learning_rate": 8.242200366738051e-06,
      "loss": 0.6931,
      "step": 5482
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.4638975262641907,
      "learning_rate": 8.221663786314571e-06,
      "loss": 0.681,
      "step": 5483
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.7854638695716858,
      "learning_rate": 8.201151725781475e-06,
      "loss": 0.8229,
      "step": 5484
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.3345983326435089,
      "learning_rate": 8.180664190618859e-06,
      "loss": 0.9359,
      "step": 5485
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.254238098859787,
      "learning_rate": 8.160201186300299e-06,
      "loss": 0.9325,
      "step": 5486
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.25630128383636475,
      "learning_rate": 8.13976271829271e-06,
      "loss": 0.6366,
      "step": 5487
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.34111103415489197,
      "learning_rate": 8.11934879205659e-06,
      "loss": 1.0122,
      "step": 5488
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.3300560712814331,
      "learning_rate": 8.09895941304577e-06,
      "loss": 0.8086,
      "step": 5489
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.36495330929756165,
      "learning_rate": 8.078594586707589e-06,
      "loss": 0.8046,
      "step": 5490
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.29619932174682617,
      "learning_rate": 8.058254318482761e-06,
      "loss": 0.9267,
      "step": 5491
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.24368120729923248,
      "learning_rate": 8.037938613805507e-06,
      "loss": 0.8696,
      "step": 5492
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.697213351726532,
      "learning_rate": 8.017647478103462e-06,
      "loss": 0.9628,
      "step": 5493
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.4448893070220947,
      "learning_rate": 7.99738091679767e-06,
      "loss": 0.6322,
      "step": 5494
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.2724793255329132,
      "learning_rate": 7.977138935302641e-06,
      "loss": 0.8479,
      "step": 5495
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.19835826754570007,
      "learning_rate": 7.95692153902633e-06,
      "loss": 0.89,
      "step": 5496
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.2664680778980255,
      "learning_rate": 7.936728733370046e-06,
      "loss": 0.8461,
      "step": 5497
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.2664680778980255,
      "learning_rate": 7.936728733370046e-06,
      "loss": 0.9355,
      "step": 5498
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.19494745135307312,
      "learning_rate": 7.916560523728666e-06,
      "loss": 0.7468,
      "step": 5499
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.30522865056991577,
      "learning_rate": 7.896416915490357e-06,
      "loss": 0.805,
      "step": 5500
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.22797158360481262,
      "learning_rate": 7.876297914036768e-06,
      "loss": 0.8933,
      "step": 5501
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.1529056876897812,
      "learning_rate": 7.85620352474301e-06,
      "loss": 0.7571,
      "step": 5502
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.1627064049243927,
      "learning_rate": 7.83613375297757e-06,
      "loss": 0.8109,
      "step": 5503
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.5887826681137085,
      "learning_rate": 7.816088604102356e-06,
      "loss": 0.7254,
      "step": 5504
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.20010797679424286,
      "learning_rate": 7.796068083472763e-06,
      "loss": 0.8941,
      "step": 5505
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.22649338841438293,
      "learning_rate": 7.776072196437501e-06,
      "loss": 0.8522,
      "step": 5506
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.27822035551071167,
      "learning_rate": 7.756100948338806e-06,
      "loss": 0.7184,
      "step": 5507
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.41011789441108704,
      "learning_rate": 7.736154344512225e-06,
      "loss": 0.7564,
      "step": 5508
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.567182183265686,
      "learning_rate": 7.716232390286837e-06,
      "loss": 0.5711,
      "step": 5509
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.4972211420536041,
      "learning_rate": 7.696335090985062e-06,
      "loss": 0.6853,
      "step": 5510
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.24540209770202637,
      "learning_rate": 7.676462451922695e-06,
      "loss": 0.8496,
      "step": 5511
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.28671956062316895,
      "learning_rate": 7.656614478409063e-06,
      "loss": 0.5983,
      "step": 5512
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.20590555667877197,
      "learning_rate": 7.636791175746794e-06,
      "loss": 0.8027,
      "step": 5513
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.3456001579761505,
      "learning_rate": 7.616992549231983e-06,
      "loss": 0.84,
      "step": 5514
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.15852311253547668,
      "learning_rate": 7.597218604154122e-06,
      "loss": 0.8046,
      "step": 5515
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.22798484563827515,
      "learning_rate": 7.577469345796073e-06,
      "loss": 0.5928,
      "step": 5516
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.22662365436553955,
      "learning_rate": 7.5577447794341505e-06,
      "loss": 0.6559,
      "step": 5517
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.2545791566371918,
      "learning_rate": 7.538044910338071e-06,
      "loss": 0.8312,
      "step": 5518
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.31097736954689026,
      "learning_rate": 7.518369743770892e-06,
      "loss": 0.7963,
      "step": 5519
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.2546951472759247,
      "learning_rate": 7.498719284989153e-06,
      "loss": 0.5602,
      "step": 5520
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.1931544542312622,
      "learning_rate": 7.479093539242732e-06,
      "loss": 0.8987,
      "step": 5521
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.2610527575016022,
      "learning_rate": 7.459492511774946e-06,
      "loss": 0.7486,
      "step": 5522
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.28065168857574463,
      "learning_rate": 7.439916207822473e-06,
      "loss": 0.7912,
      "step": 5523
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.30221840739250183,
      "learning_rate": 7.4203646326154064e-06,
      "loss": 0.9546,
      "step": 5524
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.3039972484111786,
      "learning_rate": 7.400837791377235e-06,
      "loss": 0.8635,
      "step": 5525
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.23945719003677368,
      "learning_rate": 7.3813356893248195e-06,
      "loss": 0.727,
      "step": 5526
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.48404714465141296,
      "learning_rate": 7.361858331668425e-06,
      "loss": 0.5759,
      "step": 5527
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.3303256630897522,
      "learning_rate": 7.342405723611723e-06,
      "loss": 0.8692,
      "step": 5528
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.48648303747177124,
      "learning_rate": 7.3229778703517326e-06,
      "loss": 0.7875,
      "step": 5529
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.48648303747177124,
      "learning_rate": 7.3229778703517326e-06,
      "loss": 0.9604,
      "step": 5530
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.43310075998306274,
      "learning_rate": 7.303574777078892e-06,
      "loss": 0.9191,
      "step": 5531
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.30939003825187683,
      "learning_rate": 7.284196448977021e-06,
      "loss": 0.9359,
      "step": 5532
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.6458499431610107,
      "learning_rate": 7.264842891223311e-06,
      "loss": 0.7738,
      "step": 5533
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.28604888916015625,
      "learning_rate": 7.245514108988327e-06,
      "loss": 0.9181,
      "step": 5534
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.38859736919403076,
      "learning_rate": 7.226210107436049e-06,
      "loss": 1.0186,
      "step": 5535
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.16889238357543945,
      "learning_rate": 7.2069308917238175e-06,
      "loss": 0.8055,
      "step": 5536
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.5009798407554626,
      "learning_rate": 7.187676467002324e-06,
      "loss": 0.9801,
      "step": 5537
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.21314169466495514,
      "learning_rate": 7.168446838415677e-06,
      "loss": 0.9106,
      "step": 5538
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.19521240890026093,
      "learning_rate": 7.1492420111013646e-06,
      "loss": 0.8605,
      "step": 5539
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.18950799107551575,
      "learning_rate": 7.1300619901901954e-06,
      "loss": 0.8496,
      "step": 5540
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.4214918613433838,
      "learning_rate": 7.110906780806404e-06,
      "loss": 0.8615,
      "step": 5541
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.211788609623909,
      "learning_rate": 7.091776388067595e-06,
      "loss": 0.7077,
      "step": 5542
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.32285794615745544,
      "learning_rate": 7.072670817084692e-06,
      "loss": 0.8515,
      "step": 5543
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.26720136404037476,
      "learning_rate": 7.0535900729620245e-06,
      "loss": 0.7946,
      "step": 5544
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.3852829337120056,
      "learning_rate": 7.0345341607973015e-06,
      "loss": 0.873,
      "step": 5545
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.1720999777317047,
      "learning_rate": 7.0155030856815965e-06,
      "loss": 0.746,
      "step": 5546
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.5882025361061096,
      "learning_rate": 6.996496852699286e-06,
      "loss": 0.7913,
      "step": 5547
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.2934662997722626,
      "learning_rate": 6.977515466928186e-06,
      "loss": 0.79,
      "step": 5548
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.25546491146087646,
      "learning_rate": 6.958558933439463e-06,
      "loss": 0.6412,
      "step": 5549
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.1974811851978302,
      "learning_rate": 6.939627257297576e-06,
      "loss": 0.6876,
      "step": 5550
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.1692613959312439,
      "learning_rate": 6.920720443560424e-06,
      "loss": 0.8099,
      "step": 5551
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.1578298807144165,
      "learning_rate": 6.901838497279234e-06,
      "loss": 0.9354,
      "step": 5552
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.2618764340877533,
      "learning_rate": 6.882981423498558e-06,
      "loss": 0.6361,
      "step": 5553
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.18129637837409973,
      "learning_rate": 6.864149227256378e-06,
      "loss": 0.8648,
      "step": 5554
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.3056811988353729,
      "learning_rate": 6.8453419135839354e-06,
      "loss": 0.7293,
      "step": 5555
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.27532660961151123,
      "learning_rate": 6.8265594875059236e-06,
      "loss": 0.7714,
      "step": 5556
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.2318316102027893,
      "learning_rate": 6.807801954040283e-06,
      "loss": 0.7387,
      "step": 5557
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.162367045879364,
      "learning_rate": 6.7890693181983825e-06,
      "loss": 0.6711,
      "step": 5558
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.24173174798488617,
      "learning_rate": 6.77036158498493e-06,
      "loss": 1.0589,
      "step": 5559
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.6120384335517883,
      "learning_rate": 6.751678759397917e-06,
      "loss": 0.8835,
      "step": 5560
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.2362106740474701,
      "learning_rate": 6.7330208464287615e-06,
      "loss": 0.8732,
      "step": 5561
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.6214918494224548,
      "learning_rate": 6.714387851062198e-06,
      "loss": 0.8788,
      "step": 5562
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.7779544591903687,
      "learning_rate": 6.695779778276246e-06,
      "loss": 1.0093,
      "step": 5563
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.22223593294620514,
      "learning_rate": 6.6771966330423815e-06,
      "loss": 0.9011,
      "step": 5564
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.1876651644706726,
      "learning_rate": 6.658638420325314e-06,
      "loss": 1.0538,
      "step": 5565
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.5566770434379578,
      "learning_rate": 6.640105145083142e-06,
      "loss": 0.604,
      "step": 5566
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.3397636115550995,
      "learning_rate": 6.621596812267317e-06,
      "loss": 0.978,
      "step": 5567
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.29055285453796387,
      "learning_rate": 6.603113426822571e-06,
      "loss": 0.9352,
      "step": 5568
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.30312734842300415,
      "learning_rate": 6.584654993687023e-06,
      "loss": 0.9138,
      "step": 5569
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.29186126589775085,
      "learning_rate": 6.566221517792126e-06,
      "loss": 0.7413,
      "step": 5570
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.5534297823905945,
      "learning_rate": 6.547813004062597e-06,
      "loss": 1.0288,
      "step": 5571
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.16451765596866608,
      "learning_rate": 6.529429457416569e-06,
      "loss": 0.7359,
      "step": 5572
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.2634305953979492,
      "learning_rate": 6.511070882765435e-06,
      "loss": 0.976,
      "step": 5573
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.15086551010608673,
      "learning_rate": 6.492737285013995e-06,
      "loss": 0.8444,
      "step": 5574
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.2790510356426239,
      "learning_rate": 6.474428669060317e-06,
      "loss": 0.781,
      "step": 5575
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.23832271993160248,
      "learning_rate": 6.456145039795769e-06,
      "loss": 0.6372,
      "step": 5576
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.5828902721405029,
      "learning_rate": 6.4378864021051424e-06,
      "loss": 0.7887,
      "step": 5577
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.2744772732257843,
      "learning_rate": 6.4196527608664455e-06,
      "loss": 0.7551,
      "step": 5578
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.2200256586074829,
      "learning_rate": 6.4014441209510835e-06,
      "loss": 0.689,
      "step": 5579
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.21209578216075897,
      "learning_rate": 6.383260487223752e-06,
      "loss": 0.7893,
      "step": 5580
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.29012975096702576,
      "learning_rate": 6.365101864542444e-06,
      "loss": 0.9468,
      "step": 5581
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.23516690731048584,
      "learning_rate": 6.346968257758512e-06,
      "loss": 0.6919,
      "step": 5582
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.5101744532585144,
      "learning_rate": 6.328859671716625e-06,
      "loss": 0.7927,
      "step": 5583
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.337684690952301,
      "learning_rate": 6.310776111254712e-06,
      "loss": 0.8043,
      "step": 5584
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.23985914885997772,
      "learning_rate": 6.292717581204077e-06,
      "loss": 0.9111,
      "step": 5585
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.6497972011566162,
      "learning_rate": 6.274684086389315e-06,
      "loss": 0.7225,
      "step": 5586
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.30986350774765015,
      "learning_rate": 6.256675631628328e-06,
      "loss": 0.695,
      "step": 5587
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.5658658742904663,
      "learning_rate": 6.238692221732323e-06,
      "loss": 0.663,
      "step": 5588
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.2787618935108185,
      "learning_rate": 6.220733861505834e-06,
      "loss": 0.9892,
      "step": 5589
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0430651903152466,
      "learning_rate": 6.202800555746691e-06,
      "loss": 0.7934,
      "step": 5590
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.2959114611148834,
      "learning_rate": 6.1848923092460265e-06,
      "loss": 0.9465,
      "step": 5591
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.25783854722976685,
      "learning_rate": 6.167009126788281e-06,
      "loss": 0.9961,
      "step": 5592
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.3286045789718628,
      "learning_rate": 6.149151013151233e-06,
      "loss": 0.7767,
      "step": 5593
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.2820930778980255,
      "learning_rate": 6.1313179731058765e-06,
      "loss": 0.67,
      "step": 5594
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.8085203766822815,
      "learning_rate": 6.113510011416601e-06,
      "loss": 0.8103,
      "step": 5595
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.17199555039405823,
      "learning_rate": 6.095727132841056e-06,
      "loss": 0.684,
      "step": 5596
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.3771955966949463,
      "learning_rate": 6.077969342130186e-06,
      "loss": 0.9612,
      "step": 5597
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.19968272745609283,
      "learning_rate": 6.060236644028228e-06,
      "loss": 1.016,
      "step": 5598
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.12549984455108643,
      "learning_rate": 6.042529043272738e-06,
      "loss": 0.8487,
      "step": 5599
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.4172881543636322,
      "learning_rate": 6.024846544594553e-06,
      "loss": 0.9727,
      "step": 5600
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.23163612186908722,
      "learning_rate": 6.0071891527177955e-06,
      "loss": 0.855,
      "step": 5601
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.2432096302509308,
      "learning_rate": 5.989556872359891e-06,
      "loss": 0.7962,
      "step": 5602
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.2934105694293976,
      "learning_rate": 5.971949708231572e-06,
      "loss": 0.9099,
      "step": 5603
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.3119153380393982,
      "learning_rate": 5.95436766503682e-06,
      "loss": 0.8327,
      "step": 5604
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.2508552670478821,
      "learning_rate": 5.93681074747292e-06,
      "loss": 0.6321,
      "step": 5605
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.2945883572101593,
      "learning_rate": 5.919278960230501e-06,
      "loss": 0.8288,
      "step": 5606
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.5195305347442627,
      "learning_rate": 5.90177230799338e-06,
      "loss": 1.0368,
      "step": 5607
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.23074273765087128,
      "learning_rate": 5.8842907954387295e-06,
      "loss": 0.7498,
      "step": 5608
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.5059413313865662,
      "learning_rate": 5.866834427236978e-06,
      "loss": 0.8254,
      "step": 5609
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.153046652674675,
      "learning_rate": 5.849403208051862e-06,
      "loss": 0.8404,
      "step": 5610
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.7863677740097046,
      "learning_rate": 5.831997142540357e-06,
      "loss": 0.6945,
      "step": 5611
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.33135366439819336,
      "learning_rate": 5.814616235352743e-06,
      "loss": 0.8399,
      "step": 5612
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.31771260499954224,
      "learning_rate": 5.797260491132606e-06,
      "loss": 0.6664,
      "step": 5613
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.2687692940235138,
      "learning_rate": 5.779929914516724e-06,
      "loss": 0.9251,
      "step": 5614
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.35668131709098816,
      "learning_rate": 5.762624510135284e-06,
      "loss": 0.9167,
      "step": 5615
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.32450249791145325,
      "learning_rate": 5.74534428261162e-06,
      "loss": 0.8438,
      "step": 5616
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.5653581619262695,
      "learning_rate": 5.728089236562362e-06,
      "loss": 0.5845,
      "step": 5617
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.16462790966033936,
      "learning_rate": 5.7108593765975214e-06,
      "loss": 0.8085,
      "step": 5618
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.3574149012565613,
      "learning_rate": 5.69365470732024e-06,
      "loss": 1.0189,
      "step": 5619
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.46936336159706116,
      "learning_rate": 5.676475233327028e-06,
      "loss": 0.8638,
      "step": 5620
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.3203156292438507,
      "learning_rate": 5.659320959207592e-06,
      "loss": 0.9348,
      "step": 5621
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.22811685502529144,
      "learning_rate": 5.642191889544968e-06,
      "loss": 1.0091,
      "step": 5622
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.3203220069408417,
      "learning_rate": 5.6250880289154265e-06,
      "loss": 0.8865,
      "step": 5623
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.24459517002105713,
      "learning_rate": 5.6080093818884795e-06,
      "loss": 0.7624,
      "step": 5624
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.3114190101623535,
      "learning_rate": 5.590955953026966e-06,
      "loss": 0.8536,
      "step": 5625
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.408296674489975,
      "learning_rate": 5.573927746886942e-06,
      "loss": 0.9464,
      "step": 5626
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.24020008742809296,
      "learning_rate": 5.5569247680177125e-06,
      "loss": 0.8702,
      "step": 5627
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.39751192927360535,
      "learning_rate": 5.539947020961911e-06,
      "loss": 0.94,
      "step": 5628
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.28739604353904724,
      "learning_rate": 5.5229945102553416e-06,
      "loss": 0.8693,
      "step": 5629
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.7857042551040649,
      "learning_rate": 5.506067240427138e-06,
      "loss": 0.7736,
      "step": 5630
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.2842487096786499,
      "learning_rate": 5.4891652159996586e-06,
      "loss": 0.9795,
      "step": 5631
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.3526056408882141,
      "learning_rate": 5.472288441488493e-06,
      "loss": 0.8683,
      "step": 5632
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.29055553674697876,
      "learning_rate": 5.4554369214025455e-06,
      "loss": 0.7485,
      "step": 5633
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.3176170885562897,
      "learning_rate": 5.438610660243926e-06,
      "loss": 0.9517,
      "step": 5634
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.2901403605937958,
      "learning_rate": 5.421809662508004e-06,
      "loss": 0.7734,
      "step": 5635
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.7180376052856445,
      "learning_rate": 5.405033932683423e-06,
      "loss": 1.0773,
      "step": 5636
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.27523353695869446,
      "learning_rate": 5.388283475252009e-06,
      "loss": 0.6529,
      "step": 5637
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.3017944395542145,
      "learning_rate": 5.3715582946889606e-06,
      "loss": 0.7673,
      "step": 5638
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.16287657618522644,
      "learning_rate": 5.354858395462614e-06,
      "loss": 1.0002,
      "step": 5639
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.23447169363498688,
      "learning_rate": 5.338183782034545e-06,
      "loss": 0.6291,
      "step": 5640
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.3473987281322479,
      "learning_rate": 5.321534458859678e-06,
      "loss": 0.8745,
      "step": 5641
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.33441683650016785,
      "learning_rate": 5.304910430386078e-06,
      "loss": 0.8707,
      "step": 5642
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.2527902126312256,
      "learning_rate": 5.288311701055104e-06,
      "loss": 0.8255,
      "step": 5643
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.33062297105789185,
      "learning_rate": 5.271738275301363e-06,
      "loss": 0.8038,
      "step": 5644
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.27619510889053345,
      "learning_rate": 5.255190157552636e-06,
      "loss": 1.1333,
      "step": 5645
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.316900372505188,
      "learning_rate": 5.238667352230009e-06,
      "loss": 0.7954,
      "step": 5646
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.295027494430542,
      "learning_rate": 5.222169863747817e-06,
      "loss": 0.83,
      "step": 5647
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.6918859481811523,
      "learning_rate": 5.205697696513545e-06,
      "loss": 0.8228,
      "step": 5648
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.3065117299556732,
      "learning_rate": 5.189250854928007e-06,
      "loss": 0.8539,
      "step": 5649
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.3499529957771301,
      "learning_rate": 5.172829343385188e-06,
      "loss": 0.8464,
      "step": 5650
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.6943280100822449,
      "learning_rate": 5.156433166272345e-06,
      "loss": 0.8467,
      "step": 5651
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.3044078052043915,
      "learning_rate": 5.140062327969941e-06,
      "loss": 0.7106,
      "step": 5652
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.2561732828617096,
      "learning_rate": 5.123716832851677e-06,
      "loss": 0.7735,
      "step": 5653
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.5082197785377502,
      "learning_rate": 5.107396685284504e-06,
      "loss": 0.9054,
      "step": 5654
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.3973329961299896,
      "learning_rate": 5.0911018896285576e-06,
      "loss": 0.7069,
      "step": 5655
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.3603047728538513,
      "learning_rate": 5.074832450237233e-06,
      "loss": 0.7507,
      "step": 5656
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.2709652781486511,
      "learning_rate": 5.058588371457152e-06,
      "loss": 0.761,
      "step": 5657
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.6120540499687195,
      "learning_rate": 5.042369657628143e-06,
      "loss": 0.8033,
      "step": 5658
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.25709179043769836,
      "learning_rate": 5.0261763130832525e-06,
      "loss": 0.8435,
      "step": 5659
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.2184310406446457,
      "learning_rate": 5.010008342148787e-06,
      "loss": 0.7367,
      "step": 5660
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.25393325090408325,
      "learning_rate": 4.9938657491442555e-06,
      "loss": 0.7474,
      "step": 5661
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.49349939823150635,
      "learning_rate": 4.977748538382343e-06,
      "loss": 0.7647,
      "step": 5662
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.15362805128097534,
      "learning_rate": 4.961656714169028e-06,
      "loss": 0.5874,
      "step": 5663
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.5686683654785156,
      "learning_rate": 4.945590280803469e-06,
      "loss": 0.8957,
      "step": 5664
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.24770750105381012,
      "learning_rate": 4.929549242578024e-06,
      "loss": 0.6963,
      "step": 5665
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.620284378528595,
      "learning_rate": 4.9135336037782976e-06,
      "loss": 1.0259,
      "step": 5666
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.3761209547519684,
      "learning_rate": 4.8975433686831e-06,
      "loss": 0.9275,
      "step": 5667
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.26389846205711365,
      "learning_rate": 4.881578541564425e-06,
      "loss": 0.7747,
      "step": 5668
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.2802606523036957,
      "learning_rate": 4.865639126687527e-06,
      "loss": 0.6267,
      "step": 5669
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.23810505867004395,
      "learning_rate": 4.849725128310834e-06,
      "loss": 0.6413,
      "step": 5670
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.21702684462070465,
      "learning_rate": 4.833836550686033e-06,
      "loss": 0.9868,
      "step": 5671
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.23659348487854004,
      "learning_rate": 4.817973398057929e-06,
      "loss": 1.0665,
      "step": 5672
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.18203294277191162,
      "learning_rate": 4.802135674664632e-06,
      "loss": 0.8687,
      "step": 5673
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.15952228009700775,
      "learning_rate": 4.786323384737401e-06,
      "loss": 0.6345,
      "step": 5674
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.2774445116519928,
      "learning_rate": 4.770536532500703e-06,
      "loss": 0.6869,
      "step": 5675
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.27237844467163086,
      "learning_rate": 4.754775122172239e-06,
      "loss": 0.7822,
      "step": 5676
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.41969960927963257,
      "learning_rate": 4.739039157962899e-06,
      "loss": 0.8795,
      "step": 5677
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.3686719834804535,
      "learning_rate": 4.723328644076719e-06,
      "loss": 1.1075,
      "step": 5678
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.23393142223358154,
      "learning_rate": 4.707643584711063e-06,
      "loss": 0.7306,
      "step": 5679
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.25353965163230896,
      "learning_rate": 4.691983984056381e-06,
      "loss": 0.9359,
      "step": 5680
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.2903365194797516,
      "learning_rate": 4.676349846296324e-06,
      "loss": 0.6111,
      "step": 5681
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.18961620330810547,
      "learning_rate": 4.660741175607852e-06,
      "loss": 0.9756,
      "step": 5682
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.19648335874080658,
      "learning_rate": 4.645157976160974e-06,
      "loss": 0.7507,
      "step": 5683
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.17554743587970734,
      "learning_rate": 4.629600252119004e-06,
      "loss": 0.8341,
      "step": 5684
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.7154755592346191,
      "learning_rate": 4.614068007638383e-06,
      "loss": 0.8224,
      "step": 5685
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.2316173017024994,
      "learning_rate": 4.598561246868793e-06,
      "loss": 0.5761,
      "step": 5686
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9872010350227356,
      "learning_rate": 4.583079973953086e-06,
      "loss": 0.8419,
      "step": 5687
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.26256057620048523,
      "learning_rate": 4.567624193027275e-06,
      "loss": 0.6227,
      "step": 5688
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.3876238465309143,
      "learning_rate": 4.5521939082206255e-06,
      "loss": 1.0178,
      "step": 5689
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.2233051210641861,
      "learning_rate": 4.53678912365556e-06,
      "loss": 0.7101,
      "step": 5690
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.17866434156894684,
      "learning_rate": 4.521409843447644e-06,
      "loss": 0.6876,
      "step": 5691
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.2074006348848343,
      "learning_rate": 4.506056071705722e-06,
      "loss": 0.989,
      "step": 5692
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.21179956197738647,
      "learning_rate": 4.4907278125317365e-06,
      "loss": 0.9228,
      "step": 5693
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.2214665412902832,
      "learning_rate": 4.475425070020867e-06,
      "loss": 0.8223,
      "step": 5694
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.24090461432933807,
      "learning_rate": 4.4601478482614645e-06,
      "loss": 0.8547,
      "step": 5695
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.3033595383167267,
      "learning_rate": 4.444896151335043e-06,
      "loss": 0.8313,
      "step": 5696
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.2883886396884918,
      "learning_rate": 4.42966998331632e-06,
      "loss": 0.7831,
      "step": 5697
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.217718705534935,
      "learning_rate": 4.414469348273187e-06,
      "loss": 0.805,
      "step": 5698
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.5733277201652527,
      "learning_rate": 4.399294250266684e-06,
      "loss": 0.7848,
      "step": 5699
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.2860662341117859,
      "learning_rate": 4.3841446933511e-06,
      "loss": 0.9128,
      "step": 5700
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.44859832525253296,
      "learning_rate": 4.369020681573799e-06,
      "loss": 0.8292,
      "step": 5701
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.20787903666496277,
      "learning_rate": 4.353922218975426e-06,
      "loss": 0.8647,
      "step": 5702
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.2677392363548279,
      "learning_rate": 4.338849309589732e-06,
      "loss": 1.0112,
      "step": 5703
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.24155014753341675,
      "learning_rate": 4.3238019574436295e-06,
      "loss": 0.8249,
      "step": 5704
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.331703245639801,
      "learning_rate": 4.308780166557291e-06,
      "loss": 0.6087,
      "step": 5705
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.23176813125610352,
      "learning_rate": 4.293783940943963e-06,
      "loss": 0.7282,
      "step": 5706
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.19274786114692688,
      "learning_rate": 4.278813284610106e-06,
      "loss": 0.7757,
      "step": 5707
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.2940000891685486,
      "learning_rate": 4.263868201555366e-06,
      "loss": 0.8232,
      "step": 5708
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.29474112391471863,
      "learning_rate": 4.248948695772493e-06,
      "loss": 0.9594,
      "step": 5709
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.14709201455116272,
      "learning_rate": 4.234054771247475e-06,
      "loss": 0.8255,
      "step": 5710
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.622916579246521,
      "learning_rate": 4.219186431959454e-06,
      "loss": 0.7959,
      "step": 5711
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.2099466770887375,
      "learning_rate": 4.204343681880674e-06,
      "loss": 0.847,
      "step": 5712
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.2791605293750763,
      "learning_rate": 4.189526524976617e-06,
      "loss": 0.8257,
      "step": 5713
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.35492196679115295,
      "learning_rate": 4.174734965205873e-06,
      "loss": 0.7653,
      "step": 5714
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.43358245491981506,
      "learning_rate": 4.159969006520259e-06,
      "loss": 0.799,
      "step": 5715
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.5809074640274048,
      "learning_rate": 4.145228652864675e-06,
      "loss": 0.6905,
      "step": 5716
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.31807446479797363,
      "learning_rate": 4.1305139081772244e-06,
      "loss": 0.985,
      "step": 5717
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.2694711685180664,
      "learning_rate": 4.115824776389188e-06,
      "loss": 0.7411,
      "step": 5718
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.4062570631504059,
      "learning_rate": 4.101161261424935e-06,
      "loss": 0.8587,
      "step": 5719
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.18123850226402283,
      "learning_rate": 4.086523367202044e-06,
      "loss": 0.7397,
      "step": 5720
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.19504152238368988,
      "learning_rate": 4.071911097631265e-06,
      "loss": 0.508,
      "step": 5721
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.7174710035324097,
      "learning_rate": 4.05732445661644e-06,
      "loss": 0.9322,
      "step": 5722
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.38628795742988586,
      "learning_rate": 4.042763448054609e-06,
      "loss": 0.7183,
      "step": 5723
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.22866326570510864,
      "learning_rate": 4.028228075835961e-06,
      "loss": 0.766,
      "step": 5724
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.3348078727722168,
      "learning_rate": 4.01371834384382e-06,
      "loss": 0.8028,
      "step": 5725
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.17400968074798584,
      "learning_rate": 3.999234255954665e-06,
      "loss": 1.0281,
      "step": 5726
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.21070103347301483,
      "learning_rate": 3.984775816038133e-06,
      "loss": 0.8282,
      "step": 5727
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.23684203624725342,
      "learning_rate": 3.970343027957013e-06,
      "loss": 0.731,
      "step": 5728
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.24143770337104797,
      "learning_rate": 3.955935895567209e-06,
      "loss": 0.6749,
      "step": 5729
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.34173277020454407,
      "learning_rate": 3.941554422717797e-06,
      "loss": 0.8607,
      "step": 5730
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.23197099566459656,
      "learning_rate": 3.927198613251004e-06,
      "loss": 0.6283,
      "step": 5731
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.6072430610656738,
      "learning_rate": 3.912868471002173e-06,
      "loss": 1.0467,
      "step": 5732
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.23266245424747467,
      "learning_rate": 3.898563999799809e-06,
      "loss": 0.8629,
      "step": 5733
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.40842679142951965,
      "learning_rate": 3.884285203465565e-06,
      "loss": 1.01,
      "step": 5734
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.28638187050819397,
      "learning_rate": 3.870032085814224e-06,
      "loss": 0.882,
      "step": 5735
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.29717209935188293,
      "learning_rate": 3.855804650653694e-06,
      "loss": 0.6952,
      "step": 5736
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.3302818536758423,
      "learning_rate": 3.841602901785057e-06,
      "loss": 0.8155,
      "step": 5737
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.24270743131637573,
      "learning_rate": 3.827426843002513e-06,
      "loss": 0.8511,
      "step": 5738
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.2643081843852997,
      "learning_rate": 3.8132764780933748e-06,
      "loss": 0.9032,
      "step": 5739
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.22216922044754028,
      "learning_rate": 3.7991518108381195e-06,
      "loss": 0.7191,
      "step": 5740
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.21692974865436554,
      "learning_rate": 3.785052845010384e-06,
      "loss": 0.7619,
      "step": 5741
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.8720180988311768,
      "learning_rate": 3.7709795843768657e-06,
      "loss": 0.8732,
      "step": 5742
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.21340583264827728,
      "learning_rate": 3.7569320326974687e-06,
      "loss": 0.7384,
      "step": 5743
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.2779400944709778,
      "learning_rate": 3.742910193725191e-06,
      "loss": 0.8653,
      "step": 5744
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.31260624527931213,
      "learning_rate": 3.7289140712061575e-06,
      "loss": 1.0074,
      "step": 5745
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.2551622986793518,
      "learning_rate": 3.7149436688796223e-06,
      "loss": 1.011,
      "step": 5746
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.3649458885192871,
      "learning_rate": 3.7009989904779885e-06,
      "loss": 0.8291,
      "step": 5747
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.3248983323574066,
      "learning_rate": 3.687080039726798e-06,
      "loss": 0.9846,
      "step": 5748
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.283128023147583,
      "learning_rate": 3.673186820344654e-06,
      "loss": 0.8902,
      "step": 5749
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.1934070587158203,
      "learning_rate": 3.6593193360433652e-06,
      "loss": 0.764,
      "step": 5750
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.7009592056274414,
      "learning_rate": 3.645477590527813e-06,
      "loss": 0.6932,
      "step": 5751
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.1896420270204544,
      "learning_rate": 3.6316615874960047e-06,
      "loss": 0.7804,
      "step": 5752
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.3465903699398041,
      "learning_rate": 3.617871330639089e-06,
      "loss": 0.8941,
      "step": 5753
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.22749720513820648,
      "learning_rate": 3.604106823641351e-06,
      "loss": 0.8814,
      "step": 5754
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.3059869408607483,
      "learning_rate": 3.590368070180139e-06,
      "loss": 0.8884,
      "step": 5755
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.288194864988327,
      "learning_rate": 3.576655073926005e-06,
      "loss": 0.6222,
      "step": 5756
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.3237980008125305,
      "learning_rate": 3.562967838542519e-06,
      "loss": 0.7236,
      "step": 5757
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.2161843478679657,
      "learning_rate": 3.5493063676864448e-06,
      "loss": 0.7687,
      "step": 5758
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.15999285876750946,
      "learning_rate": 3.535670665007662e-06,
      "loss": 0.6981,
      "step": 5759
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.2739187777042389,
      "learning_rate": 3.5220607341490907e-06,
      "loss": 0.9999,
      "step": 5760
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.2858325242996216,
      "learning_rate": 3.5084765787468776e-06,
      "loss": 0.7569,
      "step": 5761
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.27556097507476807,
      "learning_rate": 3.494918202430164e-06,
      "loss": 0.8235,
      "step": 5762
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.29056328535079956,
      "learning_rate": 3.4813856088213083e-06,
      "loss": 0.6686,
      "step": 5763
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.5968664884567261,
      "learning_rate": 3.4678788015357178e-06,
      "loss": 0.7612,
      "step": 5764
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.16723081469535828,
      "learning_rate": 3.4543977841819066e-06,
      "loss": 0.7394,
      "step": 5765
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.19752207398414612,
      "learning_rate": 3.440942560361571e-06,
      "loss": 0.7126,
      "step": 5766
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.2873706519603729,
      "learning_rate": 3.4275131336694465e-06,
      "loss": 0.9037,
      "step": 5767
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.34249961376190186,
      "learning_rate": 3.4141095076933527e-06,
      "loss": 0.8651,
      "step": 5768
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.24648211896419525,
      "learning_rate": 3.4007316860143245e-06,
      "loss": 0.8751,
      "step": 5769
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.4557269215583801,
      "learning_rate": 3.387379672206403e-06,
      "loss": 0.7076,
      "step": 5770
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.37243157625198364,
      "learning_rate": 3.3740534698367687e-06,
      "loss": 1.1158,
      "step": 5771
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.43207189440727234,
      "learning_rate": 3.3607530824657173e-06,
      "loss": 0.7728,
      "step": 5772
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.24852634966373444,
      "learning_rate": 3.347478513646618e-06,
      "loss": 0.7248,
      "step": 5773
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.3268473148345947,
      "learning_rate": 3.334229766925989e-06,
      "loss": 1.0624,
      "step": 5774
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.2616529166698456,
      "learning_rate": 3.3210068458434e-06,
      "loss": 0.8468,
      "step": 5775
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.29007771611213684,
      "learning_rate": 3.3078097539315567e-06,
      "loss": 0.8177,
      "step": 5776
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.31077826023101807,
      "learning_rate": 3.29463849471624e-06,
      "loss": 0.8809,
      "step": 5777
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.3159696161746979,
      "learning_rate": 3.281493071716324e-06,
      "loss": 1.0382,
      "step": 5778
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.20573295652866364,
      "learning_rate": 3.2683734884438434e-06,
      "loss": 0.9023,
      "step": 5779
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.865585446357727,
      "learning_rate": 3.255279748403839e-06,
      "loss": 0.7662,
      "step": 5780
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.8608042597770691,
      "learning_rate": 3.2422118550945013e-06,
      "loss": 1.1045,
      "step": 5781
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.44358012080192566,
      "learning_rate": 3.2291698120071156e-06,
      "loss": 0.6685,
      "step": 5782
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.339722603559494,
      "learning_rate": 3.216153622626039e-06,
      "loss": 0.8014,
      "step": 5783
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.5060225129127502,
      "learning_rate": 3.2031632904287233e-06,
      "loss": 0.8687,
      "step": 5784
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.3167794644832611,
      "learning_rate": 3.190198818885759e-06,
      "loss": 0.8827,
      "step": 5785
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.20951835811138153,
      "learning_rate": 3.1772602114607307e-06,
      "loss": 0.6224,
      "step": 5786
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.5686812400817871,
      "learning_rate": 3.1643474716104184e-06,
      "loss": 0.5814,
      "step": 5787
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.22048930823802948,
      "learning_rate": 3.151460602784617e-06,
      "loss": 0.8719,
      "step": 5788
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.3882722854614258,
      "learning_rate": 3.1385996084262737e-06,
      "loss": 0.8983,
      "step": 5789
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.1274283081293106,
      "learning_rate": 3.125764491971339e-06,
      "loss": 0.8877,
      "step": 5790
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.21492509543895721,
      "learning_rate": 3.112955256848926e-06,
      "loss": 0.789,
      "step": 5791
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.250997930765152,
      "learning_rate": 3.1001719064812087e-06,
      "loss": 0.8891,
      "step": 5792
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.592142641544342,
      "learning_rate": 3.0874144442834208e-06,
      "loss": 0.9153,
      "step": 5793
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.22549158334732056,
      "learning_rate": 3.0746828736639146e-06,
      "loss": 0.7634,
      "step": 5794
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.12992359697818756,
      "learning_rate": 3.061977198024113e-06,
      "loss": 0.8469,
      "step": 5795
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.25036072731018066,
      "learning_rate": 3.049297420758501e-06,
      "loss": 0.6392,
      "step": 5796
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.19581259787082672,
      "learning_rate": 3.0366435452546695e-06,
      "loss": 0.7728,
      "step": 5797
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.21969638764858246,
      "learning_rate": 3.024015574893291e-06,
      "loss": 0.8161,
      "step": 5798
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.3392452299594879,
      "learning_rate": 3.0114135130481113e-06,
      "loss": 0.9542,
      "step": 5799
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.21718814969062805,
      "learning_rate": 2.998837363085927e-06,
      "loss": 0.9148,
      "step": 5800
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.22719916701316833,
      "learning_rate": 2.9862871283666492e-06,
      "loss": 0.7788,
      "step": 5801
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.32227182388305664,
      "learning_rate": 2.9737628122432746e-06,
      "loss": 0.852,
      "step": 5802
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.794320821762085,
      "learning_rate": 2.9612644180618044e-06,
      "loss": 1.0177,
      "step": 5803
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.19476979970932007,
      "learning_rate": 2.9487919491614004e-06,
      "loss": 0.6971,
      "step": 5804
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.2703760266304016,
      "learning_rate": 2.9363454088742525e-06,
      "loss": 0.864,
      "step": 5805
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.3237552046775818,
      "learning_rate": 2.9239248005256126e-06,
      "loss": 0.8779,
      "step": 5806
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.690328061580658,
      "learning_rate": 2.9115301274338593e-06,
      "loss": 0.6842,
      "step": 5807
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.5588569045066833,
      "learning_rate": 2.899161392910377e-06,
      "loss": 0.6959,
      "step": 5808
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.34181344509124756,
      "learning_rate": 2.886818600259655e-06,
      "loss": 0.8097,
      "step": 5809
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.3264749050140381,
      "learning_rate": 2.8745017527792464e-06,
      "loss": 1.1079,
      "step": 5810
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.2902888357639313,
      "learning_rate": 2.8622108537597726e-06,
      "loss": 0.9159,
      "step": 5811
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.21798427402973175,
      "learning_rate": 2.849945906484941e-06,
      "loss": 0.7204,
      "step": 5812
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.2640264928340912,
      "learning_rate": 2.837706914231475e-06,
      "loss": 0.8017,
      "step": 5813
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.2665356695652008,
      "learning_rate": 2.8254938802692143e-06,
      "loss": 0.9106,
      "step": 5814
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.2612767517566681,
      "learning_rate": 2.8133068078610603e-06,
      "loss": 0.9783,
      "step": 5815
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.1694793999195099,
      "learning_rate": 2.8011457002629194e-06,
      "loss": 0.703,
      "step": 5816
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.6863704919815063,
      "learning_rate": 2.789010560723848e-06,
      "loss": 0.5503,
      "step": 5817
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.21580663323402405,
      "learning_rate": 2.776901392485898e-06,
      "loss": 0.8593,
      "step": 5818
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.2762939929962158,
      "learning_rate": 2.7648181987842025e-06,
      "loss": 0.7199,
      "step": 5819
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.16166605055332184,
      "learning_rate": 2.7527609828469803e-06,
      "loss": 0.8252,
      "step": 5820
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.19718974828720093,
      "learning_rate": 2.7407297478954763e-06,
      "loss": 0.8001,
      "step": 5821
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.2860226333141327,
      "learning_rate": 2.7287244971440084e-06,
      "loss": 0.8073,
      "step": 5822
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.20529775321483612,
      "learning_rate": 2.7167452337999555e-06,
      "loss": 0.6452,
      "step": 5823
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.6601143479347229,
      "learning_rate": 2.704791961063724e-06,
      "loss": 0.6712,
      "step": 5824
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.2592531144618988,
      "learning_rate": 2.692864682128837e-06,
      "loss": 0.6508,
      "step": 5825
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.1776047796010971,
      "learning_rate": 2.6809634001818127e-06,
      "loss": 0.6815,
      "step": 5826
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.23327355086803436,
      "learning_rate": 2.669088118402241e-06,
      "loss": 0.6629,
      "step": 5827
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.3891962170600891,
      "learning_rate": 2.6572388399628055e-06,
      "loss": 1.0346,
      "step": 5828
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.5746845602989197,
      "learning_rate": 2.6454155680291746e-06,
      "loss": 0.9271,
      "step": 5829
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.241797536611557,
      "learning_rate": 2.6336183057601328e-06,
      "loss": 0.6481,
      "step": 5830
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.20126686990261078,
      "learning_rate": 2.621847056307469e-06,
      "loss": 0.6137,
      "step": 5831
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.4356425702571869,
      "learning_rate": 2.6101018228160466e-06,
      "loss": 1.0338,
      "step": 5832
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.45642587542533875,
      "learning_rate": 2.5983826084237663e-06,
      "loss": 0.9853,
      "step": 5833
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.5929207801818848,
      "learning_rate": 2.5866894162615895e-06,
      "loss": 0.838,
      "step": 5834
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.14973033964633942,
      "learning_rate": 2.575022249453518e-06,
      "loss": 0.7281,
      "step": 5835
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.7359582185745239,
      "learning_rate": 2.563381111116614e-06,
      "loss": 0.8467,
      "step": 5836
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.1624535322189331,
      "learning_rate": 2.5517660043609447e-06,
      "loss": 0.864,
      "step": 5837
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.3476593792438507,
      "learning_rate": 2.540176932289662e-06,
      "loss": 0.9874,
      "step": 5838
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.2715587913990021,
      "learning_rate": 2.528613897998966e-06,
      "loss": 0.975,
      "step": 5839
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.6005218625068665,
      "learning_rate": 2.517076904578075e-06,
      "loss": 0.8666,
      "step": 5840
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.21200431883335114,
      "learning_rate": 2.505565955109268e-06,
      "loss": 0.7009,
      "step": 5841
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.294223815202713,
      "learning_rate": 2.4940810526678404e-06,
      "loss": 0.7462,
      "step": 5842
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.28931063413619995,
      "learning_rate": 2.4826222003221823e-06,
      "loss": 0.7727,
      "step": 5843
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.14645978808403015,
      "learning_rate": 2.4711894011336556e-06,
      "loss": 0.7126,
      "step": 5844
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.17274974286556244,
      "learning_rate": 2.4597826581567063e-06,
      "loss": 0.7126,
      "step": 5845
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.3397405743598938,
      "learning_rate": 2.448401974438819e-06,
      "loss": 0.9319,
      "step": 5846
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.19471725821495056,
      "learning_rate": 2.437047353020483e-06,
      "loss": 0.8149,
      "step": 5847
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.21125808358192444,
      "learning_rate": 2.4257187969352725e-06,
      "loss": 0.7885,
      "step": 5848
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.49164387583732605,
      "learning_rate": 2.414416309209755e-06,
      "loss": 0.7636,
      "step": 5849
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.23492786288261414,
      "learning_rate": 2.4031398928635596e-06,
      "loss": 0.8913,
      "step": 5850
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.5489408373832703,
      "learning_rate": 2.391889550909343e-06,
      "loss": 0.7878,
      "step": 5851
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.20061244070529938,
      "learning_rate": 2.380665286352779e-06,
      "loss": 0.7964,
      "step": 5852
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.27575811743736267,
      "learning_rate": 2.369467102192624e-06,
      "loss": 0.8457,
      "step": 5853
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.3701903820037842,
      "learning_rate": 2.3582950014205962e-06,
      "loss": 0.8986,
      "step": 5854
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.19868916273117065,
      "learning_rate": 2.3471489870214857e-06,
      "loss": 0.6234,
      "step": 5855
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.30066919326782227,
      "learning_rate": 2.336029061973144e-06,
      "loss": 0.9775,
      "step": 5856
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.23867961764335632,
      "learning_rate": 2.324935229246372e-06,
      "loss": 0.7759,
      "step": 5857
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.6096466183662415,
      "learning_rate": 2.313867491805066e-06,
      "loss": 0.8386,
      "step": 5858
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.3037331998348236,
      "learning_rate": 2.30282585260615e-06,
      "loss": 0.7525,
      "step": 5859
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.17903439700603485,
      "learning_rate": 2.2918103145995187e-06,
      "loss": 0.6613,
      "step": 5860
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.2236505150794983,
      "learning_rate": 2.2808208807281406e-06,
      "loss": 1.0313,
      "step": 5861
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.3076213002204895,
      "learning_rate": 2.269857553928012e-06,
      "loss": 0.7019,
      "step": 5862
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.2116139680147171,
      "learning_rate": 2.258920337128134e-06,
      "loss": 0.7609,
      "step": 5863
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.31488707661628723,
      "learning_rate": 2.2480092332505365e-06,
      "loss": 0.8088,
      "step": 5864
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.3801177740097046,
      "learning_rate": 2.237124245210287e-06,
      "loss": 0.8847,
      "step": 5865
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.6528475284576416,
      "learning_rate": 2.2262653759154707e-06,
      "loss": 0.8769,
      "step": 5866
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.39753201603889465,
      "learning_rate": 2.2154326282671557e-06,
      "loss": 0.7283,
      "step": 5867
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.18691720068454742,
      "learning_rate": 2.2046260051594936e-06,
      "loss": 0.7913,
      "step": 5868
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.29483118653297424,
      "learning_rate": 2.1938455094796306e-06,
      "loss": 0.8404,
      "step": 5869
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.29062068462371826,
      "learning_rate": 2.1830911441076964e-06,
      "loss": 0.8392,
      "step": 5870
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.173582524061203,
      "learning_rate": 2.1723629119169144e-06,
      "loss": 0.7653,
      "step": 5871
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.3449835777282715,
      "learning_rate": 2.1616608157734807e-06,
      "loss": 0.5466,
      "step": 5872
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.20275075733661652,
      "learning_rate": 2.1509848585365756e-06,
      "loss": 0.7273,
      "step": 5873
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.21280492842197418,
      "learning_rate": 2.140335043058461e-06,
      "loss": 0.868,
      "step": 5874
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.2758726477622986,
      "learning_rate": 2.129711372184384e-06,
      "loss": 0.6197,
      "step": 5875
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.7206146717071533,
      "learning_rate": 2.1191138487526074e-06,
      "loss": 0.6846,
      "step": 5876
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.16676786541938782,
      "learning_rate": 2.1085424755944107e-06,
      "loss": 0.7759,
      "step": 5877
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.21295808255672455,
      "learning_rate": 2.09799725553409e-06,
      "loss": 0.6313,
      "step": 5878
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.2940179109573364,
      "learning_rate": 2.0874781913889585e-06,
      "loss": 0.6563,
      "step": 5879
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.32014453411102295,
      "learning_rate": 2.076985285969302e-06,
      "loss": 0.9167,
      "step": 5880
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.6985118985176086,
      "learning_rate": 2.0665185420784884e-06,
      "loss": 0.9654,
      "step": 5881
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.23788069188594818,
      "learning_rate": 2.056077962512837e-06,
      "loss": 1.0451,
      "step": 5882
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.21587949991226196,
      "learning_rate": 2.045663550061694e-06,
      "loss": 0.6956,
      "step": 5883
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9039804339408875,
      "learning_rate": 2.035275307507434e-06,
      "loss": 0.734,
      "step": 5884
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.15799863636493683,
      "learning_rate": 2.0249132376254143e-06,
      "loss": 0.8633,
      "step": 5885
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.24769070744514465,
      "learning_rate": 2.0145773431840097e-06,
      "loss": 0.7867,
      "step": 5886
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.23628921806812286,
      "learning_rate": 2.0042676269446113e-06,
      "loss": 0.7535,
      "step": 5887
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.18058623373508453,
      "learning_rate": 1.9939840916615826e-06,
      "loss": 0.6209,
      "step": 5888
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.3020934760570526,
      "learning_rate": 1.983726740082348e-06,
      "loss": 0.7887,
      "step": 5889
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.2862851023674011,
      "learning_rate": 1.9734955749472815e-06,
      "loss": 0.6662,
      "step": 5890
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.28719040751457214,
      "learning_rate": 1.9632905989897867e-06,
      "loss": 0.7196,
      "step": 5891
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.21182851493358612,
      "learning_rate": 1.9531118149362813e-06,
      "loss": 0.8554,
      "step": 5892
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.4211932420730591,
      "learning_rate": 1.9429592255061577e-06,
      "loss": 0.6041,
      "step": 5893
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.19868524372577667,
      "learning_rate": 1.932832833411846e-06,
      "loss": 0.6996,
      "step": 5894
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.31496262550354004,
      "learning_rate": 1.9227326413587265e-06,
      "loss": 0.9401,
      "step": 5895
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.5633779168128967,
      "learning_rate": 1.9126586520452293e-06,
      "loss": 0.6755,
      "step": 5896
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.2141513228416443,
      "learning_rate": 1.9026108681627686e-06,
      "loss": 0.7332,
      "step": 5897
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.27947309613227844,
      "learning_rate": 1.8925892923957412e-06,
      "loss": 0.9855,
      "step": 5898
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.21100209653377533,
      "learning_rate": 1.882593927421561e-06,
      "loss": 0.6647,
      "step": 5899
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.22962647676467896,
      "learning_rate": 1.8726247759106253e-06,
      "loss": 0.9913,
      "step": 5900
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.18635189533233643,
      "learning_rate": 1.862681840526337e-06,
      "loss": 0.7157,
      "step": 5901
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.2851431369781494,
      "learning_rate": 1.8527651239250933e-06,
      "loss": 0.8757,
      "step": 5902
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.20391486585140228,
      "learning_rate": 1.842874628756286e-06,
      "loss": 0.9513,
      "step": 5903
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.24027159810066223,
      "learning_rate": 1.8330103576623125e-06,
      "loss": 0.6681,
      "step": 5904
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.20232859253883362,
      "learning_rate": 1.8231723132785538e-06,
      "loss": 0.71,
      "step": 5905
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.20910175144672394,
      "learning_rate": 1.8133604982333408e-06,
      "loss": 0.8597,
      "step": 5906
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.40013387799263,
      "learning_rate": 1.8035749151480986e-06,
      "loss": 0.9311,
      "step": 5907
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.31685954332351685,
      "learning_rate": 1.793815566637147e-06,
      "loss": 0.8602,
      "step": 5908
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.24497318267822266,
      "learning_rate": 1.784082455307856e-06,
      "loss": 0.7538,
      "step": 5909
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.23150990903377533,
      "learning_rate": 1.774375583760557e-06,
      "loss": 0.9232,
      "step": 5910
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.5644747018814087,
      "learning_rate": 1.764694954588575e-06,
      "loss": 0.8345,
      "step": 5911
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.2838149666786194,
      "learning_rate": 1.7550405703782302e-06,
      "loss": 0.7665,
      "step": 5912
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.31105250120162964,
      "learning_rate": 1.7454124337088373e-06,
      "loss": 0.9541,
      "step": 5913
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.27102914452552795,
      "learning_rate": 1.735810547152672e-06,
      "loss": 0.8134,
      "step": 5914
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.2925817668437958,
      "learning_rate": 1.7262349132750377e-06,
      "loss": 0.7574,
      "step": 5915
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.20371541380882263,
      "learning_rate": 1.716685534634177e-06,
      "loss": 0.6868,
      "step": 5916
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.2584799826145172,
      "learning_rate": 1.7071624137813712e-06,
      "loss": 0.7283,
      "step": 5917
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.2639150023460388,
      "learning_rate": 1.69766555326083e-06,
      "loss": 0.8555,
      "step": 5918
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.23047079145908356,
      "learning_rate": 1.6881949556097898e-06,
      "loss": 0.7494,
      "step": 5919
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.20774497091770172,
      "learning_rate": 1.6787506233584604e-06,
      "loss": 0.7025,
      "step": 5920
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.2413378208875656,
      "learning_rate": 1.6693325590300234e-06,
      "loss": 1.0426,
      "step": 5921
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.2676883041858673,
      "learning_rate": 1.6599407651406328e-06,
      "loss": 0.7439,
      "step": 5922
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.2855021059513092,
      "learning_rate": 1.6505752441994704e-06,
      "loss": 0.7054,
      "step": 5923
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.22507837414741516,
      "learning_rate": 1.6412359987086455e-06,
      "loss": 0.8611,
      "step": 5924
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.3045470714569092,
      "learning_rate": 1.6319230311632849e-06,
      "loss": 0.8179,
      "step": 5925
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.42898258566856384,
      "learning_rate": 1.6226363440514647e-06,
      "loss": 0.8832,
      "step": 5926
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.28866854310035706,
      "learning_rate": 1.613375939854278e-06,
      "loss": 0.8324,
      "step": 5927
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.20499300956726074,
      "learning_rate": 1.6041418210457571e-06,
      "loss": 0.8183,
      "step": 5928
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.267655611038208,
      "learning_rate": 1.5949339900929282e-06,
      "loss": 1.1495,
      "step": 5929
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.324984073638916,
      "learning_rate": 1.5857524494558019e-06,
      "loss": 0.7437,
      "step": 5930
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.3198508322238922,
      "learning_rate": 1.5765972015873487e-06,
      "loss": 0.8975,
      "step": 5931
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.2788533866405487,
      "learning_rate": 1.5674682489335345e-06,
      "loss": 0.8324,
      "step": 5932
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.20544859766960144,
      "learning_rate": 1.5583655939332863e-06,
      "loss": 0.6856,
      "step": 5933
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.27338194847106934,
      "learning_rate": 1.5492892390184922e-06,
      "loss": 0.8908,
      "step": 5934
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.5944815278053284,
      "learning_rate": 1.5402391866140565e-06,
      "loss": 0.7324,
      "step": 5935
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.5348771810531616,
      "learning_rate": 1.5312154391378119e-06,
      "loss": 0.5632,
      "step": 5936
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.263348788022995,
      "learning_rate": 1.522217999000597e-06,
      "loss": 0.8,
      "step": 5937
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.3296310305595398,
      "learning_rate": 1.5132468686061774e-06,
      "loss": 0.8833,
      "step": 5938
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.46476298570632935,
      "learning_rate": 1.5043020503513471e-06,
      "loss": 0.6753,
      "step": 5939
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.23412485420703888,
      "learning_rate": 1.4953835466258281e-06,
      "loss": 0.8514,
      "step": 5940
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.31697288155555725,
      "learning_rate": 1.4864913598123253e-06,
      "loss": 0.6587,
      "step": 5941
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.3447577655315399,
      "learning_rate": 1.4776254922865163e-06,
      "loss": 0.9844,
      "step": 5942
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.2639293968677521,
      "learning_rate": 1.4687859464170505e-06,
      "loss": 0.6649,
      "step": 5943
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.40224921703338623,
      "learning_rate": 1.459972724565517e-06,
      "loss": 0.8347,
      "step": 5944
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.33145400881767273,
      "learning_rate": 1.4511858290865322e-06,
      "loss": 0.6916,
      "step": 5945
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.22756345570087433,
      "learning_rate": 1.4424252623276068e-06,
      "loss": 0.568,
      "step": 5946
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.2563188374042511,
      "learning_rate": 1.433691026629247e-06,
      "loss": 0.7574,
      "step": 5947
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.2296551913022995,
      "learning_rate": 1.4249831243249522e-06,
      "loss": 0.9253,
      "step": 5948
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.25800663232803345,
      "learning_rate": 1.416301557741151e-06,
      "loss": 0.8106,
      "step": 5949
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.2554468512535095,
      "learning_rate": 1.4076463291972542e-06,
      "loss": 0.8978,
      "step": 5950
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.24968041479587555,
      "learning_rate": 1.3990174410056234e-06,
      "loss": 0.7515,
      "step": 5951
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.23153209686279297,
      "learning_rate": 1.3904148954715811e-06,
      "loss": 0.9275,
      "step": 5952
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.44120264053344727,
      "learning_rate": 1.3818386948934447e-06,
      "loss": 0.8161,
      "step": 5953
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.16861969232559204,
      "learning_rate": 1.3732888415624368e-06,
      "loss": 0.8985,
      "step": 5954
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.17123784124851227,
      "learning_rate": 1.3647653377627968e-06,
      "loss": 0.9292,
      "step": 5955
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.33694422245025635,
      "learning_rate": 1.3562681857716918e-06,
      "loss": 0.695,
      "step": 5956
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.45563340187072754,
      "learning_rate": 1.347797387859251e-06,
      "loss": 0.7653,
      "step": 5957
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.36162278056144714,
      "learning_rate": 1.3393529462885856e-06,
      "loss": 1.007,
      "step": 5958
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.18599432706832886,
      "learning_rate": 1.3309348633157247e-06,
      "loss": 0.8643,
      "step": 5959
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.36138421297073364,
      "learning_rate": 1.3225431411896915e-06,
      "loss": 0.937,
      "step": 5960
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.16642342507839203,
      "learning_rate": 1.3141777821524703e-06,
      "loss": 0.6827,
      "step": 5961
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.672707200050354,
      "learning_rate": 1.3058387884389623e-06,
      "loss": 0.7631,
      "step": 5962
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.35908234119415283,
      "learning_rate": 1.2975261622770519e-06,
      "loss": 0.7759,
      "step": 5963
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.19751128554344177,
      "learning_rate": 1.2892399058875848e-06,
      "loss": 0.9004,
      "step": 5964
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.36394333839416504,
      "learning_rate": 1.2809800214843459e-06,
      "loss": 0.7695,
      "step": 5965
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.24150350689888,
      "learning_rate": 1.2727465112740922e-06,
      "loss": 0.8352,
      "step": 5966
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.29997822642326355,
      "learning_rate": 1.2645393774564973e-06,
      "loss": 0.9487,
      "step": 5967
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.21333152055740356,
      "learning_rate": 1.2563586222242517e-06,
      "loss": 0.9877,
      "step": 5968
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.35816720128059387,
      "learning_rate": 1.2482042477629296e-06,
      "loss": 0.7863,
      "step": 5969
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.41287845373153687,
      "learning_rate": 1.2400762562510881e-06,
      "loss": 0.9513,
      "step": 5970
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.14952696859836578,
      "learning_rate": 1.231974649860268e-06,
      "loss": 0.815,
      "step": 5971
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.18521910905838013,
      "learning_rate": 1.2238994307548934e-06,
      "loss": 0.651,
      "step": 5972
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.17057904601097107,
      "learning_rate": 1.215850601092383e-06,
      "loss": 0.7386,
      "step": 5973
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.8067764043807983,
      "learning_rate": 1.207828163023117e-06,
      "loss": 0.7781,
      "step": 5974
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.40521925687789917,
      "learning_rate": 1.1998321186903805e-06,
      "loss": 0.601,
      "step": 5975
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.3997703492641449,
      "learning_rate": 1.1918624702304427e-06,
      "loss": 0.8041,
      "step": 5976
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.508115291595459,
      "learning_rate": 1.1839192197725001e-06,
      "loss": 0.5157,
      "step": 5977
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.41677743196487427,
      "learning_rate": 1.1760023694387113e-06,
      "loss": 0.7297,
      "step": 5978
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.22085434198379517,
      "learning_rate": 1.1681119213441726e-06,
      "loss": 0.8519,
      "step": 5979
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.2570361793041229,
      "learning_rate": 1.1602478775969317e-06,
      "loss": 0.8787,
      "step": 5980
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.158119797706604,
      "learning_rate": 1.1524102402979852e-06,
      "loss": 0.8587,
      "step": 5981
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.24677011370658875,
      "learning_rate": 1.1445990115412586e-06,
      "loss": 0.7339,
      "step": 5982
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.3531719148159027,
      "learning_rate": 1.136814193413649e-06,
      "loss": 0.9061,
      "step": 5983
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.2146756798028946,
      "learning_rate": 1.1290557879949594e-06,
      "loss": 0.7146,
      "step": 5984
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.24785973131656647,
      "learning_rate": 1.121323797357976e-06,
      "loss": 0.7864,
      "step": 5985
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.5951249599456787,
      "learning_rate": 1.1136182235684023e-06,
      "loss": 0.7043,
      "step": 5986
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.26488032937049866,
      "learning_rate": 1.1059390686848915e-06,
      "loss": 0.6578,
      "step": 5987
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.2726629674434662,
      "learning_rate": 1.0982863347590467e-06,
      "loss": 0.7752,
      "step": 5988
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.2584904730319977,
      "learning_rate": 1.090660023835388e-06,
      "loss": 0.6548,
      "step": 5989
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.5480527281761169,
      "learning_rate": 1.0830601379514194e-06,
      "loss": 0.5192,
      "step": 5990
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.41434454917907715,
      "learning_rate": 1.0754866791375384e-06,
      "loss": 0.9661,
      "step": 5991
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.3605526089668274,
      "learning_rate": 1.0679396494171156e-06,
      "loss": 0.9347,
      "step": 5992
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.2350638210773468,
      "learning_rate": 1.0604190508064272e-06,
      "loss": 0.7883,
      "step": 5993
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.3989630937576294,
      "learning_rate": 1.0529248853147323e-06,
      "loss": 0.9391,
      "step": 5994
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.30576831102371216,
      "learning_rate": 1.0454571549441849e-06,
      "loss": 0.846,
      "step": 5995
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.19326728582382202,
      "learning_rate": 1.0380158616899116e-06,
      "loss": 0.6281,
      "step": 5996
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.18209590017795563,
      "learning_rate": 1.0306010075399442e-06,
      "loss": 0.5694,
      "step": 5997
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.22229573130607605,
      "learning_rate": 1.0232125944752756e-06,
      "loss": 0.8165,
      "step": 5998
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.2084396481513977,
      "learning_rate": 1.0158506244698273e-06,
      "loss": 0.8693,
      "step": 5999
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.37782716751098633,
      "learning_rate": 1.0085150994904592e-06,
      "loss": 1.0035,
      "step": 6000
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.3085147440433502,
      "learning_rate": 1.0012060214969476e-06,
      "loss": 0.7097,
      "step": 6001
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.605918824672699,
      "learning_rate": 9.9392339244202e-07,
      "loss": 0.7413,
      "step": 6002
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.19640785455703735,
      "learning_rate": 9.866672142713418e-07,
      "loss": 1.0711,
      "step": 6003
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.6342986822128296,
      "learning_rate": 9.794374889234958e-07,
      "loss": 0.9149,
      "step": 6004
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.24689407646656036,
      "learning_rate": 9.722342183300149e-07,
      "loss": 1.1282,
      "step": 6005
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.20738601684570312,
      "learning_rate": 9.650574044153483e-07,
      "loss": 0.7686,
      "step": 6006
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.3210364878177643,
      "learning_rate": 9.579070490968955e-07,
      "loss": 0.9197,
      "step": 6007
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.6366755962371826,
      "learning_rate": 9.50783154284951e-07,
      "loss": 1.045,
      "step": 6008
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.2531128525733948,
      "learning_rate": 9.436857218827922e-07,
      "loss": 0.6042,
      "step": 6009
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.4710052013397217,
      "learning_rate": 9.36614753786591e-07,
      "loss": 1.1869,
      "step": 6010
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.3764288127422333,
      "learning_rate": 9.295702518854476e-07,
      "loss": 0.9501,
      "step": 6011
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.8350560069084167,
      "learning_rate": 9.225522180614121e-07,
      "loss": 0.799,
      "step": 6012
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.4955502450466156,
      "learning_rate": 9.155606541894513e-07,
      "loss": 0.7002,
      "step": 6013
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.2795802354812622,
      "learning_rate": 9.085955621374598e-07,
      "loss": 0.5656,
      "step": 6014
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.2926497459411621,
      "learning_rate": 9.016569437662492e-07,
      "loss": 0.9496,
      "step": 6015
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.27332183718681335,
      "learning_rate": 8.947448009295812e-07,
      "loss": 0.7638,
      "step": 6016
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.2963472306728363,
      "learning_rate": 8.87859135474145e-07,
      "loss": 0.8241,
      "step": 6017
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.3456336259841919,
      "learning_rate": 8.809999492395249e-07,
      "loss": 0.8996,
      "step": 6018
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.2592528164386749,
      "learning_rate": 8.741672440582438e-07,
      "loss": 0.766,
      "step": 6019
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.25219377875328064,
      "learning_rate": 8.673610217557859e-07,
      "loss": 0.8297,
      "step": 6020
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.3172195851802826,
      "learning_rate": 8.605812841505078e-07,
      "loss": 0.6858,
      "step": 6021
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.6351509094238281,
      "learning_rate": 8.538280330537274e-07,
      "loss": 0.8551,
      "step": 6022
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.660088062286377,
      "learning_rate": 8.471012702696568e-07,
      "loss": 0.6304,
      "step": 6023
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.2620913088321686,
      "learning_rate": 8.404009975954475e-07,
      "loss": 0.9006,
      "step": 6024
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.24987657368183136,
      "learning_rate": 8.337272168211895e-07,
      "loss": 0.8635,
      "step": 6025
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.2333436757326126,
      "learning_rate": 8.270799297298681e-07,
      "loss": 0.6282,
      "step": 6026
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.2935751676559448,
      "learning_rate": 8.204591380973958e-07,
      "loss": 0.9781,
      "step": 6027
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.38058769702911377,
      "learning_rate": 8.138648436926243e-07,
      "loss": 0.8102,
      "step": 6028
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.2998814582824707,
      "learning_rate": 8.072970482773001e-07,
      "loss": 1.0801,
      "step": 6029
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.1770964115858078,
      "learning_rate": 8.007557536061083e-07,
      "loss": 0.8001,
      "step": 6030
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.15171897411346436,
      "learning_rate": 7.9424096142664e-07,
      "loss": 0.7099,
      "step": 6031
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.23406916856765747,
      "learning_rate": 7.877526734794361e-07,
      "loss": 0.8997,
      "step": 6032
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.33138927817344666,
      "learning_rate": 7.812908914979212e-07,
      "loss": 0.8611,
      "step": 6033
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.27427050471305847,
      "learning_rate": 7.748556172084475e-07,
      "loss": 0.8515,
      "step": 6034
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.33611276745796204,
      "learning_rate": 7.684468523303068e-07,
      "loss": 0.8184,
      "step": 6035
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.6454780697822571,
      "learning_rate": 7.620645985756847e-07,
      "loss": 0.9393,
      "step": 6036
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.27736255526542664,
      "learning_rate": 7.55708857649673e-07,
      "loss": 0.8132,
      "step": 6037
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.19003897905349731,
      "learning_rate": 7.493796312503354e-07,
      "loss": 0.767,
      "step": 6038
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.23193712532520294,
      "learning_rate": 7.430769210685751e-07,
      "loss": 0.7928,
      "step": 6039
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.28061506152153015,
      "learning_rate": 7.368007287882784e-07,
      "loss": 0.5441,
      "step": 6040
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.31741005182266235,
      "learning_rate": 7.305510560862039e-07,
      "loss": 0.8641,
      "step": 6041
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.2687416672706604,
      "learning_rate": 7.243279046320605e-07,
      "loss": 0.767,
      "step": 6042
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.2734261155128479,
      "learning_rate": 7.181312760884296e-07,
      "loss": 0.7648,
      "step": 6043
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.069840431213379,
      "learning_rate": 7.11961172110831e-07,
      "loss": 0.9231,
      "step": 6044
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9884225130081177,
      "learning_rate": 7.058175943477241e-07,
      "loss": 0.6598,
      "step": 6045
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.6914166808128357,
      "learning_rate": 6.99700544440407e-07,
      "loss": 1.0342,
      "step": 6046
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.1815134584903717,
      "learning_rate": 6.936100240231836e-07,
      "loss": 0.8165,
      "step": 6047
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.42613425850868225,
      "learning_rate": 6.875460347231855e-07,
      "loss": 0.7773,
      "step": 6048
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.45787715911865234,
      "learning_rate": 6.815085781605168e-07,
      "loss": 0.8484,
      "step": 6049
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.19389313459396362,
      "learning_rate": 6.754976559481652e-07,
      "loss": 0.8082,
      "step": 6050
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.24878183007240295,
      "learning_rate": 6.695132696920348e-07,
      "loss": 0.7549,
      "step": 6051
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.4540415108203888,
      "learning_rate": 6.635554209909245e-07,
      "loss": 0.8771,
      "step": 6052
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.2838927209377289,
      "learning_rate": 6.576241114365833e-07,
      "loss": 0.7371,
      "step": 6053
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.293276309967041,
      "learning_rate": 6.517193426136215e-07,
      "loss": 0.8644,
      "step": 6054
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.1642562597990036,
      "learning_rate": 6.458411160996103e-07,
      "loss": 0.924,
      "step": 6055
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.40296709537506104,
      "learning_rate": 6.399894334649714e-07,
      "loss": 1.0424,
      "step": 6056
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4261631965637207,
      "learning_rate": 6.341642962730765e-07,
      "loss": 0.8264,
      "step": 6057
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.4184379577636719,
      "learning_rate": 6.283657060802028e-07,
      "loss": 0.9563,
      "step": 6058
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.3132166266441345,
      "learning_rate": 6.225936644355224e-07,
      "loss": 0.8146,
      "step": 6059
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.1385408639907837,
      "learning_rate": 6.168481728811126e-07,
      "loss": 0.6648,
      "step": 6060
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.30579519271850586,
      "learning_rate": 6.11129232951968e-07,
      "loss": 0.868,
      "step": 6061
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.2061222940683365,
      "learning_rate": 6.054368461759774e-07,
      "loss": 0.544,
      "step": 6062
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.5566745400428772,
      "learning_rate": 5.997710140739577e-07,
      "loss": 1.0429,
      "step": 6063
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.2290748506784439,
      "learning_rate": 5.941317381595978e-07,
      "loss": 0.6997,
      "step": 6064
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.2018236666917801,
      "learning_rate": 5.885190199395263e-07,
      "loss": 0.8444,
      "step": 6065
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.2537176012992859,
      "learning_rate": 5.829328609132545e-07,
      "loss": 0.8409,
      "step": 6066
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.18149708211421967,
      "learning_rate": 5.773732625732109e-07,
      "loss": 0.6467,
      "step": 6067
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.24854719638824463,
      "learning_rate": 5.718402264047074e-07,
      "loss": 0.7054,
      "step": 6068
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.530846893787384,
      "learning_rate": 5.663337538859837e-07,
      "loss": 0.7947,
      "step": 6069
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.6498779654502869,
      "learning_rate": 5.608538464881741e-07,
      "loss": 0.7505,
      "step": 6070
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.3129858076572418,
      "learning_rate": 5.554005056753187e-07,
      "loss": 1.0223,
      "step": 6071
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.2334202229976654,
      "learning_rate": 5.499737329043298e-07,
      "loss": 0.588,
      "step": 6072
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.27373936772346497,
      "learning_rate": 5.445735296250698e-07,
      "loss": 0.8867,
      "step": 6073
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.28529247641563416,
      "learning_rate": 5.391998972802848e-07,
      "loss": 0.8073,
      "step": 6074
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.17607451975345612,
      "learning_rate": 5.338528373055929e-07,
      "loss": 0.8134,
      "step": 6075
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.2665923833847046,
      "learning_rate": 5.285323511295625e-07,
      "loss": 0.9364,
      "step": 6076
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.251571387052536,
      "learning_rate": 5.232384401736123e-07,
      "loss": 0.7341,
      "step": 6077
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.26900967955589294,
      "learning_rate": 5.179711058521109e-07,
      "loss": 1.1818,
      "step": 6078
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9304454922676086,
      "learning_rate": 5.127303495722879e-07,
      "loss": 0.6907,
      "step": 6079
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.2271474003791809,
      "learning_rate": 5.075161727342903e-07,
      "loss": 0.6669,
      "step": 6080
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.32021403312683105,
      "learning_rate": 5.023285767311592e-07,
      "loss": 0.8373,
      "step": 6081
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.3091064691543579,
      "learning_rate": 4.971675629488304e-07,
      "loss": 0.6564,
      "step": 6082
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.1863096058368683,
      "learning_rate": 4.920331327661453e-07,
      "loss": 0.8486,
      "step": 6083
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.4463178515434265,
      "learning_rate": 4.869252875548402e-07,
      "loss": 0.8593,
      "step": 6084
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.19975678622722626,
      "learning_rate": 4.818440286795456e-07,
      "loss": 0.7644,
      "step": 6085
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.2523190975189209,
      "learning_rate": 4.7678935749780885e-07,
      "loss": 0.5904,
      "step": 6086
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.28413674235343933,
      "learning_rate": 4.7176127536003866e-07,
      "loss": 0.8301,
      "step": 6087
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.34219247102737427,
      "learning_rate": 4.667597836095605e-07,
      "loss": 0.9974,
      "step": 6088
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.17452973127365112,
      "learning_rate": 4.6178488358260554e-07,
      "loss": 0.7712,
      "step": 6089
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.34090736508369446,
      "learning_rate": 4.568365766082661e-07,
      "loss": 0.4797,
      "step": 6090
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.2280740737915039,
      "learning_rate": 4.519148640085846e-07,
      "loss": 0.6887,
      "step": 6091
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.32016515731811523,
      "learning_rate": 4.470197470984427e-07,
      "loss": 0.9334,
      "step": 6092
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.30393362045288086,
      "learning_rate": 4.4215122718564985e-07,
      "loss": 1.0564,
      "step": 6093
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.24873268604278564,
      "learning_rate": 4.3730930557090985e-07,
      "loss": 0.7966,
      "step": 6094
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.5179977416992188,
      "learning_rate": 4.3249398354777703e-07,
      "loss": 0.8612,
      "step": 6095
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.16667160391807556,
      "learning_rate": 4.2770526240277775e-07,
      "loss": 0.7304,
      "step": 6096
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.18649433553218842,
      "learning_rate": 4.2294314341525533e-07,
      "loss": 0.7859,
      "step": 6097
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.2236190289258957,
      "learning_rate": 4.18207627857492e-07,
      "loss": 0.688,
      "step": 6098
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.2591651976108551,
      "learning_rate": 4.134987169946536e-07,
      "loss": 0.9223,
      "step": 6099
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.2101471722126007,
      "learning_rate": 4.0881641208476707e-07,
      "loss": 0.9829,
      "step": 6100
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.3498663306236267,
      "learning_rate": 4.0416071437880953e-07,
      "loss": 0.7211,
      "step": 6101
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.31710997223854065,
      "learning_rate": 3.9953162512058604e-07,
      "loss": 0.7917,
      "step": 6102
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.29149988293647766,
      "learning_rate": 3.949291455468518e-07,
      "loss": 0.8661,
      "step": 6103
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.37017494440078735,
      "learning_rate": 3.903532768872009e-07,
      "loss": 0.7238,
      "step": 6104
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.32702434062957764,
      "learning_rate": 3.858040203641555e-07,
      "loss": 0.9708,
      "step": 6105
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.297207772731781,
      "learning_rate": 3.812813771931212e-07,
      "loss": 0.7941,
      "step": 6106
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.29179316759109497,
      "learning_rate": 3.767853485823647e-07,
      "loss": 0.6818,
      "step": 6107
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.349439799785614,
      "learning_rate": 3.7231593573308077e-07,
      "loss": 0.8232,
      "step": 6108
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.224651500582695,
      "learning_rate": 3.6787313983933646e-07,
      "loss": 1.0242,
      "step": 6109
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.36449554562568665,
      "learning_rate": 3.634569620880823e-07,
      "loss": 0.8953,
      "step": 6110
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.3117818832397461,
      "learning_rate": 3.590674036591635e-07,
      "loss": 0.8088,
      "step": 6111
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.23121905326843262,
      "learning_rate": 3.5470446572531957e-07,
      "loss": 0.9692,
      "step": 6112
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.2613270878791809,
      "learning_rate": 3.503681494521627e-07,
      "loss": 0.8107,
      "step": 6113
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.38488107919692993,
      "learning_rate": 3.460584559981994e-07,
      "loss": 0.8278,
      "step": 6114
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.20438739657402039,
      "learning_rate": 3.417753865148421e-07,
      "loss": 0.9134,
      "step": 6115
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.32767242193222046,
      "learning_rate": 3.3751894214635315e-07,
      "loss": 0.9492,
      "step": 6116
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.2548084259033203,
      "learning_rate": 3.3328912402991184e-07,
      "loss": 1.0041,
      "step": 6117
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.21454302966594696,
      "learning_rate": 3.290859332955809e-07,
      "loss": 0.7421,
      "step": 6118
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.25072962045669556,
      "learning_rate": 3.2490937106629537e-07,
      "loss": 0.7218,
      "step": 6119
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.32757842540740967,
      "learning_rate": 3.2075943845788494e-07,
      "loss": 0.7429,
      "step": 6120
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.34523284435272217,
      "learning_rate": 3.1663613657906266e-07,
      "loss": 0.9756,
      "step": 6121
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.21411611139774323,
      "learning_rate": 3.125394665314363e-07,
      "loss": 1.0018,
      "step": 6122
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.21236063539981842,
      "learning_rate": 3.0846942940946367e-07,
      "loss": 0.9476,
      "step": 6123
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.24380768835544586,
      "learning_rate": 3.044260263005416e-07,
      "loss": 0.735,
      "step": 6124
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.2974204421043396,
      "learning_rate": 3.004092582849172e-07,
      "loss": 0.9513,
      "step": 6125
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.5563105344772339,
      "learning_rate": 2.964191264357097e-07,
      "loss": 1.0888,
      "step": 6126
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.28711244463920593,
      "learning_rate": 2.924556318189553e-07,
      "loss": 0.8387,
      "step": 6127
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.24989481270313263,
      "learning_rate": 2.8851877549356255e-07,
      "loss": 0.8738,
      "step": 6128
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.16848157346248627,
      "learning_rate": 2.846085585113012e-07,
      "loss": 0.781,
      "step": 6129
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.25759729743003845,
      "learning_rate": 2.807249819168578e-07,
      "loss": 0.8332,
      "step": 6130
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.2562333941459656,
      "learning_rate": 2.768680467477691e-07,
      "loss": 0.8026,
      "step": 6131
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.2602846026420593,
      "learning_rate": 2.730377540344886e-07,
      "loss": 0.9832,
      "step": 6132
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.22353051602840424,
      "learning_rate": 2.6923410480032e-07,
      "loss": 0.8382,
      "step": 6133
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.8594008088111877,
      "learning_rate": 2.6545710006147253e-07,
      "loss": 1.0315,
      "step": 6134
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.2804906964302063,
      "learning_rate": 2.6170674082701683e-07,
      "loss": 0.8718,
      "step": 6135
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.49900782108306885,
      "learning_rate": 2.5798302809891816e-07,
      "loss": 0.6022,
      "step": 6136
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.6170993447303772,
      "learning_rate": 2.5428596287202513e-07,
      "loss": 0.9246,
      "step": 6137
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.462458997964859,
      "learning_rate": 2.506155461340587e-07,
      "loss": 0.8844,
      "step": 6138
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.4136393070220947,
      "learning_rate": 2.469717788656123e-07,
      "loss": 0.88,
      "step": 6139
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.20301836729049683,
      "learning_rate": 2.43354662040185e-07,
      "loss": 0.73,
      "step": 6140
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.25606173276901245,
      "learning_rate": 2.3976419662413707e-07,
      "loss": 0.8259,
      "step": 6141
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.4538917541503906,
      "learning_rate": 2.3620038357671236e-07,
      "loss": 0.7286,
      "step": 6142
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.19159094989299774,
      "learning_rate": 2.3266322385002704e-07,
      "loss": 0.9463,
      "step": 6143
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.2194126397371292,
      "learning_rate": 2.291527183890918e-07,
      "loss": 0.7384,
      "step": 6144
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.5067562460899353,
      "learning_rate": 2.2566886813177866e-07,
      "loss": 0.7938,
      "step": 6145
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.28635793924331665,
      "learning_rate": 2.2221167400886532e-07,
      "loss": 0.8697,
      "step": 6146
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.21997010707855225,
      "learning_rate": 2.1878113694397962e-07,
      "loss": 0.7968,
      "step": 6147
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.3544570207595825,
      "learning_rate": 2.1537725785363283e-07,
      "loss": 0.8206,
      "step": 6148
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.5479974150657654,
      "learning_rate": 2.1200003764721978e-07,
      "loss": 0.8731,
      "step": 6149
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.22441183030605316,
      "learning_rate": 2.0864947722702978e-07,
      "loss": 0.8247,
      "step": 6150
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0319286584854126,
      "learning_rate": 2.0532557748820236e-07,
      "loss": 0.8042,
      "step": 6151
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.2335972785949707,
      "learning_rate": 2.0202833931876052e-07,
      "loss": 0.6502,
      "step": 6152
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.2674984931945801,
      "learning_rate": 1.9875776359962185e-07,
      "loss": 0.8077,
      "step": 6153
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.21517013013362885,
      "learning_rate": 1.9551385120454292e-07,
      "loss": 0.5389,
      "step": 6154
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.25688499212265015,
      "learning_rate": 1.9229660300020824e-07,
      "loss": 0.88,
      "step": 6155
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.30134060978889465,
      "learning_rate": 1.891060198461303e-07,
      "loss": 0.6745,
      "step": 6156
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.2135041505098343,
      "learning_rate": 1.8594210259472723e-07,
      "loss": 0.9541,
      "step": 6157
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.18719099462032318,
      "learning_rate": 1.828048520912895e-07,
      "loss": 1.0168,
      "step": 6158
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.441806823015213,
      "learning_rate": 1.7969426917398003e-07,
      "loss": 0.9817,
      "step": 6159
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.21939797699451447,
      "learning_rate": 1.7661035467382292e-07,
      "loss": 1.0523,
      "step": 6160
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.1896337866783142,
      "learning_rate": 1.7355310941473691e-07,
      "loss": 0.9317,
      "step": 6161
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.32478460669517517,
      "learning_rate": 1.7052253421350196e-07,
      "loss": 0.784,
      "step": 6162
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.24761836230754852,
      "learning_rate": 1.6751862987979262e-07,
      "loss": 0.7992,
      "step": 6163
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.302736759185791,
      "learning_rate": 1.645413972161336e-07,
      "loss": 0.9737,
      "step": 6164
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.3876696825027466,
      "learning_rate": 1.615908370179442e-07,
      "loss": 0.7944,
      "step": 6165
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.31641116738319397,
      "learning_rate": 1.5866695007350497e-07,
      "loss": 0.7957,
      "step": 6166
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.13319022953510284,
      "learning_rate": 1.557697371639577e-07,
      "loss": 0.6339,
      "step": 6167
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.26414769887924194,
      "learning_rate": 1.5289919906336103e-07,
      "loss": 0.9356,
      "step": 6168
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.22032305598258972,
      "learning_rate": 1.500553365386015e-07,
      "loss": 0.9231,
      "step": 6169
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.5989689826965332,
      "learning_rate": 1.4723815034947131e-07,
      "loss": 0.7419,
      "step": 6170
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.45214366912841797,
      "learning_rate": 1.4444764124861287e-07,
      "loss": 0.7228,
      "step": 6171
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.15695081651210785,
      "learning_rate": 1.4168380998155206e-07,
      "loss": 0.7262,
      "step": 6172
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.25829800963401794,
      "learning_rate": 1.389466572866871e-07,
      "loss": 0.7087,
      "step": 6173
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.2413756549358368,
      "learning_rate": 1.362361838952775e-07,
      "loss": 0.8338,
      "step": 6174
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.26534128189086914,
      "learning_rate": 1.3355239053147727e-07,
      "loss": 0.7313,
      "step": 6175
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.6883341073989868,
      "learning_rate": 1.3089527791230182e-07,
      "loss": 0.8209,
      "step": 6176
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.6922610402107239,
      "learning_rate": 1.2826484674762774e-07,
      "loss": 0.6591,
      "step": 6177
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.33724939823150635,
      "learning_rate": 1.2566109774021506e-07,
      "loss": 0.9739,
      "step": 6178
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.39612141251564026,
      "learning_rate": 1.2308403158569626e-07,
      "loss": 0.9425,
      "step": 6179
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.7099065780639648,
      "learning_rate": 1.2053364897256504e-07,
      "loss": 0.8437,
      "step": 6180
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.2418508231639862,
      "learning_rate": 1.1800995058218745e-07,
      "loss": 0.7773,
      "step": 6181
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.4178754389286041,
      "learning_rate": 1.1551293708882416e-07,
      "loss": 0.7408,
      "step": 6182
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.2964886724948883,
      "learning_rate": 1.1304260915957488e-07,
      "loss": 0.7551,
      "step": 6183
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.3086206912994385,
      "learning_rate": 1.1059896745442277e-07,
      "loss": 0.7997,
      "step": 6184
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.35829317569732666,
      "learning_rate": 1.0818201262622341e-07,
      "loss": 0.8074,
      "step": 6185
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.2404836118221283,
      "learning_rate": 1.0579174532070469e-07,
      "loss": 0.9011,
      "step": 6186
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.15142062306404114,
      "learning_rate": 1.0342816617645578e-07,
      "loss": 0.7482,
      "step": 6187
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.2549605071544647,
      "learning_rate": 1.0109127582493827e-07,
      "loss": 0.575,
      "step": 6188
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.7328779697418213,
      "learning_rate": 9.878107489049715e-08,
      "loss": 0.7487,
      "step": 6189
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.19693073630332947,
      "learning_rate": 9.649756399031651e-08,
      "loss": 0.6452,
      "step": 6190
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.29776233434677124,
      "learning_rate": 9.42407437344861e-08,
      "loss": 0.7218,
      "step": 6191
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.39786022901535034,
      "learning_rate": 9.201061472594586e-08,
      "loss": 0.8718,
      "step": 6192
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.5726588368415833,
      "learning_rate": 8.980717756049695e-08,
      "loss": 0.773,
      "step": 6193
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.24044294655323029,
      "learning_rate": 8.763043282682404e-08,
      "loss": 0.5431,
      "step": 6194
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.24766495823860168,
      "learning_rate": 8.548038110648415e-08,
      "loss": 0.9173,
      "step": 6195
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.280032753944397,
      "learning_rate": 8.335702297387338e-08,
      "loss": 0.7831,
      "step": 6196
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.2900524437427521,
      "learning_rate": 8.126035899629347e-08,
      "loss": 0.7692,
      "step": 6197
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.31158602237701416,
      "learning_rate": 7.919038973389636e-08,
      "loss": 1.0322,
      "step": 6198
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.7120456695556641,
      "learning_rate": 7.714711573970634e-08,
      "loss": 0.7659,
      "step": 6199
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.15650874376296997,
      "learning_rate": 7.513053755959786e-08,
      "loss": 0.858,
      "step": 6200
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.2944503426551819,
      "learning_rate": 7.314065573233997e-08,
      "loss": 0.6659,
      "step": 6201
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.26668286323547363,
      "learning_rate": 7.117747078956294e-08,
      "loss": 0.939,
      "step": 6202
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.28265830874443054,
      "learning_rate": 6.924098325575834e-08,
      "loss": 0.6109,
      "step": 6203
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.2420104444026947,
      "learning_rate": 6.73311936482679e-08,
      "loss": 0.7312,
      "step": 6204
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.2076893150806427,
      "learning_rate": 6.544810247733902e-08,
      "loss": 0.8454,
      "step": 6205
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.6145219802856445,
      "learning_rate": 6.359171024606925e-08,
      "loss": 0.8049,
      "step": 6206
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.2724874019622803,
      "learning_rate": 6.176201745040633e-08,
      "loss": 0.8283,
      "step": 6207
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.28051552176475525,
      "learning_rate": 5.995902457918146e-08,
      "loss": 0.7426,
      "step": 6208
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.296589732170105,
      "learning_rate": 5.818273211408709e-08,
      "loss": 0.6491,
      "step": 6209
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.47591477632522583,
      "learning_rate": 5.643314052969917e-08,
      "loss": 1.0248,
      "step": 6210
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.22306697070598602,
      "learning_rate": 5.4710250293432686e-08,
      "loss": 1.0006,
      "step": 6211
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.46611711382865906,
      "learning_rate": 5.301406186558611e-08,
      "loss": 0.9103,
      "step": 6212
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.31883183121681213,
      "learning_rate": 5.1344575699319164e-08,
      "loss": 1.0191,
      "step": 6213
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.2864503264427185,
      "learning_rate": 4.970179224066396e-08,
      "loss": 0.932,
      "step": 6214
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.3269355595111847,
      "learning_rate": 4.808571192851385e-08,
      "loss": 0.7453,
      "step": 6215
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.5846944451332092,
      "learning_rate": 4.649633519461238e-08,
      "loss": 0.8556,
      "step": 6216
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.6360899209976196,
      "learning_rate": 4.493366246360875e-08,
      "loss": 0.7466,
      "step": 6217
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.240190327167511,
      "learning_rate": 4.339769415296901e-08,
      "loss": 0.6959,
      "step": 6218
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.305060476064682,
      "learning_rate": 4.1888430673064916e-08,
      "loss": 0.8668,
      "step": 6219
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.328843355178833,
      "learning_rate": 4.040587242711835e-08,
      "loss": 0.8707,
      "step": 6220
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.2178790122270584,
      "learning_rate": 3.895001981121249e-08,
      "loss": 0.698,
      "step": 6221
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.2922777831554413,
      "learning_rate": 3.7520873214291763e-08,
      "loss": 0.7959,
      "step": 6222
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.3351325988769531,
      "learning_rate": 3.611843301817297e-08,
      "loss": 0.8057,
      "step": 6223
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.3135840594768524,
      "learning_rate": 3.474269959754528e-08,
      "loss": 0.8382,
      "step": 6224
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.16495545208454132,
      "learning_rate": 3.339367331995913e-08,
      "loss": 0.7175,
      "step": 6225
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.5205861926078796,
      "learning_rate": 3.207135454581511e-08,
      "loss": 0.9882,
      "step": 6226
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.19471047818660736,
      "learning_rate": 3.07757436284084e-08,
      "loss": 0.7378,
      "step": 6227
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.4493439495563507,
      "learning_rate": 2.950684091385103e-08,
      "loss": 0.7576,
      "step": 6228
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.3881864845752716,
      "learning_rate": 2.8264646741171797e-08,
      "loss": 0.7453,
      "step": 6229
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.3481845557689667,
      "learning_rate": 2.7049161442227466e-08,
      "loss": 0.5023,
      "step": 6230
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.8823036551475525,
      "learning_rate": 2.586038534176938e-08,
      "loss": 0.7173,
      "step": 6231
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.2629055380821228,
      "learning_rate": 2.4698318757365724e-08,
      "loss": 0.9011,
      "step": 6232
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.21714197099208832,
      "learning_rate": 2.3562961999512577e-08,
      "loss": 0.7786,
      "step": 6233
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.19259251654148102,
      "learning_rate": 2.2454315371522873e-08,
      "loss": 0.7469,
      "step": 6234
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.3478018641471863,
      "learning_rate": 2.13723791695819e-08,
      "loss": 0.8144,
      "step": 6235
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.29506245255470276,
      "learning_rate": 2.0317153682747335e-08,
      "loss": 0.8694,
      "step": 6236
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.28919464349746704,
      "learning_rate": 1.9288639192938107e-08,
      "loss": 1.0059,
      "step": 6237
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.28163912892341614,
      "learning_rate": 1.8286835974934413e-08,
      "loss": 0.8645,
      "step": 6238
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.6244699954986572,
      "learning_rate": 1.731174429638882e-08,
      "loss": 0.9198,
      "step": 6239
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.27753958106040955,
      "learning_rate": 1.6363364417815164e-08,
      "loss": 0.7098,
      "step": 6240
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.316998153924942,
      "learning_rate": 1.5441696592566336e-08,
      "loss": 0.9299,
      "step": 6241
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.2699652314186096,
      "learning_rate": 1.4546741066900903e-08,
      "loss": 0.8261,
      "step": 6242
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.12154107540845871,
      "learning_rate": 1.3678498079905399e-08,
      "loss": 0.8247,
      "step": 6243
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.34004801511764526,
      "learning_rate": 1.283696786354982e-08,
      "loss": 0.8649,
      "step": 6244
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.23861825466156006,
      "learning_rate": 1.2022150642654328e-08,
      "loss": 0.7486,
      "step": 6245
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.23525449633598328,
      "learning_rate": 1.1234046634922558e-08,
      "loss": 0.7802,
      "step": 6246
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.15253321826457977,
      "learning_rate": 1.0472656050886098e-08,
      "loss": 0.7056,
      "step": 6247
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.29623252153396606,
      "learning_rate": 9.737979093982219e-09,
      "loss": 0.7618,
      "step": 6248
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.3934471309185028,
      "learning_rate": 9.030015960487249e-09,
      "loss": 1.0012,
      "step": 6249
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.2374650537967682,
      "learning_rate": 8.348766839527677e-09,
      "loss": 0.8279,
      "step": 6250
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.8445020914077759,
      "learning_rate": 7.694231913124572e-09,
      "loss": 0.9135,
      "step": 6251
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.2725718021392822,
      "learning_rate": 7.066411356138059e-09,
      "loss": 0.9109,
      "step": 6252
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.1751037985086441,
      "learning_rate": 6.465305336311733e-09,
      "loss": 0.695,
      "step": 6253
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.5786471962928772,
      "learning_rate": 5.890914014217152e-09,
      "loss": 0.875,
      "step": 6254
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.26099222898483276,
      "learning_rate": 5.343237543331548e-09,
      "loss": 0.6828,
      "step": 6255
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.22649329900741577,
      "learning_rate": 4.82227606997121e-09,
      "loss": 0.7939,
      "step": 6256
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.3831254839897156,
      "learning_rate": 4.328029733302596e-09,
      "loss": 0.7709,
      "step": 6257
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.24478775262832642,
      "learning_rate": 3.860498665386736e-09,
      "loss": 0.8832,
      "step": 6258
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.20791685581207275,
      "learning_rate": 3.4196829911348205e-09,
      "loss": 0.941,
      "step": 6259
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.32221993803977966,
      "learning_rate": 3.0055828283082066e-09,
      "loss": 0.7291,
      "step": 6260
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.7361505627632141,
      "learning_rate": 2.6181982875295163e-09,
      "loss": 0.68,
      "step": 6261
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.25268927216529846,
      "learning_rate": 2.2575294723159445e-09,
      "loss": 0.8699,
      "step": 6262
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.6639279723167419,
      "learning_rate": 1.9235764790126455e-09,
      "loss": 0.8227,
      "step": 6263
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.1440669745206833,
      "learning_rate": 1.616339396837141e-09,
      "loss": 0.908,
      "step": 6264
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.5527451038360596,
      "learning_rate": 1.335818307890424e-09,
      "loss": 0.784,
      "step": 6265
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.22039932012557983,
      "learning_rate": 1.0820132870903443e-09,
      "loss": 0.8609,
      "step": 6266
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.18587030470371246,
      "learning_rate": 8.549244022604264e-10,
      "loss": 0.8206,
      "step": 6267
    },
    {
      "epoch": 1.0,
      "step": 6267,
      "total_flos": 4.5409781464170496e+17,
      "train_loss": 0.8411025973395029,
      "train_runtime": 49907.3994,
      "train_samples_per_second": 4.018,
      "train_steps_per_second": 0.126
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 6267,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "total_flos": 4.5409781464170496e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}