{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 6664,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0,
      "eval_loss": 8.257464408874512,
      "eval_runtime": 3786.6179,
      "eval_samples_per_second": 12.236,
      "eval_steps_per_second": 0.765,
      "step": 0
    },
    {
      "epoch": 0.00030012004801920766,
      "grad_norm": 18.56479835510254,
      "learning_rate": 0.0,
      "loss": 4.7242,
      "step": 1
    },
    {
      "epoch": 0.0006002400960384153,
      "grad_norm": 49.334659576416016,
      "learning_rate": 3.003003003003003e-07,
      "loss": 5.5074,
      "step": 2
    },
    {
      "epoch": 0.000900360144057623,
      "grad_norm": 20.299957275390625,
      "learning_rate": 6.006006006006006e-07,
      "loss": 4.7201,
      "step": 3
    },
    {
      "epoch": 0.0012004801920768306,
      "grad_norm": 21.796030044555664,
      "learning_rate": 9.00900900900901e-07,
      "loss": 4.9276,
      "step": 4
    },
    {
      "epoch": 0.0015006002400960385,
      "grad_norm": 61.833614349365234,
      "learning_rate": 1.2012012012012013e-06,
      "loss": 5.5694,
      "step": 5
    },
    {
      "epoch": 0.001800720288115246,
      "grad_norm": 19.953771591186523,
      "learning_rate": 1.5015015015015015e-06,
      "loss": 4.7228,
      "step": 6
    },
    {
      "epoch": 0.0021008403361344537,
      "grad_norm": 26.493492126464844,
      "learning_rate": 1.801801801801802e-06,
      "loss": 5.1359,
      "step": 7
    },
    {
      "epoch": 0.0024009603841536613,
      "grad_norm": 16.626035690307617,
      "learning_rate": 2.102102102102102e-06,
      "loss": 4.5624,
      "step": 8
    },
    {
      "epoch": 0.0027010804321728693,
      "grad_norm": 19.844329833984375,
      "learning_rate": 2.4024024024024026e-06,
      "loss": 4.5288,
      "step": 9
    },
    {
      "epoch": 0.003001200480192077,
      "grad_norm": 19.03563690185547,
      "learning_rate": 2.702702702702703e-06,
      "loss": 4.8509,
      "step": 10
    },
    {
      "epoch": 0.0033013205282112846,
      "grad_norm": 12.096508979797363,
      "learning_rate": 3.003003003003003e-06,
      "loss": 4.272,
      "step": 11
    },
    {
      "epoch": 0.003601440576230492,
      "grad_norm": 13.593870162963867,
      "learning_rate": 3.3033033033033035e-06,
      "loss": 4.7412,
      "step": 12
    },
    {
      "epoch": 0.0039015606242496998,
      "grad_norm": 10.979002952575684,
      "learning_rate": 3.603603603603604e-06,
      "loss": 4.236,
      "step": 13
    },
    {
      "epoch": 0.004201680672268907,
      "grad_norm": 7.361409664154053,
      "learning_rate": 3.903903903903904e-06,
      "loss": 4.085,
      "step": 14
    },
    {
      "epoch": 0.004501800720288115,
      "grad_norm": 6.058810234069824,
      "learning_rate": 4.204204204204204e-06,
      "loss": 3.9739,
      "step": 15
    },
    {
      "epoch": 0.004801920768307323,
      "grad_norm": 4.1888227462768555,
      "learning_rate": 4.504504504504505e-06,
      "loss": 3.7215,
      "step": 16
    },
    {
      "epoch": 0.00510204081632653,
      "grad_norm": 3.432919979095459,
      "learning_rate": 4.804804804804805e-06,
      "loss": 3.7775,
      "step": 17
    },
    {
      "epoch": 0.005402160864345739,
      "grad_norm": 2.8430354595184326,
      "learning_rate": 5.105105105105106e-06,
      "loss": 3.692,
      "step": 18
    },
    {
      "epoch": 0.005702280912364946,
      "grad_norm": 2.4966061115264893,
      "learning_rate": 5.405405405405406e-06,
      "loss": 3.5742,
      "step": 19
    },
    {
      "epoch": 0.006002400960384154,
      "grad_norm": 2.3826422691345215,
      "learning_rate": 5.705705705705706e-06,
      "loss": 3.4681,
      "step": 20
    },
    {
      "epoch": 0.0063025210084033615,
      "grad_norm": 2.0872232913970947,
      "learning_rate": 6.006006006006006e-06,
      "loss": 3.5767,
      "step": 21
    },
    {
      "epoch": 0.006602641056422569,
      "grad_norm": 1.654006838798523,
      "learning_rate": 6.306306306306306e-06,
      "loss": 3.0309,
      "step": 22
    },
    {
      "epoch": 0.006902761104441777,
      "grad_norm": 1.6882315874099731,
      "learning_rate": 6.606606606606607e-06,
      "loss": 3.2576,
      "step": 23
    },
    {
      "epoch": 0.007202881152460984,
      "grad_norm": 1.6118031740188599,
      "learning_rate": 6.906906906906907e-06,
      "loss": 3.0787,
      "step": 24
    },
    {
      "epoch": 0.007503001200480192,
      "grad_norm": 1.591232419013977,
      "learning_rate": 7.207207207207208e-06,
      "loss": 3.1461,
      "step": 25
    },
    {
      "epoch": 0.0078031212484993995,
      "grad_norm": 1.4966458082199097,
      "learning_rate": 7.507507507507508e-06,
      "loss": 3.165,
      "step": 26
    },
    {
      "epoch": 0.008103241296518607,
      "grad_norm": 1.3096973896026611,
      "learning_rate": 7.807807807807808e-06,
      "loss": 3.0777,
      "step": 27
    },
    {
      "epoch": 0.008403361344537815,
      "grad_norm": 1.3433228731155396,
      "learning_rate": 8.108108108108109e-06,
      "loss": 3.1098,
      "step": 28
    },
    {
      "epoch": 0.008703481392557022,
      "grad_norm": 1.3368873596191406,
      "learning_rate": 8.408408408408409e-06,
      "loss": 2.9624,
      "step": 29
    },
    {
      "epoch": 0.00900360144057623,
      "grad_norm": 1.3983137607574463,
      "learning_rate": 8.708708708708708e-06,
      "loss": 2.434,
      "step": 30
    },
    {
      "epoch": 0.009303721488595438,
      "grad_norm": 1.2248708009719849,
      "learning_rate": 9.00900900900901e-06,
      "loss": 2.7106,
      "step": 31
    },
    {
      "epoch": 0.009603841536614645,
      "grad_norm": 1.2953872680664062,
      "learning_rate": 9.309309309309309e-06,
      "loss": 3.0944,
      "step": 32
    },
    {
      "epoch": 0.009903961584633853,
      "grad_norm": 1.2448747158050537,
      "learning_rate": 9.60960960960961e-06,
      "loss": 3.0083,
      "step": 33
    },
    {
      "epoch": 0.01020408163265306,
      "grad_norm": 1.2225483655929565,
      "learning_rate": 9.90990990990991e-06,
      "loss": 2.8514,
      "step": 34
    },
    {
      "epoch": 0.01050420168067227,
      "grad_norm": 1.0415458679199219,
      "learning_rate": 1.0210210210210211e-05,
      "loss": 2.5665,
      "step": 35
    },
    {
      "epoch": 0.010804321728691477,
      "grad_norm": 1.1476035118103027,
      "learning_rate": 1.051051051051051e-05,
      "loss": 2.6508,
      "step": 36
    },
    {
      "epoch": 0.011104441776710685,
      "grad_norm": 1.2470183372497559,
      "learning_rate": 1.0810810810810812e-05,
      "loss": 2.5615,
      "step": 37
    },
    {
      "epoch": 0.011404561824729893,
      "grad_norm": 0.9873207807540894,
      "learning_rate": 1.1111111111111112e-05,
      "loss": 2.371,
      "step": 38
    },
    {
      "epoch": 0.0117046818727491,
      "grad_norm": 1.2748939990997314,
      "learning_rate": 1.1411411411411411e-05,
      "loss": 2.4716,
      "step": 39
    },
    {
      "epoch": 0.012004801920768308,
      "grad_norm": 1.439241886138916,
      "learning_rate": 1.1711711711711713e-05,
      "loss": 2.4789,
      "step": 40
    },
    {
      "epoch": 0.012304921968787515,
      "grad_norm": 1.3756464719772339,
      "learning_rate": 1.2012012012012012e-05,
      "loss": 2.3341,
      "step": 41
    },
    {
      "epoch": 0.012605042016806723,
      "grad_norm": 0.8937436938285828,
      "learning_rate": 1.2312312312312313e-05,
      "loss": 1.9548,
      "step": 42
    },
    {
      "epoch": 0.01290516206482593,
      "grad_norm": 1.1486737728118896,
      "learning_rate": 1.2612612612612611e-05,
      "loss": 2.2497,
      "step": 43
    },
    {
      "epoch": 0.013205282112845138,
      "grad_norm": 1.0456668138504028,
      "learning_rate": 1.2912912912912914e-05,
      "loss": 2.256,
      "step": 44
    },
    {
      "epoch": 0.013505402160864346,
      "grad_norm": 1.137314796447754,
      "learning_rate": 1.3213213213213214e-05,
      "loss": 2.0677,
      "step": 45
    },
    {
      "epoch": 0.013805522208883553,
      "grad_norm": 1.262323021888733,
      "learning_rate": 1.3513513513513515e-05,
      "loss": 2.0702,
      "step": 46
    },
    {
      "epoch": 0.014105642256902761,
      "grad_norm": 0.7741314768791199,
      "learning_rate": 1.3813813813813815e-05,
      "loss": 1.7912,
      "step": 47
    },
    {
      "epoch": 0.014405762304921969,
      "grad_norm": 1.0190695524215698,
      "learning_rate": 1.4114114114114116e-05,
      "loss": 1.9836,
      "step": 48
    },
    {
      "epoch": 0.014705882352941176,
      "grad_norm": 1.171444296836853,
      "learning_rate": 1.4414414414414416e-05,
      "loss": 2.2635,
      "step": 49
    },
    {
      "epoch": 0.015006002400960384,
      "grad_norm": 0.9180420637130737,
      "learning_rate": 1.4714714714714713e-05,
      "loss": 2.0183,
      "step": 50
    },
    {
      "epoch": 0.015306122448979591,
      "grad_norm": 0.8563716411590576,
      "learning_rate": 1.5015015015015016e-05,
      "loss": 1.8753,
      "step": 51
    },
    {
      "epoch": 0.015606242496998799,
      "grad_norm": 0.8259108662605286,
      "learning_rate": 1.5315315315315316e-05,
      "loss": 1.771,
      "step": 52
    },
    {
      "epoch": 0.015906362545018007,
      "grad_norm": 0.8018758296966553,
      "learning_rate": 1.5615615615615616e-05,
      "loss": 1.6323,
      "step": 53
    },
    {
      "epoch": 0.016206482593037214,
      "grad_norm": 0.985905647277832,
      "learning_rate": 1.5915915915915915e-05,
      "loss": 1.8725,
      "step": 54
    },
    {
      "epoch": 0.016506602641056422,
      "grad_norm": 0.8832401037216187,
      "learning_rate": 1.6216216216216218e-05,
      "loss": 1.7508,
      "step": 55
    },
    {
      "epoch": 0.01680672268907563,
      "grad_norm": 0.8357267379760742,
      "learning_rate": 1.6516516516516518e-05,
      "loss": 1.5477,
      "step": 56
    },
    {
      "epoch": 0.017106842737094837,
      "grad_norm": 0.8062194585800171,
      "learning_rate": 1.6816816816816817e-05,
      "loss": 1.4678,
      "step": 57
    },
    {
      "epoch": 0.017406962785114045,
      "grad_norm": 0.9643778800964355,
      "learning_rate": 1.7117117117117117e-05,
      "loss": 1.5877,
      "step": 58
    },
    {
      "epoch": 0.017707082833133252,
      "grad_norm": 0.8363720774650574,
      "learning_rate": 1.7417417417417416e-05,
      "loss": 1.474,
      "step": 59
    },
    {
      "epoch": 0.01800720288115246,
      "grad_norm": 0.770699143409729,
      "learning_rate": 1.771771771771772e-05,
      "loss": 1.3455,
      "step": 60
    },
    {
      "epoch": 0.018307322929171668,
      "grad_norm": 0.948675274848938,
      "learning_rate": 1.801801801801802e-05,
      "loss": 1.4573,
      "step": 61
    },
    {
      "epoch": 0.018607442977190875,
      "grad_norm": 0.7970764636993408,
      "learning_rate": 1.831831831831832e-05,
      "loss": 1.1989,
      "step": 62
    },
    {
      "epoch": 0.018907563025210083,
      "grad_norm": 0.7488381862640381,
      "learning_rate": 1.8618618618618618e-05,
      "loss": 1.2167,
      "step": 63
    },
    {
      "epoch": 0.01920768307322929,
      "grad_norm": 0.7332976460456848,
      "learning_rate": 1.891891891891892e-05,
      "loss": 1.2067,
      "step": 64
    },
    {
      "epoch": 0.019507803121248498,
      "grad_norm": 0.6228386163711548,
      "learning_rate": 1.921921921921922e-05,
      "loss": 1.0552,
      "step": 65
    },
    {
      "epoch": 0.019807923169267706,
      "grad_norm": 0.5040204524993896,
      "learning_rate": 1.951951951951952e-05,
      "loss": 1.0225,
      "step": 66
    },
    {
      "epoch": 0.020108043217286913,
      "grad_norm": 0.638465404510498,
      "learning_rate": 1.981981981981982e-05,
      "loss": 1.1401,
      "step": 67
    },
    {
      "epoch": 0.02040816326530612,
      "grad_norm": 0.5351008176803589,
      "learning_rate": 2.012012012012012e-05,
      "loss": 1.0391,
      "step": 68
    },
    {
      "epoch": 0.02070828331332533,
      "grad_norm": 0.491290420293808,
      "learning_rate": 2.0420420420420422e-05,
      "loss": 1.0132,
      "step": 69
    },
    {
      "epoch": 0.02100840336134454,
      "grad_norm": 0.6342625617980957,
      "learning_rate": 2.0720720720720722e-05,
      "loss": 0.9974,
      "step": 70
    },
    {
      "epoch": 0.021308523409363747,
      "grad_norm": 0.4186214804649353,
      "learning_rate": 2.102102102102102e-05,
      "loss": 0.8443,
      "step": 71
    },
    {
      "epoch": 0.021608643457382955,
      "grad_norm": 0.4734509289264679,
      "learning_rate": 2.132132132132132e-05,
      "loss": 0.9307,
      "step": 72
    },
    {
      "epoch": 0.021908763505402162,
      "grad_norm": 0.39881131052970886,
      "learning_rate": 2.1621621621621624e-05,
      "loss": 0.9182,
      "step": 73
    },
    {
      "epoch": 0.02220888355342137,
      "grad_norm": 0.37704476714134216,
      "learning_rate": 2.1921921921921924e-05,
      "loss": 0.905,
      "step": 74
    },
    {
      "epoch": 0.022509003601440578,
      "grad_norm": 0.3518320918083191,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 0.847,
      "step": 75
    },
    {
      "epoch": 0.022809123649459785,
      "grad_norm": 0.4406863749027252,
      "learning_rate": 2.2522522522522523e-05,
      "loss": 0.8292,
      "step": 76
    },
    {
      "epoch": 0.023109243697478993,
      "grad_norm": 0.31686627864837646,
      "learning_rate": 2.2822822822822822e-05,
      "loss": 0.893,
      "step": 77
    },
    {
      "epoch": 0.0234093637454982,
      "grad_norm": 0.3058432936668396,
      "learning_rate": 2.3123123123123125e-05,
      "loss": 0.8069,
      "step": 78
    },
    {
      "epoch": 0.023709483793517408,
      "grad_norm": 0.32039502263069153,
      "learning_rate": 2.3423423423423425e-05,
      "loss": 0.8222,
      "step": 79
    },
    {
      "epoch": 0.024009603841536616,
      "grad_norm": 0.29504913091659546,
      "learning_rate": 2.3723723723723725e-05,
      "loss": 0.8688,
      "step": 80
    },
    {
      "epoch": 0.024309723889555823,
      "grad_norm": 0.26919642090797424,
      "learning_rate": 2.4024024024024024e-05,
      "loss": 0.7322,
      "step": 81
    },
    {
      "epoch": 0.02460984393757503,
      "grad_norm": 0.32023561000823975,
      "learning_rate": 2.4324324324324327e-05,
      "loss": 0.7903,
      "step": 82
    },
    {
      "epoch": 0.02490996398559424,
      "grad_norm": 0.30055469274520874,
      "learning_rate": 2.4624624624624627e-05,
      "loss": 0.8126,
      "step": 83
    },
    {
      "epoch": 0.025210084033613446,
      "grad_norm": 0.2616680860519409,
      "learning_rate": 2.4924924924924926e-05,
      "loss": 0.7882,
      "step": 84
    },
    {
      "epoch": 0.025510204081632654,
      "grad_norm": 0.2315160185098648,
      "learning_rate": 2.5225225225225222e-05,
      "loss": 0.7482,
      "step": 85
    },
    {
      "epoch": 0.02581032412965186,
      "grad_norm": 0.28771960735321045,
      "learning_rate": 2.552552552552553e-05,
      "loss": 0.8175,
      "step": 86
    },
    {
      "epoch": 0.02611044417767107,
      "grad_norm": 0.240019753575325,
      "learning_rate": 2.582582582582583e-05,
      "loss": 0.7051,
      "step": 87
    },
    {
      "epoch": 0.026410564225690276,
      "grad_norm": 0.4383765161037445,
      "learning_rate": 2.6126126126126128e-05,
      "loss": 0.72,
      "step": 88
    },
    {
      "epoch": 0.026710684273709484,
      "grad_norm": 0.23066319525241852,
      "learning_rate": 2.6426426426426428e-05,
      "loss": 0.7018,
      "step": 89
    },
    {
      "epoch": 0.02701080432172869,
      "grad_norm": 0.2606968581676483,
      "learning_rate": 2.672672672672673e-05,
      "loss": 0.7069,
      "step": 90
    },
    {
      "epoch": 0.0273109243697479,
      "grad_norm": 0.234344020485878,
      "learning_rate": 2.702702702702703e-05,
      "loss": 0.737,
      "step": 91
    },
    {
      "epoch": 0.027611044417767107,
      "grad_norm": 0.2612946629524231,
      "learning_rate": 2.732732732732733e-05,
      "loss": 0.7118,
      "step": 92
    },
    {
      "epoch": 0.027911164465786314,
      "grad_norm": 0.23396819829940796,
      "learning_rate": 2.762762762762763e-05,
      "loss": 0.6997,
      "step": 93
    },
    {
      "epoch": 0.028211284513805522,
      "grad_norm": 0.2197222113609314,
      "learning_rate": 2.7927927927927926e-05,
      "loss": 0.6989,
      "step": 94
    },
    {
      "epoch": 0.02851140456182473,
      "grad_norm": 0.21125797927379608,
      "learning_rate": 2.8228228228228232e-05,
      "loss": 0.7102,
      "step": 95
    },
    {
      "epoch": 0.028811524609843937,
      "grad_norm": 0.18250249326229095,
      "learning_rate": 2.852852852852853e-05,
      "loss": 0.6388,
      "step": 96
    },
    {
      "epoch": 0.029111644657863145,
      "grad_norm": 0.2123304009437561,
      "learning_rate": 2.882882882882883e-05,
      "loss": 0.7159,
      "step": 97
    },
    {
      "epoch": 0.029411764705882353,
      "grad_norm": 0.19415165483951569,
      "learning_rate": 2.912912912912913e-05,
      "loss": 0.6594,
      "step": 98
    },
    {
      "epoch": 0.02971188475390156,
      "grad_norm": 0.1903056651353836,
      "learning_rate": 2.9429429429429427e-05,
      "loss": 0.712,
      "step": 99
    },
    {
      "epoch": 0.030012004801920768,
      "grad_norm": 0.17540313303470612,
      "learning_rate": 2.9729729729729733e-05,
      "loss": 0.608,
      "step": 100
    },
    {
      "epoch": 0.030312124849939975,
      "grad_norm": 0.1896171122789383,
      "learning_rate": 3.0030030030030033e-05,
      "loss": 0.711,
      "step": 101
    },
    {
      "epoch": 0.030612244897959183,
      "grad_norm": 0.18761886656284332,
      "learning_rate": 3.0330330330330332e-05,
      "loss": 0.6884,
      "step": 102
    },
    {
      "epoch": 0.03091236494597839,
      "grad_norm": 0.20305292308330536,
      "learning_rate": 3.063063063063063e-05,
      "loss": 0.6427,
      "step": 103
    },
    {
      "epoch": 0.031212484993997598,
      "grad_norm": 0.1764855533838272,
      "learning_rate": 3.093093093093093e-05,
      "loss": 0.6063,
      "step": 104
    },
    {
      "epoch": 0.031512605042016806,
      "grad_norm": 0.19730336964130402,
      "learning_rate": 3.123123123123123e-05,
      "loss": 0.6546,
      "step": 105
    },
    {
      "epoch": 0.03181272509003601,
      "grad_norm": 0.24252024292945862,
      "learning_rate": 3.153153153153153e-05,
      "loss": 0.6414,
      "step": 106
    },
    {
      "epoch": 0.03211284513805522,
      "grad_norm": 0.17647583782672882,
      "learning_rate": 3.183183183183183e-05,
      "loss": 0.6475,
      "step": 107
    },
    {
      "epoch": 0.03241296518607443,
      "grad_norm": 0.22339290380477905,
      "learning_rate": 3.213213213213213e-05,
      "loss": 0.6326,
      "step": 108
    },
    {
      "epoch": 0.032713085234093636,
      "grad_norm": 0.18274854123592377,
      "learning_rate": 3.2432432432432436e-05,
      "loss": 0.6822,
      "step": 109
    },
    {
      "epoch": 0.033013205282112844,
      "grad_norm": 0.19241338968276978,
      "learning_rate": 3.2732732732732736e-05,
      "loss": 0.6295,
      "step": 110
    },
    {
      "epoch": 0.03331332533013205,
      "grad_norm": 0.16756710410118103,
      "learning_rate": 3.3033033033033035e-05,
      "loss": 0.5989,
      "step": 111
    },
    {
      "epoch": 0.03361344537815126,
      "grad_norm": 0.1821005493402481,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.6185,
      "step": 112
    },
    {
      "epoch": 0.03391356542617047,
      "grad_norm": 0.19916538894176483,
      "learning_rate": 3.3633633633633635e-05,
      "loss": 0.62,
      "step": 113
    },
    {
      "epoch": 0.034213685474189674,
      "grad_norm": 0.18223139643669128,
      "learning_rate": 3.3933933933933934e-05,
      "loss": 0.6281,
      "step": 114
    },
    {
      "epoch": 0.03451380552220888,
      "grad_norm": 0.20026777684688568,
      "learning_rate": 3.4234234234234234e-05,
      "loss": 0.6476,
      "step": 115
    },
    {
      "epoch": 0.03481392557022809,
      "grad_norm": 0.18175727128982544,
      "learning_rate": 3.453453453453453e-05,
      "loss": 0.6849,
      "step": 116
    },
    {
      "epoch": 0.0351140456182473,
      "grad_norm": 0.2390904575586319,
      "learning_rate": 3.483483483483483e-05,
      "loss": 0.6154,
      "step": 117
    },
    {
      "epoch": 0.035414165666266505,
      "grad_norm": 0.16057820618152618,
      "learning_rate": 3.513513513513514e-05,
      "loss": 0.5825,
      "step": 118
    },
    {
      "epoch": 0.03571428571428571,
      "grad_norm": 0.15922337770462036,
      "learning_rate": 3.543543543543544e-05,
      "loss": 0.62,
      "step": 119
    },
    {
      "epoch": 0.03601440576230492,
      "grad_norm": 0.23435887694358826,
      "learning_rate": 3.573573573573574e-05,
      "loss": 0.6075,
      "step": 120
    },
    {
      "epoch": 0.03631452581032413,
      "grad_norm": 0.1574699729681015,
      "learning_rate": 3.603603603603604e-05,
      "loss": 0.6137,
      "step": 121
    },
    {
      "epoch": 0.036614645858343335,
      "grad_norm": 0.18464826047420502,
      "learning_rate": 3.633633633633634e-05,
      "loss": 0.586,
      "step": 122
    },
    {
      "epoch": 0.03691476590636254,
      "grad_norm": 0.19280335307121277,
      "learning_rate": 3.663663663663664e-05,
      "loss": 0.6323,
      "step": 123
    },
    {
      "epoch": 0.03721488595438175,
      "grad_norm": 0.17928992211818695,
      "learning_rate": 3.693693693693694e-05,
      "loss": 0.6187,
      "step": 124
    },
    {
      "epoch": 0.03751500600240096,
      "grad_norm": 0.16199256479740143,
      "learning_rate": 3.7237237237237236e-05,
      "loss": 0.6087,
      "step": 125
    },
    {
      "epoch": 0.037815126050420166,
      "grad_norm": 0.18012242019176483,
      "learning_rate": 3.7537537537537536e-05,
      "loss": 0.6379,
      "step": 126
    },
    {
      "epoch": 0.03811524609843937,
      "grad_norm": 0.16345292329788208,
      "learning_rate": 3.783783783783784e-05,
      "loss": 0.5887,
      "step": 127
    },
    {
      "epoch": 0.03841536614645858,
      "grad_norm": 0.19358745217323303,
      "learning_rate": 3.813813813813814e-05,
      "loss": 0.5929,
      "step": 128
    },
    {
      "epoch": 0.03871548619447779,
      "grad_norm": 0.1952536702156067,
      "learning_rate": 3.843843843843844e-05,
      "loss": 0.6674,
      "step": 129
    },
    {
      "epoch": 0.039015606242496996,
      "grad_norm": 0.16455616056919098,
      "learning_rate": 3.873873873873874e-05,
      "loss": 0.5442,
      "step": 130
    },
    {
      "epoch": 0.039315726290516204,
      "grad_norm": 0.19198055565357208,
      "learning_rate": 3.903903903903904e-05,
      "loss": 0.6454,
      "step": 131
    },
    {
      "epoch": 0.03961584633853541,
      "grad_norm": 0.1654767543077469,
      "learning_rate": 3.933933933933934e-05,
      "loss": 0.6032,
      "step": 132
    },
    {
      "epoch": 0.03991596638655462,
      "grad_norm": 0.17861434817314148,
      "learning_rate": 3.963963963963964e-05,
      "loss": 0.6412,
      "step": 133
    },
    {
      "epoch": 0.040216086434573826,
      "grad_norm": 0.1698954701423645,
      "learning_rate": 3.993993993993994e-05,
      "loss": 0.5854,
      "step": 134
    },
    {
      "epoch": 0.040516206482593034,
      "grad_norm": 0.18049751222133636,
      "learning_rate": 4.024024024024024e-05,
      "loss": 0.6572,
      "step": 135
    },
    {
      "epoch": 0.04081632653061224,
      "grad_norm": 0.15890692174434662,
      "learning_rate": 4.0540540540540545e-05,
      "loss": 0.5808,
      "step": 136
    },
    {
      "epoch": 0.04111644657863145,
      "grad_norm": 0.15884089469909668,
      "learning_rate": 4.0840840840840845e-05,
      "loss": 0.5729,
      "step": 137
    },
    {
      "epoch": 0.04141656662665066,
      "grad_norm": 0.188554048538208,
      "learning_rate": 4.1141141141141144e-05,
      "loss": 0.5937,
      "step": 138
    },
    {
      "epoch": 0.04171668667466987,
      "grad_norm": 0.16957660019397736,
      "learning_rate": 4.1441441441441444e-05,
      "loss": 0.6158,
      "step": 139
    },
    {
      "epoch": 0.04201680672268908,
      "grad_norm": 0.17422978579998016,
      "learning_rate": 4.1741741741741744e-05,
      "loss": 0.6554,
      "step": 140
    },
    {
      "epoch": 0.04231692677070829,
      "grad_norm": 0.16660812497138977,
      "learning_rate": 4.204204204204204e-05,
      "loss": 0.7306,
      "step": 141
    },
    {
      "epoch": 0.042617046818727494,
      "grad_norm": 0.17347297072410583,
      "learning_rate": 4.234234234234234e-05,
      "loss": 0.5936,
      "step": 142
    },
    {
      "epoch": 0.0429171668667467,
      "grad_norm": 0.15610091388225555,
      "learning_rate": 4.264264264264264e-05,
      "loss": 0.5666,
      "step": 143
    },
    {
      "epoch": 0.04321728691476591,
      "grad_norm": 0.1987975686788559,
      "learning_rate": 4.294294294294294e-05,
      "loss": 0.6298,
      "step": 144
    },
    {
      "epoch": 0.04351740696278512,
      "grad_norm": 0.16874830424785614,
      "learning_rate": 4.324324324324325e-05,
      "loss": 0.5792,
      "step": 145
    },
    {
      "epoch": 0.043817527010804325,
      "grad_norm": 0.1750735342502594,
      "learning_rate": 4.354354354354355e-05,
      "loss": 0.5878,
      "step": 146
    },
    {
      "epoch": 0.04411764705882353,
      "grad_norm": 0.1773807853460312,
      "learning_rate": 4.384384384384385e-05,
      "loss": 0.576,
      "step": 147
    },
    {
      "epoch": 0.04441776710684274,
      "grad_norm": 0.1598764806985855,
      "learning_rate": 4.414414414414415e-05,
      "loss": 0.5881,
      "step": 148
    },
    {
      "epoch": 0.04471788715486195,
      "grad_norm": 0.1856532096862793,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 0.5463,
      "step": 149
    },
    {
      "epoch": 0.045018007202881155,
      "grad_norm": 0.16291286051273346,
      "learning_rate": 4.4744744744744746e-05,
      "loss": 0.5138,
      "step": 150
    },
    {
      "epoch": 0.04531812725090036,
      "grad_norm": 0.19795602560043335,
      "learning_rate": 4.5045045045045046e-05,
      "loss": 0.5249,
      "step": 151
    },
    {
      "epoch": 0.04561824729891957,
      "grad_norm": 0.2006046324968338,
      "learning_rate": 4.5345345345345345e-05,
      "loss": 0.6039,
      "step": 152
    },
    {
      "epoch": 0.04591836734693878,
      "grad_norm": 0.17838355898857117,
      "learning_rate": 4.5645645645645645e-05,
      "loss": 0.5634,
      "step": 153
    },
    {
      "epoch": 0.046218487394957986,
      "grad_norm": 0.160551056265831,
      "learning_rate": 4.594594594594595e-05,
      "loss": 0.5825,
      "step": 154
    },
    {
      "epoch": 0.04651860744297719,
      "grad_norm": 0.16036823391914368,
      "learning_rate": 4.624624624624625e-05,
      "loss": 0.54,
      "step": 155
    },
    {
      "epoch": 0.0468187274909964,
      "grad_norm": 0.16031847894191742,
      "learning_rate": 4.654654654654655e-05,
      "loss": 0.5655,
      "step": 156
    },
    {
      "epoch": 0.04711884753901561,
      "grad_norm": 0.17296087741851807,
      "learning_rate": 4.684684684684685e-05,
      "loss": 0.6163,
      "step": 157
    },
    {
      "epoch": 0.047418967587034816,
      "grad_norm": 0.15543188154697418,
      "learning_rate": 4.714714714714715e-05,
      "loss": 0.5416,
      "step": 158
    },
    {
      "epoch": 0.047719087635054024,
      "grad_norm": 0.16270007193088531,
      "learning_rate": 4.744744744744745e-05,
      "loss": 0.5561,
      "step": 159
    },
    {
      "epoch": 0.04801920768307323,
      "grad_norm": 0.15587948262691498,
      "learning_rate": 4.774774774774775e-05,
      "loss": 0.5877,
      "step": 160
    },
    {
      "epoch": 0.04831932773109244,
      "grad_norm": 0.16889938712120056,
      "learning_rate": 4.804804804804805e-05,
      "loss": 0.5613,
      "step": 161
    },
    {
      "epoch": 0.048619447779111646,
      "grad_norm": 0.15977361798286438,
      "learning_rate": 4.834834834834835e-05,
      "loss": 0.5806,
      "step": 162
    },
    {
      "epoch": 0.048919567827130854,
      "grad_norm": 0.1628406047821045,
      "learning_rate": 4.8648648648648654e-05,
      "loss": 0.584,
      "step": 163
    },
    {
      "epoch": 0.04921968787515006,
      "grad_norm": 0.1554487943649292,
      "learning_rate": 4.8948948948948954e-05,
      "loss": 0.5566,
      "step": 164
    },
    {
      "epoch": 0.04951980792316927,
      "grad_norm": 0.16129934787750244,
      "learning_rate": 4.9249249249249253e-05,
      "loss": 0.5373,
      "step": 165
    },
    {
      "epoch": 0.04981992797118848,
      "grad_norm": 0.16680221259593964,
      "learning_rate": 4.954954954954955e-05,
      "loss": 0.5526,
      "step": 166
    },
    {
      "epoch": 0.050120048019207684,
      "grad_norm": 0.22599273920059204,
      "learning_rate": 4.984984984984985e-05,
      "loss": 0.5677,
      "step": 167
    },
    {
      "epoch": 0.05042016806722689,
      "grad_norm": 0.15940923988819122,
      "learning_rate": 5.015015015015015e-05,
      "loss": 0.5585,
      "step": 168
    },
    {
      "epoch": 0.0507202881152461,
      "grad_norm": 0.21803030371665955,
      "learning_rate": 5.0450450450450445e-05,
      "loss": 0.5465,
      "step": 169
    },
    {
      "epoch": 0.05102040816326531,
      "grad_norm": 0.155521959066391,
      "learning_rate": 5.075075075075075e-05,
      "loss": 0.5617,
      "step": 170
    },
    {
      "epoch": 0.051320528211284515,
      "grad_norm": 0.16137102246284485,
      "learning_rate": 5.105105105105106e-05,
      "loss": 0.5666,
      "step": 171
    },
    {
      "epoch": 0.05162064825930372,
      "grad_norm": 0.17119623720645905,
      "learning_rate": 5.135135135135135e-05,
      "loss": 0.5736,
      "step": 172
    },
    {
      "epoch": 0.05192076830732293,
      "grad_norm": 0.1570027768611908,
      "learning_rate": 5.165165165165166e-05,
      "loss": 0.5682,
      "step": 173
    },
    {
      "epoch": 0.05222088835534214,
      "grad_norm": 0.17013399302959442,
      "learning_rate": 5.195195195195195e-05,
      "loss": 0.5806,
      "step": 174
    },
    {
      "epoch": 0.052521008403361345,
      "grad_norm": 0.16840900480747223,
      "learning_rate": 5.2252252252252256e-05,
      "loss": 0.524,
      "step": 175
    },
    {
      "epoch": 0.05282112845138055,
      "grad_norm": 0.19608522951602936,
      "learning_rate": 5.2552552552552556e-05,
      "loss": 0.558,
      "step": 176
    },
    {
      "epoch": 0.05312124849939976,
      "grad_norm": 0.16340655088424683,
      "learning_rate": 5.2852852852852855e-05,
      "loss": 0.5473,
      "step": 177
    },
    {
      "epoch": 0.05342136854741897,
      "grad_norm": 0.17019999027252197,
      "learning_rate": 5.3153153153153155e-05,
      "loss": 0.6102,
      "step": 178
    },
    {
      "epoch": 0.053721488595438176,
      "grad_norm": 0.18055562674999237,
      "learning_rate": 5.345345345345346e-05,
      "loss": 0.5499,
      "step": 179
    },
    {
      "epoch": 0.05402160864345738,
      "grad_norm": 0.18409283459186554,
      "learning_rate": 5.3753753753753754e-05,
      "loss": 0.5542,
      "step": 180
    },
    {
      "epoch": 0.05432172869147659,
      "grad_norm": 0.15948091447353363,
      "learning_rate": 5.405405405405406e-05,
      "loss": 0.5801,
      "step": 181
    },
    {
      "epoch": 0.0546218487394958,
      "grad_norm": 0.15832237899303436,
      "learning_rate": 5.435435435435435e-05,
      "loss": 0.5363,
      "step": 182
    },
    {
      "epoch": 0.054921968787515006,
      "grad_norm": 0.16188354790210724,
      "learning_rate": 5.465465465465466e-05,
      "loss": 0.5323,
      "step": 183
    },
    {
      "epoch": 0.055222088835534214,
      "grad_norm": 0.17373915016651154,
      "learning_rate": 5.4954954954954966e-05,
      "loss": 0.5252,
      "step": 184
    },
    {
      "epoch": 0.05552220888355342,
      "grad_norm": 0.1621847003698349,
      "learning_rate": 5.525525525525526e-05,
      "loss": 0.5449,
      "step": 185
    },
    {
      "epoch": 0.05582232893157263,
      "grad_norm": 0.16235846281051636,
      "learning_rate": 5.555555555555556e-05,
      "loss": 0.5933,
      "step": 186
    },
    {
      "epoch": 0.05612244897959184,
      "grad_norm": 0.20305441319942474,
      "learning_rate": 5.585585585585585e-05,
      "loss": 0.5164,
      "step": 187
    },
    {
      "epoch": 0.056422569027611044,
      "grad_norm": 0.1579139083623886,
      "learning_rate": 5.615615615615616e-05,
      "loss": 0.5778,
      "step": 188
    },
    {
      "epoch": 0.05672268907563025,
      "grad_norm": 0.14820082485675812,
      "learning_rate": 5.6456456456456464e-05,
      "loss": 0.5317,
      "step": 189
    },
    {
      "epoch": 0.05702280912364946,
      "grad_norm": 0.4610205590724945,
      "learning_rate": 5.6756756756756757e-05,
      "loss": 0.5196,
      "step": 190
    },
    {
      "epoch": 0.05732292917166867,
      "grad_norm": 0.1503513902425766,
      "learning_rate": 5.705705705705706e-05,
      "loss": 0.5291,
      "step": 191
    },
    {
      "epoch": 0.057623049219687875,
      "grad_norm": 0.17538698017597198,
      "learning_rate": 5.7357357357357356e-05,
      "loss": 0.5559,
      "step": 192
    },
    {
      "epoch": 0.05792316926770708,
      "grad_norm": 0.17536750435829163,
      "learning_rate": 5.765765765765766e-05,
      "loss": 0.5611,
      "step": 193
    },
    {
      "epoch": 0.05822328931572629,
      "grad_norm": 0.23853377997875214,
      "learning_rate": 5.795795795795796e-05,
      "loss": 0.5556,
      "step": 194
    },
    {
      "epoch": 0.0585234093637455,
      "grad_norm": 0.16671805083751678,
      "learning_rate": 5.825825825825826e-05,
      "loss": 0.5689,
      "step": 195
    },
    {
      "epoch": 0.058823529411764705,
      "grad_norm": 0.18346083164215088,
      "learning_rate": 5.855855855855856e-05,
      "loss": 0.6339,
      "step": 196
    },
    {
      "epoch": 0.05912364945978391,
      "grad_norm": 0.16490010917186737,
      "learning_rate": 5.8858858858858854e-05,
      "loss": 0.5744,
      "step": 197
    },
    {
      "epoch": 0.05942376950780312,
      "grad_norm": 0.15952922403812408,
      "learning_rate": 5.915915915915916e-05,
      "loss": 0.5466,
      "step": 198
    },
    {
      "epoch": 0.05972388955582233,
      "grad_norm": 0.16180749237537384,
      "learning_rate": 5.9459459459459466e-05,
      "loss": 0.5249,
      "step": 199
    },
    {
      "epoch": 0.060024009603841535,
      "grad_norm": 0.1582827866077423,
      "learning_rate": 5.975975975975976e-05,
      "loss": 0.517,
      "step": 200
    },
    {
      "epoch": 0.06032412965186074,
      "grad_norm": 0.17207732796669006,
      "learning_rate": 6.0060060060060066e-05,
      "loss": 0.5024,
      "step": 201
    },
    {
      "epoch": 0.06062424969987995,
      "grad_norm": 0.1727384626865387,
      "learning_rate": 6.0360360360360365e-05,
      "loss": 0.5579,
      "step": 202
    },
    {
      "epoch": 0.06092436974789916,
      "grad_norm": 0.165960893034935,
      "learning_rate": 6.0660660660660665e-05,
      "loss": 0.556,
      "step": 203
    },
    {
      "epoch": 0.061224489795918366,
      "grad_norm": 0.1521759182214737,
      "learning_rate": 6.0960960960960964e-05,
      "loss": 0.5119,
      "step": 204
    },
    {
      "epoch": 0.061524609843937574,
      "grad_norm": 0.2733907401561737,
      "learning_rate": 6.126126126126126e-05,
      "loss": 0.5353,
      "step": 205
    },
    {
      "epoch": 0.06182472989195678,
      "grad_norm": 0.164889395236969,
      "learning_rate": 6.156156156156156e-05,
      "loss": 0.5342,
      "step": 206
    },
    {
      "epoch": 0.06212484993997599,
      "grad_norm": 0.15708684921264648,
      "learning_rate": 6.186186186186186e-05,
      "loss": 0.5417,
      "step": 207
    },
    {
      "epoch": 0.062424969987995196,
      "grad_norm": 0.14472995698451996,
      "learning_rate": 6.216216216216216e-05,
      "loss": 0.4934,
      "step": 208
    },
    {
      "epoch": 0.06272509003601441,
      "grad_norm": 0.168310284614563,
      "learning_rate": 6.246246246246246e-05,
      "loss": 0.521,
      "step": 209
    },
    {
      "epoch": 0.06302521008403361,
      "grad_norm": 0.14763006567955017,
      "learning_rate": 6.276276276276276e-05,
      "loss": 0.5292,
      "step": 210
    },
    {
      "epoch": 0.06332533013205283,
      "grad_norm": 0.14773689210414886,
      "learning_rate": 6.306306306306306e-05,
      "loss": 0.4917,
      "step": 211
    },
    {
      "epoch": 0.06362545018007203,
      "grad_norm": 0.15610907971858978,
      "learning_rate": 6.336336336336337e-05,
      "loss": 0.5573,
      "step": 212
    },
    {
      "epoch": 0.06392557022809124,
      "grad_norm": 0.15105871856212616,
      "learning_rate": 6.366366366366366e-05,
      "loss": 0.5192,
      "step": 213
    },
    {
      "epoch": 0.06422569027611044,
      "grad_norm": 0.15257810056209564,
      "learning_rate": 6.396396396396397e-05,
      "loss": 0.5685,
      "step": 214
    },
    {
      "epoch": 0.06452581032412966,
      "grad_norm": 0.16696257889270782,
      "learning_rate": 6.426426426426426e-05,
      "loss": 0.5774,
      "step": 215
    },
    {
      "epoch": 0.06482593037214886,
      "grad_norm": 0.15298417210578918,
      "learning_rate": 6.456456456456457e-05,
      "loss": 0.547,
      "step": 216
    },
    {
      "epoch": 0.06512605042016807,
      "grad_norm": 0.1644119918346405,
      "learning_rate": 6.486486486486487e-05,
      "loss": 0.5713,
      "step": 217
    },
    {
      "epoch": 0.06542617046818727,
      "grad_norm": 0.21756312251091003,
      "learning_rate": 6.516516516516516e-05,
      "loss": 0.5501,
      "step": 218
    },
    {
      "epoch": 0.06572629051620649,
      "grad_norm": 0.1546732485294342,
      "learning_rate": 6.546546546546547e-05,
      "loss": 0.5478,
      "step": 219
    },
    {
      "epoch": 0.06602641056422569,
      "grad_norm": 0.15699245035648346,
      "learning_rate": 6.576576576576577e-05,
      "loss": 0.5431,
      "step": 220
    },
    {
      "epoch": 0.0663265306122449,
      "grad_norm": 0.1714629828929901,
      "learning_rate": 6.606606606606607e-05,
      "loss": 0.6054,
      "step": 221
    },
    {
      "epoch": 0.0666266506602641,
      "grad_norm": 0.1773165464401245,
      "learning_rate": 6.636636636636637e-05,
      "loss": 0.5467,
      "step": 222
    },
    {
      "epoch": 0.06692677070828332,
      "grad_norm": 0.1482841670513153,
      "learning_rate": 6.666666666666667e-05,
      "loss": 0.5377,
      "step": 223
    },
    {
      "epoch": 0.06722689075630252,
      "grad_norm": 0.17168587446212769,
      "learning_rate": 6.696696696696697e-05,
      "loss": 0.5223,
      "step": 224
    },
    {
      "epoch": 0.06752701080432173,
      "grad_norm": 0.15758070349693298,
      "learning_rate": 6.726726726726727e-05,
      "loss": 0.54,
      "step": 225
    },
    {
      "epoch": 0.06782713085234093,
      "grad_norm": 0.1595706045627594,
      "learning_rate": 6.756756756756757e-05,
      "loss": 0.5675,
      "step": 226
    },
    {
      "epoch": 0.06812725090036015,
      "grad_norm": 0.14920708537101746,
      "learning_rate": 6.786786786786787e-05,
      "loss": 0.5844,
      "step": 227
    },
    {
      "epoch": 0.06842737094837935,
      "grad_norm": 0.15330851078033447,
      "learning_rate": 6.816816816816817e-05,
      "loss": 0.5548,
      "step": 228
    },
    {
      "epoch": 0.06872749099639856,
      "grad_norm": 0.16282516717910767,
      "learning_rate": 6.846846846846847e-05,
      "loss": 0.4937,
      "step": 229
    },
    {
      "epoch": 0.06902761104441776,
      "grad_norm": 0.15990369021892548,
      "learning_rate": 6.876876876876878e-05,
      "loss": 0.5599,
      "step": 230
    },
    {
      "epoch": 0.06932773109243698,
      "grad_norm": 0.14240749180316925,
      "learning_rate": 6.906906906906907e-05,
      "loss": 0.5104,
      "step": 231
    },
    {
      "epoch": 0.06962785114045618,
      "grad_norm": 0.18181869387626648,
      "learning_rate": 6.936936936936938e-05,
      "loss": 0.566,
      "step": 232
    },
    {
      "epoch": 0.0699279711884754,
      "grad_norm": 0.16088823974132538,
      "learning_rate": 6.966966966966967e-05,
      "loss": 0.529,
      "step": 233
    },
    {
      "epoch": 0.0702280912364946,
      "grad_norm": 0.152555450797081,
      "learning_rate": 6.996996996996998e-05,
      "loss": 0.6098,
      "step": 234
    },
    {
      "epoch": 0.07052821128451381,
      "grad_norm": 0.14834022521972656,
      "learning_rate": 7.027027027027028e-05,
      "loss": 0.4863,
      "step": 235
    },
    {
      "epoch": 0.07082833133253301,
      "grad_norm": 0.156663715839386,
      "learning_rate": 7.057057057057056e-05,
      "loss": 0.4761,
      "step": 236
    },
    {
      "epoch": 0.07112845138055222,
      "grad_norm": 0.14352193474769592,
      "learning_rate": 7.087087087087088e-05,
      "loss": 0.55,
      "step": 237
    },
    {
      "epoch": 0.07142857142857142,
      "grad_norm": 0.14808101952075958,
      "learning_rate": 7.117117117117116e-05,
      "loss": 0.5447,
      "step": 238
    },
    {
      "epoch": 0.07172869147659064,
      "grad_norm": 0.15214911103248596,
      "learning_rate": 7.147147147147148e-05,
      "loss": 0.5245,
      "step": 239
    },
    {
      "epoch": 0.07202881152460984,
      "grad_norm": 0.1910678893327713,
      "learning_rate": 7.177177177177178e-05,
      "loss": 0.5181,
      "step": 240
    },
    {
      "epoch": 0.07232893157262905,
      "grad_norm": 0.1260625272989273,
      "learning_rate": 7.207207207207208e-05,
      "loss": 0.474,
      "step": 241
    },
    {
      "epoch": 0.07262905162064826,
      "grad_norm": 0.167415589094162,
      "learning_rate": 7.237237237237238e-05,
      "loss": 0.5691,
      "step": 242
    },
    {
      "epoch": 0.07292917166866747,
      "grad_norm": 0.33259207010269165,
      "learning_rate": 7.267267267267268e-05,
      "loss": 0.5795,
      "step": 243
    },
    {
      "epoch": 0.07322929171668667,
      "grad_norm": 0.13412445783615112,
      "learning_rate": 7.297297297297297e-05,
      "loss": 0.5308,
      "step": 244
    },
    {
      "epoch": 0.07352941176470588,
      "grad_norm": 0.1500299721956253,
      "learning_rate": 7.327327327327327e-05,
      "loss": 0.5355,
      "step": 245
    },
    {
      "epoch": 0.07382953181272509,
      "grad_norm": 0.15848858654499054,
      "learning_rate": 7.357357357357357e-05,
      "loss": 0.5417,
      "step": 246
    },
    {
      "epoch": 0.0741296518607443,
      "grad_norm": 0.1430959850549698,
      "learning_rate": 7.387387387387387e-05,
      "loss": 0.473,
      "step": 247
    },
    {
      "epoch": 0.0744297719087635,
      "grad_norm": 0.14406929910182953,
      "learning_rate": 7.417417417417419e-05,
      "loss": 0.5497,
      "step": 248
    },
    {
      "epoch": 0.07472989195678272,
      "grad_norm": 0.34863534569740295,
      "learning_rate": 7.447447447447447e-05,
      "loss": 0.564,
      "step": 249
    },
    {
      "epoch": 0.07503001200480192,
      "grad_norm": 0.2568702697753906,
      "learning_rate": 7.477477477477479e-05,
      "loss": 0.5526,
      "step": 250
    },
    {
      "epoch": 0.07533013205282113,
      "grad_norm": 0.21536609530448914,
      "learning_rate": 7.507507507507507e-05,
      "loss": 0.5499,
      "step": 251
    },
    {
      "epoch": 0.07563025210084033,
      "grad_norm": 0.14825338125228882,
      "learning_rate": 7.537537537537538e-05,
      "loss": 0.5292,
      "step": 252
    },
    {
      "epoch": 0.07593037214885955,
      "grad_norm": 0.14850740134716034,
      "learning_rate": 7.567567567567568e-05,
      "loss": 0.5686,
      "step": 253
    },
    {
      "epoch": 0.07623049219687875,
      "grad_norm": 0.16904740035533905,
      "learning_rate": 7.597597597597597e-05,
      "loss": 0.5328,
      "step": 254
    },
    {
      "epoch": 0.07653061224489796,
      "grad_norm": 0.15750734508037567,
      "learning_rate": 7.627627627627628e-05,
      "loss": 0.5588,
      "step": 255
    },
    {
      "epoch": 0.07683073229291716,
      "grad_norm": 0.1463199406862259,
      "learning_rate": 7.657657657657657e-05,
      "loss": 0.5541,
      "step": 256
    },
    {
      "epoch": 0.07713085234093638,
      "grad_norm": 0.7012650370597839,
      "learning_rate": 7.687687687687688e-05,
      "loss": 0.535,
      "step": 257
    },
    {
      "epoch": 0.07743097238895558,
      "grad_norm": 0.14505982398986816,
      "learning_rate": 7.717717717717718e-05,
      "loss": 0.5628,
      "step": 258
    },
    {
      "epoch": 0.07773109243697479,
      "grad_norm": 0.13912923634052277,
      "learning_rate": 7.747747747747748e-05,
      "loss": 0.5189,
      "step": 259
    },
    {
      "epoch": 0.07803121248499399,
      "grad_norm": 0.1794511377811432,
      "learning_rate": 7.777777777777778e-05,
      "loss": 0.5081,
      "step": 260
    },
    {
      "epoch": 0.0783313325330132,
      "grad_norm": 1.2760982513427734,
      "learning_rate": 7.807807807807808e-05,
      "loss": 0.6025,
      "step": 261
    },
    {
      "epoch": 0.07863145258103241,
      "grad_norm": 0.14869123697280884,
      "learning_rate": 7.837837837837838e-05,
      "loss": 0.5255,
      "step": 262
    },
    {
      "epoch": 0.07893157262905162,
      "grad_norm": 0.15125605463981628,
      "learning_rate": 7.867867867867868e-05,
      "loss": 0.5463,
      "step": 263
    },
    {
      "epoch": 0.07923169267707082,
      "grad_norm": 0.1474657654762268,
      "learning_rate": 7.897897897897898e-05,
      "loss": 0.5469,
      "step": 264
    },
    {
      "epoch": 0.07953181272509004,
      "grad_norm": 0.14748071134090424,
      "learning_rate": 7.927927927927928e-05,
      "loss": 0.5293,
      "step": 265
    },
    {
      "epoch": 0.07983193277310924,
      "grad_norm": 0.14991365373134613,
      "learning_rate": 7.957957957957959e-05,
      "loss": 0.5938,
      "step": 266
    },
    {
      "epoch": 0.08013205282112845,
      "grad_norm": 0.15257029235363007,
      "learning_rate": 7.987987987987988e-05,
      "loss": 0.5606,
      "step": 267
    },
    {
      "epoch": 0.08043217286914765,
      "grad_norm": 0.13995935022830963,
      "learning_rate": 8.018018018018019e-05,
      "loss": 0.525,
      "step": 268
    },
    {
      "epoch": 0.08073229291716687,
      "grad_norm": 0.13580486178398132,
      "learning_rate": 8.048048048048048e-05,
      "loss": 0.5221,
      "step": 269
    },
    {
      "epoch": 0.08103241296518607,
      "grad_norm": 0.137712761759758,
      "learning_rate": 8.078078078078079e-05,
      "loss": 0.4955,
      "step": 270
    },
    {
      "epoch": 0.08133253301320528,
      "grad_norm": 0.14473161101341248,
      "learning_rate": 8.108108108108109e-05,
      "loss": 0.5652,
      "step": 271
    },
    {
      "epoch": 0.08163265306122448,
      "grad_norm": 0.1528928130865097,
      "learning_rate": 8.138138138138138e-05,
      "loss": 0.538,
      "step": 272
    },
    {
      "epoch": 0.0819327731092437,
      "grad_norm": 0.5879867672920227,
      "learning_rate": 8.168168168168169e-05,
      "loss": 0.523,
      "step": 273
    },
    {
      "epoch": 0.0822328931572629,
      "grad_norm": 0.14671894907951355,
      "learning_rate": 8.198198198198198e-05,
      "loss": 0.5658,
      "step": 274
    },
    {
      "epoch": 0.08253301320528211,
      "grad_norm": 0.23328697681427002,
      "learning_rate": 8.228228228228229e-05,
      "loss": 0.5037,
      "step": 275
    },
    {
      "epoch": 0.08283313325330131,
      "grad_norm": 0.1431223601102829,
      "learning_rate": 8.258258258258259e-05,
      "loss": 0.5596,
      "step": 276
    },
    {
      "epoch": 0.08313325330132053,
      "grad_norm": 0.13177427649497986,
      "learning_rate": 8.288288288288289e-05,
      "loss": 0.4746,
      "step": 277
    },
    {
      "epoch": 0.08343337334933974,
      "grad_norm": 0.13189871609210968,
      "learning_rate": 8.318318318318319e-05,
      "loss": 0.4897,
      "step": 278
    },
    {
      "epoch": 0.08373349339735894,
      "grad_norm": 0.12248660624027252,
      "learning_rate": 8.348348348348349e-05,
      "loss": 0.4442,
      "step": 279
    },
    {
      "epoch": 0.08403361344537816,
      "grad_norm": 0.13041585683822632,
      "learning_rate": 8.378378378378379e-05,
      "loss": 0.5096,
      "step": 280
    },
    {
      "epoch": 0.08433373349339736,
      "grad_norm": 0.13134099543094635,
      "learning_rate": 8.408408408408409e-05,
      "loss": 0.506,
      "step": 281
    },
    {
      "epoch": 0.08463385354141657,
      "grad_norm": 0.1440073847770691,
      "learning_rate": 8.438438438438439e-05,
      "loss": 0.5683,
      "step": 282
    },
    {
      "epoch": 0.08493397358943577,
      "grad_norm": 0.1371690332889557,
      "learning_rate": 8.468468468468469e-05,
      "loss": 0.5403,
      "step": 283
    },
    {
      "epoch": 0.08523409363745499,
      "grad_norm": 0.1403842717409134,
      "learning_rate": 8.4984984984985e-05,
      "loss": 0.5708,
      "step": 284
    },
    {
      "epoch": 0.08553421368547419,
      "grad_norm": 0.13641871511936188,
      "learning_rate": 8.528528528528528e-05,
      "loss": 0.5176,
      "step": 285
    },
    {
      "epoch": 0.0858343337334934,
      "grad_norm": 0.12627846002578735,
      "learning_rate": 8.55855855855856e-05,
      "loss": 0.4752,
      "step": 286
    },
    {
      "epoch": 0.0861344537815126,
      "grad_norm": 0.1365559846162796,
      "learning_rate": 8.588588588588588e-05,
      "loss": 0.5224,
      "step": 287
    },
    {
      "epoch": 0.08643457382953182,
      "grad_norm": 0.1484965831041336,
      "learning_rate": 8.61861861861862e-05,
      "loss": 0.5857,
      "step": 288
    },
    {
      "epoch": 0.08673469387755102,
      "grad_norm": 0.13150005042552948,
      "learning_rate": 8.64864864864865e-05,
      "loss": 0.5204,
      "step": 289
    },
    {
      "epoch": 0.08703481392557023,
      "grad_norm": 0.13024255633354187,
      "learning_rate": 8.678678678678678e-05,
      "loss": 0.4807,
      "step": 290
    },
    {
      "epoch": 0.08733493397358943,
      "grad_norm": 0.1377982497215271,
      "learning_rate": 8.70870870870871e-05,
      "loss": 0.582,
      "step": 291
    },
    {
      "epoch": 0.08763505402160865,
      "grad_norm": 0.13690394163131714,
      "learning_rate": 8.738738738738738e-05,
      "loss": 0.5142,
      "step": 292
    },
    {
      "epoch": 0.08793517406962785,
      "grad_norm": 0.1395886093378067,
      "learning_rate": 8.76876876876877e-05,
      "loss": 0.5291,
      "step": 293
    },
    {
      "epoch": 0.08823529411764706,
      "grad_norm": 0.174406498670578,
      "learning_rate": 8.7987987987988e-05,
      "loss": 0.5506,
      "step": 294
    },
    {
      "epoch": 0.08853541416566627,
      "grad_norm": 0.14555053412914276,
      "learning_rate": 8.82882882882883e-05,
      "loss": 0.5302,
      "step": 295
    },
    {
      "epoch": 0.08883553421368548,
      "grad_norm": 0.13706374168395996,
      "learning_rate": 8.85885885885886e-05,
      "loss": 0.529,
      "step": 296
    },
    {
      "epoch": 0.08913565426170468,
      "grad_norm": 0.16008315980434418,
      "learning_rate": 8.888888888888889e-05,
      "loss": 0.6128,
      "step": 297
    },
    {
      "epoch": 0.0894357743097239,
      "grad_norm": 0.12834343314170837,
      "learning_rate": 8.918918918918919e-05,
      "loss": 0.5424,
      "step": 298
    },
    {
      "epoch": 0.0897358943577431,
      "grad_norm": 0.15433359146118164,
      "learning_rate": 8.948948948948949e-05,
      "loss": 0.5354,
      "step": 299
    },
    {
      "epoch": 0.09003601440576231,
      "grad_norm": 0.1307957023382187,
      "learning_rate": 8.978978978978979e-05,
      "loss": 0.4998,
      "step": 300
    },
    {
      "epoch": 0.09033613445378151,
      "grad_norm": 0.12451066076755524,
      "learning_rate": 9.009009009009009e-05,
      "loss": 0.4458,
      "step": 301
    },
    {
      "epoch": 0.09063625450180073,
      "grad_norm": 0.1300276517868042,
      "learning_rate": 9.039039039039039e-05,
      "loss": 0.5064,
      "step": 302
    },
    {
      "epoch": 0.09093637454981993,
      "grad_norm": 0.19848628342151642,
      "learning_rate": 9.069069069069069e-05,
      "loss": 0.5372,
      "step": 303
    },
    {
      "epoch": 0.09123649459783914,
      "grad_norm": 0.12379094213247299,
      "learning_rate": 9.0990990990991e-05,
      "loss": 0.5418,
      "step": 304
    },
    {
      "epoch": 0.09153661464585834,
      "grad_norm": 0.1259545385837555,
      "learning_rate": 9.129129129129129e-05,
      "loss": 0.4933,
      "step": 305
    },
    {
      "epoch": 0.09183673469387756,
      "grad_norm": 0.14117270708084106,
      "learning_rate": 9.15915915915916e-05,
      "loss": 0.5039,
      "step": 306
    },
    {
      "epoch": 0.09213685474189676,
      "grad_norm": 0.16921882331371307,
      "learning_rate": 9.18918918918919e-05,
      "loss": 0.5283,
      "step": 307
    },
    {
      "epoch": 0.09243697478991597,
      "grad_norm": 0.1352926343679428,
      "learning_rate": 9.219219219219219e-05,
      "loss": 0.529,
      "step": 308
    },
    {
      "epoch": 0.09273709483793517,
      "grad_norm": 0.12800848484039307,
      "learning_rate": 9.24924924924925e-05,
      "loss": 0.5578,
      "step": 309
    },
    {
      "epoch": 0.09303721488595439,
      "grad_norm": 0.12968765199184418,
      "learning_rate": 9.279279279279279e-05,
      "loss": 0.498,
      "step": 310
    },
    {
      "epoch": 0.09333733493397359,
      "grad_norm": 0.13185739517211914,
      "learning_rate": 9.30930930930931e-05,
      "loss": 0.5547,
      "step": 311
    },
    {
      "epoch": 0.0936374549819928,
      "grad_norm": 0.1271120011806488,
      "learning_rate": 9.33933933933934e-05,
      "loss": 0.5198,
      "step": 312
    },
    {
      "epoch": 0.093937575030012,
      "grad_norm": 0.13291484117507935,
      "learning_rate": 9.36936936936937e-05,
      "loss": 0.5277,
      "step": 313
    },
    {
      "epoch": 0.09423769507803122,
      "grad_norm": 0.1287054568529129,
      "learning_rate": 9.3993993993994e-05,
      "loss": 0.5788,
      "step": 314
    },
    {
      "epoch": 0.09453781512605042,
      "grad_norm": 0.13590851426124573,
      "learning_rate": 9.42942942942943e-05,
      "loss": 0.5443,
      "step": 315
    },
    {
      "epoch": 0.09483793517406963,
      "grad_norm": 0.12829774618148804,
      "learning_rate": 9.45945945945946e-05,
      "loss": 0.4761,
      "step": 316
    },
    {
      "epoch": 0.09513805522208883,
      "grad_norm": 0.12990467250347137,
      "learning_rate": 9.48948948948949e-05,
      "loss": 0.5363,
      "step": 317
    },
    {
      "epoch": 0.09543817527010805,
      "grad_norm": 0.12929613888263702,
      "learning_rate": 9.51951951951952e-05,
      "loss": 0.5325,
      "step": 318
    },
    {
      "epoch": 0.09573829531812725,
      "grad_norm": 0.12171660363674164,
      "learning_rate": 9.54954954954955e-05,
      "loss": 0.4698,
      "step": 319
    },
    {
      "epoch": 0.09603841536614646,
      "grad_norm": 0.12339114397764206,
      "learning_rate": 9.57957957957958e-05,
      "loss": 0.512,
      "step": 320
    },
    {
      "epoch": 0.09633853541416566,
      "grad_norm": 0.12217818200588226,
      "learning_rate": 9.60960960960961e-05,
      "loss": 0.4688,
      "step": 321
    },
    {
      "epoch": 0.09663865546218488,
      "grad_norm": 0.1298341602087021,
      "learning_rate": 9.639639639639641e-05,
      "loss": 0.5217,
      "step": 322
    },
    {
      "epoch": 0.09693877551020408,
      "grad_norm": 0.1366010457277298,
      "learning_rate": 9.66966966966967e-05,
      "loss": 0.5411,
      "step": 323
    },
    {
      "epoch": 0.09723889555822329,
      "grad_norm": 0.11594495177268982,
      "learning_rate": 9.699699699699701e-05,
      "loss": 0.457,
      "step": 324
    },
    {
      "epoch": 0.0975390156062425,
      "grad_norm": 0.12001260370016098,
      "learning_rate": 9.729729729729731e-05,
      "loss": 0.4871,
      "step": 325
    },
    {
      "epoch": 0.09783913565426171,
      "grad_norm": 0.1352197229862213,
      "learning_rate": 9.75975975975976e-05,
      "loss": 0.5627,
      "step": 326
    },
    {
      "epoch": 0.09813925570228091,
      "grad_norm": 0.14736923575401306,
      "learning_rate": 9.789789789789791e-05,
      "loss": 0.4938,
      "step": 327
    },
    {
      "epoch": 0.09843937575030012,
      "grad_norm": 0.13322827219963074,
      "learning_rate": 9.81981981981982e-05,
      "loss": 0.5221,
      "step": 328
    },
    {
      "epoch": 0.09873949579831932,
      "grad_norm": 0.1316617876291275,
      "learning_rate": 9.849849849849851e-05,
      "loss": 0.4848,
      "step": 329
    },
    {
      "epoch": 0.09903961584633854,
      "grad_norm": 0.12125842273235321,
      "learning_rate": 9.87987987987988e-05,
      "loss": 0.5035,
      "step": 330
    },
    {
      "epoch": 0.09933973589435774,
      "grad_norm": 0.13165231049060822,
      "learning_rate": 9.90990990990991e-05,
      "loss": 0.4913,
      "step": 331
    },
    {
      "epoch": 0.09963985594237695,
      "grad_norm": 0.14205414056777954,
      "learning_rate": 9.93993993993994e-05,
      "loss": 0.4759,
      "step": 332
    },
    {
      "epoch": 0.09993997599039615,
      "grad_norm": 0.8694428205490112,
      "learning_rate": 9.96996996996997e-05,
      "loss": 0.527,
      "step": 333
    },
    {
      "epoch": 0.10024009603841537,
      "grad_norm": 0.13224342465400696,
      "learning_rate": 0.0001,
      "loss": 0.4765,
      "step": 334
    },
    {
      "epoch": 0.10054021608643457,
      "grad_norm": 0.12250874191522598,
      "learning_rate": 0.0001003003003003003,
      "loss": 0.4789,
      "step": 335
    },
    {
      "epoch": 0.10084033613445378,
      "grad_norm": 0.2647605836391449,
      "learning_rate": 0.00010060060060060062,
      "loss": 0.5888,
      "step": 336
    },
    {
      "epoch": 0.10114045618247298,
      "grad_norm": 0.16100604832172394,
      "learning_rate": 0.00010090090090090089,
      "loss": 0.4987,
      "step": 337
    },
    {
      "epoch": 0.1014405762304922,
      "grad_norm": 0.1282327026128769,
      "learning_rate": 0.0001012012012012012,
      "loss": 0.542,
      "step": 338
    },
    {
      "epoch": 0.1017406962785114,
      "grad_norm": 0.1321956217288971,
      "learning_rate": 0.0001015015015015015,
      "loss": 0.4768,
      "step": 339
    },
    {
      "epoch": 0.10204081632653061,
      "grad_norm": 0.1253899186849594,
      "learning_rate": 0.00010180180180180182,
      "loss": 0.5044,
      "step": 340
    },
    {
      "epoch": 0.10234093637454982,
      "grad_norm": 0.12141785025596619,
      "learning_rate": 0.00010210210210210212,
      "loss": 0.4857,
      "step": 341
    },
    {
      "epoch": 0.10264105642256903,
      "grad_norm": 0.12600919604301453,
      "learning_rate": 0.0001024024024024024,
      "loss": 0.5238,
      "step": 342
    },
    {
      "epoch": 0.10294117647058823,
      "grad_norm": 0.12873877584934235,
      "learning_rate": 0.0001027027027027027,
      "loss": 0.4718,
      "step": 343
    },
    {
      "epoch": 0.10324129651860744,
      "grad_norm": 0.11940476298332214,
      "learning_rate": 0.000103003003003003,
      "loss": 0.5153,
      "step": 344
    },
    {
      "epoch": 0.10354141656662665,
      "grad_norm": 1.817137598991394,
      "learning_rate": 0.00010330330330330331,
      "loss": 0.4968,
      "step": 345
    },
    {
      "epoch": 0.10384153661464586,
      "grad_norm": 0.13736383616924286,
      "learning_rate": 0.00010360360360360361,
      "loss": 0.4911,
      "step": 346
    },
    {
      "epoch": 0.10414165666266506,
      "grad_norm": 0.13706469535827637,
      "learning_rate": 0.0001039039039039039,
      "loss": 0.5602,
      "step": 347
    },
    {
      "epoch": 0.10444177671068428,
      "grad_norm": 0.11970093101263046,
      "learning_rate": 0.0001042042042042042,
      "loss": 0.4455,
      "step": 348
    },
    {
      "epoch": 0.10474189675870348,
      "grad_norm": 0.12620647251605988,
      "learning_rate": 0.00010450450450450451,
      "loss": 0.4818,
      "step": 349
    },
    {
      "epoch": 0.10504201680672269,
      "grad_norm": 0.1250682771205902,
      "learning_rate": 0.00010480480480480481,
      "loss": 0.536,
      "step": 350
    },
    {
      "epoch": 0.10534213685474189,
      "grad_norm": 0.1511351764202118,
      "learning_rate": 0.00010510510510510511,
      "loss": 0.5285,
      "step": 351
    },
    {
      "epoch": 0.1056422569027611,
      "grad_norm": 0.19505727291107178,
      "learning_rate": 0.0001054054054054054,
      "loss": 0.4803,
      "step": 352
    },
    {
      "epoch": 0.1059423769507803,
      "grad_norm": 0.12491302192211151,
      "learning_rate": 0.00010570570570570571,
      "loss": 0.5079,
      "step": 353
    },
    {
      "epoch": 0.10624249699879952,
      "grad_norm": 0.12418210506439209,
      "learning_rate": 0.00010600600600600601,
      "loss": 0.518,
      "step": 354
    },
    {
      "epoch": 0.10654261704681872,
      "grad_norm": 0.13729716837406158,
      "learning_rate": 0.00010630630630630631,
      "loss": 0.5257,
      "step": 355
    },
    {
      "epoch": 0.10684273709483794,
      "grad_norm": 0.12184851616621017,
      "learning_rate": 0.00010660660660660662,
      "loss": 0.5152,
      "step": 356
    },
    {
      "epoch": 0.10714285714285714,
      "grad_norm": 0.12144186347723007,
      "learning_rate": 0.00010690690690690692,
      "loss": 0.4442,
      "step": 357
    },
    {
      "epoch": 0.10744297719087635,
      "grad_norm": 0.12210109084844589,
      "learning_rate": 0.00010720720720720721,
      "loss": 0.4776,
      "step": 358
    },
    {
      "epoch": 0.10774309723889555,
      "grad_norm": 0.12035097181797028,
      "learning_rate": 0.00010750750750750751,
      "loss": 0.5004,
      "step": 359
    },
    {
      "epoch": 0.10804321728691477,
      "grad_norm": 0.4249653220176697,
      "learning_rate": 0.00010780780780780782,
      "loss": 0.5689,
      "step": 360
    },
    {
      "epoch": 0.10834333733493397,
      "grad_norm": 0.1250094324350357,
      "learning_rate": 0.00010810810810810812,
      "loss": 0.5051,
      "step": 361
    },
    {
      "epoch": 0.10864345738295318,
      "grad_norm": 0.12944050133228302,
      "learning_rate": 0.00010840840840840842,
      "loss": 0.5163,
      "step": 362
    },
    {
      "epoch": 0.10894357743097238,
      "grad_norm": 0.17619773745536804,
      "learning_rate": 0.0001087087087087087,
      "loss": 0.4903,
      "step": 363
    },
    {
      "epoch": 0.1092436974789916,
      "grad_norm": 0.13504037261009216,
      "learning_rate": 0.000109009009009009,
      "loss": 0.5294,
      "step": 364
    },
    {
      "epoch": 0.1095438175270108,
      "grad_norm": 0.14919887483119965,
      "learning_rate": 0.00010930930930930932,
      "loss": 0.5045,
      "step": 365
    },
    {
      "epoch": 0.10984393757503001,
      "grad_norm": 0.12428752332925797,
      "learning_rate": 0.00010960960960960962,
      "loss": 0.5324,
      "step": 366
    },
    {
      "epoch": 0.11014405762304921,
      "grad_norm": 0.1244334876537323,
      "learning_rate": 0.00010990990990990993,
      "loss": 0.471,
      "step": 367
    },
    {
      "epoch": 0.11044417767106843,
      "grad_norm": 0.12586592137813568,
      "learning_rate": 0.0001102102102102102,
      "loss": 0.5102,
      "step": 368
    },
    {
      "epoch": 0.11074429771908763,
      "grad_norm": 0.11768530309200287,
      "learning_rate": 0.00011051051051051052,
      "loss": 0.5072,
      "step": 369
    },
    {
      "epoch": 0.11104441776710684,
      "grad_norm": 0.17226853966712952,
      "learning_rate": 0.00011081081081081082,
      "loss": 0.4927,
      "step": 370
    },
    {
      "epoch": 0.11134453781512606,
      "grad_norm": 0.12605972588062286,
      "learning_rate": 0.00011111111111111112,
      "loss": 0.5093,
      "step": 371
    },
    {
      "epoch": 0.11164465786314526,
      "grad_norm": 0.13863171637058258,
      "learning_rate": 0.00011141141141141143,
      "loss": 0.5109,
      "step": 372
    },
    {
      "epoch": 0.11194477791116447,
      "grad_norm": 0.11750851571559906,
      "learning_rate": 0.0001117117117117117,
      "loss": 0.509,
      "step": 373
    },
    {
      "epoch": 0.11224489795918367,
      "grad_norm": 0.12177072465419769,
      "learning_rate": 0.00011201201201201202,
      "loss": 0.4714,
      "step": 374
    },
    {
      "epoch": 0.11254501800720289,
      "grad_norm": 0.12073265016078949,
      "learning_rate": 0.00011231231231231231,
      "loss": 0.5356,
      "step": 375
    },
    {
      "epoch": 0.11284513805522209,
      "grad_norm": 0.1178957149386406,
      "learning_rate": 0.00011261261261261263,
      "loss": 0.5507,
      "step": 376
    },
    {
      "epoch": 0.1131452581032413,
      "grad_norm": 0.13189667463302612,
      "learning_rate": 0.00011291291291291293,
      "loss": 0.5367,
      "step": 377
    },
    {
      "epoch": 0.1134453781512605,
      "grad_norm": 0.12463075667619705,
      "learning_rate": 0.00011321321321321321,
      "loss": 0.5351,
      "step": 378
    },
    {
      "epoch": 0.11374549819927972,
      "grad_norm": 0.11254709959030151,
      "learning_rate": 0.00011351351351351351,
      "loss": 0.4595,
      "step": 379
    },
    {
      "epoch": 0.11404561824729892,
      "grad_norm": 0.1292826384305954,
      "learning_rate": 0.00011381381381381381,
      "loss": 0.4878,
      "step": 380
    },
    {
      "epoch": 0.11434573829531813,
      "grad_norm": 0.12284654378890991,
      "learning_rate": 0.00011411411411411413,
      "loss": 0.499,
      "step": 381
    },
    {
      "epoch": 0.11464585834333733,
      "grad_norm": 0.12618692219257355,
      "learning_rate": 0.00011441441441441443,
      "loss": 0.5046,
      "step": 382
    },
    {
      "epoch": 0.11494597839135655,
      "grad_norm": 0.123695969581604,
      "learning_rate": 0.00011471471471471471,
      "loss": 0.5389,
      "step": 383
    },
    {
      "epoch": 0.11524609843937575,
      "grad_norm": 0.11941318213939667,
      "learning_rate": 0.00011501501501501501,
      "loss": 0.5136,
      "step": 384
    },
    {
      "epoch": 0.11554621848739496,
      "grad_norm": 0.12415286153554916,
      "learning_rate": 0.00011531531531531532,
      "loss": 0.5181,
      "step": 385
    },
    {
      "epoch": 0.11584633853541416,
      "grad_norm": 0.11182846873998642,
      "learning_rate": 0.00011561561561561562,
      "loss": 0.47,
      "step": 386
    },
    {
      "epoch": 0.11614645858343338,
      "grad_norm": 0.21233585476875305,
      "learning_rate": 0.00011591591591591592,
      "loss": 0.4958,
      "step": 387
    },
    {
      "epoch": 0.11644657863145258,
      "grad_norm": 0.11492254585027695,
      "learning_rate": 0.00011621621621621621,
      "loss": 0.4552,
      "step": 388
    },
    {
      "epoch": 0.1167466986794718,
      "grad_norm": 0.12788830697536469,
      "learning_rate": 0.00011651651651651652,
      "loss": 0.5562,
      "step": 389
    },
    {
      "epoch": 0.117046818727491,
      "grad_norm": 0.12814950942993164,
      "learning_rate": 0.00011681681681681682,
      "loss": 0.4751,
      "step": 390
    },
    {
      "epoch": 0.11734693877551021,
      "grad_norm": 0.11689490079879761,
      "learning_rate": 0.00011711711711711712,
      "loss": 0.5067,
      "step": 391
    },
    {
      "epoch": 0.11764705882352941,
      "grad_norm": 0.11457665264606476,
      "learning_rate": 0.00011741741741741743,
      "loss": 0.5146,
      "step": 392
    },
    {
      "epoch": 0.11794717887154862,
      "grad_norm": 0.1177125945687294,
      "learning_rate": 0.00011771771771771771,
      "loss": 0.5285,
      "step": 393
    },
    {
      "epoch": 0.11824729891956783,
      "grad_norm": 0.11953035742044449,
      "learning_rate": 0.00011801801801801802,
      "loss": 0.5156,
      "step": 394
    },
    {
      "epoch": 0.11854741896758704,
      "grad_norm": 0.11999611556529999,
      "learning_rate": 0.00011831831831831832,
      "loss": 0.5099,
      "step": 395
    },
    {
      "epoch": 0.11884753901560624,
      "grad_norm": 0.29343682527542114,
      "learning_rate": 0.00011861861861861863,
      "loss": 0.528,
      "step": 396
    },
    {
      "epoch": 0.11914765906362546,
      "grad_norm": 0.15137045085430145,
      "learning_rate": 0.00011891891891891893,
      "loss": 0.5343,
      "step": 397
    },
    {
      "epoch": 0.11944777911164466,
      "grad_norm": 0.12200610339641571,
      "learning_rate": 0.00011921921921921923,
      "loss": 0.4978,
      "step": 398
    },
    {
      "epoch": 0.11974789915966387,
      "grad_norm": 0.11575727164745331,
      "learning_rate": 0.00011951951951951952,
      "loss": 0.5404,
      "step": 399
    },
    {
      "epoch": 0.12004801920768307,
      "grad_norm": 0.12000302225351334,
      "learning_rate": 0.00011981981981981982,
      "loss": 0.4837,
      "step": 400
    },
    {
      "epoch": 0.12034813925570229,
      "grad_norm": 0.13822582364082336,
      "learning_rate": 0.00012012012012012013,
      "loss": 0.5004,
      "step": 401
    },
    {
      "epoch": 0.12064825930372149,
      "grad_norm": 0.14978677034378052,
      "learning_rate": 0.00012042042042042043,
      "loss": 0.5117,
      "step": 402
    },
    {
      "epoch": 0.1209483793517407,
      "grad_norm": 0.12508971989154816,
      "learning_rate": 0.00012072072072072073,
      "loss": 0.5458,
      "step": 403
    },
    {
      "epoch": 0.1212484993997599,
      "grad_norm": 0.12150216102600098,
      "learning_rate": 0.00012102102102102102,
      "loss": 0.5195,
      "step": 404
    },
    {
      "epoch": 0.12154861944777912,
      "grad_norm": 0.147630512714386,
      "learning_rate": 0.00012132132132132133,
      "loss": 0.5179,
      "step": 405
    },
    {
      "epoch": 0.12184873949579832,
      "grad_norm": 0.11386080831289291,
      "learning_rate": 0.00012162162162162163,
      "loss": 0.5021,
      "step": 406
    },
    {
      "epoch": 0.12214885954381753,
      "grad_norm": 0.11933228373527527,
      "learning_rate": 0.00012192192192192193,
      "loss": 0.5113,
      "step": 407
    },
    {
      "epoch": 0.12244897959183673,
      "grad_norm": 0.16394619643688202,
      "learning_rate": 0.00012222222222222224,
      "loss": 0.4879,
      "step": 408
    },
    {
      "epoch": 0.12274909963985595,
      "grad_norm": 0.12113256007432938,
      "learning_rate": 0.00012252252252252253,
      "loss": 0.4934,
      "step": 409
    },
    {
      "epoch": 0.12304921968787515,
      "grad_norm": 0.11715859174728394,
      "learning_rate": 0.00012282282282282281,
      "loss": 0.4911,
      "step": 410
    },
    {
      "epoch": 0.12334933973589436,
      "grad_norm": 0.1113106906414032,
      "learning_rate": 0.00012312312312312313,
      "loss": 0.443,
      "step": 411
    },
    {
      "epoch": 0.12364945978391356,
      "grad_norm": 0.11617186665534973,
      "learning_rate": 0.00012342342342342344,
      "loss": 0.5064,
      "step": 412
    },
    {
      "epoch": 0.12394957983193278,
      "grad_norm": 0.12112707644701004,
      "learning_rate": 0.00012372372372372373,
      "loss": 0.5287,
      "step": 413
    },
    {
      "epoch": 0.12424969987995198,
      "grad_norm": 0.12584644556045532,
      "learning_rate": 0.000124024024024024,
      "loss": 0.4628,
      "step": 414
    },
    {
      "epoch": 0.12454981992797119,
      "grad_norm": 0.12498998641967773,
      "learning_rate": 0.00012432432432432433,
      "loss": 0.524,
      "step": 415
    },
    {
      "epoch": 0.12484993997599039,
      "grad_norm": 0.12376196682453156,
      "learning_rate": 0.00012462462462462464,
      "loss": 0.5042,
      "step": 416
    },
    {
      "epoch": 0.1251500600240096,
      "grad_norm": 0.11971154063940048,
      "learning_rate": 0.00012492492492492492,
      "loss": 0.5125,
      "step": 417
    },
    {
      "epoch": 0.12545018007202882,
      "grad_norm": 0.12637357413768768,
      "learning_rate": 0.00012522522522522524,
      "loss": 0.5476,
      "step": 418
    },
    {
      "epoch": 0.125750300120048,
      "grad_norm": 0.11340111494064331,
      "learning_rate": 0.00012552552552552552,
      "loss": 0.4854,
      "step": 419
    },
    {
      "epoch": 0.12605042016806722,
      "grad_norm": 0.13555923104286194,
      "learning_rate": 0.00012582582582582584,
      "loss": 0.5619,
      "step": 420
    },
    {
      "epoch": 0.12635054021608644,
      "grad_norm": 0.11894300580024719,
      "learning_rate": 0.00012612612612612612,
      "loss": 0.518,
      "step": 421
    },
    {
      "epoch": 0.12665066026410565,
      "grad_norm": 0.1221066564321518,
      "learning_rate": 0.00012642642642642644,
      "loss": 0.4481,
      "step": 422
    },
    {
      "epoch": 0.12695078031212484,
      "grad_norm": 0.1434706300497055,
      "learning_rate": 0.00012672672672672675,
      "loss": 0.4978,
      "step": 423
    },
    {
      "epoch": 0.12725090036014405,
      "grad_norm": 0.11965423077344894,
      "learning_rate": 0.00012702702702702703,
      "loss": 0.4808,
      "step": 424
    },
    {
      "epoch": 0.12755102040816327,
      "grad_norm": 0.13010963797569275,
      "learning_rate": 0.00012732732732732732,
      "loss": 0.5166,
      "step": 425
    },
    {
      "epoch": 0.12785114045618248,
      "grad_norm": 0.11788228154182434,
      "learning_rate": 0.00012762762762762763,
      "loss": 0.4836,
      "step": 426
    },
    {
      "epoch": 0.12815126050420167,
      "grad_norm": 0.11451204121112823,
      "learning_rate": 0.00012792792792792795,
      "loss": 0.5056,
      "step": 427
    },
    {
      "epoch": 0.12845138055222088,
      "grad_norm": 0.11950941383838654,
      "learning_rate": 0.00012822822822822823,
      "loss": 0.492,
      "step": 428
    },
    {
      "epoch": 0.1287515006002401,
      "grad_norm": 0.11622516065835953,
      "learning_rate": 0.00012852852852852852,
      "loss": 0.5266,
      "step": 429
    },
    {
      "epoch": 0.1290516206482593,
      "grad_norm": 0.10531352460384369,
      "learning_rate": 0.00012882882882882883,
      "loss": 0.455,
      "step": 430
    },
    {
      "epoch": 0.1293517406962785,
      "grad_norm": 0.11804667860269547,
      "learning_rate": 0.00012912912912912915,
      "loss": 0.5124,
      "step": 431
    },
    {
      "epoch": 0.12965186074429771,
      "grad_norm": 0.1157531887292862,
      "learning_rate": 0.00012942942942942943,
      "loss": 0.5081,
      "step": 432
    },
    {
      "epoch": 0.12995198079231693,
      "grad_norm": 0.12014187127351761,
      "learning_rate": 0.00012972972972972974,
      "loss": 0.5014,
      "step": 433
    },
    {
      "epoch": 0.13025210084033614,
      "grad_norm": 0.11232215166091919,
      "learning_rate": 0.00013003003003003003,
      "loss": 0.4866,
      "step": 434
    },
    {
      "epoch": 0.13055222088835533,
      "grad_norm": 0.12502364814281464,
      "learning_rate": 0.00013033033033033032,
      "loss": 0.5622,
      "step": 435
    },
    {
      "epoch": 0.13085234093637454,
      "grad_norm": 0.10632560402154922,
      "learning_rate": 0.00013063063063063063,
      "loss": 0.446,
      "step": 436
    },
    {
      "epoch": 0.13115246098439376,
      "grad_norm": 0.12490659952163696,
      "learning_rate": 0.00013093093093093094,
      "loss": 0.4816,
      "step": 437
    },
    {
      "epoch": 0.13145258103241297,
      "grad_norm": 0.11873575299978256,
      "learning_rate": 0.00013123123123123126,
      "loss": 0.4863,
      "step": 438
    },
    {
      "epoch": 0.13175270108043216,
      "grad_norm": 0.11742313206195831,
      "learning_rate": 0.00013153153153153154,
      "loss": 0.5057,
      "step": 439
    },
    {
      "epoch": 0.13205282112845138,
      "grad_norm": 0.11808551102876663,
      "learning_rate": 0.00013183183183183183,
      "loss": 0.5425,
      "step": 440
    },
    {
      "epoch": 0.1323529411764706,
      "grad_norm": 0.12108122557401657,
      "learning_rate": 0.00013213213213213214,
      "loss": 0.505,
      "step": 441
    },
    {
      "epoch": 0.1326530612244898,
      "grad_norm": 0.11308103799819946,
      "learning_rate": 0.00013243243243243243,
      "loss": 0.4761,
      "step": 442
    },
    {
      "epoch": 0.132953181272509,
      "grad_norm": 0.11376778036355972,
      "learning_rate": 0.00013273273273273274,
      "loss": 0.4999,
      "step": 443
    },
    {
      "epoch": 0.1332533013205282,
      "grad_norm": 0.1098993644118309,
      "learning_rate": 0.00013303303303303305,
      "loss": 0.4636,
      "step": 444
    },
    {
      "epoch": 0.13355342136854742,
      "grad_norm": 0.12314711511135101,
      "learning_rate": 0.00013333333333333334,
      "loss": 0.4813,
      "step": 445
    },
    {
      "epoch": 0.13385354141656663,
      "grad_norm": 0.11199234426021576,
      "learning_rate": 0.00013363363363363363,
      "loss": 0.4767,
      "step": 446
    },
    {
      "epoch": 0.13415366146458582,
      "grad_norm": 0.11720983684062958,
      "learning_rate": 0.00013393393393393394,
      "loss": 0.4833,
      "step": 447
    },
    {
      "epoch": 0.13445378151260504,
      "grad_norm": 0.11430398374795914,
      "learning_rate": 0.00013423423423423425,
      "loss": 0.449,
      "step": 448
    },
    {
      "epoch": 0.13475390156062425,
      "grad_norm": 0.2317732721567154,
      "learning_rate": 0.00013453453453453454,
      "loss": 0.5283,
      "step": 449
    },
    {
      "epoch": 0.13505402160864347,
      "grad_norm": 0.11623187363147736,
      "learning_rate": 0.00013483483483483482,
      "loss": 0.4559,
      "step": 450
    },
    {
      "epoch": 0.13535414165666265,
      "grad_norm": 0.12079429626464844,
      "learning_rate": 0.00013513513513513514,
      "loss": 0.5142,
      "step": 451
    },
    {
      "epoch": 0.13565426170468187,
      "grad_norm": 0.1218424141407013,
      "learning_rate": 0.00013543543543543545,
      "loss": 0.4925,
      "step": 452
    },
    {
      "epoch": 0.13595438175270108,
      "grad_norm": 0.11799100786447525,
      "learning_rate": 0.00013573573573573574,
      "loss": 0.5254,
      "step": 453
    },
    {
      "epoch": 0.1362545018007203,
      "grad_norm": 0.11575998365879059,
      "learning_rate": 0.00013603603603603605,
      "loss": 0.504,
      "step": 454
    },
    {
      "epoch": 0.13655462184873948,
      "grad_norm": 0.697054386138916,
      "learning_rate": 0.00013633633633633634,
      "loss": 0.5308,
      "step": 455
    },
    {
      "epoch": 0.1368547418967587,
      "grad_norm": 0.11700598895549774,
      "learning_rate": 0.00013663663663663665,
      "loss": 0.4937,
      "step": 456
    },
    {
      "epoch": 0.1371548619447779,
      "grad_norm": 0.9261333346366882,
      "learning_rate": 0.00013693693693693693,
      "loss": 0.5606,
      "step": 457
    },
    {
      "epoch": 0.13745498199279713,
      "grad_norm": 0.13678379356861115,
      "learning_rate": 0.00013723723723723725,
      "loss": 0.558,
      "step": 458
    },
    {
      "epoch": 0.1377551020408163,
      "grad_norm": 0.13666567206382751,
      "learning_rate": 0.00013753753753753756,
      "loss": 0.4878,
      "step": 459
    },
    {
      "epoch": 0.13805522208883553,
      "grad_norm": 0.14556999504566193,
      "learning_rate": 0.00013783783783783785,
      "loss": 0.497,
      "step": 460
    },
    {
      "epoch": 0.13835534213685474,
      "grad_norm": 0.1318492740392685,
      "learning_rate": 0.00013813813813813813,
      "loss": 0.5445,
      "step": 461
    },
    {
      "epoch": 0.13865546218487396,
      "grad_norm": 0.1597602367401123,
      "learning_rate": 0.00013843843843843845,
      "loss": 0.4491,
      "step": 462
    },
    {
      "epoch": 0.13895558223289317,
      "grad_norm": 0.12518081068992615,
      "learning_rate": 0.00013873873873873876,
      "loss": 0.5233,
      "step": 463
    },
    {
      "epoch": 0.13925570228091236,
      "grad_norm": 0.20594088733196259,
      "learning_rate": 0.00013903903903903905,
      "loss": 0.5249,
      "step": 464
    },
    {
      "epoch": 0.13955582232893157,
      "grad_norm": 0.1464836448431015,
      "learning_rate": 0.00013933933933933933,
      "loss": 0.4862,
      "step": 465
    },
    {
      "epoch": 0.1398559423769508,
      "grad_norm": 0.1688799411058426,
      "learning_rate": 0.00013963963963963964,
      "loss": 0.5128,
      "step": 466
    },
    {
      "epoch": 0.14015606242497,
      "grad_norm": 0.1309269368648529,
      "learning_rate": 0.00013993993993993996,
      "loss": 0.5293,
      "step": 467
    },
    {
      "epoch": 0.1404561824729892,
      "grad_norm": 0.18867067992687225,
      "learning_rate": 0.00014024024024024024,
      "loss": 0.4913,
      "step": 468
    },
    {
      "epoch": 0.1407563025210084,
      "grad_norm": 0.1426246464252472,
      "learning_rate": 0.00014054054054054056,
      "loss": 0.5017,
      "step": 469
    },
    {
      "epoch": 0.14105642256902762,
      "grad_norm": 0.12447234243154526,
      "learning_rate": 0.00014084084084084084,
      "loss": 0.4744,
      "step": 470
    },
    {
      "epoch": 0.14135654261704683,
      "grad_norm": 0.1284492313861847,
      "learning_rate": 0.00014114114114114113,
      "loss": 0.4824,
      "step": 471
    },
    {
      "epoch": 0.14165666266506602,
      "grad_norm": 0.13119681179523468,
      "learning_rate": 0.00014144144144144144,
      "loss": 0.534,
      "step": 472
    },
    {
      "epoch": 0.14195678271308523,
      "grad_norm": 0.17826277017593384,
      "learning_rate": 0.00014174174174174176,
      "loss": 0.5433,
      "step": 473
    },
    {
      "epoch": 0.14225690276110445,
      "grad_norm": 0.14619775116443634,
      "learning_rate": 0.00014204204204204207,
      "loss": 0.5719,
      "step": 474
    },
    {
      "epoch": 0.14255702280912366,
      "grad_norm": 0.13723814487457275,
      "learning_rate": 0.00014234234234234233,
      "loss": 0.4814,
      "step": 475
    },
    {
      "epoch": 0.14285714285714285,
      "grad_norm": 0.1651460975408554,
      "learning_rate": 0.00014264264264264264,
      "loss": 0.5306,
      "step": 476
    },
    {
      "epoch": 0.14315726290516206,
      "grad_norm": 0.1289675384759903,
      "learning_rate": 0.00014294294294294295,
      "loss": 0.5254,
      "step": 477
    },
    {
      "epoch": 0.14345738295318128,
      "grad_norm": 0.15384627878665924,
      "learning_rate": 0.00014324324324324324,
      "loss": 0.5559,
      "step": 478
    },
    {
      "epoch": 0.1437575030012005,
      "grad_norm": 0.14855274558067322,
      "learning_rate": 0.00014354354354354355,
      "loss": 0.4941,
      "step": 479
    },
    {
      "epoch": 0.14405762304921968,
      "grad_norm": 0.1353788524866104,
      "learning_rate": 0.00014384384384384387,
      "loss": 0.4631,
      "step": 480
    },
    {
      "epoch": 0.1443577430972389,
      "grad_norm": 0.12929096817970276,
      "learning_rate": 0.00014414414414414415,
      "loss": 0.4867,
      "step": 481
    },
    {
      "epoch": 0.1446578631452581,
      "grad_norm": 0.13982702791690826,
      "learning_rate": 0.00014444444444444444,
      "loss": 0.5191,
      "step": 482
    },
    {
      "epoch": 0.14495798319327732,
      "grad_norm": 0.12878680229187012,
      "learning_rate": 0.00014474474474474475,
      "loss": 0.4988,
      "step": 483
    },
    {
      "epoch": 0.1452581032412965,
      "grad_norm": 0.11960715055465698,
      "learning_rate": 0.00014504504504504506,
      "loss": 0.4968,
      "step": 484
    },
    {
      "epoch": 0.14555822328931572,
      "grad_norm": 0.1115783229470253,
      "learning_rate": 0.00014534534534534535,
      "loss": 0.4758,
      "step": 485
    },
    {
      "epoch": 0.14585834333733494,
      "grad_norm": 0.1291203647851944,
      "learning_rate": 0.00014564564564564564,
      "loss": 0.4929,
      "step": 486
    },
    {
      "epoch": 0.14615846338535415,
      "grad_norm": 0.10866429656744003,
      "learning_rate": 0.00014594594594594595,
      "loss": 0.4278,
      "step": 487
    },
    {
      "epoch": 0.14645858343337334,
      "grad_norm": 0.1162528246641159,
      "learning_rate": 0.00014624624624624626,
      "loss": 0.4836,
      "step": 488
    },
    {
      "epoch": 0.14675870348139256,
      "grad_norm": 0.12269286066293716,
      "learning_rate": 0.00014654654654654655,
      "loss": 0.5048,
      "step": 489
    },
    {
      "epoch": 0.14705882352941177,
      "grad_norm": 0.11308015137910843,
      "learning_rate": 0.00014684684684684686,
      "loss": 0.4922,
      "step": 490
    },
    {
      "epoch": 0.14735894357743098,
      "grad_norm": 0.11391475796699524,
      "learning_rate": 0.00014714714714714715,
      "loss": 0.5073,
      "step": 491
    },
    {
      "epoch": 0.14765906362545017,
      "grad_norm": 0.1623706966638565,
      "learning_rate": 0.00014744744744744746,
      "loss": 0.486,
      "step": 492
    },
    {
      "epoch": 0.14795918367346939,
      "grad_norm": 0.12103123962879181,
      "learning_rate": 0.00014774774774774775,
      "loss": 0.5211,
      "step": 493
    },
    {
      "epoch": 0.1482593037214886,
      "grad_norm": 0.1285637617111206,
      "learning_rate": 0.00014804804804804806,
      "loss": 0.4595,
      "step": 494
    },
    {
      "epoch": 0.14855942376950781,
      "grad_norm": 0.13039712607860565,
      "learning_rate": 0.00014834834834834837,
      "loss": 0.5023,
      "step": 495
    },
    {
      "epoch": 0.148859543817527,
      "grad_norm": 0.11426712572574615,
      "learning_rate": 0.00014864864864864866,
      "loss": 0.4792,
      "step": 496
    },
    {
      "epoch": 0.14915966386554622,
      "grad_norm": 0.11875149607658386,
      "learning_rate": 0.00014894894894894895,
      "loss": 0.5282,
      "step": 497
    },
    {
      "epoch": 0.14945978391356543,
      "grad_norm": 0.12333963811397552,
      "learning_rate": 0.00014924924924924926,
      "loss": 0.5092,
      "step": 498
    },
    {
      "epoch": 0.14975990396158465,
      "grad_norm": 0.10604982823133469,
      "learning_rate": 0.00014954954954954957,
      "loss": 0.4027,
      "step": 499
    },
    {
      "epoch": 0.15006002400960383,
      "grad_norm": 0.11714020371437073,
      "learning_rate": 0.00014984984984984986,
      "loss": 0.5226,
      "step": 500
    },
    {
      "epoch": 0.15036014405762305,
      "grad_norm": 0.10923773050308228,
      "learning_rate": 0.00015015015015015014,
      "loss": 0.4432,
      "step": 501
    },
    {
      "epoch": 0.15066026410564226,
      "grad_norm": 0.17080309987068176,
      "learning_rate": 0.00015045045045045046,
      "loss": 0.5269,
      "step": 502
    },
    {
      "epoch": 0.15096038415366148,
      "grad_norm": 0.12028209120035172,
      "learning_rate": 0.00015075075075075077,
      "loss": 0.4871,
      "step": 503
    },
    {
      "epoch": 0.15126050420168066,
      "grad_norm": 0.12861275672912598,
      "learning_rate": 0.00015105105105105106,
      "loss": 0.4997,
      "step": 504
    },
    {
      "epoch": 0.15156062424969988,
      "grad_norm": 0.11345670372247696,
      "learning_rate": 0.00015135135135135137,
      "loss": 0.4506,
      "step": 505
    },
    {
      "epoch": 0.1518607442977191,
      "grad_norm": 0.12062987685203552,
      "learning_rate": 0.00015165165165165165,
      "loss": 0.5536,
      "step": 506
    },
    {
      "epoch": 0.1521608643457383,
      "grad_norm": 0.11072537302970886,
      "learning_rate": 0.00015195195195195194,
      "loss": 0.4368,
      "step": 507
    },
    {
      "epoch": 0.1524609843937575,
      "grad_norm": 0.11545488238334656,
      "learning_rate": 0.00015225225225225225,
      "loss": 0.4591,
      "step": 508
    },
    {
      "epoch": 0.1527611044417767,
      "grad_norm": 0.10316600650548935,
      "learning_rate": 0.00015255255255255257,
      "loss": 0.4261,
      "step": 509
    },
    {
      "epoch": 0.15306122448979592,
      "grad_norm": 0.11584563553333282,
      "learning_rate": 0.00015285285285285288,
      "loss": 0.5289,
      "step": 510
    },
    {
      "epoch": 0.15336134453781514,
      "grad_norm": 0.11215624213218689,
      "learning_rate": 0.00015315315315315314,
      "loss": 0.4747,
      "step": 511
    },
    {
      "epoch": 0.15366146458583432,
      "grad_norm": 0.10986870527267456,
      "learning_rate": 0.00015345345345345345,
      "loss": 0.4486,
      "step": 512
    },
    {
      "epoch": 0.15396158463385354,
      "grad_norm": 0.14615389704704285,
      "learning_rate": 0.00015375375375375377,
      "loss": 0.4973,
      "step": 513
    },
    {
      "epoch": 0.15426170468187275,
      "grad_norm": 0.11121159791946411,
      "learning_rate": 0.00015405405405405405,
      "loss": 0.4692,
      "step": 514
    },
    {
      "epoch": 0.15456182472989197,
      "grad_norm": 0.1426669806241989,
      "learning_rate": 0.00015435435435435436,
      "loss": 0.4925,
      "step": 515
    },
    {
      "epoch": 0.15486194477791115,
      "grad_norm": 0.1225138008594513,
      "learning_rate": 0.00015465465465465465,
      "loss": 0.4686,
      "step": 516
    },
    {
      "epoch": 0.15516206482593037,
      "grad_norm": 0.11575968563556671,
      "learning_rate": 0.00015495495495495496,
      "loss": 0.4677,
      "step": 517
    },
    {
      "epoch": 0.15546218487394958,
      "grad_norm": 0.1262788027524948,
      "learning_rate": 0.00015525525525525525,
      "loss": 0.4947,
      "step": 518
    },
    {
      "epoch": 0.1557623049219688,
      "grad_norm": 0.11267603933811188,
      "learning_rate": 0.00015555555555555556,
      "loss": 0.5223,
      "step": 519
    },
    {
      "epoch": 0.15606242496998798,
      "grad_norm": 0.11654146015644073,
      "learning_rate": 0.00015585585585585588,
      "loss": 0.5202,
      "step": 520
    },
    {
      "epoch": 0.1563625450180072,
      "grad_norm": 0.11901742964982986,
      "learning_rate": 0.00015615615615615616,
      "loss": 0.5293,
      "step": 521
    },
    {
      "epoch": 0.1566626650660264,
      "grad_norm": 0.12037361413240433,
      "learning_rate": 0.00015645645645645645,
      "loss": 0.5164,
      "step": 522
    },
    {
      "epoch": 0.15696278511404563,
      "grad_norm": 0.11640309542417526,
      "learning_rate": 0.00015675675675675676,
      "loss": 0.4944,
      "step": 523
    },
    {
      "epoch": 0.15726290516206481,
      "grad_norm": 0.1187070906162262,
      "learning_rate": 0.00015705705705705707,
      "loss": 0.5435,
      "step": 524
    },
    {
      "epoch": 0.15756302521008403,
      "grad_norm": 0.11369931697845459,
      "learning_rate": 0.00015735735735735736,
      "loss": 0.4934,
      "step": 525
    },
    {
      "epoch": 0.15786314525810324,
      "grad_norm": 0.1534053087234497,
      "learning_rate": 0.00015765765765765767,
      "loss": 0.4869,
      "step": 526
    },
    {
      "epoch": 0.15816326530612246,
      "grad_norm": 0.11573463678359985,
      "learning_rate": 0.00015795795795795796,
      "loss": 0.5013,
      "step": 527
    },
    {
      "epoch": 0.15846338535414164,
      "grad_norm": 0.1068471297621727,
      "learning_rate": 0.00015825825825825827,
      "loss": 0.4565,
      "step": 528
    },
    {
      "epoch": 0.15876350540216086,
      "grad_norm": 0.12096768617630005,
      "learning_rate": 0.00015855855855855856,
      "loss": 0.54,
      "step": 529
    },
    {
      "epoch": 0.15906362545018007,
      "grad_norm": 0.17728646099567413,
      "learning_rate": 0.00015885885885885887,
      "loss": 0.526,
      "step": 530
    },
    {
      "epoch": 0.1593637454981993,
      "grad_norm": 0.11367742717266083,
      "learning_rate": 0.00015915915915915919,
      "loss": 0.5088,
      "step": 531
    },
    {
      "epoch": 0.15966386554621848,
      "grad_norm": 0.12228825688362122,
      "learning_rate": 0.00015945945945945947,
      "loss": 0.4922,
      "step": 532
    },
    {
      "epoch": 0.1599639855942377,
      "grad_norm": 0.12022542208433151,
      "learning_rate": 0.00015975975975975976,
      "loss": 0.5108,
      "step": 533
    },
    {
      "epoch": 0.1602641056422569,
      "grad_norm": 0.11787454783916473,
      "learning_rate": 0.00016006006006006007,
      "loss": 0.5062,
      "step": 534
    },
    {
      "epoch": 0.16056422569027612,
      "grad_norm": 0.1120874211192131,
      "learning_rate": 0.00016036036036036038,
      "loss": 0.4568,
      "step": 535
    },
    {
      "epoch": 0.1608643457382953,
      "grad_norm": 0.10945652425289154,
      "learning_rate": 0.00016066066066066067,
      "loss": 0.4865,
      "step": 536
    },
    {
      "epoch": 0.16116446578631452,
      "grad_norm": 0.11467491835355759,
      "learning_rate": 0.00016096096096096096,
      "loss": 0.5116,
      "step": 537
    },
    {
      "epoch": 0.16146458583433373,
      "grad_norm": 0.14926712214946747,
      "learning_rate": 0.00016126126126126127,
      "loss": 0.5284,
      "step": 538
    },
    {
      "epoch": 0.16176470588235295,
      "grad_norm": 0.10946252942085266,
      "learning_rate": 0.00016156156156156158,
      "loss": 0.4367,
      "step": 539
    },
    {
      "epoch": 0.16206482593037214,
      "grad_norm": 0.10662908852100372,
      "learning_rate": 0.00016186186186186187,
      "loss": 0.4311,
      "step": 540
    },
    {
      "epoch": 0.16236494597839135,
      "grad_norm": 0.11896120011806488,
      "learning_rate": 0.00016216216216216218,
      "loss": 0.5218,
      "step": 541
    },
    {
      "epoch": 0.16266506602641057,
      "grad_norm": 0.13229900598526,
      "learning_rate": 0.00016246246246246247,
      "loss": 0.4867,
      "step": 542
    },
    {
      "epoch": 0.16296518607442978,
      "grad_norm": 0.11431475728750229,
      "learning_rate": 0.00016276276276276275,
      "loss": 0.4592,
      "step": 543
    },
    {
      "epoch": 0.16326530612244897,
      "grad_norm": 0.15720653533935547,
      "learning_rate": 0.00016306306306306307,
      "loss": 0.519,
      "step": 544
    },
    {
      "epoch": 0.16356542617046818,
      "grad_norm": 0.11554212868213654,
      "learning_rate": 0.00016336336336336338,
      "loss": 0.5232,
      "step": 545
    },
    {
      "epoch": 0.1638655462184874,
      "grad_norm": 0.10940536856651306,
      "learning_rate": 0.0001636636636636637,
      "loss": 0.4515,
      "step": 546
    },
    {
      "epoch": 0.1641656662665066,
      "grad_norm": 0.11184670031070709,
      "learning_rate": 0.00016396396396396395,
      "loss": 0.477,
      "step": 547
    },
    {
      "epoch": 0.1644657863145258,
      "grad_norm": 0.13043731451034546,
      "learning_rate": 0.00016426426426426426,
      "loss": 0.5079,
      "step": 548
    },
    {
      "epoch": 0.164765906362545,
      "grad_norm": 0.1191568523645401,
      "learning_rate": 0.00016456456456456458,
      "loss": 0.4987,
      "step": 549
    },
    {
      "epoch": 0.16506602641056423,
      "grad_norm": 0.10903244465589523,
      "learning_rate": 0.00016486486486486486,
      "loss": 0.4772,
      "step": 550
    },
    {
      "epoch": 0.16536614645858344,
      "grad_norm": 0.15818609297275543,
      "learning_rate": 0.00016516516516516518,
      "loss": 0.5382,
      "step": 551
    },
    {
      "epoch": 0.16566626650660263,
      "grad_norm": 0.11612758785486221,
      "learning_rate": 0.00016546546546546546,
      "loss": 0.5043,
      "step": 552
    },
    {
      "epoch": 0.16596638655462184,
      "grad_norm": 0.11419732123613358,
      "learning_rate": 0.00016576576576576578,
      "loss": 0.4823,
      "step": 553
    },
    {
      "epoch": 0.16626650660264106,
      "grad_norm": 0.1097126379609108,
      "learning_rate": 0.00016606606606606606,
      "loss": 0.4657,
      "step": 554
    },
    {
      "epoch": 0.16656662665066027,
      "grad_norm": 0.11375732719898224,
      "learning_rate": 0.00016636636636636638,
      "loss": 0.4976,
      "step": 555
    },
    {
      "epoch": 0.16686674669867949,
      "grad_norm": 0.14020946621894836,
      "learning_rate": 0.0001666666666666667,
      "loss": 0.5031,
      "step": 556
    },
    {
      "epoch": 0.16716686674669867,
      "grad_norm": 0.11537282168865204,
      "learning_rate": 0.00016696696696696697,
      "loss": 0.5307,
      "step": 557
    },
    {
      "epoch": 0.1674669867947179,
      "grad_norm": 0.11510564386844635,
      "learning_rate": 0.00016726726726726726,
      "loss": 0.4818,
      "step": 558
    },
    {
      "epoch": 0.1677671068427371,
      "grad_norm": 0.14965474605560303,
      "learning_rate": 0.00016756756756756757,
      "loss": 0.4733,
      "step": 559
    },
    {
      "epoch": 0.16806722689075632,
      "grad_norm": 0.11072426289319992,
      "learning_rate": 0.0001678678678678679,
      "loss": 0.4945,
      "step": 560
    },
    {
      "epoch": 0.1683673469387755,
      "grad_norm": 0.13948391377925873,
      "learning_rate": 0.00016816816816816817,
      "loss": 0.5384,
      "step": 561
    },
    {
      "epoch": 0.16866746698679472,
      "grad_norm": 0.12210942059755325,
      "learning_rate": 0.00016846846846846846,
      "loss": 0.5396,
      "step": 562
    },
    {
      "epoch": 0.16896758703481393,
      "grad_norm": 0.1163802221417427,
      "learning_rate": 0.00016876876876876877,
      "loss": 0.5014,
      "step": 563
    },
    {
      "epoch": 0.16926770708283315,
      "grad_norm": 0.15363533794879913,
      "learning_rate": 0.00016906906906906908,
      "loss": 0.4728,
      "step": 564
    },
    {
      "epoch": 0.16956782713085233,
      "grad_norm": 0.13578364253044128,
      "learning_rate": 0.00016936936936936937,
      "loss": 0.473,
      "step": 565
    },
    {
      "epoch": 0.16986794717887155,
      "grad_norm": 0.11850472539663315,
      "learning_rate": 0.00016966966966966968,
      "loss": 0.5105,
      "step": 566
    },
    {
      "epoch": 0.17016806722689076,
      "grad_norm": 0.11600814759731293,
      "learning_rate": 0.00016996996996997,
      "loss": 0.5138,
      "step": 567
    },
    {
      "epoch": 0.17046818727490998,
      "grad_norm": 0.11694512516260147,
      "learning_rate": 0.00017027027027027028,
      "loss": 0.4818,
      "step": 568
    },
    {
      "epoch": 0.17076830732292916,
      "grad_norm": 0.1589028388261795,
      "learning_rate": 0.00017057057057057057,
      "loss": 0.5286,
      "step": 569
    },
    {
      "epoch": 0.17106842737094838,
      "grad_norm": 0.11037249118089676,
      "learning_rate": 0.00017087087087087088,
      "loss": 0.5065,
      "step": 570
    },
    {
      "epoch": 0.1713685474189676,
      "grad_norm": 0.11246825754642487,
      "learning_rate": 0.0001711711711711712,
      "loss": 0.4572,
      "step": 571
    },
    {
      "epoch": 0.1716686674669868,
      "grad_norm": 0.12457087635993958,
      "learning_rate": 0.00017147147147147148,
      "loss": 0.5341,
      "step": 572
    },
    {
      "epoch": 0.171968787515006,
      "grad_norm": 0.11229050159454346,
      "learning_rate": 0.00017177177177177177,
      "loss": 0.4152,
      "step": 573
    },
    {
      "epoch": 0.1722689075630252,
      "grad_norm": 0.10759669542312622,
      "learning_rate": 0.00017207207207207208,
      "loss": 0.4719,
      "step": 574
    },
    {
      "epoch": 0.17256902761104442,
      "grad_norm": 0.1949225217103958,
      "learning_rate": 0.0001723723723723724,
      "loss": 0.4645,
      "step": 575
    },
    {
      "epoch": 0.17286914765906364,
      "grad_norm": 0.11539410054683685,
      "learning_rate": 0.00017267267267267268,
      "loss": 0.5257,
      "step": 576
    },
    {
      "epoch": 0.17316926770708282,
      "grad_norm": 0.11130233108997345,
      "learning_rate": 0.000172972972972973,
      "loss": 0.4651,
      "step": 577
    },
    {
      "epoch": 0.17346938775510204,
      "grad_norm": 0.11778189241886139,
      "learning_rate": 0.00017327327327327328,
      "loss": 0.5365,
      "step": 578
    },
    {
      "epoch": 0.17376950780312125,
      "grad_norm": 0.11627792567014694,
      "learning_rate": 0.00017357357357357357,
      "loss": 0.5052,
      "step": 579
    },
    {
      "epoch": 0.17406962785114047,
      "grad_norm": 0.13903485238552094,
      "learning_rate": 0.00017387387387387388,
      "loss": 0.5194,
      "step": 580
    },
    {
      "epoch": 0.17436974789915966,
      "grad_norm": 0.12274301797151566,
      "learning_rate": 0.0001741741741741742,
      "loss": 0.506,
      "step": 581
    },
    {
      "epoch": 0.17466986794717887,
      "grad_norm": 0.11964797973632812,
      "learning_rate": 0.0001744744744744745,
      "loss": 0.4422,
      "step": 582
    },
    {
      "epoch": 0.17496998799519808,
      "grad_norm": 0.11528735607862473,
      "learning_rate": 0.00017477477477477476,
      "loss": 0.5045,
      "step": 583
    },
    {
      "epoch": 0.1752701080432173,
      "grad_norm": 0.12178003042936325,
      "learning_rate": 0.00017507507507507508,
      "loss": 0.4316,
      "step": 584
    },
    {
      "epoch": 0.17557022809123649,
      "grad_norm": 0.10782955586910248,
      "learning_rate": 0.0001753753753753754,
      "loss": 0.4541,
      "step": 585
    },
    {
      "epoch": 0.1758703481392557,
      "grad_norm": 0.11372298747301102,
      "learning_rate": 0.00017567567567567568,
      "loss": 0.4489,
      "step": 586
    },
    {
      "epoch": 0.17617046818727491,
      "grad_norm": 0.16553634405136108,
      "learning_rate": 0.000175975975975976,
      "loss": 0.4732,
      "step": 587
    },
    {
      "epoch": 0.17647058823529413,
      "grad_norm": 0.12725332379341125,
      "learning_rate": 0.00017627627627627627,
      "loss": 0.5046,
      "step": 588
    },
    {
      "epoch": 0.17677070828331332,
      "grad_norm": 0.11720646917819977,
      "learning_rate": 0.0001765765765765766,
      "loss": 0.5264,
      "step": 589
    },
    {
      "epoch": 0.17707082833133253,
      "grad_norm": 0.11271098256111145,
      "learning_rate": 0.00017687687687687687,
      "loss": 0.4926,
      "step": 590
    },
    {
      "epoch": 0.17737094837935174,
      "grad_norm": 0.12022582441568375,
      "learning_rate": 0.0001771771771771772,
      "loss": 0.5234,
      "step": 591
    },
    {
      "epoch": 0.17767106842737096,
      "grad_norm": 0.12866370379924774,
      "learning_rate": 0.0001774774774774775,
      "loss": 0.4921,
      "step": 592
    },
    {
      "epoch": 0.17797118847539015,
      "grad_norm": 0.11018224060535431,
      "learning_rate": 0.00017777777777777779,
      "loss": 0.4588,
      "step": 593
    },
    {
      "epoch": 0.17827130852340936,
      "grad_norm": 0.12001125514507294,
      "learning_rate": 0.00017807807807807807,
      "loss": 0.5136,
      "step": 594
    },
    {
      "epoch": 0.17857142857142858,
      "grad_norm": 0.12296062707901001,
      "learning_rate": 0.00017837837837837839,
      "loss": 0.4919,
      "step": 595
    },
    {
      "epoch": 0.1788715486194478,
      "grad_norm": 0.11281454563140869,
      "learning_rate": 0.0001786786786786787,
      "loss": 0.494,
      "step": 596
    },
    {
      "epoch": 0.17917166866746698,
      "grad_norm": 0.10556471347808838,
      "learning_rate": 0.00017897897897897898,
      "loss": 0.4229,
      "step": 597
    },
    {
      "epoch": 0.1794717887154862,
      "grad_norm": 0.1097581535577774,
      "learning_rate": 0.00017927927927927927,
      "loss": 0.4967,
      "step": 598
    },
    {
      "epoch": 0.1797719087635054,
      "grad_norm": 0.10584639757871628,
      "learning_rate": 0.00017957957957957958,
      "loss": 0.4255,
      "step": 599
    },
    {
      "epoch": 0.18007202881152462,
      "grad_norm": 0.11400244385004044,
      "learning_rate": 0.0001798798798798799,
      "loss": 0.4282,
      "step": 600
    },
    {
      "epoch": 0.1803721488595438,
      "grad_norm": 0.11473983526229858,
      "learning_rate": 0.00018018018018018018,
      "loss": 0.557,
      "step": 601
    },
    {
      "epoch": 0.18067226890756302,
      "grad_norm": 0.24360248446464539,
      "learning_rate": 0.0001804804804804805,
      "loss": 0.4693,
      "step": 602
    },
    {
      "epoch": 0.18097238895558224,
      "grad_norm": 0.11157704144716263,
      "learning_rate": 0.00018078078078078078,
      "loss": 0.4937,
      "step": 603
    },
    {
      "epoch": 0.18127250900360145,
      "grad_norm": 0.12321450561285019,
      "learning_rate": 0.0001810810810810811,
      "loss": 0.5592,
      "step": 604
    },
    {
      "epoch": 0.18157262905162064,
      "grad_norm": 0.11670718342065811,
      "learning_rate": 0.00018138138138138138,
      "loss": 0.507,
      "step": 605
    },
    {
      "epoch": 0.18187274909963985,
      "grad_norm": 0.10992065072059631,
      "learning_rate": 0.0001816816816816817,
      "loss": 0.4762,
      "step": 606
    },
    {
      "epoch": 0.18217286914765907,
      "grad_norm": 0.11405187100172043,
      "learning_rate": 0.000181981981981982,
      "loss": 0.4785,
      "step": 607
    },
    {
      "epoch": 0.18247298919567828,
      "grad_norm": 0.11776269227266312,
      "learning_rate": 0.0001822822822822823,
      "loss": 0.438,
      "step": 608
    },
    {
      "epoch": 0.18277310924369747,
      "grad_norm": 0.29667896032333374,
      "learning_rate": 0.00018258258258258258,
      "loss": 0.4869,
      "step": 609
    },
    {
      "epoch": 0.18307322929171668,
      "grad_norm": 0.11483728140592575,
      "learning_rate": 0.0001828828828828829,
      "loss": 0.5124,
      "step": 610
    },
    {
      "epoch": 0.1833733493397359,
      "grad_norm": 0.15464454889297485,
      "learning_rate": 0.0001831831831831832,
      "loss": 0.49,
      "step": 611
    },
    {
      "epoch": 0.1836734693877551,
      "grad_norm": 0.1975976973772049,
      "learning_rate": 0.0001834834834834835,
      "loss": 0.4841,
      "step": 612
    },
    {
      "epoch": 0.1839735894357743,
      "grad_norm": 0.18922753632068634,
      "learning_rate": 0.0001837837837837838,
      "loss": 0.461,
      "step": 613
    },
    {
      "epoch": 0.1842737094837935,
      "grad_norm": 0.11498506367206573,
      "learning_rate": 0.0001840840840840841,
      "loss": 0.4768,
      "step": 614
    },
    {
      "epoch": 0.18457382953181273,
      "grad_norm": 0.11605051159858704,
      "learning_rate": 0.00018438438438438438,
      "loss": 0.4853,
      "step": 615
    },
    {
      "epoch": 0.18487394957983194,
      "grad_norm": 0.13284830749034882,
      "learning_rate": 0.0001846846846846847,
      "loss": 0.521,
      "step": 616
    },
    {
      "epoch": 0.18517406962785113,
      "grad_norm": 0.11343251913785934,
      "learning_rate": 0.000184984984984985,
      "loss": 0.4741,
      "step": 617
    },
    {
      "epoch": 0.18547418967587034,
      "grad_norm": 0.11391028016805649,
      "learning_rate": 0.00018528528528528532,
      "loss": 0.464,
      "step": 618
    },
    {
      "epoch": 0.18577430972388956,
      "grad_norm": 0.14338339865207672,
      "learning_rate": 0.00018558558558558558,
      "loss": 0.4896,
      "step": 619
    },
    {
      "epoch": 0.18607442977190877,
      "grad_norm": 0.11370062828063965,
      "learning_rate": 0.0001858858858858859,
      "loss": 0.4137,
      "step": 620
    },
    {
      "epoch": 0.18637454981992796,
      "grad_norm": 0.20215009152889252,
      "learning_rate": 0.0001861861861861862,
      "loss": 0.4753,
      "step": 621
    },
    {
      "epoch": 0.18667466986794717,
      "grad_norm": 0.11335323750972748,
      "learning_rate": 0.0001864864864864865,
      "loss": 0.484,
      "step": 622
    },
    {
      "epoch": 0.1869747899159664,
      "grad_norm": 0.12075243145227432,
      "learning_rate": 0.0001867867867867868,
      "loss": 0.5086,
      "step": 623
    },
    {
      "epoch": 0.1872749099639856,
      "grad_norm": 0.1137528046965599,
      "learning_rate": 0.0001870870870870871,
      "loss": 0.4862,
      "step": 624
    },
    {
      "epoch": 0.1875750300120048,
      "grad_norm": 0.11862170696258545,
      "learning_rate": 0.0001873873873873874,
      "loss": 0.4472,
      "step": 625
    },
    {
      "epoch": 0.187875150060024,
      "grad_norm": 0.10449140518903732,
      "learning_rate": 0.00018768768768768769,
      "loss": 0.4252,
      "step": 626
    },
    {
      "epoch": 0.18817527010804322,
      "grad_norm": 0.12479628622531891,
      "learning_rate": 0.000187987987987988,
      "loss": 0.5151,
      "step": 627
    },
    {
      "epoch": 0.18847539015606243,
      "grad_norm": 0.11015018075704575,
      "learning_rate": 0.0001882882882882883,
      "loss": 0.4688,
      "step": 628
    },
    {
      "epoch": 0.18877551020408162,
      "grad_norm": 0.11410848796367645,
      "learning_rate": 0.0001885885885885886,
      "loss": 0.4999,
      "step": 629
    },
    {
      "epoch": 0.18907563025210083,
      "grad_norm": 0.1426745504140854,
      "learning_rate": 0.00018888888888888888,
      "loss": 0.4098,
      "step": 630
    },
    {
      "epoch": 0.18937575030012005,
      "grad_norm": 0.10985606908798218,
      "learning_rate": 0.0001891891891891892,
      "loss": 0.4525,
      "step": 631
    },
    {
      "epoch": 0.18967587034813926,
      "grad_norm": 0.11787772178649902,
      "learning_rate": 0.0001894894894894895,
      "loss": 0.4897,
      "step": 632
    },
    {
      "epoch": 0.18997599039615845,
      "grad_norm": 0.12313798815011978,
      "learning_rate": 0.0001897897897897898,
      "loss": 0.5339,
      "step": 633
    },
    {
      "epoch": 0.19027611044417767,
      "grad_norm": 0.1817101240158081,
      "learning_rate": 0.00019009009009009008,
      "loss": 0.5494,
      "step": 634
    },
    {
      "epoch": 0.19057623049219688,
      "grad_norm": 0.11306577175855637,
      "learning_rate": 0.0001903903903903904,
      "loss": 0.4803,
      "step": 635
    },
    {
      "epoch": 0.1908763505402161,
      "grad_norm": 0.11045024544000626,
      "learning_rate": 0.0001906906906906907,
      "loss": 0.4431,
      "step": 636
    },
    {
      "epoch": 0.19117647058823528,
      "grad_norm": 0.11855772882699966,
      "learning_rate": 0.000190990990990991,
      "loss": 0.5276,
      "step": 637
    },
    {
      "epoch": 0.1914765906362545,
      "grad_norm": 0.2188035398721695,
      "learning_rate": 0.0001912912912912913,
      "loss": 0.4671,
      "step": 638
    },
    {
      "epoch": 0.1917767106842737,
      "grad_norm": 0.1268298178911209,
      "learning_rate": 0.0001915915915915916,
      "loss": 0.4684,
      "step": 639
    },
    {
      "epoch": 0.19207683073229292,
      "grad_norm": 0.11935406178236008,
      "learning_rate": 0.0001918918918918919,
      "loss": 0.4585,
      "step": 640
    },
    {
      "epoch": 0.1923769507803121,
      "grad_norm": 0.12008475512266159,
      "learning_rate": 0.0001921921921921922,
      "loss": 0.4586,
      "step": 641
    },
    {
      "epoch": 0.19267707082833133,
      "grad_norm": 0.1218239888548851,
      "learning_rate": 0.0001924924924924925,
      "loss": 0.5081,
      "step": 642
    },
    {
      "epoch": 0.19297719087635054,
      "grad_norm": 0.11091190576553345,
      "learning_rate": 0.00019279279279279282,
      "loss": 0.5079,
      "step": 643
    },
    {
      "epoch": 0.19327731092436976,
      "grad_norm": 0.1120649129152298,
      "learning_rate": 0.00019309309309309308,
      "loss": 0.512,
      "step": 644
    },
    {
      "epoch": 0.19357743097238894,
      "grad_norm": 0.12589308619499207,
      "learning_rate": 0.0001933933933933934,
      "loss": 0.5332,
      "step": 645
    },
    {
      "epoch": 0.19387755102040816,
      "grad_norm": 0.20976229012012482,
      "learning_rate": 0.0001936936936936937,
      "loss": 0.4552,
      "step": 646
    },
    {
      "epoch": 0.19417767106842737,
      "grad_norm": 0.10452635586261749,
      "learning_rate": 0.00019399399399399402,
      "loss": 0.4325,
      "step": 647
    },
    {
      "epoch": 0.19447779111644659,
      "grad_norm": 0.12973804771900177,
      "learning_rate": 0.0001942942942942943,
      "loss": 0.4784,
      "step": 648
    },
    {
      "epoch": 0.1947779111644658,
      "grad_norm": 0.11603761464357376,
      "learning_rate": 0.00019459459459459462,
      "loss": 0.4605,
      "step": 649
    },
    {
      "epoch": 0.195078031212485,
      "grad_norm": 0.11491604894399643,
      "learning_rate": 0.0001948948948948949,
      "loss": 0.4921,
      "step": 650
    },
    {
      "epoch": 0.1953781512605042,
      "grad_norm": 0.11520378291606903,
      "learning_rate": 0.0001951951951951952,
      "loss": 0.5079,
      "step": 651
    },
    {
      "epoch": 0.19567827130852342,
      "grad_norm": 0.16177918016910553,
      "learning_rate": 0.0001954954954954955,
      "loss": 0.5376,
      "step": 652
    },
    {
      "epoch": 0.19597839135654263,
      "grad_norm": 0.11226612329483032,
      "learning_rate": 0.00019579579579579582,
      "loss": 0.4742,
      "step": 653
    },
    {
      "epoch": 0.19627851140456182,
      "grad_norm": 0.11720920354127884,
      "learning_rate": 0.00019609609609609613,
      "loss": 0.4879,
      "step": 654
    },
    {
      "epoch": 0.19657863145258103,
      "grad_norm": 0.12089575827121735,
      "learning_rate": 0.0001963963963963964,
      "loss": 0.4753,
      "step": 655
    },
    {
      "epoch": 0.19687875150060025,
      "grad_norm": 0.11939114332199097,
      "learning_rate": 0.0001966966966966967,
      "loss": 0.51,
      "step": 656
    },
    {
      "epoch": 0.19717887154861946,
      "grad_norm": 0.11049570888280869,
      "learning_rate": 0.00019699699699699701,
      "loss": 0.4316,
      "step": 657
    },
    {
      "epoch": 0.19747899159663865,
      "grad_norm": 0.11864820122718811,
      "learning_rate": 0.0001972972972972973,
      "loss": 0.5286,
      "step": 658
    },
    {
      "epoch": 0.19777911164465786,
      "grad_norm": 0.13813814520835876,
      "learning_rate": 0.0001975975975975976,
      "loss": 0.4973,
      "step": 659
    },
    {
      "epoch": 0.19807923169267708,
      "grad_norm": 0.10893283039331436,
      "learning_rate": 0.0001978978978978979,
      "loss": 0.4862,
      "step": 660
    },
    {
      "epoch": 0.1983793517406963,
      "grad_norm": 0.11592289805412292,
      "learning_rate": 0.0001981981981981982,
      "loss": 0.4995,
      "step": 661
    },
    {
      "epoch": 0.19867947178871548,
      "grad_norm": 0.10914364457130432,
      "learning_rate": 0.0001984984984984985,
      "loss": 0.4387,
      "step": 662
    },
    {
      "epoch": 0.1989795918367347,
      "grad_norm": 0.11548639833927155,
      "learning_rate": 0.0001987987987987988,
      "loss": 0.4889,
      "step": 663
    },
    {
      "epoch": 0.1992797118847539,
      "grad_norm": 0.1200682744383812,
      "learning_rate": 0.00019909909909909912,
      "loss": 0.5314,
      "step": 664
    },
    {
      "epoch": 0.19957983193277312,
      "grad_norm": 0.11693330109119415,
      "learning_rate": 0.0001993993993993994,
      "loss": 0.4713,
      "step": 665
    },
    {
      "epoch": 0.1998799519807923,
      "grad_norm": 0.11875863373279572,
      "learning_rate": 0.0001996996996996997,
      "loss": 0.494,
      "step": 666
    },
    {
      "epoch": 0.20018007202881152,
      "grad_norm": 0.11220604181289673,
      "learning_rate": 0.0002,
      "loss": 0.429,
      "step": 667
    },
    {
      "epoch": 0.20048019207683074,
      "grad_norm": 0.11995385587215424,
      "learning_rate": 0.00019999998628307335,
      "loss": 0.4765,
      "step": 668
    },
    {
      "epoch": 0.20078031212484995,
      "grad_norm": 0.14676910638809204,
      "learning_rate": 0.0001999999451322971,
      "loss": 0.5261,
      "step": 669
    },
    {
      "epoch": 0.20108043217286914,
      "grad_norm": 0.12924613058567047,
      "learning_rate": 0.00019999987654768255,
      "loss": 0.4744,
      "step": 670
    },
    {
      "epoch": 0.20138055222088835,
      "grad_norm": 0.34157878160476685,
      "learning_rate": 0.00019999978052924851,
      "loss": 0.4935,
      "step": 671
    },
    {
      "epoch": 0.20168067226890757,
      "grad_norm": 0.11561044305562973,
      "learning_rate": 0.0001999996570770214,
      "loss": 0.4938,
      "step": 672
    },
    {
      "epoch": 0.20198079231692678,
      "grad_norm": 0.12380159646272659,
      "learning_rate": 0.000199999506191035,
      "loss": 0.5206,
      "step": 673
    },
    {
      "epoch": 0.20228091236494597,
      "grad_norm": 0.12147749960422516,
      "learning_rate": 0.00019999932787133072,
      "loss": 0.419,
      "step": 674
    },
    {
      "epoch": 0.20258103241296518,
      "grad_norm": 0.12487666308879852,
      "learning_rate": 0.00019999912211795748,
      "loss": 0.482,
      "step": 675
    },
    {
      "epoch": 0.2028811524609844,
      "grad_norm": 0.11645001918077469,
      "learning_rate": 0.00019999888893097175,
      "loss": 0.495,
      "step": 676
    },
    {
      "epoch": 0.2031812725090036,
      "grad_norm": 0.14186392724514008,
      "learning_rate": 0.00019999862831043748,
      "loss": 0.4577,
      "step": 677
    },
    {
      "epoch": 0.2034813925570228,
      "grad_norm": 0.11139611899852753,
      "learning_rate": 0.00019999834025642618,
      "loss": 0.4751,
      "step": 678
    },
    {
      "epoch": 0.20378151260504201,
      "grad_norm": 0.1277855485677719,
      "learning_rate": 0.00019999802476901687,
      "loss": 0.5092,
      "step": 679
    },
    {
      "epoch": 0.20408163265306123,
      "grad_norm": 0.1118745356798172,
      "learning_rate": 0.0001999976818482961,
      "loss": 0.4756,
      "step": 680
    },
    {
      "epoch": 0.20438175270108044,
      "grad_norm": 0.12182165682315826,
      "learning_rate": 0.00019999731149435794,
      "loss": 0.4777,
      "step": 681
    },
    {
      "epoch": 0.20468187274909963,
      "grad_norm": 0.1322716474533081,
      "learning_rate": 0.00019999691370730402,
      "loss": 0.4691,
      "step": 682
    },
    {
      "epoch": 0.20498199279711884,
      "grad_norm": 0.12655936181545258,
      "learning_rate": 0.00019999648848724344,
      "loss": 0.4575,
      "step": 683
    },
    {
      "epoch": 0.20528211284513806,
      "grad_norm": 0.10949641466140747,
      "learning_rate": 0.00019999603583429284,
      "loss": 0.4587,
      "step": 684
    },
    {
      "epoch": 0.20558223289315727,
      "grad_norm": 0.11819802969694138,
      "learning_rate": 0.00019999555574857646,
      "loss": 0.5044,
      "step": 685
    },
    {
      "epoch": 0.20588235294117646,
      "grad_norm": 0.12319862842559814,
      "learning_rate": 0.00019999504823022592,
      "loss": 0.5049,
      "step": 686
    },
    {
      "epoch": 0.20618247298919568,
      "grad_norm": 0.12791141867637634,
      "learning_rate": 0.00019999451327938053,
      "loss": 0.5069,
      "step": 687
    },
    {
      "epoch": 0.2064825930372149,
      "grad_norm": 0.11336719244718552,
      "learning_rate": 0.00019999395089618702,
      "loss": 0.4507,
      "step": 688
    },
    {
      "epoch": 0.2067827130852341,
      "grad_norm": 0.15613985061645508,
      "learning_rate": 0.00019999336108079968,
      "loss": 0.498,
      "step": 689
    },
    {
      "epoch": 0.2070828331332533,
      "grad_norm": 0.11534976214170456,
      "learning_rate": 0.00019999274383338027,
      "loss": 0.4632,
      "step": 690
    },
    {
      "epoch": 0.2073829531812725,
      "grad_norm": 0.12534739077091217,
      "learning_rate": 0.0001999920991540982,
      "loss": 0.472,
      "step": 691
    },
    {
      "epoch": 0.20768307322929172,
      "grad_norm": 0.11476431041955948,
      "learning_rate": 0.00019999142704313027,
      "loss": 0.4315,
      "step": 692
    },
    {
      "epoch": 0.20798319327731093,
      "grad_norm": 0.13909682631492615,
      "learning_rate": 0.0001999907275006609,
      "loss": 0.4957,
      "step": 693
    },
    {
      "epoch": 0.20828331332533012,
      "grad_norm": 0.11797010153532028,
      "learning_rate": 0.00019999000052688202,
      "loss": 0.4987,
      "step": 694
    },
    {
      "epoch": 0.20858343337334934,
      "grad_norm": 0.12070614844560623,
      "learning_rate": 0.000199989246121993,
      "loss": 0.536,
      "step": 695
    },
    {
      "epoch": 0.20888355342136855,
      "grad_norm": 0.13717220723628998,
      "learning_rate": 0.00019998846428620089,
      "loss": 0.4716,
      "step": 696
    },
    {
      "epoch": 0.20918367346938777,
      "grad_norm": 0.11131128668785095,
      "learning_rate": 0.00019998765501972007,
      "loss": 0.47,
      "step": 697
    },
    {
      "epoch": 0.20948379351740695,
      "grad_norm": 0.1145501509308815,
      "learning_rate": 0.00019998681832277267,
      "loss": 0.4592,
      "step": 698
    },
    {
      "epoch": 0.20978391356542617,
      "grad_norm": 0.1326148509979248,
      "learning_rate": 0.00019998595419558812,
      "loss": 0.487,
      "step": 699
    },
    {
      "epoch": 0.21008403361344538,
      "grad_norm": 0.1202148050069809,
      "learning_rate": 0.00019998506263840354,
      "loss": 0.4405,
      "step": 700
    },
    {
      "epoch": 0.2103841536614646,
      "grad_norm": 0.12064465880393982,
      "learning_rate": 0.00019998414365146353,
      "loss": 0.4919,
      "step": 701
    },
    {
      "epoch": 0.21068427370948378,
      "grad_norm": 0.19440989196300507,
      "learning_rate": 0.00019998319723502019,
      "loss": 0.4824,
      "step": 702
    },
    {
      "epoch": 0.210984393757503,
      "grad_norm": 0.12534023821353912,
      "learning_rate": 0.00019998222338933315,
      "loss": 0.534,
      "step": 703
    },
    {
      "epoch": 0.2112845138055222,
      "grad_norm": 0.13558034598827362,
      "learning_rate": 0.00019998122211466957,
      "loss": 0.5248,
      "step": 704
    },
    {
      "epoch": 0.21158463385354143,
      "grad_norm": 0.2575712502002716,
      "learning_rate": 0.00019998019341130416,
      "loss": 0.4579,
      "step": 705
    },
    {
      "epoch": 0.2118847539015606,
      "grad_norm": 0.116855688393116,
      "learning_rate": 0.0001999791372795191,
      "loss": 0.4579,
      "step": 706
    },
    {
      "epoch": 0.21218487394957983,
      "grad_norm": 0.12518906593322754,
      "learning_rate": 0.00019997805371960417,
      "loss": 0.5,
      "step": 707
    },
    {
      "epoch": 0.21248499399759904,
      "grad_norm": 0.12143798172473907,
      "learning_rate": 0.00019997694273185662,
      "loss": 0.4403,
      "step": 708
    },
    {
      "epoch": 0.21278511404561826,
      "grad_norm": 0.11407876759767532,
      "learning_rate": 0.0001999758043165812,
      "loss": 0.4452,
      "step": 709
    },
    {
      "epoch": 0.21308523409363744,
      "grad_norm": 0.10889005661010742,
      "learning_rate": 0.00019997463847409023,
      "loss": 0.4205,
      "step": 710
    },
    {
      "epoch": 0.21338535414165666,
      "grad_norm": 0.12497064471244812,
      "learning_rate": 0.00019997344520470358,
      "loss": 0.446,
      "step": 711
    },
    {
      "epoch": 0.21368547418967587,
      "grad_norm": 0.14738810062408447,
      "learning_rate": 0.0001999722245087486,
      "loss": 0.4782,
      "step": 712
    },
    {
      "epoch": 0.2139855942376951,
      "grad_norm": 0.1568441241979599,
      "learning_rate": 0.00019997097638656014,
      "loss": 0.4735,
      "step": 713
    },
    {
      "epoch": 0.21428571428571427,
      "grad_norm": 0.13330377638339996,
      "learning_rate": 0.00019996970083848066,
      "loss": 0.5534,
      "step": 714
    },
    {
      "epoch": 0.2145858343337335,
      "grad_norm": 0.13056230545043945,
      "learning_rate": 0.00019996839786486006,
      "loss": 0.4948,
      "step": 715
    },
    {
      "epoch": 0.2148859543817527,
      "grad_norm": 0.1922270804643631,
      "learning_rate": 0.00019996706746605583,
      "loss": 0.5532,
      "step": 716
    },
    {
      "epoch": 0.21518607442977192,
      "grad_norm": 0.12903466820716858,
      "learning_rate": 0.00019996570964243287,
      "loss": 0.5108,
      "step": 717
    },
    {
      "epoch": 0.2154861944777911,
      "grad_norm": 0.11882127076387405,
      "learning_rate": 0.00019996432439436376,
      "loss": 0.5006,
      "step": 718
    },
    {
      "epoch": 0.21578631452581032,
      "grad_norm": 0.13730670511722565,
      "learning_rate": 0.00019996291172222848,
      "loss": 0.508,
      "step": 719
    },
    {
      "epoch": 0.21608643457382953,
      "grad_norm": 0.13259319961071014,
      "learning_rate": 0.00019996147162641464,
      "loss": 0.5398,
      "step": 720
    },
    {
      "epoch": 0.21638655462184875,
      "grad_norm": 0.2860109508037567,
      "learning_rate": 0.00019996000410731725,
      "loss": 0.5913,
      "step": 721
    },
    {
      "epoch": 0.21668667466986793,
      "grad_norm": 0.1916925013065338,
      "learning_rate": 0.00019995850916533896,
      "loss": 0.4511,
      "step": 722
    },
    {
      "epoch": 0.21698679471788715,
      "grad_norm": 0.31316134333610535,
      "learning_rate": 0.00019995698680088983,
      "loss": 0.5074,
      "step": 723
    },
    {
      "epoch": 0.21728691476590636,
      "grad_norm": 0.14046156406402588,
      "learning_rate": 0.00019995543701438757,
      "loss": 0.4805,
      "step": 724
    },
    {
      "epoch": 0.21758703481392558,
      "grad_norm": 0.11503440886735916,
      "learning_rate": 0.00019995385980625728,
      "loss": 0.4434,
      "step": 725
    },
    {
      "epoch": 0.21788715486194477,
      "grad_norm": 0.11567936092615128,
      "learning_rate": 0.00019995225517693174,
      "loss": 0.4499,
      "step": 726
    },
    {
      "epoch": 0.21818727490996398,
      "grad_norm": 0.11768448352813721,
      "learning_rate": 0.00019995062312685104,
      "loss": 0.4508,
      "step": 727
    },
    {
      "epoch": 0.2184873949579832,
      "grad_norm": 0.10805214941501617,
      "learning_rate": 0.000199948963656463,
      "loss": 0.4069,
      "step": 728
    },
    {
      "epoch": 0.2187875150060024,
      "grad_norm": 0.5363399982452393,
      "learning_rate": 0.00019994727676622286,
      "loss": 0.4972,
      "step": 729
    },
    {
      "epoch": 0.2190876350540216,
      "grad_norm": 0.11812508851289749,
      "learning_rate": 0.00019994556245659338,
      "loss": 0.4807,
      "step": 730
    },
    {
      "epoch": 0.2193877551020408,
      "grad_norm": 0.20644044876098633,
      "learning_rate": 0.00019994382072804489,
      "loss": 0.4612,
      "step": 731
    },
    {
      "epoch": 0.21968787515006002,
      "grad_norm": 0.11623464524745941,
      "learning_rate": 0.00019994205158105517,
      "loss": 0.4529,
      "step": 732
    },
    {
      "epoch": 0.21998799519807924,
      "grad_norm": 0.12570780515670776,
      "learning_rate": 0.00019994025501610962,
      "loss": 0.5192,
      "step": 733
    },
    {
      "epoch": 0.22028811524609843,
      "grad_norm": 0.12002886086702347,
      "learning_rate": 0.00019993843103370104,
      "loss": 0.4486,
      "step": 734
    },
    {
      "epoch": 0.22058823529411764,
      "grad_norm": 0.12708617746829987,
      "learning_rate": 0.0001999365796343299,
      "loss": 0.4994,
      "step": 735
    },
    {
      "epoch": 0.22088835534213686,
      "grad_norm": 0.14321328699588776,
      "learning_rate": 0.00019993470081850406,
      "loss": 0.5461,
      "step": 736
    },
    {
      "epoch": 0.22118847539015607,
      "grad_norm": 0.5809831619262695,
      "learning_rate": 0.00019993279458673896,
      "loss": 0.4908,
      "step": 737
    },
    {
      "epoch": 0.22148859543817526,
      "grad_norm": 0.12648674845695496,
      "learning_rate": 0.00019993086093955754,
      "loss": 0.5008,
      "step": 738
    },
    {
      "epoch": 0.22178871548619447,
      "grad_norm": 0.12926629185676575,
      "learning_rate": 0.00019992889987749033,
      "loss": 0.4538,
      "step": 739
    },
    {
      "epoch": 0.22208883553421369,
      "grad_norm": 0.13172459602355957,
      "learning_rate": 0.00019992691140107525,
      "loss": 0.4697,
      "step": 740
    },
    {
      "epoch": 0.2223889555822329,
      "grad_norm": 0.20475362241268158,
      "learning_rate": 0.00019992489551085783,
      "loss": 0.5028,
      "step": 741
    },
    {
      "epoch": 0.22268907563025211,
      "grad_norm": 0.11884354799985886,
      "learning_rate": 0.00019992285220739114,
      "loss": 0.4742,
      "step": 742
    },
    {
      "epoch": 0.2229891956782713,
      "grad_norm": 0.1247478723526001,
      "learning_rate": 0.0001999207814912357,
      "loss": 0.4895,
      "step": 743
    },
    {
      "epoch": 0.22328931572629052,
      "grad_norm": 0.13872456550598145,
      "learning_rate": 0.00019991868336295964,
      "loss": 0.4687,
      "step": 744
    },
    {
      "epoch": 0.22358943577430973,
      "grad_norm": 0.12662111222743988,
      "learning_rate": 0.00019991655782313853,
      "loss": 0.4898,
      "step": 745
    },
    {
      "epoch": 0.22388955582232895,
      "grad_norm": 0.12319193035364151,
      "learning_rate": 0.0001999144048723555,
      "loss": 0.5516,
      "step": 746
    },
    {
      "epoch": 0.22418967587034813,
      "grad_norm": 0.11385344713926315,
      "learning_rate": 0.0001999122245112011,
      "loss": 0.4497,
      "step": 747
    },
    {
      "epoch": 0.22448979591836735,
      "grad_norm": 0.13391318917274475,
      "learning_rate": 0.0001999100167402736,
      "loss": 0.4507,
      "step": 748
    },
    {
      "epoch": 0.22478991596638656,
      "grad_norm": 0.13216985762119293,
      "learning_rate": 0.00019990778156017864,
      "loss": 0.4939,
      "step": 749
    },
    {
      "epoch": 0.22509003601440578,
      "grad_norm": 0.13763558864593506,
      "learning_rate": 0.0001999055189715294,
      "loss": 0.513,
      "step": 750
    },
    {
      "epoch": 0.22539015606242496,
      "grad_norm": 0.1200341135263443,
      "learning_rate": 0.0001999032289749466,
      "loss": 0.4783,
      "step": 751
    },
    {
      "epoch": 0.22569027611044418,
      "grad_norm": 0.13042916357517242,
      "learning_rate": 0.00019990091157105847,
      "loss": 0.469,
      "step": 752
    },
    {
      "epoch": 0.2259903961584634,
      "grad_norm": 0.12593336403369904,
      "learning_rate": 0.0001998985667605008,
      "loss": 0.5282,
      "step": 753
    },
    {
      "epoch": 0.2262905162064826,
      "grad_norm": 0.12227312475442886,
      "learning_rate": 0.00019989619454391684,
      "loss": 0.4582,
      "step": 754
    },
    {
      "epoch": 0.2265906362545018,
      "grad_norm": 0.12027385085821152,
      "learning_rate": 0.00019989379492195734,
      "loss": 0.4634,
      "step": 755
    },
    {
      "epoch": 0.226890756302521,
      "grad_norm": 0.1255529671907425,
      "learning_rate": 0.0001998913678952807,
      "loss": 0.5003,
      "step": 756
    },
    {
      "epoch": 0.22719087635054022,
      "grad_norm": 0.12932389974594116,
      "learning_rate": 0.00019988891346455262,
      "loss": 0.4656,
      "step": 757
    },
    {
      "epoch": 0.22749099639855944,
      "grad_norm": 0.12482926994562149,
      "learning_rate": 0.00019988643163044656,
      "loss": 0.474,
      "step": 758
    },
    {
      "epoch": 0.22779111644657862,
      "grad_norm": 0.125040665268898,
      "learning_rate": 0.00019988392239364333,
      "loss": 0.4825,
      "step": 759
    },
    {
      "epoch": 0.22809123649459784,
      "grad_norm": 0.12448814511299133,
      "learning_rate": 0.0001998813857548313,
      "loss": 0.4414,
      "step": 760
    },
    {
      "epoch": 0.22839135654261705,
      "grad_norm": 0.15806365013122559,
      "learning_rate": 0.0001998788217147064,
      "loss": 0.5386,
      "step": 761
    },
    {
      "epoch": 0.22869147659063627,
      "grad_norm": 0.17397083342075348,
      "learning_rate": 0.00019987623027397207,
      "loss": 0.5484,
      "step": 762
    },
    {
      "epoch": 0.22899159663865545,
      "grad_norm": 0.15767407417297363,
      "learning_rate": 0.00019987361143333917,
      "loss": 0.4764,
      "step": 763
    },
    {
      "epoch": 0.22929171668667467,
      "grad_norm": 0.13692127168178558,
      "learning_rate": 0.00019987096519352617,
      "loss": 0.4733,
      "step": 764
    },
    {
      "epoch": 0.22959183673469388,
      "grad_norm": 0.17084169387817383,
      "learning_rate": 0.00019986829155525907,
      "loss": 0.4945,
      "step": 765
    },
    {
      "epoch": 0.2298919567827131,
      "grad_norm": 0.1780541092157364,
      "learning_rate": 0.0001998655905192713,
      "loss": 0.4839,
      "step": 766
    },
    {
      "epoch": 0.23019207683073228,
      "grad_norm": 0.4227488934993744,
      "learning_rate": 0.0001998628620863039,
      "loss": 0.4601,
      "step": 767
    },
    {
      "epoch": 0.2304921968787515,
      "grad_norm": 0.12997548282146454,
      "learning_rate": 0.0001998601062571054,
      "loss": 0.4737,
      "step": 768
    },
    {
      "epoch": 0.2307923169267707,
      "grad_norm": 0.12152252346277237,
      "learning_rate": 0.00019985732303243178,
      "loss": 0.4925,
      "step": 769
    },
    {
      "epoch": 0.23109243697478993,
      "grad_norm": 0.15152108669281006,
      "learning_rate": 0.0001998545124130466,
      "loss": 0.4779,
      "step": 770
    },
    {
      "epoch": 0.23139255702280911,
      "grad_norm": 0.11856577545404434,
      "learning_rate": 0.00019985167439972096,
      "loss": 0.4509,
      "step": 771
    },
    {
      "epoch": 0.23169267707082833,
      "grad_norm": 0.1667887270450592,
      "learning_rate": 0.0001998488089932334,
      "loss": 0.5191,
      "step": 772
    },
    {
      "epoch": 0.23199279711884754,
      "grad_norm": 0.127762109041214,
      "learning_rate": 0.00019984591619437,
      "loss": 0.5351,
      "step": 773
    },
    {
      "epoch": 0.23229291716686676,
      "grad_norm": 0.13002650439739227,
      "learning_rate": 0.0001998429960039244,
      "loss": 0.4875,
      "step": 774
    },
    {
      "epoch": 0.23259303721488594,
      "grad_norm": 0.1593828648328781,
      "learning_rate": 0.00019984004842269767,
      "loss": 0.4812,
      "step": 775
    },
    {
      "epoch": 0.23289315726290516,
      "grad_norm": 0.14525969326496124,
      "learning_rate": 0.0001998370734514985,
      "loss": 0.5652,
      "step": 776
    },
    {
      "epoch": 0.23319327731092437,
      "grad_norm": 0.12786003947257996,
      "learning_rate": 0.00019983407109114306,
      "loss": 0.417,
      "step": 777
    },
    {
      "epoch": 0.2334933973589436,
      "grad_norm": 0.12177425622940063,
      "learning_rate": 0.0001998310413424549,
      "loss": 0.4973,
      "step": 778
    },
    {
      "epoch": 0.23379351740696278,
      "grad_norm": 0.12841089069843292,
      "learning_rate": 0.0001998279842062653,
      "loss": 0.5296,
      "step": 779
    },
    {
      "epoch": 0.234093637454982,
      "grad_norm": 0.40901222825050354,
      "learning_rate": 0.00019982489968341292,
      "loss": 0.4556,
      "step": 780
    },
    {
      "epoch": 0.2343937575030012,
      "grad_norm": 0.12318484485149384,
      "learning_rate": 0.000199821787774744,
      "loss": 0.4793,
      "step": 781
    },
    {
      "epoch": 0.23469387755102042,
      "grad_norm": 0.13381503522396088,
      "learning_rate": 0.00019981864848111217,
      "loss": 0.4677,
      "step": 782
    },
    {
      "epoch": 0.2349939975990396,
      "grad_norm": 0.13625945150852203,
      "learning_rate": 0.00019981548180337874,
      "loss": 0.4417,
      "step": 783
    },
    {
      "epoch": 0.23529411764705882,
      "grad_norm": 0.14159783720970154,
      "learning_rate": 0.00019981228774241242,
      "loss": 0.479,
      "step": 784
    },
    {
      "epoch": 0.23559423769507803,
      "grad_norm": 0.1346697211265564,
      "learning_rate": 0.0001998090662990894,
      "loss": 0.4177,
      "step": 785
    },
    {
      "epoch": 0.23589435774309725,
      "grad_norm": 0.12795834243297577,
      "learning_rate": 0.00019980581747429358,
      "loss": 0.5186,
      "step": 786
    },
    {
      "epoch": 0.23619447779111644,
      "grad_norm": 0.12394683063030243,
      "learning_rate": 0.00019980254126891614,
      "loss": 0.4767,
      "step": 787
    },
    {
      "epoch": 0.23649459783913565,
      "grad_norm": 0.12592841684818268,
      "learning_rate": 0.0001997992376838559,
      "loss": 0.4718,
      "step": 788
    },
    {
      "epoch": 0.23679471788715487,
      "grad_norm": 0.136773481965065,
      "learning_rate": 0.00019979590672001917,
      "loss": 0.4687,
      "step": 789
    },
    {
      "epoch": 0.23709483793517408,
      "grad_norm": 0.15789003670215607,
      "learning_rate": 0.00019979254837831976,
      "loss": 0.5294,
      "step": 790
    },
    {
      "epoch": 0.23739495798319327,
      "grad_norm": 0.15132829546928406,
      "learning_rate": 0.00019978916265967896,
      "loss": 0.533,
      "step": 791
    },
    {
      "epoch": 0.23769507803121248,
      "grad_norm": 0.13565434515476227,
      "learning_rate": 0.00019978574956502562,
      "loss": 0.4872,
      "step": 792
    },
    {
      "epoch": 0.2379951980792317,
      "grad_norm": 0.12341909855604172,
      "learning_rate": 0.0001997823090952961,
      "loss": 0.4661,
      "step": 793
    },
    {
      "epoch": 0.2382953181272509,
      "grad_norm": 0.13288845121860504,
      "learning_rate": 0.00019977884125143422,
      "loss": 0.4695,
      "step": 794
    },
    {
      "epoch": 0.2385954381752701,
      "grad_norm": 0.11384455114603043,
      "learning_rate": 0.0001997753460343914,
      "loss": 0.4839,
      "step": 795
    },
    {
      "epoch": 0.2388955582232893,
      "grad_norm": 0.13420362770557404,
      "learning_rate": 0.00019977182344512647,
      "loss": 0.5234,
      "step": 796
    },
    {
      "epoch": 0.23919567827130853,
      "grad_norm": 0.11861885339021683,
      "learning_rate": 0.0001997682734846058,
      "loss": 0.4593,
      "step": 797
    },
    {
      "epoch": 0.23949579831932774,
      "grad_norm": 0.11524344980716705,
      "learning_rate": 0.00019976469615380334,
      "loss": 0.4461,
      "step": 798
    },
    {
      "epoch": 0.23979591836734693,
      "grad_norm": 0.1178302988409996,
      "learning_rate": 0.00019976109145370042,
      "loss": 0.4366,
      "step": 799
    },
    {
      "epoch": 0.24009603841536614,
      "grad_norm": 0.13384704291820526,
      "learning_rate": 0.00019975745938528597,
      "loss": 0.471,
      "step": 800
    },
    {
      "epoch": 0.24039615846338536,
      "grad_norm": 0.12177698314189911,
      "learning_rate": 0.00019975379994955644,
      "loss": 0.4967,
      "step": 801
    },
    {
      "epoch": 0.24069627851140457,
      "grad_norm": 0.16466966271400452,
      "learning_rate": 0.0001997501131475157,
      "loss": 0.4926,
      "step": 802
    },
    {
      "epoch": 0.24099639855942376,
      "grad_norm": 0.14508703351020813,
      "learning_rate": 0.00019974639898017525,
      "loss": 0.5903,
      "step": 803
    },
    {
      "epoch": 0.24129651860744297,
      "grad_norm": 0.11782081425189972,
      "learning_rate": 0.00019974265744855397,
      "loss": 0.4213,
      "step": 804
    },
    {
      "epoch": 0.2415966386554622,
      "grad_norm": 0.1257871836423874,
      "learning_rate": 0.0001997388885536783,
      "loss": 0.4696,
      "step": 805
    },
    {
      "epoch": 0.2418967587034814,
      "grad_norm": 0.13063441216945648,
      "learning_rate": 0.00019973509229658225,
      "loss": 0.5095,
      "step": 806
    },
    {
      "epoch": 0.2421968787515006,
      "grad_norm": 0.1315949410200119,
      "learning_rate": 0.00019973126867830728,
      "loss": 0.5238,
      "step": 807
    },
    {
      "epoch": 0.2424969987995198,
      "grad_norm": 0.16877683997154236,
      "learning_rate": 0.00019972741769990228,
      "loss": 0.4921,
      "step": 808
    },
    {
      "epoch": 0.24279711884753902,
      "grad_norm": 0.11835787445306778,
      "learning_rate": 0.00019972353936242377,
      "loss": 0.4703,
      "step": 809
    },
    {
      "epoch": 0.24309723889555823,
      "grad_norm": 0.14192615449428558,
      "learning_rate": 0.00019971963366693574,
      "loss": 0.4702,
      "step": 810
    },
    {
      "epoch": 0.24339735894357742,
      "grad_norm": 0.13355907797813416,
      "learning_rate": 0.00019971570061450963,
      "loss": 0.4859,
      "step": 811
    },
    {
      "epoch": 0.24369747899159663,
      "grad_norm": 0.125127911567688,
      "learning_rate": 0.00019971174020622448,
      "loss": 0.4636,
      "step": 812
    },
    {
      "epoch": 0.24399759903961585,
      "grad_norm": 0.14461135864257812,
      "learning_rate": 0.00019970775244316675,
      "loss": 0.4712,
      "step": 813
    },
    {
      "epoch": 0.24429771908763506,
      "grad_norm": 0.12992002069950104,
      "learning_rate": 0.00019970373732643046,
      "loss": 0.5124,
      "step": 814
    },
    {
      "epoch": 0.24459783913565425,
      "grad_norm": 0.12226548790931702,
      "learning_rate": 0.0001996996948571171,
      "loss": 0.4968,
      "step": 815
    },
    {
      "epoch": 0.24489795918367346,
      "grad_norm": 0.1229477971792221,
      "learning_rate": 0.00019969562503633563,
      "loss": 0.4902,
      "step": 816
    },
    {
      "epoch": 0.24519807923169268,
      "grad_norm": 0.12690682709217072,
      "learning_rate": 0.00019969152786520264,
      "loss": 0.485,
      "step": 817
    },
    {
      "epoch": 0.2454981992797119,
      "grad_norm": 0.12483441084623337,
      "learning_rate": 0.00019968740334484205,
      "loss": 0.4781,
      "step": 818
    },
    {
      "epoch": 0.24579831932773108,
      "grad_norm": 0.7821135520935059,
      "learning_rate": 0.00019968325147638548,
      "loss": 0.4798,
      "step": 819
    },
    {
      "epoch": 0.2460984393757503,
      "grad_norm": 0.13457560539245605,
      "learning_rate": 0.0001996790722609719,
      "loss": 0.446,
      "step": 820
    },
    {
      "epoch": 0.2463985594237695,
      "grad_norm": 0.16727985441684723,
      "learning_rate": 0.00019967486569974778,
      "loss": 0.5498,
      "step": 821
    },
    {
      "epoch": 0.24669867947178872,
      "grad_norm": 0.17841017246246338,
      "learning_rate": 0.00019967063179386721,
      "loss": 0.5494,
      "step": 822
    },
    {
      "epoch": 0.2469987995198079,
      "grad_norm": 0.17153313755989075,
      "learning_rate": 0.0001996663705444917,
      "loss": 0.5141,
      "step": 823
    },
    {
      "epoch": 0.24729891956782712,
      "grad_norm": 0.13014820218086243,
      "learning_rate": 0.00019966208195279023,
      "loss": 0.4661,
      "step": 824
    },
    {
      "epoch": 0.24759903961584634,
      "grad_norm": 0.20828866958618164,
      "learning_rate": 0.00019965776601993938,
      "loss": 0.474,
      "step": 825
    },
    {
      "epoch": 0.24789915966386555,
      "grad_norm": 0.14066027104854584,
      "learning_rate": 0.00019965342274712316,
      "loss": 0.5127,
      "step": 826
    },
    {
      "epoch": 0.24819927971188474,
      "grad_norm": 0.13748477399349213,
      "learning_rate": 0.0001996490521355331,
      "loss": 0.5034,
      "step": 827
    },
    {
      "epoch": 0.24849939975990396,
      "grad_norm": 0.14567652344703674,
      "learning_rate": 0.00019964465418636823,
      "loss": 0.4954,
      "step": 828
    },
    {
      "epoch": 0.24879951980792317,
      "grad_norm": 0.15060196816921234,
      "learning_rate": 0.00019964022890083503,
      "loss": 0.4874,
      "step": 829
    },
    {
      "epoch": 0.24909963985594238,
      "grad_norm": 0.1204688772559166,
      "learning_rate": 0.00019963577628014757,
      "loss": 0.4384,
      "step": 830
    },
    {
      "epoch": 0.24939975990396157,
      "grad_norm": 0.12509852647781372,
      "learning_rate": 0.0001996312963255274,
      "loss": 0.4729,
      "step": 831
    },
    {
      "epoch": 0.24969987995198079,
      "grad_norm": 0.22054116427898407,
      "learning_rate": 0.00019962678903820348,
      "loss": 0.4813,
      "step": 832
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.13491611182689667,
      "learning_rate": 0.0001996222544194124,
      "loss": 0.51,
      "step": 833
    },
    {
      "epoch": 0.2503001200480192,
      "grad_norm": 0.14967964589595795,
      "learning_rate": 0.00019961769247039813,
      "loss": 0.4411,
      "step": 834
    },
    {
      "epoch": 0.25060024009603843,
      "grad_norm": 0.13337065279483795,
      "learning_rate": 0.0001996131031924122,
      "loss": 0.4768,
      "step": 835
    },
    {
      "epoch": 0.25090036014405764,
      "grad_norm": 0.17040053009986877,
      "learning_rate": 0.00019960848658671365,
      "loss": 0.4829,
      "step": 836
    },
    {
      "epoch": 0.25120048019207686,
      "grad_norm": 0.14164522290229797,
      "learning_rate": 0.00019960384265456894,
      "loss": 0.4903,
      "step": 837
    },
    {
      "epoch": 0.251500600240096,
      "grad_norm": 0.1331344097852707,
      "learning_rate": 0.00019959917139725212,
      "loss": 0.5193,
      "step": 838
    },
    {
      "epoch": 0.25180072028811523,
      "grad_norm": 0.1266501247882843,
      "learning_rate": 0.00019959447281604474,
      "loss": 0.461,
      "step": 839
    },
    {
      "epoch": 0.25210084033613445,
      "grad_norm": 0.13748733699321747,
      "learning_rate": 0.00019958974691223572,
      "loss": 0.547,
      "step": 840
    },
    {
      "epoch": 0.25240096038415366,
      "grad_norm": 0.1415606290102005,
      "learning_rate": 0.00019958499368712156,
      "loss": 0.4919,
      "step": 841
    },
    {
      "epoch": 0.2527010804321729,
      "grad_norm": 0.20114395022392273,
      "learning_rate": 0.00019958021314200633,
      "loss": 0.4776,
      "step": 842
    },
    {
      "epoch": 0.2530012004801921,
      "grad_norm": 0.13372939825057983,
      "learning_rate": 0.00019957540527820142,
      "loss": 0.5162,
      "step": 843
    },
    {
      "epoch": 0.2533013205282113,
      "grad_norm": 0.12159455567598343,
      "learning_rate": 0.0001995705700970259,
      "loss": 0.4015,
      "step": 844
    },
    {
      "epoch": 0.2536014405762305,
      "grad_norm": 0.12221530824899673,
      "learning_rate": 0.00019956570759980621,
      "loss": 0.4822,
      "step": 845
    },
    {
      "epoch": 0.2539015606242497,
      "grad_norm": 0.1302434504032135,
      "learning_rate": 0.0001995608177878763,
      "loss": 0.5038,
      "step": 846
    },
    {
      "epoch": 0.2542016806722689,
      "grad_norm": 0.12383106350898743,
      "learning_rate": 0.00019955590066257766,
      "loss": 0.4843,
      "step": 847
    },
    {
      "epoch": 0.2545018007202881,
      "grad_norm": 0.13749061524868011,
      "learning_rate": 0.00019955095622525924,
      "loss": 0.5388,
      "step": 848
    },
    {
      "epoch": 0.2548019207683073,
      "grad_norm": 0.13634788990020752,
      "learning_rate": 0.00019954598447727748,
      "loss": 0.5423,
      "step": 849
    },
    {
      "epoch": 0.25510204081632654,
      "grad_norm": 0.13687866926193237,
      "learning_rate": 0.00019954098541999634,
      "loss": 0.4582,
      "step": 850
    },
    {
      "epoch": 0.25540216086434575,
      "grad_norm": 0.12292606383562088,
      "learning_rate": 0.00019953595905478725,
      "loss": 0.4771,
      "step": 851
    },
    {
      "epoch": 0.25570228091236497,
      "grad_norm": 0.30889269709587097,
      "learning_rate": 0.0001995309053830291,
      "loss": 0.519,
      "step": 852
    },
    {
      "epoch": 0.2560024009603842,
      "grad_norm": 0.13533712923526764,
      "learning_rate": 0.0001995258244061084,
      "loss": 0.5394,
      "step": 853
    },
    {
      "epoch": 0.25630252100840334,
      "grad_norm": 0.2249276041984558,
      "learning_rate": 0.00019952071612541893,
      "loss": 0.5533,
      "step": 854
    },
    {
      "epoch": 0.25660264105642255,
      "grad_norm": 0.1300957351922989,
      "learning_rate": 0.00019951558054236222,
      "loss": 0.4871,
      "step": 855
    },
    {
      "epoch": 0.25690276110444177,
      "grad_norm": 0.12137411534786224,
      "learning_rate": 0.00019951041765834703,
      "loss": 0.4811,
      "step": 856
    },
    {
      "epoch": 0.257202881152461,
      "grad_norm": 0.11973215639591217,
      "learning_rate": 0.0001995052274747898,
      "loss": 0.4272,
      "step": 857
    },
    {
      "epoch": 0.2575030012004802,
      "grad_norm": 0.14288219809532166,
      "learning_rate": 0.00019950000999311443,
      "loss": 0.5364,
      "step": 858
    },
    {
      "epoch": 0.2578031212484994,
      "grad_norm": 0.13095241785049438,
      "learning_rate": 0.00019949476521475225,
      "loss": 0.4968,
      "step": 859
    },
    {
      "epoch": 0.2581032412965186,
      "grad_norm": 0.12350008636713028,
      "learning_rate": 0.00019948949314114208,
      "loss": 0.4447,
      "step": 860
    },
    {
      "epoch": 0.25840336134453784,
      "grad_norm": 0.11974304169416428,
      "learning_rate": 0.0001994841937737303,
      "loss": 0.4127,
      "step": 861
    },
    {
      "epoch": 0.258703481392557,
      "grad_norm": 0.11944734305143356,
      "learning_rate": 0.0001994788671139707,
      "loss": 0.4226,
      "step": 862
    },
    {
      "epoch": 0.2590036014405762,
      "grad_norm": 0.12920348346233368,
      "learning_rate": 0.00019947351316332453,
      "loss": 0.4136,
      "step": 863
    },
    {
      "epoch": 0.25930372148859543,
      "grad_norm": 0.12352780997753143,
      "learning_rate": 0.0001994681319232607,
      "loss": 0.4503,
      "step": 864
    },
    {
      "epoch": 0.25960384153661464,
      "grad_norm": 0.14253178238868713,
      "learning_rate": 0.00019946272339525542,
      "loss": 0.4836,
      "step": 865
    },
    {
      "epoch": 0.25990396158463386,
      "grad_norm": 0.12619134783744812,
      "learning_rate": 0.00019945728758079248,
      "loss": 0.4841,
      "step": 866
    },
    {
      "epoch": 0.2602040816326531,
      "grad_norm": 0.14023606479167938,
      "learning_rate": 0.0001994518244813631,
      "loss": 0.4952,
      "step": 867
    },
    {
      "epoch": 0.2605042016806723,
      "grad_norm": 0.1295354962348938,
      "learning_rate": 0.00019944633409846606,
      "loss": 0.5012,
      "step": 868
    },
    {
      "epoch": 0.2608043217286915,
      "grad_norm": 0.12856262922286987,
      "learning_rate": 0.0001994408164336076,
      "loss": 0.4753,
      "step": 869
    },
    {
      "epoch": 0.26110444177671066,
      "grad_norm": 0.12840332090854645,
      "learning_rate": 0.00019943527148830138,
      "loss": 0.5248,
      "step": 870
    },
    {
      "epoch": 0.2614045618247299,
      "grad_norm": 0.12111607193946838,
      "learning_rate": 0.0001994296992640686,
      "loss": 0.4126,
      "step": 871
    },
    {
      "epoch": 0.2617046818727491,
      "grad_norm": 0.1936316192150116,
      "learning_rate": 0.00019942409976243797,
      "loss": 0.4748,
      "step": 872
    },
    {
      "epoch": 0.2620048019207683,
      "grad_norm": 0.1337498128414154,
      "learning_rate": 0.00019941847298494557,
      "loss": 0.4875,
      "step": 873
    },
    {
      "epoch": 0.2623049219687875,
      "grad_norm": 0.1343999058008194,
      "learning_rate": 0.00019941281893313514,
      "loss": 0.5657,
      "step": 874
    },
    {
      "epoch": 0.26260504201680673,
      "grad_norm": 0.1383265107870102,
      "learning_rate": 0.0001994071376085578,
      "loss": 0.3998,
      "step": 875
    },
    {
      "epoch": 0.26290516206482595,
      "grad_norm": 0.1333858221769333,
      "learning_rate": 0.00019940142901277207,
      "loss": 0.4655,
      "step": 876
    },
    {
      "epoch": 0.26320528211284516,
      "grad_norm": 0.12730911374092102,
      "learning_rate": 0.00019939569314734407,
      "loss": 0.4565,
      "step": 877
    },
    {
      "epoch": 0.2635054021608643,
      "grad_norm": 0.12576372921466827,
      "learning_rate": 0.0001993899300138474,
      "loss": 0.4825,
      "step": 878
    },
    {
      "epoch": 0.26380552220888354,
      "grad_norm": 0.12902750074863434,
      "learning_rate": 0.0001993841396138631,
      "loss": 0.4152,
      "step": 879
    },
    {
      "epoch": 0.26410564225690275,
      "grad_norm": 0.1310381442308426,
      "learning_rate": 0.00019937832194897968,
      "loss": 0.498,
      "step": 880
    },
    {
      "epoch": 0.26440576230492197,
      "grad_norm": 0.13245545327663422,
      "learning_rate": 0.00019937247702079322,
      "loss": 0.4811,
      "step": 881
    },
    {
      "epoch": 0.2647058823529412,
      "grad_norm": 0.1286735236644745,
      "learning_rate": 0.00019936660483090707,
      "loss": 0.48,
      "step": 882
    },
    {
      "epoch": 0.2650060024009604,
      "grad_norm": 0.38595524430274963,
      "learning_rate": 0.0001993607053809323,
      "loss": 0.4453,
      "step": 883
    },
    {
      "epoch": 0.2653061224489796,
      "grad_norm": 0.1547454297542572,
      "learning_rate": 0.00019935477867248739,
      "loss": 0.5371,
      "step": 884
    },
    {
      "epoch": 0.2656062424969988,
      "grad_norm": 0.14364950358867645,
      "learning_rate": 0.00019934882470719817,
      "loss": 0.4814,
      "step": 885
    },
    {
      "epoch": 0.265906362545018,
      "grad_norm": 0.1367032378911972,
      "learning_rate": 0.0001993428434866981,
      "loss": 0.4894,
      "step": 886
    },
    {
      "epoch": 0.2662064825930372,
      "grad_norm": 0.14121219515800476,
      "learning_rate": 0.00019933683501262804,
      "loss": 0.4671,
      "step": 887
    },
    {
      "epoch": 0.2665066026410564,
      "grad_norm": 0.13421456515789032,
      "learning_rate": 0.0001993307992866363,
      "loss": 0.5321,
      "step": 888
    },
    {
      "epoch": 0.2668067226890756,
      "grad_norm": 0.13314856588840485,
      "learning_rate": 0.00019932473631037882,
      "loss": 0.541,
      "step": 889
    },
    {
      "epoch": 0.26710684273709484,
      "grad_norm": 0.13720490038394928,
      "learning_rate": 0.00019931864608551886,
      "loss": 0.5456,
      "step": 890
    },
    {
      "epoch": 0.26740696278511406,
      "grad_norm": 0.21238237619400024,
      "learning_rate": 0.00019931252861372714,
      "loss": 0.5126,
      "step": 891
    },
    {
      "epoch": 0.26770708283313327,
      "grad_norm": 0.1344596892595291,
      "learning_rate": 0.000199306383896682,
      "loss": 0.4836,
      "step": 892
    },
    {
      "epoch": 0.2680072028811525,
      "grad_norm": 0.16728933155536652,
      "learning_rate": 0.00019930021193606914,
      "loss": 0.5175,
      "step": 893
    },
    {
      "epoch": 0.26830732292917164,
      "grad_norm": 0.1329762041568756,
      "learning_rate": 0.00019929401273358177,
      "loss": 0.4533,
      "step": 894
    },
    {
      "epoch": 0.26860744297719086,
      "grad_norm": 0.13503654301166534,
      "learning_rate": 0.00019928778629092056,
      "loss": 0.4785,
      "step": 895
    },
    {
      "epoch": 0.2689075630252101,
      "grad_norm": 0.12425743043422699,
      "learning_rate": 0.00019928153260979366,
      "loss": 0.4158,
      "step": 896
    },
    {
      "epoch": 0.2692076830732293,
      "grad_norm": 0.12247985601425171,
      "learning_rate": 0.00019927525169191674,
      "loss": 0.4642,
      "step": 897
    },
    {
      "epoch": 0.2695078031212485,
      "grad_norm": 0.13523244857788086,
      "learning_rate": 0.00019926894353901288,
      "loss": 0.4824,
      "step": 898
    },
    {
      "epoch": 0.2698079231692677,
      "grad_norm": 0.1293213963508606,
      "learning_rate": 0.00019926260815281258,
      "loss": 0.5019,
      "step": 899
    },
    {
      "epoch": 0.27010804321728693,
      "grad_norm": 0.13876238465309143,
      "learning_rate": 0.000199256245535054,
      "loss": 0.498,
      "step": 900
    },
    {
      "epoch": 0.27040816326530615,
      "grad_norm": 0.13814818859100342,
      "learning_rate": 0.00019924985568748254,
      "loss": 0.536,
      "step": 901
    },
    {
      "epoch": 0.2707082833133253,
      "grad_norm": 0.12295421212911606,
      "learning_rate": 0.00019924343861185123,
      "loss": 0.467,
      "step": 902
    },
    {
      "epoch": 0.2710084033613445,
      "grad_norm": 0.12685605883598328,
      "learning_rate": 0.00019923699430992055,
      "loss": 0.4507,
      "step": 903
    },
    {
      "epoch": 0.27130852340936373,
      "grad_norm": 0.126492440700531,
      "learning_rate": 0.00019923052278345837,
      "loss": 0.4906,
      "step": 904
    },
    {
      "epoch": 0.27160864345738295,
      "grad_norm": 0.13927894830703735,
      "learning_rate": 0.0001992240240342401,
      "loss": 0.4868,
      "step": 905
    },
    {
      "epoch": 0.27190876350540216,
      "grad_norm": 0.1388770341873169,
      "learning_rate": 0.00019921749806404862,
      "loss": 0.5374,
      "step": 906
    },
    {
      "epoch": 0.2722088835534214,
      "grad_norm": 0.2512054145336151,
      "learning_rate": 0.0001992109448746742,
      "loss": 0.4949,
      "step": 907
    },
    {
      "epoch": 0.2725090036014406,
      "grad_norm": 0.22620728611946106,
      "learning_rate": 0.0001992043644679147,
      "loss": 0.4882,
      "step": 908
    },
    {
      "epoch": 0.2728091236494598,
      "grad_norm": 0.13367605209350586,
      "learning_rate": 0.0001991977568455753,
      "loss": 0.4923,
      "step": 909
    },
    {
      "epoch": 0.27310924369747897,
      "grad_norm": 0.14091336727142334,
      "learning_rate": 0.00019919112200946878,
      "loss": 0.5282,
      "step": 910
    },
    {
      "epoch": 0.2734093637454982,
      "grad_norm": 0.14546939730644226,
      "learning_rate": 0.00019918445996141535,
      "loss": 0.4407,
      "step": 911
    },
    {
      "epoch": 0.2737094837935174,
      "grad_norm": 0.1297386884689331,
      "learning_rate": 0.0001991777707032426,
      "loss": 0.4682,
      "step": 912
    },
    {
      "epoch": 0.2740096038415366,
      "grad_norm": 0.1352037638425827,
      "learning_rate": 0.00019917105423678574,
      "loss": 0.4778,
      "step": 913
    },
    {
      "epoch": 0.2743097238895558,
      "grad_norm": 0.1269834339618683,
      "learning_rate": 0.00019916431056388724,
      "loss": 0.4584,
      "step": 914
    },
    {
      "epoch": 0.27460984393757504,
      "grad_norm": 0.14803633093833923,
      "learning_rate": 0.00019915753968639726,
      "loss": 0.4682,
      "step": 915
    },
    {
      "epoch": 0.27490996398559425,
      "grad_norm": 0.1348196566104889,
      "learning_rate": 0.00019915074160617324,
      "loss": 0.4941,
      "step": 916
    },
    {
      "epoch": 0.27521008403361347,
      "grad_norm": 0.15063588321208954,
      "learning_rate": 0.0001991439163250802,
      "loss": 0.5334,
      "step": 917
    },
    {
      "epoch": 0.2755102040816326,
      "grad_norm": 0.16838787496089935,
      "learning_rate": 0.00019913706384499055,
      "loss": 0.4767,
      "step": 918
    },
    {
      "epoch": 0.27581032412965184,
      "grad_norm": 0.15116068720817566,
      "learning_rate": 0.00019913018416778419,
      "loss": 0.4798,
      "step": 919
    },
    {
      "epoch": 0.27611044417767105,
      "grad_norm": 0.1270640790462494,
      "learning_rate": 0.0001991232772953485,
      "loss": 0.4905,
      "step": 920
    },
    {
      "epoch": 0.27641056422569027,
      "grad_norm": 0.12478886544704437,
      "learning_rate": 0.00019911634322957827,
      "loss": 0.464,
      "step": 921
    },
    {
      "epoch": 0.2767106842737095,
      "grad_norm": 0.18658436834812164,
      "learning_rate": 0.00019910938197237582,
      "loss": 0.4703,
      "step": 922
    },
    {
      "epoch": 0.2770108043217287,
      "grad_norm": 0.14810101687908173,
      "learning_rate": 0.00019910239352565086,
      "loss": 0.4689,
      "step": 923
    },
    {
      "epoch": 0.2773109243697479,
      "grad_norm": 0.26814642548561096,
      "learning_rate": 0.00019909537789132063,
      "loss": 0.4454,
      "step": 924
    },
    {
      "epoch": 0.2776110444177671,
      "grad_norm": 0.15047408640384674,
      "learning_rate": 0.00019908833507130972,
      "loss": 0.4585,
      "step": 925
    },
    {
      "epoch": 0.27791116446578634,
      "grad_norm": 0.12900130450725555,
      "learning_rate": 0.0001990812650675503,
      "loss": 0.4725,
      "step": 926
    },
    {
      "epoch": 0.2782112845138055,
      "grad_norm": 0.28590309619903564,
      "learning_rate": 0.00019907416788198196,
      "loss": 0.5181,
      "step": 927
    },
    {
      "epoch": 0.2785114045618247,
      "grad_norm": 0.2776770293712616,
      "learning_rate": 0.00019906704351655167,
      "loss": 0.4349,
      "step": 928
    },
    {
      "epoch": 0.27881152460984393,
      "grad_norm": 0.7942174673080444,
      "learning_rate": 0.00019905989197321398,
      "loss": 0.4483,
      "step": 929
    },
    {
      "epoch": 0.27911164465786314,
      "grad_norm": 0.20447468757629395,
      "learning_rate": 0.0001990527132539308,
      "loss": 0.4817,
      "step": 930
    },
    {
      "epoch": 0.27941176470588236,
      "grad_norm": 0.14460432529449463,
      "learning_rate": 0.0001990455073606715,
      "loss": 0.4645,
      "step": 931
    },
    {
      "epoch": 0.2797118847539016,
      "grad_norm": 0.16081230342388153,
      "learning_rate": 0.00019903827429541303,
      "loss": 0.4403,
      "step": 932
    },
    {
      "epoch": 0.2800120048019208,
      "grad_norm": 0.18377286195755005,
      "learning_rate": 0.0001990310140601396,
      "loss": 0.4532,
      "step": 933
    },
    {
      "epoch": 0.28031212484994,
      "grad_norm": 0.20535436272621155,
      "learning_rate": 0.000199023726656843,
      "loss": 0.5203,
      "step": 934
    },
    {
      "epoch": 0.28061224489795916,
      "grad_norm": 0.16945256292819977,
      "learning_rate": 0.00019901641208752246,
      "loss": 0.4573,
      "step": 935
    },
    {
      "epoch": 0.2809123649459784,
      "grad_norm": 0.15890268981456757,
      "learning_rate": 0.00019900907035418465,
      "loss": 0.4381,
      "step": 936
    },
    {
      "epoch": 0.2812124849939976,
      "grad_norm": 0.15431177616119385,
      "learning_rate": 0.0001990017014588437,
      "loss": 0.5099,
      "step": 937
    },
    {
      "epoch": 0.2815126050420168,
      "grad_norm": 0.16148389875888824,
      "learning_rate": 0.00019899430540352118,
      "loss": 0.5215,
      "step": 938
    },
    {
      "epoch": 0.281812725090036,
      "grad_norm": 0.1503535807132721,
      "learning_rate": 0.00019898688219024605,
      "loss": 0.4793,
      "step": 939
    },
    {
      "epoch": 0.28211284513805523,
      "grad_norm": 0.15371309220790863,
      "learning_rate": 0.00019897943182105486,
      "loss": 0.4807,
      "step": 940
    },
    {
      "epoch": 0.28241296518607445,
      "grad_norm": 0.14984077215194702,
      "learning_rate": 0.0001989719542979915,
      "loss": 0.4534,
      "step": 941
    },
    {
      "epoch": 0.28271308523409366,
      "grad_norm": 0.1417742371559143,
      "learning_rate": 0.00019896444962310737,
      "loss": 0.4728,
      "step": 942
    },
    {
      "epoch": 0.2830132052821128,
      "grad_norm": 0.14261500537395477,
      "learning_rate": 0.00019895691779846125,
      "loss": 0.4636,
      "step": 943
    },
    {
      "epoch": 0.28331332533013204,
      "grad_norm": 0.15536335110664368,
      "learning_rate": 0.00019894935882611942,
      "loss": 0.5089,
      "step": 944
    },
    {
      "epoch": 0.28361344537815125,
      "grad_norm": 0.4062034785747528,
      "learning_rate": 0.00019894177270815563,
      "loss": 0.5466,
      "step": 945
    },
    {
      "epoch": 0.28391356542617047,
      "grad_norm": 0.12950041890144348,
      "learning_rate": 0.00019893415944665098,
      "loss": 0.4514,
      "step": 946
    },
    {
      "epoch": 0.2842136854741897,
      "grad_norm": 0.15430551767349243,
      "learning_rate": 0.0001989265190436942,
      "loss": 0.4482,
      "step": 947
    },
    {
      "epoch": 0.2845138055222089,
      "grad_norm": 0.14366622269153595,
      "learning_rate": 0.0001989188515013812,
      "loss": 0.4811,
      "step": 948
    },
    {
      "epoch": 0.2848139255702281,
      "grad_norm": 0.20464728772640228,
      "learning_rate": 0.0001989111568218156,
      "loss": 0.5033,
      "step": 949
    },
    {
      "epoch": 0.2851140456182473,
      "grad_norm": 0.1582338809967041,
      "learning_rate": 0.00019890343500710827,
      "loss": 0.5106,
      "step": 950
    },
    {
      "epoch": 0.2854141656662665,
      "grad_norm": 0.14050230383872986,
      "learning_rate": 0.00019889568605937761,
      "loss": 0.4848,
      "step": 951
    },
    {
      "epoch": 0.2857142857142857,
      "grad_norm": 0.12696197628974915,
      "learning_rate": 0.00019888790998074952,
      "loss": 0.4441,
      "step": 952
    },
    {
      "epoch": 0.2860144057623049,
      "grad_norm": 0.18210993707180023,
      "learning_rate": 0.0001988801067733572,
      "loss": 0.5396,
      "step": 953
    },
    {
      "epoch": 0.2863145258103241,
      "grad_norm": 0.8190274834632874,
      "learning_rate": 0.00019887227643934142,
      "loss": 0.4312,
      "step": 954
    },
    {
      "epoch": 0.28661464585834334,
      "grad_norm": 0.13277262449264526,
      "learning_rate": 0.00019886441898085035,
      "loss": 0.4746,
      "step": 955
    },
    {
      "epoch": 0.28691476590636256,
      "grad_norm": 0.16039538383483887,
      "learning_rate": 0.00019885653440003954,
      "loss": 0.498,
      "step": 956
    },
    {
      "epoch": 0.28721488595438177,
      "grad_norm": 0.14940515160560608,
      "learning_rate": 0.00019884862269907205,
      "loss": 0.5033,
      "step": 957
    },
    {
      "epoch": 0.287515006002401,
      "grad_norm": 0.12827421724796295,
      "learning_rate": 0.0001988406838801184,
      "loss": 0.4444,
      "step": 958
    },
    {
      "epoch": 0.28781512605042014,
      "grad_norm": 0.16131725907325745,
      "learning_rate": 0.00019883271794535648,
      "loss": 0.508,
      "step": 959
    },
    {
      "epoch": 0.28811524609843936,
      "grad_norm": 0.20877015590667725,
      "learning_rate": 0.0001988247248969717,
      "loss": 0.5394,
      "step": 960
    },
    {
      "epoch": 0.2884153661464586,
      "grad_norm": 0.13646623492240906,
      "learning_rate": 0.0001988167047371568,
      "loss": 0.4716,
      "step": 961
    },
    {
      "epoch": 0.2887154861944778,
      "grad_norm": 0.13780243694782257,
      "learning_rate": 0.00019880865746811207,
      "loss": 0.5101,
      "step": 962
    },
    {
      "epoch": 0.289015606242497,
      "grad_norm": 0.13552388548851013,
      "learning_rate": 0.00019880058309204514,
      "loss": 0.4945,
      "step": 963
    },
    {
      "epoch": 0.2893157262905162,
      "grad_norm": 0.1313784420490265,
      "learning_rate": 0.00019879248161117113,
      "loss": 0.449,
      "step": 964
    },
    {
      "epoch": 0.28961584633853543,
      "grad_norm": 0.17604967951774597,
      "learning_rate": 0.0001987843530277126,
      "loss": 0.483,
      "step": 965
    },
    {
      "epoch": 0.28991596638655465,
      "grad_norm": 0.13665145635604858,
      "learning_rate": 0.00019877619734389956,
      "loss": 0.4529,
      "step": 966
    },
    {
      "epoch": 0.2902160864345738,
      "grad_norm": 0.13970158994197845,
      "learning_rate": 0.00019876801456196943,
      "loss": 0.4481,
      "step": 967
    },
    {
      "epoch": 0.290516206482593,
      "grad_norm": 0.13947072625160217,
      "learning_rate": 0.000198759804684167,
      "loss": 0.479,
      "step": 968
    },
    {
      "epoch": 0.29081632653061223,
      "grad_norm": 0.14099106192588806,
      "learning_rate": 0.0001987515677127446,
      "loss": 0.4454,
      "step": 969
    },
    {
      "epoch": 0.29111644657863145,
      "grad_norm": 0.1705184131860733,
      "learning_rate": 0.00019874330364996192,
      "loss": 0.509,
      "step": 970
    },
    {
      "epoch": 0.29141656662665066,
      "grad_norm": 0.14040535688400269,
      "learning_rate": 0.00019873501249808616,
      "loss": 0.4816,
      "step": 971
    },
    {
      "epoch": 0.2917166866746699,
      "grad_norm": 0.17384812235832214,
      "learning_rate": 0.00019872669425939185,
      "loss": 0.5492,
      "step": 972
    },
    {
      "epoch": 0.2920168067226891,
      "grad_norm": 0.15088632702827454,
      "learning_rate": 0.00019871834893616107,
      "loss": 0.5129,
      "step": 973
    },
    {
      "epoch": 0.2923169267707083,
      "grad_norm": 0.13551321625709534,
      "learning_rate": 0.0001987099765306832,
      "loss": 0.4778,
      "step": 974
    },
    {
      "epoch": 0.29261704681872747,
      "grad_norm": 0.18222583830356598,
      "learning_rate": 0.0001987015770452551,
      "loss": 0.4995,
      "step": 975
    },
    {
      "epoch": 0.2929171668667467,
      "grad_norm": 0.1446659415960312,
      "learning_rate": 0.00019869315048218116,
      "loss": 0.4742,
      "step": 976
    },
    {
      "epoch": 0.2932172869147659,
      "grad_norm": 0.14516235888004303,
      "learning_rate": 0.00019868469684377306,
      "loss": 0.4851,
      "step": 977
    },
    {
      "epoch": 0.2935174069627851,
      "grad_norm": 0.13349777460098267,
      "learning_rate": 0.00019867621613234993,
      "loss": 0.4479,
      "step": 978
    },
    {
      "epoch": 0.2938175270108043,
      "grad_norm": 0.14506781101226807,
      "learning_rate": 0.00019866770835023836,
      "loss": 0.4712,
      "step": 979
    },
    {
      "epoch": 0.29411764705882354,
      "grad_norm": 0.18141105771064758,
      "learning_rate": 0.00019865917349977242,
      "loss": 0.4869,
      "step": 980
    },
    {
      "epoch": 0.29441776710684275,
      "grad_norm": 0.12961047887802124,
      "learning_rate": 0.00019865061158329353,
      "loss": 0.4201,
      "step": 981
    },
    {
      "epoch": 0.29471788715486197,
      "grad_norm": 0.1588519662618637,
      "learning_rate": 0.0001986420226031505,
      "loss": 0.4782,
      "step": 982
    },
    {
      "epoch": 0.2950180072028811,
      "grad_norm": 0.14325343072414398,
      "learning_rate": 0.00019863340656169965,
      "loss": 0.5089,
      "step": 983
    },
    {
      "epoch": 0.29531812725090034,
      "grad_norm": 0.17272049188613892,
      "learning_rate": 0.00019862476346130473,
      "loss": 0.4914,
      "step": 984
    },
    {
      "epoch": 0.29561824729891956,
      "grad_norm": 0.17687954008579254,
      "learning_rate": 0.00019861609330433684,
      "loss": 0.4788,
      "step": 985
    },
    {
      "epoch": 0.29591836734693877,
      "grad_norm": 0.1358242630958557,
      "learning_rate": 0.00019860739609317457,
      "loss": 0.4454,
      "step": 986
    },
    {
      "epoch": 0.296218487394958,
      "grad_norm": 0.14220671355724335,
      "learning_rate": 0.0001985986718302038,
      "loss": 0.4397,
      "step": 987
    },
    {
      "epoch": 0.2965186074429772,
      "grad_norm": 0.13197766244411469,
      "learning_rate": 0.00019858992051781805,
      "loss": 0.4539,
      "step": 988
    },
    {
      "epoch": 0.2968187274909964,
      "grad_norm": 0.1444684863090515,
      "learning_rate": 0.0001985811421584181,
      "loss": 0.5167,
      "step": 989
    },
    {
      "epoch": 0.29711884753901563,
      "grad_norm": 0.13593900203704834,
      "learning_rate": 0.00019857233675441217,
      "loss": 0.4868,
      "step": 990
    },
    {
      "epoch": 0.2974189675870348,
      "grad_norm": 0.12780894339084625,
      "learning_rate": 0.00019856350430821594,
      "loss": 0.4521,
      "step": 991
    },
    {
      "epoch": 0.297719087635054,
      "grad_norm": 0.17024055123329163,
      "learning_rate": 0.0001985546448222525,
      "loss": 0.4543,
      "step": 992
    },
    {
      "epoch": 0.2980192076830732,
      "grad_norm": 0.1440305858850479,
      "learning_rate": 0.00019854575829895233,
      "loss": 0.5076,
      "step": 993
    },
    {
      "epoch": 0.29831932773109243,
      "grad_norm": 0.13460861146450043,
      "learning_rate": 0.00019853684474075337,
      "loss": 0.4522,
      "step": 994
    },
    {
      "epoch": 0.29861944777911165,
      "grad_norm": 0.1406427025794983,
      "learning_rate": 0.00019852790415010092,
      "loss": 0.518,
      "step": 995
    },
    {
      "epoch": 0.29891956782713086,
      "grad_norm": 0.13395576179027557,
      "learning_rate": 0.00019851893652944776,
      "loss": 0.4416,
      "step": 996
    },
    {
      "epoch": 0.2992196878751501,
      "grad_norm": 0.138408824801445,
      "learning_rate": 0.00019850994188125401,
      "loss": 0.5226,
      "step": 997
    },
    {
      "epoch": 0.2995198079231693,
      "grad_norm": 0.16668342053890228,
      "learning_rate": 0.00019850092020798728,
      "loss": 0.4604,
      "step": 998
    },
    {
      "epoch": 0.29981992797118845,
      "grad_norm": 0.14884911477565765,
      "learning_rate": 0.00019849187151212258,
      "loss": 0.5255,
      "step": 999
    },
    {
      "epoch": 0.30012004801920766,
      "grad_norm": 0.13796059787273407,
      "learning_rate": 0.0001984827957961423,
      "loss": 0.5336,
      "step": 1000
    },
    {
      "epoch": 0.3004201680672269,
      "grad_norm": 0.15053890645503998,
      "learning_rate": 0.00019847369306253624,
      "loss": 0.5012,
      "step": 1001
    },
    {
      "epoch": 0.3007202881152461,
      "grad_norm": 0.13115812838077545,
      "learning_rate": 0.00019846456331380167,
      "loss": 0.4925,
      "step": 1002
    },
    {
      "epoch": 0.3010204081632653,
      "grad_norm": 0.1401546746492386,
      "learning_rate": 0.00019845540655244318,
      "loss": 0.5165,
      "step": 1003
    },
    {
      "epoch": 0.3013205282112845,
      "grad_norm": 0.1433335393667221,
      "learning_rate": 0.00019844622278097286,
      "loss": 0.4357,
      "step": 1004
    },
    {
      "epoch": 0.30162064825930374,
      "grad_norm": 0.1426486372947693,
      "learning_rate": 0.00019843701200191016,
      "loss": 0.5084,
      "step": 1005
    },
    {
      "epoch": 0.30192076830732295,
      "grad_norm": 0.13518790900707245,
      "learning_rate": 0.00019842777421778197,
      "loss": 0.4797,
      "step": 1006
    },
    {
      "epoch": 0.3022208883553421,
      "grad_norm": 0.12768086791038513,
      "learning_rate": 0.0001984185094311225,
      "loss": 0.4687,
      "step": 1007
    },
    {
      "epoch": 0.3025210084033613,
      "grad_norm": 0.12086029350757599,
      "learning_rate": 0.00019840921764447357,
      "loss": 0.4326,
      "step": 1008
    },
    {
      "epoch": 0.30282112845138054,
      "grad_norm": 0.13988471031188965,
      "learning_rate": 0.00019839989886038416,
      "loss": 0.5151,
      "step": 1009
    },
    {
      "epoch": 0.30312124849939975,
      "grad_norm": 0.1398009955883026,
      "learning_rate": 0.00019839055308141078,
      "loss": 0.4604,
      "step": 1010
    },
    {
      "epoch": 0.30342136854741897,
      "grad_norm": 0.27455607056617737,
      "learning_rate": 0.00019838118031011742,
      "loss": 0.4402,
      "step": 1011
    },
    {
      "epoch": 0.3037214885954382,
      "grad_norm": 0.14252755045890808,
      "learning_rate": 0.0001983717805490753,
      "loss": 0.438,
      "step": 1012
    },
    {
      "epoch": 0.3040216086434574,
      "grad_norm": 0.1371551901102066,
      "learning_rate": 0.0001983623538008632,
      "loss": 0.4083,
      "step": 1013
    },
    {
      "epoch": 0.3043217286914766,
      "grad_norm": 0.14904725551605225,
      "learning_rate": 0.0001983529000680672,
      "loss": 0.5038,
      "step": 1014
    },
    {
      "epoch": 0.30462184873949577,
      "grad_norm": 0.1304553896188736,
      "learning_rate": 0.00019834341935328086,
      "loss": 0.4621,
      "step": 1015
    },
    {
      "epoch": 0.304921968787515,
      "grad_norm": 0.12457938492298126,
      "learning_rate": 0.00019833391165910503,
      "loss": 0.4339,
      "step": 1016
    },
    {
      "epoch": 0.3052220888355342,
      "grad_norm": 0.1293737143278122,
      "learning_rate": 0.00019832437698814813,
      "loss": 0.4279,
      "step": 1017
    },
    {
      "epoch": 0.3055222088835534,
      "grad_norm": 0.1546356976032257,
      "learning_rate": 0.00019831481534302584,
      "loss": 0.4467,
      "step": 1018
    },
    {
      "epoch": 0.30582232893157263,
      "grad_norm": 0.13538290560245514,
      "learning_rate": 0.00019830522672636128,
      "loss": 0.4969,
      "step": 1019
    },
    {
      "epoch": 0.30612244897959184,
      "grad_norm": 0.1218218058347702,
      "learning_rate": 0.00019829561114078503,
      "loss": 0.4241,
      "step": 1020
    },
    {
      "epoch": 0.30642256902761106,
      "grad_norm": 0.1502799242734909,
      "learning_rate": 0.00019828596858893495,
      "loss": 0.4992,
      "step": 1021
    },
    {
      "epoch": 0.3067226890756303,
      "grad_norm": 0.22983862459659576,
      "learning_rate": 0.0001982762990734564,
      "loss": 0.4829,
      "step": 1022
    },
    {
      "epoch": 0.3070228091236495,
      "grad_norm": 0.1303202360868454,
      "learning_rate": 0.00019826660259700208,
      "loss": 0.4841,
      "step": 1023
    },
    {
      "epoch": 0.30732292917166865,
      "grad_norm": 0.13013200461864471,
      "learning_rate": 0.00019825687916223217,
      "loss": 0.4883,
      "step": 1024
    },
    {
      "epoch": 0.30762304921968786,
      "grad_norm": 0.138884037733078,
      "learning_rate": 0.0001982471287718141,
      "loss": 0.4776,
      "step": 1025
    },
    {
      "epoch": 0.3079231692677071,
      "grad_norm": 0.1241547241806984,
      "learning_rate": 0.0001982373514284228,
      "loss": 0.4507,
      "step": 1026
    },
    {
      "epoch": 0.3082232893157263,
      "grad_norm": 0.12917166948318481,
      "learning_rate": 0.00019822754713474057,
      "loss": 0.4002,
      "step": 1027
    },
    {
      "epoch": 0.3085234093637455,
      "grad_norm": 0.13225270807743073,
      "learning_rate": 0.00019821771589345713,
      "loss": 0.4844,
      "step": 1028
    },
    {
      "epoch": 0.3088235294117647,
      "grad_norm": 0.13052913546562195,
      "learning_rate": 0.0001982078577072696,
      "loss": 0.4794,
      "step": 1029
    },
    {
      "epoch": 0.30912364945978393,
      "grad_norm": 0.12789681553840637,
      "learning_rate": 0.00019819797257888237,
      "loss": 0.4012,
      "step": 1030
    },
    {
      "epoch": 0.30942376950780315,
      "grad_norm": 0.13375438749790192,
      "learning_rate": 0.00019818806051100736,
      "loss": 0.4654,
      "step": 1031
    },
    {
      "epoch": 0.3097238895558223,
      "grad_norm": 0.3724403977394104,
      "learning_rate": 0.00019817812150636383,
      "loss": 0.4445,
      "step": 1032
    },
    {
      "epoch": 0.3100240096038415,
      "grad_norm": 0.13455694913864136,
      "learning_rate": 0.00019816815556767848,
      "loss": 0.497,
      "step": 1033
    },
    {
      "epoch": 0.31032412965186074,
      "grad_norm": 0.13548237085342407,
      "learning_rate": 0.00019815816269768525,
      "loss": 0.437,
      "step": 1034
    },
    {
      "epoch": 0.31062424969987995,
      "grad_norm": 0.13410355150699615,
      "learning_rate": 0.00019814814289912565,
      "loss": 0.4331,
      "step": 1035
    },
    {
      "epoch": 0.31092436974789917,
      "grad_norm": 0.12189717590808868,
      "learning_rate": 0.00019813809617474844,
      "loss": 0.4196,
      "step": 1036
    },
    {
      "epoch": 0.3112244897959184,
      "grad_norm": 0.13920094072818756,
      "learning_rate": 0.00019812802252730988,
      "loss": 0.541,
      "step": 1037
    },
    {
      "epoch": 0.3115246098439376,
      "grad_norm": 0.13746348023414612,
      "learning_rate": 0.00019811792195957353,
      "loss": 0.4779,
      "step": 1038
    },
    {
      "epoch": 0.3118247298919568,
      "grad_norm": 0.15016810595989227,
      "learning_rate": 0.00019810779447431036,
      "loss": 0.4794,
      "step": 1039
    },
    {
      "epoch": 0.31212484993997597,
      "grad_norm": 0.14637719094753265,
      "learning_rate": 0.00019809764007429874,
      "loss": 0.553,
      "step": 1040
    },
    {
      "epoch": 0.3124249699879952,
      "grad_norm": 0.1545702964067459,
      "learning_rate": 0.0001980874587623244,
      "loss": 0.4634,
      "step": 1041
    },
    {
      "epoch": 0.3127250900360144,
      "grad_norm": 0.13468553125858307,
      "learning_rate": 0.0001980772505411805,
      "loss": 0.5257,
      "step": 1042
    },
    {
      "epoch": 0.3130252100840336,
      "grad_norm": 0.1812884509563446,
      "learning_rate": 0.00019806701541366753,
      "loss": 0.4869,
      "step": 1043
    },
    {
      "epoch": 0.3133253301320528,
      "grad_norm": 0.14042791724205017,
      "learning_rate": 0.00019805675338259335,
      "loss": 0.5218,
      "step": 1044
    },
    {
      "epoch": 0.31362545018007204,
      "grad_norm": 0.13315889239311218,
      "learning_rate": 0.00019804646445077326,
      "loss": 0.4418,
      "step": 1045
    },
    {
      "epoch": 0.31392557022809126,
      "grad_norm": 0.15402880311012268,
      "learning_rate": 0.0001980361486210299,
      "loss": 0.4833,
      "step": 1046
    },
    {
      "epoch": 0.31422569027611047,
      "grad_norm": 0.12961941957473755,
      "learning_rate": 0.00019802580589619334,
      "loss": 0.4996,
      "step": 1047
    },
    {
      "epoch": 0.31452581032412963,
      "grad_norm": 0.4248056411743164,
      "learning_rate": 0.0001980154362791009,
      "loss": 0.4639,
      "step": 1048
    },
    {
      "epoch": 0.31482593037214884,
      "grad_norm": 0.1287326067686081,
      "learning_rate": 0.00019800503977259747,
      "loss": 0.4019,
      "step": 1049
    },
    {
      "epoch": 0.31512605042016806,
      "grad_norm": 0.19719886779785156,
      "learning_rate": 0.00019799461637953517,
      "loss": 0.4509,
      "step": 1050
    },
    {
      "epoch": 0.3154261704681873,
      "grad_norm": 0.13943341374397278,
      "learning_rate": 0.00019798416610277347,
      "loss": 0.5395,
      "step": 1051
    },
    {
      "epoch": 0.3157262905162065,
      "grad_norm": 0.1295875459909439,
      "learning_rate": 0.00019797368894517939,
      "loss": 0.4619,
      "step": 1052
    },
    {
      "epoch": 0.3160264105642257,
      "grad_norm": 0.13652342557907104,
      "learning_rate": 0.00019796318490962716,
      "loss": 0.4586,
      "step": 1053
    },
    {
      "epoch": 0.3163265306122449,
      "grad_norm": 0.14109963178634644,
      "learning_rate": 0.00019795265399899842,
      "loss": 0.5335,
      "step": 1054
    },
    {
      "epoch": 0.31662665066026413,
      "grad_norm": 0.13764730095863342,
      "learning_rate": 0.0001979420962161823,
      "loss": 0.4758,
      "step": 1055
    },
    {
      "epoch": 0.3169267707082833,
      "grad_norm": 0.14121860265731812,
      "learning_rate": 0.0001979315115640751,
      "loss": 0.454,
      "step": 1056
    },
    {
      "epoch": 0.3172268907563025,
      "grad_norm": 0.12700317800045013,
      "learning_rate": 0.00019792090004558066,
      "loss": 0.4472,
      "step": 1057
    },
    {
      "epoch": 0.3175270108043217,
      "grad_norm": 0.14510038495063782,
      "learning_rate": 0.0001979102616636101,
      "loss": 0.5273,
      "step": 1058
    },
    {
      "epoch": 0.31782713085234093,
      "grad_norm": 0.1315007358789444,
      "learning_rate": 0.00019789959642108195,
      "loss": 0.4757,
      "step": 1059
    },
    {
      "epoch": 0.31812725090036015,
      "grad_norm": 0.14424051344394684,
      "learning_rate": 0.00019788890432092211,
      "loss": 0.4913,
      "step": 1060
    },
    {
      "epoch": 0.31842737094837936,
      "grad_norm": 0.12802337110042572,
      "learning_rate": 0.0001978781853660638,
      "loss": 0.4166,
      "step": 1061
    },
    {
      "epoch": 0.3187274909963986,
      "grad_norm": 0.13996674120426178,
      "learning_rate": 0.00019786743955944769,
      "loss": 0.476,
      "step": 1062
    },
    {
      "epoch": 0.3190276110444178,
      "grad_norm": 0.18760690093040466,
      "learning_rate": 0.00019785666690402175,
      "loss": 0.4954,
      "step": 1063
    },
    {
      "epoch": 0.31932773109243695,
      "grad_norm": 0.13369061052799225,
      "learning_rate": 0.00019784586740274128,
      "loss": 0.4986,
      "step": 1064
    },
    {
      "epoch": 0.31962785114045617,
      "grad_norm": 0.12482760846614838,
      "learning_rate": 0.00019783504105856908,
      "loss": 0.4301,
      "step": 1065
    },
    {
      "epoch": 0.3199279711884754,
      "grad_norm": 0.13145141303539276,
      "learning_rate": 0.00019782418787447518,
      "loss": 0.4374,
      "step": 1066
    },
    {
      "epoch": 0.3202280912364946,
      "grad_norm": 0.14006423950195312,
      "learning_rate": 0.00019781330785343705,
      "loss": 0.5104,
      "step": 1067
    },
    {
      "epoch": 0.3205282112845138,
      "grad_norm": 0.14591243863105774,
      "learning_rate": 0.00019780240099843952,
      "loss": 0.4999,
      "step": 1068
    },
    {
      "epoch": 0.320828331332533,
      "grad_norm": 0.1316094547510147,
      "learning_rate": 0.0001977914673124747,
      "loss": 0.4804,
      "step": 1069
    },
    {
      "epoch": 0.32112845138055224,
      "grad_norm": 0.13612405955791473,
      "learning_rate": 0.0001977805067985422,
      "loss": 0.5368,
      "step": 1070
    },
    {
      "epoch": 0.32142857142857145,
      "grad_norm": 0.1620555967092514,
      "learning_rate": 0.0001977695194596488,
      "loss": 0.4883,
      "step": 1071
    },
    {
      "epoch": 0.3217286914765906,
      "grad_norm": 0.13244867324829102,
      "learning_rate": 0.00019775850529880887,
      "loss": 0.4626,
      "step": 1072
    },
    {
      "epoch": 0.3220288115246098,
      "grad_norm": 0.1393079161643982,
      "learning_rate": 0.00019774746431904395,
      "loss": 0.4577,
      "step": 1073
    },
    {
      "epoch": 0.32232893157262904,
      "grad_norm": 0.1493871957063675,
      "learning_rate": 0.00019773639652338306,
      "loss": 0.4419,
      "step": 1074
    },
    {
      "epoch": 0.32262905162064826,
      "grad_norm": 0.14268308877944946,
      "learning_rate": 0.00019772530191486244,
      "loss": 0.482,
      "step": 1075
    },
    {
      "epoch": 0.32292917166866747,
      "grad_norm": 0.12896285951137543,
      "learning_rate": 0.00019771418049652586,
      "loss": 0.4488,
      "step": 1076
    },
    {
      "epoch": 0.3232292917166867,
      "grad_norm": 0.1597939133644104,
      "learning_rate": 0.00019770303227142425,
      "loss": 0.5328,
      "step": 1077
    },
    {
      "epoch": 0.3235294117647059,
      "grad_norm": 0.12893250584602356,
      "learning_rate": 0.00019769185724261611,
      "loss": 0.4659,
      "step": 1078
    },
    {
      "epoch": 0.3238295318127251,
      "grad_norm": 0.14060784876346588,
      "learning_rate": 0.00019768065541316712,
      "loss": 0.4903,
      "step": 1079
    },
    {
      "epoch": 0.3241296518607443,
      "grad_norm": 0.2835310995578766,
      "learning_rate": 0.00019766942678615035,
      "loss": 0.4688,
      "step": 1080
    },
    {
      "epoch": 0.3244297719087635,
      "grad_norm": 0.12974952161312103,
      "learning_rate": 0.0001976581713646463,
      "loss": 0.4589,
      "step": 1081
    },
    {
      "epoch": 0.3247298919567827,
      "grad_norm": 0.1387726068496704,
      "learning_rate": 0.00019764688915174274,
      "loss": 0.4869,
      "step": 1082
    },
    {
      "epoch": 0.3250300120048019,
      "grad_norm": 0.1310199350118637,
      "learning_rate": 0.00019763558015053483,
      "loss": 0.4745,
      "step": 1083
    },
    {
      "epoch": 0.32533013205282113,
      "grad_norm": 0.12792225182056427,
      "learning_rate": 0.00019762424436412502,
      "loss": 0.444,
      "step": 1084
    },
    {
      "epoch": 0.32563025210084034,
      "grad_norm": 0.12761080265045166,
      "learning_rate": 0.0001976128817956232,
      "loss": 0.4397,
      "step": 1085
    },
    {
      "epoch": 0.32593037214885956,
      "grad_norm": 0.13653092086315155,
      "learning_rate": 0.00019760149244814655,
      "loss": 0.4884,
      "step": 1086
    },
    {
      "epoch": 0.3262304921968788,
      "grad_norm": 0.14195531606674194,
      "learning_rate": 0.0001975900763248196,
      "loss": 0.4732,
      "step": 1087
    },
    {
      "epoch": 0.32653061224489793,
      "grad_norm": 0.13525669276714325,
      "learning_rate": 0.00019757863342877423,
      "loss": 0.4426,
      "step": 1088
    },
    {
      "epoch": 0.32683073229291715,
      "grad_norm": 0.12862493097782135,
      "learning_rate": 0.0001975671637631497,
      "loss": 0.4438,
      "step": 1089
    },
    {
      "epoch": 0.32713085234093636,
      "grad_norm": 0.1356307566165924,
      "learning_rate": 0.00019755566733109251,
      "loss": 0.4471,
      "step": 1090
    },
    {
      "epoch": 0.3274309723889556,
      "grad_norm": 0.14373083412647247,
      "learning_rate": 0.00019754414413575665,
      "loss": 0.488,
      "step": 1091
    },
    {
      "epoch": 0.3277310924369748,
      "grad_norm": 0.2767897844314575,
      "learning_rate": 0.00019753259418030334,
      "loss": 0.4432,
      "step": 1092
    },
    {
      "epoch": 0.328031212484994,
      "grad_norm": 0.1524336040019989,
      "learning_rate": 0.00019752101746790118,
      "loss": 0.491,
      "step": 1093
    },
    {
      "epoch": 0.3283313325330132,
      "grad_norm": 0.15589389204978943,
      "learning_rate": 0.0001975094140017261,
      "loss": 0.5193,
      "step": 1094
    },
    {
      "epoch": 0.32863145258103243,
      "grad_norm": 0.12623004615306854,
      "learning_rate": 0.00019749778378496142,
      "loss": 0.4119,
      "step": 1095
    },
    {
      "epoch": 0.3289315726290516,
      "grad_norm": 0.1387036293745041,
      "learning_rate": 0.0001974861268207977,
      "loss": 0.4313,
      "step": 1096
    },
    {
      "epoch": 0.3292316926770708,
      "grad_norm": 0.14449627697467804,
      "learning_rate": 0.0001974744431124329,
      "loss": 0.5194,
      "step": 1097
    },
    {
      "epoch": 0.32953181272509,
      "grad_norm": 0.1603599190711975,
      "learning_rate": 0.00019746273266307238,
      "loss": 0.4854,
      "step": 1098
    },
    {
      "epoch": 0.32983193277310924,
      "grad_norm": 0.13858915865421295,
      "learning_rate": 0.00019745099547592866,
      "loss": 0.5116,
      "step": 1099
    },
    {
      "epoch": 0.33013205282112845,
      "grad_norm": 0.13826991617679596,
      "learning_rate": 0.0001974392315542218,
      "loss": 0.4898,
      "step": 1100
    },
    {
      "epoch": 0.33043217286914767,
      "grad_norm": 0.2203083485364914,
      "learning_rate": 0.00019742744090117906,
      "loss": 0.475,
      "step": 1101
    },
    {
      "epoch": 0.3307322929171669,
      "grad_norm": 0.1723625510931015,
      "learning_rate": 0.00019741562352003508,
      "loss": 0.4794,
      "step": 1102
    },
    {
      "epoch": 0.3310324129651861,
      "grad_norm": 0.16649343073368073,
      "learning_rate": 0.0001974037794140318,
      "loss": 0.5317,
      "step": 1103
    },
    {
      "epoch": 0.33133253301320525,
      "grad_norm": 0.1325429528951645,
      "learning_rate": 0.00019739190858641853,
      "loss": 0.4915,
      "step": 1104
    },
    {
      "epoch": 0.33163265306122447,
      "grad_norm": 0.28730836510658264,
      "learning_rate": 0.00019738001104045185,
      "loss": 0.4688,
      "step": 1105
    },
    {
      "epoch": 0.3319327731092437,
      "grad_norm": 0.14861632883548737,
      "learning_rate": 0.0001973680867793958,
      "loss": 0.455,
      "step": 1106
    },
    {
      "epoch": 0.3322328931572629,
      "grad_norm": 0.20707793533802032,
      "learning_rate": 0.00019735613580652159,
      "loss": 0.4811,
      "step": 1107
    },
    {
      "epoch": 0.3325330132052821,
      "grad_norm": 0.16124333441257477,
      "learning_rate": 0.0001973441581251079,
      "loss": 0.5491,
      "step": 1108
    },
    {
      "epoch": 0.3328331332533013,
      "grad_norm": 0.14531394839286804,
      "learning_rate": 0.00019733215373844064,
      "loss": 0.4206,
      "step": 1109
    },
    {
      "epoch": 0.33313325330132054,
      "grad_norm": 0.12873904407024384,
      "learning_rate": 0.000197320122649813,
      "loss": 0.4752,
      "step": 1110
    },
    {
      "epoch": 0.33343337334933976,
      "grad_norm": 0.13601170480251312,
      "learning_rate": 0.0001973080648625257,
      "loss": 0.5011,
      "step": 1111
    },
    {
      "epoch": 0.33373349339735897,
      "grad_norm": 0.15537546575069427,
      "learning_rate": 0.00019729598037988662,
      "loss": 0.4942,
      "step": 1112
    },
    {
      "epoch": 0.33403361344537813,
      "grad_norm": 0.14289778470993042,
      "learning_rate": 0.0001972838692052109,
      "loss": 0.5334,
      "step": 1113
    },
    {
      "epoch": 0.33433373349339734,
      "grad_norm": 0.14202508330345154,
      "learning_rate": 0.00019727173134182123,
      "loss": 0.5125,
      "step": 1114
    },
    {
      "epoch": 0.33463385354141656,
      "grad_norm": 0.15394620597362518,
      "learning_rate": 0.00019725956679304742,
      "loss": 0.5323,
      "step": 1115
    },
    {
      "epoch": 0.3349339735894358,
      "grad_norm": 0.15147684514522552,
      "learning_rate": 0.00019724737556222672,
      "loss": 0.4669,
      "step": 1116
    },
    {
      "epoch": 0.335234093637455,
      "grad_norm": 0.13286633789539337,
      "learning_rate": 0.0001972351576527036,
      "loss": 0.4936,
      "step": 1117
    },
    {
      "epoch": 0.3355342136854742,
      "grad_norm": 0.14134052395820618,
      "learning_rate": 0.00019722291306782997,
      "loss": 0.5268,
      "step": 1118
    },
    {
      "epoch": 0.3358343337334934,
      "grad_norm": 0.8179460167884827,
      "learning_rate": 0.00019721064181096493,
      "loss": 0.5263,
      "step": 1119
    },
    {
      "epoch": 0.33613445378151263,
      "grad_norm": 0.1837950497865677,
      "learning_rate": 0.000197198343885475,
      "loss": 0.4431,
      "step": 1120
    },
    {
      "epoch": 0.3364345738295318,
      "grad_norm": 0.13986214995384216,
      "learning_rate": 0.00019718601929473393,
      "loss": 0.4894,
      "step": 1121
    },
    {
      "epoch": 0.336734693877551,
      "grad_norm": 0.15578554570674896,
      "learning_rate": 0.00019717366804212287,
      "loss": 0.4619,
      "step": 1122
    },
    {
      "epoch": 0.3370348139255702,
      "grad_norm": 0.15266619622707367,
      "learning_rate": 0.00019716129013103024,
      "loss": 0.4898,
      "step": 1123
    },
    {
      "epoch": 0.33733493397358943,
      "grad_norm": 0.17838340997695923,
      "learning_rate": 0.00019714888556485177,
      "loss": 0.5273,
      "step": 1124
    },
    {
      "epoch": 0.33763505402160865,
      "grad_norm": 0.1382509022951126,
      "learning_rate": 0.0001971364543469905,
      "loss": 0.4752,
      "step": 1125
    },
    {
      "epoch": 0.33793517406962786,
      "grad_norm": 0.1678454428911209,
      "learning_rate": 0.0001971239964808568,
      "loss": 0.4108,
      "step": 1126
    },
    {
      "epoch": 0.3382352941176471,
      "grad_norm": 0.14172542095184326,
      "learning_rate": 0.00019711151196986836,
      "loss": 0.433,
      "step": 1127
    },
    {
      "epoch": 0.3385354141656663,
      "grad_norm": 0.1336435079574585,
      "learning_rate": 0.00019709900081745014,
      "loss": 0.4269,
      "step": 1128
    },
    {
      "epoch": 0.33883553421368545,
      "grad_norm": 0.1344050168991089,
      "learning_rate": 0.00019708646302703446,
      "loss": 0.4533,
      "step": 1129
    },
    {
      "epoch": 0.33913565426170467,
      "grad_norm": 0.1460920125246048,
      "learning_rate": 0.00019707389860206087,
      "loss": 0.4229,
      "step": 1130
    },
    {
      "epoch": 0.3394357743097239,
      "grad_norm": 0.13166992366313934,
      "learning_rate": 0.00019706130754597632,
      "loss": 0.4411,
      "step": 1131
    },
    {
      "epoch": 0.3397358943577431,
      "grad_norm": 0.20385001599788666,
      "learning_rate": 0.000197048689862235,
      "loss": 0.448,
      "step": 1132
    },
    {
      "epoch": 0.3400360144057623,
      "grad_norm": 0.15308980643749237,
      "learning_rate": 0.00019703604555429844,
      "loss": 0.4357,
      "step": 1133
    },
    {
      "epoch": 0.3403361344537815,
      "grad_norm": 0.14577004313468933,
      "learning_rate": 0.00019702337462563545,
      "loss": 0.444,
      "step": 1134
    },
    {
      "epoch": 0.34063625450180074,
      "grad_norm": 0.1675492376089096,
      "learning_rate": 0.00019701067707972216,
      "loss": 0.4727,
      "step": 1135
    },
    {
      "epoch": 0.34093637454981995,
      "grad_norm": 0.1404566764831543,
      "learning_rate": 0.000196997952920042,
      "loss": 0.4681,
      "step": 1136
    },
    {
      "epoch": 0.3412364945978391,
      "grad_norm": 0.18300069868564606,
      "learning_rate": 0.00019698520215008568,
      "loss": 0.4652,
      "step": 1137
    },
    {
      "epoch": 0.3415366146458583,
      "grad_norm": 0.14181415736675262,
      "learning_rate": 0.00019697242477335127,
      "loss": 0.4962,
      "step": 1138
    },
    {
      "epoch": 0.34183673469387754,
      "grad_norm": 0.13853693008422852,
      "learning_rate": 0.00019695962079334405,
      "loss": 0.4491,
      "step": 1139
    },
    {
      "epoch": 0.34213685474189676,
      "grad_norm": 0.14697465300559998,
      "learning_rate": 0.00019694679021357666,
      "loss": 0.4479,
      "step": 1140
    },
    {
      "epoch": 0.34243697478991597,
      "grad_norm": 0.18280468881130219,
      "learning_rate": 0.000196933933037569,
      "loss": 0.4262,
      "step": 1141
    },
    {
      "epoch": 0.3427370948379352,
      "grad_norm": 0.1395919919013977,
      "learning_rate": 0.00019692104926884833,
      "loss": 0.4608,
      "step": 1142
    },
    {
      "epoch": 0.3430372148859544,
      "grad_norm": 0.13262638449668884,
      "learning_rate": 0.00019690813891094916,
      "loss": 0.4743,
      "step": 1143
    },
    {
      "epoch": 0.3433373349339736,
      "grad_norm": 0.13647164404392242,
      "learning_rate": 0.00019689520196741324,
      "loss": 0.4362,
      "step": 1144
    },
    {
      "epoch": 0.3436374549819928,
      "grad_norm": 0.13809150457382202,
      "learning_rate": 0.00019688223844178975,
      "loss": 0.4411,
      "step": 1145
    },
    {
      "epoch": 0.343937575030012,
      "grad_norm": 0.20118433237075806,
      "learning_rate": 0.00019686924833763506,
      "loss": 0.5154,
      "step": 1146
    },
    {
      "epoch": 0.3442376950780312,
      "grad_norm": 0.14372298121452332,
      "learning_rate": 0.00019685623165851285,
      "loss": 0.5373,
      "step": 1147
    },
    {
      "epoch": 0.3445378151260504,
      "grad_norm": 0.4217036962509155,
      "learning_rate": 0.00019684318840799408,
      "loss": 0.4773,
      "step": 1148
    },
    {
      "epoch": 0.34483793517406963,
      "grad_norm": 0.14791075885295868,
      "learning_rate": 0.00019683011858965703,
      "loss": 0.5459,
      "step": 1149
    },
    {
      "epoch": 0.34513805522208885,
      "grad_norm": 0.1378076672554016,
      "learning_rate": 0.00019681702220708725,
      "loss": 0.4692,
      "step": 1150
    },
    {
      "epoch": 0.34543817527010806,
      "grad_norm": 0.1290854811668396,
      "learning_rate": 0.00019680389926387762,
      "loss": 0.4516,
      "step": 1151
    },
    {
      "epoch": 0.3457382953181273,
      "grad_norm": 0.14721539616584778,
      "learning_rate": 0.0001967907497636282,
      "loss": 0.4897,
      "step": 1152
    },
    {
      "epoch": 0.34603841536614643,
      "grad_norm": 0.1549588143825531,
      "learning_rate": 0.00019677757370994647,
      "loss": 0.5171,
      "step": 1153
    },
    {
      "epoch": 0.34633853541416565,
      "grad_norm": 0.13510900735855103,
      "learning_rate": 0.00019676437110644707,
      "loss": 0.4619,
      "step": 1154
    },
    {
      "epoch": 0.34663865546218486,
      "grad_norm": 0.16830456256866455,
      "learning_rate": 0.00019675114195675205,
      "loss": 0.4727,
      "step": 1155
    },
    {
      "epoch": 0.3469387755102041,
      "grad_norm": 0.1219051405787468,
      "learning_rate": 0.00019673788626449064,
      "loss": 0.4297,
      "step": 1156
    },
    {
      "epoch": 0.3472388955582233,
      "grad_norm": 0.13119164109230042,
      "learning_rate": 0.00019672460403329935,
      "loss": 0.4646,
      "step": 1157
    },
    {
      "epoch": 0.3475390156062425,
      "grad_norm": 0.13671201467514038,
      "learning_rate": 0.00019671129526682205,
      "loss": 0.457,
      "step": 1158
    },
    {
      "epoch": 0.3478391356542617,
      "grad_norm": 0.1703466922044754,
      "learning_rate": 0.00019669795996870987,
      "loss": 0.5045,
      "step": 1159
    },
    {
      "epoch": 0.34813925570228094,
      "grad_norm": 0.18858648836612701,
      "learning_rate": 0.00019668459814262116,
      "loss": 0.4698,
      "step": 1160
    },
    {
      "epoch": 0.3484393757503001,
      "grad_norm": 0.13836561143398285,
      "learning_rate": 0.0001966712097922216,
      "loss": 0.4736,
      "step": 1161
    },
    {
      "epoch": 0.3487394957983193,
      "grad_norm": 0.15764808654785156,
      "learning_rate": 0.0001966577949211841,
      "loss": 0.4802,
      "step": 1162
    },
    {
      "epoch": 0.3490396158463385,
      "grad_norm": 0.1542765349149704,
      "learning_rate": 0.0001966443535331889,
      "loss": 0.4649,
      "step": 1163
    },
    {
      "epoch": 0.34933973589435774,
      "grad_norm": 0.13806657493114471,
      "learning_rate": 0.00019663088563192348,
      "loss": 0.4928,
      "step": 1164
    },
    {
      "epoch": 0.34963985594237695,
      "grad_norm": 0.2369850128889084,
      "learning_rate": 0.00019661739122108263,
      "loss": 0.525,
      "step": 1165
    },
    {
      "epoch": 0.34993997599039617,
      "grad_norm": 0.15411926805973053,
      "learning_rate": 0.00019660387030436837,
      "loss": 0.5242,
      "step": 1166
    },
    {
      "epoch": 0.3502400960384154,
      "grad_norm": 0.12959231436252594,
      "learning_rate": 0.00019659032288549003,
      "loss": 0.4718,
      "step": 1167
    },
    {
      "epoch": 0.3505402160864346,
      "grad_norm": 0.1349993348121643,
      "learning_rate": 0.00019657674896816414,
      "loss": 0.4333,
      "step": 1168
    },
    {
      "epoch": 0.35084033613445376,
      "grad_norm": 0.15307295322418213,
      "learning_rate": 0.00019656314855611456,
      "loss": 0.473,
      "step": 1169
    },
    {
      "epoch": 0.35114045618247297,
      "grad_norm": 0.13697105646133423,
      "learning_rate": 0.00019654952165307245,
      "loss": 0.4935,
      "step": 1170
    },
    {
      "epoch": 0.3514405762304922,
      "grad_norm": 0.14775574207305908,
      "learning_rate": 0.00019653586826277617,
      "loss": 0.5187,
      "step": 1171
    },
    {
      "epoch": 0.3517406962785114,
      "grad_norm": 0.135857954621315,
      "learning_rate": 0.00019652218838897136,
      "loss": 0.4352,
      "step": 1172
    },
    {
      "epoch": 0.3520408163265306,
      "grad_norm": 0.13962113857269287,
      "learning_rate": 0.00019650848203541093,
      "loss": 0.4965,
      "step": 1173
    },
    {
      "epoch": 0.35234093637454983,
      "grad_norm": 0.14366528391838074,
      "learning_rate": 0.00019649474920585512,
      "loss": 0.5181,
      "step": 1174
    },
    {
      "epoch": 0.35264105642256904,
      "grad_norm": 0.1447860300540924,
      "learning_rate": 0.0001964809899040713,
      "loss": 0.5149,
      "step": 1175
    },
    {
      "epoch": 0.35294117647058826,
      "grad_norm": 0.13299641013145447,
      "learning_rate": 0.0001964672041338342,
      "loss": 0.4445,
      "step": 1176
    },
    {
      "epoch": 0.3532412965186074,
      "grad_norm": 0.1375095099210739,
      "learning_rate": 0.0001964533918989258,
      "loss": 0.4906,
      "step": 1177
    },
    {
      "epoch": 0.35354141656662663,
      "grad_norm": 0.14751577377319336,
      "learning_rate": 0.00019643955320313534,
      "loss": 0.4753,
      "step": 1178
    },
    {
      "epoch": 0.35384153661464585,
      "grad_norm": 0.16620895266532898,
      "learning_rate": 0.00019642568805025928,
      "loss": 0.482,
      "step": 1179
    },
    {
      "epoch": 0.35414165666266506,
      "grad_norm": 0.1296658217906952,
      "learning_rate": 0.00019641179644410136,
      "loss": 0.4524,
      "step": 1180
    },
    {
      "epoch": 0.3544417767106843,
      "grad_norm": 0.24293987452983856,
      "learning_rate": 0.00019639787838847265,
      "loss": 0.5122,
      "step": 1181
    },
    {
      "epoch": 0.3547418967587035,
      "grad_norm": 0.1454843431711197,
      "learning_rate": 0.00019638393388719133,
      "loss": 0.5021,
      "step": 1182
    },
    {
      "epoch": 0.3550420168067227,
      "grad_norm": 0.13670776784420013,
      "learning_rate": 0.00019636996294408293,
      "loss": 0.4903,
      "step": 1183
    },
    {
      "epoch": 0.3553421368547419,
      "grad_norm": 0.14824290573596954,
      "learning_rate": 0.00019635596556298024,
      "loss": 0.5182,
      "step": 1184
    },
    {
      "epoch": 0.3556422569027611,
      "grad_norm": 0.1408873349428177,
      "learning_rate": 0.00019634194174772326,
      "loss": 0.4947,
      "step": 1185
    },
    {
      "epoch": 0.3559423769507803,
      "grad_norm": 0.12988775968551636,
      "learning_rate": 0.00019632789150215928,
      "loss": 0.4538,
      "step": 1186
    },
    {
      "epoch": 0.3562424969987995,
      "grad_norm": 0.148466095328331,
      "learning_rate": 0.00019631381483014283,
      "loss": 0.507,
      "step": 1187
    },
    {
      "epoch": 0.3565426170468187,
      "grad_norm": 0.13022229075431824,
      "learning_rate": 0.00019629971173553567,
      "loss": 0.4291,
      "step": 1188
    },
    {
      "epoch": 0.35684273709483794,
      "grad_norm": 0.15934327244758606,
      "learning_rate": 0.00019628558222220682,
      "loss": 0.502,
      "step": 1189
    },
    {
      "epoch": 0.35714285714285715,
      "grad_norm": 0.1747078150510788,
      "learning_rate": 0.00019627142629403258,
      "loss": 0.4984,
      "step": 1190
    },
    {
      "epoch": 0.35744297719087637,
      "grad_norm": 0.7439995408058167,
      "learning_rate": 0.0001962572439548964,
      "loss": 0.4476,
      "step": 1191
    },
    {
      "epoch": 0.3577430972388956,
      "grad_norm": 0.1325507014989853,
      "learning_rate": 0.0001962430352086891,
      "loss": 0.4926,
      "step": 1192
    },
    {
      "epoch": 0.35804321728691474,
      "grad_norm": 0.14991387724876404,
      "learning_rate": 0.00019622880005930866,
      "loss": 0.5043,
      "step": 1193
    },
    {
      "epoch": 0.35834333733493395,
      "grad_norm": 0.1313386708498001,
      "learning_rate": 0.00019621453851066036,
      "loss": 0.4202,
      "step": 1194
    },
    {
      "epoch": 0.35864345738295317,
      "grad_norm": 0.13692636787891388,
      "learning_rate": 0.00019620025056665664,
      "loss": 0.488,
      "step": 1195
    },
    {
      "epoch": 0.3589435774309724,
      "grad_norm": 0.13470913469791412,
      "learning_rate": 0.00019618593623121731,
      "loss": 0.4661,
      "step": 1196
    },
    {
      "epoch": 0.3592436974789916,
      "grad_norm": 0.16752992570400238,
      "learning_rate": 0.00019617159550826927,
      "loss": 0.475,
      "step": 1197
    },
    {
      "epoch": 0.3595438175270108,
      "grad_norm": 0.15151073038578033,
      "learning_rate": 0.00019615722840174676,
      "loss": 0.4865,
      "step": 1198
    },
    {
      "epoch": 0.35984393757503,
      "grad_norm": 0.14800961315631866,
      "learning_rate": 0.00019614283491559123,
      "loss": 0.4617,
      "step": 1199
    },
    {
      "epoch": 0.36014405762304924,
      "grad_norm": 0.25916588306427,
      "learning_rate": 0.00019612841505375138,
      "loss": 0.3924,
      "step": 1200
    },
    {
      "epoch": 0.3604441776710684,
      "grad_norm": 0.13493013381958008,
      "learning_rate": 0.00019611396882018313,
      "loss": 0.4551,
      "step": 1201
    },
    {
      "epoch": 0.3607442977190876,
      "grad_norm": 0.1315002739429474,
      "learning_rate": 0.00019609949621884966,
      "loss": 0.4402,
      "step": 1202
    },
    {
      "epoch": 0.36104441776710683,
      "grad_norm": 0.14526283740997314,
      "learning_rate": 0.00019608499725372127,
      "loss": 0.4859,
      "step": 1203
    },
    {
      "epoch": 0.36134453781512604,
      "grad_norm": 0.15128463506698608,
      "learning_rate": 0.0001960704719287757,
      "loss": 0.4719,
      "step": 1204
    },
    {
      "epoch": 0.36164465786314526,
      "grad_norm": 0.13079828023910522,
      "learning_rate": 0.00019605592024799772,
      "loss": 0.4258,
      "step": 1205
    },
    {
      "epoch": 0.3619447779111645,
      "grad_norm": 0.14006419479846954,
      "learning_rate": 0.0001960413422153795,
      "loss": 0.478,
      "step": 1206
    },
    {
      "epoch": 0.3622448979591837,
      "grad_norm": 0.2214106172323227,
      "learning_rate": 0.00019602673783492027,
      "loss": 0.4962,
      "step": 1207
    },
    {
      "epoch": 0.3625450180072029,
      "grad_norm": 0.14224013686180115,
      "learning_rate": 0.00019601210711062662,
      "loss": 0.5082,
      "step": 1208
    },
    {
      "epoch": 0.3628451380552221,
      "grad_norm": 0.17071548104286194,
      "learning_rate": 0.00019599745004651235,
      "loss": 0.4933,
      "step": 1209
    },
    {
      "epoch": 0.3631452581032413,
      "grad_norm": 0.14860154688358307,
      "learning_rate": 0.0001959827666465984,
      "loss": 0.4828,
      "step": 1210
    },
    {
      "epoch": 0.3634453781512605,
      "grad_norm": 0.15933696925640106,
      "learning_rate": 0.000195968056914913,
      "loss": 0.4377,
      "step": 1211
    },
    {
      "epoch": 0.3637454981992797,
      "grad_norm": 0.20036178827285767,
      "learning_rate": 0.00019595332085549163,
      "loss": 0.5222,
      "step": 1212
    },
    {
      "epoch": 0.3640456182472989,
      "grad_norm": 0.15371794998645782,
      "learning_rate": 0.00019593855847237693,
      "loss": 0.4734,
      "step": 1213
    },
    {
      "epoch": 0.36434573829531813,
      "grad_norm": 0.14381040632724762,
      "learning_rate": 0.0001959237697696188,
      "loss": 0.4294,
      "step": 1214
    },
    {
      "epoch": 0.36464585834333735,
      "grad_norm": 0.1590506136417389,
      "learning_rate": 0.00019590895475127436,
      "loss": 0.4526,
      "step": 1215
    },
    {
      "epoch": 0.36494597839135656,
      "grad_norm": 0.1287468820810318,
      "learning_rate": 0.00019589411342140793,
      "loss": 0.4134,
      "step": 1216
    },
    {
      "epoch": 0.3652460984393758,
      "grad_norm": 0.1266324520111084,
      "learning_rate": 0.00019587924578409104,
      "loss": 0.3931,
      "step": 1217
    },
    {
      "epoch": 0.36554621848739494,
      "grad_norm": 0.13392604887485504,
      "learning_rate": 0.0001958643518434025,
      "loss": 0.4429,
      "step": 1218
    },
    {
      "epoch": 0.36584633853541415,
      "grad_norm": 0.14387540519237518,
      "learning_rate": 0.0001958494316034283,
      "loss": 0.4916,
      "step": 1219
    },
    {
      "epoch": 0.36614645858343337,
      "grad_norm": 0.14918778836727142,
      "learning_rate": 0.00019583448506826155,
      "loss": 0.4862,
      "step": 1220
    },
    {
      "epoch": 0.3664465786314526,
      "grad_norm": 0.14455977082252502,
      "learning_rate": 0.00019581951224200274,
      "loss": 0.4743,
      "step": 1221
    },
    {
      "epoch": 0.3667466986794718,
      "grad_norm": 0.1635429561138153,
      "learning_rate": 0.00019580451312875945,
      "loss": 0.4825,
      "step": 1222
    },
    {
      "epoch": 0.367046818727491,
      "grad_norm": 0.1514321118593216,
      "learning_rate": 0.00019578948773264657,
      "loss": 0.465,
      "step": 1223
    },
    {
      "epoch": 0.3673469387755102,
      "grad_norm": 0.13055840134620667,
      "learning_rate": 0.00019577443605778606,
      "loss": 0.4603,
      "step": 1224
    },
    {
      "epoch": 0.36764705882352944,
      "grad_norm": 0.14448609948158264,
      "learning_rate": 0.00019575935810830724,
      "loss": 0.4908,
      "step": 1225
    },
    {
      "epoch": 0.3679471788715486,
      "grad_norm": 0.162399023771286,
      "learning_rate": 0.00019574425388834657,
      "loss": 0.4625,
      "step": 1226
    },
    {
      "epoch": 0.3682472989195678,
      "grad_norm": 0.2367607057094574,
      "learning_rate": 0.00019572912340204773,
      "loss": 0.499,
      "step": 1227
    },
    {
      "epoch": 0.368547418967587,
      "grad_norm": 0.14852334558963776,
      "learning_rate": 0.00019571396665356153,
      "loss": 0.4791,
      "step": 1228
    },
    {
      "epoch": 0.36884753901560624,
      "grad_norm": 0.14649629592895508,
      "learning_rate": 0.00019569878364704613,
      "loss": 0.4526,
      "step": 1229
    },
    {
      "epoch": 0.36914765906362546,
      "grad_norm": 0.14013509452342987,
      "learning_rate": 0.00019568357438666675,
      "loss": 0.4865,
      "step": 1230
    },
    {
      "epoch": 0.36944777911164467,
      "grad_norm": 0.13371364772319794,
      "learning_rate": 0.0001956683388765959,
      "loss": 0.4612,
      "step": 1231
    },
    {
      "epoch": 0.3697478991596639,
      "grad_norm": 0.1385035365819931,
      "learning_rate": 0.00019565307712101325,
      "loss": 0.456,
      "step": 1232
    },
    {
      "epoch": 0.3700480192076831,
      "grad_norm": 0.1443071961402893,
      "learning_rate": 0.00019563778912410574,
      "loss": 0.5028,
      "step": 1233
    },
    {
      "epoch": 0.37034813925570226,
      "grad_norm": 0.18161094188690186,
      "learning_rate": 0.00019562247489006738,
      "loss": 0.4309,
      "step": 1234
    },
    {
      "epoch": 0.3706482593037215,
      "grad_norm": 0.21723665297031403,
      "learning_rate": 0.00019560713442309954,
      "loss": 0.4589,
      "step": 1235
    },
    {
      "epoch": 0.3709483793517407,
      "grad_norm": 0.13557744026184082,
      "learning_rate": 0.00019559176772741065,
      "loss": 0.4953,
      "step": 1236
    },
    {
      "epoch": 0.3712484993997599,
      "grad_norm": 0.1500803530216217,
      "learning_rate": 0.0001955763748072164,
      "loss": 0.542,
      "step": 1237
    },
    {
      "epoch": 0.3715486194477791,
      "grad_norm": 0.1344929188489914,
      "learning_rate": 0.00019556095566673962,
      "loss": 0.5093,
      "step": 1238
    },
    {
      "epoch": 0.37184873949579833,
      "grad_norm": 0.15922845900058746,
      "learning_rate": 0.00019554551031021044,
      "loss": 0.4529,
      "step": 1239
    },
    {
      "epoch": 0.37214885954381755,
      "grad_norm": 0.1892634481191635,
      "learning_rate": 0.00019553003874186607,
      "loss": 0.4942,
      "step": 1240
    },
    {
      "epoch": 0.37244897959183676,
      "grad_norm": 0.16977249085903168,
      "learning_rate": 0.00019551454096595097,
      "loss": 0.4628,
      "step": 1241
    },
    {
      "epoch": 0.3727490996398559,
      "grad_norm": 0.1330426037311554,
      "learning_rate": 0.00019549901698671685,
      "loss": 0.4938,
      "step": 1242
    },
    {
      "epoch": 0.37304921968787513,
      "grad_norm": 0.12905094027519226,
      "learning_rate": 0.0001954834668084224,
      "loss": 0.3876,
      "step": 1243
    },
    {
      "epoch": 0.37334933973589435,
      "grad_norm": 0.1504504233598709,
      "learning_rate": 0.0001954678904353337,
      "loss": 0.538,
      "step": 1244
    },
    {
      "epoch": 0.37364945978391356,
      "grad_norm": 0.1254984438419342,
      "learning_rate": 0.00019545228787172397,
      "loss": 0.3784,
      "step": 1245
    },
    {
      "epoch": 0.3739495798319328,
      "grad_norm": 0.1318364143371582,
      "learning_rate": 0.00019543665912187357,
      "loss": 0.4715,
      "step": 1246
    },
    {
      "epoch": 0.374249699879952,
      "grad_norm": 0.15896952152252197,
      "learning_rate": 0.00019542100419007007,
      "loss": 0.4601,
      "step": 1247
    },
    {
      "epoch": 0.3745498199279712,
      "grad_norm": 0.14802490174770355,
      "learning_rate": 0.00019540532308060825,
      "loss": 0.4695,
      "step": 1248
    },
    {
      "epoch": 0.3748499399759904,
      "grad_norm": 0.14402136206626892,
      "learning_rate": 0.00019538961579778998,
      "loss": 0.4744,
      "step": 1249
    },
    {
      "epoch": 0.3751500600240096,
      "grad_norm": 0.18700093030929565,
      "learning_rate": 0.00019537388234592442,
      "loss": 0.5243,
      "step": 1250
    },
    {
      "epoch": 0.3754501800720288,
      "grad_norm": 0.13441616296768188,
      "learning_rate": 0.00019535812272932786,
      "loss": 0.4646,
      "step": 1251
    },
    {
      "epoch": 0.375750300120048,
      "grad_norm": 0.1384831815958023,
      "learning_rate": 0.00019534233695232375,
      "loss": 0.4393,
      "step": 1252
    },
    {
      "epoch": 0.3760504201680672,
      "grad_norm": 0.15414471924304962,
      "learning_rate": 0.00019532652501924277,
      "loss": 0.471,
      "step": 1253
    },
    {
      "epoch": 0.37635054021608644,
      "grad_norm": 0.14955464005470276,
      "learning_rate": 0.0001953106869344227,
      "loss": 0.4703,
      "step": 1254
    },
    {
      "epoch": 0.37665066026410565,
      "grad_norm": 0.13937316834926605,
      "learning_rate": 0.00019529482270220857,
      "loss": 0.4557,
      "step": 1255
    },
    {
      "epoch": 0.37695078031212487,
      "grad_norm": 0.1474233865737915,
      "learning_rate": 0.00019527893232695252,
      "loss": 0.4807,
      "step": 1256
    },
    {
      "epoch": 0.3772509003601441,
      "grad_norm": 0.14132389426231384,
      "learning_rate": 0.00019526301581301392,
      "loss": 0.4805,
      "step": 1257
    },
    {
      "epoch": 0.37755102040816324,
      "grad_norm": 0.15703056752681732,
      "learning_rate": 0.00019524707316475928,
      "loss": 0.4785,
      "step": 1258
    },
    {
      "epoch": 0.37785114045618245,
      "grad_norm": 0.13659298419952393,
      "learning_rate": 0.00019523110438656228,
      "loss": 0.4925,
      "step": 1259
    },
    {
      "epoch": 0.37815126050420167,
      "grad_norm": 0.20109659433364868,
      "learning_rate": 0.00019521510948280373,
      "loss": 0.4768,
      "step": 1260
    },
    {
      "epoch": 0.3784513805522209,
      "grad_norm": 0.19589029252529144,
      "learning_rate": 0.0001951990884578717,
      "loss": 0.5204,
      "step": 1261
    },
    {
      "epoch": 0.3787515006002401,
      "grad_norm": 0.1271110624074936,
      "learning_rate": 0.00019518304131616138,
      "loss": 0.4295,
      "step": 1262
    },
    {
      "epoch": 0.3790516206482593,
      "grad_norm": 0.2087676227092743,
      "learning_rate": 0.0001951669680620751,
      "loss": 0.5067,
      "step": 1263
    },
    {
      "epoch": 0.3793517406962785,
      "grad_norm": 0.18466299772262573,
      "learning_rate": 0.00019515086870002234,
      "loss": 0.5341,
      "step": 1264
    },
    {
      "epoch": 0.37965186074429774,
      "grad_norm": 0.12623387575149536,
      "learning_rate": 0.00019513474323441986,
      "loss": 0.4194,
      "step": 1265
    },
    {
      "epoch": 0.3799519807923169,
      "grad_norm": 0.14969417452812195,
      "learning_rate": 0.00019511859166969142,
      "loss": 0.4693,
      "step": 1266
    },
    {
      "epoch": 0.3802521008403361,
      "grad_norm": 0.14682205021381378,
      "learning_rate": 0.00019510241401026802,
      "loss": 0.5259,
      "step": 1267
    },
    {
      "epoch": 0.38055222088835533,
      "grad_norm": 0.20016992092132568,
      "learning_rate": 0.00019508621026058785,
      "loss": 0.4574,
      "step": 1268
    },
    {
      "epoch": 0.38085234093637454,
      "grad_norm": 0.1485625058412552,
      "learning_rate": 0.00019506998042509622,
      "loss": 0.441,
      "step": 1269
    },
    {
      "epoch": 0.38115246098439376,
      "grad_norm": 0.1220528706908226,
      "learning_rate": 0.0001950537245082456,
      "loss": 0.4228,
      "step": 1270
    },
    {
      "epoch": 0.381452581032413,
      "grad_norm": 0.131525918841362,
      "learning_rate": 0.00019503744251449557,
      "loss": 0.4669,
      "step": 1271
    },
    {
      "epoch": 0.3817527010804322,
      "grad_norm": 0.1620159149169922,
      "learning_rate": 0.00019502113444831297,
      "loss": 0.5135,
      "step": 1272
    },
    {
      "epoch": 0.3820528211284514,
      "grad_norm": 0.1345333456993103,
      "learning_rate": 0.00019500480031417166,
      "loss": 0.4917,
      "step": 1273
    },
    {
      "epoch": 0.38235294117647056,
      "grad_norm": 0.15267445147037506,
      "learning_rate": 0.0001949884401165528,
      "loss": 0.4456,
      "step": 1274
    },
    {
      "epoch": 0.3826530612244898,
      "grad_norm": 0.18665656447410583,
      "learning_rate": 0.00019497205385994457,
      "loss": 0.5099,
      "step": 1275
    },
    {
      "epoch": 0.382953181272509,
      "grad_norm": 0.139787957072258,
      "learning_rate": 0.0001949556415488424,
      "loss": 0.4554,
      "step": 1276
    },
    {
      "epoch": 0.3832533013205282,
      "grad_norm": 0.14109259843826294,
      "learning_rate": 0.00019493920318774873,
      "loss": 0.4776,
      "step": 1277
    },
    {
      "epoch": 0.3835534213685474,
      "grad_norm": 0.1319589614868164,
      "learning_rate": 0.00019492273878117335,
      "loss": 0.4415,
      "step": 1278
    },
    {
      "epoch": 0.38385354141656663,
      "grad_norm": 0.16108369827270508,
      "learning_rate": 0.000194906248333633,
      "loss": 0.523,
      "step": 1279
    },
    {
      "epoch": 0.38415366146458585,
      "grad_norm": 0.14197713136672974,
      "learning_rate": 0.0001948897318496517,
      "loss": 0.4603,
      "step": 1280
    },
    {
      "epoch": 0.38445378151260506,
      "grad_norm": 0.1432972252368927,
      "learning_rate": 0.00019487318933376048,
      "loss": 0.4849,
      "step": 1281
    },
    {
      "epoch": 0.3847539015606242,
      "grad_norm": 0.13725528120994568,
      "learning_rate": 0.00019485662079049768,
      "loss": 0.4327,
      "step": 1282
    },
    {
      "epoch": 0.38505402160864344,
      "grad_norm": 0.14719566702842712,
      "learning_rate": 0.0001948400262244086,
      "loss": 0.4813,
      "step": 1283
    },
    {
      "epoch": 0.38535414165666265,
      "grad_norm": 0.13413792848587036,
      "learning_rate": 0.00019482340564004586,
      "loss": 0.4021,
      "step": 1284
    },
    {
      "epoch": 0.38565426170468187,
      "grad_norm": 0.14877858757972717,
      "learning_rate": 0.00019480675904196907,
      "loss": 0.4527,
      "step": 1285
    },
    {
      "epoch": 0.3859543817527011,
      "grad_norm": 0.1463140845298767,
      "learning_rate": 0.00019479008643474505,
      "loss": 0.4249,
      "step": 1286
    },
    {
      "epoch": 0.3862545018007203,
      "grad_norm": 0.14037886261940002,
      "learning_rate": 0.00019477338782294772,
      "loss": 0.4336,
      "step": 1287
    },
    {
      "epoch": 0.3865546218487395,
      "grad_norm": 0.42916664481163025,
      "learning_rate": 0.0001947566632111582,
      "loss": 0.5011,
      "step": 1288
    },
    {
      "epoch": 0.3868547418967587,
      "grad_norm": 0.26069143414497375,
      "learning_rate": 0.00019473991260396463,
      "loss": 0.4952,
      "step": 1289
    },
    {
      "epoch": 0.3871548619447779,
      "grad_norm": 0.16868044435977936,
      "learning_rate": 0.0001947231360059624,
      "loss": 0.4922,
      "step": 1290
    },
    {
      "epoch": 0.3874549819927971,
      "grad_norm": 0.13486826419830322,
      "learning_rate": 0.00019470633342175394,
      "loss": 0.4639,
      "step": 1291
    },
    {
      "epoch": 0.3877551020408163,
      "grad_norm": 0.131912499666214,
      "learning_rate": 0.00019468950485594888,
      "loss": 0.4263,
      "step": 1292
    },
    {
      "epoch": 0.3880552220888355,
      "grad_norm": 0.13306498527526855,
      "learning_rate": 0.0001946726503131639,
      "loss": 0.4493,
      "step": 1293
    },
    {
      "epoch": 0.38835534213685474,
      "grad_norm": 0.1558721959590912,
      "learning_rate": 0.00019465576979802292,
      "loss": 0.4647,
      "step": 1294
    },
    {
      "epoch": 0.38865546218487396,
      "grad_norm": 0.14165359735488892,
      "learning_rate": 0.00019463886331515685,
      "loss": 0.4208,
      "step": 1295
    },
    {
      "epoch": 0.38895558223289317,
      "grad_norm": 0.14463305473327637,
      "learning_rate": 0.00019462193086920384,
      "loss": 0.4406,
      "step": 1296
    },
    {
      "epoch": 0.3892557022809124,
      "grad_norm": 0.1373589187860489,
      "learning_rate": 0.00019460497246480903,
      "loss": 0.4195,
      "step": 1297
    },
    {
      "epoch": 0.3895558223289316,
      "grad_norm": 0.13842253386974335,
      "learning_rate": 0.00019458798810662487,
      "loss": 0.4675,
      "step": 1298
    },
    {
      "epoch": 0.38985594237695076,
      "grad_norm": 0.17589081823825836,
      "learning_rate": 0.00019457097779931076,
      "loss": 0.5043,
      "step": 1299
    },
    {
      "epoch": 0.39015606242497,
      "grad_norm": 0.1493915170431137,
      "learning_rate": 0.0001945539415475333,
      "loss": 0.4974,
      "step": 1300
    },
    {
      "epoch": 0.3904561824729892,
      "grad_norm": 0.16712096333503723,
      "learning_rate": 0.00019453687935596617,
      "loss": 0.5679,
      "step": 1301
    },
    {
      "epoch": 0.3907563025210084,
      "grad_norm": 0.15799379348754883,
      "learning_rate": 0.00019451979122929024,
      "loss": 0.4574,
      "step": 1302
    },
    {
      "epoch": 0.3910564225690276,
      "grad_norm": 0.13828876614570618,
      "learning_rate": 0.0001945026771721934,
      "loss": 0.4455,
      "step": 1303
    },
    {
      "epoch": 0.39135654261704683,
      "grad_norm": 0.13726243376731873,
      "learning_rate": 0.00019448553718937067,
      "loss": 0.4973,
      "step": 1304
    },
    {
      "epoch": 0.39165666266506605,
      "grad_norm": 0.6387799382209778,
      "learning_rate": 0.00019446837128552424,
      "loss": 0.464,
      "step": 1305
    },
    {
      "epoch": 0.39195678271308526,
      "grad_norm": 0.13185249269008636,
      "learning_rate": 0.00019445117946536339,
      "loss": 0.4391,
      "step": 1306
    },
    {
      "epoch": 0.3922569027611044,
      "grad_norm": 0.20164337754249573,
      "learning_rate": 0.0001944339617336045,
      "loss": 0.445,
      "step": 1307
    },
    {
      "epoch": 0.39255702280912363,
      "grad_norm": 0.15695242583751678,
      "learning_rate": 0.00019441671809497104,
      "loss": 0.4472,
      "step": 1308
    },
    {
      "epoch": 0.39285714285714285,
      "grad_norm": 0.19588248431682587,
      "learning_rate": 0.00019439944855419362,
      "loss": 0.4808,
      "step": 1309
    },
    {
      "epoch": 0.39315726290516206,
      "grad_norm": 0.42993319034576416,
      "learning_rate": 0.00019438215311600989,
      "loss": 0.4679,
      "step": 1310
    },
    {
      "epoch": 0.3934573829531813,
      "grad_norm": 0.15529251098632812,
      "learning_rate": 0.0001943648317851647,
      "loss": 0.4399,
      "step": 1311
    },
    {
      "epoch": 0.3937575030012005,
      "grad_norm": 0.17741309106349945,
      "learning_rate": 0.00019434748456641,
      "loss": 0.4295,
      "step": 1312
    },
    {
      "epoch": 0.3940576230492197,
      "grad_norm": 0.3504805266857147,
      "learning_rate": 0.0001943301114645047,
      "loss": 0.4933,
      "step": 1313
    },
    {
      "epoch": 0.3943577430972389,
      "grad_norm": 0.14857859909534454,
      "learning_rate": 0.00019431271248421497,
      "loss": 0.4353,
      "step": 1314
    },
    {
      "epoch": 0.3946578631452581,
      "grad_norm": 0.19789060950279236,
      "learning_rate": 0.00019429528763031403,
      "loss": 0.5175,
      "step": 1315
    },
    {
      "epoch": 0.3949579831932773,
      "grad_norm": 0.15028391778469086,
      "learning_rate": 0.00019427783690758216,
      "loss": 0.5012,
      "step": 1316
    },
    {
      "epoch": 0.3952581032412965,
      "grad_norm": 0.14596232771873474,
      "learning_rate": 0.0001942603603208068,
      "loss": 0.4382,
      "step": 1317
    },
    {
      "epoch": 0.3955582232893157,
      "grad_norm": 0.14369916915893555,
      "learning_rate": 0.00019424285787478243,
      "loss": 0.3947,
      "step": 1318
    },
    {
      "epoch": 0.39585834333733494,
      "grad_norm": 0.14337095618247986,
      "learning_rate": 0.00019422532957431062,
      "loss": 0.4126,
      "step": 1319
    },
    {
      "epoch": 0.39615846338535415,
      "grad_norm": 0.16140638291835785,
      "learning_rate": 0.0001942077754242001,
      "loss": 0.4489,
      "step": 1320
    },
    {
      "epoch": 0.39645858343337337,
      "grad_norm": 0.16274520754814148,
      "learning_rate": 0.00019419019542926664,
      "loss": 0.4399,
      "step": 1321
    },
    {
      "epoch": 0.3967587034813926,
      "grad_norm": 0.15587328374385834,
      "learning_rate": 0.0001941725895943331,
      "loss": 0.4705,
      "step": 1322
    },
    {
      "epoch": 0.39705882352941174,
      "grad_norm": 0.1563081294298172,
      "learning_rate": 0.00019415495792422945,
      "loss": 0.4875,
      "step": 1323
    },
    {
      "epoch": 0.39735894357743096,
      "grad_norm": 0.15606364607810974,
      "learning_rate": 0.0001941373004237927,
      "loss": 0.5291,
      "step": 1324
    },
    {
      "epoch": 0.39765906362545017,
      "grad_norm": 0.16615036129951477,
      "learning_rate": 0.00019411961709786703,
      "loss": 0.53,
      "step": 1325
    },
    {
      "epoch": 0.3979591836734694,
      "grad_norm": 0.14595146477222443,
      "learning_rate": 0.00019410190795130365,
      "loss": 0.4094,
      "step": 1326
    },
    {
      "epoch": 0.3982593037214886,
      "grad_norm": 0.1788942515850067,
      "learning_rate": 0.00019408417298896085,
      "loss": 0.4767,
      "step": 1327
    },
    {
      "epoch": 0.3985594237695078,
      "grad_norm": 0.36024177074432373,
      "learning_rate": 0.00019406641221570402,
      "loss": 0.4652,
      "step": 1328
    },
    {
      "epoch": 0.39885954381752703,
      "grad_norm": 0.15379220247268677,
      "learning_rate": 0.00019404862563640558,
      "loss": 0.4679,
      "step": 1329
    },
    {
      "epoch": 0.39915966386554624,
      "grad_norm": 0.14786048233509064,
      "learning_rate": 0.00019403081325594516,
      "loss": 0.4564,
      "step": 1330
    },
    {
      "epoch": 0.3994597839135654,
      "grad_norm": 0.16695883870124817,
      "learning_rate": 0.0001940129750792093,
      "loss": 0.4398,
      "step": 1331
    },
    {
      "epoch": 0.3997599039615846,
      "grad_norm": 0.14832280576229095,
      "learning_rate": 0.00019399511111109176,
      "loss": 0.4871,
      "step": 1332
    },
    {
      "epoch": 0.40006002400960383,
      "grad_norm": 0.18180081248283386,
      "learning_rate": 0.00019397722135649326,
      "loss": 0.4901,
      "step": 1333
    },
    {
      "epoch": 0.40036014405762305,
      "grad_norm": 0.1705753356218338,
      "learning_rate": 0.0001939593058203217,
      "loss": 0.4704,
      "step": 1334
    },
    {
      "epoch": 0.40066026410564226,
      "grad_norm": 0.16212989389896393,
      "learning_rate": 0.00019394136450749197,
      "loss": 0.5103,
      "step": 1335
    },
    {
      "epoch": 0.4009603841536615,
      "grad_norm": 0.15347157418727875,
      "learning_rate": 0.00019392339742292612,
      "loss": 0.4944,
      "step": 1336
    },
    {
      "epoch": 0.4012605042016807,
      "grad_norm": 0.13810157775878906,
      "learning_rate": 0.00019390540457155312,
      "loss": 0.4947,
      "step": 1337
    },
    {
      "epoch": 0.4015606242496999,
      "grad_norm": 0.22768346965312958,
      "learning_rate": 0.00019388738595830916,
      "loss": 0.5081,
      "step": 1338
    },
    {
      "epoch": 0.40186074429771906,
      "grad_norm": 0.1549905240535736,
      "learning_rate": 0.00019386934158813744,
      "loss": 0.442,
      "step": 1339
    },
    {
      "epoch": 0.4021608643457383,
      "grad_norm": 0.1581527441740036,
      "learning_rate": 0.0001938512714659882,
      "loss": 0.5218,
      "step": 1340
    },
    {
      "epoch": 0.4024609843937575,
      "grad_norm": 0.14964637160301208,
      "learning_rate": 0.0001938331755968188,
      "loss": 0.5056,
      "step": 1341
    },
    {
      "epoch": 0.4027611044417767,
      "grad_norm": 0.15961486101150513,
      "learning_rate": 0.0001938150539855936,
      "loss": 0.5259,
      "step": 1342
    },
    {
      "epoch": 0.4030612244897959,
      "grad_norm": 0.15585605800151825,
      "learning_rate": 0.0001937969066372841,
      "loss": 0.4805,
      "step": 1343
    },
    {
      "epoch": 0.40336134453781514,
      "grad_norm": 0.14608198404312134,
      "learning_rate": 0.00019377873355686879,
      "loss": 0.4866,
      "step": 1344
    },
    {
      "epoch": 0.40366146458583435,
      "grad_norm": 0.16492308676242828,
      "learning_rate": 0.00019376053474933324,
      "loss": 0.4777,
      "step": 1345
    },
    {
      "epoch": 0.40396158463385357,
      "grad_norm": 0.1487661898136139,
      "learning_rate": 0.00019374231021967013,
      "loss": 0.5075,
      "step": 1346
    },
    {
      "epoch": 0.4042617046818727,
      "grad_norm": 0.1344498097896576,
      "learning_rate": 0.00019372405997287908,
      "loss": 0.4452,
      "step": 1347
    },
    {
      "epoch": 0.40456182472989194,
      "grad_norm": 0.13872256875038147,
      "learning_rate": 0.00019370578401396688,
      "loss": 0.467,
      "step": 1348
    },
    {
      "epoch": 0.40486194477791115,
      "grad_norm": 0.17656131088733673,
      "learning_rate": 0.00019368748234794731,
      "loss": 0.5042,
      "step": 1349
    },
    {
      "epoch": 0.40516206482593037,
      "grad_norm": 0.15703557431697845,
      "learning_rate": 0.00019366915497984126,
      "loss": 0.4602,
      "step": 1350
    },
    {
      "epoch": 0.4054621848739496,
      "grad_norm": 0.14739936590194702,
      "learning_rate": 0.0001936508019146766,
      "loss": 0.4794,
      "step": 1351
    },
    {
      "epoch": 0.4057623049219688,
      "grad_norm": 0.1558038592338562,
      "learning_rate": 0.00019363242315748828,
      "loss": 0.5116,
      "step": 1352
    },
    {
      "epoch": 0.406062424969988,
      "grad_norm": 0.14387072622776031,
      "learning_rate": 0.0001936140187133183,
      "loss": 0.5056,
      "step": 1353
    },
    {
      "epoch": 0.4063625450180072,
      "grad_norm": 0.14097860455513,
      "learning_rate": 0.00019359558858721574,
      "loss": 0.5079,
      "step": 1354
    },
    {
      "epoch": 0.4066626650660264,
      "grad_norm": 0.13414618372917175,
      "learning_rate": 0.00019357713278423666,
      "loss": 0.4497,
      "step": 1355
    },
    {
      "epoch": 0.4069627851140456,
      "grad_norm": 0.1423015296459198,
      "learning_rate": 0.0001935586513094442,
      "loss": 0.4772,
      "step": 1356
    },
    {
      "epoch": 0.4072629051620648,
      "grad_norm": 0.14633090794086456,
      "learning_rate": 0.0001935401441679086,
      "loss": 0.4682,
      "step": 1357
    },
    {
      "epoch": 0.40756302521008403,
      "grad_norm": 0.13317428529262543,
      "learning_rate": 0.00019352161136470698,
      "loss": 0.4852,
      "step": 1358
    },
    {
      "epoch": 0.40786314525810324,
      "grad_norm": 0.15108975768089294,
      "learning_rate": 0.00019350305290492367,
      "loss": 0.503,
      "step": 1359
    },
    {
      "epoch": 0.40816326530612246,
      "grad_norm": 0.13461902737617493,
      "learning_rate": 0.00019348446879364998,
      "loss": 0.4551,
      "step": 1360
    },
    {
      "epoch": 0.4084633853541417,
      "grad_norm": 0.14803214371204376,
      "learning_rate": 0.0001934658590359842,
      "loss": 0.5164,
      "step": 1361
    },
    {
      "epoch": 0.4087635054021609,
      "grad_norm": 0.17757324874401093,
      "learning_rate": 0.00019344722363703174,
      "loss": 0.4634,
      "step": 1362
    },
    {
      "epoch": 0.40906362545018005,
      "grad_norm": 0.15838757157325745,
      "learning_rate": 0.000193428562601905,
      "loss": 0.5696,
      "step": 1363
    },
    {
      "epoch": 0.40936374549819926,
      "grad_norm": 0.13884811103343964,
      "learning_rate": 0.0001934098759357234,
      "loss": 0.4073,
      "step": 1364
    },
    {
      "epoch": 0.4096638655462185,
      "grad_norm": 0.15501153469085693,
      "learning_rate": 0.00019339116364361342,
      "loss": 0.4901,
      "step": 1365
    },
    {
      "epoch": 0.4099639855942377,
      "grad_norm": 0.1534595936536789,
      "learning_rate": 0.00019337242573070858,
      "loss": 0.5782,
      "step": 1366
    },
    {
      "epoch": 0.4102641056422569,
      "grad_norm": 0.39484915137290955,
      "learning_rate": 0.00019335366220214943,
      "loss": 0.4158,
      "step": 1367
    },
    {
      "epoch": 0.4105642256902761,
      "grad_norm": 0.1517392247915268,
      "learning_rate": 0.0001933348730630835,
      "loss": 0.4379,
      "step": 1368
    },
    {
      "epoch": 0.41086434573829533,
      "grad_norm": 0.14215496182441711,
      "learning_rate": 0.00019331605831866534,
      "loss": 0.4906,
      "step": 1369
    },
    {
      "epoch": 0.41116446578631455,
      "grad_norm": 0.1274791955947876,
      "learning_rate": 0.00019329721797405665,
      "loss": 0.435,
      "step": 1370
    },
    {
      "epoch": 0.4114645858343337,
      "grad_norm": 0.14641650021076202,
      "learning_rate": 0.00019327835203442596,
      "loss": 0.5223,
      "step": 1371
    },
    {
      "epoch": 0.4117647058823529,
      "grad_norm": 0.15195374190807343,
      "learning_rate": 0.000193259460504949,
      "loss": 0.5157,
      "step": 1372
    },
    {
      "epoch": 0.41206482593037214,
      "grad_norm": 0.14944523572921753,
      "learning_rate": 0.00019324054339080838,
      "loss": 0.5046,
      "step": 1373
    },
    {
      "epoch": 0.41236494597839135,
      "grad_norm": 0.20714707672595978,
      "learning_rate": 0.00019322160069719388,
      "loss": 0.5317,
      "step": 1374
    },
    {
      "epoch": 0.41266506602641057,
      "grad_norm": 0.14786897599697113,
      "learning_rate": 0.00019320263242930214,
      "loss": 0.4566,
      "step": 1375
    },
    {
      "epoch": 0.4129651860744298,
      "grad_norm": 0.15220782160758972,
      "learning_rate": 0.00019318363859233693,
      "loss": 0.5205,
      "step": 1376
    },
    {
      "epoch": 0.413265306122449,
      "grad_norm": 0.12114045768976212,
      "learning_rate": 0.00019316461919150895,
      "loss": 0.3673,
      "step": 1377
    },
    {
      "epoch": 0.4135654261704682,
      "grad_norm": 0.14805008471012115,
      "learning_rate": 0.00019314557423203595,
      "loss": 0.4754,
      "step": 1378
    },
    {
      "epoch": 0.41386554621848737,
      "grad_norm": 0.15739750862121582,
      "learning_rate": 0.00019312650371914277,
      "loss": 0.447,
      "step": 1379
    },
    {
      "epoch": 0.4141656662665066,
      "grad_norm": 0.15176567435264587,
      "learning_rate": 0.00019310740765806112,
      "loss": 0.4614,
      "step": 1380
    },
    {
      "epoch": 0.4144657863145258,
      "grad_norm": 0.14173932373523712,
      "learning_rate": 0.0001930882860540298,
      "loss": 0.4456,
      "step": 1381
    },
    {
      "epoch": 0.414765906362545,
      "grad_norm": 0.1475897878408432,
      "learning_rate": 0.00019306913891229462,
      "loss": 0.4743,
      "step": 1382
    },
    {
      "epoch": 0.4150660264105642,
      "grad_norm": 0.1380489021539688,
      "learning_rate": 0.00019304996623810834,
      "loss": 0.4284,
      "step": 1383
    },
    {
      "epoch": 0.41536614645858344,
      "grad_norm": 0.1423666626214981,
      "learning_rate": 0.0001930307680367308,
      "loss": 0.5312,
      "step": 1384
    },
    {
      "epoch": 0.41566626650660266,
      "grad_norm": 0.159433051943779,
      "learning_rate": 0.0001930115443134288,
      "loss": 0.5392,
      "step": 1385
    },
    {
      "epoch": 0.41596638655462187,
      "grad_norm": 0.13836570084095,
      "learning_rate": 0.00019299229507347614,
      "loss": 0.4596,
      "step": 1386
    },
    {
      "epoch": 0.41626650660264103,
      "grad_norm": 0.1377885788679123,
      "learning_rate": 0.00019297302032215364,
      "loss": 0.4446,
      "step": 1387
    },
    {
      "epoch": 0.41656662665066024,
      "grad_norm": 0.13649654388427734,
      "learning_rate": 0.00019295372006474906,
      "loss": 0.4496,
      "step": 1388
    },
    {
      "epoch": 0.41686674669867946,
      "grad_norm": 0.1478634625673294,
      "learning_rate": 0.00019293439430655726,
      "loss": 0.5175,
      "step": 1389
    },
    {
      "epoch": 0.4171668667466987,
      "grad_norm": 0.16393691301345825,
      "learning_rate": 0.00019291504305288005,
      "loss": 0.5568,
      "step": 1390
    },
    {
      "epoch": 0.4174669867947179,
      "grad_norm": 0.1378331482410431,
      "learning_rate": 0.00019289566630902619,
      "loss": 0.449,
      "step": 1391
    },
    {
      "epoch": 0.4177671068427371,
      "grad_norm": 0.1320262998342514,
      "learning_rate": 0.00019287626408031147,
      "loss": 0.4264,
      "step": 1392
    },
    {
      "epoch": 0.4180672268907563,
      "grad_norm": 0.13636216521263123,
      "learning_rate": 0.00019285683637205864,
      "loss": 0.5008,
      "step": 1393
    },
    {
      "epoch": 0.41836734693877553,
      "grad_norm": 0.14212261140346527,
      "learning_rate": 0.00019283738318959752,
      "loss": 0.451,
      "step": 1394
    },
    {
      "epoch": 0.41866746698679475,
      "grad_norm": 0.1388690024614334,
      "learning_rate": 0.00019281790453826484,
      "loss": 0.4727,
      "step": 1395
    },
    {
      "epoch": 0.4189675870348139,
      "grad_norm": 0.13603146374225616,
      "learning_rate": 0.0001927984004234044,
      "loss": 0.4617,
      "step": 1396
    },
    {
      "epoch": 0.4192677070828331,
      "grad_norm": 0.1306666135787964,
      "learning_rate": 0.00019277887085036684,
      "loss": 0.5171,
      "step": 1397
    },
    {
      "epoch": 0.41956782713085233,
      "grad_norm": 0.1543315351009369,
      "learning_rate": 0.0001927593158245099,
      "loss": 0.4892,
      "step": 1398
    },
    {
      "epoch": 0.41986794717887155,
      "grad_norm": 0.3799855411052704,
      "learning_rate": 0.00019273973535119835,
      "loss": 0.5509,
      "step": 1399
    },
    {
      "epoch": 0.42016806722689076,
      "grad_norm": 0.1430201381444931,
      "learning_rate": 0.00019272012943580383,
      "loss": 0.5026,
      "step": 1400
    },
    {
      "epoch": 0.42046818727491,
      "grad_norm": 0.13826300203800201,
      "learning_rate": 0.00019270049808370492,
      "loss": 0.453,
      "step": 1401
    },
    {
      "epoch": 0.4207683073229292,
      "grad_norm": 0.14627663791179657,
      "learning_rate": 0.00019268084130028736,
      "loss": 0.4964,
      "step": 1402
    },
    {
      "epoch": 0.4210684273709484,
      "grad_norm": 0.16021540760993958,
      "learning_rate": 0.00019266115909094368,
      "loss": 0.4424,
      "step": 1403
    },
    {
      "epoch": 0.42136854741896757,
      "grad_norm": 0.13889935612678528,
      "learning_rate": 0.00019264145146107356,
      "loss": 0.4589,
      "step": 1404
    },
    {
      "epoch": 0.4216686674669868,
      "grad_norm": 0.14017444849014282,
      "learning_rate": 0.00019262171841608348,
      "loss": 0.4843,
      "step": 1405
    },
    {
      "epoch": 0.421968787515006,
      "grad_norm": 0.14206208288669586,
      "learning_rate": 0.00019260195996138703,
      "loss": 0.5191,
      "step": 1406
    },
    {
      "epoch": 0.4222689075630252,
      "grad_norm": 0.14236919581890106,
      "learning_rate": 0.00019258217610240467,
      "loss": 0.4836,
      "step": 1407
    },
    {
      "epoch": 0.4225690276110444,
      "grad_norm": 0.13786455988883972,
      "learning_rate": 0.0001925623668445639,
      "loss": 0.4229,
      "step": 1408
    },
    {
      "epoch": 0.42286914765906364,
      "grad_norm": 0.1529259830713272,
      "learning_rate": 0.0001925425321932992,
      "loss": 0.4672,
      "step": 1409
    },
    {
      "epoch": 0.42316926770708285,
      "grad_norm": 0.14073142409324646,
      "learning_rate": 0.00019252267215405188,
      "loss": 0.4777,
      "step": 1410
    },
    {
      "epoch": 0.42346938775510207,
      "grad_norm": 0.1450301557779312,
      "learning_rate": 0.00019250278673227042,
      "loss": 0.4815,
      "step": 1411
    },
    {
      "epoch": 0.4237695078031212,
      "grad_norm": 0.13629521429538727,
      "learning_rate": 0.0001924828759334101,
      "loss": 0.4118,
      "step": 1412
    },
    {
      "epoch": 0.42406962785114044,
      "grad_norm": 0.1464170515537262,
      "learning_rate": 0.0001924629397629332,
      "loss": 0.5251,
      "step": 1413
    },
    {
      "epoch": 0.42436974789915966,
      "grad_norm": 0.14412495493888855,
      "learning_rate": 0.00019244297822630906,
      "loss": 0.461,
      "step": 1414
    },
    {
      "epoch": 0.42466986794717887,
      "grad_norm": 0.13709735870361328,
      "learning_rate": 0.0001924229913290138,
      "loss": 0.5078,
      "step": 1415
    },
    {
      "epoch": 0.4249699879951981,
      "grad_norm": 0.14858576655387878,
      "learning_rate": 0.0001924029790765307,
      "loss": 0.5115,
      "step": 1416
    },
    {
      "epoch": 0.4252701080432173,
      "grad_norm": 0.12990660965442657,
      "learning_rate": 0.0001923829414743498,
      "loss": 0.417,
      "step": 1417
    },
    {
      "epoch": 0.4255702280912365,
      "grad_norm": 0.1423971801996231,
      "learning_rate": 0.00019236287852796821,
      "loss": 0.4614,
      "step": 1418
    },
    {
      "epoch": 0.4258703481392557,
      "grad_norm": 0.14663192629814148,
      "learning_rate": 0.00019234279024289003,
      "loss": 0.4758,
      "step": 1419
    },
    {
      "epoch": 0.4261704681872749,
      "grad_norm": 0.13479375839233398,
      "learning_rate": 0.00019232267662462618,
      "loss": 0.4386,
      "step": 1420
    },
    {
      "epoch": 0.4264705882352941,
      "grad_norm": 0.1321115493774414,
      "learning_rate": 0.0001923025376786946,
      "loss": 0.428,
      "step": 1421
    },
    {
      "epoch": 0.4267707082833133,
      "grad_norm": 0.14896945655345917,
      "learning_rate": 0.00019228237341062024,
      "loss": 0.5236,
      "step": 1422
    },
    {
      "epoch": 0.42707082833133253,
      "grad_norm": 0.1456255316734314,
      "learning_rate": 0.00019226218382593487,
      "loss": 0.5066,
      "step": 1423
    },
    {
      "epoch": 0.42737094837935174,
      "grad_norm": 0.13657549023628235,
      "learning_rate": 0.0001922419689301773,
      "loss": 0.4759,
      "step": 1424
    },
    {
      "epoch": 0.42767106842737096,
      "grad_norm": 0.14950986206531525,
      "learning_rate": 0.00019222172872889327,
      "loss": 0.4289,
      "step": 1425
    },
    {
      "epoch": 0.4279711884753902,
      "grad_norm": 0.1470792442560196,
      "learning_rate": 0.00019220146322763545,
      "loss": 0.4723,
      "step": 1426
    },
    {
      "epoch": 0.4282713085234094,
      "grad_norm": 0.15343989431858063,
      "learning_rate": 0.0001921811724319634,
      "loss": 0.4901,
      "step": 1427
    },
    {
      "epoch": 0.42857142857142855,
      "grad_norm": 0.16162234544754028,
      "learning_rate": 0.0001921608563474437,
      "loss": 0.4972,
      "step": 1428
    },
    {
      "epoch": 0.42887154861944776,
      "grad_norm": 0.1257782131433487,
      "learning_rate": 0.00019214051497964984,
      "loss": 0.413,
      "step": 1429
    },
    {
      "epoch": 0.429171668667467,
      "grad_norm": 0.15724851191043854,
      "learning_rate": 0.00019212014833416222,
      "loss": 0.5128,
      "step": 1430
    },
    {
      "epoch": 0.4294717887154862,
      "grad_norm": 0.1418631672859192,
      "learning_rate": 0.0001920997564165682,
      "loss": 0.4481,
      "step": 1431
    },
    {
      "epoch": 0.4297719087635054,
      "grad_norm": 0.13407331705093384,
      "learning_rate": 0.0001920793392324621,
      "loss": 0.4786,
      "step": 1432
    },
    {
      "epoch": 0.4300720288115246,
      "grad_norm": 0.13005974888801575,
      "learning_rate": 0.00019205889678744514,
      "loss": 0.4658,
      "step": 1433
    },
    {
      "epoch": 0.43037214885954383,
      "grad_norm": 0.13358049094676971,
      "learning_rate": 0.0001920384290871254,
      "loss": 0.4239,
      "step": 1434
    },
    {
      "epoch": 0.43067226890756305,
      "grad_norm": 0.14098550379276276,
      "learning_rate": 0.00019201793613711802,
      "loss": 0.4933,
      "step": 1435
    },
    {
      "epoch": 0.4309723889555822,
      "grad_norm": 0.1439901441335678,
      "learning_rate": 0.000191997417943045,
      "loss": 0.4727,
      "step": 1436
    },
    {
      "epoch": 0.4312725090036014,
      "grad_norm": 0.15355221927165985,
      "learning_rate": 0.00019197687451053526,
      "loss": 0.4613,
      "step": 1437
    },
    {
      "epoch": 0.43157262905162064,
      "grad_norm": 0.38890597224235535,
      "learning_rate": 0.00019195630584522465,
      "loss": 0.5481,
      "step": 1438
    },
    {
      "epoch": 0.43187274909963985,
      "grad_norm": 0.3396846055984497,
      "learning_rate": 0.00019193571195275596,
      "loss": 0.4547,
      "step": 1439
    },
    {
      "epoch": 0.43217286914765907,
      "grad_norm": 0.12148111313581467,
      "learning_rate": 0.00019191509283877892,
      "loss": 0.4084,
      "step": 1440
    },
    {
      "epoch": 0.4324729891956783,
      "grad_norm": 0.15273834764957428,
      "learning_rate": 0.00019189444850895008,
      "loss": 0.4986,
      "step": 1441
    },
    {
      "epoch": 0.4327731092436975,
      "grad_norm": 0.17270119488239288,
      "learning_rate": 0.000191873778968933,
      "loss": 0.533,
      "step": 1442
    },
    {
      "epoch": 0.4330732292917167,
      "grad_norm": 0.14104081690311432,
      "learning_rate": 0.00019185308422439815,
      "loss": 0.4491,
      "step": 1443
    },
    {
      "epoch": 0.43337334933973587,
      "grad_norm": 0.1499965637922287,
      "learning_rate": 0.00019183236428102287,
      "loss": 0.5033,
      "step": 1444
    },
    {
      "epoch": 0.4336734693877551,
      "grad_norm": 0.14168301224708557,
      "learning_rate": 0.00019181161914449146,
      "loss": 0.4739,
      "step": 1445
    },
    {
      "epoch": 0.4339735894357743,
      "grad_norm": 0.14708314836025238,
      "learning_rate": 0.00019179084882049513,
      "loss": 0.4672,
      "step": 1446
    },
    {
      "epoch": 0.4342737094837935,
      "grad_norm": 0.1471162885427475,
      "learning_rate": 0.00019177005331473193,
      "loss": 0.4717,
      "step": 1447
    },
    {
      "epoch": 0.4345738295318127,
      "grad_norm": 0.15595698356628418,
      "learning_rate": 0.0001917492326329069,
      "loss": 0.4544,
      "step": 1448
    },
    {
      "epoch": 0.43487394957983194,
      "grad_norm": 0.14595715701580048,
      "learning_rate": 0.00019172838678073193,
      "loss": 0.489,
      "step": 1449
    },
    {
      "epoch": 0.43517406962785116,
      "grad_norm": 0.14658313989639282,
      "learning_rate": 0.00019170751576392587,
      "loss": 0.4284,
      "step": 1450
    },
    {
      "epoch": 0.43547418967587037,
      "grad_norm": 0.1370433270931244,
      "learning_rate": 0.00019168661958821441,
      "loss": 0.4149,
      "step": 1451
    },
    {
      "epoch": 0.43577430972388953,
      "grad_norm": 0.15391285717487335,
      "learning_rate": 0.00019166569825933025,
      "loss": 0.482,
      "step": 1452
    },
    {
      "epoch": 0.43607442977190874,
      "grad_norm": 0.1594506800174713,
      "learning_rate": 0.00019164475178301283,
      "loss": 0.5282,
      "step": 1453
    },
    {
      "epoch": 0.43637454981992796,
      "grad_norm": 0.5735306739807129,
      "learning_rate": 0.0001916237801650086,
      "loss": 0.4453,
      "step": 1454
    },
    {
      "epoch": 0.4366746698679472,
      "grad_norm": 0.16294141113758087,
      "learning_rate": 0.00019160278341107093,
      "loss": 0.4922,
      "step": 1455
    },
    {
      "epoch": 0.4369747899159664,
      "grad_norm": 0.15014778077602386,
      "learning_rate": 0.00019158176152695998,
      "loss": 0.4767,
      "step": 1456
    },
    {
      "epoch": 0.4372749099639856,
      "grad_norm": 0.1422508955001831,
      "learning_rate": 0.00019156071451844288,
      "loss": 0.4987,
      "step": 1457
    },
    {
      "epoch": 0.4375750300120048,
      "grad_norm": 0.2151377648115158,
      "learning_rate": 0.00019153964239129365,
      "loss": 0.4407,
      "step": 1458
    },
    {
      "epoch": 0.43787515006002403,
      "grad_norm": 0.16766145825386047,
      "learning_rate": 0.00019151854515129317,
      "loss": 0.5277,
      "step": 1459
    },
    {
      "epoch": 0.4381752701080432,
      "grad_norm": 0.15800268948078156,
      "learning_rate": 0.00019149742280422924,
      "loss": 0.4434,
      "step": 1460
    },
    {
      "epoch": 0.4384753901560624,
      "grad_norm": 0.14400655031204224,
      "learning_rate": 0.00019147627535589653,
      "loss": 0.4703,
      "step": 1461
    },
    {
      "epoch": 0.4387755102040816,
      "grad_norm": 0.15325772762298584,
      "learning_rate": 0.0001914551028120966,
      "loss": 0.4469,
      "step": 1462
    },
    {
      "epoch": 0.43907563025210083,
      "grad_norm": 0.16910713911056519,
      "learning_rate": 0.00019143390517863788,
      "loss": 0.4803,
      "step": 1463
    },
    {
      "epoch": 0.43937575030012005,
      "grad_norm": 0.1375475972890854,
      "learning_rate": 0.00019141268246133572,
      "loss": 0.4597,
      "step": 1464
    },
    {
      "epoch": 0.43967587034813926,
      "grad_norm": 0.14966444671154022,
      "learning_rate": 0.00019139143466601231,
      "loss": 0.5085,
      "step": 1465
    },
    {
      "epoch": 0.4399759903961585,
      "grad_norm": 0.13662435114383698,
      "learning_rate": 0.00019137016179849673,
      "loss": 0.4712,
      "step": 1466
    },
    {
      "epoch": 0.4402761104441777,
      "grad_norm": 0.15421763062477112,
      "learning_rate": 0.00019134886386462497,
      "loss": 0.4656,
      "step": 1467
    },
    {
      "epoch": 0.44057623049219685,
      "grad_norm": 0.14279350638389587,
      "learning_rate": 0.0001913275408702399,
      "loss": 0.4611,
      "step": 1468
    },
    {
      "epoch": 0.44087635054021607,
      "grad_norm": 0.1423294097185135,
      "learning_rate": 0.00019130619282119117,
      "loss": 0.4791,
      "step": 1469
    },
    {
      "epoch": 0.4411764705882353,
      "grad_norm": 0.15571759641170502,
      "learning_rate": 0.00019128481972333544,
      "loss": 0.4612,
      "step": 1470
    },
    {
      "epoch": 0.4414765906362545,
      "grad_norm": 0.14783750474452972,
      "learning_rate": 0.00019126342158253614,
      "loss": 0.4404,
      "step": 1471
    },
    {
      "epoch": 0.4417767106842737,
      "grad_norm": 0.2501294016838074,
      "learning_rate": 0.0001912419984046636,
      "loss": 0.4526,
      "step": 1472
    },
    {
      "epoch": 0.4420768307322929,
      "grad_norm": 0.13411974906921387,
      "learning_rate": 0.00019122055019559503,
      "loss": 0.461,
      "step": 1473
    },
    {
      "epoch": 0.44237695078031214,
      "grad_norm": 0.1492329239845276,
      "learning_rate": 0.0001911990769612145,
      "loss": 0.4456,
      "step": 1474
    },
    {
      "epoch": 0.44267707082833135,
      "grad_norm": 0.15338747203350067,
      "learning_rate": 0.00019117757870741294,
      "loss": 0.55,
      "step": 1475
    },
    {
      "epoch": 0.4429771908763505,
      "grad_norm": 0.30840009450912476,
      "learning_rate": 0.0001911560554400882,
      "loss": 0.469,
      "step": 1476
    },
    {
      "epoch": 0.4432773109243697,
      "grad_norm": 0.14980410039424896,
      "learning_rate": 0.00019113450716514487,
      "loss": 0.4939,
      "step": 1477
    },
    {
      "epoch": 0.44357743097238894,
      "grad_norm": 0.16766425967216492,
      "learning_rate": 0.00019111293388849449,
      "loss": 0.4491,
      "step": 1478
    },
    {
      "epoch": 0.44387755102040816,
      "grad_norm": 0.13539418578147888,
      "learning_rate": 0.00019109133561605546,
      "loss": 0.4329,
      "step": 1479
    },
    {
      "epoch": 0.44417767106842737,
      "grad_norm": 0.19142962992191315,
      "learning_rate": 0.00019106971235375298,
      "loss": 0.5242,
      "step": 1480
    },
    {
      "epoch": 0.4444777911164466,
      "grad_norm": 0.1418309360742569,
      "learning_rate": 0.00019104806410751924,
      "loss": 0.49,
      "step": 1481
    },
    {
      "epoch": 0.4447779111644658,
      "grad_norm": 0.14699430763721466,
      "learning_rate": 0.00019102639088329308,
      "loss": 0.489,
      "step": 1482
    },
    {
      "epoch": 0.445078031212485,
      "grad_norm": 0.14769864082336426,
      "learning_rate": 0.00019100469268702036,
      "loss": 0.4961,
      "step": 1483
    },
    {
      "epoch": 0.44537815126050423,
      "grad_norm": 0.22757241129875183,
      "learning_rate": 0.0001909829695246537,
      "loss": 0.4027,
      "step": 1484
    },
    {
      "epoch": 0.4456782713085234,
      "grad_norm": 0.1623411476612091,
      "learning_rate": 0.00019096122140215262,
      "loss": 0.4917,
      "step": 1485
    },
    {
      "epoch": 0.4459783913565426,
      "grad_norm": 0.13646866381168365,
      "learning_rate": 0.00019093944832548348,
      "loss": 0.411,
      "step": 1486
    },
    {
      "epoch": 0.4462785114045618,
      "grad_norm": 0.13902203738689423,
      "learning_rate": 0.00019091765030061943,
      "loss": 0.4696,
      "step": 1487
    },
    {
      "epoch": 0.44657863145258103,
      "grad_norm": 0.15161632001399994,
      "learning_rate": 0.00019089582733354055,
      "loss": 0.4859,
      "step": 1488
    },
    {
      "epoch": 0.44687875150060025,
      "grad_norm": 0.169401615858078,
      "learning_rate": 0.0001908739794302337,
      "loss": 0.48,
      "step": 1489
    },
    {
      "epoch": 0.44717887154861946,
      "grad_norm": 0.1369437575340271,
      "learning_rate": 0.0001908521065966926,
      "loss": 0.5028,
      "step": 1490
    },
    {
      "epoch": 0.4474789915966387,
      "grad_norm": 0.14216217398643494,
      "learning_rate": 0.00019083020883891783,
      "loss": 0.506,
      "step": 1491
    },
    {
      "epoch": 0.4477791116446579,
      "grad_norm": 0.13778440654277802,
      "learning_rate": 0.0001908082861629168,
      "loss": 0.4893,
      "step": 1492
    },
    {
      "epoch": 0.44807923169267705,
      "grad_norm": 0.13807134330272675,
      "learning_rate": 0.0001907863385747037,
      "loss": 0.4029,
      "step": 1493
    },
    {
      "epoch": 0.44837935174069626,
      "grad_norm": 0.15739092230796814,
      "learning_rate": 0.0001907643660802996,
      "loss": 0.4539,
      "step": 1494
    },
    {
      "epoch": 0.4486794717887155,
      "grad_norm": 0.17259110510349274,
      "learning_rate": 0.00019074236868573245,
      "loss": 0.475,
      "step": 1495
    },
    {
      "epoch": 0.4489795918367347,
      "grad_norm": 0.1383882761001587,
      "learning_rate": 0.00019072034639703694,
      "loss": 0.439,
      "step": 1496
    },
    {
      "epoch": 0.4492797118847539,
      "grad_norm": 0.13202977180480957,
      "learning_rate": 0.00019069829922025466,
      "loss": 0.4226,
      "step": 1497
    },
    {
      "epoch": 0.4495798319327731,
      "grad_norm": 0.12966297566890717,
      "learning_rate": 0.00019067622716143398,
      "loss": 0.4331,
      "step": 1498
    },
    {
      "epoch": 0.44987995198079234,
      "grad_norm": 0.14526928961277008,
      "learning_rate": 0.00019065413022663013,
      "loss": 0.5089,
      "step": 1499
    },
    {
      "epoch": 0.45018007202881155,
      "grad_norm": 0.1366245597600937,
      "learning_rate": 0.00019063200842190514,
      "loss": 0.3964,
      "step": 1500
    },
    {
      "epoch": 0.4504801920768307,
      "grad_norm": 0.1469152271747589,
      "learning_rate": 0.00019060986175332788,
      "loss": 0.5168,
      "step": 1501
    },
    {
      "epoch": 0.4507803121248499,
      "grad_norm": 0.7257106304168701,
      "learning_rate": 0.00019058769022697406,
      "loss": 0.4882,
      "step": 1502
    },
    {
      "epoch": 0.45108043217286914,
      "grad_norm": 0.14178527891635895,
      "learning_rate": 0.00019056549384892612,
      "loss": 0.5005,
      "step": 1503
    },
    {
      "epoch": 0.45138055222088835,
      "grad_norm": 0.1433630883693695,
      "learning_rate": 0.00019054327262527345,
      "loss": 0.4629,
      "step": 1504
    },
    {
      "epoch": 0.45168067226890757,
      "grad_norm": 0.1455744206905365,
      "learning_rate": 0.00019052102656211216,
      "loss": 0.4764,
      "step": 1505
    },
    {
      "epoch": 0.4519807923169268,
      "grad_norm": 0.16220326721668243,
      "learning_rate": 0.00019049875566554518,
      "loss": 0.5098,
      "step": 1506
    },
    {
      "epoch": 0.452280912364946,
      "grad_norm": 0.208993062376976,
      "learning_rate": 0.0001904764599416823,
      "loss": 0.5381,
      "step": 1507
    },
    {
      "epoch": 0.4525810324129652,
      "grad_norm": 0.14417657256126404,
      "learning_rate": 0.0001904541393966401,
      "loss": 0.4596,
      "step": 1508
    },
    {
      "epoch": 0.45288115246098437,
      "grad_norm": 0.14887084066867828,
      "learning_rate": 0.00019043179403654191,
      "loss": 0.49,
      "step": 1509
    },
    {
      "epoch": 0.4531812725090036,
      "grad_norm": 0.14634130895137787,
      "learning_rate": 0.00019040942386751804,
      "loss": 0.4513,
      "step": 1510
    },
    {
      "epoch": 0.4534813925570228,
      "grad_norm": 0.14948152005672455,
      "learning_rate": 0.0001903870288957054,
      "loss": 0.4613,
      "step": 1511
    },
    {
      "epoch": 0.453781512605042,
      "grad_norm": 0.14923574030399323,
      "learning_rate": 0.0001903646091272478,
      "loss": 0.5116,
      "step": 1512
    },
    {
      "epoch": 0.45408163265306123,
      "grad_norm": 0.13415589928627014,
      "learning_rate": 0.00019034216456829584,
      "loss": 0.4443,
      "step": 1513
    },
    {
      "epoch": 0.45438175270108044,
      "grad_norm": 0.1644652783870697,
      "learning_rate": 0.00019031969522500695,
      "loss": 0.5325,
      "step": 1514
    },
    {
      "epoch": 0.45468187274909966,
      "grad_norm": 0.16179829835891724,
      "learning_rate": 0.00019029720110354535,
      "loss": 0.5353,
      "step": 1515
    },
    {
      "epoch": 0.4549819927971189,
      "grad_norm": 0.14486360549926758,
      "learning_rate": 0.00019027468221008203,
      "loss": 0.4886,
      "step": 1516
    },
    {
      "epoch": 0.45528211284513803,
      "grad_norm": 0.18260490894317627,
      "learning_rate": 0.0001902521385507948,
      "loss": 0.4648,
      "step": 1517
    },
    {
      "epoch": 0.45558223289315725,
      "grad_norm": 0.17621013522148132,
      "learning_rate": 0.00019022957013186821,
      "loss": 0.5227,
      "step": 1518
    },
    {
      "epoch": 0.45588235294117646,
      "grad_norm": 0.17596934735774994,
      "learning_rate": 0.00019020697695949372,
      "loss": 0.4976,
      "step": 1519
    },
    {
      "epoch": 0.4561824729891957,
      "grad_norm": 0.16129888594150543,
      "learning_rate": 0.00019018435903986943,
      "loss": 0.4665,
      "step": 1520
    },
    {
      "epoch": 0.4564825930372149,
      "grad_norm": 0.20698495209217072,
      "learning_rate": 0.00019016171637920034,
      "loss": 0.4531,
      "step": 1521
    },
    {
      "epoch": 0.4567827130852341,
      "grad_norm": 0.5277857780456543,
      "learning_rate": 0.00019013904898369826,
      "loss": 0.4494,
      "step": 1522
    },
    {
      "epoch": 0.4570828331332533,
      "grad_norm": 0.17072156071662903,
      "learning_rate": 0.00019011635685958162,
      "loss": 0.4635,
      "step": 1523
    },
    {
      "epoch": 0.45738295318127253,
      "grad_norm": 0.1649957001209259,
      "learning_rate": 0.00019009364001307586,
      "loss": 0.487,
      "step": 1524
    },
    {
      "epoch": 0.4576830732292917,
      "grad_norm": 0.1527813822031021,
      "learning_rate": 0.00019007089845041297,
      "loss": 0.4954,
      "step": 1525
    },
    {
      "epoch": 0.4579831932773109,
      "grad_norm": 0.14998187124729156,
      "learning_rate": 0.00019004813217783192,
      "loss": 0.4788,
      "step": 1526
    },
    {
      "epoch": 0.4582833133253301,
      "grad_norm": 0.2669890820980072,
      "learning_rate": 0.00019002534120157835,
      "loss": 0.4909,
      "step": 1527
    },
    {
      "epoch": 0.45858343337334934,
      "grad_norm": 0.17830884456634521,
      "learning_rate": 0.0001900025255279047,
      "loss": 0.4735,
      "step": 1528
    },
    {
      "epoch": 0.45888355342136855,
      "grad_norm": 0.1925525665283203,
      "learning_rate": 0.00018997968516307022,
      "loss": 0.5461,
      "step": 1529
    },
    {
      "epoch": 0.45918367346938777,
      "grad_norm": 0.15895669162273407,
      "learning_rate": 0.00018995682011334087,
      "loss": 0.4522,
      "step": 1530
    },
    {
      "epoch": 0.459483793517407,
      "grad_norm": 0.14554451406002045,
      "learning_rate": 0.00018993393038498941,
      "loss": 0.4931,
      "step": 1531
    },
    {
      "epoch": 0.4597839135654262,
      "grad_norm": 0.2117418497800827,
      "learning_rate": 0.0001899110159842954,
      "loss": 0.4882,
      "step": 1532
    },
    {
      "epoch": 0.46008403361344535,
      "grad_norm": 0.16560959815979004,
      "learning_rate": 0.0001898880769175451,
      "loss": 0.5318,
      "step": 1533
    },
    {
      "epoch": 0.46038415366146457,
      "grad_norm": 0.14025753736495972,
      "learning_rate": 0.0001898651131910316,
      "loss": 0.4266,
      "step": 1534
    },
    {
      "epoch": 0.4606842737094838,
      "grad_norm": 0.1386413872241974,
      "learning_rate": 0.00018984212481105476,
      "loss": 0.4467,
      "step": 1535
    },
    {
      "epoch": 0.460984393757503,
      "grad_norm": 0.15134479105472565,
      "learning_rate": 0.00018981911178392116,
      "loss": 0.4654,
      "step": 1536
    },
    {
      "epoch": 0.4612845138055222,
      "grad_norm": 0.14854450523853302,
      "learning_rate": 0.00018979607411594417,
      "loss": 0.5032,
      "step": 1537
    },
    {
      "epoch": 0.4615846338535414,
      "grad_norm": 0.1487223356962204,
      "learning_rate": 0.0001897730118134439,
      "loss": 0.4023,
      "step": 1538
    },
    {
      "epoch": 0.46188475390156064,
      "grad_norm": 0.1386006474494934,
      "learning_rate": 0.0001897499248827472,
      "loss": 0.4373,
      "step": 1539
    },
    {
      "epoch": 0.46218487394957986,
      "grad_norm": 0.14046189188957214,
      "learning_rate": 0.00018972681333018776,
      "loss": 0.421,
      "step": 1540
    },
    {
      "epoch": 0.462484993997599,
      "grad_norm": 0.1442844271659851,
      "learning_rate": 0.00018970367716210593,
      "loss": 0.4859,
      "step": 1541
    },
    {
      "epoch": 0.46278511404561823,
      "grad_norm": 0.2659657895565033,
      "learning_rate": 0.00018968051638484888,
      "loss": 0.4487,
      "step": 1542
    },
    {
      "epoch": 0.46308523409363744,
      "grad_norm": 0.14862355589866638,
      "learning_rate": 0.00018965733100477044,
      "loss": 0.489,
      "step": 1543
    },
    {
      "epoch": 0.46338535414165666,
      "grad_norm": 0.1391323059797287,
      "learning_rate": 0.00018963412102823138,
      "loss": 0.4772,
      "step": 1544
    },
    {
      "epoch": 0.4636854741896759,
      "grad_norm": 0.17410308122634888,
      "learning_rate": 0.00018961088646159897,
      "loss": 0.5178,
      "step": 1545
    },
    {
      "epoch": 0.4639855942376951,
      "grad_norm": 0.16075238585472107,
      "learning_rate": 0.0001895876273112474,
      "loss": 0.5296,
      "step": 1546
    },
    {
      "epoch": 0.4642857142857143,
      "grad_norm": 0.14295291900634766,
      "learning_rate": 0.00018956434358355755,
      "loss": 0.51,
      "step": 1547
    },
    {
      "epoch": 0.4645858343337335,
      "grad_norm": 0.14986905455589294,
      "learning_rate": 0.000189541035284917,
      "loss": 0.4853,
      "step": 1548
    },
    {
      "epoch": 0.4648859543817527,
      "grad_norm": 0.14428989589214325,
      "learning_rate": 0.00018951770242172018,
      "loss": 0.4914,
      "step": 1549
    },
    {
      "epoch": 0.4651860744297719,
      "grad_norm": 0.1400524526834488,
      "learning_rate": 0.00018949434500036816,
      "loss": 0.47,
      "step": 1550
    },
    {
      "epoch": 0.4654861944777911,
      "grad_norm": 0.1384679079055786,
      "learning_rate": 0.00018947096302726876,
      "loss": 0.47,
      "step": 1551
    },
    {
      "epoch": 0.4657863145258103,
      "grad_norm": 0.13552284240722656,
      "learning_rate": 0.0001894475565088366,
      "loss": 0.4475,
      "step": 1552
    },
    {
      "epoch": 0.46608643457382953,
      "grad_norm": 0.1343267560005188,
      "learning_rate": 0.00018942412545149297,
      "loss": 0.4762,
      "step": 1553
    },
    {
      "epoch": 0.46638655462184875,
      "grad_norm": 0.16411390900611877,
      "learning_rate": 0.00018940066986166592,
      "loss": 0.4922,
      "step": 1554
    },
    {
      "epoch": 0.46668667466986796,
      "grad_norm": 0.14296169579029083,
      "learning_rate": 0.0001893771897457902,
      "loss": 0.4421,
      "step": 1555
    },
    {
      "epoch": 0.4669867947178872,
      "grad_norm": 0.1436728686094284,
      "learning_rate": 0.00018935368511030734,
      "loss": 0.4694,
      "step": 1556
    },
    {
      "epoch": 0.46728691476590634,
      "grad_norm": 0.14206938445568085,
      "learning_rate": 0.00018933015596166554,
      "loss": 0.4481,
      "step": 1557
    },
    {
      "epoch": 0.46758703481392555,
      "grad_norm": 0.14524251222610474,
      "learning_rate": 0.00018930660230631976,
      "loss": 0.459,
      "step": 1558
    },
    {
      "epoch": 0.46788715486194477,
      "grad_norm": 0.1441972553730011,
      "learning_rate": 0.0001892830241507317,
      "loss": 0.4591,
      "step": 1559
    },
    {
      "epoch": 0.468187274909964,
      "grad_norm": 0.14738333225250244,
      "learning_rate": 0.0001892594215013697,
      "loss": 0.528,
      "step": 1560
    },
    {
      "epoch": 0.4684873949579832,
      "grad_norm": 0.15935885906219482,
      "learning_rate": 0.00018923579436470894,
      "loss": 0.5305,
      "step": 1561
    },
    {
      "epoch": 0.4687875150060024,
      "grad_norm": 0.13202980160713196,
      "learning_rate": 0.0001892121427472312,
      "loss": 0.4403,
      "step": 1562
    },
    {
      "epoch": 0.4690876350540216,
      "grad_norm": 0.13312020897865295,
      "learning_rate": 0.00018918846665542507,
      "loss": 0.3999,
      "step": 1563
    },
    {
      "epoch": 0.46938775510204084,
      "grad_norm": 0.14565500617027283,
      "learning_rate": 0.00018916476609578582,
      "loss": 0.486,
      "step": 1564
    },
    {
      "epoch": 0.46968787515006,
      "grad_norm": 0.16923834383487701,
      "learning_rate": 0.00018914104107481538,
      "loss": 0.4643,
      "step": 1565
    },
    {
      "epoch": 0.4699879951980792,
      "grad_norm": 0.13346311450004578,
      "learning_rate": 0.00018911729159902247,
      "loss": 0.429,
      "step": 1566
    },
    {
      "epoch": 0.4702881152460984,
      "grad_norm": 0.18116609752178192,
      "learning_rate": 0.0001890935176749225,
      "loss": 0.4765,
      "step": 1567
    },
    {
      "epoch": 0.47058823529411764,
      "grad_norm": 0.1305547058582306,
      "learning_rate": 0.0001890697193090375,
      "loss": 0.4561,
      "step": 1568
    },
    {
      "epoch": 0.47088835534213686,
      "grad_norm": 0.14567294716835022,
      "learning_rate": 0.00018904589650789642,
      "loss": 0.569,
      "step": 1569
    },
    {
      "epoch": 0.47118847539015607,
      "grad_norm": 0.14942717552185059,
      "learning_rate": 0.00018902204927803462,
      "loss": 0.4551,
      "step": 1570
    },
    {
      "epoch": 0.4714885954381753,
      "grad_norm": 0.14293356239795685,
      "learning_rate": 0.0001889981776259944,
      "loss": 0.4829,
      "step": 1571
    },
    {
      "epoch": 0.4717887154861945,
      "grad_norm": 0.17996667325496674,
      "learning_rate": 0.00018897428155832465,
      "loss": 0.4385,
      "step": 1572
    },
    {
      "epoch": 0.47208883553421366,
      "grad_norm": 0.14416664838790894,
      "learning_rate": 0.000188950361081581,
      "loss": 0.5011,
      "step": 1573
    },
    {
      "epoch": 0.4723889555822329,
      "grad_norm": 0.14845241606235504,
      "learning_rate": 0.00018892641620232574,
      "loss": 0.4433,
      "step": 1574
    },
    {
      "epoch": 0.4726890756302521,
      "grad_norm": 0.14318734407424927,
      "learning_rate": 0.00018890244692712786,
      "loss": 0.4827,
      "step": 1575
    },
    {
      "epoch": 0.4729891956782713,
      "grad_norm": 0.1348116546869278,
      "learning_rate": 0.00018887845326256308,
      "loss": 0.4357,
      "step": 1576
    },
    {
      "epoch": 0.4732893157262905,
      "grad_norm": 0.1515018492937088,
      "learning_rate": 0.00018885443521521377,
      "loss": 0.5059,
      "step": 1577
    },
    {
      "epoch": 0.47358943577430973,
      "grad_norm": 0.1488025188446045,
      "learning_rate": 0.00018883039279166903,
      "loss": 0.4918,
      "step": 1578
    },
    {
      "epoch": 0.47388955582232895,
      "grad_norm": 0.1464966982603073,
      "learning_rate": 0.0001888063259985246,
      "loss": 0.4734,
      "step": 1579
    },
    {
      "epoch": 0.47418967587034816,
      "grad_norm": 0.15424638986587524,
      "learning_rate": 0.00018878223484238295,
      "loss": 0.4756,
      "step": 1580
    },
    {
      "epoch": 0.4744897959183674,
      "grad_norm": 0.14891758561134338,
      "learning_rate": 0.00018875811932985318,
      "loss": 0.5217,
      "step": 1581
    },
    {
      "epoch": 0.47478991596638653,
      "grad_norm": 0.1575443595647812,
      "learning_rate": 0.00018873397946755116,
      "loss": 0.4884,
      "step": 1582
    },
    {
      "epoch": 0.47509003601440575,
      "grad_norm": 0.13640117645263672,
      "learning_rate": 0.00018870981526209932,
      "loss": 0.4691,
      "step": 1583
    },
    {
      "epoch": 0.47539015606242496,
      "grad_norm": 0.1447877287864685,
      "learning_rate": 0.00018868562672012687,
      "loss": 0.4475,
      "step": 1584
    },
    {
      "epoch": 0.4756902761104442,
      "grad_norm": 0.16914622485637665,
      "learning_rate": 0.0001886614138482697,
      "loss": 0.4971,
      "step": 1585
    },
    {
      "epoch": 0.4759903961584634,
      "grad_norm": 0.14456208050251007,
      "learning_rate": 0.0001886371766531702,
      "loss": 0.475,
      "step": 1586
    },
    {
      "epoch": 0.4762905162064826,
      "grad_norm": 0.13137194514274597,
      "learning_rate": 0.00018861291514147768,
      "loss": 0.4211,
      "step": 1587
    },
    {
      "epoch": 0.4765906362545018,
      "grad_norm": 0.1403452455997467,
      "learning_rate": 0.000188588629319848,
      "loss": 0.4933,
      "step": 1588
    },
    {
      "epoch": 0.47689075630252103,
      "grad_norm": 0.1483708769083023,
      "learning_rate": 0.00018856431919494365,
      "loss": 0.5357,
      "step": 1589
    },
    {
      "epoch": 0.4771908763505402,
      "grad_norm": 0.16883468627929688,
      "learning_rate": 0.00018853998477343385,
      "loss": 0.4721,
      "step": 1590
    },
    {
      "epoch": 0.4774909963985594,
      "grad_norm": 0.16150923073291779,
      "learning_rate": 0.0001885156260619945,
      "loss": 0.4541,
      "step": 1591
    },
    {
      "epoch": 0.4777911164465786,
      "grad_norm": 0.15299014747142792,
      "learning_rate": 0.0001884912430673081,
      "loss": 0.4627,
      "step": 1592
    },
    {
      "epoch": 0.47809123649459784,
      "grad_norm": 0.1427268385887146,
      "learning_rate": 0.0001884668357960639,
      "loss": 0.4164,
      "step": 1593
    },
    {
      "epoch": 0.47839135654261705,
      "grad_norm": 0.14231397211551666,
      "learning_rate": 0.00018844240425495767,
      "loss": 0.4666,
      "step": 1594
    },
    {
      "epoch": 0.47869147659063627,
      "grad_norm": 0.14517545700073242,
      "learning_rate": 0.00018841794845069195,
      "loss": 0.4795,
      "step": 1595
    },
    {
      "epoch": 0.4789915966386555,
      "grad_norm": 0.14669832587242126,
      "learning_rate": 0.00018839346838997594,
      "loss": 0.523,
      "step": 1596
    },
    {
      "epoch": 0.4792917166866747,
      "grad_norm": 0.14146114885807037,
      "learning_rate": 0.00018836896407952548,
      "loss": 0.4394,
      "step": 1597
    },
    {
      "epoch": 0.47959183673469385,
      "grad_norm": 0.4386819303035736,
      "learning_rate": 0.000188344435526063,
      "loss": 0.4967,
      "step": 1598
    },
    {
      "epoch": 0.47989195678271307,
      "grad_norm": 0.1534242480993271,
      "learning_rate": 0.00018831988273631763,
      "loss": 0.5025,
      "step": 1599
    },
    {
      "epoch": 0.4801920768307323,
      "grad_norm": 0.14127042889595032,
      "learning_rate": 0.00018829530571702515,
      "loss": 0.4793,
      "step": 1600
    },
    {
      "epoch": 0.4804921968787515,
      "grad_norm": 0.1541667878627777,
      "learning_rate": 0.00018827070447492803,
      "loss": 0.4118,
      "step": 1601
    },
    {
      "epoch": 0.4807923169267707,
      "grad_norm": 0.29758089780807495,
      "learning_rate": 0.00018824607901677526,
      "loss": 0.5182,
      "step": 1602
    },
    {
      "epoch": 0.4810924369747899,
      "grad_norm": 0.13616251945495605,
      "learning_rate": 0.00018822142934932261,
      "loss": 0.4765,
      "step": 1603
    },
    {
      "epoch": 0.48139255702280914,
      "grad_norm": 0.14783746004104614,
      "learning_rate": 0.00018819675547933243,
      "loss": 0.4987,
      "step": 1604
    },
    {
      "epoch": 0.48169267707082836,
      "grad_norm": 0.1417647898197174,
      "learning_rate": 0.0001881720574135737,
      "loss": 0.4763,
      "step": 1605
    },
    {
      "epoch": 0.4819927971188475,
      "grad_norm": 0.1706775277853012,
      "learning_rate": 0.000188147335158822,
      "loss": 0.4747,
      "step": 1606
    },
    {
      "epoch": 0.48229291716686673,
      "grad_norm": 0.13298912346363068,
      "learning_rate": 0.0001881225887218597,
      "loss": 0.4387,
      "step": 1607
    },
    {
      "epoch": 0.48259303721488594,
      "grad_norm": 0.13805459439754486,
      "learning_rate": 0.00018809781810947564,
      "loss": 0.5038,
      "step": 1608
    },
    {
      "epoch": 0.48289315726290516,
      "grad_norm": 0.1466389149427414,
      "learning_rate": 0.00018807302332846538,
      "loss": 0.3996,
      "step": 1609
    },
    {
      "epoch": 0.4831932773109244,
      "grad_norm": 0.14047665894031525,
      "learning_rate": 0.000188048204385631,
      "loss": 0.4513,
      "step": 1610
    },
    {
      "epoch": 0.4834933973589436,
      "grad_norm": 0.1531951129436493,
      "learning_rate": 0.00018802336128778143,
      "loss": 0.5707,
      "step": 1611
    },
    {
      "epoch": 0.4837935174069628,
      "grad_norm": 0.19110116362571716,
      "learning_rate": 0.000187998494041732,
      "loss": 0.5178,
      "step": 1612
    },
    {
      "epoch": 0.484093637454982,
      "grad_norm": 0.13959315419197083,
      "learning_rate": 0.00018797360265430474,
      "loss": 0.4237,
      "step": 1613
    },
    {
      "epoch": 0.4843937575030012,
      "grad_norm": 0.15554258227348328,
      "learning_rate": 0.0001879486871323284,
      "loss": 0.5351,
      "step": 1614
    },
    {
      "epoch": 0.4846938775510204,
      "grad_norm": 0.1472192108631134,
      "learning_rate": 0.00018792374748263817,
      "loss": 0.4626,
      "step": 1615
    },
    {
      "epoch": 0.4849939975990396,
      "grad_norm": 0.14186328649520874,
      "learning_rate": 0.00018789878371207604,
      "loss": 0.4982,
      "step": 1616
    },
    {
      "epoch": 0.4852941176470588,
      "grad_norm": 0.14574813842773438,
      "learning_rate": 0.00018787379582749046,
      "loss": 0.4851,
      "step": 1617
    },
    {
      "epoch": 0.48559423769507803,
      "grad_norm": 0.15164965391159058,
      "learning_rate": 0.00018784878383573664,
      "loss": 0.4579,
      "step": 1618
    },
    {
      "epoch": 0.48589435774309725,
      "grad_norm": 0.13515906035900116,
      "learning_rate": 0.00018782374774367627,
      "loss": 0.4339,
      "step": 1619
    },
    {
      "epoch": 0.48619447779111646,
      "grad_norm": 0.15253469347953796,
      "learning_rate": 0.00018779868755817777,
      "loss": 0.5466,
      "step": 1620
    },
    {
      "epoch": 0.4864945978391357,
      "grad_norm": 0.1400163322687149,
      "learning_rate": 0.00018777360328611607,
      "loss": 0.4704,
      "step": 1621
    },
    {
      "epoch": 0.48679471788715484,
      "grad_norm": 0.13967138528823853,
      "learning_rate": 0.0001877484949343728,
      "loss": 0.4404,
      "step": 1622
    },
    {
      "epoch": 0.48709483793517405,
      "grad_norm": 0.14153768122196198,
      "learning_rate": 0.00018772336250983608,
      "loss": 0.489,
      "step": 1623
    },
    {
      "epoch": 0.48739495798319327,
      "grad_norm": 0.14577268064022064,
      "learning_rate": 0.0001876982060194008,
      "loss": 0.4514,
      "step": 1624
    },
    {
      "epoch": 0.4876950780312125,
      "grad_norm": 0.44660818576812744,
      "learning_rate": 0.00018767302546996825,
      "loss": 0.4758,
      "step": 1625
    },
    {
      "epoch": 0.4879951980792317,
      "grad_norm": 0.14724412560462952,
      "learning_rate": 0.00018764782086844647,
      "loss": 0.4163,
      "step": 1626
    },
    {
      "epoch": 0.4882953181272509,
      "grad_norm": 0.1457844376564026,
      "learning_rate": 0.0001876225922217501,
      "loss": 0.4749,
      "step": 1627
    },
    {
      "epoch": 0.4885954381752701,
      "grad_norm": 0.14120006561279297,
      "learning_rate": 0.00018759733953680025,
      "loss": 0.5085,
      "step": 1628
    },
    {
      "epoch": 0.48889555822328934,
      "grad_norm": 0.13881921768188477,
      "learning_rate": 0.00018757206282052474,
      "loss": 0.4957,
      "step": 1629
    },
    {
      "epoch": 0.4891956782713085,
      "grad_norm": 0.19758376479148865,
      "learning_rate": 0.00018754676207985798,
      "loss": 0.4463,
      "step": 1630
    },
    {
      "epoch": 0.4894957983193277,
      "grad_norm": 0.1473570466041565,
      "learning_rate": 0.00018752143732174087,
      "loss": 0.5127,
      "step": 1631
    },
    {
      "epoch": 0.4897959183673469,
      "grad_norm": 0.13440637290477753,
      "learning_rate": 0.000187496088553121,
      "loss": 0.4459,
      "step": 1632
    },
    {
      "epoch": 0.49009603841536614,
      "grad_norm": 0.14222657680511475,
      "learning_rate": 0.00018747071578095254,
      "loss": 0.4833,
      "step": 1633
    },
    {
      "epoch": 0.49039615846338536,
      "grad_norm": 0.1522848904132843,
      "learning_rate": 0.00018744531901219617,
      "loss": 0.5005,
      "step": 1634
    },
    {
      "epoch": 0.49069627851140457,
      "grad_norm": 0.1476879119873047,
      "learning_rate": 0.00018741989825381928,
      "loss": 0.4714,
      "step": 1635
    },
    {
      "epoch": 0.4909963985594238,
      "grad_norm": 0.1372116506099701,
      "learning_rate": 0.00018739445351279566,
      "loss": 0.4604,
      "step": 1636
    },
    {
      "epoch": 0.491296518607443,
      "grad_norm": 0.1398506462574005,
      "learning_rate": 0.00018736898479610584,
      "loss": 0.4741,
      "step": 1637
    },
    {
      "epoch": 0.49159663865546216,
      "grad_norm": 0.19208797812461853,
      "learning_rate": 0.0001873434921107369,
      "loss": 0.4623,
      "step": 1638
    },
    {
      "epoch": 0.4918967587034814,
      "grad_norm": 0.16721111536026,
      "learning_rate": 0.00018731797546368243,
      "loss": 0.4741,
      "step": 1639
    },
    {
      "epoch": 0.4921968787515006,
      "grad_norm": 0.1624535322189331,
      "learning_rate": 0.00018729243486194258,
      "loss": 0.5396,
      "step": 1640
    },
    {
      "epoch": 0.4924969987995198,
      "grad_norm": 0.16763810813426971,
      "learning_rate": 0.0001872668703125242,
      "loss": 0.4297,
      "step": 1641
    },
    {
      "epoch": 0.492797118847539,
      "grad_norm": 0.13680794835090637,
      "learning_rate": 0.00018724128182244062,
      "loss": 0.4482,
      "step": 1642
    },
    {
      "epoch": 0.49309723889555823,
      "grad_norm": 0.14087484776973724,
      "learning_rate": 0.00018721566939871172,
      "loss": 0.4711,
      "step": 1643
    },
    {
      "epoch": 0.49339735894357745,
      "grad_norm": 0.14618854224681854,
      "learning_rate": 0.000187190033048364,
      "loss": 0.4466,
      "step": 1644
    },
    {
      "epoch": 0.49369747899159666,
      "grad_norm": 0.13818864524364471,
      "learning_rate": 0.00018716437277843046,
      "loss": 0.4859,
      "step": 1645
    },
    {
      "epoch": 0.4939975990396158,
      "grad_norm": 0.14081290364265442,
      "learning_rate": 0.00018713868859595074,
      "loss": 0.4132,
      "step": 1646
    },
    {
      "epoch": 0.49429771908763503,
      "grad_norm": 0.16399510204792023,
      "learning_rate": 0.00018711298050797098,
      "loss": 0.4874,
      "step": 1647
    },
    {
      "epoch": 0.49459783913565425,
      "grad_norm": 0.14325211942195892,
      "learning_rate": 0.0001870872485215439,
      "loss": 0.4684,
      "step": 1648
    },
    {
      "epoch": 0.49489795918367346,
      "grad_norm": 0.19651995599269867,
      "learning_rate": 0.0001870614926437288,
      "loss": 0.4346,
      "step": 1649
    },
    {
      "epoch": 0.4951980792316927,
      "grad_norm": 0.15420101583003998,
      "learning_rate": 0.0001870357128815915,
      "loss": 0.4813,
      "step": 1650
    },
    {
      "epoch": 0.4954981992797119,
      "grad_norm": 0.13209006190299988,
      "learning_rate": 0.0001870099092422043,
      "loss": 0.411,
      "step": 1651
    },
    {
      "epoch": 0.4957983193277311,
      "grad_norm": 0.37966907024383545,
      "learning_rate": 0.00018698408173264627,
      "loss": 0.4548,
      "step": 1652
    },
    {
      "epoch": 0.4960984393757503,
      "grad_norm": 0.1550043672323227,
      "learning_rate": 0.0001869582303600028,
      "loss": 0.4663,
      "step": 1653
    },
    {
      "epoch": 0.4963985594237695,
      "grad_norm": 0.14909137785434723,
      "learning_rate": 0.00018693235513136597,
      "loss": 0.4915,
      "step": 1654
    },
    {
      "epoch": 0.4966986794717887,
      "grad_norm": 0.12357178330421448,
      "learning_rate": 0.00018690645605383432,
      "loss": 0.4396,
      "step": 1655
    },
    {
      "epoch": 0.4969987995198079,
      "grad_norm": 0.2299381047487259,
      "learning_rate": 0.00018688053313451296,
      "loss": 0.4607,
      "step": 1656
    },
    {
      "epoch": 0.4972989195678271,
      "grad_norm": 0.1735653132200241,
      "learning_rate": 0.00018685458638051361,
      "loss": 0.4232,
      "step": 1657
    },
    {
      "epoch": 0.49759903961584634,
      "grad_norm": 0.14225593209266663,
      "learning_rate": 0.00018682861579895436,
      "loss": 0.4265,
      "step": 1658
    },
    {
      "epoch": 0.49789915966386555,
      "grad_norm": 0.15407206118106842,
      "learning_rate": 0.00018680262139695997,
      "loss": 0.4791,
      "step": 1659
    },
    {
      "epoch": 0.49819927971188477,
      "grad_norm": 0.13994479179382324,
      "learning_rate": 0.00018677660318166178,
      "loss": 0.4441,
      "step": 1660
    },
    {
      "epoch": 0.498499399759904,
      "grad_norm": 0.15098072588443756,
      "learning_rate": 0.00018675056116019753,
      "loss": 0.4234,
      "step": 1661
    },
    {
      "epoch": 0.49879951980792314,
      "grad_norm": 0.17504270374774933,
      "learning_rate": 0.00018672449533971156,
      "loss": 0.487,
      "step": 1662
    },
    {
      "epoch": 0.49909963985594236,
      "grad_norm": 0.14800134301185608,
      "learning_rate": 0.00018669840572735472,
      "loss": 0.4558,
      "step": 1663
    },
    {
      "epoch": 0.49939975990396157,
      "grad_norm": 0.13439930975437164,
      "learning_rate": 0.0001866722923302844,
      "loss": 0.4529,
      "step": 1664
    },
    {
      "epoch": 0.4996998799519808,
      "grad_norm": 0.13524958491325378,
      "learning_rate": 0.0001866461551556645,
      "loss": 0.423,
      "step": 1665
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.1444150060415268,
      "learning_rate": 0.0001866199942106655,
      "loss": 0.4748,
      "step": 1666
    },
    {
      "epoch": 0.5003001200480192,
      "grad_norm": 0.14408966898918152,
      "learning_rate": 0.00018659380950246434,
      "loss": 0.4791,
      "step": 1667
    },
    {
      "epoch": 0.5006002400960384,
      "grad_norm": 0.18511995673179626,
      "learning_rate": 0.0001865676010382444,
      "loss": 0.4899,
      "step": 1668
    },
    {
      "epoch": 0.5009003601440576,
      "grad_norm": 0.14206524193286896,
      "learning_rate": 0.00018654136882519578,
      "loss": 0.4371,
      "step": 1669
    },
    {
      "epoch": 0.5012004801920769,
      "grad_norm": 0.1453809291124344,
      "learning_rate": 0.000186515112870515,
      "loss": 0.4839,
      "step": 1670
    },
    {
      "epoch": 0.501500600240096,
      "grad_norm": 0.13603635132312775,
      "learning_rate": 0.000186488833181405,
      "loss": 0.4679,
      "step": 1671
    },
    {
      "epoch": 0.5018007202881153,
      "grad_norm": 0.2923451066017151,
      "learning_rate": 0.00018646252976507537,
      "loss": 0.5172,
      "step": 1672
    },
    {
      "epoch": 0.5021008403361344,
      "grad_norm": 0.14126873016357422,
      "learning_rate": 0.0001864362026287421,
      "loss": 0.4173,
      "step": 1673
    },
    {
      "epoch": 0.5024009603841537,
      "grad_norm": 0.16036510467529297,
      "learning_rate": 0.00018640985177962783,
      "loss": 0.4706,
      "step": 1674
    },
    {
      "epoch": 0.5027010804321729,
      "grad_norm": 0.18069399893283844,
      "learning_rate": 0.0001863834772249615,
      "loss": 0.5106,
      "step": 1675
    },
    {
      "epoch": 0.503001200480192,
      "grad_norm": 0.17470668256282806,
      "learning_rate": 0.00018635707897197873,
      "loss": 0.4521,
      "step": 1676
    },
    {
      "epoch": 0.5033013205282113,
      "grad_norm": 0.14301811158657074,
      "learning_rate": 0.00018633065702792153,
      "loss": 0.4877,
      "step": 1677
    },
    {
      "epoch": 0.5036014405762305,
      "grad_norm": 0.14429409801959991,
      "learning_rate": 0.00018630421140003854,
      "loss": 0.5012,
      "step": 1678
    },
    {
      "epoch": 0.5039015606242497,
      "grad_norm": 0.1368817389011383,
      "learning_rate": 0.0001862777420955847,
      "loss": 0.439,
      "step": 1679
    },
    {
      "epoch": 0.5042016806722689,
      "grad_norm": 0.1479075849056244,
      "learning_rate": 0.0001862512491218217,
      "loss": 0.4278,
      "step": 1680
    },
    {
      "epoch": 0.5045018007202882,
      "grad_norm": 0.13655149936676025,
      "learning_rate": 0.00018622473248601748,
      "loss": 0.4818,
      "step": 1681
    },
    {
      "epoch": 0.5048019207683073,
      "grad_norm": 0.7065068483352661,
      "learning_rate": 0.00018619819219544662,
      "loss": 0.492,
      "step": 1682
    },
    {
      "epoch": 0.5051020408163265,
      "grad_norm": 0.1435791552066803,
      "learning_rate": 0.00018617162825739013,
      "loss": 0.4943,
      "step": 1683
    },
    {
      "epoch": 0.5054021608643458,
      "grad_norm": 0.12881679832935333,
      "learning_rate": 0.0001861450406791355,
      "loss": 0.4289,
      "step": 1684
    },
    {
      "epoch": 0.5057022809123649,
      "grad_norm": 0.15346279740333557,
      "learning_rate": 0.00018611842946797676,
      "loss": 0.4951,
      "step": 1685
    },
    {
      "epoch": 0.5060024009603842,
      "grad_norm": 0.1458282768726349,
      "learning_rate": 0.0001860917946312144,
      "loss": 0.4993,
      "step": 1686
    },
    {
      "epoch": 0.5063025210084033,
      "grad_norm": 0.17145638167858124,
      "learning_rate": 0.00018606513617615533,
      "loss": 0.4638,
      "step": 1687
    },
    {
      "epoch": 0.5066026410564226,
      "grad_norm": 0.1460111290216446,
      "learning_rate": 0.00018603845411011303,
      "loss": 0.4975,
      "step": 1688
    },
    {
      "epoch": 0.5069027611044418,
      "grad_norm": 0.13614998757839203,
      "learning_rate": 0.00018601174844040742,
      "loss": 0.3831,
      "step": 1689
    },
    {
      "epoch": 0.507202881152461,
      "grad_norm": 0.13792163133621216,
      "learning_rate": 0.00018598501917436487,
      "loss": 0.4625,
      "step": 1690
    },
    {
      "epoch": 0.5075030012004802,
      "grad_norm": 0.19221265614032745,
      "learning_rate": 0.0001859582663193183,
      "loss": 0.4357,
      "step": 1691
    },
    {
      "epoch": 0.5078031212484994,
      "grad_norm": 0.1561165750026703,
      "learning_rate": 0.000185931489882607,
      "loss": 0.4401,
      "step": 1692
    },
    {
      "epoch": 0.5081032412965186,
      "grad_norm": 0.19886402785778046,
      "learning_rate": 0.00018590468987157678,
      "loss": 0.4883,
      "step": 1693
    },
    {
      "epoch": 0.5084033613445378,
      "grad_norm": 0.14886119961738586,
      "learning_rate": 0.00018587786629357993,
      "loss": 0.468,
      "step": 1694
    },
    {
      "epoch": 0.508703481392557,
      "grad_norm": 0.13450387120246887,
      "learning_rate": 0.00018585101915597518,
      "loss": 0.4569,
      "step": 1695
    },
    {
      "epoch": 0.5090036014405762,
      "grad_norm": 0.14112287759780884,
      "learning_rate": 0.00018582414846612775,
      "loss": 0.467,
      "step": 1696
    },
    {
      "epoch": 0.5093037214885955,
      "grad_norm": 0.15156422555446625,
      "learning_rate": 0.0001857972542314093,
      "loss": 0.4965,
      "step": 1697
    },
    {
      "epoch": 0.5096038415366146,
      "grad_norm": 0.13746501505374908,
      "learning_rate": 0.00018577033645919794,
      "loss": 0.4749,
      "step": 1698
    },
    {
      "epoch": 0.5099039615846338,
      "grad_norm": 0.1336117684841156,
      "learning_rate": 0.0001857433951568783,
      "loss": 0.46,
      "step": 1699
    },
    {
      "epoch": 0.5102040816326531,
      "grad_norm": 0.1843332201242447,
      "learning_rate": 0.00018571643033184136,
      "loss": 0.4509,
      "step": 1700
    },
    {
      "epoch": 0.5105042016806722,
      "grad_norm": 0.148481085896492,
      "learning_rate": 0.00018568944199148462,
      "loss": 0.4617,
      "step": 1701
    },
    {
      "epoch": 0.5108043217286915,
      "grad_norm": 0.13783060014247894,
      "learning_rate": 0.00018566243014321205,
      "loss": 0.4891,
      "step": 1702
    },
    {
      "epoch": 0.5111044417767107,
      "grad_norm": 0.1354529857635498,
      "learning_rate": 0.00018563539479443404,
      "loss": 0.4588,
      "step": 1703
    },
    {
      "epoch": 0.5114045618247299,
      "grad_norm": 0.16476033627986908,
      "learning_rate": 0.0001856083359525674,
      "loss": 0.5441,
      "step": 1704
    },
    {
      "epoch": 0.5117046818727491,
      "grad_norm": 0.18820038437843323,
      "learning_rate": 0.00018558125362503543,
      "loss": 0.4573,
      "step": 1705
    },
    {
      "epoch": 0.5120048019207684,
      "grad_norm": 0.14193595945835114,
      "learning_rate": 0.00018555414781926786,
      "loss": 0.4989,
      "step": 1706
    },
    {
      "epoch": 0.5123049219687875,
      "grad_norm": 0.13493549823760986,
      "learning_rate": 0.00018552701854270082,
      "loss": 0.4456,
      "step": 1707
    },
    {
      "epoch": 0.5126050420168067,
      "grad_norm": 0.138336643576622,
      "learning_rate": 0.000185499865802777,
      "loss": 0.4975,
      "step": 1708
    },
    {
      "epoch": 0.512905162064826,
      "grad_norm": 0.1490534096956253,
      "learning_rate": 0.00018547268960694533,
      "loss": 0.469,
      "step": 1709
    },
    {
      "epoch": 0.5132052821128451,
      "grad_norm": 0.18119500577449799,
      "learning_rate": 0.00018544548996266138,
      "loss": 0.4968,
      "step": 1710
    },
    {
      "epoch": 0.5135054021608644,
      "grad_norm": 0.14145724475383759,
      "learning_rate": 0.000185418266877387,
      "loss": 0.4767,
      "step": 1711
    },
    {
      "epoch": 0.5138055222088835,
      "grad_norm": 0.151786208152771,
      "learning_rate": 0.00018539102035859057,
      "loss": 0.5035,
      "step": 1712
    },
    {
      "epoch": 0.5141056422569028,
      "grad_norm": 0.33611804246902466,
      "learning_rate": 0.00018536375041374684,
      "loss": 0.4856,
      "step": 1713
    },
    {
      "epoch": 0.514405762304922,
      "grad_norm": 0.1260775625705719,
      "learning_rate": 0.00018533645705033703,
      "loss": 0.3688,
      "step": 1714
    },
    {
      "epoch": 0.5147058823529411,
      "grad_norm": 0.1975342482328415,
      "learning_rate": 0.00018530914027584875,
      "loss": 0.5317,
      "step": 1715
    },
    {
      "epoch": 0.5150060024009604,
      "grad_norm": 0.14410541951656342,
      "learning_rate": 0.00018528180009777601,
      "loss": 0.4889,
      "step": 1716
    },
    {
      "epoch": 0.5153061224489796,
      "grad_norm": 0.15689077973365784,
      "learning_rate": 0.00018525443652361935,
      "loss": 0.5142,
      "step": 1717
    },
    {
      "epoch": 0.5156062424969988,
      "grad_norm": 0.1498923897743225,
      "learning_rate": 0.00018522704956088558,
      "loss": 0.5111,
      "step": 1718
    },
    {
      "epoch": 0.515906362545018,
      "grad_norm": 0.13396055996418,
      "learning_rate": 0.00018519963921708805,
      "loss": 0.4741,
      "step": 1719
    },
    {
      "epoch": 0.5162064825930373,
      "grad_norm": 0.13936327397823334,
      "learning_rate": 0.00018517220549974642,
      "loss": 0.4694,
      "step": 1720
    },
    {
      "epoch": 0.5165066026410564,
      "grad_norm": 0.16161802411079407,
      "learning_rate": 0.00018514474841638685,
      "loss": 0.4651,
      "step": 1721
    },
    {
      "epoch": 0.5168067226890757,
      "grad_norm": 0.1398150771856308,
      "learning_rate": 0.00018511726797454189,
      "loss": 0.4367,
      "step": 1722
    },
    {
      "epoch": 0.5171068427370948,
      "grad_norm": 0.1277453601360321,
      "learning_rate": 0.00018508976418175045,
      "loss": 0.4314,
      "step": 1723
    },
    {
      "epoch": 0.517406962785114,
      "grad_norm": 0.14832301437854767,
      "learning_rate": 0.0001850622370455579,
      "loss": 0.5337,
      "step": 1724
    },
    {
      "epoch": 0.5177070828331333,
      "grad_norm": 0.1403554230928421,
      "learning_rate": 0.000185034686573516,
      "loss": 0.4656,
      "step": 1725
    },
    {
      "epoch": 0.5180072028811524,
      "grad_norm": 0.14238956570625305,
      "learning_rate": 0.00018500711277318288,
      "loss": 0.4612,
      "step": 1726
    },
    {
      "epoch": 0.5183073229291717,
      "grad_norm": 0.13832856714725494,
      "learning_rate": 0.0001849795156521231,
      "loss": 0.4691,
      "step": 1727
    },
    {
      "epoch": 0.5186074429771909,
      "grad_norm": 0.17772655189037323,
      "learning_rate": 0.00018495189521790766,
      "loss": 0.4763,
      "step": 1728
    },
    {
      "epoch": 0.5189075630252101,
      "grad_norm": 0.1370764821767807,
      "learning_rate": 0.00018492425147811385,
      "loss": 0.4635,
      "step": 1729
    },
    {
      "epoch": 0.5192076830732293,
      "grad_norm": 0.15430189669132233,
      "learning_rate": 0.00018489658444032544,
      "loss": 0.5373,
      "step": 1730
    },
    {
      "epoch": 0.5195078031212484,
      "grad_norm": 0.1377253532409668,
      "learning_rate": 0.0001848688941121326,
      "loss": 0.4291,
      "step": 1731
    },
    {
      "epoch": 0.5198079231692677,
      "grad_norm": 0.1475270688533783,
      "learning_rate": 0.00018484118050113177,
      "loss": 0.4561,
      "step": 1732
    },
    {
      "epoch": 0.5201080432172869,
      "grad_norm": 0.17185480892658234,
      "learning_rate": 0.00018481344361492592,
      "loss": 0.5085,
      "step": 1733
    },
    {
      "epoch": 0.5204081632653061,
      "grad_norm": 0.14446613192558289,
      "learning_rate": 0.00018478568346112434,
      "loss": 0.4896,
      "step": 1734
    },
    {
      "epoch": 0.5207082833133253,
      "grad_norm": 0.1468825787305832,
      "learning_rate": 0.0001847579000473427,
      "loss": 0.4509,
      "step": 1735
    },
    {
      "epoch": 0.5210084033613446,
      "grad_norm": 0.12412779033184052,
      "learning_rate": 0.00018473009338120308,
      "loss": 0.4299,
      "step": 1736
    },
    {
      "epoch": 0.5213085234093637,
      "grad_norm": 0.19421713054180145,
      "learning_rate": 0.00018470226347033387,
      "loss": 0.5167,
      "step": 1737
    },
    {
      "epoch": 0.521608643457383,
      "grad_norm": 0.2185223549604416,
      "learning_rate": 0.00018467441032236995,
      "loss": 0.4709,
      "step": 1738
    },
    {
      "epoch": 0.5219087635054022,
      "grad_norm": 0.17113527655601501,
      "learning_rate": 0.0001846465339449525,
      "loss": 0.489,
      "step": 1739
    },
    {
      "epoch": 0.5222088835534213,
      "grad_norm": 0.1450916826725006,
      "learning_rate": 0.00018461863434572905,
      "loss": 0.5109,
      "step": 1740
    },
    {
      "epoch": 0.5225090036014406,
      "grad_norm": 0.13966700434684753,
      "learning_rate": 0.00018459071153235356,
      "loss": 0.4244,
      "step": 1741
    },
    {
      "epoch": 0.5228091236494598,
      "grad_norm": 0.26031041145324707,
      "learning_rate": 0.0001845627655124863,
      "loss": 0.5413,
      "step": 1742
    },
    {
      "epoch": 0.523109243697479,
      "grad_norm": 0.14094829559326172,
      "learning_rate": 0.000184534796293794,
      "loss": 0.4504,
      "step": 1743
    },
    {
      "epoch": 0.5234093637454982,
      "grad_norm": 0.29313942790031433,
      "learning_rate": 0.00018450680388394967,
      "loss": 0.428,
      "step": 1744
    },
    {
      "epoch": 0.5237094837935174,
      "grad_norm": 0.135672926902771,
      "learning_rate": 0.00018447878829063268,
      "loss": 0.4074,
      "step": 1745
    },
    {
      "epoch": 0.5240096038415366,
      "grad_norm": 0.1842111051082611,
      "learning_rate": 0.0001844507495215288,
      "loss": 0.4195,
      "step": 1746
    },
    {
      "epoch": 0.5243097238895558,
      "grad_norm": 0.2752435505390167,
      "learning_rate": 0.0001844226875843302,
      "loss": 0.4208,
      "step": 1747
    },
    {
      "epoch": 0.524609843937575,
      "grad_norm": 0.15792347490787506,
      "learning_rate": 0.00018439460248673522,
      "loss": 0.4752,
      "step": 1748
    },
    {
      "epoch": 0.5249099639855942,
      "grad_norm": 0.732960045337677,
      "learning_rate": 0.00018436649423644882,
      "loss": 0.4421,
      "step": 1749
    },
    {
      "epoch": 0.5252100840336135,
      "grad_norm": 0.23590520024299622,
      "learning_rate": 0.0001843383628411821,
      "loss": 0.4635,
      "step": 1750
    },
    {
      "epoch": 0.5255102040816326,
      "grad_norm": 0.23066602647304535,
      "learning_rate": 0.0001843102083086526,
      "loss": 0.4581,
      "step": 1751
    },
    {
      "epoch": 0.5258103241296519,
      "grad_norm": 0.20254339277744293,
      "learning_rate": 0.00018428203064658422,
      "loss": 0.497,
      "step": 1752
    },
    {
      "epoch": 0.526110444177671,
      "grad_norm": 0.24237699806690216,
      "learning_rate": 0.00018425382986270717,
      "loss": 0.4573,
      "step": 1753
    },
    {
      "epoch": 0.5264105642256903,
      "grad_norm": 0.16338394582271576,
      "learning_rate": 0.000184225605964758,
      "loss": 0.4818,
      "step": 1754
    },
    {
      "epoch": 0.5267106842737095,
      "grad_norm": 0.2194354236125946,
      "learning_rate": 0.0001841973589604796,
      "loss": 0.5307,
      "step": 1755
    },
    {
      "epoch": 0.5270108043217286,
      "grad_norm": 0.15968391299247742,
      "learning_rate": 0.00018416908885762122,
      "loss": 0.4751,
      "step": 1756
    },
    {
      "epoch": 0.5273109243697479,
      "grad_norm": 0.3560429513454437,
      "learning_rate": 0.00018414079566393844,
      "loss": 0.5122,
      "step": 1757
    },
    {
      "epoch": 0.5276110444177671,
      "grad_norm": 0.2543269097805023,
      "learning_rate": 0.0001841124793871932,
      "loss": 0.462,
      "step": 1758
    },
    {
      "epoch": 0.5279111644657863,
      "grad_norm": 0.2932210862636566,
      "learning_rate": 0.00018408414003515371,
      "loss": 0.452,
      "step": 1759
    },
    {
      "epoch": 0.5282112845138055,
      "grad_norm": 0.1889122724533081,
      "learning_rate": 0.00018405577761559453,
      "loss": 0.4586,
      "step": 1760
    },
    {
      "epoch": 0.5285114045618248,
      "grad_norm": 0.16669833660125732,
      "learning_rate": 0.00018402739213629665,
      "loss": 0.4901,
      "step": 1761
    },
    {
      "epoch": 0.5288115246098439,
      "grad_norm": 0.15147271752357483,
      "learning_rate": 0.0001839989836050472,
      "loss": 0.4456,
      "step": 1762
    },
    {
      "epoch": 0.5291116446578632,
      "grad_norm": 0.24772000312805176,
      "learning_rate": 0.00018397055202963982,
      "loss": 0.4918,
      "step": 1763
    },
    {
      "epoch": 0.5294117647058824,
      "grad_norm": 0.19363531470298767,
      "learning_rate": 0.0001839420974178743,
      "loss": 0.4948,
      "step": 1764
    },
    {
      "epoch": 0.5297118847539015,
      "grad_norm": 0.17379561066627502,
      "learning_rate": 0.00018391361977755693,
      "loss": 0.4648,
      "step": 1765
    },
    {
      "epoch": 0.5300120048019208,
      "grad_norm": 0.38105544447898865,
      "learning_rate": 0.00018388511911650014,
      "loss": 0.5332,
      "step": 1766
    },
    {
      "epoch": 0.53031212484994,
      "grad_norm": 0.20324936509132385,
      "learning_rate": 0.00018385659544252283,
      "loss": 0.4776,
      "step": 1767
    },
    {
      "epoch": 0.5306122448979592,
      "grad_norm": 0.22118645906448364,
      "learning_rate": 0.00018382804876345007,
      "loss": 0.5479,
      "step": 1768
    },
    {
      "epoch": 0.5309123649459784,
      "grad_norm": 0.15739142894744873,
      "learning_rate": 0.00018379947908711336,
      "loss": 0.4636,
      "step": 1769
    },
    {
      "epoch": 0.5312124849939976,
      "grad_norm": 0.16198864579200745,
      "learning_rate": 0.0001837708864213505,
      "loss": 0.465,
      "step": 1770
    },
    {
      "epoch": 0.5315126050420168,
      "grad_norm": 0.1969040334224701,
      "learning_rate": 0.0001837422707740055,
      "loss": 0.4949,
      "step": 1771
    },
    {
      "epoch": 0.531812725090036,
      "grad_norm": 0.19768664240837097,
      "learning_rate": 0.00018371363215292873,
      "loss": 0.4686,
      "step": 1772
    },
    {
      "epoch": 0.5321128451380552,
      "grad_norm": 0.1620958149433136,
      "learning_rate": 0.00018368497056597688,
      "loss": 0.4895,
      "step": 1773
    },
    {
      "epoch": 0.5324129651860744,
      "grad_norm": 0.19767068326473236,
      "learning_rate": 0.00018365628602101295,
      "loss": 0.5202,
      "step": 1774
    },
    {
      "epoch": 0.5327130852340937,
      "grad_norm": 0.15351001918315887,
      "learning_rate": 0.0001836275785259062,
      "loss": 0.4893,
      "step": 1775
    },
    {
      "epoch": 0.5330132052821128,
      "grad_norm": 0.15768110752105713,
      "learning_rate": 0.00018359884808853222,
      "loss": 0.4987,
      "step": 1776
    },
    {
      "epoch": 0.5333133253301321,
      "grad_norm": 0.18536004424095154,
      "learning_rate": 0.00018357009471677284,
      "loss": 0.4503,
      "step": 1777
    },
    {
      "epoch": 0.5336134453781513,
      "grad_norm": 0.1558152437210083,
      "learning_rate": 0.00018354131841851623,
      "loss": 0.4222,
      "step": 1778
    },
    {
      "epoch": 0.5339135654261705,
      "grad_norm": 0.18492354452610016,
      "learning_rate": 0.00018351251920165686,
      "loss": 0.4214,
      "step": 1779
    },
    {
      "epoch": 0.5342136854741897,
      "grad_norm": 0.21126689016819,
      "learning_rate": 0.00018348369707409546,
      "loss": 0.4899,
      "step": 1780
    },
    {
      "epoch": 0.5345138055222088,
      "grad_norm": 0.30342015624046326,
      "learning_rate": 0.00018345485204373905,
      "loss": 0.5045,
      "step": 1781
    },
    {
      "epoch": 0.5348139255702281,
      "grad_norm": 0.20529039204120636,
      "learning_rate": 0.00018342598411850088,
      "loss": 0.5035,
      "step": 1782
    },
    {
      "epoch": 0.5351140456182473,
      "grad_norm": 0.16132153570652008,
      "learning_rate": 0.0001833970933063006,
      "loss": 0.4648,
      "step": 1783
    },
    {
      "epoch": 0.5354141656662665,
      "grad_norm": 0.17530177533626556,
      "learning_rate": 0.00018336817961506408,
      "loss": 0.5205,
      "step": 1784
    },
    {
      "epoch": 0.5357142857142857,
      "grad_norm": 0.3235676884651184,
      "learning_rate": 0.0001833392430527234,
      "loss": 0.5159,
      "step": 1785
    },
    {
      "epoch": 0.536014405762305,
      "grad_norm": 0.228851780295372,
      "learning_rate": 0.00018331028362721701,
      "loss": 0.5307,
      "step": 1786
    },
    {
      "epoch": 0.5363145258103241,
      "grad_norm": 0.16981162130832672,
      "learning_rate": 0.0001832813013464896,
      "loss": 0.5155,
      "step": 1787
    },
    {
      "epoch": 0.5366146458583433,
      "grad_norm": 0.15694698691368103,
      "learning_rate": 0.0001832522962184921,
      "loss": 0.4797,
      "step": 1788
    },
    {
      "epoch": 0.5369147659063626,
      "grad_norm": 0.15145984292030334,
      "learning_rate": 0.00018322326825118176,
      "loss": 0.4455,
      "step": 1789
    },
    {
      "epoch": 0.5372148859543817,
      "grad_norm": 0.20638592541217804,
      "learning_rate": 0.00018319421745252208,
      "loss": 0.4699,
      "step": 1790
    },
    {
      "epoch": 0.537515006002401,
      "grad_norm": 0.17048896849155426,
      "learning_rate": 0.0001831651438304828,
      "loss": 0.4974,
      "step": 1791
    },
    {
      "epoch": 0.5378151260504201,
      "grad_norm": 0.1427546590566635,
      "learning_rate": 0.00018313604739303988,
      "loss": 0.4574,
      "step": 1792
    },
    {
      "epoch": 0.5381152460984394,
      "grad_norm": 0.19892023503780365,
      "learning_rate": 0.00018310692814817569,
      "loss": 0.4983,
      "step": 1793
    },
    {
      "epoch": 0.5384153661464586,
      "grad_norm": 0.22113119065761566,
      "learning_rate": 0.00018307778610387868,
      "loss": 0.5172,
      "step": 1794
    },
    {
      "epoch": 0.5387154861944778,
      "grad_norm": 0.1393192708492279,
      "learning_rate": 0.0001830486212681437,
      "loss": 0.4397,
      "step": 1795
    },
    {
      "epoch": 0.539015606242497,
      "grad_norm": 0.17644095420837402,
      "learning_rate": 0.00018301943364897177,
      "loss": 0.4744,
      "step": 1796
    },
    {
      "epoch": 0.5393157262905162,
      "grad_norm": 0.1647583246231079,
      "learning_rate": 0.00018299022325437013,
      "loss": 0.4958,
      "step": 1797
    },
    {
      "epoch": 0.5396158463385354,
      "grad_norm": 0.19911617040634155,
      "learning_rate": 0.0001829609900923524,
      "loss": 0.4451,
      "step": 1798
    },
    {
      "epoch": 0.5399159663865546,
      "grad_norm": 0.1554144322872162,
      "learning_rate": 0.00018293173417093826,
      "loss": 0.5257,
      "step": 1799
    },
    {
      "epoch": 0.5402160864345739,
      "grad_norm": 0.245337575674057,
      "learning_rate": 0.00018290245549815385,
      "loss": 0.4839,
      "step": 1800
    },
    {
      "epoch": 0.540516206482593,
      "grad_norm": 0.17131870985031128,
      "learning_rate": 0.00018287315408203135,
      "loss": 0.4841,
      "step": 1801
    },
    {
      "epoch": 0.5408163265306123,
      "grad_norm": 0.30908921360969543,
      "learning_rate": 0.00018284382993060931,
      "loss": 0.4453,
      "step": 1802
    },
    {
      "epoch": 0.5411164465786314,
      "grad_norm": 0.15892593562602997,
      "learning_rate": 0.00018281448305193244,
      "loss": 0.4909,
      "step": 1803
    },
    {
      "epoch": 0.5414165666266506,
      "grad_norm": 0.20572002232074738,
      "learning_rate": 0.00018278511345405177,
      "loss": 0.4391,
      "step": 1804
    },
    {
      "epoch": 0.5417166866746699,
      "grad_norm": 0.16379763185977936,
      "learning_rate": 0.0001827557211450245,
      "loss": 0.478,
      "step": 1805
    },
    {
      "epoch": 0.542016806722689,
      "grad_norm": 0.19017498195171356,
      "learning_rate": 0.00018272630613291405,
      "loss": 0.4413,
      "step": 1806
    },
    {
      "epoch": 0.5423169267707083,
      "grad_norm": 0.1718524992465973,
      "learning_rate": 0.0001826968684257901,
      "loss": 0.5658,
      "step": 1807
    },
    {
      "epoch": 0.5426170468187275,
      "grad_norm": 0.3137906491756439,
      "learning_rate": 0.0001826674080317285,
      "loss": 0.3869,
      "step": 1808
    },
    {
      "epoch": 0.5429171668667467,
      "grad_norm": 0.2052076756954193,
      "learning_rate": 0.00018263792495881148,
      "loss": 0.4597,
      "step": 1809
    },
    {
      "epoch": 0.5432172869147659,
      "grad_norm": 0.2138308882713318,
      "learning_rate": 0.0001826084192151273,
      "loss": 0.4839,
      "step": 1810
    },
    {
      "epoch": 0.5435174069627852,
      "grad_norm": 0.17791545391082764,
      "learning_rate": 0.00018257889080877055,
      "loss": 0.4989,
      "step": 1811
    },
    {
      "epoch": 0.5438175270108043,
      "grad_norm": 0.1574055552482605,
      "learning_rate": 0.000182549339747842,
      "loss": 0.4785,
      "step": 1812
    },
    {
      "epoch": 0.5441176470588235,
      "grad_norm": 0.33668404817581177,
      "learning_rate": 0.00018251976604044868,
      "loss": 0.486,
      "step": 1813
    },
    {
      "epoch": 0.5444177671068428,
      "grad_norm": 0.1609889566898346,
      "learning_rate": 0.0001824901696947037,
      "loss": 0.5451,
      "step": 1814
    },
    {
      "epoch": 0.5447178871548619,
      "grad_norm": 0.14856895804405212,
      "learning_rate": 0.0001824605507187266,
      "loss": 0.4601,
      "step": 1815
    },
    {
      "epoch": 0.5450180072028812,
      "grad_norm": 0.3381519913673401,
      "learning_rate": 0.00018243090912064294,
      "loss": 0.4626,
      "step": 1816
    },
    {
      "epoch": 0.5453181272509003,
      "grad_norm": 0.1419222056865692,
      "learning_rate": 0.00018240124490858457,
      "loss": 0.4175,
      "step": 1817
    },
    {
      "epoch": 0.5456182472989196,
      "grad_norm": 0.15159635245800018,
      "learning_rate": 0.0001823715580906895,
      "loss": 0.4754,
      "step": 1818
    },
    {
      "epoch": 0.5459183673469388,
      "grad_norm": 0.17327073216438293,
      "learning_rate": 0.000182341848675102,
      "loss": 0.4694,
      "step": 1819
    },
    {
      "epoch": 0.5462184873949579,
      "grad_norm": 0.18374492228031158,
      "learning_rate": 0.00018231211666997247,
      "loss": 0.4907,
      "step": 1820
    },
    {
      "epoch": 0.5465186074429772,
      "grad_norm": 0.16119374334812164,
      "learning_rate": 0.00018228236208345762,
      "loss": 0.4843,
      "step": 1821
    },
    {
      "epoch": 0.5468187274909964,
      "grad_norm": 0.15638823807239532,
      "learning_rate": 0.00018225258492372018,
      "loss": 0.4957,
      "step": 1822
    },
    {
      "epoch": 0.5471188475390156,
      "grad_norm": 0.15086990594863892,
      "learning_rate": 0.00018222278519892926,
      "loss": 0.4562,
      "step": 1823
    },
    {
      "epoch": 0.5474189675870348,
      "grad_norm": 0.2456623613834381,
      "learning_rate": 0.00018219296291726003,
      "loss": 0.4646,
      "step": 1824
    },
    {
      "epoch": 0.5477190876350541,
      "grad_norm": 1.42583429813385,
      "learning_rate": 0.00018216311808689388,
      "loss": 0.4726,
      "step": 1825
    },
    {
      "epoch": 0.5480192076830732,
      "grad_norm": 0.16602733731269836,
      "learning_rate": 0.0001821332507160184,
      "loss": 0.4781,
      "step": 1826
    },
    {
      "epoch": 0.5483193277310925,
      "grad_norm": 0.16486293077468872,
      "learning_rate": 0.00018210336081282743,
      "loss": 0.4647,
      "step": 1827
    },
    {
      "epoch": 0.5486194477791116,
      "grad_norm": 0.1668195128440857,
      "learning_rate": 0.00018207344838552084,
      "loss": 0.4584,
      "step": 1828
    },
    {
      "epoch": 0.5489195678271308,
      "grad_norm": 0.1644868105649948,
      "learning_rate": 0.00018204351344230474,
      "loss": 0.4652,
      "step": 1829
    },
    {
      "epoch": 0.5492196878751501,
      "grad_norm": 0.1930205076932907,
      "learning_rate": 0.00018201355599139154,
      "loss": 0.4959,
      "step": 1830
    },
    {
      "epoch": 0.5495198079231692,
      "grad_norm": 0.16677197813987732,
      "learning_rate": 0.00018198357604099966,
      "loss": 0.4816,
      "step": 1831
    },
    {
      "epoch": 0.5498199279711885,
      "grad_norm": 0.2054395079612732,
      "learning_rate": 0.00018195357359935375,
      "loss": 0.4158,
      "step": 1832
    },
    {
      "epoch": 0.5501200480192077,
      "grad_norm": 0.18362215161323547,
      "learning_rate": 0.00018192354867468467,
      "loss": 0.5312,
      "step": 1833
    },
    {
      "epoch": 0.5504201680672269,
      "grad_norm": 0.16130132973194122,
      "learning_rate": 0.00018189350127522936,
      "loss": 0.4718,
      "step": 1834
    },
    {
      "epoch": 0.5507202881152461,
      "grad_norm": 0.24162361025810242,
      "learning_rate": 0.00018186343140923106,
      "loss": 0.4735,
      "step": 1835
    },
    {
      "epoch": 0.5510204081632653,
      "grad_norm": 0.13708935678005219,
      "learning_rate": 0.00018183333908493903,
      "loss": 0.4089,
      "step": 1836
    },
    {
      "epoch": 0.5513205282112845,
      "grad_norm": 0.20122745633125305,
      "learning_rate": 0.00018180322431060874,
      "loss": 0.4965,
      "step": 1837
    },
    {
      "epoch": 0.5516206482593037,
      "grad_norm": 0.1756470650434494,
      "learning_rate": 0.00018177308709450192,
      "loss": 0.5196,
      "step": 1838
    },
    {
      "epoch": 0.551920768307323,
      "grad_norm": 0.142266184091568,
      "learning_rate": 0.00018174292744488628,
      "loss": 0.4741,
      "step": 1839
    },
    {
      "epoch": 0.5522208883553421,
      "grad_norm": 0.13735970854759216,
      "learning_rate": 0.0001817127453700358,
      "loss": 0.3803,
      "step": 1840
    },
    {
      "epoch": 0.5525210084033614,
      "grad_norm": 0.14827807247638702,
      "learning_rate": 0.00018168254087823062,
      "loss": 0.4642,
      "step": 1841
    },
    {
      "epoch": 0.5528211284513805,
      "grad_norm": 0.15369684994220734,
      "learning_rate": 0.00018165231397775696,
      "loss": 0.5226,
      "step": 1842
    },
    {
      "epoch": 0.5531212484993998,
      "grad_norm": 0.17196229100227356,
      "learning_rate": 0.0001816220646769072,
      "loss": 0.542,
      "step": 1843
    },
    {
      "epoch": 0.553421368547419,
      "grad_norm": 0.15198828279972076,
      "learning_rate": 0.00018159179298397996,
      "loss": 0.4516,
      "step": 1844
    },
    {
      "epoch": 0.5537214885954381,
      "grad_norm": 0.14250534772872925,
      "learning_rate": 0.0001815614989072799,
      "loss": 0.4312,
      "step": 1845
    },
    {
      "epoch": 0.5540216086434574,
      "grad_norm": 0.190664604306221,
      "learning_rate": 0.00018153118245511785,
      "loss": 0.4878,
      "step": 1846
    },
    {
      "epoch": 0.5543217286914766,
      "grad_norm": 0.16536127030849457,
      "learning_rate": 0.00018150084363581075,
      "loss": 0.4951,
      "step": 1847
    },
    {
      "epoch": 0.5546218487394958,
      "grad_norm": 0.1848268061876297,
      "learning_rate": 0.00018147048245768175,
      "loss": 0.4926,
      "step": 1848
    },
    {
      "epoch": 0.554921968787515,
      "grad_norm": 0.19543147087097168,
      "learning_rate": 0.00018144009892906006,
      "loss": 0.4962,
      "step": 1849
    },
    {
      "epoch": 0.5552220888355343,
      "grad_norm": 0.23135653138160706,
      "learning_rate": 0.00018140969305828106,
      "loss": 0.5021,
      "step": 1850
    },
    {
      "epoch": 0.5555222088835534,
      "grad_norm": 0.13906599581241608,
      "learning_rate": 0.0001813792648536863,
      "loss": 0.4226,
      "step": 1851
    },
    {
      "epoch": 0.5558223289315727,
      "grad_norm": 0.14647486805915833,
      "learning_rate": 0.00018134881432362336,
      "loss": 0.4676,
      "step": 1852
    },
    {
      "epoch": 0.5561224489795918,
      "grad_norm": 0.1806657612323761,
      "learning_rate": 0.000181318341476446,
      "loss": 0.4747,
      "step": 1853
    },
    {
      "epoch": 0.556422569027611,
      "grad_norm": 0.2328762412071228,
      "learning_rate": 0.00018128784632051408,
      "loss": 0.4944,
      "step": 1854
    },
    {
      "epoch": 0.5567226890756303,
      "grad_norm": 0.14552703499794006,
      "learning_rate": 0.00018125732886419362,
      "loss": 0.4999,
      "step": 1855
    },
    {
      "epoch": 0.5570228091236494,
      "grad_norm": 0.15310394763946533,
      "learning_rate": 0.00018122678911585677,
      "loss": 0.4801,
      "step": 1856
    },
    {
      "epoch": 0.5573229291716687,
      "grad_norm": 0.2084084302186966,
      "learning_rate": 0.00018119622708388165,
      "loss": 0.5182,
      "step": 1857
    },
    {
      "epoch": 0.5576230492196879,
      "grad_norm": 0.19386032223701477,
      "learning_rate": 0.00018116564277665274,
      "loss": 0.4938,
      "step": 1858
    },
    {
      "epoch": 0.5579231692677071,
      "grad_norm": 0.1554754376411438,
      "learning_rate": 0.0001811350362025604,
      "loss": 0.4759,
      "step": 1859
    },
    {
      "epoch": 0.5582232893157263,
      "grad_norm": 0.14368936419487,
      "learning_rate": 0.00018110440737000122,
      "loss": 0.4267,
      "step": 1860
    },
    {
      "epoch": 0.5585234093637454,
      "grad_norm": 0.16357707977294922,
      "learning_rate": 0.00018107375628737785,
      "loss": 0.5091,
      "step": 1861
    },
    {
      "epoch": 0.5588235294117647,
      "grad_norm": 0.14437748491764069,
      "learning_rate": 0.00018104308296309913,
      "loss": 0.4361,
      "step": 1862
    },
    {
      "epoch": 0.5591236494597839,
      "grad_norm": 0.1523028463125229,
      "learning_rate": 0.00018101238740557985,
      "loss": 0.4523,
      "step": 1863
    },
    {
      "epoch": 0.5594237695078031,
      "grad_norm": 0.1871221661567688,
      "learning_rate": 0.000180981669623241,
      "loss": 0.5747,
      "step": 1864
    },
    {
      "epoch": 0.5597238895558223,
      "grad_norm": 0.14608240127563477,
      "learning_rate": 0.00018095092962450973,
      "loss": 0.4509,
      "step": 1865
    },
    {
      "epoch": 0.5600240096038416,
      "grad_norm": 0.16460062563419342,
      "learning_rate": 0.0001809201674178191,
      "loss": 0.4713,
      "step": 1866
    },
    {
      "epoch": 0.5603241296518607,
      "grad_norm": 0.1632416993379593,
      "learning_rate": 0.00018088938301160843,
      "loss": 0.4592,
      "step": 1867
    },
    {
      "epoch": 0.56062424969988,
      "grad_norm": 0.15670481324195862,
      "learning_rate": 0.0001808585764143231,
      "loss": 0.4528,
      "step": 1868
    },
    {
      "epoch": 0.5609243697478992,
      "grad_norm": 0.14016574621200562,
      "learning_rate": 0.00018082774763441444,
      "loss": 0.3966,
      "step": 1869
    },
    {
      "epoch": 0.5612244897959183,
      "grad_norm": 0.15027348697185516,
      "learning_rate": 0.00018079689668034005,
      "loss": 0.4706,
      "step": 1870
    },
    {
      "epoch": 0.5615246098439376,
      "grad_norm": 0.16850395500659943,
      "learning_rate": 0.00018076602356056353,
      "loss": 0.4728,
      "step": 1871
    },
    {
      "epoch": 0.5618247298919568,
      "grad_norm": 0.14605863392353058,
      "learning_rate": 0.00018073512828355458,
      "loss": 0.4838,
      "step": 1872
    },
    {
      "epoch": 0.562124849939976,
      "grad_norm": 0.15179717540740967,
      "learning_rate": 0.0001807042108577889,
      "loss": 0.4864,
      "step": 1873
    },
    {
      "epoch": 0.5624249699879952,
      "grad_norm": 0.13549867272377014,
      "learning_rate": 0.0001806732712917484,
      "loss": 0.3831,
      "step": 1874
    },
    {
      "epoch": 0.5627250900360145,
      "grad_norm": 0.14311176538467407,
      "learning_rate": 0.00018064230959392096,
      "loss": 0.4374,
      "step": 1875
    },
    {
      "epoch": 0.5630252100840336,
      "grad_norm": 0.16444933414459229,
      "learning_rate": 0.00018061132577280056,
      "loss": 0.5191,
      "step": 1876
    },
    {
      "epoch": 0.5633253301320528,
      "grad_norm": 0.2601252496242523,
      "learning_rate": 0.0001805803198368873,
      "loss": 0.4561,
      "step": 1877
    },
    {
      "epoch": 0.563625450180072,
      "grad_norm": 0.14315593242645264,
      "learning_rate": 0.00018054929179468724,
      "loss": 0.4424,
      "step": 1878
    },
    {
      "epoch": 0.5639255702280912,
      "grad_norm": 0.1418648660182953,
      "learning_rate": 0.00018051824165471263,
      "loss": 0.4463,
      "step": 1879
    },
    {
      "epoch": 0.5642256902761105,
      "grad_norm": 0.1637127697467804,
      "learning_rate": 0.00018048716942548168,
      "loss": 0.4576,
      "step": 1880
    },
    {
      "epoch": 0.5645258103241296,
      "grad_norm": 0.1702413707971573,
      "learning_rate": 0.00018045607511551869,
      "loss": 0.5403,
      "step": 1881
    },
    {
      "epoch": 0.5648259303721489,
      "grad_norm": 0.15511217713356018,
      "learning_rate": 0.0001804249587333541,
      "loss": 0.4783,
      "step": 1882
    },
    {
      "epoch": 0.5651260504201681,
      "grad_norm": 0.166106715798378,
      "learning_rate": 0.00018039382028752426,
      "loss": 0.4398,
      "step": 1883
    },
    {
      "epoch": 0.5654261704681873,
      "grad_norm": 0.1387651264667511,
      "learning_rate": 0.00018036265978657164,
      "loss": 0.4556,
      "step": 1884
    },
    {
      "epoch": 0.5657262905162065,
      "grad_norm": 0.15797646343708038,
      "learning_rate": 0.0001803314772390448,
      "loss": 0.4892,
      "step": 1885
    },
    {
      "epoch": 0.5660264105642256,
      "grad_norm": 0.20617718994617462,
      "learning_rate": 0.00018030027265349835,
      "loss": 0.5124,
      "step": 1886
    },
    {
      "epoch": 0.5663265306122449,
      "grad_norm": 0.14816582202911377,
      "learning_rate": 0.0001802690460384928,
      "loss": 0.5076,
      "step": 1887
    },
    {
      "epoch": 0.5666266506602641,
      "grad_norm": 0.1775469332933426,
      "learning_rate": 0.00018023779740259494,
      "loss": 0.4847,
      "step": 1888
    },
    {
      "epoch": 0.5669267707082833,
      "grad_norm": 0.1552363783121109,
      "learning_rate": 0.0001802065267543774,
      "loss": 0.5234,
      "step": 1889
    },
    {
      "epoch": 0.5672268907563025,
      "grad_norm": 0.1534537822008133,
      "learning_rate": 0.00018017523410241893,
      "loss": 0.4943,
      "step": 1890
    },
    {
      "epoch": 0.5675270108043218,
      "grad_norm": 0.15913110971450806,
      "learning_rate": 0.0001801439194553043,
      "loss": 0.4902,
      "step": 1891
    },
    {
      "epoch": 0.5678271308523409,
      "grad_norm": 0.16289573907852173,
      "learning_rate": 0.0001801125828216244,
      "loss": 0.4366,
      "step": 1892
    },
    {
      "epoch": 0.5681272509003601,
      "grad_norm": 0.14295737445354462,
      "learning_rate": 0.00018008122420997598,
      "loss": 0.435,
      "step": 1893
    },
    {
      "epoch": 0.5684273709483794,
      "grad_norm": 0.1722162663936615,
      "learning_rate": 0.00018004984362896196,
      "loss": 0.4712,
      "step": 1894
    },
    {
      "epoch": 0.5687274909963985,
      "grad_norm": 0.15125919878482819,
      "learning_rate": 0.00018001844108719124,
      "loss": 0.4776,
      "step": 1895
    },
    {
      "epoch": 0.5690276110444178,
      "grad_norm": 0.14587509632110596,
      "learning_rate": 0.00017998701659327875,
      "loss": 0.4654,
      "step": 1896
    },
    {
      "epoch": 0.569327731092437,
      "grad_norm": 0.1562846153974533,
      "learning_rate": 0.00017995557015584542,
      "loss": 0.4874,
      "step": 1897
    },
    {
      "epoch": 0.5696278511404562,
      "grad_norm": 0.18704284727573395,
      "learning_rate": 0.00017992410178351825,
      "loss": 0.4313,
      "step": 1898
    },
    {
      "epoch": 0.5699279711884754,
      "grad_norm": 0.1448519378900528,
      "learning_rate": 0.00017989261148493023,
      "loss": 0.4397,
      "step": 1899
    },
    {
      "epoch": 0.5702280912364946,
      "grad_norm": 0.1406678855419159,
      "learning_rate": 0.00017986109926872032,
      "loss": 0.4486,
      "step": 1900
    },
    {
      "epoch": 0.5705282112845138,
      "grad_norm": 0.1617930382490158,
      "learning_rate": 0.0001798295651435336,
      "loss": 0.5144,
      "step": 1901
    },
    {
      "epoch": 0.570828331332533,
      "grad_norm": 0.14808118343353271,
      "learning_rate": 0.000179798009118021,
      "loss": 0.4841,
      "step": 1902
    },
    {
      "epoch": 0.5711284513805522,
      "grad_norm": 0.23392923176288605,
      "learning_rate": 0.00017976643120083964,
      "loss": 0.4073,
      "step": 1903
    },
    {
      "epoch": 0.5714285714285714,
      "grad_norm": 0.14798158407211304,
      "learning_rate": 0.00017973483140065253,
      "loss": 0.4561,
      "step": 1904
    },
    {
      "epoch": 0.5717286914765907,
      "grad_norm": 0.16208335757255554,
      "learning_rate": 0.00017970320972612869,
      "loss": 0.4918,
      "step": 1905
    },
    {
      "epoch": 0.5720288115246098,
      "grad_norm": 0.1403273195028305,
      "learning_rate": 0.00017967156618594322,
      "loss": 0.426,
      "step": 1906
    },
    {
      "epoch": 0.5723289315726291,
      "grad_norm": 0.1615184247493744,
      "learning_rate": 0.0001796399007887771,
      "loss": 0.4749,
      "step": 1907
    },
    {
      "epoch": 0.5726290516206483,
      "grad_norm": 0.2007116824388504,
      "learning_rate": 0.00017960821354331738,
      "loss": 0.4807,
      "step": 1908
    },
    {
      "epoch": 0.5729291716686674,
      "grad_norm": 0.158131405711174,
      "learning_rate": 0.00017957650445825713,
      "loss": 0.5156,
      "step": 1909
    },
    {
      "epoch": 0.5732292917166867,
      "grad_norm": 0.15085642039775848,
      "learning_rate": 0.00017954477354229536,
      "loss": 0.4494,
      "step": 1910
    },
    {
      "epoch": 0.5735294117647058,
      "grad_norm": 0.16237707436084747,
      "learning_rate": 0.00017951302080413707,
      "loss": 0.4955,
      "step": 1911
    },
    {
      "epoch": 0.5738295318127251,
      "grad_norm": 0.1413777470588684,
      "learning_rate": 0.00017948124625249328,
      "loss": 0.4761,
      "step": 1912
    },
    {
      "epoch": 0.5741296518607443,
      "grad_norm": 0.1585283726453781,
      "learning_rate": 0.00017944944989608096,
      "loss": 0.5372,
      "step": 1913
    },
    {
      "epoch": 0.5744297719087635,
      "grad_norm": 0.13110783696174622,
      "learning_rate": 0.00017941763174362304,
      "loss": 0.4157,
      "step": 1914
    },
    {
      "epoch": 0.5747298919567827,
      "grad_norm": 0.14906930923461914,
      "learning_rate": 0.00017938579180384854,
      "loss": 0.4632,
      "step": 1915
    },
    {
      "epoch": 0.575030012004802,
      "grad_norm": 0.13836872577667236,
      "learning_rate": 0.00017935393008549228,
      "loss": 0.4291,
      "step": 1916
    },
    {
      "epoch": 0.5753301320528211,
      "grad_norm": 0.16051827371120453,
      "learning_rate": 0.0001793220465972953,
      "loss": 0.4826,
      "step": 1917
    },
    {
      "epoch": 0.5756302521008403,
      "grad_norm": 0.14186377823352814,
      "learning_rate": 0.00017929014134800432,
      "loss": 0.4562,
      "step": 1918
    },
    {
      "epoch": 0.5759303721488596,
      "grad_norm": 0.17933087050914764,
      "learning_rate": 0.0001792582143463723,
      "loss": 0.4544,
      "step": 1919
    },
    {
      "epoch": 0.5762304921968787,
      "grad_norm": 0.5719718337059021,
      "learning_rate": 0.00017922626560115798,
      "loss": 0.4773,
      "step": 1920
    },
    {
      "epoch": 0.576530612244898,
      "grad_norm": 0.15085569024085999,
      "learning_rate": 0.0001791942951211261,
      "loss": 0.494,
      "step": 1921
    },
    {
      "epoch": 0.5768307322929171,
      "grad_norm": 0.1766566038131714,
      "learning_rate": 0.0001791623029150475,
      "loss": 0.5268,
      "step": 1922
    },
    {
      "epoch": 0.5771308523409364,
      "grad_norm": 0.34068360924720764,
      "learning_rate": 0.00017913028899169882,
      "loss": 0.4105,
      "step": 1923
    },
    {
      "epoch": 0.5774309723889556,
      "grad_norm": 0.2693949341773987,
      "learning_rate": 0.00017909825335986267,
      "loss": 0.4243,
      "step": 1924
    },
    {
      "epoch": 0.5777310924369747,
      "grad_norm": 0.23097528517246246,
      "learning_rate": 0.00017906619602832774,
      "loss": 0.4775,
      "step": 1925
    },
    {
      "epoch": 0.578031212484994,
      "grad_norm": 0.3153398334980011,
      "learning_rate": 0.0001790341170058885,
      "loss": 0.4802,
      "step": 1926
    },
    {
      "epoch": 0.5783313325330132,
      "grad_norm": 0.17524994909763336,
      "learning_rate": 0.00017900201630134555,
      "loss": 0.5036,
      "step": 1927
    },
    {
      "epoch": 0.5786314525810324,
      "grad_norm": 0.19373682141304016,
      "learning_rate": 0.0001789698939235053,
      "loss": 0.465,
      "step": 1928
    },
    {
      "epoch": 0.5789315726290516,
      "grad_norm": 0.19627533853054047,
      "learning_rate": 0.00017893774988118015,
      "loss": 0.4986,
      "step": 1929
    },
    {
      "epoch": 0.5792316926770709,
      "grad_norm": 0.15393482148647308,
      "learning_rate": 0.0001789055841831885,
      "loss": 0.4475,
      "step": 1930
    },
    {
      "epoch": 0.57953181272509,
      "grad_norm": 0.15590296685695648,
      "learning_rate": 0.00017887339683835457,
      "loss": 0.4688,
      "step": 1931
    },
    {
      "epoch": 0.5798319327731093,
      "grad_norm": 0.2071526050567627,
      "learning_rate": 0.00017884118785550866,
      "loss": 0.502,
      "step": 1932
    },
    {
      "epoch": 0.5801320528211285,
      "grad_norm": 0.14130908250808716,
      "learning_rate": 0.00017880895724348687,
      "loss": 0.446,
      "step": 1933
    },
    {
      "epoch": 0.5804321728691476,
      "grad_norm": 0.14672444760799408,
      "learning_rate": 0.00017877670501113135,
      "loss": 0.4557,
      "step": 1934
    },
    {
      "epoch": 0.5807322929171669,
      "grad_norm": 0.162306010723114,
      "learning_rate": 0.00017874443116729013,
      "loss": 0.5119,
      "step": 1935
    },
    {
      "epoch": 0.581032412965186,
      "grad_norm": 0.15669575333595276,
      "learning_rate": 0.0001787121357208171,
      "loss": 0.5024,
      "step": 1936
    },
    {
      "epoch": 0.5813325330132053,
      "grad_norm": 0.22228948771953583,
      "learning_rate": 0.00017867981868057223,
      "loss": 0.4616,
      "step": 1937
    },
    {
      "epoch": 0.5816326530612245,
      "grad_norm": 0.15275882184505463,
      "learning_rate": 0.0001786474800554213,
      "loss": 0.4786,
      "step": 1938
    },
    {
      "epoch": 0.5819327731092437,
      "grad_norm": 0.14351172745227814,
      "learning_rate": 0.000178615119854236,
      "loss": 0.421,
      "step": 1939
    },
    {
      "epoch": 0.5822328931572629,
      "grad_norm": 0.13428664207458496,
      "learning_rate": 0.00017858273808589402,
      "loss": 0.3897,
      "step": 1940
    },
    {
      "epoch": 0.5825330132052821,
      "grad_norm": 0.1439991444349289,
      "learning_rate": 0.00017855033475927895,
      "loss": 0.4672,
      "step": 1941
    },
    {
      "epoch": 0.5828331332533013,
      "grad_norm": 0.161099374294281,
      "learning_rate": 0.00017851790988328024,
      "loss": 0.4948,
      "step": 1942
    },
    {
      "epoch": 0.5831332533013205,
      "grad_norm": 0.14966915547847748,
      "learning_rate": 0.00017848546346679327,
      "loss": 0.4741,
      "step": 1943
    },
    {
      "epoch": 0.5834333733493398,
      "grad_norm": 0.17617997527122498,
      "learning_rate": 0.00017845299551871936,
      "loss": 0.4958,
      "step": 1944
    },
    {
      "epoch": 0.5837334933973589,
      "grad_norm": 0.14991939067840576,
      "learning_rate": 0.0001784205060479657,
      "loss": 0.4269,
      "step": 1945
    },
    {
      "epoch": 0.5840336134453782,
      "grad_norm": 0.15833768248558044,
      "learning_rate": 0.00017838799506344544,
      "loss": 0.4718,
      "step": 1946
    },
    {
      "epoch": 0.5843337334933973,
      "grad_norm": 0.1564813107252121,
      "learning_rate": 0.0001783554625740776,
      "loss": 0.4885,
      "step": 1947
    },
    {
      "epoch": 0.5846338535414166,
      "grad_norm": 0.9468123912811279,
      "learning_rate": 0.00017832290858878704,
      "loss": 0.4513,
      "step": 1948
    },
    {
      "epoch": 0.5849339735894358,
      "grad_norm": 0.14726650714874268,
      "learning_rate": 0.00017829033311650462,
      "loss": 0.4731,
      "step": 1949
    },
    {
      "epoch": 0.5852340936374549,
      "grad_norm": 5.142820358276367,
      "learning_rate": 0.00017825773616616703,
      "loss": 0.5011,
      "step": 1950
    },
    {
      "epoch": 0.5855342136854742,
      "grad_norm": 0.17839622497558594,
      "learning_rate": 0.00017822511774671687,
      "loss": 0.4732,
      "step": 1951
    },
    {
      "epoch": 0.5858343337334934,
      "grad_norm": 0.2276451140642166,
      "learning_rate": 0.00017819247786710264,
      "loss": 0.5079,
      "step": 1952
    },
    {
      "epoch": 0.5861344537815126,
      "grad_norm": 0.34687280654907227,
      "learning_rate": 0.0001781598165362787,
      "loss": 0.4788,
      "step": 1953
    },
    {
      "epoch": 0.5864345738295318,
      "grad_norm": 0.16111914813518524,
      "learning_rate": 0.0001781271337632053,
      "loss": 0.4534,
      "step": 1954
    },
    {
      "epoch": 0.5867346938775511,
      "grad_norm": 0.19313451647758484,
      "learning_rate": 0.00017809442955684862,
      "loss": 0.495,
      "step": 1955
    },
    {
      "epoch": 0.5870348139255702,
      "grad_norm": 0.18948647379875183,
      "learning_rate": 0.00017806170392618067,
      "loss": 0.5002,
      "step": 1956
    },
    {
      "epoch": 0.5873349339735895,
      "grad_norm": 0.19670206308364868,
      "learning_rate": 0.00017802895688017936,
      "loss": 0.4845,
      "step": 1957
    },
    {
      "epoch": 0.5876350540216086,
      "grad_norm": 0.18781894445419312,
      "learning_rate": 0.00017799618842782844,
      "loss": 0.4335,
      "step": 1958
    },
    {
      "epoch": 0.5879351740696278,
      "grad_norm": 0.17452123761177063,
      "learning_rate": 0.00017796339857811756,
      "loss": 0.4928,
      "step": 1959
    },
    {
      "epoch": 0.5882352941176471,
      "grad_norm": 0.2590268552303314,
      "learning_rate": 0.0001779305873400423,
      "loss": 0.5229,
      "step": 1960
    },
    {
      "epoch": 0.5885354141656662,
      "grad_norm": 4.169418811798096,
      "learning_rate": 0.00017789775472260396,
      "loss": 0.5012,
      "step": 1961
    },
    {
      "epoch": 0.5888355342136855,
      "grad_norm": 4.5325212478637695,
      "learning_rate": 0.00017786490073480984,
      "loss": 2.4917,
      "step": 1962
    },
    {
      "epoch": 0.5891356542617047,
      "grad_norm": 1.494038701057434,
      "learning_rate": 0.00017783202538567308,
      "loss": 0.6653,
      "step": 1963
    },
    {
      "epoch": 0.5894357743097239,
      "grad_norm": 2.7276971340179443,
      "learning_rate": 0.0001777991286842126,
      "loss": 0.579,
      "step": 1964
    },
    {
      "epoch": 0.5897358943577431,
      "grad_norm": 6.630552291870117,
      "learning_rate": 0.00017776621063945322,
      "loss": 1.2808,
      "step": 1965
    },
    {
      "epoch": 0.5900360144057623,
      "grad_norm": 1.0019192695617676,
      "learning_rate": 0.0001777332712604257,
      "loss": 0.5761,
      "step": 1966
    },
    {
      "epoch": 0.5903361344537815,
      "grad_norm": 0.37464189529418945,
      "learning_rate": 0.00017770031055616654,
      "loss": 0.5019,
      "step": 1967
    },
    {
      "epoch": 0.5906362545018007,
      "grad_norm": 0.32812222838401794,
      "learning_rate": 0.00017766732853571814,
      "loss": 0.5611,
      "step": 1968
    },
    {
      "epoch": 0.59093637454982,
      "grad_norm": 0.4193577170372009,
      "learning_rate": 0.00017763432520812874,
      "loss": 0.5191,
      "step": 1969
    },
    {
      "epoch": 0.5912364945978391,
      "grad_norm": 0.32257169485092163,
      "learning_rate": 0.00017760130058245242,
      "loss": 0.5225,
      "step": 1970
    },
    {
      "epoch": 0.5915366146458584,
      "grad_norm": 0.2552598714828491,
      "learning_rate": 0.00017756825466774912,
      "loss": 0.407,
      "step": 1971
    },
    {
      "epoch": 0.5918367346938775,
      "grad_norm": 0.2669002115726471,
      "learning_rate": 0.00017753518747308454,
      "loss": 0.568,
      "step": 1972
    },
    {
      "epoch": 0.5921368547418968,
      "grad_norm": 0.2475007325410843,
      "learning_rate": 0.0001775020990075304,
      "loss": 0.5357,
      "step": 1973
    },
    {
      "epoch": 0.592436974789916,
      "grad_norm": 0.285609632730484,
      "learning_rate": 0.00017746898928016404,
      "loss": 0.5008,
      "step": 1974
    },
    {
      "epoch": 0.5927370948379351,
      "grad_norm": 0.24300764501094818,
      "learning_rate": 0.0001774358583000688,
      "loss": 0.6036,
      "step": 1975
    },
    {
      "epoch": 0.5930372148859544,
      "grad_norm": 0.18297113478183746,
      "learning_rate": 0.00017740270607633377,
      "loss": 0.5048,
      "step": 1976
    },
    {
      "epoch": 0.5933373349339736,
      "grad_norm": 0.22285380959510803,
      "learning_rate": 0.00017736953261805386,
      "loss": 0.4995,
      "step": 1977
    },
    {
      "epoch": 0.5936374549819928,
      "grad_norm": 0.1846853494644165,
      "learning_rate": 0.00017733633793432985,
      "loss": 0.4931,
      "step": 1978
    },
    {
      "epoch": 0.593937575030012,
      "grad_norm": 0.19081871211528778,
      "learning_rate": 0.0001773031220342683,
      "loss": 0.5039,
      "step": 1979
    },
    {
      "epoch": 0.5942376950780313,
      "grad_norm": 0.214079812169075,
      "learning_rate": 0.0001772698849269816,
      "loss": 0.5024,
      "step": 1980
    },
    {
      "epoch": 0.5945378151260504,
      "grad_norm": 0.23798619210720062,
      "learning_rate": 0.00017723662662158803,
      "loss": 0.5301,
      "step": 1981
    },
    {
      "epoch": 0.5948379351740696,
      "grad_norm": 0.2054298371076584,
      "learning_rate": 0.00017720334712721157,
      "loss": 0.5638,
      "step": 1982
    },
    {
      "epoch": 0.5951380552220888,
      "grad_norm": 0.18289883434772491,
      "learning_rate": 0.00017717004645298207,
      "loss": 0.5326,
      "step": 1983
    },
    {
      "epoch": 0.595438175270108,
      "grad_norm": 0.17352937161922455,
      "learning_rate": 0.00017713672460803524,
      "loss": 0.4728,
      "step": 1984
    },
    {
      "epoch": 0.5957382953181273,
      "grad_norm": 0.17404381930828094,
      "learning_rate": 0.00017710338160151248,
      "loss": 0.5195,
      "step": 1985
    },
    {
      "epoch": 0.5960384153661464,
      "grad_norm": 0.20349542796611786,
      "learning_rate": 0.00017707001744256108,
      "loss": 0.5368,
      "step": 1986
    },
    {
      "epoch": 0.5963385354141657,
      "grad_norm": 0.18232762813568115,
      "learning_rate": 0.00017703663214033415,
      "loss": 0.4905,
      "step": 1987
    },
    {
      "epoch": 0.5966386554621849,
      "grad_norm": 0.19482576847076416,
      "learning_rate": 0.00017700322570399056,
      "loss": 0.5443,
      "step": 1988
    },
    {
      "epoch": 0.5969387755102041,
      "grad_norm": 0.1861189305782318,
      "learning_rate": 0.00017696979814269489,
      "loss": 0.4927,
      "step": 1989
    },
    {
      "epoch": 0.5972388955582233,
      "grad_norm": 0.15059329569339752,
      "learning_rate": 0.00017693634946561775,
      "loss": 0.4527,
      "step": 1990
    },
    {
      "epoch": 0.5975390156062425,
      "grad_norm": 0.1789100617170334,
      "learning_rate": 0.00017690287968193528,
      "loss": 0.5073,
      "step": 1991
    },
    {
      "epoch": 0.5978391356542617,
      "grad_norm": 0.18303994834423065,
      "learning_rate": 0.00017686938880082963,
      "loss": 0.4518,
      "step": 1992
    },
    {
      "epoch": 0.5981392557022809,
      "grad_norm": 0.1630188226699829,
      "learning_rate": 0.00017683587683148857,
      "loss": 0.5175,
      "step": 1993
    },
    {
      "epoch": 0.5984393757503002,
      "grad_norm": 0.15870651602745056,
      "learning_rate": 0.00017680234378310573,
      "loss": 0.4886,
      "step": 1994
    },
    {
      "epoch": 0.5987394957983193,
      "grad_norm": 0.16810081899166107,
      "learning_rate": 0.00017676878966488055,
      "loss": 0.5112,
      "step": 1995
    },
    {
      "epoch": 0.5990396158463386,
      "grad_norm": 0.1768048256635666,
      "learning_rate": 0.0001767352144860182,
      "loss": 0.4502,
      "step": 1996
    },
    {
      "epoch": 0.5993397358943577,
      "grad_norm": 0.16407325863838196,
      "learning_rate": 0.00017670161825572965,
      "loss": 0.4885,
      "step": 1997
    },
    {
      "epoch": 0.5996398559423769,
      "grad_norm": 0.16511856019496918,
      "learning_rate": 0.00017666800098323164,
      "loss": 0.4894,
      "step": 1998
    },
    {
      "epoch": 0.5999399759903962,
      "grad_norm": 0.18376819789409637,
      "learning_rate": 0.0001766343626777467,
      "loss": 0.5458,
      "step": 1999
    },
    {
      "epoch": 0.6002400960384153,
      "grad_norm": 0.1894349306821823,
      "learning_rate": 0.00017660070334850304,
      "loss": 0.4612,
      "step": 2000
    },
    {
      "epoch": 0.6005402160864346,
      "grad_norm": 0.15564632415771484,
      "learning_rate": 0.00017656702300473481,
      "loss": 0.4722,
      "step": 2001
    },
    {
      "epoch": 0.6008403361344538,
      "grad_norm": 0.188431978225708,
      "learning_rate": 0.00017653332165568178,
      "loss": 0.5229,
      "step": 2002
    },
    {
      "epoch": 0.601140456182473,
      "grad_norm": 0.25116777420043945,
      "learning_rate": 0.0001764995993105895,
      "loss": 0.5073,
      "step": 2003
    },
    {
      "epoch": 0.6014405762304922,
      "grad_norm": 0.15402956306934357,
      "learning_rate": 0.00017646585597870935,
      "loss": 0.4539,
      "step": 2004
    },
    {
      "epoch": 0.6017406962785115,
      "grad_norm": 0.2883153259754181,
      "learning_rate": 0.00017643209166929845,
      "loss": 0.492,
      "step": 2005
    },
    {
      "epoch": 0.6020408163265306,
      "grad_norm": 0.14544089138507843,
      "learning_rate": 0.00017639830639161957,
      "loss": 0.4453,
      "step": 2006
    },
    {
      "epoch": 0.6023409363745498,
      "grad_norm": 0.16686096787452698,
      "learning_rate": 0.00017636450015494137,
      "loss": 0.4639,
      "step": 2007
    },
    {
      "epoch": 0.602641056422569,
      "grad_norm": 0.18141674995422363,
      "learning_rate": 0.0001763306729685382,
      "loss": 0.4564,
      "step": 2008
    },
    {
      "epoch": 0.6029411764705882,
      "grad_norm": 0.16426688432693481,
      "learning_rate": 0.00017629682484169014,
      "loss": 0.5075,
      "step": 2009
    },
    {
      "epoch": 0.6032412965186075,
      "grad_norm": 0.18985973298549652,
      "learning_rate": 0.00017626295578368305,
      "loss": 0.4788,
      "step": 2010
    },
    {
      "epoch": 0.6035414165666266,
      "grad_norm": 0.15430620312690735,
      "learning_rate": 0.00017622906580380852,
      "loss": 0.46,
      "step": 2011
    },
    {
      "epoch": 0.6038415366146459,
      "grad_norm": 0.15269403159618378,
      "learning_rate": 0.0001761951549113639,
      "loss": 0.5045,
      "step": 2012
    },
    {
      "epoch": 0.6041416566626651,
      "grad_norm": 0.15193504095077515,
      "learning_rate": 0.00017616122311565218,
      "loss": 0.4332,
      "step": 2013
    },
    {
      "epoch": 0.6044417767106842,
      "grad_norm": 0.1438152939081192,
      "learning_rate": 0.00017612727042598224,
      "loss": 0.451,
      "step": 2014
    },
    {
      "epoch": 0.6047418967587035,
      "grad_norm": 0.16883939504623413,
      "learning_rate": 0.0001760932968516686,
      "loss": 0.518,
      "step": 2015
    },
    {
      "epoch": 0.6050420168067226,
      "grad_norm": 0.1587616354227066,
      "learning_rate": 0.00017605930240203145,
      "loss": 0.464,
      "step": 2016
    },
    {
      "epoch": 0.6053421368547419,
      "grad_norm": 0.18665722012519836,
      "learning_rate": 0.00017602528708639685,
      "loss": 0.561,
      "step": 2017
    },
    {
      "epoch": 0.6056422569027611,
      "grad_norm": 0.14915643632411957,
      "learning_rate": 0.00017599125091409648,
      "loss": 0.4374,
      "step": 2018
    },
    {
      "epoch": 0.6059423769507803,
      "grad_norm": 0.15475080907344818,
      "learning_rate": 0.0001759571938944678,
      "loss": 0.5175,
      "step": 2019
    },
    {
      "epoch": 0.6062424969987995,
      "grad_norm": 0.1503390371799469,
      "learning_rate": 0.00017592311603685393,
      "loss": 0.4692,
      "step": 2020
    },
    {
      "epoch": 0.6065426170468188,
      "grad_norm": 0.15767855942249298,
      "learning_rate": 0.00017588901735060376,
      "loss": 0.51,
      "step": 2021
    },
    {
      "epoch": 0.6068427370948379,
      "grad_norm": 0.15096405148506165,
      "learning_rate": 0.00017585489784507186,
      "loss": 0.4601,
      "step": 2022
    },
    {
      "epoch": 0.6071428571428571,
      "grad_norm": 0.15504375100135803,
      "learning_rate": 0.00017582075752961855,
      "loss": 0.5359,
      "step": 2023
    },
    {
      "epoch": 0.6074429771908764,
      "grad_norm": 0.1470356434583664,
      "learning_rate": 0.00017578659641360978,
      "loss": 0.4396,
      "step": 2024
    },
    {
      "epoch": 0.6077430972388955,
      "grad_norm": 0.18372634053230286,
      "learning_rate": 0.00017575241450641736,
      "loss": 0.5157,
      "step": 2025
    },
    {
      "epoch": 0.6080432172869148,
      "grad_norm": 0.16779500246047974,
      "learning_rate": 0.00017571821181741859,
      "loss": 0.5137,
      "step": 2026
    },
    {
      "epoch": 0.608343337334934,
      "grad_norm": 0.31798943877220154,
      "learning_rate": 0.00017568398835599667,
      "loss": 0.4779,
      "step": 2027
    },
    {
      "epoch": 0.6086434573829532,
      "grad_norm": 0.1485154628753662,
      "learning_rate": 0.00017564974413154035,
      "loss": 0.4492,
      "step": 2028
    },
    {
      "epoch": 0.6089435774309724,
      "grad_norm": 0.15988373756408691,
      "learning_rate": 0.0001756154791534442,
      "loss": 0.5101,
      "step": 2029
    },
    {
      "epoch": 0.6092436974789915,
      "grad_norm": 0.1446724385023117,
      "learning_rate": 0.00017558119343110838,
      "loss": 0.4409,
      "step": 2030
    },
    {
      "epoch": 0.6095438175270108,
      "grad_norm": 0.16256844997406006,
      "learning_rate": 0.0001755468869739388,
      "loss": 0.4911,
      "step": 2031
    },
    {
      "epoch": 0.60984393757503,
      "grad_norm": 0.16651900112628937,
      "learning_rate": 0.00017551255979134705,
      "loss": 0.488,
      "step": 2032
    },
    {
      "epoch": 0.6101440576230492,
      "grad_norm": 0.14109483361244202,
      "learning_rate": 0.00017547821189275038,
      "loss": 0.4293,
      "step": 2033
    },
    {
      "epoch": 0.6104441776710684,
      "grad_norm": 0.15952004492282867,
      "learning_rate": 0.00017544384328757175,
      "loss": 0.4497,
      "step": 2034
    },
    {
      "epoch": 0.6107442977190877,
      "grad_norm": 0.14462940394878387,
      "learning_rate": 0.00017540945398523982,
      "loss": 0.4239,
      "step": 2035
    },
    {
      "epoch": 0.6110444177671068,
      "grad_norm": 0.14844803512096405,
      "learning_rate": 0.00017537504399518886,
      "loss": 0.4745,
      "step": 2036
    },
    {
      "epoch": 0.6113445378151261,
      "grad_norm": 0.14788131415843964,
      "learning_rate": 0.00017534061332685888,
      "loss": 0.457,
      "step": 2037
    },
    {
      "epoch": 0.6116446578631453,
      "grad_norm": 0.14577676355838776,
      "learning_rate": 0.00017530616198969555,
      "loss": 0.4824,
      "step": 2038
    },
    {
      "epoch": 0.6119447779111644,
      "grad_norm": 0.14970937371253967,
      "learning_rate": 0.00017527168999315016,
      "loss": 0.4017,
      "step": 2039
    },
    {
      "epoch": 0.6122448979591837,
      "grad_norm": 0.15392345190048218,
      "learning_rate": 0.00017523719734667973,
      "loss": 0.4667,
      "step": 2040
    },
    {
      "epoch": 0.6125450180072028,
      "grad_norm": 0.1536334753036499,
      "learning_rate": 0.00017520268405974692,
      "loss": 0.4893,
      "step": 2041
    },
    {
      "epoch": 0.6128451380552221,
      "grad_norm": 0.14010770618915558,
      "learning_rate": 0.00017516815014182008,
      "loss": 0.4642,
      "step": 2042
    },
    {
      "epoch": 0.6131452581032413,
      "grad_norm": 0.17600683867931366,
      "learning_rate": 0.00017513359560237314,
      "loss": 0.475,
      "step": 2043
    },
    {
      "epoch": 0.6134453781512605,
      "grad_norm": 0.1615845113992691,
      "learning_rate": 0.00017509902045088582,
      "loss": 0.5072,
      "step": 2044
    },
    {
      "epoch": 0.6137454981992797,
      "grad_norm": 0.16537730395793915,
      "learning_rate": 0.00017506442469684332,
      "loss": 0.5221,
      "step": 2045
    },
    {
      "epoch": 0.614045618247299,
      "grad_norm": 0.14268003404140472,
      "learning_rate": 0.00017502980834973667,
      "loss": 0.4465,
      "step": 2046
    },
    {
      "epoch": 0.6143457382953181,
      "grad_norm": 0.1623399555683136,
      "learning_rate": 0.0001749951714190624,
      "loss": 0.465,
      "step": 2047
    },
    {
      "epoch": 0.6146458583433373,
      "grad_norm": 0.20555807650089264,
      "learning_rate": 0.00017496051391432282,
      "loss": 0.445,
      "step": 2048
    },
    {
      "epoch": 0.6149459783913566,
      "grad_norm": 0.15159904956817627,
      "learning_rate": 0.00017492583584502577,
      "loss": 0.4622,
      "step": 2049
    },
    {
      "epoch": 0.6152460984393757,
      "grad_norm": 0.1567160189151764,
      "learning_rate": 0.0001748911372206848,
      "loss": 0.4422,
      "step": 2050
    },
    {
      "epoch": 0.615546218487395,
      "grad_norm": 0.16784349083900452,
      "learning_rate": 0.0001748564180508191,
      "loss": 0.4832,
      "step": 2051
    },
    {
      "epoch": 0.6158463385354142,
      "grad_norm": 0.1470254361629486,
      "learning_rate": 0.00017482167834495345,
      "loss": 0.4916,
      "step": 2052
    },
    {
      "epoch": 0.6161464585834334,
      "grad_norm": 0.1490541696548462,
      "learning_rate": 0.00017478691811261828,
      "loss": 0.4816,
      "step": 2053
    },
    {
      "epoch": 0.6164465786314526,
      "grad_norm": 0.1885465681552887,
      "learning_rate": 0.0001747521373633497,
      "loss": 0.5393,
      "step": 2054
    },
    {
      "epoch": 0.6167466986794717,
      "grad_norm": 0.16754573583602905,
      "learning_rate": 0.00017471733610668934,
      "loss": 0.5322,
      "step": 2055
    },
    {
      "epoch": 0.617046818727491,
      "grad_norm": 0.21554969251155853,
      "learning_rate": 0.0001746825143521846,
      "loss": 0.4694,
      "step": 2056
    },
    {
      "epoch": 0.6173469387755102,
      "grad_norm": 0.1596878319978714,
      "learning_rate": 0.0001746476721093884,
      "loss": 0.4782,
      "step": 2057
    },
    {
      "epoch": 0.6176470588235294,
      "grad_norm": 0.24440227448940277,
      "learning_rate": 0.00017461280938785932,
      "loss": 0.4607,
      "step": 2058
    },
    {
      "epoch": 0.6179471788715486,
      "grad_norm": 0.1455274522304535,
      "learning_rate": 0.00017457792619716153,
      "loss": 0.4751,
      "step": 2059
    },
    {
      "epoch": 0.6182472989195679,
      "grad_norm": 0.1382697969675064,
      "learning_rate": 0.00017454302254686486,
      "loss": 0.4552,
      "step": 2060
    },
    {
      "epoch": 0.618547418967587,
      "grad_norm": 0.1711137741804123,
      "learning_rate": 0.00017450809844654468,
      "loss": 0.4363,
      "step": 2061
    },
    {
      "epoch": 0.6188475390156063,
      "grad_norm": 0.1538340300321579,
      "learning_rate": 0.00017447315390578207,
      "loss": 0.4643,
      "step": 2062
    },
    {
      "epoch": 0.6191476590636255,
      "grad_norm": 0.14743641018867493,
      "learning_rate": 0.00017443818893416363,
      "loss": 0.4323,
      "step": 2063
    },
    {
      "epoch": 0.6194477791116446,
      "grad_norm": 0.4574187397956848,
      "learning_rate": 0.00017440320354128162,
      "loss": 0.5133,
      "step": 2064
    },
    {
      "epoch": 0.6197478991596639,
      "grad_norm": 0.14564795792102814,
      "learning_rate": 0.00017436819773673383,
      "loss": 0.4476,
      "step": 2065
    },
    {
      "epoch": 0.620048019207683,
      "grad_norm": 0.5146244168281555,
      "learning_rate": 0.00017433317153012375,
      "loss": 0.4766,
      "step": 2066
    },
    {
      "epoch": 0.6203481392557023,
      "grad_norm": 0.20717193186283112,
      "learning_rate": 0.00017429812493106043,
      "loss": 0.5036,
      "step": 2067
    },
    {
      "epoch": 0.6206482593037215,
      "grad_norm": 0.1570674180984497,
      "learning_rate": 0.00017426305794915846,
      "loss": 0.4884,
      "step": 2068
    },
    {
      "epoch": 0.6209483793517407,
      "grad_norm": 0.28760769963264465,
      "learning_rate": 0.00017422797059403814,
      "loss": 0.5122,
      "step": 2069
    },
    {
      "epoch": 0.6212484993997599,
      "grad_norm": 0.15187154710292816,
      "learning_rate": 0.00017419286287532516,
      "loss": 0.4608,
      "step": 2070
    },
    {
      "epoch": 0.6215486194477791,
      "grad_norm": 0.15756598114967346,
      "learning_rate": 0.00017415773480265102,
      "loss": 0.4853,
      "step": 2071
    },
    {
      "epoch": 0.6218487394957983,
      "grad_norm": 0.1937892585992813,
      "learning_rate": 0.00017412258638565268,
      "loss": 0.5064,
      "step": 2072
    },
    {
      "epoch": 0.6221488595438175,
      "grad_norm": 0.17976678907871246,
      "learning_rate": 0.00017408741763397267,
      "loss": 0.4349,
      "step": 2073
    },
    {
      "epoch": 0.6224489795918368,
      "grad_norm": 0.18714259564876556,
      "learning_rate": 0.00017405222855725917,
      "loss": 0.5572,
      "step": 2074
    },
    {
      "epoch": 0.6227490996398559,
      "grad_norm": 0.21988919377326965,
      "learning_rate": 0.0001740170191651659,
      "loss": 0.4587,
      "step": 2075
    },
    {
      "epoch": 0.6230492196878752,
      "grad_norm": 0.17325930297374725,
      "learning_rate": 0.00017398178946735214,
      "loss": 0.4815,
      "step": 2076
    },
    {
      "epoch": 0.6233493397358943,
      "grad_norm": 0.15329764783382416,
      "learning_rate": 0.00017394653947348278,
      "loss": 0.4946,
      "step": 2077
    },
    {
      "epoch": 0.6236494597839136,
      "grad_norm": 0.13897527754306793,
      "learning_rate": 0.0001739112691932282,
      "loss": 0.4417,
      "step": 2078
    },
    {
      "epoch": 0.6239495798319328,
      "grad_norm": 0.17680160701274872,
      "learning_rate": 0.00017387597863626446,
      "loss": 0.485,
      "step": 2079
    },
    {
      "epoch": 0.6242496998799519,
      "grad_norm": 0.2085428088903427,
      "learning_rate": 0.00017384066781227307,
      "loss": 0.4756,
      "step": 2080
    },
    {
      "epoch": 0.6245498199279712,
      "grad_norm": 0.15245382487773895,
      "learning_rate": 0.00017380533673094117,
      "loss": 0.4285,
      "step": 2081
    },
    {
      "epoch": 0.6248499399759904,
      "grad_norm": 0.14067943394184113,
      "learning_rate": 0.0001737699854019614,
      "loss": 0.453,
      "step": 2082
    },
    {
      "epoch": 0.6251500600240096,
      "grad_norm": 0.14680393040180206,
      "learning_rate": 0.00017373461383503208,
      "loss": 0.4824,
      "step": 2083
    },
    {
      "epoch": 0.6254501800720288,
      "grad_norm": 0.14030559360980988,
      "learning_rate": 0.00017369922203985688,
      "loss": 0.4266,
      "step": 2084
    },
    {
      "epoch": 0.6257503001200481,
      "grad_norm": 0.14773330092430115,
      "learning_rate": 0.00017366381002614523,
      "loss": 0.4897,
      "step": 2085
    },
    {
      "epoch": 0.6260504201680672,
      "grad_norm": 0.2262420505285263,
      "learning_rate": 0.00017362837780361196,
      "loss": 0.5152,
      "step": 2086
    },
    {
      "epoch": 0.6263505402160864,
      "grad_norm": 0.1481279730796814,
      "learning_rate": 0.0001735929253819775,
      "loss": 0.4594,
      "step": 2087
    },
    {
      "epoch": 0.6266506602641057,
      "grad_norm": 0.14530371129512787,
      "learning_rate": 0.00017355745277096785,
      "loss": 0.4572,
      "step": 2088
    },
    {
      "epoch": 0.6269507803121248,
      "grad_norm": 0.1739528626203537,
      "learning_rate": 0.00017352195998031447,
      "loss": 0.5099,
      "step": 2089
    },
    {
      "epoch": 0.6272509003601441,
      "grad_norm": 0.1571219265460968,
      "learning_rate": 0.0001734864470197544,
      "loss": 0.5085,
      "step": 2090
    },
    {
      "epoch": 0.6275510204081632,
      "grad_norm": 0.35751938819885254,
      "learning_rate": 0.00017345091389903025,
      "loss": 0.4412,
      "step": 2091
    },
    {
      "epoch": 0.6278511404561825,
      "grad_norm": 0.14211386442184448,
      "learning_rate": 0.00017341536062789008,
      "loss": 0.4501,
      "step": 2092
    },
    {
      "epoch": 0.6281512605042017,
      "grad_norm": 0.14581553637981415,
      "learning_rate": 0.00017337978721608757,
      "loss": 0.4616,
      "step": 2093
    },
    {
      "epoch": 0.6284513805522209,
      "grad_norm": 0.14807532727718353,
      "learning_rate": 0.00017334419367338183,
      "loss": 0.4759,
      "step": 2094
    },
    {
      "epoch": 0.6287515006002401,
      "grad_norm": 0.16016747057437897,
      "learning_rate": 0.00017330858000953758,
      "loss": 0.4912,
      "step": 2095
    },
    {
      "epoch": 0.6290516206482593,
      "grad_norm": 0.1585829108953476,
      "learning_rate": 0.000173272946234325,
      "loss": 0.4647,
      "step": 2096
    },
    {
      "epoch": 0.6293517406962785,
      "grad_norm": 0.14752192795276642,
      "learning_rate": 0.0001732372923575198,
      "loss": 0.4384,
      "step": 2097
    },
    {
      "epoch": 0.6296518607442977,
      "grad_norm": 0.2791168987751007,
      "learning_rate": 0.00017320161838890323,
      "loss": 0.4225,
      "step": 2098
    },
    {
      "epoch": 0.629951980792317,
      "grad_norm": 0.16510701179504395,
      "learning_rate": 0.00017316592433826202,
      "loss": 0.4936,
      "step": 2099
    },
    {
      "epoch": 0.6302521008403361,
      "grad_norm": 0.15952514111995697,
      "learning_rate": 0.00017313021021538844,
      "loss": 0.4983,
      "step": 2100
    },
    {
      "epoch": 0.6305522208883554,
      "grad_norm": 0.1469140350818634,
      "learning_rate": 0.00017309447603008026,
      "loss": 0.4273,
      "step": 2101
    },
    {
      "epoch": 0.6308523409363745,
      "grad_norm": 0.16945834457874298,
      "learning_rate": 0.0001730587217921407,
      "loss": 0.5163,
      "step": 2102
    },
    {
      "epoch": 0.6311524609843937,
      "grad_norm": 0.15591193735599518,
      "learning_rate": 0.00017302294751137855,
      "loss": 0.4628,
      "step": 2103
    },
    {
      "epoch": 0.631452581032413,
      "grad_norm": 0.15113277733325958,
      "learning_rate": 0.00017298715319760807,
      "loss": 0.456,
      "step": 2104
    },
    {
      "epoch": 0.6317527010804321,
      "grad_norm": 0.16945365071296692,
      "learning_rate": 0.00017295133886064906,
      "loss": 0.491,
      "step": 2105
    },
    {
      "epoch": 0.6320528211284514,
      "grad_norm": 0.14711284637451172,
      "learning_rate": 0.0001729155045103267,
      "loss": 0.4822,
      "step": 2106
    },
    {
      "epoch": 0.6323529411764706,
      "grad_norm": 0.14515145123004913,
      "learning_rate": 0.0001728796501564718,
      "loss": 0.4862,
      "step": 2107
    },
    {
      "epoch": 0.6326530612244898,
      "grad_norm": 0.15135280787944794,
      "learning_rate": 0.00017284377580892052,
      "loss": 0.4615,
      "step": 2108
    },
    {
      "epoch": 0.632953181272509,
      "grad_norm": 0.1585320681333542,
      "learning_rate": 0.00017280788147751462,
      "loss": 0.4345,
      "step": 2109
    },
    {
      "epoch": 0.6332533013205283,
      "grad_norm": 0.14420172572135925,
      "learning_rate": 0.0001727719671721013,
      "loss": 0.4516,
      "step": 2110
    },
    {
      "epoch": 0.6335534213685474,
      "grad_norm": 0.13808774948120117,
      "learning_rate": 0.00017273603290253326,
      "loss": 0.431,
      "step": 2111
    },
    {
      "epoch": 0.6338535414165666,
      "grad_norm": 0.14348426461219788,
      "learning_rate": 0.0001727000786786686,
      "loss": 0.4624,
      "step": 2112
    },
    {
      "epoch": 0.6341536614645858,
      "grad_norm": 0.19821301102638245,
      "learning_rate": 0.000172664104510371,
      "loss": 0.4774,
      "step": 2113
    },
    {
      "epoch": 0.634453781512605,
      "grad_norm": 0.15625211596488953,
      "learning_rate": 0.00017262811040750953,
      "loss": 0.5078,
      "step": 2114
    },
    {
      "epoch": 0.6347539015606243,
      "grad_norm": 0.13860677182674408,
      "learning_rate": 0.00017259209637995876,
      "loss": 0.4708,
      "step": 2115
    },
    {
      "epoch": 0.6350540216086434,
      "grad_norm": 0.1412367820739746,
      "learning_rate": 0.00017255606243759872,
      "loss": 0.4723,
      "step": 2116
    },
    {
      "epoch": 0.6353541416566627,
      "grad_norm": 0.14818702638149261,
      "learning_rate": 0.00017252000859031494,
      "loss": 0.5005,
      "step": 2117
    },
    {
      "epoch": 0.6356542617046819,
      "grad_norm": 0.15439516305923462,
      "learning_rate": 0.00017248393484799835,
      "loss": 0.3705,
      "step": 2118
    },
    {
      "epoch": 0.635954381752701,
      "grad_norm": 0.1573125123977661,
      "learning_rate": 0.00017244784122054537,
      "loss": 0.4828,
      "step": 2119
    },
    {
      "epoch": 0.6362545018007203,
      "grad_norm": 0.15407924354076385,
      "learning_rate": 0.0001724117277178579,
      "loss": 0.5148,
      "step": 2120
    },
    {
      "epoch": 0.6365546218487395,
      "grad_norm": 0.15772031247615814,
      "learning_rate": 0.00017237559434984324,
      "loss": 0.5087,
      "step": 2121
    },
    {
      "epoch": 0.6368547418967587,
      "grad_norm": 0.14631694555282593,
      "learning_rate": 0.00017233944112641418,
      "loss": 0.4673,
      "step": 2122
    },
    {
      "epoch": 0.6371548619447779,
      "grad_norm": 0.1371794044971466,
      "learning_rate": 0.00017230326805748892,
      "loss": 0.4077,
      "step": 2123
    },
    {
      "epoch": 0.6374549819927972,
      "grad_norm": 0.1333874613046646,
      "learning_rate": 0.00017226707515299115,
      "loss": 0.4053,
      "step": 2124
    },
    {
      "epoch": 0.6377551020408163,
      "grad_norm": 0.14271295070648193,
      "learning_rate": 0.00017223086242284995,
      "loss": 0.4764,
      "step": 2125
    },
    {
      "epoch": 0.6380552220888356,
      "grad_norm": 0.14769414067268372,
      "learning_rate": 0.0001721946298769999,
      "loss": 0.4937,
      "step": 2126
    },
    {
      "epoch": 0.6383553421368547,
      "grad_norm": 0.16356337070465088,
      "learning_rate": 0.00017215837752538097,
      "loss": 0.445,
      "step": 2127
    },
    {
      "epoch": 0.6386554621848739,
      "grad_norm": 0.17607265710830688,
      "learning_rate": 0.00017212210537793856,
      "loss": 0.446,
      "step": 2128
    },
    {
      "epoch": 0.6389555822328932,
      "grad_norm": 0.15226007997989655,
      "learning_rate": 0.00017208581344462353,
      "loss": 0.4741,
      "step": 2129
    },
    {
      "epoch": 0.6392557022809123,
      "grad_norm": 0.15901261568069458,
      "learning_rate": 0.0001720495017353922,
      "loss": 0.4863,
      "step": 2130
    },
    {
      "epoch": 0.6395558223289316,
      "grad_norm": 0.15675465762615204,
      "learning_rate": 0.00017201317026020618,
      "loss": 0.5179,
      "step": 2131
    },
    {
      "epoch": 0.6398559423769508,
      "grad_norm": 0.13890616595745087,
      "learning_rate": 0.0001719768190290327,
      "loss": 0.3986,
      "step": 2132
    },
    {
      "epoch": 0.64015606242497,
      "grad_norm": 0.13822025060653687,
      "learning_rate": 0.0001719404480518442,
      "loss": 0.4212,
      "step": 2133
    },
    {
      "epoch": 0.6404561824729892,
      "grad_norm": 0.1566293090581894,
      "learning_rate": 0.00017190405733861873,
      "loss": 0.5104,
      "step": 2134
    },
    {
      "epoch": 0.6407563025210085,
      "grad_norm": 0.13361778855323792,
      "learning_rate": 0.00017186764689933957,
      "loss": 0.443,
      "step": 2135
    },
    {
      "epoch": 0.6410564225690276,
      "grad_norm": 0.16091105341911316,
      "learning_rate": 0.00017183121674399558,
      "loss": 0.5039,
      "step": 2136
    },
    {
      "epoch": 0.6413565426170468,
      "grad_norm": 0.14156047999858856,
      "learning_rate": 0.00017179476688258097,
      "loss": 0.4789,
      "step": 2137
    },
    {
      "epoch": 0.641656662665066,
      "grad_norm": 0.20020362734794617,
      "learning_rate": 0.00017175829732509525,
      "loss": 0.5266,
      "step": 2138
    },
    {
      "epoch": 0.6419567827130852,
      "grad_norm": 0.173885315656662,
      "learning_rate": 0.00017172180808154352,
      "loss": 0.5213,
      "step": 2139
    },
    {
      "epoch": 0.6422569027611045,
      "grad_norm": 0.15453475713729858,
      "learning_rate": 0.00017168529916193614,
      "loss": 0.4017,
      "step": 2140
    },
    {
      "epoch": 0.6425570228091236,
      "grad_norm": 0.14919741451740265,
      "learning_rate": 0.00017164877057628892,
      "loss": 0.4756,
      "step": 2141
    },
    {
      "epoch": 0.6428571428571429,
      "grad_norm": 0.16573822498321533,
      "learning_rate": 0.00017161222233462307,
      "loss": 0.5327,
      "step": 2142
    },
    {
      "epoch": 0.6431572629051621,
      "grad_norm": 0.18085473775863647,
      "learning_rate": 0.00017157565444696516,
      "loss": 0.5019,
      "step": 2143
    },
    {
      "epoch": 0.6434573829531812,
      "grad_norm": 0.37763917446136475,
      "learning_rate": 0.00017153906692334717,
      "loss": 0.5499,
      "step": 2144
    },
    {
      "epoch": 0.6437575030012005,
      "grad_norm": 0.2172531634569168,
      "learning_rate": 0.0001715024597738065,
      "loss": 0.4735,
      "step": 2145
    },
    {
      "epoch": 0.6440576230492197,
      "grad_norm": 0.17138592898845673,
      "learning_rate": 0.00017146583300838586,
      "loss": 0.5055,
      "step": 2146
    },
    {
      "epoch": 0.6443577430972389,
      "grad_norm": 0.16673091053962708,
      "learning_rate": 0.00017142918663713342,
      "loss": 0.5186,
      "step": 2147
    },
    {
      "epoch": 0.6446578631452581,
      "grad_norm": 0.19096100330352783,
      "learning_rate": 0.00017139252067010268,
      "loss": 0.4336,
      "step": 2148
    },
    {
      "epoch": 0.6449579831932774,
      "grad_norm": 0.1490613967180252,
      "learning_rate": 0.00017135583511735248,
      "loss": 0.4933,
      "step": 2149
    },
    {
      "epoch": 0.6452581032412965,
      "grad_norm": 0.14809657633304596,
      "learning_rate": 0.00017131912998894717,
      "loss": 0.486,
      "step": 2150
    },
    {
      "epoch": 0.6455582232893158,
      "grad_norm": 0.1331087201833725,
      "learning_rate": 0.00017128240529495632,
      "loss": 0.4362,
      "step": 2151
    },
    {
      "epoch": 0.6458583433373349,
      "grad_norm": 0.15874071419239044,
      "learning_rate": 0.0001712456610454549,
      "loss": 0.4792,
      "step": 2152
    },
    {
      "epoch": 0.6461584633853541,
      "grad_norm": 0.24738092720508575,
      "learning_rate": 0.00017120889725052337,
      "loss": 0.4663,
      "step": 2153
    },
    {
      "epoch": 0.6464585834333734,
      "grad_norm": 0.15679825842380524,
      "learning_rate": 0.0001711721139202474,
      "loss": 0.4808,
      "step": 2154
    },
    {
      "epoch": 0.6467587034813925,
      "grad_norm": 0.29751163721084595,
      "learning_rate": 0.00017113531106471809,
      "loss": 0.4908,
      "step": 2155
    },
    {
      "epoch": 0.6470588235294118,
      "grad_norm": 0.17562802135944366,
      "learning_rate": 0.0001710984886940318,
      "loss": 0.5049,
      "step": 2156
    },
    {
      "epoch": 0.647358943577431,
      "grad_norm": 0.14633187651634216,
      "learning_rate": 0.00017106164681829046,
      "loss": 0.4547,
      "step": 2157
    },
    {
      "epoch": 0.6476590636254502,
      "grad_norm": 0.1577942818403244,
      "learning_rate": 0.00017102478544760112,
      "loss": 0.4259,
      "step": 2158
    },
    {
      "epoch": 0.6479591836734694,
      "grad_norm": 0.1356159895658493,
      "learning_rate": 0.00017098790459207632,
      "loss": 0.4123,
      "step": 2159
    },
    {
      "epoch": 0.6482593037214885,
      "grad_norm": 0.15663506090641022,
      "learning_rate": 0.0001709510042618339,
      "loss": 0.4634,
      "step": 2160
    },
    {
      "epoch": 0.6485594237695078,
      "grad_norm": 0.1408921331167221,
      "learning_rate": 0.00017091408446699697,
      "loss": 0.4169,
      "step": 2161
    },
    {
      "epoch": 0.648859543817527,
      "grad_norm": 0.14489974081516266,
      "learning_rate": 0.00017087714521769415,
      "loss": 0.4642,
      "step": 2162
    },
    {
      "epoch": 0.6491596638655462,
      "grad_norm": 0.13579650223255157,
      "learning_rate": 0.00017084018652405923,
      "loss": 0.404,
      "step": 2163
    },
    {
      "epoch": 0.6494597839135654,
      "grad_norm": 0.13602407276630402,
      "learning_rate": 0.00017080320839623148,
      "loss": 0.4439,
      "step": 2164
    },
    {
      "epoch": 0.6497599039615847,
      "grad_norm": 0.14799612760543823,
      "learning_rate": 0.00017076621084435533,
      "loss": 0.463,
      "step": 2165
    },
    {
      "epoch": 0.6500600240096038,
      "grad_norm": 0.16625499725341797,
      "learning_rate": 0.00017072919387858072,
      "loss": 0.4916,
      "step": 2166
    },
    {
      "epoch": 0.6503601440576231,
      "grad_norm": 0.15468667447566986,
      "learning_rate": 0.0001706921575090628,
      "loss": 0.4974,
      "step": 2167
    },
    {
      "epoch": 0.6506602641056423,
      "grad_norm": 0.1400432586669922,
      "learning_rate": 0.00017065510174596202,
      "loss": 0.4565,
      "step": 2168
    },
    {
      "epoch": 0.6509603841536614,
      "grad_norm": 0.13010244071483612,
      "learning_rate": 0.00017061802659944428,
      "loss": 0.3803,
      "step": 2169
    },
    {
      "epoch": 0.6512605042016807,
      "grad_norm": 0.1378718614578247,
      "learning_rate": 0.00017058093207968067,
      "loss": 0.4357,
      "step": 2170
    },
    {
      "epoch": 0.6515606242496998,
      "grad_norm": 0.15193061530590057,
      "learning_rate": 0.00017054381819684767,
      "loss": 0.4692,
      "step": 2171
    },
    {
      "epoch": 0.6518607442977191,
      "grad_norm": 0.1514962762594223,
      "learning_rate": 0.00017050668496112705,
      "loss": 0.5058,
      "step": 2172
    },
    {
      "epoch": 0.6521608643457383,
      "grad_norm": 0.15576845407485962,
      "learning_rate": 0.00017046953238270587,
      "loss": 0.5009,
      "step": 2173
    },
    {
      "epoch": 0.6524609843937575,
      "grad_norm": 0.143166646361351,
      "learning_rate": 0.00017043236047177654,
      "loss": 0.4734,
      "step": 2174
    },
    {
      "epoch": 0.6527611044417767,
      "grad_norm": 0.1380712240934372,
      "learning_rate": 0.00017039516923853673,
      "loss": 0.4174,
      "step": 2175
    },
    {
      "epoch": 0.6530612244897959,
      "grad_norm": 0.17489846050739288,
      "learning_rate": 0.00017035795869318942,
      "loss": 0.51,
      "step": 2176
    },
    {
      "epoch": 0.6533613445378151,
      "grad_norm": 0.16089336574077606,
      "learning_rate": 0.00017032072884594292,
      "loss": 0.5001,
      "step": 2177
    },
    {
      "epoch": 0.6536614645858343,
      "grad_norm": 0.13599681854248047,
      "learning_rate": 0.00017028347970701085,
      "loss": 0.4047,
      "step": 2178
    },
    {
      "epoch": 0.6539615846338536,
      "grad_norm": 0.14149728417396545,
      "learning_rate": 0.00017024621128661197,
      "loss": 0.4526,
      "step": 2179
    },
    {
      "epoch": 0.6542617046818727,
      "grad_norm": 0.14691421389579773,
      "learning_rate": 0.0001702089235949705,
      "loss": 0.4341,
      "step": 2180
    },
    {
      "epoch": 0.654561824729892,
      "grad_norm": 0.14323483407497406,
      "learning_rate": 0.00017017161664231593,
      "loss": 0.4982,
      "step": 2181
    },
    {
      "epoch": 0.6548619447779112,
      "grad_norm": 0.13906525075435638,
      "learning_rate": 0.00017013429043888297,
      "loss": 0.4486,
      "step": 2182
    },
    {
      "epoch": 0.6551620648259304,
      "grad_norm": 0.6826688051223755,
      "learning_rate": 0.0001700969449949116,
      "loss": 0.4892,
      "step": 2183
    },
    {
      "epoch": 0.6554621848739496,
      "grad_norm": 1.933110237121582,
      "learning_rate": 0.00017005958032064716,
      "loss": 0.5188,
      "step": 2184
    },
    {
      "epoch": 0.6557623049219687,
      "grad_norm": 0.13388942182064056,
      "learning_rate": 0.00017002219642634015,
      "loss": 0.42,
      "step": 2185
    },
    {
      "epoch": 0.656062424969988,
      "grad_norm": 0.39405903220176697,
      "learning_rate": 0.00016998479332224655,
      "loss": 0.4892,
      "step": 2186
    },
    {
      "epoch": 0.6563625450180072,
      "grad_norm": 0.20359550416469574,
      "learning_rate": 0.00016994737101862735,
      "loss": 0.4792,
      "step": 2187
    },
    {
      "epoch": 0.6566626650660264,
      "grad_norm": 0.1704937219619751,
      "learning_rate": 0.00016990992952574895,
      "loss": 0.5432,
      "step": 2188
    },
    {
      "epoch": 0.6569627851140456,
      "grad_norm": 0.15303505957126617,
      "learning_rate": 0.00016987246885388305,
      "loss": 0.4766,
      "step": 2189
    },
    {
      "epoch": 0.6572629051620649,
      "grad_norm": 0.15986792743206024,
      "learning_rate": 0.0001698349890133065,
      "loss": 0.4634,
      "step": 2190
    },
    {
      "epoch": 0.657563025210084,
      "grad_norm": 0.1516830176115036,
      "learning_rate": 0.0001697974900143015,
      "loss": 0.4386,
      "step": 2191
    },
    {
      "epoch": 0.6578631452581032,
      "grad_norm": 0.17122162878513336,
      "learning_rate": 0.00016975997186715542,
      "loss": 0.4553,
      "step": 2192
    },
    {
      "epoch": 0.6581632653061225,
      "grad_norm": 0.16046889126300812,
      "learning_rate": 0.000169722434582161,
      "loss": 0.4854,
      "step": 2193
    },
    {
      "epoch": 0.6584633853541416,
      "grad_norm": 0.18766555190086365,
      "learning_rate": 0.00016968487816961612,
      "loss": 0.4352,
      "step": 2194
    },
    {
      "epoch": 0.6587635054021609,
      "grad_norm": 0.24515730142593384,
      "learning_rate": 0.00016964730263982394,
      "loss": 0.532,
      "step": 2195
    },
    {
      "epoch": 0.65906362545018,
      "grad_norm": 0.18200074136257172,
      "learning_rate": 0.00016960970800309293,
      "loss": 0.517,
      "step": 2196
    },
    {
      "epoch": 0.6593637454981993,
      "grad_norm": 0.16878826916217804,
      "learning_rate": 0.0001695720942697367,
      "loss": 0.4512,
      "step": 2197
    },
    {
      "epoch": 0.6596638655462185,
      "grad_norm": 0.16377224028110504,
      "learning_rate": 0.00016953446145007416,
      "loss": 0.4876,
      "step": 2198
    },
    {
      "epoch": 0.6599639855942377,
      "grad_norm": 0.15861105918884277,
      "learning_rate": 0.00016949680955442945,
      "loss": 0.491,
      "step": 2199
    },
    {
      "epoch": 0.6602641056422569,
      "grad_norm": 0.14829066395759583,
      "learning_rate": 0.0001694591385931319,
      "loss": 0.4204,
      "step": 2200
    },
    {
      "epoch": 0.6605642256902761,
      "grad_norm": 0.1616215854883194,
      "learning_rate": 0.0001694214485765162,
      "loss": 0.4864,
      "step": 2201
    },
    {
      "epoch": 0.6608643457382953,
      "grad_norm": 0.1581946164369583,
      "learning_rate": 0.00016938373951492206,
      "loss": 0.5292,
      "step": 2202
    },
    {
      "epoch": 0.6611644657863145,
      "grad_norm": 0.15371695160865784,
      "learning_rate": 0.00016934601141869456,
      "loss": 0.4524,
      "step": 2203
    },
    {
      "epoch": 0.6614645858343338,
      "grad_norm": 0.15436139702796936,
      "learning_rate": 0.00016930826429818405,
      "loss": 0.4704,
      "step": 2204
    },
    {
      "epoch": 0.6617647058823529,
      "grad_norm": 0.1531285047531128,
      "learning_rate": 0.00016927049816374594,
      "loss": 0.4567,
      "step": 2205
    },
    {
      "epoch": 0.6620648259303722,
      "grad_norm": 0.15854240953922272,
      "learning_rate": 0.00016923271302574094,
      "loss": 0.4497,
      "step": 2206
    },
    {
      "epoch": 0.6623649459783914,
      "grad_norm": 0.15710663795471191,
      "learning_rate": 0.000169194908894535,
      "loss": 0.5279,
      "step": 2207
    },
    {
      "epoch": 0.6626650660264105,
      "grad_norm": 0.15883848071098328,
      "learning_rate": 0.0001691570857804992,
      "loss": 0.4974,
      "step": 2208
    },
    {
      "epoch": 0.6629651860744298,
      "grad_norm": 0.21286913752555847,
      "learning_rate": 0.00016911924369400992,
      "loss": 0.4326,
      "step": 2209
    },
    {
      "epoch": 0.6632653061224489,
      "grad_norm": 0.15384641289710999,
      "learning_rate": 0.00016908138264544874,
      "loss": 0.5119,
      "step": 2210
    },
    {
      "epoch": 0.6635654261704682,
      "grad_norm": 0.16249476373195648,
      "learning_rate": 0.00016904350264520233,
      "loss": 0.5084,
      "step": 2211
    },
    {
      "epoch": 0.6638655462184874,
      "grad_norm": 0.1680646389722824,
      "learning_rate": 0.00016900560370366265,
      "loss": 0.5169,
      "step": 2212
    },
    {
      "epoch": 0.6641656662665066,
      "grad_norm": 0.1526922881603241,
      "learning_rate": 0.0001689676858312269,
      "loss": 0.4992,
      "step": 2213
    },
    {
      "epoch": 0.6644657863145258,
      "grad_norm": 0.4710622727870941,
      "learning_rate": 0.0001689297490382973,
      "loss": 0.464,
      "step": 2214
    },
    {
      "epoch": 0.6647659063625451,
      "grad_norm": 0.1634788066148758,
      "learning_rate": 0.00016889179333528148,
      "loss": 0.4725,
      "step": 2215
    },
    {
      "epoch": 0.6650660264105642,
      "grad_norm": 0.15752537548542023,
      "learning_rate": 0.0001688538187325921,
      "loss": 0.4526,
      "step": 2216
    },
    {
      "epoch": 0.6653661464585834,
      "grad_norm": 0.15170423686504364,
      "learning_rate": 0.00016881582524064706,
      "loss": 0.4586,
      "step": 2217
    },
    {
      "epoch": 0.6656662665066027,
      "grad_norm": 0.1426185667514801,
      "learning_rate": 0.00016877781286986948,
      "loss": 0.4159,
      "step": 2218
    },
    {
      "epoch": 0.6659663865546218,
      "grad_norm": 0.15381565690040588,
      "learning_rate": 0.00016873978163068753,
      "loss": 0.4793,
      "step": 2219
    },
    {
      "epoch": 0.6662665066026411,
      "grad_norm": 0.1260295808315277,
      "learning_rate": 0.00016870173153353478,
      "loss": 0.3845,
      "step": 2220
    },
    {
      "epoch": 0.6665666266506602,
      "grad_norm": 0.1446010321378708,
      "learning_rate": 0.00016866366258884967,
      "loss": 0.448,
      "step": 2221
    },
    {
      "epoch": 0.6668667466986795,
      "grad_norm": 0.18416649103164673,
      "learning_rate": 0.00016862557480707612,
      "loss": 0.4565,
      "step": 2222
    },
    {
      "epoch": 0.6671668667466987,
      "grad_norm": 0.15086296200752258,
      "learning_rate": 0.00016858746819866302,
      "loss": 0.4724,
      "step": 2223
    },
    {
      "epoch": 0.6674669867947179,
      "grad_norm": 0.15301254391670227,
      "learning_rate": 0.00016854934277406446,
      "loss": 0.5115,
      "step": 2224
    },
    {
      "epoch": 0.6677671068427371,
      "grad_norm": 0.24658562242984772,
      "learning_rate": 0.00016851119854373976,
      "loss": 0.5038,
      "step": 2225
    },
    {
      "epoch": 0.6680672268907563,
      "grad_norm": 0.15066738426685333,
      "learning_rate": 0.00016847303551815332,
      "loss": 0.453,
      "step": 2226
    },
    {
      "epoch": 0.6683673469387755,
      "grad_norm": 0.17413273453712463,
      "learning_rate": 0.00016843485370777474,
      "loss": 0.4851,
      "step": 2227
    },
    {
      "epoch": 0.6686674669867947,
      "grad_norm": 0.14235296845436096,
      "learning_rate": 0.00016839665312307878,
      "loss": 0.4635,
      "step": 2228
    },
    {
      "epoch": 0.668967587034814,
      "grad_norm": 0.15093177556991577,
      "learning_rate": 0.00016835843377454527,
      "loss": 0.48,
      "step": 2229
    },
    {
      "epoch": 0.6692677070828331,
      "grad_norm": 0.1457367092370987,
      "learning_rate": 0.0001683201956726593,
      "loss": 0.4722,
      "step": 2230
    },
    {
      "epoch": 0.6695678271308524,
      "grad_norm": 0.17020396888256073,
      "learning_rate": 0.00016828193882791108,
      "loss": 0.4627,
      "step": 2231
    },
    {
      "epoch": 0.6698679471788715,
      "grad_norm": 0.15534807741641998,
      "learning_rate": 0.0001682436632507959,
      "loss": 0.5646,
      "step": 2232
    },
    {
      "epoch": 0.6701680672268907,
      "grad_norm": 0.14641976356506348,
      "learning_rate": 0.0001682053689518142,
      "loss": 0.5024,
      "step": 2233
    },
    {
      "epoch": 0.67046818727491,
      "grad_norm": 0.15433022379875183,
      "learning_rate": 0.0001681670559414716,
      "loss": 0.494,
      "step": 2234
    },
    {
      "epoch": 0.6707683073229291,
      "grad_norm": 0.1448221504688263,
      "learning_rate": 0.00016812872423027888,
      "loss": 0.4797,
      "step": 2235
    },
    {
      "epoch": 0.6710684273709484,
      "grad_norm": 0.13265001773834229,
      "learning_rate": 0.00016809037382875186,
      "loss": 0.4194,
      "step": 2236
    },
    {
      "epoch": 0.6713685474189676,
      "grad_norm": 0.24117717146873474,
      "learning_rate": 0.00016805200474741155,
      "loss": 0.5215,
      "step": 2237
    },
    {
      "epoch": 0.6716686674669868,
      "grad_norm": 0.17228154838085175,
      "learning_rate": 0.00016801361699678405,
      "loss": 0.4976,
      "step": 2238
    },
    {
      "epoch": 0.671968787515006,
      "grad_norm": 0.13660216331481934,
      "learning_rate": 0.00016797521058740063,
      "loss": 0.4128,
      "step": 2239
    },
    {
      "epoch": 0.6722689075630253,
      "grad_norm": 0.21866540610790253,
      "learning_rate": 0.0001679367855297976,
      "loss": 0.4777,
      "step": 2240
    },
    {
      "epoch": 0.6725690276110444,
      "grad_norm": 0.14396953582763672,
      "learning_rate": 0.00016789834183451646,
      "loss": 0.4473,
      "step": 2241
    },
    {
      "epoch": 0.6728691476590636,
      "grad_norm": 0.2414744794368744,
      "learning_rate": 0.00016785987951210385,
      "loss": 0.4548,
      "step": 2242
    },
    {
      "epoch": 0.6731692677070829,
      "grad_norm": 0.1470125913619995,
      "learning_rate": 0.00016782139857311136,
      "loss": 0.4366,
      "step": 2243
    },
    {
      "epoch": 0.673469387755102,
      "grad_norm": 0.13129663467407227,
      "learning_rate": 0.0001677828990280959,
      "loss": 0.4144,
      "step": 2244
    },
    {
      "epoch": 0.6737695078031213,
      "grad_norm": 0.14611317217350006,
      "learning_rate": 0.00016774438088761927,
      "loss": 0.4635,
      "step": 2245
    },
    {
      "epoch": 0.6740696278511404,
      "grad_norm": 0.13981489837169647,
      "learning_rate": 0.0001677058441622486,
      "loss": 0.467,
      "step": 2246
    },
    {
      "epoch": 0.6743697478991597,
      "grad_norm": 0.13604478538036346,
      "learning_rate": 0.00016766728886255592,
      "loss": 0.4421,
      "step": 2247
    },
    {
      "epoch": 0.6746698679471789,
      "grad_norm": 0.1460788995027542,
      "learning_rate": 0.00016762871499911844,
      "loss": 0.476,
      "step": 2248
    },
    {
      "epoch": 0.674969987995198,
      "grad_norm": 0.13810686767101288,
      "learning_rate": 0.0001675901225825185,
      "loss": 0.4452,
      "step": 2249
    },
    {
      "epoch": 0.6752701080432173,
      "grad_norm": 0.15245585143566132,
      "learning_rate": 0.0001675515116233434,
      "loss": 0.4814,
      "step": 2250
    },
    {
      "epoch": 0.6755702280912365,
      "grad_norm": 0.13995683193206787,
      "learning_rate": 0.00016751288213218572,
      "loss": 0.4371,
      "step": 2251
    },
    {
      "epoch": 0.6758703481392557,
      "grad_norm": 0.14143361151218414,
      "learning_rate": 0.00016747423411964295,
      "loss": 0.469,
      "step": 2252
    },
    {
      "epoch": 0.6761704681872749,
      "grad_norm": 0.13966970145702362,
      "learning_rate": 0.00016743556759631778,
      "loss": 0.4406,
      "step": 2253
    },
    {
      "epoch": 0.6764705882352942,
      "grad_norm": 0.13671116530895233,
      "learning_rate": 0.00016739688257281788,
      "loss": 0.4137,
      "step": 2254
    },
    {
      "epoch": 0.6767707082833133,
      "grad_norm": 0.13782845437526703,
      "learning_rate": 0.00016735817905975603,
      "loss": 0.4323,
      "step": 2255
    },
    {
      "epoch": 0.6770708283313326,
      "grad_norm": 0.15411385893821716,
      "learning_rate": 0.00016731945706775013,
      "loss": 0.482,
      "step": 2256
    },
    {
      "epoch": 0.6773709483793517,
      "grad_norm": 0.15254433453083038,
      "learning_rate": 0.00016728071660742315,
      "loss": 0.4975,
      "step": 2257
    },
    {
      "epoch": 0.6776710684273709,
      "grad_norm": 0.14145848155021667,
      "learning_rate": 0.000167241957689403,
      "loss": 0.4492,
      "step": 2258
    },
    {
      "epoch": 0.6779711884753902,
      "grad_norm": 0.14561478793621063,
      "learning_rate": 0.0001672031803243228,
      "loss": 0.4613,
      "step": 2259
    },
    {
      "epoch": 0.6782713085234093,
      "grad_norm": 0.14410771429538727,
      "learning_rate": 0.0001671643845228207,
      "loss": 0.4447,
      "step": 2260
    },
    {
      "epoch": 0.6785714285714286,
      "grad_norm": 0.14435681700706482,
      "learning_rate": 0.00016712557029553983,
      "loss": 0.4498,
      "step": 2261
    },
    {
      "epoch": 0.6788715486194478,
      "grad_norm": 0.1489761620759964,
      "learning_rate": 0.00016708673765312849,
      "loss": 0.4406,
      "step": 2262
    },
    {
      "epoch": 0.679171668667467,
      "grad_norm": 0.14680323004722595,
      "learning_rate": 0.00016704788660623987,
      "loss": 0.4957,
      "step": 2263
    },
    {
      "epoch": 0.6794717887154862,
      "grad_norm": 0.14683204889297485,
      "learning_rate": 0.0001670090171655324,
      "loss": 0.4304,
      "step": 2264
    },
    {
      "epoch": 0.6797719087635054,
      "grad_norm": 0.14627540111541748,
      "learning_rate": 0.00016697012934166944,
      "loss": 0.4955,
      "step": 2265
    },
    {
      "epoch": 0.6800720288115246,
      "grad_norm": 0.16079705953598022,
      "learning_rate": 0.0001669312231453194,
      "loss": 0.4412,
      "step": 2266
    },
    {
      "epoch": 0.6803721488595438,
      "grad_norm": 0.14432165026664734,
      "learning_rate": 0.00016689229858715576,
      "loss": 0.4334,
      "step": 2267
    },
    {
      "epoch": 0.680672268907563,
      "grad_norm": 0.14839954674243927,
      "learning_rate": 0.00016685335567785705,
      "loss": 0.4511,
      "step": 2268
    },
    {
      "epoch": 0.6809723889555822,
      "grad_norm": 0.14657177031040192,
      "learning_rate": 0.00016681439442810679,
      "loss": 0.4664,
      "step": 2269
    },
    {
      "epoch": 0.6812725090036015,
      "grad_norm": 0.14277929067611694,
      "learning_rate": 0.00016677541484859352,
      "loss": 0.4895,
      "step": 2270
    },
    {
      "epoch": 0.6815726290516206,
      "grad_norm": 0.14854009449481964,
      "learning_rate": 0.00016673641695001088,
      "loss": 0.4802,
      "step": 2271
    },
    {
      "epoch": 0.6818727490996399,
      "grad_norm": 0.21721404790878296,
      "learning_rate": 0.00016669740074305752,
      "loss": 0.4721,
      "step": 2272
    },
    {
      "epoch": 0.6821728691476591,
      "grad_norm": 0.14389510452747345,
      "learning_rate": 0.00016665836623843701,
      "loss": 0.4618,
      "step": 2273
    },
    {
      "epoch": 0.6824729891956782,
      "grad_norm": 0.15691958367824554,
      "learning_rate": 0.0001666193134468581,
      "loss": 0.4687,
      "step": 2274
    },
    {
      "epoch": 0.6827731092436975,
      "grad_norm": 0.14597022533416748,
      "learning_rate": 0.00016658024237903445,
      "loss": 0.4747,
      "step": 2275
    },
    {
      "epoch": 0.6830732292917167,
      "grad_norm": 0.15653330087661743,
      "learning_rate": 0.00016654115304568472,
      "loss": 0.4877,
      "step": 2276
    },
    {
      "epoch": 0.6833733493397359,
      "grad_norm": 0.13929207623004913,
      "learning_rate": 0.00016650204545753266,
      "loss": 0.4856,
      "step": 2277
    },
    {
      "epoch": 0.6836734693877551,
      "grad_norm": 0.3877395987510681,
      "learning_rate": 0.000166462919625307,
      "loss": 0.4308,
      "step": 2278
    },
    {
      "epoch": 0.6839735894357744,
      "grad_norm": 0.1269235759973526,
      "learning_rate": 0.00016642377555974142,
      "loss": 0.4114,
      "step": 2279
    },
    {
      "epoch": 0.6842737094837935,
      "grad_norm": 0.1468934714794159,
      "learning_rate": 0.0001663846132715747,
      "loss": 0.5039,
      "step": 2280
    },
    {
      "epoch": 0.6845738295318127,
      "grad_norm": 0.14282235503196716,
      "learning_rate": 0.0001663454327715505,
      "loss": 0.4331,
      "step": 2281
    },
    {
      "epoch": 0.6848739495798319,
      "grad_norm": 0.15750561654567719,
      "learning_rate": 0.00016630623407041758,
      "loss": 0.4619,
      "step": 2282
    },
    {
      "epoch": 0.6851740696278511,
      "grad_norm": 0.18510620296001434,
      "learning_rate": 0.0001662670171789297,
      "loss": 0.4194,
      "step": 2283
    },
    {
      "epoch": 0.6854741896758704,
      "grad_norm": 0.15529020130634308,
      "learning_rate": 0.00016622778210784547,
      "loss": 0.4733,
      "step": 2284
    },
    {
      "epoch": 0.6857743097238895,
      "grad_norm": 0.16339364647865295,
      "learning_rate": 0.00016618852886792862,
      "loss": 0.4461,
      "step": 2285
    },
    {
      "epoch": 0.6860744297719088,
      "grad_norm": 0.14887449145317078,
      "learning_rate": 0.00016614925746994783,
      "loss": 0.4122,
      "step": 2286
    },
    {
      "epoch": 0.686374549819928,
      "grad_norm": 0.17690427601337433,
      "learning_rate": 0.00016610996792467677,
      "loss": 0.4675,
      "step": 2287
    },
    {
      "epoch": 0.6866746698679472,
      "grad_norm": 0.14621487259864807,
      "learning_rate": 0.00016607066024289404,
      "loss": 0.5069,
      "step": 2288
    },
    {
      "epoch": 0.6869747899159664,
      "grad_norm": 0.14996911585330963,
      "learning_rate": 0.0001660313344353833,
      "loss": 0.4516,
      "step": 2289
    },
    {
      "epoch": 0.6872749099639855,
      "grad_norm": 0.1662423014640808,
      "learning_rate": 0.00016599199051293314,
      "loss": 0.459,
      "step": 2290
    },
    {
      "epoch": 0.6875750300120048,
      "grad_norm": 0.19129732251167297,
      "learning_rate": 0.00016595262848633703,
      "loss": 0.4695,
      "step": 2291
    },
    {
      "epoch": 0.687875150060024,
      "grad_norm": 0.14279800653457642,
      "learning_rate": 0.00016591324836639357,
      "loss": 0.4337,
      "step": 2292
    },
    {
      "epoch": 0.6881752701080432,
      "grad_norm": 0.15134789049625397,
      "learning_rate": 0.00016587385016390624,
      "loss": 0.4685,
      "step": 2293
    },
    {
      "epoch": 0.6884753901560624,
      "grad_norm": 0.20491233468055725,
      "learning_rate": 0.00016583443388968344,
      "loss": 0.4921,
      "step": 2294
    },
    {
      "epoch": 0.6887755102040817,
      "grad_norm": 0.1463315188884735,
      "learning_rate": 0.00016579499955453865,
      "loss": 0.469,
      "step": 2295
    },
    {
      "epoch": 0.6890756302521008,
      "grad_norm": 0.2550693452358246,
      "learning_rate": 0.00016575554716929012,
      "loss": 0.4493,
      "step": 2296
    },
    {
      "epoch": 0.68937575030012,
      "grad_norm": 0.26226985454559326,
      "learning_rate": 0.0001657160767447612,
      "loss": 0.4992,
      "step": 2297
    },
    {
      "epoch": 0.6896758703481393,
      "grad_norm": 0.1389845758676529,
      "learning_rate": 0.00016567658829178022,
      "loss": 0.4366,
      "step": 2298
    },
    {
      "epoch": 0.6899759903961584,
      "grad_norm": 0.1418905258178711,
      "learning_rate": 0.0001656370818211803,
      "loss": 0.4513,
      "step": 2299
    },
    {
      "epoch": 0.6902761104441777,
      "grad_norm": 0.13269588351249695,
      "learning_rate": 0.0001655975573437996,
      "loss": 0.4409,
      "step": 2300
    },
    {
      "epoch": 0.6905762304921969,
      "grad_norm": 0.17201949656009674,
      "learning_rate": 0.00016555801487048126,
      "loss": 0.5256,
      "step": 2301
    },
    {
      "epoch": 0.6908763505402161,
      "grad_norm": 0.1453656107187271,
      "learning_rate": 0.00016551845441207326,
      "loss": 0.474,
      "step": 2302
    },
    {
      "epoch": 0.6911764705882353,
      "grad_norm": 0.1528858244419098,
      "learning_rate": 0.00016547887597942855,
      "loss": 0.4822,
      "step": 2303
    },
    {
      "epoch": 0.6914765906362546,
      "grad_norm": 0.14635047316551208,
      "learning_rate": 0.00016543927958340504,
      "loss": 0.4534,
      "step": 2304
    },
    {
      "epoch": 0.6917767106842737,
      "grad_norm": 0.1443641185760498,
      "learning_rate": 0.00016539966523486553,
      "loss": 0.4873,
      "step": 2305
    },
    {
      "epoch": 0.6920768307322929,
      "grad_norm": 0.24002930521965027,
      "learning_rate": 0.00016536003294467778,
      "loss": 0.4502,
      "step": 2306
    },
    {
      "epoch": 0.6923769507803121,
      "grad_norm": 0.2631763219833374,
      "learning_rate": 0.00016532038272371445,
      "loss": 0.5258,
      "step": 2307
    },
    {
      "epoch": 0.6926770708283313,
      "grad_norm": 0.15520347654819489,
      "learning_rate": 0.0001652807145828531,
      "loss": 0.5068,
      "step": 2308
    },
    {
      "epoch": 0.6929771908763506,
      "grad_norm": 0.13766346871852875,
      "learning_rate": 0.0001652410285329763,
      "loss": 0.4309,
      "step": 2309
    },
    {
      "epoch": 0.6932773109243697,
      "grad_norm": 0.14334671199321747,
      "learning_rate": 0.0001652013245849714,
      "loss": 0.4534,
      "step": 2310
    },
    {
      "epoch": 0.693577430972389,
      "grad_norm": 0.1372639685869217,
      "learning_rate": 0.00016516160274973073,
      "loss": 0.4453,
      "step": 2311
    },
    {
      "epoch": 0.6938775510204082,
      "grad_norm": 0.15254907310009003,
      "learning_rate": 0.0001651218630381515,
      "loss": 0.4572,
      "step": 2312
    },
    {
      "epoch": 0.6941776710684273,
      "grad_norm": 3.143188238143921,
      "learning_rate": 0.0001650821054611359,
      "loss": 0.504,
      "step": 2313
    },
    {
      "epoch": 0.6944777911164466,
      "grad_norm": 0.15842002630233765,
      "learning_rate": 0.00016504233002959093,
      "loss": 0.5412,
      "step": 2314
    },
    {
      "epoch": 0.6947779111644657,
      "grad_norm": 0.9213683605194092,
      "learning_rate": 0.0001650025367544285,
      "loss": 0.413,
      "step": 2315
    },
    {
      "epoch": 0.695078031212485,
      "grad_norm": 0.13423404097557068,
      "learning_rate": 0.0001649627256465655,
      "loss": 0.4424,
      "step": 2316
    },
    {
      "epoch": 0.6953781512605042,
      "grad_norm": 0.14566202461719513,
      "learning_rate": 0.00016492289671692366,
      "loss": 0.4464,
      "step": 2317
    },
    {
      "epoch": 0.6956782713085234,
      "grad_norm": 0.1668601930141449,
      "learning_rate": 0.0001648830499764295,
      "loss": 0.4897,
      "step": 2318
    },
    {
      "epoch": 0.6959783913565426,
      "grad_norm": 0.16586680710315704,
      "learning_rate": 0.00016484318543601455,
      "loss": 0.481,
      "step": 2319
    },
    {
      "epoch": 0.6962785114045619,
      "grad_norm": 0.16026067733764648,
      "learning_rate": 0.00016480330310661523,
      "loss": 0.4539,
      "step": 2320
    },
    {
      "epoch": 0.696578631452581,
      "grad_norm": 0.1537049561738968,
      "learning_rate": 0.00016476340299917278,
      "loss": 0.4323,
      "step": 2321
    },
    {
      "epoch": 0.6968787515006002,
      "grad_norm": 0.1631222367286682,
      "learning_rate": 0.0001647234851246333,
      "loss": 0.4406,
      "step": 2322
    },
    {
      "epoch": 0.6971788715486195,
      "grad_norm": 0.18382146954536438,
      "learning_rate": 0.00016468354949394787,
      "loss": 0.4854,
      "step": 2323
    },
    {
      "epoch": 0.6974789915966386,
      "grad_norm": 0.1590641289949417,
      "learning_rate": 0.00016464359611807235,
      "loss": 0.413,
      "step": 2324
    },
    {
      "epoch": 0.6977791116446579,
      "grad_norm": 0.15298865735530853,
      "learning_rate": 0.00016460362500796746,
      "loss": 0.4587,
      "step": 2325
    },
    {
      "epoch": 0.698079231692677,
      "grad_norm": 0.20010384917259216,
      "learning_rate": 0.0001645636361745988,
      "loss": 0.4405,
      "step": 2326
    },
    {
      "epoch": 0.6983793517406963,
      "grad_norm": 0.1749500185251236,
      "learning_rate": 0.0001645236296289369,
      "loss": 0.5469,
      "step": 2327
    },
    {
      "epoch": 0.6986794717887155,
      "grad_norm": 0.1599074900150299,
      "learning_rate": 0.0001644836053819571,
      "loss": 0.453,
      "step": 2328
    },
    {
      "epoch": 0.6989795918367347,
      "grad_norm": 0.14947155117988586,
      "learning_rate": 0.00016444356344463953,
      "loss": 0.3807,
      "step": 2329
    },
    {
      "epoch": 0.6992797118847539,
      "grad_norm": 0.23378188908100128,
      "learning_rate": 0.00016440350382796929,
      "loss": 0.4932,
      "step": 2330
    },
    {
      "epoch": 0.6995798319327731,
      "grad_norm": 0.18306280672550201,
      "learning_rate": 0.00016436342654293625,
      "loss": 0.4734,
      "step": 2331
    },
    {
      "epoch": 0.6998799519807923,
      "grad_norm": 0.240435391664505,
      "learning_rate": 0.00016432333160053522,
      "loss": 0.4744,
      "step": 2332
    },
    {
      "epoch": 0.7001800720288115,
      "grad_norm": 0.17868775129318237,
      "learning_rate": 0.0001642832190117657,
      "loss": 0.4662,
      "step": 2333
    },
    {
      "epoch": 0.7004801920768308,
      "grad_norm": 0.14438675343990326,
      "learning_rate": 0.00016424308878763215,
      "loss": 0.4399,
      "step": 2334
    },
    {
      "epoch": 0.7007803121248499,
      "grad_norm": 0.1518152356147766,
      "learning_rate": 0.00016420294093914386,
      "loss": 0.5056,
      "step": 2335
    },
    {
      "epoch": 0.7010804321728692,
      "grad_norm": 0.1992632895708084,
      "learning_rate": 0.00016416277547731488,
      "loss": 0.4596,
      "step": 2336
    },
    {
      "epoch": 0.7013805522208884,
      "grad_norm": 0.15660887956619263,
      "learning_rate": 0.00016412259241316418,
      "loss": 0.4479,
      "step": 2337
    },
    {
      "epoch": 0.7016806722689075,
      "grad_norm": 0.14185617864131927,
      "learning_rate": 0.00016408239175771553,
      "loss": 0.4052,
      "step": 2338
    },
    {
      "epoch": 0.7019807923169268,
      "grad_norm": 0.14662165939807892,
      "learning_rate": 0.0001640421735219975,
      "loss": 0.4156,
      "step": 2339
    },
    {
      "epoch": 0.7022809123649459,
      "grad_norm": 0.16649018228054047,
      "learning_rate": 0.00016400193771704354,
      "loss": 0.546,
      "step": 2340
    },
    {
      "epoch": 0.7025810324129652,
      "grad_norm": 0.16269463300704956,
      "learning_rate": 0.00016396168435389184,
      "loss": 0.4766,
      "step": 2341
    },
    {
      "epoch": 0.7028811524609844,
      "grad_norm": 0.18485970795154572,
      "learning_rate": 0.00016392141344358544,
      "loss": 0.4581,
      "step": 2342
    },
    {
      "epoch": 0.7031812725090036,
      "grad_norm": 0.16991198062896729,
      "learning_rate": 0.00016388112499717225,
      "loss": 0.4065,
      "step": 2343
    },
    {
      "epoch": 0.7034813925570228,
      "grad_norm": 0.14863085746765137,
      "learning_rate": 0.0001638408190257049,
      "loss": 0.4582,
      "step": 2344
    },
    {
      "epoch": 0.7037815126050421,
      "grad_norm": 0.14156857132911682,
      "learning_rate": 0.00016380049554024086,
      "loss": 0.4515,
      "step": 2345
    },
    {
      "epoch": 0.7040816326530612,
      "grad_norm": 0.15150651335716248,
      "learning_rate": 0.00016376015455184245,
      "loss": 0.4547,
      "step": 2346
    },
    {
      "epoch": 0.7043817527010804,
      "grad_norm": 0.17717133462429047,
      "learning_rate": 0.0001637197960715768,
      "loss": 0.553,
      "step": 2347
    },
    {
      "epoch": 0.7046818727490997,
      "grad_norm": 0.1485554426908493,
      "learning_rate": 0.0001636794201105157,
      "loss": 0.4529,
      "step": 2348
    },
    {
      "epoch": 0.7049819927971188,
      "grad_norm": 0.1734079122543335,
      "learning_rate": 0.0001636390266797359,
      "loss": 0.4458,
      "step": 2349
    },
    {
      "epoch": 0.7052821128451381,
      "grad_norm": 0.1593354195356369,
      "learning_rate": 0.00016359861579031884,
      "loss": 0.4446,
      "step": 2350
    },
    {
      "epoch": 0.7055822328931572,
      "grad_norm": 0.16919587552547455,
      "learning_rate": 0.00016355818745335078,
      "loss": 0.5258,
      "step": 2351
    },
    {
      "epoch": 0.7058823529411765,
      "grad_norm": 0.1594027727842331,
      "learning_rate": 0.0001635177416799228,
      "loss": 0.4679,
      "step": 2352
    },
    {
      "epoch": 0.7061824729891957,
      "grad_norm": 0.14303286373615265,
      "learning_rate": 0.00016347727848113074,
      "loss": 0.4526,
      "step": 2353
    },
    {
      "epoch": 0.7064825930372148,
      "grad_norm": 0.1363154947757721,
      "learning_rate": 0.00016343679786807517,
      "loss": 0.4458,
      "step": 2354
    },
    {
      "epoch": 0.7067827130852341,
      "grad_norm": 0.14714987576007843,
      "learning_rate": 0.00016339629985186153,
      "loss": 0.4277,
      "step": 2355
    },
    {
      "epoch": 0.7070828331332533,
      "grad_norm": 0.15142428874969482,
      "learning_rate": 0.00016335578444359996,
      "loss": 0.5097,
      "step": 2356
    },
    {
      "epoch": 0.7073829531812725,
      "grad_norm": 0.15368203818798065,
      "learning_rate": 0.0001633152516544054,
      "loss": 0.4766,
      "step": 2357
    },
    {
      "epoch": 0.7076830732292917,
      "grad_norm": 0.14941073954105377,
      "learning_rate": 0.00016327470149539756,
      "loss": 0.4105,
      "step": 2358
    },
    {
      "epoch": 0.707983193277311,
      "grad_norm": 0.16012686491012573,
      "learning_rate": 0.0001632341339777009,
      "loss": 0.5073,
      "step": 2359
    },
    {
      "epoch": 0.7082833133253301,
      "grad_norm": 0.13656701147556305,
      "learning_rate": 0.00016319354911244468,
      "loss": 0.4195,
      "step": 2360
    },
    {
      "epoch": 0.7085834333733494,
      "grad_norm": 0.1558884233236313,
      "learning_rate": 0.0001631529469107629,
      "loss": 0.4813,
      "step": 2361
    },
    {
      "epoch": 0.7088835534213686,
      "grad_norm": 0.19369877874851227,
      "learning_rate": 0.00016311232738379423,
      "loss": 0.4347,
      "step": 2362
    },
    {
      "epoch": 0.7091836734693877,
      "grad_norm": 0.16903385519981384,
      "learning_rate": 0.00016307169054268226,
      "loss": 0.4733,
      "step": 2363
    },
    {
      "epoch": 0.709483793517407,
      "grad_norm": 0.14185801148414612,
      "learning_rate": 0.00016303103639857519,
      "loss": 0.4464,
      "step": 2364
    },
    {
      "epoch": 0.7097839135654261,
      "grad_norm": 0.16600801050662994,
      "learning_rate": 0.000162990364962626,
      "loss": 0.5555,
      "step": 2365
    },
    {
      "epoch": 0.7100840336134454,
      "grad_norm": 0.14495305716991425,
      "learning_rate": 0.00016294967624599254,
      "loss": 0.4612,
      "step": 2366
    },
    {
      "epoch": 0.7103841536614646,
      "grad_norm": 0.18455393612384796,
      "learning_rate": 0.00016290897025983715,
      "loss": 0.539,
      "step": 2367
    },
    {
      "epoch": 0.7106842737094838,
      "grad_norm": 0.14817331731319427,
      "learning_rate": 0.00016286824701532718,
      "loss": 0.476,
      "step": 2368
    },
    {
      "epoch": 0.710984393757503,
      "grad_norm": 0.15020863711833954,
      "learning_rate": 0.00016282750652363447,
      "loss": 0.464,
      "step": 2369
    },
    {
      "epoch": 0.7112845138055222,
      "grad_norm": 0.16190224885940552,
      "learning_rate": 0.00016278674879593582,
      "loss": 0.4306,
      "step": 2370
    },
    {
      "epoch": 0.7115846338535414,
      "grad_norm": 0.15611281991004944,
      "learning_rate": 0.00016274597384341254,
      "loss": 0.4695,
      "step": 2371
    },
    {
      "epoch": 0.7118847539015606,
      "grad_norm": 0.1466979831457138,
      "learning_rate": 0.00016270518167725085,
      "loss": 0.4928,
      "step": 2372
    },
    {
      "epoch": 0.7121848739495799,
      "grad_norm": 0.14179770648479462,
      "learning_rate": 0.00016266437230864157,
      "loss": 0.4402,
      "step": 2373
    },
    {
      "epoch": 0.712484993997599,
      "grad_norm": 0.14757367968559265,
      "learning_rate": 0.0001626235457487803,
      "loss": 0.4456,
      "step": 2374
    },
    {
      "epoch": 0.7127851140456183,
      "grad_norm": 0.1571052223443985,
      "learning_rate": 0.0001625827020088673,
      "loss": 0.4685,
      "step": 2375
    },
    {
      "epoch": 0.7130852340936374,
      "grad_norm": 0.22218023240566254,
      "learning_rate": 0.00016254184110010765,
      "loss": 0.5559,
      "step": 2376
    },
    {
      "epoch": 0.7133853541416567,
      "grad_norm": 0.14846636354923248,
      "learning_rate": 0.00016250096303371104,
      "loss": 0.4649,
      "step": 2377
    },
    {
      "epoch": 0.7136854741896759,
      "grad_norm": 0.1457940936088562,
      "learning_rate": 0.00016246006782089187,
      "loss": 0.4535,
      "step": 2378
    },
    {
      "epoch": 0.713985594237695,
      "grad_norm": 0.1568482220172882,
      "learning_rate": 0.0001624191554728693,
      "loss": 0.5334,
      "step": 2379
    },
    {
      "epoch": 0.7142857142857143,
      "grad_norm": 0.14761097729206085,
      "learning_rate": 0.00016237822600086716,
      "loss": 0.4504,
      "step": 2380
    },
    {
      "epoch": 0.7145858343337335,
      "grad_norm": 0.17113368213176727,
      "learning_rate": 0.000162337279416114,
      "loss": 0.5158,
      "step": 2381
    },
    {
      "epoch": 0.7148859543817527,
      "grad_norm": 0.14863641560077667,
      "learning_rate": 0.00016229631572984302,
      "loss": 0.4488,
      "step": 2382
    },
    {
      "epoch": 0.7151860744297719,
      "grad_norm": 0.14860329031944275,
      "learning_rate": 0.00016225533495329214,
      "loss": 0.4718,
      "step": 2383
    },
    {
      "epoch": 0.7154861944777912,
      "grad_norm": 1.8711261749267578,
      "learning_rate": 0.00016221433709770396,
      "loss": 0.5577,
      "step": 2384
    },
    {
      "epoch": 0.7157863145258103,
      "grad_norm": 0.18025538325309753,
      "learning_rate": 0.0001621733221743258,
      "loss": 0.4975,
      "step": 2385
    },
    {
      "epoch": 0.7160864345738295,
      "grad_norm": 28.5136775970459,
      "learning_rate": 0.0001621322901944096,
      "loss": 2.9849,
      "step": 2386
    },
    {
      "epoch": 0.7163865546218487,
      "grad_norm": 1.3544706106185913,
      "learning_rate": 0.00016209124116921207,
      "loss": 0.5188,
      "step": 2387
    },
    {
      "epoch": 0.7166866746698679,
      "grad_norm": 0.16111868619918823,
      "learning_rate": 0.00016205017510999447,
      "loss": 0.4555,
      "step": 2388
    },
    {
      "epoch": 0.7169867947178872,
      "grad_norm": 0.16572299599647522,
      "learning_rate": 0.00016200909202802283,
      "loss": 0.4513,
      "step": 2389
    },
    {
      "epoch": 0.7172869147659063,
      "grad_norm": 0.1773393750190735,
      "learning_rate": 0.00016196799193456785,
      "loss": 0.4331,
      "step": 2390
    },
    {
      "epoch": 0.7175870348139256,
      "grad_norm": 0.4284209907054901,
      "learning_rate": 0.0001619268748409048,
      "loss": 0.4839,
      "step": 2391
    },
    {
      "epoch": 0.7178871548619448,
      "grad_norm": 0.1890149861574173,
      "learning_rate": 0.00016188574075831378,
      "loss": 0.5225,
      "step": 2392
    },
    {
      "epoch": 0.718187274909964,
      "grad_norm": 0.16966569423675537,
      "learning_rate": 0.00016184458969807937,
      "loss": 0.4281,
      "step": 2393
    },
    {
      "epoch": 0.7184873949579832,
      "grad_norm": 0.178705632686615,
      "learning_rate": 0.0001618034216714909,
      "loss": 0.5232,
      "step": 2394
    },
    {
      "epoch": 0.7187875150060024,
      "grad_norm": 0.16386286914348602,
      "learning_rate": 0.0001617622366898424,
      "loss": 0.4477,
      "step": 2395
    },
    {
      "epoch": 0.7190876350540216,
      "grad_norm": 0.17845234274864197,
      "learning_rate": 0.00016172103476443247,
      "loss": 0.514,
      "step": 2396
    },
    {
      "epoch": 0.7193877551020408,
      "grad_norm": 0.16308018565177917,
      "learning_rate": 0.00016167981590656437,
      "loss": 0.4479,
      "step": 2397
    },
    {
      "epoch": 0.71968787515006,
      "grad_norm": 0.15091225504875183,
      "learning_rate": 0.00016163858012754604,
      "loss": 0.4255,
      "step": 2398
    },
    {
      "epoch": 0.7199879951980792,
      "grad_norm": 0.17604851722717285,
      "learning_rate": 0.00016159732743869002,
      "loss": 0.4797,
      "step": 2399
    },
    {
      "epoch": 0.7202881152460985,
      "grad_norm": 0.15736879408359528,
      "learning_rate": 0.00016155605785131357,
      "loss": 0.4726,
      "step": 2400
    },
    {
      "epoch": 0.7205882352941176,
      "grad_norm": 0.16581600904464722,
      "learning_rate": 0.00016151477137673842,
      "loss": 0.478,
      "step": 2401
    },
    {
      "epoch": 0.7208883553421368,
      "grad_norm": 0.19278988242149353,
      "learning_rate": 0.00016147346802629115,
      "loss": 0.4809,
      "step": 2402
    },
    {
      "epoch": 0.7211884753901561,
      "grad_norm": 0.2418685406446457,
      "learning_rate": 0.00016143214781130284,
      "loss": 0.5177,
      "step": 2403
    },
    {
      "epoch": 0.7214885954381752,
      "grad_norm": 0.16705535352230072,
      "learning_rate": 0.00016139081074310915,
      "loss": 0.4942,
      "step": 2404
    },
    {
      "epoch": 0.7217887154861945,
      "grad_norm": 0.13845057785511017,
      "learning_rate": 0.00016134945683305048,
      "loss": 0.4137,
      "step": 2405
    },
    {
      "epoch": 0.7220888355342137,
      "grad_norm": 0.15267415344715118,
      "learning_rate": 0.0001613080860924718,
      "loss": 0.4332,
      "step": 2406
    },
    {
      "epoch": 0.7223889555822329,
      "grad_norm": 0.1513591855764389,
      "learning_rate": 0.00016126669853272274,
      "loss": 0.487,
      "step": 2407
    },
    {
      "epoch": 0.7226890756302521,
      "grad_norm": 0.2217532843351364,
      "learning_rate": 0.0001612252941651574,
      "loss": 0.4184,
      "step": 2408
    },
    {
      "epoch": 0.7229891956782714,
      "grad_norm": 0.2727909982204437,
      "learning_rate": 0.00016118387300113467,
      "loss": 0.4827,
      "step": 2409
    },
    {
      "epoch": 0.7232893157262905,
      "grad_norm": 0.14815635979175568,
      "learning_rate": 0.00016114243505201795,
      "loss": 0.4442,
      "step": 2410
    },
    {
      "epoch": 0.7235894357743097,
      "grad_norm": 0.15210844576358795,
      "learning_rate": 0.0001611009803291753,
      "loss": 0.4809,
      "step": 2411
    },
    {
      "epoch": 0.723889555822329,
      "grad_norm": 0.14424121379852295,
      "learning_rate": 0.00016105950884397926,
      "loss": 0.4478,
      "step": 2412
    },
    {
      "epoch": 0.7241896758703481,
      "grad_norm": 0.14106842875480652,
      "learning_rate": 0.00016101802060780712,
      "loss": 0.4512,
      "step": 2413
    },
    {
      "epoch": 0.7244897959183674,
      "grad_norm": 0.14604397118091583,
      "learning_rate": 0.0001609765156320407,
      "loss": 0.444,
      "step": 2414
    },
    {
      "epoch": 0.7247899159663865,
      "grad_norm": 0.1434173882007599,
      "learning_rate": 0.00016093499392806648,
      "loss": 0.4616,
      "step": 2415
    },
    {
      "epoch": 0.7250900360144058,
      "grad_norm": 0.15155497193336487,
      "learning_rate": 0.00016089345550727532,
      "loss": 0.5038,
      "step": 2416
    },
    {
      "epoch": 0.725390156062425,
      "grad_norm": 0.1462622880935669,
      "learning_rate": 0.00016085190038106292,
      "loss": 0.4598,
      "step": 2417
    },
    {
      "epoch": 0.7256902761104442,
      "grad_norm": 0.20126402378082275,
      "learning_rate": 0.00016081032856082937,
      "loss": 0.4401,
      "step": 2418
    },
    {
      "epoch": 0.7259903961584634,
      "grad_norm": 0.15513941645622253,
      "learning_rate": 0.00016076874005797951,
      "loss": 0.5003,
      "step": 2419
    },
    {
      "epoch": 0.7262905162064826,
      "grad_norm": 0.9181942939758301,
      "learning_rate": 0.0001607271348839226,
      "loss": 0.5313,
      "step": 2420
    },
    {
      "epoch": 0.7265906362545018,
      "grad_norm": 0.1356831192970276,
      "learning_rate": 0.00016068551305007254,
      "loss": 0.4011,
      "step": 2421
    },
    {
      "epoch": 0.726890756302521,
      "grad_norm": 0.13881300389766693,
      "learning_rate": 0.00016064387456784788,
      "loss": 0.4275,
      "step": 2422
    },
    {
      "epoch": 0.7271908763505402,
      "grad_norm": 0.15785065293312073,
      "learning_rate": 0.0001606022194486716,
      "loss": 0.4802,
      "step": 2423
    },
    {
      "epoch": 0.7274909963985594,
      "grad_norm": 0.17977090179920197,
      "learning_rate": 0.00016056054770397128,
      "loss": 0.472,
      "step": 2424
    },
    {
      "epoch": 0.7277911164465787,
      "grad_norm": 0.17413394153118134,
      "learning_rate": 0.00016051885934517917,
      "loss": 0.4735,
      "step": 2425
    },
    {
      "epoch": 0.7280912364945978,
      "grad_norm": 0.1621166616678238,
      "learning_rate": 0.00016047715438373192,
      "loss": 0.5128,
      "step": 2426
    },
    {
      "epoch": 0.728391356542617,
      "grad_norm": 0.16929030418395996,
      "learning_rate": 0.00016043543283107082,
      "loss": 0.5162,
      "step": 2427
    },
    {
      "epoch": 0.7286914765906363,
      "grad_norm": 0.15607744455337524,
      "learning_rate": 0.00016039369469864173,
      "loss": 0.4823,
      "step": 2428
    },
    {
      "epoch": 0.7289915966386554,
      "grad_norm": 0.1620553880929947,
      "learning_rate": 0.000160351939997895,
      "loss": 0.5065,
      "step": 2429
    },
    {
      "epoch": 0.7292917166866747,
      "grad_norm": 0.1509367674589157,
      "learning_rate": 0.00016031016874028557,
      "loss": 0.4166,
      "step": 2430
    },
    {
      "epoch": 0.7295918367346939,
      "grad_norm": 0.16851627826690674,
      "learning_rate": 0.0001602683809372729,
      "loss": 0.5493,
      "step": 2431
    },
    {
      "epoch": 0.7298919567827131,
      "grad_norm": 0.2047121524810791,
      "learning_rate": 0.00016022657660032098,
      "loss": 0.5213,
      "step": 2432
    },
    {
      "epoch": 0.7301920768307323,
      "grad_norm": 0.18215420842170715,
      "learning_rate": 0.00016018475574089837,
      "loss": 0.4492,
      "step": 2433
    },
    {
      "epoch": 0.7304921968787516,
      "grad_norm": 0.15045644342899323,
      "learning_rate": 0.00016014291837047813,
      "loss": 0.4588,
      "step": 2434
    },
    {
      "epoch": 0.7307923169267707,
      "grad_norm": 0.14964286983013153,
      "learning_rate": 0.00016010106450053786,
      "loss": 0.4541,
      "step": 2435
    },
    {
      "epoch": 0.7310924369747899,
      "grad_norm": 0.17790651321411133,
      "learning_rate": 0.00016005919414255972,
      "loss": 0.4132,
      "step": 2436
    },
    {
      "epoch": 0.7313925570228091,
      "grad_norm": 0.320291131734848,
      "learning_rate": 0.00016001730730803035,
      "loss": 0.536,
      "step": 2437
    },
    {
      "epoch": 0.7316926770708283,
      "grad_norm": 0.15852206945419312,
      "learning_rate": 0.00015997540400844088,
      "loss": 0.4806,
      "step": 2438
    },
    {
      "epoch": 0.7319927971188476,
      "grad_norm": 0.14510732889175415,
      "learning_rate": 0.00015993348425528704,
      "loss": 0.4662,
      "step": 2439
    },
    {
      "epoch": 0.7322929171668667,
      "grad_norm": 0.1684138923883438,
      "learning_rate": 0.00015989154806006904,
      "loss": 0.5278,
      "step": 2440
    },
    {
      "epoch": 0.732593037214886,
      "grad_norm": 0.1442684829235077,
      "learning_rate": 0.0001598495954342916,
      "loss": 0.4483,
      "step": 2441
    },
    {
      "epoch": 0.7328931572629052,
      "grad_norm": 0.14993314445018768,
      "learning_rate": 0.00015980762638946388,
      "loss": 0.4991,
      "step": 2442
    },
    {
      "epoch": 0.7331932773109243,
      "grad_norm": 0.14737346768379211,
      "learning_rate": 0.00015976564093709967,
      "loss": 0.447,
      "step": 2443
    },
    {
      "epoch": 0.7334933973589436,
      "grad_norm": 0.5534051060676575,
      "learning_rate": 0.00015972363908871716,
      "loss": 0.4782,
      "step": 2444
    },
    {
      "epoch": 0.7337935174069627,
      "grad_norm": 0.3656732439994812,
      "learning_rate": 0.00015968162085583913,
      "loss": 0.505,
      "step": 2445
    },
    {
      "epoch": 0.734093637454982,
      "grad_norm": 0.17494964599609375,
      "learning_rate": 0.00015963958624999275,
      "loss": 0.4748,
      "step": 2446
    },
    {
      "epoch": 0.7343937575030012,
      "grad_norm": 0.15481603145599365,
      "learning_rate": 0.00015959753528270971,
      "loss": 0.4708,
      "step": 2447
    },
    {
      "epoch": 0.7346938775510204,
      "grad_norm": 0.1475801169872284,
      "learning_rate": 0.00015955546796552627,
      "loss": 0.4227,
      "step": 2448
    },
    {
      "epoch": 0.7349939975990396,
      "grad_norm": 0.2968604564666748,
      "learning_rate": 0.0001595133843099831,
      "loss": 0.5031,
      "step": 2449
    },
    {
      "epoch": 0.7352941176470589,
      "grad_norm": 0.15884873270988464,
      "learning_rate": 0.00015947128432762536,
      "loss": 0.4467,
      "step": 2450
    },
    {
      "epoch": 0.735594237695078,
      "grad_norm": 0.1618734747171402,
      "learning_rate": 0.00015942916803000267,
      "loss": 0.4693,
      "step": 2451
    },
    {
      "epoch": 0.7358943577430972,
      "grad_norm": 0.1525658667087555,
      "learning_rate": 0.00015938703542866923,
      "loss": 0.4293,
      "step": 2452
    },
    {
      "epoch": 0.7361944777911165,
      "grad_norm": 0.1299402117729187,
      "learning_rate": 0.00015934488653518355,
      "loss": 0.3741,
      "step": 2453
    },
    {
      "epoch": 0.7364945978391356,
      "grad_norm": 0.16883011162281036,
      "learning_rate": 0.00015930272136110873,
      "loss": 0.4614,
      "step": 2454
    },
    {
      "epoch": 0.7367947178871549,
      "grad_norm": 0.39602482318878174,
      "learning_rate": 0.0001592605399180123,
      "loss": 0.4385,
      "step": 2455
    },
    {
      "epoch": 0.737094837935174,
      "grad_norm": 0.16625580191612244,
      "learning_rate": 0.0001592183422174663,
      "loss": 0.4745,
      "step": 2456
    },
    {
      "epoch": 0.7373949579831933,
      "grad_norm": 0.14766505360603333,
      "learning_rate": 0.0001591761282710471,
      "loss": 0.4177,
      "step": 2457
    },
    {
      "epoch": 0.7376950780312125,
      "grad_norm": 0.32073864340782166,
      "learning_rate": 0.00015913389809033568,
      "loss": 0.4803,
      "step": 2458
    },
    {
      "epoch": 0.7379951980792316,
      "grad_norm": 0.16001272201538086,
      "learning_rate": 0.0001590916516869174,
      "loss": 0.5038,
      "step": 2459
    },
    {
      "epoch": 0.7382953181272509,
      "grad_norm": 0.14552772045135498,
      "learning_rate": 0.00015904938907238206,
      "loss": 0.4837,
      "step": 2460
    },
    {
      "epoch": 0.7385954381752701,
      "grad_norm": 0.1509009599685669,
      "learning_rate": 0.0001590071102583239,
      "loss": 0.4703,
      "step": 2461
    },
    {
      "epoch": 0.7388955582232893,
      "grad_norm": 0.16939735412597656,
      "learning_rate": 0.00015896481525634162,
      "loss": 0.4524,
      "step": 2462
    },
    {
      "epoch": 0.7391956782713085,
      "grad_norm": 0.15040256083011627,
      "learning_rate": 0.00015892250407803843,
      "loss": 0.4833,
      "step": 2463
    },
    {
      "epoch": 0.7394957983193278,
      "grad_norm": 0.154825821518898,
      "learning_rate": 0.0001588801767350219,
      "loss": 0.4759,
      "step": 2464
    },
    {
      "epoch": 0.7397959183673469,
      "grad_norm": 0.6256540417671204,
      "learning_rate": 0.00015883783323890403,
      "loss": 0.4295,
      "step": 2465
    },
    {
      "epoch": 0.7400960384153662,
      "grad_norm": 1.0148793458938599,
      "learning_rate": 0.00015879547360130128,
      "loss": 0.5452,
      "step": 2466
    },
    {
      "epoch": 0.7403961584633854,
      "grad_norm": 0.14528141915798187,
      "learning_rate": 0.00015875309783383452,
      "loss": 0.4459,
      "step": 2467
    },
    {
      "epoch": 0.7406962785114045,
      "grad_norm": 0.14045490324497223,
      "learning_rate": 0.00015871070594812906,
      "loss": 0.4247,
      "step": 2468
    },
    {
      "epoch": 0.7409963985594238,
      "grad_norm": 0.14894379675388336,
      "learning_rate": 0.00015866829795581464,
      "loss": 0.4313,
      "step": 2469
    },
    {
      "epoch": 0.741296518607443,
      "grad_norm": 0.1819421797990799,
      "learning_rate": 0.00015862587386852541,
      "loss": 0.5641,
      "step": 2470
    },
    {
      "epoch": 0.7415966386554622,
      "grad_norm": 0.1688489019870758,
      "learning_rate": 0.00015858343369789992,
      "loss": 0.4838,
      "step": 2471
    },
    {
      "epoch": 0.7418967587034814,
      "grad_norm": 0.16753920912742615,
      "learning_rate": 0.00015854097745558114,
      "loss": 0.4528,
      "step": 2472
    },
    {
      "epoch": 0.7421968787515006,
      "grad_norm": 0.16875241696834564,
      "learning_rate": 0.00015849850515321648,
      "loss": 0.5313,
      "step": 2473
    },
    {
      "epoch": 0.7424969987995198,
      "grad_norm": 0.18420499563217163,
      "learning_rate": 0.00015845601680245766,
      "loss": 0.512,
      "step": 2474
    },
    {
      "epoch": 0.742797118847539,
      "grad_norm": 0.21641017496585846,
      "learning_rate": 0.000158413512414961,
      "loss": 0.4748,
      "step": 2475
    },
    {
      "epoch": 0.7430972388955582,
      "grad_norm": 0.15973563492298126,
      "learning_rate": 0.00015837099200238696,
      "loss": 0.4729,
      "step": 2476
    },
    {
      "epoch": 0.7433973589435774,
      "grad_norm": 0.3653205633163452,
      "learning_rate": 0.00015832845557640058,
      "loss": 0.459,
      "step": 2477
    },
    {
      "epoch": 0.7436974789915967,
      "grad_norm": 0.16915689408779144,
      "learning_rate": 0.00015828590314867125,
      "loss": 0.5008,
      "step": 2478
    },
    {
      "epoch": 0.7439975990396158,
      "grad_norm": 0.15779462456703186,
      "learning_rate": 0.00015824333473087276,
      "loss": 0.4087,
      "step": 2479
    },
    {
      "epoch": 0.7442977190876351,
      "grad_norm": 0.1613835096359253,
      "learning_rate": 0.0001582007503346832,
      "loss": 0.4711,
      "step": 2480
    },
    {
      "epoch": 0.7445978391356542,
      "grad_norm": 0.18008920550346375,
      "learning_rate": 0.00015815814997178514,
      "loss": 0.5015,
      "step": 2481
    },
    {
      "epoch": 0.7448979591836735,
      "grad_norm": 0.152873694896698,
      "learning_rate": 0.00015811553365386555,
      "loss": 0.4183,
      "step": 2482
    },
    {
      "epoch": 0.7451980792316927,
      "grad_norm": 0.16232550144195557,
      "learning_rate": 0.00015807290139261567,
      "loss": 0.5007,
      "step": 2483
    },
    {
      "epoch": 0.7454981992797118,
      "grad_norm": 0.19482393562793732,
      "learning_rate": 0.0001580302531997312,
      "loss": 0.5102,
      "step": 2484
    },
    {
      "epoch": 0.7457983193277311,
      "grad_norm": 0.15328121185302734,
      "learning_rate": 0.00015798758908691215,
      "loss": 0.4548,
      "step": 2485
    },
    {
      "epoch": 0.7460984393757503,
      "grad_norm": 0.1667821705341339,
      "learning_rate": 0.00015794490906586298,
      "loss": 0.5252,
      "step": 2486
    },
    {
      "epoch": 0.7463985594237695,
      "grad_norm": 0.16482432186603546,
      "learning_rate": 0.00015790221314829244,
      "loss": 0.487,
      "step": 2487
    },
    {
      "epoch": 0.7466986794717887,
      "grad_norm": 0.14885227382183075,
      "learning_rate": 0.00015785950134591364,
      "loss": 0.4354,
      "step": 2488
    },
    {
      "epoch": 0.746998799519808,
      "grad_norm": 0.1504472941160202,
      "learning_rate": 0.0001578167736704441,
      "loss": 0.4777,
      "step": 2489
    },
    {
      "epoch": 0.7472989195678271,
      "grad_norm": 0.17454247176647186,
      "learning_rate": 0.0001577740301336057,
      "loss": 0.5177,
      "step": 2490
    },
    {
      "epoch": 0.7475990396158463,
      "grad_norm": 0.16132484376430511,
      "learning_rate": 0.00015773127074712457,
      "loss": 0.4218,
      "step": 2491
    },
    {
      "epoch": 0.7478991596638656,
      "grad_norm": 0.1640845090150833,
      "learning_rate": 0.00015768849552273129,
      "loss": 0.4685,
      "step": 2492
    },
    {
      "epoch": 0.7481992797118847,
      "grad_norm": 0.15111713111400604,
      "learning_rate": 0.00015764570447216074,
      "loss": 0.4401,
      "step": 2493
    },
    {
      "epoch": 0.748499399759904,
      "grad_norm": 0.16630788147449493,
      "learning_rate": 0.0001576028976071522,
      "loss": 0.4573,
      "step": 2494
    },
    {
      "epoch": 0.7487995198079231,
      "grad_norm": 0.15859933197498322,
      "learning_rate": 0.0001575600749394492,
      "loss": 0.4777,
      "step": 2495
    },
    {
      "epoch": 0.7490996398559424,
      "grad_norm": 0.14920127391815186,
      "learning_rate": 0.00015751723648079965,
      "loss": 0.4593,
      "step": 2496
    },
    {
      "epoch": 0.7493997599039616,
      "grad_norm": 0.16223812103271484,
      "learning_rate": 0.0001574743822429558,
      "loss": 0.4911,
      "step": 2497
    },
    {
      "epoch": 0.7496998799519808,
      "grad_norm": 0.15160585939884186,
      "learning_rate": 0.00015743151223767424,
      "loss": 0.4731,
      "step": 2498
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.16430693864822388,
      "learning_rate": 0.0001573886264767158,
      "loss": 0.4952,
      "step": 2499
    },
    {
      "epoch": 0.7503001200480192,
      "grad_norm": 0.17609624564647675,
      "learning_rate": 0.00015734572497184577,
      "loss": 0.546,
      "step": 2500
    },
    {
      "epoch": 0.7506002400960384,
      "grad_norm": 0.3264397084712982,
      "learning_rate": 0.00015730280773483367,
      "loss": 0.4297,
      "step": 2501
    },
    {
      "epoch": 0.7509003601440576,
      "grad_norm": 0.16511526703834534,
      "learning_rate": 0.00015725987477745328,
      "loss": 0.5182,
      "step": 2502
    },
    {
      "epoch": 0.7512004801920769,
      "grad_norm": 0.17369025945663452,
      "learning_rate": 0.00015721692611148284,
      "loss": 0.4407,
      "step": 2503
    },
    {
      "epoch": 0.751500600240096,
      "grad_norm": 0.13990244269371033,
      "learning_rate": 0.00015717396174870483,
      "loss": 0.4365,
      "step": 2504
    },
    {
      "epoch": 0.7518007202881153,
      "grad_norm": 0.13970094919204712,
      "learning_rate": 0.00015713098170090599,
      "loss": 0.4663,
      "step": 2505
    },
    {
      "epoch": 0.7521008403361344,
      "grad_norm": 0.16043907403945923,
      "learning_rate": 0.00015708798597987742,
      "loss": 0.5133,
      "step": 2506
    },
    {
      "epoch": 0.7524009603841537,
      "grad_norm": 0.1567663550376892,
      "learning_rate": 0.00015704497459741447,
      "loss": 0.498,
      "step": 2507
    },
    {
      "epoch": 0.7527010804321729,
      "grad_norm": 0.21229980885982513,
      "learning_rate": 0.00015700194756531688,
      "loss": 0.4826,
      "step": 2508
    },
    {
      "epoch": 0.753001200480192,
      "grad_norm": 0.14606361091136932,
      "learning_rate": 0.0001569589048953886,
      "loss": 0.4145,
      "step": 2509
    },
    {
      "epoch": 0.7533013205282113,
      "grad_norm": 0.15010032057762146,
      "learning_rate": 0.00015691584659943786,
      "loss": 0.4501,
      "step": 2510
    },
    {
      "epoch": 0.7536014405762305,
      "grad_norm": 0.3521146774291992,
      "learning_rate": 0.00015687277268927724,
      "loss": 0.4969,
      "step": 2511
    },
    {
      "epoch": 0.7539015606242497,
      "grad_norm": 0.16045944392681122,
      "learning_rate": 0.0001568296831767236,
      "loss": 0.4624,
      "step": 2512
    },
    {
      "epoch": 0.7542016806722689,
      "grad_norm": 0.14147594571113586,
      "learning_rate": 0.000156786578073598,
      "loss": 0.4703,
      "step": 2513
    },
    {
      "epoch": 0.7545018007202882,
      "grad_norm": 0.15041673183441162,
      "learning_rate": 0.00015674345739172585,
      "loss": 0.4737,
      "step": 2514
    },
    {
      "epoch": 0.7548019207683073,
      "grad_norm": 0.15191854536533356,
      "learning_rate": 0.00015670032114293683,
      "loss": 0.5047,
      "step": 2515
    },
    {
      "epoch": 0.7551020408163265,
      "grad_norm": 0.18009275197982788,
      "learning_rate": 0.00015665716933906487,
      "loss": 0.4611,
      "step": 2516
    },
    {
      "epoch": 0.7554021608643458,
      "grad_norm": 0.17272666096687317,
      "learning_rate": 0.00015661400199194813,
      "loss": 0.4397,
      "step": 2517
    },
    {
      "epoch": 0.7557022809123649,
      "grad_norm": 0.1610168069601059,
      "learning_rate": 0.00015657081911342916,
      "loss": 0.4273,
      "step": 2518
    },
    {
      "epoch": 0.7560024009603842,
      "grad_norm": 0.1512984335422516,
      "learning_rate": 0.00015652762071535458,
      "loss": 0.4762,
      "step": 2519
    },
    {
      "epoch": 0.7563025210084033,
      "grad_norm": 0.15497024357318878,
      "learning_rate": 0.0001564844068095755,
      "loss": 0.5085,
      "step": 2520
    },
    {
      "epoch": 0.7566026410564226,
      "grad_norm": 0.16191035509109497,
      "learning_rate": 0.00015644117740794706,
      "loss": 0.4851,
      "step": 2521
    },
    {
      "epoch": 0.7569027611044418,
      "grad_norm": 0.18235939741134644,
      "learning_rate": 0.0001563979325223288,
      "loss": 0.4637,
      "step": 2522
    },
    {
      "epoch": 0.757202881152461,
      "grad_norm": 0.1426496058702469,
      "learning_rate": 0.00015635467216458445,
      "loss": 0.4371,
      "step": 2523
    },
    {
      "epoch": 0.7575030012004802,
      "grad_norm": 0.14401482045650482,
      "learning_rate": 0.00015631139634658195,
      "loss": 0.4089,
      "step": 2524
    },
    {
      "epoch": 0.7578031212484994,
      "grad_norm": 0.15074434876441956,
      "learning_rate": 0.0001562681050801936,
      "loss": 0.458,
      "step": 2525
    },
    {
      "epoch": 0.7581032412965186,
      "grad_norm": 0.14815548062324524,
      "learning_rate": 0.0001562247983772958,
      "loss": 0.47,
      "step": 2526
    },
    {
      "epoch": 0.7584033613445378,
      "grad_norm": 0.1517464518547058,
      "learning_rate": 0.0001561814762497693,
      "loss": 0.4807,
      "step": 2527
    },
    {
      "epoch": 0.758703481392557,
      "grad_norm": 0.14972248673439026,
      "learning_rate": 0.000156138138709499,
      "loss": 0.4332,
      "step": 2528
    },
    {
      "epoch": 0.7590036014405762,
      "grad_norm": 0.15079128742218018,
      "learning_rate": 0.00015609478576837402,
      "loss": 0.4871,
      "step": 2529
    },
    {
      "epoch": 0.7593037214885955,
      "grad_norm": 0.5011845231056213,
      "learning_rate": 0.0001560514174382878,
      "loss": 0.5338,
      "step": 2530
    },
    {
      "epoch": 0.7596038415366146,
      "grad_norm": 0.1483888030052185,
      "learning_rate": 0.00015600803373113796,
      "loss": 0.4487,
      "step": 2531
    },
    {
      "epoch": 0.7599039615846338,
      "grad_norm": 0.1570620834827423,
      "learning_rate": 0.00015596463465882622,
      "loss": 0.4682,
      "step": 2532
    },
    {
      "epoch": 0.7602040816326531,
      "grad_norm": 0.15329718589782715,
      "learning_rate": 0.0001559212202332587,
      "loss": 0.5032,
      "step": 2533
    },
    {
      "epoch": 0.7605042016806722,
      "grad_norm": 0.15335699915885925,
      "learning_rate": 0.00015587779046634568,
      "loss": 0.415,
      "step": 2534
    },
    {
      "epoch": 0.7608043217286915,
      "grad_norm": 0.15885575115680695,
      "learning_rate": 0.00015583434537000154,
      "loss": 0.465,
      "step": 2535
    },
    {
      "epoch": 0.7611044417767107,
      "grad_norm": 0.13733403384685516,
      "learning_rate": 0.00015579088495614498,
      "loss": 0.4428,
      "step": 2536
    },
    {
      "epoch": 0.7614045618247299,
      "grad_norm": 0.13970106840133667,
      "learning_rate": 0.00015574740923669886,
      "loss": 0.4248,
      "step": 2537
    },
    {
      "epoch": 0.7617046818727491,
      "grad_norm": 0.14858634769916534,
      "learning_rate": 0.00015570391822359024,
      "loss": 0.463,
      "step": 2538
    },
    {
      "epoch": 0.7620048019207684,
      "grad_norm": 0.15809981524944305,
      "learning_rate": 0.00015566041192875043,
      "loss": 0.4542,
      "step": 2539
    },
    {
      "epoch": 0.7623049219687875,
      "grad_norm": 0.1487644910812378,
      "learning_rate": 0.0001556168903641148,
      "loss": 0.4856,
      "step": 2540
    },
    {
      "epoch": 0.7626050420168067,
      "grad_norm": 0.14323757588863373,
      "learning_rate": 0.00015557335354162306,
      "loss": 0.4678,
      "step": 2541
    },
    {
      "epoch": 0.762905162064826,
      "grad_norm": 0.14858075976371765,
      "learning_rate": 0.00015552980147321902,
      "loss": 0.3994,
      "step": 2542
    },
    {
      "epoch": 0.7632052821128451,
      "grad_norm": 0.2091810554265976,
      "learning_rate": 0.00015548623417085063,
      "loss": 0.4711,
      "step": 2543
    },
    {
      "epoch": 0.7635054021608644,
      "grad_norm": 0.13712801039218903,
      "learning_rate": 0.00015544265164647018,
      "loss": 0.4273,
      "step": 2544
    },
    {
      "epoch": 0.7638055222088835,
      "grad_norm": 0.14469502866268158,
      "learning_rate": 0.00015539905391203398,
      "loss": 0.4167,
      "step": 2545
    },
    {
      "epoch": 0.7641056422569028,
      "grad_norm": 0.1433638483285904,
      "learning_rate": 0.00015535544097950257,
      "loss": 0.4609,
      "step": 2546
    },
    {
      "epoch": 0.764405762304922,
      "grad_norm": 0.4555927813053131,
      "learning_rate": 0.00015531181286084067,
      "loss": 0.502,
      "step": 2547
    },
    {
      "epoch": 0.7647058823529411,
      "grad_norm": 0.1536446213722229,
      "learning_rate": 0.00015526816956801714,
      "loss": 0.46,
      "step": 2548
    },
    {
      "epoch": 0.7650060024009604,
      "grad_norm": 0.15496781468391418,
      "learning_rate": 0.00015522451111300503,
      "loss": 0.5006,
      "step": 2549
    },
    {
      "epoch": 0.7653061224489796,
      "grad_norm": 0.18594437837600708,
      "learning_rate": 0.00015518083750778157,
      "loss": 0.4723,
      "step": 2550
    },
    {
      "epoch": 0.7656062424969988,
      "grad_norm": 0.16826999187469482,
      "learning_rate": 0.00015513714876432802,
      "loss": 0.5019,
      "step": 2551
    },
    {
      "epoch": 0.765906362545018,
      "grad_norm": 0.15920387208461761,
      "learning_rate": 0.00015509344489462995,
      "loss": 0.4593,
      "step": 2552
    },
    {
      "epoch": 0.7662064825930373,
      "grad_norm": 0.16303475201129913,
      "learning_rate": 0.00015504972591067704,
      "loss": 0.4888,
      "step": 2553
    },
    {
      "epoch": 0.7665066026410564,
      "grad_norm": 0.20474757254123688,
      "learning_rate": 0.00015500599182446305,
      "loss": 0.4948,
      "step": 2554
    },
    {
      "epoch": 0.7668067226890757,
      "grad_norm": 0.14326965808868408,
      "learning_rate": 0.0001549622426479859,
      "loss": 0.443,
      "step": 2555
    },
    {
      "epoch": 0.7671068427370948,
      "grad_norm": 0.16902682185173035,
      "learning_rate": 0.00015491847839324777,
      "loss": 0.4591,
      "step": 2556
    },
    {
      "epoch": 0.767406962785114,
      "grad_norm": 0.15285806357860565,
      "learning_rate": 0.00015487469907225475,
      "loss": 0.4428,
      "step": 2557
    },
    {
      "epoch": 0.7677070828331333,
      "grad_norm": 0.18961991369724274,
      "learning_rate": 0.0001548309046970173,
      "loss": 0.5089,
      "step": 2558
    },
    {
      "epoch": 0.7680072028811524,
      "grad_norm": 0.14731541275978088,
      "learning_rate": 0.00015478709527954986,
      "loss": 0.4718,
      "step": 2559
    },
    {
      "epoch": 0.7683073229291717,
      "grad_norm": 0.13845667243003845,
      "learning_rate": 0.00015474327083187105,
      "loss": 0.4403,
      "step": 2560
    },
    {
      "epoch": 0.7686074429771909,
      "grad_norm": 0.1612999141216278,
      "learning_rate": 0.00015469943136600366,
      "loss": 0.5089,
      "step": 2561
    },
    {
      "epoch": 0.7689075630252101,
      "grad_norm": 0.1368875503540039,
      "learning_rate": 0.00015465557689397442,
      "loss": 0.4383,
      "step": 2562
    },
    {
      "epoch": 0.7692076830732293,
      "grad_norm": 0.16148759424686432,
      "learning_rate": 0.00015461170742781438,
      "loss": 0.5166,
      "step": 2563
    },
    {
      "epoch": 0.7695078031212484,
      "grad_norm": 0.18068450689315796,
      "learning_rate": 0.00015456782297955865,
      "loss": 0.4748,
      "step": 2564
    },
    {
      "epoch": 0.7698079231692677,
      "grad_norm": 0.1479911208152771,
      "learning_rate": 0.00015452392356124638,
      "loss": 0.4648,
      "step": 2565
    },
    {
      "epoch": 0.7701080432172869,
      "grad_norm": 0.1680109202861786,
      "learning_rate": 0.00015448000918492086,
      "loss": 0.4356,
      "step": 2566
    },
    {
      "epoch": 0.7704081632653061,
      "grad_norm": 0.14404886960983276,
      "learning_rate": 0.00015443607986262957,
      "loss": 0.4426,
      "step": 2567
    },
    {
      "epoch": 0.7707082833133253,
      "grad_norm": 0.14656315743923187,
      "learning_rate": 0.00015439213560642393,
      "loss": 0.4328,
      "step": 2568
    },
    {
      "epoch": 0.7710084033613446,
      "grad_norm": 0.14843294024467468,
      "learning_rate": 0.00015434817642835959,
      "loss": 0.4031,
      "step": 2569
    },
    {
      "epoch": 0.7713085234093637,
      "grad_norm": 0.13767988979816437,
      "learning_rate": 0.00015430420234049624,
      "loss": 0.4166,
      "step": 2570
    },
    {
      "epoch": 0.771608643457383,
      "grad_norm": 0.16981904208660126,
      "learning_rate": 0.00015426021335489764,
      "loss": 0.4666,
      "step": 2571
    },
    {
      "epoch": 0.7719087635054022,
      "grad_norm": 0.15011709928512573,
      "learning_rate": 0.00015421620948363168,
      "loss": 0.4682,
      "step": 2572
    },
    {
      "epoch": 0.7722088835534213,
      "grad_norm": 0.1532842367887497,
      "learning_rate": 0.00015417219073877037,
      "loss": 0.4622,
      "step": 2573
    },
    {
      "epoch": 0.7725090036014406,
      "grad_norm": 0.5074936151504517,
      "learning_rate": 0.00015412815713238963,
      "loss": 0.4313,
      "step": 2574
    },
    {
      "epoch": 0.7728091236494598,
      "grad_norm": 0.21822358667850494,
      "learning_rate": 0.0001540841086765697,
      "loss": 0.4781,
      "step": 2575
    },
    {
      "epoch": 0.773109243697479,
      "grad_norm": 0.1980976164340973,
      "learning_rate": 0.00015404004538339472,
      "loss": 0.4466,
      "step": 2576
    },
    {
      "epoch": 0.7734093637454982,
      "grad_norm": 0.17648696899414062,
      "learning_rate": 0.0001539959672649529,
      "loss": 0.4553,
      "step": 2577
    },
    {
      "epoch": 0.7737094837935174,
      "grad_norm": 0.3987429141998291,
      "learning_rate": 0.00015395187433333665,
      "loss": 0.4719,
      "step": 2578
    },
    {
      "epoch": 0.7740096038415366,
      "grad_norm": 0.15765416622161865,
      "learning_rate": 0.0001539077666006423,
      "loss": 0.4828,
      "step": 2579
    },
    {
      "epoch": 0.7743097238895558,
      "grad_norm": 0.1482846885919571,
      "learning_rate": 0.00015386364407897035,
      "loss": 0.4396,
      "step": 2580
    },
    {
      "epoch": 0.774609843937575,
      "grad_norm": 0.15453636646270752,
      "learning_rate": 0.00015381950678042524,
      "loss": 0.4964,
      "step": 2581
    },
    {
      "epoch": 0.7749099639855942,
      "grad_norm": 0.17158925533294678,
      "learning_rate": 0.00015377535471711558,
      "loss": 0.515,
      "step": 2582
    },
    {
      "epoch": 0.7752100840336135,
      "grad_norm": 0.1416906863451004,
      "learning_rate": 0.000153731187901154,
      "loss": 0.3999,
      "step": 2583
    },
    {
      "epoch": 0.7755102040816326,
      "grad_norm": 0.15163570642471313,
      "learning_rate": 0.0001536870063446571,
      "loss": 0.4658,
      "step": 2584
    },
    {
      "epoch": 0.7758103241296519,
      "grad_norm": 0.16137585043907166,
      "learning_rate": 0.0001536428100597456,
      "loss": 0.5056,
      "step": 2585
    },
    {
      "epoch": 0.776110444177671,
      "grad_norm": 0.15285837650299072,
      "learning_rate": 0.0001535985990585443,
      "loss": 0.4718,
      "step": 2586
    },
    {
      "epoch": 0.7764105642256903,
      "grad_norm": 0.15122708678245544,
      "learning_rate": 0.00015355437335318195,
      "loss": 0.4593,
      "step": 2587
    },
    {
      "epoch": 0.7767106842737095,
      "grad_norm": 0.3090333640575409,
      "learning_rate": 0.0001535101329557913,
      "loss": 0.4388,
      "step": 2588
    },
    {
      "epoch": 0.7770108043217286,
      "grad_norm": 0.20557376742362976,
      "learning_rate": 0.00015346587787850932,
      "loss": 0.5051,
      "step": 2589
    },
    {
      "epoch": 0.7773109243697479,
      "grad_norm": 0.15038730204105377,
      "learning_rate": 0.00015342160813347676,
      "loss": 0.3762,
      "step": 2590
    },
    {
      "epoch": 0.7776110444177671,
      "grad_norm": 0.15132176876068115,
      "learning_rate": 0.0001533773237328386,
      "loss": 0.4181,
      "step": 2591
    },
    {
      "epoch": 0.7779111644657863,
      "grad_norm": 0.15697380900382996,
      "learning_rate": 0.00015333302468874374,
      "loss": 0.3925,
      "step": 2592
    },
    {
      "epoch": 0.7782112845138055,
      "grad_norm": 0.20264506340026855,
      "learning_rate": 0.0001532887110133451,
      "loss": 0.5233,
      "step": 2593
    },
    {
      "epoch": 0.7785114045618248,
      "grad_norm": 0.27464234828948975,
      "learning_rate": 0.00015324438271879963,
      "loss": 0.4927,
      "step": 2594
    },
    {
      "epoch": 0.7788115246098439,
      "grad_norm": 0.16195513308048248,
      "learning_rate": 0.00015320003981726828,
      "loss": 0.4794,
      "step": 2595
    },
    {
      "epoch": 0.7791116446578632,
      "grad_norm": 0.1584070920944214,
      "learning_rate": 0.00015315568232091603,
      "loss": 0.4716,
      "step": 2596
    },
    {
      "epoch": 0.7794117647058824,
      "grad_norm": 0.142884761095047,
      "learning_rate": 0.00015311131024191182,
      "loss": 0.4211,
      "step": 2597
    },
    {
      "epoch": 0.7797118847539015,
      "grad_norm": 0.1578892320394516,
      "learning_rate": 0.00015306692359242868,
      "loss": 0.4892,
      "step": 2598
    },
    {
      "epoch": 0.7800120048019208,
      "grad_norm": 0.15338781476020813,
      "learning_rate": 0.0001530225223846436,
      "loss": 0.4763,
      "step": 2599
    },
    {
      "epoch": 0.78031212484994,
      "grad_norm": 0.15820817649364471,
      "learning_rate": 0.00015297810663073743,
      "loss": 0.4925,
      "step": 2600
    },
    {
      "epoch": 0.7806122448979592,
      "grad_norm": 0.15015913546085358,
      "learning_rate": 0.0001529336763428952,
      "loss": 0.4847,
      "step": 2601
    },
    {
      "epoch": 0.7809123649459784,
      "grad_norm": 0.15379305183887482,
      "learning_rate": 0.00015288923153330584,
      "loss": 0.4217,
      "step": 2602
    },
    {
      "epoch": 0.7812124849939976,
      "grad_norm": 0.14863692224025726,
      "learning_rate": 0.00015284477221416226,
      "loss": 0.4772,
      "step": 2603
    },
    {
      "epoch": 0.7815126050420168,
      "grad_norm": 0.17082728445529938,
      "learning_rate": 0.00015280029839766134,
      "loss": 0.5133,
      "step": 2604
    },
    {
      "epoch": 0.781812725090036,
      "grad_norm": 0.1583660989999771,
      "learning_rate": 0.00015275581009600403,
      "loss": 0.4573,
      "step": 2605
    },
    {
      "epoch": 0.7821128451380552,
      "grad_norm": 0.17564094066619873,
      "learning_rate": 0.00015271130732139515,
      "loss": 0.443,
      "step": 2606
    },
    {
      "epoch": 0.7824129651860744,
      "grad_norm": 0.16535411775112152,
      "learning_rate": 0.0001526667900860435,
      "loss": 0.4876,
      "step": 2607
    },
    {
      "epoch": 0.7827130852340937,
      "grad_norm": 0.2890619933605194,
      "learning_rate": 0.00015262225840216194,
      "loss": 0.4977,
      "step": 2608
    },
    {
      "epoch": 0.7830132052821128,
      "grad_norm": 0.13368655741214752,
      "learning_rate": 0.00015257771228196715,
      "loss": 0.4076,
      "step": 2609
    },
    {
      "epoch": 0.7833133253301321,
      "grad_norm": 0.14766785502433777,
      "learning_rate": 0.00015253315173767993,
      "loss": 0.4406,
      "step": 2610
    },
    {
      "epoch": 0.7836134453781513,
      "grad_norm": 0.15561902523040771,
      "learning_rate": 0.00015248857678152485,
      "loss": 0.5003,
      "step": 2611
    },
    {
      "epoch": 0.7839135654261705,
      "grad_norm": 0.20068630576133728,
      "learning_rate": 0.0001524439874257306,
      "loss": 0.4634,
      "step": 2612
    },
    {
      "epoch": 0.7842136854741897,
      "grad_norm": 0.1610409915447235,
      "learning_rate": 0.00015239938368252976,
      "loss": 0.471,
      "step": 2613
    },
    {
      "epoch": 0.7845138055222088,
      "grad_norm": 0.16158722341060638,
      "learning_rate": 0.00015235476556415889,
      "loss": 0.4585,
      "step": 2614
    },
    {
      "epoch": 0.7848139255702281,
      "grad_norm": 0.15008698403835297,
      "learning_rate": 0.00015231013308285838,
      "loss": 0.4735,
      "step": 2615
    },
    {
      "epoch": 0.7851140456182473,
      "grad_norm": 0.18932443857192993,
      "learning_rate": 0.00015226548625087266,
      "loss": 0.5461,
      "step": 2616
    },
    {
      "epoch": 0.7854141656662665,
      "grad_norm": 0.15541422367095947,
      "learning_rate": 0.00015222082508045012,
      "loss": 0.4787,
      "step": 2617
    },
    {
      "epoch": 0.7857142857142857,
      "grad_norm": 0.15387295186519623,
      "learning_rate": 0.00015217614958384302,
      "loss": 0.499,
      "step": 2618
    },
    {
      "epoch": 0.786014405762305,
      "grad_norm": 0.15941022336483002,
      "learning_rate": 0.00015213145977330757,
      "loss": 0.4226,
      "step": 2619
    },
    {
      "epoch": 0.7863145258103241,
      "grad_norm": 0.15059997141361237,
      "learning_rate": 0.00015208675566110387,
      "loss": 0.5033,
      "step": 2620
    },
    {
      "epoch": 0.7866146458583433,
      "grad_norm": 0.1610608547925949,
      "learning_rate": 0.00015204203725949604,
      "loss": 0.4464,
      "step": 2621
    },
    {
      "epoch": 0.7869147659063626,
      "grad_norm": 0.13089437782764435,
      "learning_rate": 0.00015199730458075202,
      "loss": 0.3898,
      "step": 2622
    },
    {
      "epoch": 0.7872148859543817,
      "grad_norm": 0.1501428335905075,
      "learning_rate": 0.00015195255763714372,
      "loss": 0.468,
      "step": 2623
    },
    {
      "epoch": 0.787515006002401,
      "grad_norm": 0.15691035985946655,
      "learning_rate": 0.00015190779644094695,
      "loss": 0.4842,
      "step": 2624
    },
    {
      "epoch": 0.7878151260504201,
      "grad_norm": 0.16759933531284332,
      "learning_rate": 0.00015186302100444142,
      "loss": 0.4867,
      "step": 2625
    },
    {
      "epoch": 0.7881152460984394,
      "grad_norm": 0.15055400133132935,
      "learning_rate": 0.0001518182313399108,
      "loss": 0.428,
      "step": 2626
    },
    {
      "epoch": 0.7884153661464586,
      "grad_norm": 0.152202770113945,
      "learning_rate": 0.00015177342745964256,
      "loss": 0.4762,
      "step": 2627
    },
    {
      "epoch": 0.7887154861944778,
      "grad_norm": 0.1485380083322525,
      "learning_rate": 0.00015172860937592817,
      "loss": 0.5,
      "step": 2628
    },
    {
      "epoch": 0.789015606242497,
      "grad_norm": 0.17636558413505554,
      "learning_rate": 0.00015168377710106297,
      "loss": 0.4601,
      "step": 2629
    },
    {
      "epoch": 0.7893157262905162,
      "grad_norm": 0.1553865373134613,
      "learning_rate": 0.0001516389306473461,
      "loss": 0.475,
      "step": 2630
    },
    {
      "epoch": 0.7896158463385354,
      "grad_norm": 0.16150932013988495,
      "learning_rate": 0.0001515940700270808,
      "loss": 0.5349,
      "step": 2631
    },
    {
      "epoch": 0.7899159663865546,
      "grad_norm": 0.15071119368076324,
      "learning_rate": 0.00015154919525257396,
      "loss": 0.4644,
      "step": 2632
    },
    {
      "epoch": 0.7902160864345739,
      "grad_norm": 0.13300053775310516,
      "learning_rate": 0.0001515043063361365,
      "loss": 0.4493,
      "step": 2633
    },
    {
      "epoch": 0.790516206482593,
      "grad_norm": 0.15277032554149628,
      "learning_rate": 0.00015145940329008316,
      "loss": 0.4793,
      "step": 2634
    },
    {
      "epoch": 0.7908163265306123,
      "grad_norm": 0.13567908108234406,
      "learning_rate": 0.00015141448612673262,
      "loss": 0.4319,
      "step": 2635
    },
    {
      "epoch": 0.7911164465786314,
      "grad_norm": 0.18850558996200562,
      "learning_rate": 0.00015136955485840737,
      "loss": 0.4799,
      "step": 2636
    },
    {
      "epoch": 0.7914165666266506,
      "grad_norm": 0.1616249531507492,
      "learning_rate": 0.00015132460949743374,
      "loss": 0.4601,
      "step": 2637
    },
    {
      "epoch": 0.7917166866746699,
      "grad_norm": 0.14946779608726501,
      "learning_rate": 0.000151279650056142,
      "loss": 0.4606,
      "step": 2638
    },
    {
      "epoch": 0.792016806722689,
      "grad_norm": 0.14779013395309448,
      "learning_rate": 0.0001512346765468663,
      "loss": 0.4524,
      "step": 2639
    },
    {
      "epoch": 0.7923169267707083,
      "grad_norm": 0.14556260406970978,
      "learning_rate": 0.00015118968898194458,
      "loss": 0.4458,
      "step": 2640
    },
    {
      "epoch": 0.7926170468187275,
      "grad_norm": 0.13448557257652283,
      "learning_rate": 0.00015114468737371866,
      "loss": 0.4144,
      "step": 2641
    },
    {
      "epoch": 0.7929171668667467,
      "grad_norm": 0.14961867034435272,
      "learning_rate": 0.0001510996717345342,
      "loss": 0.4782,
      "step": 2642
    },
    {
      "epoch": 0.7932172869147659,
      "grad_norm": 0.1369171291589737,
      "learning_rate": 0.00015105464207674073,
      "loss": 0.4393,
      "step": 2643
    },
    {
      "epoch": 0.7935174069627852,
      "grad_norm": 0.1466313898563385,
      "learning_rate": 0.00015100959841269168,
      "loss": 0.4486,
      "step": 2644
    },
    {
      "epoch": 0.7938175270108043,
      "grad_norm": 0.17291010916233063,
      "learning_rate": 0.00015096454075474416,
      "loss": 0.4699,
      "step": 2645
    },
    {
      "epoch": 0.7941176470588235,
      "grad_norm": 0.1477946639060974,
      "learning_rate": 0.0001509194691152593,
      "loss": 0.4767,
      "step": 2646
    },
    {
      "epoch": 0.7944177671068428,
      "grad_norm": 0.14116594195365906,
      "learning_rate": 0.00015087438350660194,
      "loss": 0.439,
      "step": 2647
    },
    {
      "epoch": 0.7947178871548619,
      "grad_norm": 0.1355549544095993,
      "learning_rate": 0.0001508292839411408,
      "loss": 0.4093,
      "step": 2648
    },
    {
      "epoch": 0.7950180072028812,
      "grad_norm": 0.15723030269145966,
      "learning_rate": 0.00015078417043124849,
      "loss": 0.5582,
      "step": 2649
    },
    {
      "epoch": 0.7953181272509003,
      "grad_norm": 0.2367934137582779,
      "learning_rate": 0.00015073904298930132,
      "loss": 0.4499,
      "step": 2650
    },
    {
      "epoch": 0.7956182472989196,
      "grad_norm": 0.16094963252544403,
      "learning_rate": 0.00015069390162767953,
      "loss": 0.4269,
      "step": 2651
    },
    {
      "epoch": 0.7959183673469388,
      "grad_norm": 0.13712561130523682,
      "learning_rate": 0.0001506487463587671,
      "loss": 0.4672,
      "step": 2652
    },
    {
      "epoch": 0.7962184873949579,
      "grad_norm": 0.17767317593097687,
      "learning_rate": 0.00015060357719495188,
      "loss": 0.4718,
      "step": 2653
    },
    {
      "epoch": 0.7965186074429772,
      "grad_norm": 0.14278756082057953,
      "learning_rate": 0.0001505583941486255,
      "loss": 0.4752,
      "step": 2654
    },
    {
      "epoch": 0.7968187274909964,
      "grad_norm": 0.14380548894405365,
      "learning_rate": 0.00015051319723218343,
      "loss": 0.4651,
      "step": 2655
    },
    {
      "epoch": 0.7971188475390156,
      "grad_norm": 0.13241620361804962,
      "learning_rate": 0.0001504679864580249,
      "loss": 0.4127,
      "step": 2656
    },
    {
      "epoch": 0.7974189675870348,
      "grad_norm": 0.14332759380340576,
      "learning_rate": 0.000150422761838553,
      "loss": 0.4809,
      "step": 2657
    },
    {
      "epoch": 0.7977190876350541,
      "grad_norm": 0.19808930158615112,
      "learning_rate": 0.00015037752338617456,
      "loss": 0.3853,
      "step": 2658
    },
    {
      "epoch": 0.7980192076830732,
      "grad_norm": 0.14704017341136932,
      "learning_rate": 0.00015033227111330022,
      "loss": 0.4682,
      "step": 2659
    },
    {
      "epoch": 0.7983193277310925,
      "grad_norm": 0.12785203754901886,
      "learning_rate": 0.00015028700503234447,
      "loss": 0.4025,
      "step": 2660
    },
    {
      "epoch": 0.7986194477791116,
      "grad_norm": 0.13784854114055634,
      "learning_rate": 0.0001502417251557255,
      "loss": 0.4385,
      "step": 2661
    },
    {
      "epoch": 0.7989195678271308,
      "grad_norm": 0.14797067642211914,
      "learning_rate": 0.0001501964314958653,
      "loss": 0.4798,
      "step": 2662
    },
    {
      "epoch": 0.7992196878751501,
      "grad_norm": 0.14484858512878418,
      "learning_rate": 0.00015015112406518974,
      "loss": 0.4767,
      "step": 2663
    },
    {
      "epoch": 0.7995198079231692,
      "grad_norm": 0.16087014973163605,
      "learning_rate": 0.00015010580287612833,
      "loss": 0.5004,
      "step": 2664
    },
    {
      "epoch": 0.7998199279711885,
      "grad_norm": 0.1416206955909729,
      "learning_rate": 0.00015006046794111447,
      "loss": 0.477,
      "step": 2665
    },
    {
      "epoch": 0.8001200480192077,
      "grad_norm": 0.13713401556015015,
      "learning_rate": 0.00015001511927258522,
      "loss": 0.4467,
      "step": 2666
    },
    {
      "epoch": 0.8004201680672269,
      "grad_norm": 0.14090123772621155,
      "learning_rate": 0.00014996975688298152,
      "loss": 0.4527,
      "step": 2667
    },
    {
      "epoch": 0.8007202881152461,
      "grad_norm": 0.1869259923696518,
      "learning_rate": 0.000149924380784748,
      "loss": 0.5236,
      "step": 2668
    },
    {
      "epoch": 0.8010204081632653,
      "grad_norm": 0.14781752228736877,
      "learning_rate": 0.00014987899099033308,
      "loss": 0.5191,
      "step": 2669
    },
    {
      "epoch": 0.8013205282112845,
      "grad_norm": 0.17033900320529938,
      "learning_rate": 0.00014983358751218892,
      "loss": 0.4907,
      "step": 2670
    },
    {
      "epoch": 0.8016206482593037,
      "grad_norm": 0.1332671046257019,
      "learning_rate": 0.00014978817036277145,
      "loss": 0.4062,
      "step": 2671
    },
    {
      "epoch": 0.801920768307323,
      "grad_norm": 0.14560440182685852,
      "learning_rate": 0.0001497427395545403,
      "loss": 0.4512,
      "step": 2672
    },
    {
      "epoch": 0.8022208883553421,
      "grad_norm": 0.15215948224067688,
      "learning_rate": 0.00014969729509995897,
      "loss": 0.5081,
      "step": 2673
    },
    {
      "epoch": 0.8025210084033614,
      "grad_norm": 0.16118304431438446,
      "learning_rate": 0.0001496518370114946,
      "loss": 0.4843,
      "step": 2674
    },
    {
      "epoch": 0.8028211284513805,
      "grad_norm": 0.12681308388710022,
      "learning_rate": 0.00014960636530161807,
      "loss": 0.4165,
      "step": 2675
    },
    {
      "epoch": 0.8031212484993998,
      "grad_norm": 0.14559060335159302,
      "learning_rate": 0.00014956087998280402,
      "loss": 0.4896,
      "step": 2676
    },
    {
      "epoch": 0.803421368547419,
      "grad_norm": 0.14230754971504211,
      "learning_rate": 0.00014951538106753088,
      "loss": 0.4769,
      "step": 2677
    },
    {
      "epoch": 0.8037214885954381,
      "grad_norm": 0.13430102169513702,
      "learning_rate": 0.00014946986856828067,
      "loss": 0.3842,
      "step": 2678
    },
    {
      "epoch": 0.8040216086434574,
      "grad_norm": 0.1401878446340561,
      "learning_rate": 0.0001494243424975393,
      "loss": 0.4326,
      "step": 2679
    },
    {
      "epoch": 0.8043217286914766,
      "grad_norm": 0.14194200932979584,
      "learning_rate": 0.00014937880286779629,
      "loss": 0.4497,
      "step": 2680
    },
    {
      "epoch": 0.8046218487394958,
      "grad_norm": 0.14997471868991852,
      "learning_rate": 0.0001493332496915449,
      "loss": 0.4537,
      "step": 2681
    },
    {
      "epoch": 0.804921968787515,
      "grad_norm": 0.1459532380104065,
      "learning_rate": 0.00014928768298128216,
      "loss": 0.4386,
      "step": 2682
    },
    {
      "epoch": 0.8052220888355343,
      "grad_norm": 0.14103145897388458,
      "learning_rate": 0.00014924210274950875,
      "loss": 0.4325,
      "step": 2683
    },
    {
      "epoch": 0.8055222088835534,
      "grad_norm": 0.14490856230258942,
      "learning_rate": 0.00014919650900872909,
      "loss": 0.456,
      "step": 2684
    },
    {
      "epoch": 0.8058223289315727,
      "grad_norm": 0.15141165256500244,
      "learning_rate": 0.00014915090177145131,
      "loss": 0.4898,
      "step": 2685
    },
    {
      "epoch": 0.8061224489795918,
      "grad_norm": 0.1545751988887787,
      "learning_rate": 0.0001491052810501872,
      "loss": 0.4709,
      "step": 2686
    },
    {
      "epoch": 0.806422569027611,
      "grad_norm": 0.14902053773403168,
      "learning_rate": 0.0001490596468574523,
      "loss": 0.4529,
      "step": 2687
    },
    {
      "epoch": 0.8067226890756303,
      "grad_norm": 0.14312995970249176,
      "learning_rate": 0.00014901399920576585,
      "loss": 0.4168,
      "step": 2688
    },
    {
      "epoch": 0.8070228091236494,
      "grad_norm": 0.14048358798027039,
      "learning_rate": 0.00014896833810765074,
      "loss": 0.4396,
      "step": 2689
    },
    {
      "epoch": 0.8073229291716687,
      "grad_norm": 0.12291613966226578,
      "learning_rate": 0.00014892266357563358,
      "loss": 0.3882,
      "step": 2690
    },
    {
      "epoch": 0.8076230492196879,
      "grad_norm": 0.1517232358455658,
      "learning_rate": 0.0001488769756222446,
      "loss": 0.4675,
      "step": 2691
    },
    {
      "epoch": 0.8079231692677071,
      "grad_norm": 0.14395961165428162,
      "learning_rate": 0.0001488312742600179,
      "loss": 0.3948,
      "step": 2692
    },
    {
      "epoch": 0.8082232893157263,
      "grad_norm": 0.15397968888282776,
      "learning_rate": 0.00014878555950149095,
      "loss": 0.4186,
      "step": 2693
    },
    {
      "epoch": 0.8085234093637454,
      "grad_norm": 0.14338110387325287,
      "learning_rate": 0.00014873983135920517,
      "loss": 0.4721,
      "step": 2694
    },
    {
      "epoch": 0.8088235294117647,
      "grad_norm": 0.14677682518959045,
      "learning_rate": 0.00014869408984570552,
      "loss": 0.4791,
      "step": 2695
    },
    {
      "epoch": 0.8091236494597839,
      "grad_norm": 0.17025801539421082,
      "learning_rate": 0.00014864833497354074,
      "loss": 0.4945,
      "step": 2696
    },
    {
      "epoch": 0.8094237695078031,
      "grad_norm": 0.14594808220863342,
      "learning_rate": 0.00014860256675526304,
      "loss": 0.4342,
      "step": 2697
    },
    {
      "epoch": 0.8097238895558223,
      "grad_norm": 0.14775292575359344,
      "learning_rate": 0.00014855678520342847,
      "loss": 0.4861,
      "step": 2698
    },
    {
      "epoch": 0.8100240096038416,
      "grad_norm": 0.17930883169174194,
      "learning_rate": 0.00014851099033059666,
      "loss": 0.4628,
      "step": 2699
    },
    {
      "epoch": 0.8103241296518607,
      "grad_norm": 0.14827530086040497,
      "learning_rate": 0.0001484651821493309,
      "loss": 0.4498,
      "step": 2700
    },
    {
      "epoch": 0.81062424969988,
      "grad_norm": 0.1567845195531845,
      "learning_rate": 0.00014841936067219814,
      "loss": 0.4385,
      "step": 2701
    },
    {
      "epoch": 0.8109243697478992,
      "grad_norm": 0.16922719776630402,
      "learning_rate": 0.000148373525911769,
      "loss": 0.4984,
      "step": 2702
    },
    {
      "epoch": 0.8112244897959183,
      "grad_norm": 0.14715611934661865,
      "learning_rate": 0.00014832767788061773,
      "loss": 0.4684,
      "step": 2703
    },
    {
      "epoch": 0.8115246098439376,
      "grad_norm": 0.1670290231704712,
      "learning_rate": 0.00014828181659132215,
      "loss": 0.4803,
      "step": 2704
    },
    {
      "epoch": 0.8118247298919568,
      "grad_norm": 0.1431550681591034,
      "learning_rate": 0.00014823594205646385,
      "loss": 0.452,
      "step": 2705
    },
    {
      "epoch": 0.812124849939976,
      "grad_norm": 0.1359039843082428,
      "learning_rate": 0.00014819005428862788,
      "loss": 0.4049,
      "step": 2706
    },
    {
      "epoch": 0.8124249699879952,
      "grad_norm": 0.1706361174583435,
      "learning_rate": 0.0001481441533004032,
      "loss": 0.4852,
      "step": 2707
    },
    {
      "epoch": 0.8127250900360145,
      "grad_norm": 0.14390254020690918,
      "learning_rate": 0.000148098239104382,
      "loss": 0.4607,
      "step": 2708
    },
    {
      "epoch": 0.8130252100840336,
      "grad_norm": 0.14577381312847137,
      "learning_rate": 0.00014805231171316046,
      "loss": 0.5027,
      "step": 2709
    },
    {
      "epoch": 0.8133253301320528,
      "grad_norm": 0.14503417909145355,
      "learning_rate": 0.0001480063711393382,
      "loss": 0.4609,
      "step": 2710
    },
    {
      "epoch": 0.813625450180072,
      "grad_norm": 0.15241298079490662,
      "learning_rate": 0.00014796041739551852,
      "loss": 0.4392,
      "step": 2711
    },
    {
      "epoch": 0.8139255702280912,
      "grad_norm": 0.15596158802509308,
      "learning_rate": 0.00014791445049430825,
      "loss": 0.4462,
      "step": 2712
    },
    {
      "epoch": 0.8142256902761105,
      "grad_norm": 0.1656380444765091,
      "learning_rate": 0.00014786847044831792,
      "loss": 0.516,
      "step": 2713
    },
    {
      "epoch": 0.8145258103241296,
      "grad_norm": 0.1357385665178299,
      "learning_rate": 0.00014782247727016155,
      "loss": 0.4161,
      "step": 2714
    },
    {
      "epoch": 0.8148259303721489,
      "grad_norm": 0.23623062670230865,
      "learning_rate": 0.00014777647097245696,
      "loss": 0.5041,
      "step": 2715
    },
    {
      "epoch": 0.8151260504201681,
      "grad_norm": 0.13372428715229034,
      "learning_rate": 0.00014773045156782537,
      "loss": 0.4487,
      "step": 2716
    },
    {
      "epoch": 0.8154261704681873,
      "grad_norm": 0.13498467206954956,
      "learning_rate": 0.0001476844190688917,
      "loss": 0.4164,
      "step": 2717
    },
    {
      "epoch": 0.8157262905162065,
      "grad_norm": 0.13854321837425232,
      "learning_rate": 0.00014763837348828442,
      "loss": 0.4769,
      "step": 2718
    },
    {
      "epoch": 0.8160264105642256,
      "grad_norm": 0.14869745075702667,
      "learning_rate": 0.00014759231483863568,
      "loss": 0.4603,
      "step": 2719
    },
    {
      "epoch": 0.8163265306122449,
      "grad_norm": 0.1487181931734085,
      "learning_rate": 0.00014754624313258102,
      "loss": 0.5091,
      "step": 2720
    },
    {
      "epoch": 0.8166266506602641,
      "grad_norm": 0.14438748359680176,
      "learning_rate": 0.00014750015838275977,
      "loss": 0.4928,
      "step": 2721
    },
    {
      "epoch": 0.8169267707082833,
      "grad_norm": 0.1376090794801712,
      "learning_rate": 0.00014745406060181476,
      "loss": 0.4515,
      "step": 2722
    },
    {
      "epoch": 0.8172268907563025,
      "grad_norm": 0.2375035583972931,
      "learning_rate": 0.00014740794980239231,
      "loss": 0.4949,
      "step": 2723
    },
    {
      "epoch": 0.8175270108043218,
      "grad_norm": 0.1473246067762375,
      "learning_rate": 0.00014736182599714247,
      "loss": 0.4718,
      "step": 2724
    },
    {
      "epoch": 0.8178271308523409,
      "grad_norm": 0.22862257063388824,
      "learning_rate": 0.00014731568919871873,
      "loss": 0.4534,
      "step": 2725
    },
    {
      "epoch": 0.8181272509003601,
      "grad_norm": 0.1518033742904663,
      "learning_rate": 0.00014726953941977824,
      "loss": 0.4403,
      "step": 2726
    },
    {
      "epoch": 0.8184273709483794,
      "grad_norm": 0.13525591790676117,
      "learning_rate": 0.0001472233766729816,
      "loss": 0.4208,
      "step": 2727
    },
    {
      "epoch": 0.8187274909963985,
      "grad_norm": 0.16344895958900452,
      "learning_rate": 0.00014717720097099307,
      "loss": 0.5802,
      "step": 2728
    },
    {
      "epoch": 0.8190276110444178,
      "grad_norm": 0.13552914559841156,
      "learning_rate": 0.0001471310123264804,
      "loss": 0.436,
      "step": 2729
    },
    {
      "epoch": 0.819327731092437,
      "grad_norm": 0.1389494091272354,
      "learning_rate": 0.00014708481075211498,
      "loss": 0.4497,
      "step": 2730
    },
    {
      "epoch": 0.8196278511404562,
      "grad_norm": 0.13997599482536316,
      "learning_rate": 0.00014703859626057157,
      "loss": 0.4735,
      "step": 2731
    },
    {
      "epoch": 0.8199279711884754,
      "grad_norm": 0.21648424863815308,
      "learning_rate": 0.00014699236886452866,
      "loss": 0.5035,
      "step": 2732
    },
    {
      "epoch": 0.8202280912364946,
      "grad_norm": 0.14384159445762634,
      "learning_rate": 0.00014694612857666823,
      "loss": 0.4853,
      "step": 2733
    },
    {
      "epoch": 0.8205282112845138,
      "grad_norm": 0.14302153885364532,
      "learning_rate": 0.00014689987540967574,
      "loss": 0.4665,
      "step": 2734
    },
    {
      "epoch": 0.820828331332533,
      "grad_norm": 0.13343827426433563,
      "learning_rate": 0.00014685360937624018,
      "loss": 0.405,
      "step": 2735
    },
    {
      "epoch": 0.8211284513805522,
      "grad_norm": 0.1506120264530182,
      "learning_rate": 0.00014680733048905414,
      "loss": 0.4566,
      "step": 2736
    },
    {
      "epoch": 0.8214285714285714,
      "grad_norm": 0.1620592176914215,
      "learning_rate": 0.00014676103876081368,
      "loss": 0.467,
      "step": 2737
    },
    {
      "epoch": 0.8217286914765907,
      "grad_norm": 0.14073067903518677,
      "learning_rate": 0.0001467147342042185,
      "loss": 0.4669,
      "step": 2738
    },
    {
      "epoch": 0.8220288115246098,
      "grad_norm": 0.13470537960529327,
      "learning_rate": 0.00014666841683197155,
      "loss": 0.4334,
      "step": 2739
    },
    {
      "epoch": 0.8223289315726291,
      "grad_norm": 0.14084503054618835,
      "learning_rate": 0.00014662208665677966,
      "loss": 0.4202,
      "step": 2740
    },
    {
      "epoch": 0.8226290516206483,
      "grad_norm": 0.15769736468791962,
      "learning_rate": 0.00014657574369135286,
      "loss": 0.4982,
      "step": 2741
    },
    {
      "epoch": 0.8229291716686674,
      "grad_norm": 0.1900629848241806,
      "learning_rate": 0.00014652938794840483,
      "loss": 0.4747,
      "step": 2742
    },
    {
      "epoch": 0.8232292917166867,
      "grad_norm": 0.13935410976409912,
      "learning_rate": 0.00014648301944065277,
      "loss": 0.4029,
      "step": 2743
    },
    {
      "epoch": 0.8235294117647058,
      "grad_norm": 0.14022275805473328,
      "learning_rate": 0.0001464366381808173,
      "loss": 0.4784,
      "step": 2744
    },
    {
      "epoch": 0.8238295318127251,
      "grad_norm": 0.1625329852104187,
      "learning_rate": 0.00014639024418162263,
      "loss": 0.4318,
      "step": 2745
    },
    {
      "epoch": 0.8241296518607443,
      "grad_norm": 0.15212495625019073,
      "learning_rate": 0.00014634383745579642,
      "loss": 0.4666,
      "step": 2746
    },
    {
      "epoch": 0.8244297719087635,
      "grad_norm": 0.13738644123077393,
      "learning_rate": 0.0001462974180160698,
      "loss": 0.4315,
      "step": 2747
    },
    {
      "epoch": 0.8247298919567827,
      "grad_norm": 0.14229655265808105,
      "learning_rate": 0.00014625098587517737,
      "loss": 0.4678,
      "step": 2748
    },
    {
      "epoch": 0.825030012004802,
      "grad_norm": 0.14815521240234375,
      "learning_rate": 0.00014620454104585738,
      "loss": 0.4424,
      "step": 2749
    },
    {
      "epoch": 0.8253301320528211,
      "grad_norm": 0.14716976881027222,
      "learning_rate": 0.0001461580835408513,
      "loss": 0.4756,
      "step": 2750
    },
    {
      "epoch": 0.8256302521008403,
      "grad_norm": 0.14700216054916382,
      "learning_rate": 0.00014611161337290435,
      "loss": 0.4693,
      "step": 2751
    },
    {
      "epoch": 0.8259303721488596,
      "grad_norm": 0.13168157637119293,
      "learning_rate": 0.00014606513055476496,
      "loss": 0.4187,
      "step": 2752
    },
    {
      "epoch": 0.8262304921968787,
      "grad_norm": 0.1525358408689499,
      "learning_rate": 0.00014601863509918525,
      "loss": 0.4878,
      "step": 2753
    },
    {
      "epoch": 0.826530612244898,
      "grad_norm": 0.14710712432861328,
      "learning_rate": 0.00014597212701892065,
      "loss": 0.4692,
      "step": 2754
    },
    {
      "epoch": 0.8268307322929171,
      "grad_norm": 0.14061634242534637,
      "learning_rate": 0.00014592560632673015,
      "loss": 0.4021,
      "step": 2755
    },
    {
      "epoch": 0.8271308523409364,
      "grad_norm": 0.14602655172348022,
      "learning_rate": 0.00014587907303537616,
      "loss": 0.4828,
      "step": 2756
    },
    {
      "epoch": 0.8274309723889556,
      "grad_norm": 0.1381726861000061,
      "learning_rate": 0.00014583252715762455,
      "loss": 0.4516,
      "step": 2757
    },
    {
      "epoch": 0.8277310924369747,
      "grad_norm": 0.18605683743953705,
      "learning_rate": 0.00014578596870624467,
      "loss": 0.4707,
      "step": 2758
    },
    {
      "epoch": 0.828031212484994,
      "grad_norm": 0.1509706825017929,
      "learning_rate": 0.0001457393976940093,
      "loss": 0.4889,
      "step": 2759
    },
    {
      "epoch": 0.8283313325330132,
      "grad_norm": 0.14689093828201294,
      "learning_rate": 0.00014569281413369462,
      "loss": 0.4191,
      "step": 2760
    },
    {
      "epoch": 0.8286314525810324,
      "grad_norm": 0.14853395521640778,
      "learning_rate": 0.00014564621803808033,
      "loss": 0.4641,
      "step": 2761
    },
    {
      "epoch": 0.8289315726290516,
      "grad_norm": 0.7913419604301453,
      "learning_rate": 0.00014559960941994954,
      "loss": 0.4879,
      "step": 2762
    },
    {
      "epoch": 0.8292316926770709,
      "grad_norm": 0.1619863659143448,
      "learning_rate": 0.00014555298829208876,
      "loss": 0.4998,
      "step": 2763
    },
    {
      "epoch": 0.82953181272509,
      "grad_norm": 0.14276006817817688,
      "learning_rate": 0.00014550635466728802,
      "loss": 0.457,
      "step": 2764
    },
    {
      "epoch": 0.8298319327731093,
      "grad_norm": 0.13974639773368835,
      "learning_rate": 0.00014545970855834063,
      "loss": 0.4358,
      "step": 2765
    },
    {
      "epoch": 0.8301320528211285,
      "grad_norm": 0.14814308285713196,
      "learning_rate": 0.0001454130499780435,
      "loss": 0.4754,
      "step": 2766
    },
    {
      "epoch": 0.8304321728691476,
      "grad_norm": 0.14188772439956665,
      "learning_rate": 0.0001453663789391968,
      "loss": 0.4499,
      "step": 2767
    },
    {
      "epoch": 0.8307322929171669,
      "grad_norm": 0.20982439815998077,
      "learning_rate": 0.0001453196954546043,
      "loss": 0.4604,
      "step": 2768
    },
    {
      "epoch": 0.831032412965186,
      "grad_norm": 0.1626424789428711,
      "learning_rate": 0.00014527299953707296,
      "loss": 0.47,
      "step": 2769
    },
    {
      "epoch": 0.8313325330132053,
      "grad_norm": 0.2350388914346695,
      "learning_rate": 0.00014522629119941333,
      "loss": 0.4388,
      "step": 2770
    },
    {
      "epoch": 0.8316326530612245,
      "grad_norm": 0.14766353368759155,
      "learning_rate": 0.00014517957045443933,
      "loss": 0.4693,
      "step": 2771
    },
    {
      "epoch": 0.8319327731092437,
      "grad_norm": 0.25449541211128235,
      "learning_rate": 0.0001451328373149682,
      "loss": 0.5037,
      "step": 2772
    },
    {
      "epoch": 0.8322328931572629,
      "grad_norm": 0.14723801612854004,
      "learning_rate": 0.00014508609179382067,
      "loss": 0.4968,
      "step": 2773
    },
    {
      "epoch": 0.8325330132052821,
      "grad_norm": 0.1688634157180786,
      "learning_rate": 0.00014503933390382084,
      "loss": 0.4964,
      "step": 2774
    },
    {
      "epoch": 0.8328331332533013,
      "grad_norm": 0.1697065532207489,
      "learning_rate": 0.00014499256365779623,
      "loss": 0.4478,
      "step": 2775
    },
    {
      "epoch": 0.8331332533013205,
      "grad_norm": 0.1368568390607834,
      "learning_rate": 0.00014494578106857768,
      "loss": 0.4438,
      "step": 2776
    },
    {
      "epoch": 0.8334333733493398,
      "grad_norm": 2.361243724822998,
      "learning_rate": 0.00014489898614899945,
      "loss": 0.4527,
      "step": 2777
    },
    {
      "epoch": 0.8337334933973589,
      "grad_norm": 0.14673811197280884,
      "learning_rate": 0.0001448521789118992,
      "loss": 0.4258,
      "step": 2778
    },
    {
      "epoch": 0.8340336134453782,
      "grad_norm": 0.14182722568511963,
      "learning_rate": 0.00014480535937011802,
      "loss": 0.4238,
      "step": 2779
    },
    {
      "epoch": 0.8343337334933973,
      "grad_norm": 14.291014671325684,
      "learning_rate": 0.00014475852753650023,
      "loss": 1.1137,
      "step": 2780
    },
    {
      "epoch": 0.8346338535414166,
      "grad_norm": 0.15664032101631165,
      "learning_rate": 0.00014471168342389363,
      "loss": 0.4579,
      "step": 2781
    },
    {
      "epoch": 0.8349339735894358,
      "grad_norm": 0.18944329023361206,
      "learning_rate": 0.00014466482704514938,
      "loss": 0.4783,
      "step": 2782
    },
    {
      "epoch": 0.8352340936374549,
      "grad_norm": 0.20190639793872833,
      "learning_rate": 0.000144617958413122,
      "loss": 0.4592,
      "step": 2783
    },
    {
      "epoch": 0.8355342136854742,
      "grad_norm": 0.19092018902301788,
      "learning_rate": 0.00014457107754066933,
      "loss": 0.4655,
      "step": 2784
    },
    {
      "epoch": 0.8358343337334934,
      "grad_norm": 0.1797829419374466,
      "learning_rate": 0.0001445241844406526,
      "loss": 0.4773,
      "step": 2785
    },
    {
      "epoch": 0.8361344537815126,
      "grad_norm": 0.16416847705841064,
      "learning_rate": 0.00014447727912593643,
      "loss": 0.4721,
      "step": 2786
    },
    {
      "epoch": 0.8364345738295318,
      "grad_norm": 0.15211446583271027,
      "learning_rate": 0.00014443036160938872,
      "loss": 0.4339,
      "step": 2787
    },
    {
      "epoch": 0.8367346938775511,
      "grad_norm": 0.14469176530838013,
      "learning_rate": 0.00014438343190388076,
      "loss": 0.4558,
      "step": 2788
    },
    {
      "epoch": 0.8370348139255702,
      "grad_norm": 0.16924558579921722,
      "learning_rate": 0.00014433649002228721,
      "loss": 0.4976,
      "step": 2789
    },
    {
      "epoch": 0.8373349339735895,
      "grad_norm": 0.18245111405849457,
      "learning_rate": 0.000144289535977486,
      "loss": 0.4133,
      "step": 2790
    },
    {
      "epoch": 0.8376350540216086,
      "grad_norm": 0.4985876679420471,
      "learning_rate": 0.00014424256978235847,
      "loss": 0.4933,
      "step": 2791
    },
    {
      "epoch": 0.8379351740696278,
      "grad_norm": 0.16431467235088348,
      "learning_rate": 0.0001441955914497892,
      "loss": 0.4111,
      "step": 2792
    },
    {
      "epoch": 0.8382352941176471,
      "grad_norm": 0.24334603548049927,
      "learning_rate": 0.00014414860099266617,
      "loss": 0.4487,
      "step": 2793
    },
    {
      "epoch": 0.8385354141656662,
      "grad_norm": 0.15816469490528107,
      "learning_rate": 0.0001441015984238807,
      "loss": 0.475,
      "step": 2794
    },
    {
      "epoch": 0.8388355342136855,
      "grad_norm": 0.16716502606868744,
      "learning_rate": 0.00014405458375632744,
      "loss": 0.5115,
      "step": 2795
    },
    {
      "epoch": 0.8391356542617047,
      "grad_norm": 0.14759080111980438,
      "learning_rate": 0.00014400755700290423,
      "loss": 0.4338,
      "step": 2796
    },
    {
      "epoch": 0.8394357743097239,
      "grad_norm": 0.21350345015525818,
      "learning_rate": 0.00014396051817651238,
      "loss": 0.4318,
      "step": 2797
    },
    {
      "epoch": 0.8397358943577431,
      "grad_norm": 0.6919642090797424,
      "learning_rate": 0.0001439134672900565,
      "loss": 0.5268,
      "step": 2798
    },
    {
      "epoch": 0.8400360144057623,
      "grad_norm": 0.3596741259098053,
      "learning_rate": 0.00014386640435644436,
      "loss": 0.4604,
      "step": 2799
    },
    {
      "epoch": 0.8403361344537815,
      "grad_norm": 0.1968657374382019,
      "learning_rate": 0.00014381932938858718,
      "loss": 0.4816,
      "step": 2800
    },
    {
      "epoch": 0.8406362545018007,
      "grad_norm": 0.14838272333145142,
      "learning_rate": 0.00014377224239939945,
      "loss": 0.4707,
      "step": 2801
    },
    {
      "epoch": 0.84093637454982,
      "grad_norm": 0.15526807308197021,
      "learning_rate": 0.00014372514340179894,
      "loss": 0.4758,
      "step": 2802
    },
    {
      "epoch": 0.8412364945978391,
      "grad_norm": 0.18176786601543427,
      "learning_rate": 0.00014367803240870672,
      "loss": 0.4563,
      "step": 2803
    },
    {
      "epoch": 0.8415366146458584,
      "grad_norm": 0.14367863535881042,
      "learning_rate": 0.00014363090943304713,
      "loss": 0.4246,
      "step": 2804
    },
    {
      "epoch": 0.8418367346938775,
      "grad_norm": 0.16701172292232513,
      "learning_rate": 0.00014358377448774786,
      "loss": 0.4744,
      "step": 2805
    },
    {
      "epoch": 0.8421368547418968,
      "grad_norm": 0.14857949316501617,
      "learning_rate": 0.00014353662758573977,
      "loss": 0.4361,
      "step": 2806
    },
    {
      "epoch": 0.842436974789916,
      "grad_norm": 0.15754273533821106,
      "learning_rate": 0.00014348946873995718,
      "loss": 0.4611,
      "step": 2807
    },
    {
      "epoch": 0.8427370948379351,
      "grad_norm": 0.2516443431377411,
      "learning_rate": 0.00014344229796333746,
      "loss": 0.5025,
      "step": 2808
    },
    {
      "epoch": 0.8430372148859544,
      "grad_norm": 0.16553343832492828,
      "learning_rate": 0.00014339511526882144,
      "loss": 0.4528,
      "step": 2809
    },
    {
      "epoch": 0.8433373349339736,
      "grad_norm": 0.1507650464773178,
      "learning_rate": 0.0001433479206693532,
      "loss": 0.4749,
      "step": 2810
    },
    {
      "epoch": 0.8436374549819928,
      "grad_norm": 0.1556183397769928,
      "learning_rate": 0.00014330071417787988,
      "loss": 0.4826,
      "step": 2811
    },
    {
      "epoch": 0.843937575030012,
      "grad_norm": 0.15120598673820496,
      "learning_rate": 0.0001432534958073522,
      "loss": 0.4768,
      "step": 2812
    },
    {
      "epoch": 0.8442376950780313,
      "grad_norm": 0.1328650414943695,
      "learning_rate": 0.0001432062655707239,
      "loss": 0.4204,
      "step": 2813
    },
    {
      "epoch": 0.8445378151260504,
      "grad_norm": 0.14978034794330597,
      "learning_rate": 0.0001431590234809521,
      "loss": 0.4661,
      "step": 2814
    },
    {
      "epoch": 0.8448379351740696,
      "grad_norm": 0.15140162408351898,
      "learning_rate": 0.00014311176955099706,
      "loss": 0.441,
      "step": 2815
    },
    {
      "epoch": 0.8451380552220888,
      "grad_norm": 0.15080350637435913,
      "learning_rate": 0.0001430645037938224,
      "loss": 0.475,
      "step": 2816
    },
    {
      "epoch": 0.845438175270108,
      "grad_norm": 0.17098544538021088,
      "learning_rate": 0.0001430172262223949,
      "loss": 0.4928,
      "step": 2817
    },
    {
      "epoch": 0.8457382953181273,
      "grad_norm": 0.1530095487833023,
      "learning_rate": 0.00014296993684968467,
      "loss": 0.4843,
      "step": 2818
    },
    {
      "epoch": 0.8460384153661464,
      "grad_norm": 0.14818990230560303,
      "learning_rate": 0.00014292263568866498,
      "loss": 0.4748,
      "step": 2819
    },
    {
      "epoch": 0.8463385354141657,
      "grad_norm": 0.1625467985868454,
      "learning_rate": 0.0001428753227523124,
      "loss": 0.4293,
      "step": 2820
    },
    {
      "epoch": 0.8466386554621849,
      "grad_norm": 0.1551417112350464,
      "learning_rate": 0.0001428279980536066,
      "loss": 0.4932,
      "step": 2821
    },
    {
      "epoch": 0.8469387755102041,
      "grad_norm": 0.14325112104415894,
      "learning_rate": 0.00014278066160553065,
      "loss": 0.4272,
      "step": 2822
    },
    {
      "epoch": 0.8472388955582233,
      "grad_norm": 0.2540292739868164,
      "learning_rate": 0.00014273331342107073,
      "loss": 0.4257,
      "step": 2823
    },
    {
      "epoch": 0.8475390156062425,
      "grad_norm": 0.15317946672439575,
      "learning_rate": 0.0001426859535132163,
      "loss": 0.4679,
      "step": 2824
    },
    {
      "epoch": 0.8478391356542617,
      "grad_norm": 0.13195572793483734,
      "learning_rate": 0.00014263858189495995,
      "loss": 0.4019,
      "step": 2825
    },
    {
      "epoch": 0.8481392557022809,
      "grad_norm": 0.166551411151886,
      "learning_rate": 0.00014259119857929758,
      "loss": 0.5337,
      "step": 2826
    },
    {
      "epoch": 0.8484393757503002,
      "grad_norm": 0.13715970516204834,
      "learning_rate": 0.0001425438035792283,
      "loss": 0.4264,
      "step": 2827
    },
    {
      "epoch": 0.8487394957983193,
      "grad_norm": 0.12979838252067566,
      "learning_rate": 0.00014249639690775432,
      "loss": 0.3659,
      "step": 2828
    },
    {
      "epoch": 0.8490396158463386,
      "grad_norm": 0.14503979682922363,
      "learning_rate": 0.00014244897857788112,
      "loss": 0.4509,
      "step": 2829
    },
    {
      "epoch": 0.8493397358943577,
      "grad_norm": 0.14053593575954437,
      "learning_rate": 0.0001424015486026174,
      "loss": 0.4479,
      "step": 2830
    },
    {
      "epoch": 0.8496398559423769,
      "grad_norm": 0.22120803594589233,
      "learning_rate": 0.00014235410699497503,
      "loss": 0.5086,
      "step": 2831
    },
    {
      "epoch": 0.8499399759903962,
      "grad_norm": 0.13641591370105743,
      "learning_rate": 0.00014230665376796904,
      "loss": 0.4187,
      "step": 2832
    },
    {
      "epoch": 0.8502400960384153,
      "grad_norm": 0.14362581074237823,
      "learning_rate": 0.00014225918893461773,
      "loss": 0.4234,
      "step": 2833
    },
    {
      "epoch": 0.8505402160864346,
      "grad_norm": 0.18435567617416382,
      "learning_rate": 0.00014221171250794248,
      "loss": 0.4224,
      "step": 2834
    },
    {
      "epoch": 0.8508403361344538,
      "grad_norm": 0.13645349442958832,
      "learning_rate": 0.00014216422450096796,
      "loss": 0.4355,
      "step": 2835
    },
    {
      "epoch": 0.851140456182473,
      "grad_norm": 0.14655910432338715,
      "learning_rate": 0.0001421167249267219,
      "loss": 0.4681,
      "step": 2836
    },
    {
      "epoch": 0.8514405762304922,
      "grad_norm": 0.14065751433372498,
      "learning_rate": 0.00014206921379823528,
      "loss": 0.4638,
      "step": 2837
    },
    {
      "epoch": 0.8517406962785115,
      "grad_norm": 0.16344814002513885,
      "learning_rate": 0.00014202169112854224,
      "loss": 0.4572,
      "step": 2838
    },
    {
      "epoch": 0.8520408163265306,
      "grad_norm": 0.13833872973918915,
      "learning_rate": 0.00014197415693068015,
      "loss": 0.4055,
      "step": 2839
    },
    {
      "epoch": 0.8523409363745498,
      "grad_norm": 0.146205335855484,
      "learning_rate": 0.00014192661121768932,
      "loss": 0.4423,
      "step": 2840
    },
    {
      "epoch": 0.852641056422569,
      "grad_norm": 0.15868382155895233,
      "learning_rate": 0.00014187905400261353,
      "loss": 0.493,
      "step": 2841
    },
    {
      "epoch": 0.8529411764705882,
      "grad_norm": 0.15348432958126068,
      "learning_rate": 0.00014183148529849944,
      "loss": 0.4495,
      "step": 2842
    },
    {
      "epoch": 0.8532412965186075,
      "grad_norm": 0.1486390084028244,
      "learning_rate": 0.00014178390511839704,
      "loss": 0.4924,
      "step": 2843
    },
    {
      "epoch": 0.8535414165666266,
      "grad_norm": 0.14600692689418793,
      "learning_rate": 0.00014173631347535939,
      "loss": 0.4744,
      "step": 2844
    },
    {
      "epoch": 0.8538415366146459,
      "grad_norm": 0.13798721134662628,
      "learning_rate": 0.00014168871038244268,
      "loss": 0.4309,
      "step": 2845
    },
    {
      "epoch": 0.8541416566626651,
      "grad_norm": 0.1473832130432129,
      "learning_rate": 0.00014164109585270635,
      "loss": 0.4477,
      "step": 2846
    },
    {
      "epoch": 0.8544417767106842,
      "grad_norm": 0.5650750398635864,
      "learning_rate": 0.00014159346989921285,
      "loss": 0.398,
      "step": 2847
    },
    {
      "epoch": 0.8547418967587035,
      "grad_norm": 0.1595097780227661,
      "learning_rate": 0.0001415458325350278,
      "loss": 0.4217,
      "step": 2848
    },
    {
      "epoch": 0.8550420168067226,
      "grad_norm": 0.1487688422203064,
      "learning_rate": 0.00014149818377321997,
      "loss": 0.4468,
      "step": 2849
    },
    {
      "epoch": 0.8553421368547419,
      "grad_norm": 0.16221052408218384,
      "learning_rate": 0.0001414505236268613,
      "loss": 0.4624,
      "step": 2850
    },
    {
      "epoch": 0.8556422569027611,
      "grad_norm": 0.1519615799188614,
      "learning_rate": 0.00014140285210902676,
      "loss": 0.4465,
      "step": 2851
    },
    {
      "epoch": 0.8559423769507803,
      "grad_norm": 0.19140374660491943,
      "learning_rate": 0.0001413551692327945,
      "loss": 0.4556,
      "step": 2852
    },
    {
      "epoch": 0.8562424969987995,
      "grad_norm": 0.14043140411376953,
      "learning_rate": 0.00014130747501124572,
      "loss": 0.4532,
      "step": 2853
    },
    {
      "epoch": 0.8565426170468188,
      "grad_norm": 0.15206171572208405,
      "learning_rate": 0.00014125976945746486,
      "loss": 0.4832,
      "step": 2854
    },
    {
      "epoch": 0.8568427370948379,
      "grad_norm": 0.15809527039527893,
      "learning_rate": 0.00014121205258453933,
      "loss": 0.4899,
      "step": 2855
    },
    {
      "epoch": 0.8571428571428571,
      "grad_norm": 0.13327966630458832,
      "learning_rate": 0.00014116432440555978,
      "loss": 0.3969,
      "step": 2856
    },
    {
      "epoch": 0.8574429771908764,
      "grad_norm": 0.14684216678142548,
      "learning_rate": 0.0001411165849336198,
      "loss": 0.472,
      "step": 2857
    },
    {
      "epoch": 0.8577430972388955,
      "grad_norm": 0.17298445105552673,
      "learning_rate": 0.00014106883418181623,
      "loss": 0.4668,
      "step": 2858
    },
    {
      "epoch": 0.8580432172869148,
      "grad_norm": 0.16335934400558472,
      "learning_rate": 0.00014102107216324892,
      "loss": 0.4664,
      "step": 2859
    },
    {
      "epoch": 0.858343337334934,
      "grad_norm": 0.14026081562042236,
      "learning_rate": 0.00014097329889102084,
      "loss": 0.4154,
      "step": 2860
    },
    {
      "epoch": 0.8586434573829532,
      "grad_norm": 0.1268925815820694,
      "learning_rate": 0.00014092551437823803,
      "loss": 0.3778,
      "step": 2861
    },
    {
      "epoch": 0.8589435774309724,
      "grad_norm": 0.14247116446495056,
      "learning_rate": 0.00014087771863800964,
      "loss": 0.3996,
      "step": 2862
    },
    {
      "epoch": 0.8592436974789915,
      "grad_norm": 0.13496030867099762,
      "learning_rate": 0.00014082991168344784,
      "loss": 0.4283,
      "step": 2863
    },
    {
      "epoch": 0.8595438175270108,
      "grad_norm": 0.19215568900108337,
      "learning_rate": 0.00014078209352766795,
      "loss": 0.4934,
      "step": 2864
    },
    {
      "epoch": 0.85984393757503,
      "grad_norm": 0.1477733999490738,
      "learning_rate": 0.00014073426418378837,
      "loss": 0.4555,
      "step": 2865
    },
    {
      "epoch": 0.8601440576230492,
      "grad_norm": 0.14388087391853333,
      "learning_rate": 0.00014068642366493048,
      "loss": 0.461,
      "step": 2866
    },
    {
      "epoch": 0.8604441776710684,
      "grad_norm": 0.1384228765964508,
      "learning_rate": 0.00014063857198421876,
      "loss": 0.4369,
      "step": 2867
    },
    {
      "epoch": 0.8607442977190877,
      "grad_norm": 0.20080068707466125,
      "learning_rate": 0.0001405907091547808,
      "loss": 0.4404,
      "step": 2868
    },
    {
      "epoch": 0.8610444177671068,
      "grad_norm": 0.21571896970272064,
      "learning_rate": 0.00014054283518974726,
      "loss": 0.4584,
      "step": 2869
    },
    {
      "epoch": 0.8613445378151261,
      "grad_norm": 0.16102316975593567,
      "learning_rate": 0.00014049495010225174,
      "loss": 0.4849,
      "step": 2870
    },
    {
      "epoch": 0.8616446578631453,
      "grad_norm": 0.14411479234695435,
      "learning_rate": 0.00014044705390543104,
      "loss": 0.489,
      "step": 2871
    },
    {
      "epoch": 0.8619447779111644,
      "grad_norm": 0.1374804824590683,
      "learning_rate": 0.00014039914661242485,
      "loss": 0.411,
      "step": 2872
    },
    {
      "epoch": 0.8622448979591837,
      "grad_norm": 0.14753234386444092,
      "learning_rate": 0.00014035122823637607,
      "loss": 0.4614,
      "step": 2873
    },
    {
      "epoch": 0.8625450180072028,
      "grad_norm": 0.1469254344701767,
      "learning_rate": 0.00014030329879043048,
      "loss": 0.4451,
      "step": 2874
    },
    {
      "epoch": 0.8628451380552221,
      "grad_norm": 0.1511831283569336,
      "learning_rate": 0.000140255358287737,
      "loss": 0.4819,
      "step": 2875
    },
    {
      "epoch": 0.8631452581032413,
      "grad_norm": 0.1463584452867508,
      "learning_rate": 0.00014020740674144762,
      "loss": 0.4872,
      "step": 2876
    },
    {
      "epoch": 0.8634453781512605,
      "grad_norm": 0.13619887828826904,
      "learning_rate": 0.0001401594441647172,
      "loss": 0.4203,
      "step": 2877
    },
    {
      "epoch": 0.8637454981992797,
      "grad_norm": 0.19682137668132782,
      "learning_rate": 0.00014011147057070374,
      "loss": 0.4392,
      "step": 2878
    },
    {
      "epoch": 0.864045618247299,
      "grad_norm": 0.15196283161640167,
      "learning_rate": 0.00014006348597256832,
      "loss": 0.386,
      "step": 2879
    },
    {
      "epoch": 0.8643457382953181,
      "grad_norm": 0.15986262261867523,
      "learning_rate": 0.00014001549038347488,
      "loss": 0.4645,
      "step": 2880
    },
    {
      "epoch": 0.8646458583433373,
      "grad_norm": 0.13178189098834991,
      "learning_rate": 0.00013996748381659053,
      "loss": 0.3843,
      "step": 2881
    },
    {
      "epoch": 0.8649459783913566,
      "grad_norm": 0.151777982711792,
      "learning_rate": 0.00013991946628508524,
      "loss": 0.4774,
      "step": 2882
    },
    {
      "epoch": 0.8652460984393757,
      "grad_norm": 0.16395652294158936,
      "learning_rate": 0.00013987143780213216,
      "loss": 0.437,
      "step": 2883
    },
    {
      "epoch": 0.865546218487395,
      "grad_norm": 0.1446828693151474,
      "learning_rate": 0.00013982339838090728,
      "loss": 0.4669,
      "step": 2884
    },
    {
      "epoch": 0.8658463385354142,
      "grad_norm": 0.13735127449035645,
      "learning_rate": 0.0001397753480345897,
      "loss": 0.4115,
      "step": 2885
    },
    {
      "epoch": 0.8661464585834334,
      "grad_norm": 0.1632857322692871,
      "learning_rate": 0.00013972728677636145,
      "loss": 0.4536,
      "step": 2886
    },
    {
      "epoch": 0.8664465786314526,
      "grad_norm": 0.16722363233566284,
      "learning_rate": 0.00013967921461940762,
      "loss": 0.4238,
      "step": 2887
    },
    {
      "epoch": 0.8667466986794717,
      "grad_norm": 0.13403132557868958,
      "learning_rate": 0.00013963113157691628,
      "loss": 0.4369,
      "step": 2888
    },
    {
      "epoch": 0.867046818727491,
      "grad_norm": 0.14564616978168488,
      "learning_rate": 0.0001395830376620784,
      "loss": 0.4043,
      "step": 2889
    },
    {
      "epoch": 0.8673469387755102,
      "grad_norm": 0.1612880378961563,
      "learning_rate": 0.00013953493288808804,
      "loss": 0.4563,
      "step": 2890
    },
    {
      "epoch": 0.8676470588235294,
      "grad_norm": 0.15668343007564545,
      "learning_rate": 0.00013948681726814216,
      "loss": 0.5096,
      "step": 2891
    },
    {
      "epoch": 0.8679471788715486,
      "grad_norm": 0.1354898363351822,
      "learning_rate": 0.00013943869081544072,
      "loss": 0.4098,
      "step": 2892
    },
    {
      "epoch": 0.8682472989195679,
      "grad_norm": 0.1431705355644226,
      "learning_rate": 0.0001393905535431867,
      "loss": 0.4267,
      "step": 2893
    },
    {
      "epoch": 0.868547418967587,
      "grad_norm": 0.1419495940208435,
      "learning_rate": 0.000139342405464586,
      "loss": 0.4498,
      "step": 2894
    },
    {
      "epoch": 0.8688475390156063,
      "grad_norm": 0.12562808394432068,
      "learning_rate": 0.00013929424659284747,
      "loss": 0.3835,
      "step": 2895
    },
    {
      "epoch": 0.8691476590636255,
      "grad_norm": 0.14655275642871857,
      "learning_rate": 0.00013924607694118296,
      "loss": 0.4974,
      "step": 2896
    },
    {
      "epoch": 0.8694477791116446,
      "grad_norm": 0.1477247029542923,
      "learning_rate": 0.00013919789652280725,
      "loss": 0.4252,
      "step": 2897
    },
    {
      "epoch": 0.8697478991596639,
      "grad_norm": 0.14200358092784882,
      "learning_rate": 0.0001391497053509381,
      "loss": 0.4469,
      "step": 2898
    },
    {
      "epoch": 0.870048019207683,
      "grad_norm": 0.14584213495254517,
      "learning_rate": 0.00013910150343879622,
      "loss": 0.456,
      "step": 2899
    },
    {
      "epoch": 0.8703481392557023,
      "grad_norm": 0.13909918069839478,
      "learning_rate": 0.00013905329079960522,
      "loss": 0.4648,
      "step": 2900
    },
    {
      "epoch": 0.8706482593037215,
      "grad_norm": 0.146524578332901,
      "learning_rate": 0.00013900506744659168,
      "loss": 0.466,
      "step": 2901
    },
    {
      "epoch": 0.8709483793517407,
      "grad_norm": 0.12789376080036163,
      "learning_rate": 0.00013895683339298516,
      "loss": 0.3878,
      "step": 2902
    },
    {
      "epoch": 0.8712484993997599,
      "grad_norm": 0.13936883211135864,
      "learning_rate": 0.0001389085886520181,
      "loss": 0.4519,
      "step": 2903
    },
    {
      "epoch": 0.8715486194477791,
      "grad_norm": 0.13382497429847717,
      "learning_rate": 0.00013886033323692588,
      "loss": 0.4173,
      "step": 2904
    },
    {
      "epoch": 0.8718487394957983,
      "grad_norm": 0.13834607601165771,
      "learning_rate": 0.00013881206716094685,
      "loss": 0.4493,
      "step": 2905
    },
    {
      "epoch": 0.8721488595438175,
      "grad_norm": 0.13608871400356293,
      "learning_rate": 0.00013876379043732223,
      "loss": 0.4277,
      "step": 2906
    },
    {
      "epoch": 0.8724489795918368,
      "grad_norm": 0.15459437668323517,
      "learning_rate": 0.0001387155030792962,
      "loss": 0.4549,
      "step": 2907
    },
    {
      "epoch": 0.8727490996398559,
      "grad_norm": 0.1430646777153015,
      "learning_rate": 0.00013866720510011582,
      "loss": 0.4418,
      "step": 2908
    },
    {
      "epoch": 0.8730492196878752,
      "grad_norm": 0.17030729353427887,
      "learning_rate": 0.0001386188965130311,
      "loss": 0.4434,
      "step": 2909
    },
    {
      "epoch": 0.8733493397358943,
      "grad_norm": 0.1302502453327179,
      "learning_rate": 0.00013857057733129494,
      "loss": 0.3983,
      "step": 2910
    },
    {
      "epoch": 0.8736494597839136,
      "grad_norm": 0.15272179245948792,
      "learning_rate": 0.00013852224756816315,
      "loss": 0.5,
      "step": 2911
    },
    {
      "epoch": 0.8739495798319328,
      "grad_norm": 0.15235954523086548,
      "learning_rate": 0.00013847390723689454,
      "loss": 0.5164,
      "step": 2912
    },
    {
      "epoch": 0.8742496998799519,
      "grad_norm": 0.17023412883281708,
      "learning_rate": 0.00013842555635075056,
      "loss": 0.4858,
      "step": 2913
    },
    {
      "epoch": 0.8745498199279712,
      "grad_norm": 0.14470688998699188,
      "learning_rate": 0.00013837719492299587,
      "loss": 0.4179,
      "step": 2914
    },
    {
      "epoch": 0.8748499399759904,
      "grad_norm": 0.14784374833106995,
      "learning_rate": 0.00013832882296689776,
      "loss": 0.4445,
      "step": 2915
    },
    {
      "epoch": 0.8751500600240096,
      "grad_norm": 0.1371723860502243,
      "learning_rate": 0.00013828044049572658,
      "loss": 0.4134,
      "step": 2916
    },
    {
      "epoch": 0.8754501800720288,
      "grad_norm": 0.13509762287139893,
      "learning_rate": 0.00013823204752275547,
      "loss": 0.4147,
      "step": 2917
    },
    {
      "epoch": 0.8757503001200481,
      "grad_norm": 0.16309741139411926,
      "learning_rate": 0.00013818364406126054,
      "loss": 0.49,
      "step": 2918
    },
    {
      "epoch": 0.8760504201680672,
      "grad_norm": 0.2603866159915924,
      "learning_rate": 0.0001381352301245207,
      "loss": 0.437,
      "step": 2919
    },
    {
      "epoch": 0.8763505402160864,
      "grad_norm": 0.15486130118370056,
      "learning_rate": 0.00013808680572581776,
      "loss": 0.4294,
      "step": 2920
    },
    {
      "epoch": 0.8766506602641057,
      "grad_norm": 0.18155358731746674,
      "learning_rate": 0.0001380383708784364,
      "loss": 0.4407,
      "step": 2921
    },
    {
      "epoch": 0.8769507803121248,
      "grad_norm": 0.1427716165781021,
      "learning_rate": 0.00013798992559566414,
      "loss": 0.4445,
      "step": 2922
    },
    {
      "epoch": 0.8772509003601441,
      "grad_norm": 0.16872714459896088,
      "learning_rate": 0.0001379414698907914,
      "loss": 0.5084,
      "step": 2923
    },
    {
      "epoch": 0.8775510204081632,
      "grad_norm": 0.14170365035533905,
      "learning_rate": 0.00013789300377711148,
      "loss": 0.4457,
      "step": 2924
    },
    {
      "epoch": 0.8778511404561825,
      "grad_norm": 0.14954358339309692,
      "learning_rate": 0.0001378445272679205,
      "loss": 0.4339,
      "step": 2925
    },
    {
      "epoch": 0.8781512605042017,
      "grad_norm": 0.1393103152513504,
      "learning_rate": 0.00013779604037651738,
      "loss": 0.4561,
      "step": 2926
    },
    {
      "epoch": 0.8784513805522209,
      "grad_norm": 0.2205260545015335,
      "learning_rate": 0.00013774754311620394,
      "loss": 0.4564,
      "step": 2927
    },
    {
      "epoch": 0.8787515006002401,
      "grad_norm": 0.13299301266670227,
      "learning_rate": 0.00013769903550028492,
      "loss": 0.4332,
      "step": 2928
    },
    {
      "epoch": 0.8790516206482593,
      "grad_norm": 0.13900135457515717,
      "learning_rate": 0.0001376505175420678,
      "loss": 0.4416,
      "step": 2929
    },
    {
      "epoch": 0.8793517406962785,
      "grad_norm": 0.14976787567138672,
      "learning_rate": 0.0001376019892548629,
      "loss": 0.5004,
      "step": 2930
    },
    {
      "epoch": 0.8796518607442977,
      "grad_norm": 0.14274270832538605,
      "learning_rate": 0.00013755345065198342,
      "loss": 0.4604,
      "step": 2931
    },
    {
      "epoch": 0.879951980792317,
      "grad_norm": 0.14141829311847687,
      "learning_rate": 0.00013750490174674535,
      "loss": 0.4291,
      "step": 2932
    },
    {
      "epoch": 0.8802521008403361,
      "grad_norm": 0.15518976747989655,
      "learning_rate": 0.00013745634255246756,
      "loss": 0.5244,
      "step": 2933
    },
    {
      "epoch": 0.8805522208883554,
      "grad_norm": 0.13994963467121124,
      "learning_rate": 0.0001374077730824717,
      "loss": 0.442,
      "step": 2934
    },
    {
      "epoch": 0.8808523409363745,
      "grad_norm": 0.18987542390823364,
      "learning_rate": 0.00013735919335008216,
      "loss": 0.5006,
      "step": 2935
    },
    {
      "epoch": 0.8811524609843937,
      "grad_norm": 0.1415923684835434,
      "learning_rate": 0.00013731060336862635,
      "loss": 0.4828,
      "step": 2936
    },
    {
      "epoch": 0.881452581032413,
      "grad_norm": 0.350637823343277,
      "learning_rate": 0.00013726200315143436,
      "loss": 0.452,
      "step": 2937
    },
    {
      "epoch": 0.8817527010804321,
      "grad_norm": 0.14160653948783875,
      "learning_rate": 0.00013721339271183906,
      "loss": 0.4292,
      "step": 2938
    },
    {
      "epoch": 0.8820528211284514,
      "grad_norm": 0.15507519245147705,
      "learning_rate": 0.00013716477206317614,
      "loss": 0.411,
      "step": 2939
    },
    {
      "epoch": 0.8823529411764706,
      "grad_norm": 0.1486891359090805,
      "learning_rate": 0.00013711614121878423,
      "loss": 0.4525,
      "step": 2940
    },
    {
      "epoch": 0.8826530612244898,
      "grad_norm": 0.14755134284496307,
      "learning_rate": 0.00013706750019200448,
      "loss": 0.4768,
      "step": 2941
    },
    {
      "epoch": 0.882953181272509,
      "grad_norm": 0.15835875272750854,
      "learning_rate": 0.00013701884899618117,
      "loss": 0.4316,
      "step": 2942
    },
    {
      "epoch": 0.8832533013205283,
      "grad_norm": 0.1589801013469696,
      "learning_rate": 0.00013697018764466108,
      "loss": 0.514,
      "step": 2943
    },
    {
      "epoch": 0.8835534213685474,
      "grad_norm": 0.14008241891860962,
      "learning_rate": 0.00013692151615079395,
      "loss": 0.4373,
      "step": 2944
    },
    {
      "epoch": 0.8838535414165666,
      "grad_norm": 0.15842609107494354,
      "learning_rate": 0.0001368728345279322,
      "loss": 0.488,
      "step": 2945
    },
    {
      "epoch": 0.8841536614645858,
      "grad_norm": 0.15005439519882202,
      "learning_rate": 0.00013682414278943115,
      "loss": 0.458,
      "step": 2946
    },
    {
      "epoch": 0.884453781512605,
      "grad_norm": 0.22050262987613678,
      "learning_rate": 0.00013677544094864872,
      "loss": 0.4706,
      "step": 2947
    },
    {
      "epoch": 0.8847539015606243,
      "grad_norm": 0.14626656472682953,
      "learning_rate": 0.0001367267290189458,
      "loss": 0.4766,
      "step": 2948
    },
    {
      "epoch": 0.8850540216086434,
      "grad_norm": 0.14207623898983002,
      "learning_rate": 0.00013667800701368585,
      "loss": 0.4309,
      "step": 2949
    },
    {
      "epoch": 0.8853541416566627,
      "grad_norm": 0.14198783040046692,
      "learning_rate": 0.00013662927494623528,
      "loss": 0.4418,
      "step": 2950
    },
    {
      "epoch": 0.8856542617046819,
      "grad_norm": 0.13123789429664612,
      "learning_rate": 0.00013658053282996315,
      "loss": 0.4302,
      "step": 2951
    },
    {
      "epoch": 0.885954381752701,
      "grad_norm": 0.1482761651277542,
      "learning_rate": 0.00013653178067824127,
      "loss": 0.4908,
      "step": 2952
    },
    {
      "epoch": 0.8862545018007203,
      "grad_norm": 0.14108116924762726,
      "learning_rate": 0.0001364830185044443,
      "loss": 0.4474,
      "step": 2953
    },
    {
      "epoch": 0.8865546218487395,
      "grad_norm": 0.1418759524822235,
      "learning_rate": 0.00013643424632194947,
      "loss": 0.4435,
      "step": 2954
    },
    {
      "epoch": 0.8868547418967587,
      "grad_norm": 0.16263645887374878,
      "learning_rate": 0.000136385464144137,
      "loss": 0.4342,
      "step": 2955
    },
    {
      "epoch": 0.8871548619447779,
      "grad_norm": 0.1338297724723816,
      "learning_rate": 0.00013633667198438963,
      "loss": 0.4227,
      "step": 2956
    },
    {
      "epoch": 0.8874549819927972,
      "grad_norm": 0.1473718285560608,
      "learning_rate": 0.00013628786985609296,
      "loss": 0.4202,
      "step": 2957
    },
    {
      "epoch": 0.8877551020408163,
      "grad_norm": 0.15010789036750793,
      "learning_rate": 0.00013623905777263528,
      "loss": 0.4522,
      "step": 2958
    },
    {
      "epoch": 0.8880552220888356,
      "grad_norm": 0.12539362907409668,
      "learning_rate": 0.0001361902357474077,
      "loss": 0.394,
      "step": 2959
    },
    {
      "epoch": 0.8883553421368547,
      "grad_norm": 0.14094272255897522,
      "learning_rate": 0.00013614140379380384,
      "loss": 0.4374,
      "step": 2960
    },
    {
      "epoch": 0.8886554621848739,
      "grad_norm": 0.16918066143989563,
      "learning_rate": 0.00013609256192522031,
      "loss": 0.4613,
      "step": 2961
    },
    {
      "epoch": 0.8889555822328932,
      "grad_norm": 0.2934674322605133,
      "learning_rate": 0.00013604371015505623,
      "loss": 0.4518,
      "step": 2962
    },
    {
      "epoch": 0.8892557022809123,
      "grad_norm": 0.17096976935863495,
      "learning_rate": 0.00013599484849671366,
      "loss": 0.4799,
      "step": 2963
    },
    {
      "epoch": 0.8895558223289316,
      "grad_norm": 0.1428793966770172,
      "learning_rate": 0.00013594597696359704,
      "loss": 0.4616,
      "step": 2964
    },
    {
      "epoch": 0.8898559423769508,
      "grad_norm": 0.14048418402671814,
      "learning_rate": 0.00013589709556911388,
      "loss": 0.4437,
      "step": 2965
    },
    {
      "epoch": 0.89015606242497,
      "grad_norm": 0.13983307778835297,
      "learning_rate": 0.00013584820432667414,
      "loss": 0.4463,
      "step": 2966
    },
    {
      "epoch": 0.8904561824729892,
      "grad_norm": 0.1343764066696167,
      "learning_rate": 0.00013579930324969064,
      "loss": 0.4309,
      "step": 2967
    },
    {
      "epoch": 0.8907563025210085,
      "grad_norm": 0.20858599245548248,
      "learning_rate": 0.00013575039235157874,
      "loss": 0.4996,
      "step": 2968
    },
    {
      "epoch": 0.8910564225690276,
      "grad_norm": 0.14643438160419464,
      "learning_rate": 0.00013570147164575666,
      "loss": 0.5124,
      "step": 2969
    },
    {
      "epoch": 0.8913565426170468,
      "grad_norm": 0.16413362324237823,
      "learning_rate": 0.00013565254114564522,
      "loss": 0.4909,
      "step": 2970
    },
    {
      "epoch": 0.891656662665066,
      "grad_norm": 0.1786835789680481,
      "learning_rate": 0.0001356036008646679,
      "loss": 0.494,
      "step": 2971
    },
    {
      "epoch": 0.8919567827130852,
      "grad_norm": 0.1439962089061737,
      "learning_rate": 0.00013555465081625093,
      "loss": 0.3975,
      "step": 2972
    },
    {
      "epoch": 0.8922569027611045,
      "grad_norm": 0.13219578564167023,
      "learning_rate": 0.0001355056910138232,
      "loss": 0.4205,
      "step": 2973
    },
    {
      "epoch": 0.8925570228091236,
      "grad_norm": 0.14857596158981323,
      "learning_rate": 0.00013545672147081633,
      "loss": 0.4722,
      "step": 2974
    },
    {
      "epoch": 0.8928571428571429,
      "grad_norm": 0.13910064101219177,
      "learning_rate": 0.00013540774220066444,
      "loss": 0.455,
      "step": 2975
    },
    {
      "epoch": 0.8931572629051621,
      "grad_norm": 0.14829768240451813,
      "learning_rate": 0.0001353587532168045,
      "loss": 0.4569,
      "step": 2976
    },
    {
      "epoch": 0.8934573829531812,
      "grad_norm": 0.1404755413532257,
      "learning_rate": 0.00013530975453267602,
      "loss": 0.4638,
      "step": 2977
    },
    {
      "epoch": 0.8937575030012005,
      "grad_norm": 0.1413242369890213,
      "learning_rate": 0.0001352607461617213,
      "loss": 0.4149,
      "step": 2978
    },
    {
      "epoch": 0.8940576230492197,
      "grad_norm": 0.1247020959854126,
      "learning_rate": 0.0001352117281173852,
      "loss": 0.3795,
      "step": 2979
    },
    {
      "epoch": 0.8943577430972389,
      "grad_norm": 0.15720205008983612,
      "learning_rate": 0.00013516270041311523,
      "loss": 0.4755,
      "step": 2980
    },
    {
      "epoch": 0.8946578631452581,
      "grad_norm": 0.2672662138938904,
      "learning_rate": 0.0001351136630623616,
      "loss": 0.428,
      "step": 2981
    },
    {
      "epoch": 0.8949579831932774,
      "grad_norm": 0.13200098276138306,
      "learning_rate": 0.00013506461607857716,
      "loss": 0.3911,
      "step": 2982
    },
    {
      "epoch": 0.8952581032412965,
      "grad_norm": 0.19307288527488708,
      "learning_rate": 0.00013501555947521734,
      "loss": 0.5166,
      "step": 2983
    },
    {
      "epoch": 0.8955582232893158,
      "grad_norm": 0.13294395804405212,
      "learning_rate": 0.0001349664932657403,
      "loss": 0.3987,
      "step": 2984
    },
    {
      "epoch": 0.8958583433373349,
      "grad_norm": 0.1404949575662613,
      "learning_rate": 0.0001349174174636068,
      "loss": 0.4471,
      "step": 2985
    },
    {
      "epoch": 0.8961584633853541,
      "grad_norm": 0.15460175275802612,
      "learning_rate": 0.00013486833208228017,
      "loss": 0.4506,
      "step": 2986
    },
    {
      "epoch": 0.8964585834333734,
      "grad_norm": 0.14664404094219208,
      "learning_rate": 0.00013481923713522646,
      "loss": 0.4175,
      "step": 2987
    },
    {
      "epoch": 0.8967587034813925,
      "grad_norm": 0.15789993107318878,
      "learning_rate": 0.0001347701326359143,
      "loss": 0.4846,
      "step": 2988
    },
    {
      "epoch": 0.8970588235294118,
      "grad_norm": 0.1591576188802719,
      "learning_rate": 0.00013472101859781497,
      "loss": 0.4632,
      "step": 2989
    },
    {
      "epoch": 0.897358943577431,
      "grad_norm": 0.16807615756988525,
      "learning_rate": 0.0001346718950344023,
      "loss": 0.4484,
      "step": 2990
    },
    {
      "epoch": 0.8976590636254502,
      "grad_norm": 0.312802791595459,
      "learning_rate": 0.00013462276195915277,
      "loss": 0.4931,
      "step": 2991
    },
    {
      "epoch": 0.8979591836734694,
      "grad_norm": 0.7276992201805115,
      "learning_rate": 0.00013457361938554557,
      "loss": 0.4803,
      "step": 2992
    },
    {
      "epoch": 0.8982593037214885,
      "grad_norm": 0.18568727374076843,
      "learning_rate": 0.00013452446732706226,
      "loss": 0.4683,
      "step": 2993
    },
    {
      "epoch": 0.8985594237695078,
      "grad_norm": 0.13656973838806152,
      "learning_rate": 0.00013447530579718727,
      "loss": 0.4395,
      "step": 2994
    },
    {
      "epoch": 0.898859543817527,
      "grad_norm": 0.20830455422401428,
      "learning_rate": 0.00013442613480940744,
      "loss": 0.4739,
      "step": 2995
    },
    {
      "epoch": 0.8991596638655462,
      "grad_norm": 0.1854456216096878,
      "learning_rate": 0.00013437695437721226,
      "loss": 0.4528,
      "step": 2996
    },
    {
      "epoch": 0.8994597839135654,
      "grad_norm": 0.17800886929035187,
      "learning_rate": 0.00013432776451409383,
      "loss": 0.4785,
      "step": 2997
    },
    {
      "epoch": 0.8997599039615847,
      "grad_norm": 0.14185847342014313,
      "learning_rate": 0.00013427856523354686,
      "loss": 0.4384,
      "step": 2998
    },
    {
      "epoch": 0.9000600240096038,
      "grad_norm": 0.14251980185508728,
      "learning_rate": 0.00013422935654906851,
      "loss": 0.4509,
      "step": 2999
    },
    {
      "epoch": 0.9003601440576231,
      "grad_norm": 0.13576701283454895,
      "learning_rate": 0.00013418013847415875,
      "loss": 0.4266,
      "step": 3000
    },
    {
      "epoch": 0.9006602641056423,
      "grad_norm": 0.1396171748638153,
      "learning_rate": 0.0001341309110223199,
      "loss": 0.4369,
      "step": 3001
    },
    {
      "epoch": 0.9009603841536614,
      "grad_norm": 0.1498328447341919,
      "learning_rate": 0.00013408167420705697,
      "loss": 0.4939,
      "step": 3002
    },
    {
      "epoch": 0.9012605042016807,
      "grad_norm": 0.1731022447347641,
      "learning_rate": 0.00013403242804187754,
      "loss": 0.4979,
      "step": 3003
    },
    {
      "epoch": 0.9015606242496998,
      "grad_norm": 0.6752466559410095,
      "learning_rate": 0.00013398317254029171,
      "loss": 0.4421,
      "step": 3004
    },
    {
      "epoch": 0.9018607442977191,
      "grad_norm": 0.17650246620178223,
      "learning_rate": 0.00013393390771581216,
      "loss": 0.4403,
      "step": 3005
    },
    {
      "epoch": 0.9021608643457383,
      "grad_norm": 0.25184255838394165,
      "learning_rate": 0.00013388463358195412,
      "loss": 0.4699,
      "step": 3006
    },
    {
      "epoch": 0.9024609843937575,
      "grad_norm": 0.20988434553146362,
      "learning_rate": 0.0001338353501522354,
      "loss": 0.4254,
      "step": 3007
    },
    {
      "epoch": 0.9027611044417767,
      "grad_norm": 3.6127822399139404,
      "learning_rate": 0.0001337860574401764,
      "loss": 0.4764,
      "step": 3008
    },
    {
      "epoch": 0.9030612244897959,
      "grad_norm": 0.16843774914741516,
      "learning_rate": 0.0001337367554592999,
      "loss": 0.5213,
      "step": 3009
    },
    {
      "epoch": 0.9033613445378151,
      "grad_norm": 0.1471468061208725,
      "learning_rate": 0.00013368744422313135,
      "loss": 0.4574,
      "step": 3010
    },
    {
      "epoch": 0.9036614645858343,
      "grad_norm": 0.13623493909835815,
      "learning_rate": 0.00013363812374519882,
      "loss": 0.3986,
      "step": 3011
    },
    {
      "epoch": 0.9039615846338536,
      "grad_norm": 0.15088467299938202,
      "learning_rate": 0.00013358879403903274,
      "loss": 0.4087,
      "step": 3012
    },
    {
      "epoch": 0.9042617046818727,
      "grad_norm": 0.1518915593624115,
      "learning_rate": 0.00013353945511816615,
      "loss": 0.4246,
      "step": 3013
    },
    {
      "epoch": 0.904561824729892,
      "grad_norm": 0.14289763569831848,
      "learning_rate": 0.00013349010699613463,
      "loss": 0.4194,
      "step": 3014
    },
    {
      "epoch": 0.9048619447779112,
      "grad_norm": 0.16750043630599976,
      "learning_rate": 0.00013344074968647628,
      "loss": 0.5288,
      "step": 3015
    },
    {
      "epoch": 0.9051620648259304,
      "grad_norm": 0.164357528090477,
      "learning_rate": 0.0001333913832027317,
      "loss": 0.4477,
      "step": 3016
    },
    {
      "epoch": 0.9054621848739496,
      "grad_norm": 0.2598751485347748,
      "learning_rate": 0.00013334200755844405,
      "loss": 0.4604,
      "step": 3017
    },
    {
      "epoch": 0.9057623049219687,
      "grad_norm": 0.15973882377147675,
      "learning_rate": 0.00013329262276715894,
      "loss": 0.4256,
      "step": 3018
    },
    {
      "epoch": 0.906062424969988,
      "grad_norm": 0.16015294194221497,
      "learning_rate": 0.0001332432288424245,
      "loss": 0.4479,
      "step": 3019
    },
    {
      "epoch": 0.9063625450180072,
      "grad_norm": 0.1549282670021057,
      "learning_rate": 0.00013319382579779143,
      "loss": 0.4861,
      "step": 3020
    },
    {
      "epoch": 0.9066626650660264,
      "grad_norm": 0.19171394407749176,
      "learning_rate": 0.00013314441364681285,
      "loss": 0.4759,
      "step": 3021
    },
    {
      "epoch": 0.9069627851140456,
      "grad_norm": 0.14883162081241608,
      "learning_rate": 0.00013309499240304445,
      "loss": 0.4646,
      "step": 3022
    },
    {
      "epoch": 0.9072629051620649,
      "grad_norm": 0.17286548018455505,
      "learning_rate": 0.00013304556208004438,
      "loss": 0.4056,
      "step": 3023
    },
    {
      "epoch": 0.907563025210084,
      "grad_norm": 0.16968637704849243,
      "learning_rate": 0.00013299612269137328,
      "loss": 0.4726,
      "step": 3024
    },
    {
      "epoch": 0.9078631452581032,
      "grad_norm": 0.145048126578331,
      "learning_rate": 0.00013294667425059426,
      "loss": 0.4549,
      "step": 3025
    },
    {
      "epoch": 0.9081632653061225,
      "grad_norm": 0.15538866817951202,
      "learning_rate": 0.00013289721677127293,
      "loss": 0.4922,
      "step": 3026
    },
    {
      "epoch": 0.9084633853541416,
      "grad_norm": 0.16231182217597961,
      "learning_rate": 0.00013284775026697743,
      "loss": 0.4381,
      "step": 3027
    },
    {
      "epoch": 0.9087635054021609,
      "grad_norm": 0.14964693784713745,
      "learning_rate": 0.00013279827475127829,
      "loss": 0.4085,
      "step": 3028
    },
    {
      "epoch": 0.90906362545018,
      "grad_norm": 0.17131774127483368,
      "learning_rate": 0.00013274879023774853,
      "loss": 0.4953,
      "step": 3029
    },
    {
      "epoch": 0.9093637454981993,
      "grad_norm": 0.3522649109363556,
      "learning_rate": 0.00013269929673996372,
      "loss": 0.4912,
      "step": 3030
    },
    {
      "epoch": 0.9096638655462185,
      "grad_norm": 0.18055719137191772,
      "learning_rate": 0.00013264979427150177,
      "loss": 0.44,
      "step": 3031
    },
    {
      "epoch": 0.9099639855942377,
      "grad_norm": 0.14456139504909515,
      "learning_rate": 0.00013260028284594317,
      "loss": 0.4186,
      "step": 3032
    },
    {
      "epoch": 0.9102641056422569,
      "grad_norm": 0.1525500863790512,
      "learning_rate": 0.00013255076247687075,
      "loss": 0.4163,
      "step": 3033
    },
    {
      "epoch": 0.9105642256902761,
      "grad_norm": 0.1520788073539734,
      "learning_rate": 0.00013250123317786993,
      "loss": 0.4502,
      "step": 3034
    },
    {
      "epoch": 0.9108643457382953,
      "grad_norm": 0.17995305359363556,
      "learning_rate": 0.00013245169496252843,
      "loss": 0.4875,
      "step": 3035
    },
    {
      "epoch": 0.9111644657863145,
      "grad_norm": 0.15856656432151794,
      "learning_rate": 0.00013240214784443653,
      "loss": 0.4148,
      "step": 3036
    },
    {
      "epoch": 0.9114645858343338,
      "grad_norm": 0.16556061804294586,
      "learning_rate": 0.00013235259183718695,
      "loss": 0.4308,
      "step": 3037
    },
    {
      "epoch": 0.9117647058823529,
      "grad_norm": 0.174869105219841,
      "learning_rate": 0.00013230302695437475,
      "loss": 0.4469,
      "step": 3038
    },
    {
      "epoch": 0.9120648259303722,
      "grad_norm": 0.1541697084903717,
      "learning_rate": 0.0001322534532095975,
      "loss": 0.4533,
      "step": 3039
    },
    {
      "epoch": 0.9123649459783914,
      "grad_norm": 0.22935479879379272,
      "learning_rate": 0.00013220387061645518,
      "loss": 0.4675,
      "step": 3040
    },
    {
      "epoch": 0.9126650660264105,
      "grad_norm": 0.15487846732139587,
      "learning_rate": 0.00013215427918855028,
      "loss": 0.4637,
      "step": 3041
    },
    {
      "epoch": 0.9129651860744298,
      "grad_norm": 0.1683686524629593,
      "learning_rate": 0.00013210467893948755,
      "loss": 0.491,
      "step": 3042
    },
    {
      "epoch": 0.9132653061224489,
      "grad_norm": 0.14868466556072235,
      "learning_rate": 0.00013205506988287432,
      "loss": 0.5013,
      "step": 3043
    },
    {
      "epoch": 0.9135654261704682,
      "grad_norm": 0.1356276571750641,
      "learning_rate": 0.00013200545203232017,
      "loss": 0.408,
      "step": 3044
    },
    {
      "epoch": 0.9138655462184874,
      "grad_norm": 0.13295316696166992,
      "learning_rate": 0.00013195582540143728,
      "loss": 0.3908,
      "step": 3045
    },
    {
      "epoch": 0.9141656662665066,
      "grad_norm": 0.14142464101314545,
      "learning_rate": 0.00013190619000384013,
      "loss": 0.4476,
      "step": 3046
    },
    {
      "epoch": 0.9144657863145258,
      "grad_norm": 0.16184107959270477,
      "learning_rate": 0.00013185654585314558,
      "loss": 0.4602,
      "step": 3047
    },
    {
      "epoch": 0.9147659063625451,
      "grad_norm": 0.1475936621427536,
      "learning_rate": 0.00013180689296297295,
      "loss": 0.4466,
      "step": 3048
    },
    {
      "epoch": 0.9150660264105642,
      "grad_norm": 0.15406182408332825,
      "learning_rate": 0.000131757231346944,
      "loss": 0.4617,
      "step": 3049
    },
    {
      "epoch": 0.9153661464585834,
      "grad_norm": 0.15649984776973724,
      "learning_rate": 0.00013170756101868274,
      "loss": 0.4445,
      "step": 3050
    },
    {
      "epoch": 0.9156662665066027,
      "grad_norm": 0.14102435111999512,
      "learning_rate": 0.0001316578819918157,
      "loss": 0.4178,
      "step": 3051
    },
    {
      "epoch": 0.9159663865546218,
      "grad_norm": 0.1508234590291977,
      "learning_rate": 0.00013160819427997174,
      "loss": 0.4607,
      "step": 3052
    },
    {
      "epoch": 0.9162665066026411,
      "grad_norm": 0.14864003658294678,
      "learning_rate": 0.00013155849789678214,
      "loss": 0.454,
      "step": 3053
    },
    {
      "epoch": 0.9165666266506602,
      "grad_norm": 0.1495371162891388,
      "learning_rate": 0.00013150879285588047,
      "loss": 0.4444,
      "step": 3054
    },
    {
      "epoch": 0.9168667466986795,
      "grad_norm": 0.14183548092842102,
      "learning_rate": 0.0001314590791709028,
      "loss": 0.4621,
      "step": 3055
    },
    {
      "epoch": 0.9171668667466987,
      "grad_norm": 0.15212209522724152,
      "learning_rate": 0.0001314093568554875,
      "loss": 0.4733,
      "step": 3056
    },
    {
      "epoch": 0.9174669867947179,
      "grad_norm": 0.16908378899097443,
      "learning_rate": 0.00013135962592327531,
      "loss": 0.5045,
      "step": 3057
    },
    {
      "epoch": 0.9177671068427371,
      "grad_norm": 0.14085766673088074,
      "learning_rate": 0.00013130988638790933,
      "loss": 0.4682,
      "step": 3058
    },
    {
      "epoch": 0.9180672268907563,
      "grad_norm": 0.14654438197612762,
      "learning_rate": 0.00013126013826303501,
      "loss": 0.4488,
      "step": 3059
    },
    {
      "epoch": 0.9183673469387755,
      "grad_norm": 0.15144743025302887,
      "learning_rate": 0.00013121038156230021,
      "loss": 0.4794,
      "step": 3060
    },
    {
      "epoch": 0.9186674669867947,
      "grad_norm": 0.1591755449771881,
      "learning_rate": 0.00013116061629935515,
      "loss": 0.4484,
      "step": 3061
    },
    {
      "epoch": 0.918967587034814,
      "grad_norm": 0.1443132907152176,
      "learning_rate": 0.00013111084248785225,
      "loss": 0.4609,
      "step": 3062
    },
    {
      "epoch": 0.9192677070828331,
      "grad_norm": 0.1388029009103775,
      "learning_rate": 0.00013106106014144648,
      "loss": 0.417,
      "step": 3063
    },
    {
      "epoch": 0.9195678271308524,
      "grad_norm": 0.12858764827251434,
      "learning_rate": 0.00013101126927379504,
      "loss": 0.3952,
      "step": 3064
    },
    {
      "epoch": 0.9198679471788715,
      "grad_norm": 0.1703989952802658,
      "learning_rate": 0.00013096146989855745,
      "loss": 0.4609,
      "step": 3065
    },
    {
      "epoch": 0.9201680672268907,
      "grad_norm": 0.135576993227005,
      "learning_rate": 0.00013091166202939563,
      "loss": 0.415,
      "step": 3066
    },
    {
      "epoch": 0.92046818727491,
      "grad_norm": 0.17634700238704681,
      "learning_rate": 0.00013086184567997374,
      "loss": 0.5223,
      "step": 3067
    },
    {
      "epoch": 0.9207683073229291,
      "grad_norm": 0.15296968817710876,
      "learning_rate": 0.00013081202086395844,
      "loss": 0.478,
      "step": 3068
    },
    {
      "epoch": 0.9210684273709484,
      "grad_norm": 0.14750336110591888,
      "learning_rate": 0.00013076218759501846,
      "loss": 0.4466,
      "step": 3069
    },
    {
      "epoch": 0.9213685474189676,
      "grad_norm": 0.15692031383514404,
      "learning_rate": 0.00013071234588682507,
      "loss": 0.4698,
      "step": 3070
    },
    {
      "epoch": 0.9216686674669868,
      "grad_norm": 0.139719620347023,
      "learning_rate": 0.00013066249575305173,
      "loss": 0.4203,
      "step": 3071
    },
    {
      "epoch": 0.921968787515006,
      "grad_norm": 0.14857828617095947,
      "learning_rate": 0.00013061263720737432,
      "loss": 0.461,
      "step": 3072
    },
    {
      "epoch": 0.9222689075630253,
      "grad_norm": 0.18154451251029968,
      "learning_rate": 0.0001305627702634709,
      "loss": 0.5074,
      "step": 3073
    },
    {
      "epoch": 0.9225690276110444,
      "grad_norm": 0.14855815470218658,
      "learning_rate": 0.00013051289493502192,
      "loss": 0.4307,
      "step": 3074
    },
    {
      "epoch": 0.9228691476590636,
      "grad_norm": 0.1603444218635559,
      "learning_rate": 0.00013046301123571008,
      "loss": 0.5327,
      "step": 3075
    },
    {
      "epoch": 0.9231692677070829,
      "grad_norm": 0.15262074768543243,
      "learning_rate": 0.0001304131191792204,
      "loss": 0.5154,
      "step": 3076
    },
    {
      "epoch": 0.923469387755102,
      "grad_norm": 0.13610374927520752,
      "learning_rate": 0.0001303632187792402,
      "loss": 0.4173,
      "step": 3077
    },
    {
      "epoch": 0.9237695078031213,
      "grad_norm": 0.20092245936393738,
      "learning_rate": 0.00013031331004945913,
      "loss": 0.4564,
      "step": 3078
    },
    {
      "epoch": 0.9240696278511404,
      "grad_norm": 0.13647323846817017,
      "learning_rate": 0.00013026339300356902,
      "loss": 0.408,
      "step": 3079
    },
    {
      "epoch": 0.9243697478991597,
      "grad_norm": 0.1391654759645462,
      "learning_rate": 0.00013021346765526405,
      "loss": 0.445,
      "step": 3080
    },
    {
      "epoch": 0.9246698679471789,
      "grad_norm": 0.15535160899162292,
      "learning_rate": 0.00013016353401824069,
      "loss": 0.3748,
      "step": 3081
    },
    {
      "epoch": 0.924969987995198,
      "grad_norm": 0.14320309460163116,
      "learning_rate": 0.00013011359210619763,
      "loss": 0.4247,
      "step": 3082
    },
    {
      "epoch": 0.9252701080432173,
      "grad_norm": 0.19213740527629852,
      "learning_rate": 0.00013006364193283589,
      "loss": 0.4484,
      "step": 3083
    },
    {
      "epoch": 0.9255702280912365,
      "grad_norm": 0.14360110461711884,
      "learning_rate": 0.0001300136835118587,
      "loss": 0.4354,
      "step": 3084
    },
    {
      "epoch": 0.9258703481392557,
      "grad_norm": 0.12578319013118744,
      "learning_rate": 0.0001299637168569716,
      "loss": 0.377,
      "step": 3085
    },
    {
      "epoch": 0.9261704681872749,
      "grad_norm": 0.14346668124198914,
      "learning_rate": 0.00012991374198188232,
      "loss": 0.4416,
      "step": 3086
    },
    {
      "epoch": 0.9264705882352942,
      "grad_norm": 0.14157746732234955,
      "learning_rate": 0.00012986375890030103,
      "loss": 0.4369,
      "step": 3087
    },
    {
      "epoch": 0.9267707082833133,
      "grad_norm": 0.14177608489990234,
      "learning_rate": 0.00012981376762593983,
      "loss": 0.4527,
      "step": 3088
    },
    {
      "epoch": 0.9270708283313326,
      "grad_norm": 0.15607760846614838,
      "learning_rate": 0.0001297637681725134,
      "loss": 0.4955,
      "step": 3089
    },
    {
      "epoch": 0.9273709483793517,
      "grad_norm": 0.141166552901268,
      "learning_rate": 0.00012971376055373842,
      "loss": 0.4591,
      "step": 3090
    },
    {
      "epoch": 0.9276710684273709,
      "grad_norm": 0.1640852391719818,
      "learning_rate": 0.00012966374478333399,
      "loss": 0.5217,
      "step": 3091
    },
    {
      "epoch": 0.9279711884753902,
      "grad_norm": 0.17959006130695343,
      "learning_rate": 0.00012961372087502126,
      "loss": 0.4773,
      "step": 3092
    },
    {
      "epoch": 0.9282713085234093,
      "grad_norm": 0.1554243564605713,
      "learning_rate": 0.00012956368884252384,
      "loss": 0.4658,
      "step": 3093
    },
    {
      "epoch": 0.9285714285714286,
      "grad_norm": 0.1420896202325821,
      "learning_rate": 0.00012951364869956733,
      "loss": 0.4294,
      "step": 3094
    },
    {
      "epoch": 0.9288715486194478,
      "grad_norm": 0.16320428252220154,
      "learning_rate": 0.00012946360045987975,
      "loss": 0.4194,
      "step": 3095
    },
    {
      "epoch": 0.929171668667467,
      "grad_norm": 0.15008452534675598,
      "learning_rate": 0.0001294135441371912,
      "loss": 0.4515,
      "step": 3096
    },
    {
      "epoch": 0.9294717887154862,
      "grad_norm": 0.1435832530260086,
      "learning_rate": 0.00012936347974523414,
      "loss": 0.4389,
      "step": 3097
    },
    {
      "epoch": 0.9297719087635054,
      "grad_norm": 0.15409572422504425,
      "learning_rate": 0.00012931340729774307,
      "loss": 0.4622,
      "step": 3098
    },
    {
      "epoch": 0.9300720288115246,
      "grad_norm": 0.15281450748443604,
      "learning_rate": 0.00012926332680845488,
      "loss": 0.459,
      "step": 3099
    },
    {
      "epoch": 0.9303721488595438,
      "grad_norm": 0.16661444306373596,
      "learning_rate": 0.0001292132382911085,
      "loss": 0.4718,
      "step": 3100
    },
    {
      "epoch": 0.930672268907563,
      "grad_norm": 0.16447588801383972,
      "learning_rate": 0.00012916314175944515,
      "loss": 0.4005,
      "step": 3101
    },
    {
      "epoch": 0.9309723889555822,
      "grad_norm": 0.13854825496673584,
      "learning_rate": 0.00012911303722720828,
      "loss": 0.4226,
      "step": 3102
    },
    {
      "epoch": 0.9312725090036015,
      "grad_norm": 0.13010156154632568,
      "learning_rate": 0.00012906292470814346,
      "loss": 0.4053,
      "step": 3103
    },
    {
      "epoch": 0.9315726290516206,
      "grad_norm": 0.1506943553686142,
      "learning_rate": 0.00012901280421599853,
      "loss": 0.4671,
      "step": 3104
    },
    {
      "epoch": 0.9318727490996399,
      "grad_norm": 0.13658320903778076,
      "learning_rate": 0.00012896267576452343,
      "loss": 0.4275,
      "step": 3105
    },
    {
      "epoch": 0.9321728691476591,
      "grad_norm": 0.15992045402526855,
      "learning_rate": 0.00012891253936747035,
      "loss": 0.449,
      "step": 3106
    },
    {
      "epoch": 0.9324729891956782,
      "grad_norm": 0.14674633741378784,
      "learning_rate": 0.00012886239503859357,
      "loss": 0.4654,
      "step": 3107
    },
    {
      "epoch": 0.9327731092436975,
      "grad_norm": 0.14501380920410156,
      "learning_rate": 0.0001288122427916497,
      "loss": 0.4752,
      "step": 3108
    },
    {
      "epoch": 0.9330732292917167,
      "grad_norm": 0.9473604559898376,
      "learning_rate": 0.00012876208264039739,
      "loss": 0.4511,
      "step": 3109
    },
    {
      "epoch": 0.9333733493397359,
      "grad_norm": 0.13532473146915436,
      "learning_rate": 0.00012871191459859754,
      "loss": 0.4061,
      "step": 3110
    },
    {
      "epoch": 0.9336734693877551,
      "grad_norm": 0.40912723541259766,
      "learning_rate": 0.0001286617386800131,
      "loss": 0.4824,
      "step": 3111
    },
    {
      "epoch": 0.9339735894357744,
      "grad_norm": 0.1879563182592392,
      "learning_rate": 0.00012861155489840933,
      "loss": 0.5001,
      "step": 3112
    },
    {
      "epoch": 0.9342737094837935,
      "grad_norm": 0.1302296668291092,
      "learning_rate": 0.00012856136326755353,
      "loss": 0.3693,
      "step": 3113
    },
    {
      "epoch": 0.9345738295318127,
      "grad_norm": 0.4808141887187958,
      "learning_rate": 0.00012851116380121526,
      "loss": 0.4287,
      "step": 3114
    },
    {
      "epoch": 0.9348739495798319,
      "grad_norm": 0.14809343218803406,
      "learning_rate": 0.0001284609565131661,
      "loss": 0.4297,
      "step": 3115
    },
    {
      "epoch": 0.9351740696278511,
      "grad_norm": 0.23470531404018402,
      "learning_rate": 0.00012841074141717987,
      "loss": 0.4535,
      "step": 3116
    },
    {
      "epoch": 0.9354741896758704,
      "grad_norm": 0.15969856083393097,
      "learning_rate": 0.0001283605185270325,
      "loss": 0.4477,
      "step": 3117
    },
    {
      "epoch": 0.9357743097238895,
      "grad_norm": 0.15086837112903595,
      "learning_rate": 0.00012831028785650209,
      "loss": 0.4145,
      "step": 3118
    },
    {
      "epoch": 0.9360744297719088,
      "grad_norm": 0.2130202353000641,
      "learning_rate": 0.00012826004941936885,
      "loss": 0.482,
      "step": 3119
    },
    {
      "epoch": 0.936374549819928,
      "grad_norm": 0.2209470272064209,
      "learning_rate": 0.00012820980322941506,
      "loss": 0.5067,
      "step": 3120
    },
    {
      "epoch": 0.9366746698679472,
      "grad_norm": 0.16430065035820007,
      "learning_rate": 0.00012815954930042522,
      "loss": 0.5202,
      "step": 3121
    },
    {
      "epoch": 0.9369747899159664,
      "grad_norm": 0.14141945540905,
      "learning_rate": 0.00012810928764618593,
      "loss": 0.4575,
      "step": 3122
    },
    {
      "epoch": 0.9372749099639855,
      "grad_norm": 0.15663444995880127,
      "learning_rate": 0.0001280590182804859,
      "loss": 0.5294,
      "step": 3123
    },
    {
      "epoch": 0.9375750300120048,
      "grad_norm": 0.1487339287996292,
      "learning_rate": 0.00012800874121711594,
      "loss": 0.4321,
      "step": 3124
    },
    {
      "epoch": 0.937875150060024,
      "grad_norm": 0.16639180481433868,
      "learning_rate": 0.00012795845646986902,
      "loss": 0.503,
      "step": 3125
    },
    {
      "epoch": 0.9381752701080432,
      "grad_norm": 0.13797855377197266,
      "learning_rate": 0.00012790816405254012,
      "loss": 0.4339,
      "step": 3126
    },
    {
      "epoch": 0.9384753901560624,
      "grad_norm": 0.14541450142860413,
      "learning_rate": 0.00012785786397892643,
      "loss": 0.4296,
      "step": 3127
    },
    {
      "epoch": 0.9387755102040817,
      "grad_norm": 0.1672312319278717,
      "learning_rate": 0.00012780755626282721,
      "loss": 0.4394,
      "step": 3128
    },
    {
      "epoch": 0.9390756302521008,
      "grad_norm": 0.14389723539352417,
      "learning_rate": 0.00012775724091804378,
      "loss": 0.445,
      "step": 3129
    },
    {
      "epoch": 0.93937575030012,
      "grad_norm": 0.13469307124614716,
      "learning_rate": 0.00012770691795837956,
      "loss": 0.4061,
      "step": 3130
    },
    {
      "epoch": 0.9396758703481393,
      "grad_norm": 0.1481163203716278,
      "learning_rate": 0.00012765658739764013,
      "loss": 0.4118,
      "step": 3131
    },
    {
      "epoch": 0.9399759903961584,
      "grad_norm": 0.15213724970817566,
      "learning_rate": 0.00012760624924963306,
      "loss": 0.4611,
      "step": 3132
    },
    {
      "epoch": 0.9402761104441777,
      "grad_norm": 0.21984997391700745,
      "learning_rate": 0.00012755590352816806,
      "loss": 0.3983,
      "step": 3133
    },
    {
      "epoch": 0.9405762304921969,
      "grad_norm": 0.17315754294395447,
      "learning_rate": 0.00012750555024705688,
      "loss": 0.5218,
      "step": 3134
    },
    {
      "epoch": 0.9408763505402161,
      "grad_norm": 0.148133784532547,
      "learning_rate": 0.00012745518942011344,
      "loss": 0.4728,
      "step": 3135
    },
    {
      "epoch": 0.9411764705882353,
      "grad_norm": 0.16228275001049042,
      "learning_rate": 0.00012740482106115354,
      "loss": 0.4677,
      "step": 3136
    },
    {
      "epoch": 0.9414765906362546,
      "grad_norm": 0.1501329392194748,
      "learning_rate": 0.00012735444518399526,
      "loss": 0.4265,
      "step": 3137
    },
    {
      "epoch": 0.9417767106842737,
      "grad_norm": 0.17041635513305664,
      "learning_rate": 0.00012730406180245856,
      "loss": 0.4577,
      "step": 3138
    },
    {
      "epoch": 0.9420768307322929,
      "grad_norm": 0.1504543125629425,
      "learning_rate": 0.00012725367093036568,
      "loss": 0.451,
      "step": 3139
    },
    {
      "epoch": 0.9423769507803121,
      "grad_norm": 0.1484091579914093,
      "learning_rate": 0.00012720327258154059,
      "loss": 0.4865,
      "step": 3140
    },
    {
      "epoch": 0.9426770708283313,
      "grad_norm": 0.16023018956184387,
      "learning_rate": 0.00012715286676980963,
      "loss": 0.5077,
      "step": 3141
    },
    {
      "epoch": 0.9429771908763506,
      "grad_norm": 0.1468799114227295,
      "learning_rate": 0.00012710245350900105,
      "loss": 0.4624,
      "step": 3142
    },
    {
      "epoch": 0.9432773109243697,
      "grad_norm": 0.1554957628250122,
      "learning_rate": 0.0001270520328129451,
      "loss": 0.4606,
      "step": 3143
    },
    {
      "epoch": 0.943577430972389,
      "grad_norm": 0.1647556573152542,
      "learning_rate": 0.00012700160469547415,
      "loss": 0.4102,
      "step": 3144
    },
    {
      "epoch": 0.9438775510204082,
      "grad_norm": 0.14057038724422455,
      "learning_rate": 0.00012695116917042255,
      "loss": 0.4527,
      "step": 3145
    },
    {
      "epoch": 0.9441776710684273,
      "grad_norm": 0.1454935371875763,
      "learning_rate": 0.00012690072625162676,
      "loss": 0.4265,
      "step": 3146
    },
    {
      "epoch": 0.9444777911164466,
      "grad_norm": 0.1505882441997528,
      "learning_rate": 0.00012685027595292514,
      "loss": 0.4554,
      "step": 3147
    },
    {
      "epoch": 0.9447779111644657,
      "grad_norm": 0.1458381861448288,
      "learning_rate": 0.0001267998182881582,
      "loss": 0.4541,
      "step": 3148
    },
    {
      "epoch": 0.945078031212485,
      "grad_norm": 0.15787646174430847,
      "learning_rate": 0.00012674935327116842,
      "loss": 0.466,
      "step": 3149
    },
    {
      "epoch": 0.9453781512605042,
      "grad_norm": 0.14523647725582123,
      "learning_rate": 0.00012669888091580033,
      "loss": 0.4605,
      "step": 3150
    },
    {
      "epoch": 0.9456782713085234,
      "grad_norm": 0.13974350690841675,
      "learning_rate": 0.00012664840123590036,
      "loss": 0.4185,
      "step": 3151
    },
    {
      "epoch": 0.9459783913565426,
      "grad_norm": 0.14377835392951965,
      "learning_rate": 0.00012659791424531711,
      "loss": 0.4824,
      "step": 3152
    },
    {
      "epoch": 0.9462785114045619,
      "grad_norm": 0.12975840270519257,
      "learning_rate": 0.00012654741995790102,
      "loss": 0.4008,
      "step": 3153
    },
    {
      "epoch": 0.946578631452581,
      "grad_norm": 0.1532629132270813,
      "learning_rate": 0.00012649691838750475,
      "loss": 0.471,
      "step": 3154
    },
    {
      "epoch": 0.9468787515006002,
      "grad_norm": 0.1365705281496048,
      "learning_rate": 0.00012644640954798271,
      "loss": 0.435,
      "step": 3155
    },
    {
      "epoch": 0.9471788715486195,
      "grad_norm": 0.14507761597633362,
      "learning_rate": 0.00012639589345319146,
      "loss": 0.4561,
      "step": 3156
    },
    {
      "epoch": 0.9474789915966386,
      "grad_norm": 0.16338889300823212,
      "learning_rate": 0.00012634537011698948,
      "loss": 0.4901,
      "step": 3157
    },
    {
      "epoch": 0.9477791116446579,
      "grad_norm": 0.15298539400100708,
      "learning_rate": 0.00012629483955323736,
      "loss": 0.4517,
      "step": 3158
    },
    {
      "epoch": 0.948079231692677,
      "grad_norm": 0.14111188054084778,
      "learning_rate": 0.00012624430177579749,
      "loss": 0.375,
      "step": 3159
    },
    {
      "epoch": 0.9483793517406963,
      "grad_norm": 0.1338079571723938,
      "learning_rate": 0.00012619375679853435,
      "loss": 0.3994,
      "step": 3160
    },
    {
      "epoch": 0.9486794717887155,
      "grad_norm": 0.16571113467216492,
      "learning_rate": 0.00012614320463531442,
      "loss": 0.5047,
      "step": 3161
    },
    {
      "epoch": 0.9489795918367347,
      "grad_norm": 0.1337796449661255,
      "learning_rate": 0.00012609264530000604,
      "loss": 0.4401,
      "step": 3162
    },
    {
      "epoch": 0.9492797118847539,
      "grad_norm": 0.15425826609134674,
      "learning_rate": 0.00012604207880647964,
      "loss": 0.4715,
      "step": 3163
    },
    {
      "epoch": 0.9495798319327731,
      "grad_norm": 0.13787232339382172,
      "learning_rate": 0.0001259915051686075,
      "loss": 0.4469,
      "step": 3164
    },
    {
      "epoch": 0.9498799519807923,
      "grad_norm": 0.1763979196548462,
      "learning_rate": 0.00012594092440026397,
      "loss": 0.4671,
      "step": 3165
    },
    {
      "epoch": 0.9501800720288115,
      "grad_norm": 0.14985892176628113,
      "learning_rate": 0.0001258903365153253,
      "loss": 0.5102,
      "step": 3166
    },
    {
      "epoch": 0.9504801920768308,
      "grad_norm": 0.14893373847007751,
      "learning_rate": 0.00012583974152766966,
      "loss": 0.4811,
      "step": 3167
    },
    {
      "epoch": 0.9507803121248499,
      "grad_norm": 0.14797988533973694,
      "learning_rate": 0.0001257891394511772,
      "loss": 0.4312,
      "step": 3168
    },
    {
      "epoch": 0.9510804321728692,
      "grad_norm": 0.14474913477897644,
      "learning_rate": 0.0001257385302997301,
      "loss": 0.453,
      "step": 3169
    },
    {
      "epoch": 0.9513805522208884,
      "grad_norm": 0.15031698346138,
      "learning_rate": 0.0001256879140872123,
      "loss": 0.4736,
      "step": 3170
    },
    {
      "epoch": 0.9516806722689075,
      "grad_norm": 0.1375945806503296,
      "learning_rate": 0.00012563729082750986,
      "loss": 0.4032,
      "step": 3171
    },
    {
      "epoch": 0.9519807923169268,
      "grad_norm": 0.15056562423706055,
      "learning_rate": 0.00012558666053451062,
      "loss": 0.428,
      "step": 3172
    },
    {
      "epoch": 0.9522809123649459,
      "grad_norm": 0.2670383155345917,
      "learning_rate": 0.0001255360232221045,
      "loss": 0.4268,
      "step": 3173
    },
    {
      "epoch": 0.9525810324129652,
      "grad_norm": 0.1334916651248932,
      "learning_rate": 0.00012548537890418317,
      "loss": 0.3939,
      "step": 3174
    },
    {
      "epoch": 0.9528811524609844,
      "grad_norm": 0.14399544894695282,
      "learning_rate": 0.0001254347275946404,
      "loss": 0.4705,
      "step": 3175
    },
    {
      "epoch": 0.9531812725090036,
      "grad_norm": 0.14479242265224457,
      "learning_rate": 0.00012538406930737175,
      "loss": 0.434,
      "step": 3176
    },
    {
      "epoch": 0.9534813925570228,
      "grad_norm": 0.14071334898471832,
      "learning_rate": 0.00012533340405627475,
      "loss": 0.429,
      "step": 3177
    },
    {
      "epoch": 0.9537815126050421,
      "grad_norm": 0.17476089298725128,
      "learning_rate": 0.00012528273185524885,
      "loss": 0.4767,
      "step": 3178
    },
    {
      "epoch": 0.9540816326530612,
      "grad_norm": 0.13704952597618103,
      "learning_rate": 0.0001252320527181954,
      "loss": 0.4029,
      "step": 3179
    },
    {
      "epoch": 0.9543817527010804,
      "grad_norm": 0.14694024622440338,
      "learning_rate": 0.00012518136665901755,
      "loss": 0.4931,
      "step": 3180
    },
    {
      "epoch": 0.9546818727490997,
      "grad_norm": 0.14101053774356842,
      "learning_rate": 0.00012513067369162052,
      "loss": 0.4315,
      "step": 3181
    },
    {
      "epoch": 0.9549819927971188,
      "grad_norm": 0.16008321940898895,
      "learning_rate": 0.00012507997382991132,
      "loss": 0.4494,
      "step": 3182
    },
    {
      "epoch": 0.9552821128451381,
      "grad_norm": 0.15155091881752014,
      "learning_rate": 0.00012502926708779892,
      "loss": 0.4968,
      "step": 3183
    },
    {
      "epoch": 0.9555822328931572,
      "grad_norm": 0.14365136623382568,
      "learning_rate": 0.00012497855347919407,
      "loss": 0.4271,
      "step": 3184
    },
    {
      "epoch": 0.9558823529411765,
      "grad_norm": 0.16472645103931427,
      "learning_rate": 0.0001249278330180095,
      "loss": 0.4742,
      "step": 3185
    },
    {
      "epoch": 0.9561824729891957,
      "grad_norm": 0.14155378937721252,
      "learning_rate": 0.00012487710571815975,
      "loss": 0.4285,
      "step": 3186
    },
    {
      "epoch": 0.9564825930372148,
      "grad_norm": 0.1432572901248932,
      "learning_rate": 0.00012482637159356131,
      "loss": 0.4675,
      "step": 3187
    },
    {
      "epoch": 0.9567827130852341,
      "grad_norm": 0.15490619838237762,
      "learning_rate": 0.00012477563065813253,
      "loss": 0.5118,
      "step": 3188
    },
    {
      "epoch": 0.9570828331332533,
      "grad_norm": 0.1394316554069519,
      "learning_rate": 0.00012472488292579353,
      "loss": 0.437,
      "step": 3189
    },
    {
      "epoch": 0.9573829531812725,
      "grad_norm": 0.14840246737003326,
      "learning_rate": 0.00012467412841046644,
      "loss": 0.4374,
      "step": 3190
    },
    {
      "epoch": 0.9576830732292917,
      "grad_norm": 0.14005251228809357,
      "learning_rate": 0.00012462336712607515,
      "loss": 0.4277,
      "step": 3191
    },
    {
      "epoch": 0.957983193277311,
      "grad_norm": 0.16837067902088165,
      "learning_rate": 0.00012457259908654544,
      "loss": 0.439,
      "step": 3192
    },
    {
      "epoch": 0.9582833133253301,
      "grad_norm": 0.14483563601970673,
      "learning_rate": 0.00012452182430580487,
      "loss": 0.4938,
      "step": 3193
    },
    {
      "epoch": 0.9585834333733494,
      "grad_norm": 0.14190462231636047,
      "learning_rate": 0.00012447104279778305,
      "loss": 0.4116,
      "step": 3194
    },
    {
      "epoch": 0.9588835534213686,
      "grad_norm": 0.1376616656780243,
      "learning_rate": 0.00012442025457641123,
      "loss": 0.4001,
      "step": 3195
    },
    {
      "epoch": 0.9591836734693877,
      "grad_norm": 0.16073790192604065,
      "learning_rate": 0.00012436945965562258,
      "loss": 0.5094,
      "step": 3196
    },
    {
      "epoch": 0.959483793517407,
      "grad_norm": 0.16511289775371552,
      "learning_rate": 0.00012431865804935207,
      "loss": 0.5465,
      "step": 3197
    },
    {
      "epoch": 0.9597839135654261,
      "grad_norm": 0.23794710636138916,
      "learning_rate": 0.00012426784977153662,
      "loss": 0.542,
      "step": 3198
    },
    {
      "epoch": 0.9600840336134454,
      "grad_norm": 0.2180640995502472,
      "learning_rate": 0.00012421703483611485,
      "loss": 0.5002,
      "step": 3199
    },
    {
      "epoch": 0.9603841536614646,
      "grad_norm": 0.6686155796051025,
      "learning_rate": 0.00012416621325702723,
      "loss": 0.4223,
      "step": 3200
    },
    {
      "epoch": 0.9606842737094838,
      "grad_norm": 0.14647041261196136,
      "learning_rate": 0.00012411538504821613,
      "loss": 0.4171,
      "step": 3201
    },
    {
      "epoch": 0.960984393757503,
      "grad_norm": 0.14124207198619843,
      "learning_rate": 0.00012406455022362565,
      "loss": 0.4106,
      "step": 3202
    },
    {
      "epoch": 0.9612845138055222,
      "grad_norm": 0.14072130620479584,
      "learning_rate": 0.00012401370879720178,
      "loss": 0.4297,
      "step": 3203
    },
    {
      "epoch": 0.9615846338535414,
      "grad_norm": 0.13854216039180756,
      "learning_rate": 0.00012396286078289226,
      "loss": 0.4273,
      "step": 3204
    },
    {
      "epoch": 0.9618847539015606,
      "grad_norm": 0.22632403671741486,
      "learning_rate": 0.00012391200619464663,
      "loss": 0.4806,
      "step": 3205
    },
    {
      "epoch": 0.9621848739495799,
      "grad_norm": 0.15359652042388916,
      "learning_rate": 0.00012386114504641627,
      "loss": 0.4526,
      "step": 3206
    },
    {
      "epoch": 0.962484993997599,
      "grad_norm": 0.17294703423976898,
      "learning_rate": 0.00012381027735215442,
      "loss": 0.5399,
      "step": 3207
    },
    {
      "epoch": 0.9627851140456183,
      "grad_norm": 0.3574408292770386,
      "learning_rate": 0.00012375940312581596,
      "loss": 0.4419,
      "step": 3208
    },
    {
      "epoch": 0.9630852340936374,
      "grad_norm": 0.16574545204639435,
      "learning_rate": 0.00012370852238135775,
      "loss": 0.4979,
      "step": 3209
    },
    {
      "epoch": 0.9633853541416567,
      "grad_norm": 0.15823771059513092,
      "learning_rate": 0.00012365763513273826,
      "loss": 0.4644,
      "step": 3210
    },
    {
      "epoch": 0.9636854741896759,
      "grad_norm": 0.2891182601451874,
      "learning_rate": 0.0001236067413939178,
      "loss": 0.5055,
      "step": 3211
    },
    {
      "epoch": 0.963985594237695,
      "grad_norm": 0.18421053886413574,
      "learning_rate": 0.00012355584117885855,
      "loss": 0.4635,
      "step": 3212
    },
    {
      "epoch": 0.9642857142857143,
      "grad_norm": 0.16126295924186707,
      "learning_rate": 0.00012350493450152437,
      "loss": 0.5054,
      "step": 3213
    },
    {
      "epoch": 0.9645858343337335,
      "grad_norm": 0.14591971039772034,
      "learning_rate": 0.00012345402137588097,
      "loss": 0.4113,
      "step": 3214
    },
    {
      "epoch": 0.9648859543817527,
      "grad_norm": 0.2134801745414734,
      "learning_rate": 0.0001234031018158957,
      "loss": 0.4249,
      "step": 3215
    },
    {
      "epoch": 0.9651860744297719,
      "grad_norm": 0.19483153522014618,
      "learning_rate": 0.00012335217583553783,
      "loss": 0.4881,
      "step": 3216
    },
    {
      "epoch": 0.9654861944777912,
      "grad_norm": 0.15648779273033142,
      "learning_rate": 0.00012330124344877826,
      "loss": 0.4707,
      "step": 3217
    },
    {
      "epoch": 0.9657863145258103,
      "grad_norm": 0.1493963748216629,
      "learning_rate": 0.00012325030466958976,
      "loss": 0.4717,
      "step": 3218
    },
    {
      "epoch": 0.9660864345738295,
      "grad_norm": 0.14706221222877502,
      "learning_rate": 0.00012319935951194677,
      "loss": 0.445,
      "step": 3219
    },
    {
      "epoch": 0.9663865546218487,
      "grad_norm": 0.17727939784526825,
      "learning_rate": 0.0001231484079898255,
      "loss": 0.3959,
      "step": 3220
    },
    {
      "epoch": 0.9666866746698679,
      "grad_norm": 0.17178839445114136,
      "learning_rate": 0.00012309745011720392,
      "loss": 0.4025,
      "step": 3221
    },
    {
      "epoch": 0.9669867947178872,
      "grad_norm": 0.1465953290462494,
      "learning_rate": 0.0001230464859080618,
      "loss": 0.454,
      "step": 3222
    },
    {
      "epoch": 0.9672869147659063,
      "grad_norm": 0.23026442527770996,
      "learning_rate": 0.0001229955153763805,
      "loss": 0.4959,
      "step": 3223
    },
    {
      "epoch": 0.9675870348139256,
      "grad_norm": 0.1530740112066269,
      "learning_rate": 0.00012294453853614325,
      "loss": 0.4733,
      "step": 3224
    },
    {
      "epoch": 0.9678871548619448,
      "grad_norm": 0.1319517195224762,
      "learning_rate": 0.00012289355540133495,
      "loss": 0.3903,
      "step": 3225
    },
    {
      "epoch": 0.968187274909964,
      "grad_norm": 0.14231938123703003,
      "learning_rate": 0.0001228425659859422,
      "loss": 0.4649,
      "step": 3226
    },
    {
      "epoch": 0.9684873949579832,
      "grad_norm": 0.1461937129497528,
      "learning_rate": 0.00012279157030395345,
      "loss": 0.4358,
      "step": 3227
    },
    {
      "epoch": 0.9687875150060024,
      "grad_norm": 0.1504702866077423,
      "learning_rate": 0.0001227405683693587,
      "loss": 0.4432,
      "step": 3228
    },
    {
      "epoch": 0.9690876350540216,
      "grad_norm": 0.19015046954154968,
      "learning_rate": 0.0001226895601961498,
      "loss": 0.4691,
      "step": 3229
    },
    {
      "epoch": 0.9693877551020408,
      "grad_norm": 0.16711188852787018,
      "learning_rate": 0.00012263854579832022,
      "loss": 0.5068,
      "step": 3230
    },
    {
      "epoch": 0.96968787515006,
      "grad_norm": 0.14313435554504395,
      "learning_rate": 0.00012258752518986516,
      "loss": 0.4372,
      "step": 3231
    },
    {
      "epoch": 0.9699879951980792,
      "grad_norm": 0.16503359377384186,
      "learning_rate": 0.00012253649838478157,
      "loss": 0.5157,
      "step": 3232
    },
    {
      "epoch": 0.9702881152460985,
      "grad_norm": 0.15321406722068787,
      "learning_rate": 0.00012248546539706808,
      "loss": 0.4769,
      "step": 3233
    },
    {
      "epoch": 0.9705882352941176,
      "grad_norm": 0.14297358691692352,
      "learning_rate": 0.00012243442624072502,
      "loss": 0.4221,
      "step": 3234
    },
    {
      "epoch": 0.9708883553421368,
      "grad_norm": 0.17347034811973572,
      "learning_rate": 0.00012238338092975432,
      "loss": 0.4864,
      "step": 3235
    },
    {
      "epoch": 0.9711884753901561,
      "grad_norm": 0.13694067299365997,
      "learning_rate": 0.00012233232947815974,
      "loss": 0.4032,
      "step": 3236
    },
    {
      "epoch": 0.9714885954381752,
      "grad_norm": 0.16371691226959229,
      "learning_rate": 0.00012228127189994664,
      "loss": 0.4915,
      "step": 3237
    },
    {
      "epoch": 0.9717887154861945,
      "grad_norm": 0.1552603542804718,
      "learning_rate": 0.0001222302082091221,
      "loss": 0.4993,
      "step": 3238
    },
    {
      "epoch": 0.9720888355342137,
      "grad_norm": 0.14726898074150085,
      "learning_rate": 0.00012217913841969482,
      "loss": 0.4674,
      "step": 3239
    },
    {
      "epoch": 0.9723889555822329,
      "grad_norm": 0.18751665949821472,
      "learning_rate": 0.00012212806254567526,
      "loss": 0.4898,
      "step": 3240
    },
    {
      "epoch": 0.9726890756302521,
      "grad_norm": 0.1408817023038864,
      "learning_rate": 0.00012207698060107545,
      "loss": 0.437,
      "step": 3241
    },
    {
      "epoch": 0.9729891956782714,
      "grad_norm": 0.2426920086145401,
      "learning_rate": 0.00012202589259990916,
      "loss": 0.513,
      "step": 3242
    },
    {
      "epoch": 0.9732893157262905,
      "grad_norm": 0.16936664283275604,
      "learning_rate": 0.00012197479855619179,
      "loss": 0.4941,
      "step": 3243
    },
    {
      "epoch": 0.9735894357743097,
      "grad_norm": 0.14508193731307983,
      "learning_rate": 0.00012192369848394045,
      "loss": 0.4254,
      "step": 3244
    },
    {
      "epoch": 0.973889555822329,
      "grad_norm": 0.14925509691238403,
      "learning_rate": 0.00012187259239717378,
      "loss": 0.4389,
      "step": 3245
    },
    {
      "epoch": 0.9741896758703481,
      "grad_norm": 0.15830858051776886,
      "learning_rate": 0.0001218214803099122,
      "loss": 0.4658,
      "step": 3246
    },
    {
      "epoch": 0.9744897959183674,
      "grad_norm": 0.14749321341514587,
      "learning_rate": 0.00012177036223617775,
      "loss": 0.4627,
      "step": 3247
    },
    {
      "epoch": 0.9747899159663865,
      "grad_norm": 0.15965783596038818,
      "learning_rate": 0.00012171923818999402,
      "loss": 0.52,
      "step": 3248
    },
    {
      "epoch": 0.9750900360144058,
      "grad_norm": 0.16403883695602417,
      "learning_rate": 0.00012166810818538634,
      "loss": 0.5091,
      "step": 3249
    },
    {
      "epoch": 0.975390156062425,
      "grad_norm": 0.1529029756784439,
      "learning_rate": 0.00012161697223638162,
      "loss": 0.467,
      "step": 3250
    },
    {
      "epoch": 0.9756902761104442,
      "grad_norm": 0.1996665745973587,
      "learning_rate": 0.00012156583035700846,
      "loss": 0.4269,
      "step": 3251
    },
    {
      "epoch": 0.9759903961584634,
      "grad_norm": 0.15675577521324158,
      "learning_rate": 0.00012151468256129704,
      "loss": 0.4584,
      "step": 3252
    },
    {
      "epoch": 0.9762905162064826,
      "grad_norm": 0.13851302862167358,
      "learning_rate": 0.00012146352886327916,
      "loss": 0.4267,
      "step": 3253
    },
    {
      "epoch": 0.9765906362545018,
      "grad_norm": 0.15544851124286652,
      "learning_rate": 0.00012141236927698823,
      "loss": 0.4324,
      "step": 3254
    },
    {
      "epoch": 0.976890756302521,
      "grad_norm": 0.16857026517391205,
      "learning_rate": 0.00012136120381645932,
      "loss": 0.4889,
      "step": 3255
    },
    {
      "epoch": 0.9771908763505402,
      "grad_norm": 0.14354467391967773,
      "learning_rate": 0.00012131003249572908,
      "loss": 0.4235,
      "step": 3256
    },
    {
      "epoch": 0.9774909963985594,
      "grad_norm": 0.3445586562156677,
      "learning_rate": 0.00012125885532883579,
      "loss": 0.4473,
      "step": 3257
    },
    {
      "epoch": 0.9777911164465787,
      "grad_norm": 0.14772890508174896,
      "learning_rate": 0.0001212076723298193,
      "loss": 0.4987,
      "step": 3258
    },
    {
      "epoch": 0.9780912364945978,
      "grad_norm": 0.15903234481811523,
      "learning_rate": 0.0001211564835127211,
      "loss": 0.5344,
      "step": 3259
    },
    {
      "epoch": 0.978391356542617,
      "grad_norm": 0.13617999851703644,
      "learning_rate": 0.00012110528889158421,
      "loss": 0.3771,
      "step": 3260
    },
    {
      "epoch": 0.9786914765906363,
      "grad_norm": 0.19996723532676697,
      "learning_rate": 0.00012105408848045335,
      "loss": 0.4192,
      "step": 3261
    },
    {
      "epoch": 0.9789915966386554,
      "grad_norm": 0.14216583967208862,
      "learning_rate": 0.00012100288229337469,
      "loss": 0.4467,
      "step": 3262
    },
    {
      "epoch": 0.9792917166866747,
      "grad_norm": 0.153058722615242,
      "learning_rate": 0.00012095167034439616,
      "loss": 0.4744,
      "step": 3263
    },
    {
      "epoch": 0.9795918367346939,
      "grad_norm": 0.13257387280464172,
      "learning_rate": 0.00012090045264756709,
      "loss": 0.388,
      "step": 3264
    },
    {
      "epoch": 0.9798919567827131,
      "grad_norm": 0.14509105682373047,
      "learning_rate": 0.00012084922921693849,
      "loss": 0.4715,
      "step": 3265
    },
    {
      "epoch": 0.9801920768307323,
      "grad_norm": 0.12990200519561768,
      "learning_rate": 0.00012079800006656292,
      "loss": 0.3935,
      "step": 3266
    },
    {
      "epoch": 0.9804921968787516,
      "grad_norm": 0.14619603753089905,
      "learning_rate": 0.00012074676521049452,
      "loss": 0.4264,
      "step": 3267
    },
    {
      "epoch": 0.9807923169267707,
      "grad_norm": 0.14546331763267517,
      "learning_rate": 0.000120695524662789,
      "loss": 0.4438,
      "step": 3268
    },
    {
      "epoch": 0.9810924369747899,
      "grad_norm": 0.14810235798358917,
      "learning_rate": 0.00012064427843750357,
      "loss": 0.4313,
      "step": 3269
    },
    {
      "epoch": 0.9813925570228091,
      "grad_norm": 0.22097627818584442,
      "learning_rate": 0.00012059302654869707,
      "loss": 0.3912,
      "step": 3270
    },
    {
      "epoch": 0.9816926770708283,
      "grad_norm": 0.14861871302127838,
      "learning_rate": 0.00012054176901042989,
      "loss": 0.4472,
      "step": 3271
    },
    {
      "epoch": 0.9819927971188476,
      "grad_norm": 0.14172734320163727,
      "learning_rate": 0.0001204905058367639,
      "loss": 0.4345,
      "step": 3272
    },
    {
      "epoch": 0.9822929171668667,
      "grad_norm": 0.14876016974449158,
      "learning_rate": 0.00012043923704176259,
      "loss": 0.381,
      "step": 3273
    },
    {
      "epoch": 0.982593037214886,
      "grad_norm": 0.15357892215251923,
      "learning_rate": 0.00012038796263949099,
      "loss": 0.4809,
      "step": 3274
    },
    {
      "epoch": 0.9828931572629052,
      "grad_norm": 0.13547773659229279,
      "learning_rate": 0.00012033668264401558,
      "loss": 0.3985,
      "step": 3275
    },
    {
      "epoch": 0.9831932773109243,
      "grad_norm": 0.15516170859336853,
      "learning_rate": 0.00012028539706940451,
      "loss": 0.4789,
      "step": 3276
    },
    {
      "epoch": 0.9834933973589436,
      "grad_norm": 0.13751676678657532,
      "learning_rate": 0.00012023410592972735,
      "loss": 0.4275,
      "step": 3277
    },
    {
      "epoch": 0.9837935174069627,
      "grad_norm": 0.1439884603023529,
      "learning_rate": 0.00012018280923905528,
      "loss": 0.4411,
      "step": 3278
    },
    {
      "epoch": 0.984093637454982,
      "grad_norm": 0.1395803838968277,
      "learning_rate": 0.00012013150701146086,
      "loss": 0.4343,
      "step": 3279
    },
    {
      "epoch": 0.9843937575030012,
      "grad_norm": 0.15619012713432312,
      "learning_rate": 0.00012008019926101837,
      "loss": 0.4666,
      "step": 3280
    },
    {
      "epoch": 0.9846938775510204,
      "grad_norm": 0.13754403591156006,
      "learning_rate": 0.00012002888600180341,
      "loss": 0.4219,
      "step": 3281
    },
    {
      "epoch": 0.9849939975990396,
      "grad_norm": 0.6203069090843201,
      "learning_rate": 0.00011997756724789333,
      "loss": 0.4206,
      "step": 3282
    },
    {
      "epoch": 0.9852941176470589,
      "grad_norm": 0.13347765803337097,
      "learning_rate": 0.00011992624301336668,
      "loss": 0.3895,
      "step": 3283
    },
    {
      "epoch": 0.985594237695078,
      "grad_norm": 0.14219003915786743,
      "learning_rate": 0.00011987491331230378,
      "loss": 0.4413,
      "step": 3284
    },
    {
      "epoch": 0.9858943577430972,
      "grad_norm": 0.1293162852525711,
      "learning_rate": 0.00011982357815878629,
      "loss": 0.3935,
      "step": 3285
    },
    {
      "epoch": 0.9861944777911165,
      "grad_norm": 0.13473662734031677,
      "learning_rate": 0.00011977223756689746,
      "loss": 0.388,
      "step": 3286
    },
    {
      "epoch": 0.9864945978391356,
      "grad_norm": 0.17074993252754211,
      "learning_rate": 0.00011972089155072195,
      "loss": 0.4125,
      "step": 3287
    },
    {
      "epoch": 0.9867947178871549,
      "grad_norm": 0.14385375380516052,
      "learning_rate": 0.00011966954012434599,
      "loss": 0.3789,
      "step": 3288
    },
    {
      "epoch": 0.987094837935174,
      "grad_norm": 0.20002798736095428,
      "learning_rate": 0.00011961818330185723,
      "loss": 0.46,
      "step": 3289
    },
    {
      "epoch": 0.9873949579831933,
      "grad_norm": 0.15859068930149078,
      "learning_rate": 0.00011956682109734485,
      "loss": 0.4149,
      "step": 3290
    },
    {
      "epoch": 0.9876950780312125,
      "grad_norm": 0.16643311083316803,
      "learning_rate": 0.00011951545352489948,
      "loss": 0.5037,
      "step": 3291
    },
    {
      "epoch": 0.9879951980792316,
      "grad_norm": 0.17133454978466034,
      "learning_rate": 0.00011946408059861316,
      "loss": 0.5094,
      "step": 3292
    },
    {
      "epoch": 0.9882953181272509,
      "grad_norm": 0.1505843847990036,
      "learning_rate": 0.00011941270233257957,
      "loss": 0.4371,
      "step": 3293
    },
    {
      "epoch": 0.9885954381752701,
      "grad_norm": 0.15345150232315063,
      "learning_rate": 0.00011936131874089365,
      "loss": 0.4646,
      "step": 3294
    },
    {
      "epoch": 0.9888955582232893,
      "grad_norm": 0.16357767581939697,
      "learning_rate": 0.00011930992983765196,
      "loss": 0.4674,
      "step": 3295
    },
    {
      "epoch": 0.9891956782713085,
      "grad_norm": 0.14693854749202728,
      "learning_rate": 0.00011925853563695242,
      "loss": 0.4626,
      "step": 3296
    },
    {
      "epoch": 0.9894957983193278,
      "grad_norm": 0.1619652509689331,
      "learning_rate": 0.0001192071361528945,
      "loss": 0.4899,
      "step": 3297
    },
    {
      "epoch": 0.9897959183673469,
      "grad_norm": 0.14258073270320892,
      "learning_rate": 0.00011915573139957898,
      "loss": 0.4449,
      "step": 3298
    },
    {
      "epoch": 0.9900960384153662,
      "grad_norm": 0.1380699872970581,
      "learning_rate": 0.00011910432139110822,
      "loss": 0.4112,
      "step": 3299
    },
    {
      "epoch": 0.9903961584633854,
      "grad_norm": 0.13336259126663208,
      "learning_rate": 0.0001190529061415859,
      "loss": 0.4064,
      "step": 3300
    },
    {
      "epoch": 0.9906962785114045,
      "grad_norm": 0.1317206174135208,
      "learning_rate": 0.00011900148566511733,
      "loss": 0.4186,
      "step": 3301
    },
    {
      "epoch": 0.9909963985594238,
      "grad_norm": 0.16753268241882324,
      "learning_rate": 0.00011895005997580899,
      "loss": 0.4872,
      "step": 3302
    },
    {
      "epoch": 0.991296518607443,
      "grad_norm": 0.15727880597114563,
      "learning_rate": 0.000118898629087769,
      "loss": 0.5063,
      "step": 3303
    },
    {
      "epoch": 0.9915966386554622,
      "grad_norm": 0.14946980774402618,
      "learning_rate": 0.00011884719301510685,
      "loss": 0.46,
      "step": 3304
    },
    {
      "epoch": 0.9918967587034814,
      "grad_norm": 0.13264238834381104,
      "learning_rate": 0.0001187957517719334,
      "loss": 0.4333,
      "step": 3305
    },
    {
      "epoch": 0.9921968787515006,
      "grad_norm": 0.1383105218410492,
      "learning_rate": 0.00011874430537236094,
      "loss": 0.4093,
      "step": 3306
    },
    {
      "epoch": 0.9924969987995198,
      "grad_norm": 0.1599801778793335,
      "learning_rate": 0.00011869285383050328,
      "loss": 0.5252,
      "step": 3307
    },
    {
      "epoch": 0.992797118847539,
      "grad_norm": 0.14416632056236267,
      "learning_rate": 0.00011864139716047549,
      "loss": 0.4752,
      "step": 3308
    },
    {
      "epoch": 0.9930972388955582,
      "grad_norm": 0.16169165074825287,
      "learning_rate": 0.00011858993537639415,
      "loss": 0.4927,
      "step": 3309
    },
    {
      "epoch": 0.9933973589435774,
      "grad_norm": 0.15442326664924622,
      "learning_rate": 0.0001185384684923772,
      "loss": 0.462,
      "step": 3310
    },
    {
      "epoch": 0.9936974789915967,
      "grad_norm": 0.14835773408412933,
      "learning_rate": 0.00011848699652254398,
      "loss": 0.467,
      "step": 3311
    },
    {
      "epoch": 0.9939975990396158,
      "grad_norm": 0.15235137939453125,
      "learning_rate": 0.00011843551948101525,
      "loss": 0.4383,
      "step": 3312
    },
    {
      "epoch": 0.9942977190876351,
      "grad_norm": 0.1400371640920639,
      "learning_rate": 0.00011838403738191317,
      "loss": 0.3838,
      "step": 3313
    },
    {
      "epoch": 0.9945978391356542,
      "grad_norm": 0.14551183581352234,
      "learning_rate": 0.00011833255023936123,
      "loss": 0.4479,
      "step": 3314
    },
    {
      "epoch": 0.9948979591836735,
      "grad_norm": 0.22726750373840332,
      "learning_rate": 0.00011828105806748431,
      "loss": 0.4438,
      "step": 3315
    },
    {
      "epoch": 0.9951980792316927,
      "grad_norm": 0.13362866640090942,
      "learning_rate": 0.00011822956088040878,
      "loss": 0.4369,
      "step": 3316
    },
    {
      "epoch": 0.9954981992797118,
      "grad_norm": 0.16523800790309906,
      "learning_rate": 0.0001181780586922622,
      "loss": 0.4867,
      "step": 3317
    },
    {
      "epoch": 0.9957983193277311,
      "grad_norm": 0.12675073742866516,
      "learning_rate": 0.00011812655151717369,
      "loss": 0.4169,
      "step": 3318
    },
    {
      "epoch": 0.9960984393757503,
      "grad_norm": 0.15196578204631805,
      "learning_rate": 0.00011807503936927363,
      "loss": 0.4511,
      "step": 3319
    },
    {
      "epoch": 0.9963985594237695,
      "grad_norm": 0.13615992665290833,
      "learning_rate": 0.00011802352226269375,
      "loss": 0.404,
      "step": 3320
    },
    {
      "epoch": 0.9966986794717887,
      "grad_norm": 0.1454804539680481,
      "learning_rate": 0.0001179720002115672,
      "loss": 0.4985,
      "step": 3321
    },
    {
      "epoch": 0.996998799519808,
      "grad_norm": 0.14081403613090515,
      "learning_rate": 0.00011792047323002848,
      "loss": 0.4311,
      "step": 3322
    },
    {
      "epoch": 0.9972989195678271,
      "grad_norm": 0.14123167097568512,
      "learning_rate": 0.00011786894133221345,
      "loss": 0.4246,
      "step": 3323
    },
    {
      "epoch": 0.9975990396158463,
      "grad_norm": 0.13770636916160583,
      "learning_rate": 0.00011781740453225923,
      "loss": 0.3965,
      "step": 3324
    },
    {
      "epoch": 0.9978991596638656,
      "grad_norm": 0.1396370232105255,
      "learning_rate": 0.00011776586284430437,
      "loss": 0.4565,
      "step": 3325
    },
    {
      "epoch": 0.9981992797118847,
      "grad_norm": 0.15787461400032043,
      "learning_rate": 0.00011771431628248877,
      "loss": 0.4392,
      "step": 3326
    },
    {
      "epoch": 0.998499399759904,
      "grad_norm": 0.13858865201473236,
      "learning_rate": 0.00011766276486095362,
      "loss": 0.4487,
      "step": 3327
    },
    {
      "epoch": 0.9987995198079231,
      "grad_norm": 0.13797371089458466,
      "learning_rate": 0.00011761120859384147,
      "loss": 0.4489,
      "step": 3328
    },
    {
      "epoch": 0.9990996398559424,
      "grad_norm": 0.2163102775812149,
      "learning_rate": 0.00011755964749529618,
      "loss": 0.4431,
      "step": 3329
    },
    {
      "epoch": 0.9993997599039616,
      "grad_norm": 0.1351035088300705,
      "learning_rate": 0.00011750808157946291,
      "loss": 0.4456,
      "step": 3330
    },
    {
      "epoch": 0.9996998799519808,
      "grad_norm": 0.14229658246040344,
      "learning_rate": 0.00011745651086048825,
      "loss": 0.4418,
      "step": 3331
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.128207266330719,
      "learning_rate": 0.00011740493535252002,
      "loss": 0.3952,
      "step": 3332
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.2915521264076233,
      "eval_runtime": 3781.9927,
      "eval_samples_per_second": 12.251,
      "eval_steps_per_second": 0.766,
      "step": 3332
    },
    {
      "epoch": 1.0003001200480193,
      "grad_norm": 0.1412869095802307,
      "learning_rate": 0.00011735335506970733,
      "loss": 0.4536,
      "step": 3333
    },
    {
      "epoch": 1.0006002400960383,
      "grad_norm": 0.14577415585517883,
      "learning_rate": 0.00011730177002620066,
      "loss": 0.3832,
      "step": 3334
    },
    {
      "epoch": 1.0009003601440576,
      "grad_norm": 0.15070371329784393,
      "learning_rate": 0.0001172501802361518,
      "loss": 0.514,
      "step": 3335
    },
    {
      "epoch": 1.0012004801920769,
      "grad_norm": 0.13936564326286316,
      "learning_rate": 0.00011719858571371373,
      "loss": 0.4466,
      "step": 3336
    },
    {
      "epoch": 1.0015006002400961,
      "grad_norm": 0.14646700024604797,
      "learning_rate": 0.00011714698647304094,
      "loss": 0.4197,
      "step": 3337
    },
    {
      "epoch": 1.0018007202881152,
      "grad_norm": 0.17612683773040771,
      "learning_rate": 0.00011709538252828902,
      "loss": 0.4565,
      "step": 3338
    },
    {
      "epoch": 1.0021008403361344,
      "grad_norm": 0.14111727476119995,
      "learning_rate": 0.00011704377389361492,
      "loss": 0.4107,
      "step": 3339
    },
    {
      "epoch": 1.0024009603841537,
      "grad_norm": 0.13370364904403687,
      "learning_rate": 0.00011699216058317686,
      "loss": 0.4129,
      "step": 3340
    },
    {
      "epoch": 1.0027010804321728,
      "grad_norm": 0.15525327622890472,
      "learning_rate": 0.00011694054261113441,
      "loss": 0.4056,
      "step": 3341
    },
    {
      "epoch": 1.003001200480192,
      "grad_norm": 0.13654319941997528,
      "learning_rate": 0.00011688891999164834,
      "loss": 0.4047,
      "step": 3342
    },
    {
      "epoch": 1.0033013205282113,
      "grad_norm": 0.15027666091918945,
      "learning_rate": 0.00011683729273888075,
      "loss": 0.496,
      "step": 3343
    },
    {
      "epoch": 1.0036014405762306,
      "grad_norm": 0.13460519909858704,
      "learning_rate": 0.00011678566086699492,
      "loss": 0.4255,
      "step": 3344
    },
    {
      "epoch": 1.0039015606242496,
      "grad_norm": 0.1476062536239624,
      "learning_rate": 0.00011673402439015556,
      "loss": 0.4577,
      "step": 3345
    },
    {
      "epoch": 1.004201680672269,
      "grad_norm": 0.18649092316627502,
      "learning_rate": 0.00011668238332252846,
      "loss": 0.3768,
      "step": 3346
    },
    {
      "epoch": 1.0045018007202882,
      "grad_norm": 0.14275908470153809,
      "learning_rate": 0.00011663073767828079,
      "loss": 0.488,
      "step": 3347
    },
    {
      "epoch": 1.0048019207683074,
      "grad_norm": 0.1560365855693817,
      "learning_rate": 0.0001165790874715809,
      "loss": 0.4688,
      "step": 3348
    },
    {
      "epoch": 1.0051020408163265,
      "grad_norm": 0.1347278654575348,
      "learning_rate": 0.00011652743271659853,
      "loss": 0.4214,
      "step": 3349
    },
    {
      "epoch": 1.0054021608643458,
      "grad_norm": 0.1424272209405899,
      "learning_rate": 0.00011647577342750447,
      "loss": 0.4421,
      "step": 3350
    },
    {
      "epoch": 1.005702280912365,
      "grad_norm": 0.16413843631744385,
      "learning_rate": 0.0001164241096184709,
      "loss": 0.4707,
      "step": 3351
    },
    {
      "epoch": 1.006002400960384,
      "grad_norm": 0.24146349728107452,
      "learning_rate": 0.00011637244130367118,
      "loss": 0.4515,
      "step": 3352
    },
    {
      "epoch": 1.0063025210084033,
      "grad_norm": 0.13402995467185974,
      "learning_rate": 0.00011632076849727993,
      "loss": 0.4164,
      "step": 3353
    },
    {
      "epoch": 1.0066026410564226,
      "grad_norm": 0.14538829028606415,
      "learning_rate": 0.00011626909121347301,
      "loss": 0.4664,
      "step": 3354
    },
    {
      "epoch": 1.0069027611044419,
      "grad_norm": 0.42422211170196533,
      "learning_rate": 0.0001162174094664274,
      "loss": 0.4278,
      "step": 3355
    },
    {
      "epoch": 1.007202881152461,
      "grad_norm": 0.14506077766418457,
      "learning_rate": 0.00011616572327032152,
      "loss": 0.3753,
      "step": 3356
    },
    {
      "epoch": 1.0075030012004802,
      "grad_norm": 0.13329797983169556,
      "learning_rate": 0.0001161140326393348,
      "loss": 0.4249,
      "step": 3357
    },
    {
      "epoch": 1.0078031212484995,
      "grad_norm": 0.14923860132694244,
      "learning_rate": 0.00011606233758764802,
      "loss": 0.4146,
      "step": 3358
    },
    {
      "epoch": 1.0081032412965185,
      "grad_norm": 0.14581775665283203,
      "learning_rate": 0.00011601063812944308,
      "loss": 0.4863,
      "step": 3359
    },
    {
      "epoch": 1.0084033613445378,
      "grad_norm": 0.14314207434654236,
      "learning_rate": 0.00011595893427890316,
      "loss": 0.4462,
      "step": 3360
    },
    {
      "epoch": 1.008703481392557,
      "grad_norm": 0.17089812457561493,
      "learning_rate": 0.00011590722605021262,
      "loss": 0.3893,
      "step": 3361
    },
    {
      "epoch": 1.0090036014405763,
      "grad_norm": 0.43584877252578735,
      "learning_rate": 0.00011585551345755702,
      "loss": 0.4452,
      "step": 3362
    },
    {
      "epoch": 1.0093037214885954,
      "grad_norm": 0.18344175815582275,
      "learning_rate": 0.0001158037965151231,
      "loss": 0.4966,
      "step": 3363
    },
    {
      "epoch": 1.0096038415366146,
      "grad_norm": 0.14152958989143372,
      "learning_rate": 0.00011575207523709886,
      "loss": 0.4431,
      "step": 3364
    },
    {
      "epoch": 1.009903961584634,
      "grad_norm": 0.14963635802268982,
      "learning_rate": 0.00011570034963767335,
      "loss": 0.4637,
      "step": 3365
    },
    {
      "epoch": 1.010204081632653,
      "grad_norm": 0.1447412669658661,
      "learning_rate": 0.00011564861973103698,
      "loss": 0.4521,
      "step": 3366
    },
    {
      "epoch": 1.0105042016806722,
      "grad_norm": 0.14700941741466522,
      "learning_rate": 0.0001155968855313812,
      "loss": 0.4774,
      "step": 3367
    },
    {
      "epoch": 1.0108043217286915,
      "grad_norm": 0.14747017621994019,
      "learning_rate": 0.00011554514705289874,
      "loss": 0.4459,
      "step": 3368
    },
    {
      "epoch": 1.0111044417767108,
      "grad_norm": 0.16696229577064514,
      "learning_rate": 0.00011549340430978342,
      "loss": 0.4331,
      "step": 3369
    },
    {
      "epoch": 1.0114045618247298,
      "grad_norm": 0.15966810286045074,
      "learning_rate": 0.00011544165731623029,
      "loss": 0.5101,
      "step": 3370
    },
    {
      "epoch": 1.011704681872749,
      "grad_norm": 0.1359332799911499,
      "learning_rate": 0.00011538990608643554,
      "loss": 0.4032,
      "step": 3371
    },
    {
      "epoch": 1.0120048019207684,
      "grad_norm": 0.16748102009296417,
      "learning_rate": 0.00011533815063459652,
      "loss": 0.4798,
      "step": 3372
    },
    {
      "epoch": 1.0123049219687874,
      "grad_norm": 0.16173438727855682,
      "learning_rate": 0.00011528639097491174,
      "loss": 0.435,
      "step": 3373
    },
    {
      "epoch": 1.0126050420168067,
      "grad_norm": 0.14960527420043945,
      "learning_rate": 0.00011523462712158089,
      "loss": 0.4532,
      "step": 3374
    },
    {
      "epoch": 1.012905162064826,
      "grad_norm": 0.2027568221092224,
      "learning_rate": 0.00011518285908880477,
      "loss": 0.4223,
      "step": 3375
    },
    {
      "epoch": 1.0132052821128452,
      "grad_norm": 0.1638248711824417,
      "learning_rate": 0.00011513108689078537,
      "loss": 0.4469,
      "step": 3376
    },
    {
      "epoch": 1.0135054021608643,
      "grad_norm": 0.16117016971111298,
      "learning_rate": 0.00011507931054172578,
      "loss": 0.4655,
      "step": 3377
    },
    {
      "epoch": 1.0138055222088835,
      "grad_norm": 0.14418861269950867,
      "learning_rate": 0.00011502753005583022,
      "loss": 0.4142,
      "step": 3378
    },
    {
      "epoch": 1.0141056422569028,
      "grad_norm": 0.1503109484910965,
      "learning_rate": 0.00011497574544730416,
      "loss": 0.4762,
      "step": 3379
    },
    {
      "epoch": 1.014405762304922,
      "grad_norm": 0.1379355490207672,
      "learning_rate": 0.00011492395673035401,
      "loss": 0.4089,
      "step": 3380
    },
    {
      "epoch": 1.0147058823529411,
      "grad_norm": 0.1280781477689743,
      "learning_rate": 0.00011487216391918749,
      "loss": 0.4092,
      "step": 3381
    },
    {
      "epoch": 1.0150060024009604,
      "grad_norm": 0.14146004617214203,
      "learning_rate": 0.00011482036702801329,
      "loss": 0.4315,
      "step": 3382
    },
    {
      "epoch": 1.0153061224489797,
      "grad_norm": 0.14633069932460785,
      "learning_rate": 0.00011476856607104138,
      "loss": 0.4224,
      "step": 3383
    },
    {
      "epoch": 1.0156062424969987,
      "grad_norm": 0.13567544519901276,
      "learning_rate": 0.00011471676106248268,
      "loss": 0.4246,
      "step": 3384
    },
    {
      "epoch": 1.015906362545018,
      "grad_norm": 0.14775624871253967,
      "learning_rate": 0.00011466495201654936,
      "loss": 0.4343,
      "step": 3385
    },
    {
      "epoch": 1.0162064825930373,
      "grad_norm": 0.14980390667915344,
      "learning_rate": 0.00011461313894745458,
      "loss": 0.414,
      "step": 3386
    },
    {
      "epoch": 1.0165066026410565,
      "grad_norm": 0.23978058993816376,
      "learning_rate": 0.00011456132186941276,
      "loss": 0.5082,
      "step": 3387
    },
    {
      "epoch": 1.0168067226890756,
      "grad_norm": 0.13374143838882446,
      "learning_rate": 0.00011450950079663918,
      "loss": 0.4122,
      "step": 3388
    },
    {
      "epoch": 1.0171068427370948,
      "grad_norm": 0.1410181224346161,
      "learning_rate": 0.00011445767574335044,
      "loss": 0.4113,
      "step": 3389
    },
    {
      "epoch": 1.017406962785114,
      "grad_norm": 0.13878248631954193,
      "learning_rate": 0.00011440584672376418,
      "loss": 0.4088,
      "step": 3390
    },
    {
      "epoch": 1.0177070828331332,
      "grad_norm": 0.1408509910106659,
      "learning_rate": 0.00011435401375209904,
      "loss": 0.4297,
      "step": 3391
    },
    {
      "epoch": 1.0180072028811524,
      "grad_norm": 0.18739213049411774,
      "learning_rate": 0.00011430217684257478,
      "loss": 0.4468,
      "step": 3392
    },
    {
      "epoch": 1.0183073229291717,
      "grad_norm": 0.14587976038455963,
      "learning_rate": 0.00011425033600941235,
      "loss": 0.4673,
      "step": 3393
    },
    {
      "epoch": 1.018607442977191,
      "grad_norm": 0.12913000583648682,
      "learning_rate": 0.00011419849126683362,
      "loss": 0.4014,
      "step": 3394
    },
    {
      "epoch": 1.01890756302521,
      "grad_norm": 0.14736846089363098,
      "learning_rate": 0.00011414664262906163,
      "loss": 0.4622,
      "step": 3395
    },
    {
      "epoch": 1.0192076830732293,
      "grad_norm": 0.1973484307527542,
      "learning_rate": 0.00011409479011032045,
      "loss": 0.4698,
      "step": 3396
    },
    {
      "epoch": 1.0195078031212486,
      "grad_norm": 0.14890998601913452,
      "learning_rate": 0.00011404293372483519,
      "loss": 0.3946,
      "step": 3397
    },
    {
      "epoch": 1.0198079231692676,
      "grad_norm": 0.1430133879184723,
      "learning_rate": 0.00011399107348683214,
      "loss": 0.4599,
      "step": 3398
    },
    {
      "epoch": 1.0201080432172869,
      "grad_norm": 0.1424548476934433,
      "learning_rate": 0.00011393920941053846,
      "loss": 0.4638,
      "step": 3399
    },
    {
      "epoch": 1.0204081632653061,
      "grad_norm": 0.16244056820869446,
      "learning_rate": 0.00011388734151018252,
      "loss": 0.4923,
      "step": 3400
    },
    {
      "epoch": 1.0207082833133254,
      "grad_norm": 0.17169933021068573,
      "learning_rate": 0.00011383546979999369,
      "loss": 0.4545,
      "step": 3401
    },
    {
      "epoch": 1.0210084033613445,
      "grad_norm": 0.21742023527622223,
      "learning_rate": 0.00011378359429420238,
      "loss": 0.4997,
      "step": 3402
    },
    {
      "epoch": 1.0213085234093637,
      "grad_norm": 0.136476069688797,
      "learning_rate": 0.00011373171500704001,
      "loss": 0.3931,
      "step": 3403
    },
    {
      "epoch": 1.021608643457383,
      "grad_norm": 0.14849358797073364,
      "learning_rate": 0.00011367983195273907,
      "loss": 0.4333,
      "step": 3404
    },
    {
      "epoch": 1.0219087635054023,
      "grad_norm": 0.168882355093956,
      "learning_rate": 0.0001136279451455331,
      "loss": 0.4618,
      "step": 3405
    },
    {
      "epoch": 1.0222088835534213,
      "grad_norm": 0.1467980146408081,
      "learning_rate": 0.00011357605459965668,
      "loss": 0.4228,
      "step": 3406
    },
    {
      "epoch": 1.0225090036014406,
      "grad_norm": 0.16005730628967285,
      "learning_rate": 0.00011352416032934529,
      "loss": 0.4384,
      "step": 3407
    },
    {
      "epoch": 1.0228091236494599,
      "grad_norm": 0.14726972579956055,
      "learning_rate": 0.00011347226234883564,
      "loss": 0.4718,
      "step": 3408
    },
    {
      "epoch": 1.023109243697479,
      "grad_norm": 0.1600033938884735,
      "learning_rate": 0.0001134203606723653,
      "loss": 0.5431,
      "step": 3409
    },
    {
      "epoch": 1.0234093637454982,
      "grad_norm": 0.14114709198474884,
      "learning_rate": 0.00011336845531417286,
      "loss": 0.4359,
      "step": 3410
    },
    {
      "epoch": 1.0237094837935174,
      "grad_norm": 0.14041128754615784,
      "learning_rate": 0.000113316546288498,
      "loss": 0.4486,
      "step": 3411
    },
    {
      "epoch": 1.0240096038415367,
      "grad_norm": 0.14641427993774414,
      "learning_rate": 0.00011326463360958137,
      "loss": 0.5054,
      "step": 3412
    },
    {
      "epoch": 1.0243097238895558,
      "grad_norm": 0.13535800576210022,
      "learning_rate": 0.00011321271729166462,
      "loss": 0.4073,
      "step": 3413
    },
    {
      "epoch": 1.024609843937575,
      "grad_norm": 0.1422286480665207,
      "learning_rate": 0.00011316079734899039,
      "loss": 0.4306,
      "step": 3414
    },
    {
      "epoch": 1.0249099639855943,
      "grad_norm": 0.1431799978017807,
      "learning_rate": 0.0001131088737958023,
      "loss": 0.4733,
      "step": 3415
    },
    {
      "epoch": 1.0252100840336134,
      "grad_norm": 0.15455827116966248,
      "learning_rate": 0.00011305694664634498,
      "loss": 0.4936,
      "step": 3416
    },
    {
      "epoch": 1.0255102040816326,
      "grad_norm": 0.137307807803154,
      "learning_rate": 0.00011300501591486409,
      "loss": 0.4496,
      "step": 3417
    },
    {
      "epoch": 1.025810324129652,
      "grad_norm": 0.1682182252407074,
      "learning_rate": 0.00011295308161560623,
      "loss": 0.4657,
      "step": 3418
    },
    {
      "epoch": 1.0261104441776712,
      "grad_norm": 0.15309010446071625,
      "learning_rate": 0.00011290114376281893,
      "loss": 0.4313,
      "step": 3419
    },
    {
      "epoch": 1.0264105642256902,
      "grad_norm": 0.1386784464120865,
      "learning_rate": 0.00011284920237075076,
      "loss": 0.4212,
      "step": 3420
    },
    {
      "epoch": 1.0267106842737095,
      "grad_norm": 0.14624737203121185,
      "learning_rate": 0.00011279725745365128,
      "loss": 0.4283,
      "step": 3421
    },
    {
      "epoch": 1.0270108043217288,
      "grad_norm": 0.13789516687393188,
      "learning_rate": 0.00011274530902577093,
      "loss": 0.431,
      "step": 3422
    },
    {
      "epoch": 1.0273109243697478,
      "grad_norm": 0.1470216065645218,
      "learning_rate": 0.00011269335710136122,
      "loss": 0.4821,
      "step": 3423
    },
    {
      "epoch": 1.027611044417767,
      "grad_norm": 0.1370469629764557,
      "learning_rate": 0.00011264140169467455,
      "loss": 0.4342,
      "step": 3424
    },
    {
      "epoch": 1.0279111644657863,
      "grad_norm": 0.1463860720396042,
      "learning_rate": 0.00011258944281996424,
      "loss": 0.4563,
      "step": 3425
    },
    {
      "epoch": 1.0282112845138056,
      "grad_norm": 0.15660445392131805,
      "learning_rate": 0.00011253748049148466,
      "loss": 0.4593,
      "step": 3426
    },
    {
      "epoch": 1.0285114045618247,
      "grad_norm": 0.1462990790605545,
      "learning_rate": 0.00011248551472349107,
      "loss": 0.4642,
      "step": 3427
    },
    {
      "epoch": 1.028811524609844,
      "grad_norm": 0.13824063539505005,
      "learning_rate": 0.0001124335455302397,
      "loss": 0.4194,
      "step": 3428
    },
    {
      "epoch": 1.0291116446578632,
      "grad_norm": 0.16063570976257324,
      "learning_rate": 0.00011238157292598768,
      "loss": 0.4513,
      "step": 3429
    },
    {
      "epoch": 1.0294117647058822,
      "grad_norm": 0.13024139404296875,
      "learning_rate": 0.00011232959692499308,
      "loss": 0.4132,
      "step": 3430
    },
    {
      "epoch": 1.0297118847539015,
      "grad_norm": 0.1586381196975708,
      "learning_rate": 0.00011227761754151495,
      "loss": 0.5055,
      "step": 3431
    },
    {
      "epoch": 1.0300120048019208,
      "grad_norm": 0.13493409752845764,
      "learning_rate": 0.00011222563478981325,
      "loss": 0.3771,
      "step": 3432
    },
    {
      "epoch": 1.03031212484994,
      "grad_norm": 0.2602766752243042,
      "learning_rate": 0.00011217364868414883,
      "loss": 0.4896,
      "step": 3433
    },
    {
      "epoch": 1.030612244897959,
      "grad_norm": 0.14443141222000122,
      "learning_rate": 0.00011212165923878348,
      "loss": 0.446,
      "step": 3434
    },
    {
      "epoch": 1.0309123649459784,
      "grad_norm": 0.1360771805047989,
      "learning_rate": 0.0001120696664679799,
      "loss": 0.415,
      "step": 3435
    },
    {
      "epoch": 1.0312124849939976,
      "grad_norm": 0.1296030730009079,
      "learning_rate": 0.00011201767038600172,
      "loss": 0.3967,
      "step": 3436
    },
    {
      "epoch": 1.0315126050420167,
      "grad_norm": 0.15890447795391083,
      "learning_rate": 0.00011196567100711348,
      "loss": 0.427,
      "step": 3437
    },
    {
      "epoch": 1.031812725090036,
      "grad_norm": 0.13646657764911652,
      "learning_rate": 0.00011191366834558062,
      "loss": 0.4204,
      "step": 3438
    },
    {
      "epoch": 1.0321128451380552,
      "grad_norm": 0.13108712434768677,
      "learning_rate": 0.00011186166241566944,
      "loss": 0.4104,
      "step": 3439
    },
    {
      "epoch": 1.0324129651860745,
      "grad_norm": 0.12920904159545898,
      "learning_rate": 0.00011180965323164719,
      "loss": 0.4139,
      "step": 3440
    },
    {
      "epoch": 1.0327130852340936,
      "grad_norm": 0.137689471244812,
      "learning_rate": 0.00011175764080778197,
      "loss": 0.4572,
      "step": 3441
    },
    {
      "epoch": 1.0330132052821128,
      "grad_norm": 0.14051711559295654,
      "learning_rate": 0.00011170562515834285,
      "loss": 0.4512,
      "step": 3442
    },
    {
      "epoch": 1.033313325330132,
      "grad_norm": 0.13927625119686127,
      "learning_rate": 0.00011165360629759969,
      "loss": 0.4003,
      "step": 3443
    },
    {
      "epoch": 1.0336134453781514,
      "grad_norm": 0.13879626989364624,
      "learning_rate": 0.00011160158423982326,
      "loss": 0.4416,
      "step": 3444
    },
    {
      "epoch": 1.0339135654261704,
      "grad_norm": 0.1270892471075058,
      "learning_rate": 0.00011154955899928521,
      "loss": 0.4213,
      "step": 3445
    },
    {
      "epoch": 1.0342136854741897,
      "grad_norm": 0.14068500697612762,
      "learning_rate": 0.0001114975305902581,
      "loss": 0.45,
      "step": 3446
    },
    {
      "epoch": 1.034513805522209,
      "grad_norm": 0.1559022068977356,
      "learning_rate": 0.00011144549902701528,
      "loss": 0.4523,
      "step": 3447
    },
    {
      "epoch": 1.034813925570228,
      "grad_norm": 0.15158261358737946,
      "learning_rate": 0.00011139346432383109,
      "loss": 0.4757,
      "step": 3448
    },
    {
      "epoch": 1.0351140456182473,
      "grad_norm": 0.13974933326244354,
      "learning_rate": 0.00011134142649498056,
      "loss": 0.4278,
      "step": 3449
    },
    {
      "epoch": 1.0354141656662665,
      "grad_norm": 0.1296132653951645,
      "learning_rate": 0.00011128938555473976,
      "loss": 0.395,
      "step": 3450
    },
    {
      "epoch": 1.0357142857142858,
      "grad_norm": 0.14743031561374664,
      "learning_rate": 0.00011123734151738548,
      "loss": 0.4368,
      "step": 3451
    },
    {
      "epoch": 1.0360144057623049,
      "grad_norm": 0.14494973421096802,
      "learning_rate": 0.00011118529439719538,
      "loss": 0.4219,
      "step": 3452
    },
    {
      "epoch": 1.0363145258103241,
      "grad_norm": 0.16061517596244812,
      "learning_rate": 0.00011113324420844801,
      "loss": 0.4366,
      "step": 3453
    },
    {
      "epoch": 1.0366146458583434,
      "grad_norm": 0.18124105036258698,
      "learning_rate": 0.00011108119096542283,
      "loss": 0.4265,
      "step": 3454
    },
    {
      "epoch": 1.0369147659063624,
      "grad_norm": 0.14375890791416168,
      "learning_rate": 0.00011102913468239989,
      "loss": 0.4499,
      "step": 3455
    },
    {
      "epoch": 1.0372148859543817,
      "grad_norm": 0.2264331877231598,
      "learning_rate": 0.00011097707537366036,
      "loss": 0.4447,
      "step": 3456
    },
    {
      "epoch": 1.037515006002401,
      "grad_norm": 0.18140441179275513,
      "learning_rate": 0.00011092501305348604,
      "loss": 0.4455,
      "step": 3457
    },
    {
      "epoch": 1.0378151260504203,
      "grad_norm": 0.15313535928726196,
      "learning_rate": 0.00011087294773615968,
      "loss": 0.4656,
      "step": 3458
    },
    {
      "epoch": 1.0381152460984393,
      "grad_norm": 0.14016591012477875,
      "learning_rate": 0.00011082087943596479,
      "loss": 0.4171,
      "step": 3459
    },
    {
      "epoch": 1.0384153661464586,
      "grad_norm": 0.1311662644147873,
      "learning_rate": 0.00011076880816718569,
      "loss": 0.4179,
      "step": 3460
    },
    {
      "epoch": 1.0387154861944778,
      "grad_norm": 0.16516633331775665,
      "learning_rate": 0.00011071673394410756,
      "loss": 0.4836,
      "step": 3461
    },
    {
      "epoch": 1.039015606242497,
      "grad_norm": 0.12876063585281372,
      "learning_rate": 0.00011066465678101637,
      "loss": 0.389,
      "step": 3462
    },
    {
      "epoch": 1.0393157262905162,
      "grad_norm": 0.1513228714466095,
      "learning_rate": 0.00011061257669219884,
      "loss": 0.4861,
      "step": 3463
    },
    {
      "epoch": 1.0396158463385354,
      "grad_norm": 0.1448003500699997,
      "learning_rate": 0.00011056049369194262,
      "loss": 0.454,
      "step": 3464
    },
    {
      "epoch": 1.0399159663865547,
      "grad_norm": 0.14984130859375,
      "learning_rate": 0.00011050840779453602,
      "loss": 0.4777,
      "step": 3465
    },
    {
      "epoch": 1.0402160864345738,
      "grad_norm": 0.1330723762512207,
      "learning_rate": 0.00011045631901426828,
      "loss": 0.4309,
      "step": 3466
    },
    {
      "epoch": 1.040516206482593,
      "grad_norm": 0.1514752209186554,
      "learning_rate": 0.00011040422736542928,
      "loss": 0.4794,
      "step": 3467
    },
    {
      "epoch": 1.0408163265306123,
      "grad_norm": 0.15264540910720825,
      "learning_rate": 0.0001103521328623098,
      "loss": 0.4107,
      "step": 3468
    },
    {
      "epoch": 1.0411164465786316,
      "grad_norm": 0.1378517597913742,
      "learning_rate": 0.0001103000355192014,
      "loss": 0.4224,
      "step": 3469
    },
    {
      "epoch": 1.0414165666266506,
      "grad_norm": 0.16188235580921173,
      "learning_rate": 0.00011024793535039634,
      "loss": 0.4319,
      "step": 3470
    },
    {
      "epoch": 1.0417166866746699,
      "grad_norm": 0.13926754891872406,
      "learning_rate": 0.00011019583237018773,
      "loss": 0.4563,
      "step": 3471
    },
    {
      "epoch": 1.0420168067226891,
      "grad_norm": 0.15727829933166504,
      "learning_rate": 0.00011014372659286943,
      "loss": 0.5049,
      "step": 3472
    },
    {
      "epoch": 1.0423169267707082,
      "grad_norm": 0.15464873611927032,
      "learning_rate": 0.00011009161803273607,
      "loss": 0.5784,
      "step": 3473
    },
    {
      "epoch": 1.0426170468187275,
      "grad_norm": 0.14383083581924438,
      "learning_rate": 0.00011003950670408296,
      "loss": 0.4512,
      "step": 3474
    },
    {
      "epoch": 1.0429171668667467,
      "grad_norm": 0.14154858887195587,
      "learning_rate": 0.00010998739262120634,
      "loss": 0.4304,
      "step": 3475
    },
    {
      "epoch": 1.043217286914766,
      "grad_norm": 0.14988425374031067,
      "learning_rate": 0.00010993527579840309,
      "loss": 0.4911,
      "step": 3476
    },
    {
      "epoch": 1.043517406962785,
      "grad_norm": 0.1370452344417572,
      "learning_rate": 0.00010988315624997083,
      "loss": 0.4377,
      "step": 3477
    },
    {
      "epoch": 1.0438175270108043,
      "grad_norm": 0.13611678779125214,
      "learning_rate": 0.00010983103399020797,
      "loss": 0.4611,
      "step": 3478
    },
    {
      "epoch": 1.0441176470588236,
      "grad_norm": 0.28860393166542053,
      "learning_rate": 0.00010977890903341368,
      "loss": 0.4169,
      "step": 3479
    },
    {
      "epoch": 1.0444177671068426,
      "grad_norm": 0.13566024601459503,
      "learning_rate": 0.00010972678139388784,
      "loss": 0.4358,
      "step": 3480
    },
    {
      "epoch": 1.044717887154862,
      "grad_norm": 0.1353350579738617,
      "learning_rate": 0.00010967465108593104,
      "loss": 0.4122,
      "step": 3481
    },
    {
      "epoch": 1.0450180072028812,
      "grad_norm": 0.1298576146364212,
      "learning_rate": 0.00010962251812384465,
      "loss": 0.361,
      "step": 3482
    },
    {
      "epoch": 1.0453181272509005,
      "grad_norm": 0.13776518404483795,
      "learning_rate": 0.00010957038252193075,
      "loss": 0.3828,
      "step": 3483
    },
    {
      "epoch": 1.0456182472989195,
      "grad_norm": 0.14902642369270325,
      "learning_rate": 0.00010951824429449218,
      "loss": 0.4707,
      "step": 3484
    },
    {
      "epoch": 1.0459183673469388,
      "grad_norm": 0.1484086811542511,
      "learning_rate": 0.00010946610345583237,
      "loss": 0.4041,
      "step": 3485
    },
    {
      "epoch": 1.046218487394958,
      "grad_norm": 0.15604808926582336,
      "learning_rate": 0.00010941396002025565,
      "loss": 0.4443,
      "step": 3486
    },
    {
      "epoch": 1.046518607442977,
      "grad_norm": 0.1303928792476654,
      "learning_rate": 0.00010936181400206694,
      "loss": 0.3916,
      "step": 3487
    },
    {
      "epoch": 1.0468187274909964,
      "grad_norm": 0.13681869208812714,
      "learning_rate": 0.00010930966541557192,
      "loss": 0.4359,
      "step": 3488
    },
    {
      "epoch": 1.0471188475390156,
      "grad_norm": 0.15033277869224548,
      "learning_rate": 0.00010925751427507691,
      "loss": 0.4903,
      "step": 3489
    },
    {
      "epoch": 1.047418967587035,
      "grad_norm": 0.12609606981277466,
      "learning_rate": 0.00010920536059488904,
      "loss": 0.3909,
      "step": 3490
    },
    {
      "epoch": 1.047719087635054,
      "grad_norm": 0.13723771274089813,
      "learning_rate": 0.00010915320438931602,
      "loss": 0.413,
      "step": 3491
    },
    {
      "epoch": 1.0480192076830732,
      "grad_norm": 0.1487082540988922,
      "learning_rate": 0.00010910104567266637,
      "loss": 0.4592,
      "step": 3492
    },
    {
      "epoch": 1.0483193277310925,
      "grad_norm": 0.13834303617477417,
      "learning_rate": 0.00010904888445924917,
      "loss": 0.4248,
      "step": 3493
    },
    {
      "epoch": 1.0486194477791115,
      "grad_norm": 0.1379755735397339,
      "learning_rate": 0.00010899672076337429,
      "loss": 0.455,
      "step": 3494
    },
    {
      "epoch": 1.0489195678271308,
      "grad_norm": 0.21014422178268433,
      "learning_rate": 0.00010894455459935222,
      "loss": 0.4537,
      "step": 3495
    },
    {
      "epoch": 1.04921968787515,
      "grad_norm": 0.14388030767440796,
      "learning_rate": 0.00010889238598149418,
      "loss": 0.4194,
      "step": 3496
    },
    {
      "epoch": 1.0495198079231693,
      "grad_norm": 0.12824735045433044,
      "learning_rate": 0.00010884021492411196,
      "loss": 0.4114,
      "step": 3497
    },
    {
      "epoch": 1.0498199279711884,
      "grad_norm": 0.15317986905574799,
      "learning_rate": 0.0001087880414415182,
      "loss": 0.4266,
      "step": 3498
    },
    {
      "epoch": 1.0501200480192077,
      "grad_norm": 0.1715250462293625,
      "learning_rate": 0.00010873586554802602,
      "loss": 0.4401,
      "step": 3499
    },
    {
      "epoch": 1.050420168067227,
      "grad_norm": 0.17429319024085999,
      "learning_rate": 0.00010868368725794928,
      "loss": 0.4447,
      "step": 3500
    },
    {
      "epoch": 1.0507202881152462,
      "grad_norm": 0.14579427242279053,
      "learning_rate": 0.00010863150658560255,
      "loss": 0.4341,
      "step": 3501
    },
    {
      "epoch": 1.0510204081632653,
      "grad_norm": 0.16881610453128815,
      "learning_rate": 0.00010857932354530092,
      "loss": 0.4351,
      "step": 3502
    },
    {
      "epoch": 1.0513205282112845,
      "grad_norm": 0.1375623643398285,
      "learning_rate": 0.0001085271381513603,
      "loss": 0.4415,
      "step": 3503
    },
    {
      "epoch": 1.0516206482593038,
      "grad_norm": 0.24451765418052673,
      "learning_rate": 0.00010847495041809705,
      "loss": 0.4476,
      "step": 3504
    },
    {
      "epoch": 1.0519207683073228,
      "grad_norm": 0.1336793452501297,
      "learning_rate": 0.00010842276035982836,
      "loss": 0.437,
      "step": 3505
    },
    {
      "epoch": 1.052220888355342,
      "grad_norm": 0.1505599468946457,
      "learning_rate": 0.00010837056799087193,
      "loss": 0.4668,
      "step": 3506
    },
    {
      "epoch": 1.0525210084033614,
      "grad_norm": 0.1421106457710266,
      "learning_rate": 0.00010831837332554619,
      "loss": 0.4106,
      "step": 3507
    },
    {
      "epoch": 1.0528211284513807,
      "grad_norm": 0.14475306868553162,
      "learning_rate": 0.00010826617637817007,
      "loss": 0.4465,
      "step": 3508
    },
    {
      "epoch": 1.0531212484993997,
      "grad_norm": 0.1494123488664627,
      "learning_rate": 0.00010821397716306328,
      "loss": 0.449,
      "step": 3509
    },
    {
      "epoch": 1.053421368547419,
      "grad_norm": 0.153250589966774,
      "learning_rate": 0.000108161775694546,
      "loss": 0.4954,
      "step": 3510
    },
    {
      "epoch": 1.0537214885954382,
      "grad_norm": 0.13658291101455688,
      "learning_rate": 0.00010810957198693921,
      "loss": 0.4306,
      "step": 3511
    },
    {
      "epoch": 1.0540216086434573,
      "grad_norm": 0.19465994834899902,
      "learning_rate": 0.00010805736605456426,
      "loss": 0.4279,
      "step": 3512
    },
    {
      "epoch": 1.0543217286914766,
      "grad_norm": 0.14101290702819824,
      "learning_rate": 0.00010800515791174337,
      "loss": 0.467,
      "step": 3513
    },
    {
      "epoch": 1.0546218487394958,
      "grad_norm": 0.14020851254463196,
      "learning_rate": 0.0001079529475727992,
      "loss": 0.4168,
      "step": 3514
    },
    {
      "epoch": 1.054921968787515,
      "grad_norm": 0.17031557857990265,
      "learning_rate": 0.00010790073505205505,
      "loss": 0.4145,
      "step": 3515
    },
    {
      "epoch": 1.0552220888355341,
      "grad_norm": 0.13163962960243225,
      "learning_rate": 0.00010784852036383481,
      "loss": 0.3924,
      "step": 3516
    },
    {
      "epoch": 1.0555222088835534,
      "grad_norm": 0.13243068754673004,
      "learning_rate": 0.00010779630352246302,
      "loss": 0.4122,
      "step": 3517
    },
    {
      "epoch": 1.0558223289315727,
      "grad_norm": 0.16340062022209167,
      "learning_rate": 0.00010774408454226477,
      "loss": 0.4847,
      "step": 3518
    },
    {
      "epoch": 1.0561224489795917,
      "grad_norm": 0.1362360268831253,
      "learning_rate": 0.00010769186343756572,
      "loss": 0.3993,
      "step": 3519
    },
    {
      "epoch": 1.056422569027611,
      "grad_norm": 0.14988327026367188,
      "learning_rate": 0.00010763964022269213,
      "loss": 0.4475,
      "step": 3520
    },
    {
      "epoch": 1.0567226890756303,
      "grad_norm": 0.1353144347667694,
      "learning_rate": 0.00010758741491197081,
      "loss": 0.4178,
      "step": 3521
    },
    {
      "epoch": 1.0570228091236495,
      "grad_norm": 0.13179272413253784,
      "learning_rate": 0.00010753518751972927,
      "loss": 0.3937,
      "step": 3522
    },
    {
      "epoch": 1.0573229291716686,
      "grad_norm": 0.1460052877664566,
      "learning_rate": 0.0001074829580602954,
      "loss": 0.403,
      "step": 3523
    },
    {
      "epoch": 1.0576230492196879,
      "grad_norm": 0.14756572246551514,
      "learning_rate": 0.0001074307265479978,
      "loss": 0.4483,
      "step": 3524
    },
    {
      "epoch": 1.0579231692677071,
      "grad_norm": 0.15488174557685852,
      "learning_rate": 0.00010737849299716555,
      "loss": 0.4469,
      "step": 3525
    },
    {
      "epoch": 1.0582232893157264,
      "grad_norm": 0.14268824458122253,
      "learning_rate": 0.00010732625742212842,
      "loss": 0.4326,
      "step": 3526
    },
    {
      "epoch": 1.0585234093637454,
      "grad_norm": 0.13396058976650238,
      "learning_rate": 0.00010727401983721652,
      "loss": 0.4401,
      "step": 3527
    },
    {
      "epoch": 1.0588235294117647,
      "grad_norm": 0.1576491892337799,
      "learning_rate": 0.00010722178025676069,
      "loss": 0.5262,
      "step": 3528
    },
    {
      "epoch": 1.059123649459784,
      "grad_norm": 0.14710913598537445,
      "learning_rate": 0.00010716953869509228,
      "loss": 0.4711,
      "step": 3529
    },
    {
      "epoch": 1.059423769507803,
      "grad_norm": 0.1549479067325592,
      "learning_rate": 0.00010711729516654311,
      "loss": 0.445,
      "step": 3530
    },
    {
      "epoch": 1.0597238895558223,
      "grad_norm": 0.14734241366386414,
      "learning_rate": 0.00010706504968544564,
      "loss": 0.4224,
      "step": 3531
    },
    {
      "epoch": 1.0600240096038416,
      "grad_norm": 0.13729891180992126,
      "learning_rate": 0.0001070128022661328,
      "loss": 0.3868,
      "step": 3532
    },
    {
      "epoch": 1.0603241296518608,
      "grad_norm": 0.1420051008462906,
      "learning_rate": 0.00010696055292293805,
      "loss": 0.3858,
      "step": 3533
    },
    {
      "epoch": 1.06062424969988,
      "grad_norm": 0.13650977611541748,
      "learning_rate": 0.00010690830167019546,
      "loss": 0.434,
      "step": 3534
    },
    {
      "epoch": 1.0609243697478992,
      "grad_norm": 0.14545617997646332,
      "learning_rate": 0.00010685604852223947,
      "loss": 0.4619,
      "step": 3535
    },
    {
      "epoch": 1.0612244897959184,
      "grad_norm": 0.13201873004436493,
      "learning_rate": 0.00010680379349340522,
      "loss": 0.3932,
      "step": 3536
    },
    {
      "epoch": 1.0615246098439375,
      "grad_norm": 0.15978311002254486,
      "learning_rate": 0.00010675153659802824,
      "loss": 0.4167,
      "step": 3537
    },
    {
      "epoch": 1.0618247298919568,
      "grad_norm": 0.13253796100616455,
      "learning_rate": 0.0001066992778504446,
      "loss": 0.4082,
      "step": 3538
    },
    {
      "epoch": 1.062124849939976,
      "grad_norm": 0.15104475617408752,
      "learning_rate": 0.00010664701726499091,
      "loss": 0.4454,
      "step": 3539
    },
    {
      "epoch": 1.0624249699879953,
      "grad_norm": 0.14183232188224792,
      "learning_rate": 0.00010659475485600423,
      "loss": 0.388,
      "step": 3540
    },
    {
      "epoch": 1.0627250900360143,
      "grad_norm": 0.1418931782245636,
      "learning_rate": 0.0001065424906378222,
      "loss": 0.4257,
      "step": 3541
    },
    {
      "epoch": 1.0630252100840336,
      "grad_norm": 0.24038948118686676,
      "learning_rate": 0.00010649022462478286,
      "loss": 0.4164,
      "step": 3542
    },
    {
      "epoch": 1.0633253301320529,
      "grad_norm": 0.13288667798042297,
      "learning_rate": 0.00010643795683122485,
      "loss": 0.3729,
      "step": 3543
    },
    {
      "epoch": 1.063625450180072,
      "grad_norm": 0.1481071263551712,
      "learning_rate": 0.00010638568727148716,
      "loss": 0.457,
      "step": 3544
    },
    {
      "epoch": 1.0639255702280912,
      "grad_norm": 0.1327173411846161,
      "learning_rate": 0.00010633341595990945,
      "loss": 0.4079,
      "step": 3545
    },
    {
      "epoch": 1.0642256902761105,
      "grad_norm": 0.14498308300971985,
      "learning_rate": 0.00010628114291083163,
      "loss": 0.4737,
      "step": 3546
    },
    {
      "epoch": 1.0645258103241297,
      "grad_norm": 0.13920772075653076,
      "learning_rate": 0.0001062288681385943,
      "loss": 0.4698,
      "step": 3547
    },
    {
      "epoch": 1.0648259303721488,
      "grad_norm": 0.1382630616426468,
      "learning_rate": 0.00010617659165753844,
      "loss": 0.4414,
      "step": 3548
    },
    {
      "epoch": 1.065126050420168,
      "grad_norm": 0.14103002846240997,
      "learning_rate": 0.00010612431348200547,
      "loss": 0.4357,
      "step": 3549
    },
    {
      "epoch": 1.0654261704681873,
      "grad_norm": 0.14149099588394165,
      "learning_rate": 0.00010607203362633728,
      "loss": 0.4519,
      "step": 3550
    },
    {
      "epoch": 1.0657262905162064,
      "grad_norm": 0.13301125168800354,
      "learning_rate": 0.00010601975210487633,
      "loss": 0.4205,
      "step": 3551
    },
    {
      "epoch": 1.0660264105642256,
      "grad_norm": 0.17028430104255676,
      "learning_rate": 0.00010596746893196543,
      "loss": 0.4366,
      "step": 3552
    },
    {
      "epoch": 1.066326530612245,
      "grad_norm": 0.15449634194374084,
      "learning_rate": 0.00010591518412194784,
      "loss": 0.4861,
      "step": 3553
    },
    {
      "epoch": 1.0666266506602642,
      "grad_norm": 0.14159183204174042,
      "learning_rate": 0.00010586289768916729,
      "loss": 0.4363,
      "step": 3554
    },
    {
      "epoch": 1.0669267707082832,
      "grad_norm": 0.2055792510509491,
      "learning_rate": 0.000105810609647968,
      "loss": 0.4225,
      "step": 3555
    },
    {
      "epoch": 1.0672268907563025,
      "grad_norm": 0.14333371818065643,
      "learning_rate": 0.0001057583200126946,
      "loss": 0.4095,
      "step": 3556
    },
    {
      "epoch": 1.0675270108043218,
      "grad_norm": 0.1322149783372879,
      "learning_rate": 0.00010570602879769213,
      "loss": 0.4055,
      "step": 3557
    },
    {
      "epoch": 1.0678271308523408,
      "grad_norm": 0.15440814197063446,
      "learning_rate": 0.00010565373601730606,
      "loss": 0.4626,
      "step": 3558
    },
    {
      "epoch": 1.06812725090036,
      "grad_norm": 0.23785772919654846,
      "learning_rate": 0.00010560144168588237,
      "loss": 0.4903,
      "step": 3559
    },
    {
      "epoch": 1.0684273709483794,
      "grad_norm": 0.15003551542758942,
      "learning_rate": 0.00010554914581776738,
      "loss": 0.4466,
      "step": 3560
    },
    {
      "epoch": 1.0687274909963986,
      "grad_norm": 0.1422703117132187,
      "learning_rate": 0.00010549684842730787,
      "loss": 0.388,
      "step": 3561
    },
    {
      "epoch": 1.0690276110444177,
      "grad_norm": 0.15164245665073395,
      "learning_rate": 0.00010544454952885101,
      "loss": 0.4697,
      "step": 3562
    },
    {
      "epoch": 1.069327731092437,
      "grad_norm": 0.15820443630218506,
      "learning_rate": 0.00010539224913674442,
      "loss": 0.4113,
      "step": 3563
    },
    {
      "epoch": 1.0696278511404562,
      "grad_norm": 0.1455969363451004,
      "learning_rate": 0.00010533994726533612,
      "loss": 0.4653,
      "step": 3564
    },
    {
      "epoch": 1.0699279711884755,
      "grad_norm": 0.1496390551328659,
      "learning_rate": 0.0001052876439289745,
      "loss": 0.4244,
      "step": 3565
    },
    {
      "epoch": 1.0702280912364945,
      "grad_norm": 0.351198673248291,
      "learning_rate": 0.0001052353391420084,
      "loss": 0.5013,
      "step": 3566
    },
    {
      "epoch": 1.0705282112845138,
      "grad_norm": 0.14300662279129028,
      "learning_rate": 0.00010518303291878707,
      "loss": 0.3909,
      "step": 3567
    },
    {
      "epoch": 1.070828331332533,
      "grad_norm": 0.12167440354824066,
      "learning_rate": 0.00010513072527366006,
      "loss": 0.3628,
      "step": 3568
    },
    {
      "epoch": 1.0711284513805521,
      "grad_norm": 0.1389562040567398,
      "learning_rate": 0.00010507841622097739,
      "loss": 0.4408,
      "step": 3569
    },
    {
      "epoch": 1.0714285714285714,
      "grad_norm": 0.15119138360023499,
      "learning_rate": 0.00010502610577508949,
      "loss": 0.4429,
      "step": 3570
    },
    {
      "epoch": 1.0717286914765907,
      "grad_norm": 0.17108459770679474,
      "learning_rate": 0.0001049737939503471,
      "loss": 0.4332,
      "step": 3571
    },
    {
      "epoch": 1.07202881152461,
      "grad_norm": 0.1357976347208023,
      "learning_rate": 0.00010492148076110136,
      "loss": 0.3945,
      "step": 3572
    },
    {
      "epoch": 1.072328931572629,
      "grad_norm": 0.12482724338769913,
      "learning_rate": 0.0001048691662217038,
      "loss": 0.3649,
      "step": 3573
    },
    {
      "epoch": 1.0726290516206483,
      "grad_norm": 0.1527443528175354,
      "learning_rate": 0.00010481685034650632,
      "loss": 0.4663,
      "step": 3574
    },
    {
      "epoch": 1.0729291716686675,
      "grad_norm": 0.14375494420528412,
      "learning_rate": 0.00010476453314986122,
      "loss": 0.4593,
      "step": 3575
    },
    {
      "epoch": 1.0732292917166866,
      "grad_norm": 0.13201279938220978,
      "learning_rate": 0.00010471221464612104,
      "loss": 0.4413,
      "step": 3576
    },
    {
      "epoch": 1.0735294117647058,
      "grad_norm": 0.15472981333732605,
      "learning_rate": 0.00010465989484963881,
      "loss": 0.4245,
      "step": 3577
    },
    {
      "epoch": 1.0738295318127251,
      "grad_norm": 0.1417369395494461,
      "learning_rate": 0.00010460757377476792,
      "loss": 0.4461,
      "step": 3578
    },
    {
      "epoch": 1.0741296518607444,
      "grad_norm": 0.12595735490322113,
      "learning_rate": 0.00010455525143586191,
      "loss": 0.3612,
      "step": 3579
    },
    {
      "epoch": 1.0744297719087634,
      "grad_norm": 0.1501154899597168,
      "learning_rate": 0.00010450292784727496,
      "loss": 0.4561,
      "step": 3580
    },
    {
      "epoch": 1.0747298919567827,
      "grad_norm": 0.1509701907634735,
      "learning_rate": 0.00010445060302336137,
      "loss": 0.4871,
      "step": 3581
    },
    {
      "epoch": 1.075030012004802,
      "grad_norm": 0.14055953919887543,
      "learning_rate": 0.00010439827697847587,
      "loss": 0.4506,
      "step": 3582
    },
    {
      "epoch": 1.0753301320528212,
      "grad_norm": 0.15558786690235138,
      "learning_rate": 0.00010434594972697352,
      "loss": 0.457,
      "step": 3583
    },
    {
      "epoch": 1.0756302521008403,
      "grad_norm": 0.14152899384498596,
      "learning_rate": 0.00010429362128320968,
      "loss": 0.4506,
      "step": 3584
    },
    {
      "epoch": 1.0759303721488596,
      "grad_norm": 0.17807260155677795,
      "learning_rate": 0.00010424129166154009,
      "loss": 0.474,
      "step": 3585
    },
    {
      "epoch": 1.0762304921968788,
      "grad_norm": 0.1473320573568344,
      "learning_rate": 0.00010418896087632077,
      "loss": 0.4387,
      "step": 3586
    },
    {
      "epoch": 1.0765306122448979,
      "grad_norm": 0.156795471906662,
      "learning_rate": 0.00010413662894190806,
      "loss": 0.4749,
      "step": 3587
    },
    {
      "epoch": 1.0768307322929171,
      "grad_norm": 0.2006545066833496,
      "learning_rate": 0.00010408429587265862,
      "loss": 0.4646,
      "step": 3588
    },
    {
      "epoch": 1.0771308523409364,
      "grad_norm": 0.14439736306667328,
      "learning_rate": 0.00010403196168292945,
      "loss": 0.4322,
      "step": 3589
    },
    {
      "epoch": 1.0774309723889557,
      "grad_norm": 0.15489919483661652,
      "learning_rate": 0.00010397962638707783,
      "loss": 0.4541,
      "step": 3590
    },
    {
      "epoch": 1.0777310924369747,
      "grad_norm": 0.13993416726589203,
      "learning_rate": 0.00010392728999946136,
      "loss": 0.4179,
      "step": 3591
    },
    {
      "epoch": 1.078031212484994,
      "grad_norm": 0.13038261234760284,
      "learning_rate": 0.00010387495253443787,
      "loss": 0.3982,
      "step": 3592
    },
    {
      "epoch": 1.0783313325330133,
      "grad_norm": 0.1442558765411377,
      "learning_rate": 0.00010382261400636563,
      "loss": 0.4591,
      "step": 3593
    },
    {
      "epoch": 1.0786314525810323,
      "grad_norm": 0.17982600629329681,
      "learning_rate": 0.00010377027442960303,
      "loss": 0.4187,
      "step": 3594
    },
    {
      "epoch": 1.0789315726290516,
      "grad_norm": 0.1430574506521225,
      "learning_rate": 0.0001037179338185089,
      "loss": 0.4561,
      "step": 3595
    },
    {
      "epoch": 1.0792316926770709,
      "grad_norm": 0.13929946720600128,
      "learning_rate": 0.00010366559218744224,
      "loss": 0.4253,
      "step": 3596
    },
    {
      "epoch": 1.0795318127250901,
      "grad_norm": 0.14808815717697144,
      "learning_rate": 0.00010361324955076242,
      "loss": 0.4227,
      "step": 3597
    },
    {
      "epoch": 1.0798319327731092,
      "grad_norm": 0.1456451565027237,
      "learning_rate": 0.00010356090592282899,
      "loss": 0.4738,
      "step": 3598
    },
    {
      "epoch": 1.0801320528211285,
      "grad_norm": 0.14544235169887543,
      "learning_rate": 0.00010350856131800186,
      "loss": 0.4524,
      "step": 3599
    },
    {
      "epoch": 1.0804321728691477,
      "grad_norm": 0.13172706961631775,
      "learning_rate": 0.00010345621575064117,
      "loss": 0.407,
      "step": 3600
    },
    {
      "epoch": 1.0807322929171668,
      "grad_norm": 0.14873874187469482,
      "learning_rate": 0.00010340386923510733,
      "loss": 0.4285,
      "step": 3601
    },
    {
      "epoch": 1.081032412965186,
      "grad_norm": 0.12801893055438995,
      "learning_rate": 0.00010335152178576095,
      "loss": 0.3911,
      "step": 3602
    },
    {
      "epoch": 1.0813325330132053,
      "grad_norm": 0.14230036735534668,
      "learning_rate": 0.00010329917341696304,
      "loss": 0.4634,
      "step": 3603
    },
    {
      "epoch": 1.0816326530612246,
      "grad_norm": 0.14611704647541046,
      "learning_rate": 0.00010324682414307471,
      "loss": 0.4463,
      "step": 3604
    },
    {
      "epoch": 1.0819327731092436,
      "grad_norm": 0.1437060534954071,
      "learning_rate": 0.00010319447397845745,
      "loss": 0.4216,
      "step": 3605
    },
    {
      "epoch": 1.082232893157263,
      "grad_norm": 0.14681190252304077,
      "learning_rate": 0.00010314212293747285,
      "loss": 0.4739,
      "step": 3606
    },
    {
      "epoch": 1.0825330132052822,
      "grad_norm": 0.15303242206573486,
      "learning_rate": 0.00010308977103448283,
      "loss": 0.4153,
      "step": 3607
    },
    {
      "epoch": 1.0828331332533012,
      "grad_norm": 0.16087490320205688,
      "learning_rate": 0.00010303741828384961,
      "loss": 0.4687,
      "step": 3608
    },
    {
      "epoch": 1.0831332533013205,
      "grad_norm": 0.12915025651454926,
      "learning_rate": 0.00010298506469993548,
      "loss": 0.3658,
      "step": 3609
    },
    {
      "epoch": 1.0834333733493398,
      "grad_norm": 0.12952394783496857,
      "learning_rate": 0.00010293271029710307,
      "loss": 0.3763,
      "step": 3610
    },
    {
      "epoch": 1.083733493397359,
      "grad_norm": 0.13518524169921875,
      "learning_rate": 0.00010288035508971523,
      "loss": 0.351,
      "step": 3611
    },
    {
      "epoch": 1.084033613445378,
      "grad_norm": 0.13607051968574524,
      "learning_rate": 0.000102827999092135,
      "loss": 0.4144,
      "step": 3612
    },
    {
      "epoch": 1.0843337334933973,
      "grad_norm": 0.13719278573989868,
      "learning_rate": 0.00010277564231872565,
      "loss": 0.4109,
      "step": 3613
    },
    {
      "epoch": 1.0846338535414166,
      "grad_norm": 0.14822518825531006,
      "learning_rate": 0.00010272328478385065,
      "loss": 0.477,
      "step": 3614
    },
    {
      "epoch": 1.0849339735894357,
      "grad_norm": 0.14354124665260315,
      "learning_rate": 0.0001026709265018737,
      "loss": 0.4505,
      "step": 3615
    },
    {
      "epoch": 1.085234093637455,
      "grad_norm": 0.15144415199756622,
      "learning_rate": 0.0001026185674871587,
      "loss": 0.4718,
      "step": 3616
    },
    {
      "epoch": 1.0855342136854742,
      "grad_norm": 0.13325156271457672,
      "learning_rate": 0.0001025662077540697,
      "loss": 0.406,
      "step": 3617
    },
    {
      "epoch": 1.0858343337334935,
      "grad_norm": 0.12941822409629822,
      "learning_rate": 0.00010251384731697106,
      "loss": 0.3857,
      "step": 3618
    },
    {
      "epoch": 1.0861344537815125,
      "grad_norm": 0.13992074131965637,
      "learning_rate": 0.00010246148619022722,
      "loss": 0.4318,
      "step": 3619
    },
    {
      "epoch": 1.0864345738295318,
      "grad_norm": 0.17397478222846985,
      "learning_rate": 0.00010240912438820289,
      "loss": 0.4922,
      "step": 3620
    },
    {
      "epoch": 1.086734693877551,
      "grad_norm": 0.14126791059970856,
      "learning_rate": 0.00010235676192526289,
      "loss": 0.4282,
      "step": 3621
    },
    {
      "epoch": 1.0870348139255703,
      "grad_norm": 0.13610020279884338,
      "learning_rate": 0.00010230439881577229,
      "loss": 0.3859,
      "step": 3622
    },
    {
      "epoch": 1.0873349339735894,
      "grad_norm": 0.1584099680185318,
      "learning_rate": 0.00010225203507409629,
      "loss": 0.5093,
      "step": 3623
    },
    {
      "epoch": 1.0876350540216086,
      "grad_norm": 0.15142543613910675,
      "learning_rate": 0.0001021996707146003,
      "loss": 0.4195,
      "step": 3624
    },
    {
      "epoch": 1.087935174069628,
      "grad_norm": 0.15312343835830688,
      "learning_rate": 0.00010214730575164988,
      "loss": 0.4334,
      "step": 3625
    },
    {
      "epoch": 1.088235294117647,
      "grad_norm": 0.13882726430892944,
      "learning_rate": 0.0001020949401996107,
      "loss": 0.4498,
      "step": 3626
    },
    {
      "epoch": 1.0885354141656662,
      "grad_norm": 0.14910578727722168,
      "learning_rate": 0.00010204257407284874,
      "loss": 0.4318,
      "step": 3627
    },
    {
      "epoch": 1.0888355342136855,
      "grad_norm": 0.14569604396820068,
      "learning_rate": 0.00010199020738573001,
      "loss": 0.454,
      "step": 3628
    },
    {
      "epoch": 1.0891356542617048,
      "grad_norm": 0.17067281901836395,
      "learning_rate": 0.00010193784015262069,
      "loss": 0.5436,
      "step": 3629
    },
    {
      "epoch": 1.0894357743097238,
      "grad_norm": 0.14397454261779785,
      "learning_rate": 0.00010188547238788713,
      "loss": 0.4489,
      "step": 3630
    },
    {
      "epoch": 1.089735894357743,
      "grad_norm": 0.13740958273410797,
      "learning_rate": 0.00010183310410589589,
      "loss": 0.4277,
      "step": 3631
    },
    {
      "epoch": 1.0900360144057624,
      "grad_norm": 0.14416514337062836,
      "learning_rate": 0.00010178073532101352,
      "loss": 0.4115,
      "step": 3632
    },
    {
      "epoch": 1.0903361344537814,
      "grad_norm": 0.12540937960147858,
      "learning_rate": 0.00010172836604760683,
      "loss": 0.3482,
      "step": 3633
    },
    {
      "epoch": 1.0906362545018007,
      "grad_norm": 0.13062594830989838,
      "learning_rate": 0.00010167599630004271,
      "loss": 0.4033,
      "step": 3634
    },
    {
      "epoch": 1.09093637454982,
      "grad_norm": 0.14485786855220795,
      "learning_rate": 0.0001016236260926883,
      "loss": 0.4511,
      "step": 3635
    },
    {
      "epoch": 1.0912364945978392,
      "grad_norm": 0.14354106783866882,
      "learning_rate": 0.00010157125543991062,
      "loss": 0.4444,
      "step": 3636
    },
    {
      "epoch": 1.0915366146458583,
      "grad_norm": 0.15071211755275726,
      "learning_rate": 0.00010151888435607706,
      "loss": 0.4128,
      "step": 3637
    },
    {
      "epoch": 1.0918367346938775,
      "grad_norm": 0.15684175491333008,
      "learning_rate": 0.000101466512855555,
      "loss": 0.4231,
      "step": 3638
    },
    {
      "epoch": 1.0921368547418968,
      "grad_norm": 0.18450690805912018,
      "learning_rate": 0.00010141414095271193,
      "loss": 0.449,
      "step": 3639
    },
    {
      "epoch": 1.092436974789916,
      "grad_norm": 0.15630345046520233,
      "learning_rate": 0.00010136176866191548,
      "loss": 0.4481,
      "step": 3640
    },
    {
      "epoch": 1.0927370948379351,
      "grad_norm": 0.3001805245876312,
      "learning_rate": 0.00010130939599753346,
      "loss": 0.4483,
      "step": 3641
    },
    {
      "epoch": 1.0930372148859544,
      "grad_norm": 0.23306016623973846,
      "learning_rate": 0.00010125702297393366,
      "loss": 0.4181,
      "step": 3642
    },
    {
      "epoch": 1.0933373349339737,
      "grad_norm": 0.14699408411979675,
      "learning_rate": 0.00010120464960548402,
      "loss": 0.465,
      "step": 3643
    },
    {
      "epoch": 1.0936374549819927,
      "grad_norm": 0.14448916912078857,
      "learning_rate": 0.00010115227590655257,
      "loss": 0.4315,
      "step": 3644
    },
    {
      "epoch": 1.093937575030012,
      "grad_norm": 0.13666747510433197,
      "learning_rate": 0.0001010999018915074,
      "loss": 0.4295,
      "step": 3645
    },
    {
      "epoch": 1.0942376950780313,
      "grad_norm": 0.21696338057518005,
      "learning_rate": 0.0001010475275747168,
      "loss": 0.4799,
      "step": 3646
    },
    {
      "epoch": 1.0945378151260505,
      "grad_norm": 0.15575353801250458,
      "learning_rate": 0.00010099515297054902,
      "loss": 0.4692,
      "step": 3647
    },
    {
      "epoch": 1.0948379351740696,
      "grad_norm": 0.14021803438663483,
      "learning_rate": 0.00010094277809337243,
      "loss": 0.3929,
      "step": 3648
    },
    {
      "epoch": 1.0951380552220888,
      "grad_norm": 0.14631427824497223,
      "learning_rate": 0.00010089040295755546,
      "loss": 0.4501,
      "step": 3649
    },
    {
      "epoch": 1.0954381752701081,
      "grad_norm": 0.16138429939746857,
      "learning_rate": 0.00010083802757746668,
      "loss": 0.4264,
      "step": 3650
    },
    {
      "epoch": 1.0957382953181272,
      "grad_norm": 0.1426657736301422,
      "learning_rate": 0.0001007856519674746,
      "loss": 0.3811,
      "step": 3651
    },
    {
      "epoch": 1.0960384153661464,
      "grad_norm": 0.15724453330039978,
      "learning_rate": 0.00010073327614194794,
      "loss": 0.425,
      "step": 3652
    },
    {
      "epoch": 1.0963385354141657,
      "grad_norm": 0.3289673626422882,
      "learning_rate": 0.0001006809001152554,
      "loss": 0.3751,
      "step": 3653
    },
    {
      "epoch": 1.096638655462185,
      "grad_norm": 0.14599741995334625,
      "learning_rate": 0.00010062852390176569,
      "loss": 0.448,
      "step": 3654
    },
    {
      "epoch": 1.096938775510204,
      "grad_norm": 0.15674425661563873,
      "learning_rate": 0.00010057614751584765,
      "loss": 0.4618,
      "step": 3655
    },
    {
      "epoch": 1.0972388955582233,
      "grad_norm": 0.13821865618228912,
      "learning_rate": 0.00010052377097187015,
      "loss": 0.3746,
      "step": 3656
    },
    {
      "epoch": 1.0975390156062426,
      "grad_norm": 0.1998661309480667,
      "learning_rate": 0.00010047139428420211,
      "loss": 0.4147,
      "step": 3657
    },
    {
      "epoch": 1.0978391356542616,
      "grad_norm": 0.15244896709918976,
      "learning_rate": 0.00010041901746721245,
      "loss": 0.4808,
      "step": 3658
    },
    {
      "epoch": 1.0981392557022809,
      "grad_norm": 0.16402991116046906,
      "learning_rate": 0.00010036664053527012,
      "loss": 0.405,
      "step": 3659
    },
    {
      "epoch": 1.0984393757503002,
      "grad_norm": 0.13648836314678192,
      "learning_rate": 0.0001003142635027442,
      "loss": 0.4282,
      "step": 3660
    },
    {
      "epoch": 1.0987394957983194,
      "grad_norm": 0.1392647624015808,
      "learning_rate": 0.00010026188638400367,
      "loss": 0.4078,
      "step": 3661
    },
    {
      "epoch": 1.0990396158463385,
      "grad_norm": 0.1392301619052887,
      "learning_rate": 0.00010020950919341763,
      "loss": 0.4208,
      "step": 3662
    },
    {
      "epoch": 1.0993397358943577,
      "grad_norm": 0.13113875687122345,
      "learning_rate": 0.00010015713194535512,
      "loss": 0.4064,
      "step": 3663
    },
    {
      "epoch": 1.099639855942377,
      "grad_norm": 0.13509242236614227,
      "learning_rate": 0.00010010475465418527,
      "loss": 0.395,
      "step": 3664
    },
    {
      "epoch": 1.099939975990396,
      "grad_norm": 0.14285650849342346,
      "learning_rate": 0.00010005237733427721,
      "loss": 0.4583,
      "step": 3665
    },
    {
      "epoch": 1.1002400960384153,
      "grad_norm": 0.14350396394729614,
      "learning_rate": 0.0001,
      "loss": 0.393,
      "step": 3666
    },
    {
      "epoch": 1.1005402160864346,
      "grad_norm": 0.18313319981098175,
      "learning_rate": 9.994762266572281e-05,
      "loss": 0.399,
      "step": 3667
    },
    {
      "epoch": 1.1008403361344539,
      "grad_norm": 0.1572713553905487,
      "learning_rate": 9.989524534581471e-05,
      "loss": 0.5052,
      "step": 3668
    },
    {
      "epoch": 1.101140456182473,
      "grad_norm": 0.13151343166828156,
      "learning_rate": 9.98428680546449e-05,
      "loss": 0.3965,
      "step": 3669
    },
    {
      "epoch": 1.1014405762304922,
      "grad_norm": 0.15630406141281128,
      "learning_rate": 9.979049080658242e-05,
      "loss": 0.463,
      "step": 3670
    },
    {
      "epoch": 1.1017406962785115,
      "grad_norm": 0.13366857171058655,
      "learning_rate": 9.973811361599636e-05,
      "loss": 0.3853,
      "step": 3671
    },
    {
      "epoch": 1.1020408163265305,
      "grad_norm": 0.13806574046611786,
      "learning_rate": 9.968573649725583e-05,
      "loss": 0.4184,
      "step": 3672
    },
    {
      "epoch": 1.1023409363745498,
      "grad_norm": 0.131994366645813,
      "learning_rate": 9.963335946472989e-05,
      "loss": 0.3853,
      "step": 3673
    },
    {
      "epoch": 1.102641056422569,
      "grad_norm": 0.14549876749515533,
      "learning_rate": 9.958098253278758e-05,
      "loss": 0.4407,
      "step": 3674
    },
    {
      "epoch": 1.1029411764705883,
      "grad_norm": 0.1381799727678299,
      "learning_rate": 9.952860571579794e-05,
      "loss": 0.3665,
      "step": 3675
    },
    {
      "epoch": 1.1032412965186074,
      "grad_norm": 0.1353103667497635,
      "learning_rate": 9.947622902812984e-05,
      "loss": 0.4143,
      "step": 3676
    },
    {
      "epoch": 1.1035414165666266,
      "grad_norm": 0.15333561599254608,
      "learning_rate": 9.942385248415237e-05,
      "loss": 0.4198,
      "step": 3677
    },
    {
      "epoch": 1.103841536614646,
      "grad_norm": 0.13435952365398407,
      "learning_rate": 9.937147609823434e-05,
      "loss": 0.4037,
      "step": 3678
    },
    {
      "epoch": 1.104141656662665,
      "grad_norm": 0.14614462852478027,
      "learning_rate": 9.931909988474464e-05,
      "loss": 0.4549,
      "step": 3679
    },
    {
      "epoch": 1.1044417767106842,
      "grad_norm": 0.14689116179943085,
      "learning_rate": 9.926672385805207e-05,
      "loss": 0.3681,
      "step": 3680
    },
    {
      "epoch": 1.1047418967587035,
      "grad_norm": 0.16367031633853912,
      "learning_rate": 9.92143480325254e-05,
      "loss": 0.3984,
      "step": 3681
    },
    {
      "epoch": 1.1050420168067228,
      "grad_norm": 0.1460021585226059,
      "learning_rate": 9.916197242253335e-05,
      "loss": 0.4533,
      "step": 3682
    },
    {
      "epoch": 1.1053421368547418,
      "grad_norm": 0.1886349767446518,
      "learning_rate": 9.910959704244458e-05,
      "loss": 0.434,
      "step": 3683
    },
    {
      "epoch": 1.105642256902761,
      "grad_norm": 0.1475280523300171,
      "learning_rate": 9.90572219066276e-05,
      "loss": 0.4006,
      "step": 3684
    },
    {
      "epoch": 1.1059423769507803,
      "grad_norm": 0.14980600774288177,
      "learning_rate": 9.9004847029451e-05,
      "loss": 0.4208,
      "step": 3685
    },
    {
      "epoch": 1.1062424969987996,
      "grad_norm": 0.14014779031276703,
      "learning_rate": 9.895247242528323e-05,
      "loss": 0.4283,
      "step": 3686
    },
    {
      "epoch": 1.1065426170468187,
      "grad_norm": 0.1508089154958725,
      "learning_rate": 9.89000981084926e-05,
      "loss": 0.4432,
      "step": 3687
    },
    {
      "epoch": 1.106842737094838,
      "grad_norm": 0.14019598066806793,
      "learning_rate": 9.884772409344746e-05,
      "loss": 0.4329,
      "step": 3688
    },
    {
      "epoch": 1.1071428571428572,
      "grad_norm": 0.12213557958602905,
      "learning_rate": 9.879535039451603e-05,
      "loss": 0.3514,
      "step": 3689
    },
    {
      "epoch": 1.1074429771908763,
      "grad_norm": 0.1391676515340805,
      "learning_rate": 9.874297702606636e-05,
      "loss": 0.3951,
      "step": 3690
    },
    {
      "epoch": 1.1077430972388955,
      "grad_norm": 0.1456574946641922,
      "learning_rate": 9.869060400246656e-05,
      "loss": 0.4052,
      "step": 3691
    },
    {
      "epoch": 1.1080432172869148,
      "grad_norm": 0.15275728702545166,
      "learning_rate": 9.86382313380845e-05,
      "loss": 0.482,
      "step": 3692
    },
    {
      "epoch": 1.108343337334934,
      "grad_norm": 0.1298411637544632,
      "learning_rate": 9.858585904728809e-05,
      "loss": 0.4111,
      "step": 3693
    },
    {
      "epoch": 1.1086434573829531,
      "grad_norm": 0.1593908816576004,
      "learning_rate": 9.853348714444506e-05,
      "loss": 0.4284,
      "step": 3694
    },
    {
      "epoch": 1.1089435774309724,
      "grad_norm": 0.13594958186149597,
      "learning_rate": 9.848111564392294e-05,
      "loss": 0.4011,
      "step": 3695
    },
    {
      "epoch": 1.1092436974789917,
      "grad_norm": 0.1442171335220337,
      "learning_rate": 9.842874456008939e-05,
      "loss": 0.4511,
      "step": 3696
    },
    {
      "epoch": 1.1095438175270107,
      "grad_norm": 0.141292005777359,
      "learning_rate": 9.837637390731175e-05,
      "loss": 0.4284,
      "step": 3697
    },
    {
      "epoch": 1.10984393757503,
      "grad_norm": 0.14179612696170807,
      "learning_rate": 9.832400369995728e-05,
      "loss": 0.4402,
      "step": 3698
    },
    {
      "epoch": 1.1101440576230492,
      "grad_norm": 0.12931428849697113,
      "learning_rate": 9.82716339523932e-05,
      "loss": 0.3745,
      "step": 3699
    },
    {
      "epoch": 1.1104441776710685,
      "grad_norm": 0.13897705078125,
      "learning_rate": 9.821926467898653e-05,
      "loss": 0.4134,
      "step": 3700
    },
    {
      "epoch": 1.1107442977190876,
      "grad_norm": 0.17523273825645447,
      "learning_rate": 9.816689589410412e-05,
      "loss": 0.4313,
      "step": 3701
    },
    {
      "epoch": 1.1110444177671068,
      "grad_norm": 0.19277136027812958,
      "learning_rate": 9.811452761211288e-05,
      "loss": 0.4088,
      "step": 3702
    },
    {
      "epoch": 1.111344537815126,
      "grad_norm": 0.18094953894615173,
      "learning_rate": 9.806215984737932e-05,
      "loss": 0.4272,
      "step": 3703
    },
    {
      "epoch": 1.1116446578631454,
      "grad_norm": 0.1389746069908142,
      "learning_rate": 9.800979261427001e-05,
      "loss": 0.4336,
      "step": 3704
    },
    {
      "epoch": 1.1119447779111644,
      "grad_norm": 0.16211512684822083,
      "learning_rate": 9.795742592715127e-05,
      "loss": 0.439,
      "step": 3705
    },
    {
      "epoch": 1.1122448979591837,
      "grad_norm": 0.17074798047542572,
      "learning_rate": 9.790505980038928e-05,
      "loss": 0.4038,
      "step": 3706
    },
    {
      "epoch": 1.112545018007203,
      "grad_norm": 0.14179076254367828,
      "learning_rate": 9.785269424835016e-05,
      "loss": 0.4595,
      "step": 3707
    },
    {
      "epoch": 1.112845138055222,
      "grad_norm": 0.1443812996149063,
      "learning_rate": 9.780032928539973e-05,
      "loss": 0.4738,
      "step": 3708
    },
    {
      "epoch": 1.1131452581032413,
      "grad_norm": 0.19112515449523926,
      "learning_rate": 9.774796492590372e-05,
      "loss": 0.4503,
      "step": 3709
    },
    {
      "epoch": 1.1134453781512605,
      "grad_norm": 0.14020976424217224,
      "learning_rate": 9.769560118422773e-05,
      "loss": 0.444,
      "step": 3710
    },
    {
      "epoch": 1.1137454981992798,
      "grad_norm": 0.15076801180839539,
      "learning_rate": 9.76432380747371e-05,
      "loss": 0.3726,
      "step": 3711
    },
    {
      "epoch": 1.1140456182472989,
      "grad_norm": 0.14267252385616302,
      "learning_rate": 9.759087561179712e-05,
      "loss": 0.4061,
      "step": 3712
    },
    {
      "epoch": 1.1143457382953181,
      "grad_norm": 0.1448672115802765,
      "learning_rate": 9.75385138097728e-05,
      "loss": 0.4274,
      "step": 3713
    },
    {
      "epoch": 1.1146458583433374,
      "grad_norm": 0.15275657176971436,
      "learning_rate": 9.748615268302893e-05,
      "loss": 0.4378,
      "step": 3714
    },
    {
      "epoch": 1.1149459783913565,
      "grad_norm": 0.15933093428611755,
      "learning_rate": 9.743379224593032e-05,
      "loss": 0.4597,
      "step": 3715
    },
    {
      "epoch": 1.1152460984393757,
      "grad_norm": 0.1405712515115738,
      "learning_rate": 9.738143251284135e-05,
      "loss": 0.4389,
      "step": 3716
    },
    {
      "epoch": 1.115546218487395,
      "grad_norm": 0.13883735239505768,
      "learning_rate": 9.732907349812632e-05,
      "loss": 0.4352,
      "step": 3717
    },
    {
      "epoch": 1.1158463385354143,
      "grad_norm": 0.1645667403936386,
      "learning_rate": 9.727671521614938e-05,
      "loss": 0.3979,
      "step": 3718
    },
    {
      "epoch": 1.1161464585834333,
      "grad_norm": 0.13730087876319885,
      "learning_rate": 9.72243576812744e-05,
      "loss": 0.4031,
      "step": 3719
    },
    {
      "epoch": 1.1164465786314526,
      "grad_norm": 0.13023704290390015,
      "learning_rate": 9.717200090786501e-05,
      "loss": 0.3587,
      "step": 3720
    },
    {
      "epoch": 1.1167466986794718,
      "grad_norm": 0.17127841711044312,
      "learning_rate": 9.71196449102848e-05,
      "loss": 0.4851,
      "step": 3721
    },
    {
      "epoch": 1.117046818727491,
      "grad_norm": 0.1397799402475357,
      "learning_rate": 9.706728970289695e-05,
      "loss": 0.3994,
      "step": 3722
    },
    {
      "epoch": 1.1173469387755102,
      "grad_norm": 0.13015244901180267,
      "learning_rate": 9.701493530006455e-05,
      "loss": 0.4152,
      "step": 3723
    },
    {
      "epoch": 1.1176470588235294,
      "grad_norm": 0.1434255987405777,
      "learning_rate": 9.696258171615043e-05,
      "loss": 0.4336,
      "step": 3724
    },
    {
      "epoch": 1.1179471788715487,
      "grad_norm": 0.15915736556053162,
      "learning_rate": 9.691022896551715e-05,
      "loss": 0.4547,
      "step": 3725
    },
    {
      "epoch": 1.1182472989195678,
      "grad_norm": 0.15358136594295502,
      "learning_rate": 9.685787706252716e-05,
      "loss": 0.4333,
      "step": 3726
    },
    {
      "epoch": 1.118547418967587,
      "grad_norm": 0.1452532857656479,
      "learning_rate": 9.68055260215426e-05,
      "loss": 0.4303,
      "step": 3727
    },
    {
      "epoch": 1.1188475390156063,
      "grad_norm": 0.1438799947500229,
      "learning_rate": 9.67531758569253e-05,
      "loss": 0.4472,
      "step": 3728
    },
    {
      "epoch": 1.1191476590636253,
      "grad_norm": 0.324162095785141,
      "learning_rate": 9.670082658303698e-05,
      "loss": 0.4478,
      "step": 3729
    },
    {
      "epoch": 1.1194477791116446,
      "grad_norm": 0.3205750584602356,
      "learning_rate": 9.664847821423907e-05,
      "loss": 0.4231,
      "step": 3730
    },
    {
      "epoch": 1.1197478991596639,
      "grad_norm": 0.14763224124908447,
      "learning_rate": 9.65961307648927e-05,
      "loss": 0.4589,
      "step": 3731
    },
    {
      "epoch": 1.1200480192076832,
      "grad_norm": 0.13289707899093628,
      "learning_rate": 9.654378424935885e-05,
      "loss": 0.3955,
      "step": 3732
    },
    {
      "epoch": 1.1203481392557022,
      "grad_norm": 0.1398414522409439,
      "learning_rate": 9.649143868199814e-05,
      "loss": 0.4155,
      "step": 3733
    },
    {
      "epoch": 1.1206482593037215,
      "grad_norm": 0.13791011273860931,
      "learning_rate": 9.643909407717104e-05,
      "loss": 0.4285,
      "step": 3734
    },
    {
      "epoch": 1.1209483793517407,
      "grad_norm": 0.5082004070281982,
      "learning_rate": 9.638675044923763e-05,
      "loss": 0.4751,
      "step": 3735
    },
    {
      "epoch": 1.1212484993997598,
      "grad_norm": 0.14391469955444336,
      "learning_rate": 9.633440781255777e-05,
      "loss": 0.4325,
      "step": 3736
    },
    {
      "epoch": 1.121548619447779,
      "grad_norm": 0.1421026885509491,
      "learning_rate": 9.628206618149113e-05,
      "loss": 0.4431,
      "step": 3737
    },
    {
      "epoch": 1.1218487394957983,
      "grad_norm": 0.1363701969385147,
      "learning_rate": 9.622972557039701e-05,
      "loss": 0.4173,
      "step": 3738
    },
    {
      "epoch": 1.1221488595438176,
      "grad_norm": 0.15972846746444702,
      "learning_rate": 9.617738599363438e-05,
      "loss": 0.4415,
      "step": 3739
    },
    {
      "epoch": 1.1224489795918366,
      "grad_norm": 0.1399405300617218,
      "learning_rate": 9.612504746556215e-05,
      "loss": 0.4091,
      "step": 3740
    },
    {
      "epoch": 1.122749099639856,
      "grad_norm": 0.19689854979515076,
      "learning_rate": 9.607271000053865e-05,
      "loss": 0.4199,
      "step": 3741
    },
    {
      "epoch": 1.1230492196878752,
      "grad_norm": 0.14337493479251862,
      "learning_rate": 9.602037361292218e-05,
      "loss": 0.4093,
      "step": 3742
    },
    {
      "epoch": 1.1233493397358945,
      "grad_norm": 0.12252549827098846,
      "learning_rate": 9.596803831707056e-05,
      "loss": 0.3608,
      "step": 3743
    },
    {
      "epoch": 1.1236494597839135,
      "grad_norm": 0.13820312917232513,
      "learning_rate": 9.591570412734137e-05,
      "loss": 0.4243,
      "step": 3744
    },
    {
      "epoch": 1.1239495798319328,
      "grad_norm": 0.13797035813331604,
      "learning_rate": 9.586337105809195e-05,
      "loss": 0.4505,
      "step": 3745
    },
    {
      "epoch": 1.124249699879952,
      "grad_norm": 0.1960890293121338,
      "learning_rate": 9.581103912367928e-05,
      "loss": 0.3763,
      "step": 3746
    },
    {
      "epoch": 1.124549819927971,
      "grad_norm": 0.14223526418209076,
      "learning_rate": 9.575870833845994e-05,
      "loss": 0.4511,
      "step": 3747
    },
    {
      "epoch": 1.1248499399759904,
      "grad_norm": 0.13488635420799255,
      "learning_rate": 9.570637871679034e-05,
      "loss": 0.4108,
      "step": 3748
    },
    {
      "epoch": 1.1251500600240096,
      "grad_norm": 0.1482326090335846,
      "learning_rate": 9.565405027302652e-05,
      "loss": 0.434,
      "step": 3749
    },
    {
      "epoch": 1.125450180072029,
      "grad_norm": 0.15240339934825897,
      "learning_rate": 9.560172302152414e-05,
      "loss": 0.4656,
      "step": 3750
    },
    {
      "epoch": 1.125750300120048,
      "grad_norm": 0.13519848883152008,
      "learning_rate": 9.554939697663866e-05,
      "loss": 0.4065,
      "step": 3751
    },
    {
      "epoch": 1.1260504201680672,
      "grad_norm": 0.14572805166244507,
      "learning_rate": 9.549707215272505e-05,
      "loss": 0.481,
      "step": 3752
    },
    {
      "epoch": 1.1263505402160865,
      "grad_norm": 0.3154379427433014,
      "learning_rate": 9.544474856413811e-05,
      "loss": 0.4518,
      "step": 3753
    },
    {
      "epoch": 1.1266506602641058,
      "grad_norm": 0.1319061517715454,
      "learning_rate": 9.539242622523215e-05,
      "loss": 0.3781,
      "step": 3754
    },
    {
      "epoch": 1.1269507803121248,
      "grad_norm": 0.13992980122566223,
      "learning_rate": 9.534010515036117e-05,
      "loss": 0.4156,
      "step": 3755
    },
    {
      "epoch": 1.127250900360144,
      "grad_norm": 0.15185213088989258,
      "learning_rate": 9.528778535387897e-05,
      "loss": 0.4115,
      "step": 3756
    },
    {
      "epoch": 1.1275510204081634,
      "grad_norm": 0.13775889575481415,
      "learning_rate": 9.523546685013883e-05,
      "loss": 0.4368,
      "step": 3757
    },
    {
      "epoch": 1.1278511404561824,
      "grad_norm": 0.13281835615634918,
      "learning_rate": 9.518314965349366e-05,
      "loss": 0.3926,
      "step": 3758
    },
    {
      "epoch": 1.1281512605042017,
      "grad_norm": 0.14120443165302277,
      "learning_rate": 9.513083377829622e-05,
      "loss": 0.4279,
      "step": 3759
    },
    {
      "epoch": 1.128451380552221,
      "grad_norm": 0.14515314996242523,
      "learning_rate": 9.507851923889868e-05,
      "loss": 0.4163,
      "step": 3760
    },
    {
      "epoch": 1.1287515006002402,
      "grad_norm": 0.15551668405532837,
      "learning_rate": 9.502620604965293e-05,
      "loss": 0.4462,
      "step": 3761
    },
    {
      "epoch": 1.1290516206482593,
      "grad_norm": 0.37603843212127686,
      "learning_rate": 9.497389422491054e-05,
      "loss": 0.3855,
      "step": 3762
    },
    {
      "epoch": 1.1293517406962785,
      "grad_norm": 0.14419730007648468,
      "learning_rate": 9.492158377902262e-05,
      "loss": 0.4237,
      "step": 3763
    },
    {
      "epoch": 1.1296518607442978,
      "grad_norm": 0.14433281123638153,
      "learning_rate": 9.486927472633996e-05,
      "loss": 0.421,
      "step": 3764
    },
    {
      "epoch": 1.1299519807923168,
      "grad_norm": 0.1525045782327652,
      "learning_rate": 9.4816967081213e-05,
      "loss": 0.424,
      "step": 3765
    },
    {
      "epoch": 1.1302521008403361,
      "grad_norm": 0.16010697185993195,
      "learning_rate": 9.476466085799161e-05,
      "loss": 0.4101,
      "step": 3766
    },
    {
      "epoch": 1.1305522208883554,
      "grad_norm": 0.15306870639324188,
      "learning_rate": 9.471235607102553e-05,
      "loss": 0.4936,
      "step": 3767
    },
    {
      "epoch": 1.1308523409363747,
      "grad_norm": 0.13024035096168518,
      "learning_rate": 9.466005273466393e-05,
      "loss": 0.3764,
      "step": 3768
    },
    {
      "epoch": 1.1311524609843937,
      "grad_norm": 0.13721759617328644,
      "learning_rate": 9.460775086325559e-05,
      "loss": 0.4065,
      "step": 3769
    },
    {
      "epoch": 1.131452581032413,
      "grad_norm": 0.1310778558254242,
      "learning_rate": 9.455545047114901e-05,
      "loss": 0.3956,
      "step": 3770
    },
    {
      "epoch": 1.1317527010804322,
      "grad_norm": 0.1459777057170868,
      "learning_rate": 9.450315157269214e-05,
      "loss": 0.4341,
      "step": 3771
    },
    {
      "epoch": 1.1320528211284513,
      "grad_norm": 0.149492084980011,
      "learning_rate": 9.445085418223264e-05,
      "loss": 0.4543,
      "step": 3772
    },
    {
      "epoch": 1.1323529411764706,
      "grad_norm": 0.1491083949804306,
      "learning_rate": 9.439855831411766e-05,
      "loss": 0.4333,
      "step": 3773
    },
    {
      "epoch": 1.1326530612244898,
      "grad_norm": 0.13482168316841125,
      "learning_rate": 9.434626398269393e-05,
      "loss": 0.3962,
      "step": 3774
    },
    {
      "epoch": 1.132953181272509,
      "grad_norm": 0.14354385435581207,
      "learning_rate": 9.429397120230789e-05,
      "loss": 0.4333,
      "step": 3775
    },
    {
      "epoch": 1.1332533013205282,
      "grad_norm": 0.1358368843793869,
      "learning_rate": 9.424167998730542e-05,
      "loss": 0.3885,
      "step": 3776
    },
    {
      "epoch": 1.1335534213685474,
      "grad_norm": 0.132878839969635,
      "learning_rate": 9.418939035203198e-05,
      "loss": 0.3914,
      "step": 3777
    },
    {
      "epoch": 1.1338535414165667,
      "grad_norm": 0.1317775398492813,
      "learning_rate": 9.413710231083272e-05,
      "loss": 0.3988,
      "step": 3778
    },
    {
      "epoch": 1.1341536614645857,
      "grad_norm": 0.14132483303546906,
      "learning_rate": 9.40848158780522e-05,
      "loss": 0.3987,
      "step": 3779
    },
    {
      "epoch": 1.134453781512605,
      "grad_norm": 0.12594947218894958,
      "learning_rate": 9.40325310680346e-05,
      "loss": 0.3727,
      "step": 3780
    },
    {
      "epoch": 1.1347539015606243,
      "grad_norm": 0.14186976850032806,
      "learning_rate": 9.398024789512369e-05,
      "loss": 0.4442,
      "step": 3781
    },
    {
      "epoch": 1.1350540216086435,
      "grad_norm": 0.13074573874473572,
      "learning_rate": 9.392796637366272e-05,
      "loss": 0.374,
      "step": 3782
    },
    {
      "epoch": 1.1353541416566626,
      "grad_norm": 0.1421251744031906,
      "learning_rate": 9.387568651799457e-05,
      "loss": 0.4349,
      "step": 3783
    },
    {
      "epoch": 1.1356542617046819,
      "grad_norm": 0.15257734060287476,
      "learning_rate": 9.382340834246161e-05,
      "loss": 0.4245,
      "step": 3784
    },
    {
      "epoch": 1.1359543817527011,
      "grad_norm": 0.13763746619224548,
      "learning_rate": 9.377113186140569e-05,
      "loss": 0.4402,
      "step": 3785
    },
    {
      "epoch": 1.1362545018007202,
      "grad_norm": 0.1399967074394226,
      "learning_rate": 9.37188570891684e-05,
      "loss": 0.414,
      "step": 3786
    },
    {
      "epoch": 1.1365546218487395,
      "grad_norm": 0.15171483159065247,
      "learning_rate": 9.366658404009062e-05,
      "loss": 0.4263,
      "step": 3787
    },
    {
      "epoch": 1.1368547418967587,
      "grad_norm": 0.1498602032661438,
      "learning_rate": 9.361431272851285e-05,
      "loss": 0.4179,
      "step": 3788
    },
    {
      "epoch": 1.137154861944778,
      "grad_norm": 0.1423768401145935,
      "learning_rate": 9.356204316877518e-05,
      "loss": 0.4607,
      "step": 3789
    },
    {
      "epoch": 1.137454981992797,
      "grad_norm": 0.1541401445865631,
      "learning_rate": 9.350977537521717e-05,
      "loss": 0.4563,
      "step": 3790
    },
    {
      "epoch": 1.1377551020408163,
      "grad_norm": 0.13775575160980225,
      "learning_rate": 9.345750936217783e-05,
      "loss": 0.4046,
      "step": 3791
    },
    {
      "epoch": 1.1380552220888356,
      "grad_norm": 0.13523773849010468,
      "learning_rate": 9.340524514399579e-05,
      "loss": 0.4038,
      "step": 3792
    },
    {
      "epoch": 1.1383553421368546,
      "grad_norm": 0.1366727203130722,
      "learning_rate": 9.33529827350091e-05,
      "loss": 0.4581,
      "step": 3793
    },
    {
      "epoch": 1.138655462184874,
      "grad_norm": 0.12473126500844955,
      "learning_rate": 9.330072214955542e-05,
      "loss": 0.3717,
      "step": 3794
    },
    {
      "epoch": 1.1389555822328932,
      "grad_norm": 0.15676021575927734,
      "learning_rate": 9.324846340197178e-05,
      "loss": 0.4323,
      "step": 3795
    },
    {
      "epoch": 1.1392557022809124,
      "grad_norm": 0.15680339932441711,
      "learning_rate": 9.319620650659479e-05,
      "loss": 0.433,
      "step": 3796
    },
    {
      "epoch": 1.1395558223289315,
      "grad_norm": 0.14847628772258759,
      "learning_rate": 9.314395147776055e-05,
      "loss": 0.3936,
      "step": 3797
    },
    {
      "epoch": 1.1398559423769508,
      "grad_norm": 0.14503027498722076,
      "learning_rate": 9.30916983298046e-05,
      "loss": 0.4131,
      "step": 3798
    },
    {
      "epoch": 1.14015606242497,
      "grad_norm": 0.17505504190921783,
      "learning_rate": 9.303944707706196e-05,
      "loss": 0.4563,
      "step": 3799
    },
    {
      "epoch": 1.140456182472989,
      "grad_norm": 0.1426694393157959,
      "learning_rate": 9.298719773386724e-05,
      "loss": 0.4162,
      "step": 3800
    },
    {
      "epoch": 1.1407563025210083,
      "grad_norm": 0.1447415053844452,
      "learning_rate": 9.293495031455437e-05,
      "loss": 0.4144,
      "step": 3801
    },
    {
      "epoch": 1.1410564225690276,
      "grad_norm": 0.13297565281391144,
      "learning_rate": 9.288270483345691e-05,
      "loss": 0.3883,
      "step": 3802
    },
    {
      "epoch": 1.1413565426170469,
      "grad_norm": 0.13733936846256256,
      "learning_rate": 9.283046130490777e-05,
      "loss": 0.4019,
      "step": 3803
    },
    {
      "epoch": 1.141656662665066,
      "grad_norm": 0.1973925530910492,
      "learning_rate": 9.277821974323931e-05,
      "loss": 0.4488,
      "step": 3804
    },
    {
      "epoch": 1.1419567827130852,
      "grad_norm": 0.14548492431640625,
      "learning_rate": 9.272598016278352e-05,
      "loss": 0.4546,
      "step": 3805
    },
    {
      "epoch": 1.1422569027611045,
      "grad_norm": 0.15933561325073242,
      "learning_rate": 9.267374257787163e-05,
      "loss": 0.4796,
      "step": 3806
    },
    {
      "epoch": 1.1425570228091237,
      "grad_norm": 0.14018718898296356,
      "learning_rate": 9.262150700283444e-05,
      "loss": 0.4026,
      "step": 3807
    },
    {
      "epoch": 1.1428571428571428,
      "grad_norm": 0.16493748128414154,
      "learning_rate": 9.256927345200221e-05,
      "loss": 0.4453,
      "step": 3808
    },
    {
      "epoch": 1.143157262905162,
      "grad_norm": 0.14382173120975494,
      "learning_rate": 9.251704193970463e-05,
      "loss": 0.4456,
      "step": 3809
    },
    {
      "epoch": 1.1434573829531813,
      "grad_norm": 0.14117833971977234,
      "learning_rate": 9.246481248027077e-05,
      "loss": 0.4647,
      "step": 3810
    },
    {
      "epoch": 1.1437575030012006,
      "grad_norm": 0.15258300304412842,
      "learning_rate": 9.24125850880292e-05,
      "loss": 0.4102,
      "step": 3811
    },
    {
      "epoch": 1.1440576230492197,
      "grad_norm": 0.136815145611763,
      "learning_rate": 9.23603597773079e-05,
      "loss": 0.3884,
      "step": 3812
    },
    {
      "epoch": 1.144357743097239,
      "grad_norm": 0.16704045236110687,
      "learning_rate": 9.23081365624343e-05,
      "loss": 0.4011,
      "step": 3813
    },
    {
      "epoch": 1.1446578631452582,
      "grad_norm": 0.1532745510339737,
      "learning_rate": 9.225591545773526e-05,
      "loss": 0.429,
      "step": 3814
    },
    {
      "epoch": 1.1449579831932772,
      "grad_norm": 0.1496194303035736,
      "learning_rate": 9.220369647753698e-05,
      "loss": 0.4077,
      "step": 3815
    },
    {
      "epoch": 1.1452581032412965,
      "grad_norm": 0.13573145866394043,
      "learning_rate": 9.215147963616522e-05,
      "loss": 0.4027,
      "step": 3816
    },
    {
      "epoch": 1.1455582232893158,
      "grad_norm": 0.1462734192609787,
      "learning_rate": 9.2099264947945e-05,
      "loss": 0.3946,
      "step": 3817
    },
    {
      "epoch": 1.145858343337335,
      "grad_norm": 0.15299293398857117,
      "learning_rate": 9.204705242720081e-05,
      "loss": 0.3987,
      "step": 3818
    },
    {
      "epoch": 1.146158463385354,
      "grad_norm": 0.12202068418264389,
      "learning_rate": 9.199484208825664e-05,
      "loss": 0.3443,
      "step": 3819
    },
    {
      "epoch": 1.1464585834333734,
      "grad_norm": 0.14336168766021729,
      "learning_rate": 9.194263394543575e-05,
      "loss": 0.4018,
      "step": 3820
    },
    {
      "epoch": 1.1467587034813926,
      "grad_norm": 0.14055746793746948,
      "learning_rate": 9.189042801306081e-05,
      "loss": 0.4153,
      "step": 3821
    },
    {
      "epoch": 1.1470588235294117,
      "grad_norm": 0.13209252059459686,
      "learning_rate": 9.183822430545401e-05,
      "loss": 0.4144,
      "step": 3822
    },
    {
      "epoch": 1.147358943577431,
      "grad_norm": 0.1446332335472107,
      "learning_rate": 9.178602283693672e-05,
      "loss": 0.4241,
      "step": 3823
    },
    {
      "epoch": 1.1476590636254502,
      "grad_norm": 0.13562363386154175,
      "learning_rate": 9.173382362182994e-05,
      "loss": 0.4073,
      "step": 3824
    },
    {
      "epoch": 1.1479591836734695,
      "grad_norm": 0.15286138653755188,
      "learning_rate": 9.168162667445384e-05,
      "loss": 0.4395,
      "step": 3825
    },
    {
      "epoch": 1.1482593037214885,
      "grad_norm": 0.13376116752624512,
      "learning_rate": 9.162943200912807e-05,
      "loss": 0.3755,
      "step": 3826
    },
    {
      "epoch": 1.1485594237695078,
      "grad_norm": 0.1457638442516327,
      "learning_rate": 9.157723964017165e-05,
      "loss": 0.4311,
      "step": 3827
    },
    {
      "epoch": 1.148859543817527,
      "grad_norm": 0.17449675500392914,
      "learning_rate": 9.152504958190298e-05,
      "loss": 0.3993,
      "step": 3828
    },
    {
      "epoch": 1.1491596638655461,
      "grad_norm": 0.13668902218341827,
      "learning_rate": 9.147286184863972e-05,
      "loss": 0.4458,
      "step": 3829
    },
    {
      "epoch": 1.1494597839135654,
      "grad_norm": 0.1385473906993866,
      "learning_rate": 9.14206764546991e-05,
      "loss": 0.4185,
      "step": 3830
    },
    {
      "epoch": 1.1497599039615847,
      "grad_norm": 0.176729217171669,
      "learning_rate": 9.136849341439747e-05,
      "loss": 0.334,
      "step": 3831
    },
    {
      "epoch": 1.150060024009604,
      "grad_norm": 0.16116398572921753,
      "learning_rate": 9.131631274205073e-05,
      "loss": 0.4478,
      "step": 3832
    },
    {
      "epoch": 1.150360144057623,
      "grad_norm": 0.12495972961187363,
      "learning_rate": 9.126413445197401e-05,
      "loss": 0.3671,
      "step": 3833
    },
    {
      "epoch": 1.1506602641056423,
      "grad_norm": 0.14716055989265442,
      "learning_rate": 9.121195855848181e-05,
      "loss": 0.4454,
      "step": 3834
    },
    {
      "epoch": 1.1509603841536615,
      "grad_norm": 0.1455603837966919,
      "learning_rate": 9.115978507588805e-05,
      "loss": 0.4099,
      "step": 3835
    },
    {
      "epoch": 1.1512605042016806,
      "grad_norm": 0.14258424937725067,
      "learning_rate": 9.110761401850587e-05,
      "loss": 0.4171,
      "step": 3836
    },
    {
      "epoch": 1.1515606242496998,
      "grad_norm": 0.1323336362838745,
      "learning_rate": 9.10554454006478e-05,
      "loss": 0.3682,
      "step": 3837
    },
    {
      "epoch": 1.1518607442977191,
      "grad_norm": 0.15299271047115326,
      "learning_rate": 9.100327923662573e-05,
      "loss": 0.482,
      "step": 3838
    },
    {
      "epoch": 1.1521608643457384,
      "grad_norm": 0.1293763816356659,
      "learning_rate": 9.095111554075085e-05,
      "loss": 0.3568,
      "step": 3839
    },
    {
      "epoch": 1.1524609843937574,
      "grad_norm": 0.14785557985305786,
      "learning_rate": 9.089895432733364e-05,
      "loss": 0.3778,
      "step": 3840
    },
    {
      "epoch": 1.1527611044417767,
      "grad_norm": 0.12821650505065918,
      "learning_rate": 9.0846795610684e-05,
      "loss": 0.3482,
      "step": 3841
    },
    {
      "epoch": 1.153061224489796,
      "grad_norm": 0.14406611025333405,
      "learning_rate": 9.079463940511096e-05,
      "loss": 0.4383,
      "step": 3842
    },
    {
      "epoch": 1.153361344537815,
      "grad_norm": 0.13842591643333435,
      "learning_rate": 9.074248572492311e-05,
      "loss": 0.3886,
      "step": 3843
    },
    {
      "epoch": 1.1536614645858343,
      "grad_norm": 0.13257431983947754,
      "learning_rate": 9.069033458442813e-05,
      "loss": 0.378,
      "step": 3844
    },
    {
      "epoch": 1.1539615846338536,
      "grad_norm": 0.1456029862165451,
      "learning_rate": 9.063818599793307e-05,
      "loss": 0.4176,
      "step": 3845
    },
    {
      "epoch": 1.1542617046818728,
      "grad_norm": 0.18930472433567047,
      "learning_rate": 9.058603997974437e-05,
      "loss": 0.403,
      "step": 3846
    },
    {
      "epoch": 1.1545618247298919,
      "grad_norm": 0.13556168973445892,
      "learning_rate": 9.053389654416768e-05,
      "loss": 0.4137,
      "step": 3847
    },
    {
      "epoch": 1.1548619447779112,
      "grad_norm": 0.15322500467300415,
      "learning_rate": 9.048175570550786e-05,
      "loss": 0.3904,
      "step": 3848
    },
    {
      "epoch": 1.1551620648259304,
      "grad_norm": 0.13942261040210724,
      "learning_rate": 9.042961747806927e-05,
      "loss": 0.3928,
      "step": 3849
    },
    {
      "epoch": 1.1554621848739495,
      "grad_norm": 0.16500745713710785,
      "learning_rate": 9.037748187615538e-05,
      "loss": 0.4227,
      "step": 3850
    },
    {
      "epoch": 1.1557623049219687,
      "grad_norm": 0.14914722740650177,
      "learning_rate": 9.032534891406897e-05,
      "loss": 0.4351,
      "step": 3851
    },
    {
      "epoch": 1.156062424969988,
      "grad_norm": 0.16964370012283325,
      "learning_rate": 9.027321860611218e-05,
      "loss": 0.4478,
      "step": 3852
    },
    {
      "epoch": 1.1563625450180073,
      "grad_norm": 0.14644289016723633,
      "learning_rate": 9.02210909665863e-05,
      "loss": 0.4449,
      "step": 3853
    },
    {
      "epoch": 1.1566626650660263,
      "grad_norm": 0.1654396653175354,
      "learning_rate": 9.016896600979205e-05,
      "loss": 0.4328,
      "step": 3854
    },
    {
      "epoch": 1.1569627851140456,
      "grad_norm": 0.17683622241020203,
      "learning_rate": 9.01168437500292e-05,
      "loss": 0.4209,
      "step": 3855
    },
    {
      "epoch": 1.1572629051620649,
      "grad_norm": 0.14778678119182587,
      "learning_rate": 9.006472420159692e-05,
      "loss": 0.4671,
      "step": 3856
    },
    {
      "epoch": 1.157563025210084,
      "grad_norm": 0.13837729394435883,
      "learning_rate": 9.001260737879367e-05,
      "loss": 0.4191,
      "step": 3857
    },
    {
      "epoch": 1.1578631452581032,
      "grad_norm": 0.18409544229507446,
      "learning_rate": 8.996049329591705e-05,
      "loss": 0.4062,
      "step": 3858
    },
    {
      "epoch": 1.1581632653061225,
      "grad_norm": 0.15561728179454803,
      "learning_rate": 8.990838196726396e-05,
      "loss": 0.426,
      "step": 3859
    },
    {
      "epoch": 1.1584633853541417,
      "grad_norm": 0.15675321221351624,
      "learning_rate": 8.985627340713061e-05,
      "loss": 0.3808,
      "step": 3860
    },
    {
      "epoch": 1.1587635054021608,
      "grad_norm": 0.14641733467578888,
      "learning_rate": 8.980416762981226e-05,
      "loss": 0.4591,
      "step": 3861
    },
    {
      "epoch": 1.15906362545018,
      "grad_norm": 0.14044514298439026,
      "learning_rate": 8.975206464960368e-05,
      "loss": 0.4438,
      "step": 3862
    },
    {
      "epoch": 1.1593637454981993,
      "grad_norm": 0.1593010425567627,
      "learning_rate": 8.969996448079864e-05,
      "loss": 0.4327,
      "step": 3863
    },
    {
      "epoch": 1.1596638655462184,
      "grad_norm": 0.1507299542427063,
      "learning_rate": 8.96478671376902e-05,
      "loss": 0.4078,
      "step": 3864
    },
    {
      "epoch": 1.1599639855942376,
      "grad_norm": 0.15690159797668457,
      "learning_rate": 8.959577263457074e-05,
      "loss": 0.4326,
      "step": 3865
    },
    {
      "epoch": 1.160264105642257,
      "grad_norm": 0.14754226803779602,
      "learning_rate": 8.954368098573179e-05,
      "loss": 0.4303,
      "step": 3866
    },
    {
      "epoch": 1.1605642256902762,
      "grad_norm": 0.13434180617332458,
      "learning_rate": 8.949159220546398e-05,
      "loss": 0.3831,
      "step": 3867
    },
    {
      "epoch": 1.1608643457382952,
      "grad_norm": 0.13902609050273895,
      "learning_rate": 8.943950630805742e-05,
      "loss": 0.4089,
      "step": 3868
    },
    {
      "epoch": 1.1611644657863145,
      "grad_norm": 0.14676375687122345,
      "learning_rate": 8.938742330780118e-05,
      "loss": 0.4371,
      "step": 3869
    },
    {
      "epoch": 1.1614645858343338,
      "grad_norm": 0.15333369374275208,
      "learning_rate": 8.933534321898367e-05,
      "loss": 0.4572,
      "step": 3870
    },
    {
      "epoch": 1.161764705882353,
      "grad_norm": 0.13503114879131317,
      "learning_rate": 8.928326605589246e-05,
      "loss": 0.3563,
      "step": 3871
    },
    {
      "epoch": 1.162064825930372,
      "grad_norm": 0.2535243630409241,
      "learning_rate": 8.923119183281432e-05,
      "loss": 0.3526,
      "step": 3872
    },
    {
      "epoch": 1.1623649459783914,
      "grad_norm": 0.14466118812561035,
      "learning_rate": 8.917912056403522e-05,
      "loss": 0.442,
      "step": 3873
    },
    {
      "epoch": 1.1626650660264106,
      "grad_norm": 0.1449826955795288,
      "learning_rate": 8.912705226384035e-05,
      "loss": 0.4138,
      "step": 3874
    },
    {
      "epoch": 1.16296518607443,
      "grad_norm": 0.13851653039455414,
      "learning_rate": 8.907498694651397e-05,
      "loss": 0.3779,
      "step": 3875
    },
    {
      "epoch": 1.163265306122449,
      "grad_norm": 0.14941152930259705,
      "learning_rate": 8.902292462633968e-05,
      "loss": 0.4427,
      "step": 3876
    },
    {
      "epoch": 1.1635654261704682,
      "grad_norm": 0.15614332258701324,
      "learning_rate": 8.897086531760014e-05,
      "loss": 0.4358,
      "step": 3877
    },
    {
      "epoch": 1.1638655462184875,
      "grad_norm": 0.14003868401050568,
      "learning_rate": 8.891880903457721e-05,
      "loss": 0.3631,
      "step": 3878
    },
    {
      "epoch": 1.1641656662665065,
      "grad_norm": 0.14047202467918396,
      "learning_rate": 8.886675579155201e-05,
      "loss": 0.3985,
      "step": 3879
    },
    {
      "epoch": 1.1644657863145258,
      "grad_norm": 0.1737263798713684,
      "learning_rate": 8.881470560280465e-05,
      "loss": 0.4331,
      "step": 3880
    },
    {
      "epoch": 1.164765906362545,
      "grad_norm": 0.15184059739112854,
      "learning_rate": 8.876265848261456e-05,
      "loss": 0.4084,
      "step": 3881
    },
    {
      "epoch": 1.1650660264105643,
      "grad_norm": 0.13204747438430786,
      "learning_rate": 8.871061444526027e-05,
      "loss": 0.385,
      "step": 3882
    },
    {
      "epoch": 1.1653661464585834,
      "grad_norm": 0.15236324071884155,
      "learning_rate": 8.865857350501944e-05,
      "loss": 0.4529,
      "step": 3883
    },
    {
      "epoch": 1.1656662665066027,
      "grad_norm": 0.14037908613681793,
      "learning_rate": 8.860653567616893e-05,
      "loss": 0.4244,
      "step": 3884
    },
    {
      "epoch": 1.165966386554622,
      "grad_norm": 0.14538414776325226,
      "learning_rate": 8.855450097298474e-05,
      "loss": 0.4007,
      "step": 3885
    },
    {
      "epoch": 1.166266506602641,
      "grad_norm": 0.1330530345439911,
      "learning_rate": 8.850246940974191e-05,
      "loss": 0.381,
      "step": 3886
    },
    {
      "epoch": 1.1665666266506602,
      "grad_norm": 0.1374024599790573,
      "learning_rate": 8.845044100071482e-05,
      "loss": 0.4161,
      "step": 3887
    },
    {
      "epoch": 1.1668667466986795,
      "grad_norm": 0.14352889358997345,
      "learning_rate": 8.839841576017679e-05,
      "loss": 0.4249,
      "step": 3888
    },
    {
      "epoch": 1.1671668667466988,
      "grad_norm": 0.14906354248523712,
      "learning_rate": 8.834639370240035e-05,
      "loss": 0.4431,
      "step": 3889
    },
    {
      "epoch": 1.1674669867947178,
      "grad_norm": 0.1526843011379242,
      "learning_rate": 8.829437484165718e-05,
      "loss": 0.4105,
      "step": 3890
    },
    {
      "epoch": 1.167767106842737,
      "grad_norm": 0.1684161275625229,
      "learning_rate": 8.824235919221803e-05,
      "loss": 0.3991,
      "step": 3891
    },
    {
      "epoch": 1.1680672268907564,
      "grad_norm": 0.13714158535003662,
      "learning_rate": 8.819034676835282e-05,
      "loss": 0.4156,
      "step": 3892
    },
    {
      "epoch": 1.1683673469387754,
      "grad_norm": 0.15890493988990784,
      "learning_rate": 8.813833758433061e-05,
      "loss": 0.4439,
      "step": 3893
    },
    {
      "epoch": 1.1686674669867947,
      "grad_norm": 0.213880255818367,
      "learning_rate": 8.808633165441942e-05,
      "loss": 0.4688,
      "step": 3894
    },
    {
      "epoch": 1.168967587034814,
      "grad_norm": 0.14558623731136322,
      "learning_rate": 8.803432899288654e-05,
      "loss": 0.4253,
      "step": 3895
    },
    {
      "epoch": 1.1692677070828332,
      "grad_norm": 0.1360839456319809,
      "learning_rate": 8.79823296139983e-05,
      "loss": 0.387,
      "step": 3896
    },
    {
      "epoch": 1.1695678271308523,
      "grad_norm": 0.14573483169078827,
      "learning_rate": 8.793033353202011e-05,
      "loss": 0.3862,
      "step": 3897
    },
    {
      "epoch": 1.1698679471788715,
      "grad_norm": 0.1445828378200531,
      "learning_rate": 8.787834076121655e-05,
      "loss": 0.4294,
      "step": 3898
    },
    {
      "epoch": 1.1701680672268908,
      "grad_norm": 0.14929187297821045,
      "learning_rate": 8.782635131585122e-05,
      "loss": 0.4345,
      "step": 3899
    },
    {
      "epoch": 1.1704681872749099,
      "grad_norm": 0.13889843225479126,
      "learning_rate": 8.777436521018676e-05,
      "loss": 0.3895,
      "step": 3900
    },
    {
      "epoch": 1.1707683073229291,
      "grad_norm": 0.14753101766109467,
      "learning_rate": 8.772238245848506e-05,
      "loss": 0.451,
      "step": 3901
    },
    {
      "epoch": 1.1710684273709484,
      "grad_norm": 0.15742534399032593,
      "learning_rate": 8.767040307500692e-05,
      "loss": 0.4267,
      "step": 3902
    },
    {
      "epoch": 1.1713685474189677,
      "grad_norm": 0.13733577728271484,
      "learning_rate": 8.761842707401233e-05,
      "loss": 0.3828,
      "step": 3903
    },
    {
      "epoch": 1.1716686674669867,
      "grad_norm": 0.18186461925506592,
      "learning_rate": 8.756645446976034e-05,
      "loss": 0.4499,
      "step": 3904
    },
    {
      "epoch": 1.171968787515006,
      "grad_norm": 0.12226764857769012,
      "learning_rate": 8.751448527650892e-05,
      "loss": 0.3218,
      "step": 3905
    },
    {
      "epoch": 1.1722689075630253,
      "grad_norm": 0.13384030759334564,
      "learning_rate": 8.746251950851536e-05,
      "loss": 0.3828,
      "step": 3906
    },
    {
      "epoch": 1.1725690276110443,
      "grad_norm": 0.13068121671676636,
      "learning_rate": 8.741055718003578e-05,
      "loss": 0.3803,
      "step": 3907
    },
    {
      "epoch": 1.1728691476590636,
      "grad_norm": 0.1547713577747345,
      "learning_rate": 8.735859830532549e-05,
      "loss": 0.4431,
      "step": 3908
    },
    {
      "epoch": 1.1731692677070829,
      "grad_norm": 0.151955708861351,
      "learning_rate": 8.73066428986388e-05,
      "loss": 0.3832,
      "step": 3909
    },
    {
      "epoch": 1.1734693877551021,
      "grad_norm": 0.18591925501823425,
      "learning_rate": 8.725469097422912e-05,
      "loss": 0.4592,
      "step": 3910
    },
    {
      "epoch": 1.1737695078031212,
      "grad_norm": 0.13940228521823883,
      "learning_rate": 8.720274254634873e-05,
      "loss": 0.4156,
      "step": 3911
    },
    {
      "epoch": 1.1740696278511404,
      "grad_norm": 0.15555444359779358,
      "learning_rate": 8.715079762924927e-05,
      "loss": 0.4367,
      "step": 3912
    },
    {
      "epoch": 1.1743697478991597,
      "grad_norm": 0.14583249390125275,
      "learning_rate": 8.709885623718109e-05,
      "loss": 0.4288,
      "step": 3913
    },
    {
      "epoch": 1.1746698679471788,
      "grad_norm": 0.14817452430725098,
      "learning_rate": 8.704691838439381e-05,
      "loss": 0.358,
      "step": 3914
    },
    {
      "epoch": 1.174969987995198,
      "grad_norm": 0.14373117685317993,
      "learning_rate": 8.699498408513592e-05,
      "loss": 0.4168,
      "step": 3915
    },
    {
      "epoch": 1.1752701080432173,
      "grad_norm": 0.1352091282606125,
      "learning_rate": 8.694305335365501e-05,
      "loss": 0.3532,
      "step": 3916
    },
    {
      "epoch": 1.1755702280912366,
      "grad_norm": 0.13123095035552979,
      "learning_rate": 8.689112620419772e-05,
      "loss": 0.3772,
      "step": 3917
    },
    {
      "epoch": 1.1758703481392556,
      "grad_norm": 0.14840468764305115,
      "learning_rate": 8.683920265100966e-05,
      "loss": 0.3659,
      "step": 3918
    },
    {
      "epoch": 1.1761704681872749,
      "grad_norm": 0.1469290405511856,
      "learning_rate": 8.678728270833539e-05,
      "loss": 0.3946,
      "step": 3919
    },
    {
      "epoch": 1.1764705882352942,
      "grad_norm": 0.1446821242570877,
      "learning_rate": 8.673536639041864e-05,
      "loss": 0.4272,
      "step": 3920
    },
    {
      "epoch": 1.1767707082833132,
      "grad_norm": 0.15715628862380981,
      "learning_rate": 8.6683453711502e-05,
      "loss": 0.444,
      "step": 3921
    },
    {
      "epoch": 1.1770708283313325,
      "grad_norm": 0.14631730318069458,
      "learning_rate": 8.663154468582715e-05,
      "loss": 0.4167,
      "step": 3922
    },
    {
      "epoch": 1.1773709483793517,
      "grad_norm": 0.1498534232378006,
      "learning_rate": 8.657963932763475e-05,
      "loss": 0.4399,
      "step": 3923
    },
    {
      "epoch": 1.177671068427371,
      "grad_norm": 0.1498582363128662,
      "learning_rate": 8.652773765116435e-05,
      "loss": 0.4093,
      "step": 3924
    },
    {
      "epoch": 1.17797118847539,
      "grad_norm": 0.13406604528427124,
      "learning_rate": 8.647583967065472e-05,
      "loss": 0.3837,
      "step": 3925
    },
    {
      "epoch": 1.1782713085234093,
      "grad_norm": 0.1385580599308014,
      "learning_rate": 8.642394540034336e-05,
      "loss": 0.4273,
      "step": 3926
    },
    {
      "epoch": 1.1785714285714286,
      "grad_norm": 0.17958009243011475,
      "learning_rate": 8.637205485446691e-05,
      "loss": 0.4117,
      "step": 3927
    },
    {
      "epoch": 1.1788715486194479,
      "grad_norm": 0.1370212733745575,
      "learning_rate": 8.632016804726095e-05,
      "loss": 0.4077,
      "step": 3928
    },
    {
      "epoch": 1.179171668667467,
      "grad_norm": 0.13599421083927155,
      "learning_rate": 8.626828499296005e-05,
      "loss": 0.3587,
      "step": 3929
    },
    {
      "epoch": 1.1794717887154862,
      "grad_norm": 0.1433763951063156,
      "learning_rate": 8.621640570579764e-05,
      "loss": 0.4223,
      "step": 3930
    },
    {
      "epoch": 1.1797719087635055,
      "grad_norm": 0.12710444629192352,
      "learning_rate": 8.616453020000635e-05,
      "loss": 0.3452,
      "step": 3931
    },
    {
      "epoch": 1.1800720288115247,
      "grad_norm": 0.13192912936210632,
      "learning_rate": 8.611265848981749e-05,
      "loss": 0.347,
      "step": 3932
    },
    {
      "epoch": 1.1803721488595438,
      "grad_norm": 0.15339438617229462,
      "learning_rate": 8.606079058946157e-05,
      "loss": 0.4744,
      "step": 3933
    },
    {
      "epoch": 1.180672268907563,
      "grad_norm": 0.13660569489002228,
      "learning_rate": 8.600892651316791e-05,
      "loss": 0.3795,
      "step": 3934
    },
    {
      "epoch": 1.1809723889555823,
      "grad_norm": 0.14108389616012573,
      "learning_rate": 8.595706627516482e-05,
      "loss": 0.4166,
      "step": 3935
    },
    {
      "epoch": 1.1812725090036014,
      "grad_norm": 0.15114083886146545,
      "learning_rate": 8.590520988967958e-05,
      "loss": 0.4707,
      "step": 3936
    },
    {
      "epoch": 1.1815726290516206,
      "grad_norm": 0.1492743194103241,
      "learning_rate": 8.585335737093842e-05,
      "loss": 0.4332,
      "step": 3937
    },
    {
      "epoch": 1.18187274909964,
      "grad_norm": 0.16283780336380005,
      "learning_rate": 8.580150873316639e-05,
      "loss": 0.3972,
      "step": 3938
    },
    {
      "epoch": 1.1821728691476592,
      "grad_norm": 0.1415175348520279,
      "learning_rate": 8.574966399058767e-05,
      "loss": 0.3945,
      "step": 3939
    },
    {
      "epoch": 1.1824729891956782,
      "grad_norm": 0.1329096257686615,
      "learning_rate": 8.56978231574252e-05,
      "loss": 0.3587,
      "step": 3940
    },
    {
      "epoch": 1.1827731092436975,
      "grad_norm": 0.15043918788433075,
      "learning_rate": 8.564598624790098e-05,
      "loss": 0.4029,
      "step": 3941
    },
    {
      "epoch": 1.1830732292917168,
      "grad_norm": 0.1478135883808136,
      "learning_rate": 8.559415327623584e-05,
      "loss": 0.424,
      "step": 3942
    },
    {
      "epoch": 1.1833733493397358,
      "grad_norm": 0.14266376197338104,
      "learning_rate": 8.554232425664954e-05,
      "loss": 0.4114,
      "step": 3943
    },
    {
      "epoch": 1.183673469387755,
      "grad_norm": 0.1355818659067154,
      "learning_rate": 8.549049920336086e-05,
      "loss": 0.3965,
      "step": 3944
    },
    {
      "epoch": 1.1839735894357744,
      "grad_norm": 0.1327577829360962,
      "learning_rate": 8.54386781305873e-05,
      "loss": 0.3729,
      "step": 3945
    },
    {
      "epoch": 1.1842737094837936,
      "grad_norm": 0.15911321341991425,
      "learning_rate": 8.538686105254541e-05,
      "loss": 0.3915,
      "step": 3946
    },
    {
      "epoch": 1.1845738295318127,
      "grad_norm": 0.14487755298614502,
      "learning_rate": 8.533504798345065e-05,
      "loss": 0.408,
      "step": 3947
    },
    {
      "epoch": 1.184873949579832,
      "grad_norm": 0.14488984644412994,
      "learning_rate": 8.528323893751736e-05,
      "loss": 0.4323,
      "step": 3948
    },
    {
      "epoch": 1.1851740696278512,
      "grad_norm": 0.14207231998443604,
      "learning_rate": 8.523143392895863e-05,
      "loss": 0.3827,
      "step": 3949
    },
    {
      "epoch": 1.1854741896758703,
      "grad_norm": 0.17004531621932983,
      "learning_rate": 8.517963297198672e-05,
      "loss": 0.3853,
      "step": 3950
    },
    {
      "epoch": 1.1857743097238895,
      "grad_norm": 0.1584494709968567,
      "learning_rate": 8.512783608081252e-05,
      "loss": 0.4096,
      "step": 3951
    },
    {
      "epoch": 1.1860744297719088,
      "grad_norm": 0.12412303686141968,
      "learning_rate": 8.507604326964601e-05,
      "loss": 0.3269,
      "step": 3952
    },
    {
      "epoch": 1.186374549819928,
      "grad_norm": 0.1437680572271347,
      "learning_rate": 8.502425455269588e-05,
      "loss": 0.3892,
      "step": 3953
    },
    {
      "epoch": 1.1866746698679471,
      "grad_norm": 0.1382899433374405,
      "learning_rate": 8.497246994416977e-05,
      "loss": 0.4005,
      "step": 3954
    },
    {
      "epoch": 1.1869747899159664,
      "grad_norm": 0.1461184024810791,
      "learning_rate": 8.492068945827425e-05,
      "loss": 0.4206,
      "step": 3955
    },
    {
      "epoch": 1.1872749099639857,
      "grad_norm": 0.14180120825767517,
      "learning_rate": 8.486891310921468e-05,
      "loss": 0.4101,
      "step": 3956
    },
    {
      "epoch": 1.1875750300120047,
      "grad_norm": 0.1335573047399521,
      "learning_rate": 8.481714091119525e-05,
      "loss": 0.3683,
      "step": 3957
    },
    {
      "epoch": 1.187875150060024,
      "grad_norm": 0.1252361536026001,
      "learning_rate": 8.476537287841915e-05,
      "loss": 0.3476,
      "step": 3958
    },
    {
      "epoch": 1.1881752701080432,
      "grad_norm": 0.13983231782913208,
      "learning_rate": 8.47136090250883e-05,
      "loss": 0.4322,
      "step": 3959
    },
    {
      "epoch": 1.1884753901560625,
      "grad_norm": 0.14104896783828735,
      "learning_rate": 8.466184936540351e-05,
      "loss": 0.3907,
      "step": 3960
    },
    {
      "epoch": 1.1887755102040816,
      "grad_norm": 0.14977477490901947,
      "learning_rate": 8.46100939135645e-05,
      "loss": 0.4129,
      "step": 3961
    },
    {
      "epoch": 1.1890756302521008,
      "grad_norm": 0.1299585998058319,
      "learning_rate": 8.455834268376972e-05,
      "loss": 0.3244,
      "step": 3962
    },
    {
      "epoch": 1.18937575030012,
      "grad_norm": 0.13615921139717102,
      "learning_rate": 8.450659569021662e-05,
      "loss": 0.3732,
      "step": 3963
    },
    {
      "epoch": 1.1896758703481392,
      "grad_norm": 0.1456853151321411,
      "learning_rate": 8.445485294710131e-05,
      "loss": 0.4079,
      "step": 3964
    },
    {
      "epoch": 1.1899759903961584,
      "grad_norm": 0.15386579930782318,
      "learning_rate": 8.440311446861881e-05,
      "loss": 0.4494,
      "step": 3965
    },
    {
      "epoch": 1.1902761104441777,
      "grad_norm": 0.15661481022834778,
      "learning_rate": 8.435138026896305e-05,
      "loss": 0.4654,
      "step": 3966
    },
    {
      "epoch": 1.190576230492197,
      "grad_norm": 0.14809736609458923,
      "learning_rate": 8.429965036232668e-05,
      "loss": 0.3916,
      "step": 3967
    },
    {
      "epoch": 1.190876350540216,
      "grad_norm": 0.1559191644191742,
      "learning_rate": 8.424792476290117e-05,
      "loss": 0.3504,
      "step": 3968
    },
    {
      "epoch": 1.1911764705882353,
      "grad_norm": 0.15862765908241272,
      "learning_rate": 8.419620348487692e-05,
      "loss": 0.4435,
      "step": 3969
    },
    {
      "epoch": 1.1914765906362546,
      "grad_norm": 0.14491339027881622,
      "learning_rate": 8.414448654244297e-05,
      "loss": 0.3922,
      "step": 3970
    },
    {
      "epoch": 1.1917767106842736,
      "grad_norm": 0.14805670082569122,
      "learning_rate": 8.409277394978739e-05,
      "loss": 0.3863,
      "step": 3971
    },
    {
      "epoch": 1.1920768307322929,
      "grad_norm": 0.16750305891036987,
      "learning_rate": 8.404106572109686e-05,
      "loss": 0.3847,
      "step": 3972
    },
    {
      "epoch": 1.1923769507803121,
      "grad_norm": 0.14482684433460236,
      "learning_rate": 8.398936187055693e-05,
      "loss": 0.376,
      "step": 3973
    },
    {
      "epoch": 1.1926770708283314,
      "grad_norm": 0.15524138510227203,
      "learning_rate": 8.3937662412352e-05,
      "loss": 0.4175,
      "step": 3974
    },
    {
      "epoch": 1.1929771908763505,
      "grad_norm": 0.16882078349590302,
      "learning_rate": 8.388596736066523e-05,
      "loss": 0.4294,
      "step": 3975
    },
    {
      "epoch": 1.1932773109243697,
      "grad_norm": 0.1448148787021637,
      "learning_rate": 8.38342767296785e-05,
      "loss": 0.4201,
      "step": 3976
    },
    {
      "epoch": 1.193577430972389,
      "grad_norm": 0.1521812230348587,
      "learning_rate": 8.378259053357261e-05,
      "loss": 0.4441,
      "step": 3977
    },
    {
      "epoch": 1.193877551020408,
      "grad_norm": 0.13205227255821228,
      "learning_rate": 8.373090878652706e-05,
      "loss": 0.3791,
      "step": 3978
    },
    {
      "epoch": 1.1941776710684273,
      "grad_norm": 0.13769741356372833,
      "learning_rate": 8.367923150272008e-05,
      "loss": 0.3589,
      "step": 3979
    },
    {
      "epoch": 1.1944777911164466,
      "grad_norm": 0.1407497078180313,
      "learning_rate": 8.362755869632883e-05,
      "loss": 0.3977,
      "step": 3980
    },
    {
      "epoch": 1.1947779111644659,
      "grad_norm": 0.2244221419095993,
      "learning_rate": 8.35758903815291e-05,
      "loss": 0.3696,
      "step": 3981
    },
    {
      "epoch": 1.195078031212485,
      "grad_norm": 0.1452600508928299,
      "learning_rate": 8.352422657249556e-05,
      "loss": 0.399,
      "step": 3982
    },
    {
      "epoch": 1.1953781512605042,
      "grad_norm": 0.1496151238679886,
      "learning_rate": 8.347256728340152e-05,
      "loss": 0.4202,
      "step": 3983
    },
    {
      "epoch": 1.1956782713085234,
      "grad_norm": 0.16077955067157745,
      "learning_rate": 8.342091252841909e-05,
      "loss": 0.451,
      "step": 3984
    },
    {
      "epoch": 1.1959783913565427,
      "grad_norm": 0.1421745866537094,
      "learning_rate": 8.336926232171925e-05,
      "loss": 0.3981,
      "step": 3985
    },
    {
      "epoch": 1.1962785114045618,
      "grad_norm": 0.13930204510688782,
      "learning_rate": 8.331761667747158e-05,
      "loss": 0.3909,
      "step": 3986
    },
    {
      "epoch": 1.196578631452581,
      "grad_norm": 0.13473330438137054,
      "learning_rate": 8.326597560984445e-05,
      "loss": 0.3925,
      "step": 3987
    },
    {
      "epoch": 1.1968787515006003,
      "grad_norm": 0.1469656080007553,
      "learning_rate": 8.321433913300509e-05,
      "loss": 0.4158,
      "step": 3988
    },
    {
      "epoch": 1.1971788715486196,
      "grad_norm": 0.14228259027004242,
      "learning_rate": 8.31627072611193e-05,
      "loss": 0.3434,
      "step": 3989
    },
    {
      "epoch": 1.1974789915966386,
      "grad_norm": 0.15231838822364807,
      "learning_rate": 8.311108000835167e-05,
      "loss": 0.4376,
      "step": 3990
    },
    {
      "epoch": 1.197779111644658,
      "grad_norm": 0.15814101696014404,
      "learning_rate": 8.30594573888656e-05,
      "loss": 0.4136,
      "step": 3991
    },
    {
      "epoch": 1.1980792316926772,
      "grad_norm": 0.1386740654706955,
      "learning_rate": 8.300783941682315e-05,
      "loss": 0.3935,
      "step": 3992
    },
    {
      "epoch": 1.1983793517406962,
      "grad_norm": 0.15326248109340668,
      "learning_rate": 8.29562261063851e-05,
      "loss": 0.4072,
      "step": 3993
    },
    {
      "epoch": 1.1986794717887155,
      "grad_norm": 0.12558022141456604,
      "learning_rate": 8.290461747171103e-05,
      "loss": 0.3537,
      "step": 3994
    },
    {
      "epoch": 1.1989795918367347,
      "grad_norm": 0.1449139565229416,
      "learning_rate": 8.285301352695905e-05,
      "loss": 0.3984,
      "step": 3995
    },
    {
      "epoch": 1.199279711884754,
      "grad_norm": 0.2730596661567688,
      "learning_rate": 8.280141428628628e-05,
      "loss": 0.4265,
      "step": 3996
    },
    {
      "epoch": 1.199579831932773,
      "grad_norm": 0.14818298816680908,
      "learning_rate": 8.274981976384825e-05,
      "loss": 0.3876,
      "step": 3997
    },
    {
      "epoch": 1.1998799519807923,
      "grad_norm": 0.1385853886604309,
      "learning_rate": 8.269822997379935e-05,
      "loss": 0.4075,
      "step": 3998
    },
    {
      "epoch": 1.2001800720288116,
      "grad_norm": 0.14927978813648224,
      "learning_rate": 8.264664493029268e-05,
      "loss": 0.3505,
      "step": 3999
    },
    {
      "epoch": 1.2004801920768307,
      "grad_norm": 0.14248241484165192,
      "learning_rate": 8.259506464747999e-05,
      "loss": 0.3868,
      "step": 4000
    },
    {
      "epoch": 1.20078031212485,
      "grad_norm": 0.16264191269874573,
      "learning_rate": 8.254348913951176e-05,
      "loss": 0.441,
      "step": 4001
    },
    {
      "epoch": 1.2010804321728692,
      "grad_norm": 0.14608046412467957,
      "learning_rate": 8.24919184205371e-05,
      "loss": 0.4003,
      "step": 4002
    },
    {
      "epoch": 1.2013805522208885,
      "grad_norm": 0.1493375152349472,
      "learning_rate": 8.244035250470384e-05,
      "loss": 0.4105,
      "step": 4003
    },
    {
      "epoch": 1.2016806722689075,
      "grad_norm": 0.1431288868188858,
      "learning_rate": 8.238879140615855e-05,
      "loss": 0.406,
      "step": 4004
    },
    {
      "epoch": 1.2019807923169268,
      "grad_norm": 0.15224696695804596,
      "learning_rate": 8.23372351390464e-05,
      "loss": 0.4334,
      "step": 4005
    },
    {
      "epoch": 1.202280912364946,
      "grad_norm": 0.13419242203235626,
      "learning_rate": 8.228568371751123e-05,
      "loss": 0.3413,
      "step": 4006
    },
    {
      "epoch": 1.202581032412965,
      "grad_norm": 0.14649564027786255,
      "learning_rate": 8.223413715569565e-05,
      "loss": 0.4004,
      "step": 4007
    },
    {
      "epoch": 1.2028811524609844,
      "grad_norm": 0.15351301431655884,
      "learning_rate": 8.218259546774081e-05,
      "loss": 0.4088,
      "step": 4008
    },
    {
      "epoch": 1.2031812725090036,
      "grad_norm": 0.13952364027500153,
      "learning_rate": 8.213105866778659e-05,
      "loss": 0.3773,
      "step": 4009
    },
    {
      "epoch": 1.203481392557023,
      "grad_norm": 0.13849298655986786,
      "learning_rate": 8.207952676997153e-05,
      "loss": 0.398,
      "step": 4010
    },
    {
      "epoch": 1.203781512605042,
      "grad_norm": 0.14556635916233063,
      "learning_rate": 8.20279997884328e-05,
      "loss": 0.423,
      "step": 4011
    },
    {
      "epoch": 1.2040816326530612,
      "grad_norm": 0.14165206253528595,
      "learning_rate": 8.197647773730627e-05,
      "loss": 0.3917,
      "step": 4012
    },
    {
      "epoch": 1.2043817527010805,
      "grad_norm": 0.15320447087287903,
      "learning_rate": 8.192496063072644e-05,
      "loss": 0.398,
      "step": 4013
    },
    {
      "epoch": 1.2046818727490995,
      "grad_norm": 0.1356806606054306,
      "learning_rate": 8.187344848282631e-05,
      "loss": 0.3826,
      "step": 4014
    },
    {
      "epoch": 1.2049819927971188,
      "grad_norm": 0.13317741453647614,
      "learning_rate": 8.182194130773783e-05,
      "loss": 0.3748,
      "step": 4015
    },
    {
      "epoch": 1.205282112845138,
      "grad_norm": 0.13315704464912415,
      "learning_rate": 8.177043911959127e-05,
      "loss": 0.3736,
      "step": 4016
    },
    {
      "epoch": 1.2055822328931574,
      "grad_norm": 0.1495973914861679,
      "learning_rate": 8.17189419325157e-05,
      "loss": 0.4167,
      "step": 4017
    },
    {
      "epoch": 1.2058823529411764,
      "grad_norm": 0.15506240725517273,
      "learning_rate": 8.166744976063881e-05,
      "loss": 0.4152,
      "step": 4018
    },
    {
      "epoch": 1.2061824729891957,
      "grad_norm": 0.1537008285522461,
      "learning_rate": 8.161596261808687e-05,
      "loss": 0.4253,
      "step": 4019
    },
    {
      "epoch": 1.206482593037215,
      "grad_norm": 0.13522090017795563,
      "learning_rate": 8.156448051898476e-05,
      "loss": 0.3744,
      "step": 4020
    },
    {
      "epoch": 1.206782713085234,
      "grad_norm": 0.16604888439178467,
      "learning_rate": 8.151300347745604e-05,
      "loss": 0.4121,
      "step": 4021
    },
    {
      "epoch": 1.2070828331332533,
      "grad_norm": 0.13932572305202484,
      "learning_rate": 8.146153150762281e-05,
      "loss": 0.369,
      "step": 4022
    },
    {
      "epoch": 1.2073829531812725,
      "grad_norm": 0.18557420372962952,
      "learning_rate": 8.141006462360587e-05,
      "loss": 0.3774,
      "step": 4023
    },
    {
      "epoch": 1.2076830732292918,
      "grad_norm": 0.16589613258838654,
      "learning_rate": 8.135860283952453e-05,
      "loss": 0.3496,
      "step": 4024
    },
    {
      "epoch": 1.2079831932773109,
      "grad_norm": 0.1459798514842987,
      "learning_rate": 8.130714616949673e-05,
      "loss": 0.412,
      "step": 4025
    },
    {
      "epoch": 1.2082833133253301,
      "grad_norm": 0.14163541793823242,
      "learning_rate": 8.125569462763907e-05,
      "loss": 0.4032,
      "step": 4026
    },
    {
      "epoch": 1.2085834333733494,
      "grad_norm": 0.1819797307252884,
      "learning_rate": 8.120424822806665e-05,
      "loss": 0.4417,
      "step": 4027
    },
    {
      "epoch": 1.2088835534213684,
      "grad_norm": 0.19398806989192963,
      "learning_rate": 8.115280698489317e-05,
      "loss": 0.3794,
      "step": 4028
    },
    {
      "epoch": 1.2091836734693877,
      "grad_norm": 0.1349167376756668,
      "learning_rate": 8.1101370912231e-05,
      "loss": 0.3868,
      "step": 4029
    },
    {
      "epoch": 1.209483793517407,
      "grad_norm": 0.1430043876171112,
      "learning_rate": 8.1049940024191e-05,
      "loss": 0.3705,
      "step": 4030
    },
    {
      "epoch": 1.2097839135654262,
      "grad_norm": 0.14163738489151,
      "learning_rate": 8.09985143348827e-05,
      "loss": 0.3974,
      "step": 4031
    },
    {
      "epoch": 1.2100840336134453,
      "grad_norm": 0.13367514312267303,
      "learning_rate": 8.09470938584141e-05,
      "loss": 0.3634,
      "step": 4032
    },
    {
      "epoch": 1.2103841536614646,
      "grad_norm": 0.30375921726226807,
      "learning_rate": 8.08956786088918e-05,
      "loss": 0.4015,
      "step": 4033
    },
    {
      "epoch": 1.2106842737094838,
      "grad_norm": 0.13942891359329224,
      "learning_rate": 8.084426860042105e-05,
      "loss": 0.3996,
      "step": 4034
    },
    {
      "epoch": 1.2109843937575029,
      "grad_norm": 0.1532803177833557,
      "learning_rate": 8.079286384710554e-05,
      "loss": 0.4384,
      "step": 4035
    },
    {
      "epoch": 1.2112845138055222,
      "grad_norm": 0.15898968279361725,
      "learning_rate": 8.074146436304757e-05,
      "loss": 0.4394,
      "step": 4036
    },
    {
      "epoch": 1.2115846338535414,
      "grad_norm": 0.1539100557565689,
      "learning_rate": 8.069007016234806e-05,
      "loss": 0.3809,
      "step": 4037
    },
    {
      "epoch": 1.2118847539015607,
      "grad_norm": 0.14513103663921356,
      "learning_rate": 8.063868125910639e-05,
      "loss": 0.3703,
      "step": 4038
    },
    {
      "epoch": 1.2121848739495797,
      "grad_norm": 0.1389445811510086,
      "learning_rate": 8.058729766742045e-05,
      "loss": 0.4107,
      "step": 4039
    },
    {
      "epoch": 1.212484993997599,
      "grad_norm": 0.24129579961299896,
      "learning_rate": 8.053591940138686e-05,
      "loss": 0.3628,
      "step": 4040
    },
    {
      "epoch": 1.2127851140456183,
      "grad_norm": 0.13357827067375183,
      "learning_rate": 8.048454647510055e-05,
      "loss": 0.3598,
      "step": 4041
    },
    {
      "epoch": 1.2130852340936373,
      "grad_norm": 0.12578484416007996,
      "learning_rate": 8.043317890265516e-05,
      "loss": 0.3379,
      "step": 4042
    },
    {
      "epoch": 1.2133853541416566,
      "grad_norm": 0.2575438618659973,
      "learning_rate": 8.038181669814278e-05,
      "loss": 0.3604,
      "step": 4043
    },
    {
      "epoch": 1.2136854741896759,
      "grad_norm": 0.14429044723510742,
      "learning_rate": 8.033045987565401e-05,
      "loss": 0.3945,
      "step": 4044
    },
    {
      "epoch": 1.2139855942376951,
      "grad_norm": 0.2189117670059204,
      "learning_rate": 8.027910844927808e-05,
      "loss": 0.3891,
      "step": 4045
    },
    {
      "epoch": 1.2142857142857142,
      "grad_norm": 0.1560732126235962,
      "learning_rate": 8.022776243310258e-05,
      "loss": 0.4577,
      "step": 4046
    },
    {
      "epoch": 1.2145858343337335,
      "grad_norm": 0.15747374296188354,
      "learning_rate": 8.017642184121372e-05,
      "loss": 0.4084,
      "step": 4047
    },
    {
      "epoch": 1.2148859543817527,
      "grad_norm": 0.1571548581123352,
      "learning_rate": 8.012508668769624e-05,
      "loss": 0.4563,
      "step": 4048
    },
    {
      "epoch": 1.215186074429772,
      "grad_norm": 0.1482471078634262,
      "learning_rate": 8.007375698663335e-05,
      "loss": 0.4163,
      "step": 4049
    },
    {
      "epoch": 1.215486194477791,
      "grad_norm": 0.16825692355632782,
      "learning_rate": 8.002243275210669e-05,
      "loss": 0.4144,
      "step": 4050
    },
    {
      "epoch": 1.2157863145258103,
      "grad_norm": 0.1516883671283722,
      "learning_rate": 7.99711139981966e-05,
      "loss": 0.4205,
      "step": 4051
    },
    {
      "epoch": 1.2160864345738296,
      "grad_norm": 0.15317898988723755,
      "learning_rate": 7.991980073898164e-05,
      "loss": 0.4549,
      "step": 4052
    },
    {
      "epoch": 1.2163865546218489,
      "grad_norm": 0.1662512868642807,
      "learning_rate": 7.986849298853917e-05,
      "loss": 0.4926,
      "step": 4053
    },
    {
      "epoch": 1.216686674669868,
      "grad_norm": 0.1314711719751358,
      "learning_rate": 7.981719076094479e-05,
      "loss": 0.3536,
      "step": 4054
    },
    {
      "epoch": 1.2169867947178872,
      "grad_norm": 0.22032858431339264,
      "learning_rate": 7.976589407027266e-05,
      "loss": 0.418,
      "step": 4055
    },
    {
      "epoch": 1.2172869147659064,
      "grad_norm": 0.1417960524559021,
      "learning_rate": 7.971460293059551e-05,
      "loss": 0.396,
      "step": 4056
    },
    {
      "epoch": 1.2175870348139255,
      "grad_norm": 0.1375245749950409,
      "learning_rate": 7.966331735598445e-05,
      "loss": 0.363,
      "step": 4057
    },
    {
      "epoch": 1.2178871548619448,
      "grad_norm": 0.1464424729347229,
      "learning_rate": 7.961203736050904e-05,
      "loss": 0.3651,
      "step": 4058
    },
    {
      "epoch": 1.218187274909964,
      "grad_norm": 0.13893769681453705,
      "learning_rate": 7.956076295823744e-05,
      "loss": 0.3665,
      "step": 4059
    },
    {
      "epoch": 1.2184873949579833,
      "grad_norm": 0.13774627447128296,
      "learning_rate": 7.950949416323612e-05,
      "loss": 0.3287,
      "step": 4060
    },
    {
      "epoch": 1.2187875150060024,
      "grad_norm": 0.14033019542694092,
      "learning_rate": 7.945823098957015e-05,
      "loss": 0.4057,
      "step": 4061
    },
    {
      "epoch": 1.2190876350540216,
      "grad_norm": 0.14156651496887207,
      "learning_rate": 7.940697345130296e-05,
      "loss": 0.3939,
      "step": 4062
    },
    {
      "epoch": 1.219387755102041,
      "grad_norm": 0.14618803560733795,
      "learning_rate": 7.935572156249644e-05,
      "loss": 0.3754,
      "step": 4063
    },
    {
      "epoch": 1.21968787515006,
      "grad_norm": 0.15165431797504425,
      "learning_rate": 7.930447533721102e-05,
      "loss": 0.3706,
      "step": 4064
    },
    {
      "epoch": 1.2199879951980792,
      "grad_norm": 0.16142675280570984,
      "learning_rate": 7.925323478950551e-05,
      "loss": 0.4285,
      "step": 4065
    },
    {
      "epoch": 1.2202881152460985,
      "grad_norm": 0.14201250672340393,
      "learning_rate": 7.920199993343709e-05,
      "loss": 0.3671,
      "step": 4066
    },
    {
      "epoch": 1.2205882352941178,
      "grad_norm": 0.1458989679813385,
      "learning_rate": 7.915077078306154e-05,
      "loss": 0.4101,
      "step": 4067
    },
    {
      "epoch": 1.2208883553421368,
      "grad_norm": 0.17755059897899628,
      "learning_rate": 7.909954735243295e-05,
      "loss": 0.4466,
      "step": 4068
    },
    {
      "epoch": 1.221188475390156,
      "grad_norm": 0.15208066999912262,
      "learning_rate": 7.904832965560385e-05,
      "loss": 0.3984,
      "step": 4069
    },
    {
      "epoch": 1.2214885954381753,
      "grad_norm": 0.15624454617500305,
      "learning_rate": 7.899711770662532e-05,
      "loss": 0.4145,
      "step": 4070
    },
    {
      "epoch": 1.2217887154861944,
      "grad_norm": 0.1603109985589981,
      "learning_rate": 7.894591151954666e-05,
      "loss": 0.3609,
      "step": 4071
    },
    {
      "epoch": 1.2220888355342137,
      "grad_norm": 0.1522332727909088,
      "learning_rate": 7.889471110841581e-05,
      "loss": 0.3777,
      "step": 4072
    },
    {
      "epoch": 1.222388955582233,
      "grad_norm": 0.15223602950572968,
      "learning_rate": 7.884351648727895e-05,
      "loss": 0.4138,
      "step": 4073
    },
    {
      "epoch": 1.2226890756302522,
      "grad_norm": 0.13721860945224762,
      "learning_rate": 7.879232767018072e-05,
      "loss": 0.3867,
      "step": 4074
    },
    {
      "epoch": 1.2229891956782712,
      "grad_norm": 0.14906872808933258,
      "learning_rate": 7.874114467116422e-05,
      "loss": 0.3974,
      "step": 4075
    },
    {
      "epoch": 1.2232893157262905,
      "grad_norm": 0.14093011617660522,
      "learning_rate": 7.868996750427096e-05,
      "loss": 0.3888,
      "step": 4076
    },
    {
      "epoch": 1.2235894357743098,
      "grad_norm": 0.1495698243379593,
      "learning_rate": 7.863879618354069e-05,
      "loss": 0.4007,
      "step": 4077
    },
    {
      "epoch": 1.2238895558223288,
      "grad_norm": 0.1426883190870285,
      "learning_rate": 7.858763072301181e-05,
      "loss": 0.4609,
      "step": 4078
    },
    {
      "epoch": 1.224189675870348,
      "grad_norm": 0.12629884481430054,
      "learning_rate": 7.853647113672089e-05,
      "loss": 0.3626,
      "step": 4079
    },
    {
      "epoch": 1.2244897959183674,
      "grad_norm": 0.1275751143693924,
      "learning_rate": 7.848531743870297e-05,
      "loss": 0.356,
      "step": 4080
    },
    {
      "epoch": 1.2247899159663866,
      "grad_norm": 0.14233806729316711,
      "learning_rate": 7.843416964299155e-05,
      "loss": 0.4043,
      "step": 4081
    },
    {
      "epoch": 1.2250900360144057,
      "grad_norm": 0.14755657315254211,
      "learning_rate": 7.838302776361837e-05,
      "loss": 0.4217,
      "step": 4082
    },
    {
      "epoch": 1.225390156062425,
      "grad_norm": 0.13869772851467133,
      "learning_rate": 7.833189181461367e-05,
      "loss": 0.3906,
      "step": 4083
    },
    {
      "epoch": 1.2256902761104442,
      "grad_norm": 0.15427730977535248,
      "learning_rate": 7.828076181000603e-05,
      "loss": 0.3837,
      "step": 4084
    },
    {
      "epoch": 1.2259903961584633,
      "grad_norm": 0.14841613173484802,
      "learning_rate": 7.822963776382229e-05,
      "loss": 0.4219,
      "step": 4085
    },
    {
      "epoch": 1.2262905162064826,
      "grad_norm": 0.13470859825611115,
      "learning_rate": 7.817851969008782e-05,
      "loss": 0.3649,
      "step": 4086
    },
    {
      "epoch": 1.2265906362545018,
      "grad_norm": 0.13717618584632874,
      "learning_rate": 7.812740760282624e-05,
      "loss": 0.375,
      "step": 4087
    },
    {
      "epoch": 1.226890756302521,
      "grad_norm": 0.161216139793396,
      "learning_rate": 7.807630151605957e-05,
      "loss": 0.4067,
      "step": 4088
    },
    {
      "epoch": 1.2271908763505401,
      "grad_norm": 0.1632506400346756,
      "learning_rate": 7.802520144380823e-05,
      "loss": 0.383,
      "step": 4089
    },
    {
      "epoch": 1.2274909963985594,
      "grad_norm": 0.20266014337539673,
      "learning_rate": 7.797410740009084e-05,
      "loss": 0.3881,
      "step": 4090
    },
    {
      "epoch": 1.2277911164465787,
      "grad_norm": 0.14472752809524536,
      "learning_rate": 7.792301939892458e-05,
      "loss": 0.3934,
      "step": 4091
    },
    {
      "epoch": 1.2280912364945977,
      "grad_norm": 0.40670523047447205,
      "learning_rate": 7.787193745432478e-05,
      "loss": 0.3612,
      "step": 4092
    },
    {
      "epoch": 1.228391356542617,
      "grad_norm": 0.14954715967178345,
      "learning_rate": 7.78208615803052e-05,
      "loss": 0.4437,
      "step": 4093
    },
    {
      "epoch": 1.2286914765906363,
      "grad_norm": 0.16645628213882446,
      "learning_rate": 7.776979179087793e-05,
      "loss": 0.4545,
      "step": 4094
    },
    {
      "epoch": 1.2289915966386555,
      "grad_norm": 0.1424858123064041,
      "learning_rate": 7.771872810005341e-05,
      "loss": 0.389,
      "step": 4095
    },
    {
      "epoch": 1.2292917166866746,
      "grad_norm": 0.1527540236711502,
      "learning_rate": 7.766767052184027e-05,
      "loss": 0.3866,
      "step": 4096
    },
    {
      "epoch": 1.2295918367346939,
      "grad_norm": 0.14466696977615356,
      "learning_rate": 7.76166190702457e-05,
      "loss": 0.4092,
      "step": 4097
    },
    {
      "epoch": 1.2298919567827131,
      "grad_norm": 0.1450721174478531,
      "learning_rate": 7.756557375927503e-05,
      "loss": 0.3895,
      "step": 4098
    },
    {
      "epoch": 1.2301920768307322,
      "grad_norm": 0.1380809098482132,
      "learning_rate": 7.751453460293193e-05,
      "loss": 0.3663,
      "step": 4099
    },
    {
      "epoch": 1.2304921968787514,
      "grad_norm": 0.15767262876033783,
      "learning_rate": 7.746350161521845e-05,
      "loss": 0.3765,
      "step": 4100
    },
    {
      "epoch": 1.2307923169267707,
      "grad_norm": 0.14586131274700165,
      "learning_rate": 7.741247481013485e-05,
      "loss": 0.4031,
      "step": 4101
    },
    {
      "epoch": 1.23109243697479,
      "grad_norm": 0.19582970440387726,
      "learning_rate": 7.736145420167981e-05,
      "loss": 0.3862,
      "step": 4102
    },
    {
      "epoch": 1.231392557022809,
      "grad_norm": 0.14220896363258362,
      "learning_rate": 7.731043980385026e-05,
      "loss": 0.3616,
      "step": 4103
    },
    {
      "epoch": 1.2316926770708283,
      "grad_norm": 0.1587248295545578,
      "learning_rate": 7.72594316306413e-05,
      "loss": 0.4277,
      "step": 4104
    },
    {
      "epoch": 1.2319927971188476,
      "grad_norm": 0.1498514711856842,
      "learning_rate": 7.720842969604658e-05,
      "loss": 0.441,
      "step": 4105
    },
    {
      "epoch": 1.2322929171668668,
      "grad_norm": 0.1516525149345398,
      "learning_rate": 7.71574340140578e-05,
      "loss": 0.3958,
      "step": 4106
    },
    {
      "epoch": 1.232593037214886,
      "grad_norm": 0.13693274557590485,
      "learning_rate": 7.710644459866507e-05,
      "loss": 0.3975,
      "step": 4107
    },
    {
      "epoch": 1.2328931572629052,
      "grad_norm": 0.15817703306674957,
      "learning_rate": 7.705546146385676e-05,
      "loss": 0.4548,
      "step": 4108
    },
    {
      "epoch": 1.2331932773109244,
      "grad_norm": 0.1288352757692337,
      "learning_rate": 7.700448462361954e-05,
      "loss": 0.3288,
      "step": 4109
    },
    {
      "epoch": 1.2334933973589437,
      "grad_norm": 0.14892712235450745,
      "learning_rate": 7.695351409193823e-05,
      "loss": 0.4075,
      "step": 4110
    },
    {
      "epoch": 1.2337935174069627,
      "grad_norm": 0.1700790822505951,
      "learning_rate": 7.690254988279608e-05,
      "loss": 0.4417,
      "step": 4111
    },
    {
      "epoch": 1.234093637454982,
      "grad_norm": 0.1455063372850418,
      "learning_rate": 7.685159201017451e-05,
      "loss": 0.3726,
      "step": 4112
    },
    {
      "epoch": 1.2343937575030013,
      "grad_norm": 0.14195141196250916,
      "learning_rate": 7.680064048805326e-05,
      "loss": 0.3905,
      "step": 4113
    },
    {
      "epoch": 1.2346938775510203,
      "grad_norm": 0.13561753928661346,
      "learning_rate": 7.674969533041028e-05,
      "loss": 0.373,
      "step": 4114
    },
    {
      "epoch": 1.2349939975990396,
      "grad_norm": 0.13975995779037476,
      "learning_rate": 7.669875655122174e-05,
      "loss": 0.3559,
      "step": 4115
    },
    {
      "epoch": 1.2352941176470589,
      "grad_norm": 0.1478702872991562,
      "learning_rate": 7.664782416446221e-05,
      "loss": 0.3822,
      "step": 4116
    },
    {
      "epoch": 1.2355942376950781,
      "grad_norm": 0.12930183112621307,
      "learning_rate": 7.659689818410433e-05,
      "loss": 0.3364,
      "step": 4117
    },
    {
      "epoch": 1.2358943577430972,
      "grad_norm": 0.15074846148490906,
      "learning_rate": 7.654597862411906e-05,
      "loss": 0.4286,
      "step": 4118
    },
    {
      "epoch": 1.2361944777911165,
      "grad_norm": 0.14158159494400024,
      "learning_rate": 7.649506549847564e-05,
      "loss": 0.3773,
      "step": 4119
    },
    {
      "epoch": 1.2364945978391357,
      "grad_norm": 0.1416250467300415,
      "learning_rate": 7.644415882114145e-05,
      "loss": 0.3834,
      "step": 4120
    },
    {
      "epoch": 1.2367947178871548,
      "grad_norm": 0.14171995222568512,
      "learning_rate": 7.639325860608221e-05,
      "loss": 0.3819,
      "step": 4121
    },
    {
      "epoch": 1.237094837935174,
      "grad_norm": 0.14769646525382996,
      "learning_rate": 7.63423648672618e-05,
      "loss": 0.4328,
      "step": 4122
    },
    {
      "epoch": 1.2373949579831933,
      "grad_norm": 0.2000008076429367,
      "learning_rate": 7.629147761864229e-05,
      "loss": 0.4315,
      "step": 4123
    },
    {
      "epoch": 1.2376950780312126,
      "grad_norm": 0.13998661935329437,
      "learning_rate": 7.624059687418403e-05,
      "loss": 0.3934,
      "step": 4124
    },
    {
      "epoch": 1.2379951980792316,
      "grad_norm": 0.15924957394599915,
      "learning_rate": 7.61897226478456e-05,
      "loss": 0.3787,
      "step": 4125
    },
    {
      "epoch": 1.238295318127251,
      "grad_norm": 0.13868294656276703,
      "learning_rate": 7.613885495358371e-05,
      "loss": 0.3768,
      "step": 4126
    },
    {
      "epoch": 1.2385954381752702,
      "grad_norm": 0.17898708581924438,
      "learning_rate": 7.608799380535339e-05,
      "loss": 0.3926,
      "step": 4127
    },
    {
      "epoch": 1.2388955582232892,
      "grad_norm": 0.14534014463424683,
      "learning_rate": 7.603713921710779e-05,
      "loss": 0.4193,
      "step": 4128
    },
    {
      "epoch": 1.2391956782713085,
      "grad_norm": 0.13152074813842773,
      "learning_rate": 7.598629120279823e-05,
      "loss": 0.3697,
      "step": 4129
    },
    {
      "epoch": 1.2394957983193278,
      "grad_norm": 0.1317710429430008,
      "learning_rate": 7.593544977637436e-05,
      "loss": 0.3629,
      "step": 4130
    },
    {
      "epoch": 1.239795918367347,
      "grad_norm": 0.1301366239786148,
      "learning_rate": 7.588461495178388e-05,
      "loss": 0.357,
      "step": 4131
    },
    {
      "epoch": 1.240096038415366,
      "grad_norm": 0.1713320016860962,
      "learning_rate": 7.583378674297276e-05,
      "loss": 0.3781,
      "step": 4132
    },
    {
      "epoch": 1.2403961584633854,
      "grad_norm": 0.1516532003879547,
      "learning_rate": 7.57829651638852e-05,
      "loss": 0.4168,
      "step": 4133
    },
    {
      "epoch": 1.2406962785114046,
      "grad_norm": 0.13926361501216888,
      "learning_rate": 7.573215022846339e-05,
      "loss": 0.3986,
      "step": 4134
    },
    {
      "epoch": 1.2409963985594237,
      "grad_norm": 0.22416581213474274,
      "learning_rate": 7.568134195064794e-05,
      "loss": 0.4877,
      "step": 4135
    },
    {
      "epoch": 1.241296518607443,
      "grad_norm": 0.14472880959510803,
      "learning_rate": 7.563054034437747e-05,
      "loss": 0.34,
      "step": 4136
    },
    {
      "epoch": 1.2415966386554622,
      "grad_norm": 0.13605383038520813,
      "learning_rate": 7.557974542358878e-05,
      "loss": 0.3713,
      "step": 4137
    },
    {
      "epoch": 1.2418967587034815,
      "grad_norm": 0.15049508213996887,
      "learning_rate": 7.552895720221697e-05,
      "loss": 0.4158,
      "step": 4138
    },
    {
      "epoch": 1.2421968787515005,
      "grad_norm": 0.1582769900560379,
      "learning_rate": 7.547817569419515e-05,
      "loss": 0.4324,
      "step": 4139
    },
    {
      "epoch": 1.2424969987995198,
      "grad_norm": 0.1359313726425171,
      "learning_rate": 7.54274009134546e-05,
      "loss": 0.3952,
      "step": 4140
    },
    {
      "epoch": 1.242797118847539,
      "grad_norm": 0.1425129920244217,
      "learning_rate": 7.537663287392489e-05,
      "loss": 0.3736,
      "step": 4141
    },
    {
      "epoch": 1.2430972388955581,
      "grad_norm": 0.13735489547252655,
      "learning_rate": 7.532587158953357e-05,
      "loss": 0.3828,
      "step": 4142
    },
    {
      "epoch": 1.2433973589435774,
      "grad_norm": 0.14814046025276184,
      "learning_rate": 7.527511707420646e-05,
      "loss": 0.4005,
      "step": 4143
    },
    {
      "epoch": 1.2436974789915967,
      "grad_norm": 0.13848213851451874,
      "learning_rate": 7.52243693418675e-05,
      "loss": 0.3814,
      "step": 4144
    },
    {
      "epoch": 1.243997599039616,
      "grad_norm": 0.1393675059080124,
      "learning_rate": 7.517362840643868e-05,
      "loss": 0.385,
      "step": 4145
    },
    {
      "epoch": 1.244297719087635,
      "grad_norm": 0.13988974690437317,
      "learning_rate": 7.512289428184025e-05,
      "loss": 0.4183,
      "step": 4146
    },
    {
      "epoch": 1.2445978391356542,
      "grad_norm": 0.13378490507602692,
      "learning_rate": 7.507216698199056e-05,
      "loss": 0.4053,
      "step": 4147
    },
    {
      "epoch": 1.2448979591836735,
      "grad_norm": 0.14503192901611328,
      "learning_rate": 7.502144652080597e-05,
      "loss": 0.398,
      "step": 4148
    },
    {
      "epoch": 1.2451980792316926,
      "grad_norm": 0.1375621259212494,
      "learning_rate": 7.497073291220111e-05,
      "loss": 0.3977,
      "step": 4149
    },
    {
      "epoch": 1.2454981992797118,
      "grad_norm": 0.14542661607265472,
      "learning_rate": 7.492002617008866e-05,
      "loss": 0.3893,
      "step": 4150
    },
    {
      "epoch": 1.245798319327731,
      "grad_norm": 0.14464782178401947,
      "learning_rate": 7.486932630837948e-05,
      "loss": 0.3877,
      "step": 4151
    },
    {
      "epoch": 1.2460984393757504,
      "grad_norm": 0.13716407120227814,
      "learning_rate": 7.481863334098247e-05,
      "loss": 0.3596,
      "step": 4152
    },
    {
      "epoch": 1.2463985594237694,
      "grad_norm": 0.15377911925315857,
      "learning_rate": 7.476794728180463e-05,
      "loss": 0.4502,
      "step": 4153
    },
    {
      "epoch": 1.2466986794717887,
      "grad_norm": 0.15197394788265228,
      "learning_rate": 7.471726814475118e-05,
      "loss": 0.4417,
      "step": 4154
    },
    {
      "epoch": 1.246998799519808,
      "grad_norm": 0.14850930869579315,
      "learning_rate": 7.466659594372527e-05,
      "loss": 0.4103,
      "step": 4155
    },
    {
      "epoch": 1.247298919567827,
      "grad_norm": 0.1434469372034073,
      "learning_rate": 7.461593069262826e-05,
      "loss": 0.3743,
      "step": 4156
    },
    {
      "epoch": 1.2475990396158463,
      "grad_norm": 0.13946932554244995,
      "learning_rate": 7.456527240535962e-05,
      "loss": 0.3852,
      "step": 4157
    },
    {
      "epoch": 1.2478991596638656,
      "grad_norm": 0.13940726220607758,
      "learning_rate": 7.451462109581687e-05,
      "loss": 0.4089,
      "step": 4158
    },
    {
      "epoch": 1.2481992797118848,
      "grad_norm": 0.14511612057685852,
      "learning_rate": 7.446397677789551e-05,
      "loss": 0.4035,
      "step": 4159
    },
    {
      "epoch": 1.2484993997599039,
      "grad_norm": 0.13636872172355652,
      "learning_rate": 7.441333946548939e-05,
      "loss": 0.3923,
      "step": 4160
    },
    {
      "epoch": 1.2487995198079231,
      "grad_norm": 0.1461837887763977,
      "learning_rate": 7.436270917249013e-05,
      "loss": 0.3883,
      "step": 4161
    },
    {
      "epoch": 1.2490996398559424,
      "grad_norm": 0.12561574578285217,
      "learning_rate": 7.431208591278771e-05,
      "loss": 0.3404,
      "step": 4162
    },
    {
      "epoch": 1.2493997599039615,
      "grad_norm": 0.14430075883865356,
      "learning_rate": 7.426146970026993e-05,
      "loss": 0.3803,
      "step": 4163
    },
    {
      "epoch": 1.2496998799519807,
      "grad_norm": 0.13891753554344177,
      "learning_rate": 7.421086054882278e-05,
      "loss": 0.3907,
      "step": 4164
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.14693672955036163,
      "learning_rate": 7.416025847233037e-05,
      "loss": 0.4115,
      "step": 4165
    },
    {
      "epoch": 1.2503001200480193,
      "grad_norm": 0.12822362780570984,
      "learning_rate": 7.410966348467476e-05,
      "loss": 0.345,
      "step": 4166
    },
    {
      "epoch": 1.2506002400960385,
      "grad_norm": 0.1424485146999359,
      "learning_rate": 7.405907559973606e-05,
      "loss": 0.3831,
      "step": 4167
    },
    {
      "epoch": 1.2509003601440576,
      "grad_norm": 0.14128592610359192,
      "learning_rate": 7.400849483139252e-05,
      "loss": 0.3899,
      "step": 4168
    },
    {
      "epoch": 1.2512004801920769,
      "grad_norm": 0.14493753015995026,
      "learning_rate": 7.395792119352041e-05,
      "loss": 0.3927,
      "step": 4169
    },
    {
      "epoch": 1.251500600240096,
      "grad_norm": 0.14592917263507843,
      "learning_rate": 7.390735469999398e-05,
      "loss": 0.4198,
      "step": 4170
    },
    {
      "epoch": 1.2518007202881152,
      "grad_norm": 0.14330831170082092,
      "learning_rate": 7.385679536468562e-05,
      "loss": 0.3662,
      "step": 4171
    },
    {
      "epoch": 1.2521008403361344,
      "grad_norm": 0.1558278650045395,
      "learning_rate": 7.380624320146566e-05,
      "loss": 0.45,
      "step": 4172
    },
    {
      "epoch": 1.2524009603841537,
      "grad_norm": 0.16887469589710236,
      "learning_rate": 7.375569822420254e-05,
      "loss": 0.4014,
      "step": 4173
    },
    {
      "epoch": 1.252701080432173,
      "grad_norm": 0.15975980460643768,
      "learning_rate": 7.370516044676267e-05,
      "loss": 0.3844,
      "step": 4174
    },
    {
      "epoch": 1.253001200480192,
      "grad_norm": 0.1546444296836853,
      "learning_rate": 7.365462988301052e-05,
      "loss": 0.4198,
      "step": 4175
    },
    {
      "epoch": 1.2533013205282113,
      "grad_norm": 0.12470715492963791,
      "learning_rate": 7.360410654680858e-05,
      "loss": 0.3186,
      "step": 4176
    },
    {
      "epoch": 1.2536014405762306,
      "grad_norm": 0.14100395143032074,
      "learning_rate": 7.355359045201734e-05,
      "loss": 0.393,
      "step": 4177
    },
    {
      "epoch": 1.2539015606242496,
      "grad_norm": 0.14127913117408752,
      "learning_rate": 7.350308161249528e-05,
      "loss": 0.4104,
      "step": 4178
    },
    {
      "epoch": 1.254201680672269,
      "grad_norm": 0.1390364170074463,
      "learning_rate": 7.345258004209899e-05,
      "loss": 0.3893,
      "step": 4179
    },
    {
      "epoch": 1.2545018007202882,
      "grad_norm": 0.14757046103477478,
      "learning_rate": 7.340208575468291e-05,
      "loss": 0.4398,
      "step": 4180
    },
    {
      "epoch": 1.2548019207683074,
      "grad_norm": 0.143103688955307,
      "learning_rate": 7.335159876409966e-05,
      "loss": 0.4398,
      "step": 4181
    },
    {
      "epoch": 1.2551020408163265,
      "grad_norm": 0.13156215846538544,
      "learning_rate": 7.33011190841997e-05,
      "loss": 0.3688,
      "step": 4182
    },
    {
      "epoch": 1.2554021608643458,
      "grad_norm": 0.16004370152950287,
      "learning_rate": 7.325064672883157e-05,
      "loss": 0.3895,
      "step": 4183
    },
    {
      "epoch": 1.255702280912365,
      "grad_norm": 0.14629091322422028,
      "learning_rate": 7.32001817118418e-05,
      "loss": 0.423,
      "step": 4184
    },
    {
      "epoch": 1.256002400960384,
      "grad_norm": 0.15015138685703278,
      "learning_rate": 7.31497240470749e-05,
      "loss": 0.4306,
      "step": 4185
    },
    {
      "epoch": 1.2563025210084033,
      "grad_norm": 0.19345088303089142,
      "learning_rate": 7.30992737483733e-05,
      "loss": 0.4566,
      "step": 4186
    },
    {
      "epoch": 1.2566026410564226,
      "grad_norm": 0.15372827649116516,
      "learning_rate": 7.304883082957747e-05,
      "loss": 0.3913,
      "step": 4187
    },
    {
      "epoch": 1.2569027611044419,
      "grad_norm": 0.14005455374717712,
      "learning_rate": 7.29983953045259e-05,
      "loss": 0.3894,
      "step": 4188
    },
    {
      "epoch": 1.257202881152461,
      "grad_norm": 0.12181244045495987,
      "learning_rate": 7.294796718705492e-05,
      "loss": 0.3323,
      "step": 4189
    },
    {
      "epoch": 1.2575030012004802,
      "grad_norm": 0.16262075304985046,
      "learning_rate": 7.289754649099897e-05,
      "loss": 0.446,
      "step": 4190
    },
    {
      "epoch": 1.2578031212484995,
      "grad_norm": 0.15365153551101685,
      "learning_rate": 7.284713323019035e-05,
      "loss": 0.4019,
      "step": 4191
    },
    {
      "epoch": 1.2581032412965185,
      "grad_norm": 0.14961516857147217,
      "learning_rate": 7.279672741845942e-05,
      "loss": 0.3561,
      "step": 4192
    },
    {
      "epoch": 1.2584033613445378,
      "grad_norm": 0.12542888522148132,
      "learning_rate": 7.274632906963437e-05,
      "loss": 0.3271,
      "step": 4193
    },
    {
      "epoch": 1.258703481392557,
      "grad_norm": 0.12507741153240204,
      "learning_rate": 7.269593819754142e-05,
      "loss": 0.3316,
      "step": 4194
    },
    {
      "epoch": 1.2590036014405763,
      "grad_norm": 0.13399529457092285,
      "learning_rate": 7.264555481600476e-05,
      "loss": 0.3296,
      "step": 4195
    },
    {
      "epoch": 1.2593037214885954,
      "grad_norm": 0.13027550280094147,
      "learning_rate": 7.259517893884647e-05,
      "loss": 0.3587,
      "step": 4196
    },
    {
      "epoch": 1.2596038415366146,
      "grad_norm": 0.14164437353610992,
      "learning_rate": 7.254481057988658e-05,
      "loss": 0.3872,
      "step": 4197
    },
    {
      "epoch": 1.259903961584634,
      "grad_norm": 0.14004860818386078,
      "learning_rate": 7.249444975294313e-05,
      "loss": 0.3901,
      "step": 4198
    },
    {
      "epoch": 1.260204081632653,
      "grad_norm": 0.13881249725818634,
      "learning_rate": 7.244409647183197e-05,
      "loss": 0.3914,
      "step": 4199
    },
    {
      "epoch": 1.2605042016806722,
      "grad_norm": 0.14471213519573212,
      "learning_rate": 7.239375075036697e-05,
      "loss": 0.4063,
      "step": 4200
    },
    {
      "epoch": 1.2608043217286915,
      "grad_norm": 0.16577906906604767,
      "learning_rate": 7.23434126023599e-05,
      "loss": 0.3837,
      "step": 4201
    },
    {
      "epoch": 1.2611044417767108,
      "grad_norm": 0.14864365756511688,
      "learning_rate": 7.229308204162044e-05,
      "loss": 0.421,
      "step": 4202
    },
    {
      "epoch": 1.2614045618247298,
      "grad_norm": 0.13306841254234314,
      "learning_rate": 7.224275908195626e-05,
      "loss": 0.3264,
      "step": 4203
    },
    {
      "epoch": 1.261704681872749,
      "grad_norm": 0.13763290643692017,
      "learning_rate": 7.219244373717285e-05,
      "loss": 0.3785,
      "step": 4204
    },
    {
      "epoch": 1.2620048019207684,
      "grad_norm": 0.14042170345783234,
      "learning_rate": 7.214213602107357e-05,
      "loss": 0.3886,
      "step": 4205
    },
    {
      "epoch": 1.2623049219687874,
      "grad_norm": 0.17335915565490723,
      "learning_rate": 7.20918359474599e-05,
      "loss": 0.4584,
      "step": 4206
    },
    {
      "epoch": 1.2626050420168067,
      "grad_norm": 0.1289275735616684,
      "learning_rate": 7.204154353013102e-05,
      "loss": 0.3179,
      "step": 4207
    },
    {
      "epoch": 1.262905162064826,
      "grad_norm": 0.13672125339508057,
      "learning_rate": 7.199125878288406e-05,
      "loss": 0.3748,
      "step": 4208
    },
    {
      "epoch": 1.2632052821128452,
      "grad_norm": 0.14343659579753876,
      "learning_rate": 7.19409817195141e-05,
      "loss": 0.3674,
      "step": 4209
    },
    {
      "epoch": 1.2635054021608643,
      "grad_norm": 0.13902506232261658,
      "learning_rate": 7.189071235381406e-05,
      "loss": 0.3879,
      "step": 4210
    },
    {
      "epoch": 1.2638055222088835,
      "grad_norm": 0.12468855828046799,
      "learning_rate": 7.184045069957482e-05,
      "loss": 0.3322,
      "step": 4211
    },
    {
      "epoch": 1.2641056422569028,
      "grad_norm": 0.14416514337062836,
      "learning_rate": 7.179019677058499e-05,
      "loss": 0.4047,
      "step": 4212
    },
    {
      "epoch": 1.2644057623049219,
      "grad_norm": 0.13998650014400482,
      "learning_rate": 7.173995058063119e-05,
      "loss": 0.3885,
      "step": 4213
    },
    {
      "epoch": 1.2647058823529411,
      "grad_norm": 0.14294609427452087,
      "learning_rate": 7.168971214349792e-05,
      "loss": 0.3861,
      "step": 4214
    },
    {
      "epoch": 1.2650060024009604,
      "grad_norm": 0.13009285926818848,
      "learning_rate": 7.16394814729675e-05,
      "loss": 0.3611,
      "step": 4215
    },
    {
      "epoch": 1.2653061224489797,
      "grad_norm": 0.15736983716487885,
      "learning_rate": 7.158925858282012e-05,
      "loss": 0.431,
      "step": 4216
    },
    {
      "epoch": 1.265606242496999,
      "grad_norm": 0.14100012183189392,
      "learning_rate": 7.153904348683393e-05,
      "loss": 0.3805,
      "step": 4217
    },
    {
      "epoch": 1.265906362545018,
      "grad_norm": 0.14148086309432983,
      "learning_rate": 7.148883619878478e-05,
      "loss": 0.395,
      "step": 4218
    },
    {
      "epoch": 1.2662064825930373,
      "grad_norm": 0.13538607954978943,
      "learning_rate": 7.143863673244648e-05,
      "loss": 0.3745,
      "step": 4219
    },
    {
      "epoch": 1.2665066026410563,
      "grad_norm": 0.14992834627628326,
      "learning_rate": 7.138844510159069e-05,
      "loss": 0.4278,
      "step": 4220
    },
    {
      "epoch": 1.2668067226890756,
      "grad_norm": 0.1743413656949997,
      "learning_rate": 7.13382613199869e-05,
      "loss": 0.4418,
      "step": 4221
    },
    {
      "epoch": 1.2671068427370948,
      "grad_norm": 0.14814399182796478,
      "learning_rate": 7.128808540140249e-05,
      "loss": 0.4424,
      "step": 4222
    },
    {
      "epoch": 1.267406962785114,
      "grad_norm": 0.1443677395582199,
      "learning_rate": 7.123791735960265e-05,
      "loss": 0.4125,
      "step": 4223
    },
    {
      "epoch": 1.2677070828331334,
      "grad_norm": 0.18257102370262146,
      "learning_rate": 7.11877572083503e-05,
      "loss": 0.3816,
      "step": 4224
    },
    {
      "epoch": 1.2680072028811524,
      "grad_norm": 0.1487230807542801,
      "learning_rate": 7.113760496140644e-05,
      "loss": 0.4117,
      "step": 4225
    },
    {
      "epoch": 1.2683073229291717,
      "grad_norm": 0.1354549527168274,
      "learning_rate": 7.108746063252971e-05,
      "loss": 0.3603,
      "step": 4226
    },
    {
      "epoch": 1.2686074429771907,
      "grad_norm": 0.14360687136650085,
      "learning_rate": 7.103732423547659e-05,
      "loss": 0.3853,
      "step": 4227
    },
    {
      "epoch": 1.26890756302521,
      "grad_norm": 0.12622900307178497,
      "learning_rate": 7.098719578400148e-05,
      "loss": 0.3271,
      "step": 4228
    },
    {
      "epoch": 1.2692076830732293,
      "grad_norm": 0.1468481868505478,
      "learning_rate": 7.093707529185652e-05,
      "loss": 0.3709,
      "step": 4229
    },
    {
      "epoch": 1.2695078031212486,
      "grad_norm": 0.14633765816688538,
      "learning_rate": 7.088696277279175e-05,
      "loss": 0.3869,
      "step": 4230
    },
    {
      "epoch": 1.2698079231692678,
      "grad_norm": 0.1497291624546051,
      "learning_rate": 7.083685824055489e-05,
      "loss": 0.4076,
      "step": 4231
    },
    {
      "epoch": 1.2701080432172869,
      "grad_norm": 0.14628471434116364,
      "learning_rate": 7.078676170889153e-05,
      "loss": 0.4004,
      "step": 4232
    },
    {
      "epoch": 1.2704081632653061,
      "grad_norm": 0.1478734165430069,
      "learning_rate": 7.073667319154516e-05,
      "loss": 0.4367,
      "step": 4233
    },
    {
      "epoch": 1.2707082833133252,
      "grad_norm": 0.14400777220726013,
      "learning_rate": 7.068659270225692e-05,
      "loss": 0.3671,
      "step": 4234
    },
    {
      "epoch": 1.2710084033613445,
      "grad_norm": 0.13357645273208618,
      "learning_rate": 7.063652025476586e-05,
      "loss": 0.3576,
      "step": 4235
    },
    {
      "epoch": 1.2713085234093637,
      "grad_norm": 0.1438428908586502,
      "learning_rate": 7.05864558628088e-05,
      "loss": 0.3985,
      "step": 4236
    },
    {
      "epoch": 1.271608643457383,
      "grad_norm": 0.14982417225837708,
      "learning_rate": 7.053639954012028e-05,
      "loss": 0.3937,
      "step": 4237
    },
    {
      "epoch": 1.2719087635054023,
      "grad_norm": 0.1599334180355072,
      "learning_rate": 7.048635130043268e-05,
      "loss": 0.4364,
      "step": 4238
    },
    {
      "epoch": 1.2722088835534213,
      "grad_norm": 0.15178163349628448,
      "learning_rate": 7.04363111574762e-05,
      "loss": 0.3944,
      "step": 4239
    },
    {
      "epoch": 1.2725090036014406,
      "grad_norm": 0.13461163640022278,
      "learning_rate": 7.038627912497873e-05,
      "loss": 0.3928,
      "step": 4240
    },
    {
      "epoch": 1.2728091236494599,
      "grad_norm": 0.15435433387756348,
      "learning_rate": 7.033625521666605e-05,
      "loss": 0.3937,
      "step": 4241
    },
    {
      "epoch": 1.273109243697479,
      "grad_norm": 0.1563456654548645,
      "learning_rate": 7.028623944626162e-05,
      "loss": 0.4307,
      "step": 4242
    },
    {
      "epoch": 1.2734093637454982,
      "grad_norm": 0.1376456320285797,
      "learning_rate": 7.023623182748662e-05,
      "loss": 0.3446,
      "step": 4243
    },
    {
      "epoch": 1.2737094837935174,
      "grad_norm": 0.13631781935691833,
      "learning_rate": 7.018623237406019e-05,
      "loss": 0.3721,
      "step": 4244
    },
    {
      "epoch": 1.2740096038415367,
      "grad_norm": 0.14110277593135834,
      "learning_rate": 7.013624109969902e-05,
      "loss": 0.387,
      "step": 4245
    },
    {
      "epoch": 1.2743097238895558,
      "grad_norm": 0.13850592076778412,
      "learning_rate": 7.008625801811767e-05,
      "loss": 0.366,
      "step": 4246
    },
    {
      "epoch": 1.274609843937575,
      "grad_norm": 0.15983489155769348,
      "learning_rate": 7.003628314302844e-05,
      "loss": 0.3785,
      "step": 4247
    },
    {
      "epoch": 1.2749099639855943,
      "grad_norm": 0.1383303701877594,
      "learning_rate": 6.998631648814136e-05,
      "loss": 0.4,
      "step": 4248
    },
    {
      "epoch": 1.2752100840336134,
      "grad_norm": 0.17634017765522003,
      "learning_rate": 6.993635806716412e-05,
      "loss": 0.4315,
      "step": 4249
    },
    {
      "epoch": 1.2755102040816326,
      "grad_norm": 0.1412407010793686,
      "learning_rate": 6.988640789380241e-05,
      "loss": 0.3821,
      "step": 4250
    },
    {
      "epoch": 1.275810324129652,
      "grad_norm": 0.14472277462482452,
      "learning_rate": 6.983646598175932e-05,
      "loss": 0.3821,
      "step": 4251
    },
    {
      "epoch": 1.2761104441776712,
      "grad_norm": 0.13960790634155273,
      "learning_rate": 6.978653234473596e-05,
      "loss": 0.3933,
      "step": 4252
    },
    {
      "epoch": 1.2764105642256902,
      "grad_norm": 0.13559602200984955,
      "learning_rate": 6.973660699643101e-05,
      "loss": 0.3692,
      "step": 4253
    },
    {
      "epoch": 1.2767106842737095,
      "grad_norm": 0.1367492526769638,
      "learning_rate": 6.968668995054087e-05,
      "loss": 0.3742,
      "step": 4254
    },
    {
      "epoch": 1.2770108043217288,
      "grad_norm": 0.13935504853725433,
      "learning_rate": 6.96367812207598e-05,
      "loss": 0.3747,
      "step": 4255
    },
    {
      "epoch": 1.2773109243697478,
      "grad_norm": 0.13473719358444214,
      "learning_rate": 6.958688082077963e-05,
      "loss": 0.3574,
      "step": 4256
    },
    {
      "epoch": 1.277611044417767,
      "grad_norm": 0.14837577939033508,
      "learning_rate": 6.953698876428995e-05,
      "loss": 0.3637,
      "step": 4257
    },
    {
      "epoch": 1.2779111644657863,
      "grad_norm": 0.13354167342185974,
      "learning_rate": 6.948710506497811e-05,
      "loss": 0.3746,
      "step": 4258
    },
    {
      "epoch": 1.2782112845138056,
      "grad_norm": 0.1450507640838623,
      "learning_rate": 6.94372297365291e-05,
      "loss": 0.4138,
      "step": 4259
    },
    {
      "epoch": 1.2785114045618247,
      "grad_norm": 0.1256970465183258,
      "learning_rate": 6.938736279262567e-05,
      "loss": 0.3429,
      "step": 4260
    },
    {
      "epoch": 1.278811524609844,
      "grad_norm": 0.13689365983009338,
      "learning_rate": 6.933750424694828e-05,
      "loss": 0.3369,
      "step": 4261
    },
    {
      "epoch": 1.2791116446578632,
      "grad_norm": 0.1337195336818695,
      "learning_rate": 6.928765411317492e-05,
      "loss": 0.3688,
      "step": 4262
    },
    {
      "epoch": 1.2794117647058822,
      "grad_norm": 0.14053617417812347,
      "learning_rate": 6.923781240498156e-05,
      "loss": 0.3618,
      "step": 4263
    },
    {
      "epoch": 1.2797118847539015,
      "grad_norm": 0.1282060593366623,
      "learning_rate": 6.91879791360416e-05,
      "loss": 0.3366,
      "step": 4264
    },
    {
      "epoch": 1.2800120048019208,
      "grad_norm": 0.1514139473438263,
      "learning_rate": 6.913815432002625e-05,
      "loss": 0.3489,
      "step": 4265
    },
    {
      "epoch": 1.28031212484994,
      "grad_norm": 0.14885970950126648,
      "learning_rate": 6.90883379706044e-05,
      "loss": 0.412,
      "step": 4266
    },
    {
      "epoch": 1.280612244897959,
      "grad_norm": 0.1294536143541336,
      "learning_rate": 6.903853010144259e-05,
      "loss": 0.3594,
      "step": 4267
    },
    {
      "epoch": 1.2809123649459784,
      "grad_norm": 0.13592691719532013,
      "learning_rate": 6.898873072620498e-05,
      "loss": 0.3405,
      "step": 4268
    },
    {
      "epoch": 1.2812124849939976,
      "grad_norm": 0.1535409539937973,
      "learning_rate": 6.893893985855353e-05,
      "loss": 0.4069,
      "step": 4269
    },
    {
      "epoch": 1.2815126050420167,
      "grad_norm": 0.14617381989955902,
      "learning_rate": 6.888915751214774e-05,
      "loss": 0.413,
      "step": 4270
    },
    {
      "epoch": 1.281812725090036,
      "grad_norm": 0.21435479819774628,
      "learning_rate": 6.883938370064489e-05,
      "loss": 0.378,
      "step": 4271
    },
    {
      "epoch": 1.2821128451380552,
      "grad_norm": 0.14236077666282654,
      "learning_rate": 6.87896184376998e-05,
      "loss": 0.3811,
      "step": 4272
    },
    {
      "epoch": 1.2824129651860745,
      "grad_norm": 0.14209921658039093,
      "learning_rate": 6.8739861736965e-05,
      "loss": 0.3519,
      "step": 4273
    },
    {
      "epoch": 1.2827130852340938,
      "grad_norm": 0.13236385583877563,
      "learning_rate": 6.86901136120907e-05,
      "loss": 0.3702,
      "step": 4274
    },
    {
      "epoch": 1.2830132052821128,
      "grad_norm": 0.1324281394481659,
      "learning_rate": 6.864037407672474e-05,
      "loss": 0.3698,
      "step": 4275
    },
    {
      "epoch": 1.283313325330132,
      "grad_norm": 0.14125031232833862,
      "learning_rate": 6.85906431445125e-05,
      "loss": 0.4042,
      "step": 4276
    },
    {
      "epoch": 1.2836134453781511,
      "grad_norm": 0.1484401971101761,
      "learning_rate": 6.85409208290972e-05,
      "loss": 0.4272,
      "step": 4277
    },
    {
      "epoch": 1.2839135654261704,
      "grad_norm": 0.13767188787460327,
      "learning_rate": 6.849120714411954e-05,
      "loss": 0.3574,
      "step": 4278
    },
    {
      "epoch": 1.2842136854741897,
      "grad_norm": 0.13342134654521942,
      "learning_rate": 6.844150210321788e-05,
      "loss": 0.3515,
      "step": 4279
    },
    {
      "epoch": 1.284513805522209,
      "grad_norm": 0.15663060545921326,
      "learning_rate": 6.83918057200283e-05,
      "loss": 0.3796,
      "step": 4280
    },
    {
      "epoch": 1.2848139255702282,
      "grad_norm": 0.14326073229312897,
      "learning_rate": 6.83421180081843e-05,
      "loss": 0.3945,
      "step": 4281
    },
    {
      "epoch": 1.2851140456182473,
      "grad_norm": 0.15463420748710632,
      "learning_rate": 6.829243898131728e-05,
      "loss": 0.4049,
      "step": 4282
    },
    {
      "epoch": 1.2854141656662665,
      "grad_norm": 0.14835111796855927,
      "learning_rate": 6.824276865305604e-05,
      "loss": 0.3844,
      "step": 4283
    },
    {
      "epoch": 1.2857142857142856,
      "grad_norm": 0.12640972435474396,
      "learning_rate": 6.819310703702704e-05,
      "loss": 0.3507,
      "step": 4284
    },
    {
      "epoch": 1.2860144057623049,
      "grad_norm": 0.18915210664272308,
      "learning_rate": 6.814345414685444e-05,
      "loss": 0.4182,
      "step": 4285
    },
    {
      "epoch": 1.2863145258103241,
      "grad_norm": 0.13177894055843353,
      "learning_rate": 6.809380999615993e-05,
      "loss": 0.3386,
      "step": 4286
    },
    {
      "epoch": 1.2866146458583434,
      "grad_norm": 0.14102855324745178,
      "learning_rate": 6.804417459856273e-05,
      "loss": 0.3785,
      "step": 4287
    },
    {
      "epoch": 1.2869147659063627,
      "grad_norm": 0.1460040956735611,
      "learning_rate": 6.799454796767986e-05,
      "loss": 0.3954,
      "step": 4288
    },
    {
      "epoch": 1.2872148859543817,
      "grad_norm": 0.15009137988090515,
      "learning_rate": 6.794493011712573e-05,
      "loss": 0.3971,
      "step": 4289
    },
    {
      "epoch": 1.287515006002401,
      "grad_norm": 0.15093019604682922,
      "learning_rate": 6.789532106051246e-05,
      "loss": 0.3468,
      "step": 4290
    },
    {
      "epoch": 1.28781512605042,
      "grad_norm": 0.15437495708465576,
      "learning_rate": 6.784572081144975e-05,
      "loss": 0.3975,
      "step": 4291
    },
    {
      "epoch": 1.2881152460984393,
      "grad_norm": 0.15075330436229706,
      "learning_rate": 6.77961293835448e-05,
      "loss": 0.4207,
      "step": 4292
    },
    {
      "epoch": 1.2884153661464586,
      "grad_norm": 0.13437460362911224,
      "learning_rate": 6.77465467904025e-05,
      "loss": 0.3654,
      "step": 4293
    },
    {
      "epoch": 1.2887154861944778,
      "grad_norm": 0.14857985079288483,
      "learning_rate": 6.76969730456253e-05,
      "loss": 0.3991,
      "step": 4294
    },
    {
      "epoch": 1.2890156062424971,
      "grad_norm": 0.14068998396396637,
      "learning_rate": 6.764740816281308e-05,
      "loss": 0.3888,
      "step": 4295
    },
    {
      "epoch": 1.2893157262905162,
      "grad_norm": 0.1284976452589035,
      "learning_rate": 6.759785215556348e-05,
      "loss": 0.35,
      "step": 4296
    },
    {
      "epoch": 1.2896158463385354,
      "grad_norm": 0.13528770208358765,
      "learning_rate": 6.75483050374716e-05,
      "loss": 0.3762,
      "step": 4297
    },
    {
      "epoch": 1.2899159663865547,
      "grad_norm": 0.1504359394311905,
      "learning_rate": 6.74987668221301e-05,
      "loss": 0.3541,
      "step": 4298
    },
    {
      "epoch": 1.2902160864345738,
      "grad_norm": 0.13591429591178894,
      "learning_rate": 6.744923752312928e-05,
      "loss": 0.3461,
      "step": 4299
    },
    {
      "epoch": 1.290516206482593,
      "grad_norm": 0.13461627066135406,
      "learning_rate": 6.739971715405684e-05,
      "loss": 0.3765,
      "step": 4300
    },
    {
      "epoch": 1.2908163265306123,
      "grad_norm": 0.12580035626888275,
      "learning_rate": 6.735020572849827e-05,
      "loss": 0.3379,
      "step": 4301
    },
    {
      "epoch": 1.2911164465786316,
      "grad_norm": 0.13868200778961182,
      "learning_rate": 6.730070326003633e-05,
      "loss": 0.4032,
      "step": 4302
    },
    {
      "epoch": 1.2914165666266506,
      "grad_norm": 0.14685222506523132,
      "learning_rate": 6.725120976225148e-05,
      "loss": 0.3878,
      "step": 4303
    },
    {
      "epoch": 1.2917166866746699,
      "grad_norm": 0.15233665704727173,
      "learning_rate": 6.720172524872174e-05,
      "loss": 0.4384,
      "step": 4304
    },
    {
      "epoch": 1.2920168067226891,
      "grad_norm": 0.1587858945131302,
      "learning_rate": 6.715224973302262e-05,
      "loss": 0.4009,
      "step": 4305
    },
    {
      "epoch": 1.2923169267707082,
      "grad_norm": 0.13998299837112427,
      "learning_rate": 6.710278322872706e-05,
      "loss": 0.3761,
      "step": 4306
    },
    {
      "epoch": 1.2926170468187275,
      "grad_norm": 0.1423964649438858,
      "learning_rate": 6.705332574940577e-05,
      "loss": 0.4,
      "step": 4307
    },
    {
      "epoch": 1.2929171668667467,
      "grad_norm": 0.13591860234737396,
      "learning_rate": 6.700387730862676e-05,
      "loss": 0.3785,
      "step": 4308
    },
    {
      "epoch": 1.293217286914766,
      "grad_norm": 0.145105242729187,
      "learning_rate": 6.695443791995564e-05,
      "loss": 0.3881,
      "step": 4309
    },
    {
      "epoch": 1.293517406962785,
      "grad_norm": 0.14852149784564972,
      "learning_rate": 6.690500759695557e-05,
      "loss": 0.3549,
      "step": 4310
    },
    {
      "epoch": 1.2938175270108043,
      "grad_norm": 0.13705085217952728,
      "learning_rate": 6.685558635318716e-05,
      "loss": 0.3725,
      "step": 4311
    },
    {
      "epoch": 1.2941176470588236,
      "grad_norm": 0.14072710275650024,
      "learning_rate": 6.68061742022086e-05,
      "loss": 0.381,
      "step": 4312
    },
    {
      "epoch": 1.2944177671068426,
      "grad_norm": 0.1349777728319168,
      "learning_rate": 6.675677115757555e-05,
      "loss": 0.3241,
      "step": 4313
    },
    {
      "epoch": 1.294717887154862,
      "grad_norm": 0.1425703912973404,
      "learning_rate": 6.670737723284111e-05,
      "loss": 0.3685,
      "step": 4314
    },
    {
      "epoch": 1.2950180072028812,
      "grad_norm": 0.15010738372802734,
      "learning_rate": 6.665799244155599e-05,
      "loss": 0.4021,
      "step": 4315
    },
    {
      "epoch": 1.2953181272509005,
      "grad_norm": 0.14332538843154907,
      "learning_rate": 6.660861679726831e-05,
      "loss": 0.3895,
      "step": 4316
    },
    {
      "epoch": 1.2956182472989195,
      "grad_norm": 0.1347683072090149,
      "learning_rate": 6.655925031352373e-05,
      "loss": 0.3727,
      "step": 4317
    },
    {
      "epoch": 1.2959183673469388,
      "grad_norm": 0.13436463475227356,
      "learning_rate": 6.650989300386539e-05,
      "loss": 0.3522,
      "step": 4318
    },
    {
      "epoch": 1.296218487394958,
      "grad_norm": 0.16632862389087677,
      "learning_rate": 6.646054488183385e-05,
      "loss": 0.3476,
      "step": 4319
    },
    {
      "epoch": 1.296518607442977,
      "grad_norm": 0.13216231763362885,
      "learning_rate": 6.641120596096729e-05,
      "loss": 0.3535,
      "step": 4320
    },
    {
      "epoch": 1.2968187274909964,
      "grad_norm": 0.14814947545528412,
      "learning_rate": 6.636187625480122e-05,
      "loss": 0.4084,
      "step": 4321
    },
    {
      "epoch": 1.2971188475390156,
      "grad_norm": 0.13599202036857605,
      "learning_rate": 6.631255577686863e-05,
      "loss": 0.3794,
      "step": 4322
    },
    {
      "epoch": 1.297418967587035,
      "grad_norm": 0.13081775605678558,
      "learning_rate": 6.626324454070015e-05,
      "loss": 0.3527,
      "step": 4323
    },
    {
      "epoch": 1.297719087635054,
      "grad_norm": 0.1295832246541977,
      "learning_rate": 6.621394255982367e-05,
      "loss": 0.3602,
      "step": 4324
    },
    {
      "epoch": 1.2980192076830732,
      "grad_norm": 0.1495177000761032,
      "learning_rate": 6.616464984776459e-05,
      "loss": 0.4013,
      "step": 4325
    },
    {
      "epoch": 1.2983193277310925,
      "grad_norm": 0.1319665014743805,
      "learning_rate": 6.61153664180459e-05,
      "loss": 0.3559,
      "step": 4326
    },
    {
      "epoch": 1.2986194477791115,
      "grad_norm": 0.14115822315216064,
      "learning_rate": 6.606609228418787e-05,
      "loss": 0.4127,
      "step": 4327
    },
    {
      "epoch": 1.2989195678271308,
      "grad_norm": 0.14079435169696808,
      "learning_rate": 6.601682745970831e-05,
      "loss": 0.3385,
      "step": 4328
    },
    {
      "epoch": 1.29921968787515,
      "grad_norm": 0.140974760055542,
      "learning_rate": 6.596757195812249e-05,
      "loss": 0.4101,
      "step": 4329
    },
    {
      "epoch": 1.2995198079231693,
      "grad_norm": 0.13408349454402924,
      "learning_rate": 6.591832579294303e-05,
      "loss": 0.3618,
      "step": 4330
    },
    {
      "epoch": 1.2998199279711884,
      "grad_norm": 0.22065842151641846,
      "learning_rate": 6.586908897768011e-05,
      "loss": 0.4117,
      "step": 4331
    },
    {
      "epoch": 1.3001200480192077,
      "grad_norm": 0.14712445437908173,
      "learning_rate": 6.58198615258413e-05,
      "loss": 0.4249,
      "step": 4332
    },
    {
      "epoch": 1.300420168067227,
      "grad_norm": 0.1465093344449997,
      "learning_rate": 6.57706434509315e-05,
      "loss": 0.3969,
      "step": 4333
    },
    {
      "epoch": 1.300720288115246,
      "grad_norm": 0.14196144044399261,
      "learning_rate": 6.572143476645319e-05,
      "loss": 0.3926,
      "step": 4334
    },
    {
      "epoch": 1.3010204081632653,
      "grad_norm": 0.14304772019386292,
      "learning_rate": 6.56722354859062e-05,
      "loss": 0.4159,
      "step": 4335
    },
    {
      "epoch": 1.3013205282112845,
      "grad_norm": 0.12894688546657562,
      "learning_rate": 6.562304562278777e-05,
      "loss": 0.3409,
      "step": 4336
    },
    {
      "epoch": 1.3016206482593038,
      "grad_norm": 0.15281268954277039,
      "learning_rate": 6.557386519059258e-05,
      "loss": 0.4035,
      "step": 4337
    },
    {
      "epoch": 1.301920768307323,
      "grad_norm": 0.13902124762535095,
      "learning_rate": 6.552469420281277e-05,
      "loss": 0.3834,
      "step": 4338
    },
    {
      "epoch": 1.302220888355342,
      "grad_norm": 0.18085238337516785,
      "learning_rate": 6.547553267293773e-05,
      "loss": 0.3709,
      "step": 4339
    },
    {
      "epoch": 1.3025210084033614,
      "grad_norm": 0.130579873919487,
      "learning_rate": 6.542638061445447e-05,
      "loss": 0.3443,
      "step": 4340
    },
    {
      "epoch": 1.3028211284513804,
      "grad_norm": 0.14980711042881012,
      "learning_rate": 6.537723804084721e-05,
      "loss": 0.4067,
      "step": 4341
    },
    {
      "epoch": 1.3031212484993997,
      "grad_norm": 0.1403573751449585,
      "learning_rate": 6.532810496559772e-05,
      "loss": 0.3608,
      "step": 4342
    },
    {
      "epoch": 1.303421368547419,
      "grad_norm": 0.27642616629600525,
      "learning_rate": 6.527898140218507e-05,
      "loss": 0.3434,
      "step": 4343
    },
    {
      "epoch": 1.3037214885954382,
      "grad_norm": 0.1325029730796814,
      "learning_rate": 6.52298673640857e-05,
      "loss": 0.3457,
      "step": 4344
    },
    {
      "epoch": 1.3040216086434575,
      "grad_norm": 0.12115392833948135,
      "learning_rate": 6.518076286477357e-05,
      "loss": 0.3199,
      "step": 4345
    },
    {
      "epoch": 1.3043217286914766,
      "grad_norm": 0.15301509201526642,
      "learning_rate": 6.513166791771987e-05,
      "loss": 0.3954,
      "step": 4346
    },
    {
      "epoch": 1.3046218487394958,
      "grad_norm": 0.13748885691165924,
      "learning_rate": 6.508258253639324e-05,
      "loss": 0.3633,
      "step": 4347
    },
    {
      "epoch": 1.3049219687875149,
      "grad_norm": 0.12777800858020782,
      "learning_rate": 6.503350673425972e-05,
      "loss": 0.3416,
      "step": 4348
    },
    {
      "epoch": 1.3052220888355341,
      "grad_norm": 0.12664203345775604,
      "learning_rate": 6.498444052478268e-05,
      "loss": 0.3366,
      "step": 4349
    },
    {
      "epoch": 1.3055222088835534,
      "grad_norm": 0.15768717229366302,
      "learning_rate": 6.493538392142287e-05,
      "loss": 0.3392,
      "step": 4350
    },
    {
      "epoch": 1.3058223289315727,
      "grad_norm": 0.14314647018909454,
      "learning_rate": 6.488633693763844e-05,
      "loss": 0.3946,
      "step": 4351
    },
    {
      "epoch": 1.306122448979592,
      "grad_norm": 0.22907589375972748,
      "learning_rate": 6.48372995868848e-05,
      "loss": 0.3377,
      "step": 4352
    },
    {
      "epoch": 1.306422569027611,
      "grad_norm": 0.1399524062871933,
      "learning_rate": 6.478827188261484e-05,
      "loss": 0.3965,
      "step": 4353
    },
    {
      "epoch": 1.3067226890756303,
      "grad_norm": 0.14998413622379303,
      "learning_rate": 6.473925383827873e-05,
      "loss": 0.3794,
      "step": 4354
    },
    {
      "epoch": 1.3070228091236495,
      "grad_norm": 0.14053849875926971,
      "learning_rate": 6.469024546732399e-05,
      "loss": 0.3852,
      "step": 4355
    },
    {
      "epoch": 1.3073229291716686,
      "grad_norm": 0.13630418479442596,
      "learning_rate": 6.464124678319554e-05,
      "loss": 0.3831,
      "step": 4356
    },
    {
      "epoch": 1.3076230492196879,
      "grad_norm": 0.14305394887924194,
      "learning_rate": 6.459225779933562e-05,
      "loss": 0.379,
      "step": 4357
    },
    {
      "epoch": 1.3079231692677071,
      "grad_norm": 0.1302955448627472,
      "learning_rate": 6.454327852918372e-05,
      "loss": 0.3586,
      "step": 4358
    },
    {
      "epoch": 1.3082232893157264,
      "grad_norm": 0.12461972236633301,
      "learning_rate": 6.449430898617681e-05,
      "loss": 0.3082,
      "step": 4359
    },
    {
      "epoch": 1.3085234093637454,
      "grad_norm": 0.15870194137096405,
      "learning_rate": 6.444534918374906e-05,
      "loss": 0.3826,
      "step": 4360
    },
    {
      "epoch": 1.3088235294117647,
      "grad_norm": 0.1367909014225006,
      "learning_rate": 6.439639913533212e-05,
      "loss": 0.3797,
      "step": 4361
    },
    {
      "epoch": 1.309123649459784,
      "grad_norm": 0.12880021333694458,
      "learning_rate": 6.434745885435482e-05,
      "loss": 0.3103,
      "step": 4362
    },
    {
      "epoch": 1.309423769507803,
      "grad_norm": 0.14461173117160797,
      "learning_rate": 6.429852835424335e-05,
      "loss": 0.3728,
      "step": 4363
    },
    {
      "epoch": 1.3097238895558223,
      "grad_norm": 0.13231562077999115,
      "learning_rate": 6.424960764842129e-05,
      "loss": 0.3521,
      "step": 4364
    },
    {
      "epoch": 1.3100240096038416,
      "grad_norm": 0.14568877220153809,
      "learning_rate": 6.420069675030941e-05,
      "loss": 0.3915,
      "step": 4365
    },
    {
      "epoch": 1.3103241296518608,
      "grad_norm": 0.14014829695224762,
      "learning_rate": 6.415179567332587e-05,
      "loss": 0.3451,
      "step": 4366
    },
    {
      "epoch": 1.31062424969988,
      "grad_norm": 0.13011689484119415,
      "learning_rate": 6.410290443088613e-05,
      "loss": 0.3352,
      "step": 4367
    },
    {
      "epoch": 1.3109243697478992,
      "grad_norm": 0.13787321746349335,
      "learning_rate": 6.405402303640299e-05,
      "loss": 0.3268,
      "step": 4368
    },
    {
      "epoch": 1.3112244897959184,
      "grad_norm": 0.17194852232933044,
      "learning_rate": 6.400515150328639e-05,
      "loss": 0.4184,
      "step": 4369
    },
    {
      "epoch": 1.3115246098439375,
      "grad_norm": 0.1402439922094345,
      "learning_rate": 6.395628984494378e-05,
      "loss": 0.3786,
      "step": 4370
    },
    {
      "epoch": 1.3118247298919568,
      "grad_norm": 0.14670024812221527,
      "learning_rate": 6.39074380747797e-05,
      "loss": 0.3761,
      "step": 4371
    },
    {
      "epoch": 1.312124849939976,
      "grad_norm": 0.1530454009771347,
      "learning_rate": 6.385859620619619e-05,
      "loss": 0.4419,
      "step": 4372
    },
    {
      "epoch": 1.3124249699879953,
      "grad_norm": 0.14788678288459778,
      "learning_rate": 6.380976425259236e-05,
      "loss": 0.3608,
      "step": 4373
    },
    {
      "epoch": 1.3127250900360143,
      "grad_norm": 0.1467645764350891,
      "learning_rate": 6.376094222736473e-05,
      "loss": 0.4164,
      "step": 4374
    },
    {
      "epoch": 1.3130252100840336,
      "grad_norm": 0.14573605358600616,
      "learning_rate": 6.371213014390706e-05,
      "loss": 0.378,
      "step": 4375
    },
    {
      "epoch": 1.3133253301320529,
      "grad_norm": 0.1537141352891922,
      "learning_rate": 6.366332801561042e-05,
      "loss": 0.415,
      "step": 4376
    },
    {
      "epoch": 1.313625450180072,
      "grad_norm": 0.13787482678890228,
      "learning_rate": 6.361453585586304e-05,
      "loss": 0.3391,
      "step": 4377
    },
    {
      "epoch": 1.3139255702280912,
      "grad_norm": 0.24630169570446014,
      "learning_rate": 6.356575367805054e-05,
      "loss": 0.3776,
      "step": 4378
    },
    {
      "epoch": 1.3142256902761105,
      "grad_norm": 0.14193442463874817,
      "learning_rate": 6.351698149555573e-05,
      "loss": 0.401,
      "step": 4379
    },
    {
      "epoch": 1.3145258103241297,
      "grad_norm": 0.13105009496212006,
      "learning_rate": 6.346821932175873e-05,
      "loss": 0.3651,
      "step": 4380
    },
    {
      "epoch": 1.3148259303721488,
      "grad_norm": 0.12521019577980042,
      "learning_rate": 6.341946717003688e-05,
      "loss": 0.3149,
      "step": 4381
    },
    {
      "epoch": 1.315126050420168,
      "grad_norm": 0.1309904307126999,
      "learning_rate": 6.33707250537647e-05,
      "loss": 0.3532,
      "step": 4382
    },
    {
      "epoch": 1.3154261704681873,
      "grad_norm": 0.15112322568893433,
      "learning_rate": 6.332199298631416e-05,
      "loss": 0.4302,
      "step": 4383
    },
    {
      "epoch": 1.3157262905162064,
      "grad_norm": 0.1337057501077652,
      "learning_rate": 6.327327098105426e-05,
      "loss": 0.3649,
      "step": 4384
    },
    {
      "epoch": 1.3160264105642256,
      "grad_norm": 0.1362207680940628,
      "learning_rate": 6.322455905135129e-05,
      "loss": 0.3599,
      "step": 4385
    },
    {
      "epoch": 1.316326530612245,
      "grad_norm": 0.14857180416584015,
      "learning_rate": 6.317585721056889e-05,
      "loss": 0.4208,
      "step": 4386
    },
    {
      "epoch": 1.3166266506602642,
      "grad_norm": 0.1363288164138794,
      "learning_rate": 6.312716547206782e-05,
      "loss": 0.3787,
      "step": 4387
    },
    {
      "epoch": 1.3169267707082832,
      "grad_norm": 0.13450193405151367,
      "learning_rate": 6.307848384920607e-05,
      "loss": 0.3591,
      "step": 4388
    },
    {
      "epoch": 1.3172268907563025,
      "grad_norm": 0.13663101196289062,
      "learning_rate": 6.302981235533896e-05,
      "loss": 0.3525,
      "step": 4389
    },
    {
      "epoch": 1.3175270108043218,
      "grad_norm": 0.14981935918331146,
      "learning_rate": 6.298115100381882e-05,
      "loss": 0.4148,
      "step": 4390
    },
    {
      "epoch": 1.3178271308523408,
      "grad_norm": 0.14152216911315918,
      "learning_rate": 6.293249980799551e-05,
      "loss": 0.3772,
      "step": 4391
    },
    {
      "epoch": 1.31812725090036,
      "grad_norm": 0.1505315750837326,
      "learning_rate": 6.288385878121582e-05,
      "loss": 0.3849,
      "step": 4392
    },
    {
      "epoch": 1.3184273709483794,
      "grad_norm": 0.12864822149276733,
      "learning_rate": 6.283522793682387e-05,
      "loss": 0.321,
      "step": 4393
    },
    {
      "epoch": 1.3187274909963986,
      "grad_norm": 0.14504168927669525,
      "learning_rate": 6.278660728816097e-05,
      "loss": 0.3738,
      "step": 4394
    },
    {
      "epoch": 1.319027611044418,
      "grad_norm": 0.1445467323064804,
      "learning_rate": 6.273799684856568e-05,
      "loss": 0.3945,
      "step": 4395
    },
    {
      "epoch": 1.319327731092437,
      "grad_norm": 0.14859087765216827,
      "learning_rate": 6.268939663137366e-05,
      "loss": 0.4002,
      "step": 4396
    },
    {
      "epoch": 1.3196278511404562,
      "grad_norm": 0.13341623544692993,
      "learning_rate": 6.264080664991785e-05,
      "loss": 0.3408,
      "step": 4397
    },
    {
      "epoch": 1.3199279711884753,
      "grad_norm": 0.13407118618488312,
      "learning_rate": 6.259222691752837e-05,
      "loss": 0.3403,
      "step": 4398
    },
    {
      "epoch": 1.3202280912364945,
      "grad_norm": 0.1552957147359848,
      "learning_rate": 6.254365744753246e-05,
      "loss": 0.4055,
      "step": 4399
    },
    {
      "epoch": 1.3205282112845138,
      "grad_norm": 0.17686431109905243,
      "learning_rate": 6.249509825325467e-05,
      "loss": 0.3901,
      "step": 4400
    },
    {
      "epoch": 1.320828331332533,
      "grad_norm": 0.7685422897338867,
      "learning_rate": 6.24465493480166e-05,
      "loss": 0.3732,
      "step": 4401
    },
    {
      "epoch": 1.3211284513805523,
      "grad_norm": 0.14606846868991852,
      "learning_rate": 6.239801074513714e-05,
      "loss": 0.4273,
      "step": 4402
    },
    {
      "epoch": 1.3214285714285714,
      "grad_norm": 0.14612948894500732,
      "learning_rate": 6.234948245793224e-05,
      "loss": 0.3866,
      "step": 4403
    },
    {
      "epoch": 1.3217286914765907,
      "grad_norm": 0.1363631933927536,
      "learning_rate": 6.230096449971509e-05,
      "loss": 0.3642,
      "step": 4404
    },
    {
      "epoch": 1.3220288115246097,
      "grad_norm": 0.13283447921276093,
      "learning_rate": 6.225245688379607e-05,
      "loss": 0.3577,
      "step": 4405
    },
    {
      "epoch": 1.322328931572629,
      "grad_norm": 0.13503074645996094,
      "learning_rate": 6.220395962348266e-05,
      "loss": 0.3462,
      "step": 4406
    },
    {
      "epoch": 1.3226290516206483,
      "grad_norm": 0.2346821129322052,
      "learning_rate": 6.215547273207953e-05,
      "loss": 0.3796,
      "step": 4407
    },
    {
      "epoch": 1.3229291716686675,
      "grad_norm": 0.1385582536458969,
      "learning_rate": 6.210699622288853e-05,
      "loss": 0.3531,
      "step": 4408
    },
    {
      "epoch": 1.3232292917166868,
      "grad_norm": 0.15664862096309662,
      "learning_rate": 6.205853010920857e-05,
      "loss": 0.4227,
      "step": 4409
    },
    {
      "epoch": 1.3235294117647058,
      "grad_norm": 0.151127889752388,
      "learning_rate": 6.201007440433588e-05,
      "loss": 0.368,
      "step": 4410
    },
    {
      "epoch": 1.3238295318127251,
      "grad_norm": 0.14864672720432281,
      "learning_rate": 6.196162912156363e-05,
      "loss": 0.3827,
      "step": 4411
    },
    {
      "epoch": 1.3241296518607442,
      "grad_norm": 0.1425596922636032,
      "learning_rate": 6.191319427418225e-05,
      "loss": 0.368,
      "step": 4412
    },
    {
      "epoch": 1.3244297719087634,
      "grad_norm": 0.14225400984287262,
      "learning_rate": 6.18647698754793e-05,
      "loss": 0.3683,
      "step": 4413
    },
    {
      "epoch": 1.3247298919567827,
      "grad_norm": 0.14955322444438934,
      "learning_rate": 6.18163559387395e-05,
      "loss": 0.3813,
      "step": 4414
    },
    {
      "epoch": 1.325030012004802,
      "grad_norm": 0.15107311308383942,
      "learning_rate": 6.176795247724452e-05,
      "loss": 0.3703,
      "step": 4415
    },
    {
      "epoch": 1.3253301320528212,
      "grad_norm": 0.1335255354642868,
      "learning_rate": 6.171955950427346e-05,
      "loss": 0.3483,
      "step": 4416
    },
    {
      "epoch": 1.3256302521008403,
      "grad_norm": 0.1327674239873886,
      "learning_rate": 6.167117703310229e-05,
      "loss": 0.3497,
      "step": 4417
    },
    {
      "epoch": 1.3259303721488596,
      "grad_norm": 0.14339998364448547,
      "learning_rate": 6.162280507700418e-05,
      "loss": 0.3877,
      "step": 4418
    },
    {
      "epoch": 1.3262304921968788,
      "grad_norm": 0.1410233974456787,
      "learning_rate": 6.157444364924945e-05,
      "loss": 0.3739,
      "step": 4419
    },
    {
      "epoch": 1.3265306122448979,
      "grad_norm": 0.1621217131614685,
      "learning_rate": 6.152609276310549e-05,
      "loss": 0.3469,
      "step": 4420
    },
    {
      "epoch": 1.3268307322929171,
      "grad_norm": 0.13499078154563904,
      "learning_rate": 6.147775243183684e-05,
      "loss": 0.3543,
      "step": 4421
    },
    {
      "epoch": 1.3271308523409364,
      "grad_norm": 0.1349930465221405,
      "learning_rate": 6.142942266870509e-05,
      "loss": 0.3512,
      "step": 4422
    },
    {
      "epoch": 1.3274309723889557,
      "grad_norm": 0.1554274559020996,
      "learning_rate": 6.138110348696893e-05,
      "loss": 0.3829,
      "step": 4423
    },
    {
      "epoch": 1.3277310924369747,
      "grad_norm": 0.1325419694185257,
      "learning_rate": 6.133279489988421e-05,
      "loss": 0.3456,
      "step": 4424
    },
    {
      "epoch": 1.328031212484994,
      "grad_norm": 0.1462012231349945,
      "learning_rate": 6.128449692070384e-05,
      "loss": 0.3833,
      "step": 4425
    },
    {
      "epoch": 1.3283313325330133,
      "grad_norm": 0.15544404089450836,
      "learning_rate": 6.123620956267778e-05,
      "loss": 0.4109,
      "step": 4426
    },
    {
      "epoch": 1.3286314525810323,
      "grad_norm": 0.1198524534702301,
      "learning_rate": 6.118793283905319e-05,
      "loss": 0.3189,
      "step": 4427
    },
    {
      "epoch": 1.3289315726290516,
      "grad_norm": 0.13243626058101654,
      "learning_rate": 6.113966676307414e-05,
      "loss": 0.3354,
      "step": 4428
    },
    {
      "epoch": 1.3292316926770709,
      "grad_norm": 0.14919179677963257,
      "learning_rate": 6.109141134798194e-05,
      "loss": 0.4128,
      "step": 4429
    },
    {
      "epoch": 1.3295318127250901,
      "grad_norm": 0.1565873920917511,
      "learning_rate": 6.104316660701485e-05,
      "loss": 0.3819,
      "step": 4430
    },
    {
      "epoch": 1.3298319327731092,
      "grad_norm": 0.16061201691627502,
      "learning_rate": 6.099493255340832e-05,
      "loss": 0.406,
      "step": 4431
    },
    {
      "epoch": 1.3301320528211285,
      "grad_norm": 0.14444883167743683,
      "learning_rate": 6.0946709200394804e-05,
      "loss": 0.3822,
      "step": 4432
    },
    {
      "epoch": 1.3304321728691477,
      "grad_norm": 0.1577412635087967,
      "learning_rate": 6.089849656120383e-05,
      "loss": 0.3619,
      "step": 4433
    },
    {
      "epoch": 1.3307322929171668,
      "grad_norm": 0.16566117107868195,
      "learning_rate": 6.085029464906189e-05,
      "loss": 0.3786,
      "step": 4434
    },
    {
      "epoch": 1.331032412965186,
      "grad_norm": 0.14696945250034332,
      "learning_rate": 6.0802103477192775e-05,
      "loss": 0.4221,
      "step": 4435
    },
    {
      "epoch": 1.3313325330132053,
      "grad_norm": 0.14580310881137848,
      "learning_rate": 6.0753923058817084e-05,
      "loss": 0.3926,
      "step": 4436
    },
    {
      "epoch": 1.3316326530612246,
      "grad_norm": 0.1414540857076645,
      "learning_rate": 6.0705753407152565e-05,
      "loss": 0.3649,
      "step": 4437
    },
    {
      "epoch": 1.3319327731092436,
      "grad_norm": 0.1575811207294464,
      "learning_rate": 6.065759453541404e-05,
      "loss": 0.3583,
      "step": 4438
    },
    {
      "epoch": 1.332232893157263,
      "grad_norm": 0.1364908516407013,
      "learning_rate": 6.06094464568133e-05,
      "loss": 0.3741,
      "step": 4439
    },
    {
      "epoch": 1.3325330132052822,
      "grad_norm": 0.14941635727882385,
      "learning_rate": 6.056130918455929e-05,
      "loss": 0.4276,
      "step": 4440
    },
    {
      "epoch": 1.3328331332533012,
      "grad_norm": 0.1328786164522171,
      "learning_rate": 6.0513182731857886e-05,
      "loss": 0.3223,
      "step": 4441
    },
    {
      "epoch": 1.3331332533013205,
      "grad_norm": 0.14873063564300537,
      "learning_rate": 6.0465067111912e-05,
      "loss": 0.3706,
      "step": 4442
    },
    {
      "epoch": 1.3334333733493398,
      "grad_norm": 0.14439992606639862,
      "learning_rate": 6.041696233792162e-05,
      "loss": 0.3943,
      "step": 4443
    },
    {
      "epoch": 1.333733493397359,
      "grad_norm": 0.2462586611509323,
      "learning_rate": 6.0368868423083745e-05,
      "loss": 0.3878,
      "step": 4444
    },
    {
      "epoch": 1.334033613445378,
      "grad_norm": 0.15387386083602905,
      "learning_rate": 6.032078538059236e-05,
      "loss": 0.4222,
      "step": 4445
    },
    {
      "epoch": 1.3343337334933973,
      "grad_norm": 0.1518174707889557,
      "learning_rate": 6.0272713223638564e-05,
      "loss": 0.4005,
      "step": 4446
    },
    {
      "epoch": 1.3346338535414166,
      "grad_norm": 0.15900781750679016,
      "learning_rate": 6.022465196541035e-05,
      "loss": 0.4209,
      "step": 4447
    },
    {
      "epoch": 1.3349339735894357,
      "grad_norm": 0.14064718782901764,
      "learning_rate": 6.0176601619092754e-05,
      "loss": 0.3659,
      "step": 4448
    },
    {
      "epoch": 1.335234093637455,
      "grad_norm": 0.14249639213085175,
      "learning_rate": 6.012856219786789e-05,
      "loss": 0.3847,
      "step": 4449
    },
    {
      "epoch": 1.3355342136854742,
      "grad_norm": 0.2270103543996811,
      "learning_rate": 6.0080533714914766e-05,
      "loss": 0.4128,
      "step": 4450
    },
    {
      "epoch": 1.3358343337334935,
      "grad_norm": 0.15802806615829468,
      "learning_rate": 6.00325161834095e-05,
      "loss": 0.4154,
      "step": 4451
    },
    {
      "epoch": 1.3361344537815127,
      "grad_norm": 0.13083720207214355,
      "learning_rate": 5.9984509616525154e-05,
      "loss": 0.3481,
      "step": 4452
    },
    {
      "epoch": 1.3364345738295318,
      "grad_norm": 0.1440601795911789,
      "learning_rate": 5.99365140274317e-05,
      "loss": 0.3868,
      "step": 4453
    },
    {
      "epoch": 1.336734693877551,
      "grad_norm": 0.1382712870836258,
      "learning_rate": 5.988852942929628e-05,
      "loss": 0.3479,
      "step": 4454
    },
    {
      "epoch": 1.33703481392557,
      "grad_norm": 0.1510000079870224,
      "learning_rate": 5.984055583528285e-05,
      "loss": 0.3897,
      "step": 4455
    },
    {
      "epoch": 1.3373349339735894,
      "grad_norm": 0.14380814135074615,
      "learning_rate": 5.979259325855242e-05,
      "loss": 0.4107,
      "step": 4456
    },
    {
      "epoch": 1.3376350540216086,
      "grad_norm": 0.13862638175487518,
      "learning_rate": 5.974464171226301e-05,
      "loss": 0.3668,
      "step": 4457
    },
    {
      "epoch": 1.337935174069628,
      "grad_norm": 0.1261635273694992,
      "learning_rate": 5.969670120956956e-05,
      "loss": 0.3117,
      "step": 4458
    },
    {
      "epoch": 1.3382352941176472,
      "grad_norm": 0.16036823391914368,
      "learning_rate": 5.9648771763623944e-05,
      "loss": 0.3357,
      "step": 4459
    },
    {
      "epoch": 1.3385354141656662,
      "grad_norm": 0.1365584433078766,
      "learning_rate": 5.9600853387575163e-05,
      "loss": 0.326,
      "step": 4460
    },
    {
      "epoch": 1.3388355342136855,
      "grad_norm": 0.1372981071472168,
      "learning_rate": 5.9552946094568975e-05,
      "loss": 0.3527,
      "step": 4461
    },
    {
      "epoch": 1.3391356542617046,
      "grad_norm": 0.13922113180160522,
      "learning_rate": 5.950504989774825e-05,
      "loss": 0.3296,
      "step": 4462
    },
    {
      "epoch": 1.3394357743097238,
      "grad_norm": 0.1392875760793686,
      "learning_rate": 5.945716481025275e-05,
      "loss": 0.3425,
      "step": 4463
    },
    {
      "epoch": 1.339735894357743,
      "grad_norm": 0.1342422068119049,
      "learning_rate": 5.940929084521918e-05,
      "loss": 0.3487,
      "step": 4464
    },
    {
      "epoch": 1.3400360144057624,
      "grad_norm": 0.13472548127174377,
      "learning_rate": 5.9361428015781275e-05,
      "loss": 0.3389,
      "step": 4465
    },
    {
      "epoch": 1.3403361344537816,
      "grad_norm": 0.1356363594532013,
      "learning_rate": 5.931357633506957e-05,
      "loss": 0.3432,
      "step": 4466
    },
    {
      "epoch": 1.3406362545018007,
      "grad_norm": 0.13877129554748535,
      "learning_rate": 5.926573581621167e-05,
      "loss": 0.3675,
      "step": 4467
    },
    {
      "epoch": 1.34093637454982,
      "grad_norm": 0.1482432633638382,
      "learning_rate": 5.921790647233205e-05,
      "loss": 0.3665,
      "step": 4468
    },
    {
      "epoch": 1.341236494597839,
      "grad_norm": 0.13379403948783875,
      "learning_rate": 5.9170088316552176e-05,
      "loss": 0.363,
      "step": 4469
    },
    {
      "epoch": 1.3415366146458583,
      "grad_norm": 0.14877451956272125,
      "learning_rate": 5.912228136199038e-05,
      "loss": 0.3882,
      "step": 4470
    },
    {
      "epoch": 1.3418367346938775,
      "grad_norm": 0.1336432844400406,
      "learning_rate": 5.907448562176201e-05,
      "loss": 0.3507,
      "step": 4471
    },
    {
      "epoch": 1.3421368547418968,
      "grad_norm": 0.3251681327819824,
      "learning_rate": 5.902670110897917e-05,
      "loss": 0.3442,
      "step": 4472
    },
    {
      "epoch": 1.342436974789916,
      "grad_norm": 0.13489368557929993,
      "learning_rate": 5.89789278367511e-05,
      "loss": 0.3294,
      "step": 4473
    },
    {
      "epoch": 1.3427370948379351,
      "grad_norm": 0.1663886308670044,
      "learning_rate": 5.8931165818183784e-05,
      "loss": 0.3603,
      "step": 4474
    },
    {
      "epoch": 1.3430372148859544,
      "grad_norm": 0.14255166053771973,
      "learning_rate": 5.888341506638021e-05,
      "loss": 0.371,
      "step": 4475
    },
    {
      "epoch": 1.3433373349339737,
      "grad_norm": 0.13316510617733002,
      "learning_rate": 5.8835675594440256e-05,
      "loss": 0.3443,
      "step": 4476
    },
    {
      "epoch": 1.3436374549819927,
      "grad_norm": 0.1348668932914734,
      "learning_rate": 5.87879474154607e-05,
      "loss": 0.3486,
      "step": 4477
    },
    {
      "epoch": 1.343937575030012,
      "grad_norm": 0.1510106921195984,
      "learning_rate": 5.874023054253516e-05,
      "loss": 0.41,
      "step": 4478
    },
    {
      "epoch": 1.3442376950780313,
      "grad_norm": 0.1550380438566208,
      "learning_rate": 5.869252498875432e-05,
      "loss": 0.4148,
      "step": 4479
    },
    {
      "epoch": 1.3445378151260505,
      "grad_norm": 0.1395397037267685,
      "learning_rate": 5.864483076720555e-05,
      "loss": 0.37,
      "step": 4480
    },
    {
      "epoch": 1.3448379351740696,
      "grad_norm": 0.15722878277301788,
      "learning_rate": 5.859714789097328e-05,
      "loss": 0.4301,
      "step": 4481
    },
    {
      "epoch": 1.3451380552220888,
      "grad_norm": 0.14296914637088776,
      "learning_rate": 5.854947637313872e-05,
      "loss": 0.367,
      "step": 4482
    },
    {
      "epoch": 1.3454381752701081,
      "grad_norm": 0.14038380980491638,
      "learning_rate": 5.8501816226780014e-05,
      "loss": 0.3523,
      "step": 4483
    },
    {
      "epoch": 1.3457382953181272,
      "grad_norm": 0.16099123656749725,
      "learning_rate": 5.845416746497221e-05,
      "loss": 0.3836,
      "step": 4484
    },
    {
      "epoch": 1.3460384153661464,
      "grad_norm": 0.16637754440307617,
      "learning_rate": 5.8406530100787196e-05,
      "loss": 0.4044,
      "step": 4485
    },
    {
      "epoch": 1.3463385354141657,
      "grad_norm": 0.1434050351381302,
      "learning_rate": 5.835890414729366e-05,
      "loss": 0.3565,
      "step": 4486
    },
    {
      "epoch": 1.346638655462185,
      "grad_norm": 0.13820092380046844,
      "learning_rate": 5.831128961755734e-05,
      "loss": 0.3659,
      "step": 4487
    },
    {
      "epoch": 1.346938775510204,
      "grad_norm": 0.13020813465118408,
      "learning_rate": 5.8263686524640604e-05,
      "loss": 0.3321,
      "step": 4488
    },
    {
      "epoch": 1.3472388955582233,
      "grad_norm": 0.13537994027137756,
      "learning_rate": 5.821609488160298e-05,
      "loss": 0.3601,
      "step": 4489
    },
    {
      "epoch": 1.3475390156062426,
      "grad_norm": 0.1363266408443451,
      "learning_rate": 5.81685147015006e-05,
      "loss": 0.351,
      "step": 4490
    },
    {
      "epoch": 1.3478391356542616,
      "grad_norm": 0.1465909630060196,
      "learning_rate": 5.81209459973865e-05,
      "loss": 0.3895,
      "step": 4491
    },
    {
      "epoch": 1.3481392557022809,
      "grad_norm": 0.1680755615234375,
      "learning_rate": 5.8073388782310664e-05,
      "loss": 0.3673,
      "step": 4492
    },
    {
      "epoch": 1.3484393757503002,
      "grad_norm": 0.1574047952890396,
      "learning_rate": 5.802584306931991e-05,
      "loss": 0.3667,
      "step": 4493
    },
    {
      "epoch": 1.3487394957983194,
      "grad_norm": 0.14274361729621887,
      "learning_rate": 5.7978308871457754e-05,
      "loss": 0.3709,
      "step": 4494
    },
    {
      "epoch": 1.3490396158463385,
      "grad_norm": 0.14271649718284607,
      "learning_rate": 5.793078620176475e-05,
      "loss": 0.3577,
      "step": 4495
    },
    {
      "epoch": 1.3493397358943577,
      "grad_norm": 0.14356018602848053,
      "learning_rate": 5.788327507327814e-05,
      "loss": 0.3825,
      "step": 4496
    },
    {
      "epoch": 1.349639855942377,
      "grad_norm": 0.16204307973384857,
      "learning_rate": 5.7835775499032074e-05,
      "loss": 0.4131,
      "step": 4497
    },
    {
      "epoch": 1.349939975990396,
      "grad_norm": 0.156327024102211,
      "learning_rate": 5.778828749205756e-05,
      "loss": 0.4101,
      "step": 4498
    },
    {
      "epoch": 1.3502400960384153,
      "grad_norm": 0.137571319937706,
      "learning_rate": 5.7740811065382295e-05,
      "loss": 0.3685,
      "step": 4499
    },
    {
      "epoch": 1.3505402160864346,
      "grad_norm": 0.12720349431037903,
      "learning_rate": 5.769334623203095e-05,
      "loss": 0.3303,
      "step": 4500
    },
    {
      "epoch": 1.3508403361344539,
      "grad_norm": 0.15204890072345734,
      "learning_rate": 5.764589300502501e-05,
      "loss": 0.3668,
      "step": 4501
    },
    {
      "epoch": 1.351140456182473,
      "grad_norm": 0.22256812453269958,
      "learning_rate": 5.7598451397382614e-05,
      "loss": 0.3871,
      "step": 4502
    },
    {
      "epoch": 1.3514405762304922,
      "grad_norm": 0.15941122174263,
      "learning_rate": 5.755102142211892e-05,
      "loss": 0.4042,
      "step": 4503
    },
    {
      "epoch": 1.3517406962785115,
      "grad_norm": 0.1835334300994873,
      "learning_rate": 5.7503603092245714e-05,
      "loss": 0.3371,
      "step": 4504
    },
    {
      "epoch": 1.3520408163265305,
      "grad_norm": 0.14911781251430511,
      "learning_rate": 5.745619642077171e-05,
      "loss": 0.3893,
      "step": 4505
    },
    {
      "epoch": 1.3523409363745498,
      "grad_norm": 0.20111630856990814,
      "learning_rate": 5.740880142070242e-05,
      "loss": 0.4041,
      "step": 4506
    },
    {
      "epoch": 1.352641056422569,
      "grad_norm": 0.14431169629096985,
      "learning_rate": 5.736141810504009e-05,
      "loss": 0.3991,
      "step": 4507
    },
    {
      "epoch": 1.3529411764705883,
      "grad_norm": 0.13495999574661255,
      "learning_rate": 5.731404648678374e-05,
      "loss": 0.3401,
      "step": 4508
    },
    {
      "epoch": 1.3532412965186074,
      "grad_norm": 0.175273135304451,
      "learning_rate": 5.7266686578929286e-05,
      "loss": 0.3792,
      "step": 4509
    },
    {
      "epoch": 1.3535414165666266,
      "grad_norm": 0.14945167303085327,
      "learning_rate": 5.7219338394469356e-05,
      "loss": 0.3717,
      "step": 4510
    },
    {
      "epoch": 1.353841536614646,
      "grad_norm": 0.157761350274086,
      "learning_rate": 5.7172001946393426e-05,
      "loss": 0.3739,
      "step": 4511
    },
    {
      "epoch": 1.354141656662665,
      "grad_norm": 0.13628393411636353,
      "learning_rate": 5.712467724768766e-05,
      "loss": 0.3557,
      "step": 4512
    },
    {
      "epoch": 1.3544417767106842,
      "grad_norm": 0.16030484437942505,
      "learning_rate": 5.7077364311335e-05,
      "loss": 0.4011,
      "step": 4513
    },
    {
      "epoch": 1.3547418967587035,
      "grad_norm": 0.17973542213439941,
      "learning_rate": 5.703006315031534e-05,
      "loss": 0.3889,
      "step": 4514
    },
    {
      "epoch": 1.3550420168067228,
      "grad_norm": 0.13913069665431976,
      "learning_rate": 5.6982773777605125e-05,
      "loss": 0.3828,
      "step": 4515
    },
    {
      "epoch": 1.355342136854742,
      "grad_norm": 0.1738550066947937,
      "learning_rate": 5.693549620617764e-05,
      "loss": 0.3889,
      "step": 4516
    },
    {
      "epoch": 1.355642256902761,
      "grad_norm": 0.14184638857841492,
      "learning_rate": 5.6888230449002954e-05,
      "loss": 0.3882,
      "step": 4517
    },
    {
      "epoch": 1.3559423769507803,
      "grad_norm": 0.14063705503940582,
      "learning_rate": 5.684097651904791e-05,
      "loss": 0.3561,
      "step": 4518
    },
    {
      "epoch": 1.3562424969987994,
      "grad_norm": 0.15086719393730164,
      "learning_rate": 5.67937344292761e-05,
      "loss": 0.3944,
      "step": 4519
    },
    {
      "epoch": 1.3565426170468187,
      "grad_norm": 0.13202974200248718,
      "learning_rate": 5.674650419264782e-05,
      "loss": 0.3302,
      "step": 4520
    },
    {
      "epoch": 1.356842737094838,
      "grad_norm": 0.14360421895980835,
      "learning_rate": 5.6699285822120116e-05,
      "loss": 0.3883,
      "step": 4521
    },
    {
      "epoch": 1.3571428571428572,
      "grad_norm": 0.15063901245594025,
      "learning_rate": 5.6652079330646834e-05,
      "loss": 0.3897,
      "step": 4522
    },
    {
      "epoch": 1.3574429771908765,
      "grad_norm": 0.14402541518211365,
      "learning_rate": 5.660488473117857e-05,
      "loss": 0.3493,
      "step": 4523
    },
    {
      "epoch": 1.3577430972388955,
      "grad_norm": 0.14424912631511688,
      "learning_rate": 5.6557702036662555e-05,
      "loss": 0.3862,
      "step": 4524
    },
    {
      "epoch": 1.3580432172869148,
      "grad_norm": 0.14334195852279663,
      "learning_rate": 5.651053126004284e-05,
      "loss": 0.3907,
      "step": 4525
    },
    {
      "epoch": 1.3583433373349338,
      "grad_norm": 0.13699807226657867,
      "learning_rate": 5.646337241426024e-05,
      "loss": 0.3263,
      "step": 4526
    },
    {
      "epoch": 1.3586434573829531,
      "grad_norm": 0.13770411908626556,
      "learning_rate": 5.6416225512252166e-05,
      "loss": 0.3814,
      "step": 4527
    },
    {
      "epoch": 1.3589435774309724,
      "grad_norm": 0.16407433152198792,
      "learning_rate": 5.63690905669529e-05,
      "loss": 0.3604,
      "step": 4528
    },
    {
      "epoch": 1.3592436974789917,
      "grad_norm": 0.13977479934692383,
      "learning_rate": 5.6321967591293314e-05,
      "loss": 0.3662,
      "step": 4529
    },
    {
      "epoch": 1.359543817527011,
      "grad_norm": 0.14484171569347382,
      "learning_rate": 5.6274856598201066e-05,
      "loss": 0.3747,
      "step": 4530
    },
    {
      "epoch": 1.35984393757503,
      "grad_norm": 0.14661836624145508,
      "learning_rate": 5.622775760060057e-05,
      "loss": 0.3541,
      "step": 4531
    },
    {
      "epoch": 1.3601440576230492,
      "grad_norm": 0.1281379908323288,
      "learning_rate": 5.618067061141283e-05,
      "loss": 0.2959,
      "step": 4532
    },
    {
      "epoch": 1.3604441776710683,
      "grad_norm": 0.13950206339359283,
      "learning_rate": 5.613359564355569e-05,
      "loss": 0.3515,
      "step": 4533
    },
    {
      "epoch": 1.3607442977190876,
      "grad_norm": 0.1432461142539978,
      "learning_rate": 5.608653270994353e-05,
      "loss": 0.3415,
      "step": 4534
    },
    {
      "epoch": 1.3610444177671068,
      "grad_norm": 0.14423154294490814,
      "learning_rate": 5.6039481823487606e-05,
      "loss": 0.3798,
      "step": 4535
    },
    {
      "epoch": 1.361344537815126,
      "grad_norm": 0.1409810185432434,
      "learning_rate": 5.599244299709578e-05,
      "loss": 0.36,
      "step": 4536
    },
    {
      "epoch": 1.3616446578631454,
      "grad_norm": 0.13379138708114624,
      "learning_rate": 5.594541624367262e-05,
      "loss": 0.3241,
      "step": 4537
    },
    {
      "epoch": 1.3619447779111644,
      "grad_norm": 0.13153435289859772,
      "learning_rate": 5.589840157611929e-05,
      "loss": 0.3737,
      "step": 4538
    },
    {
      "epoch": 1.3622448979591837,
      "grad_norm": 0.1384473741054535,
      "learning_rate": 5.585139900733385e-05,
      "loss": 0.3801,
      "step": 4539
    },
    {
      "epoch": 1.362545018007203,
      "grad_norm": 0.14458291232585907,
      "learning_rate": 5.580440855021083e-05,
      "loss": 0.3984,
      "step": 4540
    },
    {
      "epoch": 1.362845138055222,
      "grad_norm": 0.14624524116516113,
      "learning_rate": 5.575743021764159e-05,
      "loss": 0.3836,
      "step": 4541
    },
    {
      "epoch": 1.3631452581032413,
      "grad_norm": 0.14253798127174377,
      "learning_rate": 5.571046402251401e-05,
      "loss": 0.375,
      "step": 4542
    },
    {
      "epoch": 1.3634453781512605,
      "grad_norm": 0.13562585413455963,
      "learning_rate": 5.566350997771279e-05,
      "loss": 0.3354,
      "step": 4543
    },
    {
      "epoch": 1.3637454981992798,
      "grad_norm": 0.19224855303764343,
      "learning_rate": 5.561656809611925e-05,
      "loss": 0.3896,
      "step": 4544
    },
    {
      "epoch": 1.3640456182472989,
      "grad_norm": 0.15661108493804932,
      "learning_rate": 5.556963839061133e-05,
      "loss": 0.3606,
      "step": 4545
    },
    {
      "epoch": 1.3643457382953181,
      "grad_norm": 0.1251692771911621,
      "learning_rate": 5.55227208740636e-05,
      "loss": 0.3294,
      "step": 4546
    },
    {
      "epoch": 1.3646458583433374,
      "grad_norm": 0.22994621098041534,
      "learning_rate": 5.547581555934742e-05,
      "loss": 0.3472,
      "step": 4547
    },
    {
      "epoch": 1.3649459783913565,
      "grad_norm": 0.1264163851737976,
      "learning_rate": 5.542892245933069e-05,
      "loss": 0.3172,
      "step": 4548
    },
    {
      "epoch": 1.3652460984393757,
      "grad_norm": 0.12400945276021957,
      "learning_rate": 5.538204158687803e-05,
      "loss": 0.3014,
      "step": 4549
    },
    {
      "epoch": 1.365546218487395,
      "grad_norm": 0.1770714521408081,
      "learning_rate": 5.533517295485062e-05,
      "loss": 0.3442,
      "step": 4550
    },
    {
      "epoch": 1.3658463385354143,
      "grad_norm": 0.14922089874744415,
      "learning_rate": 5.5288316576106357e-05,
      "loss": 0.3843,
      "step": 4551
    },
    {
      "epoch": 1.3661464585834333,
      "grad_norm": 0.14821475744247437,
      "learning_rate": 5.524147246349979e-05,
      "loss": 0.3758,
      "step": 4552
    },
    {
      "epoch": 1.3664465786314526,
      "grad_norm": 0.137704998254776,
      "learning_rate": 5.519464062988202e-05,
      "loss": 0.3639,
      "step": 4553
    },
    {
      "epoch": 1.3667466986794718,
      "grad_norm": 0.15100188553333282,
      "learning_rate": 5.514782108810079e-05,
      "loss": 0.3729,
      "step": 4554
    },
    {
      "epoch": 1.367046818727491,
      "grad_norm": 0.14446976780891418,
      "learning_rate": 5.5101013851000547e-05,
      "loss": 0.3599,
      "step": 4555
    },
    {
      "epoch": 1.3673469387755102,
      "grad_norm": 0.1340705007314682,
      "learning_rate": 5.505421893142235e-05,
      "loss": 0.3555,
      "step": 4556
    },
    {
      "epoch": 1.3676470588235294,
      "grad_norm": 0.14943592250347137,
      "learning_rate": 5.500743634220379e-05,
      "loss": 0.3818,
      "step": 4557
    },
    {
      "epoch": 1.3679471788715487,
      "grad_norm": 0.13905028998851776,
      "learning_rate": 5.496066609617918e-05,
      "loss": 0.3572,
      "step": 4558
    },
    {
      "epoch": 1.3682472989195678,
      "grad_norm": 0.14025895297527313,
      "learning_rate": 5.4913908206179323e-05,
      "loss": 0.3827,
      "step": 4559
    },
    {
      "epoch": 1.368547418967587,
      "grad_norm": 0.14170074462890625,
      "learning_rate": 5.486716268503182e-05,
      "loss": 0.3596,
      "step": 4560
    },
    {
      "epoch": 1.3688475390156063,
      "grad_norm": 0.15285390615463257,
      "learning_rate": 5.482042954556073e-05,
      "loss": 0.3543,
      "step": 4561
    },
    {
      "epoch": 1.3691476590636253,
      "grad_norm": 0.172708198428154,
      "learning_rate": 5.4773708800586684e-05,
      "loss": 0.3804,
      "step": 4562
    },
    {
      "epoch": 1.3694477791116446,
      "grad_norm": 0.13485442101955414,
      "learning_rate": 5.4727000462927046e-05,
      "loss": 0.3591,
      "step": 4563
    },
    {
      "epoch": 1.3697478991596639,
      "grad_norm": 0.16145791113376617,
      "learning_rate": 5.468030454539574e-05,
      "loss": 0.3517,
      "step": 4564
    },
    {
      "epoch": 1.3700480192076832,
      "grad_norm": 0.1492297351360321,
      "learning_rate": 5.4633621060803185e-05,
      "loss": 0.3921,
      "step": 4565
    },
    {
      "epoch": 1.3703481392557022,
      "grad_norm": 0.14852483570575714,
      "learning_rate": 5.458695002195655e-05,
      "loss": 0.3309,
      "step": 4566
    },
    {
      "epoch": 1.3706482593037215,
      "grad_norm": 0.14032091200351715,
      "learning_rate": 5.4540291441659376e-05,
      "loss": 0.3533,
      "step": 4567
    },
    {
      "epoch": 1.3709483793517407,
      "grad_norm": 0.1701730489730835,
      "learning_rate": 5.449364533271199e-05,
      "loss": 0.3812,
      "step": 4568
    },
    {
      "epoch": 1.3712484993997598,
      "grad_norm": 0.1463678479194641,
      "learning_rate": 5.444701170791125e-05,
      "loss": 0.4203,
      "step": 4569
    },
    {
      "epoch": 1.371548619447779,
      "grad_norm": 0.1414550095796585,
      "learning_rate": 5.440039058005047e-05,
      "loss": 0.397,
      "step": 4570
    },
    {
      "epoch": 1.3718487394957983,
      "grad_norm": 0.1401282399892807,
      "learning_rate": 5.4353781961919694e-05,
      "loss": 0.3497,
      "step": 4571
    },
    {
      "epoch": 1.3721488595438176,
      "grad_norm": 0.14443713426589966,
      "learning_rate": 5.4307185866305386e-05,
      "loss": 0.3754,
      "step": 4572
    },
    {
      "epoch": 1.3724489795918369,
      "grad_norm": 0.13361041247844696,
      "learning_rate": 5.4260602305990705e-05,
      "loss": 0.3557,
      "step": 4573
    },
    {
      "epoch": 1.372749099639856,
      "grad_norm": 0.1520872563123703,
      "learning_rate": 5.4214031293755354e-05,
      "loss": 0.3785,
      "step": 4574
    },
    {
      "epoch": 1.3730492196878752,
      "grad_norm": 0.12615153193473816,
      "learning_rate": 5.416747284237544e-05,
      "loss": 0.2981,
      "step": 4575
    },
    {
      "epoch": 1.3733493397358942,
      "grad_norm": 0.17696449160575867,
      "learning_rate": 5.412092696462383e-05,
      "loss": 0.4158,
      "step": 4576
    },
    {
      "epoch": 1.3736494597839135,
      "grad_norm": 0.1244591623544693,
      "learning_rate": 5.407439367326988e-05,
      "loss": 0.2827,
      "step": 4577
    },
    {
      "epoch": 1.3739495798319328,
      "grad_norm": 0.13495191931724548,
      "learning_rate": 5.402787298107936e-05,
      "loss": 0.361,
      "step": 4578
    },
    {
      "epoch": 1.374249699879952,
      "grad_norm": 0.15213558077812195,
      "learning_rate": 5.398136490081479e-05,
      "loss": 0.3548,
      "step": 4579
    },
    {
      "epoch": 1.3745498199279713,
      "grad_norm": 0.14480598270893097,
      "learning_rate": 5.393486944523505e-05,
      "loss": 0.3635,
      "step": 4580
    },
    {
      "epoch": 1.3748499399759904,
      "grad_norm": 0.14267009496688843,
      "learning_rate": 5.388838662709566e-05,
      "loss": 0.3674,
      "step": 4581
    },
    {
      "epoch": 1.3751500600240096,
      "grad_norm": 0.1517316848039627,
      "learning_rate": 5.384191645914869e-05,
      "loss": 0.41,
      "step": 4582
    },
    {
      "epoch": 1.3754501800720287,
      "grad_norm": 0.14075906574726105,
      "learning_rate": 5.3795458954142664e-05,
      "loss": 0.3578,
      "step": 4583
    },
    {
      "epoch": 1.375750300120048,
      "grad_norm": 0.13696734607219696,
      "learning_rate": 5.3749014124822626e-05,
      "loss": 0.3366,
      "step": 4584
    },
    {
      "epoch": 1.3760504201680672,
      "grad_norm": 0.15023215115070343,
      "learning_rate": 5.3702581983930234e-05,
      "loss": 0.362,
      "step": 4585
    },
    {
      "epoch": 1.3763505402160865,
      "grad_norm": 0.1412237584590912,
      "learning_rate": 5.365616254420364e-05,
      "loss": 0.3591,
      "step": 4586
    },
    {
      "epoch": 1.3766506602641058,
      "grad_norm": 0.14088405668735504,
      "learning_rate": 5.3609755818377396e-05,
      "loss": 0.3458,
      "step": 4587
    },
    {
      "epoch": 1.3769507803121248,
      "grad_norm": 0.13995389640331268,
      "learning_rate": 5.356336181918271e-05,
      "loss": 0.366,
      "step": 4588
    },
    {
      "epoch": 1.377250900360144,
      "grad_norm": 0.13612928986549377,
      "learning_rate": 5.351698055934724e-05,
      "loss": 0.3704,
      "step": 4589
    },
    {
      "epoch": 1.3775510204081631,
      "grad_norm": 0.13480474054813385,
      "learning_rate": 5.347061205159519e-05,
      "loss": 0.372,
      "step": 4590
    },
    {
      "epoch": 1.3778511404561824,
      "grad_norm": 0.15353074669837952,
      "learning_rate": 5.3424256308647194e-05,
      "loss": 0.3824,
      "step": 4591
    },
    {
      "epoch": 1.3781512605042017,
      "grad_norm": 0.14670152962207794,
      "learning_rate": 5.337791334322038e-05,
      "loss": 0.3703,
      "step": 4592
    },
    {
      "epoch": 1.378451380552221,
      "grad_norm": 0.16588912904262543,
      "learning_rate": 5.333158316802842e-05,
      "loss": 0.4031,
      "step": 4593
    },
    {
      "epoch": 1.3787515006002402,
      "grad_norm": 0.14609313011169434,
      "learning_rate": 5.328526579578156e-05,
      "loss": 0.3278,
      "step": 4594
    },
    {
      "epoch": 1.3790516206482593,
      "grad_norm": 0.14630454778671265,
      "learning_rate": 5.323896123918631e-05,
      "loss": 0.3869,
      "step": 4595
    },
    {
      "epoch": 1.3793517406962785,
      "grad_norm": 0.23194457590579987,
      "learning_rate": 5.3192669510945905e-05,
      "loss": 0.4115,
      "step": 4596
    },
    {
      "epoch": 1.3796518607442978,
      "grad_norm": 0.12587039172649384,
      "learning_rate": 5.3146390623759856e-05,
      "loss": 0.3213,
      "step": 4597
    },
    {
      "epoch": 1.3799519807923168,
      "grad_norm": 0.14738459885120392,
      "learning_rate": 5.3100124590324294e-05,
      "loss": 0.3602,
      "step": 4598
    },
    {
      "epoch": 1.3802521008403361,
      "grad_norm": 0.14517807960510254,
      "learning_rate": 5.3053871423331805e-05,
      "loss": 0.4087,
      "step": 4599
    },
    {
      "epoch": 1.3805522208883554,
      "grad_norm": 0.19121530652046204,
      "learning_rate": 5.3007631135471334e-05,
      "loss": 0.3497,
      "step": 4600
    },
    {
      "epoch": 1.3808523409363747,
      "grad_norm": 0.13413242995738983,
      "learning_rate": 5.2961403739428415e-05,
      "loss": 0.3384,
      "step": 4601
    },
    {
      "epoch": 1.3811524609843937,
      "grad_norm": 0.13237817585468292,
      "learning_rate": 5.291518924788507e-05,
      "loss": 0.3211,
      "step": 4602
    },
    {
      "epoch": 1.381452581032413,
      "grad_norm": 0.142702117562294,
      "learning_rate": 5.28689876735196e-05,
      "loss": 0.3559,
      "step": 4603
    },
    {
      "epoch": 1.3817527010804322,
      "grad_norm": 0.1560351699590683,
      "learning_rate": 5.2822799029006964e-05,
      "loss": 0.3976,
      "step": 4604
    },
    {
      "epoch": 1.3820528211284513,
      "grad_norm": 0.14791634678840637,
      "learning_rate": 5.277662332701842e-05,
      "loss": 0.3725,
      "step": 4605
    },
    {
      "epoch": 1.3823529411764706,
      "grad_norm": 0.13751675188541412,
      "learning_rate": 5.2730460580221774e-05,
      "loss": 0.344,
      "step": 4606
    },
    {
      "epoch": 1.3826530612244898,
      "grad_norm": 0.142083540558815,
      "learning_rate": 5.268431080128129e-05,
      "loss": 0.3921,
      "step": 4607
    },
    {
      "epoch": 1.382953181272509,
      "grad_norm": 0.1370341032743454,
      "learning_rate": 5.2638174002857546e-05,
      "loss": 0.3543,
      "step": 4608
    },
    {
      "epoch": 1.3832533013205282,
      "grad_norm": 0.141767218708992,
      "learning_rate": 5.259205019760772e-05,
      "loss": 0.3679,
      "step": 4609
    },
    {
      "epoch": 1.3835534213685474,
      "grad_norm": 0.6375341415405273,
      "learning_rate": 5.2545939398185284e-05,
      "loss": 0.3338,
      "step": 4610
    },
    {
      "epoch": 1.3838535414165667,
      "grad_norm": 0.1471729576587677,
      "learning_rate": 5.249984161724023e-05,
      "loss": 0.4011,
      "step": 4611
    },
    {
      "epoch": 1.3841536614645857,
      "grad_norm": 0.14878086745738983,
      "learning_rate": 5.2453756867419e-05,
      "loss": 0.3518,
      "step": 4612
    },
    {
      "epoch": 1.384453781512605,
      "grad_norm": 0.1476067155599594,
      "learning_rate": 5.240768516136436e-05,
      "loss": 0.3717,
      "step": 4613
    },
    {
      "epoch": 1.3847539015606243,
      "grad_norm": 0.14189663529396057,
      "learning_rate": 5.236162651171557e-05,
      "loss": 0.3301,
      "step": 4614
    },
    {
      "epoch": 1.3850540216086435,
      "grad_norm": 0.14110246300697327,
      "learning_rate": 5.231558093110832e-05,
      "loss": 0.3703,
      "step": 4615
    },
    {
      "epoch": 1.3853541416566626,
      "grad_norm": 0.13561809062957764,
      "learning_rate": 5.226954843217468e-05,
      "loss": 0.3044,
      "step": 4616
    },
    {
      "epoch": 1.3856542617046819,
      "grad_norm": 0.1426294595003128,
      "learning_rate": 5.222352902754307e-05,
      "loss": 0.3423,
      "step": 4617
    },
    {
      "epoch": 1.3859543817527011,
      "grad_norm": 0.1319276988506317,
      "learning_rate": 5.2177522729838444e-05,
      "loss": 0.3246,
      "step": 4618
    },
    {
      "epoch": 1.3862545018007202,
      "grad_norm": 0.1312565952539444,
      "learning_rate": 5.21315295516821e-05,
      "loss": 0.3289,
      "step": 4619
    },
    {
      "epoch": 1.3865546218487395,
      "grad_norm": 0.14471866190433502,
      "learning_rate": 5.208554950569178e-05,
      "loss": 0.3788,
      "step": 4620
    },
    {
      "epoch": 1.3868547418967587,
      "grad_norm": 0.14428383111953735,
      "learning_rate": 5.203958260448152e-05,
      "loss": 0.3806,
      "step": 4621
    },
    {
      "epoch": 1.387154861944778,
      "grad_norm": 0.14408062398433685,
      "learning_rate": 5.199362886066177e-05,
      "loss": 0.3829,
      "step": 4622
    },
    {
      "epoch": 1.387454981992797,
      "grad_norm": 0.1601879894733429,
      "learning_rate": 5.194768828683953e-05,
      "loss": 0.3568,
      "step": 4623
    },
    {
      "epoch": 1.3877551020408163,
      "grad_norm": 0.12715758383274078,
      "learning_rate": 5.190176089561802e-05,
      "loss": 0.3262,
      "step": 4624
    },
    {
      "epoch": 1.3880552220888356,
      "grad_norm": 0.1314728856086731,
      "learning_rate": 5.1855846699596866e-05,
      "loss": 0.3452,
      "step": 4625
    },
    {
      "epoch": 1.3883553421368546,
      "grad_norm": 0.14861977100372314,
      "learning_rate": 5.18099457113721e-05,
      "loss": 0.3532,
      "step": 4626
    },
    {
      "epoch": 1.388655462184874,
      "grad_norm": 0.19684113562107086,
      "learning_rate": 5.17640579435362e-05,
      "loss": 0.3177,
      "step": 4627
    },
    {
      "epoch": 1.3889555822328932,
      "grad_norm": 0.1377023309469223,
      "learning_rate": 5.171818340867787e-05,
      "loss": 0.3373,
      "step": 4628
    },
    {
      "epoch": 1.3892557022809124,
      "grad_norm": 0.13617639243602753,
      "learning_rate": 5.1672322119382325e-05,
      "loss": 0.3193,
      "step": 4629
    },
    {
      "epoch": 1.3895558223289317,
      "grad_norm": 0.14320117235183716,
      "learning_rate": 5.1626474088231004e-05,
      "loss": 0.3603,
      "step": 4630
    },
    {
      "epoch": 1.3898559423769508,
      "grad_norm": 0.15924130380153656,
      "learning_rate": 5.158063932780185e-05,
      "loss": 0.3828,
      "step": 4631
    },
    {
      "epoch": 1.39015606242497,
      "grad_norm": 0.14906635880470276,
      "learning_rate": 5.153481785066914e-05,
      "loss": 0.3798,
      "step": 4632
    },
    {
      "epoch": 1.390456182472989,
      "grad_norm": 0.15486960113048553,
      "learning_rate": 5.1489009669403354e-05,
      "loss": 0.4329,
      "step": 4633
    },
    {
      "epoch": 1.3907563025210083,
      "grad_norm": 0.1481010913848877,
      "learning_rate": 5.144321479657157e-05,
      "loss": 0.3531,
      "step": 4634
    },
    {
      "epoch": 1.3910564225690276,
      "grad_norm": 0.1424623429775238,
      "learning_rate": 5.1397433244736984e-05,
      "loss": 0.3399,
      "step": 4635
    },
    {
      "epoch": 1.3913565426170469,
      "grad_norm": 0.14340506494045258,
      "learning_rate": 5.1351665026459286e-05,
      "loss": 0.3841,
      "step": 4636
    },
    {
      "epoch": 1.3916566626650662,
      "grad_norm": 0.13578680157661438,
      "learning_rate": 5.130591015429449e-05,
      "loss": 0.3528,
      "step": 4637
    },
    {
      "epoch": 1.3919567827130852,
      "grad_norm": 0.13246610760688782,
      "learning_rate": 5.1260168640794845e-05,
      "loss": 0.3341,
      "step": 4638
    },
    {
      "epoch": 1.3922569027611045,
      "grad_norm": 0.14007440209388733,
      "learning_rate": 5.121444049850906e-05,
      "loss": 0.3392,
      "step": 4639
    },
    {
      "epoch": 1.3925570228091235,
      "grad_norm": 0.136866956949234,
      "learning_rate": 5.116872573998217e-05,
      "loss": 0.3413,
      "step": 4640
    },
    {
      "epoch": 1.3928571428571428,
      "grad_norm": 0.17135818302631378,
      "learning_rate": 5.1123024377755394e-05,
      "loss": 0.3666,
      "step": 4641
    },
    {
      "epoch": 1.393157262905162,
      "grad_norm": 0.13718274235725403,
      "learning_rate": 5.107733642436646e-05,
      "loss": 0.3593,
      "step": 4642
    },
    {
      "epoch": 1.3934573829531813,
      "grad_norm": 0.13347193598747253,
      "learning_rate": 5.103166189234927e-05,
      "loss": 0.3328,
      "step": 4643
    },
    {
      "epoch": 1.3937575030012006,
      "grad_norm": 0.21490272879600525,
      "learning_rate": 5.098600079423415e-05,
      "loss": 0.3258,
      "step": 4644
    },
    {
      "epoch": 1.3940576230492197,
      "grad_norm": 0.1582237035036087,
      "learning_rate": 5.0940353142547726e-05,
      "loss": 0.376,
      "step": 4645
    },
    {
      "epoch": 1.394357743097239,
      "grad_norm": 0.13393276929855347,
      "learning_rate": 5.0894718949812855e-05,
      "loss": 0.3301,
      "step": 4646
    },
    {
      "epoch": 1.394657863145258,
      "grad_norm": 0.15065492689609528,
      "learning_rate": 5.084909822854871e-05,
      "loss": 0.3984,
      "step": 4647
    },
    {
      "epoch": 1.3949579831932772,
      "grad_norm": 0.14716662466526031,
      "learning_rate": 5.080349099127093e-05,
      "loss": 0.3829,
      "step": 4648
    },
    {
      "epoch": 1.3952581032412965,
      "grad_norm": 0.1335858702659607,
      "learning_rate": 5.075789725049126e-05,
      "loss": 0.3281,
      "step": 4649
    },
    {
      "epoch": 1.3955582232893158,
      "grad_norm": 0.14929762482643127,
      "learning_rate": 5.071231701871787e-05,
      "loss": 0.2957,
      "step": 4650
    },
    {
      "epoch": 1.395858343337335,
      "grad_norm": 0.12802687287330627,
      "learning_rate": 5.0666750308455116e-05,
      "loss": 0.3058,
      "step": 4651
    },
    {
      "epoch": 1.396158463385354,
      "grad_norm": 0.1500602811574936,
      "learning_rate": 5.0621197132203724e-05,
      "loss": 0.3377,
      "step": 4652
    },
    {
      "epoch": 1.3964585834333734,
      "grad_norm": 0.13277117908000946,
      "learning_rate": 5.057565750246073e-05,
      "loss": 0.33,
      "step": 4653
    },
    {
      "epoch": 1.3967587034813926,
      "grad_norm": 0.14500407874584198,
      "learning_rate": 5.053013143171936e-05,
      "loss": 0.3534,
      "step": 4654
    },
    {
      "epoch": 1.3970588235294117,
      "grad_norm": 0.14183609187602997,
      "learning_rate": 5.0484618932469166e-05,
      "loss": 0.3698,
      "step": 4655
    },
    {
      "epoch": 1.397358943577431,
      "grad_norm": 0.19110167026519775,
      "learning_rate": 5.0439120017195986e-05,
      "loss": 0.4062,
      "step": 4656
    },
    {
      "epoch": 1.3976590636254502,
      "grad_norm": 0.2165842056274414,
      "learning_rate": 5.039363469838196e-05,
      "loss": 0.4034,
      "step": 4657
    },
    {
      "epoch": 1.3979591836734695,
      "grad_norm": 0.1206594929099083,
      "learning_rate": 5.034816298850542e-05,
      "loss": 0.3111,
      "step": 4658
    },
    {
      "epoch": 1.3982593037214885,
      "grad_norm": 0.14055152237415314,
      "learning_rate": 5.0302704900041055e-05,
      "loss": 0.3593,
      "step": 4659
    },
    {
      "epoch": 1.3985594237695078,
      "grad_norm": 0.13259822130203247,
      "learning_rate": 5.025726044545968e-05,
      "loss": 0.3557,
      "step": 4660
    },
    {
      "epoch": 1.398859543817527,
      "grad_norm": 0.13649876415729523,
      "learning_rate": 5.021182963722859e-05,
      "loss": 0.3581,
      "step": 4661
    },
    {
      "epoch": 1.3991596638655461,
      "grad_norm": 0.14338348805904388,
      "learning_rate": 5.0166412487811134e-05,
      "loss": 0.3426,
      "step": 4662
    },
    {
      "epoch": 1.3994597839135654,
      "grad_norm": 0.12635748088359833,
      "learning_rate": 5.012100900966695e-05,
      "loss": 0.3296,
      "step": 4663
    },
    {
      "epoch": 1.3997599039615847,
      "grad_norm": 0.14264103770256042,
      "learning_rate": 5.0075619215252015e-05,
      "loss": 0.3673,
      "step": 4664
    },
    {
      "epoch": 1.400060024009604,
      "grad_norm": 0.18206572532653809,
      "learning_rate": 5.0030243117018515e-05,
      "loss": 0.3736,
      "step": 4665
    },
    {
      "epoch": 1.400360144057623,
      "grad_norm": 0.13971807062625885,
      "learning_rate": 4.9984880727414794e-05,
      "loss": 0.3583,
      "step": 4666
    },
    {
      "epoch": 1.4006602641056423,
      "grad_norm": 0.16418235003948212,
      "learning_rate": 4.993953205888559e-05,
      "loss": 0.3846,
      "step": 4667
    },
    {
      "epoch": 1.4009603841536615,
      "grad_norm": 0.14172151684761047,
      "learning_rate": 4.989419712387169e-05,
      "loss": 0.3775,
      "step": 4668
    },
    {
      "epoch": 1.4012605042016806,
      "grad_norm": 0.14084260165691376,
      "learning_rate": 4.984887593481028e-05,
      "loss": 0.3742,
      "step": 4669
    },
    {
      "epoch": 1.4015606242496998,
      "grad_norm": 0.16080965101718903,
      "learning_rate": 4.980356850413472e-05,
      "loss": 0.391,
      "step": 4670
    },
    {
      "epoch": 1.4018607442977191,
      "grad_norm": 0.12837891280651093,
      "learning_rate": 4.975827484427453e-05,
      "loss": 0.3346,
      "step": 4671
    },
    {
      "epoch": 1.4021608643457384,
      "grad_norm": 0.1495605856180191,
      "learning_rate": 4.971299496765555e-05,
      "loss": 0.3961,
      "step": 4672
    },
    {
      "epoch": 1.4024609843937574,
      "grad_norm": 0.13998694717884064,
      "learning_rate": 4.9667728886699794e-05,
      "loss": 0.3839,
      "step": 4673
    },
    {
      "epoch": 1.4027611044417767,
      "grad_norm": 0.1444583386182785,
      "learning_rate": 4.962247661382545e-05,
      "loss": 0.3967,
      "step": 4674
    },
    {
      "epoch": 1.403061224489796,
      "grad_norm": 0.14261461794376373,
      "learning_rate": 4.957723816144703e-05,
      "loss": 0.3635,
      "step": 4675
    },
    {
      "epoch": 1.403361344537815,
      "grad_norm": 0.1437271535396576,
      "learning_rate": 4.95320135419751e-05,
      "loss": 0.3691,
      "step": 4676
    },
    {
      "epoch": 1.4036614645858343,
      "grad_norm": 0.1375724822282791,
      "learning_rate": 4.948680276781656e-05,
      "loss": 0.361,
      "step": 4677
    },
    {
      "epoch": 1.4039615846338536,
      "grad_norm": 0.1406862735748291,
      "learning_rate": 4.9441605851374504e-05,
      "loss": 0.3841,
      "step": 4678
    },
    {
      "epoch": 1.4042617046818728,
      "grad_norm": 0.13326990604400635,
      "learning_rate": 4.9396422805048127e-05,
      "loss": 0.3409,
      "step": 4679
    },
    {
      "epoch": 1.4045618247298919,
      "grad_norm": 0.1702723652124405,
      "learning_rate": 4.935125364123292e-05,
      "loss": 0.3567,
      "step": 4680
    },
    {
      "epoch": 1.4048619447779112,
      "grad_norm": 0.14896176755428314,
      "learning_rate": 4.930609837232049e-05,
      "loss": 0.383,
      "step": 4681
    },
    {
      "epoch": 1.4051620648259304,
      "grad_norm": 0.13286007940769196,
      "learning_rate": 4.9260957010698674e-05,
      "loss": 0.347,
      "step": 4682
    },
    {
      "epoch": 1.4054621848739495,
      "grad_norm": 0.14665399491786957,
      "learning_rate": 4.921582956875154e-05,
      "loss": 0.3629,
      "step": 4683
    },
    {
      "epoch": 1.4057623049219687,
      "grad_norm": 0.14434906840324402,
      "learning_rate": 4.917071605885923e-05,
      "loss": 0.3899,
      "step": 4684
    },
    {
      "epoch": 1.406062424969988,
      "grad_norm": 0.1460450440645218,
      "learning_rate": 4.912561649339806e-05,
      "loss": 0.3806,
      "step": 4685
    },
    {
      "epoch": 1.4063625450180073,
      "grad_norm": 0.13991695642471313,
      "learning_rate": 4.908053088474074e-05,
      "loss": 0.3856,
      "step": 4686
    },
    {
      "epoch": 1.4066626650660263,
      "grad_norm": 0.13680557906627655,
      "learning_rate": 4.9035459245255886e-05,
      "loss": 0.3398,
      "step": 4687
    },
    {
      "epoch": 1.4069627851140456,
      "grad_norm": 0.13648124039173126,
      "learning_rate": 4.899040158730837e-05,
      "loss": 0.3639,
      "step": 4688
    },
    {
      "epoch": 1.4072629051620649,
      "grad_norm": 0.14287413656711578,
      "learning_rate": 4.894535792325926e-05,
      "loss": 0.3517,
      "step": 4689
    },
    {
      "epoch": 1.407563025210084,
      "grad_norm": 0.1395912617444992,
      "learning_rate": 4.89003282654658e-05,
      "loss": 0.373,
      "step": 4690
    },
    {
      "epoch": 1.4078631452581032,
      "grad_norm": 0.14574593305587769,
      "learning_rate": 4.885531262628137e-05,
      "loss": 0.3862,
      "step": 4691
    },
    {
      "epoch": 1.4081632653061225,
      "grad_norm": 0.13947996497154236,
      "learning_rate": 4.8810311018055455e-05,
      "loss": 0.343,
      "step": 4692
    },
    {
      "epoch": 1.4084633853541417,
      "grad_norm": 0.1489896923303604,
      "learning_rate": 4.8765323453133714e-05,
      "loss": 0.3926,
      "step": 4693
    },
    {
      "epoch": 1.408763505402161,
      "grad_norm": 0.13951550424098969,
      "learning_rate": 4.8720349943858004e-05,
      "loss": 0.3528,
      "step": 4694
    },
    {
      "epoch": 1.40906362545018,
      "grad_norm": 0.15845239162445068,
      "learning_rate": 4.867539050256631e-05,
      "loss": 0.433,
      "step": 4695
    },
    {
      "epoch": 1.4093637454981993,
      "grad_norm": 0.13138680160045624,
      "learning_rate": 4.8630445141592674e-05,
      "loss": 0.3104,
      "step": 4696
    },
    {
      "epoch": 1.4096638655462184,
      "grad_norm": 0.15351898968219757,
      "learning_rate": 4.858551387326743e-05,
      "loss": 0.3692,
      "step": 4697
    },
    {
      "epoch": 1.4099639855942376,
      "grad_norm": 0.27054664492607117,
      "learning_rate": 4.854059670991682e-05,
      "loss": 0.4393,
      "step": 4698
    },
    {
      "epoch": 1.410264105642257,
      "grad_norm": 0.12757544219493866,
      "learning_rate": 4.849569366386352e-05,
      "loss": 0.3188,
      "step": 4699
    },
    {
      "epoch": 1.4105642256902762,
      "grad_norm": 0.13178516924381256,
      "learning_rate": 4.845080474742608e-05,
      "loss": 0.3307,
      "step": 4700
    },
    {
      "epoch": 1.4108643457382954,
      "grad_norm": 0.16457417607307434,
      "learning_rate": 4.840592997291923e-05,
      "loss": 0.3728,
      "step": 4701
    },
    {
      "epoch": 1.4111644657863145,
      "grad_norm": 0.14082013070583344,
      "learning_rate": 4.836106935265389e-05,
      "loss": 0.3321,
      "step": 4702
    },
    {
      "epoch": 1.4114645858343338,
      "grad_norm": 0.15481553971767426,
      "learning_rate": 4.831622289893708e-05,
      "loss": 0.399,
      "step": 4703
    },
    {
      "epoch": 1.4117647058823528,
      "grad_norm": 0.16300667822360992,
      "learning_rate": 4.8271390624071845e-05,
      "loss": 0.389,
      "step": 4704
    },
    {
      "epoch": 1.412064825930372,
      "grad_norm": 0.1538439244031906,
      "learning_rate": 4.822657254035747e-05,
      "loss": 0.3816,
      "step": 4705
    },
    {
      "epoch": 1.4123649459783914,
      "grad_norm": 0.3736082911491394,
      "learning_rate": 4.818176866008923e-05,
      "loss": 0.4044,
      "step": 4706
    },
    {
      "epoch": 1.4126650660264106,
      "grad_norm": 0.13170059025287628,
      "learning_rate": 4.813697899555858e-05,
      "loss": 0.3458,
      "step": 4707
    },
    {
      "epoch": 1.41296518607443,
      "grad_norm": 0.1586686223745346,
      "learning_rate": 4.8092203559053084e-05,
      "loss": 0.397,
      "step": 4708
    },
    {
      "epoch": 1.413265306122449,
      "grad_norm": 0.13637465238571167,
      "learning_rate": 4.8047442362856296e-05,
      "loss": 0.2804,
      "step": 4709
    },
    {
      "epoch": 1.4135654261704682,
      "grad_norm": 0.13989731669425964,
      "learning_rate": 4.800269541924799e-05,
      "loss": 0.3598,
      "step": 4710
    },
    {
      "epoch": 1.4138655462184873,
      "grad_norm": 0.13504591584205627,
      "learning_rate": 4.795796274050399e-05,
      "loss": 0.3374,
      "step": 4711
    },
    {
      "epoch": 1.4141656662665065,
      "grad_norm": 0.14345870912075043,
      "learning_rate": 4.7913244338896135e-05,
      "loss": 0.354,
      "step": 4712
    },
    {
      "epoch": 1.4144657863145258,
      "grad_norm": 0.13402247428894043,
      "learning_rate": 4.786854022669247e-05,
      "loss": 0.3387,
      "step": 4713
    },
    {
      "epoch": 1.414765906362545,
      "grad_norm": 0.13939963281154633,
      "learning_rate": 4.782385041615699e-05,
      "loss": 0.3561,
      "step": 4714
    },
    {
      "epoch": 1.4150660264105643,
      "grad_norm": 0.14345000684261322,
      "learning_rate": 4.7779174919549864e-05,
      "loss": 0.3241,
      "step": 4715
    },
    {
      "epoch": 1.4153661464585834,
      "grad_norm": 0.15040355920791626,
      "learning_rate": 4.7734513749127354e-05,
      "loss": 0.402,
      "step": 4716
    },
    {
      "epoch": 1.4156662665066027,
      "grad_norm": 0.1621185541152954,
      "learning_rate": 4.768986691714168e-05,
      "loss": 0.4094,
      "step": 4717
    },
    {
      "epoch": 1.415966386554622,
      "grad_norm": 0.13609668612480164,
      "learning_rate": 4.764523443584116e-05,
      "loss": 0.3525,
      "step": 4718
    },
    {
      "epoch": 1.416266506602641,
      "grad_norm": 0.13255177438259125,
      "learning_rate": 4.7600616317470236e-05,
      "loss": 0.3326,
      "step": 4719
    },
    {
      "epoch": 1.4165666266506602,
      "grad_norm": 0.1421128660440445,
      "learning_rate": 4.7556012574269395e-05,
      "loss": 0.3404,
      "step": 4720
    },
    {
      "epoch": 1.4168667466986795,
      "grad_norm": 0.14517702162265778,
      "learning_rate": 4.7511423218475184e-05,
      "loss": 0.3906,
      "step": 4721
    },
    {
      "epoch": 1.4171668667466988,
      "grad_norm": 0.16044922173023224,
      "learning_rate": 4.746684826232015e-05,
      "loss": 0.4182,
      "step": 4722
    },
    {
      "epoch": 1.4174669867947178,
      "grad_norm": 0.1331530660390854,
      "learning_rate": 4.7422287718032844e-05,
      "loss": 0.3412,
      "step": 4723
    },
    {
      "epoch": 1.417767106842737,
      "grad_norm": 0.14331965148448944,
      "learning_rate": 4.737774159783809e-05,
      "loss": 0.3207,
      "step": 4724
    },
    {
      "epoch": 1.4180672268907564,
      "grad_norm": 0.1528742015361786,
      "learning_rate": 4.733320991395652e-05,
      "loss": 0.3849,
      "step": 4725
    },
    {
      "epoch": 1.4183673469387754,
      "grad_norm": 0.1370069980621338,
      "learning_rate": 4.7288692678604876e-05,
      "loss": 0.3382,
      "step": 4726
    },
    {
      "epoch": 1.4186674669867947,
      "grad_norm": 0.13793693482875824,
      "learning_rate": 4.724418990399598e-05,
      "loss": 0.3557,
      "step": 4727
    },
    {
      "epoch": 1.418967587034814,
      "grad_norm": 0.13225845992565155,
      "learning_rate": 4.719970160233865e-05,
      "loss": 0.3476,
      "step": 4728
    },
    {
      "epoch": 1.4192677070828332,
      "grad_norm": 0.13750863075256348,
      "learning_rate": 4.7155227785837784e-05,
      "loss": 0.4046,
      "step": 4729
    },
    {
      "epoch": 1.4195678271308523,
      "grad_norm": 0.14539740979671478,
      "learning_rate": 4.7110768466694224e-05,
      "loss": 0.3704,
      "step": 4730
    },
    {
      "epoch": 1.4198679471788715,
      "grad_norm": 0.15951357781887054,
      "learning_rate": 4.706632365710484e-05,
      "loss": 0.4196,
      "step": 4731
    },
    {
      "epoch": 1.4201680672268908,
      "grad_norm": 0.18090002238750458,
      "learning_rate": 4.7021893369262596e-05,
      "loss": 0.3847,
      "step": 4732
    },
    {
      "epoch": 1.4204681872749099,
      "grad_norm": 0.13013353943824768,
      "learning_rate": 4.697747761535646e-05,
      "loss": 0.3451,
      "step": 4733
    },
    {
      "epoch": 1.4207683073229291,
      "grad_norm": 0.18931058049201965,
      "learning_rate": 4.6933076407571316e-05,
      "loss": 0.3749,
      "step": 4734
    },
    {
      "epoch": 1.4210684273709484,
      "grad_norm": 0.12983962893486023,
      "learning_rate": 4.6888689758088166e-05,
      "loss": 0.3344,
      "step": 4735
    },
    {
      "epoch": 1.4213685474189677,
      "grad_norm": 0.15677180886268616,
      "learning_rate": 4.6844317679084015e-05,
      "loss": 0.3454,
      "step": 4736
    },
    {
      "epoch": 1.4216686674669867,
      "grad_norm": 0.15239223837852478,
      "learning_rate": 4.679996018273175e-05,
      "loss": 0.3705,
      "step": 4737
    },
    {
      "epoch": 1.421968787515006,
      "grad_norm": 0.14624455571174622,
      "learning_rate": 4.675561728120043e-05,
      "loss": 0.3943,
      "step": 4738
    },
    {
      "epoch": 1.4222689075630253,
      "grad_norm": 0.15051227807998657,
      "learning_rate": 4.671128898665493e-05,
      "loss": 0.3681,
      "step": 4739
    },
    {
      "epoch": 1.4225690276110443,
      "grad_norm": 0.13271403312683105,
      "learning_rate": 4.666697531125627e-05,
      "loss": 0.3202,
      "step": 4740
    },
    {
      "epoch": 1.4228691476590636,
      "grad_norm": 0.13313762843608856,
      "learning_rate": 4.662267626716141e-05,
      "loss": 0.3587,
      "step": 4741
    },
    {
      "epoch": 1.4231692677070829,
      "grad_norm": 0.13600504398345947,
      "learning_rate": 4.657839186652324e-05,
      "loss": 0.3624,
      "step": 4742
    },
    {
      "epoch": 1.4234693877551021,
      "grad_norm": 0.1591995656490326,
      "learning_rate": 4.653412212149072e-05,
      "loss": 0.3628,
      "step": 4743
    },
    {
      "epoch": 1.4237695078031212,
      "grad_norm": 0.14780530333518982,
      "learning_rate": 4.64898670442087e-05,
      "loss": 0.3125,
      "step": 4744
    },
    {
      "epoch": 1.4240696278511404,
      "grad_norm": 0.15058188140392303,
      "learning_rate": 4.644562664681806e-05,
      "loss": 0.396,
      "step": 4745
    },
    {
      "epoch": 1.4243697478991597,
      "grad_norm": 0.1523490697145462,
      "learning_rate": 4.640140094145572e-05,
      "loss": 0.3463,
      "step": 4746
    },
    {
      "epoch": 1.4246698679471788,
      "grad_norm": 0.14041709899902344,
      "learning_rate": 4.635718994025443e-05,
      "loss": 0.3843,
      "step": 4747
    },
    {
      "epoch": 1.424969987995198,
      "grad_norm": 0.15815354883670807,
      "learning_rate": 4.631299365534291e-05,
      "loss": 0.3847,
      "step": 4748
    },
    {
      "epoch": 1.4252701080432173,
      "grad_norm": 0.1315331608057022,
      "learning_rate": 4.6268812098846034e-05,
      "loss": 0.3182,
      "step": 4749
    },
    {
      "epoch": 1.4255702280912366,
      "grad_norm": 0.13551288843154907,
      "learning_rate": 4.622464528288443e-05,
      "loss": 0.3487,
      "step": 4750
    },
    {
      "epoch": 1.4258703481392558,
      "grad_norm": 0.14589761197566986,
      "learning_rate": 4.6180493219574796e-05,
      "loss": 0.3573,
      "step": 4751
    },
    {
      "epoch": 1.4261704681872749,
      "grad_norm": 0.13148462772369385,
      "learning_rate": 4.613635592102968e-05,
      "loss": 0.3306,
      "step": 4752
    },
    {
      "epoch": 1.4264705882352942,
      "grad_norm": 0.12648631632328033,
      "learning_rate": 4.60922333993577e-05,
      "loss": 0.3242,
      "step": 4753
    },
    {
      "epoch": 1.4267707082833132,
      "grad_norm": 0.15199293196201324,
      "learning_rate": 4.604812566666338e-05,
      "loss": 0.3988,
      "step": 4754
    },
    {
      "epoch": 1.4270708283313325,
      "grad_norm": 0.14493729174137115,
      "learning_rate": 4.600403273504713e-05,
      "loss": 0.3856,
      "step": 4755
    },
    {
      "epoch": 1.4273709483793517,
      "grad_norm": 0.14682787656784058,
      "learning_rate": 4.5959954616605326e-05,
      "loss": 0.3585,
      "step": 4756
    },
    {
      "epoch": 1.427671068427371,
      "grad_norm": 0.13172248005867004,
      "learning_rate": 4.5915891323430316e-05,
      "loss": 0.321,
      "step": 4757
    },
    {
      "epoch": 1.4279711884753903,
      "grad_norm": 0.14919964969158173,
      "learning_rate": 4.587184286761035e-05,
      "loss": 0.3612,
      "step": 4758
    },
    {
      "epoch": 1.4282713085234093,
      "grad_norm": 0.13788238167762756,
      "learning_rate": 4.582780926122967e-05,
      "loss": 0.3721,
      "step": 4759
    },
    {
      "epoch": 1.4285714285714286,
      "grad_norm": 0.144940584897995,
      "learning_rate": 4.578379051636832e-05,
      "loss": 0.3752,
      "step": 4760
    },
    {
      "epoch": 1.4288715486194477,
      "grad_norm": 0.13624529540538788,
      "learning_rate": 4.5739786645102367e-05,
      "loss": 0.3111,
      "step": 4761
    },
    {
      "epoch": 1.429171668667467,
      "grad_norm": 0.14726722240447998,
      "learning_rate": 4.569579765950379e-05,
      "loss": 0.3926,
      "step": 4762
    },
    {
      "epoch": 1.4294717887154862,
      "grad_norm": 0.13372276723384857,
      "learning_rate": 4.5651823571640464e-05,
      "loss": 0.3421,
      "step": 4763
    },
    {
      "epoch": 1.4297719087635055,
      "grad_norm": 0.1457625925540924,
      "learning_rate": 4.560786439357609e-05,
      "loss": 0.3598,
      "step": 4764
    },
    {
      "epoch": 1.4300720288115247,
      "grad_norm": 0.14241823554039001,
      "learning_rate": 4.5563920137370456e-05,
      "loss": 0.3546,
      "step": 4765
    },
    {
      "epoch": 1.4303721488595438,
      "grad_norm": 0.1325092315673828,
      "learning_rate": 4.551999081507915e-05,
      "loss": 0.3146,
      "step": 4766
    },
    {
      "epoch": 1.430672268907563,
      "grad_norm": 0.1474294513463974,
      "learning_rate": 4.547607643875363e-05,
      "loss": 0.3759,
      "step": 4767
    },
    {
      "epoch": 1.430972388955582,
      "grad_norm": 0.1436256319284439,
      "learning_rate": 4.543217702044139e-05,
      "loss": 0.355,
      "step": 4768
    },
    {
      "epoch": 1.4312725090036014,
      "grad_norm": 0.14325234293937683,
      "learning_rate": 4.538829257218559e-05,
      "loss": 0.3472,
      "step": 4769
    },
    {
      "epoch": 1.4315726290516206,
      "grad_norm": 0.1733434945344925,
      "learning_rate": 4.534442310602559e-05,
      "loss": 0.4102,
      "step": 4770
    },
    {
      "epoch": 1.43187274909964,
      "grad_norm": 0.1364019364118576,
      "learning_rate": 4.53005686339964e-05,
      "loss": 0.3467,
      "step": 4771
    },
    {
      "epoch": 1.4321728691476592,
      "grad_norm": 0.1269693672657013,
      "learning_rate": 4.525672916812894e-05,
      "loss": 0.3075,
      "step": 4772
    },
    {
      "epoch": 1.4324729891956782,
      "grad_norm": 0.16181963682174683,
      "learning_rate": 4.5212904720450134e-05,
      "loss": 0.3781,
      "step": 4773
    },
    {
      "epoch": 1.4327731092436975,
      "grad_norm": 0.15238305926322937,
      "learning_rate": 4.5169095302982724e-05,
      "loss": 0.3979,
      "step": 4774
    },
    {
      "epoch": 1.4330732292917168,
      "grad_norm": 0.21794739365577698,
      "learning_rate": 4.512530092774525e-05,
      "loss": 0.333,
      "step": 4775
    },
    {
      "epoch": 1.4333733493397358,
      "grad_norm": 0.14577952027320862,
      "learning_rate": 4.508152160675229e-05,
      "loss": 0.375,
      "step": 4776
    },
    {
      "epoch": 1.433673469387755,
      "grad_norm": 0.14348258078098297,
      "learning_rate": 4.5037757352014106e-05,
      "loss": 0.3577,
      "step": 4777
    },
    {
      "epoch": 1.4339735894357744,
      "grad_norm": 0.14008821547031403,
      "learning_rate": 4.499400817553696e-05,
      "loss": 0.3519,
      "step": 4778
    },
    {
      "epoch": 1.4342737094837936,
      "grad_norm": 0.1430148482322693,
      "learning_rate": 4.495027408932298e-05,
      "loss": 0.3549,
      "step": 4779
    },
    {
      "epoch": 1.4345738295318127,
      "grad_norm": 0.13885506987571716,
      "learning_rate": 4.490655510537004e-05,
      "loss": 0.3413,
      "step": 4780
    },
    {
      "epoch": 1.434873949579832,
      "grad_norm": 0.14540061354637146,
      "learning_rate": 4.486285123567201e-05,
      "loss": 0.3672,
      "step": 4781
    },
    {
      "epoch": 1.4351740696278512,
      "grad_norm": 0.1269986927509308,
      "learning_rate": 4.481916249221847e-05,
      "loss": 0.3177,
      "step": 4782
    },
    {
      "epoch": 1.4354741896758703,
      "grad_norm": 0.12497656792402267,
      "learning_rate": 4.4775488886994965e-05,
      "loss": 0.3093,
      "step": 4783
    },
    {
      "epoch": 1.4357743097238895,
      "grad_norm": 0.1531170904636383,
      "learning_rate": 4.473183043198288e-05,
      "loss": 0.364,
      "step": 4784
    },
    {
      "epoch": 1.4360744297719088,
      "grad_norm": 0.1496521383523941,
      "learning_rate": 4.468818713915934e-05,
      "loss": 0.391,
      "step": 4785
    },
    {
      "epoch": 1.436374549819928,
      "grad_norm": 0.13336549699306488,
      "learning_rate": 4.4644559020497436e-05,
      "loss": 0.3365,
      "step": 4786
    },
    {
      "epoch": 1.4366746698679471,
      "grad_norm": 0.15357303619384766,
      "learning_rate": 4.4600946087966046e-05,
      "loss": 0.3706,
      "step": 4787
    },
    {
      "epoch": 1.4369747899159664,
      "grad_norm": 0.14057567715644836,
      "learning_rate": 4.4557348353529827e-05,
      "loss": 0.3582,
      "step": 4788
    },
    {
      "epoch": 1.4372749099639857,
      "grad_norm": 0.14349892735481262,
      "learning_rate": 4.451376582914939e-05,
      "loss": 0.3767,
      "step": 4789
    },
    {
      "epoch": 1.4375750300120047,
      "grad_norm": 0.13298708200454712,
      "learning_rate": 4.447019852678101e-05,
      "loss": 0.3307,
      "step": 4790
    },
    {
      "epoch": 1.437875150060024,
      "grad_norm": 0.15382781624794006,
      "learning_rate": 4.442664645837694e-05,
      "loss": 0.3984,
      "step": 4791
    },
    {
      "epoch": 1.4381752701080432,
      "grad_norm": 0.14238616824150085,
      "learning_rate": 4.438310963588522e-05,
      "loss": 0.3324,
      "step": 4792
    },
    {
      "epoch": 1.4384753901560625,
      "grad_norm": 0.14080406725406647,
      "learning_rate": 4.4339588071249625e-05,
      "loss": 0.3518,
      "step": 4793
    },
    {
      "epoch": 1.4387755102040816,
      "grad_norm": 0.13965073227882385,
      "learning_rate": 4.429608177640977e-05,
      "loss": 0.3336,
      "step": 4794
    },
    {
      "epoch": 1.4390756302521008,
      "grad_norm": 0.15151721239089966,
      "learning_rate": 4.425259076330115e-05,
      "loss": 0.369,
      "step": 4795
    },
    {
      "epoch": 1.43937575030012,
      "grad_norm": 1.4650521278381348,
      "learning_rate": 4.420911504385507e-05,
      "loss": 0.3401,
      "step": 4796
    },
    {
      "epoch": 1.4396758703481392,
      "grad_norm": 0.14364506304264069,
      "learning_rate": 4.4165654629998485e-05,
      "loss": 0.3819,
      "step": 4797
    },
    {
      "epoch": 1.4399759903961584,
      "grad_norm": 0.13930128514766693,
      "learning_rate": 4.412220953365433e-05,
      "loss": 0.354,
      "step": 4798
    },
    {
      "epoch": 1.4402761104441777,
      "grad_norm": 0.13983362913131714,
      "learning_rate": 4.4078779766741276e-05,
      "loss": 0.3461,
      "step": 4799
    },
    {
      "epoch": 1.440576230492197,
      "grad_norm": 0.17801937460899353,
      "learning_rate": 4.40353653411738e-05,
      "loss": 0.3453,
      "step": 4800
    },
    {
      "epoch": 1.440876350540216,
      "grad_norm": 0.1360776573419571,
      "learning_rate": 4.399196626886212e-05,
      "loss": 0.3572,
      "step": 4801
    },
    {
      "epoch": 1.4411764705882353,
      "grad_norm": 0.13738538324832916,
      "learning_rate": 4.394858256171223e-05,
      "loss": 0.3464,
      "step": 4802
    },
    {
      "epoch": 1.4414765906362546,
      "grad_norm": 0.12927304208278656,
      "learning_rate": 4.390521423162599e-05,
      "loss": 0.3278,
      "step": 4803
    },
    {
      "epoch": 1.4417767106842736,
      "grad_norm": 0.13406582176685333,
      "learning_rate": 4.386186129050105e-05,
      "loss": 0.3337,
      "step": 4804
    },
    {
      "epoch": 1.4420768307322929,
      "grad_norm": 0.1290862262248993,
      "learning_rate": 4.381852375023072e-05,
      "loss": 0.3472,
      "step": 4805
    },
    {
      "epoch": 1.4423769507803121,
      "grad_norm": 0.12430037558078766,
      "learning_rate": 4.377520162270423e-05,
      "loss": 0.3343,
      "step": 4806
    },
    {
      "epoch": 1.4426770708283314,
      "grad_norm": 0.15715405344963074,
      "learning_rate": 4.373189491980639e-05,
      "loss": 0.4192,
      "step": 4807
    },
    {
      "epoch": 1.4429771908763505,
      "grad_norm": 0.13872407376766205,
      "learning_rate": 4.368860365341805e-05,
      "loss": 0.3531,
      "step": 4808
    },
    {
      "epoch": 1.4432773109243697,
      "grad_norm": 0.14024512469768524,
      "learning_rate": 4.364532783541559e-05,
      "loss": 0.372,
      "step": 4809
    },
    {
      "epoch": 1.443577430972389,
      "grad_norm": 0.1702309250831604,
      "learning_rate": 4.360206747767122e-05,
      "loss": 0.335,
      "step": 4810
    },
    {
      "epoch": 1.443877551020408,
      "grad_norm": 0.13013462722301483,
      "learning_rate": 4.355882259205294e-05,
      "loss": 0.3233,
      "step": 4811
    },
    {
      "epoch": 1.4441776710684273,
      "grad_norm": 0.15084701776504517,
      "learning_rate": 4.351559319042453e-05,
      "loss": 0.3926,
      "step": 4812
    },
    {
      "epoch": 1.4444777911164466,
      "grad_norm": 0.13886122405529022,
      "learning_rate": 4.3472379284645405e-05,
      "loss": 0.3669,
      "step": 4813
    },
    {
      "epoch": 1.4447779111644659,
      "grad_norm": 0.1385250985622406,
      "learning_rate": 4.3429180886570886e-05,
      "loss": 0.3678,
      "step": 4814
    },
    {
      "epoch": 1.4450780312124851,
      "grad_norm": 0.14431382715702057,
      "learning_rate": 4.3385998008051884e-05,
      "loss": 0.3676,
      "step": 4815
    },
    {
      "epoch": 1.4453781512605042,
      "grad_norm": 0.1262168139219284,
      "learning_rate": 4.334283066093515e-05,
      "loss": 0.2928,
      "step": 4816
    },
    {
      "epoch": 1.4456782713085234,
      "grad_norm": 0.13885599374771118,
      "learning_rate": 4.3299678857063194e-05,
      "loss": 0.3694,
      "step": 4817
    },
    {
      "epoch": 1.4459783913565425,
      "grad_norm": 0.12713685631752014,
      "learning_rate": 4.325654260827416e-05,
      "loss": 0.309,
      "step": 4818
    },
    {
      "epoch": 1.4462785114045618,
      "grad_norm": 0.1348283588886261,
      "learning_rate": 4.321342192640204e-05,
      "loss": 0.3524,
      "step": 4819
    },
    {
      "epoch": 1.446578631452581,
      "grad_norm": 0.14019560813903809,
      "learning_rate": 4.3170316823276424e-05,
      "loss": 0.3647,
      "step": 4820
    },
    {
      "epoch": 1.4468787515006003,
      "grad_norm": 0.13907159864902496,
      "learning_rate": 4.312722731072275e-05,
      "loss": 0.3614,
      "step": 4821
    },
    {
      "epoch": 1.4471788715486196,
      "grad_norm": 0.1358475685119629,
      "learning_rate": 4.308415340056217e-05,
      "loss": 0.3806,
      "step": 4822
    },
    {
      "epoch": 1.4474789915966386,
      "grad_norm": 0.1443357616662979,
      "learning_rate": 4.304109510461143e-05,
      "loss": 0.3861,
      "step": 4823
    },
    {
      "epoch": 1.447779111644658,
      "grad_norm": 0.1358397752046585,
      "learning_rate": 4.2998052434683125e-05,
      "loss": 0.3661,
      "step": 4824
    },
    {
      "epoch": 1.448079231692677,
      "grad_norm": 0.12247933447360992,
      "learning_rate": 4.2955025402585544e-05,
      "loss": 0.2983,
      "step": 4825
    },
    {
      "epoch": 1.4483793517406962,
      "grad_norm": 0.13556553423404694,
      "learning_rate": 4.291201402012265e-05,
      "loss": 0.3371,
      "step": 4826
    },
    {
      "epoch": 1.4486794717887155,
      "grad_norm": 0.1382128894329071,
      "learning_rate": 4.286901829909406e-05,
      "loss": 0.3597,
      "step": 4827
    },
    {
      "epoch": 1.4489795918367347,
      "grad_norm": 0.1367807388305664,
      "learning_rate": 4.28260382512952e-05,
      "loss": 0.3291,
      "step": 4828
    },
    {
      "epoch": 1.449279711884754,
      "grad_norm": 0.15236921608448029,
      "learning_rate": 4.278307388851716e-05,
      "loss": 0.3163,
      "step": 4829
    },
    {
      "epoch": 1.449579831932773,
      "grad_norm": 0.12621168792247772,
      "learning_rate": 4.274012522254674e-05,
      "loss": 0.3256,
      "step": 4830
    },
    {
      "epoch": 1.4498799519807923,
      "grad_norm": 0.14435584843158722,
      "learning_rate": 4.269719226516641e-05,
      "loss": 0.3828,
      "step": 4831
    },
    {
      "epoch": 1.4501800720288116,
      "grad_norm": 0.12524443864822388,
      "learning_rate": 4.2654275028154224e-05,
      "loss": 0.2974,
      "step": 4832
    },
    {
      "epoch": 1.4504801920768307,
      "grad_norm": 0.15154621005058289,
      "learning_rate": 4.2611373523284205e-05,
      "loss": 0.3877,
      "step": 4833
    },
    {
      "epoch": 1.45078031212485,
      "grad_norm": 0.14805643260478973,
      "learning_rate": 4.2568487762325806e-05,
      "loss": 0.3613,
      "step": 4834
    },
    {
      "epoch": 1.4510804321728692,
      "grad_norm": 0.14266574382781982,
      "learning_rate": 4.252561775704421e-05,
      "loss": 0.3802,
      "step": 4835
    },
    {
      "epoch": 1.4513805522208885,
      "grad_norm": 0.15151824057102203,
      "learning_rate": 4.2482763519200356e-05,
      "loss": 0.3503,
      "step": 4836
    },
    {
      "epoch": 1.4516806722689075,
      "grad_norm": 0.1423339694738388,
      "learning_rate": 4.24399250605508e-05,
      "loss": 0.3557,
      "step": 4837
    },
    {
      "epoch": 1.4519807923169268,
      "grad_norm": 0.14264728128910065,
      "learning_rate": 4.239710239284781e-05,
      "loss": 0.3807,
      "step": 4838
    },
    {
      "epoch": 1.452280912364946,
      "grad_norm": 0.1482369750738144,
      "learning_rate": 4.235429552783928e-05,
      "loss": 0.4035,
      "step": 4839
    },
    {
      "epoch": 1.452581032412965,
      "grad_norm": 0.13236463069915771,
      "learning_rate": 4.231150447726874e-05,
      "loss": 0.3464,
      "step": 4840
    },
    {
      "epoch": 1.4528811524609844,
      "grad_norm": 0.14207333326339722,
      "learning_rate": 4.226872925287545e-05,
      "loss": 0.3694,
      "step": 4841
    },
    {
      "epoch": 1.4531812725090036,
      "grad_norm": 0.2207833230495453,
      "learning_rate": 4.222596986639435e-05,
      "loss": 0.3523,
      "step": 4842
    },
    {
      "epoch": 1.453481392557023,
      "grad_norm": 0.14225220680236816,
      "learning_rate": 4.2183226329555906e-05,
      "loss": 0.348,
      "step": 4843
    },
    {
      "epoch": 1.453781512605042,
      "grad_norm": 0.14505499601364136,
      "learning_rate": 4.214049865408639e-05,
      "loss": 0.3859,
      "step": 4844
    },
    {
      "epoch": 1.4540816326530612,
      "grad_norm": 0.12653787434101105,
      "learning_rate": 4.209778685170759e-05,
      "loss": 0.3332,
      "step": 4845
    },
    {
      "epoch": 1.4543817527010805,
      "grad_norm": 0.16374477744102478,
      "learning_rate": 4.205509093413702e-05,
      "loss": 0.3976,
      "step": 4846
    },
    {
      "epoch": 1.4546818727490995,
      "grad_norm": 0.15292547643184662,
      "learning_rate": 4.201241091308786e-05,
      "loss": 0.4021,
      "step": 4847
    },
    {
      "epoch": 1.4549819927971188,
      "grad_norm": 0.13936926424503326,
      "learning_rate": 4.196974680026882e-05,
      "loss": 0.3697,
      "step": 4848
    },
    {
      "epoch": 1.455282112845138,
      "grad_norm": 0.13519351184368134,
      "learning_rate": 4.192709860738433e-05,
      "loss": 0.3469,
      "step": 4849
    },
    {
      "epoch": 1.4555822328931574,
      "grad_norm": 0.1383168250322342,
      "learning_rate": 4.1884466346134466e-05,
      "loss": 0.3847,
      "step": 4850
    },
    {
      "epoch": 1.4558823529411764,
      "grad_norm": 0.17078574001789093,
      "learning_rate": 4.1841850028214844e-05,
      "loss": 0.3696,
      "step": 4851
    },
    {
      "epoch": 1.4561824729891957,
      "grad_norm": 0.14079110324382782,
      "learning_rate": 4.179924966531683e-05,
      "loss": 0.3394,
      "step": 4852
    },
    {
      "epoch": 1.456482593037215,
      "grad_norm": 0.13478030264377594,
      "learning_rate": 4.1756665269127274e-05,
      "loss": 0.337,
      "step": 4853
    },
    {
      "epoch": 1.456782713085234,
      "grad_norm": 0.13460038602352142,
      "learning_rate": 4.171409685132873e-05,
      "loss": 0.3396,
      "step": 4854
    },
    {
      "epoch": 1.4570828331332533,
      "grad_norm": 0.1408572494983673,
      "learning_rate": 4.167154442359943e-05,
      "loss": 0.3464,
      "step": 4855
    },
    {
      "epoch": 1.4573829531812725,
      "grad_norm": 0.1432645469903946,
      "learning_rate": 4.162900799761308e-05,
      "loss": 0.3597,
      "step": 4856
    },
    {
      "epoch": 1.4576830732292918,
      "grad_norm": 0.1405627578496933,
      "learning_rate": 4.1586487585039e-05,
      "loss": 0.37,
      "step": 4857
    },
    {
      "epoch": 1.4579831932773109,
      "grad_norm": 0.13922898471355438,
      "learning_rate": 4.154398319754232e-05,
      "loss": 0.359,
      "step": 4858
    },
    {
      "epoch": 1.4582833133253301,
      "grad_norm": 0.1451897919178009,
      "learning_rate": 4.1501494846783526e-05,
      "loss": 0.369,
      "step": 4859
    },
    {
      "epoch": 1.4585834333733494,
      "grad_norm": 0.13877728581428528,
      "learning_rate": 4.145902254441888e-05,
      "loss": 0.3481,
      "step": 4860
    },
    {
      "epoch": 1.4588835534213684,
      "grad_norm": 0.15443190932273865,
      "learning_rate": 4.1416566302100094e-05,
      "loss": 0.4045,
      "step": 4861
    },
    {
      "epoch": 1.4591836734693877,
      "grad_norm": 0.1353960931301117,
      "learning_rate": 4.137412613147459e-05,
      "loss": 0.3363,
      "step": 4862
    },
    {
      "epoch": 1.459483793517407,
      "grad_norm": 0.14713869988918304,
      "learning_rate": 4.1331702044185374e-05,
      "loss": 0.3736,
      "step": 4863
    },
    {
      "epoch": 1.4597839135654262,
      "grad_norm": 0.14519952237606049,
      "learning_rate": 4.1289294051870985e-05,
      "loss": 0.3613,
      "step": 4864
    },
    {
      "epoch": 1.4600840336134453,
      "grad_norm": 0.1449516862630844,
      "learning_rate": 4.124690216616552e-05,
      "loss": 0.3976,
      "step": 4865
    },
    {
      "epoch": 1.4603841536614646,
      "grad_norm": 0.1280471831560135,
      "learning_rate": 4.120452639869875e-05,
      "loss": 0.3169,
      "step": 4866
    },
    {
      "epoch": 1.4606842737094838,
      "grad_norm": 0.14638231694698334,
      "learning_rate": 4.116216676109598e-05,
      "loss": 0.331,
      "step": 4867
    },
    {
      "epoch": 1.4609843937575029,
      "grad_norm": 0.14368273317813873,
      "learning_rate": 4.111982326497813e-05,
      "loss": 0.3421,
      "step": 4868
    },
    {
      "epoch": 1.4612845138055222,
      "grad_norm": 0.14355066418647766,
      "learning_rate": 4.1077495921961604e-05,
      "loss": 0.3683,
      "step": 4869
    },
    {
      "epoch": 1.4615846338535414,
      "grad_norm": 0.11699586361646652,
      "learning_rate": 4.1035184743658376e-05,
      "loss": 0.3025,
      "step": 4870
    },
    {
      "epoch": 1.4618847539015607,
      "grad_norm": 0.13622942566871643,
      "learning_rate": 4.0992889741676145e-05,
      "loss": 0.3254,
      "step": 4871
    },
    {
      "epoch": 1.46218487394958,
      "grad_norm": 0.1358841061592102,
      "learning_rate": 4.0950610927618e-05,
      "loss": 0.3134,
      "step": 4872
    },
    {
      "epoch": 1.462484993997599,
      "grad_norm": 0.1376754343509674,
      "learning_rate": 4.090834831308262e-05,
      "loss": 0.3594,
      "step": 4873
    },
    {
      "epoch": 1.4627851140456183,
      "grad_norm": 0.13249529898166656,
      "learning_rate": 4.086610190966431e-05,
      "loss": 0.3302,
      "step": 4874
    },
    {
      "epoch": 1.4630852340936373,
      "grad_norm": 0.13735318183898926,
      "learning_rate": 4.082387172895291e-05,
      "loss": 0.3618,
      "step": 4875
    },
    {
      "epoch": 1.4633853541416566,
      "grad_norm": 0.13755595684051514,
      "learning_rate": 4.078165778253371e-05,
      "loss": 0.3545,
      "step": 4876
    },
    {
      "epoch": 1.4636854741896759,
      "grad_norm": 0.14065246284008026,
      "learning_rate": 4.073946008198771e-05,
      "loss": 0.3828,
      "step": 4877
    },
    {
      "epoch": 1.4639855942376951,
      "grad_norm": 0.16443657875061035,
      "learning_rate": 4.069727863889128e-05,
      "loss": 0.391,
      "step": 4878
    },
    {
      "epoch": 1.4642857142857144,
      "grad_norm": 0.1418590396642685,
      "learning_rate": 4.065511346481645e-05,
      "loss": 0.3804,
      "step": 4879
    },
    {
      "epoch": 1.4645858343337335,
      "grad_norm": 0.15260443091392517,
      "learning_rate": 4.0612964571330805e-05,
      "loss": 0.3628,
      "step": 4880
    },
    {
      "epoch": 1.4648859543817527,
      "grad_norm": 0.1524861603975296,
      "learning_rate": 4.057083196999732e-05,
      "loss": 0.366,
      "step": 4881
    },
    {
      "epoch": 1.4651860744297718,
      "grad_norm": 0.13246721029281616,
      "learning_rate": 4.0528715672374636e-05,
      "loss": 0.3532,
      "step": 4882
    },
    {
      "epoch": 1.465486194477791,
      "grad_norm": 0.13852067291736603,
      "learning_rate": 4.048661569001692e-05,
      "loss": 0.3478,
      "step": 4883
    },
    {
      "epoch": 1.4657863145258103,
      "grad_norm": 0.1355675756931305,
      "learning_rate": 4.044453203447372e-05,
      "loss": 0.3339,
      "step": 4884
    },
    {
      "epoch": 1.4660864345738296,
      "grad_norm": 0.1346714198589325,
      "learning_rate": 4.04024647172903e-05,
      "loss": 0.3571,
      "step": 4885
    },
    {
      "epoch": 1.4663865546218489,
      "grad_norm": 0.14984160661697388,
      "learning_rate": 4.036041375000728e-05,
      "loss": 0.3666,
      "step": 4886
    },
    {
      "epoch": 1.466686674669868,
      "grad_norm": 0.1460392028093338,
      "learning_rate": 4.031837914416088e-05,
      "loss": 0.3305,
      "step": 4887
    },
    {
      "epoch": 1.4669867947178872,
      "grad_norm": 0.1454431265592575,
      "learning_rate": 4.027636091128284e-05,
      "loss": 0.3499,
      "step": 4888
    },
    {
      "epoch": 1.4672869147659062,
      "grad_norm": 0.15010550618171692,
      "learning_rate": 4.023435906290034e-05,
      "loss": 0.3345,
      "step": 4889
    },
    {
      "epoch": 1.4675870348139255,
      "grad_norm": 0.13426147401332855,
      "learning_rate": 4.019237361053615e-05,
      "loss": 0.34,
      "step": 4890
    },
    {
      "epoch": 1.4678871548619448,
      "grad_norm": 0.13417157530784607,
      "learning_rate": 4.0150404565708435e-05,
      "loss": 0.3414,
      "step": 4891
    },
    {
      "epoch": 1.468187274909964,
      "grad_norm": 0.21425917744636536,
      "learning_rate": 4.010845193993096e-05,
      "loss": 0.3959,
      "step": 4892
    },
    {
      "epoch": 1.4684873949579833,
      "grad_norm": 0.14582909643650055,
      "learning_rate": 4.0066515744712974e-05,
      "loss": 0.3999,
      "step": 4893
    },
    {
      "epoch": 1.4687875150060024,
      "grad_norm": 0.12927165627479553,
      "learning_rate": 4.0024595991559166e-05,
      "loss": 0.3333,
      "step": 4894
    },
    {
      "epoch": 1.4690876350540216,
      "grad_norm": 0.1228029727935791,
      "learning_rate": 3.998269269196966e-05,
      "loss": 0.295,
      "step": 4895
    },
    {
      "epoch": 1.469387755102041,
      "grad_norm": 0.13898111879825592,
      "learning_rate": 3.99408058574403e-05,
      "loss": 0.3661,
      "step": 4896
    },
    {
      "epoch": 1.46968787515006,
      "grad_norm": 0.20389845967292786,
      "learning_rate": 3.989893549946213e-05,
      "loss": 0.3409,
      "step": 4897
    },
    {
      "epoch": 1.4699879951980792,
      "grad_norm": 0.12807156145572662,
      "learning_rate": 3.9857081629521896e-05,
      "loss": 0.3217,
      "step": 4898
    },
    {
      "epoch": 1.4702881152460985,
      "grad_norm": 0.13886474072933197,
      "learning_rate": 3.9815244259101644e-05,
      "loss": 0.3526,
      "step": 4899
    },
    {
      "epoch": 1.4705882352941178,
      "grad_norm": 0.14439184963703156,
      "learning_rate": 3.977342339967902e-05,
      "loss": 0.3428,
      "step": 4900
    },
    {
      "epoch": 1.4708883553421368,
      "grad_norm": 0.1395224630832672,
      "learning_rate": 3.973161906272712e-05,
      "loss": 0.4465,
      "step": 4901
    },
    {
      "epoch": 1.471188475390156,
      "grad_norm": 0.1354299634695053,
      "learning_rate": 3.968983125971447e-05,
      "loss": 0.3385,
      "step": 4902
    },
    {
      "epoch": 1.4714885954381753,
      "grad_norm": 0.14423540234565735,
      "learning_rate": 3.964806000210503e-05,
      "loss": 0.3571,
      "step": 4903
    },
    {
      "epoch": 1.4717887154861944,
      "grad_norm": 0.14226119220256805,
      "learning_rate": 3.9606305301358284e-05,
      "loss": 0.3239,
      "step": 4904
    },
    {
      "epoch": 1.4720888355342137,
      "grad_norm": 0.14099536836147308,
      "learning_rate": 3.95645671689292e-05,
      "loss": 0.3768,
      "step": 4905
    },
    {
      "epoch": 1.472388955582233,
      "grad_norm": 0.13216210901737213,
      "learning_rate": 3.95228456162681e-05,
      "loss": 0.3315,
      "step": 4906
    },
    {
      "epoch": 1.4726890756302522,
      "grad_norm": 0.14150847494602203,
      "learning_rate": 3.948114065482087e-05,
      "loss": 0.3574,
      "step": 4907
    },
    {
      "epoch": 1.4729891956782712,
      "grad_norm": 0.12970447540283203,
      "learning_rate": 3.943945229602869e-05,
      "loss": 0.3268,
      "step": 4908
    },
    {
      "epoch": 1.4732893157262905,
      "grad_norm": 0.16438636183738708,
      "learning_rate": 3.939778055132842e-05,
      "loss": 0.3805,
      "step": 4909
    },
    {
      "epoch": 1.4735894357743098,
      "grad_norm": 0.14561450481414795,
      "learning_rate": 3.935612543215216e-05,
      "loss": 0.3685,
      "step": 4910
    },
    {
      "epoch": 1.4738895558223288,
      "grad_norm": 0.14267125725746155,
      "learning_rate": 3.9314486949927467e-05,
      "loss": 0.3541,
      "step": 4911
    },
    {
      "epoch": 1.474189675870348,
      "grad_norm": 0.140004962682724,
      "learning_rate": 3.9272865116077414e-05,
      "loss": 0.3572,
      "step": 4912
    },
    {
      "epoch": 1.4744897959183674,
      "grad_norm": 0.15184147655963898,
      "learning_rate": 3.9231259942020536e-05,
      "loss": 0.3903,
      "step": 4913
    },
    {
      "epoch": 1.4747899159663866,
      "grad_norm": 0.13791409134864807,
      "learning_rate": 3.918967143917064e-05,
      "loss": 0.3658,
      "step": 4914
    },
    {
      "epoch": 1.4750900360144057,
      "grad_norm": 0.13405513763427734,
      "learning_rate": 3.914809961893714e-05,
      "loss": 0.3497,
      "step": 4915
    },
    {
      "epoch": 1.475390156062425,
      "grad_norm": 0.13332565128803253,
      "learning_rate": 3.910654449272469e-05,
      "loss": 0.3307,
      "step": 4916
    },
    {
      "epoch": 1.4756902761104442,
      "grad_norm": 0.1424759030342102,
      "learning_rate": 3.9065006071933544e-05,
      "loss": 0.3695,
      "step": 4917
    },
    {
      "epoch": 1.4759903961584633,
      "grad_norm": 0.13335555791854858,
      "learning_rate": 3.90234843679593e-05,
      "loss": 0.3548,
      "step": 4918
    },
    {
      "epoch": 1.4762905162064826,
      "grad_norm": 0.137489914894104,
      "learning_rate": 3.8981979392192866e-05,
      "loss": 0.3147,
      "step": 4919
    },
    {
      "epoch": 1.4765906362545018,
      "grad_norm": 0.14004887640476227,
      "learning_rate": 3.8940491156020744e-05,
      "loss": 0.3701,
      "step": 4920
    },
    {
      "epoch": 1.476890756302521,
      "grad_norm": 0.15263986587524414,
      "learning_rate": 3.889901967082476e-05,
      "loss": 0.4014,
      "step": 4921
    },
    {
      "epoch": 1.4771908763505401,
      "grad_norm": 0.2184661626815796,
      "learning_rate": 3.885756494798206e-05,
      "loss": 0.3517,
      "step": 4922
    },
    {
      "epoch": 1.4774909963985594,
      "grad_norm": 0.13904131948947906,
      "learning_rate": 3.8816126998865365e-05,
      "loss": 0.3309,
      "step": 4923
    },
    {
      "epoch": 1.4777911164465787,
      "grad_norm": 0.13836504518985748,
      "learning_rate": 3.877470583484262e-05,
      "loss": 0.344,
      "step": 4924
    },
    {
      "epoch": 1.4780912364945977,
      "grad_norm": 0.2074955701828003,
      "learning_rate": 3.873330146727729e-05,
      "loss": 0.3074,
      "step": 4925
    },
    {
      "epoch": 1.478391356542617,
      "grad_norm": 0.14033573865890503,
      "learning_rate": 3.869191390752821e-05,
      "loss": 0.3486,
      "step": 4926
    },
    {
      "epoch": 1.4786914765906363,
      "grad_norm": 0.15356256067752838,
      "learning_rate": 3.8650543166949526e-05,
      "loss": 0.3575,
      "step": 4927
    },
    {
      "epoch": 1.4789915966386555,
      "grad_norm": 0.16172119975090027,
      "learning_rate": 3.860918925689089e-05,
      "loss": 0.3891,
      "step": 4928
    },
    {
      "epoch": 1.4792917166866748,
      "grad_norm": 0.1274283528327942,
      "learning_rate": 3.8567852188697205e-05,
      "loss": 0.3287,
      "step": 4929
    },
    {
      "epoch": 1.4795918367346939,
      "grad_norm": 0.14106817543506622,
      "learning_rate": 3.852653197370885e-05,
      "loss": 0.3704,
      "step": 4930
    },
    {
      "epoch": 1.4798919567827131,
      "grad_norm": 0.1411307454109192,
      "learning_rate": 3.848522862326159e-05,
      "loss": 0.3736,
      "step": 4931
    },
    {
      "epoch": 1.4801920768307322,
      "grad_norm": 0.15969686210155487,
      "learning_rate": 3.8443942148686505e-05,
      "loss": 0.358,
      "step": 4932
    },
    {
      "epoch": 1.4804921968787514,
      "grad_norm": 0.32184886932373047,
      "learning_rate": 3.840267256130997e-05,
      "loss": 0.3139,
      "step": 4933
    },
    {
      "epoch": 1.4807923169267707,
      "grad_norm": 0.3197161853313446,
      "learning_rate": 3.8361419872453985e-05,
      "loss": 0.38,
      "step": 4934
    },
    {
      "epoch": 1.48109243697479,
      "grad_norm": 0.13927941024303436,
      "learning_rate": 3.832018409343567e-05,
      "loss": 0.3543,
      "step": 4935
    },
    {
      "epoch": 1.4813925570228093,
      "grad_norm": 0.14764556288719177,
      "learning_rate": 3.827896523556757e-05,
      "loss": 0.371,
      "step": 4936
    },
    {
      "epoch": 1.4816926770708283,
      "grad_norm": 0.14382779598236084,
      "learning_rate": 3.8237763310157614e-05,
      "loss": 0.3562,
      "step": 4937
    },
    {
      "epoch": 1.4819927971188476,
      "grad_norm": 0.15075556933879852,
      "learning_rate": 3.819657832850909e-05,
      "loss": 0.3548,
      "step": 4938
    },
    {
      "epoch": 1.4822929171668666,
      "grad_norm": 0.12999118864536285,
      "learning_rate": 3.815541030192067e-05,
      "loss": 0.3283,
      "step": 4939
    },
    {
      "epoch": 1.482593037214886,
      "grad_norm": 0.14742016792297363,
      "learning_rate": 3.811425924168628e-05,
      "loss": 0.3818,
      "step": 4940
    },
    {
      "epoch": 1.4828931572629052,
      "grad_norm": 0.12545278668403625,
      "learning_rate": 3.8073125159095225e-05,
      "loss": 0.2996,
      "step": 4941
    },
    {
      "epoch": 1.4831932773109244,
      "grad_norm": 0.1290358453989029,
      "learning_rate": 3.803200806543218e-05,
      "loss": 0.3388,
      "step": 4942
    },
    {
      "epoch": 1.4834933973589437,
      "grad_norm": 0.15177536010742188,
      "learning_rate": 3.799090797197721e-05,
      "loss": 0.4223,
      "step": 4943
    },
    {
      "epoch": 1.4837935174069627,
      "grad_norm": 0.14963765442371368,
      "learning_rate": 3.794982489000556e-05,
      "loss": 0.3861,
      "step": 4944
    },
    {
      "epoch": 1.484093637454982,
      "grad_norm": 0.12542229890823364,
      "learning_rate": 3.790875883078795e-05,
      "loss": 0.3119,
      "step": 4945
    },
    {
      "epoch": 1.484393757503001,
      "grad_norm": 0.14355400204658508,
      "learning_rate": 3.786770980559041e-05,
      "loss": 0.4041,
      "step": 4946
    },
    {
      "epoch": 1.4846938775510203,
      "grad_norm": 0.13719186186790466,
      "learning_rate": 3.7826677825674204e-05,
      "loss": 0.345,
      "step": 4947
    },
    {
      "epoch": 1.4849939975990396,
      "grad_norm": 0.15719708800315857,
      "learning_rate": 3.778566290229607e-05,
      "loss": 0.3721,
      "step": 4948
    },
    {
      "epoch": 1.4852941176470589,
      "grad_norm": 0.14396868646144867,
      "learning_rate": 3.7744665046707886e-05,
      "loss": 0.362,
      "step": 4949
    },
    {
      "epoch": 1.4855942376950781,
      "grad_norm": 0.13629618287086487,
      "learning_rate": 3.770368427015699e-05,
      "loss": 0.3377,
      "step": 4950
    },
    {
      "epoch": 1.4858943577430972,
      "grad_norm": 0.13559213280677795,
      "learning_rate": 3.766272058388604e-05,
      "loss": 0.3181,
      "step": 4951
    },
    {
      "epoch": 1.4861944777911165,
      "grad_norm": 0.15401612222194672,
      "learning_rate": 3.762177399913285e-05,
      "loss": 0.4098,
      "step": 4952
    },
    {
      "epoch": 1.4864945978391357,
      "grad_norm": 0.13623987138271332,
      "learning_rate": 3.758084452713073e-05,
      "loss": 0.3505,
      "step": 4953
    },
    {
      "epoch": 1.4867947178871548,
      "grad_norm": 0.13094072043895721,
      "learning_rate": 3.753993217910815e-05,
      "loss": 0.3286,
      "step": 4954
    },
    {
      "epoch": 1.487094837935174,
      "grad_norm": 0.1474945843219757,
      "learning_rate": 3.749903696628898e-05,
      "loss": 0.3716,
      "step": 4955
    },
    {
      "epoch": 1.4873949579831933,
      "grad_norm": 0.1438131332397461,
      "learning_rate": 3.745815889989237e-05,
      "loss": 0.3337,
      "step": 4956
    },
    {
      "epoch": 1.4876950780312126,
      "grad_norm": 0.13847722113132477,
      "learning_rate": 3.7417297991132696e-05,
      "loss": 0.3528,
      "step": 4957
    },
    {
      "epoch": 1.4879951980792316,
      "grad_norm": 0.12775957584381104,
      "learning_rate": 3.7376454251219704e-05,
      "loss": 0.3065,
      "step": 4958
    },
    {
      "epoch": 1.488295318127251,
      "grad_norm": 0.19468966126441956,
      "learning_rate": 3.733562769135845e-05,
      "loss": 0.3522,
      "step": 4959
    },
    {
      "epoch": 1.4885954381752702,
      "grad_norm": 0.14810489118099213,
      "learning_rate": 3.729481832274916e-05,
      "loss": 0.3835,
      "step": 4960
    },
    {
      "epoch": 1.4888955582232892,
      "grad_norm": 0.14498479664325714,
      "learning_rate": 3.7254026156587475e-05,
      "loss": 0.37,
      "step": 4961
    },
    {
      "epoch": 1.4891956782713085,
      "grad_norm": 0.13816475868225098,
      "learning_rate": 3.72132512040642e-05,
      "loss": 0.3304,
      "step": 4962
    },
    {
      "epoch": 1.4894957983193278,
      "grad_norm": 0.14384940266609192,
      "learning_rate": 3.717249347636551e-05,
      "loss": 0.3834,
      "step": 4963
    },
    {
      "epoch": 1.489795918367347,
      "grad_norm": 0.1332027018070221,
      "learning_rate": 3.713175298467285e-05,
      "loss": 0.3327,
      "step": 4964
    },
    {
      "epoch": 1.490096038415366,
      "grad_norm": 0.14304400980472565,
      "learning_rate": 3.7091029740162875e-05,
      "loss": 0.3589,
      "step": 4965
    },
    {
      "epoch": 1.4903961584633854,
      "grad_norm": 0.14571350812911987,
      "learning_rate": 3.705032375400751e-05,
      "loss": 0.3768,
      "step": 4966
    },
    {
      "epoch": 1.4906962785114046,
      "grad_norm": 0.13593170046806335,
      "learning_rate": 3.700963503737399e-05,
      "loss": 0.3525,
      "step": 4967
    },
    {
      "epoch": 1.4909963985594237,
      "grad_norm": 0.13215714693069458,
      "learning_rate": 3.696896360142483e-05,
      "loss": 0.3438,
      "step": 4968
    },
    {
      "epoch": 1.491296518607443,
      "grad_norm": 0.1519247442483902,
      "learning_rate": 3.692830945731778e-05,
      "loss": 0.3527,
      "step": 4969
    },
    {
      "epoch": 1.4915966386554622,
      "grad_norm": 0.13658547401428223,
      "learning_rate": 3.688767261620578e-05,
      "loss": 0.3478,
      "step": 4970
    },
    {
      "epoch": 1.4918967587034815,
      "grad_norm": 0.13602906465530396,
      "learning_rate": 3.6847053089237116e-05,
      "loss": 0.3562,
      "step": 4971
    },
    {
      "epoch": 1.4921968787515005,
      "grad_norm": 0.15309105813503265,
      "learning_rate": 3.680645088755533e-05,
      "loss": 0.3967,
      "step": 4972
    },
    {
      "epoch": 1.4924969987995198,
      "grad_norm": 0.13403762876987457,
      "learning_rate": 3.6765866022299125e-05,
      "loss": 0.3157,
      "step": 4973
    },
    {
      "epoch": 1.492797118847539,
      "grad_norm": 0.1302029937505722,
      "learning_rate": 3.672529850460246e-05,
      "loss": 0.3342,
      "step": 4974
    },
    {
      "epoch": 1.4930972388955581,
      "grad_norm": 0.14181581139564514,
      "learning_rate": 3.66847483455946e-05,
      "loss": 0.3477,
      "step": 4975
    },
    {
      "epoch": 1.4933973589435774,
      "grad_norm": 0.1388651430606842,
      "learning_rate": 3.6644215556400065e-05,
      "loss": 0.3307,
      "step": 4976
    },
    {
      "epoch": 1.4936974789915967,
      "grad_norm": 0.15407897531986237,
      "learning_rate": 3.6603700148138476e-05,
      "loss": 0.3553,
      "step": 4977
    },
    {
      "epoch": 1.493997599039616,
      "grad_norm": 0.12832655012607574,
      "learning_rate": 3.6563202131924854e-05,
      "loss": 0.3082,
      "step": 4978
    },
    {
      "epoch": 1.494297719087635,
      "grad_norm": 0.16573424637317657,
      "learning_rate": 3.652272151886925e-05,
      "loss": 0.3633,
      "step": 4979
    },
    {
      "epoch": 1.4945978391356542,
      "grad_norm": 0.1342608481645584,
      "learning_rate": 3.64822583200772e-05,
      "loss": 0.3504,
      "step": 4980
    },
    {
      "epoch": 1.4948979591836735,
      "grad_norm": 0.1758069545030594,
      "learning_rate": 3.644181254664925e-05,
      "loss": 0.3236,
      "step": 4981
    },
    {
      "epoch": 1.4951980792316926,
      "grad_norm": 0.14578987658023834,
      "learning_rate": 3.6401384209681186e-05,
      "loss": 0.3612,
      "step": 4982
    },
    {
      "epoch": 1.4954981992797118,
      "grad_norm": 0.12802115082740784,
      "learning_rate": 3.6360973320264125e-05,
      "loss": 0.3038,
      "step": 4983
    },
    {
      "epoch": 1.495798319327731,
      "grad_norm": 0.1445339173078537,
      "learning_rate": 3.632057988948433e-05,
      "loss": 0.339,
      "step": 4984
    },
    {
      "epoch": 1.4960984393757504,
      "grad_norm": 0.14311954379081726,
      "learning_rate": 3.6280203928423225e-05,
      "loss": 0.3449,
      "step": 4985
    },
    {
      "epoch": 1.4963985594237694,
      "grad_norm": 0.1445217877626419,
      "learning_rate": 3.623984544815756e-05,
      "loss": 0.3677,
      "step": 4986
    },
    {
      "epoch": 1.4966986794717887,
      "grad_norm": 0.1266975849866867,
      "learning_rate": 3.619950445975916e-05,
      "loss": 0.3318,
      "step": 4987
    },
    {
      "epoch": 1.496998799519808,
      "grad_norm": 0.1584240347146988,
      "learning_rate": 3.6159180974295124e-05,
      "loss": 0.3361,
      "step": 4988
    },
    {
      "epoch": 1.497298919567827,
      "grad_norm": 0.1305302083492279,
      "learning_rate": 3.611887500282779e-05,
      "loss": 0.3173,
      "step": 4989
    },
    {
      "epoch": 1.4975990396158463,
      "grad_norm": 0.1282181441783905,
      "learning_rate": 3.607858655641457e-05,
      "loss": 0.318,
      "step": 4990
    },
    {
      "epoch": 1.4978991596638656,
      "grad_norm": 0.14413386583328247,
      "learning_rate": 3.60383156461082e-05,
      "loss": 0.3473,
      "step": 4991
    },
    {
      "epoch": 1.4981992797118848,
      "grad_norm": 0.1319807767868042,
      "learning_rate": 3.599806228295647e-05,
      "loss": 0.3275,
      "step": 4992
    },
    {
      "epoch": 1.498499399759904,
      "grad_norm": 0.12965364754199982,
      "learning_rate": 3.595782647800248e-05,
      "loss": 0.314,
      "step": 4993
    },
    {
      "epoch": 1.4987995198079231,
      "grad_norm": 0.15261231362819672,
      "learning_rate": 3.5917608242284476e-05,
      "loss": 0.3604,
      "step": 4994
    },
    {
      "epoch": 1.4990996398559424,
      "grad_norm": 0.1458638608455658,
      "learning_rate": 3.587740758683581e-05,
      "loss": 0.3317,
      "step": 4995
    },
    {
      "epoch": 1.4993997599039615,
      "grad_norm": 0.13909973204135895,
      "learning_rate": 3.583722452268511e-05,
      "loss": 0.3365,
      "step": 4996
    },
    {
      "epoch": 1.4996998799519807,
      "grad_norm": 0.1375804841518402,
      "learning_rate": 3.579705906085618e-05,
      "loss": 0.3128,
      "step": 4997
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.1468920111656189,
      "learning_rate": 3.575691121236785e-05,
      "loss": 0.3497,
      "step": 4998
    },
    {
      "epoch": 1.5003001200480193,
      "grad_norm": 0.13905927538871765,
      "learning_rate": 3.5716780988234324e-05,
      "loss": 0.3517,
      "step": 4999
    },
    {
      "epoch": 1.5006002400960385,
      "grad_norm": 0.17258362472057343,
      "learning_rate": 3.56766683994648e-05,
      "loss": 0.3557,
      "step": 5000
    },
    {
      "epoch": 1.5009003601440576,
      "grad_norm": 0.12960495054721832,
      "learning_rate": 3.563657345706372e-05,
      "loss": 0.3206,
      "step": 5001
    },
    {
      "epoch": 1.5012004801920769,
      "grad_norm": 0.1402052789926529,
      "learning_rate": 3.5596496172030724e-05,
      "loss": 0.3591,
      "step": 5002
    },
    {
      "epoch": 1.501500600240096,
      "grad_norm": 0.1431601196527481,
      "learning_rate": 3.555643655536051e-05,
      "loss": 0.3474,
      "step": 5003
    },
    {
      "epoch": 1.5018007202881152,
      "grad_norm": 0.3191090524196625,
      "learning_rate": 3.5516394618042944e-05,
      "loss": 0.3813,
      "step": 5004
    },
    {
      "epoch": 1.5021008403361344,
      "grad_norm": 0.13000676035881042,
      "learning_rate": 3.5476370371063114e-05,
      "loss": 0.3076,
      "step": 5005
    },
    {
      "epoch": 1.5024009603841537,
      "grad_norm": 0.1422010362148285,
      "learning_rate": 3.5436363825401234e-05,
      "loss": 0.3459,
      "step": 5006
    },
    {
      "epoch": 1.502701080432173,
      "grad_norm": 0.14779047667980194,
      "learning_rate": 3.539637499203259e-05,
      "loss": 0.3802,
      "step": 5007
    },
    {
      "epoch": 1.503001200480192,
      "grad_norm": 0.13392239809036255,
      "learning_rate": 3.535640388192767e-05,
      "loss": 0.3367,
      "step": 5008
    },
    {
      "epoch": 1.5033013205282113,
      "grad_norm": 0.15039633214473724,
      "learning_rate": 3.531645050605211e-05,
      "loss": 0.3627,
      "step": 5009
    },
    {
      "epoch": 1.5036014405762304,
      "grad_norm": 0.14044032990932465,
      "learning_rate": 3.527651487536669e-05,
      "loss": 0.3698,
      "step": 5010
    },
    {
      "epoch": 1.5039015606242496,
      "grad_norm": 0.13334433734416962,
      "learning_rate": 3.5236597000827266e-05,
      "loss": 0.3202,
      "step": 5011
    },
    {
      "epoch": 1.504201680672269,
      "grad_norm": 0.1374587118625641,
      "learning_rate": 3.519669689338478e-05,
      "loss": 0.3147,
      "step": 5012
    },
    {
      "epoch": 1.5045018007202882,
      "grad_norm": 0.1453571766614914,
      "learning_rate": 3.515681456398545e-05,
      "loss": 0.3571,
      "step": 5013
    },
    {
      "epoch": 1.5048019207683074,
      "grad_norm": 0.16557316482067108,
      "learning_rate": 3.511695002357055e-05,
      "loss": 0.3688,
      "step": 5014
    },
    {
      "epoch": 1.5051020408163265,
      "grad_norm": 0.14268682897090912,
      "learning_rate": 3.507710328307638e-05,
      "loss": 0.3739,
      "step": 5015
    },
    {
      "epoch": 1.5054021608643458,
      "grad_norm": 0.1268330067396164,
      "learning_rate": 3.503727435343451e-05,
      "loss": 0.3199,
      "step": 5016
    },
    {
      "epoch": 1.5057022809123648,
      "grad_norm": 0.1584567278623581,
      "learning_rate": 3.499746324557147e-05,
      "loss": 0.3647,
      "step": 5017
    },
    {
      "epoch": 1.506002400960384,
      "grad_norm": 0.14097891747951508,
      "learning_rate": 3.495766997040909e-05,
      "loss": 0.3678,
      "step": 5018
    },
    {
      "epoch": 1.5063025210084033,
      "grad_norm": 0.13286574184894562,
      "learning_rate": 3.4917894538864136e-05,
      "loss": 0.3404,
      "step": 5019
    },
    {
      "epoch": 1.5066026410564226,
      "grad_norm": 0.14076447486877441,
      "learning_rate": 3.487813696184852e-05,
      "loss": 0.3693,
      "step": 5020
    },
    {
      "epoch": 1.5069027611044419,
      "grad_norm": 0.12194446474313736,
      "learning_rate": 3.4838397250269295e-05,
      "loss": 0.2822,
      "step": 5021
    },
    {
      "epoch": 1.5072028811524611,
      "grad_norm": 0.13897386193275452,
      "learning_rate": 3.4798675415028635e-05,
      "loss": 0.3423,
      "step": 5022
    },
    {
      "epoch": 1.5075030012004802,
      "grad_norm": 0.13166168332099915,
      "learning_rate": 3.4758971467023716e-05,
      "loss": 0.3215,
      "step": 5023
    },
    {
      "epoch": 1.5078031212484992,
      "grad_norm": 0.13450632989406586,
      "learning_rate": 3.4719285417146905e-05,
      "loss": 0.3254,
      "step": 5024
    },
    {
      "epoch": 1.5081032412965185,
      "grad_norm": 0.14347293972969055,
      "learning_rate": 3.467961727628557e-05,
      "loss": 0.3639,
      "step": 5025
    },
    {
      "epoch": 1.5084033613445378,
      "grad_norm": 0.14105167984962463,
      "learning_rate": 3.463996705532222e-05,
      "loss": 0.3437,
      "step": 5026
    },
    {
      "epoch": 1.508703481392557,
      "grad_norm": 0.14335931837558746,
      "learning_rate": 3.46003347651345e-05,
      "loss": 0.3381,
      "step": 5027
    },
    {
      "epoch": 1.5090036014405763,
      "grad_norm": 0.1352183073759079,
      "learning_rate": 3.4560720416594985e-05,
      "loss": 0.3409,
      "step": 5028
    },
    {
      "epoch": 1.5093037214885956,
      "grad_norm": 0.16700023412704468,
      "learning_rate": 3.452112402057149e-05,
      "loss": 0.3631,
      "step": 5029
    },
    {
      "epoch": 1.5096038415366146,
      "grad_norm": 0.13665127754211426,
      "learning_rate": 3.448154558792677e-05,
      "loss": 0.3541,
      "step": 5030
    },
    {
      "epoch": 1.5099039615846337,
      "grad_norm": 0.135504350066185,
      "learning_rate": 3.444198512951875e-05,
      "loss": 0.3407,
      "step": 5031
    },
    {
      "epoch": 1.510204081632653,
      "grad_norm": 0.20623868703842163,
      "learning_rate": 3.4402442656200405e-05,
      "loss": 0.3433,
      "step": 5032
    },
    {
      "epoch": 1.5105042016806722,
      "grad_norm": 0.1356758177280426,
      "learning_rate": 3.436291817881971e-05,
      "loss": 0.3423,
      "step": 5033
    },
    {
      "epoch": 1.5108043217286915,
      "grad_norm": 0.13682779669761658,
      "learning_rate": 3.4323411708219786e-05,
      "loss": 0.3654,
      "step": 5034
    },
    {
      "epoch": 1.5111044417767108,
      "grad_norm": 0.13643723726272583,
      "learning_rate": 3.4283923255238805e-05,
      "loss": 0.3394,
      "step": 5035
    },
    {
      "epoch": 1.51140456182473,
      "grad_norm": 0.17409205436706543,
      "learning_rate": 3.424445283070989e-05,
      "loss": 0.3956,
      "step": 5036
    },
    {
      "epoch": 1.511704681872749,
      "grad_norm": 0.13246287405490875,
      "learning_rate": 3.42050004454614e-05,
      "loss": 0.3444,
      "step": 5037
    },
    {
      "epoch": 1.5120048019207684,
      "grad_norm": 0.15758417546749115,
      "learning_rate": 3.416556611031656e-05,
      "loss": 0.367,
      "step": 5038
    },
    {
      "epoch": 1.5123049219687874,
      "grad_norm": 0.13775351643562317,
      "learning_rate": 3.412614983609376e-05,
      "loss": 0.3288,
      "step": 5039
    },
    {
      "epoch": 1.5126050420168067,
      "grad_norm": 0.14162656664848328,
      "learning_rate": 3.408675163360643e-05,
      "loss": 0.375,
      "step": 5040
    },
    {
      "epoch": 1.512905162064826,
      "grad_norm": 0.13742078840732574,
      "learning_rate": 3.4047371513662995e-05,
      "loss": 0.3426,
      "step": 5041
    },
    {
      "epoch": 1.5132052821128452,
      "grad_norm": 0.1556188017129898,
      "learning_rate": 3.400800948706687e-05,
      "loss": 0.3621,
      "step": 5042
    },
    {
      "epoch": 1.5135054021608645,
      "grad_norm": 0.15120911598205566,
      "learning_rate": 3.3968665564616696e-05,
      "loss": 0.3516,
      "step": 5043
    },
    {
      "epoch": 1.5138055222088835,
      "grad_norm": 0.1464402675628662,
      "learning_rate": 3.392933975710598e-05,
      "loss": 0.3689,
      "step": 5044
    },
    {
      "epoch": 1.5141056422569028,
      "grad_norm": 0.16086047887802124,
      "learning_rate": 3.389003207532326e-05,
      "loss": 0.3695,
      "step": 5045
    },
    {
      "epoch": 1.5144057623049219,
      "grad_norm": 0.1495581418275833,
      "learning_rate": 3.385074253005219e-05,
      "loss": 0.2683,
      "step": 5046
    },
    {
      "epoch": 1.5147058823529411,
      "grad_norm": 0.153389573097229,
      "learning_rate": 3.381147113207139e-05,
      "loss": 0.3959,
      "step": 5047
    },
    {
      "epoch": 1.5150060024009604,
      "grad_norm": 0.15461201965808868,
      "learning_rate": 3.377221789215457e-05,
      "loss": 0.3639,
      "step": 5048
    },
    {
      "epoch": 1.5153061224489797,
      "grad_norm": 0.15333007276058197,
      "learning_rate": 3.373298282107036e-05,
      "loss": 0.3764,
      "step": 5049
    },
    {
      "epoch": 1.515606242496999,
      "grad_norm": 0.5077924132347107,
      "learning_rate": 3.369376592958243e-05,
      "loss": 0.3872,
      "step": 5050
    },
    {
      "epoch": 1.515906362545018,
      "grad_norm": 0.13519461452960968,
      "learning_rate": 3.3654567228449507e-05,
      "loss": 0.3458,
      "step": 5051
    },
    {
      "epoch": 1.5162064825930373,
      "grad_norm": 0.13457411527633667,
      "learning_rate": 3.3615386728425334e-05,
      "loss": 0.3426,
      "step": 5052
    },
    {
      "epoch": 1.5165066026410563,
      "grad_norm": 0.14452552795410156,
      "learning_rate": 3.3576224440258586e-05,
      "loss": 0.3459,
      "step": 5053
    },
    {
      "epoch": 1.5168067226890756,
      "grad_norm": 0.1387055516242981,
      "learning_rate": 3.353708037469304e-05,
      "loss": 0.3158,
      "step": 5054
    },
    {
      "epoch": 1.5171068427370948,
      "grad_norm": 0.13338612020015717,
      "learning_rate": 3.349795454246736e-05,
      "loss": 0.3177,
      "step": 5055
    },
    {
      "epoch": 1.517406962785114,
      "grad_norm": 0.1499544382095337,
      "learning_rate": 3.345884695431529e-05,
      "loss": 0.3984,
      "step": 5056
    },
    {
      "epoch": 1.5177070828331334,
      "grad_norm": 0.13665451109409332,
      "learning_rate": 3.34197576209656e-05,
      "loss": 0.3412,
      "step": 5057
    },
    {
      "epoch": 1.5180072028811524,
      "grad_norm": 0.13638487458229065,
      "learning_rate": 3.3380686553141916e-05,
      "loss": 0.3392,
      "step": 5058
    },
    {
      "epoch": 1.5183073229291717,
      "grad_norm": 0.13395284116268158,
      "learning_rate": 3.334163376156298e-05,
      "loss": 0.3449,
      "step": 5059
    },
    {
      "epoch": 1.5186074429771907,
      "grad_norm": 0.1375882923603058,
      "learning_rate": 3.3302599256942524e-05,
      "loss": 0.348,
      "step": 5060
    },
    {
      "epoch": 1.51890756302521,
      "grad_norm": 0.1323576122522354,
      "learning_rate": 3.326358304998913e-05,
      "loss": 0.3393,
      "step": 5061
    },
    {
      "epoch": 1.5192076830732293,
      "grad_norm": 0.1526079922914505,
      "learning_rate": 3.3224585151406515e-05,
      "loss": 0.3878,
      "step": 5062
    },
    {
      "epoch": 1.5195078031212486,
      "grad_norm": 0.12548959255218506,
      "learning_rate": 3.318560557189325e-05,
      "loss": 0.3211,
      "step": 5063
    },
    {
      "epoch": 1.5198079231692678,
      "grad_norm": 0.17658188939094543,
      "learning_rate": 3.314664432214297e-05,
      "loss": 0.3357,
      "step": 5064
    },
    {
      "epoch": 1.5201080432172869,
      "grad_norm": 0.1468879133462906,
      "learning_rate": 3.310770141284426e-05,
      "loss": 0.3722,
      "step": 5065
    },
    {
      "epoch": 1.5204081632653061,
      "grad_norm": 0.16095523536205292,
      "learning_rate": 3.3068776854680617e-05,
      "loss": 0.3582,
      "step": 5066
    },
    {
      "epoch": 1.5207082833133252,
      "grad_norm": 0.1672348976135254,
      "learning_rate": 3.302987065833057e-05,
      "loss": 0.3277,
      "step": 5067
    },
    {
      "epoch": 1.5210084033613445,
      "grad_norm": 0.1487703174352646,
      "learning_rate": 3.299098283446762e-05,
      "loss": 0.3194,
      "step": 5068
    },
    {
      "epoch": 1.5213085234093637,
      "grad_norm": 0.1495758444070816,
      "learning_rate": 3.295211339376014e-05,
      "loss": 0.3806,
      "step": 5069
    },
    {
      "epoch": 1.521608643457383,
      "grad_norm": 0.1635085493326187,
      "learning_rate": 3.2913262346871564e-05,
      "loss": 0.3439,
      "step": 5070
    },
    {
      "epoch": 1.5219087635054023,
      "grad_norm": 0.14415377378463745,
      "learning_rate": 3.2874429704460176e-05,
      "loss": 0.3567,
      "step": 5071
    },
    {
      "epoch": 1.5222088835534213,
      "grad_norm": 0.14679555594921112,
      "learning_rate": 3.283561547717929e-05,
      "loss": 0.3822,
      "step": 5072
    },
    {
      "epoch": 1.5225090036014406,
      "grad_norm": 0.12641476094722748,
      "learning_rate": 3.27968196756772e-05,
      "loss": 0.3094,
      "step": 5073
    },
    {
      "epoch": 1.5228091236494596,
      "grad_norm": 0.14666663110256195,
      "learning_rate": 3.2758042310597036e-05,
      "loss": 0.3933,
      "step": 5074
    },
    {
      "epoch": 1.523109243697479,
      "grad_norm": 0.13911989331245422,
      "learning_rate": 3.271928339257689e-05,
      "loss": 0.3321,
      "step": 5075
    },
    {
      "epoch": 1.5234093637454982,
      "grad_norm": 0.12876641750335693,
      "learning_rate": 3.268054293224987e-05,
      "loss": 0.3133,
      "step": 5076
    },
    {
      "epoch": 1.5237094837935174,
      "grad_norm": 0.12591290473937988,
      "learning_rate": 3.2641820940243974e-05,
      "loss": 0.2988,
      "step": 5077
    },
    {
      "epoch": 1.5240096038415367,
      "grad_norm": 0.12686516344547272,
      "learning_rate": 3.260311742718216e-05,
      "loss": 0.308,
      "step": 5078
    },
    {
      "epoch": 1.5243097238895558,
      "grad_norm": 0.12560750544071198,
      "learning_rate": 3.2564432403682266e-05,
      "loss": 0.3145,
      "step": 5079
    },
    {
      "epoch": 1.524609843937575,
      "grad_norm": 0.1369137167930603,
      "learning_rate": 3.252576588035703e-05,
      "loss": 0.3474,
      "step": 5080
    },
    {
      "epoch": 1.524909963985594,
      "grad_norm": 0.13097818195819855,
      "learning_rate": 3.2487117867814287e-05,
      "loss": 0.3274,
      "step": 5081
    },
    {
      "epoch": 1.5252100840336134,
      "grad_norm": 0.14597076177597046,
      "learning_rate": 3.244848837665662e-05,
      "loss": 0.3392,
      "step": 5082
    },
    {
      "epoch": 1.5255102040816326,
      "grad_norm": 0.14146724343299866,
      "learning_rate": 3.240987741748154e-05,
      "loss": 0.3407,
      "step": 5083
    },
    {
      "epoch": 1.525810324129652,
      "grad_norm": 0.14190976321697235,
      "learning_rate": 3.237128500088157e-05,
      "loss": 0.3641,
      "step": 5084
    },
    {
      "epoch": 1.5261104441776712,
      "grad_norm": 0.13037167489528656,
      "learning_rate": 3.233271113744412e-05,
      "loss": 0.3424,
      "step": 5085
    },
    {
      "epoch": 1.5264105642256904,
      "grad_norm": 0.14053994417190552,
      "learning_rate": 3.2294155837751414e-05,
      "loss": 0.3489,
      "step": 5086
    },
    {
      "epoch": 1.5267106842737095,
      "grad_norm": 0.14578457176685333,
      "learning_rate": 3.225561911238074e-05,
      "loss": 0.3868,
      "step": 5087
    },
    {
      "epoch": 1.5270108043217285,
      "grad_norm": 0.19754479825496674,
      "learning_rate": 3.221710097190414e-05,
      "loss": 0.3485,
      "step": 5088
    },
    {
      "epoch": 1.5273109243697478,
      "grad_norm": 0.16041827201843262,
      "learning_rate": 3.217860142688864e-05,
      "loss": 0.3714,
      "step": 5089
    },
    {
      "epoch": 1.527611044417767,
      "grad_norm": 0.13646718859672546,
      "learning_rate": 3.21401204878962e-05,
      "loss": 0.3332,
      "step": 5090
    },
    {
      "epoch": 1.5279111644657863,
      "grad_norm": 0.13508738577365875,
      "learning_rate": 3.2101658165483536e-05,
      "loss": 0.3278,
      "step": 5091
    },
    {
      "epoch": 1.5282112845138056,
      "grad_norm": 0.1406700760126114,
      "learning_rate": 3.206321447020241e-05,
      "loss": 0.3305,
      "step": 5092
    },
    {
      "epoch": 1.5285114045618249,
      "grad_norm": 0.14580851793289185,
      "learning_rate": 3.202478941259941e-05,
      "loss": 0.3549,
      "step": 5093
    },
    {
      "epoch": 1.528811524609844,
      "grad_norm": 0.12813225388526917,
      "learning_rate": 3.1986383003215956e-05,
      "loss": 0.3258,
      "step": 5094
    },
    {
      "epoch": 1.5291116446578632,
      "grad_norm": 0.1420821100473404,
      "learning_rate": 3.194799525258849e-05,
      "loss": 0.3582,
      "step": 5095
    },
    {
      "epoch": 1.5294117647058822,
      "grad_norm": 0.14169463515281677,
      "learning_rate": 3.190962617124816e-05,
      "loss": 0.3594,
      "step": 5096
    },
    {
      "epoch": 1.5297118847539015,
      "grad_norm": 0.1366046667098999,
      "learning_rate": 3.187127576972112e-05,
      "loss": 0.341,
      "step": 5097
    },
    {
      "epoch": 1.5300120048019208,
      "grad_norm": 0.15085168182849884,
      "learning_rate": 3.1832944058528417e-05,
      "loss": 0.3941,
      "step": 5098
    },
    {
      "epoch": 1.53031212484994,
      "grad_norm": 0.14959454536437988,
      "learning_rate": 3.179463104818582e-05,
      "loss": 0.3454,
      "step": 5099
    },
    {
      "epoch": 1.5306122448979593,
      "grad_norm": 0.1488467901945114,
      "learning_rate": 3.175633674920415e-05,
      "loss": 0.3976,
      "step": 5100
    },
    {
      "epoch": 1.5309123649459784,
      "grad_norm": 0.13554778695106506,
      "learning_rate": 3.171806117208894e-05,
      "loss": 0.3371,
      "step": 5101
    },
    {
      "epoch": 1.5312124849939976,
      "grad_norm": 0.13067273795604706,
      "learning_rate": 3.167980432734069e-05,
      "loss": 0.3399,
      "step": 5102
    },
    {
      "epoch": 1.5315126050420167,
      "grad_norm": 0.14046433568000793,
      "learning_rate": 3.164156622545475e-05,
      "loss": 0.3634,
      "step": 5103
    },
    {
      "epoch": 1.531812725090036,
      "grad_norm": 0.13948778808116913,
      "learning_rate": 3.160334687692128e-05,
      "loss": 0.3373,
      "step": 5104
    },
    {
      "epoch": 1.5321128451380552,
      "grad_norm": 0.1371791511774063,
      "learning_rate": 3.1565146292225255e-05,
      "loss": 0.3558,
      "step": 5105
    },
    {
      "epoch": 1.5324129651860745,
      "grad_norm": 0.16795873641967773,
      "learning_rate": 3.1526964481846686e-05,
      "loss": 0.3789,
      "step": 5106
    },
    {
      "epoch": 1.5327130852340938,
      "grad_norm": 0.13602060079574585,
      "learning_rate": 3.1488801456260245e-05,
      "loss": 0.3624,
      "step": 5107
    },
    {
      "epoch": 1.5330132052821128,
      "grad_norm": 0.1465499848127365,
      "learning_rate": 3.145065722593555e-05,
      "loss": 0.3648,
      "step": 5108
    },
    {
      "epoch": 1.533313325330132,
      "grad_norm": 0.13361485302448273,
      "learning_rate": 3.1412531801337e-05,
      "loss": 0.3288,
      "step": 5109
    },
    {
      "epoch": 1.5336134453781511,
      "grad_norm": 0.1314423680305481,
      "learning_rate": 3.1374425192923874e-05,
      "loss": 0.3078,
      "step": 5110
    },
    {
      "epoch": 1.5339135654261704,
      "grad_norm": 0.13410235941410065,
      "learning_rate": 3.133633741115034e-05,
      "loss": 0.3042,
      "step": 5111
    },
    {
      "epoch": 1.5342136854741897,
      "grad_norm": 0.1429615616798401,
      "learning_rate": 3.129826846646528e-05,
      "loss": 0.3567,
      "step": 5112
    },
    {
      "epoch": 1.534513805522209,
      "grad_norm": 0.1350201666355133,
      "learning_rate": 3.1260218369312476e-05,
      "loss": 0.3678,
      "step": 5113
    },
    {
      "epoch": 1.5348139255702282,
      "grad_norm": 0.14673860371112823,
      "learning_rate": 3.122218713013055e-05,
      "loss": 0.3624,
      "step": 5114
    },
    {
      "epoch": 1.5351140456182473,
      "grad_norm": 0.1545906364917755,
      "learning_rate": 3.118417475935297e-05,
      "loss": 0.3363,
      "step": 5115
    },
    {
      "epoch": 1.5354141656662665,
      "grad_norm": 0.157174751162529,
      "learning_rate": 3.114618126740793e-05,
      "loss": 0.3788,
      "step": 5116
    },
    {
      "epoch": 1.5357142857142856,
      "grad_norm": 0.15201139450073242,
      "learning_rate": 3.1108206664718576e-05,
      "loss": 0.3788,
      "step": 5117
    },
    {
      "epoch": 1.5360144057623049,
      "grad_norm": 0.14599697291851044,
      "learning_rate": 3.10702509617027e-05,
      "loss": 0.3891,
      "step": 5118
    },
    {
      "epoch": 1.5363145258103241,
      "grad_norm": 0.14983749389648438,
      "learning_rate": 3.103231416877315e-05,
      "loss": 0.374,
      "step": 5119
    },
    {
      "epoch": 1.5366146458583434,
      "grad_norm": 0.14620278775691986,
      "learning_rate": 3.099439629633738e-05,
      "loss": 0.351,
      "step": 5120
    },
    {
      "epoch": 1.5369147659063627,
      "grad_norm": 0.13003404438495636,
      "learning_rate": 3.09564973547977e-05,
      "loss": 0.3184,
      "step": 5121
    },
    {
      "epoch": 1.5372148859543817,
      "grad_norm": 0.13253653049468994,
      "learning_rate": 3.0918617354551274e-05,
      "loss": 0.3432,
      "step": 5122
    },
    {
      "epoch": 1.537515006002401,
      "grad_norm": 0.14117096364498138,
      "learning_rate": 3.088075630599008e-05,
      "loss": 0.3601,
      "step": 5123
    },
    {
      "epoch": 1.53781512605042,
      "grad_norm": 0.18316718935966492,
      "learning_rate": 3.084291421950081e-05,
      "loss": 0.3356,
      "step": 5124
    },
    {
      "epoch": 1.5381152460984393,
      "grad_norm": 0.14761757850646973,
      "learning_rate": 3.0805091105465044e-05,
      "loss": 0.3648,
      "step": 5125
    },
    {
      "epoch": 1.5384153661464586,
      "grad_norm": 0.13970275223255157,
      "learning_rate": 3.076728697425908e-05,
      "loss": 0.383,
      "step": 5126
    },
    {
      "epoch": 1.5387154861944778,
      "grad_norm": 0.22657494246959686,
      "learning_rate": 3.0729501836254074e-05,
      "loss": 0.3171,
      "step": 5127
    },
    {
      "epoch": 1.5390156062424971,
      "grad_norm": 0.13654956221580505,
      "learning_rate": 3.069173570181597e-05,
      "loss": 0.3436,
      "step": 5128
    },
    {
      "epoch": 1.5393157262905162,
      "grad_norm": 0.13815683126449585,
      "learning_rate": 3.0653988581305426e-05,
      "loss": 0.3628,
      "step": 5129
    },
    {
      "epoch": 1.5396158463385354,
      "grad_norm": 0.1333167552947998,
      "learning_rate": 3.061626048507794e-05,
      "loss": 0.3195,
      "step": 5130
    },
    {
      "epoch": 1.5399159663865545,
      "grad_norm": 0.14363545179367065,
      "learning_rate": 3.057855142348384e-05,
      "loss": 0.384,
      "step": 5131
    },
    {
      "epoch": 1.5402160864345738,
      "grad_norm": 0.13697920739650726,
      "learning_rate": 3.054086140686808e-05,
      "loss": 0.3477,
      "step": 5132
    },
    {
      "epoch": 1.540516206482593,
      "grad_norm": 0.14586204290390015,
      "learning_rate": 3.0503190445570585e-05,
      "loss": 0.3556,
      "step": 5133
    },
    {
      "epoch": 1.5408163265306123,
      "grad_norm": 0.1277673840522766,
      "learning_rate": 3.0465538549925854e-05,
      "loss": 0.3254,
      "step": 5134
    },
    {
      "epoch": 1.5411164465786316,
      "grad_norm": 0.1379811316728592,
      "learning_rate": 3.0427905730263307e-05,
      "loss": 0.3568,
      "step": 5135
    },
    {
      "epoch": 1.5414165666266506,
      "grad_norm": 0.13037370145320892,
      "learning_rate": 3.0390291996907094e-05,
      "loss": 0.3191,
      "step": 5136
    },
    {
      "epoch": 1.5417166866746699,
      "grad_norm": 0.14184845983982086,
      "learning_rate": 3.0352697360176065e-05,
      "loss": 0.3401,
      "step": 5137
    },
    {
      "epoch": 1.542016806722689,
      "grad_norm": 0.14727821946144104,
      "learning_rate": 3.031512183038392e-05,
      "loss": 0.3251,
      "step": 5138
    },
    {
      "epoch": 1.5423169267707082,
      "grad_norm": 0.24418002367019653,
      "learning_rate": 3.0277565417839026e-05,
      "loss": 0.4121,
      "step": 5139
    },
    {
      "epoch": 1.5426170468187275,
      "grad_norm": 0.12005669623613358,
      "learning_rate": 3.0240028132844577e-05,
      "loss": 0.2846,
      "step": 5140
    },
    {
      "epoch": 1.5429171668667467,
      "grad_norm": 0.1330462247133255,
      "learning_rate": 3.0202509985698535e-05,
      "loss": 0.3317,
      "step": 5141
    },
    {
      "epoch": 1.543217286914766,
      "grad_norm": 0.1424010545015335,
      "learning_rate": 3.0165010986693543e-05,
      "loss": 0.3527,
      "step": 5142
    },
    {
      "epoch": 1.5435174069627853,
      "grad_norm": 0.14978627860546112,
      "learning_rate": 3.0127531146116948e-05,
      "loss": 0.3544,
      "step": 5143
    },
    {
      "epoch": 1.5438175270108043,
      "grad_norm": 0.1406516581773758,
      "learning_rate": 3.0090070474251053e-05,
      "loss": 0.3504,
      "step": 5144
    },
    {
      "epoch": 1.5441176470588234,
      "grad_norm": 0.2728155553340912,
      "learning_rate": 3.005262898137269e-05,
      "loss": 0.3539,
      "step": 5145
    },
    {
      "epoch": 1.5444177671068426,
      "grad_norm": 0.14986300468444824,
      "learning_rate": 3.0015206677753484e-05,
      "loss": 0.3957,
      "step": 5146
    },
    {
      "epoch": 1.544717887154862,
      "grad_norm": 0.13398940861225128,
      "learning_rate": 2.9977803573659834e-05,
      "loss": 0.3314,
      "step": 5147
    },
    {
      "epoch": 1.5450180072028812,
      "grad_norm": 0.13351598381996155,
      "learning_rate": 2.994041967935286e-05,
      "loss": 0.3343,
      "step": 5148
    },
    {
      "epoch": 1.5453181272509005,
      "grad_norm": 0.13882090151309967,
      "learning_rate": 2.990305500508843e-05,
      "loss": 0.3053,
      "step": 5149
    },
    {
      "epoch": 1.5456182472989197,
      "grad_norm": 0.16126897931098938,
      "learning_rate": 2.9865709561117093e-05,
      "loss": 0.3439,
      "step": 5150
    },
    {
      "epoch": 1.5459183673469388,
      "grad_norm": 0.13146162033081055,
      "learning_rate": 2.9828383357684098e-05,
      "loss": 0.3413,
      "step": 5151
    },
    {
      "epoch": 1.5462184873949578,
      "grad_norm": 0.1378421038389206,
      "learning_rate": 2.9791076405029506e-05,
      "loss": 0.3522,
      "step": 5152
    },
    {
      "epoch": 1.546518607442977,
      "grad_norm": 0.13887012004852295,
      "learning_rate": 2.9753788713388075e-05,
      "loss": 0.346,
      "step": 5153
    },
    {
      "epoch": 1.5468187274909964,
      "grad_norm": 0.14214332401752472,
      "learning_rate": 2.9716520292989202e-05,
      "loss": 0.3624,
      "step": 5154
    },
    {
      "epoch": 1.5471188475390156,
      "grad_norm": 0.1410626918077469,
      "learning_rate": 2.9679271154057065e-05,
      "loss": 0.3348,
      "step": 5155
    },
    {
      "epoch": 1.547418967587035,
      "grad_norm": 0.14501497149467468,
      "learning_rate": 2.9642041306810554e-05,
      "loss": 0.3366,
      "step": 5156
    },
    {
      "epoch": 1.5477190876350542,
      "grad_norm": 0.14532721042633057,
      "learning_rate": 2.9604830761463277e-05,
      "loss": 0.366,
      "step": 5157
    },
    {
      "epoch": 1.5480192076830732,
      "grad_norm": 0.13929975032806396,
      "learning_rate": 2.956763952822349e-05,
      "loss": 0.3473,
      "step": 5158
    },
    {
      "epoch": 1.5483193277310925,
      "grad_norm": 0.13484100997447968,
      "learning_rate": 2.9530467617294143e-05,
      "loss": 0.3353,
      "step": 5159
    },
    {
      "epoch": 1.5486194477791115,
      "grad_norm": 0.13206981122493744,
      "learning_rate": 2.949331503887296e-05,
      "loss": 0.3306,
      "step": 5160
    },
    {
      "epoch": 1.5489195678271308,
      "grad_norm": 0.1376311480998993,
      "learning_rate": 2.945618180315236e-05,
      "loss": 0.3328,
      "step": 5161
    },
    {
      "epoch": 1.54921968787515,
      "grad_norm": 0.14766620099544525,
      "learning_rate": 2.9419067920319343e-05,
      "loss": 0.36,
      "step": 5162
    },
    {
      "epoch": 1.5495198079231693,
      "grad_norm": 0.1360243707895279,
      "learning_rate": 2.9381973400555762e-05,
      "loss": 0.3476,
      "step": 5163
    },
    {
      "epoch": 1.5498199279711886,
      "grad_norm": 0.12841099500656128,
      "learning_rate": 2.9344898254038e-05,
      "loss": 0.2991,
      "step": 5164
    },
    {
      "epoch": 1.5501200480192077,
      "grad_norm": 0.14261536300182343,
      "learning_rate": 2.9307842490937232e-05,
      "loss": 0.3875,
      "step": 5165
    },
    {
      "epoch": 1.550420168067227,
      "grad_norm": 0.14344365894794464,
      "learning_rate": 2.9270806121419304e-05,
      "loss": 0.3349,
      "step": 5166
    },
    {
      "epoch": 1.550720288115246,
      "grad_norm": 0.1426357924938202,
      "learning_rate": 2.9233789155644663e-05,
      "loss": 0.3446,
      "step": 5167
    },
    {
      "epoch": 1.5510204081632653,
      "grad_norm": 0.13309688866138458,
      "learning_rate": 2.9196791603768514e-05,
      "loss": 0.3017,
      "step": 5168
    },
    {
      "epoch": 1.5513205282112845,
      "grad_norm": 0.1456434279680252,
      "learning_rate": 2.9159813475940756e-05,
      "loss": 0.3558,
      "step": 5169
    },
    {
      "epoch": 1.5516206482593038,
      "grad_norm": 0.14691193401813507,
      "learning_rate": 2.9122854782305853e-05,
      "loss": 0.3781,
      "step": 5170
    },
    {
      "epoch": 1.551920768307323,
      "grad_norm": 0.13335365056991577,
      "learning_rate": 2.9085915533003037e-05,
      "loss": 0.3391,
      "step": 5171
    },
    {
      "epoch": 1.552220888355342,
      "grad_norm": 0.125112846493721,
      "learning_rate": 2.904899573816613e-05,
      "loss": 0.2738,
      "step": 5172
    },
    {
      "epoch": 1.5525210084033614,
      "grad_norm": 0.1315487027168274,
      "learning_rate": 2.9012095407923677e-05,
      "loss": 0.3344,
      "step": 5173
    },
    {
      "epoch": 1.5528211284513804,
      "grad_norm": 0.13965719938278198,
      "learning_rate": 2.8975214552398888e-05,
      "loss": 0.3801,
      "step": 5174
    },
    {
      "epoch": 1.5531212484993997,
      "grad_norm": 0.16344398260116577,
      "learning_rate": 2.8938353181709576e-05,
      "loss": 0.3921,
      "step": 5175
    },
    {
      "epoch": 1.553421368547419,
      "grad_norm": 0.12906277179718018,
      "learning_rate": 2.89015113059682e-05,
      "loss": 0.3302,
      "step": 5176
    },
    {
      "epoch": 1.5537214885954382,
      "grad_norm": 0.12793534994125366,
      "learning_rate": 2.8864688935281948e-05,
      "loss": 0.3145,
      "step": 5177
    },
    {
      "epoch": 1.5540216086434575,
      "grad_norm": 0.13471034169197083,
      "learning_rate": 2.8827886079752598e-05,
      "loss": 0.3614,
      "step": 5178
    },
    {
      "epoch": 1.5543217286914766,
      "grad_norm": 0.14348182082176208,
      "learning_rate": 2.879110274947664e-05,
      "loss": 0.3566,
      "step": 5179
    },
    {
      "epoch": 1.5546218487394958,
      "grad_norm": 0.1379540115594864,
      "learning_rate": 2.8754338954545078e-05,
      "loss": 0.3603,
      "step": 5180
    },
    {
      "epoch": 1.5549219687875149,
      "grad_norm": 0.1349738985300064,
      "learning_rate": 2.8717594705043694e-05,
      "loss": 0.3629,
      "step": 5181
    },
    {
      "epoch": 1.5552220888355341,
      "grad_norm": 0.14187569916248322,
      "learning_rate": 2.868087001105285e-05,
      "loss": 0.3665,
      "step": 5182
    },
    {
      "epoch": 1.5555222088835534,
      "grad_norm": 0.1274200677871704,
      "learning_rate": 2.864416488264755e-05,
      "loss": 0.308,
      "step": 5183
    },
    {
      "epoch": 1.5558223289315727,
      "grad_norm": 0.1288219839334488,
      "learning_rate": 2.8607479329897367e-05,
      "loss": 0.3393,
      "step": 5184
    },
    {
      "epoch": 1.556122448979592,
      "grad_norm": 0.1553051769733429,
      "learning_rate": 2.85708133628666e-05,
      "loss": 0.3391,
      "step": 5185
    },
    {
      "epoch": 1.556422569027611,
      "grad_norm": 0.13538624346256256,
      "learning_rate": 2.8534166991614142e-05,
      "loss": 0.3562,
      "step": 5186
    },
    {
      "epoch": 1.5567226890756303,
      "grad_norm": 0.14043527841567993,
      "learning_rate": 2.849754022619352e-05,
      "loss": 0.3662,
      "step": 5187
    },
    {
      "epoch": 1.5570228091236493,
      "grad_norm": 0.13456833362579346,
      "learning_rate": 2.8460933076652864e-05,
      "loss": 0.3468,
      "step": 5188
    },
    {
      "epoch": 1.5573229291716686,
      "grad_norm": 0.1411183923482895,
      "learning_rate": 2.8424345553034836e-05,
      "loss": 0.3789,
      "step": 5189
    },
    {
      "epoch": 1.5576230492196879,
      "grad_norm": 0.1420077085494995,
      "learning_rate": 2.8387777665376947e-05,
      "loss": 0.3526,
      "step": 5190
    },
    {
      "epoch": 1.5579231692677071,
      "grad_norm": 0.14961381256580353,
      "learning_rate": 2.83512294237111e-05,
      "loss": 0.3393,
      "step": 5191
    },
    {
      "epoch": 1.5582232893157264,
      "grad_norm": 0.14596201479434967,
      "learning_rate": 2.8314700838063866e-05,
      "loss": 0.303,
      "step": 5192
    },
    {
      "epoch": 1.5585234093637454,
      "grad_norm": 0.14556051790714264,
      "learning_rate": 2.8278191918456475e-05,
      "loss": 0.3635,
      "step": 5193
    },
    {
      "epoch": 1.5588235294117647,
      "grad_norm": 0.1302669495344162,
      "learning_rate": 2.8241702674904756e-05,
      "loss": 0.3203,
      "step": 5194
    },
    {
      "epoch": 1.5591236494597838,
      "grad_norm": 0.13536348938941956,
      "learning_rate": 2.820523311741906e-05,
      "loss": 0.3282,
      "step": 5195
    },
    {
      "epoch": 1.559423769507803,
      "grad_norm": 0.16835986077785492,
      "learning_rate": 2.816878325600444e-05,
      "loss": 0.4177,
      "step": 5196
    },
    {
      "epoch": 1.5597238895558223,
      "grad_norm": 0.14837545156478882,
      "learning_rate": 2.8132353100660447e-05,
      "loss": 0.3293,
      "step": 5197
    },
    {
      "epoch": 1.5600240096038416,
      "grad_norm": 0.14597195386886597,
      "learning_rate": 2.8095942661381304e-05,
      "loss": 0.337,
      "step": 5198
    },
    {
      "epoch": 1.5603241296518608,
      "grad_norm": 0.13168051838874817,
      "learning_rate": 2.8059551948155827e-05,
      "loss": 0.336,
      "step": 5199
    },
    {
      "epoch": 1.5606242496998801,
      "grad_norm": 0.12910909950733185,
      "learning_rate": 2.8023180970967333e-05,
      "loss": 0.3251,
      "step": 5200
    },
    {
      "epoch": 1.5609243697478992,
      "grad_norm": 0.12473164498806,
      "learning_rate": 2.798682973979384e-05,
      "loss": 0.2888,
      "step": 5201
    },
    {
      "epoch": 1.5612244897959182,
      "grad_norm": 0.1480257660150528,
      "learning_rate": 2.7950498264607828e-05,
      "loss": 0.3411,
      "step": 5202
    },
    {
      "epoch": 1.5615246098439375,
      "grad_norm": 0.13709281384944916,
      "learning_rate": 2.7914186555376464e-05,
      "loss": 0.3476,
      "step": 5203
    },
    {
      "epoch": 1.5618247298919568,
      "grad_norm": 0.1475631445646286,
      "learning_rate": 2.7877894622061474e-05,
      "loss": 0.3519,
      "step": 5204
    },
    {
      "epoch": 1.562124849939976,
      "grad_norm": 0.15101012587547302,
      "learning_rate": 2.7841622474619057e-05,
      "loss": 0.3564,
      "step": 5205
    },
    {
      "epoch": 1.5624249699879953,
      "grad_norm": 0.11792006343603134,
      "learning_rate": 2.780537012300011e-05,
      "loss": 0.2761,
      "step": 5206
    },
    {
      "epoch": 1.5627250900360146,
      "grad_norm": 0.12644313275814056,
      "learning_rate": 2.7769137577150072e-05,
      "loss": 0.3149,
      "step": 5207
    },
    {
      "epoch": 1.5630252100840336,
      "grad_norm": 0.14322860538959503,
      "learning_rate": 2.7732924847008867e-05,
      "loss": 0.3762,
      "step": 5208
    },
    {
      "epoch": 1.5633253301320527,
      "grad_norm": 0.13497188687324524,
      "learning_rate": 2.769673194251111e-05,
      "loss": 0.3269,
      "step": 5209
    },
    {
      "epoch": 1.563625450180072,
      "grad_norm": 0.13116684556007385,
      "learning_rate": 2.766055887358584e-05,
      "loss": 0.3188,
      "step": 5210
    },
    {
      "epoch": 1.5639255702280912,
      "grad_norm": 0.1373387724161148,
      "learning_rate": 2.762440565015676e-05,
      "loss": 0.3245,
      "step": 5211
    },
    {
      "epoch": 1.5642256902761105,
      "grad_norm": 0.13331609964370728,
      "learning_rate": 2.7588272282142112e-05,
      "loss": 0.3355,
      "step": 5212
    },
    {
      "epoch": 1.5645258103241297,
      "grad_norm": 0.1577932983636856,
      "learning_rate": 2.755215877945465e-05,
      "loss": 0.391,
      "step": 5213
    },
    {
      "epoch": 1.564825930372149,
      "grad_norm": 0.13589797914028168,
      "learning_rate": 2.7516065152001634e-05,
      "loss": 0.3428,
      "step": 5214
    },
    {
      "epoch": 1.565126050420168,
      "grad_norm": 0.1299768090248108,
      "learning_rate": 2.747999140968507e-05,
      "loss": 0.3207,
      "step": 5215
    },
    {
      "epoch": 1.5654261704681873,
      "grad_norm": 0.13584931194782257,
      "learning_rate": 2.744393756240127e-05,
      "loss": 0.3261,
      "step": 5216
    },
    {
      "epoch": 1.5657262905162064,
      "grad_norm": 0.13659065961837769,
      "learning_rate": 2.7407903620041264e-05,
      "loss": 0.3554,
      "step": 5217
    },
    {
      "epoch": 1.5660264105642256,
      "grad_norm": 0.14530958235263824,
      "learning_rate": 2.7371889592490485e-05,
      "loss": 0.3719,
      "step": 5218
    },
    {
      "epoch": 1.566326530612245,
      "grad_norm": 0.14446115493774414,
      "learning_rate": 2.7335895489628994e-05,
      "loss": 0.3721,
      "step": 5219
    },
    {
      "epoch": 1.5666266506602642,
      "grad_norm": 0.1385437399148941,
      "learning_rate": 2.72999213213314e-05,
      "loss": 0.3531,
      "step": 5220
    },
    {
      "epoch": 1.5669267707082835,
      "grad_norm": 0.14599959552288055,
      "learning_rate": 2.7263967097466768e-05,
      "loss": 0.3797,
      "step": 5221
    },
    {
      "epoch": 1.5672268907563025,
      "grad_norm": 0.14255771040916443,
      "learning_rate": 2.72280328278987e-05,
      "loss": 0.3566,
      "step": 5222
    },
    {
      "epoch": 1.5675270108043218,
      "grad_norm": 0.40354663133621216,
      "learning_rate": 2.7192118522485378e-05,
      "loss": 0.3534,
      "step": 5223
    },
    {
      "epoch": 1.5678271308523408,
      "grad_norm": 0.12925659120082855,
      "learning_rate": 2.7156224191079515e-05,
      "loss": 0.3132,
      "step": 5224
    },
    {
      "epoch": 1.56812725090036,
      "grad_norm": 0.12465454638004303,
      "learning_rate": 2.712034984352825e-05,
      "loss": 0.3152,
      "step": 5225
    },
    {
      "epoch": 1.5684273709483794,
      "grad_norm": 0.1358371078968048,
      "learning_rate": 2.7084495489673346e-05,
      "loss": 0.3388,
      "step": 5226
    },
    {
      "epoch": 1.5687274909963986,
      "grad_norm": 0.17917902767658234,
      "learning_rate": 2.704866113935095e-05,
      "loss": 0.3481,
      "step": 5227
    },
    {
      "epoch": 1.569027611044418,
      "grad_norm": 0.1871613711118698,
      "learning_rate": 2.7012846802391935e-05,
      "loss": 0.342,
      "step": 5228
    },
    {
      "epoch": 1.569327731092437,
      "grad_norm": 0.16490772366523743,
      "learning_rate": 2.697705248862149e-05,
      "loss": 0.3537,
      "step": 5229
    },
    {
      "epoch": 1.5696278511404562,
      "grad_norm": 0.1259879767894745,
      "learning_rate": 2.6941278207859333e-05,
      "loss": 0.313,
      "step": 5230
    },
    {
      "epoch": 1.5699279711884753,
      "grad_norm": 0.1332831084728241,
      "learning_rate": 2.6905523969919767e-05,
      "loss": 0.32,
      "step": 5231
    },
    {
      "epoch": 1.5702280912364945,
      "grad_norm": 0.1395072638988495,
      "learning_rate": 2.6869789784611587e-05,
      "loss": 0.3319,
      "step": 5232
    },
    {
      "epoch": 1.5705282112845138,
      "grad_norm": 0.15206611156463623,
      "learning_rate": 2.683407566173799e-05,
      "loss": 0.3713,
      "step": 5233
    },
    {
      "epoch": 1.570828331332533,
      "grad_norm": 0.14162662625312805,
      "learning_rate": 2.679838161109681e-05,
      "loss": 0.3485,
      "step": 5234
    },
    {
      "epoch": 1.5711284513805523,
      "grad_norm": 0.1351427137851715,
      "learning_rate": 2.6762707642480223e-05,
      "loss": 0.2969,
      "step": 5235
    },
    {
      "epoch": 1.5714285714285714,
      "grad_norm": 0.13487915694713593,
      "learning_rate": 2.6727053765675024e-05,
      "loss": 0.3315,
      "step": 5236
    },
    {
      "epoch": 1.5717286914765907,
      "grad_norm": 0.1483466923236847,
      "learning_rate": 2.6691419990462465e-05,
      "loss": 0.3556,
      "step": 5237
    },
    {
      "epoch": 1.5720288115246097,
      "grad_norm": 0.12802277505397797,
      "learning_rate": 2.6655806326618194e-05,
      "loss": 0.3064,
      "step": 5238
    },
    {
      "epoch": 1.572328931572629,
      "grad_norm": 0.13978759944438934,
      "learning_rate": 2.6620212783912478e-05,
      "loss": 0.3361,
      "step": 5239
    },
    {
      "epoch": 1.5726290516206483,
      "grad_norm": 0.152753084897995,
      "learning_rate": 2.6584639372109942e-05,
      "loss": 0.3462,
      "step": 5240
    },
    {
      "epoch": 1.5729291716686675,
      "grad_norm": 0.14378800988197327,
      "learning_rate": 2.6549086100969768e-05,
      "loss": 0.3703,
      "step": 5241
    },
    {
      "epoch": 1.5732292917166868,
      "grad_norm": 0.132570281624794,
      "learning_rate": 2.6513552980245628e-05,
      "loss": 0.3278,
      "step": 5242
    },
    {
      "epoch": 1.5735294117647058,
      "grad_norm": 0.13974635303020477,
      "learning_rate": 2.6478040019685556e-05,
      "loss": 0.3537,
      "step": 5243
    },
    {
      "epoch": 1.5738295318127251,
      "grad_norm": 0.14354409277439117,
      "learning_rate": 2.6442547229032154e-05,
      "loss": 0.3446,
      "step": 5244
    },
    {
      "epoch": 1.5741296518607442,
      "grad_norm": 0.15387193858623505,
      "learning_rate": 2.6407074618022508e-05,
      "loss": 0.3818,
      "step": 5245
    },
    {
      "epoch": 1.5744297719087634,
      "grad_norm": 0.14714168012142181,
      "learning_rate": 2.6371622196388045e-05,
      "loss": 0.3008,
      "step": 5246
    },
    {
      "epoch": 1.5747298919567827,
      "grad_norm": 0.1442662477493286,
      "learning_rate": 2.6336189973854796e-05,
      "loss": 0.3339,
      "step": 5247
    },
    {
      "epoch": 1.575030012004802,
      "grad_norm": 0.12938161194324493,
      "learning_rate": 2.630077796014312e-05,
      "loss": 0.3124,
      "step": 5248
    },
    {
      "epoch": 1.5753301320528212,
      "grad_norm": 0.15024563670158386,
      "learning_rate": 2.6265386164967943e-05,
      "loss": 0.3416,
      "step": 5249
    },
    {
      "epoch": 1.5756302521008403,
      "grad_norm": 0.13507284224033356,
      "learning_rate": 2.623001459803861e-05,
      "loss": 0.3314,
      "step": 5250
    },
    {
      "epoch": 1.5759303721488596,
      "grad_norm": 0.14251956343650818,
      "learning_rate": 2.6194663269058885e-05,
      "loss": 0.3278,
      "step": 5251
    },
    {
      "epoch": 1.5762304921968786,
      "grad_norm": 0.1405022144317627,
      "learning_rate": 2.6159332187726936e-05,
      "loss": 0.3475,
      "step": 5252
    },
    {
      "epoch": 1.5765306122448979,
      "grad_norm": 0.13790111243724823,
      "learning_rate": 2.612402136373555e-05,
      "loss": 0.3562,
      "step": 5253
    },
    {
      "epoch": 1.5768307322929171,
      "grad_norm": 0.15381157398223877,
      "learning_rate": 2.608873080677181e-05,
      "loss": 0.3815,
      "step": 5254
    },
    {
      "epoch": 1.5771308523409364,
      "grad_norm": 0.13172820210456848,
      "learning_rate": 2.6053460526517236e-05,
      "loss": 0.2976,
      "step": 5255
    },
    {
      "epoch": 1.5774309723889557,
      "grad_norm": 0.13020232319831848,
      "learning_rate": 2.6018210532647848e-05,
      "loss": 0.3058,
      "step": 5256
    },
    {
      "epoch": 1.5777310924369747,
      "grad_norm": 0.13710160553455353,
      "learning_rate": 2.598298083483408e-05,
      "loss": 0.3421,
      "step": 5257
    },
    {
      "epoch": 1.578031212484994,
      "grad_norm": 0.138636976480484,
      "learning_rate": 2.594777144274083e-05,
      "loss": 0.3448,
      "step": 5258
    },
    {
      "epoch": 1.578331332533013,
      "grad_norm": 0.15129081904888153,
      "learning_rate": 2.591258236602736e-05,
      "loss": 0.3643,
      "step": 5259
    },
    {
      "epoch": 1.5786314525810323,
      "grad_norm": 0.1381225436925888,
      "learning_rate": 2.5877413614347358e-05,
      "loss": 0.3301,
      "step": 5260
    },
    {
      "epoch": 1.5789315726290516,
      "grad_norm": 0.1401417851448059,
      "learning_rate": 2.5842265197348993e-05,
      "loss": 0.3597,
      "step": 5261
    },
    {
      "epoch": 1.5792316926770709,
      "grad_norm": 0.13018085062503815,
      "learning_rate": 2.5807137124674864e-05,
      "loss": 0.322,
      "step": 5262
    },
    {
      "epoch": 1.5795318127250901,
      "grad_norm": 0.13071636855602264,
      "learning_rate": 2.5772029405961895e-05,
      "loss": 0.338,
      "step": 5263
    },
    {
      "epoch": 1.5798319327731094,
      "grad_norm": 0.1339547485113144,
      "learning_rate": 2.573694205084155e-05,
      "loss": 0.3647,
      "step": 5264
    },
    {
      "epoch": 1.5801320528211285,
      "grad_norm": 0.13119250535964966,
      "learning_rate": 2.5701875068939585e-05,
      "loss": 0.3234,
      "step": 5265
    },
    {
      "epoch": 1.5804321728691475,
      "grad_norm": 0.1344720870256424,
      "learning_rate": 2.5666828469876247e-05,
      "loss": 0.3265,
      "step": 5266
    },
    {
      "epoch": 1.5807322929171668,
      "grad_norm": 0.14399132132530212,
      "learning_rate": 2.563180226326619e-05,
      "loss": 0.3714,
      "step": 5267
    },
    {
      "epoch": 1.581032412965186,
      "grad_norm": 0.15395571291446686,
      "learning_rate": 2.559679645871842e-05,
      "loss": 0.3698,
      "step": 5268
    },
    {
      "epoch": 1.5813325330132053,
      "grad_norm": 0.12918394804000854,
      "learning_rate": 2.5561811065836384e-05,
      "loss": 0.3321,
      "step": 5269
    },
    {
      "epoch": 1.5816326530612246,
      "grad_norm": 0.1321277916431427,
      "learning_rate": 2.5526846094217948e-05,
      "loss": 0.3496,
      "step": 5270
    },
    {
      "epoch": 1.5819327731092439,
      "grad_norm": 0.1286695897579193,
      "learning_rate": 2.549190155345532e-05,
      "loss": 0.3039,
      "step": 5271
    },
    {
      "epoch": 1.582232893157263,
      "grad_norm": 0.11904725432395935,
      "learning_rate": 2.5456977453135167e-05,
      "loss": 0.2819,
      "step": 5272
    },
    {
      "epoch": 1.582533013205282,
      "grad_norm": 0.12723879516124725,
      "learning_rate": 2.5422073802838476e-05,
      "loss": 0.3384,
      "step": 5273
    },
    {
      "epoch": 1.5828331332533012,
      "grad_norm": 0.136310875415802,
      "learning_rate": 2.5387190612140678e-05,
      "loss": 0.3584,
      "step": 5274
    },
    {
      "epoch": 1.5831332533013205,
      "grad_norm": 0.20283541083335876,
      "learning_rate": 2.5352327890611605e-05,
      "loss": 0.34,
      "step": 5275
    },
    {
      "epoch": 1.5834333733493398,
      "grad_norm": 0.13946253061294556,
      "learning_rate": 2.5317485647815398e-05,
      "loss": 0.3577,
      "step": 5276
    },
    {
      "epoch": 1.583733493397359,
      "grad_norm": 0.12818807363510132,
      "learning_rate": 2.5282663893310643e-05,
      "loss": 0.311,
      "step": 5277
    },
    {
      "epoch": 1.5840336134453783,
      "grad_norm": 0.13578660786151886,
      "learning_rate": 2.524786263665033e-05,
      "loss": 0.3456,
      "step": 5278
    },
    {
      "epoch": 1.5843337334933973,
      "grad_norm": 0.14931534230709076,
      "learning_rate": 2.521308188738173e-05,
      "loss": 0.3522,
      "step": 5279
    },
    {
      "epoch": 1.5846338535414166,
      "grad_norm": 0.13508422672748566,
      "learning_rate": 2.5178321655046577e-05,
      "loss": 0.3234,
      "step": 5280
    },
    {
      "epoch": 1.5849339735894357,
      "grad_norm": 0.18868158757686615,
      "learning_rate": 2.5143581949180915e-05,
      "loss": 0.342,
      "step": 5281
    },
    {
      "epoch": 1.585234093637455,
      "grad_norm": 0.16816258430480957,
      "learning_rate": 2.510886277931519e-05,
      "loss": 0.4059,
      "step": 5282
    },
    {
      "epoch": 1.5855342136854742,
      "grad_norm": 0.26136964559555054,
      "learning_rate": 2.5074164154974245e-05,
      "loss": 0.3438,
      "step": 5283
    },
    {
      "epoch": 1.5858343337334935,
      "grad_norm": 0.17227911949157715,
      "learning_rate": 2.503948608567722e-05,
      "loss": 0.3629,
      "step": 5284
    },
    {
      "epoch": 1.5861344537815127,
      "grad_norm": 0.1345667839050293,
      "learning_rate": 2.5004828580937613e-05,
      "loss": 0.35,
      "step": 5285
    },
    {
      "epoch": 1.5864345738295318,
      "grad_norm": 0.13711421191692352,
      "learning_rate": 2.4970191650263354e-05,
      "loss": 0.33,
      "step": 5286
    },
    {
      "epoch": 1.586734693877551,
      "grad_norm": 0.14105214178562164,
      "learning_rate": 2.4935575303156677e-05,
      "loss": 0.3553,
      "step": 5287
    },
    {
      "epoch": 1.58703481392557,
      "grad_norm": 0.14076834917068481,
      "learning_rate": 2.490097954911421e-05,
      "loss": 0.3633,
      "step": 5288
    },
    {
      "epoch": 1.5873349339735894,
      "grad_norm": 0.17734204232692719,
      "learning_rate": 2.4866404397626885e-05,
      "loss": 0.3428,
      "step": 5289
    },
    {
      "epoch": 1.5876350540216086,
      "grad_norm": 0.12258309870958328,
      "learning_rate": 2.4831849858179913e-05,
      "loss": 0.3083,
      "step": 5290
    },
    {
      "epoch": 1.587935174069628,
      "grad_norm": 0.13976798951625824,
      "learning_rate": 2.4797315940253075e-05,
      "loss": 0.3542,
      "step": 5291
    },
    {
      "epoch": 1.5882352941176472,
      "grad_norm": 0.141945019364357,
      "learning_rate": 2.4762802653320295e-05,
      "loss": 0.3841,
      "step": 5292
    },
    {
      "epoch": 1.5885354141656662,
      "grad_norm": 0.1637776494026184,
      "learning_rate": 2.4728310006849863e-05,
      "loss": 0.3916,
      "step": 5293
    },
    {
      "epoch": 1.5888355342136855,
      "grad_norm": 0.1532609760761261,
      "learning_rate": 2.4693838010304472e-05,
      "loss": 0.3856,
      "step": 5294
    },
    {
      "epoch": 1.5891356542617046,
      "grad_norm": 0.1620074063539505,
      "learning_rate": 2.4659386673141137e-05,
      "loss": 0.4465,
      "step": 5295
    },
    {
      "epoch": 1.5894357743097238,
      "grad_norm": 0.1464465707540512,
      "learning_rate": 2.462495600481115e-05,
      "loss": 0.3593,
      "step": 5296
    },
    {
      "epoch": 1.589735894357743,
      "grad_norm": 0.19453909993171692,
      "learning_rate": 2.4590546014760217e-05,
      "loss": 0.462,
      "step": 5297
    },
    {
      "epoch": 1.5900360144057624,
      "grad_norm": 0.12688057124614716,
      "learning_rate": 2.455615671242827e-05,
      "loss": 0.3361,
      "step": 5298
    },
    {
      "epoch": 1.5903361344537816,
      "grad_norm": 0.12854287028312683,
      "learning_rate": 2.452178810724963e-05,
      "loss": 0.3359,
      "step": 5299
    },
    {
      "epoch": 1.5906362545018007,
      "grad_norm": 0.15983322262763977,
      "learning_rate": 2.448744020865299e-05,
      "loss": 0.3733,
      "step": 5300
    },
    {
      "epoch": 1.59093637454982,
      "grad_norm": 0.1243366226553917,
      "learning_rate": 2.4453113026061225e-05,
      "loss": 0.3484,
      "step": 5301
    },
    {
      "epoch": 1.591236494597839,
      "grad_norm": 0.12155354768037796,
      "learning_rate": 2.4418806568891638e-05,
      "loss": 0.35,
      "step": 5302
    },
    {
      "epoch": 1.5915366146458583,
      "grad_norm": 0.10961625725030899,
      "learning_rate": 2.4384520846555835e-05,
      "loss": 0.2637,
      "step": 5303
    },
    {
      "epoch": 1.5918367346938775,
      "grad_norm": 0.1370275318622589,
      "learning_rate": 2.435025586845966e-05,
      "loss": 0.3819,
      "step": 5304
    },
    {
      "epoch": 1.5921368547418968,
      "grad_norm": 0.13036561012268066,
      "learning_rate": 2.4316011644003367e-05,
      "loss": 0.3608,
      "step": 5305
    },
    {
      "epoch": 1.592436974789916,
      "grad_norm": 0.12635314464569092,
      "learning_rate": 2.4281788182581424e-05,
      "loss": 0.3476,
      "step": 5306
    },
    {
      "epoch": 1.5927370948379351,
      "grad_norm": 0.14407332241535187,
      "learning_rate": 2.424758549358266e-05,
      "loss": 0.4149,
      "step": 5307
    },
    {
      "epoch": 1.5930372148859544,
      "grad_norm": 0.12928353250026703,
      "learning_rate": 2.4213403586390216e-05,
      "loss": 0.347,
      "step": 5308
    },
    {
      "epoch": 1.5933373349339734,
      "grad_norm": 0.19450713694095612,
      "learning_rate": 2.4179242470381457e-05,
      "loss": 0.3485,
      "step": 5309
    },
    {
      "epoch": 1.5936374549819927,
      "grad_norm": 0.13167737424373627,
      "learning_rate": 2.4145102154928156e-05,
      "loss": 0.3479,
      "step": 5310
    },
    {
      "epoch": 1.593937575030012,
      "grad_norm": 0.14388799667358398,
      "learning_rate": 2.411098264939625e-05,
      "loss": 0.3537,
      "step": 5311
    },
    {
      "epoch": 1.5942376950780313,
      "grad_norm": 0.13148075342178345,
      "learning_rate": 2.407688396314607e-05,
      "loss": 0.3536,
      "step": 5312
    },
    {
      "epoch": 1.5945378151260505,
      "grad_norm": 0.1360936015844345,
      "learning_rate": 2.4042806105532224e-05,
      "loss": 0.3769,
      "step": 5313
    },
    {
      "epoch": 1.5948379351740696,
      "grad_norm": 0.1445259004831314,
      "learning_rate": 2.4008749085903547e-05,
      "loss": 0.4002,
      "step": 5314
    },
    {
      "epoch": 1.5951380552220888,
      "grad_norm": 0.1379500776529312,
      "learning_rate": 2.3974712913603136e-05,
      "loss": 0.3729,
      "step": 5315
    },
    {
      "epoch": 1.595438175270108,
      "grad_norm": 0.13159777224063873,
      "learning_rate": 2.3940697597968555e-05,
      "loss": 0.333,
      "step": 5316
    },
    {
      "epoch": 1.5957382953181272,
      "grad_norm": 0.14502441883087158,
      "learning_rate": 2.390670314833142e-05,
      "loss": 0.3638,
      "step": 5317
    },
    {
      "epoch": 1.5960384153661464,
      "grad_norm": 0.13807806372642517,
      "learning_rate": 2.387272957401777e-05,
      "loss": 0.3774,
      "step": 5318
    },
    {
      "epoch": 1.5963385354141657,
      "grad_norm": 0.1350492238998413,
      "learning_rate": 2.3838776884347812e-05,
      "loss": 0.3455,
      "step": 5319
    },
    {
      "epoch": 1.596638655462185,
      "grad_norm": 0.14351780712604523,
      "learning_rate": 2.380484508863611e-05,
      "loss": 0.3827,
      "step": 5320
    },
    {
      "epoch": 1.5969387755102042,
      "grad_norm": 0.14616893231868744,
      "learning_rate": 2.3770934196191485e-05,
      "loss": 0.3513,
      "step": 5321
    },
    {
      "epoch": 1.5972388955582233,
      "grad_norm": 0.12817350029945374,
      "learning_rate": 2.3737044216316972e-05,
      "loss": 0.3259,
      "step": 5322
    },
    {
      "epoch": 1.5975390156062423,
      "grad_norm": 0.13494904339313507,
      "learning_rate": 2.3703175158309887e-05,
      "loss": 0.3648,
      "step": 5323
    },
    {
      "epoch": 1.5978391356542616,
      "grad_norm": 0.12941431999206543,
      "learning_rate": 2.366932703146182e-05,
      "loss": 0.3253,
      "step": 5324
    },
    {
      "epoch": 1.5981392557022809,
      "grad_norm": 0.1348377913236618,
      "learning_rate": 2.363549984505864e-05,
      "loss": 0.3704,
      "step": 5325
    },
    {
      "epoch": 1.5984393757503002,
      "grad_norm": 0.13204167783260345,
      "learning_rate": 2.360169360838046e-05,
      "loss": 0.3487,
      "step": 5326
    },
    {
      "epoch": 1.5987394957983194,
      "grad_norm": 0.13480515778064728,
      "learning_rate": 2.3567908330701582e-05,
      "loss": 0.3632,
      "step": 5327
    },
    {
      "epoch": 1.5990396158463387,
      "grad_norm": 0.13230575621128082,
      "learning_rate": 2.353414402129064e-05,
      "loss": 0.3195,
      "step": 5328
    },
    {
      "epoch": 1.5993397358943577,
      "grad_norm": 0.13184982538223267,
      "learning_rate": 2.3500400689410507e-05,
      "loss": 0.3509,
      "step": 5329
    },
    {
      "epoch": 1.5996398559423768,
      "grad_norm": 0.13251787424087524,
      "learning_rate": 2.346667834431826e-05,
      "loss": 0.3497,
      "step": 5330
    },
    {
      "epoch": 1.599939975990396,
      "grad_norm": 0.14108306169509888,
      "learning_rate": 2.343297699526521e-05,
      "loss": 0.3865,
      "step": 5331
    },
    {
      "epoch": 1.6002400960384153,
      "grad_norm": 0.13187861442565918,
      "learning_rate": 2.339929665149695e-05,
      "loss": 0.3317,
      "step": 5332
    },
    {
      "epoch": 1.6005402160864346,
      "grad_norm": 0.13160671293735504,
      "learning_rate": 2.3365637322253343e-05,
      "loss": 0.339,
      "step": 5333
    },
    {
      "epoch": 1.6008403361344539,
      "grad_norm": 0.14250072836875916,
      "learning_rate": 2.333199901676837e-05,
      "loss": 0.3684,
      "step": 5334
    },
    {
      "epoch": 1.6011404561824731,
      "grad_norm": 0.1388656049966812,
      "learning_rate": 2.329838174427037e-05,
      "loss": 0.3613,
      "step": 5335
    },
    {
      "epoch": 1.6014405762304922,
      "grad_norm": 0.1267981380224228,
      "learning_rate": 2.32647855139818e-05,
      "loss": 0.3245,
      "step": 5336
    },
    {
      "epoch": 1.6017406962785115,
      "grad_norm": 0.13511812686920166,
      "learning_rate": 2.3231210335119447e-05,
      "loss": 0.3543,
      "step": 5337
    },
    {
      "epoch": 1.6020408163265305,
      "grad_norm": 0.12260711938142776,
      "learning_rate": 2.319765621689428e-05,
      "loss": 0.3222,
      "step": 5338
    },
    {
      "epoch": 1.6023409363745498,
      "grad_norm": 0.12926319241523743,
      "learning_rate": 2.3164123168511452e-05,
      "loss": 0.3285,
      "step": 5339
    },
    {
      "epoch": 1.602641056422569,
      "grad_norm": 0.1304606795310974,
      "learning_rate": 2.3130611199170384e-05,
      "loss": 0.3303,
      "step": 5340
    },
    {
      "epoch": 1.6029411764705883,
      "grad_norm": 0.1373075544834137,
      "learning_rate": 2.3097120318064725e-05,
      "loss": 0.3623,
      "step": 5341
    },
    {
      "epoch": 1.6032412965186076,
      "grad_norm": 0.14596083760261536,
      "learning_rate": 2.3063650534382265e-05,
      "loss": 0.3451,
      "step": 5342
    },
    {
      "epoch": 1.6035414165666266,
      "grad_norm": 0.13206394016742706,
      "learning_rate": 2.3030201857305124e-05,
      "loss": 0.3309,
      "step": 5343
    },
    {
      "epoch": 1.603841536614646,
      "grad_norm": 0.13339190185070038,
      "learning_rate": 2.2996774296009482e-05,
      "loss": 0.3646,
      "step": 5344
    },
    {
      "epoch": 1.604141656662665,
      "grad_norm": 0.12648890912532806,
      "learning_rate": 2.296336785966585e-05,
      "loss": 0.3096,
      "step": 5345
    },
    {
      "epoch": 1.6044417767106842,
      "grad_norm": 0.12503592669963837,
      "learning_rate": 2.2929982557438935e-05,
      "loss": 0.3232,
      "step": 5346
    },
    {
      "epoch": 1.6047418967587035,
      "grad_norm": 0.1373262256383896,
      "learning_rate": 2.2896618398487534e-05,
      "loss": 0.373,
      "step": 5347
    },
    {
      "epoch": 1.6050420168067228,
      "grad_norm": 0.13667701184749603,
      "learning_rate": 2.28632753919648e-05,
      "loss": 0.3287,
      "step": 5348
    },
    {
      "epoch": 1.605342136854742,
      "grad_norm": 0.15100489556789398,
      "learning_rate": 2.2829953547017945e-05,
      "loss": 0.4028,
      "step": 5349
    },
    {
      "epoch": 1.605642256902761,
      "grad_norm": 0.130011647939682,
      "learning_rate": 2.2796652872788448e-05,
      "loss": 0.3171,
      "step": 5350
    },
    {
      "epoch": 1.6059423769507803,
      "grad_norm": 0.13675802946090698,
      "learning_rate": 2.2763373378412002e-05,
      "loss": 0.3716,
      "step": 5351
    },
    {
      "epoch": 1.6062424969987994,
      "grad_norm": 0.13907109200954437,
      "learning_rate": 2.2730115073018433e-05,
      "loss": 0.3369,
      "step": 5352
    },
    {
      "epoch": 1.6065426170468187,
      "grad_norm": 0.138726145029068,
      "learning_rate": 2.2696877965731723e-05,
      "loss": 0.3643,
      "step": 5353
    },
    {
      "epoch": 1.606842737094838,
      "grad_norm": 0.12959453463554382,
      "learning_rate": 2.2663662065670187e-05,
      "loss": 0.3278,
      "step": 5354
    },
    {
      "epoch": 1.6071428571428572,
      "grad_norm": 0.14672960340976715,
      "learning_rate": 2.2630467381946152e-05,
      "loss": 0.3871,
      "step": 5355
    },
    {
      "epoch": 1.6074429771908765,
      "grad_norm": 0.12879501283168793,
      "learning_rate": 2.2597293923666263e-05,
      "loss": 0.3165,
      "step": 5356
    },
    {
      "epoch": 1.6077430972388955,
      "grad_norm": 0.13967733085155487,
      "learning_rate": 2.2564141699931207e-05,
      "loss": 0.3689,
      "step": 5357
    },
    {
      "epoch": 1.6080432172869148,
      "grad_norm": 0.1460617035627365,
      "learning_rate": 2.2531010719835943e-05,
      "loss": 0.3635,
      "step": 5358
    },
    {
      "epoch": 1.6083433373349338,
      "grad_norm": 0.1329815536737442,
      "learning_rate": 2.2497900992469623e-05,
      "loss": 0.3449,
      "step": 5359
    },
    {
      "epoch": 1.6086434573829531,
      "grad_norm": 0.12362521886825562,
      "learning_rate": 2.246481252691548e-05,
      "loss": 0.3257,
      "step": 5360
    },
    {
      "epoch": 1.6089435774309724,
      "grad_norm": 0.14099068939685822,
      "learning_rate": 2.243174533225092e-05,
      "loss": 0.3651,
      "step": 5361
    },
    {
      "epoch": 1.6092436974789917,
      "grad_norm": 0.1269274204969406,
      "learning_rate": 2.23986994175476e-05,
      "loss": 0.3161,
      "step": 5362
    },
    {
      "epoch": 1.609543817527011,
      "grad_norm": 0.16197726130485535,
      "learning_rate": 2.2365674791871282e-05,
      "loss": 0.3528,
      "step": 5363
    },
    {
      "epoch": 1.60984393757503,
      "grad_norm": 0.13220979273319244,
      "learning_rate": 2.2332671464281863e-05,
      "loss": 0.3522,
      "step": 5364
    },
    {
      "epoch": 1.6101440576230492,
      "grad_norm": 0.12421213090419769,
      "learning_rate": 2.229968944383346e-05,
      "loss": 0.3048,
      "step": 5365
    },
    {
      "epoch": 1.6104441776710683,
      "grad_norm": 0.13302136957645416,
      "learning_rate": 2.2266728739574283e-05,
      "loss": 0.3266,
      "step": 5366
    },
    {
      "epoch": 1.6107442977190876,
      "grad_norm": 0.1288372278213501,
      "learning_rate": 2.2233789360546788e-05,
      "loss": 0.3034,
      "step": 5367
    },
    {
      "epoch": 1.6110444177671068,
      "grad_norm": 0.19321192800998688,
      "learning_rate": 2.2200871315787452e-05,
      "loss": 0.3439,
      "step": 5368
    },
    {
      "epoch": 1.611344537815126,
      "grad_norm": 0.15680034458637238,
      "learning_rate": 2.216797461432696e-05,
      "loss": 0.3291,
      "step": 5369
    },
    {
      "epoch": 1.6116446578631454,
      "grad_norm": 0.13714157044887543,
      "learning_rate": 2.213509926519016e-05,
      "loss": 0.3425,
      "step": 5370
    },
    {
      "epoch": 1.6119447779111644,
      "grad_norm": 0.12285466492176056,
      "learning_rate": 2.2102245277396073e-05,
      "loss": 0.2918,
      "step": 5371
    },
    {
      "epoch": 1.6122448979591837,
      "grad_norm": 0.1348283290863037,
      "learning_rate": 2.2069412659957734e-05,
      "loss": 0.3331,
      "step": 5372
    },
    {
      "epoch": 1.6125450180072027,
      "grad_norm": 0.13425099849700928,
      "learning_rate": 2.2036601421882464e-05,
      "loss": 0.352,
      "step": 5373
    },
    {
      "epoch": 1.612845138055222,
      "grad_norm": 0.41682004928588867,
      "learning_rate": 2.2003811572171594e-05,
      "loss": 0.3343,
      "step": 5374
    },
    {
      "epoch": 1.6131452581032413,
      "grad_norm": 0.14027799665927887,
      "learning_rate": 2.1971043119820665e-05,
      "loss": 0.3461,
      "step": 5375
    },
    {
      "epoch": 1.6134453781512605,
      "grad_norm": 0.2078617662191391,
      "learning_rate": 2.1938296073819354e-05,
      "loss": 0.3625,
      "step": 5376
    },
    {
      "epoch": 1.6137454981992798,
      "grad_norm": 0.14351028203964233,
      "learning_rate": 2.1905570443151402e-05,
      "loss": 0.3744,
      "step": 5377
    },
    {
      "epoch": 1.614045618247299,
      "grad_norm": 0.13274571299552917,
      "learning_rate": 2.187286623679471e-05,
      "loss": 0.3238,
      "step": 5378
    },
    {
      "epoch": 1.6143457382953181,
      "grad_norm": 0.13354356586933136,
      "learning_rate": 2.184018346372134e-05,
      "loss": 0.3426,
      "step": 5379
    },
    {
      "epoch": 1.6146458583433372,
      "grad_norm": 0.13134168088436127,
      "learning_rate": 2.1807522132897383e-05,
      "loss": 0.3177,
      "step": 5380
    },
    {
      "epoch": 1.6149459783913565,
      "grad_norm": 0.13090597093105316,
      "learning_rate": 2.1774882253283168e-05,
      "loss": 0.338,
      "step": 5381
    },
    {
      "epoch": 1.6152460984393757,
      "grad_norm": 0.13348695635795593,
      "learning_rate": 2.1742263833832998e-05,
      "loss": 0.3206,
      "step": 5382
    },
    {
      "epoch": 1.615546218487395,
      "grad_norm": 0.17404712736606598,
      "learning_rate": 2.1709666883495395e-05,
      "loss": 0.3455,
      "step": 5383
    },
    {
      "epoch": 1.6158463385354143,
      "grad_norm": 0.1371351182460785,
      "learning_rate": 2.167709141121298e-05,
      "loss": 0.3503,
      "step": 5384
    },
    {
      "epoch": 1.6161464585834335,
      "grad_norm": 0.13850924372673035,
      "learning_rate": 2.1644537425922427e-05,
      "loss": 0.3404,
      "step": 5385
    },
    {
      "epoch": 1.6164465786314526,
      "grad_norm": 0.15199445188045502,
      "learning_rate": 2.1612004936554575e-05,
      "loss": 0.3823,
      "step": 5386
    },
    {
      "epoch": 1.6167466986794716,
      "grad_norm": 0.15341348946094513,
      "learning_rate": 2.1579493952034312e-05,
      "loss": 0.3871,
      "step": 5387
    },
    {
      "epoch": 1.617046818727491,
      "grad_norm": 0.13436566293239594,
      "learning_rate": 2.154700448128065e-05,
      "loss": 0.3408,
      "step": 5388
    },
    {
      "epoch": 1.6173469387755102,
      "grad_norm": 0.13752338290214539,
      "learning_rate": 2.1514536533206763e-05,
      "loss": 0.3459,
      "step": 5389
    },
    {
      "epoch": 1.6176470588235294,
      "grad_norm": 0.19831904768943787,
      "learning_rate": 2.148209011671979e-05,
      "loss": 0.3378,
      "step": 5390
    },
    {
      "epoch": 1.6179471788715487,
      "grad_norm": 0.13402965664863586,
      "learning_rate": 2.144966524072105e-05,
      "loss": 0.3408,
      "step": 5391
    },
    {
      "epoch": 1.618247298919568,
      "grad_norm": 0.12890419363975525,
      "learning_rate": 2.141726191410599e-05,
      "loss": 0.3318,
      "step": 5392
    },
    {
      "epoch": 1.618547418967587,
      "grad_norm": 0.13093030452728271,
      "learning_rate": 2.138488014576404e-05,
      "loss": 0.3107,
      "step": 5393
    },
    {
      "epoch": 1.6188475390156063,
      "grad_norm": 0.13020655512809753,
      "learning_rate": 2.135251994457874e-05,
      "loss": 0.3396,
      "step": 5394
    },
    {
      "epoch": 1.6191476590636253,
      "grad_norm": 0.12640948593616486,
      "learning_rate": 2.132018131942779e-05,
      "loss": 0.3196,
      "step": 5395
    },
    {
      "epoch": 1.6194477791116446,
      "grad_norm": 0.14091448485851288,
      "learning_rate": 2.128786427918289e-05,
      "loss": 0.3364,
      "step": 5396
    },
    {
      "epoch": 1.6197478991596639,
      "grad_norm": 0.13731209933757782,
      "learning_rate": 2.1255568832709904e-05,
      "loss": 0.323,
      "step": 5397
    },
    {
      "epoch": 1.6200480192076832,
      "grad_norm": 0.13610906898975372,
      "learning_rate": 2.122329498886868e-05,
      "loss": 0.3347,
      "step": 5398
    },
    {
      "epoch": 1.6203481392557024,
      "grad_norm": 0.14475463330745697,
      "learning_rate": 2.1191042756513114e-05,
      "loss": 0.3649,
      "step": 5399
    },
    {
      "epoch": 1.6206482593037215,
      "grad_norm": 0.13745839893817902,
      "learning_rate": 2.1158812144491357e-05,
      "loss": 0.3576,
      "step": 5400
    },
    {
      "epoch": 1.6209483793517407,
      "grad_norm": 0.14333461225032806,
      "learning_rate": 2.1126603161645454e-05,
      "loss": 0.3593,
      "step": 5401
    },
    {
      "epoch": 1.6212484993997598,
      "grad_norm": 0.2060752958059311,
      "learning_rate": 2.109441581681153e-05,
      "loss": 0.3303,
      "step": 5402
    },
    {
      "epoch": 1.621548619447779,
      "grad_norm": 0.13641700148582458,
      "learning_rate": 2.1062250118819847e-05,
      "loss": 0.3497,
      "step": 5403
    },
    {
      "epoch": 1.6218487394957983,
      "grad_norm": 0.14730916917324066,
      "learning_rate": 2.1030106076494726e-05,
      "loss": 0.3594,
      "step": 5404
    },
    {
      "epoch": 1.6221488595438176,
      "grad_norm": 0.12196728587150574,
      "learning_rate": 2.0997983698654467e-05,
      "loss": 0.3124,
      "step": 5405
    },
    {
      "epoch": 1.6224489795918369,
      "grad_norm": 0.15658676624298096,
      "learning_rate": 2.0965882994111517e-05,
      "loss": 0.3964,
      "step": 5406
    },
    {
      "epoch": 1.622749099639856,
      "grad_norm": 0.1642051339149475,
      "learning_rate": 2.0933803971672295e-05,
      "loss": 0.3354,
      "step": 5407
    },
    {
      "epoch": 1.6230492196878752,
      "grad_norm": 0.1441899985074997,
      "learning_rate": 2.0901746640137333e-05,
      "loss": 0.3449,
      "step": 5408
    },
    {
      "epoch": 1.6233493397358942,
      "grad_norm": 0.13454824686050415,
      "learning_rate": 2.086971100830122e-05,
      "loss": 0.3565,
      "step": 5409
    },
    {
      "epoch": 1.6236494597839135,
      "grad_norm": 0.13018536567687988,
      "learning_rate": 2.0837697084952503e-05,
      "loss": 0.3188,
      "step": 5410
    },
    {
      "epoch": 1.6239495798319328,
      "grad_norm": 0.13388392329216003,
      "learning_rate": 2.080570487887391e-05,
      "loss": 0.3501,
      "step": 5411
    },
    {
      "epoch": 1.624249699879952,
      "grad_norm": 0.1390429586172104,
      "learning_rate": 2.077373439884206e-05,
      "loss": 0.3354,
      "step": 5412
    },
    {
      "epoch": 1.6245498199279713,
      "grad_norm": 0.12056306004524231,
      "learning_rate": 2.074178565362772e-05,
      "loss": 0.3043,
      "step": 5413
    },
    {
      "epoch": 1.6248499399759904,
      "grad_norm": 0.13007420301437378,
      "learning_rate": 2.0709858651995695e-05,
      "loss": 0.3229,
      "step": 5414
    },
    {
      "epoch": 1.6251500600240096,
      "grad_norm": 0.1374148428440094,
      "learning_rate": 2.067795340270473e-05,
      "loss": 0.3537,
      "step": 5415
    },
    {
      "epoch": 1.6254501800720287,
      "grad_norm": 0.1289047747850418,
      "learning_rate": 2.0646069914507704e-05,
      "loss": 0.3079,
      "step": 5416
    },
    {
      "epoch": 1.625750300120048,
      "grad_norm": 0.14260342717170715,
      "learning_rate": 2.0614208196151508e-05,
      "loss": 0.3489,
      "step": 5417
    },
    {
      "epoch": 1.6260504201680672,
      "grad_norm": 0.14635786414146423,
      "learning_rate": 2.0582368256376972e-05,
      "loss": 0.3703,
      "step": 5418
    },
    {
      "epoch": 1.6263505402160865,
      "grad_norm": 0.13332021236419678,
      "learning_rate": 2.0550550103919087e-05,
      "loss": 0.3383,
      "step": 5419
    },
    {
      "epoch": 1.6266506602641058,
      "grad_norm": 0.130903959274292,
      "learning_rate": 2.0518753747506748e-05,
      "loss": 0.3348,
      "step": 5420
    },
    {
      "epoch": 1.6269507803121248,
      "grad_norm": 0.14831160008907318,
      "learning_rate": 2.048697919586292e-05,
      "loss": 0.3663,
      "step": 5421
    },
    {
      "epoch": 1.627250900360144,
      "grad_norm": 0.14494484663009644,
      "learning_rate": 2.0455226457704656e-05,
      "loss": 0.3611,
      "step": 5422
    },
    {
      "epoch": 1.6275510204081631,
      "grad_norm": 0.13702887296676636,
      "learning_rate": 2.0423495541742888e-05,
      "loss": 0.3142,
      "step": 5423
    },
    {
      "epoch": 1.6278511404561824,
      "grad_norm": 0.12333647906780243,
      "learning_rate": 2.0391786456682603e-05,
      "loss": 0.3311,
      "step": 5424
    },
    {
      "epoch": 1.6281512605042017,
      "grad_norm": 0.13059678673744202,
      "learning_rate": 2.0360099211222928e-05,
      "loss": 0.3353,
      "step": 5425
    },
    {
      "epoch": 1.628451380552221,
      "grad_norm": 0.21004725992679596,
      "learning_rate": 2.0328433814056803e-05,
      "loss": 0.343,
      "step": 5426
    },
    {
      "epoch": 1.6287515006002402,
      "grad_norm": 0.1576651930809021,
      "learning_rate": 2.0296790273871323e-05,
      "loss": 0.3474,
      "step": 5427
    },
    {
      "epoch": 1.6290516206482593,
      "grad_norm": 0.13524000346660614,
      "learning_rate": 2.0265168599347482e-05,
      "loss": 0.334,
      "step": 5428
    },
    {
      "epoch": 1.6293517406962785,
      "grad_norm": 0.1273518055677414,
      "learning_rate": 2.0233568799160364e-05,
      "loss": 0.3149,
      "step": 5429
    },
    {
      "epoch": 1.6296518607442976,
      "grad_norm": 0.126450315117836,
      "learning_rate": 2.0201990881979006e-05,
      "loss": 0.3019,
      "step": 5430
    },
    {
      "epoch": 1.6299519807923168,
      "grad_norm": 0.20898903906345367,
      "learning_rate": 2.0170434856466447e-05,
      "loss": 0.3572,
      "step": 5431
    },
    {
      "epoch": 1.6302521008403361,
      "grad_norm": 0.14011222124099731,
      "learning_rate": 2.0138900731279686e-05,
      "loss": 0.3595,
      "step": 5432
    },
    {
      "epoch": 1.6305522208883554,
      "grad_norm": 0.12649571895599365,
      "learning_rate": 2.010738851506977e-05,
      "loss": 0.3063,
      "step": 5433
    },
    {
      "epoch": 1.6308523409363747,
      "grad_norm": 0.1454174965620041,
      "learning_rate": 2.0075898216481746e-05,
      "loss": 0.37,
      "step": 5434
    },
    {
      "epoch": 1.6311524609843937,
      "grad_norm": 0.1352839320898056,
      "learning_rate": 2.0044429844154577e-05,
      "loss": 0.3335,
      "step": 5435
    },
    {
      "epoch": 1.631452581032413,
      "grad_norm": 0.1313468962907791,
      "learning_rate": 2.0012983406721274e-05,
      "loss": 0.3296,
      "step": 5436
    },
    {
      "epoch": 1.631752701080432,
      "grad_norm": 0.18670260906219482,
      "learning_rate": 1.9981558912808752e-05,
      "loss": 0.3488,
      "step": 5437
    },
    {
      "epoch": 1.6320528211284513,
      "grad_norm": 0.13672375679016113,
      "learning_rate": 1.9950156371038053e-05,
      "loss": 0.3515,
      "step": 5438
    },
    {
      "epoch": 1.6323529411764706,
      "grad_norm": 0.13667839765548706,
      "learning_rate": 1.9918775790024047e-05,
      "loss": 0.343,
      "step": 5439
    },
    {
      "epoch": 1.6326530612244898,
      "grad_norm": 0.14519430696964264,
      "learning_rate": 1.9887417178375633e-05,
      "loss": 0.3269,
      "step": 5440
    },
    {
      "epoch": 1.632953181272509,
      "grad_norm": 0.17133067548274994,
      "learning_rate": 1.9856080544695687e-05,
      "loss": 0.3153,
      "step": 5441
    },
    {
      "epoch": 1.6332533013205284,
      "grad_norm": 0.1435331255197525,
      "learning_rate": 1.98247658975811e-05,
      "loss": 0.3279,
      "step": 5442
    },
    {
      "epoch": 1.6335534213685474,
      "grad_norm": 0.13129855692386627,
      "learning_rate": 1.9793473245622616e-05,
      "loss": 0.3123,
      "step": 5443
    },
    {
      "epoch": 1.6338535414165665,
      "grad_norm": 0.13884615898132324,
      "learning_rate": 1.9762202597405088e-05,
      "loss": 0.331,
      "step": 5444
    },
    {
      "epoch": 1.6341536614645857,
      "grad_norm": 0.13685715198516846,
      "learning_rate": 1.9730953961507203e-05,
      "loss": 0.3496,
      "step": 5445
    },
    {
      "epoch": 1.634453781512605,
      "grad_norm": 0.3771587312221527,
      "learning_rate": 1.9699727346501674e-05,
      "loss": 0.3631,
      "step": 5446
    },
    {
      "epoch": 1.6347539015606243,
      "grad_norm": 0.13653336465358734,
      "learning_rate": 1.966852276095521e-05,
      "loss": 0.3424,
      "step": 5447
    },
    {
      "epoch": 1.6350540216086435,
      "grad_norm": 0.13888002932071686,
      "learning_rate": 1.9637340213428368e-05,
      "loss": 0.3464,
      "step": 5448
    },
    {
      "epoch": 1.6353541416566628,
      "grad_norm": 0.13981866836547852,
      "learning_rate": 1.960617971247579e-05,
      "loss": 0.3625,
      "step": 5449
    },
    {
      "epoch": 1.6356542617046819,
      "grad_norm": 0.38733047246932983,
      "learning_rate": 1.957504126664593e-05,
      "loss": 0.2653,
      "step": 5450
    },
    {
      "epoch": 1.635954381752701,
      "grad_norm": 0.20445388555526733,
      "learning_rate": 1.95439248844813e-05,
      "loss": 0.3469,
      "step": 5451
    },
    {
      "epoch": 1.6362545018007202,
      "grad_norm": 0.1441969871520996,
      "learning_rate": 1.9512830574518348e-05,
      "loss": 0.3691,
      "step": 5452
    },
    {
      "epoch": 1.6365546218487395,
      "grad_norm": 0.139201819896698,
      "learning_rate": 1.9481758345287383e-05,
      "loss": 0.3677,
      "step": 5453
    },
    {
      "epoch": 1.6368547418967587,
      "grad_norm": 0.13921897113323212,
      "learning_rate": 1.9450708205312762e-05,
      "loss": 0.3377,
      "step": 5454
    },
    {
      "epoch": 1.637154861944778,
      "grad_norm": 0.13050687313079834,
      "learning_rate": 1.941968016311273e-05,
      "loss": 0.2899,
      "step": 5455
    },
    {
      "epoch": 1.6374549819927973,
      "grad_norm": 0.12800630927085876,
      "learning_rate": 1.9388674227199443e-05,
      "loss": 0.2944,
      "step": 5456
    },
    {
      "epoch": 1.6377551020408163,
      "grad_norm": 0.14279204607009888,
      "learning_rate": 1.9357690406079076e-05,
      "loss": 0.3534,
      "step": 5457
    },
    {
      "epoch": 1.6380552220888356,
      "grad_norm": 0.1505601704120636,
      "learning_rate": 1.932672870825162e-05,
      "loss": 0.3562,
      "step": 5458
    },
    {
      "epoch": 1.6383553421368546,
      "grad_norm": 0.13510508835315704,
      "learning_rate": 1.929578914221111e-05,
      "loss": 0.3237,
      "step": 5459
    },
    {
      "epoch": 1.638655462184874,
      "grad_norm": 0.1296669989824295,
      "learning_rate": 1.9264871716445454e-05,
      "loss": 0.322,
      "step": 5460
    },
    {
      "epoch": 1.6389555822328932,
      "grad_norm": 0.13488991558551788,
      "learning_rate": 1.9233976439436495e-05,
      "loss": 0.3416,
      "step": 5461
    },
    {
      "epoch": 1.6392557022809124,
      "grad_norm": 0.14997981488704681,
      "learning_rate": 1.9203103319659942e-05,
      "loss": 0.3439,
      "step": 5462
    },
    {
      "epoch": 1.6395558223289317,
      "grad_norm": 0.13843125104904175,
      "learning_rate": 1.9172252365585574e-05,
      "loss": 0.3777,
      "step": 5463
    },
    {
      "epoch": 1.6398559423769508,
      "grad_norm": 0.12544837594032288,
      "learning_rate": 1.9141423585676953e-05,
      "loss": 0.2917,
      "step": 5464
    },
    {
      "epoch": 1.64015606242497,
      "grad_norm": 0.12239256501197815,
      "learning_rate": 1.9110616988391572e-05,
      "loss": 0.3061,
      "step": 5465
    },
    {
      "epoch": 1.640456182472989,
      "grad_norm": 0.15580035746097565,
      "learning_rate": 1.90798325821809e-05,
      "loss": 0.371,
      "step": 5466
    },
    {
      "epoch": 1.6407563025210083,
      "grad_norm": 0.12866289913654327,
      "learning_rate": 1.9049070375490273e-05,
      "loss": 0.3229,
      "step": 5467
    },
    {
      "epoch": 1.6410564225690276,
      "grad_norm": 0.13854321837425232,
      "learning_rate": 1.9018330376758997e-05,
      "loss": 0.3643,
      "step": 5468
    },
    {
      "epoch": 1.6413565426170469,
      "grad_norm": 0.13516482710838318,
      "learning_rate": 1.898761259442019e-05,
      "loss": 0.349,
      "step": 5469
    },
    {
      "epoch": 1.6416566626650662,
      "grad_norm": 0.15773414075374603,
      "learning_rate": 1.89569170369009e-05,
      "loss": 0.3718,
      "step": 5470
    },
    {
      "epoch": 1.6419567827130852,
      "grad_norm": 0.13902094960212708,
      "learning_rate": 1.892624371262215e-05,
      "loss": 0.3655,
      "step": 5471
    },
    {
      "epoch": 1.6422569027611045,
      "grad_norm": 0.12148105353116989,
      "learning_rate": 1.8895592629998814e-05,
      "loss": 0.2935,
      "step": 5472
    },
    {
      "epoch": 1.6425570228091235,
      "grad_norm": 0.1329748034477234,
      "learning_rate": 1.8864963797439617e-05,
      "loss": 0.3394,
      "step": 5473
    },
    {
      "epoch": 1.6428571428571428,
      "grad_norm": 0.15787284076213837,
      "learning_rate": 1.8834357223347297e-05,
      "loss": 0.3844,
      "step": 5474
    },
    {
      "epoch": 1.643157262905162,
      "grad_norm": 0.15091612935066223,
      "learning_rate": 1.8803772916118324e-05,
      "loss": 0.3612,
      "step": 5475
    },
    {
      "epoch": 1.6434573829531813,
      "grad_norm": 0.15188480913639069,
      "learning_rate": 1.8773210884143255e-05,
      "loss": 0.3882,
      "step": 5476
    },
    {
      "epoch": 1.6437575030012006,
      "grad_norm": 0.1658872812986374,
      "learning_rate": 1.87426711358064e-05,
      "loss": 0.344,
      "step": 5477
    },
    {
      "epoch": 1.6440576230492197,
      "grad_norm": 0.13719511032104492,
      "learning_rate": 1.8712153679485932e-05,
      "loss": 0.3691,
      "step": 5478
    },
    {
      "epoch": 1.644357743097239,
      "grad_norm": 0.1480136513710022,
      "learning_rate": 1.8681658523554025e-05,
      "loss": 0.3757,
      "step": 5479
    },
    {
      "epoch": 1.644657863145258,
      "grad_norm": 0.13319872319698334,
      "learning_rate": 1.865118567637667e-05,
      "loss": 0.3083,
      "step": 5480
    },
    {
      "epoch": 1.6449579831932772,
      "grad_norm": 0.13993017375469208,
      "learning_rate": 1.8620735146313705e-05,
      "loss": 0.3557,
      "step": 5481
    },
    {
      "epoch": 1.6452581032412965,
      "grad_norm": 0.14838139712810516,
      "learning_rate": 1.859030694171895e-05,
      "loss": 0.3484,
      "step": 5482
    },
    {
      "epoch": 1.6455582232893158,
      "grad_norm": 0.12882272899150848,
      "learning_rate": 1.8559901070939956e-05,
      "loss": 0.3198,
      "step": 5483
    },
    {
      "epoch": 1.645858343337335,
      "grad_norm": 0.14248614013195038,
      "learning_rate": 1.8529517542318265e-05,
      "loss": 0.3543,
      "step": 5484
    },
    {
      "epoch": 1.646158463385354,
      "grad_norm": 0.1413465291261673,
      "learning_rate": 1.8499156364189283e-05,
      "loss": 0.3405,
      "step": 5485
    },
    {
      "epoch": 1.6464585834333734,
      "grad_norm": 0.1345796436071396,
      "learning_rate": 1.8468817544882178e-05,
      "loss": 0.348,
      "step": 5486
    },
    {
      "epoch": 1.6467587034813924,
      "grad_norm": 0.13932184875011444,
      "learning_rate": 1.8438501092720105e-05,
      "loss": 0.3472,
      "step": 5487
    },
    {
      "epoch": 1.6470588235294117,
      "grad_norm": 0.1598397195339203,
      "learning_rate": 1.840820701602004e-05,
      "loss": 0.3611,
      "step": 5488
    },
    {
      "epoch": 1.647358943577431,
      "grad_norm": 0.14302033185958862,
      "learning_rate": 1.8377935323092788e-05,
      "loss": 0.3227,
      "step": 5489
    },
    {
      "epoch": 1.6476590636254502,
      "grad_norm": 0.12700681388378143,
      "learning_rate": 1.834768602224307e-05,
      "loss": 0.3124,
      "step": 5490
    },
    {
      "epoch": 1.6479591836734695,
      "grad_norm": 0.1239708662033081,
      "learning_rate": 1.83174591217694e-05,
      "loss": 0.2994,
      "step": 5491
    },
    {
      "epoch": 1.6482593037214885,
      "grad_norm": 0.13394364714622498,
      "learning_rate": 1.828725462996419e-05,
      "loss": 0.3269,
      "step": 5492
    },
    {
      "epoch": 1.6485594237695078,
      "grad_norm": 0.12652020156383514,
      "learning_rate": 1.825707255511374e-05,
      "loss": 0.3007,
      "step": 5493
    },
    {
      "epoch": 1.6488595438175269,
      "grad_norm": 0.1336975246667862,
      "learning_rate": 1.822691290549813e-05,
      "loss": 0.3347,
      "step": 5494
    },
    {
      "epoch": 1.6491596638655461,
      "grad_norm": 0.12572890520095825,
      "learning_rate": 1.8196775689391266e-05,
      "loss": 0.2905,
      "step": 5495
    },
    {
      "epoch": 1.6494597839135654,
      "grad_norm": 0.1284392923116684,
      "learning_rate": 1.8166660915060986e-05,
      "loss": 0.3219,
      "step": 5496
    },
    {
      "epoch": 1.6497599039615847,
      "grad_norm": 0.13893640041351318,
      "learning_rate": 1.8136568590768944e-05,
      "loss": 0.3341,
      "step": 5497
    },
    {
      "epoch": 1.650060024009604,
      "grad_norm": 0.1879337877035141,
      "learning_rate": 1.8106498724770638e-05,
      "loss": 0.3586,
      "step": 5498
    },
    {
      "epoch": 1.6503601440576232,
      "grad_norm": 0.14967003464698792,
      "learning_rate": 1.8076451325315368e-05,
      "loss": 0.3552,
      "step": 5499
    },
    {
      "epoch": 1.6506602641056423,
      "grad_norm": 0.14558528363704681,
      "learning_rate": 1.8046426400646244e-05,
      "loss": 0.3317,
      "step": 5500
    },
    {
      "epoch": 1.6509603841536613,
      "grad_norm": 0.12756778299808502,
      "learning_rate": 1.801642395900036e-05,
      "loss": 0.2754,
      "step": 5501
    },
    {
      "epoch": 1.6512605042016806,
      "grad_norm": 0.13232913613319397,
      "learning_rate": 1.7986444008608496e-05,
      "loss": 0.3194,
      "step": 5502
    },
    {
      "epoch": 1.6515606242496998,
      "grad_norm": 0.14271697402000427,
      "learning_rate": 1.7956486557695263e-05,
      "loss": 0.3397,
      "step": 5503
    },
    {
      "epoch": 1.6518607442977191,
      "grad_norm": 0.14435574412345886,
      "learning_rate": 1.7926551614479192e-05,
      "loss": 0.3668,
      "step": 5504
    },
    {
      "epoch": 1.6521608643457384,
      "grad_norm": 0.1720234900712967,
      "learning_rate": 1.789663918717258e-05,
      "loss": 0.3604,
      "step": 5505
    },
    {
      "epoch": 1.6524609843937577,
      "grad_norm": 0.13502709567546844,
      "learning_rate": 1.78667492839816e-05,
      "loss": 0.3402,
      "step": 5506
    },
    {
      "epoch": 1.6527611044417767,
      "grad_norm": 0.13042998313903809,
      "learning_rate": 1.7836881913106152e-05,
      "loss": 0.3042,
      "step": 5507
    },
    {
      "epoch": 1.6530612244897958,
      "grad_norm": 0.1390058696269989,
      "learning_rate": 1.7807037082739996e-05,
      "loss": 0.3705,
      "step": 5508
    },
    {
      "epoch": 1.653361344537815,
      "grad_norm": 0.1370200514793396,
      "learning_rate": 1.7777214801070752e-05,
      "loss": 0.3602,
      "step": 5509
    },
    {
      "epoch": 1.6536614645858343,
      "grad_norm": 0.12913063168525696,
      "learning_rate": 1.774741507627984e-05,
      "loss": 0.2908,
      "step": 5510
    },
    {
      "epoch": 1.6539615846338536,
      "grad_norm": 0.13181883096694946,
      "learning_rate": 1.7717637916542408e-05,
      "loss": 0.327,
      "step": 5511
    },
    {
      "epoch": 1.6542617046818728,
      "grad_norm": 0.13132743537425995,
      "learning_rate": 1.768788333002752e-05,
      "loss": 0.317,
      "step": 5512
    },
    {
      "epoch": 1.654561824729892,
      "grad_norm": 0.140101358294487,
      "learning_rate": 1.7658151324898033e-05,
      "loss": 0.3595,
      "step": 5513
    },
    {
      "epoch": 1.6548619447779112,
      "grad_norm": 0.13309207558631897,
      "learning_rate": 1.762844190931051e-05,
      "loss": 0.325,
      "step": 5514
    },
    {
      "epoch": 1.6551620648259304,
      "grad_norm": 0.14855670928955078,
      "learning_rate": 1.7598755091415474e-05,
      "loss": 0.3458,
      "step": 5515
    },
    {
      "epoch": 1.6554621848739495,
      "grad_norm": 0.15083813667297363,
      "learning_rate": 1.7569090879357077e-05,
      "loss": 0.3892,
      "step": 5516
    },
    {
      "epoch": 1.6557623049219687,
      "grad_norm": 0.12831264734268188,
      "learning_rate": 1.75394492812734e-05,
      "loss": 0.3063,
      "step": 5517
    },
    {
      "epoch": 1.656062424969988,
      "grad_norm": 0.14697401225566864,
      "learning_rate": 1.7509830305296304e-05,
      "loss": 0.3457,
      "step": 5518
    },
    {
      "epoch": 1.6563625450180073,
      "grad_norm": 0.13638198375701904,
      "learning_rate": 1.748023395955135e-05,
      "loss": 0.3427,
      "step": 5519
    },
    {
      "epoch": 1.6566626650660266,
      "grad_norm": 0.14705848693847656,
      "learning_rate": 1.7450660252158015e-05,
      "loss": 0.3866,
      "step": 5520
    },
    {
      "epoch": 1.6569627851140456,
      "grad_norm": 0.14067374169826508,
      "learning_rate": 1.7421109191229458e-05,
      "loss": 0.341,
      "step": 5521
    },
    {
      "epoch": 1.6572629051620649,
      "grad_norm": 0.13154152035713196,
      "learning_rate": 1.7391580784872696e-05,
      "loss": 0.3297,
      "step": 5522
    },
    {
      "epoch": 1.657563025210084,
      "grad_norm": 0.12965817749500275,
      "learning_rate": 1.736207504118853e-05,
      "loss": 0.3166,
      "step": 5523
    },
    {
      "epoch": 1.6578631452581032,
      "grad_norm": 0.14938890933990479,
      "learning_rate": 1.7332591968271507e-05,
      "loss": 0.3274,
      "step": 5524
    },
    {
      "epoch": 1.6581632653061225,
      "grad_norm": 0.14167505502700806,
      "learning_rate": 1.730313157420992e-05,
      "loss": 0.3417,
      "step": 5525
    },
    {
      "epoch": 1.6584633853541417,
      "grad_norm": 0.13157668709754944,
      "learning_rate": 1.7273693867085972e-05,
      "loss": 0.3086,
      "step": 5526
    },
    {
      "epoch": 1.658763505402161,
      "grad_norm": 0.13809318840503693,
      "learning_rate": 1.7244278854975504e-05,
      "loss": 0.3837,
      "step": 5527
    },
    {
      "epoch": 1.65906362545018,
      "grad_norm": 0.14108997583389282,
      "learning_rate": 1.721488654594824e-05,
      "loss": 0.3717,
      "step": 5528
    },
    {
      "epoch": 1.6593637454981993,
      "grad_norm": 0.15386879444122314,
      "learning_rate": 1.718551694806755e-05,
      "loss": 0.3282,
      "step": 5529
    },
    {
      "epoch": 1.6596638655462184,
      "grad_norm": 0.15658845007419586,
      "learning_rate": 1.71561700693907e-05,
      "loss": 0.3445,
      "step": 5530
    },
    {
      "epoch": 1.6599639855942376,
      "grad_norm": 0.1463109701871872,
      "learning_rate": 1.712684591796867e-05,
      "loss": 0.3512,
      "step": 5531
    },
    {
      "epoch": 1.660264105642257,
      "grad_norm": 0.12552201747894287,
      "learning_rate": 1.7097544501846185e-05,
      "loss": 0.302,
      "step": 5532
    },
    {
      "epoch": 1.6605642256902762,
      "grad_norm": 0.16053760051727295,
      "learning_rate": 1.7068265829061745e-05,
      "loss": 0.3488,
      "step": 5533
    },
    {
      "epoch": 1.6608643457382954,
      "grad_norm": 0.14493612945079803,
      "learning_rate": 1.703900990764763e-05,
      "loss": 0.3747,
      "step": 5534
    },
    {
      "epoch": 1.6611644657863145,
      "grad_norm": 0.13223378360271454,
      "learning_rate": 1.7009776745629858e-05,
      "loss": 0.3232,
      "step": 5535
    },
    {
      "epoch": 1.6614645858343338,
      "grad_norm": 0.13315922021865845,
      "learning_rate": 1.698056635102826e-05,
      "loss": 0.341,
      "step": 5536
    },
    {
      "epoch": 1.6617647058823528,
      "grad_norm": 0.13376358151435852,
      "learning_rate": 1.6951378731856292e-05,
      "loss": 0.335,
      "step": 5537
    },
    {
      "epoch": 1.662064825930372,
      "grad_norm": 0.12876003980636597,
      "learning_rate": 1.6922213896121296e-05,
      "loss": 0.3229,
      "step": 5538
    },
    {
      "epoch": 1.6623649459783914,
      "grad_norm": 0.15036362409591675,
      "learning_rate": 1.6893071851824328e-05,
      "loss": 0.3764,
      "step": 5539
    },
    {
      "epoch": 1.6626650660264106,
      "grad_norm": 0.13925230503082275,
      "learning_rate": 1.6863952606960132e-05,
      "loss": 0.358,
      "step": 5540
    },
    {
      "epoch": 1.66296518607443,
      "grad_norm": 0.15111415088176727,
      "learning_rate": 1.6834856169517232e-05,
      "loss": 0.3122,
      "step": 5541
    },
    {
      "epoch": 1.663265306122449,
      "grad_norm": 0.15889416635036469,
      "learning_rate": 1.6805782547477935e-05,
      "loss": 0.3673,
      "step": 5542
    },
    {
      "epoch": 1.6635654261704682,
      "grad_norm": 0.14329048991203308,
      "learning_rate": 1.6776731748818254e-05,
      "loss": 0.3623,
      "step": 5543
    },
    {
      "epoch": 1.6638655462184873,
      "grad_norm": 0.1454945206642151,
      "learning_rate": 1.6747703781507905e-05,
      "loss": 0.3686,
      "step": 5544
    },
    {
      "epoch": 1.6641656662665065,
      "grad_norm": 0.1360265016555786,
      "learning_rate": 1.6718698653510433e-05,
      "loss": 0.3642,
      "step": 5545
    },
    {
      "epoch": 1.6644657863145258,
      "grad_norm": 0.13417474925518036,
      "learning_rate": 1.6689716372783002e-05,
      "loss": 0.332,
      "step": 5546
    },
    {
      "epoch": 1.664765906362545,
      "grad_norm": 0.1391753852367401,
      "learning_rate": 1.666075694727661e-05,
      "loss": 0.3443,
      "step": 5547
    },
    {
      "epoch": 1.6650660264105643,
      "grad_norm": 0.13319963216781616,
      "learning_rate": 1.663182038493595e-05,
      "loss": 0.324,
      "step": 5548
    },
    {
      "epoch": 1.6653661464585834,
      "grad_norm": 0.13567818701267242,
      "learning_rate": 1.6602906693699394e-05,
      "loss": 0.3287,
      "step": 5549
    },
    {
      "epoch": 1.6656662665066027,
      "grad_norm": 0.1341031938791275,
      "learning_rate": 1.6574015881499106e-05,
      "loss": 0.3005,
      "step": 5550
    },
    {
      "epoch": 1.6659663865546217,
      "grad_norm": 0.13578887283802032,
      "learning_rate": 1.6545147956260987e-05,
      "loss": 0.3469,
      "step": 5551
    },
    {
      "epoch": 1.666266506602641,
      "grad_norm": 0.1205897331237793,
      "learning_rate": 1.6516302925904547e-05,
      "loss": 0.2774,
      "step": 5552
    },
    {
      "epoch": 1.6665666266506602,
      "grad_norm": 0.12699086964130402,
      "learning_rate": 1.648748079834315e-05,
      "loss": 0.3243,
      "step": 5553
    },
    {
      "epoch": 1.6668667466986795,
      "grad_norm": 0.1304396241903305,
      "learning_rate": 1.645868158148377e-05,
      "loss": 0.3284,
      "step": 5554
    },
    {
      "epoch": 1.6671668667466988,
      "grad_norm": 0.13411088287830353,
      "learning_rate": 1.6429905283227164e-05,
      "loss": 0.3419,
      "step": 5555
    },
    {
      "epoch": 1.667466986794718,
      "grad_norm": 0.1532057374715805,
      "learning_rate": 1.6401151911467815e-05,
      "loss": 0.3682,
      "step": 5556
    },
    {
      "epoch": 1.667767106842737,
      "grad_norm": 0.14714492857456207,
      "learning_rate": 1.6372421474093814e-05,
      "loss": 0.3613,
      "step": 5557
    },
    {
      "epoch": 1.6680672268907561,
      "grad_norm": 0.14000540971755981,
      "learning_rate": 1.6343713978987073e-05,
      "loss": 0.3205,
      "step": 5558
    },
    {
      "epoch": 1.6683673469387754,
      "grad_norm": 0.13985879719257355,
      "learning_rate": 1.6315029434023143e-05,
      "loss": 0.3453,
      "step": 5559
    },
    {
      "epoch": 1.6686674669867947,
      "grad_norm": 0.13146568834781647,
      "learning_rate": 1.6286367847071294e-05,
      "loss": 0.3348,
      "step": 5560
    },
    {
      "epoch": 1.668967587034814,
      "grad_norm": 0.13760332763195038,
      "learning_rate": 1.6257729225994544e-05,
      "loss": 0.345,
      "step": 5561
    },
    {
      "epoch": 1.6692677070828332,
      "grad_norm": 0.1339607685804367,
      "learning_rate": 1.6229113578649547e-05,
      "loss": 0.3365,
      "step": 5562
    },
    {
      "epoch": 1.6695678271308525,
      "grad_norm": 0.13104887306690216,
      "learning_rate": 1.6200520912886618e-05,
      "loss": 0.3335,
      "step": 5563
    },
    {
      "epoch": 1.6698679471788715,
      "grad_norm": 0.14629746973514557,
      "learning_rate": 1.6171951236549932e-05,
      "loss": 0.4062,
      "step": 5564
    },
    {
      "epoch": 1.6701680672268906,
      "grad_norm": 0.14013619720935822,
      "learning_rate": 1.6143404557477183e-05,
      "loss": 0.3638,
      "step": 5565
    },
    {
      "epoch": 1.6704681872749099,
      "grad_norm": 0.1354597955942154,
      "learning_rate": 1.6114880883499873e-05,
      "loss": 0.3543,
      "step": 5566
    },
    {
      "epoch": 1.6707683073229291,
      "grad_norm": 0.13572631776332855,
      "learning_rate": 1.6086380222443087e-05,
      "loss": 0.3399,
      "step": 5567
    },
    {
      "epoch": 1.6710684273709484,
      "grad_norm": 0.1253531277179718,
      "learning_rate": 1.6057902582125683e-05,
      "loss": 0.3035,
      "step": 5568
    },
    {
      "epoch": 1.6713685474189677,
      "grad_norm": 0.1563360095024109,
      "learning_rate": 1.60294479703602e-05,
      "loss": 0.3735,
      "step": 5569
    },
    {
      "epoch": 1.671668667466987,
      "grad_norm": 0.14220857620239258,
      "learning_rate": 1.6001016394952817e-05,
      "loss": 0.3537,
      "step": 5570
    },
    {
      "epoch": 1.671968787515006,
      "grad_norm": 0.12539853155612946,
      "learning_rate": 1.597260786370337e-05,
      "loss": 0.2969,
      "step": 5571
    },
    {
      "epoch": 1.6722689075630253,
      "grad_norm": 0.13844381272792816,
      "learning_rate": 1.594422238440546e-05,
      "loss": 0.3463,
      "step": 5572
    },
    {
      "epoch": 1.6725690276110443,
      "grad_norm": 0.12984538078308105,
      "learning_rate": 1.5915859964846325e-05,
      "loss": 0.3183,
      "step": 5573
    },
    {
      "epoch": 1.6728691476590636,
      "grad_norm": 0.13644136488437653,
      "learning_rate": 1.5887520612806817e-05,
      "loss": 0.3268,
      "step": 5574
    },
    {
      "epoch": 1.6731692677070829,
      "grad_norm": 0.1338503360748291,
      "learning_rate": 1.5859204336061562e-05,
      "loss": 0.315,
      "step": 5575
    },
    {
      "epoch": 1.6734693877551021,
      "grad_norm": 0.14446158707141876,
      "learning_rate": 1.583091114237878e-05,
      "loss": 0.2993,
      "step": 5576
    },
    {
      "epoch": 1.6737695078031214,
      "grad_norm": 0.1409435272216797,
      "learning_rate": 1.5802641039520415e-05,
      "loss": 0.3283,
      "step": 5577
    },
    {
      "epoch": 1.6740696278511404,
      "grad_norm": 0.13679242134094238,
      "learning_rate": 1.5774394035242035e-05,
      "loss": 0.3384,
      "step": 5578
    },
    {
      "epoch": 1.6743697478991597,
      "grad_norm": 0.12947718799114227,
      "learning_rate": 1.574617013729285e-05,
      "loss": 0.3212,
      "step": 5579
    },
    {
      "epoch": 1.6746698679471788,
      "grad_norm": 0.15234880149364471,
      "learning_rate": 1.5717969353415772e-05,
      "loss": 0.3421,
      "step": 5580
    },
    {
      "epoch": 1.674969987995198,
      "grad_norm": 0.12767057120800018,
      "learning_rate": 1.56897916913474e-05,
      "loss": 0.3232,
      "step": 5581
    },
    {
      "epoch": 1.6752701080432173,
      "grad_norm": 0.1383272111415863,
      "learning_rate": 1.566163715881791e-05,
      "loss": 0.3422,
      "step": 5582
    },
    {
      "epoch": 1.6755702280912366,
      "grad_norm": 0.15944012999534607,
      "learning_rate": 1.5633505763551205e-05,
      "loss": 0.3187,
      "step": 5583
    },
    {
      "epoch": 1.6758703481392558,
      "grad_norm": 0.16072912514209747,
      "learning_rate": 1.5605397513264764e-05,
      "loss": 0.3355,
      "step": 5584
    },
    {
      "epoch": 1.6761704681872749,
      "grad_norm": 0.13186360895633698,
      "learning_rate": 1.5577312415669842e-05,
      "loss": 0.3175,
      "step": 5585
    },
    {
      "epoch": 1.6764705882352942,
      "grad_norm": 0.12840430438518524,
      "learning_rate": 1.5549250478471213e-05,
      "loss": 0.2959,
      "step": 5586
    },
    {
      "epoch": 1.6767707082833132,
      "grad_norm": 0.13190403580665588,
      "learning_rate": 1.5521211709367335e-05,
      "loss": 0.3085,
      "step": 5587
    },
    {
      "epoch": 1.6770708283313325,
      "grad_norm": 0.13660357892513275,
      "learning_rate": 1.5493196116050336e-05,
      "loss": 0.3472,
      "step": 5588
    },
    {
      "epoch": 1.6773709483793517,
      "grad_norm": 0.14554022252559662,
      "learning_rate": 1.5465203706206e-05,
      "loss": 0.3534,
      "step": 5589
    },
    {
      "epoch": 1.677671068427371,
      "grad_norm": 0.1355992555618286,
      "learning_rate": 1.5437234487513687e-05,
      "loss": 0.317,
      "step": 5590
    },
    {
      "epoch": 1.6779711884753903,
      "grad_norm": 0.21639618277549744,
      "learning_rate": 1.5409288467646465e-05,
      "loss": 0.333,
      "step": 5591
    },
    {
      "epoch": 1.6782713085234093,
      "grad_norm": 0.13919563591480255,
      "learning_rate": 1.538136565427096e-05,
      "loss": 0.3193,
      "step": 5592
    },
    {
      "epoch": 1.6785714285714286,
      "grad_norm": 0.15131469070911407,
      "learning_rate": 1.5353466055047504e-05,
      "loss": 0.3236,
      "step": 5593
    },
    {
      "epoch": 1.6788715486194477,
      "grad_norm": 0.13067404925823212,
      "learning_rate": 1.532558967763005e-05,
      "loss": 0.3201,
      "step": 5594
    },
    {
      "epoch": 1.679171668667467,
      "grad_norm": 0.14243102073669434,
      "learning_rate": 1.5297736529666117e-05,
      "loss": 0.3494,
      "step": 5595
    },
    {
      "epoch": 1.6794717887154862,
      "grad_norm": 0.13154637813568115,
      "learning_rate": 1.526990661879695e-05,
      "loss": 0.3085,
      "step": 5596
    },
    {
      "epoch": 1.6797719087635055,
      "grad_norm": 0.16695889830589294,
      "learning_rate": 1.5242099952657307e-05,
      "loss": 0.3541,
      "step": 5597
    },
    {
      "epoch": 1.6800720288115247,
      "grad_norm": 0.13061389327049255,
      "learning_rate": 1.521431653887566e-05,
      "loss": 0.3114,
      "step": 5598
    },
    {
      "epoch": 1.6803721488595438,
      "grad_norm": 0.13496339321136475,
      "learning_rate": 1.5186556385074103e-05,
      "loss": 0.3062,
      "step": 5599
    },
    {
      "epoch": 1.680672268907563,
      "grad_norm": 0.13125289976596832,
      "learning_rate": 1.5158819498868248e-05,
      "loss": 0.3207,
      "step": 5600
    },
    {
      "epoch": 1.680972388955582,
      "grad_norm": 0.14022837579250336,
      "learning_rate": 1.5131105887867425e-05,
      "loss": 0.3347,
      "step": 5601
    },
    {
      "epoch": 1.6812725090036014,
      "grad_norm": 0.13820809125900269,
      "learning_rate": 1.5103415559674561e-05,
      "loss": 0.3541,
      "step": 5602
    },
    {
      "epoch": 1.6815726290516206,
      "grad_norm": 0.14528962969779968,
      "learning_rate": 1.5075748521886179e-05,
      "loss": 0.3441,
      "step": 5603
    },
    {
      "epoch": 1.68187274909964,
      "grad_norm": 0.1386173814535141,
      "learning_rate": 1.5048104782092364e-05,
      "loss": 0.3351,
      "step": 5604
    },
    {
      "epoch": 1.6821728691476592,
      "grad_norm": 0.1344204694032669,
      "learning_rate": 1.5020484347876895e-05,
      "loss": 0.3267,
      "step": 5605
    },
    {
      "epoch": 1.6824729891956782,
      "grad_norm": 0.1447894424200058,
      "learning_rate": 1.4992887226817132e-05,
      "loss": 0.332,
      "step": 5606
    },
    {
      "epoch": 1.6827731092436975,
      "grad_norm": 0.1385362595319748,
      "learning_rate": 1.496531342648403e-05,
      "loss": 0.3419,
      "step": 5607
    },
    {
      "epoch": 1.6830732292917165,
      "grad_norm": 0.14121302962303162,
      "learning_rate": 1.4937762954442136e-05,
      "loss": 0.3506,
      "step": 5608
    },
    {
      "epoch": 1.6833733493397358,
      "grad_norm": 0.1456373929977417,
      "learning_rate": 1.4910235818249552e-05,
      "loss": 0.3536,
      "step": 5609
    },
    {
      "epoch": 1.683673469387755,
      "grad_norm": 0.13301417231559753,
      "learning_rate": 1.4882732025458124e-05,
      "loss": 0.3085,
      "step": 5610
    },
    {
      "epoch": 1.6839735894357744,
      "grad_norm": 0.12184537202119827,
      "learning_rate": 1.4855251583613172e-05,
      "loss": 0.2981,
      "step": 5611
    },
    {
      "epoch": 1.6842737094837936,
      "grad_norm": 0.15292181074619293,
      "learning_rate": 1.48277945002536e-05,
      "loss": 0.3656,
      "step": 5612
    },
    {
      "epoch": 1.6845738295318127,
      "grad_norm": 0.13678281009197235,
      "learning_rate": 1.480036078291197e-05,
      "loss": 0.3061,
      "step": 5613
    },
    {
      "epoch": 1.684873949579832,
      "grad_norm": 0.13338017463684082,
      "learning_rate": 1.4772950439114408e-05,
      "loss": 0.3272,
      "step": 5614
    },
    {
      "epoch": 1.685174069627851,
      "grad_norm": 0.13358749449253082,
      "learning_rate": 1.4745563476380652e-05,
      "loss": 0.2983,
      "step": 5615
    },
    {
      "epoch": 1.6854741896758703,
      "grad_norm": 0.13526228070259094,
      "learning_rate": 1.4718199902223984e-05,
      "loss": 0.3391,
      "step": 5616
    },
    {
      "epoch": 1.6857743097238895,
      "grad_norm": 0.13874565064907074,
      "learning_rate": 1.4690859724151262e-05,
      "loss": 0.3166,
      "step": 5617
    },
    {
      "epoch": 1.6860744297719088,
      "grad_norm": 0.1305008977651596,
      "learning_rate": 1.4663542949662967e-05,
      "loss": 0.2987,
      "step": 5618
    },
    {
      "epoch": 1.686374549819928,
      "grad_norm": 0.13568472862243652,
      "learning_rate": 1.463624958625317e-05,
      "loss": 0.3285,
      "step": 5619
    },
    {
      "epoch": 1.6866746698679473,
      "grad_norm": 0.14798003435134888,
      "learning_rate": 1.4608979641409448e-05,
      "loss": 0.364,
      "step": 5620
    },
    {
      "epoch": 1.6869747899159664,
      "grad_norm": 0.1313888132572174,
      "learning_rate": 1.4581733122613028e-05,
      "loss": 0.3244,
      "step": 5621
    },
    {
      "epoch": 1.6872749099639854,
      "grad_norm": 0.14711931347846985,
      "learning_rate": 1.4554510037338654e-05,
      "loss": 0.3295,
      "step": 5622
    },
    {
      "epoch": 1.6875750300120047,
      "grad_norm": 0.1367296576499939,
      "learning_rate": 1.4527310393054693e-05,
      "loss": 0.3393,
      "step": 5623
    },
    {
      "epoch": 1.687875150060024,
      "grad_norm": 0.13434435427188873,
      "learning_rate": 1.4500134197223058e-05,
      "loss": 0.3123,
      "step": 5624
    },
    {
      "epoch": 1.6881752701080432,
      "grad_norm": 0.1336866021156311,
      "learning_rate": 1.4472981457299195e-05,
      "loss": 0.3313,
      "step": 5625
    },
    {
      "epoch": 1.6884753901560625,
      "grad_norm": 0.14393779635429382,
      "learning_rate": 1.4445852180732167e-05,
      "loss": 0.3551,
      "step": 5626
    },
    {
      "epoch": 1.6887755102040818,
      "grad_norm": 0.14075292646884918,
      "learning_rate": 1.4418746374964598e-05,
      "loss": 0.3383,
      "step": 5627
    },
    {
      "epoch": 1.6890756302521008,
      "grad_norm": 0.13475462794303894,
      "learning_rate": 1.4391664047432618e-05,
      "loss": 0.3221,
      "step": 5628
    },
    {
      "epoch": 1.6893757503001199,
      "grad_norm": 0.1758328527212143,
      "learning_rate": 1.4364605205565984e-05,
      "loss": 0.3569,
      "step": 5629
    },
    {
      "epoch": 1.6896758703481392,
      "grad_norm": 0.1369779109954834,
      "learning_rate": 1.4337569856787958e-05,
      "loss": 0.315,
      "step": 5630
    },
    {
      "epoch": 1.6899759903961584,
      "grad_norm": 0.1335962861776352,
      "learning_rate": 1.4310558008515373e-05,
      "loss": 0.3243,
      "step": 5631
    },
    {
      "epoch": 1.6902761104441777,
      "grad_norm": 0.131963849067688,
      "learning_rate": 1.428356966815867e-05,
      "loss": 0.3198,
      "step": 5632
    },
    {
      "epoch": 1.690576230492197,
      "grad_norm": 0.14859957993030548,
      "learning_rate": 1.4256604843121735e-05,
      "loss": 0.3731,
      "step": 5633
    },
    {
      "epoch": 1.6908763505402162,
      "grad_norm": 0.13696502149105072,
      "learning_rate": 1.4229663540802052e-05,
      "loss": 0.3406,
      "step": 5634
    },
    {
      "epoch": 1.6911764705882353,
      "grad_norm": 0.13967718183994293,
      "learning_rate": 1.4202745768590719e-05,
      "loss": 0.3454,
      "step": 5635
    },
    {
      "epoch": 1.6914765906362546,
      "grad_norm": 0.1300186663866043,
      "learning_rate": 1.4175851533872253e-05,
      "loss": 0.3271,
      "step": 5636
    },
    {
      "epoch": 1.6917767106842736,
      "grad_norm": 0.13645406067371368,
      "learning_rate": 1.414898084402484e-05,
      "loss": 0.3508,
      "step": 5637
    },
    {
      "epoch": 1.6920768307322929,
      "grad_norm": 0.14068691432476044,
      "learning_rate": 1.4122133706420093e-05,
      "loss": 0.3228,
      "step": 5638
    },
    {
      "epoch": 1.6923769507803121,
      "grad_norm": 0.14424894750118256,
      "learning_rate": 1.4095310128423233e-05,
      "loss": 0.3767,
      "step": 5639
    },
    {
      "epoch": 1.6926770708283314,
      "grad_norm": 0.14913184940814972,
      "learning_rate": 1.406851011739303e-05,
      "loss": 0.358,
      "step": 5640
    },
    {
      "epoch": 1.6929771908763507,
      "grad_norm": 0.1272694319486618,
      "learning_rate": 1.4041733680681734e-05,
      "loss": 0.3129,
      "step": 5641
    },
    {
      "epoch": 1.6932773109243697,
      "grad_norm": 0.1338389813899994,
      "learning_rate": 1.4014980825635137e-05,
      "loss": 0.3244,
      "step": 5642
    },
    {
      "epoch": 1.693577430972389,
      "grad_norm": 0.13543696701526642,
      "learning_rate": 1.3988251559592592e-05,
      "loss": 0.3224,
      "step": 5643
    },
    {
      "epoch": 1.693877551020408,
      "grad_norm": 0.1341482698917389,
      "learning_rate": 1.3961545889886973e-05,
      "loss": 0.3345,
      "step": 5644
    },
    {
      "epoch": 1.6941776710684273,
      "grad_norm": 0.17108403146266937,
      "learning_rate": 1.3934863823844702e-05,
      "loss": 0.4254,
      "step": 5645
    },
    {
      "epoch": 1.6944777911164466,
      "grad_norm": 0.14934246242046356,
      "learning_rate": 1.3908205368785654e-05,
      "loss": 0.3827,
      "step": 5646
    },
    {
      "epoch": 1.6947779111644659,
      "grad_norm": 0.12500108778476715,
      "learning_rate": 1.3881570532023246e-05,
      "loss": 0.293,
      "step": 5647
    },
    {
      "epoch": 1.6950780312124851,
      "grad_norm": 0.13049247860908508,
      "learning_rate": 1.3854959320864513e-05,
      "loss": 0.3155,
      "step": 5648
    },
    {
      "epoch": 1.6953781512605042,
      "grad_norm": 0.12995624542236328,
      "learning_rate": 1.3828371742609914e-05,
      "loss": 0.3187,
      "step": 5649
    },
    {
      "epoch": 1.6956782713085234,
      "grad_norm": 0.13610060513019562,
      "learning_rate": 1.3801807804553401e-05,
      "loss": 0.3513,
      "step": 5650
    },
    {
      "epoch": 1.6959783913565425,
      "grad_norm": 0.1441008299589157,
      "learning_rate": 1.3775267513982526e-05,
      "loss": 0.3432,
      "step": 5651
    },
    {
      "epoch": 1.6962785114045618,
      "grad_norm": 0.12790493667125702,
      "learning_rate": 1.374875087817833e-05,
      "loss": 0.32,
      "step": 5652
    },
    {
      "epoch": 1.696578631452581,
      "grad_norm": 0.13147617876529694,
      "learning_rate": 1.3722257904415292e-05,
      "loss": 0.3082,
      "step": 5653
    },
    {
      "epoch": 1.6968787515006003,
      "grad_norm": 0.13431623578071594,
      "learning_rate": 1.3695788599961513e-05,
      "loss": 0.3088,
      "step": 5654
    },
    {
      "epoch": 1.6971788715486196,
      "grad_norm": 0.1450989991426468,
      "learning_rate": 1.3669342972078491e-05,
      "loss": 0.3468,
      "step": 5655
    },
    {
      "epoch": 1.6974789915966386,
      "grad_norm": 0.12794899940490723,
      "learning_rate": 1.3642921028021305e-05,
      "loss": 0.295,
      "step": 5656
    },
    {
      "epoch": 1.697779111644658,
      "grad_norm": 0.1322779357433319,
      "learning_rate": 1.3616522775038543e-05,
      "loss": 0.3225,
      "step": 5657
    },
    {
      "epoch": 1.698079231692677,
      "grad_norm": 0.12647461891174316,
      "learning_rate": 1.3590148220372211e-05,
      "loss": 0.3138,
      "step": 5658
    },
    {
      "epoch": 1.6983793517406962,
      "grad_norm": 0.15429219603538513,
      "learning_rate": 1.3563797371257914e-05,
      "loss": 0.3915,
      "step": 5659
    },
    {
      "epoch": 1.6986794717887155,
      "grad_norm": 0.13390842080116272,
      "learning_rate": 1.3537470234924642e-05,
      "loss": 0.3187,
      "step": 5660
    },
    {
      "epoch": 1.6989795918367347,
      "grad_norm": 0.12282349169254303,
      "learning_rate": 1.3511166818595001e-05,
      "loss": 0.2752,
      "step": 5661
    },
    {
      "epoch": 1.699279711884754,
      "grad_norm": 0.14638248085975647,
      "learning_rate": 1.3484887129485025e-05,
      "loss": 0.3508,
      "step": 5662
    },
    {
      "epoch": 1.699579831932773,
      "grad_norm": 0.13514555990695953,
      "learning_rate": 1.3458631174804204e-05,
      "loss": 0.3368,
      "step": 5663
    },
    {
      "epoch": 1.6998799519807923,
      "grad_norm": 0.14483727514743805,
      "learning_rate": 1.34323989617556e-05,
      "loss": 0.3362,
      "step": 5664
    },
    {
      "epoch": 1.7001800720288114,
      "grad_norm": 0.1352541744709015,
      "learning_rate": 1.340619049753572e-05,
      "loss": 0.3323,
      "step": 5665
    },
    {
      "epoch": 1.7004801920768307,
      "grad_norm": 0.13121896982192993,
      "learning_rate": 1.3380005789334516e-05,
      "loss": 0.317,
      "step": 5666
    },
    {
      "epoch": 1.70078031212485,
      "grad_norm": 0.13836635649204254,
      "learning_rate": 1.3353844844335516e-05,
      "loss": 0.3603,
      "step": 5667
    },
    {
      "epoch": 1.7010804321728692,
      "grad_norm": 0.13154856860637665,
      "learning_rate": 1.3327707669715616e-05,
      "loss": 0.3273,
      "step": 5668
    },
    {
      "epoch": 1.7013805522208885,
      "grad_norm": 0.13235405087471008,
      "learning_rate": 1.330159427264529e-05,
      "loss": 0.3131,
      "step": 5669
    },
    {
      "epoch": 1.7016806722689075,
      "grad_norm": 0.7332162857055664,
      "learning_rate": 1.3275504660288462e-05,
      "loss": 0.2848,
      "step": 5670
    },
    {
      "epoch": 1.7019807923169268,
      "grad_norm": 0.1268726885318756,
      "learning_rate": 1.3249438839802497e-05,
      "loss": 0.2979,
      "step": 5671
    },
    {
      "epoch": 1.7022809123649458,
      "grad_norm": 0.17241127789020538,
      "learning_rate": 1.3223396818338207e-05,
      "loss": 0.3856,
      "step": 5672
    },
    {
      "epoch": 1.702581032412965,
      "grad_norm": 0.13850437104701996,
      "learning_rate": 1.3197378603040011e-05,
      "loss": 0.3415,
      "step": 5673
    },
    {
      "epoch": 1.7028811524609844,
      "grad_norm": 0.13507312536239624,
      "learning_rate": 1.3171384201045655e-05,
      "loss": 0.3255,
      "step": 5674
    },
    {
      "epoch": 1.7031812725090036,
      "grad_norm": 0.12530501186847687,
      "learning_rate": 1.3145413619486425e-05,
      "loss": 0.2889,
      "step": 5675
    },
    {
      "epoch": 1.703481392557023,
      "grad_norm": 0.13460050523281097,
      "learning_rate": 1.311946686548703e-05,
      "loss": 0.3323,
      "step": 5676
    },
    {
      "epoch": 1.7037815126050422,
      "grad_norm": 0.13420239090919495,
      "learning_rate": 1.3093543946165665e-05,
      "loss": 0.3217,
      "step": 5677
    },
    {
      "epoch": 1.7040816326530612,
      "grad_norm": 0.13585264980793,
      "learning_rate": 1.3067644868634033e-05,
      "loss": 0.3271,
      "step": 5678
    },
    {
      "epoch": 1.7043817527010803,
      "grad_norm": 0.14396932721138,
      "learning_rate": 1.3041769639997203e-05,
      "loss": 0.397,
      "step": 5679
    },
    {
      "epoch": 1.7046818727490995,
      "grad_norm": 0.1304837018251419,
      "learning_rate": 1.3015918267353743e-05,
      "loss": 0.3261,
      "step": 5680
    },
    {
      "epoch": 1.7049819927971188,
      "grad_norm": 0.13240686058998108,
      "learning_rate": 1.2990090757795692e-05,
      "loss": 0.3205,
      "step": 5681
    },
    {
      "epoch": 1.705282112845138,
      "grad_norm": 0.13736537098884583,
      "learning_rate": 1.2964287118408558e-05,
      "loss": 0.3205,
      "step": 5682
    },
    {
      "epoch": 1.7055822328931574,
      "grad_norm": 0.15131819248199463,
      "learning_rate": 1.2938507356271235e-05,
      "loss": 0.3682,
      "step": 5683
    },
    {
      "epoch": 1.7058823529411766,
      "grad_norm": 0.13606952130794525,
      "learning_rate": 1.2912751478456142e-05,
      "loss": 0.3338,
      "step": 5684
    },
    {
      "epoch": 1.7061824729891957,
      "grad_norm": 0.13007688522338867,
      "learning_rate": 1.288701949202904e-05,
      "loss": 0.3257,
      "step": 5685
    },
    {
      "epoch": 1.7064825930372147,
      "grad_norm": 0.13017094135284424,
      "learning_rate": 1.2861311404049292e-05,
      "loss": 0.3186,
      "step": 5686
    },
    {
      "epoch": 1.706782713085234,
      "grad_norm": 0.13054896891117096,
      "learning_rate": 1.2835627221569579e-05,
      "loss": 0.3057,
      "step": 5687
    },
    {
      "epoch": 1.7070828331332533,
      "grad_norm": 0.1377072036266327,
      "learning_rate": 1.2809966951636032e-05,
      "loss": 0.3688,
      "step": 5688
    },
    {
      "epoch": 1.7073829531812725,
      "grad_norm": 0.13601499795913696,
      "learning_rate": 1.2784330601288297e-05,
      "loss": 0.3423,
      "step": 5689
    },
    {
      "epoch": 1.7076830732292918,
      "grad_norm": 0.12117687612771988,
      "learning_rate": 1.2758718177559403e-05,
      "loss": 0.2921,
      "step": 5690
    },
    {
      "epoch": 1.707983193277311,
      "grad_norm": 0.14201779663562775,
      "learning_rate": 1.2733129687475797e-05,
      "loss": 0.3598,
      "step": 5691
    },
    {
      "epoch": 1.7082833133253301,
      "grad_norm": 0.13235652446746826,
      "learning_rate": 1.2707565138057432e-05,
      "loss": 0.2976,
      "step": 5692
    },
    {
      "epoch": 1.7085834333733494,
      "grad_norm": 0.1324567198753357,
      "learning_rate": 1.2682024536317605e-05,
      "loss": 0.3467,
      "step": 5693
    },
    {
      "epoch": 1.7088835534213684,
      "grad_norm": 0.1447012722492218,
      "learning_rate": 1.2656507889263114e-05,
      "loss": 0.3083,
      "step": 5694
    },
    {
      "epoch": 1.7091836734693877,
      "grad_norm": 0.1385689079761505,
      "learning_rate": 1.2631015203894159e-05,
      "loss": 0.3418,
      "step": 5695
    },
    {
      "epoch": 1.709483793517407,
      "grad_norm": 0.13922815024852753,
      "learning_rate": 1.2605546487204345e-05,
      "loss": 0.3214,
      "step": 5696
    },
    {
      "epoch": 1.7097839135654262,
      "grad_norm": 0.15285637974739075,
      "learning_rate": 1.2580101746180738e-05,
      "loss": 0.393,
      "step": 5697
    },
    {
      "epoch": 1.7100840336134455,
      "grad_norm": 0.153276264667511,
      "learning_rate": 1.2554680987803823e-05,
      "loss": 0.3278,
      "step": 5698
    },
    {
      "epoch": 1.7103841536614646,
      "grad_norm": 0.14622247219085693,
      "learning_rate": 1.2529284219047465e-05,
      "loss": 0.3835,
      "step": 5699
    },
    {
      "epoch": 1.7106842737094838,
      "grad_norm": 0.1340278536081314,
      "learning_rate": 1.2503911446879014e-05,
      "loss": 0.3435,
      "step": 5700
    },
    {
      "epoch": 1.7109843937575029,
      "grad_norm": 0.1415686458349228,
      "learning_rate": 1.2478562678259153e-05,
      "loss": 0.3351,
      "step": 5701
    },
    {
      "epoch": 1.7112845138055222,
      "grad_norm": 0.13266758620738983,
      "learning_rate": 1.2453237920142047e-05,
      "loss": 0.313,
      "step": 5702
    },
    {
      "epoch": 1.7115846338535414,
      "grad_norm": 0.14347507059574127,
      "learning_rate": 1.242793717947528e-05,
      "loss": 0.3406,
      "step": 5703
    },
    {
      "epoch": 1.7118847539015607,
      "grad_norm": 0.1388009488582611,
      "learning_rate": 1.2402660463199767e-05,
      "loss": 0.3525,
      "step": 5704
    },
    {
      "epoch": 1.71218487394958,
      "grad_norm": 0.1338505744934082,
      "learning_rate": 1.2377407778249939e-05,
      "loss": 0.3172,
      "step": 5705
    },
    {
      "epoch": 1.712484993997599,
      "grad_norm": 0.133287712931633,
      "learning_rate": 1.2352179131553532e-05,
      "loss": 0.3187,
      "step": 5706
    },
    {
      "epoch": 1.7127851140456183,
      "grad_norm": 0.1391024887561798,
      "learning_rate": 1.2326974530031766e-05,
      "loss": 0.3381,
      "step": 5707
    },
    {
      "epoch": 1.7130852340936373,
      "grad_norm": 0.15472294390201569,
      "learning_rate": 1.230179398059924e-05,
      "loss": 0.4018,
      "step": 5708
    },
    {
      "epoch": 1.7133853541416566,
      "grad_norm": 0.13657771050930023,
      "learning_rate": 1.2276637490163945e-05,
      "loss": 0.3345,
      "step": 5709
    },
    {
      "epoch": 1.7136854741896759,
      "grad_norm": 0.13321413099765778,
      "learning_rate": 1.2251505065627211e-05,
      "loss": 0.3282,
      "step": 5710
    },
    {
      "epoch": 1.7139855942376951,
      "grad_norm": 0.14870762825012207,
      "learning_rate": 1.2226396713883936e-05,
      "loss": 0.3839,
      "step": 5711
    },
    {
      "epoch": 1.7142857142857144,
      "grad_norm": 0.1392969787120819,
      "learning_rate": 1.2201312441822266e-05,
      "loss": 0.3219,
      "step": 5712
    },
    {
      "epoch": 1.7145858343337335,
      "grad_norm": 0.14034144580364227,
      "learning_rate": 1.217625225632375e-05,
      "loss": 0.3757,
      "step": 5713
    },
    {
      "epoch": 1.7148859543817527,
      "grad_norm": 0.13876692950725555,
      "learning_rate": 1.215121616426339e-05,
      "loss": 0.3196,
      "step": 5714
    },
    {
      "epoch": 1.7151860744297718,
      "grad_norm": 0.14527247846126556,
      "learning_rate": 1.2126204172509547e-05,
      "loss": 0.3407,
      "step": 5715
    },
    {
      "epoch": 1.715486194477791,
      "grad_norm": 0.17731516063213348,
      "learning_rate": 1.2101216287924e-05,
      "loss": 0.4293,
      "step": 5716
    },
    {
      "epoch": 1.7157863145258103,
      "grad_norm": 0.14165017008781433,
      "learning_rate": 1.2076252517361863e-05,
      "loss": 0.3579,
      "step": 5717
    },
    {
      "epoch": 1.7160864345738296,
      "grad_norm": 0.19433917105197906,
      "learning_rate": 1.2051312867671637e-05,
      "loss": 0.4835,
      "step": 5718
    },
    {
      "epoch": 1.7163865546218489,
      "grad_norm": 0.17780497670173645,
      "learning_rate": 1.2026397345695261e-05,
      "loss": 0.3634,
      "step": 5719
    },
    {
      "epoch": 1.716686674669868,
      "grad_norm": 0.13712306320667267,
      "learning_rate": 1.2001505958268045e-05,
      "loss": 0.3226,
      "step": 5720
    },
    {
      "epoch": 1.7169867947178872,
      "grad_norm": 0.15281295776367188,
      "learning_rate": 1.1976638712218591e-05,
      "loss": 0.3207,
      "step": 5721
    },
    {
      "epoch": 1.7172869147659062,
      "grad_norm": 0.13083398342132568,
      "learning_rate": 1.1951795614368988e-05,
      "loss": 0.3064,
      "step": 5722
    },
    {
      "epoch": 1.7175870348139255,
      "grad_norm": 0.1333472579717636,
      "learning_rate": 1.1926976671534662e-05,
      "loss": 0.3406,
      "step": 5723
    },
    {
      "epoch": 1.7178871548619448,
      "grad_norm": 0.1443636268377304,
      "learning_rate": 1.1902181890524378e-05,
      "loss": 0.3642,
      "step": 5724
    },
    {
      "epoch": 1.718187274909964,
      "grad_norm": 0.12199776619672775,
      "learning_rate": 1.1877411278140327e-05,
      "loss": 0.2984,
      "step": 5725
    },
    {
      "epoch": 1.7184873949579833,
      "grad_norm": 0.13965708017349243,
      "learning_rate": 1.1852664841177995e-05,
      "loss": 0.3693,
      "step": 5726
    },
    {
      "epoch": 1.7187875150060024,
      "grad_norm": 0.12849895656108856,
      "learning_rate": 1.1827942586426333e-05,
      "loss": 0.3135,
      "step": 5727
    },
    {
      "epoch": 1.7190876350540216,
      "grad_norm": 0.1383999139070511,
      "learning_rate": 1.18032445206676e-05,
      "loss": 0.3667,
      "step": 5728
    },
    {
      "epoch": 1.7193877551020407,
      "grad_norm": 0.13418391346931458,
      "learning_rate": 1.177857065067739e-05,
      "loss": 0.3105,
      "step": 5729
    },
    {
      "epoch": 1.71968787515006,
      "grad_norm": 0.12100134044885635,
      "learning_rate": 1.1753920983224753e-05,
      "loss": 0.3037,
      "step": 5730
    },
    {
      "epoch": 1.7199879951980792,
      "grad_norm": 0.1342541128396988,
      "learning_rate": 1.1729295525071993e-05,
      "loss": 0.3378,
      "step": 5731
    },
    {
      "epoch": 1.7202881152460985,
      "grad_norm": 0.14842109382152557,
      "learning_rate": 1.1704694282974838e-05,
      "loss": 0.34,
      "step": 5732
    },
    {
      "epoch": 1.7205882352941178,
      "grad_norm": 0.13383586704730988,
      "learning_rate": 1.1680117263682388e-05,
      "loss": 0.3363,
      "step": 5733
    },
    {
      "epoch": 1.7208883553421368,
      "grad_norm": 0.13726623356342316,
      "learning_rate": 1.1655564473937008e-05,
      "loss": 0.3484,
      "step": 5734
    },
    {
      "epoch": 1.721188475390156,
      "grad_norm": 0.14171043038368225,
      "learning_rate": 1.163103592047452e-05,
      "loss": 0.3593,
      "step": 5735
    },
    {
      "epoch": 1.7214885954381751,
      "grad_norm": 0.14122125506401062,
      "learning_rate": 1.1606531610024041e-05,
      "loss": 0.3486,
      "step": 5736
    },
    {
      "epoch": 1.7217887154861944,
      "grad_norm": 0.12228088825941086,
      "learning_rate": 1.1582051549308037e-05,
      "loss": 0.297,
      "step": 5737
    },
    {
      "epoch": 1.7220888355342137,
      "grad_norm": 0.18381671607494354,
      "learning_rate": 1.155759574504235e-05,
      "loss": 0.3073,
      "step": 5738
    },
    {
      "epoch": 1.722388955582233,
      "grad_norm": 0.13159750401973724,
      "learning_rate": 1.153316420393612e-05,
      "loss": 0.3465,
      "step": 5739
    },
    {
      "epoch": 1.7226890756302522,
      "grad_norm": 0.12137060612440109,
      "learning_rate": 1.1508756932691878e-05,
      "loss": 0.2972,
      "step": 5740
    },
    {
      "epoch": 1.7229891956782715,
      "grad_norm": 0.14963874220848083,
      "learning_rate": 1.14843739380055e-05,
      "loss": 0.3397,
      "step": 5741
    },
    {
      "epoch": 1.7232893157262905,
      "grad_norm": 0.13432660698890686,
      "learning_rate": 1.1460015226566168e-05,
      "loss": 0.3184,
      "step": 5742
    },
    {
      "epoch": 1.7235894357743096,
      "grad_norm": 0.13611102104187012,
      "learning_rate": 1.143568080505637e-05,
      "loss": 0.3438,
      "step": 5743
    },
    {
      "epoch": 1.7238895558223288,
      "grad_norm": 0.1286766082048416,
      "learning_rate": 1.1411370680152022e-05,
      "loss": 0.3213,
      "step": 5744
    },
    {
      "epoch": 1.724189675870348,
      "grad_norm": 0.12776191532611847,
      "learning_rate": 1.1387084858522323e-05,
      "loss": 0.3253,
      "step": 5745
    },
    {
      "epoch": 1.7244897959183674,
      "grad_norm": 0.1348733901977539,
      "learning_rate": 1.1362823346829821e-05,
      "loss": 0.3155,
      "step": 5746
    },
    {
      "epoch": 1.7247899159663866,
      "grad_norm": 0.13117042183876038,
      "learning_rate": 1.1338586151730345e-05,
      "loss": 0.3278,
      "step": 5747
    },
    {
      "epoch": 1.725090036014406,
      "grad_norm": 0.13857294619083405,
      "learning_rate": 1.1314373279873114e-05,
      "loss": 0.3598,
      "step": 5748
    },
    {
      "epoch": 1.725390156062425,
      "grad_norm": 0.1337532252073288,
      "learning_rate": 1.1290184737900677e-05,
      "loss": 0.3278,
      "step": 5749
    },
    {
      "epoch": 1.7256902761104442,
      "grad_norm": 0.12995721399784088,
      "learning_rate": 1.1266020532448863e-05,
      "loss": 0.315,
      "step": 5750
    },
    {
      "epoch": 1.7259903961584633,
      "grad_norm": 0.14016960561275482,
      "learning_rate": 1.124188067014681e-05,
      "loss": 0.3571,
      "step": 5751
    },
    {
      "epoch": 1.7262905162064826,
      "grad_norm": 0.14358165860176086,
      "learning_rate": 1.1217765157617055e-05,
      "loss": 0.3646,
      "step": 5752
    },
    {
      "epoch": 1.7265906362545018,
      "grad_norm": 0.1202707290649414,
      "learning_rate": 1.1193674001475408e-05,
      "loss": 0.2831,
      "step": 5753
    },
    {
      "epoch": 1.726890756302521,
      "grad_norm": 0.12729522585868835,
      "learning_rate": 1.1169607208330979e-05,
      "loss": 0.3072,
      "step": 5754
    },
    {
      "epoch": 1.7271908763505404,
      "grad_norm": 0.140628382563591,
      "learning_rate": 1.1145564784786245e-05,
      "loss": 0.3378,
      "step": 5755
    },
    {
      "epoch": 1.7274909963985594,
      "grad_norm": 0.13112959265708923,
      "learning_rate": 1.112154673743694e-05,
      "loss": 0.3419,
      "step": 5756
    },
    {
      "epoch": 1.7277911164465787,
      "grad_norm": 0.13791124522686005,
      "learning_rate": 1.1097553072872157e-05,
      "loss": 0.3393,
      "step": 5757
    },
    {
      "epoch": 1.7280912364945977,
      "grad_norm": 0.15736477077007294,
      "learning_rate": 1.1073583797674291e-05,
      "loss": 0.3615,
      "step": 5758
    },
    {
      "epoch": 1.728391356542617,
      "grad_norm": 0.14608369767665863,
      "learning_rate": 1.1049638918419025e-05,
      "loss": 0.3666,
      "step": 5759
    },
    {
      "epoch": 1.7286914765906363,
      "grad_norm": 0.13242143392562866,
      "learning_rate": 1.1025718441675348e-05,
      "loss": 0.3433,
      "step": 5760
    },
    {
      "epoch": 1.7289915966386555,
      "grad_norm": 0.1380014419555664,
      "learning_rate": 1.1001822374005611e-05,
      "loss": 0.3624,
      "step": 5761
    },
    {
      "epoch": 1.7292917166866748,
      "grad_norm": 0.12307605892419815,
      "learning_rate": 1.097795072196538e-05,
      "loss": 0.3035,
      "step": 5762
    },
    {
      "epoch": 1.7295918367346939,
      "grad_norm": 0.14597541093826294,
      "learning_rate": 1.0954103492103619e-05,
      "loss": 0.3854,
      "step": 5763
    },
    {
      "epoch": 1.7298919567827131,
      "grad_norm": 0.17674827575683594,
      "learning_rate": 1.093028069096248e-05,
      "loss": 0.3735,
      "step": 5764
    },
    {
      "epoch": 1.7301920768307322,
      "grad_norm": 0.12740208208560944,
      "learning_rate": 1.0906482325077517e-05,
      "loss": 0.3211,
      "step": 5765
    },
    {
      "epoch": 1.7304921968787514,
      "grad_norm": 0.13835439085960388,
      "learning_rate": 1.0882708400977537e-05,
      "loss": 0.3217,
      "step": 5766
    },
    {
      "epoch": 1.7307923169267707,
      "grad_norm": 0.13297055661678314,
      "learning_rate": 1.0858958925184626e-05,
      "loss": 0.3256,
      "step": 5767
    },
    {
      "epoch": 1.73109243697479,
      "grad_norm": 0.13225091993808746,
      "learning_rate": 1.0835233904214215e-05,
      "loss": 0.2926,
      "step": 5768
    },
    {
      "epoch": 1.7313925570228093,
      "grad_norm": 0.14690400660037994,
      "learning_rate": 1.0811533344574943e-05,
      "loss": 0.3783,
      "step": 5769
    },
    {
      "epoch": 1.7316926770708283,
      "grad_norm": 0.15814612805843353,
      "learning_rate": 1.0787857252768807e-05,
      "loss": 0.3502,
      "step": 5770
    },
    {
      "epoch": 1.7319927971188476,
      "grad_norm": 0.13790132105350494,
      "learning_rate": 1.0764205635291092e-05,
      "loss": 0.3314,
      "step": 5771
    },
    {
      "epoch": 1.7322929171668666,
      "grad_norm": 0.14764297008514404,
      "learning_rate": 1.0740578498630339e-05,
      "loss": 0.3743,
      "step": 5772
    },
    {
      "epoch": 1.732593037214886,
      "grad_norm": 0.1338595747947693,
      "learning_rate": 1.0716975849268329e-05,
      "loss": 0.3228,
      "step": 5773
    },
    {
      "epoch": 1.7328931572629052,
      "grad_norm": 0.1401987373828888,
      "learning_rate": 1.0693397693680263e-05,
      "loss": 0.3548,
      "step": 5774
    },
    {
      "epoch": 1.7331932773109244,
      "grad_norm": 0.1693205088376999,
      "learning_rate": 1.0669844038334476e-05,
      "loss": 0.3243,
      "step": 5775
    },
    {
      "epoch": 1.7334933973589437,
      "grad_norm": 0.13423842191696167,
      "learning_rate": 1.0646314889692688e-05,
      "loss": 0.34,
      "step": 5776
    },
    {
      "epoch": 1.7337935174069627,
      "grad_norm": 0.13911528885364532,
      "learning_rate": 1.0622810254209814e-05,
      "loss": 0.3614,
      "step": 5777
    },
    {
      "epoch": 1.734093637454982,
      "grad_norm": 0.15006420016288757,
      "learning_rate": 1.0599330138334084e-05,
      "loss": 0.3359,
      "step": 5778
    },
    {
      "epoch": 1.734393757503001,
      "grad_norm": 0.1311221867799759,
      "learning_rate": 1.0575874548507036e-05,
      "loss": 0.3401,
      "step": 5779
    },
    {
      "epoch": 1.7346938775510203,
      "grad_norm": 0.12324915081262589,
      "learning_rate": 1.0552443491163422e-05,
      "loss": 0.3074,
      "step": 5780
    },
    {
      "epoch": 1.7349939975990396,
      "grad_norm": 0.140946164727211,
      "learning_rate": 1.0529036972731255e-05,
      "loss": 0.3578,
      "step": 5781
    },
    {
      "epoch": 1.7352941176470589,
      "grad_norm": 0.13226085901260376,
      "learning_rate": 1.0505654999631865e-05,
      "loss": 0.3147,
      "step": 5782
    },
    {
      "epoch": 1.7355942376950781,
      "grad_norm": 0.13607388734817505,
      "learning_rate": 1.0482297578279854e-05,
      "loss": 0.3367,
      "step": 5783
    },
    {
      "epoch": 1.7358943577430972,
      "grad_norm": 0.1267491728067398,
      "learning_rate": 1.045896471508302e-05,
      "loss": 0.3052,
      "step": 5784
    },
    {
      "epoch": 1.7361944777911165,
      "grad_norm": 0.11670931428670883,
      "learning_rate": 1.0435656416442485e-05,
      "loss": 0.2676,
      "step": 5785
    },
    {
      "epoch": 1.7364945978391355,
      "grad_norm": 0.13407576084136963,
      "learning_rate": 1.0412372688752614e-05,
      "loss": 0.3232,
      "step": 5786
    },
    {
      "epoch": 1.7367947178871548,
      "grad_norm": 0.12784159183502197,
      "learning_rate": 1.0389113538401052e-05,
      "loss": 0.3163,
      "step": 5787
    },
    {
      "epoch": 1.737094837935174,
      "grad_norm": 0.1374209225177765,
      "learning_rate": 1.036587897176865e-05,
      "loss": 0.334,
      "step": 5788
    },
    {
      "epoch": 1.7373949579831933,
      "grad_norm": 0.1251617819070816,
      "learning_rate": 1.0342668995229555e-05,
      "loss": 0.302,
      "step": 5789
    },
    {
      "epoch": 1.7376950780312126,
      "grad_norm": 0.13529962301254272,
      "learning_rate": 1.0319483615151137e-05,
      "loss": 0.3428,
      "step": 5790
    },
    {
      "epoch": 1.7379951980792316,
      "grad_norm": 0.1410738229751587,
      "learning_rate": 1.029632283789409e-05,
      "loss": 0.3559,
      "step": 5791
    },
    {
      "epoch": 1.738295318127251,
      "grad_norm": 0.1510618031024933,
      "learning_rate": 1.0273186669812262e-05,
      "loss": 0.3485,
      "step": 5792
    },
    {
      "epoch": 1.73859543817527,
      "grad_norm": 0.13779668509960175,
      "learning_rate": 1.0250075117252821e-05,
      "loss": 0.333,
      "step": 5793
    },
    {
      "epoch": 1.7388955582232892,
      "grad_norm": 0.15373767912387848,
      "learning_rate": 1.022698818655612e-05,
      "loss": 0.314,
      "step": 5794
    },
    {
      "epoch": 1.7391956782713085,
      "grad_norm": 0.14197716116905212,
      "learning_rate": 1.0203925884055853e-05,
      "loss": 0.3448,
      "step": 5795
    },
    {
      "epoch": 1.7394957983193278,
      "grad_norm": 0.13314983248710632,
      "learning_rate": 1.0180888216078865e-05,
      "loss": 0.3419,
      "step": 5796
    },
    {
      "epoch": 1.739795918367347,
      "grad_norm": 0.1329575926065445,
      "learning_rate": 1.0157875188945254e-05,
      "loss": 0.3018,
      "step": 5797
    },
    {
      "epoch": 1.7400960384153663,
      "grad_norm": 0.17678740620613098,
      "learning_rate": 1.0134886808968403e-05,
      "loss": 0.3794,
      "step": 5798
    },
    {
      "epoch": 1.7403961584633854,
      "grad_norm": 0.138626828789711,
      "learning_rate": 1.0111923082454932e-05,
      "loss": 0.3211,
      "step": 5799
    },
    {
      "epoch": 1.7406962785114044,
      "grad_norm": 0.12878531217575073,
      "learning_rate": 1.0088984015704629e-05,
      "loss": 0.3038,
      "step": 5800
    },
    {
      "epoch": 1.7409963985594237,
      "grad_norm": 0.13110844790935516,
      "learning_rate": 1.006606961501061e-05,
      "loss": 0.3051,
      "step": 5801
    },
    {
      "epoch": 1.741296518607443,
      "grad_norm": 0.15521807968616486,
      "learning_rate": 1.0043179886659137e-05,
      "loss": 0.4017,
      "step": 5802
    },
    {
      "epoch": 1.7415966386554622,
      "grad_norm": 0.15013937652111053,
      "learning_rate": 1.0020314836929778e-05,
      "loss": 0.3396,
      "step": 5803
    },
    {
      "epoch": 1.7418967587034815,
      "grad_norm": 0.1314486414194107,
      "learning_rate": 9.997474472095291e-06,
      "loss": 0.3179,
      "step": 5804
    },
    {
      "epoch": 1.7421968787515008,
      "grad_norm": 0.14400826394557953,
      "learning_rate": 9.974658798421643e-06,
      "loss": 0.3734,
      "step": 5805
    },
    {
      "epoch": 1.7424969987995198,
      "grad_norm": 0.14358164370059967,
      "learning_rate": 9.951867822168082e-06,
      "loss": 0.3622,
      "step": 5806
    },
    {
      "epoch": 1.7427971188475389,
      "grad_norm": 0.13000699877738953,
      "learning_rate": 9.929101549587027e-06,
      "loss": 0.3424,
      "step": 5807
    },
    {
      "epoch": 1.7430972388955581,
      "grad_norm": 0.13931672275066376,
      "learning_rate": 9.906359986924164e-06,
      "loss": 0.3369,
      "step": 5808
    },
    {
      "epoch": 1.7433973589435774,
      "grad_norm": 0.1330275535583496,
      "learning_rate": 9.883643140418387e-06,
      "loss": 0.3279,
      "step": 5809
    },
    {
      "epoch": 1.7436974789915967,
      "grad_norm": 0.15318936109542847,
      "learning_rate": 9.860951016301756e-06,
      "loss": 0.3555,
      "step": 5810
    },
    {
      "epoch": 1.743997599039616,
      "grad_norm": 0.12491942197084427,
      "learning_rate": 9.838283620799638e-06,
      "loss": 0.2907,
      "step": 5811
    },
    {
      "epoch": 1.7442977190876352,
      "grad_norm": 0.13773776590824127,
      "learning_rate": 9.81564096013058e-06,
      "loss": 0.3346,
      "step": 5812
    },
    {
      "epoch": 1.7445978391356542,
      "grad_norm": 0.14335572719573975,
      "learning_rate": 9.793023040506322e-06,
      "loss": 0.351,
      "step": 5813
    },
    {
      "epoch": 1.7448979591836735,
      "grad_norm": 0.1270056813955307,
      "learning_rate": 9.770429868131803e-06,
      "loss": 0.2976,
      "step": 5814
    },
    {
      "epoch": 1.7451980792316926,
      "grad_norm": 0.14624319970607758,
      "learning_rate": 9.74786144920522e-06,
      "loss": 0.3517,
      "step": 5815
    },
    {
      "epoch": 1.7454981992797118,
      "grad_norm": 0.14200744032859802,
      "learning_rate": 9.725317789917964e-06,
      "loss": 0.3661,
      "step": 5816
    },
    {
      "epoch": 1.745798319327731,
      "grad_norm": 0.13641224801540375,
      "learning_rate": 9.702798896454658e-06,
      "loss": 0.324,
      "step": 5817
    },
    {
      "epoch": 1.7460984393757504,
      "grad_norm": 0.14655904471874237,
      "learning_rate": 9.680304774993065e-06,
      "loss": 0.3712,
      "step": 5818
    },
    {
      "epoch": 1.7463985594237696,
      "grad_norm": 0.1340646892786026,
      "learning_rate": 9.657835431704165e-06,
      "loss": 0.3474,
      "step": 5819
    },
    {
      "epoch": 1.7466986794717887,
      "grad_norm": 0.15028226375579834,
      "learning_rate": 9.635390872752237e-06,
      "loss": 0.3052,
      "step": 5820
    },
    {
      "epoch": 1.746998799519808,
      "grad_norm": 0.1361413598060608,
      "learning_rate": 9.612971104294655e-06,
      "loss": 0.3421,
      "step": 5821
    },
    {
      "epoch": 1.747298919567827,
      "grad_norm": 0.1370229870080948,
      "learning_rate": 9.590576132481988e-06,
      "loss": 0.3645,
      "step": 5822
    },
    {
      "epoch": 1.7475990396158463,
      "grad_norm": 0.12540948390960693,
      "learning_rate": 9.568205963458076e-06,
      "loss": 0.3009,
      "step": 5823
    },
    {
      "epoch": 1.7478991596638656,
      "grad_norm": 0.13558447360992432,
      "learning_rate": 9.545860603359924e-06,
      "loss": 0.3289,
      "step": 5824
    },
    {
      "epoch": 1.7481992797118848,
      "grad_norm": 0.12754158675670624,
      "learning_rate": 9.523540058317726e-06,
      "loss": 0.3161,
      "step": 5825
    },
    {
      "epoch": 1.748499399759904,
      "grad_norm": 0.16585437953472137,
      "learning_rate": 9.50124433445485e-06,
      "loss": 0.3261,
      "step": 5826
    },
    {
      "epoch": 1.7487995198079231,
      "grad_norm": 0.13536782562732697,
      "learning_rate": 9.478973437887873e-06,
      "loss": 0.334,
      "step": 5827
    },
    {
      "epoch": 1.7490996398559424,
      "grad_norm": 0.134988933801651,
      "learning_rate": 9.456727374726559e-06,
      "loss": 0.3285,
      "step": 5828
    },
    {
      "epoch": 1.7493997599039615,
      "grad_norm": 0.15050005912780762,
      "learning_rate": 9.434506151073885e-06,
      "loss": 0.3595,
      "step": 5829
    },
    {
      "epoch": 1.7496998799519807,
      "grad_norm": 0.13214461505413055,
      "learning_rate": 9.412309773025952e-06,
      "loss": 0.3347,
      "step": 5830
    },
    {
      "epoch": 1.75,
      "grad_norm": 0.14154385030269623,
      "learning_rate": 9.390138246672131e-06,
      "loss": 0.3509,
      "step": 5831
    },
    {
      "epoch": 1.7503001200480193,
      "grad_norm": 0.14709118008613586,
      "learning_rate": 9.36799157809487e-06,
      "loss": 0.3898,
      "step": 5832
    },
    {
      "epoch": 1.7506002400960385,
      "grad_norm": 0.13362213969230652,
      "learning_rate": 9.345869773369875e-06,
      "loss": 0.3212,
      "step": 5833
    },
    {
      "epoch": 1.7509003601440576,
      "grad_norm": 0.1537594348192215,
      "learning_rate": 9.323772838566037e-06,
      "loss": 0.365,
      "step": 5834
    },
    {
      "epoch": 1.7512004801920769,
      "grad_norm": 0.13459311425685883,
      "learning_rate": 9.301700779745359e-06,
      "loss": 0.3054,
      "step": 5835
    },
    {
      "epoch": 1.751500600240096,
      "grad_norm": 0.12368548661470413,
      "learning_rate": 9.279653602963068e-06,
      "loss": 0.3151,
      "step": 5836
    },
    {
      "epoch": 1.7518007202881152,
      "grad_norm": 0.13498592376708984,
      "learning_rate": 9.25763131426758e-06,
      "loss": 0.3331,
      "step": 5837
    },
    {
      "epoch": 1.7521008403361344,
      "grad_norm": 0.13990454375743866,
      "learning_rate": 9.235633919700414e-06,
      "loss": 0.3613,
      "step": 5838
    },
    {
      "epoch": 1.7524009603841537,
      "grad_norm": 0.13571259379386902,
      "learning_rate": 9.213661425296338e-06,
      "loss": 0.3605,
      "step": 5839
    },
    {
      "epoch": 1.752701080432173,
      "grad_norm": 0.1423293650150299,
      "learning_rate": 9.191713837083238e-06,
      "loss": 0.3442,
      "step": 5840
    },
    {
      "epoch": 1.753001200480192,
      "grad_norm": 0.12556971609592438,
      "learning_rate": 9.169791161082175e-06,
      "loss": 0.297,
      "step": 5841
    },
    {
      "epoch": 1.7533013205282113,
      "grad_norm": 0.14545780420303345,
      "learning_rate": 9.147893403307418e-06,
      "loss": 0.323,
      "step": 5842
    },
    {
      "epoch": 1.7536014405762304,
      "grad_norm": 0.14192785322666168,
      "learning_rate": 9.126020569766336e-06,
      "loss": 0.3551,
      "step": 5843
    },
    {
      "epoch": 1.7539015606242496,
      "grad_norm": 0.16282089054584503,
      "learning_rate": 9.104172666459453e-06,
      "loss": 0.3321,
      "step": 5844
    },
    {
      "epoch": 1.754201680672269,
      "grad_norm": 0.13829950988292694,
      "learning_rate": 9.082349699380588e-06,
      "loss": 0.3415,
      "step": 5845
    },
    {
      "epoch": 1.7545018007202882,
      "grad_norm": 0.1373262256383896,
      "learning_rate": 9.060551674516538e-06,
      "loss": 0.3403,
      "step": 5846
    },
    {
      "epoch": 1.7548019207683074,
      "grad_norm": 0.14032645523548126,
      "learning_rate": 9.038778597847398e-06,
      "loss": 0.3585,
      "step": 5847
    },
    {
      "epoch": 1.7551020408163265,
      "grad_norm": 0.13157224655151367,
      "learning_rate": 9.01703047534631e-06,
      "loss": 0.3272,
      "step": 5848
    },
    {
      "epoch": 1.7554021608643458,
      "grad_norm": 0.12770244479179382,
      "learning_rate": 8.99530731297965e-06,
      "loss": 0.316,
      "step": 5849
    },
    {
      "epoch": 1.7557022809123648,
      "grad_norm": 0.13309158384799957,
      "learning_rate": 8.973609116706926e-06,
      "loss": 0.3057,
      "step": 5850
    },
    {
      "epoch": 1.756002400960384,
      "grad_norm": 0.14228470623493195,
      "learning_rate": 8.95193589248079e-06,
      "loss": 0.3395,
      "step": 5851
    },
    {
      "epoch": 1.7563025210084033,
      "grad_norm": 0.14081576466560364,
      "learning_rate": 8.930287646247015e-06,
      "loss": 0.3626,
      "step": 5852
    },
    {
      "epoch": 1.7566026410564226,
      "grad_norm": 0.1345006823539734,
      "learning_rate": 8.908664383944554e-06,
      "loss": 0.3514,
      "step": 5853
    },
    {
      "epoch": 1.7569027611044419,
      "grad_norm": 0.13585862517356873,
      "learning_rate": 8.887066111505515e-06,
      "loss": 0.3293,
      "step": 5854
    },
    {
      "epoch": 1.7572028811524611,
      "grad_norm": 0.13082218170166016,
      "learning_rate": 8.86549283485516e-06,
      "loss": 0.3116,
      "step": 5855
    },
    {
      "epoch": 1.7575030012004802,
      "grad_norm": 0.1294088065624237,
      "learning_rate": 8.843944559911843e-06,
      "loss": 0.2926,
      "step": 5856
    },
    {
      "epoch": 1.7578031212484992,
      "grad_norm": 0.1375386118888855,
      "learning_rate": 8.822421292587047e-06,
      "loss": 0.3253,
      "step": 5857
    },
    {
      "epoch": 1.7581032412965185,
      "grad_norm": 0.13382361829280853,
      "learning_rate": 8.800923038785502e-06,
      "loss": 0.3276,
      "step": 5858
    },
    {
      "epoch": 1.7584033613445378,
      "grad_norm": 0.13776427507400513,
      "learning_rate": 8.779449804404993e-06,
      "loss": 0.3388,
      "step": 5859
    },
    {
      "epoch": 1.758703481392557,
      "grad_norm": 0.1295420527458191,
      "learning_rate": 8.758001595336418e-06,
      "loss": 0.3082,
      "step": 5860
    },
    {
      "epoch": 1.7590036014405763,
      "grad_norm": 0.13528567552566528,
      "learning_rate": 8.73657841746387e-06,
      "loss": 0.3456,
      "step": 5861
    },
    {
      "epoch": 1.7593037214885956,
      "grad_norm": 0.14936035871505737,
      "learning_rate": 8.71518027666457e-06,
      "loss": 0.3768,
      "step": 5862
    },
    {
      "epoch": 1.7596038415366146,
      "grad_norm": 0.13647547364234924,
      "learning_rate": 8.693807178808822e-06,
      "loss": 0.3217,
      "step": 5863
    },
    {
      "epoch": 1.7599039615846337,
      "grad_norm": 0.13374289870262146,
      "learning_rate": 8.672459129760125e-06,
      "loss": 0.3364,
      "step": 5864
    },
    {
      "epoch": 1.760204081632653,
      "grad_norm": 0.17200399935245514,
      "learning_rate": 8.651136135375026e-06,
      "loss": 0.3566,
      "step": 5865
    },
    {
      "epoch": 1.7605042016806722,
      "grad_norm": 0.1232210174202919,
      "learning_rate": 8.62983820150327e-06,
      "loss": 0.2979,
      "step": 5866
    },
    {
      "epoch": 1.7608043217286915,
      "grad_norm": 0.13488927483558655,
      "learning_rate": 8.608565333987717e-06,
      "loss": 0.3255,
      "step": 5867
    },
    {
      "epoch": 1.7611044417767108,
      "grad_norm": 0.12796378135681152,
      "learning_rate": 8.587317538664307e-06,
      "loss": 0.3192,
      "step": 5868
    },
    {
      "epoch": 1.76140456182473,
      "grad_norm": 0.14889287948608398,
      "learning_rate": 8.566094821362148e-06,
      "loss": 0.3042,
      "step": 5869
    },
    {
      "epoch": 1.761704681872749,
      "grad_norm": 0.13275815546512604,
      "learning_rate": 8.544897187903423e-06,
      "loss": 0.3288,
      "step": 5870
    },
    {
      "epoch": 1.7620048019207684,
      "grad_norm": 0.12997561693191528,
      "learning_rate": 8.52372464410348e-06,
      "loss": 0.3275,
      "step": 5871
    },
    {
      "epoch": 1.7623049219687874,
      "grad_norm": 0.15465877950191498,
      "learning_rate": 8.502577195770777e-06,
      "loss": 0.3424,
      "step": 5872
    },
    {
      "epoch": 1.7626050420168067,
      "grad_norm": 0.134208545088768,
      "learning_rate": 8.481454848706838e-06,
      "loss": 0.3376,
      "step": 5873
    },
    {
      "epoch": 1.762905162064826,
      "grad_norm": 0.12746387720108032,
      "learning_rate": 8.46035760870636e-06,
      "loss": 0.2846,
      "step": 5874
    },
    {
      "epoch": 1.7632052821128452,
      "grad_norm": 0.13657613098621368,
      "learning_rate": 8.439285481557136e-06,
      "loss": 0.3367,
      "step": 5875
    },
    {
      "epoch": 1.7635054021608645,
      "grad_norm": 0.128277987241745,
      "learning_rate": 8.418238473040041e-06,
      "loss": 0.313,
      "step": 5876
    },
    {
      "epoch": 1.7638055222088835,
      "grad_norm": 0.12578245997428894,
      "learning_rate": 8.397216588929101e-06,
      "loss": 0.2953,
      "step": 5877
    },
    {
      "epoch": 1.7641056422569028,
      "grad_norm": 0.1306450366973877,
      "learning_rate": 8.376219834991406e-06,
      "loss": 0.3353,
      "step": 5878
    },
    {
      "epoch": 1.7644057623049219,
      "grad_norm": 0.15677793323993683,
      "learning_rate": 8.355248216987189e-06,
      "loss": 0.3395,
      "step": 5879
    },
    {
      "epoch": 1.7647058823529411,
      "grad_norm": 0.13563622534275055,
      "learning_rate": 8.33430174066978e-06,
      "loss": 0.3219,
      "step": 5880
    },
    {
      "epoch": 1.7650060024009604,
      "grad_norm": 0.14059104025363922,
      "learning_rate": 8.313380411785599e-06,
      "loss": 0.3612,
      "step": 5881
    },
    {
      "epoch": 1.7653061224489797,
      "grad_norm": 0.14718660712242126,
      "learning_rate": 8.292484236074139e-06,
      "loss": 0.3386,
      "step": 5882
    },
    {
      "epoch": 1.765606242496999,
      "grad_norm": 0.14431437849998474,
      "learning_rate": 8.271613219268093e-06,
      "loss": 0.3526,
      "step": 5883
    },
    {
      "epoch": 1.765906362545018,
      "grad_norm": 0.14376048743724823,
      "learning_rate": 8.250767367093126e-06,
      "loss": 0.3241,
      "step": 5884
    },
    {
      "epoch": 1.7662064825930373,
      "grad_norm": 0.13576029241085052,
      "learning_rate": 8.229946685268097e-06,
      "loss": 0.3458,
      "step": 5885
    },
    {
      "epoch": 1.7665066026410563,
      "grad_norm": 0.1485685408115387,
      "learning_rate": 8.209151179504893e-06,
      "loss": 0.3531,
      "step": 5886
    },
    {
      "epoch": 1.7668067226890756,
      "grad_norm": 0.12961770594120026,
      "learning_rate": 8.188380855508536e-06,
      "loss": 0.3223,
      "step": 5887
    },
    {
      "epoch": 1.7671068427370948,
      "grad_norm": 0.13379228115081787,
      "learning_rate": 8.16763571897714e-06,
      "loss": 0.3253,
      "step": 5888
    },
    {
      "epoch": 1.767406962785114,
      "grad_norm": 0.13768836855888367,
      "learning_rate": 8.146915775601882e-06,
      "loss": 0.312,
      "step": 5889
    },
    {
      "epoch": 1.7677070828331334,
      "grad_norm": 0.14346608519554138,
      "learning_rate": 8.126221031067027e-06,
      "loss": 0.3625,
      "step": 5890
    },
    {
      "epoch": 1.7680072028811524,
      "grad_norm": 0.13501888513565063,
      "learning_rate": 8.105551491049945e-06,
      "loss": 0.3347,
      "step": 5891
    },
    {
      "epoch": 1.7683073229291717,
      "grad_norm": 0.5826078057289124,
      "learning_rate": 8.084907161221123e-06,
      "loss": 0.319,
      "step": 5892
    },
    {
      "epoch": 1.7686074429771907,
      "grad_norm": 0.19021013379096985,
      "learning_rate": 8.064288047244039e-06,
      "loss": 0.3612,
      "step": 5893
    },
    {
      "epoch": 1.76890756302521,
      "grad_norm": 0.13114556670188904,
      "learning_rate": 8.043694154775372e-06,
      "loss": 0.3109,
      "step": 5894
    },
    {
      "epoch": 1.7692076830732293,
      "grad_norm": 0.14585594832897186,
      "learning_rate": 8.023125489464744e-06,
      "loss": 0.369,
      "step": 5895
    },
    {
      "epoch": 1.7695078031212486,
      "grad_norm": 0.13737501204013824,
      "learning_rate": 8.00258205695501e-06,
      "loss": 0.3367,
      "step": 5896
    },
    {
      "epoch": 1.7698079231692678,
      "grad_norm": 0.13545137643814087,
      "learning_rate": 7.982063862881994e-06,
      "loss": 0.331,
      "step": 5897
    },
    {
      "epoch": 1.7701080432172869,
      "grad_norm": 0.12734316289424896,
      "learning_rate": 7.961570912874617e-06,
      "loss": 0.3134,
      "step": 5898
    },
    {
      "epoch": 1.7704081632653061,
      "grad_norm": 0.12725022435188293,
      "learning_rate": 7.94110321255489e-06,
      "loss": 0.3137,
      "step": 5899
    },
    {
      "epoch": 1.7707082833133252,
      "grad_norm": 0.12916243076324463,
      "learning_rate": 7.920660767537901e-06,
      "loss": 0.3092,
      "step": 5900
    },
    {
      "epoch": 1.7710084033613445,
      "grad_norm": 0.12457706034183502,
      "learning_rate": 7.900243583431788e-06,
      "loss": 0.2868,
      "step": 5901
    },
    {
      "epoch": 1.7713085234093637,
      "grad_norm": 0.12678676843643188,
      "learning_rate": 7.8798516658378e-06,
      "loss": 0.2941,
      "step": 5902
    },
    {
      "epoch": 1.771608643457383,
      "grad_norm": 0.13747821748256683,
      "learning_rate": 7.859485020350177e-06,
      "loss": 0.3329,
      "step": 5903
    },
    {
      "epoch": 1.7719087635054023,
      "grad_norm": 0.1366930603981018,
      "learning_rate": 7.839143652556314e-06,
      "loss": 0.3282,
      "step": 5904
    },
    {
      "epoch": 1.7722088835534213,
      "grad_norm": 0.15380358695983887,
      "learning_rate": 7.818827568036624e-06,
      "loss": 0.3268,
      "step": 5905
    },
    {
      "epoch": 1.7725090036014406,
      "grad_norm": 0.13130567967891693,
      "learning_rate": 7.798536772364572e-06,
      "loss": 0.3032,
      "step": 5906
    },
    {
      "epoch": 1.7728091236494596,
      "grad_norm": 0.13877171277999878,
      "learning_rate": 7.778271271106719e-06,
      "loss": 0.3384,
      "step": 5907
    },
    {
      "epoch": 1.773109243697479,
      "grad_norm": 0.13448159396648407,
      "learning_rate": 7.758031069822702e-06,
      "loss": 0.3186,
      "step": 5908
    },
    {
      "epoch": 1.7734093637454982,
      "grad_norm": 0.22665102779865265,
      "learning_rate": 7.737816174065138e-06,
      "loss": 0.3249,
      "step": 5909
    },
    {
      "epoch": 1.7737094837935174,
      "grad_norm": 0.3638037443161011,
      "learning_rate": 7.717626589379789e-06,
      "loss": 0.3345,
      "step": 5910
    },
    {
      "epoch": 1.7740096038415367,
      "grad_norm": 0.1367301195859909,
      "learning_rate": 7.697462321305404e-06,
      "loss": 0.3451,
      "step": 5911
    },
    {
      "epoch": 1.7743097238895558,
      "grad_norm": 0.1390855461359024,
      "learning_rate": 7.677323375373835e-06,
      "loss": 0.3084,
      "step": 5912
    },
    {
      "epoch": 1.774609843937575,
      "grad_norm": 0.13941091299057007,
      "learning_rate": 7.657209757109995e-06,
      "loss": 0.3547,
      "step": 5913
    },
    {
      "epoch": 1.774909963985594,
      "grad_norm": 0.1419765055179596,
      "learning_rate": 7.637121472031782e-06,
      "loss": 0.3672,
      "step": 5914
    },
    {
      "epoch": 1.7752100840336134,
      "grad_norm": 0.14254456758499146,
      "learning_rate": 7.617058525650223e-06,
      "loss": 0.2838,
      "step": 5915
    },
    {
      "epoch": 1.7755102040816326,
      "grad_norm": 0.14249834418296814,
      "learning_rate": 7.597020923469322e-06,
      "loss": 0.3307,
      "step": 5916
    },
    {
      "epoch": 1.775810324129652,
      "grad_norm": 0.14962702989578247,
      "learning_rate": 7.577008670986185e-06,
      "loss": 0.3624,
      "step": 5917
    },
    {
      "epoch": 1.7761104441776712,
      "grad_norm": 0.13569994270801544,
      "learning_rate": 7.5570217736909535e-06,
      "loss": 0.3354,
      "step": 5918
    },
    {
      "epoch": 1.7764105642256904,
      "grad_norm": 0.14091704785823822,
      "learning_rate": 7.537060237066806e-06,
      "loss": 0.3263,
      "step": 5919
    },
    {
      "epoch": 1.7767106842737095,
      "grad_norm": 0.1351580172777176,
      "learning_rate": 7.517124066589909e-06,
      "loss": 0.3112,
      "step": 5920
    },
    {
      "epoch": 1.7770108043217285,
      "grad_norm": 0.1452702134847641,
      "learning_rate": 7.497213267729586e-06,
      "loss": 0.357,
      "step": 5921
    },
    {
      "epoch": 1.7773109243697478,
      "grad_norm": 0.11909019947052002,
      "learning_rate": 7.4773278459481234e-06,
      "loss": 0.2723,
      "step": 5922
    },
    {
      "epoch": 1.777611044417767,
      "grad_norm": 0.13754090666770935,
      "learning_rate": 7.4574678067008245e-06,
      "loss": 0.296,
      "step": 5923
    },
    {
      "epoch": 1.7779111644657863,
      "grad_norm": 0.12423918396234512,
      "learning_rate": 7.4376331554360964e-06,
      "loss": 0.2805,
      "step": 5924
    },
    {
      "epoch": 1.7782112845138056,
      "grad_norm": 0.14556850492954254,
      "learning_rate": 7.417823897595322e-06,
      "loss": 0.3754,
      "step": 5925
    },
    {
      "epoch": 1.7785114045618249,
      "grad_norm": 0.21286626160144806,
      "learning_rate": 7.398040038612986e-06,
      "loss": 0.3514,
      "step": 5926
    },
    {
      "epoch": 1.778811524609844,
      "grad_norm": 0.13760562241077423,
      "learning_rate": 7.378281583916535e-06,
      "loss": 0.3351,
      "step": 5927
    },
    {
      "epoch": 1.7791116446578632,
      "grad_norm": 0.13750424981117249,
      "learning_rate": 7.358548538926457e-06,
      "loss": 0.3317,
      "step": 5928
    },
    {
      "epoch": 1.7794117647058822,
      "grad_norm": 0.1289098858833313,
      "learning_rate": 7.338840909056311e-06,
      "loss": 0.3015,
      "step": 5929
    },
    {
      "epoch": 1.7797118847539015,
      "grad_norm": 0.13978558778762817,
      "learning_rate": 7.319158699712669e-06,
      "loss": 0.3486,
      "step": 5930
    },
    {
      "epoch": 1.7800120048019208,
      "grad_norm": 0.14103145897388458,
      "learning_rate": 7.299501916295093e-06,
      "loss": 0.3395,
      "step": 5931
    },
    {
      "epoch": 1.78031212484994,
      "grad_norm": 0.14018738269805908,
      "learning_rate": 7.279870564196201e-06,
      "loss": 0.35,
      "step": 5932
    },
    {
      "epoch": 1.7806122448979593,
      "grad_norm": 0.13689963519573212,
      "learning_rate": 7.26026464880164e-06,
      "loss": 0.348,
      "step": 5933
    },
    {
      "epoch": 1.7809123649459784,
      "grad_norm": 0.12805680930614471,
      "learning_rate": 7.240684175490075e-06,
      "loss": 0.3071,
      "step": 5934
    },
    {
      "epoch": 1.7812124849939976,
      "grad_norm": 0.13557565212249756,
      "learning_rate": 7.2211291496331876e-06,
      "loss": 0.3366,
      "step": 5935
    },
    {
      "epoch": 1.7815126050420167,
      "grad_norm": 0.14251002669334412,
      "learning_rate": 7.20159957659563e-06,
      "loss": 0.3636,
      "step": 5936
    },
    {
      "epoch": 1.781812725090036,
      "grad_norm": 0.13931401073932648,
      "learning_rate": 7.182095461735161e-06,
      "loss": 0.3223,
      "step": 5937
    },
    {
      "epoch": 1.7821128451380552,
      "grad_norm": 0.13256897032260895,
      "learning_rate": 7.1626168104025e-06,
      "loss": 0.3125,
      "step": 5938
    },
    {
      "epoch": 1.7824129651860745,
      "grad_norm": 0.13756443560123444,
      "learning_rate": 7.143163627941385e-06,
      "loss": 0.3507,
      "step": 5939
    },
    {
      "epoch": 1.7827130852340938,
      "grad_norm": 0.14429163932800293,
      "learning_rate": 7.12373591968859e-06,
      "loss": 0.3552,
      "step": 5940
    },
    {
      "epoch": 1.7830132052821128,
      "grad_norm": 0.12304878979921341,
      "learning_rate": 7.104333690973852e-06,
      "loss": 0.2926,
      "step": 5941
    },
    {
      "epoch": 1.783313325330132,
      "grad_norm": 0.13818055391311646,
      "learning_rate": 7.084956947119969e-06,
      "loss": 0.3131,
      "step": 5942
    },
    {
      "epoch": 1.7836134453781511,
      "grad_norm": 0.14789220690727234,
      "learning_rate": 7.065605693442745e-06,
      "loss": 0.3543,
      "step": 5943
    },
    {
      "epoch": 1.7839135654261704,
      "grad_norm": 0.1315280646085739,
      "learning_rate": 7.046279935250943e-06,
      "loss": 0.3336,
      "step": 5944
    },
    {
      "epoch": 1.7842136854741897,
      "grad_norm": 0.14173337817192078,
      "learning_rate": 7.0269796778463906e-06,
      "loss": 0.3321,
      "step": 5945
    },
    {
      "epoch": 1.784513805522209,
      "grad_norm": 0.14042839407920837,
      "learning_rate": 7.007704926523884e-06,
      "loss": 0.322,
      "step": 5946
    },
    {
      "epoch": 1.7848139255702282,
      "grad_norm": 0.13916705548763275,
      "learning_rate": 6.988455686571216e-06,
      "loss": 0.3326,
      "step": 5947
    },
    {
      "epoch": 1.7851140456182473,
      "grad_norm": 0.15724653005599976,
      "learning_rate": 6.9692319632692185e-06,
      "loss": 0.3875,
      "step": 5948
    },
    {
      "epoch": 1.7854141656662665,
      "grad_norm": 0.14085812866687775,
      "learning_rate": 6.950033761891672e-06,
      "loss": 0.3354,
      "step": 5949
    },
    {
      "epoch": 1.7857142857142856,
      "grad_norm": 0.14589902758598328,
      "learning_rate": 6.930861087705398e-06,
      "loss": 0.3516,
      "step": 5950
    },
    {
      "epoch": 1.7860144057623049,
      "grad_norm": 0.1342475861310959,
      "learning_rate": 6.9117139459702105e-06,
      "loss": 0.2982,
      "step": 5951
    },
    {
      "epoch": 1.7863145258103241,
      "grad_norm": 0.1392049491405487,
      "learning_rate": 6.892592341938908e-06,
      "loss": 0.3603,
      "step": 5952
    },
    {
      "epoch": 1.7866146458583434,
      "grad_norm": 0.13203851878643036,
      "learning_rate": 6.873496280857239e-06,
      "loss": 0.3169,
      "step": 5953
    },
    {
      "epoch": 1.7869147659063627,
      "grad_norm": 0.11781606823205948,
      "learning_rate": 6.854425767964034e-06,
      "loss": 0.28,
      "step": 5954
    },
    {
      "epoch": 1.7872148859543817,
      "grad_norm": 0.13385334610939026,
      "learning_rate": 6.835380808491065e-06,
      "loss": 0.3304,
      "step": 5955
    },
    {
      "epoch": 1.787515006002401,
      "grad_norm": 0.13968412578105927,
      "learning_rate": 6.816361407663096e-06,
      "loss": 0.3427,
      "step": 5956
    },
    {
      "epoch": 1.78781512605042,
      "grad_norm": 0.14403066039085388,
      "learning_rate": 6.797367570697866e-06,
      "loss": 0.3407,
      "step": 5957
    },
    {
      "epoch": 1.7881152460984393,
      "grad_norm": 0.13157077133655548,
      "learning_rate": 6.778399302806116e-06,
      "loss": 0.3045,
      "step": 5958
    },
    {
      "epoch": 1.7884153661464586,
      "grad_norm": 0.13473302125930786,
      "learning_rate": 6.7594566091916165e-06,
      "loss": 0.3423,
      "step": 5959
    },
    {
      "epoch": 1.7887154861944778,
      "grad_norm": 0.14571532607078552,
      "learning_rate": 6.7405394950510345e-06,
      "loss": 0.3547,
      "step": 5960
    },
    {
      "epoch": 1.7890156062424971,
      "grad_norm": 0.13778209686279297,
      "learning_rate": 6.721647965574063e-06,
      "loss": 0.3251,
      "step": 5961
    },
    {
      "epoch": 1.7893157262905162,
      "grad_norm": 0.1374538093805313,
      "learning_rate": 6.702782025943377e-06,
      "loss": 0.3319,
      "step": 5962
    },
    {
      "epoch": 1.7896158463385354,
      "grad_norm": 0.14873814582824707,
      "learning_rate": 6.68394168133466e-06,
      "loss": 0.3829,
      "step": 5963
    },
    {
      "epoch": 1.7899159663865545,
      "grad_norm": 0.14020781219005585,
      "learning_rate": 6.665126936916532e-06,
      "loss": 0.3257,
      "step": 5964
    },
    {
      "epoch": 1.7902160864345738,
      "grad_norm": 0.12964782118797302,
      "learning_rate": 6.646337797850588e-06,
      "loss": 0.3252,
      "step": 5965
    },
    {
      "epoch": 1.790516206482593,
      "grad_norm": 0.14314241707324982,
      "learning_rate": 6.6275742692914145e-06,
      "loss": 0.3333,
      "step": 5966
    },
    {
      "epoch": 1.7908163265306123,
      "grad_norm": 0.13476979732513428,
      "learning_rate": 6.608836356386583e-06,
      "loss": 0.3065,
      "step": 5967
    },
    {
      "epoch": 1.7911164465786316,
      "grad_norm": 0.14589078724384308,
      "learning_rate": 6.5901240642766256e-06,
      "loss": 0.3388,
      "step": 5968
    },
    {
      "epoch": 1.7914165666266506,
      "grad_norm": 0.135845348238945,
      "learning_rate": 6.571437398095026e-06,
      "loss": 0.329,
      "step": 5969
    },
    {
      "epoch": 1.7917166866746699,
      "grad_norm": 0.13261300325393677,
      "learning_rate": 6.552776362968271e-06,
      "loss": 0.3308,
      "step": 5970
    },
    {
      "epoch": 1.792016806722689,
      "grad_norm": 0.1329193115234375,
      "learning_rate": 6.534140964015822e-06,
      "loss": 0.3224,
      "step": 5971
    },
    {
      "epoch": 1.7923169267707082,
      "grad_norm": 0.1382874846458435,
      "learning_rate": 6.515531206350045e-06,
      "loss": 0.3134,
      "step": 5972
    },
    {
      "epoch": 1.7926170468187275,
      "grad_norm": 0.1270531266927719,
      "learning_rate": 6.496947095076345e-06,
      "loss": 0.3019,
      "step": 5973
    },
    {
      "epoch": 1.7929171668667467,
      "grad_norm": 0.13709579408168793,
      "learning_rate": 6.478388635293031e-06,
      "loss": 0.3426,
      "step": 5974
    },
    {
      "epoch": 1.793217286914766,
      "grad_norm": 0.1275123655796051,
      "learning_rate": 6.459855832091422e-06,
      "loss": 0.3127,
      "step": 5975
    },
    {
      "epoch": 1.7935174069627853,
      "grad_norm": 0.13616742193698883,
      "learning_rate": 6.441348690555804e-06,
      "loss": 0.3208,
      "step": 5976
    },
    {
      "epoch": 1.7938175270108043,
      "grad_norm": 0.19298814237117767,
      "learning_rate": 6.4228672157633505e-06,
      "loss": 0.333,
      "step": 5977
    },
    {
      "epoch": 1.7941176470588234,
      "grad_norm": 0.13557209074497223,
      "learning_rate": 6.404411412784283e-06,
      "loss": 0.3354,
      "step": 5978
    },
    {
      "epoch": 1.7944177671068426,
      "grad_norm": 0.13909755647182465,
      "learning_rate": 6.385981286681708e-06,
      "loss": 0.3035,
      "step": 5979
    },
    {
      "epoch": 1.794717887154862,
      "grad_norm": 0.12394416332244873,
      "learning_rate": 6.367576842511735e-06,
      "loss": 0.2896,
      "step": 5980
    },
    {
      "epoch": 1.7950180072028812,
      "grad_norm": 0.16678430140018463,
      "learning_rate": 6.349198085323427e-06,
      "loss": 0.3978,
      "step": 5981
    },
    {
      "epoch": 1.7953181272509005,
      "grad_norm": 0.13517844676971436,
      "learning_rate": 6.330845020158771e-06,
      "loss": 0.3176,
      "step": 5982
    },
    {
      "epoch": 1.7956182472989197,
      "grad_norm": 0.1313657909631729,
      "learning_rate": 6.312517652052685e-06,
      "loss": 0.3053,
      "step": 5983
    },
    {
      "epoch": 1.7959183673469388,
      "grad_norm": 0.13233062624931335,
      "learning_rate": 6.294215986033136e-06,
      "loss": 0.3354,
      "step": 5984
    },
    {
      "epoch": 1.7962184873949578,
      "grad_norm": 0.13155484199523926,
      "learning_rate": 6.27594002712093e-06,
      "loss": 0.3378,
      "step": 5985
    },
    {
      "epoch": 1.796518607442977,
      "grad_norm": 0.13558879494667053,
      "learning_rate": 6.257689780329901e-06,
      "loss": 0.3398,
      "step": 5986
    },
    {
      "epoch": 1.7968187274909964,
      "grad_norm": 0.14417846500873566,
      "learning_rate": 6.239465250666754e-06,
      "loss": 0.3284,
      "step": 5987
    },
    {
      "epoch": 1.7971188475390156,
      "grad_norm": 0.1278696209192276,
      "learning_rate": 6.221266443131213e-06,
      "loss": 0.2962,
      "step": 5988
    },
    {
      "epoch": 1.797418967587035,
      "grad_norm": 0.13779926300048828,
      "learning_rate": 6.203093362715906e-06,
      "loss": 0.3422,
      "step": 5989
    },
    {
      "epoch": 1.7977190876350542,
      "grad_norm": 0.12292470782995224,
      "learning_rate": 6.184946014406412e-06,
      "loss": 0.2764,
      "step": 5990
    },
    {
      "epoch": 1.7980192076830732,
      "grad_norm": 0.13775518536567688,
      "learning_rate": 6.166824403181226e-06,
      "loss": 0.3344,
      "step": 5991
    },
    {
      "epoch": 1.7983193277310925,
      "grad_norm": 0.13000649213790894,
      "learning_rate": 6.148728534011805e-06,
      "loss": 0.2886,
      "step": 5992
    },
    {
      "epoch": 1.7986194477791115,
      "grad_norm": 0.13752397894859314,
      "learning_rate": 6.130658411862577e-06,
      "loss": 0.313,
      "step": 5993
    },
    {
      "epoch": 1.7989195678271308,
      "grad_norm": 0.1430034637451172,
      "learning_rate": 6.112614041690856e-06,
      "loss": 0.3374,
      "step": 5994
    },
    {
      "epoch": 1.79921968787515,
      "grad_norm": 0.15081876516342163,
      "learning_rate": 6.094595428446892e-06,
      "loss": 0.3419,
      "step": 5995
    },
    {
      "epoch": 1.7995198079231693,
      "grad_norm": 0.1434541642665863,
      "learning_rate": 6.076602577073898e-06,
      "loss": 0.3438,
      "step": 5996
    },
    {
      "epoch": 1.7998199279711886,
      "grad_norm": 0.14269985258579254,
      "learning_rate": 6.058635492508013e-06,
      "loss": 0.343,
      "step": 5997
    },
    {
      "epoch": 1.8001200480192077,
      "grad_norm": 0.13220560550689697,
      "learning_rate": 6.040694179678308e-06,
      "loss": 0.3192,
      "step": 5998
    },
    {
      "epoch": 1.800420168067227,
      "grad_norm": 0.14262396097183228,
      "learning_rate": 6.022778643506743e-06,
      "loss": 0.3248,
      "step": 5999
    },
    {
      "epoch": 1.800720288115246,
      "grad_norm": 0.15476013720035553,
      "learning_rate": 6.004888888908256e-06,
      "loss": 0.3666,
      "step": 6000
    },
    {
      "epoch": 1.8010204081632653,
      "grad_norm": 0.14492842555046082,
      "learning_rate": 5.987024920790718e-06,
      "loss": 0.3681,
      "step": 6001
    },
    {
      "epoch": 1.8013205282112845,
      "grad_norm": 0.14077229797840118,
      "learning_rate": 5.969186744054866e-06,
      "loss": 0.3446,
      "step": 6002
    },
    {
      "epoch": 1.8016206482593038,
      "grad_norm": 0.12190459668636322,
      "learning_rate": 5.9513743635944305e-06,
      "loss": 0.2874,
      "step": 6003
    },
    {
      "epoch": 1.801920768307323,
      "grad_norm": 0.1958537995815277,
      "learning_rate": 5.9335877842960016e-06,
      "loss": 0.3188,
      "step": 6004
    },
    {
      "epoch": 1.802220888355342,
      "grad_norm": 0.17165255546569824,
      "learning_rate": 5.915827011039166e-06,
      "loss": 0.3645,
      "step": 6005
    },
    {
      "epoch": 1.8025210084033614,
      "grad_norm": 0.14606942236423492,
      "learning_rate": 5.898092048696369e-06,
      "loss": 0.3358,
      "step": 6006
    },
    {
      "epoch": 1.8028211284513804,
      "grad_norm": 0.12356963008642197,
      "learning_rate": 5.8803829021329745e-06,
      "loss": 0.3013,
      "step": 6007
    },
    {
      "epoch": 1.8031212484993997,
      "grad_norm": 0.1409122347831726,
      "learning_rate": 5.862699576207298e-06,
      "loss": 0.341,
      "step": 6008
    },
    {
      "epoch": 1.803421368547419,
      "grad_norm": 0.1566123217344284,
      "learning_rate": 5.845042075770579e-06,
      "loss": 0.3365,
      "step": 6009
    },
    {
      "epoch": 1.8037214885954382,
      "grad_norm": 0.11835382878780365,
      "learning_rate": 5.827410405666911e-06,
      "loss": 0.2722,
      "step": 6010
    },
    {
      "epoch": 1.8040216086434575,
      "grad_norm": 0.12948180735111237,
      "learning_rate": 5.809804570733379e-06,
      "loss": 0.3027,
      "step": 6011
    },
    {
      "epoch": 1.8043217286914766,
      "grad_norm": 0.13504937291145325,
      "learning_rate": 5.792224575799909e-06,
      "loss": 0.3196,
      "step": 6012
    },
    {
      "epoch": 1.8046218487394958,
      "grad_norm": 0.13487012684345245,
      "learning_rate": 5.774670425689388e-06,
      "loss": 0.3207,
      "step": 6013
    },
    {
      "epoch": 1.8049219687875149,
      "grad_norm": 0.1346614956855774,
      "learning_rate": 5.757142125217596e-06,
      "loss": 0.3139,
      "step": 6014
    },
    {
      "epoch": 1.8052220888355341,
      "grad_norm": 0.14863623678684235,
      "learning_rate": 5.73963967919321e-06,
      "loss": 0.3089,
      "step": 6015
    },
    {
      "epoch": 1.8055222088835534,
      "grad_norm": 0.1441640555858612,
      "learning_rate": 5.722163092417854e-06,
      "loss": 0.3358,
      "step": 6016
    },
    {
      "epoch": 1.8058223289315727,
      "grad_norm": 0.1421898901462555,
      "learning_rate": 5.704712369685982e-06,
      "loss": 0.3461,
      "step": 6017
    },
    {
      "epoch": 1.806122448979592,
      "grad_norm": 0.14115412533283234,
      "learning_rate": 5.687287515785034e-06,
      "loss": 0.3346,
      "step": 6018
    },
    {
      "epoch": 1.806422569027611,
      "grad_norm": 0.13820458948612213,
      "learning_rate": 5.669888535495327e-06,
      "loss": 0.3218,
      "step": 6019
    },
    {
      "epoch": 1.8067226890756303,
      "grad_norm": 0.13921624422073364,
      "learning_rate": 5.652515433590033e-06,
      "loss": 0.2962,
      "step": 6020
    },
    {
      "epoch": 1.8070228091236493,
      "grad_norm": 0.13091017305850983,
      "learning_rate": 5.6351682148352956e-06,
      "loss": 0.311,
      "step": 6021
    },
    {
      "epoch": 1.8073229291716686,
      "grad_norm": 0.12136702984571457,
      "learning_rate": 5.61784688399013e-06,
      "loss": 0.283,
      "step": 6022
    },
    {
      "epoch": 1.8076230492196879,
      "grad_norm": 0.13937890529632568,
      "learning_rate": 5.600551445806412e-06,
      "loss": 0.3326,
      "step": 6023
    },
    {
      "epoch": 1.8079231692677071,
      "grad_norm": 0.12617534399032593,
      "learning_rate": 5.583281905028981e-06,
      "loss": 0.2787,
      "step": 6024
    },
    {
      "epoch": 1.8082232893157264,
      "grad_norm": 0.12522746622562408,
      "learning_rate": 5.566038266395501e-06,
      "loss": 0.2995,
      "step": 6025
    },
    {
      "epoch": 1.8085234093637454,
      "grad_norm": 0.13523294031620026,
      "learning_rate": 5.548820534636601e-06,
      "loss": 0.3327,
      "step": 6026
    },
    {
      "epoch": 1.8088235294117647,
      "grad_norm": 0.13850083947181702,
      "learning_rate": 5.531628714475756e-06,
      "loss": 0.3378,
      "step": 6027
    },
    {
      "epoch": 1.8091236494597838,
      "grad_norm": 0.14354543387889862,
      "learning_rate": 5.5144628106293504e-06,
      "loss": 0.3489,
      "step": 6028
    },
    {
      "epoch": 1.809423769507803,
      "grad_norm": 0.13455809652805328,
      "learning_rate": 5.4973228278066165e-06,
      "loss": 0.3046,
      "step": 6029
    },
    {
      "epoch": 1.8097238895558223,
      "grad_norm": 0.15517891943454742,
      "learning_rate": 5.480208770709771e-06,
      "loss": 0.3443,
      "step": 6030
    },
    {
      "epoch": 1.8100240096038416,
      "grad_norm": 0.13433986902236938,
      "learning_rate": 5.463120644033826e-06,
      "loss": 0.3268,
      "step": 6031
    },
    {
      "epoch": 1.8103241296518608,
      "grad_norm": 0.1414816975593567,
      "learning_rate": 5.4460584524667066e-06,
      "loss": 0.3199,
      "step": 6032
    },
    {
      "epoch": 1.8106242496998801,
      "grad_norm": 0.2488507181406021,
      "learning_rate": 5.4290222006892376e-06,
      "loss": 0.3088,
      "step": 6033
    },
    {
      "epoch": 1.8109243697478992,
      "grad_norm": 0.1471622735261917,
      "learning_rate": 5.412011893375124e-06,
      "loss": 0.3491,
      "step": 6034
    },
    {
      "epoch": 1.8112244897959182,
      "grad_norm": 0.14099839329719543,
      "learning_rate": 5.395027535190967e-06,
      "loss": 0.3299,
      "step": 6035
    },
    {
      "epoch": 1.8115246098439375,
      "grad_norm": 0.16024468839168549,
      "learning_rate": 5.378069130796193e-06,
      "loss": 0.3463,
      "step": 6036
    },
    {
      "epoch": 1.8118247298919568,
      "grad_norm": 0.135604590177536,
      "learning_rate": 5.36113668484316e-06,
      "loss": 0.322,
      "step": 6037
    },
    {
      "epoch": 1.812124849939976,
      "grad_norm": 0.1391889750957489,
      "learning_rate": 5.344230201977096e-06,
      "loss": 0.2822,
      "step": 6038
    },
    {
      "epoch": 1.8124249699879953,
      "grad_norm": 0.15819205343723297,
      "learning_rate": 5.327349686836103e-06,
      "loss": 0.337,
      "step": 6039
    },
    {
      "epoch": 1.8127250900360146,
      "grad_norm": 0.13925877213478088,
      "learning_rate": 5.310495144051142e-06,
      "loss": 0.3259,
      "step": 6040
    },
    {
      "epoch": 1.8130252100840336,
      "grad_norm": 0.14417746663093567,
      "learning_rate": 5.293666578246081e-06,
      "loss": 0.3574,
      "step": 6041
    },
    {
      "epoch": 1.8133253301320527,
      "grad_norm": 0.13997943699359894,
      "learning_rate": 5.2768639940376285e-06,
      "loss": 0.3283,
      "step": 6042
    },
    {
      "epoch": 1.813625450180072,
      "grad_norm": 0.14645910263061523,
      "learning_rate": 5.260087396035385e-06,
      "loss": 0.3107,
      "step": 6043
    },
    {
      "epoch": 1.8139255702280912,
      "grad_norm": 0.138387531042099,
      "learning_rate": 5.243336788841835e-06,
      "loss": 0.3173,
      "step": 6044
    },
    {
      "epoch": 1.8142256902761105,
      "grad_norm": 0.15049739181995392,
      "learning_rate": 5.226612177052292e-06,
      "loss": 0.3612,
      "step": 6045
    },
    {
      "epoch": 1.8145258103241297,
      "grad_norm": 0.12821055948734283,
      "learning_rate": 5.209913565254964e-06,
      "loss": 0.2949,
      "step": 6046
    },
    {
      "epoch": 1.814825930372149,
      "grad_norm": 0.1518714874982834,
      "learning_rate": 5.193240958030954e-06,
      "loss": 0.3597,
      "step": 6047
    },
    {
      "epoch": 1.815126050420168,
      "grad_norm": 0.136074498295784,
      "learning_rate": 5.1765943599541565e-06,
      "loss": 0.3219,
      "step": 6048
    },
    {
      "epoch": 1.8154261704681873,
      "grad_norm": 0.13379958271980286,
      "learning_rate": 5.159973775591409e-06,
      "loss": 0.2928,
      "step": 6049
    },
    {
      "epoch": 1.8157262905162064,
      "grad_norm": 0.141684889793396,
      "learning_rate": 5.143379209502352e-06,
      "loss": 0.3436,
      "step": 6050
    },
    {
      "epoch": 1.8160264105642256,
      "grad_norm": 0.133530855178833,
      "learning_rate": 5.126810666239523e-06,
      "loss": 0.3262,
      "step": 6051
    },
    {
      "epoch": 1.816326530612245,
      "grad_norm": 0.14149607717990875,
      "learning_rate": 5.1102681503483405e-06,
      "loss": 0.3661,
      "step": 6052
    },
    {
      "epoch": 1.8166266506602642,
      "grad_norm": 0.13825611770153046,
      "learning_rate": 5.093751666367008e-06,
      "loss": 0.3447,
      "step": 6053
    },
    {
      "epoch": 1.8169267707082835,
      "grad_norm": 0.13014687597751617,
      "learning_rate": 5.077261218826657e-06,
      "loss": 0.3188,
      "step": 6054
    },
    {
      "epoch": 1.8172268907563025,
      "grad_norm": 0.16165469586849213,
      "learning_rate": 5.060796812251267e-06,
      "loss": 0.3487,
      "step": 6055
    },
    {
      "epoch": 1.8175270108043218,
      "grad_norm": 0.136117085814476,
      "learning_rate": 5.0443584511576266e-06,
      "loss": 0.3348,
      "step": 6056
    },
    {
      "epoch": 1.8178271308523408,
      "grad_norm": 0.1398078203201294,
      "learning_rate": 5.02794614005544e-06,
      "loss": 0.3206,
      "step": 6057
    },
    {
      "epoch": 1.81812725090036,
      "grad_norm": 0.13721197843551636,
      "learning_rate": 5.011559883447215e-06,
      "loss": 0.3112,
      "step": 6058
    },
    {
      "epoch": 1.8184273709483794,
      "grad_norm": 0.13817249238491058,
      "learning_rate": 4.9951996858283445e-06,
      "loss": 0.3026,
      "step": 6059
    },
    {
      "epoch": 1.8187274909963986,
      "grad_norm": 0.15346643328666687,
      "learning_rate": 4.978865551687062e-06,
      "loss": 0.4138,
      "step": 6060
    },
    {
      "epoch": 1.819027611044418,
      "grad_norm": 0.1322508603334427,
      "learning_rate": 4.96255748550446e-06,
      "loss": 0.3104,
      "step": 6061
    },
    {
      "epoch": 1.819327731092437,
      "grad_norm": 0.1391531378030777,
      "learning_rate": 4.9462754917544375e-06,
      "loss": 0.3215,
      "step": 6062
    },
    {
      "epoch": 1.8196278511404562,
      "grad_norm": 0.15099678933620453,
      "learning_rate": 4.930019574903788e-06,
      "loss": 0.3407,
      "step": 6063
    },
    {
      "epoch": 1.8199279711884753,
      "grad_norm": 0.14447703957557678,
      "learning_rate": 4.913789739412145e-06,
      "loss": 0.3536,
      "step": 6064
    },
    {
      "epoch": 1.8202280912364945,
      "grad_norm": 0.14205755293369293,
      "learning_rate": 4.89758598973199e-06,
      "loss": 0.3422,
      "step": 6065
    },
    {
      "epoch": 1.8205282112845138,
      "grad_norm": 0.1352236121892929,
      "learning_rate": 4.881408330308612e-06,
      "loss": 0.3327,
      "step": 6066
    },
    {
      "epoch": 1.820828331332533,
      "grad_norm": 0.12289389967918396,
      "learning_rate": 4.86525676558015e-06,
      "loss": 0.2921,
      "step": 6067
    },
    {
      "epoch": 1.8211284513805523,
      "grad_norm": 0.13680601119995117,
      "learning_rate": 4.84913129997765e-06,
      "loss": 0.3273,
      "step": 6068
    },
    {
      "epoch": 1.8214285714285714,
      "grad_norm": 0.14029449224472046,
      "learning_rate": 4.8330319379249255e-06,
      "loss": 0.331,
      "step": 6069
    },
    {
      "epoch": 1.8217286914765907,
      "grad_norm": 0.14590826630592346,
      "learning_rate": 4.8169586838386346e-06,
      "loss": 0.3302,
      "step": 6070
    },
    {
      "epoch": 1.8220288115246097,
      "grad_norm": 0.13038024306297302,
      "learning_rate": 4.800911542128295e-06,
      "loss": 0.3136,
      "step": 6071
    },
    {
      "epoch": 1.822328931572629,
      "grad_norm": 0.1627214103937149,
      "learning_rate": 4.784890517196283e-06,
      "loss": 0.2965,
      "step": 6072
    },
    {
      "epoch": 1.8226290516206483,
      "grad_norm": 0.14358121156692505,
      "learning_rate": 4.768895613437763e-06,
      "loss": 0.3538,
      "step": 6073
    },
    {
      "epoch": 1.8229291716686675,
      "grad_norm": 0.1441047191619873,
      "learning_rate": 4.752926835240756e-06,
      "loss": 0.3311,
      "step": 6074
    },
    {
      "epoch": 1.8232292917166868,
      "grad_norm": 0.13355863094329834,
      "learning_rate": 4.7369841869861045e-06,
      "loss": 0.2886,
      "step": 6075
    },
    {
      "epoch": 1.8235294117647058,
      "grad_norm": 0.14023597538471222,
      "learning_rate": 4.721067673047497e-06,
      "loss": 0.3348,
      "step": 6076
    },
    {
      "epoch": 1.8238295318127251,
      "grad_norm": 0.13049958646297455,
      "learning_rate": 4.705177297791463e-06,
      "loss": 0.3054,
      "step": 6077
    },
    {
      "epoch": 1.8241296518607442,
      "grad_norm": 0.13980326056480408,
      "learning_rate": 4.689313065577328e-06,
      "loss": 0.3282,
      "step": 6078
    },
    {
      "epoch": 1.8244297719087634,
      "grad_norm": 0.13872842490673065,
      "learning_rate": 4.673474980757264e-06,
      "loss": 0.3089,
      "step": 6079
    },
    {
      "epoch": 1.8247298919567827,
      "grad_norm": 0.13736894726753235,
      "learning_rate": 4.657663047676264e-06,
      "loss": 0.3271,
      "step": 6080
    },
    {
      "epoch": 1.825030012004802,
      "grad_norm": 0.13429389894008636,
      "learning_rate": 4.6418772706721565e-06,
      "loss": 0.3148,
      "step": 6081
    },
    {
      "epoch": 1.8253301320528212,
      "grad_norm": 0.14426174759864807,
      "learning_rate": 4.6261176540755904e-06,
      "loss": 0.3375,
      "step": 6082
    },
    {
      "epoch": 1.8256302521008403,
      "grad_norm": 0.13774125277996063,
      "learning_rate": 4.610384202210028e-06,
      "loss": 0.3294,
      "step": 6083
    },
    {
      "epoch": 1.8259303721488596,
      "grad_norm": 0.16279757022857666,
      "learning_rate": 4.5946769193917714e-06,
      "loss": 0.2977,
      "step": 6084
    },
    {
      "epoch": 1.8262304921968786,
      "grad_norm": 0.1437031626701355,
      "learning_rate": 4.578995809929931e-06,
      "loss": 0.3454,
      "step": 6085
    },
    {
      "epoch": 1.8265306122448979,
      "grad_norm": 0.13721896708011627,
      "learning_rate": 4.563340878126432e-06,
      "loss": 0.3293,
      "step": 6086
    },
    {
      "epoch": 1.8268307322929171,
      "grad_norm": 0.12678766250610352,
      "learning_rate": 4.547712128276038e-06,
      "loss": 0.2902,
      "step": 6087
    },
    {
      "epoch": 1.8271308523409364,
      "grad_norm": 0.1424795538187027,
      "learning_rate": 4.532109564666298e-06,
      "loss": 0.3391,
      "step": 6088
    },
    {
      "epoch": 1.8274309723889557,
      "grad_norm": 0.13385315239429474,
      "learning_rate": 4.51653319157761e-06,
      "loss": 0.3196,
      "step": 6089
    },
    {
      "epoch": 1.8277310924369747,
      "grad_norm": 0.13906684517860413,
      "learning_rate": 4.500983013283188e-06,
      "loss": 0.3381,
      "step": 6090
    },
    {
      "epoch": 1.828031212484994,
      "grad_norm": 0.1481311172246933,
      "learning_rate": 4.485459034049022e-06,
      "loss": 0.337,
      "step": 6091
    },
    {
      "epoch": 1.828331332533013,
      "grad_norm": 0.12925542891025543,
      "learning_rate": 4.4699612581339255e-06,
      "loss": 0.3032,
      "step": 6092
    },
    {
      "epoch": 1.8286314525810323,
      "grad_norm": 0.13815073668956757,
      "learning_rate": 4.454489689789576e-06,
      "loss": 0.3315,
      "step": 6093
    },
    {
      "epoch": 1.8289315726290516,
      "grad_norm": 0.14559106528759003,
      "learning_rate": 4.439044333260389e-06,
      "loss": 0.3465,
      "step": 6094
    },
    {
      "epoch": 1.8292316926770709,
      "grad_norm": 0.14658205211162567,
      "learning_rate": 4.423625192783643e-06,
      "loss": 0.3493,
      "step": 6095
    },
    {
      "epoch": 1.8295318127250901,
      "grad_norm": 0.1342991292476654,
      "learning_rate": 4.408232272589375e-06,
      "loss": 0.3209,
      "step": 6096
    },
    {
      "epoch": 1.8298319327731094,
      "grad_norm": 0.1332305371761322,
      "learning_rate": 4.3928655769004735e-06,
      "loss": 0.3115,
      "step": 6097
    },
    {
      "epoch": 1.8301320528211285,
      "grad_norm": 0.14046160876750946,
      "learning_rate": 4.3775251099326234e-06,
      "loss": 0.335,
      "step": 6098
    },
    {
      "epoch": 1.8304321728691475,
      "grad_norm": 0.14250709116458893,
      "learning_rate": 4.362210875894302e-06,
      "loss": 0.3192,
      "step": 6099
    },
    {
      "epoch": 1.8307322929171668,
      "grad_norm": 0.15121778845787048,
      "learning_rate": 4.34692287898677e-06,
      "loss": 0.3292,
      "step": 6100
    },
    {
      "epoch": 1.831032412965186,
      "grad_norm": 0.13479244709014893,
      "learning_rate": 4.33166112340413e-06,
      "loss": 0.3366,
      "step": 6101
    },
    {
      "epoch": 1.8313325330132053,
      "grad_norm": 0.14662350714206696,
      "learning_rate": 4.316425613333286e-06,
      "loss": 0.3178,
      "step": 6102
    },
    {
      "epoch": 1.8316326530612246,
      "grad_norm": 0.13920946419239044,
      "learning_rate": 4.301216352953896e-06,
      "loss": 0.3309,
      "step": 6103
    },
    {
      "epoch": 1.8319327731092439,
      "grad_norm": 0.14373935759067535,
      "learning_rate": 4.286033346438478e-06,
      "loss": 0.3579,
      "step": 6104
    },
    {
      "epoch": 1.832232893157263,
      "grad_norm": 0.13850827515125275,
      "learning_rate": 4.270876597952278e-06,
      "loss": 0.3567,
      "step": 6105
    },
    {
      "epoch": 1.832533013205282,
      "grad_norm": 0.1590890735387802,
      "learning_rate": 4.255746111653425e-06,
      "loss": 0.3535,
      "step": 6106
    },
    {
      "epoch": 1.8328331332533012,
      "grad_norm": 0.1391279101371765,
      "learning_rate": 4.240641891692754e-06,
      "loss": 0.316,
      "step": 6107
    },
    {
      "epoch": 1.8331332533013205,
      "grad_norm": 0.13280515372753143,
      "learning_rate": 4.225563942213939e-06,
      "loss": 0.3132,
      "step": 6108
    },
    {
      "epoch": 1.8334333733493398,
      "grad_norm": 0.15908294916152954,
      "learning_rate": 4.21051226735345e-06,
      "loss": 0.3567,
      "step": 6109
    },
    {
      "epoch": 1.833733493397359,
      "grad_norm": 0.13278742134571075,
      "learning_rate": 4.195486871240562e-06,
      "loss": 0.3045,
      "step": 6110
    },
    {
      "epoch": 1.8340336134453783,
      "grad_norm": 0.1318618208169937,
      "learning_rate": 4.180487757997276e-06,
      "loss": 0.299,
      "step": 6111
    },
    {
      "epoch": 1.8343337334933973,
      "grad_norm": 0.187179833650589,
      "learning_rate": 4.165514931738468e-06,
      "loss": 0.4666,
      "step": 6112
    },
    {
      "epoch": 1.8346338535414166,
      "grad_norm": 0.13341879844665527,
      "learning_rate": 4.150568396571741e-06,
      "loss": 0.327,
      "step": 6113
    },
    {
      "epoch": 1.8349339735894357,
      "grad_norm": 0.13785672187805176,
      "learning_rate": 4.135648156597493e-06,
      "loss": 0.3386,
      "step": 6114
    },
    {
      "epoch": 1.835234093637455,
      "grad_norm": 0.13748951256275177,
      "learning_rate": 4.120754215908962e-06,
      "loss": 0.3249,
      "step": 6115
    },
    {
      "epoch": 1.8355342136854742,
      "grad_norm": 0.1445334553718567,
      "learning_rate": 4.105886578592089e-06,
      "loss": 0.3285,
      "step": 6116
    },
    {
      "epoch": 1.8358343337334935,
      "grad_norm": 0.13730578124523163,
      "learning_rate": 4.091045248725645e-06,
      "loss": 0.3357,
      "step": 6117
    },
    {
      "epoch": 1.8361344537815127,
      "grad_norm": 0.13931235671043396,
      "learning_rate": 4.076230230381217e-06,
      "loss": 0.3319,
      "step": 6118
    },
    {
      "epoch": 1.8364345738295318,
      "grad_norm": 0.13775765895843506,
      "learning_rate": 4.061441527623078e-06,
      "loss": 0.3037,
      "step": 6119
    },
    {
      "epoch": 1.836734693877551,
      "grad_norm": 0.12979570031166077,
      "learning_rate": 4.046679144508392e-06,
      "loss": 0.3191,
      "step": 6120
    },
    {
      "epoch": 1.83703481392557,
      "grad_norm": 0.14721934497356415,
      "learning_rate": 4.031943085087009e-06,
      "loss": 0.3514,
      "step": 6121
    },
    {
      "epoch": 1.8373349339735894,
      "grad_norm": 0.12928462028503418,
      "learning_rate": 4.017233353401617e-06,
      "loss": 0.2874,
      "step": 6122
    },
    {
      "epoch": 1.8376350540216086,
      "grad_norm": 0.1679869145154953,
      "learning_rate": 4.002549953487678e-06,
      "loss": 0.3385,
      "step": 6123
    },
    {
      "epoch": 1.837935174069628,
      "grad_norm": 0.12665621936321259,
      "learning_rate": 3.987892889373368e-06,
      "loss": 0.2923,
      "step": 6124
    },
    {
      "epoch": 1.8382352941176472,
      "grad_norm": 0.1349416971206665,
      "learning_rate": 3.973262165079738e-06,
      "loss": 0.314,
      "step": 6125
    },
    {
      "epoch": 1.8385354141656662,
      "grad_norm": 0.14436101913452148,
      "learning_rate": 3.958657784620512e-06,
      "loss": 0.3367,
      "step": 6126
    },
    {
      "epoch": 1.8388355342136855,
      "grad_norm": 0.1507471650838852,
      "learning_rate": 3.944079752002272e-06,
      "loss": 0.3592,
      "step": 6127
    },
    {
      "epoch": 1.8391356542617046,
      "grad_norm": 0.13125640153884888,
      "learning_rate": 3.92952807122432e-06,
      "loss": 0.3067,
      "step": 6128
    },
    {
      "epoch": 1.8394357743097238,
      "grad_norm": 0.15047404170036316,
      "learning_rate": 3.91500274627874e-06,
      "loss": 0.3058,
      "step": 6129
    },
    {
      "epoch": 1.839735894357743,
      "grad_norm": 0.16610467433929443,
      "learning_rate": 3.900503781150366e-06,
      "loss": 0.3592,
      "step": 6130
    },
    {
      "epoch": 1.8400360144057624,
      "grad_norm": 0.13654379546642303,
      "learning_rate": 3.886031179816874e-06,
      "loss": 0.3265,
      "step": 6131
    },
    {
      "epoch": 1.8403361344537816,
      "grad_norm": 0.17456600069999695,
      "learning_rate": 3.871584946248618e-06,
      "loss": 0.3436,
      "step": 6132
    },
    {
      "epoch": 1.8406362545018007,
      "grad_norm": 0.13722942769527435,
      "learning_rate": 3.857165084408776e-06,
      "loss": 0.3314,
      "step": 6133
    },
    {
      "epoch": 1.84093637454982,
      "grad_norm": 0.1379007250070572,
      "learning_rate": 3.842771598253247e-06,
      "loss": 0.3313,
      "step": 6134
    },
    {
      "epoch": 1.841236494597839,
      "grad_norm": 0.13639438152313232,
      "learning_rate": 3.828404491730741e-06,
      "loss": 0.3197,
      "step": 6135
    },
    {
      "epoch": 1.8415366146458583,
      "grad_norm": 0.14406685531139374,
      "learning_rate": 3.8140637687827164e-06,
      "loss": 0.2994,
      "step": 6136
    },
    {
      "epoch": 1.8418367346938775,
      "grad_norm": 0.13586309552192688,
      "learning_rate": 3.7997494333433692e-06,
      "loss": 0.3434,
      "step": 6137
    },
    {
      "epoch": 1.8421368547418968,
      "grad_norm": 0.1358279585838318,
      "learning_rate": 3.785461489339659e-06,
      "loss": 0.3033,
      "step": 6138
    },
    {
      "epoch": 1.842436974789916,
      "grad_norm": 0.14901627600193024,
      "learning_rate": 3.77119994069135e-06,
      "loss": 0.3284,
      "step": 6139
    },
    {
      "epoch": 1.8427370948379351,
      "grad_norm": 0.14877700805664062,
      "learning_rate": 3.7569647913109243e-06,
      "loss": 0.338,
      "step": 6140
    },
    {
      "epoch": 1.8430372148859544,
      "grad_norm": 0.13883346319198608,
      "learning_rate": 3.7427560451036125e-06,
      "loss": 0.3173,
      "step": 6141
    },
    {
      "epoch": 1.8433373349339734,
      "grad_norm": 0.13665060698986053,
      "learning_rate": 3.728573705967442e-06,
      "loss": 0.334,
      "step": 6142
    },
    {
      "epoch": 1.8436374549819927,
      "grad_norm": 0.13771137595176697,
      "learning_rate": 3.7144177777931777e-06,
      "loss": 0.3363,
      "step": 6143
    },
    {
      "epoch": 1.843937575030012,
      "grad_norm": 0.26751354336738586,
      "learning_rate": 3.7002882644643356e-06,
      "loss": 0.3369,
      "step": 6144
    },
    {
      "epoch": 1.8442376950780313,
      "grad_norm": 0.1271308958530426,
      "learning_rate": 3.6861851698571815e-06,
      "loss": 0.3021,
      "step": 6145
    },
    {
      "epoch": 1.8445378151260505,
      "grad_norm": 0.19478288292884827,
      "learning_rate": 3.6721084978407206e-06,
      "loss": 0.328,
      "step": 6146
    },
    {
      "epoch": 1.8448379351740696,
      "grad_norm": 0.13766752183437347,
      "learning_rate": 3.6580582522767417e-06,
      "loss": 0.3071,
      "step": 6147
    },
    {
      "epoch": 1.8451380552220888,
      "grad_norm": 0.13980470597743988,
      "learning_rate": 3.6440344370197834e-06,
      "loss": 0.3354,
      "step": 6148
    },
    {
      "epoch": 1.845438175270108,
      "grad_norm": 0.14060752093791962,
      "learning_rate": 3.6300370559170904e-06,
      "loss": 0.3525,
      "step": 6149
    },
    {
      "epoch": 1.8457382953181272,
      "grad_norm": 0.1450841873884201,
      "learning_rate": 3.6160661128087025e-06,
      "loss": 0.3382,
      "step": 6150
    },
    {
      "epoch": 1.8460384153661464,
      "grad_norm": 0.14197222888469696,
      "learning_rate": 3.6021216115273758e-06,
      "loss": 0.3325,
      "step": 6151
    },
    {
      "epoch": 1.8463385354141657,
      "grad_norm": 0.12967342138290405,
      "learning_rate": 3.5882035558986284e-06,
      "loss": 0.3034,
      "step": 6152
    },
    {
      "epoch": 1.846638655462185,
      "grad_norm": 0.1390308290719986,
      "learning_rate": 3.5743119497407386e-06,
      "loss": 0.353,
      "step": 6153
    },
    {
      "epoch": 1.8469387755102042,
      "grad_norm": 0.15909169614315033,
      "learning_rate": 3.560446796864669e-06,
      "loss": 0.306,
      "step": 6154
    },
    {
      "epoch": 1.8472388955582233,
      "grad_norm": 0.12947727739810944,
      "learning_rate": 3.5466081010742e-06,
      "loss": 0.3016,
      "step": 6155
    },
    {
      "epoch": 1.8475390156062423,
      "grad_norm": 0.14105425775051117,
      "learning_rate": 3.5327958661658058e-06,
      "loss": 0.3306,
      "step": 6156
    },
    {
      "epoch": 1.8478391356542616,
      "grad_norm": 0.1362878829240799,
      "learning_rate": 3.519010095928721e-06,
      "loss": 0.2877,
      "step": 6157
    },
    {
      "epoch": 1.8481392557022809,
      "grad_norm": 0.1474706381559372,
      "learning_rate": 3.5052507941449097e-06,
      "loss": 0.3723,
      "step": 6158
    },
    {
      "epoch": 1.8484393757503002,
      "grad_norm": 0.1433713734149933,
      "learning_rate": 3.491517964589064e-06,
      "loss": 0.3019,
      "step": 6159
    },
    {
      "epoch": 1.8487394957983194,
      "grad_norm": 0.1212991252541542,
      "learning_rate": 3.4778116110286473e-06,
      "loss": 0.2583,
      "step": 6160
    },
    {
      "epoch": 1.8490396158463387,
      "grad_norm": 0.1453191190958023,
      "learning_rate": 3.4641317372238414e-06,
      "loss": 0.3144,
      "step": 6161
    },
    {
      "epoch": 1.8493397358943577,
      "grad_norm": 0.1551435887813568,
      "learning_rate": 3.4504783469275547e-06,
      "loss": 0.3199,
      "step": 6162
    },
    {
      "epoch": 1.8496398559423768,
      "grad_norm": 0.147239089012146,
      "learning_rate": 3.436851443885447e-06,
      "loss": 0.3611,
      "step": 6163
    },
    {
      "epoch": 1.849939975990396,
      "grad_norm": 0.13194699585437775,
      "learning_rate": 3.4232510318358833e-06,
      "loss": 0.2908,
      "step": 6164
    },
    {
      "epoch": 1.8502400960384153,
      "grad_norm": 0.1313251256942749,
      "learning_rate": 3.4096771145099904e-06,
      "loss": 0.3066,
      "step": 6165
    },
    {
      "epoch": 1.8505402160864346,
      "grad_norm": 0.13080264627933502,
      "learning_rate": 3.3961296956316335e-06,
      "loss": 0.2998,
      "step": 6166
    },
    {
      "epoch": 1.8508403361344539,
      "grad_norm": 0.12654796242713928,
      "learning_rate": 3.3826087789173734e-06,
      "loss": 0.3054,
      "step": 6167
    },
    {
      "epoch": 1.8511404561824731,
      "grad_norm": 0.13733084499835968,
      "learning_rate": 3.369114368076509e-06,
      "loss": 0.3307,
      "step": 6168
    },
    {
      "epoch": 1.8514405762304922,
      "grad_norm": 0.13465648889541626,
      "learning_rate": 3.355646466811113e-06,
      "loss": 0.3306,
      "step": 6169
    },
    {
      "epoch": 1.8517406962785115,
      "grad_norm": 0.13510553538799286,
      "learning_rate": 3.34220507881593e-06,
      "loss": 0.3206,
      "step": 6170
    },
    {
      "epoch": 1.8520408163265305,
      "grad_norm": 0.13174894452095032,
      "learning_rate": 3.3287902077784317e-06,
      "loss": 0.285,
      "step": 6171
    },
    {
      "epoch": 1.8523409363745498,
      "grad_norm": 0.13492131233215332,
      "learning_rate": 3.3154018573788528e-06,
      "loss": 0.3102,
      "step": 6172
    },
    {
      "epoch": 1.852641056422569,
      "grad_norm": 0.1444476842880249,
      "learning_rate": 3.3020400312901324e-06,
      "loss": 0.3472,
      "step": 6173
    },
    {
      "epoch": 1.8529411764705883,
      "grad_norm": 0.6041731238365173,
      "learning_rate": 3.28870473317795e-06,
      "loss": 0.3192,
      "step": 6174
    },
    {
      "epoch": 1.8532412965186076,
      "grad_norm": 0.14589573442935944,
      "learning_rate": 3.2753959667006673e-06,
      "loss": 0.3472,
      "step": 6175
    },
    {
      "epoch": 1.8535414165666266,
      "grad_norm": 0.13637158274650574,
      "learning_rate": 3.2621137355093756e-06,
      "loss": 0.3383,
      "step": 6176
    },
    {
      "epoch": 1.853841536614646,
      "grad_norm": 0.14120420813560486,
      "learning_rate": 3.24885804324796e-06,
      "loss": 0.3095,
      "step": 6177
    },
    {
      "epoch": 1.854141656662665,
      "grad_norm": 0.13430336117744446,
      "learning_rate": 3.2356288935529335e-06,
      "loss": 0.3196,
      "step": 6178
    },
    {
      "epoch": 1.8544417767106842,
      "grad_norm": 0.12128697335720062,
      "learning_rate": 3.2224262900535483e-06,
      "loss": 0.2697,
      "step": 6179
    },
    {
      "epoch": 1.8547418967587035,
      "grad_norm": 0.12741613388061523,
      "learning_rate": 3.209250236371797e-06,
      "loss": 0.2973,
      "step": 6180
    },
    {
      "epoch": 1.8550420168067228,
      "grad_norm": 0.13828550279140472,
      "learning_rate": 3.1961007361223983e-06,
      "loss": 0.3176,
      "step": 6181
    },
    {
      "epoch": 1.855342136854742,
      "grad_norm": 0.14088614284992218,
      "learning_rate": 3.1829777929127447e-06,
      "loss": 0.3294,
      "step": 6182
    },
    {
      "epoch": 1.855642256902761,
      "grad_norm": 0.13326530158519745,
      "learning_rate": 3.1698814103429895e-06,
      "loss": 0.3191,
      "step": 6183
    },
    {
      "epoch": 1.8559423769507803,
      "grad_norm": 0.13025276362895966,
      "learning_rate": 3.156811592005937e-06,
      "loss": 0.3197,
      "step": 6184
    },
    {
      "epoch": 1.8562424969987994,
      "grad_norm": 0.14010116457939148,
      "learning_rate": 3.143768341487163e-06,
      "loss": 0.3246,
      "step": 6185
    },
    {
      "epoch": 1.8565426170468187,
      "grad_norm": 0.1410944014787674,
      "learning_rate": 3.13075166236495e-06,
      "loss": 0.3408,
      "step": 6186
    },
    {
      "epoch": 1.856842737094838,
      "grad_norm": 0.17456680536270142,
      "learning_rate": 3.1177615582102528e-06,
      "loss": 0.3391,
      "step": 6187
    },
    {
      "epoch": 1.8571428571428572,
      "grad_norm": 0.12378163635730743,
      "learning_rate": 3.1047980325867643e-06,
      "loss": 0.2792,
      "step": 6188
    },
    {
      "epoch": 1.8574429771908765,
      "grad_norm": 0.13946925103664398,
      "learning_rate": 3.091861089050874e-06,
      "loss": 0.334,
      "step": 6189
    },
    {
      "epoch": 1.8577430972388955,
      "grad_norm": 0.14545568823814392,
      "learning_rate": 3.0789507311516864e-06,
      "loss": 0.3196,
      "step": 6190
    },
    {
      "epoch": 1.8580432172869148,
      "grad_norm": 0.13843731582164764,
      "learning_rate": 3.0660669624310245e-06,
      "loss": 0.33,
      "step": 6191
    },
    {
      "epoch": 1.8583433373349338,
      "grad_norm": 0.12731510400772095,
      "learning_rate": 3.053209786423372e-06,
      "loss": 0.2945,
      "step": 6192
    },
    {
      "epoch": 1.8586434573829531,
      "grad_norm": 0.1322941929101944,
      "learning_rate": 3.0403792066559744e-06,
      "loss": 0.2708,
      "step": 6193
    },
    {
      "epoch": 1.8589435774309724,
      "grad_norm": 0.12273052334785461,
      "learning_rate": 3.027575226648749e-06,
      "loss": 0.2851,
      "step": 6194
    },
    {
      "epoch": 1.8592436974789917,
      "grad_norm": 0.13278886675834656,
      "learning_rate": 3.014797849914319e-06,
      "loss": 0.304,
      "step": 6195
    },
    {
      "epoch": 1.859543817527011,
      "grad_norm": 0.1393134891986847,
      "learning_rate": 3.0020470799580146e-06,
      "loss": 0.3533,
      "step": 6196
    },
    {
      "epoch": 1.85984393757503,
      "grad_norm": 0.14386825263500214,
      "learning_rate": 2.9893229202778374e-06,
      "loss": 0.3234,
      "step": 6197
    },
    {
      "epoch": 1.8601440576230492,
      "grad_norm": 0.13732436299324036,
      "learning_rate": 2.9766253743645502e-06,
      "loss": 0.3315,
      "step": 6198
    },
    {
      "epoch": 1.8604441776710683,
      "grad_norm": 0.13331647217273712,
      "learning_rate": 2.9639544457015666e-06,
      "loss": 0.3119,
      "step": 6199
    },
    {
      "epoch": 1.8607442977190876,
      "grad_norm": 0.14015549421310425,
      "learning_rate": 2.9513101377650175e-06,
      "loss": 0.3078,
      "step": 6200
    },
    {
      "epoch": 1.8610444177671068,
      "grad_norm": 0.1331723928451538,
      "learning_rate": 2.9386924540236948e-06,
      "loss": 0.3048,
      "step": 6201
    },
    {
      "epoch": 1.861344537815126,
      "grad_norm": 0.14064151048660278,
      "learning_rate": 2.9261013979391407e-06,
      "loss": 0.3428,
      "step": 6202
    },
    {
      "epoch": 1.8616446578631454,
      "grad_norm": 0.14177489280700684,
      "learning_rate": 2.9135369729655583e-06,
      "loss": 0.351,
      "step": 6203
    },
    {
      "epoch": 1.8619447779111644,
      "grad_norm": 0.1311153918504715,
      "learning_rate": 2.9009991825498684e-06,
      "loss": 0.2902,
      "step": 6204
    },
    {
      "epoch": 1.8622448979591837,
      "grad_norm": 0.137837752699852,
      "learning_rate": 2.888488030131653e-06,
      "loss": 0.3241,
      "step": 6205
    },
    {
      "epoch": 1.8625450180072027,
      "grad_norm": 0.13662120699882507,
      "learning_rate": 2.8760035191432e-06,
      "loss": 0.3118,
      "step": 6206
    },
    {
      "epoch": 1.862845138055222,
      "grad_norm": 0.13921856880187988,
      "learning_rate": 2.863545653009525e-06,
      "loss": 0.3412,
      "step": 6207
    },
    {
      "epoch": 1.8631452581032413,
      "grad_norm": 0.13769812881946564,
      "learning_rate": 2.851114435148261e-06,
      "loss": 0.3458,
      "step": 6208
    },
    {
      "epoch": 1.8634453781512605,
      "grad_norm": 0.12593281269073486,
      "learning_rate": 2.83870986896978e-06,
      "loss": 0.2957,
      "step": 6209
    },
    {
      "epoch": 1.8637454981992798,
      "grad_norm": 0.13339969515800476,
      "learning_rate": 2.8263319578771485e-06,
      "loss": 0.307,
      "step": 6210
    },
    {
      "epoch": 1.864045618247299,
      "grad_norm": 0.11993400007486343,
      "learning_rate": 2.813980705266095e-06,
      "loss": 0.2759,
      "step": 6211
    },
    {
      "epoch": 1.8643457382953181,
      "grad_norm": 0.13781361281871796,
      "learning_rate": 2.801656114525031e-06,
      "loss": 0.3271,
      "step": 6212
    },
    {
      "epoch": 1.8646458583433372,
      "grad_norm": 0.12544366717338562,
      "learning_rate": 2.789358189035096e-06,
      "loss": 0.2687,
      "step": 6213
    },
    {
      "epoch": 1.8649459783913565,
      "grad_norm": 0.14004136621952057,
      "learning_rate": 2.777086932170048e-06,
      "loss": 0.3392,
      "step": 6214
    },
    {
      "epoch": 1.8652460984393757,
      "grad_norm": 0.12872520089149475,
      "learning_rate": 2.7648423472963927e-06,
      "loss": 0.318,
      "step": 6215
    },
    {
      "epoch": 1.865546218487395,
      "grad_norm": 0.13727736473083496,
      "learning_rate": 2.752624437773299e-06,
      "loss": 0.331,
      "step": 6216
    },
    {
      "epoch": 1.8658463385354143,
      "grad_norm": 0.12658123672008514,
      "learning_rate": 2.740433206952575e-06,
      "loss": 0.2883,
      "step": 6217
    },
    {
      "epoch": 1.8661464585834335,
      "grad_norm": 0.13739939033985138,
      "learning_rate": 2.7282686581787674e-06,
      "loss": 0.3244,
      "step": 6218
    },
    {
      "epoch": 1.8664465786314526,
      "grad_norm": 0.1341625303030014,
      "learning_rate": 2.7161307947890957e-06,
      "loss": 0.2999,
      "step": 6219
    },
    {
      "epoch": 1.8667466986794716,
      "grad_norm": 0.12846803665161133,
      "learning_rate": 2.704019620113407e-06,
      "loss": 0.311,
      "step": 6220
    },
    {
      "epoch": 1.867046818727491,
      "grad_norm": 0.1295066922903061,
      "learning_rate": 2.6919351374743e-06,
      "loss": 0.2861,
      "step": 6221
    },
    {
      "epoch": 1.8673469387755102,
      "grad_norm": 0.13613493740558624,
      "learning_rate": 2.6798773501869878e-06,
      "loss": 0.3222,
      "step": 6222
    },
    {
      "epoch": 1.8676470588235294,
      "grad_norm": 0.14230984449386597,
      "learning_rate": 2.6678462615593925e-06,
      "loss": 0.3588,
      "step": 6223
    },
    {
      "epoch": 1.8679471788715487,
      "grad_norm": 0.13602472841739655,
      "learning_rate": 2.6558418748921177e-06,
      "loss": 0.2879,
      "step": 6224
    },
    {
      "epoch": 1.868247298919568,
      "grad_norm": 0.14377835392951965,
      "learning_rate": 2.643864193478407e-06,
      "loss": 0.3034,
      "step": 6225
    },
    {
      "epoch": 1.868547418967587,
      "grad_norm": 0.13924340903759003,
      "learning_rate": 2.6319132206042206e-06,
      "loss": 0.3208,
      "step": 6226
    },
    {
      "epoch": 1.8688475390156063,
      "grad_norm": 0.12001512944698334,
      "learning_rate": 2.6199889595481584e-06,
      "loss": 0.2733,
      "step": 6227
    },
    {
      "epoch": 1.8691476590636253,
      "grad_norm": 0.14791560173034668,
      "learning_rate": 2.608091413581504e-06,
      "loss": 0.3565,
      "step": 6228
    },
    {
      "epoch": 1.8694477791116446,
      "grad_norm": 0.1310638189315796,
      "learning_rate": 2.5962205859682343e-06,
      "loss": 0.3036,
      "step": 6229
    },
    {
      "epoch": 1.8697478991596639,
      "grad_norm": 0.1368735432624817,
      "learning_rate": 2.584376479964945e-06,
      "loss": 0.316,
      "step": 6230
    },
    {
      "epoch": 1.8700480192076832,
      "grad_norm": 0.17357869446277618,
      "learning_rate": 2.572559098820937e-06,
      "loss": 0.3233,
      "step": 6231
    },
    {
      "epoch": 1.8703481392557024,
      "grad_norm": 0.14382725954055786,
      "learning_rate": 2.5607684457782055e-06,
      "loss": 0.3314,
      "step": 6232
    },
    {
      "epoch": 1.8706482593037215,
      "grad_norm": 0.13554109632968903,
      "learning_rate": 2.54900452407133e-06,
      "loss": 0.331,
      "step": 6233
    },
    {
      "epoch": 1.8709483793517407,
      "grad_norm": 0.12987254559993744,
      "learning_rate": 2.5372673369276514e-06,
      "loss": 0.275,
      "step": 6234
    },
    {
      "epoch": 1.8712484993997598,
      "grad_norm": 0.13484419882297516,
      "learning_rate": 2.5255568875671042e-06,
      "loss": 0.3219,
      "step": 6235
    },
    {
      "epoch": 1.871548619447779,
      "grad_norm": 0.1299813985824585,
      "learning_rate": 2.5138731792023197e-06,
      "loss": 0.296,
      "step": 6236
    },
    {
      "epoch": 1.8718487394957983,
      "grad_norm": 0.13492323458194733,
      "learning_rate": 2.5022162150386107e-06,
      "loss": 0.3178,
      "step": 6237
    },
    {
      "epoch": 1.8721488595438176,
      "grad_norm": 0.13738395273685455,
      "learning_rate": 2.490585998273909e-06,
      "loss": 0.3024,
      "step": 6238
    },
    {
      "epoch": 1.8724489795918369,
      "grad_norm": 0.13431774079799652,
      "learning_rate": 2.478982532098828e-06,
      "loss": 0.3195,
      "step": 6239
    },
    {
      "epoch": 1.872749099639856,
      "grad_norm": 0.14140775799751282,
      "learning_rate": 2.4674058196966663e-06,
      "loss": 0.3145,
      "step": 6240
    },
    {
      "epoch": 1.8730492196878752,
      "grad_norm": 0.1380709409713745,
      "learning_rate": 2.4558558642433615e-06,
      "loss": 0.3109,
      "step": 6241
    },
    {
      "epoch": 1.8733493397358942,
      "grad_norm": 0.12591363489627838,
      "learning_rate": 2.44433266890749e-06,
      "loss": 0.2855,
      "step": 6242
    },
    {
      "epoch": 1.8736494597839135,
      "grad_norm": 0.14441455900669098,
      "learning_rate": 2.432836236850322e-06,
      "loss": 0.3523,
      "step": 6243
    },
    {
      "epoch": 1.8739495798319328,
      "grad_norm": 0.1465701162815094,
      "learning_rate": 2.421366571225769e-06,
      "loss": 0.3623,
      "step": 6244
    },
    {
      "epoch": 1.874249699879952,
      "grad_norm": 0.13750213384628296,
      "learning_rate": 2.409923675180403e-06,
      "loss": 0.3363,
      "step": 6245
    },
    {
      "epoch": 1.8745498199279713,
      "grad_norm": 0.12628985941410065,
      "learning_rate": 2.3985075518534682e-06,
      "loss": 0.295,
      "step": 6246
    },
    {
      "epoch": 1.8748499399759904,
      "grad_norm": 0.13401252031326294,
      "learning_rate": 2.387118204376804e-06,
      "loss": 0.3156,
      "step": 6247
    },
    {
      "epoch": 1.8751500600240096,
      "grad_norm": 0.1269846260547638,
      "learning_rate": 2.375755635874988e-06,
      "loss": 0.2952,
      "step": 6248
    },
    {
      "epoch": 1.8754501800720287,
      "grad_norm": 0.13198496401309967,
      "learning_rate": 2.364419849465205e-06,
      "loss": 0.296,
      "step": 6249
    },
    {
      "epoch": 1.875750300120048,
      "grad_norm": 0.13994579017162323,
      "learning_rate": 2.353110848257267e-06,
      "loss": 0.3445,
      "step": 6250
    },
    {
      "epoch": 1.8760504201680672,
      "grad_norm": 0.21288661658763885,
      "learning_rate": 2.3418286353537154e-06,
      "loss": 0.3113,
      "step": 6251
    },
    {
      "epoch": 1.8763505402160865,
      "grad_norm": 0.13985422253608704,
      "learning_rate": 2.3305732138496404e-06,
      "loss": 0.3039,
      "step": 6252
    },
    {
      "epoch": 1.8766506602641058,
      "grad_norm": 0.13241468369960785,
      "learning_rate": 2.3193445868328944e-06,
      "loss": 0.3079,
      "step": 6253
    },
    {
      "epoch": 1.8769507803121248,
      "grad_norm": 0.13896723091602325,
      "learning_rate": 2.308142757383902e-06,
      "loss": 0.3081,
      "step": 6254
    },
    {
      "epoch": 1.877250900360144,
      "grad_norm": 0.14832334220409393,
      "learning_rate": 2.2969677285757385e-06,
      "loss": 0.3642,
      "step": 6255
    },
    {
      "epoch": 1.8775510204081631,
      "grad_norm": 0.1311514973640442,
      "learning_rate": 2.2858195034741626e-06,
      "loss": 0.317,
      "step": 6256
    },
    {
      "epoch": 1.8778511404561824,
      "grad_norm": 0.28102830052375793,
      "learning_rate": 2.274698085137561e-06,
      "loss": 0.3163,
      "step": 6257
    },
    {
      "epoch": 1.8781512605042017,
      "grad_norm": 0.1358410269021988,
      "learning_rate": 2.26360347661696e-06,
      "loss": 0.3264,
      "step": 6258
    },
    {
      "epoch": 1.878451380552221,
      "grad_norm": 0.14667531847953796,
      "learning_rate": 2.2525356809560472e-06,
      "loss": 0.3245,
      "step": 6259
    },
    {
      "epoch": 1.8787515006002402,
      "grad_norm": 0.12747597694396973,
      "learning_rate": 2.241494701191127e-06,
      "loss": 0.306,
      "step": 6260
    },
    {
      "epoch": 1.8790516206482593,
      "grad_norm": 0.13438037037849426,
      "learning_rate": 2.2304805403511873e-06,
      "loss": 0.3146,
      "step": 6261
    },
    {
      "epoch": 1.8793517406962785,
      "grad_norm": 0.14660976827144623,
      "learning_rate": 2.219493201457834e-06,
      "loss": 0.3551,
      "step": 6262
    },
    {
      "epoch": 1.8796518607442976,
      "grad_norm": 0.13732437789440155,
      "learning_rate": 2.208532687525311e-06,
      "loss": 0.3265,
      "step": 6263
    },
    {
      "epoch": 1.8799519807923168,
      "grad_norm": 0.1420273333787918,
      "learning_rate": 2.197599001560502e-06,
      "loss": 0.3023,
      "step": 6264
    },
    {
      "epoch": 1.8802521008403361,
      "grad_norm": 0.15207207202911377,
      "learning_rate": 2.186692146562963e-06,
      "loss": 0.3678,
      "step": 6265
    },
    {
      "epoch": 1.8805522208883554,
      "grad_norm": 0.13141755759716034,
      "learning_rate": 2.175812125524834e-06,
      "loss": 0.3135,
      "step": 6266
    },
    {
      "epoch": 1.8808523409363747,
      "grad_norm": 0.15551365911960602,
      "learning_rate": 2.16495894143095e-06,
      "loss": 0.3508,
      "step": 6267
    },
    {
      "epoch": 1.8811524609843937,
      "grad_norm": 0.1405211091041565,
      "learning_rate": 2.154132597258729e-06,
      "loss": 0.3328,
      "step": 6268
    },
    {
      "epoch": 1.881452581032413,
      "grad_norm": 0.13350637257099152,
      "learning_rate": 2.143333095978284e-06,
      "loss": 0.3246,
      "step": 6269
    },
    {
      "epoch": 1.881752701080432,
      "grad_norm": 0.13670623302459717,
      "learning_rate": 2.1325604405523334e-06,
      "loss": 0.3061,
      "step": 6270
    },
    {
      "epoch": 1.8820528211284513,
      "grad_norm": 0.1253797709941864,
      "learning_rate": 2.1218146339362143e-06,
      "loss": 0.2961,
      "step": 6271
    },
    {
      "epoch": 1.8823529411764706,
      "grad_norm": 0.133013516664505,
      "learning_rate": 2.1110956790779123e-06,
      "loss": 0.325,
      "step": 6272
    },
    {
      "epoch": 1.8826530612244898,
      "grad_norm": 0.13871300220489502,
      "learning_rate": 2.100403578918053e-06,
      "loss": 0.3338,
      "step": 6273
    },
    {
      "epoch": 1.882953181272509,
      "grad_norm": 0.13345804810523987,
      "learning_rate": 2.0897383363899124e-06,
      "loss": 0.3085,
      "step": 6274
    },
    {
      "epoch": 1.8832533013205284,
      "grad_norm": 0.1403323858976364,
      "learning_rate": 2.079099954419361e-06,
      "loss": 0.3655,
      "step": 6275
    },
    {
      "epoch": 1.8835534213685474,
      "grad_norm": 0.13470247387886047,
      "learning_rate": 2.06848843592492e-06,
      "loss": 0.3061,
      "step": 6276
    },
    {
      "epoch": 1.8838535414165665,
      "grad_norm": 0.15145735442638397,
      "learning_rate": 2.0579037838177164e-06,
      "loss": 0.3408,
      "step": 6277
    },
    {
      "epoch": 1.8841536614645857,
      "grad_norm": 0.13776715099811554,
      "learning_rate": 2.047346001001571e-06,
      "loss": 0.3192,
      "step": 6278
    },
    {
      "epoch": 1.884453781512605,
      "grad_norm": 0.14677351713180542,
      "learning_rate": 2.0368150903728677e-06,
      "loss": 0.3179,
      "step": 6279
    },
    {
      "epoch": 1.8847539015606243,
      "grad_norm": 0.14180903136730194,
      "learning_rate": 2.026311054820629e-06,
      "loss": 0.3353,
      "step": 6280
    },
    {
      "epoch": 1.8850540216086435,
      "grad_norm": 0.13480877876281738,
      "learning_rate": 2.015833897226538e-06,
      "loss": 0.3031,
      "step": 6281
    },
    {
      "epoch": 1.8853541416566628,
      "grad_norm": 0.13390487432479858,
      "learning_rate": 2.0053836204648625e-06,
      "loss": 0.3205,
      "step": 6282
    },
    {
      "epoch": 1.8856542617046819,
      "grad_norm": 0.13204741477966309,
      "learning_rate": 1.9949602274025424e-06,
      "loss": 0.3097,
      "step": 6283
    },
    {
      "epoch": 1.885954381752701,
      "grad_norm": 0.1440712958574295,
      "learning_rate": 1.984563720899091e-06,
      "loss": 0.35,
      "step": 6284
    },
    {
      "epoch": 1.8862545018007202,
      "grad_norm": 0.1389112025499344,
      "learning_rate": 1.9741941038066815e-06,
      "loss": 0.3161,
      "step": 6285
    },
    {
      "epoch": 1.8865546218487395,
      "grad_norm": 0.1399288922548294,
      "learning_rate": 1.963851378970094e-06,
      "loss": 0.3146,
      "step": 6286
    },
    {
      "epoch": 1.8868547418967587,
      "grad_norm": 0.13302282989025116,
      "learning_rate": 1.9535355492267483e-06,
      "loss": 0.3035,
      "step": 6287
    },
    {
      "epoch": 1.887154861944778,
      "grad_norm": 0.13178716599941254,
      "learning_rate": 1.943246617406669e-06,
      "loss": 0.2989,
      "step": 6288
    },
    {
      "epoch": 1.8874549819927973,
      "grad_norm": 0.1290881186723709,
      "learning_rate": 1.932984586332487e-06,
      "loss": 0.2986,
      "step": 6289
    },
    {
      "epoch": 1.8877551020408163,
      "grad_norm": 0.13396479189395905,
      "learning_rate": 1.922749458819506e-06,
      "loss": 0.322,
      "step": 6290
    },
    {
      "epoch": 1.8880552220888356,
      "grad_norm": 0.1531609743833542,
      "learning_rate": 1.9125412376755912e-06,
      "loss": 0.2845,
      "step": 6291
    },
    {
      "epoch": 1.8883553421368546,
      "grad_norm": 0.1545073240995407,
      "learning_rate": 1.9023599257012692e-06,
      "loss": 0.3098,
      "step": 6292
    },
    {
      "epoch": 1.888655462184874,
      "grad_norm": 0.15298312902450562,
      "learning_rate": 1.8922055256896499e-06,
      "loss": 0.3225,
      "step": 6293
    },
    {
      "epoch": 1.8889555822328932,
      "grad_norm": 0.13482391834259033,
      "learning_rate": 1.8820780404264827e-06,
      "loss": 0.3218,
      "step": 6294
    },
    {
      "epoch": 1.8892557022809124,
      "grad_norm": 0.1857229620218277,
      "learning_rate": 1.871977472690134e-06,
      "loss": 0.3341,
      "step": 6295
    },
    {
      "epoch": 1.8895558223289317,
      "grad_norm": 0.13722006976604462,
      "learning_rate": 1.8619038252515653e-06,
      "loss": 0.3266,
      "step": 6296
    },
    {
      "epoch": 1.8898559423769508,
      "grad_norm": 0.13641677796840668,
      "learning_rate": 1.8518571008743769e-06,
      "loss": 0.3169,
      "step": 6297
    },
    {
      "epoch": 1.89015606242497,
      "grad_norm": 0.1337769478559494,
      "learning_rate": 1.8418373023147639e-06,
      "loss": 0.3164,
      "step": 6298
    },
    {
      "epoch": 1.890456182472989,
      "grad_norm": 0.14053909480571747,
      "learning_rate": 1.83184443232155e-06,
      "loss": 0.3049,
      "step": 6299
    },
    {
      "epoch": 1.8907563025210083,
      "grad_norm": 0.1474149376153946,
      "learning_rate": 1.8218784936361644e-06,
      "loss": 0.3539,
      "step": 6300
    },
    {
      "epoch": 1.8910564225690276,
      "grad_norm": 0.1520894169807434,
      "learning_rate": 1.8119394889926532e-06,
      "loss": 0.3675,
      "step": 6301
    },
    {
      "epoch": 1.8913565426170469,
      "grad_norm": 0.141342893242836,
      "learning_rate": 1.8020274211176469e-06,
      "loss": 0.3524,
      "step": 6302
    },
    {
      "epoch": 1.8916566626650662,
      "grad_norm": 0.14925958216190338,
      "learning_rate": 1.7921422927304254e-06,
      "loss": 0.3447,
      "step": 6303
    },
    {
      "epoch": 1.8919567827130852,
      "grad_norm": 0.13178478181362152,
      "learning_rate": 1.782284106542864e-06,
      "loss": 0.277,
      "step": 6304
    },
    {
      "epoch": 1.8922569027611045,
      "grad_norm": 0.12817543745040894,
      "learning_rate": 1.772452865259433e-06,
      "loss": 0.3033,
      "step": 6305
    },
    {
      "epoch": 1.8925570228091235,
      "grad_norm": 0.13969522714614868,
      "learning_rate": 1.762648571577219e-06,
      "loss": 0.3311,
      "step": 6306
    },
    {
      "epoch": 1.8928571428571428,
      "grad_norm": 0.13275279104709625,
      "learning_rate": 1.752871228185926e-06,
      "loss": 0.3204,
      "step": 6307
    },
    {
      "epoch": 1.893157262905162,
      "grad_norm": 0.1373925358057022,
      "learning_rate": 1.7431208377678531e-06,
      "loss": 0.326,
      "step": 6308
    },
    {
      "epoch": 1.8934573829531813,
      "grad_norm": 0.1355675309896469,
      "learning_rate": 1.733397402997916e-06,
      "loss": 0.3346,
      "step": 6309
    },
    {
      "epoch": 1.8937575030012006,
      "grad_norm": 0.12994515895843506,
      "learning_rate": 1.7237009265436032e-06,
      "loss": 0.2927,
      "step": 6310
    },
    {
      "epoch": 1.8940576230492197,
      "grad_norm": 0.1230861097574234,
      "learning_rate": 1.7140314110650535e-06,
      "loss": 0.2682,
      "step": 6311
    },
    {
      "epoch": 1.894357743097239,
      "grad_norm": 0.14534206688404083,
      "learning_rate": 1.704388859214978e-06,
      "loss": 0.3348,
      "step": 6312
    },
    {
      "epoch": 1.894657863145258,
      "grad_norm": 0.1521633118391037,
      "learning_rate": 1.6947732736387168e-06,
      "loss": 0.3034,
      "step": 6313
    },
    {
      "epoch": 1.8949579831932772,
      "grad_norm": 0.12484516948461533,
      "learning_rate": 1.6851846569741813e-06,
      "loss": 0.2813,
      "step": 6314
    },
    {
      "epoch": 1.8952581032412965,
      "grad_norm": 0.15173867344856262,
      "learning_rate": 1.675623011851879e-06,
      "loss": 0.3582,
      "step": 6315
    },
    {
      "epoch": 1.8955582232893158,
      "grad_norm": 0.12107089906930923,
      "learning_rate": 1.6660883408949778e-06,
      "loss": 0.2877,
      "step": 6316
    },
    {
      "epoch": 1.895858343337335,
      "grad_norm": 0.1327318549156189,
      "learning_rate": 1.6565806467191859e-06,
      "loss": 0.315,
      "step": 6317
    },
    {
      "epoch": 1.896158463385354,
      "grad_norm": 0.14046049118041992,
      "learning_rate": 1.6470999319328161e-06,
      "loss": 0.3211,
      "step": 6318
    },
    {
      "epoch": 1.8964585834333734,
      "grad_norm": 0.13437533378601074,
      "learning_rate": 1.6376461991368218e-06,
      "loss": 0.2932,
      "step": 6319
    },
    {
      "epoch": 1.8967587034813924,
      "grad_norm": 0.1479692906141281,
      "learning_rate": 1.6282194509247063e-06,
      "loss": 0.3447,
      "step": 6320
    },
    {
      "epoch": 1.8970588235294117,
      "grad_norm": 0.1407313048839569,
      "learning_rate": 1.6188196898826003e-06,
      "loss": 0.3301,
      "step": 6321
    },
    {
      "epoch": 1.897358943577431,
      "grad_norm": 0.1369362771511078,
      "learning_rate": 1.6094469185892191e-06,
      "loss": 0.3149,
      "step": 6322
    },
    {
      "epoch": 1.8976590636254502,
      "grad_norm": 3.8090240955352783,
      "learning_rate": 1.6001011396158617e-06,
      "loss": 0.3424,
      "step": 6323
    },
    {
      "epoch": 1.8979591836734695,
      "grad_norm": 0.14959876239299774,
      "learning_rate": 1.5907823555264434e-06,
      "loss": 0.3371,
      "step": 6324
    },
    {
      "epoch": 1.8982593037214885,
      "grad_norm": 0.13633541762828827,
      "learning_rate": 1.581490568877475e-06,
      "loss": 0.325,
      "step": 6325
    },
    {
      "epoch": 1.8985594237695078,
      "grad_norm": 0.13324010372161865,
      "learning_rate": 1.572225782218051e-06,
      "loss": 0.312,
      "step": 6326
    },
    {
      "epoch": 1.8988595438175269,
      "grad_norm": 0.20001792907714844,
      "learning_rate": 1.5629879980898376e-06,
      "loss": 0.3349,
      "step": 6327
    },
    {
      "epoch": 1.8991596638655461,
      "grad_norm": 0.13812509179115295,
      "learning_rate": 1.5537772190271416e-06,
      "loss": 0.3196,
      "step": 6328
    },
    {
      "epoch": 1.8994597839135654,
      "grad_norm": 0.14415699243545532,
      "learning_rate": 1.5445934475568192e-06,
      "loss": 0.3431,
      "step": 6329
    },
    {
      "epoch": 1.8997599039615847,
      "grad_norm": 0.15070272982120514,
      "learning_rate": 1.5354366861983438e-06,
      "loss": 0.3123,
      "step": 6330
    },
    {
      "epoch": 1.900060024009604,
      "grad_norm": 0.13872075080871582,
      "learning_rate": 1.5263069374637507e-06,
      "loss": 0.3242,
      "step": 6331
    },
    {
      "epoch": 1.9003601440576232,
      "grad_norm": 0.13535228371620178,
      "learning_rate": 1.5172042038577028e-06,
      "loss": 0.3043,
      "step": 6332
    },
    {
      "epoch": 1.9006602641056423,
      "grad_norm": 0.13848936557769775,
      "learning_rate": 1.5081284878774138e-06,
      "loss": 0.3087,
      "step": 6333
    },
    {
      "epoch": 1.9009603841536613,
      "grad_norm": 0.14651234447956085,
      "learning_rate": 1.4990797920127141e-06,
      "loss": 0.3517,
      "step": 6334
    },
    {
      "epoch": 1.9012605042016806,
      "grad_norm": 0.14521829783916473,
      "learning_rate": 1.4900581187459961e-06,
      "loss": 0.3502,
      "step": 6335
    },
    {
      "epoch": 1.9015606242496998,
      "grad_norm": 0.13988415896892548,
      "learning_rate": 1.4810634705522686e-06,
      "loss": 0.2946,
      "step": 6336
    },
    {
      "epoch": 1.9018607442977191,
      "grad_norm": 0.1395883411169052,
      "learning_rate": 1.472095849899091e-06,
      "loss": 0.3162,
      "step": 6337
    },
    {
      "epoch": 1.9021608643457384,
      "grad_norm": 0.13737809658050537,
      "learning_rate": 1.4631552592466514e-06,
      "loss": 0.3351,
      "step": 6338
    },
    {
      "epoch": 1.9024609843937577,
      "grad_norm": 0.13726972043514252,
      "learning_rate": 1.4542417010476873e-06,
      "loss": 0.3002,
      "step": 6339
    },
    {
      "epoch": 1.9027611044417767,
      "grad_norm": 0.17059005796909332,
      "learning_rate": 1.4453551777475094e-06,
      "loss": 0.4004,
      "step": 6340
    },
    {
      "epoch": 1.9030612244897958,
      "grad_norm": 0.14700105786323547,
      "learning_rate": 1.4364956917840678e-06,
      "loss": 0.3654,
      "step": 6341
    },
    {
      "epoch": 1.903361344537815,
      "grad_norm": 0.1453956961631775,
      "learning_rate": 1.4276632455878403e-06,
      "loss": 0.3261,
      "step": 6342
    },
    {
      "epoch": 1.9036614645858343,
      "grad_norm": 0.1321887969970703,
      "learning_rate": 1.418857841581922e-06,
      "loss": 0.2817,
      "step": 6343
    },
    {
      "epoch": 1.9039615846338536,
      "grad_norm": 0.13092279434204102,
      "learning_rate": 1.4100794821819585e-06,
      "loss": 0.2898,
      "step": 6344
    },
    {
      "epoch": 1.9042617046818728,
      "grad_norm": 0.130708247423172,
      "learning_rate": 1.40132816979619e-06,
      "loss": 0.3026,
      "step": 6345
    },
    {
      "epoch": 1.904561824729892,
      "grad_norm": 0.12597669661045074,
      "learning_rate": 1.3926039068254626e-06,
      "loss": 0.2967,
      "step": 6346
    },
    {
      "epoch": 1.9048619447779112,
      "grad_norm": 0.14716726541519165,
      "learning_rate": 1.383906695663173e-06,
      "loss": 0.3724,
      "step": 6347
    },
    {
      "epoch": 1.9051620648259304,
      "grad_norm": 0.13780756294727325,
      "learning_rate": 1.3752365386952681e-06,
      "loss": 0.3179,
      "step": 6348
    },
    {
      "epoch": 1.9054621848739495,
      "grad_norm": 0.1345573216676712,
      "learning_rate": 1.3665934383003343e-06,
      "loss": 0.3294,
      "step": 6349
    },
    {
      "epoch": 1.9057623049219687,
      "grad_norm": 0.13241131603717804,
      "learning_rate": 1.3579773968495191e-06,
      "loss": 0.2994,
      "step": 6350
    },
    {
      "epoch": 1.906062424969988,
      "grad_norm": 0.13944432139396667,
      "learning_rate": 1.3493884167064986e-06,
      "loss": 0.3159,
      "step": 6351
    },
    {
      "epoch": 1.9063625450180073,
      "grad_norm": 0.14155784249305725,
      "learning_rate": 1.3408265002275877e-06,
      "loss": 0.3411,
      "step": 6352
    },
    {
      "epoch": 1.9066626650660266,
      "grad_norm": 0.14534442126750946,
      "learning_rate": 1.332291649761641e-06,
      "loss": 0.3306,
      "step": 6353
    },
    {
      "epoch": 1.9069627851140456,
      "grad_norm": 0.13481180369853973,
      "learning_rate": 1.323783867650097e-06,
      "loss": 0.3298,
      "step": 6354
    },
    {
      "epoch": 1.9072629051620649,
      "grad_norm": 0.12702417373657227,
      "learning_rate": 1.3153031562269768e-06,
      "loss": 0.2922,
      "step": 6355
    },
    {
      "epoch": 1.907563025210084,
      "grad_norm": 0.28436294198036194,
      "learning_rate": 1.3068495178188533e-06,
      "loss": 0.3397,
      "step": 6356
    },
    {
      "epoch": 1.9078631452581032,
      "grad_norm": 0.1368831992149353,
      "learning_rate": 1.2984229547448935e-06,
      "loss": 0.3242,
      "step": 6357
    },
    {
      "epoch": 1.9081632653061225,
      "grad_norm": 0.14487719535827637,
      "learning_rate": 1.2900234693168255e-06,
      "loss": 0.3448,
      "step": 6358
    },
    {
      "epoch": 1.9084633853541417,
      "grad_norm": 0.1391216218471527,
      "learning_rate": 1.2816510638389512e-06,
      "loss": 0.3179,
      "step": 6359
    },
    {
      "epoch": 1.908763505402161,
      "grad_norm": 0.12535832822322845,
      "learning_rate": 1.2733057406081438e-06,
      "loss": 0.2929,
      "step": 6360
    },
    {
      "epoch": 1.90906362545018,
      "grad_norm": 0.1655777245759964,
      "learning_rate": 1.2649875019138501e-06,
      "loss": 0.3486,
      "step": 6361
    },
    {
      "epoch": 1.9093637454981993,
      "grad_norm": 0.14286132156848907,
      "learning_rate": 1.256696350038078e-06,
      "loss": 0.3336,
      "step": 6362
    },
    {
      "epoch": 1.9096638655462184,
      "grad_norm": 0.13462306559085846,
      "learning_rate": 1.2484322872554299e-06,
      "loss": 0.314,
      "step": 6363
    },
    {
      "epoch": 1.9099639855942376,
      "grad_norm": 0.1311066746711731,
      "learning_rate": 1.2401953158330148e-06,
      "loss": 0.2993,
      "step": 6364
    },
    {
      "epoch": 1.910264105642257,
      "grad_norm": 0.15170760452747345,
      "learning_rate": 1.2319854380305918e-06,
      "loss": 0.2987,
      "step": 6365
    },
    {
      "epoch": 1.9105642256902762,
      "grad_norm": 0.13527582585811615,
      "learning_rate": 1.2238026561004369e-06,
      "loss": 0.3204,
      "step": 6366
    },
    {
      "epoch": 1.9108643457382954,
      "grad_norm": 0.14560282230377197,
      "learning_rate": 1.2156469722873875e-06,
      "loss": 0.343,
      "step": 6367
    },
    {
      "epoch": 1.9111644657863145,
      "grad_norm": 0.1309434473514557,
      "learning_rate": 1.2075183888288767e-06,
      "loss": 0.292,
      "step": 6368
    },
    {
      "epoch": 1.9114645858343338,
      "grad_norm": 0.12859490513801575,
      "learning_rate": 1.1994169079548756e-06,
      "loss": 0.3092,
      "step": 6369
    },
    {
      "epoch": 1.9117647058823528,
      "grad_norm": 0.13696551322937012,
      "learning_rate": 1.1913425318879511e-06,
      "loss": 0.3166,
      "step": 6370
    },
    {
      "epoch": 1.912064825930372,
      "grad_norm": 0.13910551369190216,
      "learning_rate": 1.1832952628431981e-06,
      "loss": 0.3227,
      "step": 6371
    },
    {
      "epoch": 1.9123649459783914,
      "grad_norm": 0.13507243990898132,
      "learning_rate": 1.175275103028306e-06,
      "loss": 0.3335,
      "step": 6372
    },
    {
      "epoch": 1.9126650660264106,
      "grad_norm": 0.1377187818288803,
      "learning_rate": 1.1672820546435038e-06,
      "loss": 0.3268,
      "step": 6373
    },
    {
      "epoch": 1.91296518607443,
      "grad_norm": 0.14127525687217712,
      "learning_rate": 1.1593161198815927e-06,
      "loss": 0.3476,
      "step": 6374
    },
    {
      "epoch": 1.913265306122449,
      "grad_norm": 0.14496168494224548,
      "learning_rate": 1.1513773009279472e-06,
      "loss": 0.3512,
      "step": 6375
    },
    {
      "epoch": 1.9135654261704682,
      "grad_norm": 0.13064956665039062,
      "learning_rate": 1.1434655999604805e-06,
      "loss": 0.2882,
      "step": 6376
    },
    {
      "epoch": 1.9138655462184873,
      "grad_norm": 0.12650299072265625,
      "learning_rate": 1.1355810191496674e-06,
      "loss": 0.2786,
      "step": 6377
    },
    {
      "epoch": 1.9141656662665065,
      "grad_norm": 0.13515569269657135,
      "learning_rate": 1.1277235606585778e-06,
      "loss": 0.3191,
      "step": 6378
    },
    {
      "epoch": 1.9144657863145258,
      "grad_norm": 0.14777721464633942,
      "learning_rate": 1.1198932266427985e-06,
      "loss": 0.3264,
      "step": 6379
    },
    {
      "epoch": 1.914765906362545,
      "grad_norm": 0.13458096981048584,
      "learning_rate": 1.1120900192505e-06,
      "loss": 0.3169,
      "step": 6380
    },
    {
      "epoch": 1.9150660264105643,
      "grad_norm": 0.1463477462530136,
      "learning_rate": 1.1043139406223813e-06,
      "loss": 0.3233,
      "step": 6381
    },
    {
      "epoch": 1.9153661464585834,
      "grad_norm": 0.13509635627269745,
      "learning_rate": 1.0965649928917466e-06,
      "loss": 0.3077,
      "step": 6382
    },
    {
      "epoch": 1.9156662665066027,
      "grad_norm": 0.12657250463962555,
      "learning_rate": 1.088843178184429e-06,
      "loss": 0.2954,
      "step": 6383
    },
    {
      "epoch": 1.9159663865546217,
      "grad_norm": 0.13948281109333038,
      "learning_rate": 1.0811484986188002e-06,
      "loss": 0.3244,
      "step": 6384
    },
    {
      "epoch": 1.916266506602641,
      "grad_norm": 0.13280120491981506,
      "learning_rate": 1.073480956305839e-06,
      "loss": 0.3264,
      "step": 6385
    },
    {
      "epoch": 1.9165666266506602,
      "grad_norm": 0.1396474540233612,
      "learning_rate": 1.0658405533489956e-06,
      "loss": 0.3166,
      "step": 6386
    },
    {
      "epoch": 1.9168667466986795,
      "grad_norm": 0.13262183964252472,
      "learning_rate": 1.0582272918443825e-06,
      "loss": 0.3328,
      "step": 6387
    },
    {
      "epoch": 1.9171668667466988,
      "grad_norm": 0.14330746233463287,
      "learning_rate": 1.0506411738805845e-06,
      "loss": 0.3334,
      "step": 6388
    },
    {
      "epoch": 1.917466986794718,
      "grad_norm": 0.14604748785495758,
      "learning_rate": 1.0430822015387698e-06,
      "loss": 0.355,
      "step": 6389
    },
    {
      "epoch": 1.917767106842737,
      "grad_norm": 0.1383308321237564,
      "learning_rate": 1.0355503768926466e-06,
      "loss": 0.3327,
      "step": 6390
    },
    {
      "epoch": 1.9180672268907561,
      "grad_norm": 0.13492268323898315,
      "learning_rate": 1.0280457020085067e-06,
      "loss": 0.3161,
      "step": 6391
    },
    {
      "epoch": 1.9183673469387754,
      "grad_norm": 0.13790442049503326,
      "learning_rate": 1.020568178945147e-06,
      "loss": 0.3404,
      "step": 6392
    },
    {
      "epoch": 1.9186674669867947,
      "grad_norm": 0.13623499870300293,
      "learning_rate": 1.0131178097539605e-06,
      "loss": 0.3125,
      "step": 6393
    },
    {
      "epoch": 1.918967587034814,
      "grad_norm": 0.14850133657455444,
      "learning_rate": 1.0056945964788566e-06,
      "loss": 0.3303,
      "step": 6394
    },
    {
      "epoch": 1.9192677070828332,
      "grad_norm": 0.1354813277721405,
      "learning_rate": 9.98298541156306e-07,
      "loss": 0.2941,
      "step": 6395
    },
    {
      "epoch": 1.9195678271308525,
      "grad_norm": 0.12442360073328018,
      "learning_rate": 9.909296458153528e-07,
      "loss": 0.2821,
      "step": 6396
    },
    {
      "epoch": 1.9198679471788715,
      "grad_norm": 0.13599467277526855,
      "learning_rate": 9.83587912477546e-07,
      "loss": 0.3295,
      "step": 6397
    },
    {
      "epoch": 1.9201680672268906,
      "grad_norm": 0.1269826591014862,
      "learning_rate": 9.762733431570192e-07,
      "loss": 0.2964,
      "step": 6398
    },
    {
      "epoch": 1.9204681872749099,
      "grad_norm": 0.1483106166124344,
      "learning_rate": 9.68985939860434e-07,
      "loss": 0.369,
      "step": 6399
    },
    {
      "epoch": 1.9207683073229291,
      "grad_norm": 0.13698875904083252,
      "learning_rate": 9.61725704587002e-07,
      "loss": 0.3383,
      "step": 6400
    },
    {
      "epoch": 1.9210684273709484,
      "grad_norm": 0.14250008761882782,
      "learning_rate": 9.54492639328508e-07,
      "loss": 0.3152,
      "step": 6401
    },
    {
      "epoch": 1.9213685474189677,
      "grad_norm": 0.19880473613739014,
      "learning_rate": 9.472867460692314e-07,
      "loss": 0.3343,
      "step": 6402
    },
    {
      "epoch": 1.921668667466987,
      "grad_norm": 0.14081600308418274,
      "learning_rate": 9.401080267860351e-07,
      "loss": 0.2955,
      "step": 6403
    },
    {
      "epoch": 1.921968787515006,
      "grad_norm": 0.13255588710308075,
      "learning_rate": 9.32956483448344e-07,
      "loss": 0.3268,
      "step": 6404
    },
    {
      "epoch": 1.9222689075630253,
      "grad_norm": 0.15398523211479187,
      "learning_rate": 9.258321180180663e-07,
      "loss": 0.3647,
      "step": 6405
    },
    {
      "epoch": 1.9225690276110443,
      "grad_norm": 0.13041505217552185,
      "learning_rate": 9.187349324497052e-07,
      "loss": 0.3074,
      "step": 6406
    },
    {
      "epoch": 1.9228691476590636,
      "grad_norm": 0.1473788172006607,
      "learning_rate": 9.116649286902923e-07,
      "loss": 0.3741,
      "step": 6407
    },
    {
      "epoch": 1.9231692677070829,
      "grad_norm": 0.14926253259181976,
      "learning_rate": 9.046221086793983e-07,
      "loss": 0.3567,
      "step": 6408
    },
    {
      "epoch": 1.9234693877551021,
      "grad_norm": 0.12680687010288239,
      "learning_rate": 8.976064743491552e-07,
      "loss": 0.2923,
      "step": 6409
    },
    {
      "epoch": 1.9237695078031214,
      "grad_norm": 0.13940927386283875,
      "learning_rate": 8.906180276242015e-07,
      "loss": 0.3216,
      "step": 6410
    },
    {
      "epoch": 1.9240696278511404,
      "grad_norm": 0.12795254588127136,
      "learning_rate": 8.83656770421748e-07,
      "loss": 0.2889,
      "step": 6411
    },
    {
      "epoch": 1.9243697478991597,
      "grad_norm": 0.12966111302375793,
      "learning_rate": 8.767227046515225e-07,
      "loss": 0.3146,
      "step": 6412
    },
    {
      "epoch": 1.9246698679471788,
      "grad_norm": 0.12147068977355957,
      "learning_rate": 8.698158322158256e-07,
      "loss": 0.2657,
      "step": 6413
    },
    {
      "epoch": 1.924969987995198,
      "grad_norm": 0.13021109998226166,
      "learning_rate": 8.62936155009475e-07,
      "loss": 0.2975,
      "step": 6414
    },
    {
      "epoch": 1.9252701080432173,
      "grad_norm": 0.18556343019008636,
      "learning_rate": 8.560836749198165e-07,
      "loss": 0.3116,
      "step": 6415
    },
    {
      "epoch": 1.9255702280912366,
      "grad_norm": 0.1517120897769928,
      "learning_rate": 8.492583938267684e-07,
      "loss": 0.3077,
      "step": 6416
    },
    {
      "epoch": 1.9258703481392558,
      "grad_norm": 0.14694081246852875,
      "learning_rate": 8.424603136027553e-07,
      "loss": 0.2696,
      "step": 6417
    },
    {
      "epoch": 1.9261704681872749,
      "grad_norm": 0.13473621010780334,
      "learning_rate": 8.356894361127632e-07,
      "loss": 0.3118,
      "step": 6418
    },
    {
      "epoch": 1.9264705882352942,
      "grad_norm": 0.13514980673789978,
      "learning_rate": 8.28945763214295e-07,
      "loss": 0.3083,
      "step": 6419
    },
    {
      "epoch": 1.9267707082833132,
      "grad_norm": 0.13456521928310394,
      "learning_rate": 8.22229296757393e-07,
      "loss": 0.3228,
      "step": 6420
    },
    {
      "epoch": 1.9270708283313325,
      "grad_norm": 0.1491681933403015,
      "learning_rate": 8.155400385846613e-07,
      "loss": 0.3463,
      "step": 6421
    },
    {
      "epoch": 1.9273709483793517,
      "grad_norm": 0.14137840270996094,
      "learning_rate": 8.088779905312205e-07,
      "loss": 0.3295,
      "step": 6422
    },
    {
      "epoch": 1.927671068427371,
      "grad_norm": 0.1492571234703064,
      "learning_rate": 8.022431544247089e-07,
      "loss": 0.3616,
      "step": 6423
    },
    {
      "epoch": 1.9279711884753903,
      "grad_norm": 0.13985922932624817,
      "learning_rate": 7.95635532085326e-07,
      "loss": 0.3373,
      "step": 6424
    },
    {
      "epoch": 1.9282713085234093,
      "grad_norm": 0.14516708254814148,
      "learning_rate": 7.890551253258105e-07,
      "loss": 0.3349,
      "step": 6425
    },
    {
      "epoch": 1.9285714285714286,
      "grad_norm": 0.1339079886674881,
      "learning_rate": 7.825019359514074e-07,
      "loss": 0.3046,
      "step": 6426
    },
    {
      "epoch": 1.9288715486194477,
      "grad_norm": 0.12760967016220093,
      "learning_rate": 7.759759657599008e-07,
      "loss": 0.293,
      "step": 6427
    },
    {
      "epoch": 1.929171668667467,
      "grad_norm": 0.13753746449947357,
      "learning_rate": 7.694772165416364e-07,
      "loss": 0.3201,
      "step": 6428
    },
    {
      "epoch": 1.9294717887154862,
      "grad_norm": 0.1318732351064682,
      "learning_rate": 7.630056900794658e-07,
      "loss": 0.3123,
      "step": 6429
    },
    {
      "epoch": 1.9297719087635055,
      "grad_norm": 0.1316746026277542,
      "learning_rate": 7.565613881487687e-07,
      "loss": 0.3313,
      "step": 6430
    },
    {
      "epoch": 1.9300720288115247,
      "grad_norm": 0.14019489288330078,
      "learning_rate": 7.501443125174757e-07,
      "loss": 0.3272,
      "step": 6431
    },
    {
      "epoch": 1.9303721488595438,
      "grad_norm": 0.13703711330890656,
      "learning_rate": 7.437544649460337e-07,
      "loss": 0.3283,
      "step": 6432
    },
    {
      "epoch": 1.930672268907563,
      "grad_norm": 0.124041348695755,
      "learning_rate": 7.373918471874186e-07,
      "loss": 0.2811,
      "step": 6433
    },
    {
      "epoch": 1.930972388955582,
      "grad_norm": 0.12873201072216034,
      "learning_rate": 7.310564609871451e-07,
      "loss": 0.3065,
      "step": 6434
    },
    {
      "epoch": 1.9312725090036014,
      "grad_norm": 0.12377699464559555,
      "learning_rate": 7.247483080832562e-07,
      "loss": 0.2889,
      "step": 6435
    },
    {
      "epoch": 1.9315726290516206,
      "grad_norm": 0.13439233601093292,
      "learning_rate": 7.184673902063343e-07,
      "loss": 0.3289,
      "step": 6436
    },
    {
      "epoch": 1.93187274909964,
      "grad_norm": 0.12906034290790558,
      "learning_rate": 7.122137090794457e-07,
      "loss": 0.2994,
      "step": 6437
    },
    {
      "epoch": 1.9321728691476592,
      "grad_norm": 0.13723750412464142,
      "learning_rate": 7.059872664182398e-07,
      "loss": 0.3202,
      "step": 6438
    },
    {
      "epoch": 1.9324729891956782,
      "grad_norm": 0.13962934911251068,
      "learning_rate": 6.99788063930873e-07,
      "loss": 0.3247,
      "step": 6439
    },
    {
      "epoch": 1.9327731092436975,
      "grad_norm": 0.1447930932044983,
      "learning_rate": 6.936161033180066e-07,
      "loss": 0.3359,
      "step": 6440
    },
    {
      "epoch": 1.9330732292917165,
      "grad_norm": 0.13536879420280457,
      "learning_rate": 6.874713862728643e-07,
      "loss": 0.3049,
      "step": 6441
    },
    {
      "epoch": 1.9333733493397358,
      "grad_norm": 0.2102050930261612,
      "learning_rate": 6.813539144811642e-07,
      "loss": 0.286,
      "step": 6442
    },
    {
      "epoch": 1.933673469387755,
      "grad_norm": 0.1379159837961197,
      "learning_rate": 6.75263689621175e-07,
      "loss": 0.3416,
      "step": 6443
    },
    {
      "epoch": 1.9339735894357744,
      "grad_norm": 0.14714962244033813,
      "learning_rate": 6.692007133636824e-07,
      "loss": 0.3524,
      "step": 6444
    },
    {
      "epoch": 1.9342737094837936,
      "grad_norm": 0.1256219893693924,
      "learning_rate": 6.631649873719781e-07,
      "loss": 0.2596,
      "step": 6445
    },
    {
      "epoch": 1.9345738295318127,
      "grad_norm": 0.13435864448547363,
      "learning_rate": 6.571565133019153e-07,
      "loss": 0.2911,
      "step": 6446
    },
    {
      "epoch": 1.934873949579832,
      "grad_norm": 0.12946513295173645,
      "learning_rate": 6.511752928018422e-07,
      "loss": 0.3092,
      "step": 6447
    },
    {
      "epoch": 1.935174069627851,
      "grad_norm": 0.2115807682275772,
      "learning_rate": 6.45221327512624e-07,
      "loss": 0.3168,
      "step": 6448
    },
    {
      "epoch": 1.9354741896758703,
      "grad_norm": 0.13688799738883972,
      "learning_rate": 6.392946190676763e-07,
      "loss": 0.3171,
      "step": 6449
    },
    {
      "epoch": 1.9357743097238895,
      "grad_norm": 0.13157397508621216,
      "learning_rate": 6.333951690929318e-07,
      "loss": 0.2952,
      "step": 6450
    },
    {
      "epoch": 1.9360744297719088,
      "grad_norm": 0.148507758975029,
      "learning_rate": 6.275229792068183e-07,
      "loss": 0.3397,
      "step": 6451
    },
    {
      "epoch": 1.936374549819928,
      "grad_norm": 0.14567214250564575,
      "learning_rate": 6.216780510203136e-07,
      "loss": 0.3507,
      "step": 6452
    },
    {
      "epoch": 1.9366746698679473,
      "grad_norm": 0.14839902520179749,
      "learning_rate": 6.158603861369017e-07,
      "loss": 0.3664,
      "step": 6453
    },
    {
      "epoch": 1.9369747899159664,
      "grad_norm": 0.138484388589859,
      "learning_rate": 6.10069986152595e-07,
      "loss": 0.3245,
      "step": 6454
    },
    {
      "epoch": 1.9372749099639854,
      "grad_norm": 0.15200287103652954,
      "learning_rate": 6.043068526559337e-07,
      "loss": 0.3764,
      "step": 6455
    },
    {
      "epoch": 1.9375750300120047,
      "grad_norm": 0.13714081048965454,
      "learning_rate": 5.985709872279644e-07,
      "loss": 0.3084,
      "step": 6456
    },
    {
      "epoch": 1.937875150060024,
      "grad_norm": 0.14301633834838867,
      "learning_rate": 5.928623914422282e-07,
      "loss": 0.3569,
      "step": 6457
    },
    {
      "epoch": 1.9381752701080432,
      "grad_norm": 0.13486430048942566,
      "learning_rate": 5.871810668648503e-07,
      "loss": 0.3099,
      "step": 6458
    },
    {
      "epoch": 1.9384753901560625,
      "grad_norm": 0.14129821956157684,
      "learning_rate": 5.815270150544172e-07,
      "loss": 0.3073,
      "step": 6459
    },
    {
      "epoch": 1.9387755102040818,
      "grad_norm": 0.13441157341003418,
      "learning_rate": 5.759002375620548e-07,
      "loss": 0.309,
      "step": 6460
    },
    {
      "epoch": 1.9390756302521008,
      "grad_norm": 0.1334627866744995,
      "learning_rate": 5.703007359314172e-07,
      "loss": 0.3143,
      "step": 6461
    },
    {
      "epoch": 1.9393757503001199,
      "grad_norm": 0.13053907454013824,
      "learning_rate": 5.647285116986311e-07,
      "loss": 0.2893,
      "step": 6462
    },
    {
      "epoch": 1.9396758703481392,
      "grad_norm": 0.1443127542734146,
      "learning_rate": 5.591835663924183e-07,
      "loss": 0.2946,
      "step": 6463
    },
    {
      "epoch": 1.9399759903961584,
      "grad_norm": 0.14227718114852905,
      "learning_rate": 5.536659015339396e-07,
      "loss": 0.323,
      "step": 6464
    },
    {
      "epoch": 1.9402761104441777,
      "grad_norm": 0.13019327819347382,
      "learning_rate": 5.481755186369064e-07,
      "loss": 0.277,
      "step": 6465
    },
    {
      "epoch": 1.940576230492197,
      "grad_norm": 0.19664429128170013,
      "learning_rate": 5.427124192075473e-07,
      "loss": 0.3693,
      "step": 6466
    },
    {
      "epoch": 1.9408763505402162,
      "grad_norm": 0.13815642893314362,
      "learning_rate": 5.372766047446076e-07,
      "loss": 0.3371,
      "step": 6467
    },
    {
      "epoch": 1.9411764705882353,
      "grad_norm": 0.14420412480831146,
      "learning_rate": 5.31868076739328e-07,
      "loss": 0.3298,
      "step": 6468
    },
    {
      "epoch": 1.9414765906362546,
      "grad_norm": 0.1388360857963562,
      "learning_rate": 5.264868366754772e-07,
      "loss": 0.2977,
      "step": 6469
    },
    {
      "epoch": 1.9417767106842736,
      "grad_norm": 0.13571693003177643,
      "learning_rate": 5.211328860293519e-07,
      "loss": 0.3245,
      "step": 6470
    },
    {
      "epoch": 1.9420768307322929,
      "grad_norm": 0.1365104615688324,
      "learning_rate": 5.158062262697217e-07,
      "loss": 0.3233,
      "step": 6471
    },
    {
      "epoch": 1.9423769507803121,
      "grad_norm": 0.1375596821308136,
      "learning_rate": 5.105068588579176e-07,
      "loss": 0.344,
      "step": 6472
    },
    {
      "epoch": 1.9426770708283314,
      "grad_norm": 0.14423203468322754,
      "learning_rate": 5.052347852477546e-07,
      "loss": 0.3578,
      "step": 6473
    },
    {
      "epoch": 1.9429771908763507,
      "grad_norm": 0.1386280208826065,
      "learning_rate": 4.999900068855645e-07,
      "loss": 0.3251,
      "step": 6474
    },
    {
      "epoch": 1.9432773109243697,
      "grad_norm": 0.13474127650260925,
      "learning_rate": 4.947725252101854e-07,
      "loss": 0.3256,
      "step": 6475
    },
    {
      "epoch": 1.943577430972389,
      "grad_norm": 0.13253982365131378,
      "learning_rate": 4.895823416529832e-07,
      "loss": 0.2867,
      "step": 6476
    },
    {
      "epoch": 1.943877551020408,
      "grad_norm": 0.13384990394115448,
      "learning_rate": 4.844194576378191e-07,
      "loss": 0.3187,
      "step": 6477
    },
    {
      "epoch": 1.9441776710684273,
      "grad_norm": 0.1321321576833725,
      "learning_rate": 4.792838745810712e-07,
      "loss": 0.3056,
      "step": 6478
    },
    {
      "epoch": 1.9444777911164466,
      "grad_norm": 0.13640731573104858,
      "learning_rate": 4.741755938916237e-07,
      "loss": 0.3207,
      "step": 6479
    },
    {
      "epoch": 1.9447779111644659,
      "grad_norm": 0.1355549395084381,
      "learning_rate": 4.6909461697088874e-07,
      "loss": 0.3141,
      "step": 6480
    },
    {
      "epoch": 1.9450780312124851,
      "grad_norm": 0.13593652844429016,
      "learning_rate": 4.6404094521276256e-07,
      "loss": 0.3282,
      "step": 6481
    },
    {
      "epoch": 1.9453781512605042,
      "grad_norm": 0.13409483432769775,
      "learning_rate": 4.5901458000366937e-07,
      "loss": 0.3311,
      "step": 6482
    },
    {
      "epoch": 1.9456782713085234,
      "grad_norm": 0.12935426831245422,
      "learning_rate": 4.5401552272252847e-07,
      "loss": 0.2935,
      "step": 6483
    },
    {
      "epoch": 1.9459783913565425,
      "grad_norm": 0.14159248769283295,
      "learning_rate": 4.490437747407761e-07,
      "loss": 0.3422,
      "step": 6484
    },
    {
      "epoch": 1.9462785114045618,
      "grad_norm": 0.12727589905261993,
      "learning_rate": 4.4409933742235455e-07,
      "loss": 0.2879,
      "step": 6485
    },
    {
      "epoch": 1.946578631452581,
      "grad_norm": 0.14284352958202362,
      "learning_rate": 4.3918221212371215e-07,
      "loss": 0.3316,
      "step": 6486
    },
    {
      "epoch": 1.9468787515006003,
      "grad_norm": 0.12972243130207062,
      "learning_rate": 4.3429240019380313e-07,
      "loss": 0.3138,
      "step": 6487
    },
    {
      "epoch": 1.9471788715486196,
      "grad_norm": 0.13691653311252594,
      "learning_rate": 4.2942990297411003e-07,
      "loss": 0.3217,
      "step": 6488
    },
    {
      "epoch": 1.9474789915966386,
      "grad_norm": 0.13877379894256592,
      "learning_rate": 4.2459472179857683e-07,
      "loss": 0.3463,
      "step": 6489
    },
    {
      "epoch": 1.947779111644658,
      "grad_norm": 0.13706190884113312,
      "learning_rate": 4.197868579936981e-07,
      "loss": 0.3185,
      "step": 6490
    },
    {
      "epoch": 1.948079231692677,
      "grad_norm": 0.11960577964782715,
      "learning_rate": 4.1500631287844095e-07,
      "loss": 0.27,
      "step": 6491
    },
    {
      "epoch": 1.9483793517406962,
      "grad_norm": 0.13107003271579742,
      "learning_rate": 4.1025308776430074e-07,
      "loss": 0.2798,
      "step": 6492
    },
    {
      "epoch": 1.9486794717887155,
      "grad_norm": 0.1464453488588333,
      "learning_rate": 4.055271839552788e-07,
      "loss": 0.3588,
      "step": 6493
    },
    {
      "epoch": 1.9489795918367347,
      "grad_norm": 0.13021965324878693,
      "learning_rate": 4.0082860274787136e-07,
      "loss": 0.3166,
      "step": 6494
    },
    {
      "epoch": 1.949279711884754,
      "grad_norm": 0.13702203333377838,
      "learning_rate": 3.9615734543106965e-07,
      "loss": 0.3327,
      "step": 6495
    },
    {
      "epoch": 1.949579831932773,
      "grad_norm": 0.23359109461307526,
      "learning_rate": 3.915134132863707e-07,
      "loss": 0.3223,
      "step": 6496
    },
    {
      "epoch": 1.9498799519807923,
      "grad_norm": 0.14288662374019623,
      "learning_rate": 3.8689680758781097e-07,
      "loss": 0.3269,
      "step": 6497
    },
    {
      "epoch": 1.9501800720288114,
      "grad_norm": 0.1728636920452118,
      "learning_rate": 3.8230752960188856e-07,
      "loss": 0.363,
      "step": 6498
    },
    {
      "epoch": 1.9504801920768307,
      "grad_norm": 0.14792786538600922,
      "learning_rate": 3.777455805876184e-07,
      "loss": 0.3396,
      "step": 6499
    },
    {
      "epoch": 1.95078031212485,
      "grad_norm": 0.13643766939640045,
      "learning_rate": 3.732109617965218e-07,
      "loss": 0.3061,
      "step": 6500
    },
    {
      "epoch": 1.9510804321728692,
      "grad_norm": 0.13429324328899384,
      "learning_rate": 3.6870367447262575e-07,
      "loss": 0.3222,
      "step": 6501
    },
    {
      "epoch": 1.9513805522208885,
      "grad_norm": 0.13534994423389435,
      "learning_rate": 3.642237198524412e-07,
      "loss": 0.3378,
      "step": 6502
    },
    {
      "epoch": 1.9516806722689075,
      "grad_norm": 0.12566448748111725,
      "learning_rate": 3.59771099164985e-07,
      "loss": 0.2892,
      "step": 6503
    },
    {
      "epoch": 1.9519807923169268,
      "grad_norm": 0.13708549737930298,
      "learning_rate": 3.553458136318022e-07,
      "loss": 0.2964,
      "step": 6504
    },
    {
      "epoch": 1.9522809123649458,
      "grad_norm": 0.12881408631801605,
      "learning_rate": 3.5094786446692176e-07,
      "loss": 0.3036,
      "step": 6505
    },
    {
      "epoch": 1.952581032412965,
      "grad_norm": 0.13288307189941406,
      "learning_rate": 3.465772528768452e-07,
      "loss": 0.2775,
      "step": 6506
    },
    {
      "epoch": 1.9528811524609844,
      "grad_norm": 0.13816514611244202,
      "learning_rate": 3.422339800606245e-07,
      "loss": 0.3356,
      "step": 6507
    },
    {
      "epoch": 1.9531812725090036,
      "grad_norm": 0.15389056503772736,
      "learning_rate": 3.3791804720977317e-07,
      "loss": 0.3055,
      "step": 6508
    },
    {
      "epoch": 1.953481392557023,
      "grad_norm": 0.1330726593732834,
      "learning_rate": 3.3362945550832193e-07,
      "loss": 0.3045,
      "step": 6509
    },
    {
      "epoch": 1.9537815126050422,
      "grad_norm": 0.15567626059055328,
      "learning_rate": 3.293682061327963e-07,
      "loss": 0.3367,
      "step": 6510
    },
    {
      "epoch": 1.9540816326530612,
      "grad_norm": 0.1269209086894989,
      "learning_rate": 3.251343002522278e-07,
      "loss": 0.2885,
      "step": 6511
    },
    {
      "epoch": 1.9543817527010803,
      "grad_norm": 0.14024633169174194,
      "learning_rate": 3.2092773902812065e-07,
      "loss": 0.3536,
      "step": 6512
    },
    {
      "epoch": 1.9546818727490995,
      "grad_norm": 0.13049684464931488,
      "learning_rate": 3.1674852361451845e-07,
      "loss": 0.3069,
      "step": 6513
    },
    {
      "epoch": 1.9549819927971188,
      "grad_norm": 0.1355288028717041,
      "learning_rate": 3.1259665515793736e-07,
      "loss": 0.3177,
      "step": 6514
    },
    {
      "epoch": 1.955282112845138,
      "grad_norm": 0.1447446644306183,
      "learning_rate": 3.0847213479737736e-07,
      "loss": 0.3527,
      "step": 6515
    },
    {
      "epoch": 1.9555822328931574,
      "grad_norm": 0.13086079061031342,
      "learning_rate": 3.043749636643778e-07,
      "loss": 0.3017,
      "step": 6516
    },
    {
      "epoch": 1.9558823529411766,
      "grad_norm": 0.1448763906955719,
      "learning_rate": 3.0030514288292843e-07,
      "loss": 0.3334,
      "step": 6517
    },
    {
      "epoch": 1.9561824729891957,
      "grad_norm": 0.13058732450008392,
      "learning_rate": 2.962626735695584e-07,
      "loss": 0.301,
      "step": 6518
    },
    {
      "epoch": 1.9564825930372147,
      "grad_norm": 0.13642174005508423,
      "learning_rate": 2.9224755683325835e-07,
      "loss": 0.3271,
      "step": 6519
    },
    {
      "epoch": 1.956782713085234,
      "grad_norm": 0.16053105890750885,
      "learning_rate": 2.882597937755249e-07,
      "loss": 0.3601,
      "step": 6520
    },
    {
      "epoch": 1.9570828331332533,
      "grad_norm": 0.13160178065299988,
      "learning_rate": 2.8429938549037195e-07,
      "loss": 0.3119,
      "step": 6521
    },
    {
      "epoch": 1.9573829531812725,
      "grad_norm": 0.13854114711284637,
      "learning_rate": 2.803663330642747e-07,
      "loss": 0.3053,
      "step": 6522
    },
    {
      "epoch": 1.9576830732292918,
      "grad_norm": 0.13475844264030457,
      "learning_rate": 2.7646063757623684e-07,
      "loss": 0.3062,
      "step": 6523
    },
    {
      "epoch": 1.957983193277311,
      "grad_norm": 0.1336299628019333,
      "learning_rate": 2.7258230009774564e-07,
      "loss": 0.3118,
      "step": 6524
    },
    {
      "epoch": 1.9582833133253301,
      "grad_norm": 0.1466909497976303,
      "learning_rate": 2.6873132169275005e-07,
      "loss": 0.3514,
      "step": 6525
    },
    {
      "epoch": 1.9585834333733494,
      "grad_norm": 0.12885448336601257,
      "learning_rate": 2.649077034177494e-07,
      "loss": 0.297,
      "step": 6526
    },
    {
      "epoch": 1.9588835534213684,
      "grad_norm": 0.12889419496059418,
      "learning_rate": 2.6111144632169347e-07,
      "loss": 0.2792,
      "step": 6527
    },
    {
      "epoch": 1.9591836734693877,
      "grad_norm": 0.24885424971580505,
      "learning_rate": 2.5734255144604923e-07,
      "loss": 0.3579,
      "step": 6528
    },
    {
      "epoch": 1.959483793517407,
      "grad_norm": 0.1575038582086563,
      "learning_rate": 2.5360101982476735e-07,
      "loss": 0.3783,
      "step": 6529
    },
    {
      "epoch": 1.9597839135654262,
      "grad_norm": 0.16567088663578033,
      "learning_rate": 2.498868524843045e-07,
      "loss": 0.3695,
      "step": 6530
    },
    {
      "epoch": 1.9600840336134455,
      "grad_norm": 0.14174482226371765,
      "learning_rate": 2.462000504435791e-07,
      "loss": 0.3543,
      "step": 6531
    },
    {
      "epoch": 1.9603841536614646,
      "grad_norm": 0.1361747682094574,
      "learning_rate": 2.4254061471403745e-07,
      "loss": 0.2955,
      "step": 6532
    },
    {
      "epoch": 1.9606842737094838,
      "grad_norm": 0.1308164745569229,
      "learning_rate": 2.389085462995988e-07,
      "loss": 0.2976,
      "step": 6533
    },
    {
      "epoch": 1.9609843937575029,
      "grad_norm": 0.1284637451171875,
      "learning_rate": 2.3530384619668833e-07,
      "loss": 0.2951,
      "step": 6534
    },
    {
      "epoch": 1.9612845138055222,
      "grad_norm": 0.13373473286628723,
      "learning_rate": 2.3172651539420387e-07,
      "loss": 0.3109,
      "step": 6535
    },
    {
      "epoch": 1.9615846338535414,
      "grad_norm": 0.1285080760717392,
      "learning_rate": 2.2817655487353819e-07,
      "loss": 0.3022,
      "step": 6536
    },
    {
      "epoch": 1.9618847539015607,
      "grad_norm": 0.14141826331615448,
      "learning_rate": 2.246539656086011e-07,
      "loss": 0.336,
      "step": 6537
    },
    {
      "epoch": 1.96218487394958,
      "grad_norm": 0.1411728709936142,
      "learning_rate": 2.2115874856577512e-07,
      "loss": 0.3233,
      "step": 6538
    },
    {
      "epoch": 1.962484993997599,
      "grad_norm": 0.1605583131313324,
      "learning_rate": 2.1769090470391552e-07,
      "loss": 0.3788,
      "step": 6539
    },
    {
      "epoch": 1.9627851140456183,
      "grad_norm": 0.13145780563354492,
      "learning_rate": 2.1425043497439456e-07,
      "loss": 0.3067,
      "step": 6540
    },
    {
      "epoch": 1.9630852340936373,
      "grad_norm": 0.39733070135116577,
      "learning_rate": 2.108373403210573e-07,
      "loss": 0.3441,
      "step": 6541
    },
    {
      "epoch": 1.9633853541416566,
      "grad_norm": 0.1398887187242508,
      "learning_rate": 2.0745162168026576e-07,
      "loss": 0.3276,
      "step": 6542
    },
    {
      "epoch": 1.9636854741896759,
      "grad_norm": 0.14685961604118347,
      "learning_rate": 2.040932799808326e-07,
      "loss": 0.3502,
      "step": 6543
    },
    {
      "epoch": 1.9639855942376951,
      "grad_norm": 0.137023463845253,
      "learning_rate": 2.0076231614409858e-07,
      "loss": 0.3303,
      "step": 6544
    },
    {
      "epoch": 1.9642857142857144,
      "grad_norm": 0.1411551833152771,
      "learning_rate": 1.9745873108385494e-07,
      "loss": 0.3581,
      "step": 6545
    },
    {
      "epoch": 1.9645858343337335,
      "grad_norm": 0.13286294043064117,
      "learning_rate": 1.9418252570642115e-07,
      "loss": 0.2939,
      "step": 6546
    },
    {
      "epoch": 1.9648859543817527,
      "grad_norm": 0.13654114305973053,
      "learning_rate": 1.9093370091057826e-07,
      "loss": 0.2977,
      "step": 6547
    },
    {
      "epoch": 1.9651860744297718,
      "grad_norm": 0.14429955184459686,
      "learning_rate": 1.8771225758761334e-07,
      "loss": 0.3418,
      "step": 6548
    },
    {
      "epoch": 1.965486194477791,
      "grad_norm": 0.13695666193962097,
      "learning_rate": 1.84518196621275e-07,
      "loss": 0.3337,
      "step": 6549
    },
    {
      "epoch": 1.9657863145258103,
      "grad_norm": 0.1368245631456375,
      "learning_rate": 1.8135151888782899e-07,
      "loss": 0.3357,
      "step": 6550
    },
    {
      "epoch": 1.9660864345738296,
      "grad_norm": 0.13573360443115234,
      "learning_rate": 1.7821222525601367e-07,
      "loss": 0.3194,
      "step": 6551
    },
    {
      "epoch": 1.9663865546218489,
      "grad_norm": 0.12387190759181976,
      "learning_rate": 1.751003165870624e-07,
      "loss": 0.2838,
      "step": 6552
    },
    {
      "epoch": 1.966686674669868,
      "grad_norm": 0.1596154421567917,
      "learning_rate": 1.7201579373469222e-07,
      "loss": 0.2843,
      "step": 6553
    },
    {
      "epoch": 1.9669867947178872,
      "grad_norm": 0.1379440575838089,
      "learning_rate": 1.68958657545093e-07,
      "loss": 0.3187,
      "step": 6554
    },
    {
      "epoch": 1.9672869147659062,
      "grad_norm": 0.14830918610095978,
      "learning_rate": 1.6592890885697154e-07,
      "loss": 0.3553,
      "step": 6555
    },
    {
      "epoch": 1.9675870348139255,
      "grad_norm": 0.1428672969341278,
      "learning_rate": 1.6292654850149635e-07,
      "loss": 0.3343,
      "step": 6556
    },
    {
      "epoch": 1.9678871548619448,
      "grad_norm": 0.1255159080028534,
      "learning_rate": 1.5995157730233078e-07,
      "loss": 0.2831,
      "step": 6557
    },
    {
      "epoch": 1.968187274909964,
      "grad_norm": 0.13673074543476105,
      "learning_rate": 1.57003996075622e-07,
      "loss": 0.3325,
      "step": 6558
    },
    {
      "epoch": 1.9684873949579833,
      "grad_norm": 0.1297430694103241,
      "learning_rate": 1.5408380563001201e-07,
      "loss": 0.3088,
      "step": 6559
    },
    {
      "epoch": 1.9687875150060024,
      "grad_norm": 0.14209860563278198,
      "learning_rate": 1.5119100676662667e-07,
      "loss": 0.3209,
      "step": 6560
    },
    {
      "epoch": 1.9690876350540216,
      "grad_norm": 0.14692017436027527,
      "learning_rate": 1.483256002790534e-07,
      "loss": 0.3308,
      "step": 6561
    },
    {
      "epoch": 1.9693877551020407,
      "grad_norm": 0.22346758842468262,
      "learning_rate": 1.454875869533967e-07,
      "loss": 0.3626,
      "step": 6562
    },
    {
      "epoch": 1.96968787515006,
      "grad_norm": 0.13532333076000214,
      "learning_rate": 1.426769675682227e-07,
      "loss": 0.3124,
      "step": 6563
    },
    {
      "epoch": 1.9699879951980792,
      "grad_norm": 0.14279678463935852,
      "learning_rate": 1.3989374289461453e-07,
      "loss": 0.3689,
      "step": 6564
    },
    {
      "epoch": 1.9702881152460985,
      "grad_norm": 0.13842518627643585,
      "learning_rate": 1.3713791369609485e-07,
      "loss": 0.3364,
      "step": 6565
    },
    {
      "epoch": 1.9705882352941178,
      "grad_norm": 0.12624448537826538,
      "learning_rate": 1.344094807287033e-07,
      "loss": 0.3029,
      "step": 6566
    },
    {
      "epoch": 1.9708883553421368,
      "grad_norm": 0.14754411578178406,
      "learning_rate": 1.3170844474095223e-07,
      "loss": 0.3388,
      "step": 6567
    },
    {
      "epoch": 1.971188475390156,
      "grad_norm": 0.1261216700077057,
      "learning_rate": 1.290348064738378e-07,
      "loss": 0.2859,
      "step": 6568
    },
    {
      "epoch": 1.9714885954381751,
      "grad_norm": 0.14190185070037842,
      "learning_rate": 1.2638856666085108e-07,
      "loss": 0.3544,
      "step": 6569
    },
    {
      "epoch": 1.9717887154861944,
      "grad_norm": 0.14383383095264435,
      "learning_rate": 1.2376972602795578e-07,
      "loss": 0.3575,
      "step": 6570
    },
    {
      "epoch": 1.9720888355342137,
      "grad_norm": 0.13956892490386963,
      "learning_rate": 1.2117828529358832e-07,
      "loss": 0.3361,
      "step": 6571
    },
    {
      "epoch": 1.972388955582233,
      "grad_norm": 0.14807261526584625,
      "learning_rate": 1.1861424516869113e-07,
      "loss": 0.3459,
      "step": 6572
    },
    {
      "epoch": 1.9726890756302522,
      "grad_norm": 0.13360875844955444,
      "learning_rate": 1.160776063566793e-07,
      "loss": 0.315,
      "step": 6573
    },
    {
      "epoch": 1.9729891956782715,
      "grad_norm": 0.14176610112190247,
      "learning_rate": 1.1356836955345174e-07,
      "loss": 0.3671,
      "step": 6574
    },
    {
      "epoch": 1.9732893157262905,
      "grad_norm": 0.143992617726326,
      "learning_rate": 1.1108653544738001e-07,
      "loss": 0.3537,
      "step": 6575
    },
    {
      "epoch": 1.9735894357743096,
      "grad_norm": 0.1435699164867401,
      "learning_rate": 1.086321047193306e-07,
      "loss": 0.2967,
      "step": 6576
    },
    {
      "epoch": 1.9738895558223288,
      "grad_norm": 0.14607293903827667,
      "learning_rate": 1.0620507804265378e-07,
      "loss": 0.3161,
      "step": 6577
    },
    {
      "epoch": 1.974189675870348,
      "grad_norm": 0.14399291574954987,
      "learning_rate": 1.0380545608317249e-07,
      "loss": 0.3256,
      "step": 6578
    },
    {
      "epoch": 1.9744897959183674,
      "grad_norm": 0.14030712842941284,
      "learning_rate": 1.0143323949919348e-07,
      "loss": 0.3303,
      "step": 6579
    },
    {
      "epoch": 1.9747899159663866,
      "grad_norm": 0.1557692587375641,
      "learning_rate": 9.908842894151837e-08,
      "loss": 0.3634,
      "step": 6580
    },
    {
      "epoch": 1.975090036014406,
      "grad_norm": 0.1479206681251526,
      "learning_rate": 9.67710250533993e-08,
      "loss": 0.3574,
      "step": 6581
    },
    {
      "epoch": 1.975390156062425,
      "grad_norm": 0.14016804099082947,
      "learning_rate": 9.448102847060548e-08,
      "loss": 0.3373,
      "step": 6582
    },
    {
      "epoch": 1.9756902761104442,
      "grad_norm": 0.2394646406173706,
      "learning_rate": 9.221843982136768e-08,
      "loss": 0.301,
      "step": 6583
    },
    {
      "epoch": 1.9759903961584633,
      "grad_norm": 0.1508098840713501,
      "learning_rate": 8.998325972640053e-08,
      "loss": 0.3216,
      "step": 6584
    },
    {
      "epoch": 1.9762905162064826,
      "grad_norm": 0.13166458904743195,
      "learning_rate": 8.77754887989024e-08,
      "loss": 0.3017,
      "step": 6585
    },
    {
      "epoch": 1.9765906362545018,
      "grad_norm": 0.13379202783107758,
      "learning_rate": 8.559512764454436e-08,
      "loss": 0.3,
      "step": 6586
    },
    {
      "epoch": 1.976890756302521,
      "grad_norm": 0.2006409913301468,
      "learning_rate": 8.344217686148125e-08,
      "loss": 0.3411,
      "step": 6587
    },
    {
      "epoch": 1.9771908763505404,
      "grad_norm": 0.1287575513124466,
      "learning_rate": 8.131663704035176e-08,
      "loss": 0.3021,
      "step": 6588
    },
    {
      "epoch": 1.9774909963985594,
      "grad_norm": 0.13200537860393524,
      "learning_rate": 7.921850876428937e-08,
      "loss": 0.3174,
      "step": 6589
    },
    {
      "epoch": 1.9777911164465787,
      "grad_norm": 0.13923829793930054,
      "learning_rate": 7.714779260886707e-08,
      "loss": 0.3576,
      "step": 6590
    },
    {
      "epoch": 1.9780912364945977,
      "grad_norm": 0.15228983759880066,
      "learning_rate": 7.510448914217483e-08,
      "loss": 0.3829,
      "step": 6591
    },
    {
      "epoch": 1.978391356542617,
      "grad_norm": 0.12499164044857025,
      "learning_rate": 7.308859892477538e-08,
      "loss": 0.2745,
      "step": 6592
    },
    {
      "epoch": 1.9786914765906363,
      "grad_norm": 0.12901915609836578,
      "learning_rate": 7.1100122509693e-08,
      "loss": 0.2978,
      "step": 6593
    },
    {
      "epoch": 1.9789915966386555,
      "grad_norm": 0.4345323145389557,
      "learning_rate": 6.9139060442458e-08,
      "loss": 0.3215,
      "step": 6594
    },
    {
      "epoch": 1.9792917166866748,
      "grad_norm": 0.14252708852291107,
      "learning_rate": 6.720541326105112e-08,
      "loss": 0.3377,
      "step": 6595
    },
    {
      "epoch": 1.9795918367346939,
      "grad_norm": 0.13123819231987,
      "learning_rate": 6.529918149594805e-08,
      "loss": 0.2767,
      "step": 6596
    },
    {
      "epoch": 1.9798919567827131,
      "grad_norm": 0.13903144001960754,
      "learning_rate": 6.342036567009713e-08,
      "loss": 0.3356,
      "step": 6597
    },
    {
      "epoch": 1.9801920768307322,
      "grad_norm": 0.12551505863666534,
      "learning_rate": 6.15689662989527e-08,
      "loss": 0.282,
      "step": 6598
    },
    {
      "epoch": 1.9804921968787514,
      "grad_norm": 0.13220173120498657,
      "learning_rate": 5.97449838903974e-08,
      "loss": 0.3032,
      "step": 6599
    },
    {
      "epoch": 1.9807923169267707,
      "grad_norm": 0.13848182559013367,
      "learning_rate": 5.7948418944842043e-08,
      "loss": 0.313,
      "step": 6600
    },
    {
      "epoch": 1.98109243697479,
      "grad_norm": 0.13549165427684784,
      "learning_rate": 5.617927195513684e-08,
      "loss": 0.2995,
      "step": 6601
    },
    {
      "epoch": 1.9813925570228093,
      "grad_norm": 0.12920106947422028,
      "learning_rate": 5.443754340663798e-08,
      "loss": 0.2794,
      "step": 6602
    },
    {
      "epoch": 1.9816926770708283,
      "grad_norm": 0.13664115965366364,
      "learning_rate": 5.2723233777163264e-08,
      "loss": 0.3129,
      "step": 6603
    },
    {
      "epoch": 1.9819927971188476,
      "grad_norm": 0.13307543098926544,
      "learning_rate": 5.103634353701425e-08,
      "loss": 0.3056,
      "step": 6604
    },
    {
      "epoch": 1.9822929171668666,
      "grad_norm": 0.12146459519863129,
      "learning_rate": 4.937687314897632e-08,
      "loss": 0.273,
      "step": 6605
    },
    {
      "epoch": 1.982593037214886,
      "grad_norm": 0.14381779730319977,
      "learning_rate": 4.774482306829642e-08,
      "loss": 0.3365,
      "step": 6606
    },
    {
      "epoch": 1.9828931572629052,
      "grad_norm": 0.13763312995433807,
      "learning_rate": 4.6140193742716386e-08,
      "loss": 0.2844,
      "step": 6607
    },
    {
      "epoch": 1.9831932773109244,
      "grad_norm": 0.14180998504161835,
      "learning_rate": 4.4562985612439654e-08,
      "loss": 0.3379,
      "step": 6608
    },
    {
      "epoch": 1.9834933973589437,
      "grad_norm": 0.13503345847129822,
      "learning_rate": 4.3013199110164546e-08,
      "loss": 0.3064,
      "step": 6609
    },
    {
      "epoch": 1.9837935174069627,
      "grad_norm": 0.1461196392774582,
      "learning_rate": 4.149083466105097e-08,
      "loss": 0.3128,
      "step": 6610
    },
    {
      "epoch": 1.984093637454982,
      "grad_norm": 0.13612636923789978,
      "learning_rate": 3.999589268274262e-08,
      "loss": 0.314,
      "step": 6611
    },
    {
      "epoch": 1.984393757503001,
      "grad_norm": 0.13711991906166077,
      "learning_rate": 3.852837358535588e-08,
      "loss": 0.333,
      "step": 6612
    },
    {
      "epoch": 1.9846938775510203,
      "grad_norm": 0.13318751752376556,
      "learning_rate": 3.708827777150203e-08,
      "loss": 0.3011,
      "step": 6613
    },
    {
      "epoch": 1.9849939975990396,
      "grad_norm": 0.13475048542022705,
      "learning_rate": 3.5675605636242834e-08,
      "loss": 0.2911,
      "step": 6614
    },
    {
      "epoch": 1.9852941176470589,
      "grad_norm": 0.141270250082016,
      "learning_rate": 3.429035756713494e-08,
      "loss": 0.2776,
      "step": 6615
    },
    {
      "epoch": 1.9855942376950781,
      "grad_norm": 0.13348527252674103,
      "learning_rate": 3.29325339441966e-08,
      "loss": 0.3176,
      "step": 6616
    },
    {
      "epoch": 1.9858943577430972,
      "grad_norm": 0.1243927925825119,
      "learning_rate": 3.160213513994093e-08,
      "loss": 0.2785,
      "step": 6617
    },
    {
      "epoch": 1.9861944777911165,
      "grad_norm": 0.12726986408233643,
      "learning_rate": 3.029916151934264e-08,
      "loss": 0.2822,
      "step": 6618
    },
    {
      "epoch": 1.9864945978391355,
      "grad_norm": 0.1278541535139084,
      "learning_rate": 2.9023613439860264e-08,
      "loss": 0.2964,
      "step": 6619
    },
    {
      "epoch": 1.9867947178871548,
      "grad_norm": 0.13514913618564606,
      "learning_rate": 2.7775491251413877e-08,
      "loss": 0.2679,
      "step": 6620
    },
    {
      "epoch": 1.987094837935174,
      "grad_norm": 0.14197710156440735,
      "learning_rate": 2.655479529642957e-08,
      "loss": 0.3246,
      "step": 6621
    },
    {
      "epoch": 1.9873949579831933,
      "grad_norm": 0.1379374861717224,
      "learning_rate": 2.536152590978391e-08,
      "loss": 0.2937,
      "step": 6622
    },
    {
      "epoch": 1.9876950780312126,
      "grad_norm": 0.14387285709381104,
      "learning_rate": 2.4195683418826166e-08,
      "loss": 0.3585,
      "step": 6623
    },
    {
      "epoch": 1.9879951980792316,
      "grad_norm": 0.1456388235092163,
      "learning_rate": 2.305726814341158e-08,
      "loss": 0.362,
      "step": 6624
    },
    {
      "epoch": 1.988295318127251,
      "grad_norm": 0.13793382048606873,
      "learning_rate": 2.1946280395845896e-08,
      "loss": 0.3162,
      "step": 6625
    },
    {
      "epoch": 1.98859543817527,
      "grad_norm": 0.14543136954307556,
      "learning_rate": 2.0862720480896437e-08,
      "loss": 0.3259,
      "step": 6626
    },
    {
      "epoch": 1.9888955582232892,
      "grad_norm": 0.14183880388736725,
      "learning_rate": 1.9806588695847616e-08,
      "loss": 0.3312,
      "step": 6627
    },
    {
      "epoch": 1.9891956782713085,
      "grad_norm": 0.14327046275138855,
      "learning_rate": 1.8777885330434343e-08,
      "loss": 0.3273,
      "step": 6628
    },
    {
      "epoch": 1.9894957983193278,
      "grad_norm": 0.14274021983146667,
      "learning_rate": 1.77766106668531e-08,
      "loss": 0.3486,
      "step": 6629
    },
    {
      "epoch": 1.989795918367347,
      "grad_norm": 0.13794536888599396,
      "learning_rate": 1.6802764979817474e-08,
      "loss": 0.3162,
      "step": 6630
    },
    {
      "epoch": 1.9900960384153663,
      "grad_norm": 0.1309654861688614,
      "learning_rate": 1.5856348536469335e-08,
      "loss": 0.2896,
      "step": 6631
    },
    {
      "epoch": 1.9903961584633854,
      "grad_norm": 0.12529993057250977,
      "learning_rate": 1.4937361596456533e-08,
      "loss": 0.2938,
      "step": 6632
    },
    {
      "epoch": 1.9906962785114044,
      "grad_norm": 0.12475324422121048,
      "learning_rate": 1.4045804411888519e-08,
      "loss": 0.3028,
      "step": 6633
    },
    {
      "epoch": 1.9909963985594237,
      "grad_norm": 0.14064736664295197,
      "learning_rate": 1.3181677227358524e-08,
      "loss": 0.3366,
      "step": 6634
    },
    {
      "epoch": 1.991296518607443,
      "grad_norm": 0.15773440897464752,
      "learning_rate": 1.2344980279932472e-08,
      "loss": 0.3596,
      "step": 6635
    },
    {
      "epoch": 1.9915966386554622,
      "grad_norm": 0.13815179467201233,
      "learning_rate": 1.153571379913787e-08,
      "loss": 0.3334,
      "step": 6636
    },
    {
      "epoch": 1.9918967587034815,
      "grad_norm": 0.12979494035243988,
      "learning_rate": 1.075387800699712e-08,
      "loss": 0.3144,
      "step": 6637
    },
    {
      "epoch": 1.9921968787515008,
      "grad_norm": 0.1313927173614502,
      "learning_rate": 9.999473117994207e-09,
      "loss": 0.2959,
      "step": 6638
    },
    {
      "epoch": 1.9924969987995198,
      "grad_norm": 0.15096567571163177,
      "learning_rate": 9.272499339096907e-09,
      "loss": 0.3656,
      "step": 6639
    },
    {
      "epoch": 1.9927971188475389,
      "grad_norm": 0.13682463765144348,
      "learning_rate": 8.572956869734583e-09,
      "loss": 0.3431,
      "step": 6640
    },
    {
      "epoch": 1.9930972388955581,
      "grad_norm": 0.14152565598487854,
      "learning_rate": 7.900845901820386e-09,
      "loss": 0.3461,
      "step": 6641
    },
    {
      "epoch": 1.9933973589435774,
      "grad_norm": 0.13964256644248962,
      "learning_rate": 7.256166619729055e-09,
      "loss": 0.3275,
      "step": 6642
    },
    {
      "epoch": 1.9936974789915967,
      "grad_norm": 0.13702847063541412,
      "learning_rate": 6.638919200352423e-09,
      "loss": 0.3272,
      "step": 6643
    },
    {
      "epoch": 1.993997599039616,
      "grad_norm": 0.13425695896148682,
      "learning_rate": 6.049103812988399e-09,
      "loss": 0.3083,
      "step": 6644
    },
    {
      "epoch": 1.9942977190876352,
      "grad_norm": 0.1240556463599205,
      "learning_rate": 5.486720619474195e-09,
      "loss": 0.2745,
      "step": 6645
    },
    {
      "epoch": 1.9945978391356542,
      "grad_norm": 0.1433292180299759,
      "learning_rate": 4.9517697740864014e-09,
      "loss": 0.3145,
      "step": 6646
    },
    {
      "epoch": 1.9948979591836735,
      "grad_norm": 0.1353660225868225,
      "learning_rate": 4.444251423563195e-09,
      "loss": 0.316,
      "step": 6647
    },
    {
      "epoch": 1.9951980792316926,
      "grad_norm": 0.1324574053287506,
      "learning_rate": 3.964165707170953e-09,
      "loss": 0.3157,
      "step": 6648
    },
    {
      "epoch": 1.9954981992797118,
      "grad_norm": 0.14322207868099213,
      "learning_rate": 3.5115127565821248e-09,
      "loss": 0.3434,
      "step": 6649
    },
    {
      "epoch": 1.995798319327731,
      "grad_norm": 0.143235981464386,
      "learning_rate": 3.0862926959973617e-09,
      "loss": 0.2979,
      "step": 6650
    },
    {
      "epoch": 1.9960984393757504,
      "grad_norm": 0.14621366560459137,
      "learning_rate": 2.6885056420677955e-09,
      "loss": 0.3256,
      "step": 6651
    },
    {
      "epoch": 1.9963985594237696,
      "grad_norm": 0.12844175100326538,
      "learning_rate": 2.3181517039061462e-09,
      "loss": 0.2874,
      "step": 6652
    },
    {
      "epoch": 1.9966986794717887,
      "grad_norm": 0.14089906215667725,
      "learning_rate": 1.975230983142229e-09,
      "loss": 0.3557,
      "step": 6653
    },
    {
      "epoch": 1.996998799519808,
      "grad_norm": 0.13490818440914154,
      "learning_rate": 1.6597435738341384e-09,
      "loss": 0.3061,
      "step": 6654
    },
    {
      "epoch": 1.997298919567827,
      "grad_norm": 0.13551375269889832,
      "learning_rate": 1.3716895625348614e-09,
      "loss": 0.2963,
      "step": 6655
    },
    {
      "epoch": 1.9975990396158463,
      "grad_norm": 0.14081865549087524,
      "learning_rate": 1.111069028258971e-09,
      "loss": 0.2799,
      "step": 6656
    },
    {
      "epoch": 1.9978991596638656,
      "grad_norm": 0.13580313324928284,
      "learning_rate": 8.778820425270339e-10,
      "loss": 0.3239,
      "step": 6657
    },
    {
      "epoch": 1.9981992797118848,
      "grad_norm": 0.13394388556480408,
      "learning_rate": 6.721286692989992e-10,
      "loss": 0.3128,
      "step": 6658
    },
    {
      "epoch": 1.998499399759904,
      "grad_norm": 0.13376793265342712,
      "learning_rate": 4.938089650186051e-10,
      "loss": 0.3188,
      "step": 6659
    },
    {
      "epoch": 1.9987995198079231,
      "grad_norm": 0.1346546858549118,
      "learning_rate": 3.429229786133803e-10,
      "loss": 0.3196,
      "step": 6660
    },
    {
      "epoch": 1.9990996398559424,
      "grad_norm": 0.13749544322490692,
      "learning_rate": 2.1947075147243924e-10,
      "loss": 0.3127,
      "step": 6661
    },
    {
      "epoch": 1.9993997599039615,
      "grad_norm": 0.17157703638076782,
      "learning_rate": 1.2345231745758412e-10,
      "loss": 0.3164,
      "step": 6662
    },
    {
      "epoch": 1.9996998799519807,
      "grad_norm": 0.13034595549106598,
      "learning_rate": 5.486770291440735e-11,
      "loss": 0.3217,
      "step": 6663
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.12287092208862305,
      "learning_rate": 1.3716926672291408e-11,
      "loss": 0.2773,
      "step": 6664
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.27432090044021606,
      "eval_runtime": 3791.9201,
      "eval_samples_per_second": 12.219,
      "eval_steps_per_second": 0.764,
      "step": 6664
    }
  ],
  "logging_steps": 1,
  "max_steps": 6664,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6.481222963259716e+19,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}