diff --git "a/trainer_state.json" "b/trainer_state.json"
new file mode 100644--- /dev/null
+++ "b/trainer_state.json"
@@ -0,0 +1,87533 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.9019084382553483,
+  "eval_steps": 500,
+  "global_step": 12500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 7.215267506042787e-05,
+      "grad_norm": 0.4064047336578369,
+      "learning_rate": 4e-05,
+      "loss": 0.4446,
+      "step": 1
+    },
+    {
+      "epoch": 0.00014430535012085574,
+      "grad_norm": 0.4091169834136963,
+      "learning_rate": 8e-05,
+      "loss": 0.4237,
+      "step": 2
+    },
+    {
+      "epoch": 0.0002164580251812836,
+      "grad_norm": 0.3088938593864441,
+      "learning_rate": 0.00012,
+      "loss": 0.4564,
+      "step": 3
+    },
+    {
+      "epoch": 0.0002886107002417115,
+      "grad_norm": 0.2945241630077362,
+      "learning_rate": 0.00016,
+      "loss": 0.405,
+      "step": 4
+    },
+    {
+      "epoch": 0.0003607633753021393,
+      "grad_norm": 0.311696320772171,
+      "learning_rate": 0.0002,
+      "loss": 0.3555,
+      "step": 5
+    },
+    {
+      "epoch": 0.0004329160503625672,
+      "grad_norm": 0.26907986402511597,
+      "learning_rate": 0.00019999711358060327,
+      "loss": 0.3276,
+      "step": 6
+    },
+    {
+      "epoch": 0.000505068725422995,
+      "grad_norm": 0.43013739585876465,
+      "learning_rate": 0.00019999422716120654,
+      "loss": 0.2577,
+      "step": 7
+    },
+    {
+      "epoch": 0.000577221400483423,
+      "grad_norm": 0.5789860486984253,
+      "learning_rate": 0.0001999913407418098,
+      "loss": 0.328,
+      "step": 8
+    },
+    {
+      "epoch": 0.0006493740755438508,
+      "grad_norm": 0.22699680924415588,
+      "learning_rate": 0.00019998845432241306,
+      "loss": 0.2613,
+      "step": 9
+    },
+    {
+      "epoch": 0.0007215267506042786,
+      "grad_norm": 0.21423082053661346,
+      "learning_rate": 0.00019998556790301632,
+      "loss": 0.2668,
+      "step": 10
+    },
+    {
+      "epoch": 0.0007936794256647065,
+      "grad_norm": 0.22188498079776764,
+      "learning_rate": 0.0001999826814836196,
+      "loss": 0.2194,
+      "step": 11
+    },
+    {
+      "epoch": 0.0008658321007251344,
+      "grad_norm": 0.15280145406723022,
+      "learning_rate": 0.00019997979506422285,
+      "loss": 0.2203,
+      "step": 12
+    },
+    {
+      "epoch": 0.0009379847757855622,
+      "grad_norm": 0.19237686693668365,
+      "learning_rate": 0.0001999769086448261,
+      "loss": 0.2326,
+      "step": 13
+    },
+    {
+      "epoch": 0.00101013745084599,
+      "grad_norm": 0.16257072985172272,
+      "learning_rate": 0.00019997402222542935,
+      "loss": 0.2396,
+      "step": 14
+    },
+    {
+      "epoch": 0.001082290125906418,
+      "grad_norm": 0.16879022121429443,
+      "learning_rate": 0.0001999711358060326,
+      "loss": 0.2542,
+      "step": 15
+    },
+    {
+      "epoch": 0.001154442800966846,
+      "grad_norm": 0.15356074273586273,
+      "learning_rate": 0.0001999682493866359,
+      "loss": 0.2433,
+      "step": 16
+    },
+    {
+      "epoch": 0.0012265954760272736,
+      "grad_norm": 0.1393197476863861,
+      "learning_rate": 0.00019996536296723916,
+      "loss": 0.214,
+      "step": 17
+    },
+    {
+      "epoch": 0.0012987481510877016,
+      "grad_norm": 0.14472635090351105,
+      "learning_rate": 0.00019996247654784243,
+      "loss": 0.2378,
+      "step": 18
+    },
+    {
+      "epoch": 0.0013709008261481295,
+      "grad_norm": 0.2018187940120697,
+      "learning_rate": 0.00019995959012844566,
+      "loss": 0.2555,
+      "step": 19
+    },
+    {
+      "epoch": 0.0014430535012085572,
+      "grad_norm": 0.1205526664853096,
+      "learning_rate": 0.00019995670370904893,
+      "loss": 0.2554,
+      "step": 20
+    },
+    {
+      "epoch": 0.0015152061762689851,
+      "grad_norm": 0.17153723537921906,
+      "learning_rate": 0.0001999538172896522,
+      "loss": 0.2014,
+      "step": 21
+    },
+    {
+      "epoch": 0.001587358851329413,
+      "grad_norm": 0.1561480462551117,
+      "learning_rate": 0.00019995093087025545,
+      "loss": 0.2274,
+      "step": 22
+    },
+    {
+      "epoch": 0.0016595115263898408,
+      "grad_norm": 0.13652458786964417,
+      "learning_rate": 0.00019994804445085874,
+      "loss": 0.2294,
+      "step": 23
+    },
+    {
+      "epoch": 0.0017316642014502687,
+      "grad_norm": 0.13224199414253235,
+      "learning_rate": 0.00019994515803146198,
+      "loss": 0.2178,
+      "step": 24
+    },
+    {
+      "epoch": 0.0018038168765106967,
+      "grad_norm": 0.13510029017925262,
+      "learning_rate": 0.00019994227161206524,
+      "loss": 0.2151,
+      "step": 25
+    },
+    {
+      "epoch": 0.0018759695515711244,
+      "grad_norm": 0.11560554802417755,
+      "learning_rate": 0.0001999393851926685,
+      "loss": 0.2607,
+      "step": 26
+    },
+    {
+      "epoch": 0.0019481222266315523,
+      "grad_norm": 0.13390889763832092,
+      "learning_rate": 0.00019993649877327177,
+      "loss": 0.2261,
+      "step": 27
+    },
+    {
+      "epoch": 0.00202027490169198,
+      "grad_norm": 0.12282073497772217,
+      "learning_rate": 0.00019993361235387503,
+      "loss": 0.2208,
+      "step": 28
+    },
+    {
+      "epoch": 0.002092427576752408,
+      "grad_norm": 0.12827306985855103,
+      "learning_rate": 0.0001999307259344783,
+      "loss": 0.2006,
+      "step": 29
+    },
+    {
+      "epoch": 0.002164580251812836,
+      "grad_norm": 0.12924723327159882,
+      "learning_rate": 0.00019992783951508156,
+      "loss": 0.1889,
+      "step": 30
+    },
+    {
+      "epoch": 0.002236732926873264,
+      "grad_norm": 0.13463133573532104,
+      "learning_rate": 0.00019992495309568482,
+      "loss": 0.2547,
+      "step": 31
+    },
+    {
+      "epoch": 0.002308885601933692,
+      "grad_norm": 0.20450986921787262,
+      "learning_rate": 0.00019992206667628808,
+      "loss": 0.1662,
+      "step": 32
+    },
+    {
+      "epoch": 0.0023810382769941197,
+      "grad_norm": 0.16089263558387756,
+      "learning_rate": 0.00019991918025689134,
+      "loss": 0.2459,
+      "step": 33
+    },
+    {
+      "epoch": 0.0024531909520545472,
+      "grad_norm": 0.2842683792114258,
+      "learning_rate": 0.0001999162938374946,
+      "loss": 0.2433,
+      "step": 34
+    },
+    {
+      "epoch": 0.002525343627114975,
+      "grad_norm": 0.13188135623931885,
+      "learning_rate": 0.00019991340741809784,
+      "loss": 0.2258,
+      "step": 35
+    },
+    {
+      "epoch": 0.002597496302175403,
+      "grad_norm": 0.11618123203516006,
+      "learning_rate": 0.0001999105209987011,
+      "loss": 0.2393,
+      "step": 36
+    },
+    {
+      "epoch": 0.002669648977235831,
+      "grad_norm": 0.12183308601379395,
+      "learning_rate": 0.0001999076345793044,
+      "loss": 0.2065,
+      "step": 37
+    },
+    {
+      "epoch": 0.002741801652296259,
+      "grad_norm": 0.11567720770835876,
+      "learning_rate": 0.00019990474815990766,
+      "loss": 0.2237,
+      "step": 38
+    },
+    {
+      "epoch": 0.002813954327356687,
+      "grad_norm": 0.12731248140335083,
+      "learning_rate": 0.00019990186174051092,
+      "loss": 0.1821,
+      "step": 39
+    },
+    {
+      "epoch": 0.0028861070024171144,
+      "grad_norm": 0.11343208700418472,
+      "learning_rate": 0.00019989897532111416,
+      "loss": 0.2308,
+      "step": 40
+    },
+    {
+      "epoch": 0.0029582596774775424,
+      "grad_norm": 0.09807998687028885,
+      "learning_rate": 0.00019989608890171742,
+      "loss": 0.2089,
+      "step": 41
+    },
+    {
+      "epoch": 0.0030304123525379703,
+      "grad_norm": 0.11223408579826355,
+      "learning_rate": 0.00019989320248232068,
+      "loss": 0.2324,
+      "step": 42
+    },
+    {
+      "epoch": 0.0031025650275983982,
+      "grad_norm": 0.10409168899059296,
+      "learning_rate": 0.00019989031606292395,
+      "loss": 0.209,
+      "step": 43
+    },
+    {
+      "epoch": 0.003174717702658826,
+      "grad_norm": 0.3627258837223053,
+      "learning_rate": 0.00019988742964352724,
+      "loss": 0.1686,
+      "step": 44
+    },
+    {
+      "epoch": 0.003246870377719254,
+      "grad_norm": 0.11073557287454605,
+      "learning_rate": 0.00019988454322413047,
+      "loss": 0.1728,
+      "step": 45
+    },
+    {
+      "epoch": 0.0033190230527796816,
+      "grad_norm": 0.1800556778907776,
+      "learning_rate": 0.00019988165680473374,
+      "loss": 0.2397,
+      "step": 46
+    },
+    {
+      "epoch": 0.0033911757278401095,
+      "grad_norm": 0.11776737868785858,
+      "learning_rate": 0.000199878770385337,
+      "loss": 0.2432,
+      "step": 47
+    },
+    {
+      "epoch": 0.0034633284029005375,
+      "grad_norm": 0.11778844147920609,
+      "learning_rate": 0.00019987588396594026,
+      "loss": 0.2224,
+      "step": 48
+    },
+    {
+      "epoch": 0.0035354810779609654,
+      "grad_norm": 0.14010971784591675,
+      "learning_rate": 0.00019987299754654352,
+      "loss": 0.2071,
+      "step": 49
+    },
+    {
+      "epoch": 0.0036076337530213934,
+      "grad_norm": 0.20106203854084015,
+      "learning_rate": 0.0001998701111271468,
+      "loss": 0.215,
+      "step": 50
+    },
+    {
+      "epoch": 0.0036797864280818213,
+      "grad_norm": 0.2706080377101898,
+      "learning_rate": 0.00019986722470775005,
+      "loss": 0.2544,
+      "step": 51
+    },
+    {
+      "epoch": 0.003751939103142249,
+      "grad_norm": 0.3544776141643524,
+      "learning_rate": 0.0001998643382883533,
+      "loss": 0.2248,
+      "step": 52
+    },
+    {
+      "epoch": 0.0038240917782026767,
+      "grad_norm": 0.25506851077079773,
+      "learning_rate": 0.00019986145186895658,
+      "loss": 0.2055,
+      "step": 53
+    },
+    {
+      "epoch": 0.0038962444532631047,
+      "grad_norm": 0.14315347373485565,
+      "learning_rate": 0.00019985856544955984,
+      "loss": 0.2289,
+      "step": 54
+    },
+    {
+      "epoch": 0.003968397128323533,
+      "grad_norm": 0.11961130797863007,
+      "learning_rate": 0.0001998556790301631,
+      "loss": 0.1826,
+      "step": 55
+    },
+    {
+      "epoch": 0.00404054980338396,
+      "grad_norm": 0.09153182804584503,
+      "learning_rate": 0.00019985279261076634,
+      "loss": 0.2162,
+      "step": 56
+    },
+    {
+      "epoch": 0.0041127024784443885,
+      "grad_norm": 0.1307227462530136,
+      "learning_rate": 0.0001998499061913696,
+      "loss": 0.2662,
+      "step": 57
+    },
+    {
+      "epoch": 0.004184855153504816,
+      "grad_norm": 0.10103806108236313,
+      "learning_rate": 0.0001998470197719729,
+      "loss": 0.1798,
+      "step": 58
+    },
+    {
+      "epoch": 0.004257007828565244,
+      "grad_norm": 0.14712268114089966,
+      "learning_rate": 0.00019984413335257615,
+      "loss": 0.2556,
+      "step": 59
+    },
+    {
+      "epoch": 0.004329160503625672,
+      "grad_norm": 0.08857639878988266,
+      "learning_rate": 0.00019984124693317942,
+      "loss": 0.1957,
+      "step": 60
+    },
+    {
+      "epoch": 0.0044013131786861,
+      "grad_norm": 0.12688061594963074,
+      "learning_rate": 0.00019983836051378265,
+      "loss": 0.2482,
+      "step": 61
+    },
+    {
+      "epoch": 0.004473465853746528,
+      "grad_norm": 0.09386658668518066,
+      "learning_rate": 0.00019983547409438591,
+      "loss": 0.1949,
+      "step": 62
+    },
+    {
+      "epoch": 0.004545618528806955,
+      "grad_norm": 0.10285080969333649,
+      "learning_rate": 0.00019983258767498918,
+      "loss": 0.186,
+      "step": 63
+    },
+    {
+      "epoch": 0.004617771203867384,
+      "grad_norm": 0.08779972791671753,
+      "learning_rate": 0.00019982970125559244,
+      "loss": 0.178,
+      "step": 64
+    },
+    {
+      "epoch": 0.004689923878927811,
+      "grad_norm": 0.15989039838314056,
+      "learning_rate": 0.00019982681483619573,
+      "loss": 0.2476,
+      "step": 65
+    },
+    {
+      "epoch": 0.0047620765539882395,
+      "grad_norm": 0.14378827810287476,
+      "learning_rate": 0.00019982392841679897,
+      "loss": 0.2609,
+      "step": 66
+    },
+    {
+      "epoch": 0.004834229229048667,
+      "grad_norm": 0.10464286059141159,
+      "learning_rate": 0.00019982104199740223,
+      "loss": 0.1926,
+      "step": 67
+    },
+    {
+      "epoch": 0.0049063819041090945,
+      "grad_norm": 0.11753690242767334,
+      "learning_rate": 0.0001998181555780055,
+      "loss": 0.1781,
+      "step": 68
+    },
+    {
+      "epoch": 0.004978534579169523,
+      "grad_norm": 0.14723753929138184,
+      "learning_rate": 0.00019981526915860876,
+      "loss": 0.2429,
+      "step": 69
+    },
+    {
+      "epoch": 0.00505068725422995,
+      "grad_norm": 0.14722511172294617,
+      "learning_rate": 0.00019981238273921202,
+      "loss": 0.2199,
+      "step": 70
+    },
+    {
+      "epoch": 0.005122839929290379,
+      "grad_norm": 0.11642223596572876,
+      "learning_rate": 0.00019980949631981528,
+      "loss": 0.1837,
+      "step": 71
+    },
+    {
+      "epoch": 0.005194992604350806,
+      "grad_norm": 0.10480733215808868,
+      "learning_rate": 0.00019980660990041854,
+      "loss": 0.204,
+      "step": 72
+    },
+    {
+      "epoch": 0.005267145279411235,
+      "grad_norm": 0.09915363788604736,
+      "learning_rate": 0.0001998037234810218,
+      "loss": 0.1918,
+      "step": 73
+    },
+    {
+      "epoch": 0.005339297954471662,
+      "grad_norm": 0.1103401780128479,
+      "learning_rate": 0.00019980083706162507,
+      "loss": 0.165,
+      "step": 74
+    },
+    {
+      "epoch": 0.00541145062953209,
+      "grad_norm": 0.11738649010658264,
+      "learning_rate": 0.00019979795064222833,
+      "loss": 0.2065,
+      "step": 75
+    },
+    {
+      "epoch": 0.005483603304592518,
+      "grad_norm": 0.11540112644433975,
+      "learning_rate": 0.0001997950642228316,
+      "loss": 0.2224,
+      "step": 76
+    },
+    {
+      "epoch": 0.0055557559796529455,
+      "grad_norm": 0.08912346512079239,
+      "learning_rate": 0.00019979217780343483,
+      "loss": 0.182,
+      "step": 77
+    },
+    {
+      "epoch": 0.005627908654713374,
+      "grad_norm": 0.11579877883195877,
+      "learning_rate": 0.0001997892913840381,
+      "loss": 0.2424,
+      "step": 78
+    },
+    {
+      "epoch": 0.005700061329773801,
+      "grad_norm": 0.09065508097410202,
+      "learning_rate": 0.00019978640496464138,
+      "loss": 0.2211,
+      "step": 79
+    },
+    {
+      "epoch": 0.005772214004834229,
+      "grad_norm": 0.13304202258586884,
+      "learning_rate": 0.00019978351854524465,
+      "loss": 0.2186,
+      "step": 80
+    },
+    {
+      "epoch": 0.005844366679894657,
+      "grad_norm": 0.11378730833530426,
+      "learning_rate": 0.0001997806321258479,
+      "loss": 0.1959,
+      "step": 81
+    },
+    {
+      "epoch": 0.005916519354955085,
+      "grad_norm": 0.10128432512283325,
+      "learning_rate": 0.00019977774570645115,
+      "loss": 0.2345,
+      "step": 82
+    },
+    {
+      "epoch": 0.005988672030015513,
+      "grad_norm": 0.1330646276473999,
+      "learning_rate": 0.0001997748592870544,
+      "loss": 0.2207,
+      "step": 83
+    },
+    {
+      "epoch": 0.006060824705075941,
+      "grad_norm": 0.11121273785829544,
+      "learning_rate": 0.00019977197286765767,
+      "loss": 0.2075,
+      "step": 84
+    },
+    {
+      "epoch": 0.006132977380136369,
+      "grad_norm": 0.09679929167032242,
+      "learning_rate": 0.00019976908644826093,
+      "loss": 0.219,
+      "step": 85
+    },
+    {
+      "epoch": 0.0062051300551967965,
+      "grad_norm": 0.1264830380678177,
+      "learning_rate": 0.00019976620002886422,
+      "loss": 0.2433,
+      "step": 86
+    },
+    {
+      "epoch": 0.006277282730257224,
+      "grad_norm": 0.0941806361079216,
+      "learning_rate": 0.00019976331360946746,
+      "loss": 0.2117,
+      "step": 87
+    },
+    {
+      "epoch": 0.006349435405317652,
+      "grad_norm": 0.09046623110771179,
+      "learning_rate": 0.00019976042719007072,
+      "loss": 0.1889,
+      "step": 88
+    },
+    {
+      "epoch": 0.00642158808037808,
+      "grad_norm": 0.12082833051681519,
+      "learning_rate": 0.000199757540770674,
+      "loss": 0.227,
+      "step": 89
+    },
+    {
+      "epoch": 0.006493740755438508,
+      "grad_norm": 0.12156786024570465,
+      "learning_rate": 0.00019975465435127725,
+      "loss": 0.2019,
+      "step": 90
+    },
+    {
+      "epoch": 0.006565893430498936,
+      "grad_norm": 0.08919201791286469,
+      "learning_rate": 0.0001997517679318805,
+      "loss": 0.1537,
+      "step": 91
+    },
+    {
+      "epoch": 0.006638046105559363,
+      "grad_norm": 0.10201513767242432,
+      "learning_rate": 0.00019974888151248378,
+      "loss": 0.1898,
+      "step": 92
+    },
+    {
+      "epoch": 0.006710198780619792,
+      "grad_norm": 0.12830446660518646,
+      "learning_rate": 0.00019974599509308704,
+      "loss": 0.2381,
+      "step": 93
+    },
+    {
+      "epoch": 0.006782351455680219,
+      "grad_norm": 0.11241954565048218,
+      "learning_rate": 0.0001997431086736903,
+      "loss": 0.2021,
+      "step": 94
+    },
+    {
+      "epoch": 0.0068545041307406475,
+      "grad_norm": 0.11420100927352905,
+      "learning_rate": 0.00019974022225429356,
+      "loss": 0.188,
+      "step": 95
+    },
+    {
+      "epoch": 0.006926656805801075,
+      "grad_norm": 0.12041347473859787,
+      "learning_rate": 0.00019973733583489683,
+      "loss": 0.2477,
+      "step": 96
+    },
+    {
+      "epoch": 0.006998809480861503,
+      "grad_norm": 0.10603255778551102,
+      "learning_rate": 0.0001997344494155001,
+      "loss": 0.1971,
+      "step": 97
+    },
+    {
+      "epoch": 0.007070962155921931,
+      "grad_norm": 0.1029348373413086,
+      "learning_rate": 0.00019973156299610333,
+      "loss": 0.2247,
+      "step": 98
+    },
+    {
+      "epoch": 0.007143114830982358,
+      "grad_norm": 0.1543446034193039,
+      "learning_rate": 0.0001997286765767066,
+      "loss": 0.2284,
+      "step": 99
+    },
+    {
+      "epoch": 0.007215267506042787,
+      "grad_norm": 0.10962533950805664,
+      "learning_rate": 0.00019972579015730988,
+      "loss": 0.1839,
+      "step": 100
+    },
+    {
+      "epoch": 0.007287420181103214,
+      "grad_norm": 0.15612627565860748,
+      "learning_rate": 0.00019972290373791314,
+      "loss": 0.1894,
+      "step": 101
+    },
+    {
+      "epoch": 0.007359572856163643,
+      "grad_norm": 0.11387870460748672,
+      "learning_rate": 0.0001997200173185164,
+      "loss": 0.1916,
+      "step": 102
+    },
+    {
+      "epoch": 0.00743172553122407,
+      "grad_norm": 0.09269845485687256,
+      "learning_rate": 0.00019971713089911964,
+      "loss": 0.162,
+      "step": 103
+    },
+    {
+      "epoch": 0.007503878206284498,
+      "grad_norm": 0.10952046513557434,
+      "learning_rate": 0.0001997142444797229,
+      "loss": 0.1859,
+      "step": 104
+    },
+    {
+      "epoch": 0.007576030881344926,
+      "grad_norm": 0.0959896668791771,
+      "learning_rate": 0.00019971135806032617,
+      "loss": 0.2286,
+      "step": 105
+    },
+    {
+      "epoch": 0.0076481835564053535,
+      "grad_norm": 0.09485447406768799,
+      "learning_rate": 0.00019970847164092943,
+      "loss": 0.2151,
+      "step": 106
+    },
+    {
+      "epoch": 0.007720336231465782,
+      "grad_norm": 0.11261118948459625,
+      "learning_rate": 0.0001997055852215327,
+      "loss": 0.2022,
+      "step": 107
+    },
+    {
+      "epoch": 0.007792488906526209,
+      "grad_norm": 0.126130148768425,
+      "learning_rate": 0.00019970269880213595,
+      "loss": 0.2423,
+      "step": 108
+    },
+    {
+      "epoch": 0.007864641581586637,
+      "grad_norm": 0.10524505376815796,
+      "learning_rate": 0.00019969981238273922,
+      "loss": 0.2011,
+      "step": 109
+    },
+    {
+      "epoch": 0.007936794256647065,
+      "grad_norm": 0.08516758680343628,
+      "learning_rate": 0.00019969692596334248,
+      "loss": 0.1857,
+      "step": 110
+    },
+    {
+      "epoch": 0.008008946931707494,
+      "grad_norm": 0.10273415595293045,
+      "learning_rate": 0.00019969403954394574,
+      "loss": 0.2111,
+      "step": 111
+    },
+    {
+      "epoch": 0.00808109960676792,
+      "grad_norm": 0.11832364648580551,
+      "learning_rate": 0.000199691153124549,
+      "loss": 0.2038,
+      "step": 112
+    },
+    {
+      "epoch": 0.008153252281828349,
+      "grad_norm": 0.10919308662414551,
+      "learning_rate": 0.00019968826670515227,
+      "loss": 0.2113,
+      "step": 113
+    },
+    {
+      "epoch": 0.008225404956888777,
+      "grad_norm": 0.12120673805475235,
+      "learning_rate": 0.0001996853802857555,
+      "loss": 0.1866,
+      "step": 114
+    },
+    {
+      "epoch": 0.008297557631949205,
+      "grad_norm": 0.1033109650015831,
+      "learning_rate": 0.0001996824938663588,
+      "loss": 0.2323,
+      "step": 115
+    },
+    {
+      "epoch": 0.008369710307009632,
+      "grad_norm": 0.12112661451101303,
+      "learning_rate": 0.00019967960744696206,
+      "loss": 0.2291,
+      "step": 116
+    },
+    {
+      "epoch": 0.00844186298207006,
+      "grad_norm": 0.07624496519565582,
+      "learning_rate": 0.00019967672102756532,
+      "loss": 0.1695,
+      "step": 117
+    },
+    {
+      "epoch": 0.008514015657130489,
+      "grad_norm": 0.07618191838264465,
+      "learning_rate": 0.00019967383460816858,
+      "loss": 0.1675,
+      "step": 118
+    },
+    {
+      "epoch": 0.008586168332190915,
+      "grad_norm": 0.10014130920171738,
+      "learning_rate": 0.00019967094818877185,
+      "loss": 0.2024,
+      "step": 119
+    },
+    {
+      "epoch": 0.008658321007251344,
+      "grad_norm": 0.12303517013788223,
+      "learning_rate": 0.00019966806176937508,
+      "loss": 0.1653,
+      "step": 120
+    },
+    {
+      "epoch": 0.008730473682311772,
+      "grad_norm": 0.10565467923879623,
+      "learning_rate": 0.00019966517534997835,
+      "loss": 0.1942,
+      "step": 121
+    },
+    {
+      "epoch": 0.0088026263573722,
+      "grad_norm": 0.10748612135648727,
+      "learning_rate": 0.00019966228893058164,
+      "loss": 0.1983,
+      "step": 122
+    },
+    {
+      "epoch": 0.008874779032432627,
+      "grad_norm": 0.09169552475214005,
+      "learning_rate": 0.0001996594025111849,
+      "loss": 0.1889,
+      "step": 123
+    },
+    {
+      "epoch": 0.008946931707493055,
+      "grad_norm": 0.09497978538274765,
+      "learning_rate": 0.00019965651609178816,
+      "loss": 0.1736,
+      "step": 124
+    },
+    {
+      "epoch": 0.009019084382553484,
+      "grad_norm": 0.12305834144353867,
+      "learning_rate": 0.0001996536296723914,
+      "loss": 0.2279,
+      "step": 125
+    },
+    {
+      "epoch": 0.00909123705761391,
+      "grad_norm": 0.10519791394472122,
+      "learning_rate": 0.00019965074325299466,
+      "loss": 0.1727,
+      "step": 126
+    },
+    {
+      "epoch": 0.009163389732674339,
+      "grad_norm": 0.0940847247838974,
+      "learning_rate": 0.00019964785683359792,
+      "loss": 0.2071,
+      "step": 127
+    },
+    {
+      "epoch": 0.009235542407734767,
+      "grad_norm": 0.09172669053077698,
+      "learning_rate": 0.00019964497041420119,
+      "loss": 0.1632,
+      "step": 128
+    },
+    {
+      "epoch": 0.009307695082795194,
+      "grad_norm": 0.11957385390996933,
+      "learning_rate": 0.00019964208399480448,
+      "loss": 0.2209,
+      "step": 129
+    },
+    {
+      "epoch": 0.009379847757855622,
+      "grad_norm": 0.08616233617067337,
+      "learning_rate": 0.0001996391975754077,
+      "loss": 0.1936,
+      "step": 130
+    },
+    {
+      "epoch": 0.00945200043291605,
+      "grad_norm": 0.09599475562572479,
+      "learning_rate": 0.00019963631115601098,
+      "loss": 0.2139,
+      "step": 131
+    },
+    {
+      "epoch": 0.009524153107976479,
+      "grad_norm": 0.11431720107793808,
+      "learning_rate": 0.00019963342473661424,
+      "loss": 0.1974,
+      "step": 132
+    },
+    {
+      "epoch": 0.009596305783036906,
+      "grad_norm": 0.12138434499502182,
+      "learning_rate": 0.0001996305383172175,
+      "loss": 0.1607,
+      "step": 133
+    },
+    {
+      "epoch": 0.009668458458097334,
+      "grad_norm": 0.0832832008600235,
+      "learning_rate": 0.00019962765189782076,
+      "loss": 0.1999,
+      "step": 134
+    },
+    {
+      "epoch": 0.009740611133157762,
+      "grad_norm": 0.12049597501754761,
+      "learning_rate": 0.00019962476547842403,
+      "loss": 0.2017,
+      "step": 135
+    },
+    {
+      "epoch": 0.009812763808218189,
+      "grad_norm": 0.1307975798845291,
+      "learning_rate": 0.0001996218790590273,
+      "loss": 0.2409,
+      "step": 136
+    },
+    {
+      "epoch": 0.009884916483278617,
+      "grad_norm": 0.09753284603357315,
+      "learning_rate": 0.00019961899263963055,
+      "loss": 0.1278,
+      "step": 137
+    },
+    {
+      "epoch": 0.009957069158339046,
+      "grad_norm": 0.1166425570845604,
+      "learning_rate": 0.00019961610622023382,
+      "loss": 0.2263,
+      "step": 138
+    },
+    {
+      "epoch": 0.010029221833399474,
+      "grad_norm": 0.11471468210220337,
+      "learning_rate": 0.00019961321980083708,
+      "loss": 0.2244,
+      "step": 139
+    },
+    {
+      "epoch": 0.0101013745084599,
+      "grad_norm": 0.11294344067573547,
+      "learning_rate": 0.00019961033338144034,
+      "loss": 0.2296,
+      "step": 140
+    },
+    {
+      "epoch": 0.010173527183520329,
+      "grad_norm": 0.09066087007522583,
+      "learning_rate": 0.00019960744696204358,
+      "loss": 0.1726,
+      "step": 141
+    },
+    {
+      "epoch": 0.010245679858580757,
+      "grad_norm": 0.11471463739871979,
+      "learning_rate": 0.00019960456054264684,
+      "loss": 0.2075,
+      "step": 142
+    },
+    {
+      "epoch": 0.010317832533641184,
+      "grad_norm": 0.11662106215953827,
+      "learning_rate": 0.00019960167412325013,
+      "loss": 0.2538,
+      "step": 143
+    },
+    {
+      "epoch": 0.010389985208701612,
+      "grad_norm": 0.12267880141735077,
+      "learning_rate": 0.0001995987877038534,
+      "loss": 0.2094,
+      "step": 144
+    },
+    {
+      "epoch": 0.01046213788376204,
+      "grad_norm": 0.1315954029560089,
+      "learning_rate": 0.00019959590128445666,
+      "loss": 0.2061,
+      "step": 145
+    },
+    {
+      "epoch": 0.01053429055882247,
+      "grad_norm": 0.09367269277572632,
+      "learning_rate": 0.0001995930148650599,
+      "loss": 0.1829,
+      "step": 146
+    },
+    {
+      "epoch": 0.010606443233882896,
+      "grad_norm": 0.10474827885627747,
+      "learning_rate": 0.00019959012844566315,
+      "loss": 0.1826,
+      "step": 147
+    },
+    {
+      "epoch": 0.010678595908943324,
+      "grad_norm": 0.08622976392507553,
+      "learning_rate": 0.00019958724202626642,
+      "loss": 0.1816,
+      "step": 148
+    },
+    {
+      "epoch": 0.010750748584003753,
+      "grad_norm": 0.08372960984706879,
+      "learning_rate": 0.00019958435560686968,
+      "loss": 0.2197,
+      "step": 149
+    },
+    {
+      "epoch": 0.01082290125906418,
+      "grad_norm": 0.09493155032396317,
+      "learning_rate": 0.00019958146918747297,
+      "loss": 0.2209,
+      "step": 150
+    },
+    {
+      "epoch": 0.010895053934124608,
+      "grad_norm": 0.10592415183782578,
+      "learning_rate": 0.0001995785827680762,
+      "loss": 0.2309,
+      "step": 151
+    },
+    {
+      "epoch": 0.010967206609185036,
+      "grad_norm": 0.10627980530261993,
+      "learning_rate": 0.00019957569634867947,
+      "loss": 0.1596,
+      "step": 152
+    },
+    {
+      "epoch": 0.011039359284245463,
+      "grad_norm": 0.0862220972776413,
+      "learning_rate": 0.00019957280992928273,
+      "loss": 0.165,
+      "step": 153
+    },
+    {
+      "epoch": 0.011111511959305891,
+      "grad_norm": 0.07782924920320511,
+      "learning_rate": 0.000199569923509886,
+      "loss": 0.1936,
+      "step": 154
+    },
+    {
+      "epoch": 0.01118366463436632,
+      "grad_norm": 0.08833504468202591,
+      "learning_rate": 0.00019956703709048926,
+      "loss": 0.1869,
+      "step": 155
+    },
+    {
+      "epoch": 0.011255817309426748,
+      "grad_norm": 0.09978055208921432,
+      "learning_rate": 0.00019956415067109252,
+      "loss": 0.1544,
+      "step": 156
+    },
+    {
+      "epoch": 0.011327969984487174,
+      "grad_norm": 0.11412329226732254,
+      "learning_rate": 0.00019956126425169578,
+      "loss": 0.2143,
+      "step": 157
+    },
+    {
+      "epoch": 0.011400122659547603,
+      "grad_norm": 0.10396774858236313,
+      "learning_rate": 0.00019955837783229905,
+      "loss": 0.1566,
+      "step": 158
+    },
+    {
+      "epoch": 0.011472275334608031,
+      "grad_norm": 0.11875411868095398,
+      "learning_rate": 0.0001995554914129023,
+      "loss": 0.2081,
+      "step": 159
+    },
+    {
+      "epoch": 0.011544428009668458,
+      "grad_norm": 0.12535615265369415,
+      "learning_rate": 0.00019955260499350557,
+      "loss": 0.2066,
+      "step": 160
+    },
+    {
+      "epoch": 0.011616580684728886,
+      "grad_norm": 0.12918449938297272,
+      "learning_rate": 0.00019954971857410884,
+      "loss": 0.206,
+      "step": 161
+    },
+    {
+      "epoch": 0.011688733359789314,
+      "grad_norm": 0.11256776750087738,
+      "learning_rate": 0.00019954683215471207,
+      "loss": 0.1751,
+      "step": 162
+    },
+    {
+      "epoch": 0.011760886034849743,
+      "grad_norm": 0.13259808719158173,
+      "learning_rate": 0.00019954394573531533,
+      "loss": 0.2242,
+      "step": 163
+    },
+    {
+      "epoch": 0.01183303870991017,
+      "grad_norm": 0.12500351667404175,
+      "learning_rate": 0.00019954105931591862,
+      "loss": 0.2372,
+      "step": 164
+    },
+    {
+      "epoch": 0.011905191384970598,
+      "grad_norm": 0.08265276998281479,
+      "learning_rate": 0.0001995381728965219,
+      "loss": 0.1907,
+      "step": 165
+    },
+    {
+      "epoch": 0.011977344060031026,
+      "grad_norm": 0.09001553803682327,
+      "learning_rate": 0.00019953528647712515,
+      "loss": 0.1423,
+      "step": 166
+    },
+    {
+      "epoch": 0.012049496735091453,
+      "grad_norm": 0.12104904651641846,
+      "learning_rate": 0.00019953240005772839,
+      "loss": 0.191,
+      "step": 167
+    },
+    {
+      "epoch": 0.012121649410151881,
+      "grad_norm": 0.09061753004789352,
+      "learning_rate": 0.00019952951363833165,
+      "loss": 0.2026,
+      "step": 168
+    },
+    {
+      "epoch": 0.01219380208521231,
+      "grad_norm": 0.0975954681634903,
+      "learning_rate": 0.0001995266272189349,
+      "loss": 0.2199,
+      "step": 169
+    },
+    {
+      "epoch": 0.012265954760272738,
+      "grad_norm": 0.07146725058555603,
+      "learning_rate": 0.00019952374079953817,
+      "loss": 0.1557,
+      "step": 170
+    },
+    {
+      "epoch": 0.012338107435333165,
+      "grad_norm": 0.11715718358755112,
+      "learning_rate": 0.00019952085438014146,
+      "loss": 0.2198,
+      "step": 171
+    },
+    {
+      "epoch": 0.012410260110393593,
+      "grad_norm": 0.12954525649547577,
+      "learning_rate": 0.0001995179679607447,
+      "loss": 0.2212,
+      "step": 172
+    },
+    {
+      "epoch": 0.012482412785454021,
+      "grad_norm": 0.10778049379587173,
+      "learning_rate": 0.00019951508154134796,
+      "loss": 0.1775,
+      "step": 173
+    },
+    {
+      "epoch": 0.012554565460514448,
+      "grad_norm": 0.10183624178171158,
+      "learning_rate": 0.00019951219512195123,
+      "loss": 0.2009,
+      "step": 174
+    },
+    {
+      "epoch": 0.012626718135574876,
+      "grad_norm": 0.10871998220682144,
+      "learning_rate": 0.0001995093087025545,
+      "loss": 0.2297,
+      "step": 175
+    },
+    {
+      "epoch": 0.012698870810635305,
+      "grad_norm": 0.0910383015871048,
+      "learning_rate": 0.00019950642228315775,
+      "loss": 0.1571,
+      "step": 176
+    },
+    {
+      "epoch": 0.012771023485695733,
+      "grad_norm": 0.1175563782453537,
+      "learning_rate": 0.00019950353586376102,
+      "loss": 0.1874,
+      "step": 177
+    },
+    {
+      "epoch": 0.01284317616075616,
+      "grad_norm": 0.11125342547893524,
+      "learning_rate": 0.00019950064944436428,
+      "loss": 0.1946,
+      "step": 178
+    },
+    {
+      "epoch": 0.012915328835816588,
+      "grad_norm": 0.08524177223443985,
+      "learning_rate": 0.00019949776302496754,
+      "loss": 0.1982,
+      "step": 179
+    },
+    {
+      "epoch": 0.012987481510877016,
+      "grad_norm": 0.10269410908222198,
+      "learning_rate": 0.0001994948766055708,
+      "loss": 0.1501,
+      "step": 180
+    },
+    {
+      "epoch": 0.013059634185937443,
+      "grad_norm": 0.09117331355810165,
+      "learning_rate": 0.00019949199018617407,
+      "loss": 0.1941,
+      "step": 181
+    },
+    {
+      "epoch": 0.013131786860997871,
+      "grad_norm": 0.12752005457878113,
+      "learning_rate": 0.00019948910376677733,
+      "loss": 0.1972,
+      "step": 182
+    },
+    {
+      "epoch": 0.0132039395360583,
+      "grad_norm": 0.09392760694026947,
+      "learning_rate": 0.00019948621734738057,
+      "loss": 0.1545,
+      "step": 183
+    },
+    {
+      "epoch": 0.013276092211118726,
+      "grad_norm": 0.0918208435177803,
+      "learning_rate": 0.00019948333092798383,
+      "loss": 0.1953,
+      "step": 184
+    },
+    {
+      "epoch": 0.013348244886179155,
+      "grad_norm": 0.138113334774971,
+      "learning_rate": 0.00019948044450858712,
+      "loss": 0.2497,
+      "step": 185
+    },
+    {
+      "epoch": 0.013420397561239583,
+      "grad_norm": 0.10034741461277008,
+      "learning_rate": 0.00019947755808919038,
+      "loss": 0.205,
+      "step": 186
+    },
+    {
+      "epoch": 0.013492550236300012,
+      "grad_norm": 0.14482171833515167,
+      "learning_rate": 0.00019947467166979364,
+      "loss": 0.217,
+      "step": 187
+    },
+    {
+      "epoch": 0.013564702911360438,
+      "grad_norm": 0.15158987045288086,
+      "learning_rate": 0.00019947178525039688,
+      "loss": 0.2686,
+      "step": 188
+    },
+    {
+      "epoch": 0.013636855586420867,
+      "grad_norm": 0.10592421144247055,
+      "learning_rate": 0.00019946889883100014,
+      "loss": 0.2172,
+      "step": 189
+    },
+    {
+      "epoch": 0.013709008261481295,
+      "grad_norm": 0.10008352994918823,
+      "learning_rate": 0.0001994660124116034,
+      "loss": 0.211,
+      "step": 190
+    },
+    {
+      "epoch": 0.013781160936541722,
+      "grad_norm": 0.09804289788007736,
+      "learning_rate": 0.00019946312599220667,
+      "loss": 0.1999,
+      "step": 191
+    },
+    {
+      "epoch": 0.01385331361160215,
+      "grad_norm": 0.12120037525892258,
+      "learning_rate": 0.00019946023957280996,
+      "loss": 0.2195,
+      "step": 192
+    },
+    {
+      "epoch": 0.013925466286662578,
+      "grad_norm": 0.08471149951219559,
+      "learning_rate": 0.0001994573531534132,
+      "loss": 0.1866,
+      "step": 193
+    },
+    {
+      "epoch": 0.013997618961723007,
+      "grad_norm": 0.11269561201334,
+      "learning_rate": 0.00019945446673401646,
+      "loss": 0.1874,
+      "step": 194
+    },
+    {
+      "epoch": 0.014069771636783433,
+      "grad_norm": 0.160725399851799,
+      "learning_rate": 0.00019945158031461972,
+      "loss": 0.2352,
+      "step": 195
+    },
+    {
+      "epoch": 0.014141924311843862,
+      "grad_norm": 0.09515868127346039,
+      "learning_rate": 0.00019944869389522298,
+      "loss": 0.1824,
+      "step": 196
+    },
+    {
+      "epoch": 0.01421407698690429,
+      "grad_norm": 0.13595260679721832,
+      "learning_rate": 0.00019944580747582625,
+      "loss": 0.2187,
+      "step": 197
+    },
+    {
+      "epoch": 0.014286229661964717,
+      "grad_norm": 0.11196540296077728,
+      "learning_rate": 0.0001994429210564295,
+      "loss": 0.2037,
+      "step": 198
+    },
+    {
+      "epoch": 0.014358382337025145,
+      "grad_norm": 0.10346249490976334,
+      "learning_rate": 0.00019944003463703277,
+      "loss": 0.1673,
+      "step": 199
+    },
+    {
+      "epoch": 0.014430535012085573,
+      "grad_norm": 0.09775584191083908,
+      "learning_rate": 0.00019943714821763604,
+      "loss": 0.173,
+      "step": 200
+    },
+    {
+      "epoch": 0.014502687687146002,
+      "grad_norm": 0.1201501190662384,
+      "learning_rate": 0.0001994342617982393,
+      "loss": 0.1857,
+      "step": 201
+    },
+    {
+      "epoch": 0.014574840362206428,
+      "grad_norm": 0.12281841039657593,
+      "learning_rate": 0.00019943137537884256,
+      "loss": 0.1431,
+      "step": 202
+    },
+    {
+      "epoch": 0.014646993037266857,
+      "grad_norm": 0.10916811227798462,
+      "learning_rate": 0.00019942848895944582,
+      "loss": 0.2277,
+      "step": 203
+    },
+    {
+      "epoch": 0.014719145712327285,
+      "grad_norm": 0.08134673535823822,
+      "learning_rate": 0.00019942560254004906,
+      "loss": 0.2047,
+      "step": 204
+    },
+    {
+      "epoch": 0.014791298387387712,
+      "grad_norm": 0.12304705381393433,
+      "learning_rate": 0.00019942271612065232,
+      "loss": 0.209,
+      "step": 205
+    },
+    {
+      "epoch": 0.01486345106244814,
+      "grad_norm": 0.13313840329647064,
+      "learning_rate": 0.0001994198297012556,
+      "loss": 0.2049,
+      "step": 206
+    },
+    {
+      "epoch": 0.014935603737508569,
+      "grad_norm": 0.08853999525308609,
+      "learning_rate": 0.00019941694328185888,
+      "loss": 0.1843,
+      "step": 207
+    },
+    {
+      "epoch": 0.015007756412568995,
+      "grad_norm": 0.09260358661413193,
+      "learning_rate": 0.00019941405686246214,
+      "loss": 0.2099,
+      "step": 208
+    },
+    {
+      "epoch": 0.015079909087629424,
+      "grad_norm": 0.08907170593738556,
+      "learning_rate": 0.00019941117044306537,
+      "loss": 0.1899,
+      "step": 209
+    },
+    {
+      "epoch": 0.015152061762689852,
+      "grad_norm": 0.09744348376989365,
+      "learning_rate": 0.00019940828402366864,
+      "loss": 0.2257,
+      "step": 210
+    },
+    {
+      "epoch": 0.01522421443775028,
+      "grad_norm": 0.08101709187030792,
+      "learning_rate": 0.0001994053976042719,
+      "loss": 0.1763,
+      "step": 211
+    },
+    {
+      "epoch": 0.015296367112810707,
+      "grad_norm": 0.11687085032463074,
+      "learning_rate": 0.00019940251118487516,
+      "loss": 0.173,
+      "step": 212
+    },
+    {
+      "epoch": 0.015368519787871135,
+      "grad_norm": 0.11114943027496338,
+      "learning_rate": 0.00019939962476547845,
+      "loss": 0.1958,
+      "step": 213
+    },
+    {
+      "epoch": 0.015440672462931564,
+      "grad_norm": 0.17458894848823547,
+      "learning_rate": 0.0001993967383460817,
+      "loss": 0.2033,
+      "step": 214
+    },
+    {
+      "epoch": 0.01551282513799199,
+      "grad_norm": 0.10832903534173965,
+      "learning_rate": 0.00019939385192668495,
+      "loss": 0.1901,
+      "step": 215
+    },
+    {
+      "epoch": 0.015584977813052419,
+      "grad_norm": 0.10273167490959167,
+      "learning_rate": 0.00019939096550728821,
+      "loss": 0.2046,
+      "step": 216
+    },
+    {
+      "epoch": 0.015657130488112847,
+      "grad_norm": 0.10712496936321259,
+      "learning_rate": 0.00019938807908789148,
+      "loss": 0.1952,
+      "step": 217
+    },
+    {
+      "epoch": 0.015729283163173274,
+      "grad_norm": 0.09058426320552826,
+      "learning_rate": 0.00019938519266849474,
+      "loss": 0.1985,
+      "step": 218
+    },
+    {
+      "epoch": 0.015801435838233704,
+      "grad_norm": 0.10236865282058716,
+      "learning_rate": 0.000199382306249098,
+      "loss": 0.2118,
+      "step": 219
+    },
+    {
+      "epoch": 0.01587358851329413,
+      "grad_norm": 0.10352004319429398,
+      "learning_rate": 0.00019937941982970127,
+      "loss": 0.2027,
+      "step": 220
+    },
+    {
+      "epoch": 0.015945741188354557,
+      "grad_norm": 0.0897122472524643,
+      "learning_rate": 0.00019937653341030453,
+      "loss": 0.1682,
+      "step": 221
+    },
+    {
+      "epoch": 0.016017893863414987,
+      "grad_norm": 0.1268249899148941,
+      "learning_rate": 0.0001993736469909078,
+      "loss": 0.2118,
+      "step": 222
+    },
+    {
+      "epoch": 0.016090046538475414,
+      "grad_norm": 0.10709933191537857,
+      "learning_rate": 0.00019937076057151106,
+      "loss": 0.2165,
+      "step": 223
+    },
+    {
+      "epoch": 0.01616219921353584,
+      "grad_norm": 0.11887793987989426,
+      "learning_rate": 0.00019936787415211432,
+      "loss": 0.1918,
+      "step": 224
+    },
+    {
+      "epoch": 0.01623435188859627,
+      "grad_norm": 0.13047105073928833,
+      "learning_rate": 0.00019936498773271755,
+      "loss": 0.1893,
+      "step": 225
+    },
+    {
+      "epoch": 0.016306504563656697,
+      "grad_norm": 0.1016681045293808,
+      "learning_rate": 0.00019936210131332082,
+      "loss": 0.1479,
+      "step": 226
+    },
+    {
+      "epoch": 0.016378657238717124,
+      "grad_norm": 0.10665053129196167,
+      "learning_rate": 0.0001993592148939241,
+      "loss": 0.2123,
+      "step": 227
+    },
+    {
+      "epoch": 0.016450809913777554,
+      "grad_norm": 0.1363358050584793,
+      "learning_rate": 0.00019935632847452737,
+      "loss": 0.2246,
+      "step": 228
+    },
+    {
+      "epoch": 0.01652296258883798,
+      "grad_norm": 0.10417909920215607,
+      "learning_rate": 0.00019935344205513063,
+      "loss": 0.1528,
+      "step": 229
+    },
+    {
+      "epoch": 0.01659511526389841,
+      "grad_norm": 0.09790351241827011,
+      "learning_rate": 0.00019935055563573387,
+      "loss": 0.2257,
+      "step": 230
+    },
+    {
+      "epoch": 0.016667267938958837,
+      "grad_norm": 0.09754263609647751,
+      "learning_rate": 0.00019934766921633713,
+      "loss": 0.1574,
+      "step": 231
+    },
+    {
+      "epoch": 0.016739420614019264,
+      "grad_norm": 0.16002969443798065,
+      "learning_rate": 0.0001993447827969404,
+      "loss": 0.2083,
+      "step": 232
+    },
+    {
+      "epoch": 0.016811573289079694,
+      "grad_norm": 0.09000971168279648,
+      "learning_rate": 0.00019934189637754366,
+      "loss": 0.1675,
+      "step": 233
+    },
+    {
+      "epoch": 0.01688372596414012,
+      "grad_norm": 0.11512437462806702,
+      "learning_rate": 0.00019933900995814695,
+      "loss": 0.193,
+      "step": 234
+    },
+    {
+      "epoch": 0.016955878639200547,
+      "grad_norm": 0.11979632079601288,
+      "learning_rate": 0.00019933612353875018,
+      "loss": 0.2054,
+      "step": 235
+    },
+    {
+      "epoch": 0.017028031314260977,
+      "grad_norm": 0.11952786147594452,
+      "learning_rate": 0.00019933323711935345,
+      "loss": 0.1668,
+      "step": 236
+    },
+    {
+      "epoch": 0.017100183989321404,
+      "grad_norm": 0.10959208011627197,
+      "learning_rate": 0.0001993303506999567,
+      "loss": 0.216,
+      "step": 237
+    },
+    {
+      "epoch": 0.01717233666438183,
+      "grad_norm": 0.12177053093910217,
+      "learning_rate": 0.00019932746428055997,
+      "loss": 0.2314,
+      "step": 238
+    },
+    {
+      "epoch": 0.01724448933944226,
+      "grad_norm": 0.10955671221017838,
+      "learning_rate": 0.00019932457786116324,
+      "loss": 0.2267,
+      "step": 239
+    },
+    {
+      "epoch": 0.017316642014502687,
+      "grad_norm": 0.11161771416664124,
+      "learning_rate": 0.0001993216914417665,
+      "loss": 0.2044,
+      "step": 240
+    },
+    {
+      "epoch": 0.017388794689563114,
+      "grad_norm": 0.10793675482273102,
+      "learning_rate": 0.00019931880502236976,
+      "loss": 0.1914,
+      "step": 241
+    },
+    {
+      "epoch": 0.017460947364623544,
+      "grad_norm": 0.17880329489707947,
+      "learning_rate": 0.00019931591860297302,
+      "loss": 0.2513,
+      "step": 242
+    },
+    {
+      "epoch": 0.01753310003968397,
+      "grad_norm": 0.08590879291296005,
+      "learning_rate": 0.0001993130321835763,
+      "loss": 0.1594,
+      "step": 243
+    },
+    {
+      "epoch": 0.0176052527147444,
+      "grad_norm": 0.09276142716407776,
+      "learning_rate": 0.00019931014576417955,
+      "loss": 0.1619,
+      "step": 244
+    },
+    {
+      "epoch": 0.017677405389804828,
+      "grad_norm": 0.11142954975366592,
+      "learning_rate": 0.0001993072593447828,
+      "loss": 0.2105,
+      "step": 245
+    },
+    {
+      "epoch": 0.017749558064865254,
+      "grad_norm": 0.0828106701374054,
+      "learning_rate": 0.00019930437292538608,
+      "loss": 0.1931,
+      "step": 246
+    },
+    {
+      "epoch": 0.017821710739925684,
+      "grad_norm": 0.13978850841522217,
+      "learning_rate": 0.0001993014865059893,
+      "loss": 0.2162,
+      "step": 247
+    },
+    {
+      "epoch": 0.01789386341498611,
+      "grad_norm": 0.10149714350700378,
+      "learning_rate": 0.0001992986000865926,
+      "loss": 0.1972,
+      "step": 248
+    },
+    {
+      "epoch": 0.017966016090046538,
+      "grad_norm": 0.0819367840886116,
+      "learning_rate": 0.00019929571366719586,
+      "loss": 0.2244,
+      "step": 249
+    },
+    {
+      "epoch": 0.018038168765106968,
+      "grad_norm": 0.12735427916049957,
+      "learning_rate": 0.00019929282724779913,
+      "loss": 0.2066,
+      "step": 250
+    },
+    {
+      "epoch": 0.018110321440167394,
+      "grad_norm": 0.1065041646361351,
+      "learning_rate": 0.0001992899408284024,
+      "loss": 0.1747,
+      "step": 251
+    },
+    {
+      "epoch": 0.01818247411522782,
+      "grad_norm": 0.10140767693519592,
+      "learning_rate": 0.00019928705440900563,
+      "loss": 0.2042,
+      "step": 252
+    },
+    {
+      "epoch": 0.01825462679028825,
+      "grad_norm": 0.11508601158857346,
+      "learning_rate": 0.0001992841679896089,
+      "loss": 0.1923,
+      "step": 253
+    },
+    {
+      "epoch": 0.018326779465348678,
+      "grad_norm": 0.10136186331510544,
+      "learning_rate": 0.00019928128157021215,
+      "loss": 0.1708,
+      "step": 254
+    },
+    {
+      "epoch": 0.018398932140409104,
+      "grad_norm": 0.09931738674640656,
+      "learning_rate": 0.00019927839515081544,
+      "loss": 0.1829,
+      "step": 255
+    },
+    {
+      "epoch": 0.018471084815469534,
+      "grad_norm": 0.1116984635591507,
+      "learning_rate": 0.0001992755087314187,
+      "loss": 0.2105,
+      "step": 256
+    },
+    {
+      "epoch": 0.01854323749052996,
+      "grad_norm": 0.08264172822237015,
+      "learning_rate": 0.00019927262231202194,
+      "loss": 0.2176,
+      "step": 257
+    },
+    {
+      "epoch": 0.018615390165590388,
+      "grad_norm": 0.09286441653966904,
+      "learning_rate": 0.0001992697358926252,
+      "loss": 0.1286,
+      "step": 258
+    },
+    {
+      "epoch": 0.018687542840650818,
+      "grad_norm": 0.08997014909982681,
+      "learning_rate": 0.00019926684947322847,
+      "loss": 0.1848,
+      "step": 259
+    },
+    {
+      "epoch": 0.018759695515711244,
+      "grad_norm": 0.11541948467493057,
+      "learning_rate": 0.00019926396305383173,
+      "loss": 0.1909,
+      "step": 260
+    },
+    {
+      "epoch": 0.018831848190771674,
+      "grad_norm": 0.09805244952440262,
+      "learning_rate": 0.000199261076634435,
+      "loss": 0.2093,
+      "step": 261
+    },
+    {
+      "epoch": 0.0189040008658321,
+      "grad_norm": 0.09658953547477722,
+      "learning_rate": 0.00019925819021503826,
+      "loss": 0.1757,
+      "step": 262
+    },
+    {
+      "epoch": 0.018976153540892528,
+      "grad_norm": 0.09282110631465912,
+      "learning_rate": 0.00019925530379564152,
+      "loss": 0.1895,
+      "step": 263
+    },
+    {
+      "epoch": 0.019048306215952958,
+      "grad_norm": 0.11182378232479095,
+      "learning_rate": 0.00019925241737624478,
+      "loss": 0.1958,
+      "step": 264
+    },
+    {
+      "epoch": 0.019120458891013385,
+      "grad_norm": 0.09475011378526688,
+      "learning_rate": 0.00019924953095684804,
+      "loss": 0.1566,
+      "step": 265
+    },
+    {
+      "epoch": 0.01919261156607381,
+      "grad_norm": 0.09486480802297592,
+      "learning_rate": 0.0001992466445374513,
+      "loss": 0.1876,
+      "step": 266
+    },
+    {
+      "epoch": 0.01926476424113424,
+      "grad_norm": 0.11596336960792542,
+      "learning_rate": 0.00019924375811805457,
+      "loss": 0.2141,
+      "step": 267
+    },
+    {
+      "epoch": 0.019336916916194668,
+      "grad_norm": 0.10102172940969467,
+      "learning_rate": 0.0001992408716986578,
+      "loss": 0.2738,
+      "step": 268
+    },
+    {
+      "epoch": 0.019409069591255095,
+      "grad_norm": 0.10024615377187729,
+      "learning_rate": 0.0001992379852792611,
+      "loss": 0.1769,
+      "step": 269
+    },
+    {
+      "epoch": 0.019481222266315525,
+      "grad_norm": 0.1163974329829216,
+      "learning_rate": 0.00019923509885986436,
+      "loss": 0.248,
+      "step": 270
+    },
+    {
+      "epoch": 0.01955337494137595,
+      "grad_norm": 0.07875881344079971,
+      "learning_rate": 0.00019923221244046762,
+      "loss": 0.2059,
+      "step": 271
+    },
+    {
+      "epoch": 0.019625527616436378,
+      "grad_norm": 0.11664444208145142,
+      "learning_rate": 0.00019922932602107088,
+      "loss": 0.1966,
+      "step": 272
+    },
+    {
+      "epoch": 0.019697680291496808,
+      "grad_norm": 0.11768995225429535,
+      "learning_rate": 0.00019922643960167412,
+      "loss": 0.1731,
+      "step": 273
+    },
+    {
+      "epoch": 0.019769832966557235,
+      "grad_norm": 0.13091285526752472,
+      "learning_rate": 0.00019922355318227738,
+      "loss": 0.1955,
+      "step": 274
+    },
+    {
+      "epoch": 0.01984198564161766,
+      "grad_norm": 0.08766860514879227,
+      "learning_rate": 0.00019922066676288065,
+      "loss": 0.159,
+      "step": 275
+    },
+    {
+      "epoch": 0.01991413831667809,
+      "grad_norm": 0.08569549769163132,
+      "learning_rate": 0.00019921778034348394,
+      "loss": 0.1847,
+      "step": 276
+    },
+    {
+      "epoch": 0.019986290991738518,
+      "grad_norm": 0.08787121623754501,
+      "learning_rate": 0.0001992148939240872,
+      "loss": 0.1943,
+      "step": 277
+    },
+    {
+      "epoch": 0.020058443666798948,
+      "grad_norm": 0.10062495619058609,
+      "learning_rate": 0.00019921200750469043,
+      "loss": 0.2028,
+      "step": 278
+    },
+    {
+      "epoch": 0.020130596341859375,
+      "grad_norm": 0.11618790775537491,
+      "learning_rate": 0.0001992091210852937,
+      "loss": 0.1724,
+      "step": 279
+    },
+    {
+      "epoch": 0.0202027490169198,
+      "grad_norm": 0.08776410669088364,
+      "learning_rate": 0.00019920623466589696,
+      "loss": 0.2305,
+      "step": 280
+    },
+    {
+      "epoch": 0.02027490169198023,
+      "grad_norm": 0.10468615591526031,
+      "learning_rate": 0.00019920334824650022,
+      "loss": 0.171,
+      "step": 281
+    },
+    {
+      "epoch": 0.020347054367040658,
+      "grad_norm": 0.08151569217443466,
+      "learning_rate": 0.0001992004618271035,
+      "loss": 0.1512,
+      "step": 282
+    },
+    {
+      "epoch": 0.020419207042101085,
+      "grad_norm": 0.09557295590639114,
+      "learning_rate": 0.00019919757540770675,
+      "loss": 0.1675,
+      "step": 283
+    },
+    {
+      "epoch": 0.020491359717161515,
+      "grad_norm": 0.13301190733909607,
+      "learning_rate": 0.00019919468898831,
+      "loss": 0.1818,
+      "step": 284
+    },
+    {
+      "epoch": 0.02056351239222194,
+      "grad_norm": 0.10880567878484726,
+      "learning_rate": 0.00019919180256891328,
+      "loss": 0.2223,
+      "step": 285
+    },
+    {
+      "epoch": 0.020635665067282368,
+      "grad_norm": 0.13995389640331268,
+      "learning_rate": 0.00019918891614951654,
+      "loss": 0.1912,
+      "step": 286
+    },
+    {
+      "epoch": 0.020707817742342798,
+      "grad_norm": 0.11351920664310455,
+      "learning_rate": 0.0001991860297301198,
+      "loss": 0.1991,
+      "step": 287
+    },
+    {
+      "epoch": 0.020779970417403225,
+      "grad_norm": 0.1257031112909317,
+      "learning_rate": 0.00019918314331072306,
+      "loss": 0.1956,
+      "step": 288
+    },
+    {
+      "epoch": 0.02085212309246365,
+      "grad_norm": 0.12956175208091736,
+      "learning_rate": 0.0001991802568913263,
+      "loss": 0.2028,
+      "step": 289
+    },
+    {
+      "epoch": 0.02092427576752408,
+      "grad_norm": 0.11403210461139679,
+      "learning_rate": 0.0001991773704719296,
+      "loss": 0.2217,
+      "step": 290
+    },
+    {
+      "epoch": 0.020996428442584508,
+      "grad_norm": 0.08624578267335892,
+      "learning_rate": 0.00019917448405253285,
+      "loss": 0.2113,
+      "step": 291
+    },
+    {
+      "epoch": 0.02106858111764494,
+      "grad_norm": 0.07737656682729721,
+      "learning_rate": 0.00019917159763313612,
+      "loss": 0.1596,
+      "step": 292
+    },
+    {
+      "epoch": 0.021140733792705365,
+      "grad_norm": 0.107571080327034,
+      "learning_rate": 0.00019916871121373938,
+      "loss": 0.1912,
+      "step": 293
+    },
+    {
+      "epoch": 0.02121288646776579,
+      "grad_norm": 0.10967458039522171,
+      "learning_rate": 0.00019916582479434261,
+      "loss": 0.1997,
+      "step": 294
+    },
+    {
+      "epoch": 0.02128503914282622,
+      "grad_norm": 0.09726656973361969,
+      "learning_rate": 0.00019916293837494588,
+      "loss": 0.2109,
+      "step": 295
+    },
+    {
+      "epoch": 0.02135719181788665,
+      "grad_norm": 0.13646143674850464,
+      "learning_rate": 0.00019916005195554914,
+      "loss": 0.1724,
+      "step": 296
+    },
+    {
+      "epoch": 0.021429344492947075,
+      "grad_norm": 0.15287163853645325,
+      "learning_rate": 0.00019915716553615243,
+      "loss": 0.2461,
+      "step": 297
+    },
+    {
+      "epoch": 0.021501497168007505,
+      "grad_norm": 0.11280661821365356,
+      "learning_rate": 0.0001991542791167557,
+      "loss": 0.2093,
+      "step": 298
+    },
+    {
+      "epoch": 0.021573649843067932,
+      "grad_norm": 0.12336954474449158,
+      "learning_rate": 0.00019915139269735893,
+      "loss": 0.1926,
+      "step": 299
+    },
+    {
+      "epoch": 0.02164580251812836,
+      "grad_norm": 0.10354389250278473,
+      "learning_rate": 0.0001991485062779622,
+      "loss": 0.1568,
+      "step": 300
+    },
+    {
+      "epoch": 0.02171795519318879,
+      "grad_norm": 0.07334471493959427,
+      "learning_rate": 0.00019914561985856545,
+      "loss": 0.2174,
+      "step": 301
+    },
+    {
+      "epoch": 0.021790107868249215,
+      "grad_norm": 0.11739696562290192,
+      "learning_rate": 0.00019914273343916872,
+      "loss": 0.198,
+      "step": 302
+    },
+    {
+      "epoch": 0.021862260543309642,
+      "grad_norm": 0.08452331274747849,
+      "learning_rate": 0.00019913984701977198,
+      "loss": 0.2173,
+      "step": 303
+    },
+    {
+      "epoch": 0.021934413218370072,
+      "grad_norm": 0.09673911333084106,
+      "learning_rate": 0.00019913696060037524,
+      "loss": 0.1304,
+      "step": 304
+    },
+    {
+      "epoch": 0.0220065658934305,
+      "grad_norm": 0.08587480336427689,
+      "learning_rate": 0.0001991340741809785,
+      "loss": 0.1477,
+      "step": 305
+    },
+    {
+      "epoch": 0.022078718568490925,
+      "grad_norm": 0.0859738439321518,
+      "learning_rate": 0.00019913118776158177,
+      "loss": 0.1676,
+      "step": 306
+    },
+    {
+      "epoch": 0.022150871243551355,
+      "grad_norm": 0.11247943341732025,
+      "learning_rate": 0.00019912830134218503,
+      "loss": 0.1838,
+      "step": 307
+    },
+    {
+      "epoch": 0.022223023918611782,
+      "grad_norm": 0.09340526908636093,
+      "learning_rate": 0.0001991254149227883,
+      "loss": 0.1447,
+      "step": 308
+    },
+    {
+      "epoch": 0.022295176593672212,
+      "grad_norm": 0.099124975502491,
+      "learning_rate": 0.00019912252850339156,
+      "loss": 0.1725,
+      "step": 309
+    },
+    {
+      "epoch": 0.02236732926873264,
+      "grad_norm": 0.11203792691230774,
+      "learning_rate": 0.0001991196420839948,
+      "loss": 0.1841,
+      "step": 310
+    },
+    {
+      "epoch": 0.022439481943793065,
+      "grad_norm": 0.11389707773923874,
+      "learning_rate": 0.00019911675566459806,
+      "loss": 0.1977,
+      "step": 311
+    },
+    {
+      "epoch": 0.022511634618853495,
+      "grad_norm": 0.09892468899488449,
+      "learning_rate": 0.00019911386924520135,
+      "loss": 0.1473,
+      "step": 312
+    },
+    {
+      "epoch": 0.022583787293913922,
+      "grad_norm": 0.09083772450685501,
+      "learning_rate": 0.0001991109828258046,
+      "loss": 0.2101,
+      "step": 313
+    },
+    {
+      "epoch": 0.02265593996897435,
+      "grad_norm": 0.09250988066196442,
+      "learning_rate": 0.00019910809640640787,
+      "loss": 0.1889,
+      "step": 314
+    },
+    {
+      "epoch": 0.02272809264403478,
+      "grad_norm": 0.08847782015800476,
+      "learning_rate": 0.0001991052099870111,
+      "loss": 0.2198,
+      "step": 315
+    },
+    {
+      "epoch": 0.022800245319095205,
+      "grad_norm": 0.08334208279848099,
+      "learning_rate": 0.00019910232356761437,
+      "loss": 0.1677,
+      "step": 316
+    },
+    {
+      "epoch": 0.022872397994155632,
+      "grad_norm": 0.08387161046266556,
+      "learning_rate": 0.00019909943714821763,
+      "loss": 0.1982,
+      "step": 317
+    },
+    {
+      "epoch": 0.022944550669216062,
+      "grad_norm": 0.10020548105239868,
+      "learning_rate": 0.0001990965507288209,
+      "loss": 0.2098,
+      "step": 318
+    },
+    {
+      "epoch": 0.02301670334427649,
+      "grad_norm": 0.09410782158374786,
+      "learning_rate": 0.0001990936643094242,
+      "loss": 0.1557,
+      "step": 319
+    },
+    {
+      "epoch": 0.023088856019336915,
+      "grad_norm": 0.10243143886327744,
+      "learning_rate": 0.00019909077789002742,
+      "loss": 0.1781,
+      "step": 320
+    },
+    {
+      "epoch": 0.023161008694397345,
+      "grad_norm": 0.09203314036130905,
+      "learning_rate": 0.00019908789147063069,
+      "loss": 0.1438,
+      "step": 321
+    },
+    {
+      "epoch": 0.023233161369457772,
+      "grad_norm": 0.10731811821460724,
+      "learning_rate": 0.00019908500505123395,
+      "loss": 0.1463,
+      "step": 322
+    },
+    {
+      "epoch": 0.023305314044518202,
+      "grad_norm": 0.13198642432689667,
+      "learning_rate": 0.0001990821186318372,
+      "loss": 0.2845,
+      "step": 323
+    },
+    {
+      "epoch": 0.02337746671957863,
+      "grad_norm": 0.10663548856973648,
+      "learning_rate": 0.00019907923221244048,
+      "loss": 0.1802,
+      "step": 324
+    },
+    {
+      "epoch": 0.023449619394639055,
+      "grad_norm": 0.07761314511299133,
+      "learning_rate": 0.00019907634579304374,
+      "loss": 0.1921,
+      "step": 325
+    },
+    {
+      "epoch": 0.023521772069699486,
+      "grad_norm": 0.10983889549970627,
+      "learning_rate": 0.000199073459373647,
+      "loss": 0.1734,
+      "step": 326
+    },
+    {
+      "epoch": 0.023593924744759912,
+      "grad_norm": 0.10811863094568253,
+      "learning_rate": 0.00019907057295425026,
+      "loss": 0.1821,
+      "step": 327
+    },
+    {
+      "epoch": 0.02366607741982034,
+      "grad_norm": 0.09619367122650146,
+      "learning_rate": 0.00019906768653485353,
+      "loss": 0.1961,
+      "step": 328
+    },
+    {
+      "epoch": 0.02373823009488077,
+      "grad_norm": 0.1124391108751297,
+      "learning_rate": 0.0001990648001154568,
+      "loss": 0.1885,
+      "step": 329
+    },
+    {
+      "epoch": 0.023810382769941196,
+      "grad_norm": 0.12001251429319382,
+      "learning_rate": 0.00019906191369606005,
+      "loss": 0.2287,
+      "step": 330
+    },
+    {
+      "epoch": 0.023882535445001622,
+      "grad_norm": 0.08693698793649673,
+      "learning_rate": 0.0001990590272766633,
+      "loss": 0.229,
+      "step": 331
+    },
+    {
+      "epoch": 0.023954688120062052,
+      "grad_norm": 0.11607387661933899,
+      "learning_rate": 0.00019905614085726655,
+      "loss": 0.1936,
+      "step": 332
+    },
+    {
+      "epoch": 0.02402684079512248,
+      "grad_norm": 0.09263213723897934,
+      "learning_rate": 0.00019905325443786984,
+      "loss": 0.19,
+      "step": 333
+    },
+    {
+      "epoch": 0.024098993470182906,
+      "grad_norm": 0.09929122775793076,
+      "learning_rate": 0.0001990503680184731,
+      "loss": 0.1874,
+      "step": 334
+    },
+    {
+      "epoch": 0.024171146145243336,
+      "grad_norm": 0.09905881434679031,
+      "learning_rate": 0.00019904748159907637,
+      "loss": 0.1787,
+      "step": 335
+    },
+    {
+      "epoch": 0.024243298820303762,
+      "grad_norm": 0.11537329107522964,
+      "learning_rate": 0.0001990445951796796,
+      "loss": 0.2046,
+      "step": 336
+    },
+    {
+      "epoch": 0.02431545149536419,
+      "grad_norm": 0.1090816855430603,
+      "learning_rate": 0.00019904170876028287,
+      "loss": 0.1524,
+      "step": 337
+    },
+    {
+      "epoch": 0.02438760417042462,
+      "grad_norm": 0.09531082957983017,
+      "learning_rate": 0.00019903882234088613,
+      "loss": 0.1949,
+      "step": 338
+    },
+    {
+      "epoch": 0.024459756845485046,
+      "grad_norm": 0.11652404814958572,
+      "learning_rate": 0.0001990359359214894,
+      "loss": 0.1892,
+      "step": 339
+    },
+    {
+      "epoch": 0.024531909520545476,
+      "grad_norm": 0.08294610679149628,
+      "learning_rate": 0.00019903304950209268,
+      "loss": 0.1688,
+      "step": 340
+    },
+    {
+      "epoch": 0.024604062195605902,
+      "grad_norm": 0.07933833450078964,
+      "learning_rate": 0.00019903016308269592,
+      "loss": 0.1871,
+      "step": 341
+    },
+    {
+      "epoch": 0.02467621487066633,
+      "grad_norm": 0.09110282361507416,
+      "learning_rate": 0.00019902727666329918,
+      "loss": 0.2057,
+      "step": 342
+    },
+    {
+      "epoch": 0.02474836754572676,
+      "grad_norm": 0.0912768542766571,
+      "learning_rate": 0.00019902439024390244,
+      "loss": 0.1817,
+      "step": 343
+    },
+    {
+      "epoch": 0.024820520220787186,
+      "grad_norm": 0.12034342437982559,
+      "learning_rate": 0.0001990215038245057,
+      "loss": 0.2095,
+      "step": 344
+    },
+    {
+      "epoch": 0.024892672895847612,
+      "grad_norm": 0.08294276893138885,
+      "learning_rate": 0.00019901861740510897,
+      "loss": 0.2083,
+      "step": 345
+    },
+    {
+      "epoch": 0.024964825570908043,
+      "grad_norm": 0.08428749442100525,
+      "learning_rate": 0.00019901573098571223,
+      "loss": 0.1915,
+      "step": 346
+    },
+    {
+      "epoch": 0.02503697824596847,
+      "grad_norm": 0.09327653795480728,
+      "learning_rate": 0.0001990128445663155,
+      "loss": 0.1996,
+      "step": 347
+    },
+    {
+      "epoch": 0.025109130921028896,
+      "grad_norm": 0.1007128432393074,
+      "learning_rate": 0.00019900995814691876,
+      "loss": 0.2161,
+      "step": 348
+    },
+    {
+      "epoch": 0.025181283596089326,
+      "grad_norm": 0.07850717753171921,
+      "learning_rate": 0.00019900707172752202,
+      "loss": 0.1771,
+      "step": 349
+    },
+    {
+      "epoch": 0.025253436271149753,
+      "grad_norm": 0.09387306123971939,
+      "learning_rate": 0.00019900418530812528,
+      "loss": 0.1898,
+      "step": 350
+    },
+    {
+      "epoch": 0.02532558894621018,
+      "grad_norm": 0.11617643386125565,
+      "learning_rate": 0.00019900129888872855,
+      "loss": 0.2539,
+      "step": 351
+    },
+    {
+      "epoch": 0.02539774162127061,
+      "grad_norm": 0.08300226926803589,
+      "learning_rate": 0.0001989984124693318,
+      "loss": 0.1891,
+      "step": 352
+    },
+    {
+      "epoch": 0.025469894296331036,
+      "grad_norm": 0.09945572912693024,
+      "learning_rate": 0.00019899552604993505,
+      "loss": 0.1913,
+      "step": 353
+    },
+    {
+      "epoch": 0.025542046971391466,
+      "grad_norm": 0.09289573132991791,
+      "learning_rate": 0.00019899263963053834,
+      "loss": 0.1854,
+      "step": 354
+    },
+    {
+      "epoch": 0.025614199646451893,
+      "grad_norm": 0.08331576734781265,
+      "learning_rate": 0.0001989897532111416,
+      "loss": 0.126,
+      "step": 355
+    },
+    {
+      "epoch": 0.02568635232151232,
+      "grad_norm": 0.1058429703116417,
+      "learning_rate": 0.00019898686679174486,
+      "loss": 0.205,
+      "step": 356
+    },
+    {
+      "epoch": 0.02575850499657275,
+      "grad_norm": 0.11856674402952194,
+      "learning_rate": 0.00019898398037234812,
+      "loss": 0.1962,
+      "step": 357
+    },
+    {
+      "epoch": 0.025830657671633176,
+      "grad_norm": 0.09198472648859024,
+      "learning_rate": 0.00019898109395295136,
+      "loss": 0.1879,
+      "step": 358
+    },
+    {
+      "epoch": 0.025902810346693603,
+      "grad_norm": 0.09058953076601028,
+      "learning_rate": 0.00019897820753355462,
+      "loss": 0.1632,
+      "step": 359
+    },
+    {
+      "epoch": 0.025974963021754033,
+      "grad_norm": 0.08021187037229538,
+      "learning_rate": 0.00019897532111415789,
+      "loss": 0.165,
+      "step": 360
+    },
+    {
+      "epoch": 0.02604711569681446,
+      "grad_norm": 0.0965966060757637,
+      "learning_rate": 0.00019897243469476118,
+      "loss": 0.1542,
+      "step": 361
+    },
+    {
+      "epoch": 0.026119268371874886,
+      "grad_norm": 0.08407099545001984,
+      "learning_rate": 0.00019896954827536444,
+      "loss": 0.1826,
+      "step": 362
+    },
+    {
+      "epoch": 0.026191421046935316,
+      "grad_norm": 0.10092399269342422,
+      "learning_rate": 0.00019896666185596767,
+      "loss": 0.1754,
+      "step": 363
+    },
+    {
+      "epoch": 0.026263573721995743,
+      "grad_norm": 0.09448864310979843,
+      "learning_rate": 0.00019896377543657094,
+      "loss": 0.1839,
+      "step": 364
+    },
+    {
+      "epoch": 0.02633572639705617,
+      "grad_norm": 0.13143576681613922,
+      "learning_rate": 0.0001989608890171742,
+      "loss": 0.1908,
+      "step": 365
+    },
+    {
+      "epoch": 0.0264078790721166,
+      "grad_norm": 0.12041793018579483,
+      "learning_rate": 0.00019895800259777746,
+      "loss": 0.2097,
+      "step": 366
+    },
+    {
+      "epoch": 0.026480031747177026,
+      "grad_norm": 0.105339415371418,
+      "learning_rate": 0.00019895511617838073,
+      "loss": 0.18,
+      "step": 367
+    },
+    {
+      "epoch": 0.026552184422237453,
+      "grad_norm": 0.10656247287988663,
+      "learning_rate": 0.000198952229758984,
+      "loss": 0.2078,
+      "step": 368
+    },
+    {
+      "epoch": 0.026624337097297883,
+      "grad_norm": 0.11251800507307053,
+      "learning_rate": 0.00019894934333958725,
+      "loss": 0.1981,
+      "step": 369
+    },
+    {
+      "epoch": 0.02669648977235831,
+      "grad_norm": 0.12720337510108948,
+      "learning_rate": 0.00019894645692019052,
+      "loss": 0.2709,
+      "step": 370
+    },
+    {
+      "epoch": 0.02676864244741874,
+      "grad_norm": 0.09239493310451508,
+      "learning_rate": 0.00019894357050079378,
+      "loss": 0.2046,
+      "step": 371
+    },
+    {
+      "epoch": 0.026840795122479166,
+      "grad_norm": 0.10885576903820038,
+      "learning_rate": 0.00019894068408139704,
+      "loss": 0.1987,
+      "step": 372
+    },
+    {
+      "epoch": 0.026912947797539593,
+      "grad_norm": 0.11913982033729553,
+      "learning_rate": 0.0001989377976620003,
+      "loss": 0.1626,
+      "step": 373
+    },
+    {
+      "epoch": 0.026985100472600023,
+      "grad_norm": 0.12544821202754974,
+      "learning_rate": 0.00019893491124260354,
+      "loss": 0.2216,
+      "step": 374
+    },
+    {
+      "epoch": 0.02705725314766045,
+      "grad_norm": 0.13693448901176453,
+      "learning_rate": 0.00019893202482320683,
+      "loss": 0.2212,
+      "step": 375
+    },
+    {
+      "epoch": 0.027129405822720876,
+      "grad_norm": 0.08375770598649979,
+      "learning_rate": 0.0001989291384038101,
+      "loss": 0.1654,
+      "step": 376
+    },
+    {
+      "epoch": 0.027201558497781306,
+      "grad_norm": 0.11591768264770508,
+      "learning_rate": 0.00019892625198441336,
+      "loss": 0.1436,
+      "step": 377
+    },
+    {
+      "epoch": 0.027273711172841733,
+      "grad_norm": 0.11632189899682999,
+      "learning_rate": 0.00019892336556501662,
+      "loss": 0.2036,
+      "step": 378
+    },
+    {
+      "epoch": 0.02734586384790216,
+      "grad_norm": 0.10341499745845795,
+      "learning_rate": 0.00019892047914561985,
+      "loss": 0.2585,
+      "step": 379
+    },
+    {
+      "epoch": 0.02741801652296259,
+      "grad_norm": 0.10301853716373444,
+      "learning_rate": 0.00019891759272622312,
+      "loss": 0.1743,
+      "step": 380
+    },
+    {
+      "epoch": 0.027490169198023016,
+      "grad_norm": 0.10895366221666336,
+      "learning_rate": 0.00019891470630682638,
+      "loss": 0.2726,
+      "step": 381
+    },
+    {
+      "epoch": 0.027562321873083443,
+      "grad_norm": 0.09963389486074448,
+      "learning_rate": 0.00019891181988742967,
+      "loss": 0.2418,
+      "step": 382
+    },
+    {
+      "epoch": 0.027634474548143873,
+      "grad_norm": 0.09963802248239517,
+      "learning_rate": 0.00019890893346803293,
+      "loss": 0.2392,
+      "step": 383
+    },
+    {
+      "epoch": 0.0277066272232043,
+      "grad_norm": 0.08482670038938522,
+      "learning_rate": 0.00019890604704863617,
+      "loss": 0.1992,
+      "step": 384
+    },
+    {
+      "epoch": 0.027778779898264726,
+      "grad_norm": 0.09376131743192673,
+      "learning_rate": 0.00019890316062923943,
+      "loss": 0.1705,
+      "step": 385
+    },
+    {
+      "epoch": 0.027850932573325157,
+      "grad_norm": 0.12203828245401382,
+      "learning_rate": 0.0001989002742098427,
+      "loss": 0.1923,
+      "step": 386
+    },
+    {
+      "epoch": 0.027923085248385583,
+      "grad_norm": 0.09458067268133163,
+      "learning_rate": 0.00019889738779044596,
+      "loss": 0.1744,
+      "step": 387
+    },
+    {
+      "epoch": 0.027995237923446013,
+      "grad_norm": 0.09280339628458023,
+      "learning_rate": 0.00019889450137104922,
+      "loss": 0.1624,
+      "step": 388
+    },
+    {
+      "epoch": 0.02806739059850644,
+      "grad_norm": 0.11521881073713303,
+      "learning_rate": 0.00019889161495165248,
+      "loss": 0.2032,
+      "step": 389
+    },
+    {
+      "epoch": 0.028139543273566867,
+      "grad_norm": 0.10273708403110504,
+      "learning_rate": 0.00019888872853225575,
+      "loss": 0.1503,
+      "step": 390
+    },
+    {
+      "epoch": 0.028211695948627297,
+      "grad_norm": 0.12336786091327667,
+      "learning_rate": 0.000198885842112859,
+      "loss": 0.2347,
+      "step": 391
+    },
+    {
+      "epoch": 0.028283848623687723,
+      "grad_norm": 0.08287611603736877,
+      "learning_rate": 0.00019888295569346227,
+      "loss": 0.1682,
+      "step": 392
+    },
+    {
+      "epoch": 0.02835600129874815,
+      "grad_norm": 0.0861361101269722,
+      "learning_rate": 0.00019888006927406554,
+      "loss": 0.19,
+      "step": 393
+    },
+    {
+      "epoch": 0.02842815397380858,
+      "grad_norm": 0.1366668939590454,
+      "learning_rate": 0.0001988771828546688,
+      "loss": 0.1839,
+      "step": 394
+    },
+    {
+      "epoch": 0.028500306648869007,
+      "grad_norm": 0.1496826559305191,
+      "learning_rate": 0.00019887429643527203,
+      "loss": 0.204,
+      "step": 395
+    },
+    {
+      "epoch": 0.028572459323929433,
+      "grad_norm": 0.12759724259376526,
+      "learning_rate": 0.00019887141001587532,
+      "loss": 0.2151,
+      "step": 396
+    },
+    {
+      "epoch": 0.028644611998989863,
+      "grad_norm": 0.08731114119291306,
+      "learning_rate": 0.0001988685235964786,
+      "loss": 0.1315,
+      "step": 397
+    },
+    {
+      "epoch": 0.02871676467405029,
+      "grad_norm": 0.0812445878982544,
+      "learning_rate": 0.00019886563717708185,
+      "loss": 0.1613,
+      "step": 398
+    },
+    {
+      "epoch": 0.028788917349110717,
+      "grad_norm": 0.20545347034931183,
+      "learning_rate": 0.0001988627507576851,
+      "loss": 0.2087,
+      "step": 399
+    },
+    {
+      "epoch": 0.028861070024171147,
+      "grad_norm": 0.09925662726163864,
+      "learning_rate": 0.00019885986433828835,
+      "loss": 0.199,
+      "step": 400
+    },
+    {
+      "epoch": 0.028933222699231573,
+      "grad_norm": 0.08507594466209412,
+      "learning_rate": 0.0001988569779188916,
+      "loss": 0.1732,
+      "step": 401
+    },
+    {
+      "epoch": 0.029005375374292004,
+      "grad_norm": 0.12562596797943115,
+      "learning_rate": 0.00019885409149949487,
+      "loss": 0.2141,
+      "step": 402
+    },
+    {
+      "epoch": 0.02907752804935243,
+      "grad_norm": 0.07977975159883499,
+      "learning_rate": 0.00019885120508009816,
+      "loss": 0.1969,
+      "step": 403
+    },
+    {
+      "epoch": 0.029149680724412857,
+      "grad_norm": 0.10498231649398804,
+      "learning_rate": 0.00019884831866070143,
+      "loss": 0.1847,
+      "step": 404
+    },
+    {
+      "epoch": 0.029221833399473287,
+      "grad_norm": 0.10222010314464569,
+      "learning_rate": 0.00019884543224130466,
+      "loss": 0.1883,
+      "step": 405
+    },
+    {
+      "epoch": 0.029293986074533714,
+      "grad_norm": 0.1068202406167984,
+      "learning_rate": 0.00019884254582190793,
+      "loss": 0.1362,
+      "step": 406
+    },
+    {
+      "epoch": 0.02936613874959414,
+      "grad_norm": 0.13500475883483887,
+      "learning_rate": 0.0001988396594025112,
+      "loss": 0.2243,
+      "step": 407
+    },
+    {
+      "epoch": 0.02943829142465457,
+      "grad_norm": 0.11419196426868439,
+      "learning_rate": 0.00019883677298311445,
+      "loss": 0.2222,
+      "step": 408
+    },
+    {
+      "epoch": 0.029510444099714997,
+      "grad_norm": 0.09644989669322968,
+      "learning_rate": 0.00019883388656371771,
+      "loss": 0.1744,
+      "step": 409
+    },
+    {
+      "epoch": 0.029582596774775424,
+      "grad_norm": 0.09299837052822113,
+      "learning_rate": 0.00019883100014432098,
+      "loss": 0.1963,
+      "step": 410
+    },
+    {
+      "epoch": 0.029654749449835854,
+      "grad_norm": 0.08479341864585876,
+      "learning_rate": 0.00019882811372492424,
+      "loss": 0.2138,
+      "step": 411
+    },
+    {
+      "epoch": 0.02972690212489628,
+      "grad_norm": 0.12209715694189072,
+      "learning_rate": 0.0001988252273055275,
+      "loss": 0.2051,
+      "step": 412
+    },
+    {
+      "epoch": 0.029799054799956707,
+      "grad_norm": 0.1003933921456337,
+      "learning_rate": 0.00019882234088613077,
+      "loss": 0.176,
+      "step": 413
+    },
+    {
+      "epoch": 0.029871207475017137,
+      "grad_norm": 0.11374977976083755,
+      "learning_rate": 0.00019881945446673403,
+      "loss": 0.177,
+      "step": 414
+    },
+    {
+      "epoch": 0.029943360150077564,
+      "grad_norm": 0.13240540027618408,
+      "learning_rate": 0.0001988165680473373,
+      "loss": 0.1991,
+      "step": 415
+    },
+    {
+      "epoch": 0.03001551282513799,
+      "grad_norm": 0.082539863884449,
+      "learning_rate": 0.00019881368162794053,
+      "loss": 0.1831,
+      "step": 416
+    },
+    {
+      "epoch": 0.03008766550019842,
+      "grad_norm": 0.08623918890953064,
+      "learning_rate": 0.00019881079520854382,
+      "loss": 0.1593,
+      "step": 417
+    },
+    {
+      "epoch": 0.030159818175258847,
+      "grad_norm": 0.09880755096673965,
+      "learning_rate": 0.00019880790878914708,
+      "loss": 0.1569,
+      "step": 418
+    },
+    {
+      "epoch": 0.030231970850319277,
+      "grad_norm": 0.09099980443716049,
+      "learning_rate": 0.00019880502236975034,
+      "loss": 0.1791,
+      "step": 419
+    },
+    {
+      "epoch": 0.030304123525379704,
+      "grad_norm": 0.08195766806602478,
+      "learning_rate": 0.0001988021359503536,
+      "loss": 0.1759,
+      "step": 420
+    },
+    {
+      "epoch": 0.03037627620044013,
+      "grad_norm": 0.08706981688737869,
+      "learning_rate": 0.00019879924953095684,
+      "loss": 0.1947,
+      "step": 421
+    },
+    {
+      "epoch": 0.03044842887550056,
+      "grad_norm": 0.08890935033559799,
+      "learning_rate": 0.0001987963631115601,
+      "loss": 0.1449,
+      "step": 422
+    },
+    {
+      "epoch": 0.030520581550560987,
+      "grad_norm": 0.08960860967636108,
+      "learning_rate": 0.00019879347669216337,
+      "loss": 0.1971,
+      "step": 423
+    },
+    {
+      "epoch": 0.030592734225621414,
+      "grad_norm": 0.09105844795703888,
+      "learning_rate": 0.00019879059027276666,
+      "loss": 0.1579,
+      "step": 424
+    },
+    {
+      "epoch": 0.030664886900681844,
+      "grad_norm": 0.0840487852692604,
+      "learning_rate": 0.00019878770385336992,
+      "loss": 0.1593,
+      "step": 425
+    },
+    {
+      "epoch": 0.03073703957574227,
+      "grad_norm": 0.10095898807048798,
+      "learning_rate": 0.00019878481743397316,
+      "loss": 0.2121,
+      "step": 426
+    },
+    {
+      "epoch": 0.030809192250802697,
+      "grad_norm": 0.08656778931617737,
+      "learning_rate": 0.00019878193101457642,
+      "loss": 0.1855,
+      "step": 427
+    },
+    {
+      "epoch": 0.030881344925863127,
+      "grad_norm": 0.12554162740707397,
+      "learning_rate": 0.00019877904459517968,
+      "loss": 0.1579,
+      "step": 428
+    },
+    {
+      "epoch": 0.030953497600923554,
+      "grad_norm": 0.1407862901687622,
+      "learning_rate": 0.00019877615817578295,
+      "loss": 0.2155,
+      "step": 429
+    },
+    {
+      "epoch": 0.03102565027598398,
+      "grad_norm": 0.11547860503196716,
+      "learning_rate": 0.0001987732717563862,
+      "loss": 0.1828,
+      "step": 430
+    },
+    {
+      "epoch": 0.03109780295104441,
+      "grad_norm": 0.09747839719057083,
+      "learning_rate": 0.00019877038533698947,
+      "loss": 0.1626,
+      "step": 431
+    },
+    {
+      "epoch": 0.031169955626104837,
+      "grad_norm": 0.10276393592357635,
+      "learning_rate": 0.00019876749891759274,
+      "loss": 0.2393,
+      "step": 432
+    },
+    {
+      "epoch": 0.031242108301165267,
+      "grad_norm": 0.09598229825496674,
+      "learning_rate": 0.000198764612498196,
+      "loss": 0.1932,
+      "step": 433
+    },
+    {
+      "epoch": 0.031314260976225694,
+      "grad_norm": 0.08966390043497086,
+      "learning_rate": 0.00019876172607879926,
+      "loss": 0.1931,
+      "step": 434
+    },
+    {
+      "epoch": 0.031386413651286124,
+      "grad_norm": 0.10524100810289383,
+      "learning_rate": 0.00019875883965940252,
+      "loss": 0.1931,
+      "step": 435
+    },
+    {
+      "epoch": 0.03145856632634655,
+      "grad_norm": 0.09162607789039612,
+      "learning_rate": 0.0001987559532400058,
+      "loss": 0.1788,
+      "step": 436
+    },
+    {
+      "epoch": 0.03153071900140698,
+      "grad_norm": 0.13454949855804443,
+      "learning_rate": 0.00019875306682060902,
+      "loss": 0.1972,
+      "step": 437
+    },
+    {
+      "epoch": 0.03160287167646741,
+      "grad_norm": 0.13340908288955688,
+      "learning_rate": 0.0001987501804012123,
+      "loss": 0.2229,
+      "step": 438
+    },
+    {
+      "epoch": 0.03167502435152783,
+      "grad_norm": 0.12137065082788467,
+      "learning_rate": 0.00019874729398181558,
+      "loss": 0.1873,
+      "step": 439
+    },
+    {
+      "epoch": 0.03174717702658826,
+      "grad_norm": 0.08705832809209824,
+      "learning_rate": 0.00019874440756241884,
+      "loss": 0.1627,
+      "step": 440
+    },
+    {
+      "epoch": 0.03181932970164869,
+      "grad_norm": 0.10541950911283493,
+      "learning_rate": 0.0001987415211430221,
+      "loss": 0.1981,
+      "step": 441
+    },
+    {
+      "epoch": 0.031891482376709114,
+      "grad_norm": 0.07744797319173813,
+      "learning_rate": 0.00019873863472362534,
+      "loss": 0.2073,
+      "step": 442
+    },
+    {
+      "epoch": 0.031963635051769544,
+      "grad_norm": 0.09692710638046265,
+      "learning_rate": 0.0001987357483042286,
+      "loss": 0.179,
+      "step": 443
+    },
+    {
+      "epoch": 0.032035787726829974,
+      "grad_norm": 0.2575322389602661,
+      "learning_rate": 0.00019873286188483186,
+      "loss": 0.1625,
+      "step": 444
+    },
+    {
+      "epoch": 0.0321079404018904,
+      "grad_norm": 0.10213745385408401,
+      "learning_rate": 0.00019872997546543515,
+      "loss": 0.1663,
+      "step": 445
+    },
+    {
+      "epoch": 0.03218009307695083,
+      "grad_norm": 0.1628570258617401,
+      "learning_rate": 0.00019872708904603842,
+      "loss": 0.207,
+      "step": 446
+    },
+    {
+      "epoch": 0.03225224575201126,
+      "grad_norm": 0.23218366503715515,
+      "learning_rate": 0.00019872420262664165,
+      "loss": 0.1867,
+      "step": 447
+    },
+    {
+      "epoch": 0.03232439842707168,
+      "grad_norm": 0.09101596474647522,
+      "learning_rate": 0.00019872131620724491,
+      "loss": 0.2133,
+      "step": 448
+    },
+    {
+      "epoch": 0.03239655110213211,
+      "grad_norm": 0.10827759653329849,
+      "learning_rate": 0.00019871842978784818,
+      "loss": 0.1562,
+      "step": 449
+    },
+    {
+      "epoch": 0.03246870377719254,
+      "grad_norm": 0.08563897013664246,
+      "learning_rate": 0.00019871554336845144,
+      "loss": 0.1678,
+      "step": 450
+    },
+    {
+      "epoch": 0.032540856452252964,
+      "grad_norm": 0.13903899490833282,
+      "learning_rate": 0.0001987126569490547,
+      "loss": 0.1915,
+      "step": 451
+    },
+    {
+      "epoch": 0.032613009127313394,
+      "grad_norm": 0.13983823359012604,
+      "learning_rate": 0.00019870977052965797,
+      "loss": 0.1932,
+      "step": 452
+    },
+    {
+      "epoch": 0.032685161802373824,
+      "grad_norm": 0.06946656852960587,
+      "learning_rate": 0.00019870688411026123,
+      "loss": 0.1394,
+      "step": 453
+    },
+    {
+      "epoch": 0.03275731447743425,
+      "grad_norm": 0.08703076094388962,
+      "learning_rate": 0.0001987039976908645,
+      "loss": 0.1516,
+      "step": 454
+    },
+    {
+      "epoch": 0.03282946715249468,
+      "grad_norm": 0.0886911153793335,
+      "learning_rate": 0.00019870111127146776,
+      "loss": 0.1921,
+      "step": 455
+    },
+    {
+      "epoch": 0.03290161982755511,
+      "grad_norm": 0.10807514935731888,
+      "learning_rate": 0.00019869822485207102,
+      "loss": 0.2187,
+      "step": 456
+    },
+    {
+      "epoch": 0.03297377250261554,
+      "grad_norm": 0.10857339948415756,
+      "learning_rate": 0.00019869533843267428,
+      "loss": 0.183,
+      "step": 457
+    },
+    {
+      "epoch": 0.03304592517767596,
+      "grad_norm": 0.08645518869161606,
+      "learning_rate": 0.00019869245201327752,
+      "loss": 0.2359,
+      "step": 458
+    },
+    {
+      "epoch": 0.03311807785273639,
+      "grad_norm": 0.09541124105453491,
+      "learning_rate": 0.0001986895655938808,
+      "loss": 0.2041,
+      "step": 459
+    },
+    {
+      "epoch": 0.03319023052779682,
+      "grad_norm": 0.09739275276660919,
+      "learning_rate": 0.00019868667917448407,
+      "loss": 0.1494,
+      "step": 460
+    },
+    {
+      "epoch": 0.033262383202857244,
+      "grad_norm": 0.10426805168390274,
+      "learning_rate": 0.00019868379275508733,
+      "loss": 0.183,
+      "step": 461
+    },
+    {
+      "epoch": 0.033334535877917675,
+      "grad_norm": 0.13501611351966858,
+      "learning_rate": 0.0001986809063356906,
+      "loss": 0.1999,
+      "step": 462
+    },
+    {
+      "epoch": 0.033406688552978105,
+      "grad_norm": 0.1044807955622673,
+      "learning_rate": 0.00019867801991629386,
+      "loss": 0.1828,
+      "step": 463
+    },
+    {
+      "epoch": 0.03347884122803853,
+      "grad_norm": 0.09801997244358063,
+      "learning_rate": 0.0001986751334968971,
+      "loss": 0.19,
+      "step": 464
+    },
+    {
+      "epoch": 0.03355099390309896,
+      "grad_norm": 0.09675435721874237,
+      "learning_rate": 0.00019867224707750036,
+      "loss": 0.2026,
+      "step": 465
+    },
+    {
+      "epoch": 0.03362314657815939,
+      "grad_norm": 0.07665897905826569,
+      "learning_rate": 0.00019866936065810365,
+      "loss": 0.2056,
+      "step": 466
+    },
+    {
+      "epoch": 0.03369529925321981,
+      "grad_norm": 0.10023979097604752,
+      "learning_rate": 0.0001986664742387069,
+      "loss": 0.1935,
+      "step": 467
+    },
+    {
+      "epoch": 0.03376745192828024,
+      "grad_norm": 0.1360999494791031,
+      "learning_rate": 0.00019866358781931017,
+      "loss": 0.1964,
+      "step": 468
+    },
+    {
+      "epoch": 0.03383960460334067,
+      "grad_norm": 0.11151537299156189,
+      "learning_rate": 0.0001986607013999134,
+      "loss": 0.1422,
+      "step": 469
+    },
+    {
+      "epoch": 0.033911757278401095,
+      "grad_norm": 0.09877774864435196,
+      "learning_rate": 0.00019865781498051667,
+      "loss": 0.2298,
+      "step": 470
+    },
+    {
+      "epoch": 0.033983909953461525,
+      "grad_norm": 0.10054466873407364,
+      "learning_rate": 0.00019865492856111993,
+      "loss": 0.1612,
+      "step": 471
+    },
+    {
+      "epoch": 0.034056062628521955,
+      "grad_norm": 0.09187640994787216,
+      "learning_rate": 0.0001986520421417232,
+      "loss": 0.158,
+      "step": 472
+    },
+    {
+      "epoch": 0.03412821530358238,
+      "grad_norm": 0.0961819589138031,
+      "learning_rate": 0.0001986491557223265,
+      "loss": 0.1946,
+      "step": 473
+    },
+    {
+      "epoch": 0.03420036797864281,
+      "grad_norm": 0.12049927562475204,
+      "learning_rate": 0.00019864626930292972,
+      "loss": 0.1582,
+      "step": 474
+    },
+    {
+      "epoch": 0.03427252065370324,
+      "grad_norm": 0.09458652138710022,
+      "learning_rate": 0.00019864338288353299,
+      "loss": 0.1743,
+      "step": 475
+    },
+    {
+      "epoch": 0.03434467332876366,
+      "grad_norm": 0.10272986441850662,
+      "learning_rate": 0.00019864049646413625,
+      "loss": 0.2269,
+      "step": 476
+    },
+    {
+      "epoch": 0.03441682600382409,
+      "grad_norm": 0.12928758561611176,
+      "learning_rate": 0.0001986376100447395,
+      "loss": 0.1989,
+      "step": 477
+    },
+    {
+      "epoch": 0.03448897867888452,
+      "grad_norm": 0.0998743325471878,
+      "learning_rate": 0.00019863472362534278,
+      "loss": 0.1788,
+      "step": 478
+    },
+    {
+      "epoch": 0.034561131353944945,
+      "grad_norm": 0.11031027883291245,
+      "learning_rate": 0.00019863183720594604,
+      "loss": 0.2189,
+      "step": 479
+    },
+    {
+      "epoch": 0.034633284029005375,
+      "grad_norm": 0.08675051480531693,
+      "learning_rate": 0.0001986289507865493,
+      "loss": 0.1687,
+      "step": 480
+    },
+    {
+      "epoch": 0.034705436704065805,
+      "grad_norm": 0.08388551324605942,
+      "learning_rate": 0.00019862606436715256,
+      "loss": 0.1719,
+      "step": 481
+    },
+    {
+      "epoch": 0.03477758937912623,
+      "grad_norm": 0.09967280924320221,
+      "learning_rate": 0.00019862317794775583,
+      "loss": 0.1702,
+      "step": 482
+    },
+    {
+      "epoch": 0.03484974205418666,
+      "grad_norm": 0.13154242932796478,
+      "learning_rate": 0.0001986202915283591,
+      "loss": 0.1838,
+      "step": 483
+    },
+    {
+      "epoch": 0.03492189472924709,
+      "grad_norm": 0.08529820293188095,
+      "learning_rate": 0.00019861740510896235,
+      "loss": 0.1792,
+      "step": 484
+    },
+    {
+      "epoch": 0.03499404740430751,
+      "grad_norm": 0.09009307622909546,
+      "learning_rate": 0.0001986145186895656,
+      "loss": 0.189,
+      "step": 485
+    },
+    {
+      "epoch": 0.03506620007936794,
+      "grad_norm": 0.10295873880386353,
+      "learning_rate": 0.00019861163227016885,
+      "loss": 0.1849,
+      "step": 486
+    },
+    {
+      "epoch": 0.03513835275442837,
+      "grad_norm": 0.13797029852867126,
+      "learning_rate": 0.00019860874585077214,
+      "loss": 0.1871,
+      "step": 487
+    },
+    {
+      "epoch": 0.0352105054294888,
+      "grad_norm": 0.09551911801099777,
+      "learning_rate": 0.0001986058594313754,
+      "loss": 0.1643,
+      "step": 488
+    },
+    {
+      "epoch": 0.035282658104549225,
+      "grad_norm": 0.11506037414073944,
+      "learning_rate": 0.00019860297301197867,
+      "loss": 0.1496,
+      "step": 489
+    },
+    {
+      "epoch": 0.035354810779609655,
+      "grad_norm": 0.09025515615940094,
+      "learning_rate": 0.0001986000865925819,
+      "loss": 0.1772,
+      "step": 490
+    },
+    {
+      "epoch": 0.035426963454670085,
+      "grad_norm": 0.10068268328905106,
+      "learning_rate": 0.00019859720017318517,
+      "loss": 0.1583,
+      "step": 491
+    },
+    {
+      "epoch": 0.03549911612973051,
+      "grad_norm": 0.1159784346818924,
+      "learning_rate": 0.00019859431375378843,
+      "loss": 0.1908,
+      "step": 492
+    },
+    {
+      "epoch": 0.03557126880479094,
+      "grad_norm": 0.1259685754776001,
+      "learning_rate": 0.0001985914273343917,
+      "loss": 0.1654,
+      "step": 493
+    },
+    {
+      "epoch": 0.03564342147985137,
+      "grad_norm": 0.1246333047747612,
+      "learning_rate": 0.00019858854091499498,
+      "loss": 0.2091,
+      "step": 494
+    },
+    {
+      "epoch": 0.03571557415491179,
+      "grad_norm": 0.15852630138397217,
+      "learning_rate": 0.00019858565449559822,
+      "loss": 0.227,
+      "step": 495
+    },
+    {
+      "epoch": 0.03578772682997222,
+      "grad_norm": 0.10838798433542252,
+      "learning_rate": 0.00019858276807620148,
+      "loss": 0.1773,
+      "step": 496
+    },
+    {
+      "epoch": 0.03585987950503265,
+      "grad_norm": 0.09128434211015701,
+      "learning_rate": 0.00019857988165680474,
+      "loss": 0.1643,
+      "step": 497
+    },
+    {
+      "epoch": 0.035932032180093075,
+      "grad_norm": 0.17223364114761353,
+      "learning_rate": 0.000198576995237408,
+      "loss": 0.1859,
+      "step": 498
+    },
+    {
+      "epoch": 0.036004184855153505,
+      "grad_norm": 0.1001114621758461,
+      "learning_rate": 0.00019857410881801127,
+      "loss": 0.1668,
+      "step": 499
+    },
+    {
+      "epoch": 0.036076337530213935,
+      "grad_norm": 0.08362135291099548,
+      "learning_rate": 0.00019857122239861453,
+      "loss": 0.2022,
+      "step": 500
+    },
+    {
+      "epoch": 0.03614849020527436,
+      "grad_norm": 0.10879844427108765,
+      "learning_rate": 0.00019856833597921777,
+      "loss": 0.217,
+      "step": 501
+    },
+    {
+      "epoch": 0.03622064288033479,
+      "grad_norm": 0.1339205950498581,
+      "learning_rate": 0.00019856544955982106,
+      "loss": 0.1782,
+      "step": 502
+    },
+    {
+      "epoch": 0.03629279555539522,
+      "grad_norm": 0.11790255457162857,
+      "learning_rate": 0.00019856256314042432,
+      "loss": 0.21,
+      "step": 503
+    },
+    {
+      "epoch": 0.03636494823045564,
+      "grad_norm": 0.13036282360553741,
+      "learning_rate": 0.00019855967672102758,
+      "loss": 0.2059,
+      "step": 504
+    },
+    {
+      "epoch": 0.03643710090551607,
+      "grad_norm": 0.14188435673713684,
+      "learning_rate": 0.00019855679030163085,
+      "loss": 0.2295,
+      "step": 505
+    },
+    {
+      "epoch": 0.0365092535805765,
+      "grad_norm": 0.18794411420822144,
+      "learning_rate": 0.00019855390388223408,
+      "loss": 0.1999,
+      "step": 506
+    },
+    {
+      "epoch": 0.036581406255636925,
+      "grad_norm": 0.2316272109746933,
+      "learning_rate": 0.00019855101746283735,
+      "loss": 0.1816,
+      "step": 507
+    },
+    {
+      "epoch": 0.036653558930697355,
+      "grad_norm": 0.1288416087627411,
+      "learning_rate": 0.0001985481310434406,
+      "loss": 0.1966,
+      "step": 508
+    },
+    {
+      "epoch": 0.036725711605757785,
+      "grad_norm": 0.10679508745670319,
+      "learning_rate": 0.0001985452446240439,
+      "loss": 0.154,
+      "step": 509
+    },
+    {
+      "epoch": 0.03679786428081821,
+      "grad_norm": 0.0898621529340744,
+      "learning_rate": 0.00019854235820464716,
+      "loss": 0.1894,
+      "step": 510
+    },
+    {
+      "epoch": 0.03687001695587864,
+      "grad_norm": 0.0908215343952179,
+      "learning_rate": 0.0001985394717852504,
+      "loss": 0.1742,
+      "step": 511
+    },
+    {
+      "epoch": 0.03694216963093907,
+      "grad_norm": 0.17195391654968262,
+      "learning_rate": 0.00019853658536585366,
+      "loss": 0.2291,
+      "step": 512
+    },
+    {
+      "epoch": 0.03701432230599949,
+      "grad_norm": 0.12300444394350052,
+      "learning_rate": 0.00019853369894645692,
+      "loss": 0.1971,
+      "step": 513
+    },
+    {
+      "epoch": 0.03708647498105992,
+      "grad_norm": 0.10059063881635666,
+      "learning_rate": 0.00019853081252706019,
+      "loss": 0.1918,
+      "step": 514
+    },
+    {
+      "epoch": 0.03715862765612035,
+      "grad_norm": 0.10917804390192032,
+      "learning_rate": 0.00019852792610766345,
+      "loss": 0.1648,
+      "step": 515
+    },
+    {
+      "epoch": 0.037230780331180775,
+      "grad_norm": 0.10756178200244904,
+      "learning_rate": 0.0001985250396882667,
+      "loss": 0.1771,
+      "step": 516
+    },
+    {
+      "epoch": 0.037302933006241205,
+      "grad_norm": 0.10932893306016922,
+      "learning_rate": 0.00019852215326886997,
+      "loss": 0.1977,
+      "step": 517
+    },
+    {
+      "epoch": 0.037375085681301635,
+      "grad_norm": 0.15212245285511017,
+      "learning_rate": 0.00019851926684947324,
+      "loss": 0.2453,
+      "step": 518
+    },
+    {
+      "epoch": 0.03744723835636206,
+      "grad_norm": 0.08777447789907455,
+      "learning_rate": 0.0001985163804300765,
+      "loss": 0.1742,
+      "step": 519
+    },
+    {
+      "epoch": 0.03751939103142249,
+      "grad_norm": 0.10264381021261215,
+      "learning_rate": 0.00019851349401067976,
+      "loss": 0.205,
+      "step": 520
+    },
+    {
+      "epoch": 0.03759154370648292,
+      "grad_norm": 0.09408518671989441,
+      "learning_rate": 0.00019851060759128303,
+      "loss": 0.1924,
+      "step": 521
+    },
+    {
+      "epoch": 0.03766369638154335,
+      "grad_norm": 0.09707022458314896,
+      "learning_rate": 0.00019850772117188626,
+      "loss": 0.1457,
+      "step": 522
+    },
+    {
+      "epoch": 0.03773584905660377,
+      "grad_norm": 0.08716564625501633,
+      "learning_rate": 0.00019850483475248955,
+      "loss": 0.1853,
+      "step": 523
+    },
+    {
+      "epoch": 0.0378080017316642,
+      "grad_norm": 0.17085005342960358,
+      "learning_rate": 0.00019850194833309282,
+      "loss": 0.2285,
+      "step": 524
+    },
+    {
+      "epoch": 0.03788015440672463,
+      "grad_norm": 0.1418502926826477,
+      "learning_rate": 0.00019849906191369608,
+      "loss": 0.1484,
+      "step": 525
+    },
+    {
+      "epoch": 0.037952307081785056,
+      "grad_norm": 0.11434414982795715,
+      "learning_rate": 0.00019849617549429934,
+      "loss": 0.1856,
+      "step": 526
+    },
+    {
+      "epoch": 0.038024459756845486,
+      "grad_norm": 0.07442096620798111,
+      "learning_rate": 0.00019849328907490258,
+      "loss": 0.1715,
+      "step": 527
+    },
+    {
+      "epoch": 0.038096612431905916,
+      "grad_norm": 0.09527246654033661,
+      "learning_rate": 0.00019849040265550584,
+      "loss": 0.1711,
+      "step": 528
+    },
+    {
+      "epoch": 0.03816876510696634,
+      "grad_norm": 0.09395115822553635,
+      "learning_rate": 0.0001984875162361091,
+      "loss": 0.1553,
+      "step": 529
+    },
+    {
+      "epoch": 0.03824091778202677,
+      "grad_norm": 0.10012837499380112,
+      "learning_rate": 0.0001984846298167124,
+      "loss": 0.1637,
+      "step": 530
+    },
+    {
+      "epoch": 0.0383130704570872,
+      "grad_norm": 0.12191860377788544,
+      "learning_rate": 0.00019848174339731566,
+      "loss": 0.2048,
+      "step": 531
+    },
+    {
+      "epoch": 0.03838522313214762,
+      "grad_norm": 0.14264048635959625,
+      "learning_rate": 0.0001984788569779189,
+      "loss": 0.1559,
+      "step": 532
+    },
+    {
+      "epoch": 0.03845737580720805,
+      "grad_norm": 0.11062449216842651,
+      "learning_rate": 0.00019847597055852215,
+      "loss": 0.2168,
+      "step": 533
+    },
+    {
+      "epoch": 0.03852952848226848,
+      "grad_norm": 0.12129320949316025,
+      "learning_rate": 0.00019847308413912542,
+      "loss": 0.1881,
+      "step": 534
+    },
+    {
+      "epoch": 0.038601681157328906,
+      "grad_norm": 0.10533594340085983,
+      "learning_rate": 0.00019847019771972868,
+      "loss": 0.1531,
+      "step": 535
+    },
+    {
+      "epoch": 0.038673833832389336,
+      "grad_norm": 0.10378196835517883,
+      "learning_rate": 0.00019846731130033194,
+      "loss": 0.2205,
+      "step": 536
+    },
+    {
+      "epoch": 0.038745986507449766,
+      "grad_norm": 0.11916801333427429,
+      "learning_rate": 0.0001984644248809352,
+      "loss": 0.2088,
+      "step": 537
+    },
+    {
+      "epoch": 0.03881813918251019,
+      "grad_norm": 0.07917596399784088,
+      "learning_rate": 0.00019846153846153847,
+      "loss": 0.1905,
+      "step": 538
+    },
+    {
+      "epoch": 0.03889029185757062,
+      "grad_norm": 0.11136099696159363,
+      "learning_rate": 0.00019845865204214173,
+      "loss": 0.2169,
+      "step": 539
+    },
+    {
+      "epoch": 0.03896244453263105,
+      "grad_norm": 0.1269828975200653,
+      "learning_rate": 0.000198455765622745,
+      "loss": 0.2285,
+      "step": 540
+    },
+    {
+      "epoch": 0.03903459720769147,
+      "grad_norm": 0.10883588343858719,
+      "learning_rate": 0.00019845287920334826,
+      "loss": 0.1356,
+      "step": 541
+    },
+    {
+      "epoch": 0.0391067498827519,
+      "grad_norm": 0.12637732923030853,
+      "learning_rate": 0.00019844999278395152,
+      "loss": 0.1686,
+      "step": 542
+    },
+    {
+      "epoch": 0.03917890255781233,
+      "grad_norm": 0.08260592073202133,
+      "learning_rate": 0.00019844710636455476,
+      "loss": 0.247,
+      "step": 543
+    },
+    {
+      "epoch": 0.039251055232872756,
+      "grad_norm": 0.1035764068365097,
+      "learning_rate": 0.00019844421994515805,
+      "loss": 0.1732,
+      "step": 544
+    },
+    {
+      "epoch": 0.039323207907933186,
+      "grad_norm": 0.10097551345825195,
+      "learning_rate": 0.0001984413335257613,
+      "loss": 0.2054,
+      "step": 545
+    },
+    {
+      "epoch": 0.039395360582993616,
+      "grad_norm": 0.10047705471515656,
+      "learning_rate": 0.00019843844710636457,
+      "loss": 0.1334,
+      "step": 546
+    },
+    {
+      "epoch": 0.03946751325805404,
+      "grad_norm": 0.12177751958370209,
+      "learning_rate": 0.00019843556068696784,
+      "loss": 0.1698,
+      "step": 547
+    },
+    {
+      "epoch": 0.03953966593311447,
+      "grad_norm": 0.11659875512123108,
+      "learning_rate": 0.00019843267426757107,
+      "loss": 0.1794,
+      "step": 548
+    },
+    {
+      "epoch": 0.0396118186081749,
+      "grad_norm": 0.11983144283294678,
+      "learning_rate": 0.00019842978784817433,
+      "loss": 0.2204,
+      "step": 549
+    },
+    {
+      "epoch": 0.03968397128323532,
+      "grad_norm": 0.09205503761768341,
+      "learning_rate": 0.0001984269014287776,
+      "loss": 0.1878,
+      "step": 550
+    },
+    {
+      "epoch": 0.03975612395829575,
+      "grad_norm": 0.09256859123706818,
+      "learning_rate": 0.0001984240150093809,
+      "loss": 0.2018,
+      "step": 551
+    },
+    {
+      "epoch": 0.03982827663335618,
+      "grad_norm": 0.10241582244634628,
+      "learning_rate": 0.00019842112858998415,
+      "loss": 0.2129,
+      "step": 552
+    },
+    {
+      "epoch": 0.03990042930841661,
+      "grad_norm": 0.09551103413105011,
+      "learning_rate": 0.00019841824217058739,
+      "loss": 0.1742,
+      "step": 553
+    },
+    {
+      "epoch": 0.039972581983477036,
+      "grad_norm": 0.10912424325942993,
+      "learning_rate": 0.00019841535575119065,
+      "loss": 0.1939,
+      "step": 554
+    },
+    {
+      "epoch": 0.040044734658537466,
+      "grad_norm": 0.10542840510606766,
+      "learning_rate": 0.0001984124693317939,
+      "loss": 0.1925,
+      "step": 555
+    },
+    {
+      "epoch": 0.040116887333597896,
+      "grad_norm": 0.09988424926996231,
+      "learning_rate": 0.00019840958291239717,
+      "loss": 0.1617,
+      "step": 556
+    },
+    {
+      "epoch": 0.04018904000865832,
+      "grad_norm": 0.10071025788784027,
+      "learning_rate": 0.00019840669649300044,
+      "loss": 0.1637,
+      "step": 557
+    },
+    {
+      "epoch": 0.04026119268371875,
+      "grad_norm": 0.09819203615188599,
+      "learning_rate": 0.0001984038100736037,
+      "loss": 0.1398,
+      "step": 558
+    },
+    {
+      "epoch": 0.04033334535877918,
+      "grad_norm": 0.09179025143384933,
+      "learning_rate": 0.00019840092365420696,
+      "loss": 0.1722,
+      "step": 559
+    },
+    {
+      "epoch": 0.0404054980338396,
+      "grad_norm": 0.093745656311512,
+      "learning_rate": 0.00019839803723481023,
+      "loss": 0.1665,
+      "step": 560
+    },
+    {
+      "epoch": 0.04047765070890003,
+      "grad_norm": 0.12472759187221527,
+      "learning_rate": 0.0001983951508154135,
+      "loss": 0.142,
+      "step": 561
+    },
+    {
+      "epoch": 0.04054980338396046,
+      "grad_norm": 0.1199507862329483,
+      "learning_rate": 0.00019839226439601675,
+      "loss": 0.1699,
+      "step": 562
+    },
+    {
+      "epoch": 0.040621956059020886,
+      "grad_norm": 0.12098688632249832,
+      "learning_rate": 0.00019838937797662002,
+      "loss": 0.1944,
+      "step": 563
+    },
+    {
+      "epoch": 0.040694108734081316,
+      "grad_norm": 0.11055096238851547,
+      "learning_rate": 0.00019838649155722325,
+      "loss": 0.1638,
+      "step": 564
+    },
+    {
+      "epoch": 0.040766261409141746,
+      "grad_norm": 0.1174546554684639,
+      "learning_rate": 0.00019838360513782654,
+      "loss": 0.1746,
+      "step": 565
+    },
+    {
+      "epoch": 0.04083841408420217,
+      "grad_norm": 0.08360788226127625,
+      "learning_rate": 0.0001983807187184298,
+      "loss": 0.1667,
+      "step": 566
+    },
+    {
+      "epoch": 0.0409105667592626,
+      "grad_norm": 0.10997113585472107,
+      "learning_rate": 0.00019837783229903307,
+      "loss": 0.1781,
+      "step": 567
+    },
+    {
+      "epoch": 0.04098271943432303,
+      "grad_norm": 0.09272412210702896,
+      "learning_rate": 0.00019837494587963633,
+      "loss": 0.1386,
+      "step": 568
+    },
+    {
+      "epoch": 0.04105487210938345,
+      "grad_norm": 0.15601404011249542,
+      "learning_rate": 0.00019837205946023957,
+      "loss": 0.1773,
+      "step": 569
+    },
+    {
+      "epoch": 0.04112702478444388,
+      "grad_norm": 0.09089847654104233,
+      "learning_rate": 0.00019836917304084283,
+      "loss": 0.1821,
+      "step": 570
+    },
+    {
+      "epoch": 0.04119917745950431,
+      "grad_norm": 0.14816875755786896,
+      "learning_rate": 0.0001983662866214461,
+      "loss": 0.1865,
+      "step": 571
+    },
+    {
+      "epoch": 0.041271330134564736,
+      "grad_norm": 0.08765484392642975,
+      "learning_rate": 0.00019836340020204938,
+      "loss": 0.2037,
+      "step": 572
+    },
+    {
+      "epoch": 0.041343482809625166,
+      "grad_norm": 0.11936503648757935,
+      "learning_rate": 0.00019836051378265264,
+      "loss": 0.1625,
+      "step": 573
+    },
+    {
+      "epoch": 0.041415635484685596,
+      "grad_norm": 0.09595091640949249,
+      "learning_rate": 0.00019835762736325588,
+      "loss": 0.2067,
+      "step": 574
+    },
+    {
+      "epoch": 0.04148778815974602,
+      "grad_norm": 0.07997038960456848,
+      "learning_rate": 0.00019835474094385914,
+      "loss": 0.1594,
+      "step": 575
+    },
+    {
+      "epoch": 0.04155994083480645,
+      "grad_norm": 0.11834511905908585,
+      "learning_rate": 0.0001983518545244624,
+      "loss": 0.1785,
+      "step": 576
+    },
+    {
+      "epoch": 0.04163209350986688,
+      "grad_norm": 0.0922444686293602,
+      "learning_rate": 0.00019834896810506567,
+      "loss": 0.1694,
+      "step": 577
+    },
+    {
+      "epoch": 0.0417042461849273,
+      "grad_norm": 0.09497426450252533,
+      "learning_rate": 0.00019834608168566893,
+      "loss": 0.173,
+      "step": 578
+    },
+    {
+      "epoch": 0.04177639885998773,
+      "grad_norm": 0.11161245405673981,
+      "learning_rate": 0.0001983431952662722,
+      "loss": 0.1949,
+      "step": 579
+    },
+    {
+      "epoch": 0.04184855153504816,
+      "grad_norm": 0.09253062307834625,
+      "learning_rate": 0.00019834030884687546,
+      "loss": 0.1983,
+      "step": 580
+    },
+    {
+      "epoch": 0.041920704210108586,
+      "grad_norm": 0.10557336360216141,
+      "learning_rate": 0.00019833742242747872,
+      "loss": 0.1835,
+      "step": 581
+    },
+    {
+      "epoch": 0.041992856885169016,
+      "grad_norm": 0.15258745849132538,
+      "learning_rate": 0.00019833453600808198,
+      "loss": 0.1598,
+      "step": 582
+    },
+    {
+      "epoch": 0.04206500956022945,
+      "grad_norm": 0.09645214676856995,
+      "learning_rate": 0.00019833164958868525,
+      "loss": 0.1596,
+      "step": 583
+    },
+    {
+      "epoch": 0.04213716223528988,
+      "grad_norm": 0.10072506964206696,
+      "learning_rate": 0.0001983287631692885,
+      "loss": 0.191,
+      "step": 584
+    },
+    {
+      "epoch": 0.0422093149103503,
+      "grad_norm": 0.12612341344356537,
+      "learning_rate": 0.00019832587674989177,
+      "loss": 0.1558,
+      "step": 585
+    },
+    {
+      "epoch": 0.04228146758541073,
+      "grad_norm": 0.08388601988554001,
+      "learning_rate": 0.00019832299033049504,
+      "loss": 0.1369,
+      "step": 586
+    },
+    {
+      "epoch": 0.04235362026047116,
+      "grad_norm": 0.1265336275100708,
+      "learning_rate": 0.0001983201039110983,
+      "loss": 0.1663,
+      "step": 587
+    },
+    {
+      "epoch": 0.04242577293553158,
+      "grad_norm": 0.09034436196088791,
+      "learning_rate": 0.00019831721749170156,
+      "loss": 0.1655,
+      "step": 588
+    },
+    {
+      "epoch": 0.04249792561059201,
+      "grad_norm": 0.10816913843154907,
+      "learning_rate": 0.00019831433107230482,
+      "loss": 0.1637,
+      "step": 589
+    },
+    {
+      "epoch": 0.04257007828565244,
+      "grad_norm": 0.08399533480405807,
+      "learning_rate": 0.0001983114446529081,
+      "loss": 0.1515,
+      "step": 590
+    },
+    {
+      "epoch": 0.04264223096071287,
+      "grad_norm": 0.0992622897028923,
+      "learning_rate": 0.00019830855823351132,
+      "loss": 0.1813,
+      "step": 591
+    },
+    {
+      "epoch": 0.0427143836357733,
+      "grad_norm": 0.10754331201314926,
+      "learning_rate": 0.00019830567181411459,
+      "loss": 0.1651,
+      "step": 592
+    },
+    {
+      "epoch": 0.04278653631083373,
+      "grad_norm": 0.09457876533269882,
+      "learning_rate": 0.00019830278539471788,
+      "loss": 0.1797,
+      "step": 593
+    },
+    {
+      "epoch": 0.04285868898589415,
+      "grad_norm": 0.08354930579662323,
+      "learning_rate": 0.00019829989897532114,
+      "loss": 0.1702,
+      "step": 594
+    },
+    {
+      "epoch": 0.04293084166095458,
+      "grad_norm": 0.13465763628482819,
+      "learning_rate": 0.0001982970125559244,
+      "loss": 0.1844,
+      "step": 595
+    },
+    {
+      "epoch": 0.04300299433601501,
+      "grad_norm": 0.12427057325839996,
+      "learning_rate": 0.00019829412613652764,
+      "loss": 0.2262,
+      "step": 596
+    },
+    {
+      "epoch": 0.04307514701107543,
+      "grad_norm": 0.1210947260260582,
+      "learning_rate": 0.0001982912397171309,
+      "loss": 0.2213,
+      "step": 597
+    },
+    {
+      "epoch": 0.043147299686135863,
+      "grad_norm": 0.11663418263196945,
+      "learning_rate": 0.00019828835329773416,
+      "loss": 0.1689,
+      "step": 598
+    },
+    {
+      "epoch": 0.043219452361196294,
+      "grad_norm": 0.11826334148645401,
+      "learning_rate": 0.00019828546687833743,
+      "loss": 0.1991,
+      "step": 599
+    },
+    {
+      "epoch": 0.04329160503625672,
+      "grad_norm": 0.11704009026288986,
+      "learning_rate": 0.00019828258045894072,
+      "loss": 0.1546,
+      "step": 600
+    },
+    {
+      "epoch": 0.04336375771131715,
+      "grad_norm": 0.13563813269138336,
+      "learning_rate": 0.00019827969403954395,
+      "loss": 0.1606,
+      "step": 601
+    },
+    {
+      "epoch": 0.04343591038637758,
+      "grad_norm": 0.10411211103200912,
+      "learning_rate": 0.00019827680762014721,
+      "loss": 0.1432,
+      "step": 602
+    },
+    {
+      "epoch": 0.043508063061438,
+      "grad_norm": 0.11733534187078476,
+      "learning_rate": 0.00019827392120075048,
+      "loss": 0.1368,
+      "step": 603
+    },
+    {
+      "epoch": 0.04358021573649843,
+      "grad_norm": 0.1115807443857193,
+      "learning_rate": 0.00019827103478135374,
+      "loss": 0.1922,
+      "step": 604
+    },
+    {
+      "epoch": 0.04365236841155886,
+      "grad_norm": 0.1343086063861847,
+      "learning_rate": 0.000198268148361957,
+      "loss": 0.2196,
+      "step": 605
+    },
+    {
+      "epoch": 0.043724521086619283,
+      "grad_norm": 0.09392105042934418,
+      "learning_rate": 0.00019826526194256027,
+      "loss": 0.1787,
+      "step": 606
+    },
+    {
+      "epoch": 0.043796673761679714,
+      "grad_norm": 0.09433528035879135,
+      "learning_rate": 0.00019826237552316353,
+      "loss": 0.1883,
+      "step": 607
+    },
+    {
+      "epoch": 0.043868826436740144,
+      "grad_norm": 0.11469694972038269,
+      "learning_rate": 0.0001982594891037668,
+      "loss": 0.2255,
+      "step": 608
+    },
+    {
+      "epoch": 0.04394097911180057,
+      "grad_norm": 0.1101691946387291,
+      "learning_rate": 0.00019825660268437006,
+      "loss": 0.1569,
+      "step": 609
+    },
+    {
+      "epoch": 0.044013131786861,
+      "grad_norm": 0.10026239603757858,
+      "learning_rate": 0.00019825371626497332,
+      "loss": 0.1712,
+      "step": 610
+    },
+    {
+      "epoch": 0.04408528446192143,
+      "grad_norm": 0.15787126123905182,
+      "learning_rate": 0.00019825082984557658,
+      "loss": 0.186,
+      "step": 611
+    },
+    {
+      "epoch": 0.04415743713698185,
+      "grad_norm": 0.10418775677680969,
+      "learning_rate": 0.00019824794342617982,
+      "loss": 0.1462,
+      "step": 612
+    },
+    {
+      "epoch": 0.04422958981204228,
+      "grad_norm": 0.0792064517736435,
+      "learning_rate": 0.00019824505700678308,
+      "loss": 0.1684,
+      "step": 613
+    },
+    {
+      "epoch": 0.04430174248710271,
+      "grad_norm": 0.10991806536912918,
+      "learning_rate": 0.00019824217058738637,
+      "loss": 0.1581,
+      "step": 614
+    },
+    {
+      "epoch": 0.04437389516216314,
+      "grad_norm": 0.08922984451055527,
+      "learning_rate": 0.00019823928416798963,
+      "loss": 0.1659,
+      "step": 615
+    },
+    {
+      "epoch": 0.044446047837223564,
+      "grad_norm": 0.1666286438703537,
+      "learning_rate": 0.0001982363977485929,
+      "loss": 0.1732,
+      "step": 616
+    },
+    {
+      "epoch": 0.044518200512283994,
+      "grad_norm": 0.1010286882519722,
+      "learning_rate": 0.00019823351132919613,
+      "loss": 0.1621,
+      "step": 617
+    },
+    {
+      "epoch": 0.044590353187344424,
+      "grad_norm": 0.09831476211547852,
+      "learning_rate": 0.0001982306249097994,
+      "loss": 0.185,
+      "step": 618
+    },
+    {
+      "epoch": 0.04466250586240485,
+      "grad_norm": 0.10219982266426086,
+      "learning_rate": 0.00019822773849040266,
+      "loss": 0.1577,
+      "step": 619
+    },
+    {
+      "epoch": 0.04473465853746528,
+      "grad_norm": 0.08898953348398209,
+      "learning_rate": 0.00019822485207100592,
+      "loss": 0.1435,
+      "step": 620
+    },
+    {
+      "epoch": 0.04480681121252571,
+      "grad_norm": 0.11173056066036224,
+      "learning_rate": 0.0001982219656516092,
+      "loss": 0.2292,
+      "step": 621
+    },
+    {
+      "epoch": 0.04487896388758613,
+      "grad_norm": 0.11962389200925827,
+      "learning_rate": 0.00019821907923221245,
+      "loss": 0.1796,
+      "step": 622
+    },
+    {
+      "epoch": 0.04495111656264656,
+      "grad_norm": 0.09178833663463593,
+      "learning_rate": 0.0001982161928128157,
+      "loss": 0.1968,
+      "step": 623
+    },
+    {
+      "epoch": 0.04502326923770699,
+      "grad_norm": 0.09955684840679169,
+      "learning_rate": 0.00019821330639341897,
+      "loss": 0.2178,
+      "step": 624
+    },
+    {
+      "epoch": 0.045095421912767414,
+      "grad_norm": 0.08512556552886963,
+      "learning_rate": 0.00019821041997402223,
+      "loss": 0.1563,
+      "step": 625
+    },
+    {
+      "epoch": 0.045167574587827844,
+      "grad_norm": 0.10034400224685669,
+      "learning_rate": 0.0001982075335546255,
+      "loss": 0.2094,
+      "step": 626
+    },
+    {
+      "epoch": 0.045239727262888274,
+      "grad_norm": 0.1015506461262703,
+      "learning_rate": 0.00019820464713522876,
+      "loss": 0.2016,
+      "step": 627
+    },
+    {
+      "epoch": 0.0453118799379487,
+      "grad_norm": 0.08328647911548615,
+      "learning_rate": 0.00019820176071583202,
+      "loss": 0.1894,
+      "step": 628
+    },
+    {
+      "epoch": 0.04538403261300913,
+      "grad_norm": 0.09488657861948013,
+      "learning_rate": 0.0001981988742964353,
+      "loss": 0.1643,
+      "step": 629
+    },
+    {
+      "epoch": 0.04545618528806956,
+      "grad_norm": 0.13005909323692322,
+      "learning_rate": 0.00019819598787703855,
+      "loss": 0.1713,
+      "step": 630
+    },
+    {
+      "epoch": 0.04552833796312998,
+      "grad_norm": 0.09195161610841751,
+      "learning_rate": 0.0001981931014576418,
+      "loss": 0.1754,
+      "step": 631
+    },
+    {
+      "epoch": 0.04560049063819041,
+      "grad_norm": 0.09518012404441833,
+      "learning_rate": 0.00019819021503824508,
+      "loss": 0.1907,
+      "step": 632
+    },
+    {
+      "epoch": 0.04567264331325084,
+      "grad_norm": 0.12911607325077057,
+      "learning_rate": 0.0001981873286188483,
+      "loss": 0.1522,
+      "step": 633
+    },
+    {
+      "epoch": 0.045744795988311264,
+      "grad_norm": 0.09490635991096497,
+      "learning_rate": 0.00019818444219945157,
+      "loss": 0.1362,
+      "step": 634
+    },
+    {
+      "epoch": 0.045816948663371694,
+      "grad_norm": 0.10800332576036453,
+      "learning_rate": 0.00019818155578005486,
+      "loss": 0.1842,
+      "step": 635
+    },
+    {
+      "epoch": 0.045889101338432124,
+      "grad_norm": 0.10239040851593018,
+      "learning_rate": 0.00019817866936065813,
+      "loss": 0.1807,
+      "step": 636
+    },
+    {
+      "epoch": 0.04596125401349255,
+      "grad_norm": 0.11252974718809128,
+      "learning_rate": 0.0001981757829412614,
+      "loss": 0.1881,
+      "step": 637
+    },
+    {
+      "epoch": 0.04603340668855298,
+      "grad_norm": 0.10153605788946152,
+      "learning_rate": 0.00019817289652186463,
+      "loss": 0.2006,
+      "step": 638
+    },
+    {
+      "epoch": 0.04610555936361341,
+      "grad_norm": 0.09800973534584045,
+      "learning_rate": 0.0001981700101024679,
+      "loss": 0.1909,
+      "step": 639
+    },
+    {
+      "epoch": 0.04617771203867383,
+      "grad_norm": 0.08246664702892303,
+      "learning_rate": 0.00019816712368307115,
+      "loss": 0.1475,
+      "step": 640
+    },
+    {
+      "epoch": 0.04624986471373426,
+      "grad_norm": 0.08874426782131195,
+      "learning_rate": 0.00019816423726367441,
+      "loss": 0.1972,
+      "step": 641
+    },
+    {
+      "epoch": 0.04632201738879469,
+      "grad_norm": 0.08816955238580704,
+      "learning_rate": 0.0001981613508442777,
+      "loss": 0.1459,
+      "step": 642
+    },
+    {
+      "epoch": 0.046394170063855114,
+      "grad_norm": 0.10154946893453598,
+      "learning_rate": 0.00019815846442488094,
+      "loss": 0.2218,
+      "step": 643
+    },
+    {
+      "epoch": 0.046466322738915544,
+      "grad_norm": 0.1170118972659111,
+      "learning_rate": 0.0001981555780054842,
+      "loss": 0.1207,
+      "step": 644
+    },
+    {
+      "epoch": 0.046538475413975974,
+      "grad_norm": 0.1154472604393959,
+      "learning_rate": 0.00019815269158608747,
+      "loss": 0.1987,
+      "step": 645
+    },
+    {
+      "epoch": 0.046610628089036404,
+      "grad_norm": 0.12462370842695236,
+      "learning_rate": 0.00019814980516669073,
+      "loss": 0.1656,
+      "step": 646
+    },
+    {
+      "epoch": 0.04668278076409683,
+      "grad_norm": 0.0983177199959755,
+      "learning_rate": 0.000198146918747294,
+      "loss": 0.154,
+      "step": 647
+    },
+    {
+      "epoch": 0.04675493343915726,
+      "grad_norm": 0.10129359364509583,
+      "learning_rate": 0.00019814403232789726,
+      "loss": 0.1307,
+      "step": 648
+    },
+    {
+      "epoch": 0.04682708611421769,
+      "grad_norm": 0.16896426677703857,
+      "learning_rate": 0.00019814114590850052,
+      "loss": 0.1752,
+      "step": 649
+    },
+    {
+      "epoch": 0.04689923878927811,
+      "grad_norm": 0.09189584851264954,
+      "learning_rate": 0.00019813825948910378,
+      "loss": 0.2356,
+      "step": 650
+    },
+    {
+      "epoch": 0.04697139146433854,
+      "grad_norm": 0.10254523903131485,
+      "learning_rate": 0.00019813537306970704,
+      "loss": 0.2083,
+      "step": 651
+    },
+    {
+      "epoch": 0.04704354413939897,
+      "grad_norm": 0.14748401939868927,
+      "learning_rate": 0.0001981324866503103,
+      "loss": 0.2093,
+      "step": 652
+    },
+    {
+      "epoch": 0.047115696814459394,
+      "grad_norm": 0.08709222823381424,
+      "learning_rate": 0.00019812960023091357,
+      "loss": 0.2254,
+      "step": 653
+    },
+    {
+      "epoch": 0.047187849489519824,
+      "grad_norm": 0.10217154771089554,
+      "learning_rate": 0.0001981267138115168,
+      "loss": 0.2106,
+      "step": 654
+    },
+    {
+      "epoch": 0.047260002164580255,
+      "grad_norm": 0.08088697493076324,
+      "learning_rate": 0.00019812382739212007,
+      "loss": 0.1829,
+      "step": 655
+    },
+    {
+      "epoch": 0.04733215483964068,
+      "grad_norm": 0.08585620671510696,
+      "learning_rate": 0.00019812094097272336,
+      "loss": 0.2046,
+      "step": 656
+    },
+    {
+      "epoch": 0.04740430751470111,
+      "grad_norm": 0.0953826904296875,
+      "learning_rate": 0.00019811805455332662,
+      "loss": 0.2091,
+      "step": 657
+    },
+    {
+      "epoch": 0.04747646018976154,
+      "grad_norm": 0.0792909488081932,
+      "learning_rate": 0.00019811516813392988,
+      "loss": 0.2172,
+      "step": 658
+    },
+    {
+      "epoch": 0.04754861286482196,
+      "grad_norm": 0.10460888594388962,
+      "learning_rate": 0.00019811228171453312,
+      "loss": 0.1897,
+      "step": 659
+    },
+    {
+      "epoch": 0.04762076553988239,
+      "grad_norm": 0.10770123451948166,
+      "learning_rate": 0.00019810939529513638,
+      "loss": 0.1761,
+      "step": 660
+    },
+    {
+      "epoch": 0.04769291821494282,
+      "grad_norm": 0.16509467363357544,
+      "learning_rate": 0.00019810650887573965,
+      "loss": 0.2253,
+      "step": 661
+    },
+    {
+      "epoch": 0.047765070890003244,
+      "grad_norm": 0.11456334590911865,
+      "learning_rate": 0.0001981036224563429,
+      "loss": 0.1971,
+      "step": 662
+    },
+    {
+      "epoch": 0.047837223565063675,
+      "grad_norm": 0.10152343660593033,
+      "learning_rate": 0.0001981007360369462,
+      "loss": 0.1994,
+      "step": 663
+    },
+    {
+      "epoch": 0.047909376240124105,
+      "grad_norm": 0.12069497257471085,
+      "learning_rate": 0.00019809784961754943,
+      "loss": 0.2312,
+      "step": 664
+    },
+    {
+      "epoch": 0.04798152891518453,
+      "grad_norm": 0.10090707987546921,
+      "learning_rate": 0.0001980949631981527,
+      "loss": 0.1837,
+      "step": 665
+    },
+    {
+      "epoch": 0.04805368159024496,
+      "grad_norm": 0.09290478378534317,
+      "learning_rate": 0.00019809207677875596,
+      "loss": 0.2106,
+      "step": 666
+    },
+    {
+      "epoch": 0.04812583426530539,
+      "grad_norm": 0.11278660595417023,
+      "learning_rate": 0.00019808919035935922,
+      "loss": 0.2042,
+      "step": 667
+    },
+    {
+      "epoch": 0.04819798694036581,
+      "grad_norm": 0.08206578344106674,
+      "learning_rate": 0.00019808630393996249,
+      "loss": 0.2105,
+      "step": 668
+    },
+    {
+      "epoch": 0.04827013961542624,
+      "grad_norm": 0.08444409817457199,
+      "learning_rate": 0.00019808341752056575,
+      "loss": 0.1807,
+      "step": 669
+    },
+    {
+      "epoch": 0.04834229229048667,
+      "grad_norm": 0.09454233944416046,
+      "learning_rate": 0.000198080531101169,
+      "loss": 0.1319,
+      "step": 670
+    },
+    {
+      "epoch": 0.048414444965547095,
+      "grad_norm": 0.10502707213163376,
+      "learning_rate": 0.00019807764468177228,
+      "loss": 0.2048,
+      "step": 671
+    },
+    {
+      "epoch": 0.048486597640607525,
+      "grad_norm": 0.11612027138471603,
+      "learning_rate": 0.00019807475826237554,
+      "loss": 0.1878,
+      "step": 672
+    },
+    {
+      "epoch": 0.048558750315667955,
+      "grad_norm": 0.1150597631931305,
+      "learning_rate": 0.0001980718718429788,
+      "loss": 0.2027,
+      "step": 673
+    },
+    {
+      "epoch": 0.04863090299072838,
+      "grad_norm": 0.11744673550128937,
+      "learning_rate": 0.00019806898542358206,
+      "loss": 0.173,
+      "step": 674
+    },
+    {
+      "epoch": 0.04870305566578881,
+      "grad_norm": 0.1384035348892212,
+      "learning_rate": 0.0001980660990041853,
+      "loss": 0.2282,
+      "step": 675
+    },
+    {
+      "epoch": 0.04877520834084924,
+      "grad_norm": 0.12562517821788788,
+      "learning_rate": 0.00019806321258478856,
+      "loss": 0.1754,
+      "step": 676
+    },
+    {
+      "epoch": 0.04884736101590967,
+      "grad_norm": 0.13998053967952728,
+      "learning_rate": 0.00019806032616539185,
+      "loss": 0.1818,
+      "step": 677
+    },
+    {
+      "epoch": 0.04891951369097009,
+      "grad_norm": 0.1093667671084404,
+      "learning_rate": 0.00019805743974599512,
+      "loss": 0.167,
+      "step": 678
+    },
+    {
+      "epoch": 0.04899166636603052,
+      "grad_norm": 0.11169123649597168,
+      "learning_rate": 0.00019805455332659838,
+      "loss": 0.1963,
+      "step": 679
+    },
+    {
+      "epoch": 0.04906381904109095,
+      "grad_norm": 0.11249633878469467,
+      "learning_rate": 0.00019805166690720161,
+      "loss": 0.2211,
+      "step": 680
+    },
+    {
+      "epoch": 0.049135971716151375,
+      "grad_norm": 0.10238689184188843,
+      "learning_rate": 0.00019804878048780488,
+      "loss": 0.1911,
+      "step": 681
+    },
+    {
+      "epoch": 0.049208124391211805,
+      "grad_norm": 0.09815354645252228,
+      "learning_rate": 0.00019804589406840814,
+      "loss": 0.1566,
+      "step": 682
+    },
+    {
+      "epoch": 0.049280277066272235,
+      "grad_norm": 0.09693794697523117,
+      "learning_rate": 0.0001980430076490114,
+      "loss": 0.1876,
+      "step": 683
+    },
+    {
+      "epoch": 0.04935242974133266,
+      "grad_norm": 0.0989205613732338,
+      "learning_rate": 0.0001980401212296147,
+      "loss": 0.1785,
+      "step": 684
+    },
+    {
+      "epoch": 0.04942458241639309,
+      "grad_norm": 0.1322227567434311,
+      "learning_rate": 0.00019803723481021793,
+      "loss": 0.216,
+      "step": 685
+    },
+    {
+      "epoch": 0.04949673509145352,
+      "grad_norm": 0.08061541616916656,
+      "learning_rate": 0.0001980343483908212,
+      "loss": 0.1985,
+      "step": 686
+    },
+    {
+      "epoch": 0.04956888776651394,
+      "grad_norm": 0.07591982185840607,
+      "learning_rate": 0.00019803146197142445,
+      "loss": 0.1757,
+      "step": 687
+    },
+    {
+      "epoch": 0.04964104044157437,
+      "grad_norm": 0.08189503103494644,
+      "learning_rate": 0.00019802857555202772,
+      "loss": 0.1801,
+      "step": 688
+    },
+    {
+      "epoch": 0.0497131931166348,
+      "grad_norm": 0.09155537933111191,
+      "learning_rate": 0.00019802568913263098,
+      "loss": 0.1492,
+      "step": 689
+    },
+    {
+      "epoch": 0.049785345791695225,
+      "grad_norm": 0.09057817608118057,
+      "learning_rate": 0.00019802280271323424,
+      "loss": 0.1734,
+      "step": 690
+    },
+    {
+      "epoch": 0.049857498466755655,
+      "grad_norm": 0.07322797179222107,
+      "learning_rate": 0.0001980199162938375,
+      "loss": 0.161,
+      "step": 691
+    },
+    {
+      "epoch": 0.049929651141816085,
+      "grad_norm": 0.14538775384426117,
+      "learning_rate": 0.00019801702987444077,
+      "loss": 0.1784,
+      "step": 692
+    },
+    {
+      "epoch": 0.05000180381687651,
+      "grad_norm": 0.11068142205476761,
+      "learning_rate": 0.00019801414345504403,
+      "loss": 0.1881,
+      "step": 693
+    },
+    {
+      "epoch": 0.05007395649193694,
+      "grad_norm": 0.09967081993818283,
+      "learning_rate": 0.0001980112570356473,
+      "loss": 0.225,
+      "step": 694
+    },
+    {
+      "epoch": 0.05014610916699737,
+      "grad_norm": 0.08908654004335403,
+      "learning_rate": 0.00019800837061625056,
+      "loss": 0.185,
+      "step": 695
+    },
+    {
+      "epoch": 0.05021826184205779,
+      "grad_norm": 0.11593464761972427,
+      "learning_rate": 0.00019800548419685382,
+      "loss": 0.214,
+      "step": 696
+    },
+    {
+      "epoch": 0.05029041451711822,
+      "grad_norm": 0.08826512098312378,
+      "learning_rate": 0.00019800259777745706,
+      "loss": 0.1974,
+      "step": 697
+    },
+    {
+      "epoch": 0.05036256719217865,
+      "grad_norm": 0.12316125631332397,
+      "learning_rate": 0.00019799971135806032,
+      "loss": 0.1621,
+      "step": 698
+    },
+    {
+      "epoch": 0.050434719867239075,
+      "grad_norm": 0.11270264536142349,
+      "learning_rate": 0.0001979968249386636,
+      "loss": 0.1789,
+      "step": 699
+    },
+    {
+      "epoch": 0.050506872542299505,
+      "grad_norm": 0.08740193396806717,
+      "learning_rate": 0.00019799393851926687,
+      "loss": 0.1843,
+      "step": 700
+    },
+    {
+      "epoch": 0.050579025217359935,
+      "grad_norm": 0.09596771746873856,
+      "learning_rate": 0.00019799105209987014,
+      "loss": 0.1776,
+      "step": 701
+    },
+    {
+      "epoch": 0.05065117789242036,
+      "grad_norm": 0.08829741179943085,
+      "learning_rate": 0.00019798816568047337,
+      "loss": 0.1942,
+      "step": 702
+    },
+    {
+      "epoch": 0.05072333056748079,
+      "grad_norm": 0.11870314925909042,
+      "learning_rate": 0.00019798527926107663,
+      "loss": 0.2104,
+      "step": 703
+    },
+    {
+      "epoch": 0.05079548324254122,
+      "grad_norm": 0.08130889385938644,
+      "learning_rate": 0.0001979823928416799,
+      "loss": 0.1645,
+      "step": 704
+    },
+    {
+      "epoch": 0.05086763591760164,
+      "grad_norm": 0.10137905180454254,
+      "learning_rate": 0.00019797950642228316,
+      "loss": 0.1645,
+      "step": 705
+    },
+    {
+      "epoch": 0.05093978859266207,
+      "grad_norm": 0.11528564244508743,
+      "learning_rate": 0.00019797662000288645,
+      "loss": 0.1545,
+      "step": 706
+    },
+    {
+      "epoch": 0.0510119412677225,
+      "grad_norm": 0.11569073796272278,
+      "learning_rate": 0.00019797373358348969,
+      "loss": 0.1696,
+      "step": 707
+    },
+    {
+      "epoch": 0.05108409394278293,
+      "grad_norm": 0.08077122271060944,
+      "learning_rate": 0.00019797084716409295,
+      "loss": 0.1832,
+      "step": 708
+    },
+    {
+      "epoch": 0.051156246617843355,
+      "grad_norm": 0.08243294805288315,
+      "learning_rate": 0.0001979679607446962,
+      "loss": 0.1951,
+      "step": 709
+    },
+    {
+      "epoch": 0.051228399292903785,
+      "grad_norm": 0.10372677445411682,
+      "learning_rate": 0.00019796507432529947,
+      "loss": 0.1642,
+      "step": 710
+    },
+    {
+      "epoch": 0.051300551967964216,
+      "grad_norm": 0.10604501515626907,
+      "learning_rate": 0.00019796218790590274,
+      "loss": 0.2088,
+      "step": 711
+    },
+    {
+      "epoch": 0.05137270464302464,
+      "grad_norm": 0.10709025710821152,
+      "learning_rate": 0.000197959301486506,
+      "loss": 0.1519,
+      "step": 712
+    },
+    {
+      "epoch": 0.05144485731808507,
+      "grad_norm": 0.10411393642425537,
+      "learning_rate": 0.00019795641506710926,
+      "loss": 0.2138,
+      "step": 713
+    },
+    {
+      "epoch": 0.0515170099931455,
+      "grad_norm": 0.11297295987606049,
+      "learning_rate": 0.00019795352864771253,
+      "loss": 0.1728,
+      "step": 714
+    },
+    {
+      "epoch": 0.05158916266820592,
+      "grad_norm": 0.0939662978053093,
+      "learning_rate": 0.0001979506422283158,
+      "loss": 0.2185,
+      "step": 715
+    },
+    {
+      "epoch": 0.05166131534326635,
+      "grad_norm": 0.07480581849813461,
+      "learning_rate": 0.00019794775580891905,
+      "loss": 0.1902,
+      "step": 716
+    },
+    {
+      "epoch": 0.05173346801832678,
+      "grad_norm": 0.09393037110567093,
+      "learning_rate": 0.00019794486938952232,
+      "loss": 0.1825,
+      "step": 717
+    },
+    {
+      "epoch": 0.051805620693387205,
+      "grad_norm": 0.086279958486557,
+      "learning_rate": 0.00019794198297012555,
+      "loss": 0.2103,
+      "step": 718
+    },
+    {
+      "epoch": 0.051877773368447636,
+      "grad_norm": 0.09034014493227005,
+      "learning_rate": 0.00019793909655072881,
+      "loss": 0.1879,
+      "step": 719
+    },
+    {
+      "epoch": 0.051949926043508066,
+      "grad_norm": 0.09409305453300476,
+      "learning_rate": 0.0001979362101313321,
+      "loss": 0.1037,
+      "step": 720
+    },
+    {
+      "epoch": 0.05202207871856849,
+      "grad_norm": 0.10134784877300262,
+      "learning_rate": 0.00019793332371193537,
+      "loss": 0.1811,
+      "step": 721
+    },
+    {
+      "epoch": 0.05209423139362892,
+      "grad_norm": 0.10113094747066498,
+      "learning_rate": 0.00019793043729253863,
+      "loss": 0.1572,
+      "step": 722
+    },
+    {
+      "epoch": 0.05216638406868935,
+      "grad_norm": 0.126359760761261,
+      "learning_rate": 0.00019792755087314187,
+      "loss": 0.2265,
+      "step": 723
+    },
+    {
+      "epoch": 0.05223853674374977,
+      "grad_norm": 0.11764882504940033,
+      "learning_rate": 0.00019792466445374513,
+      "loss": 0.1743,
+      "step": 724
+    },
+    {
+      "epoch": 0.0523106894188102,
+      "grad_norm": 0.0821533054113388,
+      "learning_rate": 0.0001979217780343484,
+      "loss": 0.1595,
+      "step": 725
+    },
+    {
+      "epoch": 0.05238284209387063,
+      "grad_norm": 0.10342086851596832,
+      "learning_rate": 0.00019791889161495165,
+      "loss": 0.1718,
+      "step": 726
+    },
+    {
+      "epoch": 0.052454994768931056,
+      "grad_norm": 0.13683348894119263,
+      "learning_rate": 0.00019791600519555494,
+      "loss": 0.162,
+      "step": 727
+    },
+    {
+      "epoch": 0.052527147443991486,
+      "grad_norm": 0.08543882519006729,
+      "learning_rate": 0.00019791311877615818,
+      "loss": 0.1992,
+      "step": 728
+    },
+    {
+      "epoch": 0.052599300119051916,
+      "grad_norm": 0.09584735333919525,
+      "learning_rate": 0.00019791023235676144,
+      "loss": 0.1365,
+      "step": 729
+    },
+    {
+      "epoch": 0.05267145279411234,
+      "grad_norm": 0.14931128919124603,
+      "learning_rate": 0.0001979073459373647,
+      "loss": 0.1962,
+      "step": 730
+    },
+    {
+      "epoch": 0.05274360546917277,
+      "grad_norm": 0.10803002119064331,
+      "learning_rate": 0.00019790445951796797,
+      "loss": 0.13,
+      "step": 731
+    },
+    {
+      "epoch": 0.0528157581442332,
+      "grad_norm": 0.1050458624958992,
+      "learning_rate": 0.00019790157309857123,
+      "loss": 0.2151,
+      "step": 732
+    },
+    {
+      "epoch": 0.05288791081929362,
+      "grad_norm": 0.07899843156337738,
+      "learning_rate": 0.0001978986866791745,
+      "loss": 0.1776,
+      "step": 733
+    },
+    {
+      "epoch": 0.05296006349435405,
+      "grad_norm": 0.09396897256374359,
+      "learning_rate": 0.00019789580025977776,
+      "loss": 0.1372,
+      "step": 734
+    },
+    {
+      "epoch": 0.05303221616941448,
+      "grad_norm": 0.14268507063388824,
+      "learning_rate": 0.00019789291384038102,
+      "loss": 0.2246,
+      "step": 735
+    },
+    {
+      "epoch": 0.053104368844474906,
+      "grad_norm": 0.09302389621734619,
+      "learning_rate": 0.00019789002742098428,
+      "loss": 0.1988,
+      "step": 736
+    },
+    {
+      "epoch": 0.053176521519535336,
+      "grad_norm": 0.1272112876176834,
+      "learning_rate": 0.00019788714100158755,
+      "loss": 0.1537,
+      "step": 737
+    },
+    {
+      "epoch": 0.053248674194595766,
+      "grad_norm": 0.09457764029502869,
+      "learning_rate": 0.0001978842545821908,
+      "loss": 0.1599,
+      "step": 738
+    },
+    {
+      "epoch": 0.05332082686965619,
+      "grad_norm": 0.09066502749919891,
+      "learning_rate": 0.00019788136816279405,
+      "loss": 0.1865,
+      "step": 739
+    },
+    {
+      "epoch": 0.05339297954471662,
+      "grad_norm": 0.09228852391242981,
+      "learning_rate": 0.0001978784817433973,
+      "loss": 0.1781,
+      "step": 740
+    },
+    {
+      "epoch": 0.05346513221977705,
+      "grad_norm": 0.12406047433614731,
+      "learning_rate": 0.0001978755953240006,
+      "loss": 0.2168,
+      "step": 741
+    },
+    {
+      "epoch": 0.05353728489483748,
+      "grad_norm": 0.10978226363658905,
+      "learning_rate": 0.00019787270890460386,
+      "loss": 0.1879,
+      "step": 742
+    },
+    {
+      "epoch": 0.0536094375698979,
+      "grad_norm": 0.11857189983129501,
+      "learning_rate": 0.00019786982248520712,
+      "loss": 0.2438,
+      "step": 743
+    },
+    {
+      "epoch": 0.05368159024495833,
+      "grad_norm": 0.11693238466978073,
+      "learning_rate": 0.00019786693606581036,
+      "loss": 0.1973,
+      "step": 744
+    },
+    {
+      "epoch": 0.05375374292001876,
+      "grad_norm": 0.09075228124856949,
+      "learning_rate": 0.00019786404964641362,
+      "loss": 0.1361,
+      "step": 745
+    },
+    {
+      "epoch": 0.053825895595079186,
+      "grad_norm": 0.09898970276117325,
+      "learning_rate": 0.00019786116322701689,
+      "loss": 0.1743,
+      "step": 746
+    },
+    {
+      "epoch": 0.053898048270139616,
+      "grad_norm": 0.09719021618366241,
+      "learning_rate": 0.00019785827680762015,
+      "loss": 0.1934,
+      "step": 747
+    },
+    {
+      "epoch": 0.053970200945200046,
+      "grad_norm": 0.09431437402963638,
+      "learning_rate": 0.00019785539038822344,
+      "loss": 0.2014,
+      "step": 748
+    },
+    {
+      "epoch": 0.05404235362026047,
+      "grad_norm": 0.11232171952724457,
+      "learning_rate": 0.00019785250396882667,
+      "loss": 0.1365,
+      "step": 749
+    },
+    {
+      "epoch": 0.0541145062953209,
+      "grad_norm": 0.09927115589380264,
+      "learning_rate": 0.00019784961754942994,
+      "loss": 0.1727,
+      "step": 750
+    },
+    {
+      "epoch": 0.05418665897038133,
+      "grad_norm": 0.08680518716573715,
+      "learning_rate": 0.0001978467311300332,
+      "loss": 0.1887,
+      "step": 751
+    },
+    {
+      "epoch": 0.05425881164544175,
+      "grad_norm": 0.11436621099710464,
+      "learning_rate": 0.00019784384471063646,
+      "loss": 0.218,
+      "step": 752
+    },
+    {
+      "epoch": 0.05433096432050218,
+      "grad_norm": 0.11894845962524414,
+      "learning_rate": 0.00019784095829123973,
+      "loss": 0.1375,
+      "step": 753
+    },
+    {
+      "epoch": 0.05440311699556261,
+      "grad_norm": 0.09046079963445663,
+      "learning_rate": 0.000197838071871843,
+      "loss": 0.178,
+      "step": 754
+    },
+    {
+      "epoch": 0.054475269670623036,
+      "grad_norm": 0.35691019892692566,
+      "learning_rate": 0.00019783518545244625,
+      "loss": 0.1678,
+      "step": 755
+    },
+    {
+      "epoch": 0.054547422345683466,
+      "grad_norm": 0.15913482010364532,
+      "learning_rate": 0.00019783229903304952,
+      "loss": 0.1991,
+      "step": 756
+    },
+    {
+      "epoch": 0.054619575020743896,
+      "grad_norm": 0.13208337128162384,
+      "learning_rate": 0.00019782941261365278,
+      "loss": 0.1762,
+      "step": 757
+    },
+    {
+      "epoch": 0.05469172769580432,
+      "grad_norm": 0.1702233850955963,
+      "learning_rate": 0.00019782652619425604,
+      "loss": 0.1493,
+      "step": 758
+    },
+    {
+      "epoch": 0.05476388037086475,
+      "grad_norm": 0.1387334018945694,
+      "learning_rate": 0.0001978236397748593,
+      "loss": 0.194,
+      "step": 759
+    },
+    {
+      "epoch": 0.05483603304592518,
+      "grad_norm": 0.1383606642484665,
+      "learning_rate": 0.00019782075335546254,
+      "loss": 0.2381,
+      "step": 760
+    },
+    {
+      "epoch": 0.0549081857209856,
+      "grad_norm": 0.08609382063150406,
+      "learning_rate": 0.0001978178669360658,
+      "loss": 0.18,
+      "step": 761
+    },
+    {
+      "epoch": 0.05498033839604603,
+      "grad_norm": 0.13723242282867432,
+      "learning_rate": 0.0001978149805166691,
+      "loss": 0.1756,
+      "step": 762
+    },
+    {
+      "epoch": 0.05505249107110646,
+      "grad_norm": 0.09601780772209167,
+      "learning_rate": 0.00019781209409727236,
+      "loss": 0.1518,
+      "step": 763
+    },
+    {
+      "epoch": 0.055124643746166886,
+      "grad_norm": 0.28943732380867004,
+      "learning_rate": 0.00019780920767787562,
+      "loss": 0.1632,
+      "step": 764
+    },
+    {
+      "epoch": 0.055196796421227316,
+      "grad_norm": 0.11374638974666595,
+      "learning_rate": 0.00019780632125847885,
+      "loss": 0.1644,
+      "step": 765
+    },
+    {
+      "epoch": 0.055268949096287746,
+      "grad_norm": 0.1234770193696022,
+      "learning_rate": 0.00019780343483908212,
+      "loss": 0.2059,
+      "step": 766
+    },
+    {
+      "epoch": 0.05534110177134817,
+      "grad_norm": 0.09630633145570755,
+      "learning_rate": 0.00019780054841968538,
+      "loss": 0.189,
+      "step": 767
+    },
+    {
+      "epoch": 0.0554132544464086,
+      "grad_norm": 0.11028503626585007,
+      "learning_rate": 0.00019779766200028864,
+      "loss": 0.1629,
+      "step": 768
+    },
+    {
+      "epoch": 0.05548540712146903,
+      "grad_norm": 0.09348886460065842,
+      "learning_rate": 0.00019779477558089193,
+      "loss": 0.1601,
+      "step": 769
+    },
+    {
+      "epoch": 0.05555755979652945,
+      "grad_norm": 0.08891778439283371,
+      "learning_rate": 0.00019779188916149517,
+      "loss": 0.1983,
+      "step": 770
+    },
+    {
+      "epoch": 0.05562971247158988,
+      "grad_norm": 0.10887889564037323,
+      "learning_rate": 0.00019778900274209843,
+      "loss": 0.16,
+      "step": 771
+    },
+    {
+      "epoch": 0.05570186514665031,
+      "grad_norm": 0.09718909859657288,
+      "learning_rate": 0.0001977861163227017,
+      "loss": 0.1322,
+      "step": 772
+    },
+    {
+      "epoch": 0.05577401782171074,
+      "grad_norm": 0.11404313892126083,
+      "learning_rate": 0.00019778322990330496,
+      "loss": 0.2253,
+      "step": 773
+    },
+    {
+      "epoch": 0.055846170496771166,
+      "grad_norm": 0.12127923965454102,
+      "learning_rate": 0.00019778034348390822,
+      "loss": 0.1852,
+      "step": 774
+    },
+    {
+      "epoch": 0.055918323171831597,
+      "grad_norm": 0.11532068252563477,
+      "learning_rate": 0.00019777745706451148,
+      "loss": 0.2012,
+      "step": 775
+    },
+    {
+      "epoch": 0.05599047584689203,
+      "grad_norm": 0.09808876365423203,
+      "learning_rate": 0.00019777457064511475,
+      "loss": 0.1932,
+      "step": 776
+    },
+    {
+      "epoch": 0.05606262852195245,
+      "grad_norm": 0.1004500538110733,
+      "learning_rate": 0.000197771684225718,
+      "loss": 0.2526,
+      "step": 777
+    },
+    {
+      "epoch": 0.05613478119701288,
+      "grad_norm": 0.12450846284627914,
+      "learning_rate": 0.00019776879780632127,
+      "loss": 0.2019,
+      "step": 778
+    },
+    {
+      "epoch": 0.05620693387207331,
+      "grad_norm": 0.12701664865016937,
+      "learning_rate": 0.00019776591138692454,
+      "loss": 0.2032,
+      "step": 779
+    },
+    {
+      "epoch": 0.05627908654713373,
+      "grad_norm": 0.14145851135253906,
+      "learning_rate": 0.0001977630249675278,
+      "loss": 0.1964,
+      "step": 780
+    },
+    {
+      "epoch": 0.05635123922219416,
+      "grad_norm": 0.11641249805688858,
+      "learning_rate": 0.00019776013854813103,
+      "loss": 0.2092,
+      "step": 781
+    },
+    {
+      "epoch": 0.05642339189725459,
+      "grad_norm": 0.12057143449783325,
+      "learning_rate": 0.0001977572521287343,
+      "loss": 0.2071,
+      "step": 782
+    },
+    {
+      "epoch": 0.056495544572315017,
+      "grad_norm": 0.09990455210208893,
+      "learning_rate": 0.0001977543657093376,
+      "loss": 0.1968,
+      "step": 783
+    },
+    {
+      "epoch": 0.05656769724737545,
+      "grad_norm": 0.09977443516254425,
+      "learning_rate": 0.00019775147928994085,
+      "loss": 0.187,
+      "step": 784
+    },
+    {
+      "epoch": 0.05663984992243588,
+      "grad_norm": 0.11669214814901352,
+      "learning_rate": 0.0001977485928705441,
+      "loss": 0.2187,
+      "step": 785
+    },
+    {
+      "epoch": 0.0567120025974963,
+      "grad_norm": 0.11437924206256866,
+      "learning_rate": 0.00019774570645114735,
+      "loss": 0.205,
+      "step": 786
+    },
+    {
+      "epoch": 0.05678415527255673,
+      "grad_norm": 0.13286525011062622,
+      "learning_rate": 0.0001977428200317506,
+      "loss": 0.1939,
+      "step": 787
+    },
+    {
+      "epoch": 0.05685630794761716,
+      "grad_norm": 0.15068671107292175,
+      "learning_rate": 0.00019773993361235387,
+      "loss": 0.1648,
+      "step": 788
+    },
+    {
+      "epoch": 0.05692846062267758,
+      "grad_norm": 0.15105897188186646,
+      "learning_rate": 0.00019773704719295714,
+      "loss": 0.2111,
+      "step": 789
+    },
+    {
+      "epoch": 0.05700061329773801,
+      "grad_norm": 0.0838138684630394,
+      "learning_rate": 0.00019773416077356043,
+      "loss": 0.1812,
+      "step": 790
+    },
+    {
+      "epoch": 0.057072765972798443,
+      "grad_norm": 0.11074309051036835,
+      "learning_rate": 0.00019773127435416366,
+      "loss": 0.174,
+      "step": 791
+    },
+    {
+      "epoch": 0.05714491864785887,
+      "grad_norm": 0.12502411007881165,
+      "learning_rate": 0.00019772838793476693,
+      "loss": 0.1996,
+      "step": 792
+    },
+    {
+      "epoch": 0.0572170713229193,
+      "grad_norm": 0.11896955966949463,
+      "learning_rate": 0.0001977255015153702,
+      "loss": 0.1898,
+      "step": 793
+    },
+    {
+      "epoch": 0.05728922399797973,
+      "grad_norm": 0.10241451859474182,
+      "learning_rate": 0.00019772261509597345,
+      "loss": 0.16,
+      "step": 794
+    },
+    {
+      "epoch": 0.05736137667304015,
+      "grad_norm": 0.11450854688882828,
+      "learning_rate": 0.00019771972867657671,
+      "loss": 0.1798,
+      "step": 795
+    },
+    {
+      "epoch": 0.05743352934810058,
+      "grad_norm": 0.12698432803153992,
+      "learning_rate": 0.00019771684225717998,
+      "loss": 0.1536,
+      "step": 796
+    },
+    {
+      "epoch": 0.05750568202316101,
+      "grad_norm": 0.0907829999923706,
+      "learning_rate": 0.00019771395583778324,
+      "loss": 0.1776,
+      "step": 797
+    },
+    {
+      "epoch": 0.05757783469822143,
+      "grad_norm": 0.08737281709909439,
+      "learning_rate": 0.0001977110694183865,
+      "loss": 0.1806,
+      "step": 798
+    },
+    {
+      "epoch": 0.057649987373281864,
+      "grad_norm": 0.09027869254350662,
+      "learning_rate": 0.00019770818299898977,
+      "loss": 0.1328,
+      "step": 799
+    },
+    {
+      "epoch": 0.057722140048342294,
+      "grad_norm": 0.11647447943687439,
+      "learning_rate": 0.00019770529657959303,
+      "loss": 0.2173,
+      "step": 800
+    },
+    {
+      "epoch": 0.05779429272340272,
+      "grad_norm": 0.0926554799079895,
+      "learning_rate": 0.0001977024101601963,
+      "loss": 0.228,
+      "step": 801
+    },
+    {
+      "epoch": 0.05786644539846315,
+      "grad_norm": 0.1256769746541977,
+      "learning_rate": 0.00019769952374079953,
+      "loss": 0.2025,
+      "step": 802
+    },
+    {
+      "epoch": 0.05793859807352358,
+      "grad_norm": 0.1006527915596962,
+      "learning_rate": 0.0001976966373214028,
+      "loss": 0.1862,
+      "step": 803
+    },
+    {
+      "epoch": 0.05801075074858401,
+      "grad_norm": 0.10265506058931351,
+      "learning_rate": 0.00019769375090200608,
+      "loss": 0.173,
+      "step": 804
+    },
+    {
+      "epoch": 0.05808290342364443,
+      "grad_norm": 0.09494657814502716,
+      "learning_rate": 0.00019769086448260934,
+      "loss": 0.2186,
+      "step": 805
+    },
+    {
+      "epoch": 0.05815505609870486,
+      "grad_norm": 0.1186690479516983,
+      "learning_rate": 0.0001976879780632126,
+      "loss": 0.212,
+      "step": 806
+    },
+    {
+      "epoch": 0.05822720877376529,
+      "grad_norm": 0.09116118401288986,
+      "learning_rate": 0.00019768509164381584,
+      "loss": 0.2086,
+      "step": 807
+    },
+    {
+      "epoch": 0.058299361448825714,
+      "grad_norm": 0.10806886851787567,
+      "learning_rate": 0.0001976822052244191,
+      "loss": 0.158,
+      "step": 808
+    },
+    {
+      "epoch": 0.058371514123886144,
+      "grad_norm": 0.09465332329273224,
+      "learning_rate": 0.00019767931880502237,
+      "loss": 0.1479,
+      "step": 809
+    },
+    {
+      "epoch": 0.058443666798946574,
+      "grad_norm": 0.10689811408519745,
+      "learning_rate": 0.00019767643238562563,
+      "loss": 0.1648,
+      "step": 810
+    },
+    {
+      "epoch": 0.058515819474007,
+      "grad_norm": 0.08733882009983063,
+      "learning_rate": 0.00019767354596622892,
+      "loss": 0.1963,
+      "step": 811
+    },
+    {
+      "epoch": 0.05858797214906743,
+      "grad_norm": 0.10385697335004807,
+      "learning_rate": 0.00019767065954683218,
+      "loss": 0.1631,
+      "step": 812
+    },
+    {
+      "epoch": 0.05866012482412786,
+      "grad_norm": 0.1437450498342514,
+      "learning_rate": 0.00019766777312743542,
+      "loss": 0.1684,
+      "step": 813
+    },
+    {
+      "epoch": 0.05873227749918828,
+      "grad_norm": 0.08907853811979294,
+      "learning_rate": 0.00019766488670803868,
+      "loss": 0.1789,
+      "step": 814
+    },
+    {
+      "epoch": 0.05880443017424871,
+      "grad_norm": 0.08962389826774597,
+      "learning_rate": 0.00019766200028864195,
+      "loss": 0.17,
+      "step": 815
+    },
+    {
+      "epoch": 0.05887658284930914,
+      "grad_norm": 0.10819995403289795,
+      "learning_rate": 0.0001976591138692452,
+      "loss": 0.2291,
+      "step": 816
+    },
+    {
+      "epoch": 0.058948735524369564,
+      "grad_norm": 0.09052648395299911,
+      "learning_rate": 0.00019765622744984847,
+      "loss": 0.1897,
+      "step": 817
+    },
+    {
+      "epoch": 0.059020888199429994,
+      "grad_norm": 0.10309138894081116,
+      "learning_rate": 0.00019765334103045173,
+      "loss": 0.1634,
+      "step": 818
+    },
+    {
+      "epoch": 0.059093040874490424,
+      "grad_norm": 0.0909322202205658,
+      "learning_rate": 0.000197650454611055,
+      "loss": 0.1774,
+      "step": 819
+    },
+    {
+      "epoch": 0.05916519354955085,
+      "grad_norm": 0.0867924764752388,
+      "learning_rate": 0.00019764756819165826,
+      "loss": 0.1937,
+      "step": 820
+    },
+    {
+      "epoch": 0.05923734622461128,
+      "grad_norm": 0.08892381936311722,
+      "learning_rate": 0.00019764468177226152,
+      "loss": 0.1359,
+      "step": 821
+    },
+    {
+      "epoch": 0.05930949889967171,
+      "grad_norm": 0.101357601583004,
+      "learning_rate": 0.0001976417953528648,
+      "loss": 0.1597,
+      "step": 822
+    },
+    {
+      "epoch": 0.05938165157473213,
+      "grad_norm": 0.10044662654399872,
+      "learning_rate": 0.00019763890893346805,
+      "loss": 0.1363,
+      "step": 823
+    },
+    {
+      "epoch": 0.05945380424979256,
+      "grad_norm": 0.09395581483840942,
+      "learning_rate": 0.00019763602251407129,
+      "loss": 0.1674,
+      "step": 824
+    },
+    {
+      "epoch": 0.05952595692485299,
+      "grad_norm": 0.10966863483190536,
+      "learning_rate": 0.00019763313609467458,
+      "loss": 0.1458,
+      "step": 825
+    },
+    {
+      "epoch": 0.059598109599913414,
+      "grad_norm": 0.1251005083322525,
+      "learning_rate": 0.00019763024967527784,
+      "loss": 0.26,
+      "step": 826
+    },
+    {
+      "epoch": 0.059670262274973844,
+      "grad_norm": 0.09615496546030045,
+      "learning_rate": 0.0001976273632558811,
+      "loss": 0.1879,
+      "step": 827
+    },
+    {
+      "epoch": 0.059742414950034274,
+      "grad_norm": 0.10016679763793945,
+      "learning_rate": 0.00019762447683648436,
+      "loss": 0.1718,
+      "step": 828
+    },
+    {
+      "epoch": 0.0598145676250947,
+      "grad_norm": 0.11508508771657944,
+      "learning_rate": 0.0001976215904170876,
+      "loss": 0.1755,
+      "step": 829
+    },
+    {
+      "epoch": 0.05988672030015513,
+      "grad_norm": 0.08913619071245193,
+      "learning_rate": 0.00019761870399769086,
+      "loss": 0.148,
+      "step": 830
+    },
+    {
+      "epoch": 0.05995887297521556,
+      "grad_norm": 0.09995342791080475,
+      "learning_rate": 0.00019761581757829413,
+      "loss": 0.1599,
+      "step": 831
+    },
+    {
+      "epoch": 0.06003102565027598,
+      "grad_norm": 0.14899706840515137,
+      "learning_rate": 0.00019761293115889742,
+      "loss": 0.1411,
+      "step": 832
+    },
+    {
+      "epoch": 0.06010317832533641,
+      "grad_norm": 0.13075637817382812,
+      "learning_rate": 0.00019761004473950068,
+      "loss": 0.2015,
+      "step": 833
+    },
+    {
+      "epoch": 0.06017533100039684,
+      "grad_norm": 0.10823075473308563,
+      "learning_rate": 0.00019760715832010391,
+      "loss": 0.1733,
+      "step": 834
+    },
+    {
+      "epoch": 0.06024748367545727,
+      "grad_norm": 0.09528510272502899,
+      "learning_rate": 0.00019760427190070718,
+      "loss": 0.1984,
+      "step": 835
+    },
+    {
+      "epoch": 0.060319636350517694,
+      "grad_norm": 0.14064089953899384,
+      "learning_rate": 0.00019760138548131044,
+      "loss": 0.2143,
+      "step": 836
+    },
+    {
+      "epoch": 0.060391789025578124,
+      "grad_norm": 0.08362717926502228,
+      "learning_rate": 0.0001975984990619137,
+      "loss": 0.1659,
+      "step": 837
+    },
+    {
+      "epoch": 0.060463941700638554,
+      "grad_norm": 0.09632274508476257,
+      "learning_rate": 0.00019759561264251697,
+      "loss": 0.1424,
+      "step": 838
+    },
+    {
+      "epoch": 0.06053609437569898,
+      "grad_norm": 0.0978073701262474,
+      "learning_rate": 0.00019759272622312023,
+      "loss": 0.1318,
+      "step": 839
+    },
+    {
+      "epoch": 0.06060824705075941,
+      "grad_norm": 0.11037838459014893,
+      "learning_rate": 0.0001975898398037235,
+      "loss": 0.2055,
+      "step": 840
+    },
+    {
+      "epoch": 0.06068039972581984,
+      "grad_norm": 0.09025062620639801,
+      "learning_rate": 0.00019758695338432675,
+      "loss": 0.1897,
+      "step": 841
+    },
+    {
+      "epoch": 0.06075255240088026,
+      "grad_norm": 0.14597173035144806,
+      "learning_rate": 0.00019758406696493002,
+      "loss": 0.2609,
+      "step": 842
+    },
+    {
+      "epoch": 0.06082470507594069,
+      "grad_norm": 0.09020640701055527,
+      "learning_rate": 0.00019758118054553328,
+      "loss": 0.1906,
+      "step": 843
+    },
+    {
+      "epoch": 0.06089685775100112,
+      "grad_norm": 0.0924547016620636,
+      "learning_rate": 0.00019757829412613654,
+      "loss": 0.2269,
+      "step": 844
+    },
+    {
+      "epoch": 0.060969010426061544,
+      "grad_norm": 0.12248987704515457,
+      "learning_rate": 0.00019757540770673978,
+      "loss": 0.1816,
+      "step": 845
+    },
+    {
+      "epoch": 0.061041163101121974,
+      "grad_norm": 0.07481729239225388,
+      "learning_rate": 0.00019757252128734307,
+      "loss": 0.154,
+      "step": 846
+    },
+    {
+      "epoch": 0.061113315776182404,
+      "grad_norm": 0.08164883404970169,
+      "learning_rate": 0.00019756963486794633,
+      "loss": 0.1804,
+      "step": 847
+    },
+    {
+      "epoch": 0.06118546845124283,
+      "grad_norm": 0.09927359223365784,
+      "learning_rate": 0.0001975667484485496,
+      "loss": 0.2308,
+      "step": 848
+    },
+    {
+      "epoch": 0.06125762112630326,
+      "grad_norm": 0.14488515257835388,
+      "learning_rate": 0.00019756386202915286,
+      "loss": 0.228,
+      "step": 849
+    },
+    {
+      "epoch": 0.06132977380136369,
+      "grad_norm": 0.10332830250263214,
+      "learning_rate": 0.0001975609756097561,
+      "loss": 0.1946,
+      "step": 850
+    },
+    {
+      "epoch": 0.06140192647642411,
+      "grad_norm": 0.1296503245830536,
+      "learning_rate": 0.00019755808919035936,
+      "loss": 0.1589,
+      "step": 851
+    },
+    {
+      "epoch": 0.06147407915148454,
+      "grad_norm": 0.12913277745246887,
+      "learning_rate": 0.00019755520277096262,
+      "loss": 0.2017,
+      "step": 852
+    },
+    {
+      "epoch": 0.06154623182654497,
+      "grad_norm": 0.09249990433454514,
+      "learning_rate": 0.0001975523163515659,
+      "loss": 0.1749,
+      "step": 853
+    },
+    {
+      "epoch": 0.061618384501605394,
+      "grad_norm": 0.12817887961864471,
+      "learning_rate": 0.00019754942993216917,
+      "loss": 0.1677,
+      "step": 854
+    },
+    {
+      "epoch": 0.061690537176665824,
+      "grad_norm": 0.09516768902540207,
+      "learning_rate": 0.0001975465435127724,
+      "loss": 0.1482,
+      "step": 855
+    },
+    {
+      "epoch": 0.061762689851726255,
+      "grad_norm": 0.1048058345913887,
+      "learning_rate": 0.00019754365709337567,
+      "loss": 0.2181,
+      "step": 856
+    },
+    {
+      "epoch": 0.06183484252678668,
+      "grad_norm": 0.11138293892145157,
+      "learning_rate": 0.00019754077067397893,
+      "loss": 0.2056,
+      "step": 857
+    },
+    {
+      "epoch": 0.06190699520184711,
+      "grad_norm": 0.09336519241333008,
+      "learning_rate": 0.0001975378842545822,
+      "loss": 0.2204,
+      "step": 858
+    },
+    {
+      "epoch": 0.06197914787690754,
+      "grad_norm": 0.07680778205394745,
+      "learning_rate": 0.00019753499783518546,
+      "loss": 0.152,
+      "step": 859
+    },
+    {
+      "epoch": 0.06205130055196796,
+      "grad_norm": 0.11393015831708908,
+      "learning_rate": 0.00019753211141578872,
+      "loss": 0.1537,
+      "step": 860
+    },
+    {
+      "epoch": 0.06212345322702839,
+      "grad_norm": 0.09241417795419693,
+      "learning_rate": 0.00019752922499639199,
+      "loss": 0.2049,
+      "step": 861
+    },
+    {
+      "epoch": 0.06219560590208882,
+      "grad_norm": 0.08792451024055481,
+      "learning_rate": 0.00019752633857699525,
+      "loss": 0.179,
+      "step": 862
+    },
+    {
+      "epoch": 0.062267758577149245,
+      "grad_norm": 0.0883568525314331,
+      "learning_rate": 0.0001975234521575985,
+      "loss": 0.1811,
+      "step": 863
+    },
+    {
+      "epoch": 0.062339911252209675,
+      "grad_norm": 0.11310116201639175,
+      "learning_rate": 0.00019752056573820178,
+      "loss": 0.1885,
+      "step": 864
+    },
+    {
+      "epoch": 0.062412063927270105,
+      "grad_norm": 0.07407009601593018,
+      "learning_rate": 0.00019751767931880504,
+      "loss": 0.1885,
+      "step": 865
+    },
+    {
+      "epoch": 0.062484216602330535,
+      "grad_norm": 0.1365663707256317,
+      "learning_rate": 0.00019751479289940827,
+      "loss": 0.1182,
+      "step": 866
+    },
+    {
+      "epoch": 0.06255636927739096,
+      "grad_norm": 0.07840821146965027,
+      "learning_rate": 0.00019751190648001156,
+      "loss": 0.1688,
+      "step": 867
+    },
+    {
+      "epoch": 0.06262852195245139,
+      "grad_norm": 0.10654614120721817,
+      "learning_rate": 0.00019750902006061483,
+      "loss": 0.1288,
+      "step": 868
+    },
+    {
+      "epoch": 0.06270067462751182,
+      "grad_norm": 0.10260630398988724,
+      "learning_rate": 0.0001975061336412181,
+      "loss": 0.2249,
+      "step": 869
+    },
+    {
+      "epoch": 0.06277282730257225,
+      "grad_norm": 0.11337298899888992,
+      "learning_rate": 0.00019750324722182135,
+      "loss": 0.2393,
+      "step": 870
+    },
+    {
+      "epoch": 0.06284497997763266,
+      "grad_norm": 0.09580923616886139,
+      "learning_rate": 0.0001975003608024246,
+      "loss": 0.2109,
+      "step": 871
+    },
+    {
+      "epoch": 0.0629171326526931,
+      "grad_norm": 0.09207534044981003,
+      "learning_rate": 0.00019749747438302785,
+      "loss": 0.2034,
+      "step": 872
+    },
+    {
+      "epoch": 0.06298928532775352,
+      "grad_norm": 0.1136569008231163,
+      "learning_rate": 0.00019749458796363111,
+      "loss": 0.1593,
+      "step": 873
+    },
+    {
+      "epoch": 0.06306143800281395,
+      "grad_norm": 0.10841278731822968,
+      "learning_rate": 0.0001974917015442344,
+      "loss": 0.1655,
+      "step": 874
+    },
+    {
+      "epoch": 0.06313359067787438,
+      "grad_norm": 0.1440107375383377,
+      "learning_rate": 0.00019748881512483767,
+      "loss": 0.1858,
+      "step": 875
+    },
+    {
+      "epoch": 0.06320574335293482,
+      "grad_norm": 0.10138542205095291,
+      "learning_rate": 0.0001974859287054409,
+      "loss": 0.2196,
+      "step": 876
+    },
+    {
+      "epoch": 0.06327789602799523,
+      "grad_norm": 0.10658544301986694,
+      "learning_rate": 0.00019748304228604417,
+      "loss": 0.2105,
+      "step": 877
+    },
+    {
+      "epoch": 0.06335004870305566,
+      "grad_norm": 0.09705675393342972,
+      "learning_rate": 0.00019748015586664743,
+      "loss": 0.1708,
+      "step": 878
+    },
+    {
+      "epoch": 0.06342220137811609,
+      "grad_norm": 0.11171989887952805,
+      "learning_rate": 0.0001974772694472507,
+      "loss": 0.1685,
+      "step": 879
+    },
+    {
+      "epoch": 0.06349435405317652,
+      "grad_norm": 0.07426609098911285,
+      "learning_rate": 0.00019747438302785395,
+      "loss": 0.1925,
+      "step": 880
+    },
+    {
+      "epoch": 0.06356650672823695,
+      "grad_norm": 0.1063368171453476,
+      "learning_rate": 0.00019747149660845722,
+      "loss": 0.1822,
+      "step": 881
+    },
+    {
+      "epoch": 0.06363865940329738,
+      "grad_norm": 0.12676234543323517,
+      "learning_rate": 0.00019746861018906048,
+      "loss": 0.1912,
+      "step": 882
+    },
+    {
+      "epoch": 0.06371081207835781,
+      "grad_norm": 0.10321086645126343,
+      "learning_rate": 0.00019746572376966374,
+      "loss": 0.164,
+      "step": 883
+    },
+    {
+      "epoch": 0.06378296475341823,
+      "grad_norm": 0.0895160511136055,
+      "learning_rate": 0.000197462837350267,
+      "loss": 0.1928,
+      "step": 884
+    },
+    {
+      "epoch": 0.06385511742847866,
+      "grad_norm": 0.09870103001594543,
+      "learning_rate": 0.00019745995093087027,
+      "loss": 0.2003,
+      "step": 885
+    },
+    {
+      "epoch": 0.06392727010353909,
+      "grad_norm": 0.0940144807100296,
+      "learning_rate": 0.00019745706451147353,
+      "loss": 0.1727,
+      "step": 886
+    },
+    {
+      "epoch": 0.06399942277859952,
+      "grad_norm": 0.11259470134973526,
+      "learning_rate": 0.00019745417809207677,
+      "loss": 0.2041,
+      "step": 887
+    },
+    {
+      "epoch": 0.06407157545365995,
+      "grad_norm": 0.08136259019374847,
+      "learning_rate": 0.00019745129167268003,
+      "loss": 0.1506,
+      "step": 888
+    },
+    {
+      "epoch": 0.06414372812872038,
+      "grad_norm": 0.11417430639266968,
+      "learning_rate": 0.00019744840525328332,
+      "loss": 0.1939,
+      "step": 889
+    },
+    {
+      "epoch": 0.0642158808037808,
+      "grad_norm": 0.11523161828517914,
+      "learning_rate": 0.00019744551883388658,
+      "loss": 0.1678,
+      "step": 890
+    },
+    {
+      "epoch": 0.06428803347884122,
+      "grad_norm": 0.1095183789730072,
+      "learning_rate": 0.00019744263241448985,
+      "loss": 0.1182,
+      "step": 891
+    },
+    {
+      "epoch": 0.06436018615390166,
+      "grad_norm": 0.08293008804321289,
+      "learning_rate": 0.00019743974599509308,
+      "loss": 0.1762,
+      "step": 892
+    },
+    {
+      "epoch": 0.06443233882896209,
+      "grad_norm": 0.08579592406749725,
+      "learning_rate": 0.00019743685957569635,
+      "loss": 0.1645,
+      "step": 893
+    },
+    {
+      "epoch": 0.06450449150402252,
+      "grad_norm": 0.09980572015047073,
+      "learning_rate": 0.0001974339731562996,
+      "loss": 0.1829,
+      "step": 894
+    },
+    {
+      "epoch": 0.06457664417908295,
+      "grad_norm": 0.10224307328462601,
+      "learning_rate": 0.00019743108673690287,
+      "loss": 0.1701,
+      "step": 895
+    },
+    {
+      "epoch": 0.06464879685414336,
+      "grad_norm": 0.0910172089934349,
+      "learning_rate": 0.00019742820031750616,
+      "loss": 0.153,
+      "step": 896
+    },
+    {
+      "epoch": 0.06472094952920379,
+      "grad_norm": 0.09426335990428925,
+      "learning_rate": 0.0001974253138981094,
+      "loss": 0.1888,
+      "step": 897
+    },
+    {
+      "epoch": 0.06479310220426422,
+      "grad_norm": 0.11477576196193695,
+      "learning_rate": 0.00019742242747871266,
+      "loss": 0.1528,
+      "step": 898
+    },
+    {
+      "epoch": 0.06486525487932465,
+      "grad_norm": 0.09423889219760895,
+      "learning_rate": 0.00019741954105931592,
+      "loss": 0.1631,
+      "step": 899
+    },
+    {
+      "epoch": 0.06493740755438508,
+      "grad_norm": 0.08917507529258728,
+      "learning_rate": 0.00019741665463991919,
+      "loss": 0.1684,
+      "step": 900
+    },
+    {
+      "epoch": 0.06500956022944551,
+      "grad_norm": 0.10076791793107986,
+      "learning_rate": 0.00019741376822052245,
+      "loss": 0.1609,
+      "step": 901
+    },
+    {
+      "epoch": 0.06508171290450593,
+      "grad_norm": 0.1011313796043396,
+      "learning_rate": 0.0001974108818011257,
+      "loss": 0.1489,
+      "step": 902
+    },
+    {
+      "epoch": 0.06515386557956636,
+      "grad_norm": 0.09580199420452118,
+      "learning_rate": 0.00019740799538172897,
+      "loss": 0.1983,
+      "step": 903
+    },
+    {
+      "epoch": 0.06522601825462679,
+      "grad_norm": 0.1301235854625702,
+      "learning_rate": 0.00019740510896233224,
+      "loss": 0.2307,
+      "step": 904
+    },
+    {
+      "epoch": 0.06529817092968722,
+      "grad_norm": 0.11178337782621384,
+      "learning_rate": 0.0001974022225429355,
+      "loss": 0.1782,
+      "step": 905
+    },
+    {
+      "epoch": 0.06537032360474765,
+      "grad_norm": 0.108871228992939,
+      "learning_rate": 0.00019739933612353876,
+      "loss": 0.1613,
+      "step": 906
+    },
+    {
+      "epoch": 0.06544247627980808,
+      "grad_norm": 0.08900240808725357,
+      "learning_rate": 0.00019739644970414203,
+      "loss": 0.1448,
+      "step": 907
+    },
+    {
+      "epoch": 0.0655146289548685,
+      "grad_norm": 0.08600308746099472,
+      "learning_rate": 0.00019739356328474526,
+      "loss": 0.2338,
+      "step": 908
+    },
+    {
+      "epoch": 0.06558678162992893,
+      "grad_norm": 0.10805013030767441,
+      "learning_rate": 0.00019739067686534853,
+      "loss": 0.195,
+      "step": 909
+    },
+    {
+      "epoch": 0.06565893430498936,
+      "grad_norm": 0.13240516185760498,
+      "learning_rate": 0.00019738779044595182,
+      "loss": 0.2384,
+      "step": 910
+    },
+    {
+      "epoch": 0.06573108698004979,
+      "grad_norm": 0.09833045303821564,
+      "learning_rate": 0.00019738490402655508,
+      "loss": 0.1868,
+      "step": 911
+    },
+    {
+      "epoch": 0.06580323965511022,
+      "grad_norm": 0.09301164746284485,
+      "learning_rate": 0.00019738201760715834,
+      "loss": 0.1649,
+      "step": 912
+    },
+    {
+      "epoch": 0.06587539233017065,
+      "grad_norm": 0.10523568093776703,
+      "learning_rate": 0.00019737913118776158,
+      "loss": 0.2057,
+      "step": 913
+    },
+    {
+      "epoch": 0.06594754500523108,
+      "grad_norm": 0.1101023256778717,
+      "learning_rate": 0.00019737624476836484,
+      "loss": 0.1605,
+      "step": 914
+    },
+    {
+      "epoch": 0.06601969768029149,
+      "grad_norm": 0.1447688192129135,
+      "learning_rate": 0.0001973733583489681,
+      "loss": 0.1954,
+      "step": 915
+    },
+    {
+      "epoch": 0.06609185035535192,
+      "grad_norm": 0.1215805783867836,
+      "learning_rate": 0.00019737047192957137,
+      "loss": 0.1871,
+      "step": 916
+    },
+    {
+      "epoch": 0.06616400303041235,
+      "grad_norm": 0.1299552321434021,
+      "learning_rate": 0.00019736758551017466,
+      "loss": 0.182,
+      "step": 917
+    },
+    {
+      "epoch": 0.06623615570547278,
+      "grad_norm": 0.10295114666223526,
+      "learning_rate": 0.0001973646990907779,
+      "loss": 0.221,
+      "step": 918
+    },
+    {
+      "epoch": 0.06630830838053321,
+      "grad_norm": 0.10897116363048553,
+      "learning_rate": 0.00019736181267138115,
+      "loss": 0.1452,
+      "step": 919
+    },
+    {
+      "epoch": 0.06638046105559364,
+      "grad_norm": 0.09574306756258011,
+      "learning_rate": 0.00019735892625198442,
+      "loss": 0.1695,
+      "step": 920
+    },
+    {
+      "epoch": 0.06645261373065406,
+      "grad_norm": 0.12180160731077194,
+      "learning_rate": 0.00019735603983258768,
+      "loss": 0.1928,
+      "step": 921
+    },
+    {
+      "epoch": 0.06652476640571449,
+      "grad_norm": 0.09444516152143478,
+      "learning_rate": 0.00019735315341319094,
+      "loss": 0.1188,
+      "step": 922
+    },
+    {
+      "epoch": 0.06659691908077492,
+      "grad_norm": 0.11703677475452423,
+      "learning_rate": 0.0001973502669937942,
+      "loss": 0.1623,
+      "step": 923
+    },
+    {
+      "epoch": 0.06666907175583535,
+      "grad_norm": 0.08961249142885208,
+      "learning_rate": 0.00019734738057439747,
+      "loss": 0.21,
+      "step": 924
+    },
+    {
+      "epoch": 0.06674122443089578,
+      "grad_norm": 0.11890044808387756,
+      "learning_rate": 0.00019734449415500073,
+      "loss": 0.1562,
+      "step": 925
+    },
+    {
+      "epoch": 0.06681337710595621,
+      "grad_norm": 0.11046113073825836,
+      "learning_rate": 0.000197341607735604,
+      "loss": 0.1453,
+      "step": 926
+    },
+    {
+      "epoch": 0.06688552978101663,
+      "grad_norm": 0.0871260017156601,
+      "learning_rate": 0.00019733872131620726,
+      "loss": 0.1935,
+      "step": 927
+    },
+    {
+      "epoch": 0.06695768245607706,
+      "grad_norm": 0.08003012835979462,
+      "learning_rate": 0.00019733583489681052,
+      "loss": 0.1737,
+      "step": 928
+    },
+    {
+      "epoch": 0.06702983513113749,
+      "grad_norm": 0.10298404097557068,
+      "learning_rate": 0.00019733294847741378,
+      "loss": 0.1584,
+      "step": 929
+    },
+    {
+      "epoch": 0.06710198780619792,
+      "grad_norm": 0.12050239741802216,
+      "learning_rate": 0.00019733006205801702,
+      "loss": 0.2297,
+      "step": 930
+    },
+    {
+      "epoch": 0.06717414048125835,
+      "grad_norm": 0.09646282345056534,
+      "learning_rate": 0.0001973271756386203,
+      "loss": 0.1768,
+      "step": 931
+    },
+    {
+      "epoch": 0.06724629315631878,
+      "grad_norm": 0.09590929746627808,
+      "learning_rate": 0.00019732428921922357,
+      "loss": 0.1385,
+      "step": 932
+    },
+    {
+      "epoch": 0.06731844583137919,
+      "grad_norm": 0.10071932524442673,
+      "learning_rate": 0.00019732140279982684,
+      "loss": 0.2076,
+      "step": 933
+    },
+    {
+      "epoch": 0.06739059850643962,
+      "grad_norm": 0.10966494679450989,
+      "learning_rate": 0.0001973185163804301,
+      "loss": 0.2114,
+      "step": 934
+    },
+    {
+      "epoch": 0.06746275118150005,
+      "grad_norm": 0.08536006510257721,
+      "learning_rate": 0.00019731562996103333,
+      "loss": 0.1917,
+      "step": 935
+    },
+    {
+      "epoch": 0.06753490385656048,
+      "grad_norm": 0.089837945997715,
+      "learning_rate": 0.0001973127435416366,
+      "loss": 0.1454,
+      "step": 936
+    },
+    {
+      "epoch": 0.06760705653162091,
+      "grad_norm": 0.08100643008947372,
+      "learning_rate": 0.00019730985712223986,
+      "loss": 0.1833,
+      "step": 937
+    },
+    {
+      "epoch": 0.06767920920668134,
+      "grad_norm": 0.09855519980192184,
+      "learning_rate": 0.00019730697070284315,
+      "loss": 0.2072,
+      "step": 938
+    },
+    {
+      "epoch": 0.06775136188174176,
+      "grad_norm": 0.10398002713918686,
+      "learning_rate": 0.0001973040842834464,
+      "loss": 0.1914,
+      "step": 939
+    },
+    {
+      "epoch": 0.06782351455680219,
+      "grad_norm": 0.11511258035898209,
+      "learning_rate": 0.00019730119786404965,
+      "loss": 0.1788,
+      "step": 940
+    },
+    {
+      "epoch": 0.06789566723186262,
+      "grad_norm": 0.08235523104667664,
+      "learning_rate": 0.0001972983114446529,
+      "loss": 0.2027,
+      "step": 941
+    },
+    {
+      "epoch": 0.06796781990692305,
+      "grad_norm": 0.07562468945980072,
+      "learning_rate": 0.00019729542502525617,
+      "loss": 0.1686,
+      "step": 942
+    },
+    {
+      "epoch": 0.06803997258198348,
+      "grad_norm": 0.07198230177164078,
+      "learning_rate": 0.00019729253860585944,
+      "loss": 0.2053,
+      "step": 943
+    },
+    {
+      "epoch": 0.06811212525704391,
+      "grad_norm": 0.07216834276914597,
+      "learning_rate": 0.0001972896521864627,
+      "loss": 0.1892,
+      "step": 944
+    },
+    {
+      "epoch": 0.06818427793210434,
+      "grad_norm": 0.06897939741611481,
+      "learning_rate": 0.00019728676576706596,
+      "loss": 0.1581,
+      "step": 945
+    },
+    {
+      "epoch": 0.06825643060716476,
+      "grad_norm": 0.0761767327785492,
+      "learning_rate": 0.00019728387934766923,
+      "loss": 0.1801,
+      "step": 946
+    },
+    {
+      "epoch": 0.06832858328222519,
+      "grad_norm": 0.09585803747177124,
+      "learning_rate": 0.0001972809929282725,
+      "loss": 0.2046,
+      "step": 947
+    },
+    {
+      "epoch": 0.06840073595728562,
+      "grad_norm": 0.07439012825489044,
+      "learning_rate": 0.00019727810650887575,
+      "loss": 0.1596,
+      "step": 948
+    },
+    {
+      "epoch": 0.06847288863234605,
+      "grad_norm": 0.11946660280227661,
+      "learning_rate": 0.00019727522008947901,
+      "loss": 0.1758,
+      "step": 949
+    },
+    {
+      "epoch": 0.06854504130740648,
+      "grad_norm": 0.096490778028965,
+      "learning_rate": 0.00019727233367008228,
+      "loss": 0.1824,
+      "step": 950
+    },
+    {
+      "epoch": 0.0686171939824669,
+      "grad_norm": 0.12073836475610733,
+      "learning_rate": 0.00019726944725068551,
+      "loss": 0.1525,
+      "step": 951
+    },
+    {
+      "epoch": 0.06868934665752732,
+      "grad_norm": 0.137812539935112,
+      "learning_rate": 0.0001972665608312888,
+      "loss": 0.1564,
+      "step": 952
+    },
+    {
+      "epoch": 0.06876149933258775,
+      "grad_norm": 0.1103888601064682,
+      "learning_rate": 0.00019726367441189207,
+      "loss": 0.1475,
+      "step": 953
+    },
+    {
+      "epoch": 0.06883365200764818,
+      "grad_norm": 0.08909444510936737,
+      "learning_rate": 0.00019726078799249533,
+      "loss": 0.1765,
+      "step": 954
+    },
+    {
+      "epoch": 0.06890580468270861,
+      "grad_norm": 0.09811177104711533,
+      "learning_rate": 0.0001972579015730986,
+      "loss": 0.1684,
+      "step": 955
+    },
+    {
+      "epoch": 0.06897795735776904,
+      "grad_norm": 0.0942537784576416,
+      "learning_rate": 0.00019725501515370183,
+      "loss": 0.1621,
+      "step": 956
+    },
+    {
+      "epoch": 0.06905011003282947,
+      "grad_norm": 0.11584122478961945,
+      "learning_rate": 0.0001972521287343051,
+      "loss": 0.2448,
+      "step": 957
+    },
+    {
+      "epoch": 0.06912226270788989,
+      "grad_norm": 0.08744748681783676,
+      "learning_rate": 0.00019724924231490835,
+      "loss": 0.2051,
+      "step": 958
+    },
+    {
+      "epoch": 0.06919441538295032,
+      "grad_norm": 0.12525174021720886,
+      "learning_rate": 0.00019724635589551164,
+      "loss": 0.1545,
+      "step": 959
+    },
+    {
+      "epoch": 0.06926656805801075,
+      "grad_norm": 0.09919869899749756,
+      "learning_rate": 0.0001972434694761149,
+      "loss": 0.1952,
+      "step": 960
+    },
+    {
+      "epoch": 0.06933872073307118,
+      "grad_norm": 0.10349156707525253,
+      "learning_rate": 0.00019724058305671814,
+      "loss": 0.1692,
+      "step": 961
+    },
+    {
+      "epoch": 0.06941087340813161,
+      "grad_norm": 0.11499597877264023,
+      "learning_rate": 0.0001972376966373214,
+      "loss": 0.2181,
+      "step": 962
+    },
+    {
+      "epoch": 0.06948302608319204,
+      "grad_norm": 0.08199431002140045,
+      "learning_rate": 0.00019723481021792467,
+      "loss": 0.1664,
+      "step": 963
+    },
+    {
+      "epoch": 0.06955517875825246,
+      "grad_norm": 0.09471738338470459,
+      "learning_rate": 0.00019723192379852793,
+      "loss": 0.1669,
+      "step": 964
+    },
+    {
+      "epoch": 0.06962733143331289,
+      "grad_norm": 0.1080731600522995,
+      "learning_rate": 0.0001972290373791312,
+      "loss": 0.2029,
+      "step": 965
+    },
+    {
+      "epoch": 0.06969948410837332,
+      "grad_norm": 0.09761572629213333,
+      "learning_rate": 0.00019722615095973446,
+      "loss": 0.2349,
+      "step": 966
+    },
+    {
+      "epoch": 0.06977163678343375,
+      "grad_norm": 0.08952382206916809,
+      "learning_rate": 0.00019722326454033772,
+      "loss": 0.1348,
+      "step": 967
+    },
+    {
+      "epoch": 0.06984378945849418,
+      "grad_norm": 0.08425026386976242,
+      "learning_rate": 0.00019722037812094098,
+      "loss": 0.212,
+      "step": 968
+    },
+    {
+      "epoch": 0.0699159421335546,
+      "grad_norm": 0.12387498468160629,
+      "learning_rate": 0.00019721749170154425,
+      "loss": 0.1754,
+      "step": 969
+    },
+    {
+      "epoch": 0.06998809480861502,
+      "grad_norm": 0.12308784574270248,
+      "learning_rate": 0.0001972146052821475,
+      "loss": 0.183,
+      "step": 970
+    },
+    {
+      "epoch": 0.07006024748367545,
+      "grad_norm": 0.08441948890686035,
+      "learning_rate": 0.00019721171886275077,
+      "loss": 0.1386,
+      "step": 971
+    },
+    {
+      "epoch": 0.07013240015873588,
+      "grad_norm": 0.09757070243358612,
+      "learning_rate": 0.000197208832443354,
+      "loss": 0.177,
+      "step": 972
+    },
+    {
+      "epoch": 0.07020455283379631,
+      "grad_norm": 0.09631665050983429,
+      "learning_rate": 0.0001972059460239573,
+      "loss": 0.1936,
+      "step": 973
+    },
+    {
+      "epoch": 0.07027670550885674,
+      "grad_norm": 0.09656485170125961,
+      "learning_rate": 0.00019720305960456056,
+      "loss": 0.1684,
+      "step": 974
+    },
+    {
+      "epoch": 0.07034885818391717,
+      "grad_norm": 0.14445240795612335,
+      "learning_rate": 0.00019720017318516382,
+      "loss": 0.1749,
+      "step": 975
+    },
+    {
+      "epoch": 0.0704210108589776,
+      "grad_norm": 0.11951139569282532,
+      "learning_rate": 0.0001971972867657671,
+      "loss": 0.2132,
+      "step": 976
+    },
+    {
+      "epoch": 0.07049316353403802,
+      "grad_norm": 0.11091769486665726,
+      "learning_rate": 0.00019719440034637032,
+      "loss": 0.194,
+      "step": 977
+    },
+    {
+      "epoch": 0.07056531620909845,
+      "grad_norm": 0.18498805165290833,
+      "learning_rate": 0.00019719151392697359,
+      "loss": 0.1952,
+      "step": 978
+    },
+    {
+      "epoch": 0.07063746888415888,
+      "grad_norm": 0.1627284437417984,
+      "learning_rate": 0.00019718862750757685,
+      "loss": 0.211,
+      "step": 979
+    },
+    {
+      "epoch": 0.07070962155921931,
+      "grad_norm": 0.08590367436408997,
+      "learning_rate": 0.00019718574108818014,
+      "loss": 0.2123,
+      "step": 980
+    },
+    {
+      "epoch": 0.07078177423427974,
+      "grad_norm": 0.11220984905958176,
+      "learning_rate": 0.0001971828546687834,
+      "loss": 0.1771,
+      "step": 981
+    },
+    {
+      "epoch": 0.07085392690934017,
+      "grad_norm": 0.12736214697360992,
+      "learning_rate": 0.00019717996824938664,
+      "loss": 0.1532,
+      "step": 982
+    },
+    {
+      "epoch": 0.07092607958440059,
+      "grad_norm": 0.10040099918842316,
+      "learning_rate": 0.0001971770818299899,
+      "loss": 0.2033,
+      "step": 983
+    },
+    {
+      "epoch": 0.07099823225946102,
+      "grad_norm": 0.07447941601276398,
+      "learning_rate": 0.00019717419541059316,
+      "loss": 0.132,
+      "step": 984
+    },
+    {
+      "epoch": 0.07107038493452145,
+      "grad_norm": 0.11526428908109665,
+      "learning_rate": 0.00019717130899119643,
+      "loss": 0.1395,
+      "step": 985
+    },
+    {
+      "epoch": 0.07114253760958188,
+      "grad_norm": 0.12547869980335236,
+      "learning_rate": 0.0001971684225717997,
+      "loss": 0.1404,
+      "step": 986
+    },
+    {
+      "epoch": 0.0712146902846423,
+      "grad_norm": 0.17025943100452423,
+      "learning_rate": 0.00019716553615240295,
+      "loss": 0.1794,
+      "step": 987
+    },
+    {
+      "epoch": 0.07128684295970274,
+      "grad_norm": 0.10432759672403336,
+      "learning_rate": 0.00019716264973300621,
+      "loss": 0.1592,
+      "step": 988
+    },
+    {
+      "epoch": 0.07135899563476315,
+      "grad_norm": 0.15890909731388092,
+      "learning_rate": 0.00019715976331360948,
+      "loss": 0.262,
+      "step": 989
+    },
+    {
+      "epoch": 0.07143114830982358,
+      "grad_norm": 0.13272574543952942,
+      "learning_rate": 0.00019715687689421274,
+      "loss": 0.1584,
+      "step": 990
+    },
+    {
+      "epoch": 0.07150330098488401,
+      "grad_norm": 0.08162888139486313,
+      "learning_rate": 0.000197153990474816,
+      "loss": 0.1998,
+      "step": 991
+    },
+    {
+      "epoch": 0.07157545365994444,
+      "grad_norm": 0.09594608843326569,
+      "learning_rate": 0.00019715110405541927,
+      "loss": 0.1447,
+      "step": 992
+    },
+    {
+      "epoch": 0.07164760633500487,
+      "grad_norm": 0.11136913299560547,
+      "learning_rate": 0.0001971482176360225,
+      "loss": 0.192,
+      "step": 993
+    },
+    {
+      "epoch": 0.0717197590100653,
+      "grad_norm": 0.11597933620214462,
+      "learning_rate": 0.0001971453312166258,
+      "loss": 0.188,
+      "step": 994
+    },
+    {
+      "epoch": 0.07179191168512572,
+      "grad_norm": 0.10965341329574585,
+      "learning_rate": 0.00019714244479722906,
+      "loss": 0.1712,
+      "step": 995
+    },
+    {
+      "epoch": 0.07186406436018615,
+      "grad_norm": 0.09491154551506042,
+      "learning_rate": 0.00019713955837783232,
+      "loss": 0.1851,
+      "step": 996
+    },
+    {
+      "epoch": 0.07193621703524658,
+      "grad_norm": 0.10133666545152664,
+      "learning_rate": 0.00019713667195843558,
+      "loss": 0.194,
+      "step": 997
+    },
+    {
+      "epoch": 0.07200836971030701,
+      "grad_norm": 0.09461408853530884,
+      "learning_rate": 0.00019713378553903882,
+      "loss": 0.1784,
+      "step": 998
+    },
+    {
+      "epoch": 0.07208052238536744,
+      "grad_norm": 0.08958426862955093,
+      "learning_rate": 0.00019713089911964208,
+      "loss": 0.2179,
+      "step": 999
+    },
+    {
+      "epoch": 0.07215267506042787,
+      "grad_norm": 0.08880864828824997,
+      "learning_rate": 0.00019712801270024534,
+      "loss": 0.1661,
+      "step": 1000
+    },
+    {
+      "epoch": 0.07222482773548829,
+      "grad_norm": 0.11652393639087677,
+      "learning_rate": 0.00019712512628084863,
+      "loss": 0.1578,
+      "step": 1001
+    },
+    {
+      "epoch": 0.07229698041054872,
+      "grad_norm": 0.17131830751895905,
+      "learning_rate": 0.0001971222398614519,
+      "loss": 0.2147,
+      "step": 1002
+    },
+    {
+      "epoch": 0.07236913308560915,
+      "grad_norm": 0.10526955872774124,
+      "learning_rate": 0.00019711935344205513,
+      "loss": 0.2008,
+      "step": 1003
+    },
+    {
+      "epoch": 0.07244128576066958,
+      "grad_norm": 0.12301938980817795,
+      "learning_rate": 0.0001971164670226584,
+      "loss": 0.1413,
+      "step": 1004
+    },
+    {
+      "epoch": 0.07251343843573001,
+      "grad_norm": 0.09197913110256195,
+      "learning_rate": 0.00019711358060326166,
+      "loss": 0.1553,
+      "step": 1005
+    },
+    {
+      "epoch": 0.07258559111079044,
+      "grad_norm": 0.11922527104616165,
+      "learning_rate": 0.00019711069418386492,
+      "loss": 0.2427,
+      "step": 1006
+    },
+    {
+      "epoch": 0.07265774378585087,
+      "grad_norm": 0.10435183346271515,
+      "learning_rate": 0.00019710780776446818,
+      "loss": 0.1628,
+      "step": 1007
+    },
+    {
+      "epoch": 0.07272989646091128,
+      "grad_norm": 0.09419992566108704,
+      "learning_rate": 0.00019710492134507145,
+      "loss": 0.1985,
+      "step": 1008
+    },
+    {
+      "epoch": 0.07280204913597171,
+      "grad_norm": 0.11829221993684769,
+      "learning_rate": 0.0001971020349256747,
+      "loss": 0.1782,
+      "step": 1009
+    },
+    {
+      "epoch": 0.07287420181103214,
+      "grad_norm": 0.09988191723823547,
+      "learning_rate": 0.00019709914850627797,
+      "loss": 0.1895,
+      "step": 1010
+    },
+    {
+      "epoch": 0.07294635448609257,
+      "grad_norm": 0.1229916587471962,
+      "learning_rate": 0.00019709626208688123,
+      "loss": 0.1646,
+      "step": 1011
+    },
+    {
+      "epoch": 0.073018507161153,
+      "grad_norm": 0.08967486023902893,
+      "learning_rate": 0.0001970933756674845,
+      "loss": 0.1948,
+      "step": 1012
+    },
+    {
+      "epoch": 0.07309065983621343,
+      "grad_norm": 0.10921390354633331,
+      "learning_rate": 0.00019709048924808776,
+      "loss": 0.1574,
+      "step": 1013
+    },
+    {
+      "epoch": 0.07316281251127385,
+      "grad_norm": 0.13804689049720764,
+      "learning_rate": 0.000197087602828691,
+      "loss": 0.1984,
+      "step": 1014
+    },
+    {
+      "epoch": 0.07323496518633428,
+      "grad_norm": 0.11681187152862549,
+      "learning_rate": 0.0001970847164092943,
+      "loss": 0.1783,
+      "step": 1015
+    },
+    {
+      "epoch": 0.07330711786139471,
+      "grad_norm": 0.11250089108943939,
+      "learning_rate": 0.00019708182998989755,
+      "loss": 0.1954,
+      "step": 1016
+    },
+    {
+      "epoch": 0.07337927053645514,
+      "grad_norm": 0.10940463095903397,
+      "learning_rate": 0.0001970789435705008,
+      "loss": 0.1874,
+      "step": 1017
+    },
+    {
+      "epoch": 0.07345142321151557,
+      "grad_norm": 0.09099728614091873,
+      "learning_rate": 0.00019707605715110408,
+      "loss": 0.2076,
+      "step": 1018
+    },
+    {
+      "epoch": 0.073523575886576,
+      "grad_norm": 0.10192853212356567,
+      "learning_rate": 0.0001970731707317073,
+      "loss": 0.2001,
+      "step": 1019
+    },
+    {
+      "epoch": 0.07359572856163642,
+      "grad_norm": 0.09343042969703674,
+      "learning_rate": 0.00019707028431231057,
+      "loss": 0.2092,
+      "step": 1020
+    },
+    {
+      "epoch": 0.07366788123669685,
+      "grad_norm": 0.10208447277545929,
+      "learning_rate": 0.00019706739789291384,
+      "loss": 0.1896,
+      "step": 1021
+    },
+    {
+      "epoch": 0.07374003391175728,
+      "grad_norm": 0.09970434010028839,
+      "learning_rate": 0.00019706451147351713,
+      "loss": 0.1718,
+      "step": 1022
+    },
+    {
+      "epoch": 0.07381218658681771,
+      "grad_norm": 0.11564663052558899,
+      "learning_rate": 0.0001970616250541204,
+      "loss": 0.1859,
+      "step": 1023
+    },
+    {
+      "epoch": 0.07388433926187814,
+      "grad_norm": 0.11144092679023743,
+      "learning_rate": 0.00019705873863472363,
+      "loss": 0.1895,
+      "step": 1024
+    },
+    {
+      "epoch": 0.07395649193693857,
+      "grad_norm": 0.1214640662074089,
+      "learning_rate": 0.0001970558522153269,
+      "loss": 0.1183,
+      "step": 1025
+    },
+    {
+      "epoch": 0.07402864461199898,
+      "grad_norm": 0.13590200245380402,
+      "learning_rate": 0.00019705296579593015,
+      "loss": 0.2695,
+      "step": 1026
+    },
+    {
+      "epoch": 0.07410079728705941,
+      "grad_norm": 0.10062559694051743,
+      "learning_rate": 0.00019705007937653341,
+      "loss": 0.2017,
+      "step": 1027
+    },
+    {
+      "epoch": 0.07417294996211984,
+      "grad_norm": 0.09059272706508636,
+      "learning_rate": 0.00019704719295713668,
+      "loss": 0.1862,
+      "step": 1028
+    },
+    {
+      "epoch": 0.07424510263718027,
+      "grad_norm": 0.10175316780805588,
+      "learning_rate": 0.00019704430653773994,
+      "loss": 0.157,
+      "step": 1029
+    },
+    {
+      "epoch": 0.0743172553122407,
+      "grad_norm": 0.11331373453140259,
+      "learning_rate": 0.0001970414201183432,
+      "loss": 0.1846,
+      "step": 1030
+    },
+    {
+      "epoch": 0.07438940798730113,
+      "grad_norm": 0.07486823946237564,
+      "learning_rate": 0.00019703853369894647,
+      "loss": 0.1812,
+      "step": 1031
+    },
+    {
+      "epoch": 0.07446156066236155,
+      "grad_norm": 0.10093438625335693,
+      "learning_rate": 0.00019703564727954973,
+      "loss": 0.1548,
+      "step": 1032
+    },
+    {
+      "epoch": 0.07453371333742198,
+      "grad_norm": 0.10439279675483704,
+      "learning_rate": 0.000197032760860153,
+      "loss": 0.1327,
+      "step": 1033
+    },
+    {
+      "epoch": 0.07460586601248241,
+      "grad_norm": 0.08357524126768112,
+      "learning_rate": 0.00019702987444075625,
+      "loss": 0.1591,
+      "step": 1034
+    },
+    {
+      "epoch": 0.07467801868754284,
+      "grad_norm": 0.09035192430019379,
+      "learning_rate": 0.00019702698802135952,
+      "loss": 0.1327,
+      "step": 1035
+    },
+    {
+      "epoch": 0.07475017136260327,
+      "grad_norm": 0.08603281527757645,
+      "learning_rate": 0.00019702410160196278,
+      "loss": 0.2147,
+      "step": 1036
+    },
+    {
+      "epoch": 0.0748223240376637,
+      "grad_norm": 0.1261157989501953,
+      "learning_rate": 0.00019702121518256604,
+      "loss": 0.1838,
+      "step": 1037
+    },
+    {
+      "epoch": 0.07489447671272412,
+      "grad_norm": 0.11462076753377914,
+      "learning_rate": 0.0001970183287631693,
+      "loss": 0.1553,
+      "step": 1038
+    },
+    {
+      "epoch": 0.07496662938778455,
+      "grad_norm": 0.11107989400625229,
+      "learning_rate": 0.00019701544234377257,
+      "loss": 0.1559,
+      "step": 1039
+    },
+    {
+      "epoch": 0.07503878206284498,
+      "grad_norm": 0.12100755423307419,
+      "learning_rate": 0.00019701255592437583,
+      "loss": 0.1868,
+      "step": 1040
+    },
+    {
+      "epoch": 0.07511093473790541,
+      "grad_norm": 0.08341117948293686,
+      "learning_rate": 0.00019700966950497907,
+      "loss": 0.1399,
+      "step": 1041
+    },
+    {
+      "epoch": 0.07518308741296584,
+      "grad_norm": 0.09755191951990128,
+      "learning_rate": 0.00019700678308558233,
+      "loss": 0.1681,
+      "step": 1042
+    },
+    {
+      "epoch": 0.07525524008802627,
+      "grad_norm": 0.11107684671878815,
+      "learning_rate": 0.00019700389666618562,
+      "loss": 0.187,
+      "step": 1043
+    },
+    {
+      "epoch": 0.0753273927630867,
+      "grad_norm": 0.10081485658884048,
+      "learning_rate": 0.00019700101024678888,
+      "loss": 0.1734,
+      "step": 1044
+    },
+    {
+      "epoch": 0.07539954543814711,
+      "grad_norm": 0.09995072335004807,
+      "learning_rate": 0.00019699812382739215,
+      "loss": 0.1679,
+      "step": 1045
+    },
+    {
+      "epoch": 0.07547169811320754,
+      "grad_norm": 0.11626127362251282,
+      "learning_rate": 0.00019699523740799538,
+      "loss": 0.1877,
+      "step": 1046
+    },
+    {
+      "epoch": 0.07554385078826797,
+      "grad_norm": 0.13476620614528656,
+      "learning_rate": 0.00019699235098859865,
+      "loss": 0.1746,
+      "step": 1047
+    },
+    {
+      "epoch": 0.0756160034633284,
+      "grad_norm": 0.07211948931217194,
+      "learning_rate": 0.0001969894645692019,
+      "loss": 0.164,
+      "step": 1048
+    },
+    {
+      "epoch": 0.07568815613838883,
+      "grad_norm": 0.14787274599075317,
+      "learning_rate": 0.00019698657814980517,
+      "loss": 0.1627,
+      "step": 1049
+    },
+    {
+      "epoch": 0.07576030881344926,
+      "grad_norm": 0.13697263598442078,
+      "learning_rate": 0.00019698369173040846,
+      "loss": 0.2167,
+      "step": 1050
+    },
+    {
+      "epoch": 0.07583246148850968,
+      "grad_norm": 0.10592759400606155,
+      "learning_rate": 0.0001969808053110117,
+      "loss": 0.1767,
+      "step": 1051
+    },
+    {
+      "epoch": 0.07590461416357011,
+      "grad_norm": 0.11751779913902283,
+      "learning_rate": 0.00019697791889161496,
+      "loss": 0.1859,
+      "step": 1052
+    },
+    {
+      "epoch": 0.07597676683863054,
+      "grad_norm": 0.10343729704618454,
+      "learning_rate": 0.00019697503247221822,
+      "loss": 0.2388,
+      "step": 1053
+    },
+    {
+      "epoch": 0.07604891951369097,
+      "grad_norm": 0.09543080627918243,
+      "learning_rate": 0.00019697214605282149,
+      "loss": 0.1869,
+      "step": 1054
+    },
+    {
+      "epoch": 0.0761210721887514,
+      "grad_norm": 0.08784335851669312,
+      "learning_rate": 0.00019696925963342475,
+      "loss": 0.1665,
+      "step": 1055
+    },
+    {
+      "epoch": 0.07619322486381183,
+      "grad_norm": 0.08296474069356918,
+      "learning_rate": 0.000196966373214028,
+      "loss": 0.1704,
+      "step": 1056
+    },
+    {
+      "epoch": 0.07626537753887225,
+      "grad_norm": 0.11708515137434006,
+      "learning_rate": 0.00019696348679463127,
+      "loss": 0.1648,
+      "step": 1057
+    },
+    {
+      "epoch": 0.07633753021393268,
+      "grad_norm": 0.10343474894762039,
+      "learning_rate": 0.00019696060037523454,
+      "loss": 0.1674,
+      "step": 1058
+    },
+    {
+      "epoch": 0.07640968288899311,
+      "grad_norm": 0.11188158392906189,
+      "learning_rate": 0.0001969577139558378,
+      "loss": 0.1915,
+      "step": 1059
+    },
+    {
+      "epoch": 0.07648183556405354,
+      "grad_norm": 0.10264435410499573,
+      "learning_rate": 0.00019695482753644106,
+      "loss": 0.1711,
+      "step": 1060
+    },
+    {
+      "epoch": 0.07655398823911397,
+      "grad_norm": 0.11818800866603851,
+      "learning_rate": 0.00019695194111704433,
+      "loss": 0.1847,
+      "step": 1061
+    },
+    {
+      "epoch": 0.0766261409141744,
+      "grad_norm": 0.10329384356737137,
+      "learning_rate": 0.00019694905469764756,
+      "loss": 0.1858,
+      "step": 1062
+    },
+    {
+      "epoch": 0.07669829358923481,
+      "grad_norm": 0.10981201380491257,
+      "learning_rate": 0.00019694616827825083,
+      "loss": 0.2187,
+      "step": 1063
+    },
+    {
+      "epoch": 0.07677044626429524,
+      "grad_norm": 0.08695390820503235,
+      "learning_rate": 0.00019694328185885412,
+      "loss": 0.1824,
+      "step": 1064
+    },
+    {
+      "epoch": 0.07684259893935567,
+      "grad_norm": 0.09196928143501282,
+      "learning_rate": 0.00019694039543945738,
+      "loss": 0.1538,
+      "step": 1065
+    },
+    {
+      "epoch": 0.0769147516144161,
+      "grad_norm": 0.08134390413761139,
+      "learning_rate": 0.00019693750902006064,
+      "loss": 0.2178,
+      "step": 1066
+    },
+    {
+      "epoch": 0.07698690428947653,
+      "grad_norm": 0.08974107354879379,
+      "learning_rate": 0.00019693462260066388,
+      "loss": 0.176,
+      "step": 1067
+    },
+    {
+      "epoch": 0.07705905696453696,
+      "grad_norm": 0.09833265095949173,
+      "learning_rate": 0.00019693173618126714,
+      "loss": 0.1878,
+      "step": 1068
+    },
+    {
+      "epoch": 0.07713120963959738,
+      "grad_norm": 0.10632316023111343,
+      "learning_rate": 0.0001969288497618704,
+      "loss": 0.1922,
+      "step": 1069
+    },
+    {
+      "epoch": 0.07720336231465781,
+      "grad_norm": 0.1112053319811821,
+      "learning_rate": 0.00019692596334247367,
+      "loss": 0.1548,
+      "step": 1070
+    },
+    {
+      "epoch": 0.07727551498971824,
+      "grad_norm": 0.10864856839179993,
+      "learning_rate": 0.00019692307692307696,
+      "loss": 0.1791,
+      "step": 1071
+    },
+    {
+      "epoch": 0.07734766766477867,
+      "grad_norm": 0.07739702612161636,
+      "learning_rate": 0.0001969201905036802,
+      "loss": 0.1628,
+      "step": 1072
+    },
+    {
+      "epoch": 0.0774198203398391,
+      "grad_norm": 0.09403367340564728,
+      "learning_rate": 0.00019691730408428345,
+      "loss": 0.1485,
+      "step": 1073
+    },
+    {
+      "epoch": 0.07749197301489953,
+      "grad_norm": 0.10768328607082367,
+      "learning_rate": 0.00019691441766488672,
+      "loss": 0.156,
+      "step": 1074
+    },
+    {
+      "epoch": 0.07756412568995996,
+      "grad_norm": 0.10109016299247742,
+      "learning_rate": 0.00019691153124548998,
+      "loss": 0.1485,
+      "step": 1075
+    },
+    {
+      "epoch": 0.07763627836502038,
+      "grad_norm": 0.10066195577383041,
+      "learning_rate": 0.00019690864482609324,
+      "loss": 0.1837,
+      "step": 1076
+    },
+    {
+      "epoch": 0.07770843104008081,
+      "grad_norm": 0.11048019677400589,
+      "learning_rate": 0.0001969057584066965,
+      "loss": 0.1931,
+      "step": 1077
+    },
+    {
+      "epoch": 0.07778058371514124,
+      "grad_norm": 0.1042243242263794,
+      "learning_rate": 0.00019690287198729977,
+      "loss": 0.1405,
+      "step": 1078
+    },
+    {
+      "epoch": 0.07785273639020167,
+      "grad_norm": 0.08323122560977936,
+      "learning_rate": 0.00019689998556790303,
+      "loss": 0.1908,
+      "step": 1079
+    },
+    {
+      "epoch": 0.0779248890652621,
+      "grad_norm": 0.10694662481546402,
+      "learning_rate": 0.0001968970991485063,
+      "loss": 0.1343,
+      "step": 1080
+    },
+    {
+      "epoch": 0.07799704174032253,
+      "grad_norm": 0.12616930902004242,
+      "learning_rate": 0.00019689421272910956,
+      "loss": 0.2282,
+      "step": 1081
+    },
+    {
+      "epoch": 0.07806919441538294,
+      "grad_norm": 0.1291603147983551,
+      "learning_rate": 0.00019689132630971282,
+      "loss": 0.1912,
+      "step": 1082
+    },
+    {
+      "epoch": 0.07814134709044337,
+      "grad_norm": 0.11131034046411514,
+      "learning_rate": 0.00019688843989031606,
+      "loss": 0.1971,
+      "step": 1083
+    },
+    {
+      "epoch": 0.0782134997655038,
+      "grad_norm": 0.11821369081735611,
+      "learning_rate": 0.00019688555347091932,
+      "loss": 0.199,
+      "step": 1084
+    },
+    {
+      "epoch": 0.07828565244056424,
+      "grad_norm": 0.1535119265317917,
+      "learning_rate": 0.00019688266705152258,
+      "loss": 0.15,
+      "step": 1085
+    },
+    {
+      "epoch": 0.07835780511562467,
+      "grad_norm": 0.10242103785276413,
+      "learning_rate": 0.00019687978063212587,
+      "loss": 0.1796,
+      "step": 1086
+    },
+    {
+      "epoch": 0.0784299577906851,
+      "grad_norm": 0.0983833447098732,
+      "learning_rate": 0.00019687689421272914,
+      "loss": 0.1702,
+      "step": 1087
+    },
+    {
+      "epoch": 0.07850211046574551,
+      "grad_norm": 0.0991702526807785,
+      "learning_rate": 0.00019687400779333237,
+      "loss": 0.1483,
+      "step": 1088
+    },
+    {
+      "epoch": 0.07857426314080594,
+      "grad_norm": 0.10307537019252777,
+      "learning_rate": 0.00019687112137393563,
+      "loss": 0.1982,
+      "step": 1089
+    },
+    {
+      "epoch": 0.07864641581586637,
+      "grad_norm": 0.07653114944696426,
+      "learning_rate": 0.0001968682349545389,
+      "loss": 0.1948,
+      "step": 1090
+    },
+    {
+      "epoch": 0.0787185684909268,
+      "grad_norm": 0.09349947422742844,
+      "learning_rate": 0.00019686534853514216,
+      "loss": 0.1875,
+      "step": 1091
+    },
+    {
+      "epoch": 0.07879072116598723,
+      "grad_norm": 0.11042381823062897,
+      "learning_rate": 0.00019686246211574542,
+      "loss": 0.2184,
+      "step": 1092
+    },
+    {
+      "epoch": 0.07886287384104766,
+      "grad_norm": 0.09334763139486313,
+      "learning_rate": 0.00019685957569634869,
+      "loss": 0.1927,
+      "step": 1093
+    },
+    {
+      "epoch": 0.07893502651610808,
+      "grad_norm": 0.09462998807430267,
+      "learning_rate": 0.00019685668927695195,
+      "loss": 0.1587,
+      "step": 1094
+    },
+    {
+      "epoch": 0.07900717919116851,
+      "grad_norm": 0.08485822379589081,
+      "learning_rate": 0.0001968538028575552,
+      "loss": 0.1815,
+      "step": 1095
+    },
+    {
+      "epoch": 0.07907933186622894,
+      "grad_norm": 0.08465959876775742,
+      "learning_rate": 0.00019685091643815847,
+      "loss": 0.1805,
+      "step": 1096
+    },
+    {
+      "epoch": 0.07915148454128937,
+      "grad_norm": 0.08012067526578903,
+      "learning_rate": 0.00019684803001876174,
+      "loss": 0.1194,
+      "step": 1097
+    },
+    {
+      "epoch": 0.0792236372163498,
+      "grad_norm": 0.09432154148817062,
+      "learning_rate": 0.000196845143599365,
+      "loss": 0.1646,
+      "step": 1098
+    },
+    {
+      "epoch": 0.07929578989141023,
+      "grad_norm": 0.07500480860471725,
+      "learning_rate": 0.00019684225717996824,
+      "loss": 0.1987,
+      "step": 1099
+    },
+    {
+      "epoch": 0.07936794256647065,
+      "grad_norm": 0.08517158776521683,
+      "learning_rate": 0.00019683937076057153,
+      "loss": 0.1765,
+      "step": 1100
+    },
+    {
+      "epoch": 0.07944009524153108,
+      "grad_norm": 0.13203036785125732,
+      "learning_rate": 0.0001968364843411748,
+      "loss": 0.1771,
+      "step": 1101
+    },
+    {
+      "epoch": 0.0795122479165915,
+      "grad_norm": 0.11799517273902893,
+      "learning_rate": 0.00019683359792177805,
+      "loss": 0.1671,
+      "step": 1102
+    },
+    {
+      "epoch": 0.07958440059165194,
+      "grad_norm": 0.09855810552835464,
+      "learning_rate": 0.00019683071150238132,
+      "loss": 0.1851,
+      "step": 1103
+    },
+    {
+      "epoch": 0.07965655326671237,
+      "grad_norm": 0.09309118241071701,
+      "learning_rate": 0.00019682782508298455,
+      "loss": 0.2037,
+      "step": 1104
+    },
+    {
+      "epoch": 0.0797287059417728,
+      "grad_norm": 0.08551649749279022,
+      "learning_rate": 0.00019682493866358781,
+      "loss": 0.1802,
+      "step": 1105
+    },
+    {
+      "epoch": 0.07980085861683323,
+      "grad_norm": 0.10401365160942078,
+      "learning_rate": 0.00019682205224419108,
+      "loss": 0.2303,
+      "step": 1106
+    },
+    {
+      "epoch": 0.07987301129189364,
+      "grad_norm": 0.11717145144939423,
+      "learning_rate": 0.00019681916582479437,
+      "loss": 0.2169,
+      "step": 1107
+    },
+    {
+      "epoch": 0.07994516396695407,
+      "grad_norm": 0.11967819184064865,
+      "learning_rate": 0.00019681627940539763,
+      "loss": 0.1258,
+      "step": 1108
+    },
+    {
+      "epoch": 0.0800173166420145,
+      "grad_norm": 0.08622618764638901,
+      "learning_rate": 0.00019681339298600087,
+      "loss": 0.157,
+      "step": 1109
+    },
+    {
+      "epoch": 0.08008946931707493,
+      "grad_norm": 0.10485079884529114,
+      "learning_rate": 0.00019681050656660413,
+      "loss": 0.1563,
+      "step": 1110
+    },
+    {
+      "epoch": 0.08016162199213536,
+      "grad_norm": 0.08495404571294785,
+      "learning_rate": 0.0001968076201472074,
+      "loss": 0.1957,
+      "step": 1111
+    },
+    {
+      "epoch": 0.08023377466719579,
+      "grad_norm": 0.100055031478405,
+      "learning_rate": 0.00019680473372781065,
+      "loss": 0.1845,
+      "step": 1112
+    },
+    {
+      "epoch": 0.08030592734225621,
+      "grad_norm": 0.0924968346953392,
+      "learning_rate": 0.00019680184730841392,
+      "loss": 0.2243,
+      "step": 1113
+    },
+    {
+      "epoch": 0.08037808001731664,
+      "grad_norm": 0.13996759057044983,
+      "learning_rate": 0.00019679896088901718,
+      "loss": 0.2316,
+      "step": 1114
+    },
+    {
+      "epoch": 0.08045023269237707,
+      "grad_norm": 0.08373535424470901,
+      "learning_rate": 0.00019679607446962044,
+      "loss": 0.1856,
+      "step": 1115
+    },
+    {
+      "epoch": 0.0805223853674375,
+      "grad_norm": 0.0935160368680954,
+      "learning_rate": 0.0001967931880502237,
+      "loss": 0.19,
+      "step": 1116
+    },
+    {
+      "epoch": 0.08059453804249793,
+      "grad_norm": 0.11538738757371902,
+      "learning_rate": 0.00019679030163082697,
+      "loss": 0.1846,
+      "step": 1117
+    },
+    {
+      "epoch": 0.08066669071755836,
+      "grad_norm": 0.0877399742603302,
+      "learning_rate": 0.00019678741521143023,
+      "loss": 0.1535,
+      "step": 1118
+    },
+    {
+      "epoch": 0.08073884339261878,
+      "grad_norm": 0.08489222079515457,
+      "learning_rate": 0.0001967845287920335,
+      "loss": 0.1773,
+      "step": 1119
+    },
+    {
+      "epoch": 0.0808109960676792,
+      "grad_norm": 0.07990337163209915,
+      "learning_rate": 0.00019678164237263673,
+      "loss": 0.1776,
+      "step": 1120
+    },
+    {
+      "epoch": 0.08088314874273964,
+      "grad_norm": 0.10108862072229385,
+      "learning_rate": 0.00019677875595324002,
+      "loss": 0.2599,
+      "step": 1121
+    },
+    {
+      "epoch": 0.08095530141780007,
+      "grad_norm": 0.0793904960155487,
+      "learning_rate": 0.00019677586953384328,
+      "loss": 0.1569,
+      "step": 1122
+    },
+    {
+      "epoch": 0.0810274540928605,
+      "grad_norm": 0.1183282658457756,
+      "learning_rate": 0.00019677298311444655,
+      "loss": 0.1651,
+      "step": 1123
+    },
+    {
+      "epoch": 0.08109960676792093,
+      "grad_norm": 0.08836779743432999,
+      "learning_rate": 0.0001967700966950498,
+      "loss": 0.1462,
+      "step": 1124
+    },
+    {
+      "epoch": 0.08117175944298134,
+      "grad_norm": 0.11735153943300247,
+      "learning_rate": 0.00019676721027565305,
+      "loss": 0.2149,
+      "step": 1125
+    },
+    {
+      "epoch": 0.08124391211804177,
+      "grad_norm": 0.11171982437372208,
+      "learning_rate": 0.0001967643238562563,
+      "loss": 0.1914,
+      "step": 1126
+    },
+    {
+      "epoch": 0.0813160647931022,
+      "grad_norm": 0.137335866689682,
+      "learning_rate": 0.00019676143743685957,
+      "loss": 0.1839,
+      "step": 1127
+    },
+    {
+      "epoch": 0.08138821746816263,
+      "grad_norm": 0.11637444794178009,
+      "learning_rate": 0.00019675855101746286,
+      "loss": 0.1445,
+      "step": 1128
+    },
+    {
+      "epoch": 0.08146037014322306,
+      "grad_norm": 0.12163744121789932,
+      "learning_rate": 0.00019675566459806612,
+      "loss": 0.1604,
+      "step": 1129
+    },
+    {
+      "epoch": 0.08153252281828349,
+      "grad_norm": 0.12734180688858032,
+      "learning_rate": 0.00019675277817866936,
+      "loss": 0.2484,
+      "step": 1130
+    },
+    {
+      "epoch": 0.08160467549334391,
+      "grad_norm": 0.12239005416631699,
+      "learning_rate": 0.00019674989175927262,
+      "loss": 0.2532,
+      "step": 1131
+    },
+    {
+      "epoch": 0.08167682816840434,
+      "grad_norm": 0.12187651544809341,
+      "learning_rate": 0.00019674700533987589,
+      "loss": 0.1701,
+      "step": 1132
+    },
+    {
+      "epoch": 0.08174898084346477,
+      "grad_norm": 0.14173132181167603,
+      "learning_rate": 0.00019674411892047915,
+      "loss": 0.1907,
+      "step": 1133
+    },
+    {
+      "epoch": 0.0818211335185252,
+      "grad_norm": 0.10581263154745102,
+      "learning_rate": 0.0001967412325010824,
+      "loss": 0.2139,
+      "step": 1134
+    },
+    {
+      "epoch": 0.08189328619358563,
+      "grad_norm": 0.10544908046722412,
+      "learning_rate": 0.00019673834608168567,
+      "loss": 0.1428,
+      "step": 1135
+    },
+    {
+      "epoch": 0.08196543886864606,
+      "grad_norm": 0.096528060734272,
+      "learning_rate": 0.00019673545966228894,
+      "loss": 0.1651,
+      "step": 1136
+    },
+    {
+      "epoch": 0.08203759154370649,
+      "grad_norm": 0.08613178133964539,
+      "learning_rate": 0.0001967325732428922,
+      "loss": 0.1922,
+      "step": 1137
+    },
+    {
+      "epoch": 0.0821097442187669,
+      "grad_norm": 0.09921222925186157,
+      "learning_rate": 0.00019672968682349546,
+      "loss": 0.1547,
+      "step": 1138
+    },
+    {
+      "epoch": 0.08218189689382734,
+      "grad_norm": 0.08947184681892395,
+      "learning_rate": 0.00019672680040409873,
+      "loss": 0.1771,
+      "step": 1139
+    },
+    {
+      "epoch": 0.08225404956888777,
+      "grad_norm": 0.09903664886951447,
+      "learning_rate": 0.000196723913984702,
+      "loss": 0.1525,
+      "step": 1140
+    },
+    {
+      "epoch": 0.0823262022439482,
+      "grad_norm": 0.0916438177227974,
+      "learning_rate": 0.00019672102756530522,
+      "loss": 0.2297,
+      "step": 1141
+    },
+    {
+      "epoch": 0.08239835491900863,
+      "grad_norm": 0.08452580869197845,
+      "learning_rate": 0.00019671814114590851,
+      "loss": 0.1251,
+      "step": 1142
+    },
+    {
+      "epoch": 0.08247050759406906,
+      "grad_norm": 0.11102423816919327,
+      "learning_rate": 0.00019671525472651178,
+      "loss": 0.1875,
+      "step": 1143
+    },
+    {
+      "epoch": 0.08254266026912947,
+      "grad_norm": 0.08565377444028854,
+      "learning_rate": 0.00019671236830711504,
+      "loss": 0.1532,
+      "step": 1144
+    },
+    {
+      "epoch": 0.0826148129441899,
+      "grad_norm": 0.1014544740319252,
+      "learning_rate": 0.0001967094818877183,
+      "loss": 0.1683,
+      "step": 1145
+    },
+    {
+      "epoch": 0.08268696561925033,
+      "grad_norm": 0.10558659583330154,
+      "learning_rate": 0.00019670659546832154,
+      "loss": 0.1624,
+      "step": 1146
+    },
+    {
+      "epoch": 0.08275911829431076,
+      "grad_norm": 0.09612471610307693,
+      "learning_rate": 0.0001967037090489248,
+      "loss": 0.179,
+      "step": 1147
+    },
+    {
+      "epoch": 0.08283127096937119,
+      "grad_norm": 0.09432287514209747,
+      "learning_rate": 0.00019670082262952807,
+      "loss": 0.1544,
+      "step": 1148
+    },
+    {
+      "epoch": 0.08290342364443162,
+      "grad_norm": 0.10006943345069885,
+      "learning_rate": 0.00019669793621013136,
+      "loss": 0.1908,
+      "step": 1149
+    },
+    {
+      "epoch": 0.08297557631949204,
+      "grad_norm": 0.08191773295402527,
+      "learning_rate": 0.00019669504979073462,
+      "loss": 0.2056,
+      "step": 1150
+    },
+    {
+      "epoch": 0.08304772899455247,
+      "grad_norm": 0.12594346702098846,
+      "learning_rate": 0.00019669216337133785,
+      "loss": 0.1722,
+      "step": 1151
+    },
+    {
+      "epoch": 0.0831198816696129,
+      "grad_norm": 0.08761388808488846,
+      "learning_rate": 0.00019668927695194112,
+      "loss": 0.161,
+      "step": 1152
+    },
+    {
+      "epoch": 0.08319203434467333,
+      "grad_norm": 0.12430427223443985,
+      "learning_rate": 0.00019668639053254438,
+      "loss": 0.1482,
+      "step": 1153
+    },
+    {
+      "epoch": 0.08326418701973376,
+      "grad_norm": 0.12445410341024399,
+      "learning_rate": 0.00019668350411314764,
+      "loss": 0.1945,
+      "step": 1154
+    },
+    {
+      "epoch": 0.08333633969479419,
+      "grad_norm": 0.13118945062160492,
+      "learning_rate": 0.0001966806176937509,
+      "loss": 0.1537,
+      "step": 1155
+    },
+    {
+      "epoch": 0.0834084923698546,
+      "grad_norm": 0.07383442670106888,
+      "learning_rate": 0.00019667773127435417,
+      "loss": 0.1932,
+      "step": 1156
+    },
+    {
+      "epoch": 0.08348064504491504,
+      "grad_norm": 0.12663637101650238,
+      "learning_rate": 0.00019667484485495743,
+      "loss": 0.1956,
+      "step": 1157
+    },
+    {
+      "epoch": 0.08355279771997547,
+      "grad_norm": 0.0797528624534607,
+      "learning_rate": 0.0001966719584355607,
+      "loss": 0.2317,
+      "step": 1158
+    },
+    {
+      "epoch": 0.0836249503950359,
+      "grad_norm": 0.13396123051643372,
+      "learning_rate": 0.00019666907201616396,
+      "loss": 0.2296,
+      "step": 1159
+    },
+    {
+      "epoch": 0.08369710307009633,
+      "grad_norm": 0.08744298666715622,
+      "learning_rate": 0.00019666618559676722,
+      "loss": 0.1572,
+      "step": 1160
+    },
+    {
+      "epoch": 0.08376925574515676,
+      "grad_norm": 0.09590718150138855,
+      "learning_rate": 0.00019666329917737048,
+      "loss": 0.1724,
+      "step": 1161
+    },
+    {
+      "epoch": 0.08384140842021717,
+      "grad_norm": 0.08623688668012619,
+      "learning_rate": 0.00019666041275797375,
+      "loss": 0.156,
+      "step": 1162
+    },
+    {
+      "epoch": 0.0839135610952776,
+      "grad_norm": 0.14770671725273132,
+      "learning_rate": 0.000196657526338577,
+      "loss": 0.1919,
+      "step": 1163
+    },
+    {
+      "epoch": 0.08398571377033803,
+      "grad_norm": 0.11314375698566437,
+      "learning_rate": 0.00019665463991918027,
+      "loss": 0.1295,
+      "step": 1164
+    },
+    {
+      "epoch": 0.08405786644539846,
+      "grad_norm": 0.10177276283502579,
+      "learning_rate": 0.00019665175349978353,
+      "loss": 0.1967,
+      "step": 1165
+    },
+    {
+      "epoch": 0.0841300191204589,
+      "grad_norm": 0.17404918372631073,
+      "learning_rate": 0.0001966488670803868,
+      "loss": 0.2155,
+      "step": 1166
+    },
+    {
+      "epoch": 0.08420217179551932,
+      "grad_norm": 0.07917987555265427,
+      "learning_rate": 0.00019664598066099006,
+      "loss": 0.1971,
+      "step": 1167
+    },
+    {
+      "epoch": 0.08427432447057975,
+      "grad_norm": 0.10241827368736267,
+      "learning_rate": 0.0001966430942415933,
+      "loss": 0.173,
+      "step": 1168
+    },
+    {
+      "epoch": 0.08434647714564017,
+      "grad_norm": 0.09378177672624588,
+      "learning_rate": 0.00019664020782219656,
+      "loss": 0.1652,
+      "step": 1169
+    },
+    {
+      "epoch": 0.0844186298207006,
+      "grad_norm": 0.09627138823270798,
+      "learning_rate": 0.00019663732140279985,
+      "loss": 0.1774,
+      "step": 1170
+    },
+    {
+      "epoch": 0.08449078249576103,
+      "grad_norm": 0.1196071207523346,
+      "learning_rate": 0.0001966344349834031,
+      "loss": 0.1497,
+      "step": 1171
+    },
+    {
+      "epoch": 0.08456293517082146,
+      "grad_norm": 0.10868272185325623,
+      "learning_rate": 0.00019663154856400638,
+      "loss": 0.2377,
+      "step": 1172
+    },
+    {
+      "epoch": 0.08463508784588189,
+      "grad_norm": 0.10114963352680206,
+      "learning_rate": 0.0001966286621446096,
+      "loss": 0.2186,
+      "step": 1173
+    },
+    {
+      "epoch": 0.08470724052094232,
+      "grad_norm": 0.10729393362998962,
+      "learning_rate": 0.00019662577572521287,
+      "loss": 0.1126,
+      "step": 1174
+    },
+    {
+      "epoch": 0.08477939319600274,
+      "grad_norm": 0.10542712360620499,
+      "learning_rate": 0.00019662288930581614,
+      "loss": 0.1955,
+      "step": 1175
+    },
+    {
+      "epoch": 0.08485154587106317,
+      "grad_norm": 0.10797982662916183,
+      "learning_rate": 0.0001966200028864194,
+      "loss": 0.2124,
+      "step": 1176
+    },
+    {
+      "epoch": 0.0849236985461236,
+      "grad_norm": 0.09008362144231796,
+      "learning_rate": 0.0001966171164670227,
+      "loss": 0.1638,
+      "step": 1177
+    },
+    {
+      "epoch": 0.08499585122118403,
+      "grad_norm": 0.1290101408958435,
+      "learning_rate": 0.00019661423004762593,
+      "loss": 0.1333,
+      "step": 1178
+    },
+    {
+      "epoch": 0.08506800389624446,
+      "grad_norm": 0.13693585991859436,
+      "learning_rate": 0.0001966113436282292,
+      "loss": 0.1503,
+      "step": 1179
+    },
+    {
+      "epoch": 0.08514015657130489,
+      "grad_norm": 0.13027171790599823,
+      "learning_rate": 0.00019660845720883245,
+      "loss": 0.1838,
+      "step": 1180
+    },
+    {
+      "epoch": 0.0852123092463653,
+      "grad_norm": 0.08405692875385284,
+      "learning_rate": 0.00019660557078943571,
+      "loss": 0.2052,
+      "step": 1181
+    },
+    {
+      "epoch": 0.08528446192142573,
+      "grad_norm": 0.13299930095672607,
+      "learning_rate": 0.00019660268437003898,
+      "loss": 0.1651,
+      "step": 1182
+    },
+    {
+      "epoch": 0.08535661459648616,
+      "grad_norm": 0.10912304371595383,
+      "learning_rate": 0.00019659979795064224,
+      "loss": 0.1503,
+      "step": 1183
+    },
+    {
+      "epoch": 0.0854287672715466,
+      "grad_norm": 0.14194819331169128,
+      "learning_rate": 0.0001965969115312455,
+      "loss": 0.1891,
+      "step": 1184
+    },
+    {
+      "epoch": 0.08550091994660702,
+      "grad_norm": 0.13994021713733673,
+      "learning_rate": 0.00019659402511184877,
+      "loss": 0.2066,
+      "step": 1185
+    },
+    {
+      "epoch": 0.08557307262166745,
+      "grad_norm": 0.10049525648355484,
+      "learning_rate": 0.00019659113869245203,
+      "loss": 0.2056,
+      "step": 1186
+    },
+    {
+      "epoch": 0.08564522529672787,
+      "grad_norm": 0.08980170637369156,
+      "learning_rate": 0.0001965882522730553,
+      "loss": 0.1698,
+      "step": 1187
+    },
+    {
+      "epoch": 0.0857173779717883,
+      "grad_norm": 0.08638997375965118,
+      "learning_rate": 0.00019658536585365856,
+      "loss": 0.2183,
+      "step": 1188
+    },
+    {
+      "epoch": 0.08578953064684873,
+      "grad_norm": 0.09948911517858505,
+      "learning_rate": 0.0001965824794342618,
+      "loss": 0.1589,
+      "step": 1189
+    },
+    {
+      "epoch": 0.08586168332190916,
+      "grad_norm": 0.1180720329284668,
+      "learning_rate": 0.00019657959301486505,
+      "loss": 0.1794,
+      "step": 1190
+    },
+    {
+      "epoch": 0.08593383599696959,
+      "grad_norm": 0.11050887405872345,
+      "learning_rate": 0.00019657670659546834,
+      "loss": 0.1852,
+      "step": 1191
+    },
+    {
+      "epoch": 0.08600598867203002,
+      "grad_norm": 0.08088482171297073,
+      "learning_rate": 0.0001965738201760716,
+      "loss": 0.1616,
+      "step": 1192
+    },
+    {
+      "epoch": 0.08607814134709044,
+      "grad_norm": 0.11148399114608765,
+      "learning_rate": 0.00019657093375667487,
+      "loss": 0.142,
+      "step": 1193
+    },
+    {
+      "epoch": 0.08615029402215087,
+      "grad_norm": 0.12479886412620544,
+      "learning_rate": 0.0001965680473372781,
+      "loss": 0.2248,
+      "step": 1194
+    },
+    {
+      "epoch": 0.0862224466972113,
+      "grad_norm": 0.09896652400493622,
+      "learning_rate": 0.00019656516091788137,
+      "loss": 0.1401,
+      "step": 1195
+    },
+    {
+      "epoch": 0.08629459937227173,
+      "grad_norm": 0.11217128485441208,
+      "learning_rate": 0.00019656227449848463,
+      "loss": 0.1598,
+      "step": 1196
+    },
+    {
+      "epoch": 0.08636675204733216,
+      "grad_norm": 0.08890548348426819,
+      "learning_rate": 0.0001965593880790879,
+      "loss": 0.12,
+      "step": 1197
+    },
+    {
+      "epoch": 0.08643890472239259,
+      "grad_norm": 0.16628938913345337,
+      "learning_rate": 0.00019655650165969118,
+      "loss": 0.1875,
+      "step": 1198
+    },
+    {
+      "epoch": 0.08651105739745302,
+      "grad_norm": 0.11361134797334671,
+      "learning_rate": 0.00019655361524029442,
+      "loss": 0.2373,
+      "step": 1199
+    },
+    {
+      "epoch": 0.08658321007251343,
+      "grad_norm": 0.108740895986557,
+      "learning_rate": 0.00019655072882089768,
+      "loss": 0.1734,
+      "step": 1200
+    },
+    {
+      "epoch": 0.08665536274757386,
+      "grad_norm": 0.08382868021726608,
+      "learning_rate": 0.00019654784240150095,
+      "loss": 0.1664,
+      "step": 1201
+    },
+    {
+      "epoch": 0.0867275154226343,
+      "grad_norm": 0.10402888059616089,
+      "learning_rate": 0.0001965449559821042,
+      "loss": 0.1608,
+      "step": 1202
+    },
+    {
+      "epoch": 0.08679966809769472,
+      "grad_norm": 0.09176527708768845,
+      "learning_rate": 0.00019654206956270747,
+      "loss": 0.1876,
+      "step": 1203
+    },
+    {
+      "epoch": 0.08687182077275515,
+      "grad_norm": 0.08105053752660751,
+      "learning_rate": 0.00019653918314331073,
+      "loss": 0.1778,
+      "step": 1204
+    },
+    {
+      "epoch": 0.08694397344781558,
+      "grad_norm": 0.08214316517114639,
+      "learning_rate": 0.000196536296723914,
+      "loss": 0.1846,
+      "step": 1205
+    },
+    {
+      "epoch": 0.087016126122876,
+      "grad_norm": 0.1279810667037964,
+      "learning_rate": 0.00019653341030451726,
+      "loss": 0.1731,
+      "step": 1206
+    },
+    {
+      "epoch": 0.08708827879793643,
+      "grad_norm": 0.09262233972549438,
+      "learning_rate": 0.00019653052388512052,
+      "loss": 0.1863,
+      "step": 1207
+    },
+    {
+      "epoch": 0.08716043147299686,
+      "grad_norm": 0.12644372880458832,
+      "learning_rate": 0.00019652763746572379,
+      "loss": 0.1779,
+      "step": 1208
+    },
+    {
+      "epoch": 0.08723258414805729,
+      "grad_norm": 0.07663599401712418,
+      "learning_rate": 0.00019652475104632705,
+      "loss": 0.1311,
+      "step": 1209
+    },
+    {
+      "epoch": 0.08730473682311772,
+      "grad_norm": 0.10031388700008392,
+      "learning_rate": 0.00019652186462693029,
+      "loss": 0.1757,
+      "step": 1210
+    },
+    {
+      "epoch": 0.08737688949817815,
+      "grad_norm": 0.09371486306190491,
+      "learning_rate": 0.00019651897820753355,
+      "loss": 0.1984,
+      "step": 1211
+    },
+    {
+      "epoch": 0.08744904217323857,
+      "grad_norm": 0.10455991327762604,
+      "learning_rate": 0.00019651609178813684,
+      "loss": 0.1622,
+      "step": 1212
+    },
+    {
+      "epoch": 0.087521194848299,
+      "grad_norm": 0.1045440062880516,
+      "learning_rate": 0.0001965132053687401,
+      "loss": 0.1987,
+      "step": 1213
+    },
+    {
+      "epoch": 0.08759334752335943,
+      "grad_norm": 0.11814934015274048,
+      "learning_rate": 0.00019651031894934336,
+      "loss": 0.1644,
+      "step": 1214
+    },
+    {
+      "epoch": 0.08766550019841986,
+      "grad_norm": 0.09841133654117584,
+      "learning_rate": 0.0001965074325299466,
+      "loss": 0.1716,
+      "step": 1215
+    },
+    {
+      "epoch": 0.08773765287348029,
+      "grad_norm": 0.09436903148889542,
+      "learning_rate": 0.00019650454611054986,
+      "loss": 0.2072,
+      "step": 1216
+    },
+    {
+      "epoch": 0.08780980554854072,
+      "grad_norm": 0.11973531544208527,
+      "learning_rate": 0.00019650165969115313,
+      "loss": 0.2328,
+      "step": 1217
+    },
+    {
+      "epoch": 0.08788195822360113,
+      "grad_norm": 0.08357010781764984,
+      "learning_rate": 0.0001964987732717564,
+      "loss": 0.1726,
+      "step": 1218
+    },
+    {
+      "epoch": 0.08795411089866156,
+      "grad_norm": 0.09621085226535797,
+      "learning_rate": 0.00019649588685235968,
+      "loss": 0.1573,
+      "step": 1219
+    },
+    {
+      "epoch": 0.088026263573722,
+      "grad_norm": 0.08089716732501984,
+      "learning_rate": 0.00019649300043296291,
+      "loss": 0.1485,
+      "step": 1220
+    },
+    {
+      "epoch": 0.08809841624878242,
+      "grad_norm": 0.10134495049715042,
+      "learning_rate": 0.00019649011401356618,
+      "loss": 0.1628,
+      "step": 1221
+    },
+    {
+      "epoch": 0.08817056892384285,
+      "grad_norm": 0.10229915380477905,
+      "learning_rate": 0.00019648722759416944,
+      "loss": 0.164,
+      "step": 1222
+    },
+    {
+      "epoch": 0.08824272159890328,
+      "grad_norm": 0.1260552704334259,
+      "learning_rate": 0.0001964843411747727,
+      "loss": 0.1824,
+      "step": 1223
+    },
+    {
+      "epoch": 0.0883148742739637,
+      "grad_norm": 0.08896566182374954,
+      "learning_rate": 0.00019648145475537597,
+      "loss": 0.1831,
+      "step": 1224
+    },
+    {
+      "epoch": 0.08838702694902413,
+      "grad_norm": 0.12777185440063477,
+      "learning_rate": 0.00019647856833597923,
+      "loss": 0.1803,
+      "step": 1225
+    },
+    {
+      "epoch": 0.08845917962408456,
+      "grad_norm": 0.10827764123678207,
+      "learning_rate": 0.0001964756819165825,
+      "loss": 0.1915,
+      "step": 1226
+    },
+    {
+      "epoch": 0.08853133229914499,
+      "grad_norm": 0.09689563512802124,
+      "learning_rate": 0.00019647279549718575,
+      "loss": 0.1168,
+      "step": 1227
+    },
+    {
+      "epoch": 0.08860348497420542,
+      "grad_norm": 0.11655878275632858,
+      "learning_rate": 0.00019646990907778902,
+      "loss": 0.1583,
+      "step": 1228
+    },
+    {
+      "epoch": 0.08867563764926585,
+      "grad_norm": 0.10739599168300629,
+      "learning_rate": 0.00019646702265839228,
+      "loss": 0.1698,
+      "step": 1229
+    },
+    {
+      "epoch": 0.08874779032432628,
+      "grad_norm": 0.07532233744859695,
+      "learning_rate": 0.00019646413623899554,
+      "loss": 0.1712,
+      "step": 1230
+    },
+    {
+      "epoch": 0.0888199429993867,
+      "grad_norm": 0.10705269128084183,
+      "learning_rate": 0.00019646124981959878,
+      "loss": 0.2176,
+      "step": 1231
+    },
+    {
+      "epoch": 0.08889209567444713,
+      "grad_norm": 0.07377097010612488,
+      "learning_rate": 0.00019645836340020204,
+      "loss": 0.1904,
+      "step": 1232
+    },
+    {
+      "epoch": 0.08896424834950756,
+      "grad_norm": 0.08052223920822144,
+      "learning_rate": 0.00019645547698080533,
+      "loss": 0.1457,
+      "step": 1233
+    },
+    {
+      "epoch": 0.08903640102456799,
+      "grad_norm": 0.10867580771446228,
+      "learning_rate": 0.0001964525905614086,
+      "loss": 0.1826,
+      "step": 1234
+    },
+    {
+      "epoch": 0.08910855369962842,
+      "grad_norm": 0.07923214137554169,
+      "learning_rate": 0.00019644970414201186,
+      "loss": 0.1916,
+      "step": 1235
+    },
+    {
+      "epoch": 0.08918070637468885,
+      "grad_norm": 0.09806831926107407,
+      "learning_rate": 0.0001964468177226151,
+      "loss": 0.1921,
+      "step": 1236
+    },
+    {
+      "epoch": 0.08925285904974926,
+      "grad_norm": 0.08039659261703491,
+      "learning_rate": 0.00019644393130321836,
+      "loss": 0.1799,
+      "step": 1237
+    },
+    {
+      "epoch": 0.0893250117248097,
+      "grad_norm": 0.11143870651721954,
+      "learning_rate": 0.00019644104488382162,
+      "loss": 0.1252,
+      "step": 1238
+    },
+    {
+      "epoch": 0.08939716439987012,
+      "grad_norm": 0.11356040090322495,
+      "learning_rate": 0.00019643815846442488,
+      "loss": 0.198,
+      "step": 1239
+    },
+    {
+      "epoch": 0.08946931707493055,
+      "grad_norm": 0.10516027361154556,
+      "learning_rate": 0.00019643527204502817,
+      "loss": 0.2374,
+      "step": 1240
+    },
+    {
+      "epoch": 0.08954146974999098,
+      "grad_norm": 0.09420349448919296,
+      "learning_rate": 0.0001964323856256314,
+      "loss": 0.2187,
+      "step": 1241
+    },
+    {
+      "epoch": 0.08961362242505141,
+      "grad_norm": 0.11258124560117722,
+      "learning_rate": 0.00019642949920623467,
+      "loss": 0.1896,
+      "step": 1242
+    },
+    {
+      "epoch": 0.08968577510011183,
+      "grad_norm": 0.10528513789176941,
+      "learning_rate": 0.00019642661278683793,
+      "loss": 0.1911,
+      "step": 1243
+    },
+    {
+      "epoch": 0.08975792777517226,
+      "grad_norm": 0.10851096361875534,
+      "learning_rate": 0.0001964237263674412,
+      "loss": 0.1566,
+      "step": 1244
+    },
+    {
+      "epoch": 0.08983008045023269,
+      "grad_norm": 0.10526153445243835,
+      "learning_rate": 0.00019642083994804446,
+      "loss": 0.1836,
+      "step": 1245
+    },
+    {
+      "epoch": 0.08990223312529312,
+      "grad_norm": 0.09496541321277618,
+      "learning_rate": 0.00019641795352864772,
+      "loss": 0.1467,
+      "step": 1246
+    },
+    {
+      "epoch": 0.08997438580035355,
+      "grad_norm": 0.09573590755462646,
+      "learning_rate": 0.00019641506710925099,
+      "loss": 0.1629,
+      "step": 1247
+    },
+    {
+      "epoch": 0.09004653847541398,
+      "grad_norm": 0.09841755032539368,
+      "learning_rate": 0.00019641218068985425,
+      "loss": 0.1682,
+      "step": 1248
+    },
+    {
+      "epoch": 0.0901186911504744,
+      "grad_norm": 0.10681945830583572,
+      "learning_rate": 0.0001964092942704575,
+      "loss": 0.1816,
+      "step": 1249
+    },
+    {
+      "epoch": 0.09019084382553483,
+      "grad_norm": 0.12309513986110687,
+      "learning_rate": 0.00019640640785106077,
+      "loss": 0.1984,
+      "step": 1250
+    },
+    {
+      "epoch": 0.09026299650059526,
+      "grad_norm": 0.08377508074045181,
+      "learning_rate": 0.00019640352143166404,
+      "loss": 0.1832,
+      "step": 1251
+    },
+    {
+      "epoch": 0.09033514917565569,
+      "grad_norm": 0.10586296021938324,
+      "learning_rate": 0.00019640063501226727,
+      "loss": 0.1582,
+      "step": 1252
+    },
+    {
+      "epoch": 0.09040730185071612,
+      "grad_norm": 0.09107067435979843,
+      "learning_rate": 0.00019639774859287054,
+      "loss": 0.1677,
+      "step": 1253
+    },
+    {
+      "epoch": 0.09047945452577655,
+      "grad_norm": 0.09457964450120926,
+      "learning_rate": 0.00019639486217347383,
+      "loss": 0.1474,
+      "step": 1254
+    },
+    {
+      "epoch": 0.09055160720083696,
+      "grad_norm": 0.08729027211666107,
+      "learning_rate": 0.0001963919757540771,
+      "loss": 0.1455,
+      "step": 1255
+    },
+    {
+      "epoch": 0.0906237598758974,
+      "grad_norm": 0.1331973373889923,
+      "learning_rate": 0.00019638908933468035,
+      "loss": 0.191,
+      "step": 1256
+    },
+    {
+      "epoch": 0.09069591255095782,
+      "grad_norm": 0.07884123176336288,
+      "learning_rate": 0.0001963862029152836,
+      "loss": 0.161,
+      "step": 1257
+    },
+    {
+      "epoch": 0.09076806522601825,
+      "grad_norm": 0.09368634223937988,
+      "learning_rate": 0.00019638331649588685,
+      "loss": 0.1526,
+      "step": 1258
+    },
+    {
+      "epoch": 0.09084021790107868,
+      "grad_norm": 0.10609408468008041,
+      "learning_rate": 0.00019638043007649011,
+      "loss": 0.203,
+      "step": 1259
+    },
+    {
+      "epoch": 0.09091237057613911,
+      "grad_norm": 0.13501423597335815,
+      "learning_rate": 0.00019637754365709338,
+      "loss": 0.2078,
+      "step": 1260
+    },
+    {
+      "epoch": 0.09098452325119954,
+      "grad_norm": 0.08630816638469696,
+      "learning_rate": 0.00019637465723769667,
+      "loss": 0.1602,
+      "step": 1261
+    },
+    {
+      "epoch": 0.09105667592625996,
+      "grad_norm": 0.08650074899196625,
+      "learning_rate": 0.0001963717708182999,
+      "loss": 0.1545,
+      "step": 1262
+    },
+    {
+      "epoch": 0.09112882860132039,
+      "grad_norm": 0.08092677593231201,
+      "learning_rate": 0.00019636888439890317,
+      "loss": 0.1308,
+      "step": 1263
+    },
+    {
+      "epoch": 0.09120098127638082,
+      "grad_norm": 0.08196258544921875,
+      "learning_rate": 0.00019636599797950643,
+      "loss": 0.1415,
+      "step": 1264
+    },
+    {
+      "epoch": 0.09127313395144125,
+      "grad_norm": 0.08814175426959991,
+      "learning_rate": 0.0001963631115601097,
+      "loss": 0.169,
+      "step": 1265
+    },
+    {
+      "epoch": 0.09134528662650168,
+      "grad_norm": 0.11394612491130829,
+      "learning_rate": 0.00019636022514071295,
+      "loss": 0.2231,
+      "step": 1266
+    },
+    {
+      "epoch": 0.09141743930156211,
+      "grad_norm": 0.08286873996257782,
+      "learning_rate": 0.00019635733872131622,
+      "loss": 0.2207,
+      "step": 1267
+    },
+    {
+      "epoch": 0.09148959197662253,
+      "grad_norm": 0.11120649427175522,
+      "learning_rate": 0.00019635445230191948,
+      "loss": 0.1806,
+      "step": 1268
+    },
+    {
+      "epoch": 0.09156174465168296,
+      "grad_norm": 0.13481257855892181,
+      "learning_rate": 0.00019635156588252274,
+      "loss": 0.1771,
+      "step": 1269
+    },
+    {
+      "epoch": 0.09163389732674339,
+      "grad_norm": 0.08911901712417603,
+      "learning_rate": 0.000196348679463126,
+      "loss": 0.1489,
+      "step": 1270
+    },
+    {
+      "epoch": 0.09170605000180382,
+      "grad_norm": 0.12352734804153442,
+      "learning_rate": 0.00019634579304372927,
+      "loss": 0.1912,
+      "step": 1271
+    },
+    {
+      "epoch": 0.09177820267686425,
+      "grad_norm": 0.1041436716914177,
+      "learning_rate": 0.00019634290662433253,
+      "loss": 0.205,
+      "step": 1272
+    },
+    {
+      "epoch": 0.09185035535192468,
+      "grad_norm": 0.09959304332733154,
+      "learning_rate": 0.0001963400202049358,
+      "loss": 0.1857,
+      "step": 1273
+    },
+    {
+      "epoch": 0.0919225080269851,
+      "grad_norm": 0.09183462709188461,
+      "learning_rate": 0.00019633713378553903,
+      "loss": 0.1586,
+      "step": 1274
+    },
+    {
+      "epoch": 0.09199466070204552,
+      "grad_norm": 0.08344271034002304,
+      "learning_rate": 0.00019633424736614232,
+      "loss": 0.1967,
+      "step": 1275
+    },
+    {
+      "epoch": 0.09206681337710595,
+      "grad_norm": 0.08204984664916992,
+      "learning_rate": 0.00019633136094674558,
+      "loss": 0.1647,
+      "step": 1276
+    },
+    {
+      "epoch": 0.09213896605216639,
+      "grad_norm": 0.11258382350206375,
+      "learning_rate": 0.00019632847452734885,
+      "loss": 0.1751,
+      "step": 1277
+    },
+    {
+      "epoch": 0.09221111872722682,
+      "grad_norm": 0.10533545166254044,
+      "learning_rate": 0.0001963255881079521,
+      "loss": 0.1371,
+      "step": 1278
+    },
+    {
+      "epoch": 0.09228327140228725,
+      "grad_norm": 0.07517839968204498,
+      "learning_rate": 0.00019632270168855535,
+      "loss": 0.1603,
+      "step": 1279
+    },
+    {
+      "epoch": 0.09235542407734766,
+      "grad_norm": 0.09975581616163254,
+      "learning_rate": 0.0001963198152691586,
+      "loss": 0.1834,
+      "step": 1280
+    },
+    {
+      "epoch": 0.09242757675240809,
+      "grad_norm": 0.08929795026779175,
+      "learning_rate": 0.00019631692884976187,
+      "loss": 0.1156,
+      "step": 1281
+    },
+    {
+      "epoch": 0.09249972942746852,
+      "grad_norm": 0.10165119916200638,
+      "learning_rate": 0.00019631404243036513,
+      "loss": 0.1557,
+      "step": 1282
+    },
+    {
+      "epoch": 0.09257188210252895,
+      "grad_norm": 0.08109511435031891,
+      "learning_rate": 0.00019631115601096842,
+      "loss": 0.1985,
+      "step": 1283
+    },
+    {
+      "epoch": 0.09264403477758938,
+      "grad_norm": 0.11984848231077194,
+      "learning_rate": 0.00019630826959157166,
+      "loss": 0.2291,
+      "step": 1284
+    },
+    {
+      "epoch": 0.09271618745264981,
+      "grad_norm": 0.101690873503685,
+      "learning_rate": 0.00019630538317217492,
+      "loss": 0.1889,
+      "step": 1285
+    },
+    {
+      "epoch": 0.09278834012771023,
+      "grad_norm": 0.10042715072631836,
+      "learning_rate": 0.00019630249675277819,
+      "loss": 0.1608,
+      "step": 1286
+    },
+    {
+      "epoch": 0.09286049280277066,
+      "grad_norm": 0.110273078083992,
+      "learning_rate": 0.00019629961033338145,
+      "loss": 0.149,
+      "step": 1287
+    },
+    {
+      "epoch": 0.09293264547783109,
+      "grad_norm": 0.10497692972421646,
+      "learning_rate": 0.0001962967239139847,
+      "loss": 0.1997,
+      "step": 1288
+    },
+    {
+      "epoch": 0.09300479815289152,
+      "grad_norm": 0.08940223604440689,
+      "learning_rate": 0.00019629383749458797,
+      "loss": 0.1184,
+      "step": 1289
+    },
+    {
+      "epoch": 0.09307695082795195,
+      "grad_norm": 0.10932713001966476,
+      "learning_rate": 0.00019629095107519124,
+      "loss": 0.1967,
+      "step": 1290
+    },
+    {
+      "epoch": 0.09314910350301238,
+      "grad_norm": 0.08406266570091248,
+      "learning_rate": 0.0001962880646557945,
+      "loss": 0.1615,
+      "step": 1291
+    },
+    {
+      "epoch": 0.09322125617807281,
+      "grad_norm": 0.08657065033912659,
+      "learning_rate": 0.00019628517823639776,
+      "loss": 0.1742,
+      "step": 1292
+    },
+    {
+      "epoch": 0.09329340885313323,
+      "grad_norm": 0.09516967087984085,
+      "learning_rate": 0.00019628229181700103,
+      "loss": 0.1821,
+      "step": 1293
+    },
+    {
+      "epoch": 0.09336556152819366,
+      "grad_norm": 0.11135697364807129,
+      "learning_rate": 0.0001962794053976043,
+      "loss": 0.1969,
+      "step": 1294
+    },
+    {
+      "epoch": 0.09343771420325409,
+      "grad_norm": 0.12651841342449188,
+      "learning_rate": 0.00019627651897820753,
+      "loss": 0.2189,
+      "step": 1295
+    },
+    {
+      "epoch": 0.09350986687831452,
+      "grad_norm": 0.08434838056564331,
+      "learning_rate": 0.0001962736325588108,
+      "loss": 0.1582,
+      "step": 1296
+    },
+    {
+      "epoch": 0.09358201955337495,
+      "grad_norm": 0.10231063514947891,
+      "learning_rate": 0.00019627074613941408,
+      "loss": 0.1773,
+      "step": 1297
+    },
+    {
+      "epoch": 0.09365417222843538,
+      "grad_norm": 0.1403086632490158,
+      "learning_rate": 0.00019626785972001734,
+      "loss": 0.1989,
+      "step": 1298
+    },
+    {
+      "epoch": 0.09372632490349579,
+      "grad_norm": 0.10541427135467529,
+      "learning_rate": 0.0001962649733006206,
+      "loss": 0.2189,
+      "step": 1299
+    },
+    {
+      "epoch": 0.09379847757855622,
+      "grad_norm": 0.14472924172878265,
+      "learning_rate": 0.00019626208688122384,
+      "loss": 0.1733,
+      "step": 1300
+    },
+    {
+      "epoch": 0.09387063025361665,
+      "grad_norm": 0.09914866834878922,
+      "learning_rate": 0.0001962592004618271,
+      "loss": 0.1519,
+      "step": 1301
+    },
+    {
+      "epoch": 0.09394278292867708,
+      "grad_norm": 0.10584486275911331,
+      "learning_rate": 0.00019625631404243037,
+      "loss": 0.1786,
+      "step": 1302
+    },
+    {
+      "epoch": 0.09401493560373751,
+      "grad_norm": 0.13134770095348358,
+      "learning_rate": 0.00019625342762303363,
+      "loss": 0.1605,
+      "step": 1303
+    },
+    {
+      "epoch": 0.09408708827879794,
+      "grad_norm": 0.09906939417123795,
+      "learning_rate": 0.00019625054120363692,
+      "loss": 0.1696,
+      "step": 1304
+    },
+    {
+      "epoch": 0.09415924095385836,
+      "grad_norm": 0.10578920692205429,
+      "learning_rate": 0.00019624765478424015,
+      "loss": 0.1924,
+      "step": 1305
+    },
+    {
+      "epoch": 0.09423139362891879,
+      "grad_norm": 0.10673406720161438,
+      "learning_rate": 0.00019624476836484342,
+      "loss": 0.1971,
+      "step": 1306
+    },
+    {
+      "epoch": 0.09430354630397922,
+      "grad_norm": 0.11942119151353836,
+      "learning_rate": 0.00019624188194544668,
+      "loss": 0.1865,
+      "step": 1307
+    },
+    {
+      "epoch": 0.09437569897903965,
+      "grad_norm": 0.10493606328964233,
+      "learning_rate": 0.00019623899552604994,
+      "loss": 0.16,
+      "step": 1308
+    },
+    {
+      "epoch": 0.09444785165410008,
+      "grad_norm": 0.10441295802593231,
+      "learning_rate": 0.0001962361091066532,
+      "loss": 0.175,
+      "step": 1309
+    },
+    {
+      "epoch": 0.09452000432916051,
+      "grad_norm": 0.12301217764616013,
+      "learning_rate": 0.00019623322268725647,
+      "loss": 0.1717,
+      "step": 1310
+    },
+    {
+      "epoch": 0.09459215700422093,
+      "grad_norm": 0.07668205350637436,
+      "learning_rate": 0.00019623033626785973,
+      "loss": 0.1145,
+      "step": 1311
+    },
+    {
+      "epoch": 0.09466430967928136,
+      "grad_norm": 0.09936694800853729,
+      "learning_rate": 0.000196227449848463,
+      "loss": 0.1496,
+      "step": 1312
+    },
+    {
+      "epoch": 0.09473646235434179,
+      "grad_norm": 0.11265905201435089,
+      "learning_rate": 0.00019622456342906626,
+      "loss": 0.1435,
+      "step": 1313
+    },
+    {
+      "epoch": 0.09480861502940222,
+      "grad_norm": 0.10028904676437378,
+      "learning_rate": 0.00019622167700966952,
+      "loss": 0.197,
+      "step": 1314
+    },
+    {
+      "epoch": 0.09488076770446265,
+      "grad_norm": 0.07026661932468414,
+      "learning_rate": 0.00019621879059027278,
+      "loss": 0.2116,
+      "step": 1315
+    },
+    {
+      "epoch": 0.09495292037952308,
+      "grad_norm": 0.10615105926990509,
+      "learning_rate": 0.00019621590417087602,
+      "loss": 0.2152,
+      "step": 1316
+    },
+    {
+      "epoch": 0.09502507305458349,
+      "grad_norm": 0.1014433428645134,
+      "learning_rate": 0.00019621301775147928,
+      "loss": 0.1702,
+      "step": 1317
+    },
+    {
+      "epoch": 0.09509722572964392,
+      "grad_norm": 0.10013467073440552,
+      "learning_rate": 0.00019621013133208257,
+      "loss": 0.1884,
+      "step": 1318
+    },
+    {
+      "epoch": 0.09516937840470435,
+      "grad_norm": 0.11329302936792374,
+      "learning_rate": 0.00019620724491268584,
+      "loss": 0.1707,
+      "step": 1319
+    },
+    {
+      "epoch": 0.09524153107976478,
+      "grad_norm": 0.10168576240539551,
+      "learning_rate": 0.0001962043584932891,
+      "loss": 0.1698,
+      "step": 1320
+    },
+    {
+      "epoch": 0.09531368375482521,
+      "grad_norm": 0.09280646592378616,
+      "learning_rate": 0.00019620147207389233,
+      "loss": 0.1553,
+      "step": 1321
+    },
+    {
+      "epoch": 0.09538583642988564,
+      "grad_norm": 0.10105575621128082,
+      "learning_rate": 0.0001961985856544956,
+      "loss": 0.2239,
+      "step": 1322
+    },
+    {
+      "epoch": 0.09545798910494607,
+      "grad_norm": 0.08288126438856125,
+      "learning_rate": 0.00019619569923509886,
+      "loss": 0.193,
+      "step": 1323
+    },
+    {
+      "epoch": 0.09553014178000649,
+      "grad_norm": 0.1465429663658142,
+      "learning_rate": 0.00019619281281570212,
+      "loss": 0.2039,
+      "step": 1324
+    },
+    {
+      "epoch": 0.09560229445506692,
+      "grad_norm": 0.12137012183666229,
+      "learning_rate": 0.0001961899263963054,
+      "loss": 0.1393,
+      "step": 1325
+    },
+    {
+      "epoch": 0.09567444713012735,
+      "grad_norm": 0.09829273074865341,
+      "learning_rate": 0.00019618703997690865,
+      "loss": 0.2547,
+      "step": 1326
+    },
+    {
+      "epoch": 0.09574659980518778,
+      "grad_norm": 0.13386209309101105,
+      "learning_rate": 0.0001961841535575119,
+      "loss": 0.1183,
+      "step": 1327
+    },
+    {
+      "epoch": 0.09581875248024821,
+      "grad_norm": 0.12641870975494385,
+      "learning_rate": 0.00019618126713811517,
+      "loss": 0.1984,
+      "step": 1328
+    },
+    {
+      "epoch": 0.09589090515530864,
+      "grad_norm": 0.1062311977148056,
+      "learning_rate": 0.00019617838071871844,
+      "loss": 0.171,
+      "step": 1329
+    },
+    {
+      "epoch": 0.09596305783036906,
+      "grad_norm": 0.08198920637369156,
+      "learning_rate": 0.0001961754942993217,
+      "loss": 0.1579,
+      "step": 1330
+    },
+    {
+      "epoch": 0.09603521050542949,
+      "grad_norm": 0.09765391051769257,
+      "learning_rate": 0.00019617260787992496,
+      "loss": 0.1361,
+      "step": 1331
+    },
+    {
+      "epoch": 0.09610736318048992,
+      "grad_norm": 0.12339577823877335,
+      "learning_rate": 0.00019616972146052823,
+      "loss": 0.1762,
+      "step": 1332
+    },
+    {
+      "epoch": 0.09617951585555035,
+      "grad_norm": 0.11164752393960953,
+      "learning_rate": 0.0001961668350411315,
+      "loss": 0.1621,
+      "step": 1333
+    },
+    {
+      "epoch": 0.09625166853061078,
+      "grad_norm": 0.114966481924057,
+      "learning_rate": 0.00019616394862173475,
+      "loss": 0.1737,
+      "step": 1334
+    },
+    {
+      "epoch": 0.0963238212056712,
+      "grad_norm": 0.11052409559488297,
+      "learning_rate": 0.00019616106220233801,
+      "loss": 0.2064,
+      "step": 1335
+    },
+    {
+      "epoch": 0.09639597388073162,
+      "grad_norm": 0.08219371736049652,
+      "learning_rate": 0.00019615817578294128,
+      "loss": 0.196,
+      "step": 1336
+    },
+    {
+      "epoch": 0.09646812655579205,
+      "grad_norm": 0.12721070647239685,
+      "learning_rate": 0.0001961552893635445,
+      "loss": 0.1684,
+      "step": 1337
+    },
+    {
+      "epoch": 0.09654027923085248,
+      "grad_norm": 0.10690361261367798,
+      "learning_rate": 0.00019615240294414778,
+      "loss": 0.1265,
+      "step": 1338
+    },
+    {
+      "epoch": 0.09661243190591291,
+      "grad_norm": 0.0963577851653099,
+      "learning_rate": 0.00019614951652475107,
+      "loss": 0.1894,
+      "step": 1339
+    },
+    {
+      "epoch": 0.09668458458097334,
+      "grad_norm": 0.11837514489889145,
+      "learning_rate": 0.00019614663010535433,
+      "loss": 0.2115,
+      "step": 1340
+    },
+    {
+      "epoch": 0.09675673725603377,
+      "grad_norm": 0.09708958864212036,
+      "learning_rate": 0.0001961437436859576,
+      "loss": 0.1593,
+      "step": 1341
+    },
+    {
+      "epoch": 0.09682888993109419,
+      "grad_norm": 0.10255219042301178,
+      "learning_rate": 0.00019614085726656083,
+      "loss": 0.1921,
+      "step": 1342
+    },
+    {
+      "epoch": 0.09690104260615462,
+      "grad_norm": 0.12087555229663849,
+      "learning_rate": 0.0001961379708471641,
+      "loss": 0.2046,
+      "step": 1343
+    },
+    {
+      "epoch": 0.09697319528121505,
+      "grad_norm": 0.11907682567834854,
+      "learning_rate": 0.00019613508442776735,
+      "loss": 0.2054,
+      "step": 1344
+    },
+    {
+      "epoch": 0.09704534795627548,
+      "grad_norm": 0.09736671298742294,
+      "learning_rate": 0.00019613219800837062,
+      "loss": 0.1358,
+      "step": 1345
+    },
+    {
+      "epoch": 0.09711750063133591,
+      "grad_norm": 0.1320803314447403,
+      "learning_rate": 0.0001961293115889739,
+      "loss": 0.1826,
+      "step": 1346
+    },
+    {
+      "epoch": 0.09718965330639634,
+      "grad_norm": 0.1130470335483551,
+      "learning_rate": 0.00019612642516957714,
+      "loss": 0.1957,
+      "step": 1347
+    },
+    {
+      "epoch": 0.09726180598145676,
+      "grad_norm": 0.08177877962589264,
+      "learning_rate": 0.0001961235387501804,
+      "loss": 0.1725,
+      "step": 1348
+    },
+    {
+      "epoch": 0.09733395865651719,
+      "grad_norm": 0.0829310342669487,
+      "learning_rate": 0.00019612065233078367,
+      "loss": 0.1714,
+      "step": 1349
+    },
+    {
+      "epoch": 0.09740611133157762,
+      "grad_norm": 0.1222817674279213,
+      "learning_rate": 0.00019611776591138693,
+      "loss": 0.2691,
+      "step": 1350
+    },
+    {
+      "epoch": 0.09747826400663805,
+      "grad_norm": 0.07808643579483032,
+      "learning_rate": 0.0001961148794919902,
+      "loss": 0.2056,
+      "step": 1351
+    },
+    {
+      "epoch": 0.09755041668169848,
+      "grad_norm": 0.08548199385404587,
+      "learning_rate": 0.00019611199307259346,
+      "loss": 0.1849,
+      "step": 1352
+    },
+    {
+      "epoch": 0.0976225693567589,
+      "grad_norm": 0.10797590017318726,
+      "learning_rate": 0.00019610910665319672,
+      "loss": 0.1903,
+      "step": 1353
+    },
+    {
+      "epoch": 0.09769472203181934,
+      "grad_norm": 0.13164952397346497,
+      "learning_rate": 0.00019610622023379998,
+      "loss": 0.1855,
+      "step": 1354
+    },
+    {
+      "epoch": 0.09776687470687975,
+      "grad_norm": 0.08189984411001205,
+      "learning_rate": 0.00019610333381440325,
+      "loss": 0.1786,
+      "step": 1355
+    },
+    {
+      "epoch": 0.09783902738194018,
+      "grad_norm": 0.08770643919706345,
+      "learning_rate": 0.0001961004473950065,
+      "loss": 0.1575,
+      "step": 1356
+    },
+    {
+      "epoch": 0.09791118005700061,
+      "grad_norm": 0.09954666346311569,
+      "learning_rate": 0.00019609756097560977,
+      "loss": 0.1713,
+      "step": 1357
+    },
+    {
+      "epoch": 0.09798333273206104,
+      "grad_norm": 0.08523159474134445,
+      "learning_rate": 0.000196094674556213,
+      "loss": 0.1947,
+      "step": 1358
+    },
+    {
+      "epoch": 0.09805548540712147,
+      "grad_norm": 0.08452267944812775,
+      "learning_rate": 0.00019609178813681627,
+      "loss": 0.1361,
+      "step": 1359
+    },
+    {
+      "epoch": 0.0981276380821819,
+      "grad_norm": 0.0849485918879509,
+      "learning_rate": 0.00019608890171741956,
+      "loss": 0.1569,
+      "step": 1360
+    },
+    {
+      "epoch": 0.09819979075724232,
+      "grad_norm": 0.10117647051811218,
+      "learning_rate": 0.00019608601529802282,
+      "loss": 0.1745,
+      "step": 1361
+    },
+    {
+      "epoch": 0.09827194343230275,
+      "grad_norm": 0.10660509765148163,
+      "learning_rate": 0.0001960831288786261,
+      "loss": 0.1587,
+      "step": 1362
+    },
+    {
+      "epoch": 0.09834409610736318,
+      "grad_norm": 0.10959257185459137,
+      "learning_rate": 0.00019608024245922932,
+      "loss": 0.1666,
+      "step": 1363
+    },
+    {
+      "epoch": 0.09841624878242361,
+      "grad_norm": 0.11396210640668869,
+      "learning_rate": 0.00019607735603983259,
+      "loss": 0.1734,
+      "step": 1364
+    },
+    {
+      "epoch": 0.09848840145748404,
+      "grad_norm": 0.11623997986316681,
+      "learning_rate": 0.00019607446962043585,
+      "loss": 0.1813,
+      "step": 1365
+    },
+    {
+      "epoch": 0.09856055413254447,
+      "grad_norm": 0.08269207179546356,
+      "learning_rate": 0.0001960715832010391,
+      "loss": 0.1407,
+      "step": 1366
+    },
+    {
+      "epoch": 0.09863270680760489,
+      "grad_norm": 0.11096511036157608,
+      "learning_rate": 0.0001960686967816424,
+      "loss": 0.1442,
+      "step": 1367
+    },
+    {
+      "epoch": 0.09870485948266532,
+      "grad_norm": 0.07984766364097595,
+      "learning_rate": 0.00019606581036224564,
+      "loss": 0.1939,
+      "step": 1368
+    },
+    {
+      "epoch": 0.09877701215772575,
+      "grad_norm": 0.08203259110450745,
+      "learning_rate": 0.0001960629239428489,
+      "loss": 0.1831,
+      "step": 1369
+    },
+    {
+      "epoch": 0.09884916483278618,
+      "grad_norm": 0.09003271162509918,
+      "learning_rate": 0.00019606003752345216,
+      "loss": 0.1968,
+      "step": 1370
+    },
+    {
+      "epoch": 0.0989213175078466,
+      "grad_norm": 0.10677336156368256,
+      "learning_rate": 0.00019605715110405543,
+      "loss": 0.1771,
+      "step": 1371
+    },
+    {
+      "epoch": 0.09899347018290704,
+      "grad_norm": 0.08796536922454834,
+      "learning_rate": 0.0001960542646846587,
+      "loss": 0.1318,
+      "step": 1372
+    },
+    {
+      "epoch": 0.09906562285796745,
+      "grad_norm": 0.09083075076341629,
+      "learning_rate": 0.00019605137826526195,
+      "loss": 0.1625,
+      "step": 1373
+    },
+    {
+      "epoch": 0.09913777553302788,
+      "grad_norm": 0.0895177498459816,
+      "learning_rate": 0.00019604849184586521,
+      "loss": 0.1992,
+      "step": 1374
+    },
+    {
+      "epoch": 0.09920992820808831,
+      "grad_norm": 0.08662980794906616,
+      "learning_rate": 0.00019604560542646848,
+      "loss": 0.1895,
+      "step": 1375
+    },
+    {
+      "epoch": 0.09928208088314874,
+      "grad_norm": 0.11467055231332779,
+      "learning_rate": 0.00019604271900707174,
+      "loss": 0.1559,
+      "step": 1376
+    },
+    {
+      "epoch": 0.09935423355820917,
+      "grad_norm": 0.09523959457874298,
+      "learning_rate": 0.000196039832587675,
+      "loss": 0.1757,
+      "step": 1377
+    },
+    {
+      "epoch": 0.0994263862332696,
+      "grad_norm": 0.08383305370807648,
+      "learning_rate": 0.00019603694616827827,
+      "loss": 0.2024,
+      "step": 1378
+    },
+    {
+      "epoch": 0.09949853890833002,
+      "grad_norm": 0.09364939481019974,
+      "learning_rate": 0.00019603405974888153,
+      "loss": 0.2114,
+      "step": 1379
+    },
+    {
+      "epoch": 0.09957069158339045,
+      "grad_norm": 0.11210822314023972,
+      "learning_rate": 0.00019603117332948476,
+      "loss": 0.2107,
+      "step": 1380
+    },
+    {
+      "epoch": 0.09964284425845088,
+      "grad_norm": 0.11405359208583832,
+      "learning_rate": 0.00019602828691008805,
+      "loss": 0.1591,
+      "step": 1381
+    },
+    {
+      "epoch": 0.09971499693351131,
+      "grad_norm": 0.0980084240436554,
+      "learning_rate": 0.00019602540049069132,
+      "loss": 0.2372,
+      "step": 1382
+    },
+    {
+      "epoch": 0.09978714960857174,
+      "grad_norm": 0.08842863887548447,
+      "learning_rate": 0.00019602251407129458,
+      "loss": 0.1548,
+      "step": 1383
+    },
+    {
+      "epoch": 0.09985930228363217,
+      "grad_norm": 0.08733103424310684,
+      "learning_rate": 0.00019601962765189784,
+      "loss": 0.1947,
+      "step": 1384
+    },
+    {
+      "epoch": 0.0999314549586926,
+      "grad_norm": 0.1021757498383522,
+      "learning_rate": 0.00019601674123250108,
+      "loss": 0.1453,
+      "step": 1385
+    },
+    {
+      "epoch": 0.10000360763375302,
+      "grad_norm": 0.09830335527658463,
+      "learning_rate": 0.00019601385481310434,
+      "loss": 0.1332,
+      "step": 1386
+    },
+    {
+      "epoch": 0.10007576030881345,
+      "grad_norm": 0.12369471043348312,
+      "learning_rate": 0.0001960109683937076,
+      "loss": 0.1892,
+      "step": 1387
+    },
+    {
+      "epoch": 0.10014791298387388,
+      "grad_norm": 0.111944779753685,
+      "learning_rate": 0.0001960080819743109,
+      "loss": 0.1579,
+      "step": 1388
+    },
+    {
+      "epoch": 0.10022006565893431,
+      "grad_norm": 0.10621371865272522,
+      "learning_rate": 0.00019600519555491416,
+      "loss": 0.1645,
+      "step": 1389
+    },
+    {
+      "epoch": 0.10029221833399474,
+      "grad_norm": 0.14481788873672485,
+      "learning_rate": 0.0001960023091355174,
+      "loss": 0.1319,
+      "step": 1390
+    },
+    {
+      "epoch": 0.10036437100905517,
+      "grad_norm": 0.10226688534021378,
+      "learning_rate": 0.00019599942271612066,
+      "loss": 0.1653,
+      "step": 1391
+    },
+    {
+      "epoch": 0.10043652368411558,
+      "grad_norm": 0.2072441279888153,
+      "learning_rate": 0.00019599653629672392,
+      "loss": 0.1961,
+      "step": 1392
+    },
+    {
+      "epoch": 0.10050867635917601,
+      "grad_norm": 0.10382064431905746,
+      "learning_rate": 0.00019599364987732718,
+      "loss": 0.1648,
+      "step": 1393
+    },
+    {
+      "epoch": 0.10058082903423644,
+      "grad_norm": 0.12104596942663193,
+      "learning_rate": 0.00019599076345793045,
+      "loss": 0.1877,
+      "step": 1394
+    },
+    {
+      "epoch": 0.10065298170929687,
+      "grad_norm": 0.10014254599809647,
+      "learning_rate": 0.0001959878770385337,
+      "loss": 0.1435,
+      "step": 1395
+    },
+    {
+      "epoch": 0.1007251343843573,
+      "grad_norm": 0.09614317119121552,
+      "learning_rate": 0.00019598499061913697,
+      "loss": 0.1442,
+      "step": 1396
+    },
+    {
+      "epoch": 0.10079728705941773,
+      "grad_norm": 0.0885712131857872,
+      "learning_rate": 0.00019598210419974023,
+      "loss": 0.1523,
+      "step": 1397
+    },
+    {
+      "epoch": 0.10086943973447815,
+      "grad_norm": 0.10141981393098831,
+      "learning_rate": 0.0001959792177803435,
+      "loss": 0.1827,
+      "step": 1398
+    },
+    {
+      "epoch": 0.10094159240953858,
+      "grad_norm": 0.12346911430358887,
+      "learning_rate": 0.00019597633136094676,
+      "loss": 0.1847,
+      "step": 1399
+    },
+    {
+      "epoch": 0.10101374508459901,
+      "grad_norm": 0.0943661779165268,
+      "learning_rate": 0.00019597344494155002,
+      "loss": 0.1962,
+      "step": 1400
+    },
+    {
+      "epoch": 0.10108589775965944,
+      "grad_norm": 0.1327252984046936,
+      "learning_rate": 0.00019597055852215326,
+      "loss": 0.2174,
+      "step": 1401
+    },
+    {
+      "epoch": 0.10115805043471987,
+      "grad_norm": 0.09227154403924942,
+      "learning_rate": 0.00019596767210275655,
+      "loss": 0.1983,
+      "step": 1402
+    },
+    {
+      "epoch": 0.1012302031097803,
+      "grad_norm": 0.11135019361972809,
+      "learning_rate": 0.0001959647856833598,
+      "loss": 0.1826,
+      "step": 1403
+    },
+    {
+      "epoch": 0.10130235578484072,
+      "grad_norm": 0.10142301023006439,
+      "learning_rate": 0.00019596189926396308,
+      "loss": 0.1814,
+      "step": 1404
+    },
+    {
+      "epoch": 0.10137450845990115,
+      "grad_norm": 0.12080994993448257,
+      "learning_rate": 0.00019595901284456634,
+      "loss": 0.2221,
+      "step": 1405
+    },
+    {
+      "epoch": 0.10144666113496158,
+      "grad_norm": 0.09118806570768356,
+      "learning_rate": 0.00019595612642516957,
+      "loss": 0.1475,
+      "step": 1406
+    },
+    {
+      "epoch": 0.10151881381002201,
+      "grad_norm": 0.10375121235847473,
+      "learning_rate": 0.00019595324000577284,
+      "loss": 0.1894,
+      "step": 1407
+    },
+    {
+      "epoch": 0.10159096648508244,
+      "grad_norm": 0.10166884958744049,
+      "learning_rate": 0.0001959503535863761,
+      "loss": 0.1522,
+      "step": 1408
+    },
+    {
+      "epoch": 0.10166311916014287,
+      "grad_norm": 0.09178225696086884,
+      "learning_rate": 0.0001959474671669794,
+      "loss": 0.1603,
+      "step": 1409
+    },
+    {
+      "epoch": 0.10173527183520328,
+      "grad_norm": 0.09689350426197052,
+      "learning_rate": 0.00019594458074758265,
+      "loss": 0.1607,
+      "step": 1410
+    },
+    {
+      "epoch": 0.10180742451026371,
+      "grad_norm": 0.1341812163591385,
+      "learning_rate": 0.0001959416943281859,
+      "loss": 0.1851,
+      "step": 1411
+    },
+    {
+      "epoch": 0.10187957718532414,
+      "grad_norm": 0.10188182443380356,
+      "learning_rate": 0.00019593880790878915,
+      "loss": 0.1766,
+      "step": 1412
+    },
+    {
+      "epoch": 0.10195172986038457,
+      "grad_norm": 0.1186114251613617,
+      "learning_rate": 0.00019593592148939241,
+      "loss": 0.1471,
+      "step": 1413
+    },
+    {
+      "epoch": 0.102023882535445,
+      "grad_norm": 0.10667675733566284,
+      "learning_rate": 0.00019593303506999568,
+      "loss": 0.1608,
+      "step": 1414
+    },
+    {
+      "epoch": 0.10209603521050543,
+      "grad_norm": 0.09607324004173279,
+      "learning_rate": 0.00019593014865059894,
+      "loss": 0.1984,
+      "step": 1415
+    },
+    {
+      "epoch": 0.10216818788556586,
+      "grad_norm": 0.09958678483963013,
+      "learning_rate": 0.0001959272622312022,
+      "loss": 0.1772,
+      "step": 1416
+    },
+    {
+      "epoch": 0.10224034056062628,
+      "grad_norm": 0.09940075874328613,
+      "learning_rate": 0.00019592437581180547,
+      "loss": 0.1814,
+      "step": 1417
+    },
+    {
+      "epoch": 0.10231249323568671,
+      "grad_norm": 0.09523070603609085,
+      "learning_rate": 0.00019592148939240873,
+      "loss": 0.2013,
+      "step": 1418
+    },
+    {
+      "epoch": 0.10238464591074714,
+      "grad_norm": 0.10605626553297043,
+      "learning_rate": 0.000195918602973012,
+      "loss": 0.1434,
+      "step": 1419
+    },
+    {
+      "epoch": 0.10245679858580757,
+      "grad_norm": 0.11710896342992783,
+      "learning_rate": 0.00019591571655361525,
+      "loss": 0.1903,
+      "step": 1420
+    },
+    {
+      "epoch": 0.102528951260868,
+      "grad_norm": 0.07783154398202896,
+      "learning_rate": 0.00019591283013421852,
+      "loss": 0.1334,
+      "step": 1421
+    },
+    {
+      "epoch": 0.10260110393592843,
+      "grad_norm": 0.10030199587345123,
+      "learning_rate": 0.00019590994371482175,
+      "loss": 0.185,
+      "step": 1422
+    },
+    {
+      "epoch": 0.10267325661098885,
+      "grad_norm": 0.11000876873731613,
+      "learning_rate": 0.00019590705729542504,
+      "loss": 0.1661,
+      "step": 1423
+    },
+    {
+      "epoch": 0.10274540928604928,
+      "grad_norm": 0.10521721839904785,
+      "learning_rate": 0.0001959041708760283,
+      "loss": 0.2145,
+      "step": 1424
+    },
+    {
+      "epoch": 0.10281756196110971,
+      "grad_norm": 0.07904649525880814,
+      "learning_rate": 0.00019590128445663157,
+      "loss": 0.1375,
+      "step": 1425
+    },
+    {
+      "epoch": 0.10288971463617014,
+      "grad_norm": 0.0979461818933487,
+      "learning_rate": 0.00019589839803723483,
+      "loss": 0.1924,
+      "step": 1426
+    },
+    {
+      "epoch": 0.10296186731123057,
+      "grad_norm": 0.07898340374231339,
+      "learning_rate": 0.00019589551161783807,
+      "loss": 0.1944,
+      "step": 1427
+    },
+    {
+      "epoch": 0.103034019986291,
+      "grad_norm": 0.1010308638215065,
+      "learning_rate": 0.00019589262519844133,
+      "loss": 0.1628,
+      "step": 1428
+    },
+    {
+      "epoch": 0.10310617266135141,
+      "grad_norm": 0.10152356326580048,
+      "learning_rate": 0.0001958897387790446,
+      "loss": 0.1568,
+      "step": 1429
+    },
+    {
+      "epoch": 0.10317832533641184,
+      "grad_norm": 0.08201948553323746,
+      "learning_rate": 0.00019588685235964788,
+      "loss": 0.1974,
+      "step": 1430
+    },
+    {
+      "epoch": 0.10325047801147227,
+      "grad_norm": 0.11100131273269653,
+      "learning_rate": 0.00019588396594025115,
+      "loss": 0.1299,
+      "step": 1431
+    },
+    {
+      "epoch": 0.1033226306865327,
+      "grad_norm": 0.1156645119190216,
+      "learning_rate": 0.00019588107952085438,
+      "loss": 0.1461,
+      "step": 1432
+    },
+    {
+      "epoch": 0.10339478336159313,
+      "grad_norm": 0.13311316072940826,
+      "learning_rate": 0.00019587819310145765,
+      "loss": 0.1769,
+      "step": 1433
+    },
+    {
+      "epoch": 0.10346693603665356,
+      "grad_norm": 0.145923912525177,
+      "learning_rate": 0.0001958753066820609,
+      "loss": 0.1783,
+      "step": 1434
+    },
+    {
+      "epoch": 0.10353908871171398,
+      "grad_norm": 0.1302771270275116,
+      "learning_rate": 0.00019587242026266417,
+      "loss": 0.1602,
+      "step": 1435
+    },
+    {
+      "epoch": 0.10361124138677441,
+      "grad_norm": 0.11546266824007034,
+      "learning_rate": 0.00019586953384326743,
+      "loss": 0.1572,
+      "step": 1436
+    },
+    {
+      "epoch": 0.10368339406183484,
+      "grad_norm": 0.11403997242450714,
+      "learning_rate": 0.0001958666474238707,
+      "loss": 0.209,
+      "step": 1437
+    },
+    {
+      "epoch": 0.10375554673689527,
+      "grad_norm": 0.10422119498252869,
+      "learning_rate": 0.00019586376100447396,
+      "loss": 0.1745,
+      "step": 1438
+    },
+    {
+      "epoch": 0.1038276994119557,
+      "grad_norm": 0.10391309857368469,
+      "learning_rate": 0.00019586087458507722,
+      "loss": 0.1888,
+      "step": 1439
+    },
+    {
+      "epoch": 0.10389985208701613,
+      "grad_norm": 0.09338228404521942,
+      "learning_rate": 0.00019585798816568049,
+      "loss": 0.1704,
+      "step": 1440
+    },
+    {
+      "epoch": 0.10397200476207655,
+      "grad_norm": 0.12580829858779907,
+      "learning_rate": 0.00019585510174628375,
+      "loss": 0.2125,
+      "step": 1441
+    },
+    {
+      "epoch": 0.10404415743713698,
+      "grad_norm": 0.08979323506355286,
+      "learning_rate": 0.000195852215326887,
+      "loss": 0.1669,
+      "step": 1442
+    },
+    {
+      "epoch": 0.10411631011219741,
+      "grad_norm": 0.08331722021102905,
+      "learning_rate": 0.00019584932890749025,
+      "loss": 0.1573,
+      "step": 1443
+    },
+    {
+      "epoch": 0.10418846278725784,
+      "grad_norm": 0.09525292366743088,
+      "learning_rate": 0.00019584644248809354,
+      "loss": 0.1159,
+      "step": 1444
+    },
+    {
+      "epoch": 0.10426061546231827,
+      "grad_norm": 0.09168519824743271,
+      "learning_rate": 0.0001958435560686968,
+      "loss": 0.1735,
+      "step": 1445
+    },
+    {
+      "epoch": 0.1043327681373787,
+      "grad_norm": 0.1023101806640625,
+      "learning_rate": 0.00019584066964930006,
+      "loss": 0.1785,
+      "step": 1446
+    },
+    {
+      "epoch": 0.10440492081243911,
+      "grad_norm": 0.10439193993806839,
+      "learning_rate": 0.00019583778322990333,
+      "loss": 0.1829,
+      "step": 1447
+    },
+    {
+      "epoch": 0.10447707348749954,
+      "grad_norm": 0.13283102214336395,
+      "learning_rate": 0.00019583489681050656,
+      "loss": 0.2259,
+      "step": 1448
+    },
+    {
+      "epoch": 0.10454922616255997,
+      "grad_norm": 0.11387287080287933,
+      "learning_rate": 0.00019583201039110983,
+      "loss": 0.1728,
+      "step": 1449
+    },
+    {
+      "epoch": 0.1046213788376204,
+      "grad_norm": 0.10596080124378204,
+      "learning_rate": 0.0001958291239717131,
+      "loss": 0.1797,
+      "step": 1450
+    },
+    {
+      "epoch": 0.10469353151268083,
+      "grad_norm": 0.10325146466493607,
+      "learning_rate": 0.00019582623755231638,
+      "loss": 0.1658,
+      "step": 1451
+    },
+    {
+      "epoch": 0.10476568418774126,
+      "grad_norm": 0.10266296565532684,
+      "learning_rate": 0.00019582335113291964,
+      "loss": 0.1042,
+      "step": 1452
+    },
+    {
+      "epoch": 0.1048378368628017,
+      "grad_norm": 0.08678191900253296,
+      "learning_rate": 0.00019582046471352288,
+      "loss": 0.1679,
+      "step": 1453
+    },
+    {
+      "epoch": 0.10490998953786211,
+      "grad_norm": 0.07487280666828156,
+      "learning_rate": 0.00019581757829412614,
+      "loss": 0.1272,
+      "step": 1454
+    },
+    {
+      "epoch": 0.10498214221292254,
+      "grad_norm": 0.09336668252944946,
+      "learning_rate": 0.0001958146918747294,
+      "loss": 0.1733,
+      "step": 1455
+    },
+    {
+      "epoch": 0.10505429488798297,
+      "grad_norm": 0.10310094803571701,
+      "learning_rate": 0.00019581180545533267,
+      "loss": 0.2009,
+      "step": 1456
+    },
+    {
+      "epoch": 0.1051264475630434,
+      "grad_norm": 0.13999375700950623,
+      "learning_rate": 0.00019580891903593593,
+      "loss": 0.1613,
+      "step": 1457
+    },
+    {
+      "epoch": 0.10519860023810383,
+      "grad_norm": 0.1124359667301178,
+      "learning_rate": 0.0001958060326165392,
+      "loss": 0.1823,
+      "step": 1458
+    },
+    {
+      "epoch": 0.10527075291316426,
+      "grad_norm": 0.11623510718345642,
+      "learning_rate": 0.00019580314619714245,
+      "loss": 0.1867,
+      "step": 1459
+    },
+    {
+      "epoch": 0.10534290558822468,
+      "grad_norm": 0.11208704859018326,
+      "learning_rate": 0.00019580025977774572,
+      "loss": 0.1965,
+      "step": 1460
+    },
+    {
+      "epoch": 0.10541505826328511,
+      "grad_norm": 0.10755287855863571,
+      "learning_rate": 0.00019579737335834898,
+      "loss": 0.189,
+      "step": 1461
+    },
+    {
+      "epoch": 0.10548721093834554,
+      "grad_norm": 0.09834279119968414,
+      "learning_rate": 0.00019579448693895224,
+      "loss": 0.1299,
+      "step": 1462
+    },
+    {
+      "epoch": 0.10555936361340597,
+      "grad_norm": 0.09488647431135178,
+      "learning_rate": 0.0001957916005195555,
+      "loss": 0.1659,
+      "step": 1463
+    },
+    {
+      "epoch": 0.1056315162884664,
+      "grad_norm": 0.1207553967833519,
+      "learning_rate": 0.00019578871410015874,
+      "loss": 0.1402,
+      "step": 1464
+    },
+    {
+      "epoch": 0.10570366896352683,
+      "grad_norm": 0.11781546473503113,
+      "learning_rate": 0.00019578582768076203,
+      "loss": 0.1758,
+      "step": 1465
+    },
+    {
+      "epoch": 0.10577582163858724,
+      "grad_norm": 0.11281391978263855,
+      "learning_rate": 0.0001957829412613653,
+      "loss": 0.1741,
+      "step": 1466
+    },
+    {
+      "epoch": 0.10584797431364767,
+      "grad_norm": 0.1315668672323227,
+      "learning_rate": 0.00019578005484196856,
+      "loss": 0.1777,
+      "step": 1467
+    },
+    {
+      "epoch": 0.1059201269887081,
+      "grad_norm": 0.1111454963684082,
+      "learning_rate": 0.00019577716842257182,
+      "loss": 0.1368,
+      "step": 1468
+    },
+    {
+      "epoch": 0.10599227966376853,
+      "grad_norm": 0.11257988959550858,
+      "learning_rate": 0.00019577428200317506,
+      "loss": 0.1658,
+      "step": 1469
+    },
+    {
+      "epoch": 0.10606443233882897,
+      "grad_norm": 0.08634955435991287,
+      "learning_rate": 0.00019577139558377832,
+      "loss": 0.1979,
+      "step": 1470
+    },
+    {
+      "epoch": 0.1061365850138894,
+      "grad_norm": 0.08781882375478745,
+      "learning_rate": 0.00019576850916438158,
+      "loss": 0.1314,
+      "step": 1471
+    },
+    {
+      "epoch": 0.10620873768894981,
+      "grad_norm": 0.08333154767751694,
+      "learning_rate": 0.00019576562274498487,
+      "loss": 0.1581,
+      "step": 1472
+    },
+    {
+      "epoch": 0.10628089036401024,
+      "grad_norm": 0.10107365995645523,
+      "learning_rate": 0.00019576273632558814,
+      "loss": 0.1643,
+      "step": 1473
+    },
+    {
+      "epoch": 0.10635304303907067,
+      "grad_norm": 0.08048343658447266,
+      "learning_rate": 0.00019575984990619137,
+      "loss": 0.1469,
+      "step": 1474
+    },
+    {
+      "epoch": 0.1064251957141311,
+      "grad_norm": 0.08940555900335312,
+      "learning_rate": 0.00019575696348679463,
+      "loss": 0.2077,
+      "step": 1475
+    },
+    {
+      "epoch": 0.10649734838919153,
+      "grad_norm": 0.08923406898975372,
+      "learning_rate": 0.0001957540770673979,
+      "loss": 0.1601,
+      "step": 1476
+    },
+    {
+      "epoch": 0.10656950106425196,
+      "grad_norm": 0.07388593256473541,
+      "learning_rate": 0.00019575119064800116,
+      "loss": 0.1766,
+      "step": 1477
+    },
+    {
+      "epoch": 0.10664165373931238,
+      "grad_norm": 0.1094571202993393,
+      "learning_rate": 0.00019574830422860442,
+      "loss": 0.2324,
+      "step": 1478
+    },
+    {
+      "epoch": 0.10671380641437281,
+      "grad_norm": 0.10609541088342667,
+      "learning_rate": 0.00019574541780920769,
+      "loss": 0.1805,
+      "step": 1479
+    },
+    {
+      "epoch": 0.10678595908943324,
+      "grad_norm": 0.08510361611843109,
+      "learning_rate": 0.00019574253138981095,
+      "loss": 0.1686,
+      "step": 1480
+    },
+    {
+      "epoch": 0.10685811176449367,
+      "grad_norm": 0.11509072035551071,
+      "learning_rate": 0.0001957396449704142,
+      "loss": 0.1218,
+      "step": 1481
+    },
+    {
+      "epoch": 0.1069302644395541,
+      "grad_norm": 0.10149054229259491,
+      "learning_rate": 0.00019573675855101747,
+      "loss": 0.175,
+      "step": 1482
+    },
+    {
+      "epoch": 0.10700241711461453,
+      "grad_norm": 0.09114514291286469,
+      "learning_rate": 0.00019573387213162074,
+      "loss": 0.1839,
+      "step": 1483
+    },
+    {
+      "epoch": 0.10707456978967496,
+      "grad_norm": 0.08684788644313812,
+      "learning_rate": 0.000195730985712224,
+      "loss": 0.2015,
+      "step": 1484
+    },
+    {
+      "epoch": 0.10714672246473537,
+      "grad_norm": 0.09528748691082001,
+      "learning_rate": 0.00019572809929282724,
+      "loss": 0.2082,
+      "step": 1485
+    },
+    {
+      "epoch": 0.1072188751397958,
+      "grad_norm": 0.11334405094385147,
+      "learning_rate": 0.0001957252128734305,
+      "loss": 0.1956,
+      "step": 1486
+    },
+    {
+      "epoch": 0.10729102781485624,
+      "grad_norm": 0.1072673425078392,
+      "learning_rate": 0.0001957223264540338,
+      "loss": 0.1729,
+      "step": 1487
+    },
+    {
+      "epoch": 0.10736318048991667,
+      "grad_norm": 0.08538205176591873,
+      "learning_rate": 0.00019571944003463705,
+      "loss": 0.2203,
+      "step": 1488
+    },
+    {
+      "epoch": 0.1074353331649771,
+      "grad_norm": 0.11392519623041153,
+      "learning_rate": 0.00019571655361524031,
+      "loss": 0.1691,
+      "step": 1489
+    },
+    {
+      "epoch": 0.10750748584003753,
+      "grad_norm": 0.10131382197141647,
+      "learning_rate": 0.00019571366719584355,
+      "loss": 0.1073,
+      "step": 1490
+    },
+    {
+      "epoch": 0.10757963851509794,
+      "grad_norm": 0.09362057596445084,
+      "learning_rate": 0.00019571078077644681,
+      "loss": 0.1805,
+      "step": 1491
+    },
+    {
+      "epoch": 0.10765179119015837,
+      "grad_norm": 0.11688234657049179,
+      "learning_rate": 0.00019570789435705008,
+      "loss": 0.1806,
+      "step": 1492
+    },
+    {
+      "epoch": 0.1077239438652188,
+      "grad_norm": 0.09200257807970047,
+      "learning_rate": 0.00019570500793765334,
+      "loss": 0.2012,
+      "step": 1493
+    },
+    {
+      "epoch": 0.10779609654027923,
+      "grad_norm": 0.08040358126163483,
+      "learning_rate": 0.00019570212151825663,
+      "loss": 0.1734,
+      "step": 1494
+    },
+    {
+      "epoch": 0.10786824921533966,
+      "grad_norm": 0.09139800816774368,
+      "learning_rate": 0.00019569923509885987,
+      "loss": 0.1674,
+      "step": 1495
+    },
+    {
+      "epoch": 0.10794040189040009,
+      "grad_norm": 0.0903816744685173,
+      "learning_rate": 0.00019569634867946313,
+      "loss": 0.1911,
+      "step": 1496
+    },
+    {
+      "epoch": 0.10801255456546051,
+      "grad_norm": 0.10706637799739838,
+      "learning_rate": 0.0001956934622600664,
+      "loss": 0.177,
+      "step": 1497
+    },
+    {
+      "epoch": 0.10808470724052094,
+      "grad_norm": 0.12878459692001343,
+      "learning_rate": 0.00019569057584066965,
+      "loss": 0.1312,
+      "step": 1498
+    },
+    {
+      "epoch": 0.10815685991558137,
+      "grad_norm": 0.09276431053876877,
+      "learning_rate": 0.00019568768942127292,
+      "loss": 0.169,
+      "step": 1499
+    },
+    {
+      "epoch": 0.1082290125906418,
+      "grad_norm": 0.12874440848827362,
+      "learning_rate": 0.00019568480300187618,
+      "loss": 0.1653,
+      "step": 1500
+    },
+    {
+      "epoch": 0.10830116526570223,
+      "grad_norm": 0.09412311762571335,
+      "learning_rate": 0.00019568191658247944,
+      "loss": 0.1277,
+      "step": 1501
+    },
+    {
+      "epoch": 0.10837331794076266,
+      "grad_norm": 0.11970049142837524,
+      "learning_rate": 0.0001956790301630827,
+      "loss": 0.1501,
+      "step": 1502
+    },
+    {
+      "epoch": 0.10844547061582308,
+      "grad_norm": 0.10821323841810226,
+      "learning_rate": 0.00019567614374368597,
+      "loss": 0.1541,
+      "step": 1503
+    },
+    {
+      "epoch": 0.1085176232908835,
+      "grad_norm": 0.08639746904373169,
+      "learning_rate": 0.00019567325732428923,
+      "loss": 0.1369,
+      "step": 1504
+    },
+    {
+      "epoch": 0.10858977596594394,
+      "grad_norm": 0.12217281758785248,
+      "learning_rate": 0.0001956703709048925,
+      "loss": 0.1869,
+      "step": 1505
+    },
+    {
+      "epoch": 0.10866192864100437,
+      "grad_norm": 0.0951509103178978,
+      "learning_rate": 0.00019566748448549576,
+      "loss": 0.16,
+      "step": 1506
+    },
+    {
+      "epoch": 0.1087340813160648,
+      "grad_norm": 0.07016367465257645,
+      "learning_rate": 0.000195664598066099,
+      "loss": 0.1543,
+      "step": 1507
+    },
+    {
+      "epoch": 0.10880623399112523,
+      "grad_norm": 0.0987938717007637,
+      "learning_rate": 0.00019566171164670228,
+      "loss": 0.1773,
+      "step": 1508
+    },
+    {
+      "epoch": 0.10887838666618564,
+      "grad_norm": 0.10179755836725235,
+      "learning_rate": 0.00019565882522730555,
+      "loss": 0.1666,
+      "step": 1509
+    },
+    {
+      "epoch": 0.10895053934124607,
+      "grad_norm": 0.0916648656129837,
+      "learning_rate": 0.0001956559388079088,
+      "loss": 0.1513,
+      "step": 1510
+    },
+    {
+      "epoch": 0.1090226920163065,
+      "grad_norm": 0.11163599789142609,
+      "learning_rate": 0.00019565305238851207,
+      "loss": 0.1825,
+      "step": 1511
+    },
+    {
+      "epoch": 0.10909484469136693,
+      "grad_norm": 0.11444026231765747,
+      "learning_rate": 0.0001956501659691153,
+      "loss": 0.2145,
+      "step": 1512
+    },
+    {
+      "epoch": 0.10916699736642736,
+      "grad_norm": 0.09019803255796432,
+      "learning_rate": 0.00019564727954971857,
+      "loss": 0.2128,
+      "step": 1513
+    },
+    {
+      "epoch": 0.10923915004148779,
+      "grad_norm": 0.08074397593736649,
+      "learning_rate": 0.00019564439313032183,
+      "loss": 0.1521,
+      "step": 1514
+    },
+    {
+      "epoch": 0.10931130271654822,
+      "grad_norm": 0.09058935195207596,
+      "learning_rate": 0.00019564150671092512,
+      "loss": 0.1599,
+      "step": 1515
+    },
+    {
+      "epoch": 0.10938345539160864,
+      "grad_norm": 0.0978584885597229,
+      "learning_rate": 0.0001956386202915284,
+      "loss": 0.2016,
+      "step": 1516
+    },
+    {
+      "epoch": 0.10945560806666907,
+      "grad_norm": 0.0993848517537117,
+      "learning_rate": 0.00019563573387213162,
+      "loss": 0.145,
+      "step": 1517
+    },
+    {
+      "epoch": 0.1095277607417295,
+      "grad_norm": 0.09804774075746536,
+      "learning_rate": 0.00019563284745273489,
+      "loss": 0.1784,
+      "step": 1518
+    },
+    {
+      "epoch": 0.10959991341678993,
+      "grad_norm": 0.10656802356243134,
+      "learning_rate": 0.00019562996103333815,
+      "loss": 0.177,
+      "step": 1519
+    },
+    {
+      "epoch": 0.10967206609185036,
+      "grad_norm": 0.11479037255048752,
+      "learning_rate": 0.0001956270746139414,
+      "loss": 0.1352,
+      "step": 1520
+    },
+    {
+      "epoch": 0.10974421876691079,
+      "grad_norm": 0.09832447022199631,
+      "learning_rate": 0.00019562418819454467,
+      "loss": 0.1955,
+      "step": 1521
+    },
+    {
+      "epoch": 0.1098163714419712,
+      "grad_norm": 0.07826386392116547,
+      "learning_rate": 0.00019562130177514794,
+      "loss": 0.1794,
+      "step": 1522
+    },
+    {
+      "epoch": 0.10988852411703164,
+      "grad_norm": 0.10118061304092407,
+      "learning_rate": 0.0001956184153557512,
+      "loss": 0.1235,
+      "step": 1523
+    },
+    {
+      "epoch": 0.10996067679209207,
+      "grad_norm": 0.08155392110347748,
+      "learning_rate": 0.00019561552893635446,
+      "loss": 0.1757,
+      "step": 1524
+    },
+    {
+      "epoch": 0.1100328294671525,
+      "grad_norm": 0.10516910254955292,
+      "learning_rate": 0.00019561264251695773,
+      "loss": 0.1905,
+      "step": 1525
+    },
+    {
+      "epoch": 0.11010498214221293,
+      "grad_norm": 0.09425098448991776,
+      "learning_rate": 0.000195609756097561,
+      "loss": 0.1523,
+      "step": 1526
+    },
+    {
+      "epoch": 0.11017713481727336,
+      "grad_norm": 0.08117407560348511,
+      "learning_rate": 0.00019560686967816425,
+      "loss": 0.1487,
+      "step": 1527
+    },
+    {
+      "epoch": 0.11024928749233377,
+      "grad_norm": 0.07771728187799454,
+      "learning_rate": 0.0001956039832587675,
+      "loss": 0.1793,
+      "step": 1528
+    },
+    {
+      "epoch": 0.1103214401673942,
+      "grad_norm": 0.10780569165945053,
+      "learning_rate": 0.00019560109683937078,
+      "loss": 0.2023,
+      "step": 1529
+    },
+    {
+      "epoch": 0.11039359284245463,
+      "grad_norm": 0.08715084940195084,
+      "learning_rate": 0.00019559821041997404,
+      "loss": 0.1709,
+      "step": 1530
+    },
+    {
+      "epoch": 0.11046574551751506,
+      "grad_norm": 0.09841153770685196,
+      "learning_rate": 0.0001955953240005773,
+      "loss": 0.1775,
+      "step": 1531
+    },
+    {
+      "epoch": 0.11053789819257549,
+      "grad_norm": 0.08543652296066284,
+      "learning_rate": 0.00019559243758118057,
+      "loss": 0.1415,
+      "step": 1532
+    },
+    {
+      "epoch": 0.11061005086763592,
+      "grad_norm": 0.11666995286941528,
+      "learning_rate": 0.0001955895511617838,
+      "loss": 0.168,
+      "step": 1533
+    },
+    {
+      "epoch": 0.11068220354269634,
+      "grad_norm": 0.1458568274974823,
+      "learning_rate": 0.00019558666474238707,
+      "loss": 0.1881,
+      "step": 1534
+    },
+    {
+      "epoch": 0.11075435621775677,
+      "grad_norm": 0.08567783981561661,
+      "learning_rate": 0.00019558377832299033,
+      "loss": 0.1388,
+      "step": 1535
+    },
+    {
+      "epoch": 0.1108265088928172,
+      "grad_norm": 0.09290947765111923,
+      "learning_rate": 0.00019558089190359362,
+      "loss": 0.1727,
+      "step": 1536
+    },
+    {
+      "epoch": 0.11089866156787763,
+      "grad_norm": 0.09171988070011139,
+      "learning_rate": 0.00019557800548419688,
+      "loss": 0.1762,
+      "step": 1537
+    },
+    {
+      "epoch": 0.11097081424293806,
+      "grad_norm": 0.1283859759569168,
+      "learning_rate": 0.00019557511906480012,
+      "loss": 0.1589,
+      "step": 1538
+    },
+    {
+      "epoch": 0.11104296691799849,
+      "grad_norm": 0.10872713476419449,
+      "learning_rate": 0.00019557223264540338,
+      "loss": 0.1557,
+      "step": 1539
+    },
+    {
+      "epoch": 0.1111151195930589,
+      "grad_norm": 0.10511759668588638,
+      "learning_rate": 0.00019556934622600664,
+      "loss": 0.1625,
+      "step": 1540
+    },
+    {
+      "epoch": 0.11118727226811934,
+      "grad_norm": 0.13609780371189117,
+      "learning_rate": 0.0001955664598066099,
+      "loss": 0.2419,
+      "step": 1541
+    },
+    {
+      "epoch": 0.11125942494317977,
+      "grad_norm": 0.09898494929075241,
+      "learning_rate": 0.00019556357338721317,
+      "loss": 0.1473,
+      "step": 1542
+    },
+    {
+      "epoch": 0.1113315776182402,
+      "grad_norm": 0.09187084436416626,
+      "learning_rate": 0.00019556068696781643,
+      "loss": 0.158,
+      "step": 1543
+    },
+    {
+      "epoch": 0.11140373029330063,
+      "grad_norm": 0.08768919110298157,
+      "learning_rate": 0.0001955578005484197,
+      "loss": 0.1775,
+      "step": 1544
+    },
+    {
+      "epoch": 0.11147588296836106,
+      "grad_norm": 0.10175874084234238,
+      "learning_rate": 0.00019555491412902296,
+      "loss": 0.1393,
+      "step": 1545
+    },
+    {
+      "epoch": 0.11154803564342149,
+      "grad_norm": 0.07539024949073792,
+      "learning_rate": 0.00019555202770962622,
+      "loss": 0.1957,
+      "step": 1546
+    },
+    {
+      "epoch": 0.1116201883184819,
+      "grad_norm": 0.11006683856248856,
+      "learning_rate": 0.00019554914129022948,
+      "loss": 0.1453,
+      "step": 1547
+    },
+    {
+      "epoch": 0.11169234099354233,
+      "grad_norm": 0.1028423011302948,
+      "learning_rate": 0.00019554625487083275,
+      "loss": 0.2072,
+      "step": 1548
+    },
+    {
+      "epoch": 0.11176449366860276,
+      "grad_norm": 0.09654255211353302,
+      "learning_rate": 0.00019554336845143598,
+      "loss": 0.1575,
+      "step": 1549
+    },
+    {
+      "epoch": 0.11183664634366319,
+      "grad_norm": 0.09816180914640427,
+      "learning_rate": 0.00019554048203203927,
+      "loss": 0.1517,
+      "step": 1550
+    },
+    {
+      "epoch": 0.11190879901872362,
+      "grad_norm": 0.08485947549343109,
+      "learning_rate": 0.00019553759561264253,
+      "loss": 0.1849,
+      "step": 1551
+    },
+    {
+      "epoch": 0.11198095169378405,
+      "grad_norm": 0.09766913950443268,
+      "learning_rate": 0.0001955347091932458,
+      "loss": 0.1676,
+      "step": 1552
+    },
+    {
+      "epoch": 0.11205310436884447,
+      "grad_norm": 0.11628931015729904,
+      "learning_rate": 0.00019553182277384906,
+      "loss": 0.1864,
+      "step": 1553
+    },
+    {
+      "epoch": 0.1121252570439049,
+      "grad_norm": 0.10539587587118149,
+      "learning_rate": 0.0001955289363544523,
+      "loss": 0.1292,
+      "step": 1554
+    },
+    {
+      "epoch": 0.11219740971896533,
+      "grad_norm": 0.1077449768781662,
+      "learning_rate": 0.00019552604993505556,
+      "loss": 0.1268,
+      "step": 1555
+    },
+    {
+      "epoch": 0.11226956239402576,
+      "grad_norm": 0.11250253021717072,
+      "learning_rate": 0.00019552316351565882,
+      "loss": 0.1739,
+      "step": 1556
+    },
+    {
+      "epoch": 0.11234171506908619,
+      "grad_norm": 0.0985347256064415,
+      "learning_rate": 0.0001955202770962621,
+      "loss": 0.1718,
+      "step": 1557
+    },
+    {
+      "epoch": 0.11241386774414662,
+      "grad_norm": 0.10934750735759735,
+      "learning_rate": 0.00019551739067686538,
+      "loss": 0.1863,
+      "step": 1558
+    },
+    {
+      "epoch": 0.11248602041920704,
+      "grad_norm": 0.10609246790409088,
+      "learning_rate": 0.0001955145042574686,
+      "loss": 0.1707,
+      "step": 1559
+    },
+    {
+      "epoch": 0.11255817309426747,
+      "grad_norm": 0.10776393860578537,
+      "learning_rate": 0.00019551161783807187,
+      "loss": 0.181,
+      "step": 1560
+    },
+    {
+      "epoch": 0.1126303257693279,
+      "grad_norm": 0.1034221202135086,
+      "learning_rate": 0.00019550873141867514,
+      "loss": 0.1665,
+      "step": 1561
+    },
+    {
+      "epoch": 0.11270247844438833,
+      "grad_norm": 0.11250422894954681,
+      "learning_rate": 0.0001955058449992784,
+      "loss": 0.1729,
+      "step": 1562
+    },
+    {
+      "epoch": 0.11277463111944876,
+      "grad_norm": 0.10397494584321976,
+      "learning_rate": 0.00019550295857988166,
+      "loss": 0.1848,
+      "step": 1563
+    },
+    {
+      "epoch": 0.11284678379450919,
+      "grad_norm": 0.11455897241830826,
+      "learning_rate": 0.00019550007216048493,
+      "loss": 0.1534,
+      "step": 1564
+    },
+    {
+      "epoch": 0.1129189364695696,
+      "grad_norm": 0.07431942969560623,
+      "learning_rate": 0.0001954971857410882,
+      "loss": 0.1523,
+      "step": 1565
+    },
+    {
+      "epoch": 0.11299108914463003,
+      "grad_norm": 0.11153899133205414,
+      "learning_rate": 0.00019549429932169145,
+      "loss": 0.1257,
+      "step": 1566
+    },
+    {
+      "epoch": 0.11306324181969046,
+      "grad_norm": 0.1270129382610321,
+      "learning_rate": 0.00019549141290229471,
+      "loss": 0.1419,
+      "step": 1567
+    },
+    {
+      "epoch": 0.1131353944947509,
+      "grad_norm": 0.11523669213056564,
+      "learning_rate": 0.00019548852648289798,
+      "loss": 0.1652,
+      "step": 1568
+    },
+    {
+      "epoch": 0.11320754716981132,
+      "grad_norm": 0.11013611406087875,
+      "learning_rate": 0.00019548564006350124,
+      "loss": 0.1768,
+      "step": 1569
+    },
+    {
+      "epoch": 0.11327969984487175,
+      "grad_norm": 0.12150274962186813,
+      "learning_rate": 0.00019548275364410448,
+      "loss": 0.184,
+      "step": 1570
+    },
+    {
+      "epoch": 0.11335185251993217,
+      "grad_norm": 0.09226622432470322,
+      "learning_rate": 0.00019547986722470777,
+      "loss": 0.2027,
+      "step": 1571
+    },
+    {
+      "epoch": 0.1134240051949926,
+      "grad_norm": 0.1071082353591919,
+      "learning_rate": 0.00019547698080531103,
+      "loss": 0.1827,
+      "step": 1572
+    },
+    {
+      "epoch": 0.11349615787005303,
+      "grad_norm": 0.10402612388134003,
+      "learning_rate": 0.0001954740943859143,
+      "loss": 0.1321,
+      "step": 1573
+    },
+    {
+      "epoch": 0.11356831054511346,
+      "grad_norm": 0.09049825370311737,
+      "learning_rate": 0.00019547120796651755,
+      "loss": 0.1583,
+      "step": 1574
+    },
+    {
+      "epoch": 0.11364046322017389,
+      "grad_norm": 0.10656756907701492,
+      "learning_rate": 0.0001954683215471208,
+      "loss": 0.1589,
+      "step": 1575
+    },
+    {
+      "epoch": 0.11371261589523432,
+      "grad_norm": 0.0985848605632782,
+      "learning_rate": 0.00019546543512772405,
+      "loss": 0.1589,
+      "step": 1576
+    },
+    {
+      "epoch": 0.11378476857029475,
+      "grad_norm": 0.11201207339763641,
+      "learning_rate": 0.00019546254870832732,
+      "loss": 0.1625,
+      "step": 1577
+    },
+    {
+      "epoch": 0.11385692124535517,
+      "grad_norm": 0.12026341259479523,
+      "learning_rate": 0.0001954596622889306,
+      "loss": 0.1625,
+      "step": 1578
+    },
+    {
+      "epoch": 0.1139290739204156,
+      "grad_norm": 0.12375893443822861,
+      "learning_rate": 0.00019545677586953387,
+      "loss": 0.1375,
+      "step": 1579
+    },
+    {
+      "epoch": 0.11400122659547603,
+      "grad_norm": 0.10440582782030106,
+      "learning_rate": 0.0001954538894501371,
+      "loss": 0.1857,
+      "step": 1580
+    },
+    {
+      "epoch": 0.11407337927053646,
+      "grad_norm": 0.1234811544418335,
+      "learning_rate": 0.00019545100303074037,
+      "loss": 0.2006,
+      "step": 1581
+    },
+    {
+      "epoch": 0.11414553194559689,
+      "grad_norm": 0.07862017303705215,
+      "learning_rate": 0.00019544811661134363,
+      "loss": 0.1567,
+      "step": 1582
+    },
+    {
+      "epoch": 0.11421768462065732,
+      "grad_norm": 0.10096339136362076,
+      "learning_rate": 0.0001954452301919469,
+      "loss": 0.1572,
+      "step": 1583
+    },
+    {
+      "epoch": 0.11428983729571773,
+      "grad_norm": 0.08745244890451431,
+      "learning_rate": 0.00019544234377255016,
+      "loss": 0.1643,
+      "step": 1584
+    },
+    {
+      "epoch": 0.11436198997077816,
+      "grad_norm": 0.11481693387031555,
+      "learning_rate": 0.00019543945735315342,
+      "loss": 0.1985,
+      "step": 1585
+    },
+    {
+      "epoch": 0.1144341426458386,
+      "grad_norm": 0.09870748966932297,
+      "learning_rate": 0.00019543657093375668,
+      "loss": 0.2008,
+      "step": 1586
+    },
+    {
+      "epoch": 0.11450629532089902,
+      "grad_norm": 0.07589593529701233,
+      "learning_rate": 0.00019543368451435995,
+      "loss": 0.1787,
+      "step": 1587
+    },
+    {
+      "epoch": 0.11457844799595945,
+      "grad_norm": 0.08917496353387833,
+      "learning_rate": 0.0001954307980949632,
+      "loss": 0.1816,
+      "step": 1588
+    },
+    {
+      "epoch": 0.11465060067101988,
+      "grad_norm": 0.105776846408844,
+      "learning_rate": 0.00019542791167556647,
+      "loss": 0.1531,
+      "step": 1589
+    },
+    {
+      "epoch": 0.1147227533460803,
+      "grad_norm": 0.10567320138216019,
+      "learning_rate": 0.00019542502525616973,
+      "loss": 0.1735,
+      "step": 1590
+    },
+    {
+      "epoch": 0.11479490602114073,
+      "grad_norm": 0.12525592744350433,
+      "learning_rate": 0.00019542213883677297,
+      "loss": 0.2097,
+      "step": 1591
+    },
+    {
+      "epoch": 0.11486705869620116,
+      "grad_norm": 0.07959985733032227,
+      "learning_rate": 0.00019541925241737626,
+      "loss": 0.1516,
+      "step": 1592
+    },
+    {
+      "epoch": 0.11493921137126159,
+      "grad_norm": 0.08625461161136627,
+      "learning_rate": 0.00019541636599797952,
+      "loss": 0.1585,
+      "step": 1593
+    },
+    {
+      "epoch": 0.11501136404632202,
+      "grad_norm": 0.13847310841083527,
+      "learning_rate": 0.00019541347957858279,
+      "loss": 0.1463,
+      "step": 1594
+    },
+    {
+      "epoch": 0.11508351672138245,
+      "grad_norm": 0.10553795844316483,
+      "learning_rate": 0.00019541059315918605,
+      "loss": 0.1763,
+      "step": 1595
+    },
+    {
+      "epoch": 0.11515566939644287,
+      "grad_norm": 0.12277592718601227,
+      "learning_rate": 0.00019540770673978928,
+      "loss": 0.1423,
+      "step": 1596
+    },
+    {
+      "epoch": 0.1152278220715033,
+      "grad_norm": 0.11685627698898315,
+      "learning_rate": 0.00019540482032039255,
+      "loss": 0.1841,
+      "step": 1597
+    },
+    {
+      "epoch": 0.11529997474656373,
+      "grad_norm": 0.08955440670251846,
+      "learning_rate": 0.0001954019339009958,
+      "loss": 0.1408,
+      "step": 1598
+    },
+    {
+      "epoch": 0.11537212742162416,
+      "grad_norm": 0.10267322510480881,
+      "learning_rate": 0.0001953990474815991,
+      "loss": 0.1594,
+      "step": 1599
+    },
+    {
+      "epoch": 0.11544428009668459,
+      "grad_norm": 0.10553912818431854,
+      "learning_rate": 0.00019539616106220236,
+      "loss": 0.1533,
+      "step": 1600
+    },
+    {
+      "epoch": 0.11551643277174502,
+      "grad_norm": 0.11601997911930084,
+      "learning_rate": 0.0001953932746428056,
+      "loss": 0.1846,
+      "step": 1601
+    },
+    {
+      "epoch": 0.11558858544680543,
+      "grad_norm": 0.0766264870762825,
+      "learning_rate": 0.00019539038822340886,
+      "loss": 0.1782,
+      "step": 1602
+    },
+    {
+      "epoch": 0.11566073812186586,
+      "grad_norm": 0.09016980975866318,
+      "learning_rate": 0.00019538750180401213,
+      "loss": 0.1318,
+      "step": 1603
+    },
+    {
+      "epoch": 0.1157328907969263,
+      "grad_norm": 0.11082588136196136,
+      "learning_rate": 0.0001953846153846154,
+      "loss": 0.1902,
+      "step": 1604
+    },
+    {
+      "epoch": 0.11580504347198672,
+      "grad_norm": 0.11670586466789246,
+      "learning_rate": 0.00019538172896521865,
+      "loss": 0.1491,
+      "step": 1605
+    },
+    {
+      "epoch": 0.11587719614704715,
+      "grad_norm": 0.10419308394193649,
+      "learning_rate": 0.00019537884254582191,
+      "loss": 0.1363,
+      "step": 1606
+    },
+    {
+      "epoch": 0.11594934882210758,
+      "grad_norm": 0.08640503883361816,
+      "learning_rate": 0.00019537595612642518,
+      "loss": 0.1925,
+      "step": 1607
+    },
+    {
+      "epoch": 0.11602150149716801,
+      "grad_norm": 0.08510327339172363,
+      "learning_rate": 0.00019537306970702844,
+      "loss": 0.1509,
+      "step": 1608
+    },
+    {
+      "epoch": 0.11609365417222843,
+      "grad_norm": 0.08421725779771805,
+      "learning_rate": 0.0001953701832876317,
+      "loss": 0.1933,
+      "step": 1609
+    },
+    {
+      "epoch": 0.11616580684728886,
+      "grad_norm": 0.13186711072921753,
+      "learning_rate": 0.00019536729686823497,
+      "loss": 0.1862,
+      "step": 1610
+    },
+    {
+      "epoch": 0.11623795952234929,
+      "grad_norm": 0.10276197642087936,
+      "learning_rate": 0.00019536441044883823,
+      "loss": 0.1773,
+      "step": 1611
+    },
+    {
+      "epoch": 0.11631011219740972,
+      "grad_norm": 0.09136662632226944,
+      "learning_rate": 0.0001953615240294415,
+      "loss": 0.1462,
+      "step": 1612
+    },
+    {
+      "epoch": 0.11638226487247015,
+      "grad_norm": 0.10753851383924484,
+      "learning_rate": 0.00019535863761004475,
+      "loss": 0.1734,
+      "step": 1613
+    },
+    {
+      "epoch": 0.11645441754753058,
+      "grad_norm": 0.08567678928375244,
+      "learning_rate": 0.00019535575119064802,
+      "loss": 0.1715,
+      "step": 1614
+    },
+    {
+      "epoch": 0.116526570222591,
+      "grad_norm": 0.10128544270992279,
+      "learning_rate": 0.00019535286477125128,
+      "loss": 0.1622,
+      "step": 1615
+    },
+    {
+      "epoch": 0.11659872289765143,
+      "grad_norm": 0.09036710858345032,
+      "learning_rate": 0.00019534997835185454,
+      "loss": 0.1923,
+      "step": 1616
+    },
+    {
+      "epoch": 0.11667087557271186,
+      "grad_norm": 0.09953843057155609,
+      "learning_rate": 0.0001953470919324578,
+      "loss": 0.1499,
+      "step": 1617
+    },
+    {
+      "epoch": 0.11674302824777229,
+      "grad_norm": 0.10273440927267075,
+      "learning_rate": 0.00019534420551306104,
+      "loss": 0.1543,
+      "step": 1618
+    },
+    {
+      "epoch": 0.11681518092283272,
+      "grad_norm": 0.1256304830312729,
+      "learning_rate": 0.0001953413190936643,
+      "loss": 0.1577,
+      "step": 1619
+    },
+    {
+      "epoch": 0.11688733359789315,
+      "grad_norm": 0.1289820522069931,
+      "learning_rate": 0.0001953384326742676,
+      "loss": 0.1909,
+      "step": 1620
+    },
+    {
+      "epoch": 0.11695948627295356,
+      "grad_norm": 0.11304235458374023,
+      "learning_rate": 0.00019533554625487086,
+      "loss": 0.1849,
+      "step": 1621
+    },
+    {
+      "epoch": 0.117031638948014,
+      "grad_norm": 0.11579146981239319,
+      "learning_rate": 0.00019533265983547412,
+      "loss": 0.153,
+      "step": 1622
+    },
+    {
+      "epoch": 0.11710379162307442,
+      "grad_norm": 0.0958210751414299,
+      "learning_rate": 0.00019532977341607736,
+      "loss": 0.118,
+      "step": 1623
+    },
+    {
+      "epoch": 0.11717594429813485,
+      "grad_norm": 0.1111057698726654,
+      "learning_rate": 0.00019532688699668062,
+      "loss": 0.1566,
+      "step": 1624
+    },
+    {
+      "epoch": 0.11724809697319528,
+      "grad_norm": 0.12752115726470947,
+      "learning_rate": 0.00019532400057728388,
+      "loss": 0.163,
+      "step": 1625
+    },
+    {
+      "epoch": 0.11732024964825571,
+      "grad_norm": 0.09364619851112366,
+      "learning_rate": 0.00019532111415788715,
+      "loss": 0.1347,
+      "step": 1626
+    },
+    {
+      "epoch": 0.11739240232331613,
+      "grad_norm": 0.10146146267652512,
+      "learning_rate": 0.00019531822773849044,
+      "loss": 0.2043,
+      "step": 1627
+    },
+    {
+      "epoch": 0.11746455499837656,
+      "grad_norm": 0.09532111883163452,
+      "learning_rate": 0.00019531534131909367,
+      "loss": 0.1745,
+      "step": 1628
+    },
+    {
+      "epoch": 0.11753670767343699,
+      "grad_norm": 0.14005210995674133,
+      "learning_rate": 0.00019531245489969693,
+      "loss": 0.1598,
+      "step": 1629
+    },
+    {
+      "epoch": 0.11760886034849742,
+      "grad_norm": 0.08359965682029724,
+      "learning_rate": 0.0001953095684803002,
+      "loss": 0.1939,
+      "step": 1630
+    },
+    {
+      "epoch": 0.11768101302355785,
+      "grad_norm": 0.10559289902448654,
+      "learning_rate": 0.00019530668206090346,
+      "loss": 0.2026,
+      "step": 1631
+    },
+    {
+      "epoch": 0.11775316569861828,
+      "grad_norm": 0.12911143898963928,
+      "learning_rate": 0.00019530379564150672,
+      "loss": 0.1926,
+      "step": 1632
+    },
+    {
+      "epoch": 0.1178253183736787,
+      "grad_norm": 0.10912565886974335,
+      "learning_rate": 0.00019530090922210999,
+      "loss": 0.1911,
+      "step": 1633
+    },
+    {
+      "epoch": 0.11789747104873913,
+      "grad_norm": 0.0978940799832344,
+      "learning_rate": 0.00019529802280271325,
+      "loss": 0.1991,
+      "step": 1634
+    },
+    {
+      "epoch": 0.11796962372379956,
+      "grad_norm": 0.09110507369041443,
+      "learning_rate": 0.0001952951363833165,
+      "loss": 0.1923,
+      "step": 1635
+    },
+    {
+      "epoch": 0.11804177639885999,
+      "grad_norm": 0.1002359539270401,
+      "learning_rate": 0.00019529224996391977,
+      "loss": 0.1648,
+      "step": 1636
+    },
+    {
+      "epoch": 0.11811392907392042,
+      "grad_norm": 0.1239672303199768,
+      "learning_rate": 0.00019528936354452304,
+      "loss": 0.1559,
+      "step": 1637
+    },
+    {
+      "epoch": 0.11818608174898085,
+      "grad_norm": 0.08740098774433136,
+      "learning_rate": 0.0001952864771251263,
+      "loss": 0.1789,
+      "step": 1638
+    },
+    {
+      "epoch": 0.11825823442404128,
+      "grad_norm": 0.10871398448944092,
+      "learning_rate": 0.00019528359070572954,
+      "loss": 0.178,
+      "step": 1639
+    },
+    {
+      "epoch": 0.1183303870991017,
+      "grad_norm": 0.11047673970460892,
+      "learning_rate": 0.0001952807042863328,
+      "loss": 0.2121,
+      "step": 1640
+    },
+    {
+      "epoch": 0.11840253977416212,
+      "grad_norm": 0.10063087940216064,
+      "learning_rate": 0.0001952778178669361,
+      "loss": 0.167,
+      "step": 1641
+    },
+    {
+      "epoch": 0.11847469244922255,
+      "grad_norm": 0.09995663166046143,
+      "learning_rate": 0.00019527493144753935,
+      "loss": 0.1805,
+      "step": 1642
+    },
+    {
+      "epoch": 0.11854684512428298,
+      "grad_norm": 0.10936806350946426,
+      "learning_rate": 0.00019527204502814262,
+      "loss": 0.1924,
+      "step": 1643
+    },
+    {
+      "epoch": 0.11861899779934341,
+      "grad_norm": 0.08394020050764084,
+      "learning_rate": 0.00019526915860874585,
+      "loss": 0.154,
+      "step": 1644
+    },
+    {
+      "epoch": 0.11869115047440384,
+      "grad_norm": 0.11076677590608597,
+      "learning_rate": 0.00019526627218934911,
+      "loss": 0.1793,
+      "step": 1645
+    },
+    {
+      "epoch": 0.11876330314946426,
+      "grad_norm": 0.10604029893875122,
+      "learning_rate": 0.00019526338576995238,
+      "loss": 0.1375,
+      "step": 1646
+    },
+    {
+      "epoch": 0.11883545582452469,
+      "grad_norm": 0.1203065738081932,
+      "learning_rate": 0.00019526049935055564,
+      "loss": 0.2158,
+      "step": 1647
+    },
+    {
+      "epoch": 0.11890760849958512,
+      "grad_norm": 0.09037487953901291,
+      "learning_rate": 0.00019525761293115893,
+      "loss": 0.1883,
+      "step": 1648
+    },
+    {
+      "epoch": 0.11897976117464555,
+      "grad_norm": 0.07776744663715363,
+      "learning_rate": 0.00019525472651176217,
+      "loss": 0.1345,
+      "step": 1649
+    },
+    {
+      "epoch": 0.11905191384970598,
+      "grad_norm": 0.07779388129711151,
+      "learning_rate": 0.00019525184009236543,
+      "loss": 0.1715,
+      "step": 1650
+    },
+    {
+      "epoch": 0.11912406652476641,
+      "grad_norm": 0.08709803223609924,
+      "learning_rate": 0.0001952489536729687,
+      "loss": 0.1528,
+      "step": 1651
+    },
+    {
+      "epoch": 0.11919621919982683,
+      "grad_norm": 0.11145090311765671,
+      "learning_rate": 0.00019524606725357195,
+      "loss": 0.1767,
+      "step": 1652
+    },
+    {
+      "epoch": 0.11926837187488726,
+      "grad_norm": 0.10255642980337143,
+      "learning_rate": 0.00019524318083417522,
+      "loss": 0.1929,
+      "step": 1653
+    },
+    {
+      "epoch": 0.11934052454994769,
+      "grad_norm": 0.09450104832649231,
+      "learning_rate": 0.00019524029441477848,
+      "loss": 0.1896,
+      "step": 1654
+    },
+    {
+      "epoch": 0.11941267722500812,
+      "grad_norm": 0.11130858957767487,
+      "learning_rate": 0.00019523740799538174,
+      "loss": 0.1638,
+      "step": 1655
+    },
+    {
+      "epoch": 0.11948482990006855,
+      "grad_norm": 0.08688686043024063,
+      "learning_rate": 0.000195234521575985,
+      "loss": 0.1483,
+      "step": 1656
+    },
+    {
+      "epoch": 0.11955698257512898,
+      "grad_norm": 0.11365038901567459,
+      "learning_rate": 0.00019523163515658827,
+      "loss": 0.184,
+      "step": 1657
+    },
+    {
+      "epoch": 0.1196291352501894,
+      "grad_norm": 0.10233087092638016,
+      "learning_rate": 0.00019522874873719153,
+      "loss": 0.1884,
+      "step": 1658
+    },
+    {
+      "epoch": 0.11970128792524982,
+      "grad_norm": 0.09503836929798126,
+      "learning_rate": 0.0001952258623177948,
+      "loss": 0.1698,
+      "step": 1659
+    },
+    {
+      "epoch": 0.11977344060031025,
+      "grad_norm": 0.07523205131292343,
+      "learning_rate": 0.00019522297589839803,
+      "loss": 0.1337,
+      "step": 1660
+    },
+    {
+      "epoch": 0.11984559327537068,
+      "grad_norm": 0.08313417434692383,
+      "learning_rate": 0.0001952200894790013,
+      "loss": 0.1379,
+      "step": 1661
+    },
+    {
+      "epoch": 0.11991774595043111,
+      "grad_norm": 0.09840066730976105,
+      "learning_rate": 0.00019521720305960458,
+      "loss": 0.173,
+      "step": 1662
+    },
+    {
+      "epoch": 0.11998989862549155,
+      "grad_norm": 0.12853272259235382,
+      "learning_rate": 0.00019521431664020785,
+      "loss": 0.1646,
+      "step": 1663
+    },
+    {
+      "epoch": 0.12006205130055196,
+      "grad_norm": 0.10483130812644958,
+      "learning_rate": 0.0001952114302208111,
+      "loss": 0.1382,
+      "step": 1664
+    },
+    {
+      "epoch": 0.12013420397561239,
+      "grad_norm": 0.14726237952709198,
+      "learning_rate": 0.00019520854380141435,
+      "loss": 0.225,
+      "step": 1665
+    },
+    {
+      "epoch": 0.12020635665067282,
+      "grad_norm": 0.1127578616142273,
+      "learning_rate": 0.0001952056573820176,
+      "loss": 0.1695,
+      "step": 1666
+    },
+    {
+      "epoch": 0.12027850932573325,
+      "grad_norm": 0.11055461317300797,
+      "learning_rate": 0.00019520277096262087,
+      "loss": 0.1303,
+      "step": 1667
+    },
+    {
+      "epoch": 0.12035066200079368,
+      "grad_norm": 0.11091664433479309,
+      "learning_rate": 0.00019519988454322413,
+      "loss": 0.1941,
+      "step": 1668
+    },
+    {
+      "epoch": 0.12042281467585411,
+      "grad_norm": 0.10414857417345047,
+      "learning_rate": 0.0001951969981238274,
+      "loss": 0.1641,
+      "step": 1669
+    },
+    {
+      "epoch": 0.12049496735091454,
+      "grad_norm": 0.09459809213876724,
+      "learning_rate": 0.00019519411170443066,
+      "loss": 0.1808,
+      "step": 1670
+    },
+    {
+      "epoch": 0.12056712002597496,
+      "grad_norm": 0.09319958090782166,
+      "learning_rate": 0.00019519122528503392,
+      "loss": 0.1927,
+      "step": 1671
+    },
+    {
+      "epoch": 0.12063927270103539,
+      "grad_norm": 0.11097006499767303,
+      "learning_rate": 0.00019518833886563719,
+      "loss": 0.2317,
+      "step": 1672
+    },
+    {
+      "epoch": 0.12071142537609582,
+      "grad_norm": 0.11948166787624359,
+      "learning_rate": 0.00019518545244624045,
+      "loss": 0.1461,
+      "step": 1673
+    },
+    {
+      "epoch": 0.12078357805115625,
+      "grad_norm": 0.14143267273902893,
+      "learning_rate": 0.0001951825660268437,
+      "loss": 0.1854,
+      "step": 1674
+    },
+    {
+      "epoch": 0.12085573072621668,
+      "grad_norm": 0.08225080370903015,
+      "learning_rate": 0.00019517967960744697,
+      "loss": 0.1516,
+      "step": 1675
+    },
+    {
+      "epoch": 0.12092788340127711,
+      "grad_norm": 0.08731880784034729,
+      "learning_rate": 0.0001951767931880502,
+      "loss": 0.1624,
+      "step": 1676
+    },
+    {
+      "epoch": 0.12100003607633752,
+      "grad_norm": 0.11483090370893478,
+      "learning_rate": 0.0001951739067686535,
+      "loss": 0.2156,
+      "step": 1677
+    },
+    {
+      "epoch": 0.12107218875139795,
+      "grad_norm": 0.10485479235649109,
+      "learning_rate": 0.00019517102034925676,
+      "loss": 0.165,
+      "step": 1678
+    },
+    {
+      "epoch": 0.12114434142645839,
+      "grad_norm": 0.11514680087566376,
+      "learning_rate": 0.00019516813392986003,
+      "loss": 0.1726,
+      "step": 1679
+    },
+    {
+      "epoch": 0.12121649410151882,
+      "grad_norm": 0.10860565304756165,
+      "learning_rate": 0.0001951652475104633,
+      "loss": 0.1405,
+      "step": 1680
+    },
+    {
+      "epoch": 0.12128864677657925,
+      "grad_norm": 0.10668021440505981,
+      "learning_rate": 0.00019516236109106652,
+      "loss": 0.1898,
+      "step": 1681
+    },
+    {
+      "epoch": 0.12136079945163968,
+      "grad_norm": 0.0982346162199974,
+      "learning_rate": 0.0001951594746716698,
+      "loss": 0.1302,
+      "step": 1682
+    },
+    {
+      "epoch": 0.12143295212670009,
+      "grad_norm": 0.10028962790966034,
+      "learning_rate": 0.00019515658825227305,
+      "loss": 0.1509,
+      "step": 1683
+    },
+    {
+      "epoch": 0.12150510480176052,
+      "grad_norm": 0.10393217951059341,
+      "learning_rate": 0.00019515370183287634,
+      "loss": 0.1409,
+      "step": 1684
+    },
+    {
+      "epoch": 0.12157725747682095,
+      "grad_norm": 0.09106632322072983,
+      "learning_rate": 0.0001951508154134796,
+      "loss": 0.1388,
+      "step": 1685
+    },
+    {
+      "epoch": 0.12164941015188138,
+      "grad_norm": 0.10406849533319473,
+      "learning_rate": 0.00019514792899408284,
+      "loss": 0.2068,
+      "step": 1686
+    },
+    {
+      "epoch": 0.12172156282694181,
+      "grad_norm": 0.1421014368534088,
+      "learning_rate": 0.0001951450425746861,
+      "loss": 0.1819,
+      "step": 1687
+    },
+    {
+      "epoch": 0.12179371550200224,
+      "grad_norm": 0.09534769505262375,
+      "learning_rate": 0.00019514215615528937,
+      "loss": 0.1508,
+      "step": 1688
+    },
+    {
+      "epoch": 0.12186586817706266,
+      "grad_norm": 0.12619756162166595,
+      "learning_rate": 0.00019513926973589263,
+      "loss": 0.164,
+      "step": 1689
+    },
+    {
+      "epoch": 0.12193802085212309,
+      "grad_norm": 0.1002705991268158,
+      "learning_rate": 0.0001951363833164959,
+      "loss": 0.1623,
+      "step": 1690
+    },
+    {
+      "epoch": 0.12201017352718352,
+      "grad_norm": 0.08136925101280212,
+      "learning_rate": 0.00019513349689709915,
+      "loss": 0.1039,
+      "step": 1691
+    },
+    {
+      "epoch": 0.12208232620224395,
+      "grad_norm": 0.08619451522827148,
+      "learning_rate": 0.00019513061047770242,
+      "loss": 0.1516,
+      "step": 1692
+    },
+    {
+      "epoch": 0.12215447887730438,
+      "grad_norm": 0.0915481299161911,
+      "learning_rate": 0.00019512772405830568,
+      "loss": 0.163,
+      "step": 1693
+    },
+    {
+      "epoch": 0.12222663155236481,
+      "grad_norm": 0.10088937729597092,
+      "learning_rate": 0.00019512483763890894,
+      "loss": 0.1918,
+      "step": 1694
+    },
+    {
+      "epoch": 0.12229878422742523,
+      "grad_norm": 0.10313641279935837,
+      "learning_rate": 0.0001951219512195122,
+      "loss": 0.1931,
+      "step": 1695
+    },
+    {
+      "epoch": 0.12237093690248566,
+      "grad_norm": 0.11182332783937454,
+      "learning_rate": 0.00019511906480011547,
+      "loss": 0.1783,
+      "step": 1696
+    },
+    {
+      "epoch": 0.12244308957754609,
+      "grad_norm": 0.08624817430973053,
+      "learning_rate": 0.0001951161783807187,
+      "loss": 0.1667,
+      "step": 1697
+    },
+    {
+      "epoch": 0.12251524225260652,
+      "grad_norm": 0.09656798839569092,
+      "learning_rate": 0.000195113291961322,
+      "loss": 0.1418,
+      "step": 1698
+    },
+    {
+      "epoch": 0.12258739492766695,
+      "grad_norm": 0.10291645675897598,
+      "learning_rate": 0.00019511040554192526,
+      "loss": 0.1306,
+      "step": 1699
+    },
+    {
+      "epoch": 0.12265954760272738,
+      "grad_norm": 0.08263203501701355,
+      "learning_rate": 0.00019510751912252852,
+      "loss": 0.223,
+      "step": 1700
+    },
+    {
+      "epoch": 0.1227317002777878,
+      "grad_norm": 0.09930901229381561,
+      "learning_rate": 0.00019510463270313178,
+      "loss": 0.1213,
+      "step": 1701
+    },
+    {
+      "epoch": 0.12280385295284822,
+      "grad_norm": 0.08658596873283386,
+      "learning_rate": 0.00019510174628373502,
+      "loss": 0.1608,
+      "step": 1702
+    },
+    {
+      "epoch": 0.12287600562790865,
+      "grad_norm": 0.12086457759141922,
+      "learning_rate": 0.00019509885986433828,
+      "loss": 0.1929,
+      "step": 1703
+    },
+    {
+      "epoch": 0.12294815830296908,
+      "grad_norm": 0.09840729832649231,
+      "learning_rate": 0.00019509597344494154,
+      "loss": 0.1773,
+      "step": 1704
+    },
+    {
+      "epoch": 0.12302031097802951,
+      "grad_norm": 0.17573504149913788,
+      "learning_rate": 0.00019509308702554484,
+      "loss": 0.2211,
+      "step": 1705
+    },
+    {
+      "epoch": 0.12309246365308994,
+      "grad_norm": 0.11827810853719711,
+      "learning_rate": 0.0001950902006061481,
+      "loss": 0.1823,
+      "step": 1706
+    },
+    {
+      "epoch": 0.12316461632815037,
+      "grad_norm": 0.11534013599157333,
+      "learning_rate": 0.00019508731418675133,
+      "loss": 0.196,
+      "step": 1707
+    },
+    {
+      "epoch": 0.12323676900321079,
+      "grad_norm": 0.10985036194324493,
+      "learning_rate": 0.0001950844277673546,
+      "loss": 0.1829,
+      "step": 1708
+    },
+    {
+      "epoch": 0.12330892167827122,
+      "grad_norm": 0.09258478134870529,
+      "learning_rate": 0.00019508154134795786,
+      "loss": 0.2058,
+      "step": 1709
+    },
+    {
+      "epoch": 0.12338107435333165,
+      "grad_norm": 0.10214863717556,
+      "learning_rate": 0.00019507865492856112,
+      "loss": 0.2164,
+      "step": 1710
+    },
+    {
+      "epoch": 0.12345322702839208,
+      "grad_norm": 0.16184085607528687,
+      "learning_rate": 0.00019507576850916439,
+      "loss": 0.203,
+      "step": 1711
+    },
+    {
+      "epoch": 0.12352537970345251,
+      "grad_norm": 0.10491573065519333,
+      "learning_rate": 0.00019507288208976765,
+      "loss": 0.1248,
+      "step": 1712
+    },
+    {
+      "epoch": 0.12359753237851294,
+      "grad_norm": 0.11957216262817383,
+      "learning_rate": 0.0001950699956703709,
+      "loss": 0.1726,
+      "step": 1713
+    },
+    {
+      "epoch": 0.12366968505357336,
+      "grad_norm": 0.1047922745347023,
+      "learning_rate": 0.00019506710925097417,
+      "loss": 0.2082,
+      "step": 1714
+    },
+    {
+      "epoch": 0.12374183772863379,
+      "grad_norm": 0.10638313740491867,
+      "learning_rate": 0.00019506422283157744,
+      "loss": 0.1508,
+      "step": 1715
+    },
+    {
+      "epoch": 0.12381399040369422,
+      "grad_norm": 0.12371479719877243,
+      "learning_rate": 0.0001950613364121807,
+      "loss": 0.1322,
+      "step": 1716
+    },
+    {
+      "epoch": 0.12388614307875465,
+      "grad_norm": 0.13481566309928894,
+      "learning_rate": 0.00019505844999278396,
+      "loss": 0.1856,
+      "step": 1717
+    },
+    {
+      "epoch": 0.12395829575381508,
+      "grad_norm": 0.14135459065437317,
+      "learning_rate": 0.0001950555635733872,
+      "loss": 0.1781,
+      "step": 1718
+    },
+    {
+      "epoch": 0.1240304484288755,
+      "grad_norm": 0.13202592730522156,
+      "learning_rate": 0.0001950526771539905,
+      "loss": 0.1651,
+      "step": 1719
+    },
+    {
+      "epoch": 0.12410260110393592,
+      "grad_norm": 0.09290815889835358,
+      "learning_rate": 0.00019504979073459375,
+      "loss": 0.1565,
+      "step": 1720
+    },
+    {
+      "epoch": 0.12417475377899635,
+      "grad_norm": 0.10955124348402023,
+      "learning_rate": 0.00019504690431519701,
+      "loss": 0.1963,
+      "step": 1721
+    },
+    {
+      "epoch": 0.12424690645405678,
+      "grad_norm": 0.10231587290763855,
+      "learning_rate": 0.00019504401789580028,
+      "loss": 0.2123,
+      "step": 1722
+    },
+    {
+      "epoch": 0.12431905912911721,
+      "grad_norm": 0.1099744439125061,
+      "learning_rate": 0.0001950411314764035,
+      "loss": 0.1897,
+      "step": 1723
+    },
+    {
+      "epoch": 0.12439121180417764,
+      "grad_norm": 0.11467958241701126,
+      "learning_rate": 0.00019503824505700678,
+      "loss": 0.2005,
+      "step": 1724
+    },
+    {
+      "epoch": 0.12446336447923807,
+      "grad_norm": 0.10473181307315826,
+      "learning_rate": 0.00019503535863761004,
+      "loss": 0.2056,
+      "step": 1725
+    },
+    {
+      "epoch": 0.12453551715429849,
+      "grad_norm": 0.11245948821306229,
+      "learning_rate": 0.00019503247221821333,
+      "loss": 0.1327,
+      "step": 1726
+    },
+    {
+      "epoch": 0.12460766982935892,
+      "grad_norm": 0.10495847463607788,
+      "learning_rate": 0.0001950295857988166,
+      "loss": 0.1853,
+      "step": 1727
+    },
+    {
+      "epoch": 0.12467982250441935,
+      "grad_norm": 0.12626883387565613,
+      "learning_rate": 0.00019502669937941986,
+      "loss": 0.216,
+      "step": 1728
+    },
+    {
+      "epoch": 0.12475197517947978,
+      "grad_norm": 0.10064269602298737,
+      "learning_rate": 0.0001950238129600231,
+      "loss": 0.115,
+      "step": 1729
+    },
+    {
+      "epoch": 0.12482412785454021,
+      "grad_norm": 0.09452646970748901,
+      "learning_rate": 0.00019502092654062635,
+      "loss": 0.1808,
+      "step": 1730
+    },
+    {
+      "epoch": 0.12489628052960064,
+      "grad_norm": 0.1017569899559021,
+      "learning_rate": 0.00019501804012122962,
+      "loss": 0.1619,
+      "step": 1731
+    },
+    {
+      "epoch": 0.12496843320466107,
+      "grad_norm": 0.10112976282835007,
+      "learning_rate": 0.00019501515370183288,
+      "loss": 0.1686,
+      "step": 1732
+    },
+    {
+      "epoch": 0.1250405858797215,
+      "grad_norm": 0.10376875102519989,
+      "learning_rate": 0.00019501226728243617,
+      "loss": 0.1839,
+      "step": 1733
+    },
+    {
+      "epoch": 0.12511273855478192,
+      "grad_norm": 0.11961390823125839,
+      "learning_rate": 0.0001950093808630394,
+      "loss": 0.2095,
+      "step": 1734
+    },
+    {
+      "epoch": 0.12518489122984236,
+      "grad_norm": 0.10202952474355698,
+      "learning_rate": 0.00019500649444364267,
+      "loss": 0.1537,
+      "step": 1735
+    },
+    {
+      "epoch": 0.12525704390490278,
+      "grad_norm": 0.08299683779478073,
+      "learning_rate": 0.00019500360802424593,
+      "loss": 0.184,
+      "step": 1736
+    },
+    {
+      "epoch": 0.1253291965799632,
+      "grad_norm": 0.09358169138431549,
+      "learning_rate": 0.0001950007216048492,
+      "loss": 0.1424,
+      "step": 1737
+    },
+    {
+      "epoch": 0.12540134925502364,
+      "grad_norm": 0.08781565725803375,
+      "learning_rate": 0.00019499783518545246,
+      "loss": 0.1685,
+      "step": 1738
+    },
+    {
+      "epoch": 0.12547350193008405,
+      "grad_norm": 0.10147970169782639,
+      "learning_rate": 0.00019499494876605572,
+      "loss": 0.1333,
+      "step": 1739
+    },
+    {
+      "epoch": 0.1255456546051445,
+      "grad_norm": 0.12914858758449554,
+      "learning_rate": 0.00019499206234665898,
+      "loss": 0.1214,
+      "step": 1740
+    },
+    {
+      "epoch": 0.1256178072802049,
+      "grad_norm": 0.1447230875492096,
+      "learning_rate": 0.00019498917592726225,
+      "loss": 0.12,
+      "step": 1741
+    },
+    {
+      "epoch": 0.12568995995526533,
+      "grad_norm": 0.10296161472797394,
+      "learning_rate": 0.0001949862895078655,
+      "loss": 0.1166,
+      "step": 1742
+    },
+    {
+      "epoch": 0.12576211263032577,
+      "grad_norm": 0.1182079017162323,
+      "learning_rate": 0.00019498340308846877,
+      "loss": 0.1606,
+      "step": 1743
+    },
+    {
+      "epoch": 0.1258342653053862,
+      "grad_norm": 0.1383650302886963,
+      "learning_rate": 0.00019498051666907203,
+      "loss": 0.1936,
+      "step": 1744
+    },
+    {
+      "epoch": 0.12590641798044663,
+      "grad_norm": 0.09023472666740417,
+      "learning_rate": 0.00019497763024967527,
+      "loss": 0.1789,
+      "step": 1745
+    },
+    {
+      "epoch": 0.12597857065550705,
+      "grad_norm": 0.130020871758461,
+      "learning_rate": 0.00019497474383027853,
+      "loss": 0.213,
+      "step": 1746
+    },
+    {
+      "epoch": 0.1260507233305675,
+      "grad_norm": 0.10528901219367981,
+      "learning_rate": 0.00019497185741088182,
+      "loss": 0.1635,
+      "step": 1747
+    },
+    {
+      "epoch": 0.1261228760056279,
+      "grad_norm": 0.11983592808246613,
+      "learning_rate": 0.00019496897099148509,
+      "loss": 0.139,
+      "step": 1748
+    },
+    {
+      "epoch": 0.12619502868068833,
+      "grad_norm": 0.09582766890525818,
+      "learning_rate": 0.00019496608457208835,
+      "loss": 0.1395,
+      "step": 1749
+    },
+    {
+      "epoch": 0.12626718135574877,
+      "grad_norm": 0.12090190500020981,
+      "learning_rate": 0.00019496319815269159,
+      "loss": 0.1818,
+      "step": 1750
+    },
+    {
+      "epoch": 0.12633933403080919,
+      "grad_norm": 0.11621379107236862,
+      "learning_rate": 0.00019496031173329485,
+      "loss": 0.1754,
+      "step": 1751
+    },
+    {
+      "epoch": 0.12641148670586963,
+      "grad_norm": 0.1177566647529602,
+      "learning_rate": 0.0001949574253138981,
+      "loss": 0.1402,
+      "step": 1752
+    },
+    {
+      "epoch": 0.12648363938093005,
+      "grad_norm": 0.09794148057699203,
+      "learning_rate": 0.00019495453889450137,
+      "loss": 0.1986,
+      "step": 1753
+    },
+    {
+      "epoch": 0.12655579205599046,
+      "grad_norm": 0.11040861904621124,
+      "learning_rate": 0.00019495165247510466,
+      "loss": 0.145,
+      "step": 1754
+    },
+    {
+      "epoch": 0.1266279447310509,
+      "grad_norm": 0.08527898788452148,
+      "learning_rate": 0.0001949487660557079,
+      "loss": 0.158,
+      "step": 1755
+    },
+    {
+      "epoch": 0.12670009740611132,
+      "grad_norm": 0.11260313540697098,
+      "learning_rate": 0.00019494587963631116,
+      "loss": 0.1364,
+      "step": 1756
+    },
+    {
+      "epoch": 0.12677225008117177,
+      "grad_norm": 0.10999318212270737,
+      "learning_rate": 0.00019494299321691443,
+      "loss": 0.1579,
+      "step": 1757
+    },
+    {
+      "epoch": 0.12684440275623218,
+      "grad_norm": 0.11420169472694397,
+      "learning_rate": 0.0001949401067975177,
+      "loss": 0.1892,
+      "step": 1758
+    },
+    {
+      "epoch": 0.12691655543129263,
+      "grad_norm": 0.11526145786046982,
+      "learning_rate": 0.00019493722037812095,
+      "loss": 0.2043,
+      "step": 1759
+    },
+    {
+      "epoch": 0.12698870810635304,
+      "grad_norm": 0.1126892939209938,
+      "learning_rate": 0.00019493433395872421,
+      "loss": 0.1905,
+      "step": 1760
+    },
+    {
+      "epoch": 0.12706086078141346,
+      "grad_norm": 0.10353560000658035,
+      "learning_rate": 0.00019493144753932748,
+      "loss": 0.1542,
+      "step": 1761
+    },
+    {
+      "epoch": 0.1271330134564739,
+      "grad_norm": 0.0921681746840477,
+      "learning_rate": 0.00019492856111993074,
+      "loss": 0.1878,
+      "step": 1762
+    },
+    {
+      "epoch": 0.12720516613153432,
+      "grad_norm": 0.1161520853638649,
+      "learning_rate": 0.000194925674700534,
+      "loss": 0.1212,
+      "step": 1763
+    },
+    {
+      "epoch": 0.12727731880659476,
+      "grad_norm": 0.09371655434370041,
+      "learning_rate": 0.00019492278828113727,
+      "loss": 0.1605,
+      "step": 1764
+    },
+    {
+      "epoch": 0.12734947148165518,
+      "grad_norm": 0.07800782471895218,
+      "learning_rate": 0.00019491990186174053,
+      "loss": 0.1915,
+      "step": 1765
+    },
+    {
+      "epoch": 0.12742162415671562,
+      "grad_norm": 0.08994324505329132,
+      "learning_rate": 0.00019491701544234376,
+      "loss": 0.2124,
+      "step": 1766
+    },
+    {
+      "epoch": 0.12749377683177604,
+      "grad_norm": 0.09498997032642365,
+      "learning_rate": 0.00019491412902294703,
+      "loss": 0.1627,
+      "step": 1767
+    },
+    {
+      "epoch": 0.12756592950683646,
+      "grad_norm": 0.10948889702558517,
+      "learning_rate": 0.00019491124260355032,
+      "loss": 0.1443,
+      "step": 1768
+    },
+    {
+      "epoch": 0.1276380821818969,
+      "grad_norm": 0.10349909216165543,
+      "learning_rate": 0.00019490835618415358,
+      "loss": 0.1674,
+      "step": 1769
+    },
+    {
+      "epoch": 0.12771023485695732,
+      "grad_norm": 0.09799700230360031,
+      "learning_rate": 0.00019490546976475684,
+      "loss": 0.1926,
+      "step": 1770
+    },
+    {
+      "epoch": 0.12778238753201776,
+      "grad_norm": 0.11425378918647766,
+      "learning_rate": 0.00019490258334536008,
+      "loss": 0.1984,
+      "step": 1771
+    },
+    {
+      "epoch": 0.12785454020707818,
+      "grad_norm": 0.09551405161619186,
+      "learning_rate": 0.00019489969692596334,
+      "loss": 0.1712,
+      "step": 1772
+    },
+    {
+      "epoch": 0.1279266928821386,
+      "grad_norm": 0.1070886105298996,
+      "learning_rate": 0.0001948968105065666,
+      "loss": 0.1371,
+      "step": 1773
+    },
+    {
+      "epoch": 0.12799884555719904,
+      "grad_norm": 0.09951864928007126,
+      "learning_rate": 0.00019489392408716987,
+      "loss": 0.1281,
+      "step": 1774
+    },
+    {
+      "epoch": 0.12807099823225945,
+      "grad_norm": 0.10588033497333527,
+      "learning_rate": 0.00019489103766777316,
+      "loss": 0.1838,
+      "step": 1775
+    },
+    {
+      "epoch": 0.1281431509073199,
+      "grad_norm": 0.1081177219748497,
+      "learning_rate": 0.0001948881512483764,
+      "loss": 0.1113,
+      "step": 1776
+    },
+    {
+      "epoch": 0.1282153035823803,
+      "grad_norm": 0.10994046181440353,
+      "learning_rate": 0.00019488526482897966,
+      "loss": 0.1365,
+      "step": 1777
+    },
+    {
+      "epoch": 0.12828745625744076,
+      "grad_norm": 0.10076797008514404,
+      "learning_rate": 0.00019488237840958292,
+      "loss": 0.1585,
+      "step": 1778
+    },
+    {
+      "epoch": 0.12835960893250117,
+      "grad_norm": 0.129889577627182,
+      "learning_rate": 0.00019487949199018618,
+      "loss": 0.1926,
+      "step": 1779
+    },
+    {
+      "epoch": 0.1284317616075616,
+      "grad_norm": 0.10640796273946762,
+      "learning_rate": 0.00019487660557078945,
+      "loss": 0.2057,
+      "step": 1780
+    },
+    {
+      "epoch": 0.12850391428262203,
+      "grad_norm": 0.08477828651666641,
+      "learning_rate": 0.0001948737191513927,
+      "loss": 0.1444,
+      "step": 1781
+    },
+    {
+      "epoch": 0.12857606695768245,
+      "grad_norm": 0.08494777977466583,
+      "learning_rate": 0.00019487083273199597,
+      "loss": 0.1356,
+      "step": 1782
+    },
+    {
+      "epoch": 0.1286482196327429,
+      "grad_norm": 0.08862127363681793,
+      "learning_rate": 0.00019486794631259923,
+      "loss": 0.1545,
+      "step": 1783
+    },
+    {
+      "epoch": 0.1287203723078033,
+      "grad_norm": 0.08903194963932037,
+      "learning_rate": 0.0001948650598932025,
+      "loss": 0.1755,
+      "step": 1784
+    },
+    {
+      "epoch": 0.12879252498286373,
+      "grad_norm": 0.11961119621992111,
+      "learning_rate": 0.00019486217347380576,
+      "loss": 0.141,
+      "step": 1785
+    },
+    {
+      "epoch": 0.12886467765792417,
+      "grad_norm": 0.0972859337925911,
+      "learning_rate": 0.00019485928705440902,
+      "loss": 0.1719,
+      "step": 1786
+    },
+    {
+      "epoch": 0.1289368303329846,
+      "grad_norm": 0.11359525471925735,
+      "learning_rate": 0.00019485640063501226,
+      "loss": 0.1921,
+      "step": 1787
+    },
+    {
+      "epoch": 0.12900898300804503,
+      "grad_norm": 0.1031796783208847,
+      "learning_rate": 0.00019485351421561552,
+      "loss": 0.1678,
+      "step": 1788
+    },
+    {
+      "epoch": 0.12908113568310545,
+      "grad_norm": 0.1136065348982811,
+      "learning_rate": 0.0001948506277962188,
+      "loss": 0.1921,
+      "step": 1789
+    },
+    {
+      "epoch": 0.1291532883581659,
+      "grad_norm": 0.11536866426467896,
+      "learning_rate": 0.00019484774137682207,
+      "loss": 0.2159,
+      "step": 1790
+    },
+    {
+      "epoch": 0.1292254410332263,
+      "grad_norm": 0.11552587151527405,
+      "learning_rate": 0.00019484485495742534,
+      "loss": 0.1385,
+      "step": 1791
+    },
+    {
+      "epoch": 0.12929759370828672,
+      "grad_norm": 0.09561877697706223,
+      "learning_rate": 0.00019484196853802857,
+      "loss": 0.1334,
+      "step": 1792
+    },
+    {
+      "epoch": 0.12936974638334717,
+      "grad_norm": 0.11691746860742569,
+      "learning_rate": 0.00019483908211863184,
+      "loss": 0.1323,
+      "step": 1793
+    },
+    {
+      "epoch": 0.12944189905840758,
+      "grad_norm": 0.11047590523958206,
+      "learning_rate": 0.0001948361956992351,
+      "loss": 0.1599,
+      "step": 1794
+    },
+    {
+      "epoch": 0.12951405173346803,
+      "grad_norm": 0.12649111449718475,
+      "learning_rate": 0.00019483330927983836,
+      "loss": 0.1529,
+      "step": 1795
+    },
+    {
+      "epoch": 0.12958620440852844,
+      "grad_norm": 0.11031024903059006,
+      "learning_rate": 0.00019483042286044165,
+      "loss": 0.1502,
+      "step": 1796
+    },
+    {
+      "epoch": 0.1296583570835889,
+      "grad_norm": 0.11705251038074493,
+      "learning_rate": 0.0001948275364410449,
+      "loss": 0.1905,
+      "step": 1797
+    },
+    {
+      "epoch": 0.1297305097586493,
+      "grad_norm": 0.09784354269504547,
+      "learning_rate": 0.00019482465002164815,
+      "loss": 0.1761,
+      "step": 1798
+    },
+    {
+      "epoch": 0.12980266243370972,
+      "grad_norm": 0.0812259167432785,
+      "learning_rate": 0.00019482176360225141,
+      "loss": 0.2058,
+      "step": 1799
+    },
+    {
+      "epoch": 0.12987481510877016,
+      "grad_norm": 0.093953937292099,
+      "learning_rate": 0.00019481887718285468,
+      "loss": 0.1552,
+      "step": 1800
+    },
+    {
+      "epoch": 0.12994696778383058,
+      "grad_norm": 0.0977918803691864,
+      "learning_rate": 0.00019481599076345794,
+      "loss": 0.1953,
+      "step": 1801
+    },
+    {
+      "epoch": 0.13001912045889102,
+      "grad_norm": 0.08500286936759949,
+      "learning_rate": 0.0001948131043440612,
+      "loss": 0.1679,
+      "step": 1802
+    },
+    {
+      "epoch": 0.13009127313395144,
+      "grad_norm": 0.1158568263053894,
+      "learning_rate": 0.00019481021792466447,
+      "loss": 0.1697,
+      "step": 1803
+    },
+    {
+      "epoch": 0.13016342580901186,
+      "grad_norm": 0.09711387008428574,
+      "learning_rate": 0.00019480733150526773,
+      "loss": 0.1522,
+      "step": 1804
+    },
+    {
+      "epoch": 0.1302355784840723,
+      "grad_norm": 0.08754990249872208,
+      "learning_rate": 0.000194804445085871,
+      "loss": 0.1696,
+      "step": 1805
+    },
+    {
+      "epoch": 0.13030773115913272,
+      "grad_norm": 0.10196410864591599,
+      "learning_rate": 0.00019480155866647425,
+      "loss": 0.1643,
+      "step": 1806
+    },
+    {
+      "epoch": 0.13037988383419316,
+      "grad_norm": 0.1056194081902504,
+      "learning_rate": 0.00019479867224707752,
+      "loss": 0.1424,
+      "step": 1807
+    },
+    {
+      "epoch": 0.13045203650925358,
+      "grad_norm": 0.07937432825565338,
+      "learning_rate": 0.00019479578582768075,
+      "loss": 0.1661,
+      "step": 1808
+    },
+    {
+      "epoch": 0.13052418918431402,
+      "grad_norm": 0.11897121369838715,
+      "learning_rate": 0.00019479289940828402,
+      "loss": 0.146,
+      "step": 1809
+    },
+    {
+      "epoch": 0.13059634185937444,
+      "grad_norm": 0.08482971787452698,
+      "learning_rate": 0.0001947900129888873,
+      "loss": 0.1845,
+      "step": 1810
+    },
+    {
+      "epoch": 0.13066849453443485,
+      "grad_norm": 0.09962055087089539,
+      "learning_rate": 0.00019478712656949057,
+      "loss": 0.1175,
+      "step": 1811
+    },
+    {
+      "epoch": 0.1307406472094953,
+      "grad_norm": 0.1010812520980835,
+      "learning_rate": 0.00019478424015009383,
+      "loss": 0.1942,
+      "step": 1812
+    },
+    {
+      "epoch": 0.1308127998845557,
+      "grad_norm": 0.09674418717622757,
+      "learning_rate": 0.00019478135373069707,
+      "loss": 0.1658,
+      "step": 1813
+    },
+    {
+      "epoch": 0.13088495255961616,
+      "grad_norm": 0.11402438580989838,
+      "learning_rate": 0.00019477846731130033,
+      "loss": 0.1824,
+      "step": 1814
+    },
+    {
+      "epoch": 0.13095710523467657,
+      "grad_norm": 0.13043256103992462,
+      "learning_rate": 0.0001947755808919036,
+      "loss": 0.1987,
+      "step": 1815
+    },
+    {
+      "epoch": 0.131029257909737,
+      "grad_norm": 0.14432241022586823,
+      "learning_rate": 0.00019477269447250686,
+      "loss": 0.1455,
+      "step": 1816
+    },
+    {
+      "epoch": 0.13110141058479743,
+      "grad_norm": 0.10322273522615433,
+      "learning_rate": 0.00019476980805311015,
+      "loss": 0.2021,
+      "step": 1817
+    },
+    {
+      "epoch": 0.13117356325985785,
+      "grad_norm": 0.08912544697523117,
+      "learning_rate": 0.00019476692163371338,
+      "loss": 0.1385,
+      "step": 1818
+    },
+    {
+      "epoch": 0.1312457159349183,
+      "grad_norm": 0.10674835741519928,
+      "learning_rate": 0.00019476403521431665,
+      "loss": 0.202,
+      "step": 1819
+    },
+    {
+      "epoch": 0.1313178686099787,
+      "grad_norm": 0.10658169537782669,
+      "learning_rate": 0.0001947611487949199,
+      "loss": 0.2185,
+      "step": 1820
+    },
+    {
+      "epoch": 0.13139002128503915,
+      "grad_norm": 0.09862136840820312,
+      "learning_rate": 0.00019475826237552317,
+      "loss": 0.1156,
+      "step": 1821
+    },
+    {
+      "epoch": 0.13146217396009957,
+      "grad_norm": 0.10120377689599991,
+      "learning_rate": 0.00019475537595612643,
+      "loss": 0.1456,
+      "step": 1822
+    },
+    {
+      "epoch": 0.13153432663516,
+      "grad_norm": 0.08391633629798889,
+      "learning_rate": 0.0001947524895367297,
+      "loss": 0.1388,
+      "step": 1823
+    },
+    {
+      "epoch": 0.13160647931022043,
+      "grad_norm": 0.1270926147699356,
+      "learning_rate": 0.00019474960311733296,
+      "loss": 0.1729,
+      "step": 1824
+    },
+    {
+      "epoch": 0.13167863198528085,
+      "grad_norm": 0.10564117878675461,
+      "learning_rate": 0.00019474671669793622,
+      "loss": 0.1369,
+      "step": 1825
+    },
+    {
+      "epoch": 0.1317507846603413,
+      "grad_norm": 0.1271250993013382,
+      "learning_rate": 0.00019474383027853949,
+      "loss": 0.1726,
+      "step": 1826
+    },
+    {
+      "epoch": 0.1318229373354017,
+      "grad_norm": 0.09364531934261322,
+      "learning_rate": 0.00019474094385914275,
+      "loss": 0.159,
+      "step": 1827
+    },
+    {
+      "epoch": 0.13189509001046215,
+      "grad_norm": 0.1323820948600769,
+      "learning_rate": 0.000194738057439746,
+      "loss": 0.2438,
+      "step": 1828
+    },
+    {
+      "epoch": 0.13196724268552257,
+      "grad_norm": 0.11897341907024384,
+      "learning_rate": 0.00019473517102034925,
+      "loss": 0.1228,
+      "step": 1829
+    },
+    {
+      "epoch": 0.13203939536058298,
+      "grad_norm": 0.1225760281085968,
+      "learning_rate": 0.0001947322846009525,
+      "loss": 0.1622,
+      "step": 1830
+    },
+    {
+      "epoch": 0.13211154803564343,
+      "grad_norm": 0.10834752768278122,
+      "learning_rate": 0.0001947293981815558,
+      "loss": 0.1722,
+      "step": 1831
+    },
+    {
+      "epoch": 0.13218370071070384,
+      "grad_norm": 0.11604952067136765,
+      "learning_rate": 0.00019472651176215906,
+      "loss": 0.2066,
+      "step": 1832
+    },
+    {
+      "epoch": 0.1322558533857643,
+      "grad_norm": 0.1207766979932785,
+      "learning_rate": 0.00019472362534276233,
+      "loss": 0.1554,
+      "step": 1833
+    },
+    {
+      "epoch": 0.1323280060608247,
+      "grad_norm": 0.11709817498922348,
+      "learning_rate": 0.00019472073892336556,
+      "loss": 0.1966,
+      "step": 1834
+    },
+    {
+      "epoch": 0.13240015873588512,
+      "grad_norm": 0.09540904313325882,
+      "learning_rate": 0.00019471785250396883,
+      "loss": 0.1972,
+      "step": 1835
+    },
+    {
+      "epoch": 0.13247231141094556,
+      "grad_norm": 0.12114161998033524,
+      "learning_rate": 0.0001947149660845721,
+      "loss": 0.1502,
+      "step": 1836
+    },
+    {
+      "epoch": 0.13254446408600598,
+      "grad_norm": 0.08637981116771698,
+      "learning_rate": 0.00019471207966517535,
+      "loss": 0.2123,
+      "step": 1837
+    },
+    {
+      "epoch": 0.13261661676106642,
+      "grad_norm": 0.09737266600131989,
+      "learning_rate": 0.00019470919324577864,
+      "loss": 0.1474,
+      "step": 1838
+    },
+    {
+      "epoch": 0.13268876943612684,
+      "grad_norm": 0.16904398798942566,
+      "learning_rate": 0.00019470630682638188,
+      "loss": 0.1927,
+      "step": 1839
+    },
+    {
+      "epoch": 0.13276092211118729,
+      "grad_norm": 0.09032367169857025,
+      "learning_rate": 0.00019470342040698514,
+      "loss": 0.1916,
+      "step": 1840
+    },
+    {
+      "epoch": 0.1328330747862477,
+      "grad_norm": 0.15205013751983643,
+      "learning_rate": 0.0001947005339875884,
+      "loss": 0.2313,
+      "step": 1841
+    },
+    {
+      "epoch": 0.13290522746130812,
+      "grad_norm": 0.10285206884145737,
+      "learning_rate": 0.00019469764756819167,
+      "loss": 0.2256,
+      "step": 1842
+    },
+    {
+      "epoch": 0.13297738013636856,
+      "grad_norm": 0.07314669340848923,
+      "learning_rate": 0.00019469476114879493,
+      "loss": 0.1652,
+      "step": 1843
+    },
+    {
+      "epoch": 0.13304953281142898,
+      "grad_norm": 0.08424288779497147,
+      "learning_rate": 0.0001946918747293982,
+      "loss": 0.1769,
+      "step": 1844
+    },
+    {
+      "epoch": 0.13312168548648942,
+      "grad_norm": 0.1007574275135994,
+      "learning_rate": 0.00019468898831000145,
+      "loss": 0.1512,
+      "step": 1845
+    },
+    {
+      "epoch": 0.13319383816154984,
+      "grad_norm": 0.09143602102994919,
+      "learning_rate": 0.00019468610189060472,
+      "loss": 0.1753,
+      "step": 1846
+    },
+    {
+      "epoch": 0.13326599083661025,
+      "grad_norm": 0.09254894405603409,
+      "learning_rate": 0.00019468321547120798,
+      "loss": 0.1769,
+      "step": 1847
+    },
+    {
+      "epoch": 0.1333381435116707,
+      "grad_norm": 0.11452503502368927,
+      "learning_rate": 0.00019468032905181124,
+      "loss": 0.1905,
+      "step": 1848
+    },
+    {
+      "epoch": 0.13341029618673111,
+      "grad_norm": 0.09670179337263107,
+      "learning_rate": 0.0001946774426324145,
+      "loss": 0.1682,
+      "step": 1849
+    },
+    {
+      "epoch": 0.13348244886179156,
+      "grad_norm": 0.09185737371444702,
+      "learning_rate": 0.00019467455621301777,
+      "loss": 0.1554,
+      "step": 1850
+    },
+    {
+      "epoch": 0.13355460153685197,
+      "grad_norm": 0.11319153755903244,
+      "learning_rate": 0.000194671669793621,
+      "loss": 0.1502,
+      "step": 1851
+    },
+    {
+      "epoch": 0.13362675421191242,
+      "grad_norm": 0.09828914701938629,
+      "learning_rate": 0.0001946687833742243,
+      "loss": 0.1499,
+      "step": 1852
+    },
+    {
+      "epoch": 0.13369890688697283,
+      "grad_norm": 0.08897096663713455,
+      "learning_rate": 0.00019466589695482756,
+      "loss": 0.1501,
+      "step": 1853
+    },
+    {
+      "epoch": 0.13377105956203325,
+      "grad_norm": 0.12592235207557678,
+      "learning_rate": 0.00019466301053543082,
+      "loss": 0.1774,
+      "step": 1854
+    },
+    {
+      "epoch": 0.1338432122370937,
+      "grad_norm": 0.1032068282365799,
+      "learning_rate": 0.00019466012411603408,
+      "loss": 0.1933,
+      "step": 1855
+    },
+    {
+      "epoch": 0.1339153649121541,
+      "grad_norm": 0.09040278196334839,
+      "learning_rate": 0.00019465723769663732,
+      "loss": 0.1217,
+      "step": 1856
+    },
+    {
+      "epoch": 0.13398751758721456,
+      "grad_norm": 0.12092549353837967,
+      "learning_rate": 0.00019465435127724058,
+      "loss": 0.1422,
+      "step": 1857
+    },
+    {
+      "epoch": 0.13405967026227497,
+      "grad_norm": 0.11543098092079163,
+      "learning_rate": 0.00019465146485784385,
+      "loss": 0.2229,
+      "step": 1858
+    },
+    {
+      "epoch": 0.13413182293733542,
+      "grad_norm": 0.10631363093852997,
+      "learning_rate": 0.00019464857843844714,
+      "loss": 0.1798,
+      "step": 1859
+    },
+    {
+      "epoch": 0.13420397561239583,
+      "grad_norm": 0.11276703327894211,
+      "learning_rate": 0.0001946456920190504,
+      "loss": 0.1369,
+      "step": 1860
+    },
+    {
+      "epoch": 0.13427612828745625,
+      "grad_norm": 0.09238825738430023,
+      "learning_rate": 0.00019464280559965363,
+      "loss": 0.1926,
+      "step": 1861
+    },
+    {
+      "epoch": 0.1343482809625167,
+      "grad_norm": 0.10447818040847778,
+      "learning_rate": 0.0001946399191802569,
+      "loss": 0.1148,
+      "step": 1862
+    },
+    {
+      "epoch": 0.1344204336375771,
+      "grad_norm": 0.12700176239013672,
+      "learning_rate": 0.00019463703276086016,
+      "loss": 0.1665,
+      "step": 1863
+    },
+    {
+      "epoch": 0.13449258631263755,
+      "grad_norm": 0.10683518648147583,
+      "learning_rate": 0.00019463414634146342,
+      "loss": 0.139,
+      "step": 1864
+    },
+    {
+      "epoch": 0.13456473898769797,
+      "grad_norm": 0.08258277922868729,
+      "learning_rate": 0.00019463125992206669,
+      "loss": 0.1678,
+      "step": 1865
+    },
+    {
+      "epoch": 0.13463689166275838,
+      "grad_norm": 0.10914620757102966,
+      "learning_rate": 0.00019462837350266995,
+      "loss": 0.2019,
+      "step": 1866
+    },
+    {
+      "epoch": 0.13470904433781883,
+      "grad_norm": 0.09897346794605255,
+      "learning_rate": 0.0001946254870832732,
+      "loss": 0.1521,
+      "step": 1867
+    },
+    {
+      "epoch": 0.13478119701287924,
+      "grad_norm": 0.09971527755260468,
+      "learning_rate": 0.00019462260066387647,
+      "loss": 0.1715,
+      "step": 1868
+    },
+    {
+      "epoch": 0.1348533496879397,
+      "grad_norm": 0.11484746634960175,
+      "learning_rate": 0.00019461971424447974,
+      "loss": 0.2191,
+      "step": 1869
+    },
+    {
+      "epoch": 0.1349255023630001,
+      "grad_norm": 0.09504850953817368,
+      "learning_rate": 0.000194616827825083,
+      "loss": 0.2144,
+      "step": 1870
+    },
+    {
+      "epoch": 0.13499765503806055,
+      "grad_norm": 0.10230226814746857,
+      "learning_rate": 0.00019461394140568626,
+      "loss": 0.2234,
+      "step": 1871
+    },
+    {
+      "epoch": 0.13506980771312097,
+      "grad_norm": 0.09885798394680023,
+      "learning_rate": 0.0001946110549862895,
+      "loss": 0.1635,
+      "step": 1872
+    },
+    {
+      "epoch": 0.13514196038818138,
+      "grad_norm": 0.09148430079221725,
+      "learning_rate": 0.00019460816856689276,
+      "loss": 0.1906,
+      "step": 1873
+    },
+    {
+      "epoch": 0.13521411306324183,
+      "grad_norm": 0.10847075283527374,
+      "learning_rate": 0.00019460528214749605,
+      "loss": 0.1491,
+      "step": 1874
+    },
+    {
+      "epoch": 0.13528626573830224,
+      "grad_norm": 0.0958617776632309,
+      "learning_rate": 0.00019460239572809931,
+      "loss": 0.2183,
+      "step": 1875
+    },
+    {
+      "epoch": 0.13535841841336269,
+      "grad_norm": 0.09568075835704803,
+      "learning_rate": 0.00019459950930870258,
+      "loss": 0.1558,
+      "step": 1876
+    },
+    {
+      "epoch": 0.1354305710884231,
+      "grad_norm": 0.09332866221666336,
+      "learning_rate": 0.00019459662288930581,
+      "loss": 0.1599,
+      "step": 1877
+    },
+    {
+      "epoch": 0.13550272376348352,
+      "grad_norm": 0.09271859377622604,
+      "learning_rate": 0.00019459373646990908,
+      "loss": 0.1796,
+      "step": 1878
+    },
+    {
+      "epoch": 0.13557487643854396,
+      "grad_norm": 0.1295979619026184,
+      "learning_rate": 0.00019459085005051234,
+      "loss": 0.19,
+      "step": 1879
+    },
+    {
+      "epoch": 0.13564702911360438,
+      "grad_norm": 0.0919765904545784,
+      "learning_rate": 0.0001945879636311156,
+      "loss": 0.1655,
+      "step": 1880
+    },
+    {
+      "epoch": 0.13571918178866482,
+      "grad_norm": 0.08181504905223846,
+      "learning_rate": 0.0001945850772117189,
+      "loss": 0.1394,
+      "step": 1881
+    },
+    {
+      "epoch": 0.13579133446372524,
+      "grad_norm": 0.10545797646045685,
+      "learning_rate": 0.00019458219079232213,
+      "loss": 0.1501,
+      "step": 1882
+    },
+    {
+      "epoch": 0.13586348713878568,
+      "grad_norm": 0.09444499015808105,
+      "learning_rate": 0.0001945793043729254,
+      "loss": 0.1779,
+      "step": 1883
+    },
+    {
+      "epoch": 0.1359356398138461,
+      "grad_norm": 0.07674098014831543,
+      "learning_rate": 0.00019457641795352865,
+      "loss": 0.1684,
+      "step": 1884
+    },
+    {
+      "epoch": 0.13600779248890651,
+      "grad_norm": 0.11596457660198212,
+      "learning_rate": 0.00019457353153413192,
+      "loss": 0.1599,
+      "step": 1885
+    },
+    {
+      "epoch": 0.13607994516396696,
+      "grad_norm": 0.11078653484582901,
+      "learning_rate": 0.00019457064511473518,
+      "loss": 0.1681,
+      "step": 1886
+    },
+    {
+      "epoch": 0.13615209783902738,
+      "grad_norm": 0.11662109941244125,
+      "learning_rate": 0.00019456775869533844,
+      "loss": 0.1897,
+      "step": 1887
+    },
+    {
+      "epoch": 0.13622425051408782,
+      "grad_norm": 0.11415568739175797,
+      "learning_rate": 0.0001945648722759417,
+      "loss": 0.2309,
+      "step": 1888
+    },
+    {
+      "epoch": 0.13629640318914824,
+      "grad_norm": 0.105406254529953,
+      "learning_rate": 0.00019456198585654497,
+      "loss": 0.2058,
+      "step": 1889
+    },
+    {
+      "epoch": 0.13636855586420868,
+      "grad_norm": 0.11074227094650269,
+      "learning_rate": 0.00019455909943714823,
+      "loss": 0.1473,
+      "step": 1890
+    },
+    {
+      "epoch": 0.1364407085392691,
+      "grad_norm": 0.10308904945850372,
+      "learning_rate": 0.0001945562130177515,
+      "loss": 0.1371,
+      "step": 1891
+    },
+    {
+      "epoch": 0.1365128612143295,
+      "grad_norm": 0.07854129374027252,
+      "learning_rate": 0.00019455332659835476,
+      "loss": 0.1641,
+      "step": 1892
+    },
+    {
+      "epoch": 0.13658501388938996,
+      "grad_norm": 0.12944363057613373,
+      "learning_rate": 0.000194550440178958,
+      "loss": 0.171,
+      "step": 1893
+    },
+    {
+      "epoch": 0.13665716656445037,
+      "grad_norm": 0.08137572556734085,
+      "learning_rate": 0.00019454755375956126,
+      "loss": 0.2022,
+      "step": 1894
+    },
+    {
+      "epoch": 0.13672931923951082,
+      "grad_norm": 0.10226333141326904,
+      "learning_rate": 0.00019454466734016455,
+      "loss": 0.1736,
+      "step": 1895
+    },
+    {
+      "epoch": 0.13680147191457123,
+      "grad_norm": 0.12806734442710876,
+      "learning_rate": 0.0001945417809207678,
+      "loss": 0.2227,
+      "step": 1896
+    },
+    {
+      "epoch": 0.13687362458963165,
+      "grad_norm": 0.09774816781282425,
+      "learning_rate": 0.00019453889450137107,
+      "loss": 0.1582,
+      "step": 1897
+    },
+    {
+      "epoch": 0.1369457772646921,
+      "grad_norm": 0.10411229729652405,
+      "learning_rate": 0.0001945360080819743,
+      "loss": 0.1476,
+      "step": 1898
+    },
+    {
+      "epoch": 0.1370179299397525,
+      "grad_norm": 0.11847859621047974,
+      "learning_rate": 0.00019453312166257757,
+      "loss": 0.1689,
+      "step": 1899
+    },
+    {
+      "epoch": 0.13709008261481295,
+      "grad_norm": 0.11297795176506042,
+      "learning_rate": 0.00019453023524318083,
+      "loss": 0.2132,
+      "step": 1900
+    },
+    {
+      "epoch": 0.13716223528987337,
+      "grad_norm": 0.11359799653291702,
+      "learning_rate": 0.0001945273488237841,
+      "loss": 0.1307,
+      "step": 1901
+    },
+    {
+      "epoch": 0.1372343879649338,
+      "grad_norm": 0.13657835125923157,
+      "learning_rate": 0.0001945244624043874,
+      "loss": 0.2036,
+      "step": 1902
+    },
+    {
+      "epoch": 0.13730654063999423,
+      "grad_norm": 0.15571734309196472,
+      "learning_rate": 0.00019452157598499062,
+      "loss": 0.2117,
+      "step": 1903
+    },
+    {
+      "epoch": 0.13737869331505465,
+      "grad_norm": 0.1211012601852417,
+      "learning_rate": 0.00019451868956559389,
+      "loss": 0.1738,
+      "step": 1904
+    },
+    {
+      "epoch": 0.1374508459901151,
+      "grad_norm": 0.0977838858962059,
+      "learning_rate": 0.00019451580314619715,
+      "loss": 0.1783,
+      "step": 1905
+    },
+    {
+      "epoch": 0.1375229986651755,
+      "grad_norm": 0.09584508091211319,
+      "learning_rate": 0.0001945129167268004,
+      "loss": 0.1419,
+      "step": 1906
+    },
+    {
+      "epoch": 0.13759515134023595,
+      "grad_norm": 0.10878670960664749,
+      "learning_rate": 0.00019451003030740367,
+      "loss": 0.1396,
+      "step": 1907
+    },
+    {
+      "epoch": 0.13766730401529637,
+      "grad_norm": 0.09390582889318466,
+      "learning_rate": 0.00019450714388800694,
+      "loss": 0.1816,
+      "step": 1908
+    },
+    {
+      "epoch": 0.13773945669035678,
+      "grad_norm": 0.09813504666090012,
+      "learning_rate": 0.0001945042574686102,
+      "loss": 0.2146,
+      "step": 1909
+    },
+    {
+      "epoch": 0.13781160936541723,
+      "grad_norm": 0.08499909937381744,
+      "learning_rate": 0.00019450137104921346,
+      "loss": 0.163,
+      "step": 1910
+    },
+    {
+      "epoch": 0.13788376204047764,
+      "grad_norm": 0.08602595329284668,
+      "learning_rate": 0.00019449848462981673,
+      "loss": 0.167,
+      "step": 1911
+    },
+    {
+      "epoch": 0.13795591471553809,
+      "grad_norm": 0.08455024659633636,
+      "learning_rate": 0.00019449559821042,
+      "loss": 0.1724,
+      "step": 1912
+    },
+    {
+      "epoch": 0.1380280673905985,
+      "grad_norm": 0.08802422136068344,
+      "learning_rate": 0.00019449271179102325,
+      "loss": 0.1765,
+      "step": 1913
+    },
+    {
+      "epoch": 0.13810022006565895,
+      "grad_norm": 0.10855334252119064,
+      "learning_rate": 0.0001944898253716265,
+      "loss": 0.177,
+      "step": 1914
+    },
+    {
+      "epoch": 0.13817237274071936,
+      "grad_norm": 0.11316860467195511,
+      "learning_rate": 0.00019448693895222975,
+      "loss": 0.1573,
+      "step": 1915
+    },
+    {
+      "epoch": 0.13824452541577978,
+      "grad_norm": 0.10946952551603317,
+      "learning_rate": 0.00019448405253283304,
+      "loss": 0.156,
+      "step": 1916
+    },
+    {
+      "epoch": 0.13831667809084022,
+      "grad_norm": 0.09252781420946121,
+      "learning_rate": 0.0001944811661134363,
+      "loss": 0.1535,
+      "step": 1917
+    },
+    {
+      "epoch": 0.13838883076590064,
+      "grad_norm": 0.12185255438089371,
+      "learning_rate": 0.00019447827969403957,
+      "loss": 0.1442,
+      "step": 1918
+    },
+    {
+      "epoch": 0.13846098344096108,
+      "grad_norm": 0.09439793974161148,
+      "learning_rate": 0.0001944753932746428,
+      "loss": 0.1578,
+      "step": 1919
+    },
+    {
+      "epoch": 0.1385331361160215,
+      "grad_norm": 0.09366545081138611,
+      "learning_rate": 0.00019447250685524606,
+      "loss": 0.1105,
+      "step": 1920
+    },
+    {
+      "epoch": 0.13860528879108194,
+      "grad_norm": 0.14343176782131195,
+      "learning_rate": 0.00019446962043584933,
+      "loss": 0.1632,
+      "step": 1921
+    },
+    {
+      "epoch": 0.13867744146614236,
+      "grad_norm": 0.12436092644929886,
+      "learning_rate": 0.0001944667340164526,
+      "loss": 0.2113,
+      "step": 1922
+    },
+    {
+      "epoch": 0.13874959414120278,
+      "grad_norm": 0.0957041010260582,
+      "learning_rate": 0.00019446384759705588,
+      "loss": 0.1225,
+      "step": 1923
+    },
+    {
+      "epoch": 0.13882174681626322,
+      "grad_norm": 0.12467844784259796,
+      "learning_rate": 0.00019446096117765912,
+      "loss": 0.1908,
+      "step": 1924
+    },
+    {
+      "epoch": 0.13889389949132364,
+      "grad_norm": 0.14240126311779022,
+      "learning_rate": 0.00019445807475826238,
+      "loss": 0.2221,
+      "step": 1925
+    },
+    {
+      "epoch": 0.13896605216638408,
+      "grad_norm": 0.07637129724025726,
+      "learning_rate": 0.00019445518833886564,
+      "loss": 0.1789,
+      "step": 1926
+    },
+    {
+      "epoch": 0.1390382048414445,
+      "grad_norm": 0.09346475452184677,
+      "learning_rate": 0.0001944523019194689,
+      "loss": 0.1883,
+      "step": 1927
+    },
+    {
+      "epoch": 0.1391103575165049,
+      "grad_norm": 0.10540024191141129,
+      "learning_rate": 0.00019444941550007217,
+      "loss": 0.1405,
+      "step": 1928
+    },
+    {
+      "epoch": 0.13918251019156536,
+      "grad_norm": 0.09005219489336014,
+      "learning_rate": 0.00019444652908067543,
+      "loss": 0.185,
+      "step": 1929
+    },
+    {
+      "epoch": 0.13925466286662577,
+      "grad_norm": 0.12073545157909393,
+      "learning_rate": 0.0001944436426612787,
+      "loss": 0.1647,
+      "step": 1930
+    },
+    {
+      "epoch": 0.13932681554168622,
+      "grad_norm": 0.12057455629110336,
+      "learning_rate": 0.00019444075624188196,
+      "loss": 0.2004,
+      "step": 1931
+    },
+    {
+      "epoch": 0.13939896821674663,
+      "grad_norm": 0.107715904712677,
+      "learning_rate": 0.00019443786982248522,
+      "loss": 0.1593,
+      "step": 1932
+    },
+    {
+      "epoch": 0.13947112089180708,
+      "grad_norm": 0.0985242947936058,
+      "learning_rate": 0.00019443498340308848,
+      "loss": 0.1693,
+      "step": 1933
+    },
+    {
+      "epoch": 0.1395432735668675,
+      "grad_norm": 0.09869597107172012,
+      "learning_rate": 0.00019443209698369175,
+      "loss": 0.1748,
+      "step": 1934
+    },
+    {
+      "epoch": 0.1396154262419279,
+      "grad_norm": 0.11241772025823593,
+      "learning_rate": 0.00019442921056429498,
+      "loss": 0.137,
+      "step": 1935
+    },
+    {
+      "epoch": 0.13968757891698835,
+      "grad_norm": 0.11124883592128754,
+      "learning_rate": 0.00019442632414489824,
+      "loss": 0.1555,
+      "step": 1936
+    },
+    {
+      "epoch": 0.13975973159204877,
+      "grad_norm": 0.0856330394744873,
+      "learning_rate": 0.00019442343772550153,
+      "loss": 0.1238,
+      "step": 1937
+    },
+    {
+      "epoch": 0.1398318842671092,
+      "grad_norm": 0.10875482112169266,
+      "learning_rate": 0.0001944205513061048,
+      "loss": 0.1169,
+      "step": 1938
+    },
+    {
+      "epoch": 0.13990403694216963,
+      "grad_norm": 0.1266622692346573,
+      "learning_rate": 0.00019441766488670806,
+      "loss": 0.2094,
+      "step": 1939
+    },
+    {
+      "epoch": 0.13997618961723005,
+      "grad_norm": 0.08601401746273041,
+      "learning_rate": 0.0001944147784673113,
+      "loss": 0.1181,
+      "step": 1940
+    },
+    {
+      "epoch": 0.1400483422922905,
+      "grad_norm": 0.11201686412096024,
+      "learning_rate": 0.00019441189204791456,
+      "loss": 0.1631,
+      "step": 1941
+    },
+    {
+      "epoch": 0.1401204949673509,
+      "grad_norm": 0.08824379742145538,
+      "learning_rate": 0.00019440900562851782,
+      "loss": 0.1511,
+      "step": 1942
+    },
+    {
+      "epoch": 0.14019264764241135,
+      "grad_norm": 0.10340140759944916,
+      "learning_rate": 0.00019440611920912109,
+      "loss": 0.1422,
+      "step": 1943
+    },
+    {
+      "epoch": 0.14026480031747177,
+      "grad_norm": 0.10507574677467346,
+      "learning_rate": 0.00019440323278972438,
+      "loss": 0.1216,
+      "step": 1944
+    },
+    {
+      "epoch": 0.1403369529925322,
+      "grad_norm": 0.10035047680139542,
+      "learning_rate": 0.0001944003463703276,
+      "loss": 0.1783,
+      "step": 1945
+    },
+    {
+      "epoch": 0.14040910566759263,
+      "grad_norm": 0.09842780977487564,
+      "learning_rate": 0.00019439745995093087,
+      "loss": 0.1415,
+      "step": 1946
+    },
+    {
+      "epoch": 0.14048125834265304,
+      "grad_norm": 0.08447740972042084,
+      "learning_rate": 0.00019439457353153414,
+      "loss": 0.1409,
+      "step": 1947
+    },
+    {
+      "epoch": 0.1405534110177135,
+      "grad_norm": 0.10483802855014801,
+      "learning_rate": 0.0001943916871121374,
+      "loss": 0.1897,
+      "step": 1948
+    },
+    {
+      "epoch": 0.1406255636927739,
+      "grad_norm": 0.12013499438762665,
+      "learning_rate": 0.00019438880069274066,
+      "loss": 0.2073,
+      "step": 1949
+    },
+    {
+      "epoch": 0.14069771636783435,
+      "grad_norm": 0.11932390183210373,
+      "learning_rate": 0.00019438591427334393,
+      "loss": 0.1487,
+      "step": 1950
+    },
+    {
+      "epoch": 0.14076986904289476,
+      "grad_norm": 0.12911708652973175,
+      "learning_rate": 0.0001943830278539472,
+      "loss": 0.1897,
+      "step": 1951
+    },
+    {
+      "epoch": 0.1408420217179552,
+      "grad_norm": 0.09570632874965668,
+      "learning_rate": 0.00019438014143455045,
+      "loss": 0.202,
+      "step": 1952
+    },
+    {
+      "epoch": 0.14091417439301562,
+      "grad_norm": 0.13848714530467987,
+      "learning_rate": 0.00019437725501515371,
+      "loss": 0.1811,
+      "step": 1953
+    },
+    {
+      "epoch": 0.14098632706807604,
+      "grad_norm": 0.11773217469453812,
+      "learning_rate": 0.00019437436859575698,
+      "loss": 0.1762,
+      "step": 1954
+    },
+    {
+      "epoch": 0.14105847974313648,
+      "grad_norm": 0.11776330322027206,
+      "learning_rate": 0.00019437148217636024,
+      "loss": 0.1865,
+      "step": 1955
+    },
+    {
+      "epoch": 0.1411306324181969,
+      "grad_norm": 0.1038830578327179,
+      "learning_rate": 0.0001943685957569635,
+      "loss": 0.1383,
+      "step": 1956
+    },
+    {
+      "epoch": 0.14120278509325734,
+      "grad_norm": 0.09537827223539352,
+      "learning_rate": 0.00019436570933756674,
+      "loss": 0.1467,
+      "step": 1957
+    },
+    {
+      "epoch": 0.14127493776831776,
+      "grad_norm": 0.11215817928314209,
+      "learning_rate": 0.00019436282291817003,
+      "loss": 0.1738,
+      "step": 1958
+    },
+    {
+      "epoch": 0.14134709044337818,
+      "grad_norm": 0.08892468363046646,
+      "learning_rate": 0.0001943599364987733,
+      "loss": 0.19,
+      "step": 1959
+    },
+    {
+      "epoch": 0.14141924311843862,
+      "grad_norm": 0.1363980621099472,
+      "learning_rate": 0.00019435705007937655,
+      "loss": 0.1803,
+      "step": 1960
+    },
+    {
+      "epoch": 0.14149139579349904,
+      "grad_norm": 0.09271281212568283,
+      "learning_rate": 0.00019435416365997982,
+      "loss": 0.143,
+      "step": 1961
+    },
+    {
+      "epoch": 0.14156354846855948,
+      "grad_norm": 0.09465882927179337,
+      "learning_rate": 0.00019435127724058305,
+      "loss": 0.1877,
+      "step": 1962
+    },
+    {
+      "epoch": 0.1416357011436199,
+      "grad_norm": 0.16298234462738037,
+      "learning_rate": 0.00019434839082118632,
+      "loss": 0.2021,
+      "step": 1963
+    },
+    {
+      "epoch": 0.14170785381868034,
+      "grad_norm": 0.1135706827044487,
+      "learning_rate": 0.00019434550440178958,
+      "loss": 0.2151,
+      "step": 1964
+    },
+    {
+      "epoch": 0.14178000649374076,
+      "grad_norm": 0.08292524516582489,
+      "learning_rate": 0.00019434261798239287,
+      "loss": 0.1909,
+      "step": 1965
+    },
+    {
+      "epoch": 0.14185215916880117,
+      "grad_norm": 0.1263180375099182,
+      "learning_rate": 0.00019433973156299613,
+      "loss": 0.1869,
+      "step": 1966
+    },
+    {
+      "epoch": 0.14192431184386162,
+      "grad_norm": 0.12897604703903198,
+      "learning_rate": 0.00019433684514359937,
+      "loss": 0.1787,
+      "step": 1967
+    },
+    {
+      "epoch": 0.14199646451892203,
+      "grad_norm": 0.08955071121454239,
+      "learning_rate": 0.00019433395872420263,
+      "loss": 0.1883,
+      "step": 1968
+    },
+    {
+      "epoch": 0.14206861719398248,
+      "grad_norm": 0.09205218404531479,
+      "learning_rate": 0.0001943310723048059,
+      "loss": 0.2319,
+      "step": 1969
+    },
+    {
+      "epoch": 0.1421407698690429,
+      "grad_norm": 0.10378434509038925,
+      "learning_rate": 0.00019432818588540916,
+      "loss": 0.1748,
+      "step": 1970
+    },
+    {
+      "epoch": 0.1422129225441033,
+      "grad_norm": 0.10211426019668579,
+      "learning_rate": 0.00019432529946601242,
+      "loss": 0.1973,
+      "step": 1971
+    },
+    {
+      "epoch": 0.14228507521916375,
+      "grad_norm": 0.10617047548294067,
+      "learning_rate": 0.00019432241304661568,
+      "loss": 0.172,
+      "step": 1972
+    },
+    {
+      "epoch": 0.14235722789422417,
+      "grad_norm": 0.12520731985569,
+      "learning_rate": 0.00019431952662721895,
+      "loss": 0.1725,
+      "step": 1973
+    },
+    {
+      "epoch": 0.1424293805692846,
+      "grad_norm": 0.09489186853170395,
+      "learning_rate": 0.0001943166402078222,
+      "loss": 0.1586,
+      "step": 1974
+    },
+    {
+      "epoch": 0.14250153324434503,
+      "grad_norm": 0.09619477391242981,
+      "learning_rate": 0.00019431375378842547,
+      "loss": 0.1627,
+      "step": 1975
+    },
+    {
+      "epoch": 0.14257368591940547,
+      "grad_norm": 0.10977262258529663,
+      "learning_rate": 0.00019431086736902873,
+      "loss": 0.1902,
+      "step": 1976
+    },
+    {
+      "epoch": 0.1426458385944659,
+      "grad_norm": 0.11333739757537842,
+      "learning_rate": 0.000194307980949632,
+      "loss": 0.1579,
+      "step": 1977
+    },
+    {
+      "epoch": 0.1427179912695263,
+      "grad_norm": 0.10972239822149277,
+      "learning_rate": 0.00019430509453023523,
+      "loss": 0.1612,
+      "step": 1978
+    },
+    {
+      "epoch": 0.14279014394458675,
+      "grad_norm": 0.11612124741077423,
+      "learning_rate": 0.00019430220811083852,
+      "loss": 0.1494,
+      "step": 1979
+    },
+    {
+      "epoch": 0.14286229661964717,
+      "grad_norm": 0.11529509723186493,
+      "learning_rate": 0.00019429932169144179,
+      "loss": 0.1768,
+      "step": 1980
+    },
+    {
+      "epoch": 0.1429344492947076,
+      "grad_norm": 0.09690426290035248,
+      "learning_rate": 0.00019429643527204505,
+      "loss": 0.1722,
+      "step": 1981
+    },
+    {
+      "epoch": 0.14300660196976803,
+      "grad_norm": 0.10925997048616409,
+      "learning_rate": 0.0001942935488526483,
+      "loss": 0.1886,
+      "step": 1982
+    },
+    {
+      "epoch": 0.14307875464482847,
+      "grad_norm": 0.12713110446929932,
+      "learning_rate": 0.00019429066243325155,
+      "loss": 0.1769,
+      "step": 1983
+    },
+    {
+      "epoch": 0.1431509073198889,
+      "grad_norm": 0.1117352619767189,
+      "learning_rate": 0.0001942877760138548,
+      "loss": 0.1929,
+      "step": 1984
+    },
+    {
+      "epoch": 0.1432230599949493,
+      "grad_norm": 0.10470899194478989,
+      "learning_rate": 0.00019428488959445807,
+      "loss": 0.1455,
+      "step": 1985
+    },
+    {
+      "epoch": 0.14329521267000975,
+      "grad_norm": 0.12778021395206451,
+      "learning_rate": 0.00019428200317506136,
+      "loss": 0.1646,
+      "step": 1986
+    },
+    {
+      "epoch": 0.14336736534507016,
+      "grad_norm": 0.12582525610923767,
+      "learning_rate": 0.00019427911675566463,
+      "loss": 0.237,
+      "step": 1987
+    },
+    {
+      "epoch": 0.1434395180201306,
+      "grad_norm": 0.09902142733335495,
+      "learning_rate": 0.00019427623033626786,
+      "loss": 0.1767,
+      "step": 1988
+    },
+    {
+      "epoch": 0.14351167069519102,
+      "grad_norm": 0.09508440643548965,
+      "learning_rate": 0.00019427334391687113,
+      "loss": 0.1734,
+      "step": 1989
+    },
+    {
+      "epoch": 0.14358382337025144,
+      "grad_norm": 0.09515651315450668,
+      "learning_rate": 0.0001942704574974744,
+      "loss": 0.1623,
+      "step": 1990
+    },
+    {
+      "epoch": 0.14365597604531188,
+      "grad_norm": 0.10254396498203278,
+      "learning_rate": 0.00019426757107807765,
+      "loss": 0.154,
+      "step": 1991
+    },
+    {
+      "epoch": 0.1437281287203723,
+      "grad_norm": 0.0942922905087471,
+      "learning_rate": 0.00019426468465868091,
+      "loss": 0.1381,
+      "step": 1992
+    },
+    {
+      "epoch": 0.14380028139543274,
+      "grad_norm": 0.0835883840918541,
+      "learning_rate": 0.00019426179823928418,
+      "loss": 0.1729,
+      "step": 1993
+    },
+    {
+      "epoch": 0.14387243407049316,
+      "grad_norm": 0.11193643510341644,
+      "learning_rate": 0.00019425891181988744,
+      "loss": 0.1195,
+      "step": 1994
+    },
+    {
+      "epoch": 0.1439445867455536,
+      "grad_norm": 0.13489171862602234,
+      "learning_rate": 0.0001942560254004907,
+      "loss": 0.1484,
+      "step": 1995
+    },
+    {
+      "epoch": 0.14401673942061402,
+      "grad_norm": 0.09976416081190109,
+      "learning_rate": 0.00019425313898109397,
+      "loss": 0.1413,
+      "step": 1996
+    },
+    {
+      "epoch": 0.14408889209567444,
+      "grad_norm": 0.11297965794801712,
+      "learning_rate": 0.00019425025256169723,
+      "loss": 0.1699,
+      "step": 1997
+    },
+    {
+      "epoch": 0.14416104477073488,
+      "grad_norm": 0.08634833991527557,
+      "learning_rate": 0.0001942473661423005,
+      "loss": 0.1921,
+      "step": 1998
+    },
+    {
+      "epoch": 0.1442331974457953,
+      "grad_norm": 0.12895886600017548,
+      "learning_rate": 0.00019424447972290373,
+      "loss": 0.2203,
+      "step": 1999
+    },
+    {
+      "epoch": 0.14430535012085574,
+      "grad_norm": 0.10602421313524246,
+      "learning_rate": 0.00019424159330350702,
+      "loss": 0.1411,
+      "step": 2000
+    },
+    {
+      "epoch": 0.14437750279591616,
+      "grad_norm": 0.09780508279800415,
+      "learning_rate": 0.00019423870688411028,
+      "loss": 0.1217,
+      "step": 2001
+    },
+    {
+      "epoch": 0.14444965547097657,
+      "grad_norm": 0.10753864049911499,
+      "learning_rate": 0.00019423582046471354,
+      "loss": 0.1232,
+      "step": 2002
+    },
+    {
+      "epoch": 0.14452180814603702,
+      "grad_norm": 0.10284475982189178,
+      "learning_rate": 0.0001942329340453168,
+      "loss": 0.146,
+      "step": 2003
+    },
+    {
+      "epoch": 0.14459396082109743,
+      "grad_norm": 0.11914925277233124,
+      "learning_rate": 0.00019423004762592004,
+      "loss": 0.1712,
+      "step": 2004
+    },
+    {
+      "epoch": 0.14466611349615788,
+      "grad_norm": 0.10978496074676514,
+      "learning_rate": 0.0001942271612065233,
+      "loss": 0.2042,
+      "step": 2005
+    },
+    {
+      "epoch": 0.1447382661712183,
+      "grad_norm": 0.10618040710687637,
+      "learning_rate": 0.00019422427478712657,
+      "loss": 0.1746,
+      "step": 2006
+    },
+    {
+      "epoch": 0.14481041884627874,
+      "grad_norm": 0.11452309042215347,
+      "learning_rate": 0.00019422138836772986,
+      "loss": 0.1598,
+      "step": 2007
+    },
+    {
+      "epoch": 0.14488257152133915,
+      "grad_norm": 0.11929607391357422,
+      "learning_rate": 0.00019421850194833312,
+      "loss": 0.1393,
+      "step": 2008
+    },
+    {
+      "epoch": 0.14495472419639957,
+      "grad_norm": 0.10663137584924698,
+      "learning_rate": 0.00019421561552893636,
+      "loss": 0.1993,
+      "step": 2009
+    },
+    {
+      "epoch": 0.14502687687146001,
+      "grad_norm": 0.09824167937040329,
+      "learning_rate": 0.00019421272910953962,
+      "loss": 0.1712,
+      "step": 2010
+    },
+    {
+      "epoch": 0.14509902954652043,
+      "grad_norm": 0.10585514456033707,
+      "learning_rate": 0.00019420984269014288,
+      "loss": 0.1899,
+      "step": 2011
+    },
+    {
+      "epoch": 0.14517118222158087,
+      "grad_norm": 0.1193956658244133,
+      "learning_rate": 0.00019420695627074615,
+      "loss": 0.2205,
+      "step": 2012
+    },
+    {
+      "epoch": 0.1452433348966413,
+      "grad_norm": 0.15590347349643707,
+      "learning_rate": 0.0001942040698513494,
+      "loss": 0.2128,
+      "step": 2013
+    },
+    {
+      "epoch": 0.14531548757170173,
+      "grad_norm": 0.12089014053344727,
+      "learning_rate": 0.00019420118343195267,
+      "loss": 0.1908,
+      "step": 2014
+    },
+    {
+      "epoch": 0.14538764024676215,
+      "grad_norm": 0.09876030683517456,
+      "learning_rate": 0.00019419829701255593,
+      "loss": 0.1806,
+      "step": 2015
+    },
+    {
+      "epoch": 0.14545979292182257,
+      "grad_norm": 0.14723807573318481,
+      "learning_rate": 0.0001941954105931592,
+      "loss": 0.234,
+      "step": 2016
+    },
+    {
+      "epoch": 0.145531945596883,
+      "grad_norm": 0.11394286155700684,
+      "learning_rate": 0.00019419252417376246,
+      "loss": 0.1521,
+      "step": 2017
+    },
+    {
+      "epoch": 0.14560409827194343,
+      "grad_norm": 0.09444501250982285,
+      "learning_rate": 0.00019418963775436572,
+      "loss": 0.1686,
+      "step": 2018
+    },
+    {
+      "epoch": 0.14567625094700387,
+      "grad_norm": 0.11814798414707184,
+      "learning_rate": 0.00019418675133496899,
+      "loss": 0.1781,
+      "step": 2019
+    },
+    {
+      "epoch": 0.1457484036220643,
+      "grad_norm": 0.08465192466974258,
+      "learning_rate": 0.00019418386491557222,
+      "loss": 0.1583,
+      "step": 2020
+    },
+    {
+      "epoch": 0.1458205562971247,
+      "grad_norm": 0.10846804827451706,
+      "learning_rate": 0.0001941809784961755,
+      "loss": 0.1428,
+      "step": 2021
+    },
+    {
+      "epoch": 0.14589270897218515,
+      "grad_norm": 0.10981445759534836,
+      "learning_rate": 0.00019417809207677877,
+      "loss": 0.2108,
+      "step": 2022
+    },
+    {
+      "epoch": 0.14596486164724556,
+      "grad_norm": 0.10098119080066681,
+      "learning_rate": 0.00019417520565738204,
+      "loss": 0.155,
+      "step": 2023
+    },
+    {
+      "epoch": 0.146037014322306,
+      "grad_norm": 0.088902547955513,
+      "learning_rate": 0.0001941723192379853,
+      "loss": 0.1681,
+      "step": 2024
+    },
+    {
+      "epoch": 0.14610916699736642,
+      "grad_norm": 0.10116327553987503,
+      "learning_rate": 0.00019416943281858854,
+      "loss": 0.1989,
+      "step": 2025
+    },
+    {
+      "epoch": 0.14618131967242687,
+      "grad_norm": 0.09185565263032913,
+      "learning_rate": 0.0001941665463991918,
+      "loss": 0.1841,
+      "step": 2026
+    },
+    {
+      "epoch": 0.14625347234748728,
+      "grad_norm": 0.10266850143671036,
+      "learning_rate": 0.00019416365997979506,
+      "loss": 0.2001,
+      "step": 2027
+    },
+    {
+      "epoch": 0.1463256250225477,
+      "grad_norm": 0.1450365036725998,
+      "learning_rate": 0.00019416077356039835,
+      "loss": 0.1862,
+      "step": 2028
+    },
+    {
+      "epoch": 0.14639777769760814,
+      "grad_norm": 0.11125296354293823,
+      "learning_rate": 0.00019415788714100162,
+      "loss": 0.1762,
+      "step": 2029
+    },
+    {
+      "epoch": 0.14646993037266856,
+      "grad_norm": 0.09041983634233475,
+      "learning_rate": 0.00019415500072160485,
+      "loss": 0.1851,
+      "step": 2030
+    },
+    {
+      "epoch": 0.146542083047729,
+      "grad_norm": 0.10607034713029861,
+      "learning_rate": 0.00019415211430220811,
+      "loss": 0.1271,
+      "step": 2031
+    },
+    {
+      "epoch": 0.14661423572278942,
+      "grad_norm": 0.10995256155729294,
+      "learning_rate": 0.00019414922788281138,
+      "loss": 0.1591,
+      "step": 2032
+    },
+    {
+      "epoch": 0.14668638839784984,
+      "grad_norm": 0.11722482740879059,
+      "learning_rate": 0.00019414634146341464,
+      "loss": 0.1574,
+      "step": 2033
+    },
+    {
+      "epoch": 0.14675854107291028,
+      "grad_norm": 0.16310542821884155,
+      "learning_rate": 0.0001941434550440179,
+      "loss": 0.1847,
+      "step": 2034
+    },
+    {
+      "epoch": 0.1468306937479707,
+      "grad_norm": 0.1479705572128296,
+      "learning_rate": 0.00019414056862462117,
+      "loss": 0.198,
+      "step": 2035
+    },
+    {
+      "epoch": 0.14690284642303114,
+      "grad_norm": 0.09503147006034851,
+      "learning_rate": 0.00019413768220522443,
+      "loss": 0.1295,
+      "step": 2036
+    },
+    {
+      "epoch": 0.14697499909809156,
+      "grad_norm": 0.12894858419895172,
+      "learning_rate": 0.0001941347957858277,
+      "loss": 0.1788,
+      "step": 2037
+    },
+    {
+      "epoch": 0.147047151773152,
+      "grad_norm": 0.10748642683029175,
+      "learning_rate": 0.00019413190936643095,
+      "loss": 0.1436,
+      "step": 2038
+    },
+    {
+      "epoch": 0.14711930444821242,
+      "grad_norm": 0.097632497549057,
+      "learning_rate": 0.00019412902294703422,
+      "loss": 0.158,
+      "step": 2039
+    },
+    {
+      "epoch": 0.14719145712327283,
+      "grad_norm": 0.09195639938116074,
+      "learning_rate": 0.00019412613652763748,
+      "loss": 0.1506,
+      "step": 2040
+    },
+    {
+      "epoch": 0.14726360979833328,
+      "grad_norm": 0.11854150891304016,
+      "learning_rate": 0.00019412325010824072,
+      "loss": 0.1444,
+      "step": 2041
+    },
+    {
+      "epoch": 0.1473357624733937,
+      "grad_norm": 0.12206149101257324,
+      "learning_rate": 0.000194120363688844,
+      "loss": 0.2044,
+      "step": 2042
+    },
+    {
+      "epoch": 0.14740791514845414,
+      "grad_norm": 0.09963233768939972,
+      "learning_rate": 0.00019411747726944727,
+      "loss": 0.1322,
+      "step": 2043
+    },
+    {
+      "epoch": 0.14748006782351455,
+      "grad_norm": 0.09679488092660904,
+      "learning_rate": 0.00019411459085005053,
+      "loss": 0.1614,
+      "step": 2044
+    },
+    {
+      "epoch": 0.14755222049857497,
+      "grad_norm": 0.08919347077608109,
+      "learning_rate": 0.0001941117044306538,
+      "loss": 0.1638,
+      "step": 2045
+    },
+    {
+      "epoch": 0.14762437317363541,
+      "grad_norm": 0.10073181241750717,
+      "learning_rate": 0.00019410881801125703,
+      "loss": 0.1578,
+      "step": 2046
+    },
+    {
+      "epoch": 0.14769652584869583,
+      "grad_norm": 0.09946445375680923,
+      "learning_rate": 0.0001941059315918603,
+      "loss": 0.1246,
+      "step": 2047
+    },
+    {
+      "epoch": 0.14776867852375627,
+      "grad_norm": 0.12240669876337051,
+      "learning_rate": 0.00019410304517246356,
+      "loss": 0.1702,
+      "step": 2048
+    },
+    {
+      "epoch": 0.1478408311988167,
+      "grad_norm": 0.13062608242034912,
+      "learning_rate": 0.00019410015875306685,
+      "loss": 0.1688,
+      "step": 2049
+    },
+    {
+      "epoch": 0.14791298387387714,
+      "grad_norm": 0.10089318454265594,
+      "learning_rate": 0.0001940972723336701,
+      "loss": 0.1463,
+      "step": 2050
+    },
+    {
+      "epoch": 0.14798513654893755,
+      "grad_norm": 0.10280703753232956,
+      "learning_rate": 0.00019409438591427335,
+      "loss": 0.1733,
+      "step": 2051
+    },
+    {
+      "epoch": 0.14805728922399797,
+      "grad_norm": 0.10768252611160278,
+      "learning_rate": 0.0001940914994948766,
+      "loss": 0.1955,
+      "step": 2052
+    },
+    {
+      "epoch": 0.1481294418990584,
+      "grad_norm": 0.10142546147108078,
+      "learning_rate": 0.00019408861307547987,
+      "loss": 0.1717,
+      "step": 2053
+    },
+    {
+      "epoch": 0.14820159457411883,
+      "grad_norm": 0.08525700867176056,
+      "learning_rate": 0.00019408572665608313,
+      "loss": 0.1226,
+      "step": 2054
+    },
+    {
+      "epoch": 0.14827374724917927,
+      "grad_norm": 0.19013550877571106,
+      "learning_rate": 0.0001940828402366864,
+      "loss": 0.1807,
+      "step": 2055
+    },
+    {
+      "epoch": 0.1483458999242397,
+      "grad_norm": 0.08787938207387924,
+      "learning_rate": 0.00019407995381728966,
+      "loss": 0.1473,
+      "step": 2056
+    },
+    {
+      "epoch": 0.14841805259930013,
+      "grad_norm": 0.10118581354618073,
+      "learning_rate": 0.00019407706739789292,
+      "loss": 0.1856,
+      "step": 2057
+    },
+    {
+      "epoch": 0.14849020527436055,
+      "grad_norm": 0.10909256339073181,
+      "learning_rate": 0.00019407418097849619,
+      "loss": 0.1626,
+      "step": 2058
+    },
+    {
+      "epoch": 0.14856235794942096,
+      "grad_norm": 0.12503749132156372,
+      "learning_rate": 0.00019407129455909945,
+      "loss": 0.1626,
+      "step": 2059
+    },
+    {
+      "epoch": 0.1486345106244814,
+      "grad_norm": 0.0884174332022667,
+      "learning_rate": 0.0001940684081397027,
+      "loss": 0.1542,
+      "step": 2060
+    },
+    {
+      "epoch": 0.14870666329954182,
+      "grad_norm": 0.08488410711288452,
+      "learning_rate": 0.00019406552172030597,
+      "loss": 0.143,
+      "step": 2061
+    },
+    {
+      "epoch": 0.14877881597460227,
+      "grad_norm": 0.0907353013753891,
+      "learning_rate": 0.0001940626353009092,
+      "loss": 0.1397,
+      "step": 2062
+    },
+    {
+      "epoch": 0.14885096864966268,
+      "grad_norm": 0.10294927656650543,
+      "learning_rate": 0.00019405974888151247,
+      "loss": 0.2162,
+      "step": 2063
+    },
+    {
+      "epoch": 0.1489231213247231,
+      "grad_norm": 0.10255087167024612,
+      "learning_rate": 0.00019405686246211576,
+      "loss": 0.1783,
+      "step": 2064
+    },
+    {
+      "epoch": 0.14899527399978355,
+      "grad_norm": 0.10541793704032898,
+      "learning_rate": 0.00019405397604271903,
+      "loss": 0.1471,
+      "step": 2065
+    },
+    {
+      "epoch": 0.14906742667484396,
+      "grad_norm": 0.09721536934375763,
+      "learning_rate": 0.0001940510896233223,
+      "loss": 0.1421,
+      "step": 2066
+    },
+    {
+      "epoch": 0.1491395793499044,
+      "grad_norm": 0.09638044238090515,
+      "learning_rate": 0.00019404820320392552,
+      "loss": 0.1849,
+      "step": 2067
+    },
+    {
+      "epoch": 0.14921173202496482,
+      "grad_norm": 0.12224458903074265,
+      "learning_rate": 0.0001940453167845288,
+      "loss": 0.1846,
+      "step": 2068
+    },
+    {
+      "epoch": 0.14928388470002527,
+      "grad_norm": 0.13559360802173615,
+      "learning_rate": 0.00019404243036513205,
+      "loss": 0.1324,
+      "step": 2069
+    },
+    {
+      "epoch": 0.14935603737508568,
+      "grad_norm": 0.10047730058431625,
+      "learning_rate": 0.0001940395439457353,
+      "loss": 0.1733,
+      "step": 2070
+    },
+    {
+      "epoch": 0.1494281900501461,
+      "grad_norm": 0.10166924446821213,
+      "learning_rate": 0.0001940366575263386,
+      "loss": 0.1772,
+      "step": 2071
+    },
+    {
+      "epoch": 0.14950034272520654,
+      "grad_norm": 0.08213718980550766,
+      "learning_rate": 0.00019403377110694184,
+      "loss": 0.155,
+      "step": 2072
+    },
+    {
+      "epoch": 0.14957249540026696,
+      "grad_norm": 0.11648483574390411,
+      "learning_rate": 0.0001940308846875451,
+      "loss": 0.1453,
+      "step": 2073
+    },
+    {
+      "epoch": 0.1496446480753274,
+      "grad_norm": 0.09811491519212723,
+      "learning_rate": 0.00019402799826814837,
+      "loss": 0.1383,
+      "step": 2074
+    },
+    {
+      "epoch": 0.14971680075038782,
+      "grad_norm": 0.09943008422851562,
+      "learning_rate": 0.00019402511184875163,
+      "loss": 0.149,
+      "step": 2075
+    },
+    {
+      "epoch": 0.14978895342544823,
+      "grad_norm": 0.09095396101474762,
+      "learning_rate": 0.0001940222254293549,
+      "loss": 0.1747,
+      "step": 2076
+    },
+    {
+      "epoch": 0.14986110610050868,
+      "grad_norm": 0.09340521693229675,
+      "learning_rate": 0.00019401933900995815,
+      "loss": 0.1428,
+      "step": 2077
+    },
+    {
+      "epoch": 0.1499332587755691,
+      "grad_norm": 0.10803819447755814,
+      "learning_rate": 0.00019401645259056142,
+      "loss": 0.1745,
+      "step": 2078
+    },
+    {
+      "epoch": 0.15000541145062954,
+      "grad_norm": 0.0965447798371315,
+      "learning_rate": 0.00019401356617116468,
+      "loss": 0.173,
+      "step": 2079
+    },
+    {
+      "epoch": 0.15007756412568996,
+      "grad_norm": 0.09196187555789948,
+      "learning_rate": 0.00019401067975176794,
+      "loss": 0.2279,
+      "step": 2080
+    },
+    {
+      "epoch": 0.1501497168007504,
+      "grad_norm": 0.0951419547200203,
+      "learning_rate": 0.0001940077933323712,
+      "loss": 0.1623,
+      "step": 2081
+    },
+    {
+      "epoch": 0.15022186947581082,
+      "grad_norm": 0.09142082929611206,
+      "learning_rate": 0.00019400490691297447,
+      "loss": 0.1723,
+      "step": 2082
+    },
+    {
+      "epoch": 0.15029402215087123,
+      "grad_norm": 0.09847602993249893,
+      "learning_rate": 0.00019400202049357773,
+      "loss": 0.1311,
+      "step": 2083
+    },
+    {
+      "epoch": 0.15036617482593168,
+      "grad_norm": 0.11255521327257156,
+      "learning_rate": 0.00019399913407418097,
+      "loss": 0.1861,
+      "step": 2084
+    },
+    {
+      "epoch": 0.1504383275009921,
+      "grad_norm": 0.11026148498058319,
+      "learning_rate": 0.00019399624765478426,
+      "loss": 0.1332,
+      "step": 2085
+    },
+    {
+      "epoch": 0.15051048017605254,
+      "grad_norm": 0.09424188733100891,
+      "learning_rate": 0.00019399336123538752,
+      "loss": 0.1839,
+      "step": 2086
+    },
+    {
+      "epoch": 0.15058263285111295,
+      "grad_norm": 0.09201314300298691,
+      "learning_rate": 0.00019399047481599078,
+      "loss": 0.1479,
+      "step": 2087
+    },
+    {
+      "epoch": 0.1506547855261734,
+      "grad_norm": 0.12157163769006729,
+      "learning_rate": 0.00019398758839659405,
+      "loss": 0.1567,
+      "step": 2088
+    },
+    {
+      "epoch": 0.1507269382012338,
+      "grad_norm": 0.08465547859668732,
+      "learning_rate": 0.00019398470197719728,
+      "loss": 0.138,
+      "step": 2089
+    },
+    {
+      "epoch": 0.15079909087629423,
+      "grad_norm": 0.0765736773610115,
+      "learning_rate": 0.00019398181555780054,
+      "loss": 0.1473,
+      "step": 2090
+    },
+    {
+      "epoch": 0.15087124355135467,
+      "grad_norm": 0.09816624224185944,
+      "learning_rate": 0.0001939789291384038,
+      "loss": 0.1539,
+      "step": 2091
+    },
+    {
+      "epoch": 0.1509433962264151,
+      "grad_norm": 0.09986189752817154,
+      "learning_rate": 0.0001939760427190071,
+      "loss": 0.1679,
+      "step": 2092
+    },
+    {
+      "epoch": 0.15101554890147553,
+      "grad_norm": 0.11674615740776062,
+      "learning_rate": 0.00019397315629961036,
+      "loss": 0.1389,
+      "step": 2093
+    },
+    {
+      "epoch": 0.15108770157653595,
+      "grad_norm": 0.11395678669214249,
+      "learning_rate": 0.0001939702698802136,
+      "loss": 0.142,
+      "step": 2094
+    },
+    {
+      "epoch": 0.15115985425159637,
+      "grad_norm": 0.10832277685403824,
+      "learning_rate": 0.00019396738346081686,
+      "loss": 0.1535,
+      "step": 2095
+    },
+    {
+      "epoch": 0.1512320069266568,
+      "grad_norm": 0.11256370693445206,
+      "learning_rate": 0.00019396449704142012,
+      "loss": 0.1809,
+      "step": 2096
+    },
+    {
+      "epoch": 0.15130415960171723,
+      "grad_norm": 0.11528280377388,
+      "learning_rate": 0.00019396161062202339,
+      "loss": 0.1339,
+      "step": 2097
+    },
+    {
+      "epoch": 0.15137631227677767,
+      "grad_norm": 0.09868552535772324,
+      "learning_rate": 0.00019395872420262665,
+      "loss": 0.1552,
+      "step": 2098
+    },
+    {
+      "epoch": 0.15144846495183809,
+      "grad_norm": 0.1049208790063858,
+      "learning_rate": 0.0001939558377832299,
+      "loss": 0.1688,
+      "step": 2099
+    },
+    {
+      "epoch": 0.15152061762689853,
+      "grad_norm": 0.08519754558801651,
+      "learning_rate": 0.00019395295136383317,
+      "loss": 0.1878,
+      "step": 2100
+    },
+    {
+      "epoch": 0.15159277030195895,
+      "grad_norm": 0.09680404514074326,
+      "learning_rate": 0.00019395006494443644,
+      "loss": 0.1586,
+      "step": 2101
+    },
+    {
+      "epoch": 0.15166492297701936,
+      "grad_norm": 0.11290711164474487,
+      "learning_rate": 0.0001939471785250397,
+      "loss": 0.1597,
+      "step": 2102
+    },
+    {
+      "epoch": 0.1517370756520798,
+      "grad_norm": 0.09537848085165024,
+      "learning_rate": 0.00019394429210564296,
+      "loss": 0.2275,
+      "step": 2103
+    },
+    {
+      "epoch": 0.15180922832714022,
+      "grad_norm": 0.08265725523233414,
+      "learning_rate": 0.00019394140568624623,
+      "loss": 0.1428,
+      "step": 2104
+    },
+    {
+      "epoch": 0.15188138100220067,
+      "grad_norm": 0.09027748554944992,
+      "learning_rate": 0.00019393851926684946,
+      "loss": 0.1272,
+      "step": 2105
+    },
+    {
+      "epoch": 0.15195353367726108,
+      "grad_norm": 0.10689380019903183,
+      "learning_rate": 0.00019393563284745275,
+      "loss": 0.1369,
+      "step": 2106
+    },
+    {
+      "epoch": 0.1520256863523215,
+      "grad_norm": 0.10896710306406021,
+      "learning_rate": 0.00019393274642805601,
+      "loss": 0.1801,
+      "step": 2107
+    },
+    {
+      "epoch": 0.15209783902738194,
+      "grad_norm": 0.13935106992721558,
+      "learning_rate": 0.00019392986000865928,
+      "loss": 0.1628,
+      "step": 2108
+    },
+    {
+      "epoch": 0.15216999170244236,
+      "grad_norm": 0.12007452547550201,
+      "learning_rate": 0.00019392697358926254,
+      "loss": 0.1343,
+      "step": 2109
+    },
+    {
+      "epoch": 0.1522421443775028,
+      "grad_norm": 0.1309816837310791,
+      "learning_rate": 0.00019392408716986578,
+      "loss": 0.1575,
+      "step": 2110
+    },
+    {
+      "epoch": 0.15231429705256322,
+      "grad_norm": 0.13151468336582184,
+      "learning_rate": 0.00019392120075046904,
+      "loss": 0.1374,
+      "step": 2111
+    },
+    {
+      "epoch": 0.15238644972762366,
+      "grad_norm": 0.11823946237564087,
+      "learning_rate": 0.0001939183143310723,
+      "loss": 0.1567,
+      "step": 2112
+    },
+    {
+      "epoch": 0.15245860240268408,
+      "grad_norm": 0.1083201989531517,
+      "learning_rate": 0.0001939154279116756,
+      "loss": 0.1858,
+      "step": 2113
+    },
+    {
+      "epoch": 0.1525307550777445,
+      "grad_norm": 0.1123339906334877,
+      "learning_rate": 0.00019391254149227885,
+      "loss": 0.1731,
+      "step": 2114
+    },
+    {
+      "epoch": 0.15260290775280494,
+      "grad_norm": 0.08939201384782791,
+      "learning_rate": 0.0001939096550728821,
+      "loss": 0.1823,
+      "step": 2115
+    },
+    {
+      "epoch": 0.15267506042786536,
+      "grad_norm": 0.09028434008359909,
+      "learning_rate": 0.00019390676865348535,
+      "loss": 0.1502,
+      "step": 2116
+    },
+    {
+      "epoch": 0.1527472131029258,
+      "grad_norm": 0.1175919696688652,
+      "learning_rate": 0.00019390388223408862,
+      "loss": 0.1618,
+      "step": 2117
+    },
+    {
+      "epoch": 0.15281936577798622,
+      "grad_norm": 0.12090510874986649,
+      "learning_rate": 0.00019390099581469188,
+      "loss": 0.1444,
+      "step": 2118
+    },
+    {
+      "epoch": 0.15289151845304666,
+      "grad_norm": 0.1123272255063057,
+      "learning_rate": 0.00019389810939529514,
+      "loss": 0.1279,
+      "step": 2119
+    },
+    {
+      "epoch": 0.15296367112810708,
+      "grad_norm": 0.10467422753572464,
+      "learning_rate": 0.0001938952229758984,
+      "loss": 0.192,
+      "step": 2120
+    },
+    {
+      "epoch": 0.1530358238031675,
+      "grad_norm": 0.1312311738729477,
+      "learning_rate": 0.00019389233655650167,
+      "loss": 0.1893,
+      "step": 2121
+    },
+    {
+      "epoch": 0.15310797647822794,
+      "grad_norm": 0.1281033158302307,
+      "learning_rate": 0.00019388945013710493,
+      "loss": 0.1529,
+      "step": 2122
+    },
+    {
+      "epoch": 0.15318012915328835,
+      "grad_norm": 0.08082175254821777,
+      "learning_rate": 0.0001938865637177082,
+      "loss": 0.1717,
+      "step": 2123
+    },
+    {
+      "epoch": 0.1532522818283488,
+      "grad_norm": 0.15641988813877106,
+      "learning_rate": 0.00019388367729831146,
+      "loss": 0.1819,
+      "step": 2124
+    },
+    {
+      "epoch": 0.1533244345034092,
+      "grad_norm": 0.09862814843654633,
+      "learning_rate": 0.00019388079087891472,
+      "loss": 0.1349,
+      "step": 2125
+    },
+    {
+      "epoch": 0.15339658717846963,
+      "grad_norm": 0.09552069008350372,
+      "learning_rate": 0.00019387790445951796,
+      "loss": 0.2239,
+      "step": 2126
+    },
+    {
+      "epoch": 0.15346873985353007,
+      "grad_norm": 0.142461359500885,
+      "learning_rate": 0.00019387501804012125,
+      "loss": 0.1413,
+      "step": 2127
+    },
+    {
+      "epoch": 0.1535408925285905,
+      "grad_norm": 0.09516191482543945,
+      "learning_rate": 0.0001938721316207245,
+      "loss": 0.1565,
+      "step": 2128
+    },
+    {
+      "epoch": 0.15361304520365093,
+      "grad_norm": 0.09291546791791916,
+      "learning_rate": 0.00019386924520132777,
+      "loss": 0.1653,
+      "step": 2129
+    },
+    {
+      "epoch": 0.15368519787871135,
+      "grad_norm": 0.16194333136081696,
+      "learning_rate": 0.00019386635878193103,
+      "loss": 0.2035,
+      "step": 2130
+    },
+    {
+      "epoch": 0.1537573505537718,
+      "grad_norm": 0.11512897163629532,
+      "learning_rate": 0.00019386347236253427,
+      "loss": 0.1014,
+      "step": 2131
+    },
+    {
+      "epoch": 0.1538295032288322,
+      "grad_norm": 0.11555943638086319,
+      "learning_rate": 0.00019386058594313753,
+      "loss": 0.2309,
+      "step": 2132
+    },
+    {
+      "epoch": 0.15390165590389263,
+      "grad_norm": 0.10416556894779205,
+      "learning_rate": 0.0001938576995237408,
+      "loss": 0.1617,
+      "step": 2133
+    },
+    {
+      "epoch": 0.15397380857895307,
+      "grad_norm": 0.10757438093423843,
+      "learning_rate": 0.00019385481310434409,
+      "loss": 0.2139,
+      "step": 2134
+    },
+    {
+      "epoch": 0.15404596125401349,
+      "grad_norm": 0.10977016389369965,
+      "learning_rate": 0.00019385192668494735,
+      "loss": 0.2053,
+      "step": 2135
+    },
+    {
+      "epoch": 0.15411811392907393,
+      "grad_norm": 0.10284312814474106,
+      "learning_rate": 0.00019384904026555059,
+      "loss": 0.1563,
+      "step": 2136
+    },
+    {
+      "epoch": 0.15419026660413435,
+      "grad_norm": 0.11248984932899475,
+      "learning_rate": 0.00019384615384615385,
+      "loss": 0.1516,
+      "step": 2137
+    },
+    {
+      "epoch": 0.15426241927919476,
+      "grad_norm": 0.0976649597287178,
+      "learning_rate": 0.0001938432674267571,
+      "loss": 0.1447,
+      "step": 2138
+    },
+    {
+      "epoch": 0.1543345719542552,
+      "grad_norm": 0.12282803654670715,
+      "learning_rate": 0.00019384038100736037,
+      "loss": 0.1759,
+      "step": 2139
+    },
+    {
+      "epoch": 0.15440672462931562,
+      "grad_norm": 0.11239445954561234,
+      "learning_rate": 0.00019383749458796364,
+      "loss": 0.2223,
+      "step": 2140
+    },
+    {
+      "epoch": 0.15447887730437607,
+      "grad_norm": 0.11776033788919449,
+      "learning_rate": 0.0001938346081685669,
+      "loss": 0.2105,
+      "step": 2141
+    },
+    {
+      "epoch": 0.15455102997943648,
+      "grad_norm": 0.0812140703201294,
+      "learning_rate": 0.00019383172174917016,
+      "loss": 0.1552,
+      "step": 2142
+    },
+    {
+      "epoch": 0.15462318265449693,
+      "grad_norm": 0.12144932895898819,
+      "learning_rate": 0.00019382883532977343,
+      "loss": 0.165,
+      "step": 2143
+    },
+    {
+      "epoch": 0.15469533532955734,
+      "grad_norm": 0.12033209204673767,
+      "learning_rate": 0.0001938259489103767,
+      "loss": 0.1523,
+      "step": 2144
+    },
+    {
+      "epoch": 0.15476748800461776,
+      "grad_norm": 0.09720002114772797,
+      "learning_rate": 0.00019382306249097995,
+      "loss": 0.1637,
+      "step": 2145
+    },
+    {
+      "epoch": 0.1548396406796782,
+      "grad_norm": 0.12371323257684708,
+      "learning_rate": 0.00019382017607158321,
+      "loss": 0.2387,
+      "step": 2146
+    },
+    {
+      "epoch": 0.15491179335473862,
+      "grad_norm": 0.11101663112640381,
+      "learning_rate": 0.00019381728965218645,
+      "loss": 0.1638,
+      "step": 2147
+    },
+    {
+      "epoch": 0.15498394602979906,
+      "grad_norm": 0.10234714299440384,
+      "learning_rate": 0.00019381440323278974,
+      "loss": 0.1607,
+      "step": 2148
+    },
+    {
+      "epoch": 0.15505609870485948,
+      "grad_norm": 0.11836856603622437,
+      "learning_rate": 0.000193811516813393,
+      "loss": 0.1584,
+      "step": 2149
+    },
+    {
+      "epoch": 0.15512825137991992,
+      "grad_norm": 0.08731173723936081,
+      "learning_rate": 0.00019380863039399627,
+      "loss": 0.1681,
+      "step": 2150
+    },
+    {
+      "epoch": 0.15520040405498034,
+      "grad_norm": 0.09717448055744171,
+      "learning_rate": 0.00019380574397459953,
+      "loss": 0.1758,
+      "step": 2151
+    },
+    {
+      "epoch": 0.15527255673004076,
+      "grad_norm": 0.11712459474802017,
+      "learning_rate": 0.00019380285755520276,
+      "loss": 0.1758,
+      "step": 2152
+    },
+    {
+      "epoch": 0.1553447094051012,
+      "grad_norm": 0.08976966142654419,
+      "learning_rate": 0.00019379997113580603,
+      "loss": 0.1641,
+      "step": 2153
+    },
+    {
+      "epoch": 0.15541686208016162,
+      "grad_norm": 0.08658537268638611,
+      "learning_rate": 0.0001937970847164093,
+      "loss": 0.1674,
+      "step": 2154
+    },
+    {
+      "epoch": 0.15548901475522206,
+      "grad_norm": 0.09062183648347855,
+      "learning_rate": 0.00019379419829701258,
+      "loss": 0.143,
+      "step": 2155
+    },
+    {
+      "epoch": 0.15556116743028248,
+      "grad_norm": 0.10794931650161743,
+      "learning_rate": 0.00019379131187761584,
+      "loss": 0.2079,
+      "step": 2156
+    },
+    {
+      "epoch": 0.1556333201053429,
+      "grad_norm": 0.14580285549163818,
+      "learning_rate": 0.00019378842545821908,
+      "loss": 0.1764,
+      "step": 2157
+    },
+    {
+      "epoch": 0.15570547278040334,
+      "grad_norm": 0.0971672311425209,
+      "learning_rate": 0.00019378553903882234,
+      "loss": 0.1797,
+      "step": 2158
+    },
+    {
+      "epoch": 0.15577762545546375,
+      "grad_norm": 0.10162393748760223,
+      "learning_rate": 0.0001937826526194256,
+      "loss": 0.1689,
+      "step": 2159
+    },
+    {
+      "epoch": 0.1558497781305242,
+      "grad_norm": 0.09460558742284775,
+      "learning_rate": 0.00019377976620002887,
+      "loss": 0.158,
+      "step": 2160
+    },
+    {
+      "epoch": 0.1559219308055846,
+      "grad_norm": 0.11674042046070099,
+      "learning_rate": 0.00019377687978063213,
+      "loss": 0.1995,
+      "step": 2161
+    },
+    {
+      "epoch": 0.15599408348064506,
+      "grad_norm": 0.09924761950969696,
+      "learning_rate": 0.0001937739933612354,
+      "loss": 0.1761,
+      "step": 2162
+    },
+    {
+      "epoch": 0.15606623615570547,
+      "grad_norm": 0.12028750777244568,
+      "learning_rate": 0.00019377110694183866,
+      "loss": 0.1683,
+      "step": 2163
+    },
+    {
+      "epoch": 0.1561383888307659,
+      "grad_norm": 0.10652889311313629,
+      "learning_rate": 0.00019376822052244192,
+      "loss": 0.1868,
+      "step": 2164
+    },
+    {
+      "epoch": 0.15621054150582633,
+      "grad_norm": 0.1042298898100853,
+      "learning_rate": 0.00019376533410304518,
+      "loss": 0.1889,
+      "step": 2165
+    },
+    {
+      "epoch": 0.15628269418088675,
+      "grad_norm": 0.0930410623550415,
+      "learning_rate": 0.00019376244768364845,
+      "loss": 0.1612,
+      "step": 2166
+    },
+    {
+      "epoch": 0.1563548468559472,
+      "grad_norm": 0.08166609704494476,
+      "learning_rate": 0.0001937595612642517,
+      "loss": 0.1889,
+      "step": 2167
+    },
+    {
+      "epoch": 0.1564269995310076,
+      "grad_norm": 0.09500760585069656,
+      "learning_rate": 0.00019375667484485494,
+      "loss": 0.1847,
+      "step": 2168
+    },
+    {
+      "epoch": 0.15649915220606803,
+      "grad_norm": 0.10849113762378693,
+      "learning_rate": 0.00019375378842545823,
+      "loss": 0.1745,
+      "step": 2169
+    },
+    {
+      "epoch": 0.15657130488112847,
+      "grad_norm": 0.08661068975925446,
+      "learning_rate": 0.0001937509020060615,
+      "loss": 0.1455,
+      "step": 2170
+    },
+    {
+      "epoch": 0.1566434575561889,
+      "grad_norm": 0.11696840822696686,
+      "learning_rate": 0.00019374801558666476,
+      "loss": 0.1396,
+      "step": 2171
+    },
+    {
+      "epoch": 0.15671561023124933,
+      "grad_norm": 0.08419200032949448,
+      "learning_rate": 0.00019374512916726802,
+      "loss": 0.1242,
+      "step": 2172
+    },
+    {
+      "epoch": 0.15678776290630975,
+      "grad_norm": 0.08925656229257584,
+      "learning_rate": 0.00019374224274787126,
+      "loss": 0.1693,
+      "step": 2173
+    },
+    {
+      "epoch": 0.1568599155813702,
+      "grad_norm": 0.10732999444007874,
+      "learning_rate": 0.00019373935632847452,
+      "loss": 0.1577,
+      "step": 2174
+    },
+    {
+      "epoch": 0.1569320682564306,
+      "grad_norm": 0.08582869917154312,
+      "learning_rate": 0.00019373646990907778,
+      "loss": 0.128,
+      "step": 2175
+    },
+    {
+      "epoch": 0.15700422093149102,
+      "grad_norm": 0.10227958112955093,
+      "learning_rate": 0.00019373358348968107,
+      "loss": 0.1814,
+      "step": 2176
+    },
+    {
+      "epoch": 0.15707637360655147,
+      "grad_norm": 0.10264535993337631,
+      "learning_rate": 0.00019373069707028434,
+      "loss": 0.1629,
+      "step": 2177
+    },
+    {
+      "epoch": 0.15714852628161188,
+      "grad_norm": 0.08098578453063965,
+      "learning_rate": 0.00019372781065088757,
+      "loss": 0.1958,
+      "step": 2178
+    },
+    {
+      "epoch": 0.15722067895667233,
+      "grad_norm": 0.09199430793523788,
+      "learning_rate": 0.00019372492423149084,
+      "loss": 0.2053,
+      "step": 2179
+    },
+    {
+      "epoch": 0.15729283163173274,
+      "grad_norm": 0.10968173295259476,
+      "learning_rate": 0.0001937220378120941,
+      "loss": 0.1753,
+      "step": 2180
+    },
+    {
+      "epoch": 0.1573649843067932,
+      "grad_norm": 0.1071987971663475,
+      "learning_rate": 0.00019371915139269736,
+      "loss": 0.1894,
+      "step": 2181
+    },
+    {
+      "epoch": 0.1574371369818536,
+      "grad_norm": 0.1399054378271103,
+      "learning_rate": 0.00019371626497330063,
+      "loss": 0.1645,
+      "step": 2182
+    },
+    {
+      "epoch": 0.15750928965691402,
+      "grad_norm": 0.11898241192102432,
+      "learning_rate": 0.0001937133785539039,
+      "loss": 0.1419,
+      "step": 2183
+    },
+    {
+      "epoch": 0.15758144233197446,
+      "grad_norm": 0.12177052348852158,
+      "learning_rate": 0.00019371049213450715,
+      "loss": 0.2028,
+      "step": 2184
+    },
+    {
+      "epoch": 0.15765359500703488,
+      "grad_norm": 0.12613415718078613,
+      "learning_rate": 0.00019370760571511041,
+      "loss": 0.193,
+      "step": 2185
+    },
+    {
+      "epoch": 0.15772574768209532,
+      "grad_norm": 0.1313813477754593,
+      "learning_rate": 0.00019370471929571368,
+      "loss": 0.1802,
+      "step": 2186
+    },
+    {
+      "epoch": 0.15779790035715574,
+      "grad_norm": 0.10271768271923065,
+      "learning_rate": 0.00019370183287631694,
+      "loss": 0.1372,
+      "step": 2187
+    },
+    {
+      "epoch": 0.15787005303221616,
+      "grad_norm": 0.11866496503353119,
+      "learning_rate": 0.0001936989464569202,
+      "loss": 0.1582,
+      "step": 2188
+    },
+    {
+      "epoch": 0.1579422057072766,
+      "grad_norm": 0.08273667097091675,
+      "learning_rate": 0.00019369606003752347,
+      "loss": 0.2097,
+      "step": 2189
+    },
+    {
+      "epoch": 0.15801435838233702,
+      "grad_norm": 0.08744383603334427,
+      "learning_rate": 0.00019369317361812673,
+      "loss": 0.229,
+      "step": 2190
+    },
+    {
+      "epoch": 0.15808651105739746,
+      "grad_norm": 0.10181351006031036,
+      "learning_rate": 0.00019369028719873,
+      "loss": 0.1521,
+      "step": 2191
+    },
+    {
+      "epoch": 0.15815866373245788,
+      "grad_norm": 0.09820724278688431,
+      "learning_rate": 0.00019368740077933325,
+      "loss": 0.2011,
+      "step": 2192
+    },
+    {
+      "epoch": 0.15823081640751832,
+      "grad_norm": 0.09312766790390015,
+      "learning_rate": 0.00019368451435993652,
+      "loss": 0.1985,
+      "step": 2193
+    },
+    {
+      "epoch": 0.15830296908257874,
+      "grad_norm": 0.10480905324220657,
+      "learning_rate": 0.00019368162794053978,
+      "loss": 0.1862,
+      "step": 2194
+    },
+    {
+      "epoch": 0.15837512175763915,
+      "grad_norm": 0.10711691528558731,
+      "learning_rate": 0.00019367874152114302,
+      "loss": 0.1601,
+      "step": 2195
+    },
+    {
+      "epoch": 0.1584472744326996,
+      "grad_norm": 0.1277097761631012,
+      "learning_rate": 0.00019367585510174628,
+      "loss": 0.2108,
+      "step": 2196
+    },
+    {
+      "epoch": 0.15851942710776,
+      "grad_norm": 0.14156398177146912,
+      "learning_rate": 0.00019367296868234957,
+      "loss": 0.1836,
+      "step": 2197
+    },
+    {
+      "epoch": 0.15859157978282046,
+      "grad_norm": 0.0874231830239296,
+      "learning_rate": 0.00019367008226295283,
+      "loss": 0.202,
+      "step": 2198
+    },
+    {
+      "epoch": 0.15866373245788087,
+      "grad_norm": 0.08548527210950851,
+      "learning_rate": 0.0001936671958435561,
+      "loss": 0.1939,
+      "step": 2199
+    },
+    {
+      "epoch": 0.1587358851329413,
+      "grad_norm": 0.10133644193410873,
+      "learning_rate": 0.00019366430942415933,
+      "loss": 0.1292,
+      "step": 2200
+    },
+    {
+      "epoch": 0.15880803780800173,
+      "grad_norm": 0.09973689913749695,
+      "learning_rate": 0.0001936614230047626,
+      "loss": 0.1267,
+      "step": 2201
+    },
+    {
+      "epoch": 0.15888019048306215,
+      "grad_norm": 0.11650566756725311,
+      "learning_rate": 0.00019365853658536586,
+      "loss": 0.1686,
+      "step": 2202
+    },
+    {
+      "epoch": 0.1589523431581226,
+      "grad_norm": 0.09599412977695465,
+      "learning_rate": 0.00019365565016596912,
+      "loss": 0.1675,
+      "step": 2203
+    },
+    {
+      "epoch": 0.159024495833183,
+      "grad_norm": 0.12936724722385406,
+      "learning_rate": 0.0001936527637465724,
+      "loss": 0.1783,
+      "step": 2204
+    },
+    {
+      "epoch": 0.15909664850824345,
+      "grad_norm": 0.1200418621301651,
+      "learning_rate": 0.00019364987732717565,
+      "loss": 0.2125,
+      "step": 2205
+    },
+    {
+      "epoch": 0.15916880118330387,
+      "grad_norm": 0.12506982684135437,
+      "learning_rate": 0.0001936469909077789,
+      "loss": 0.1663,
+      "step": 2206
+    },
+    {
+      "epoch": 0.1592409538583643,
+      "grad_norm": 0.12607517838478088,
+      "learning_rate": 0.00019364410448838217,
+      "loss": 0.1716,
+      "step": 2207
+    },
+    {
+      "epoch": 0.15931310653342473,
+      "grad_norm": 0.12602943181991577,
+      "learning_rate": 0.00019364121806898543,
+      "loss": 0.1949,
+      "step": 2208
+    },
+    {
+      "epoch": 0.15938525920848515,
+      "grad_norm": 0.09044279903173447,
+      "learning_rate": 0.0001936383316495887,
+      "loss": 0.1656,
+      "step": 2209
+    },
+    {
+      "epoch": 0.1594574118835456,
+      "grad_norm": 0.1014801487326622,
+      "learning_rate": 0.00019363544523019196,
+      "loss": 0.1379,
+      "step": 2210
+    },
+    {
+      "epoch": 0.159529564558606,
+      "grad_norm": 0.143855020403862,
+      "learning_rate": 0.00019363255881079522,
+      "loss": 0.1992,
+      "step": 2211
+    },
+    {
+      "epoch": 0.15960171723366645,
+      "grad_norm": 0.09931483119726181,
+      "learning_rate": 0.00019362967239139849,
+      "loss": 0.1715,
+      "step": 2212
+    },
+    {
+      "epoch": 0.15967386990872687,
+      "grad_norm": 0.11490177363157272,
+      "learning_rate": 0.00019362678597200175,
+      "loss": 0.1756,
+      "step": 2213
+    },
+    {
+      "epoch": 0.15974602258378728,
+      "grad_norm": 0.10598494857549667,
+      "learning_rate": 0.000193623899552605,
+      "loss": 0.1875,
+      "step": 2214
+    },
+    {
+      "epoch": 0.15981817525884773,
+      "grad_norm": 0.109975166618824,
+      "learning_rate": 0.00019362101313320827,
+      "loss": 0.1705,
+      "step": 2215
+    },
+    {
+      "epoch": 0.15989032793390814,
+      "grad_norm": 0.1269775778055191,
+      "learning_rate": 0.0001936181267138115,
+      "loss": 0.2032,
+      "step": 2216
+    },
+    {
+      "epoch": 0.1599624806089686,
+      "grad_norm": 0.09906476736068726,
+      "learning_rate": 0.00019361524029441477,
+      "loss": 0.181,
+      "step": 2217
+    },
+    {
+      "epoch": 0.160034633284029,
+      "grad_norm": 0.09717681258916855,
+      "learning_rate": 0.00019361235387501806,
+      "loss": 0.1818,
+      "step": 2218
+    },
+    {
+      "epoch": 0.16010678595908942,
+      "grad_norm": 0.0946667492389679,
+      "learning_rate": 0.00019360946745562133,
+      "loss": 0.1486,
+      "step": 2219
+    },
+    {
+      "epoch": 0.16017893863414986,
+      "grad_norm": 0.1109989807009697,
+      "learning_rate": 0.0001936065810362246,
+      "loss": 0.1428,
+      "step": 2220
+    },
+    {
+      "epoch": 0.16025109130921028,
+      "grad_norm": 0.10091498494148254,
+      "learning_rate": 0.00019360369461682782,
+      "loss": 0.169,
+      "step": 2221
+    },
+    {
+      "epoch": 0.16032324398427072,
+      "grad_norm": 0.10776679217815399,
+      "learning_rate": 0.0001936008081974311,
+      "loss": 0.1712,
+      "step": 2222
+    },
+    {
+      "epoch": 0.16039539665933114,
+      "grad_norm": 0.11089781671762466,
+      "learning_rate": 0.00019359792177803435,
+      "loss": 0.2127,
+      "step": 2223
+    },
+    {
+      "epoch": 0.16046754933439158,
+      "grad_norm": 0.1164630874991417,
+      "learning_rate": 0.00019359503535863761,
+      "loss": 0.1456,
+      "step": 2224
+    },
+    {
+      "epoch": 0.160539702009452,
+      "grad_norm": 0.11441965401172638,
+      "learning_rate": 0.0001935921489392409,
+      "loss": 0.164,
+      "step": 2225
+    },
+    {
+      "epoch": 0.16061185468451242,
+      "grad_norm": 0.10857464373111725,
+      "learning_rate": 0.00019358926251984414,
+      "loss": 0.1597,
+      "step": 2226
+    },
+    {
+      "epoch": 0.16068400735957286,
+      "grad_norm": 0.11473193019628525,
+      "learning_rate": 0.0001935863761004474,
+      "loss": 0.1757,
+      "step": 2227
+    },
+    {
+      "epoch": 0.16075616003463328,
+      "grad_norm": 0.12451031059026718,
+      "learning_rate": 0.00019358348968105067,
+      "loss": 0.1495,
+      "step": 2228
+    },
+    {
+      "epoch": 0.16082831270969372,
+      "grad_norm": 0.12104455381631851,
+      "learning_rate": 0.00019358060326165393,
+      "loss": 0.1389,
+      "step": 2229
+    },
+    {
+      "epoch": 0.16090046538475414,
+      "grad_norm": 0.12772192060947418,
+      "learning_rate": 0.0001935777168422572,
+      "loss": 0.208,
+      "step": 2230
+    },
+    {
+      "epoch": 0.16097261805981455,
+      "grad_norm": 0.13302107155323029,
+      "learning_rate": 0.00019357483042286045,
+      "loss": 0.1717,
+      "step": 2231
+    },
+    {
+      "epoch": 0.161044770734875,
+      "grad_norm": 0.11099439114332199,
+      "learning_rate": 0.00019357194400346372,
+      "loss": 0.1563,
+      "step": 2232
+    },
+    {
+      "epoch": 0.16111692340993541,
+      "grad_norm": 0.13218525052070618,
+      "learning_rate": 0.00019356905758406698,
+      "loss": 0.1688,
+      "step": 2233
+    },
+    {
+      "epoch": 0.16118907608499586,
+      "grad_norm": 0.1205550953745842,
+      "learning_rate": 0.00019356617116467024,
+      "loss": 0.1322,
+      "step": 2234
+    },
+    {
+      "epoch": 0.16126122876005627,
+      "grad_norm": 0.16065192222595215,
+      "learning_rate": 0.0001935632847452735,
+      "loss": 0.1724,
+      "step": 2235
+    },
+    {
+      "epoch": 0.16133338143511672,
+      "grad_norm": 0.08642034977674484,
+      "learning_rate": 0.00019356039832587677,
+      "loss": 0.1477,
+      "step": 2236
+    },
+    {
+      "epoch": 0.16140553411017713,
+      "grad_norm": 0.10994623601436615,
+      "learning_rate": 0.00019355751190648,
+      "loss": 0.1642,
+      "step": 2237
+    },
+    {
+      "epoch": 0.16147768678523755,
+      "grad_norm": 0.12689784169197083,
+      "learning_rate": 0.00019355462548708327,
+      "loss": 0.1916,
+      "step": 2238
+    },
+    {
+      "epoch": 0.161549839460298,
+      "grad_norm": 0.09766849130392075,
+      "learning_rate": 0.00019355173906768656,
+      "loss": 0.1336,
+      "step": 2239
+    },
+    {
+      "epoch": 0.1616219921353584,
+      "grad_norm": 0.11204902082681656,
+      "learning_rate": 0.00019354885264828982,
+      "loss": 0.2044,
+      "step": 2240
+    },
+    {
+      "epoch": 0.16169414481041886,
+      "grad_norm": 0.11278552561998367,
+      "learning_rate": 0.00019354596622889308,
+      "loss": 0.153,
+      "step": 2241
+    },
+    {
+      "epoch": 0.16176629748547927,
+      "grad_norm": 0.10636945068836212,
+      "learning_rate": 0.00019354307980949632,
+      "loss": 0.1827,
+      "step": 2242
+    },
+    {
+      "epoch": 0.16183845016053972,
+      "grad_norm": 0.10965976119041443,
+      "learning_rate": 0.00019354019339009958,
+      "loss": 0.1321,
+      "step": 2243
+    },
+    {
+      "epoch": 0.16191060283560013,
+      "grad_norm": 0.11676888912916183,
+      "learning_rate": 0.00019353730697070285,
+      "loss": 0.1791,
+      "step": 2244
+    },
+    {
+      "epoch": 0.16198275551066055,
+      "grad_norm": 0.11976998299360275,
+      "learning_rate": 0.0001935344205513061,
+      "loss": 0.2231,
+      "step": 2245
+    },
+    {
+      "epoch": 0.162054908185721,
+      "grad_norm": 0.1357678771018982,
+      "learning_rate": 0.0001935315341319094,
+      "loss": 0.1573,
+      "step": 2246
+    },
+    {
+      "epoch": 0.1621270608607814,
+      "grad_norm": 0.09829548001289368,
+      "learning_rate": 0.00019352864771251263,
+      "loss": 0.1865,
+      "step": 2247
+    },
+    {
+      "epoch": 0.16219921353584185,
+      "grad_norm": 0.08333700150251389,
+      "learning_rate": 0.0001935257612931159,
+      "loss": 0.1929,
+      "step": 2248
+    },
+    {
+      "epoch": 0.16227136621090227,
+      "grad_norm": 0.09477890282869339,
+      "learning_rate": 0.00019352287487371916,
+      "loss": 0.1576,
+      "step": 2249
+    },
+    {
+      "epoch": 0.16234351888596268,
+      "grad_norm": 0.0912022739648819,
+      "learning_rate": 0.00019351998845432242,
+      "loss": 0.1589,
+      "step": 2250
+    },
+    {
+      "epoch": 0.16241567156102313,
+      "grad_norm": 0.11333397775888443,
+      "learning_rate": 0.00019351710203492569,
+      "loss": 0.1641,
+      "step": 2251
+    },
+    {
+      "epoch": 0.16248782423608354,
+      "grad_norm": 0.10127349197864532,
+      "learning_rate": 0.00019351421561552895,
+      "loss": 0.1438,
+      "step": 2252
+    },
+    {
+      "epoch": 0.162559976911144,
+      "grad_norm": 0.09116199612617493,
+      "learning_rate": 0.0001935113291961322,
+      "loss": 0.2014,
+      "step": 2253
+    },
+    {
+      "epoch": 0.1626321295862044,
+      "grad_norm": 0.09721777588129044,
+      "learning_rate": 0.00019350844277673547,
+      "loss": 0.1275,
+      "step": 2254
+    },
+    {
+      "epoch": 0.16270428226126485,
+      "grad_norm": 0.12168851494789124,
+      "learning_rate": 0.00019350555635733874,
+      "loss": 0.1581,
+      "step": 2255
+    },
+    {
+      "epoch": 0.16277643493632526,
+      "grad_norm": 0.1065860167145729,
+      "learning_rate": 0.000193502669937942,
+      "loss": 0.1792,
+      "step": 2256
+    },
+    {
+      "epoch": 0.16284858761138568,
+      "grad_norm": 0.11777549237012863,
+      "learning_rate": 0.00019349978351854526,
+      "loss": 0.1588,
+      "step": 2257
+    },
+    {
+      "epoch": 0.16292074028644613,
+      "grad_norm": 0.1272294819355011,
+      "learning_rate": 0.0001934968970991485,
+      "loss": 0.1635,
+      "step": 2258
+    },
+    {
+      "epoch": 0.16299289296150654,
+      "grad_norm": 0.12159106135368347,
+      "learning_rate": 0.00019349401067975176,
+      "loss": 0.1577,
+      "step": 2259
+    },
+    {
+      "epoch": 0.16306504563656699,
+      "grad_norm": 0.10105059295892715,
+      "learning_rate": 0.00019349112426035502,
+      "loss": 0.1884,
+      "step": 2260
+    },
+    {
+      "epoch": 0.1631371983116274,
+      "grad_norm": 0.1152854785323143,
+      "learning_rate": 0.00019348823784095831,
+      "loss": 0.1443,
+      "step": 2261
+    },
+    {
+      "epoch": 0.16320935098668782,
+      "grad_norm": 0.0869210883975029,
+      "learning_rate": 0.00019348535142156158,
+      "loss": 0.1848,
+      "step": 2262
+    },
+    {
+      "epoch": 0.16328150366174826,
+      "grad_norm": 0.1061026081442833,
+      "learning_rate": 0.0001934824650021648,
+      "loss": 0.1658,
+      "step": 2263
+    },
+    {
+      "epoch": 0.16335365633680868,
+      "grad_norm": 0.10692035406827927,
+      "learning_rate": 0.00019347957858276808,
+      "loss": 0.1439,
+      "step": 2264
+    },
+    {
+      "epoch": 0.16342580901186912,
+      "grad_norm": 0.0852183923125267,
+      "learning_rate": 0.00019347669216337134,
+      "loss": 0.1552,
+      "step": 2265
+    },
+    {
+      "epoch": 0.16349796168692954,
+      "grad_norm": 0.08484194427728653,
+      "learning_rate": 0.0001934738057439746,
+      "loss": 0.1303,
+      "step": 2266
+    },
+    {
+      "epoch": 0.16357011436198998,
+      "grad_norm": 0.09818069636821747,
+      "learning_rate": 0.00019347091932457787,
+      "loss": 0.1647,
+      "step": 2267
+    },
+    {
+      "epoch": 0.1636422670370504,
+      "grad_norm": 0.12069786339998245,
+      "learning_rate": 0.00019346803290518113,
+      "loss": 0.1827,
+      "step": 2268
+    },
+    {
+      "epoch": 0.16371441971211081,
+      "grad_norm": 0.10593380779027939,
+      "learning_rate": 0.0001934651464857844,
+      "loss": 0.1872,
+      "step": 2269
+    },
+    {
+      "epoch": 0.16378657238717126,
+      "grad_norm": 0.11723898351192474,
+      "learning_rate": 0.00019346226006638765,
+      "loss": 0.1629,
+      "step": 2270
+    },
+    {
+      "epoch": 0.16385872506223167,
+      "grad_norm": 0.09910007566213608,
+      "learning_rate": 0.00019345937364699092,
+      "loss": 0.1778,
+      "step": 2271
+    },
+    {
+      "epoch": 0.16393087773729212,
+      "grad_norm": 0.10543759912252426,
+      "learning_rate": 0.00019345648722759418,
+      "loss": 0.2005,
+      "step": 2272
+    },
+    {
+      "epoch": 0.16400303041235254,
+      "grad_norm": 0.10561928153038025,
+      "learning_rate": 0.00019345360080819744,
+      "loss": 0.1714,
+      "step": 2273
+    },
+    {
+      "epoch": 0.16407518308741298,
+      "grad_norm": 0.08312105387449265,
+      "learning_rate": 0.00019345071438880068,
+      "loss": 0.1865,
+      "step": 2274
+    },
+    {
+      "epoch": 0.1641473357624734,
+      "grad_norm": 0.08049603551626205,
+      "learning_rate": 0.00019344782796940397,
+      "loss": 0.1019,
+      "step": 2275
+    },
+    {
+      "epoch": 0.1642194884375338,
+      "grad_norm": 0.10335752367973328,
+      "learning_rate": 0.00019344494155000723,
+      "loss": 0.2238,
+      "step": 2276
+    },
+    {
+      "epoch": 0.16429164111259426,
+      "grad_norm": 0.10471571236848831,
+      "learning_rate": 0.0001934420551306105,
+      "loss": 0.1611,
+      "step": 2277
+    },
+    {
+      "epoch": 0.16436379378765467,
+      "grad_norm": 0.10903488099575043,
+      "learning_rate": 0.00019343916871121376,
+      "loss": 0.1799,
+      "step": 2278
+    },
+    {
+      "epoch": 0.16443594646271512,
+      "grad_norm": 0.10458233207464218,
+      "learning_rate": 0.000193436282291817,
+      "loss": 0.1665,
+      "step": 2279
+    },
+    {
+      "epoch": 0.16450809913777553,
+      "grad_norm": 0.12355650961399078,
+      "learning_rate": 0.00019343339587242026,
+      "loss": 0.2636,
+      "step": 2280
+    },
+    {
+      "epoch": 0.16458025181283595,
+      "grad_norm": 0.11420013010501862,
+      "learning_rate": 0.00019343050945302352,
+      "loss": 0.1303,
+      "step": 2281
+    },
+    {
+      "epoch": 0.1646524044878964,
+      "grad_norm": 0.11854225397109985,
+      "learning_rate": 0.0001934276230336268,
+      "loss": 0.1505,
+      "step": 2282
+    },
+    {
+      "epoch": 0.1647245571629568,
+      "grad_norm": 0.14060252904891968,
+      "learning_rate": 0.00019342473661423007,
+      "loss": 0.1488,
+      "step": 2283
+    },
+    {
+      "epoch": 0.16479670983801725,
+      "grad_norm": 0.11233499646186829,
+      "learning_rate": 0.0001934218501948333,
+      "loss": 0.1616,
+      "step": 2284
+    },
+    {
+      "epoch": 0.16486886251307767,
+      "grad_norm": 0.10339425504207611,
+      "learning_rate": 0.00019341896377543657,
+      "loss": 0.1892,
+      "step": 2285
+    },
+    {
+      "epoch": 0.1649410151881381,
+      "grad_norm": 0.13305120170116425,
+      "learning_rate": 0.00019341607735603983,
+      "loss": 0.212,
+      "step": 2286
+    },
+    {
+      "epoch": 0.16501316786319853,
+      "grad_norm": 0.09930705279111862,
+      "learning_rate": 0.0001934131909366431,
+      "loss": 0.1401,
+      "step": 2287
+    },
+    {
+      "epoch": 0.16508532053825895,
+      "grad_norm": 0.09781865030527115,
+      "learning_rate": 0.00019341030451724636,
+      "loss": 0.1714,
+      "step": 2288
+    },
+    {
+      "epoch": 0.1651574732133194,
+      "grad_norm": 0.10273580253124237,
+      "learning_rate": 0.00019340741809784962,
+      "loss": 0.1578,
+      "step": 2289
+    },
+    {
+      "epoch": 0.1652296258883798,
+      "grad_norm": 0.10975068807601929,
+      "learning_rate": 0.00019340453167845289,
+      "loss": 0.1728,
+      "step": 2290
+    },
+    {
+      "epoch": 0.16530177856344025,
+      "grad_norm": 0.10452619940042496,
+      "learning_rate": 0.00019340164525905615,
+      "loss": 0.1471,
+      "step": 2291
+    },
+    {
+      "epoch": 0.16537393123850067,
+      "grad_norm": 0.11115576326847076,
+      "learning_rate": 0.0001933987588396594,
+      "loss": 0.1697,
+      "step": 2292
+    },
+    {
+      "epoch": 0.16544608391356108,
+      "grad_norm": 0.11397110670804977,
+      "learning_rate": 0.00019339587242026267,
+      "loss": 0.1428,
+      "step": 2293
+    },
+    {
+      "epoch": 0.16551823658862153,
+      "grad_norm": 0.12284787744283676,
+      "learning_rate": 0.00019339298600086594,
+      "loss": 0.1353,
+      "step": 2294
+    },
+    {
+      "epoch": 0.16559038926368194,
+      "grad_norm": 0.10357421636581421,
+      "learning_rate": 0.0001933900995814692,
+      "loss": 0.1307,
+      "step": 2295
+    },
+    {
+      "epoch": 0.16566254193874239,
+      "grad_norm": 0.10247401148080826,
+      "learning_rate": 0.00019338721316207246,
+      "loss": 0.1461,
+      "step": 2296
+    },
+    {
+      "epoch": 0.1657346946138028,
+      "grad_norm": 0.09464439004659653,
+      "learning_rate": 0.00019338432674267573,
+      "loss": 0.1649,
+      "step": 2297
+    },
+    {
+      "epoch": 0.16580684728886325,
+      "grad_norm": 0.10897719860076904,
+      "learning_rate": 0.000193381440323279,
+      "loss": 0.2221,
+      "step": 2298
+    },
+    {
+      "epoch": 0.16587899996392366,
+      "grad_norm": 0.1040739193558693,
+      "learning_rate": 0.00019337855390388225,
+      "loss": 0.1588,
+      "step": 2299
+    },
+    {
+      "epoch": 0.16595115263898408,
+      "grad_norm": 0.11330358684062958,
+      "learning_rate": 0.00019337566748448551,
+      "loss": 0.172,
+      "step": 2300
+    },
+    {
+      "epoch": 0.16602330531404452,
+      "grad_norm": 0.10044106096029282,
+      "learning_rate": 0.00019337278106508875,
+      "loss": 0.1805,
+      "step": 2301
+    },
+    {
+      "epoch": 0.16609545798910494,
+      "grad_norm": 0.0981152132153511,
+      "learning_rate": 0.000193369894645692,
+      "loss": 0.1646,
+      "step": 2302
+    },
+    {
+      "epoch": 0.16616761066416538,
+      "grad_norm": 0.09416420757770538,
+      "learning_rate": 0.0001933670082262953,
+      "loss": 0.1378,
+      "step": 2303
+    },
+    {
+      "epoch": 0.1662397633392258,
+      "grad_norm": 0.14568422734737396,
+      "learning_rate": 0.00019336412180689857,
+      "loss": 0.1851,
+      "step": 2304
+    },
+    {
+      "epoch": 0.16631191601428624,
+      "grad_norm": 0.0925934836268425,
+      "learning_rate": 0.00019336123538750183,
+      "loss": 0.1784,
+      "step": 2305
+    },
+    {
+      "epoch": 0.16638406868934666,
+      "grad_norm": 0.11252991110086441,
+      "learning_rate": 0.00019335834896810506,
+      "loss": 0.1639,
+      "step": 2306
+    },
+    {
+      "epoch": 0.16645622136440708,
+      "grad_norm": 0.10067819058895111,
+      "learning_rate": 0.00019335546254870833,
+      "loss": 0.1457,
+      "step": 2307
+    },
+    {
+      "epoch": 0.16652837403946752,
+      "grad_norm": 0.10660653561353683,
+      "learning_rate": 0.0001933525761293116,
+      "loss": 0.1504,
+      "step": 2308
+    },
+    {
+      "epoch": 0.16660052671452794,
+      "grad_norm": 0.10056259483098984,
+      "learning_rate": 0.00019334968970991485,
+      "loss": 0.2131,
+      "step": 2309
+    },
+    {
+      "epoch": 0.16667267938958838,
+      "grad_norm": 0.10658378154039383,
+      "learning_rate": 0.00019334680329051814,
+      "loss": 0.174,
+      "step": 2310
+    },
+    {
+      "epoch": 0.1667448320646488,
+      "grad_norm": 0.12302286922931671,
+      "learning_rate": 0.00019334391687112138,
+      "loss": 0.1729,
+      "step": 2311
+    },
+    {
+      "epoch": 0.1668169847397092,
+      "grad_norm": 0.09981415420770645,
+      "learning_rate": 0.00019334103045172464,
+      "loss": 0.165,
+      "step": 2312
+    },
+    {
+      "epoch": 0.16688913741476966,
+      "grad_norm": 0.10932218283414841,
+      "learning_rate": 0.0001933381440323279,
+      "loss": 0.1452,
+      "step": 2313
+    },
+    {
+      "epoch": 0.16696129008983007,
+      "grad_norm": 0.09633667767047882,
+      "learning_rate": 0.00019333525761293117,
+      "loss": 0.1822,
+      "step": 2314
+    },
+    {
+      "epoch": 0.16703344276489052,
+      "grad_norm": 0.10372579097747803,
+      "learning_rate": 0.00019333237119353443,
+      "loss": 0.1534,
+      "step": 2315
+    },
+    {
+      "epoch": 0.16710559543995093,
+      "grad_norm": 0.1061662957072258,
+      "learning_rate": 0.0001933294847741377,
+      "loss": 0.1239,
+      "step": 2316
+    },
+    {
+      "epoch": 0.16717774811501138,
+      "grad_norm": 0.09024469554424286,
+      "learning_rate": 0.00019332659835474096,
+      "loss": 0.1143,
+      "step": 2317
+    },
+    {
+      "epoch": 0.1672499007900718,
+      "grad_norm": 0.10284189879894257,
+      "learning_rate": 0.00019332371193534422,
+      "loss": 0.1853,
+      "step": 2318
+    },
+    {
+      "epoch": 0.1673220534651322,
+      "grad_norm": 0.10474637895822525,
+      "learning_rate": 0.00019332082551594748,
+      "loss": 0.1611,
+      "step": 2319
+    },
+    {
+      "epoch": 0.16739420614019265,
+      "grad_norm": 0.10879724472761154,
+      "learning_rate": 0.00019331793909655075,
+      "loss": 0.1297,
+      "step": 2320
+    },
+    {
+      "epoch": 0.16746635881525307,
+      "grad_norm": 0.11982034891843796,
+      "learning_rate": 0.000193315052677154,
+      "loss": 0.1903,
+      "step": 2321
+    },
+    {
+      "epoch": 0.1675385114903135,
+      "grad_norm": 0.1146751120686531,
+      "learning_rate": 0.00019331216625775724,
+      "loss": 0.1926,
+      "step": 2322
+    },
+    {
+      "epoch": 0.16761066416537393,
+      "grad_norm": 0.12385010719299316,
+      "learning_rate": 0.0001933092798383605,
+      "loss": 0.1683,
+      "step": 2323
+    },
+    {
+      "epoch": 0.16768281684043435,
+      "grad_norm": 0.10480430722236633,
+      "learning_rate": 0.0001933063934189638,
+      "loss": 0.2119,
+      "step": 2324
+    },
+    {
+      "epoch": 0.1677549695154948,
+      "grad_norm": 0.15246498584747314,
+      "learning_rate": 0.00019330350699956706,
+      "loss": 0.1945,
+      "step": 2325
+    },
+    {
+      "epoch": 0.1678271221905552,
+      "grad_norm": 0.09928609430789948,
+      "learning_rate": 0.00019330062058017032,
+      "loss": 0.2073,
+      "step": 2326
+    },
+    {
+      "epoch": 0.16789927486561565,
+      "grad_norm": 0.10196902602910995,
+      "learning_rate": 0.00019329773416077356,
+      "loss": 0.1749,
+      "step": 2327
+    },
+    {
+      "epoch": 0.16797142754067607,
+      "grad_norm": 0.11232678592205048,
+      "learning_rate": 0.00019329484774137682,
+      "loss": 0.1576,
+      "step": 2328
+    },
+    {
+      "epoch": 0.1680435802157365,
+      "grad_norm": 0.10379444807767868,
+      "learning_rate": 0.00019329196132198008,
+      "loss": 0.1423,
+      "step": 2329
+    },
+    {
+      "epoch": 0.16811573289079693,
+      "grad_norm": 0.10844355821609497,
+      "learning_rate": 0.00019328907490258335,
+      "loss": 0.1809,
+      "step": 2330
+    },
+    {
+      "epoch": 0.16818788556585734,
+      "grad_norm": 0.09713105112314224,
+      "learning_rate": 0.00019328618848318664,
+      "loss": 0.1865,
+      "step": 2331
+    },
+    {
+      "epoch": 0.1682600382409178,
+      "grad_norm": 0.08709347248077393,
+      "learning_rate": 0.00019328330206378987,
+      "loss": 0.1612,
+      "step": 2332
+    },
+    {
+      "epoch": 0.1683321909159782,
+      "grad_norm": 0.09003622084856033,
+      "learning_rate": 0.00019328041564439314,
+      "loss": 0.1728,
+      "step": 2333
+    },
+    {
+      "epoch": 0.16840434359103865,
+      "grad_norm": 0.07673250883817673,
+      "learning_rate": 0.0001932775292249964,
+      "loss": 0.1515,
+      "step": 2334
+    },
+    {
+      "epoch": 0.16847649626609906,
+      "grad_norm": 0.1532372087240219,
+      "learning_rate": 0.00019327464280559966,
+      "loss": 0.1785,
+      "step": 2335
+    },
+    {
+      "epoch": 0.1685486489411595,
+      "grad_norm": 0.10392007976770401,
+      "learning_rate": 0.00019327175638620293,
+      "loss": 0.1887,
+      "step": 2336
+    },
+    {
+      "epoch": 0.16862080161621992,
+      "grad_norm": 0.15098121762275696,
+      "learning_rate": 0.0001932688699668062,
+      "loss": 0.1305,
+      "step": 2337
+    },
+    {
+      "epoch": 0.16869295429128034,
+      "grad_norm": 0.15759523212909698,
+      "learning_rate": 0.00019326598354740945,
+      "loss": 0.1842,
+      "step": 2338
+    },
+    {
+      "epoch": 0.16876510696634078,
+      "grad_norm": 0.12343751639127731,
+      "learning_rate": 0.00019326309712801271,
+      "loss": 0.1749,
+      "step": 2339
+    },
+    {
+      "epoch": 0.1688372596414012,
+      "grad_norm": 0.09935485571622849,
+      "learning_rate": 0.00019326021070861598,
+      "loss": 0.1787,
+      "step": 2340
+    },
+    {
+      "epoch": 0.16890941231646164,
+      "grad_norm": 0.10357770323753357,
+      "learning_rate": 0.00019325732428921924,
+      "loss": 0.1751,
+      "step": 2341
+    },
+    {
+      "epoch": 0.16898156499152206,
+      "grad_norm": 0.1449156552553177,
+      "learning_rate": 0.0001932544378698225,
+      "loss": 0.1437,
+      "step": 2342
+    },
+    {
+      "epoch": 0.16905371766658248,
+      "grad_norm": 0.10184285044670105,
+      "learning_rate": 0.00019325155145042574,
+      "loss": 0.1394,
+      "step": 2343
+    },
+    {
+      "epoch": 0.16912587034164292,
+      "grad_norm": 0.08780080825090408,
+      "learning_rate": 0.000193248665031029,
+      "loss": 0.1691,
+      "step": 2344
+    },
+    {
+      "epoch": 0.16919802301670334,
+      "grad_norm": 0.12476243823766708,
+      "learning_rate": 0.0001932457786116323,
+      "loss": 0.1498,
+      "step": 2345
+    },
+    {
+      "epoch": 0.16927017569176378,
+      "grad_norm": 0.11500517278909683,
+      "learning_rate": 0.00019324289219223555,
+      "loss": 0.2247,
+      "step": 2346
+    },
+    {
+      "epoch": 0.1693423283668242,
+      "grad_norm": 0.10371655970811844,
+      "learning_rate": 0.00019324000577283882,
+      "loss": 0.1764,
+      "step": 2347
+    },
+    {
+      "epoch": 0.16941448104188464,
+      "grad_norm": 0.1306602954864502,
+      "learning_rate": 0.00019323711935344205,
+      "loss": 0.1783,
+      "step": 2348
+    },
+    {
+      "epoch": 0.16948663371694506,
+      "grad_norm": 0.08568400889635086,
+      "learning_rate": 0.00019323423293404532,
+      "loss": 0.1726,
+      "step": 2349
+    },
+    {
+      "epoch": 0.16955878639200547,
+      "grad_norm": 0.13382382690906525,
+      "learning_rate": 0.00019323134651464858,
+      "loss": 0.1861,
+      "step": 2350
+    },
+    {
+      "epoch": 0.16963093906706592,
+      "grad_norm": 0.1182415708899498,
+      "learning_rate": 0.00019322846009525184,
+      "loss": 0.1278,
+      "step": 2351
+    },
+    {
+      "epoch": 0.16970309174212633,
+      "grad_norm": 0.11527302116155624,
+      "learning_rate": 0.00019322557367585513,
+      "loss": 0.1562,
+      "step": 2352
+    },
+    {
+      "epoch": 0.16977524441718678,
+      "grad_norm": 0.11253364384174347,
+      "learning_rate": 0.00019322268725645837,
+      "loss": 0.0954,
+      "step": 2353
+    },
+    {
+      "epoch": 0.1698473970922472,
+      "grad_norm": 0.12901516258716583,
+      "learning_rate": 0.00019321980083706163,
+      "loss": 0.1748,
+      "step": 2354
+    },
+    {
+      "epoch": 0.1699195497673076,
+      "grad_norm": 0.10245874524116516,
+      "learning_rate": 0.0001932169144176649,
+      "loss": 0.1775,
+      "step": 2355
+    },
+    {
+      "epoch": 0.16999170244236805,
+      "grad_norm": 0.09780610352754593,
+      "learning_rate": 0.00019321402799826816,
+      "loss": 0.155,
+      "step": 2356
+    },
+    {
+      "epoch": 0.17006385511742847,
+      "grad_norm": 0.09187139570713043,
+      "learning_rate": 0.00019321114157887142,
+      "loss": 0.1706,
+      "step": 2357
+    },
+    {
+      "epoch": 0.1701360077924889,
+      "grad_norm": 0.144823357462883,
+      "learning_rate": 0.00019320825515947468,
+      "loss": 0.1642,
+      "step": 2358
+    },
+    {
+      "epoch": 0.17020816046754933,
+      "grad_norm": 0.1118035688996315,
+      "learning_rate": 0.00019320536874007795,
+      "loss": 0.1515,
+      "step": 2359
+    },
+    {
+      "epoch": 0.17028031314260977,
+      "grad_norm": 0.10937260091304779,
+      "learning_rate": 0.0001932024823206812,
+      "loss": 0.165,
+      "step": 2360
+    },
+    {
+      "epoch": 0.1703524658176702,
+      "grad_norm": 0.12276444584131241,
+      "learning_rate": 0.00019319959590128447,
+      "loss": 0.1457,
+      "step": 2361
+    },
+    {
+      "epoch": 0.1704246184927306,
+      "grad_norm": 0.09414833784103394,
+      "learning_rate": 0.00019319670948188773,
+      "loss": 0.1957,
+      "step": 2362
+    },
+    {
+      "epoch": 0.17049677116779105,
+      "grad_norm": 0.13359293341636658,
+      "learning_rate": 0.000193193823062491,
+      "loss": 0.1163,
+      "step": 2363
+    },
+    {
+      "epoch": 0.17056892384285147,
+      "grad_norm": 0.12425096333026886,
+      "learning_rate": 0.00019319093664309423,
+      "loss": 0.1781,
+      "step": 2364
+    },
+    {
+      "epoch": 0.1706410765179119,
+      "grad_norm": 0.10758412629365921,
+      "learning_rate": 0.0001931880502236975,
+      "loss": 0.1899,
+      "step": 2365
+    },
+    {
+      "epoch": 0.17071322919297233,
+      "grad_norm": 0.09605345875024796,
+      "learning_rate": 0.00019318516380430079,
+      "loss": 0.1694,
+      "step": 2366
+    },
+    {
+      "epoch": 0.17078538186803277,
+      "grad_norm": 0.12173870205879211,
+      "learning_rate": 0.00019318227738490405,
+      "loss": 0.1703,
+      "step": 2367
+    },
+    {
+      "epoch": 0.1708575345430932,
+      "grad_norm": 0.12638935446739197,
+      "learning_rate": 0.0001931793909655073,
+      "loss": 0.2067,
+      "step": 2368
+    },
+    {
+      "epoch": 0.1709296872181536,
+      "grad_norm": 0.11101670563220978,
+      "learning_rate": 0.00019317650454611055,
+      "loss": 0.2279,
+      "step": 2369
+    },
+    {
+      "epoch": 0.17100183989321405,
+      "grad_norm": 0.09994448721408844,
+      "learning_rate": 0.0001931736181267138,
+      "loss": 0.218,
+      "step": 2370
+    },
+    {
+      "epoch": 0.17107399256827446,
+      "grad_norm": 0.1153847947716713,
+      "learning_rate": 0.00019317073170731707,
+      "loss": 0.2328,
+      "step": 2371
+    },
+    {
+      "epoch": 0.1711461452433349,
+      "grad_norm": 0.13979651033878326,
+      "learning_rate": 0.00019316784528792034,
+      "loss": 0.1631,
+      "step": 2372
+    },
+    {
+      "epoch": 0.17121829791839532,
+      "grad_norm": 0.11289111524820328,
+      "learning_rate": 0.00019316495886852363,
+      "loss": 0.1486,
+      "step": 2373
+    },
+    {
+      "epoch": 0.17129045059345574,
+      "grad_norm": 0.08866613358259201,
+      "learning_rate": 0.00019316207244912686,
+      "loss": 0.1804,
+      "step": 2374
+    },
+    {
+      "epoch": 0.17136260326851618,
+      "grad_norm": 0.12603819370269775,
+      "learning_rate": 0.00019315918602973013,
+      "loss": 0.1929,
+      "step": 2375
+    },
+    {
+      "epoch": 0.1714347559435766,
+      "grad_norm": 0.1151173859834671,
+      "learning_rate": 0.0001931562996103334,
+      "loss": 0.1452,
+      "step": 2376
+    },
+    {
+      "epoch": 0.17150690861863704,
+      "grad_norm": 0.10496576875448227,
+      "learning_rate": 0.00019315341319093665,
+      "loss": 0.1625,
+      "step": 2377
+    },
+    {
+      "epoch": 0.17157906129369746,
+      "grad_norm": 0.15824727714061737,
+      "learning_rate": 0.00019315052677153991,
+      "loss": 0.1282,
+      "step": 2378
+    },
+    {
+      "epoch": 0.1716512139687579,
+      "grad_norm": 0.08507123589515686,
+      "learning_rate": 0.00019314764035214318,
+      "loss": 0.147,
+      "step": 2379
+    },
+    {
+      "epoch": 0.17172336664381832,
+      "grad_norm": 0.11039454489946365,
+      "learning_rate": 0.00019314475393274644,
+      "loss": 0.1098,
+      "step": 2380
+    },
+    {
+      "epoch": 0.17179551931887874,
+      "grad_norm": 0.08755354583263397,
+      "learning_rate": 0.0001931418675133497,
+      "loss": 0.1515,
+      "step": 2381
+    },
+    {
+      "epoch": 0.17186767199393918,
+      "grad_norm": 0.11836013942956924,
+      "learning_rate": 0.00019313898109395297,
+      "loss": 0.1708,
+      "step": 2382
+    },
+    {
+      "epoch": 0.1719398246689996,
+      "grad_norm": 0.13358359038829803,
+      "learning_rate": 0.00019313609467455623,
+      "loss": 0.1909,
+      "step": 2383
+    },
+    {
+      "epoch": 0.17201197734406004,
+      "grad_norm": 0.09466656297445297,
+      "learning_rate": 0.0001931332082551595,
+      "loss": 0.1541,
+      "step": 2384
+    },
+    {
+      "epoch": 0.17208413001912046,
+      "grad_norm": 0.10225172340869904,
+      "learning_rate": 0.00019313032183576273,
+      "loss": 0.1458,
+      "step": 2385
+    },
+    {
+      "epoch": 0.17215628269418087,
+      "grad_norm": 0.09411462396383286,
+      "learning_rate": 0.000193127435416366,
+      "loss": 0.171,
+      "step": 2386
+    },
+    {
+      "epoch": 0.17222843536924132,
+      "grad_norm": 0.11404234915971756,
+      "learning_rate": 0.00019312454899696928,
+      "loss": 0.1751,
+      "step": 2387
+    },
+    {
+      "epoch": 0.17230058804430173,
+      "grad_norm": 0.1240854263305664,
+      "learning_rate": 0.00019312166257757254,
+      "loss": 0.2152,
+      "step": 2388
+    },
+    {
+      "epoch": 0.17237274071936218,
+      "grad_norm": 0.09119916707277298,
+      "learning_rate": 0.0001931187761581758,
+      "loss": 0.1732,
+      "step": 2389
+    },
+    {
+      "epoch": 0.1724448933944226,
+      "grad_norm": 0.12059430032968521,
+      "learning_rate": 0.00019311588973877904,
+      "loss": 0.2001,
+      "step": 2390
+    },
+    {
+      "epoch": 0.17251704606948304,
+      "grad_norm": 0.10738098621368408,
+      "learning_rate": 0.0001931130033193823,
+      "loss": 0.183,
+      "step": 2391
+    },
+    {
+      "epoch": 0.17258919874454345,
+      "grad_norm": 0.08014591783285141,
+      "learning_rate": 0.00019311011689998557,
+      "loss": 0.1185,
+      "step": 2392
+    },
+    {
+      "epoch": 0.17266135141960387,
+      "grad_norm": 0.15968464314937592,
+      "learning_rate": 0.00019310723048058883,
+      "loss": 0.1773,
+      "step": 2393
+    },
+    {
+      "epoch": 0.17273350409466431,
+      "grad_norm": 0.10324010252952576,
+      "learning_rate": 0.00019310434406119212,
+      "loss": 0.1825,
+      "step": 2394
+    },
+    {
+      "epoch": 0.17280565676972473,
+      "grad_norm": 0.12751446664333344,
+      "learning_rate": 0.00019310145764179536,
+      "loss": 0.158,
+      "step": 2395
+    },
+    {
+      "epoch": 0.17287780944478517,
+      "grad_norm": 0.10440117120742798,
+      "learning_rate": 0.00019309857122239862,
+      "loss": 0.1772,
+      "step": 2396
+    },
+    {
+      "epoch": 0.1729499621198456,
+      "grad_norm": 0.11839499324560165,
+      "learning_rate": 0.00019309568480300188,
+      "loss": 0.1507,
+      "step": 2397
+    },
+    {
+      "epoch": 0.17302211479490603,
+      "grad_norm": 0.09740056842565536,
+      "learning_rate": 0.00019309279838360515,
+      "loss": 0.1601,
+      "step": 2398
+    },
+    {
+      "epoch": 0.17309426746996645,
+      "grad_norm": 0.08723609149456024,
+      "learning_rate": 0.0001930899119642084,
+      "loss": 0.1731,
+      "step": 2399
+    },
+    {
+      "epoch": 0.17316642014502687,
+      "grad_norm": 0.1224941685795784,
+      "learning_rate": 0.00019308702554481167,
+      "loss": 0.1299,
+      "step": 2400
+    },
+    {
+      "epoch": 0.1732385728200873,
+      "grad_norm": 0.12198638170957565,
+      "learning_rate": 0.00019308413912541493,
+      "loss": 0.1758,
+      "step": 2401
+    },
+    {
+      "epoch": 0.17331072549514773,
+      "grad_norm": 0.11759866029024124,
+      "learning_rate": 0.0001930812527060182,
+      "loss": 0.1445,
+      "step": 2402
+    },
+    {
+      "epoch": 0.17338287817020817,
+      "grad_norm": 0.11716272681951523,
+      "learning_rate": 0.00019307836628662146,
+      "loss": 0.1983,
+      "step": 2403
+    },
+    {
+      "epoch": 0.1734550308452686,
+      "grad_norm": 0.11761771887540817,
+      "learning_rate": 0.00019307547986722472,
+      "loss": 0.1539,
+      "step": 2404
+    },
+    {
+      "epoch": 0.173527183520329,
+      "grad_norm": 0.11381730437278748,
+      "learning_rate": 0.00019307259344782799,
+      "loss": 0.1637,
+      "step": 2405
+    },
+    {
+      "epoch": 0.17359933619538945,
+      "grad_norm": 0.11773020029067993,
+      "learning_rate": 0.00019306970702843122,
+      "loss": 0.1539,
+      "step": 2406
+    },
+    {
+      "epoch": 0.17367148887044986,
+      "grad_norm": 0.08517422527074814,
+      "learning_rate": 0.00019306682060903448,
+      "loss": 0.2072,
+      "step": 2407
+    },
+    {
+      "epoch": 0.1737436415455103,
+      "grad_norm": 0.13268540799617767,
+      "learning_rate": 0.00019306393418963777,
+      "loss": 0.1786,
+      "step": 2408
+    },
+    {
+      "epoch": 0.17381579422057072,
+      "grad_norm": 0.10123803466558456,
+      "learning_rate": 0.00019306104777024104,
+      "loss": 0.1678,
+      "step": 2409
+    },
+    {
+      "epoch": 0.17388794689563117,
+      "grad_norm": 0.10507965087890625,
+      "learning_rate": 0.0001930581613508443,
+      "loss": 0.1454,
+      "step": 2410
+    },
+    {
+      "epoch": 0.17396009957069158,
+      "grad_norm": 0.10920926183462143,
+      "learning_rate": 0.00019305527493144754,
+      "loss": 0.1498,
+      "step": 2411
+    },
+    {
+      "epoch": 0.174032252245752,
+      "grad_norm": 0.10431352257728577,
+      "learning_rate": 0.0001930523885120508,
+      "loss": 0.1378,
+      "step": 2412
+    },
+    {
+      "epoch": 0.17410440492081244,
+      "grad_norm": 0.10723818838596344,
+      "learning_rate": 0.00019304950209265406,
+      "loss": 0.1334,
+      "step": 2413
+    },
+    {
+      "epoch": 0.17417655759587286,
+      "grad_norm": 0.10780136287212372,
+      "learning_rate": 0.00019304661567325732,
+      "loss": 0.1427,
+      "step": 2414
+    },
+    {
+      "epoch": 0.1742487102709333,
+      "grad_norm": 0.18205569684505463,
+      "learning_rate": 0.00019304372925386061,
+      "loss": 0.2193,
+      "step": 2415
+    },
+    {
+      "epoch": 0.17432086294599372,
+      "grad_norm": 0.15683774650096893,
+      "learning_rate": 0.00019304084283446385,
+      "loss": 0.1789,
+      "step": 2416
+    },
+    {
+      "epoch": 0.17439301562105414,
+      "grad_norm": 0.08884182572364807,
+      "learning_rate": 0.00019303795641506711,
+      "loss": 0.1223,
+      "step": 2417
+    },
+    {
+      "epoch": 0.17446516829611458,
+      "grad_norm": 0.11683040857315063,
+      "learning_rate": 0.00019303506999567038,
+      "loss": 0.1857,
+      "step": 2418
+    },
+    {
+      "epoch": 0.174537320971175,
+      "grad_norm": 0.15970462560653687,
+      "learning_rate": 0.00019303218357627364,
+      "loss": 0.1684,
+      "step": 2419
+    },
+    {
+      "epoch": 0.17460947364623544,
+      "grad_norm": 0.11105070263147354,
+      "learning_rate": 0.0001930292971568769,
+      "loss": 0.1835,
+      "step": 2420
+    },
+    {
+      "epoch": 0.17468162632129586,
+      "grad_norm": 0.12702690064907074,
+      "learning_rate": 0.00019302641073748017,
+      "loss": 0.1917,
+      "step": 2421
+    },
+    {
+      "epoch": 0.1747537789963563,
+      "grad_norm": 0.10951557010412216,
+      "learning_rate": 0.00019302352431808343,
+      "loss": 0.154,
+      "step": 2422
+    },
+    {
+      "epoch": 0.17482593167141672,
+      "grad_norm": 0.10169512778520584,
+      "learning_rate": 0.0001930206378986867,
+      "loss": 0.1557,
+      "step": 2423
+    },
+    {
+      "epoch": 0.17489808434647713,
+      "grad_norm": 0.1406065970659256,
+      "learning_rate": 0.00019301775147928995,
+      "loss": 0.1466,
+      "step": 2424
+    },
+    {
+      "epoch": 0.17497023702153758,
+      "grad_norm": 0.15568317472934723,
+      "learning_rate": 0.00019301486505989322,
+      "loss": 0.1885,
+      "step": 2425
+    },
+    {
+      "epoch": 0.175042389696598,
+      "grad_norm": 0.10522514581680298,
+      "learning_rate": 0.00019301197864049648,
+      "loss": 0.1622,
+      "step": 2426
+    },
+    {
+      "epoch": 0.17511454237165844,
+      "grad_norm": 0.1026366651058197,
+      "learning_rate": 0.00019300909222109974,
+      "loss": 0.1999,
+      "step": 2427
+    },
+    {
+      "epoch": 0.17518669504671885,
+      "grad_norm": 0.1487097442150116,
+      "learning_rate": 0.00019300620580170298,
+      "loss": 0.2077,
+      "step": 2428
+    },
+    {
+      "epoch": 0.1752588477217793,
+      "grad_norm": 0.10587791353464127,
+      "learning_rate": 0.00019300331938230627,
+      "loss": 0.1311,
+      "step": 2429
+    },
+    {
+      "epoch": 0.17533100039683971,
+      "grad_norm": 0.10131558030843735,
+      "learning_rate": 0.00019300043296290953,
+      "loss": 0.1353,
+      "step": 2430
+    },
+    {
+      "epoch": 0.17540315307190013,
+      "grad_norm": 0.11770360916852951,
+      "learning_rate": 0.0001929975465435128,
+      "loss": 0.19,
+      "step": 2431
+    },
+    {
+      "epoch": 0.17547530574696057,
+      "grad_norm": 0.10171361267566681,
+      "learning_rate": 0.00019299466012411606,
+      "loss": 0.1825,
+      "step": 2432
+    },
+    {
+      "epoch": 0.175547458422021,
+      "grad_norm": 0.12811347842216492,
+      "learning_rate": 0.0001929917737047193,
+      "loss": 0.1983,
+      "step": 2433
+    },
+    {
+      "epoch": 0.17561961109708144,
+      "grad_norm": 0.10407224297523499,
+      "learning_rate": 0.00019298888728532256,
+      "loss": 0.1566,
+      "step": 2434
+    },
+    {
+      "epoch": 0.17569176377214185,
+      "grad_norm": 0.09604757279157639,
+      "learning_rate": 0.00019298600086592582,
+      "loss": 0.1551,
+      "step": 2435
+    },
+    {
+      "epoch": 0.17576391644720227,
+      "grad_norm": 0.09429436177015305,
+      "learning_rate": 0.0001929831144465291,
+      "loss": 0.1617,
+      "step": 2436
+    },
+    {
+      "epoch": 0.1758360691222627,
+      "grad_norm": 0.09811285138130188,
+      "learning_rate": 0.00019298022802713237,
+      "loss": 0.1994,
+      "step": 2437
+    },
+    {
+      "epoch": 0.17590822179732313,
+      "grad_norm": 0.09046891331672668,
+      "learning_rate": 0.0001929773416077356,
+      "loss": 0.1091,
+      "step": 2438
+    },
+    {
+      "epoch": 0.17598037447238357,
+      "grad_norm": 0.130147784948349,
+      "learning_rate": 0.00019297445518833887,
+      "loss": 0.1649,
+      "step": 2439
+    },
+    {
+      "epoch": 0.176052527147444,
+      "grad_norm": 0.10541129112243652,
+      "learning_rate": 0.00019297156876894213,
+      "loss": 0.1769,
+      "step": 2440
+    },
+    {
+      "epoch": 0.17612467982250443,
+      "grad_norm": 0.12361346185207367,
+      "learning_rate": 0.0001929686823495454,
+      "loss": 0.1589,
+      "step": 2441
+    },
+    {
+      "epoch": 0.17619683249756485,
+      "grad_norm": 0.12085054069757462,
+      "learning_rate": 0.00019296579593014866,
+      "loss": 0.1732,
+      "step": 2442
+    },
+    {
+      "epoch": 0.17626898517262526,
+      "grad_norm": 0.10131417214870453,
+      "learning_rate": 0.00019296290951075192,
+      "loss": 0.1803,
+      "step": 2443
+    },
+    {
+      "epoch": 0.1763411378476857,
+      "grad_norm": 0.09739714860916138,
+      "learning_rate": 0.00019296002309135519,
+      "loss": 0.1738,
+      "step": 2444
+    },
+    {
+      "epoch": 0.17641329052274612,
+      "grad_norm": 0.09897683560848236,
+      "learning_rate": 0.00019295713667195845,
+      "loss": 0.187,
+      "step": 2445
+    },
+    {
+      "epoch": 0.17648544319780657,
+      "grad_norm": 0.09419049322605133,
+      "learning_rate": 0.0001929542502525617,
+      "loss": 0.1542,
+      "step": 2446
+    },
+    {
+      "epoch": 0.17655759587286698,
+      "grad_norm": 0.10498613864183426,
+      "learning_rate": 0.00019295136383316497,
+      "loss": 0.176,
+      "step": 2447
+    },
+    {
+      "epoch": 0.1766297485479274,
+      "grad_norm": 0.13526107370853424,
+      "learning_rate": 0.00019294847741376824,
+      "loss": 0.1597,
+      "step": 2448
+    },
+    {
+      "epoch": 0.17670190122298784,
+      "grad_norm": 0.09594099223613739,
+      "learning_rate": 0.00019294559099437147,
+      "loss": 0.1705,
+      "step": 2449
+    },
+    {
+      "epoch": 0.17677405389804826,
+      "grad_norm": 0.1210499256849289,
+      "learning_rate": 0.00019294270457497476,
+      "loss": 0.1298,
+      "step": 2450
+    },
+    {
+      "epoch": 0.1768462065731087,
+      "grad_norm": 0.10663893073797226,
+      "learning_rate": 0.00019293981815557803,
+      "loss": 0.1931,
+      "step": 2451
+    },
+    {
+      "epoch": 0.17691835924816912,
+      "grad_norm": 0.10738261789083481,
+      "learning_rate": 0.0001929369317361813,
+      "loss": 0.1941,
+      "step": 2452
+    },
+    {
+      "epoch": 0.17699051192322957,
+      "grad_norm": 0.10399194061756134,
+      "learning_rate": 0.00019293404531678455,
+      "loss": 0.2344,
+      "step": 2453
+    },
+    {
+      "epoch": 0.17706266459828998,
+      "grad_norm": 0.09402377903461456,
+      "learning_rate": 0.0001929311588973878,
+      "loss": 0.1542,
+      "step": 2454
+    },
+    {
+      "epoch": 0.1771348172733504,
+      "grad_norm": 0.11488594114780426,
+      "learning_rate": 0.00019292827247799105,
+      "loss": 0.1302,
+      "step": 2455
+    },
+    {
+      "epoch": 0.17720696994841084,
+      "grad_norm": 0.10841730237007141,
+      "learning_rate": 0.0001929253860585943,
+      "loss": 0.1471,
+      "step": 2456
+    },
+    {
+      "epoch": 0.17727912262347126,
+      "grad_norm": 0.09893527626991272,
+      "learning_rate": 0.00019292249963919758,
+      "loss": 0.1645,
+      "step": 2457
+    },
+    {
+      "epoch": 0.1773512752985317,
+      "grad_norm": 0.1275603026151657,
+      "learning_rate": 0.00019291961321980087,
+      "loss": 0.1024,
+      "step": 2458
+    },
+    {
+      "epoch": 0.17742342797359212,
+      "grad_norm": 0.10018088668584824,
+      "learning_rate": 0.0001929167268004041,
+      "loss": 0.1572,
+      "step": 2459
+    },
+    {
+      "epoch": 0.17749558064865256,
+      "grad_norm": 0.13826268911361694,
+      "learning_rate": 0.00019291384038100737,
+      "loss": 0.1963,
+      "step": 2460
+    },
+    {
+      "epoch": 0.17756773332371298,
+      "grad_norm": 0.12875942885875702,
+      "learning_rate": 0.00019291095396161063,
+      "loss": 0.1136,
+      "step": 2461
+    },
+    {
+      "epoch": 0.1776398859987734,
+      "grad_norm": 0.0969284251332283,
+      "learning_rate": 0.0001929080675422139,
+      "loss": 0.1702,
+      "step": 2462
+    },
+    {
+      "epoch": 0.17771203867383384,
+      "grad_norm": 0.09803799539804459,
+      "learning_rate": 0.00019290518112281715,
+      "loss": 0.193,
+      "step": 2463
+    },
+    {
+      "epoch": 0.17778419134889425,
+      "grad_norm": 0.13163645565509796,
+      "learning_rate": 0.00019290229470342042,
+      "loss": 0.1273,
+      "step": 2464
+    },
+    {
+      "epoch": 0.1778563440239547,
+      "grad_norm": 0.10827518999576569,
+      "learning_rate": 0.00019289940828402368,
+      "loss": 0.1639,
+      "step": 2465
+    },
+    {
+      "epoch": 0.17792849669901512,
+      "grad_norm": 0.10406279563903809,
+      "learning_rate": 0.00019289652186462694,
+      "loss": 0.1677,
+      "step": 2466
+    },
+    {
+      "epoch": 0.17800064937407553,
+      "grad_norm": 0.11019009351730347,
+      "learning_rate": 0.0001928936354452302,
+      "loss": 0.1992,
+      "step": 2467
+    },
+    {
+      "epoch": 0.17807280204913598,
+      "grad_norm": 0.1016637310385704,
+      "learning_rate": 0.00019289074902583347,
+      "loss": 0.1469,
+      "step": 2468
+    },
+    {
+      "epoch": 0.1781449547241964,
+      "grad_norm": 0.10010644793510437,
+      "learning_rate": 0.00019288786260643673,
+      "loss": 0.1226,
+      "step": 2469
+    },
+    {
+      "epoch": 0.17821710739925684,
+      "grad_norm": 0.19892233610153198,
+      "learning_rate": 0.00019288497618703997,
+      "loss": 0.2157,
+      "step": 2470
+    },
+    {
+      "epoch": 0.17828926007431725,
+      "grad_norm": 0.11943595111370087,
+      "learning_rate": 0.00019288208976764323,
+      "loss": 0.1482,
+      "step": 2471
+    },
+    {
+      "epoch": 0.1783614127493777,
+      "grad_norm": 0.09678040444850922,
+      "learning_rate": 0.00019287920334824652,
+      "loss": 0.1555,
+      "step": 2472
+    },
+    {
+      "epoch": 0.1784335654244381,
+      "grad_norm": 0.11203937232494354,
+      "learning_rate": 0.00019287631692884978,
+      "loss": 0.1725,
+      "step": 2473
+    },
+    {
+      "epoch": 0.17850571809949853,
+      "grad_norm": 0.10264791548252106,
+      "learning_rate": 0.00019287343050945305,
+      "loss": 0.1598,
+      "step": 2474
+    },
+    {
+      "epoch": 0.17857787077455897,
+      "grad_norm": 0.11397009342908859,
+      "learning_rate": 0.00019287054409005628,
+      "loss": 0.1627,
+      "step": 2475
+    },
+    {
+      "epoch": 0.1786500234496194,
+      "grad_norm": 0.11981376260519028,
+      "learning_rate": 0.00019286765767065954,
+      "loss": 0.1643,
+      "step": 2476
+    },
+    {
+      "epoch": 0.17872217612467983,
+      "grad_norm": 0.1312798261642456,
+      "learning_rate": 0.0001928647712512628,
+      "loss": 0.1575,
+      "step": 2477
+    },
+    {
+      "epoch": 0.17879432879974025,
+      "grad_norm": 0.140218123793602,
+      "learning_rate": 0.00019286188483186607,
+      "loss": 0.1921,
+      "step": 2478
+    },
+    {
+      "epoch": 0.17886648147480066,
+      "grad_norm": 0.1026497408747673,
+      "learning_rate": 0.00019285899841246936,
+      "loss": 0.2242,
+      "step": 2479
+    },
+    {
+      "epoch": 0.1789386341498611,
+      "grad_norm": 0.1413601189851761,
+      "learning_rate": 0.0001928561119930726,
+      "loss": 0.1298,
+      "step": 2480
+    },
+    {
+      "epoch": 0.17901078682492153,
+      "grad_norm": 0.09540276229381561,
+      "learning_rate": 0.00019285322557367586,
+      "loss": 0.1595,
+      "step": 2481
+    },
+    {
+      "epoch": 0.17908293949998197,
+      "grad_norm": 0.08501024544239044,
+      "learning_rate": 0.00019285033915427912,
+      "loss": 0.1599,
+      "step": 2482
+    },
+    {
+      "epoch": 0.17915509217504239,
+      "grad_norm": 0.10932368785142899,
+      "learning_rate": 0.00019284745273488239,
+      "loss": 0.2136,
+      "step": 2483
+    },
+    {
+      "epoch": 0.17922724485010283,
+      "grad_norm": 0.09894034266471863,
+      "learning_rate": 0.00019284456631548565,
+      "loss": 0.1926,
+      "step": 2484
+    },
+    {
+      "epoch": 0.17929939752516325,
+      "grad_norm": 0.09841787070035934,
+      "learning_rate": 0.0001928416798960889,
+      "loss": 0.1734,
+      "step": 2485
+    },
+    {
+      "epoch": 0.17937155020022366,
+      "grad_norm": 0.10464194416999817,
+      "learning_rate": 0.00019283879347669217,
+      "loss": 0.1717,
+      "step": 2486
+    },
+    {
+      "epoch": 0.1794437028752841,
+      "grad_norm": 0.10199958831071854,
+      "learning_rate": 0.00019283590705729544,
+      "loss": 0.1482,
+      "step": 2487
+    },
+    {
+      "epoch": 0.17951585555034452,
+      "grad_norm": 0.08751571178436279,
+      "learning_rate": 0.0001928330206378987,
+      "loss": 0.1094,
+      "step": 2488
+    },
+    {
+      "epoch": 0.17958800822540497,
+      "grad_norm": 0.11693772673606873,
+      "learning_rate": 0.00019283013421850196,
+      "loss": 0.1885,
+      "step": 2489
+    },
+    {
+      "epoch": 0.17966016090046538,
+      "grad_norm": 0.10919500887393951,
+      "learning_rate": 0.00019282724779910523,
+      "loss": 0.1584,
+      "step": 2490
+    },
+    {
+      "epoch": 0.17973231357552583,
+      "grad_norm": 0.11971744894981384,
+      "learning_rate": 0.00019282436137970846,
+      "loss": 0.157,
+      "step": 2491
+    },
+    {
+      "epoch": 0.17980446625058624,
+      "grad_norm": 0.15336687862873077,
+      "learning_rate": 0.00019282147496031172,
+      "loss": 0.1952,
+      "step": 2492
+    },
+    {
+      "epoch": 0.17987661892564666,
+      "grad_norm": 0.1436939239501953,
+      "learning_rate": 0.00019281858854091501,
+      "loss": 0.1478,
+      "step": 2493
+    },
+    {
+      "epoch": 0.1799487716007071,
+      "grad_norm": 0.1352475881576538,
+      "learning_rate": 0.00019281570212151828,
+      "loss": 0.1621,
+      "step": 2494
+    },
+    {
+      "epoch": 0.18002092427576752,
+      "grad_norm": 0.12049786746501923,
+      "learning_rate": 0.00019281281570212154,
+      "loss": 0.1737,
+      "step": 2495
+    },
+    {
+      "epoch": 0.18009307695082796,
+      "grad_norm": 0.11868083477020264,
+      "learning_rate": 0.00019280992928272478,
+      "loss": 0.1521,
+      "step": 2496
+    },
+    {
+      "epoch": 0.18016522962588838,
+      "grad_norm": 0.09464991092681885,
+      "learning_rate": 0.00019280704286332804,
+      "loss": 0.1801,
+      "step": 2497
+    },
+    {
+      "epoch": 0.1802373823009488,
+      "grad_norm": 0.11203427612781525,
+      "learning_rate": 0.0001928041564439313,
+      "loss": 0.1716,
+      "step": 2498
+    },
+    {
+      "epoch": 0.18030953497600924,
+      "grad_norm": 0.0979161262512207,
+      "learning_rate": 0.00019280127002453456,
+      "loss": 0.1732,
+      "step": 2499
+    },
+    {
+      "epoch": 0.18038168765106966,
+      "grad_norm": 0.09018220752477646,
+      "learning_rate": 0.00019279838360513785,
+      "loss": 0.1433,
+      "step": 2500
+    },
+    {
+      "epoch": 0.1804538403261301,
+      "grad_norm": 0.09370570629835129,
+      "learning_rate": 0.0001927954971857411,
+      "loss": 0.1446,
+      "step": 2501
+    },
+    {
+      "epoch": 0.18052599300119052,
+      "grad_norm": 0.10692581534385681,
+      "learning_rate": 0.00019279261076634435,
+      "loss": 0.1434,
+      "step": 2502
+    },
+    {
+      "epoch": 0.18059814567625096,
+      "grad_norm": 0.12523172795772552,
+      "learning_rate": 0.00019278972434694762,
+      "loss": 0.1837,
+      "step": 2503
+    },
+    {
+      "epoch": 0.18067029835131138,
+      "grad_norm": 0.08301182836294174,
+      "learning_rate": 0.00019278683792755088,
+      "loss": 0.2049,
+      "step": 2504
+    },
+    {
+      "epoch": 0.1807424510263718,
+      "grad_norm": 0.09529439359903336,
+      "learning_rate": 0.00019278395150815414,
+      "loss": 0.1667,
+      "step": 2505
+    },
+    {
+      "epoch": 0.18081460370143224,
+      "grad_norm": 0.1019364520907402,
+      "learning_rate": 0.0001927810650887574,
+      "loss": 0.2199,
+      "step": 2506
+    },
+    {
+      "epoch": 0.18088675637649265,
+      "grad_norm": 0.1121387854218483,
+      "learning_rate": 0.00019277817866936067,
+      "loss": 0.1385,
+      "step": 2507
+    },
+    {
+      "epoch": 0.1809589090515531,
+      "grad_norm": 0.0993325337767601,
+      "learning_rate": 0.00019277529224996393,
+      "loss": 0.135,
+      "step": 2508
+    },
+    {
+      "epoch": 0.1810310617266135,
+      "grad_norm": 0.08489389717578888,
+      "learning_rate": 0.0001927724058305672,
+      "loss": 0.1392,
+      "step": 2509
+    },
+    {
+      "epoch": 0.18110321440167393,
+      "grad_norm": 0.10872458666563034,
+      "learning_rate": 0.00019276951941117046,
+      "loss": 0.1647,
+      "step": 2510
+    },
+    {
+      "epoch": 0.18117536707673437,
+      "grad_norm": 0.12486433982849121,
+      "learning_rate": 0.00019276663299177372,
+      "loss": 0.1372,
+      "step": 2511
+    },
+    {
+      "epoch": 0.1812475197517948,
+      "grad_norm": 0.10761299729347229,
+      "learning_rate": 0.00019276374657237696,
+      "loss": 0.1716,
+      "step": 2512
+    },
+    {
+      "epoch": 0.18131967242685523,
+      "grad_norm": 0.10109484195709229,
+      "learning_rate": 0.00019276086015298022,
+      "loss": 0.1726,
+      "step": 2513
+    },
+    {
+      "epoch": 0.18139182510191565,
+      "grad_norm": 0.09445323795080185,
+      "learning_rate": 0.0001927579737335835,
+      "loss": 0.1212,
+      "step": 2514
+    },
+    {
+      "epoch": 0.1814639777769761,
+      "grad_norm": 0.12211337685585022,
+      "learning_rate": 0.00019275508731418677,
+      "loss": 0.1474,
+      "step": 2515
+    },
+    {
+      "epoch": 0.1815361304520365,
+      "grad_norm": 0.11376041173934937,
+      "learning_rate": 0.00019275220089479003,
+      "loss": 0.1504,
+      "step": 2516
+    },
+    {
+      "epoch": 0.18160828312709693,
+      "grad_norm": 0.130318284034729,
+      "learning_rate": 0.00019274931447539327,
+      "loss": 0.1754,
+      "step": 2517
+    },
+    {
+      "epoch": 0.18168043580215737,
+      "grad_norm": 0.10562107712030411,
+      "learning_rate": 0.00019274642805599653,
+      "loss": 0.1757,
+      "step": 2518
+    },
+    {
+      "epoch": 0.18175258847721779,
+      "grad_norm": 0.09059479832649231,
+      "learning_rate": 0.0001927435416365998,
+      "loss": 0.167,
+      "step": 2519
+    },
+    {
+      "epoch": 0.18182474115227823,
+      "grad_norm": 0.11078401654958725,
+      "learning_rate": 0.00019274065521720306,
+      "loss": 0.1871,
+      "step": 2520
+    },
+    {
+      "epoch": 0.18189689382733865,
+      "grad_norm": 0.13319344818592072,
+      "learning_rate": 0.00019273776879780635,
+      "loss": 0.1739,
+      "step": 2521
+    },
+    {
+      "epoch": 0.1819690465023991,
+      "grad_norm": 0.09682480245828629,
+      "learning_rate": 0.00019273488237840958,
+      "loss": 0.1527,
+      "step": 2522
+    },
+    {
+      "epoch": 0.1820411991774595,
+      "grad_norm": 0.09306000173091888,
+      "learning_rate": 0.00019273199595901285,
+      "loss": 0.1591,
+      "step": 2523
+    },
+    {
+      "epoch": 0.18211335185251992,
+      "grad_norm": 0.11338866502046585,
+      "learning_rate": 0.0001927291095396161,
+      "loss": 0.155,
+      "step": 2524
+    },
+    {
+      "epoch": 0.18218550452758037,
+      "grad_norm": 0.09607966244220734,
+      "learning_rate": 0.00019272622312021937,
+      "loss": 0.1522,
+      "step": 2525
+    },
+    {
+      "epoch": 0.18225765720264078,
+      "grad_norm": 0.13238127529621124,
+      "learning_rate": 0.00019272333670082264,
+      "loss": 0.136,
+      "step": 2526
+    },
+    {
+      "epoch": 0.18232980987770123,
+      "grad_norm": 0.13262353837490082,
+      "learning_rate": 0.0001927204502814259,
+      "loss": 0.167,
+      "step": 2527
+    },
+    {
+      "epoch": 0.18240196255276164,
+      "grad_norm": 0.10406520217657089,
+      "learning_rate": 0.00019271756386202916,
+      "loss": 0.135,
+      "step": 2528
+    },
+    {
+      "epoch": 0.18247411522782206,
+      "grad_norm": 0.1390426605939865,
+      "learning_rate": 0.00019271467744263243,
+      "loss": 0.1838,
+      "step": 2529
+    },
+    {
+      "epoch": 0.1825462679028825,
+      "grad_norm": 0.09890906512737274,
+      "learning_rate": 0.0001927117910232357,
+      "loss": 0.151,
+      "step": 2530
+    },
+    {
+      "epoch": 0.18261842057794292,
+      "grad_norm": 0.10099121183156967,
+      "learning_rate": 0.00019270890460383895,
+      "loss": 0.1531,
+      "step": 2531
+    },
+    {
+      "epoch": 0.18269057325300336,
+      "grad_norm": 0.1050528958439827,
+      "learning_rate": 0.00019270601818444221,
+      "loss": 0.1536,
+      "step": 2532
+    },
+    {
+      "epoch": 0.18276272592806378,
+      "grad_norm": 0.08454670011997223,
+      "learning_rate": 0.00019270313176504548,
+      "loss": 0.1672,
+      "step": 2533
+    },
+    {
+      "epoch": 0.18283487860312422,
+      "grad_norm": 0.09192633628845215,
+      "learning_rate": 0.0001927002453456487,
+      "loss": 0.1723,
+      "step": 2534
+    },
+    {
+      "epoch": 0.18290703127818464,
+      "grad_norm": 0.10295901447534561,
+      "learning_rate": 0.000192697358926252,
+      "loss": 0.1387,
+      "step": 2535
+    },
+    {
+      "epoch": 0.18297918395324506,
+      "grad_norm": 0.0948997363448143,
+      "learning_rate": 0.00019269447250685527,
+      "loss": 0.1655,
+      "step": 2536
+    },
+    {
+      "epoch": 0.1830513366283055,
+      "grad_norm": 0.10542097687721252,
+      "learning_rate": 0.00019269158608745853,
+      "loss": 0.1618,
+      "step": 2537
+    },
+    {
+      "epoch": 0.18312348930336592,
+      "grad_norm": 0.1150515228509903,
+      "learning_rate": 0.0001926886996680618,
+      "loss": 0.1504,
+      "step": 2538
+    },
+    {
+      "epoch": 0.18319564197842636,
+      "grad_norm": 0.11629487574100494,
+      "learning_rate": 0.00019268581324866503,
+      "loss": 0.1609,
+      "step": 2539
+    },
+    {
+      "epoch": 0.18326779465348678,
+      "grad_norm": 0.11165773868560791,
+      "learning_rate": 0.0001926829268292683,
+      "loss": 0.1511,
+      "step": 2540
+    },
+    {
+      "epoch": 0.1833399473285472,
+      "grad_norm": 0.11779014766216278,
+      "learning_rate": 0.00019268004040987155,
+      "loss": 0.1827,
+      "step": 2541
+    },
+    {
+      "epoch": 0.18341210000360764,
+      "grad_norm": 0.10018985718488693,
+      "learning_rate": 0.00019267715399047484,
+      "loss": 0.1803,
+      "step": 2542
+    },
+    {
+      "epoch": 0.18348425267866805,
+      "grad_norm": 0.12660762667655945,
+      "learning_rate": 0.0001926742675710781,
+      "loss": 0.1804,
+      "step": 2543
+    },
+    {
+      "epoch": 0.1835564053537285,
+      "grad_norm": 0.10073110461235046,
+      "learning_rate": 0.00019267138115168134,
+      "loss": 0.1949,
+      "step": 2544
+    },
+    {
+      "epoch": 0.1836285580287889,
+      "grad_norm": 0.15038368105888367,
+      "learning_rate": 0.0001926684947322846,
+      "loss": 0.2406,
+      "step": 2545
+    },
+    {
+      "epoch": 0.18370071070384936,
+      "grad_norm": 0.1125505194067955,
+      "learning_rate": 0.00019266560831288787,
+      "loss": 0.2075,
+      "step": 2546
+    },
+    {
+      "epoch": 0.18377286337890977,
+      "grad_norm": 0.0908120647072792,
+      "learning_rate": 0.00019266272189349113,
+      "loss": 0.1807,
+      "step": 2547
+    },
+    {
+      "epoch": 0.1838450160539702,
+      "grad_norm": 0.12920835614204407,
+      "learning_rate": 0.0001926598354740944,
+      "loss": 0.1864,
+      "step": 2548
+    },
+    {
+      "epoch": 0.18391716872903063,
+      "grad_norm": 0.1229652538895607,
+      "learning_rate": 0.00019265694905469766,
+      "loss": 0.1311,
+      "step": 2549
+    },
+    {
+      "epoch": 0.18398932140409105,
+      "grad_norm": 0.11898133158683777,
+      "learning_rate": 0.00019265406263530092,
+      "loss": 0.1303,
+      "step": 2550
+    },
+    {
+      "epoch": 0.1840614740791515,
+      "grad_norm": 0.09316210448741913,
+      "learning_rate": 0.00019265117621590418,
+      "loss": 0.1293,
+      "step": 2551
+    },
+    {
+      "epoch": 0.1841336267542119,
+      "grad_norm": 0.12110738456249237,
+      "learning_rate": 0.00019264828979650745,
+      "loss": 0.1711,
+      "step": 2552
+    },
+    {
+      "epoch": 0.18420577942927235,
+      "grad_norm": 0.10128328204154968,
+      "learning_rate": 0.0001926454033771107,
+      "loss": 0.1686,
+      "step": 2553
+    },
+    {
+      "epoch": 0.18427793210433277,
+      "grad_norm": 0.09798388183116913,
+      "learning_rate": 0.00019264251695771397,
+      "loss": 0.1716,
+      "step": 2554
+    },
+    {
+      "epoch": 0.1843500847793932,
+      "grad_norm": 0.12545473873615265,
+      "learning_rate": 0.0001926396305383172,
+      "loss": 0.1823,
+      "step": 2555
+    },
+    {
+      "epoch": 0.18442223745445363,
+      "grad_norm": 0.12609075009822845,
+      "learning_rate": 0.0001926367441189205,
+      "loss": 0.1552,
+      "step": 2556
+    },
+    {
+      "epoch": 0.18449439012951405,
+      "grad_norm": 0.1211591586470604,
+      "learning_rate": 0.00019263385769952376,
+      "loss": 0.1661,
+      "step": 2557
+    },
+    {
+      "epoch": 0.1845665428045745,
+      "grad_norm": 0.09905112534761429,
+      "learning_rate": 0.00019263097128012702,
+      "loss": 0.182,
+      "step": 2558
+    },
+    {
+      "epoch": 0.1846386954796349,
+      "grad_norm": 0.10378430783748627,
+      "learning_rate": 0.00019262808486073029,
+      "loss": 0.1729,
+      "step": 2559
+    },
+    {
+      "epoch": 0.18471084815469532,
+      "grad_norm": 0.1171564906835556,
+      "learning_rate": 0.00019262519844133352,
+      "loss": 0.1537,
+      "step": 2560
+    },
+    {
+      "epoch": 0.18478300082975577,
+      "grad_norm": 0.12228664755821228,
+      "learning_rate": 0.00019262231202193678,
+      "loss": 0.1954,
+      "step": 2561
+    },
+    {
+      "epoch": 0.18485515350481618,
+      "grad_norm": 0.10984918475151062,
+      "learning_rate": 0.00019261942560254005,
+      "loss": 0.1702,
+      "step": 2562
+    },
+    {
+      "epoch": 0.18492730617987663,
+      "grad_norm": 0.11387166380882263,
+      "learning_rate": 0.00019261653918314334,
+      "loss": 0.1633,
+      "step": 2563
+    },
+    {
+      "epoch": 0.18499945885493704,
+      "grad_norm": 0.1470084935426712,
+      "learning_rate": 0.0001926136527637466,
+      "loss": 0.1654,
+      "step": 2564
+    },
+    {
+      "epoch": 0.1850716115299975,
+      "grad_norm": 0.09996671974658966,
+      "learning_rate": 0.00019261076634434984,
+      "loss": 0.2324,
+      "step": 2565
+    },
+    {
+      "epoch": 0.1851437642050579,
+      "grad_norm": 0.13593421876430511,
+      "learning_rate": 0.0001926078799249531,
+      "loss": 0.2202,
+      "step": 2566
+    },
+    {
+      "epoch": 0.18521591688011832,
+      "grad_norm": 0.08700580149888992,
+      "learning_rate": 0.00019260499350555636,
+      "loss": 0.1401,
+      "step": 2567
+    },
+    {
+      "epoch": 0.18528806955517876,
+      "grad_norm": 0.11398850381374359,
+      "learning_rate": 0.00019260210708615963,
+      "loss": 0.1733,
+      "step": 2568
+    },
+    {
+      "epoch": 0.18536022223023918,
+      "grad_norm": 0.09895952045917511,
+      "learning_rate": 0.0001925992206667629,
+      "loss": 0.2249,
+      "step": 2569
+    },
+    {
+      "epoch": 0.18543237490529962,
+      "grad_norm": 0.12415144592523575,
+      "learning_rate": 0.00019259633424736615,
+      "loss": 0.2309,
+      "step": 2570
+    },
+    {
+      "epoch": 0.18550452758036004,
+      "grad_norm": 0.10959386825561523,
+      "learning_rate": 0.00019259344782796941,
+      "loss": 0.2302,
+      "step": 2571
+    },
+    {
+      "epoch": 0.18557668025542046,
+      "grad_norm": 0.0890393927693367,
+      "learning_rate": 0.00019259056140857268,
+      "loss": 0.1575,
+      "step": 2572
+    },
+    {
+      "epoch": 0.1856488329304809,
+      "grad_norm": 0.09315341711044312,
+      "learning_rate": 0.00019258767498917594,
+      "loss": 0.1541,
+      "step": 2573
+    },
+    {
+      "epoch": 0.18572098560554132,
+      "grad_norm": 0.10352786630392075,
+      "learning_rate": 0.0001925847885697792,
+      "loss": 0.1917,
+      "step": 2574
+    },
+    {
+      "epoch": 0.18579313828060176,
+      "grad_norm": 0.12139669060707092,
+      "learning_rate": 0.00019258190215038247,
+      "loss": 0.1942,
+      "step": 2575
+    },
+    {
+      "epoch": 0.18586529095566218,
+      "grad_norm": 0.10332541167736053,
+      "learning_rate": 0.0001925790157309857,
+      "loss": 0.1508,
+      "step": 2576
+    },
+    {
+      "epoch": 0.18593744363072262,
+      "grad_norm": 0.10060901939868927,
+      "learning_rate": 0.000192576129311589,
+      "loss": 0.1717,
+      "step": 2577
+    },
+    {
+      "epoch": 0.18600959630578304,
+      "grad_norm": 0.11494073271751404,
+      "learning_rate": 0.00019257324289219225,
+      "loss": 0.1392,
+      "step": 2578
+    },
+    {
+      "epoch": 0.18608174898084345,
+      "grad_norm": 0.10815019905567169,
+      "learning_rate": 0.00019257035647279552,
+      "loss": 0.2348,
+      "step": 2579
+    },
+    {
+      "epoch": 0.1861539016559039,
+      "grad_norm": 0.10394209623336792,
+      "learning_rate": 0.00019256747005339878,
+      "loss": 0.2208,
+      "step": 2580
+    },
+    {
+      "epoch": 0.1862260543309643,
+      "grad_norm": 0.09226703643798828,
+      "learning_rate": 0.00019256458363400202,
+      "loss": 0.1539,
+      "step": 2581
+    },
+    {
+      "epoch": 0.18629820700602476,
+      "grad_norm": 0.12375635653734207,
+      "learning_rate": 0.00019256169721460528,
+      "loss": 0.1241,
+      "step": 2582
+    },
+    {
+      "epoch": 0.18637035968108517,
+      "grad_norm": 0.10145562142133713,
+      "learning_rate": 0.00019255881079520854,
+      "loss": 0.1774,
+      "step": 2583
+    },
+    {
+      "epoch": 0.18644251235614562,
+      "grad_norm": 0.10398755967617035,
+      "learning_rate": 0.00019255592437581183,
+      "loss": 0.1931,
+      "step": 2584
+    },
+    {
+      "epoch": 0.18651466503120603,
+      "grad_norm": 0.10972506552934647,
+      "learning_rate": 0.0001925530379564151,
+      "loss": 0.1863,
+      "step": 2585
+    },
+    {
+      "epoch": 0.18658681770626645,
+      "grad_norm": 0.09676441550254822,
+      "learning_rate": 0.00019255015153701833,
+      "loss": 0.1602,
+      "step": 2586
+    },
+    {
+      "epoch": 0.1866589703813269,
+      "grad_norm": 0.09614020586013794,
+      "learning_rate": 0.0001925472651176216,
+      "loss": 0.149,
+      "step": 2587
+    },
+    {
+      "epoch": 0.1867311230563873,
+      "grad_norm": 0.09428024291992188,
+      "learning_rate": 0.00019254437869822486,
+      "loss": 0.1444,
+      "step": 2588
+    },
+    {
+      "epoch": 0.18680327573144775,
+      "grad_norm": 0.08294445276260376,
+      "learning_rate": 0.00019254149227882812,
+      "loss": 0.1203,
+      "step": 2589
+    },
+    {
+      "epoch": 0.18687542840650817,
+      "grad_norm": 0.09792113304138184,
+      "learning_rate": 0.00019253860585943138,
+      "loss": 0.1312,
+      "step": 2590
+    },
+    {
+      "epoch": 0.1869475810815686,
+      "grad_norm": 0.09841245412826538,
+      "learning_rate": 0.00019253571944003465,
+      "loss": 0.1562,
+      "step": 2591
+    },
+    {
+      "epoch": 0.18701973375662903,
+      "grad_norm": 0.09592325985431671,
+      "learning_rate": 0.0001925328330206379,
+      "loss": 0.1313,
+      "step": 2592
+    },
+    {
+      "epoch": 0.18709188643168945,
+      "grad_norm": 0.10660023242235184,
+      "learning_rate": 0.00019252994660124117,
+      "loss": 0.1376,
+      "step": 2593
+    },
+    {
+      "epoch": 0.1871640391067499,
+      "grad_norm": 0.11521776765584946,
+      "learning_rate": 0.00019252706018184443,
+      "loss": 0.1718,
+      "step": 2594
+    },
+    {
+      "epoch": 0.1872361917818103,
+      "grad_norm": 0.09450531005859375,
+      "learning_rate": 0.0001925241737624477,
+      "loss": 0.1841,
+      "step": 2595
+    },
+    {
+      "epoch": 0.18730834445687075,
+      "grad_norm": 0.13401101529598236,
+      "learning_rate": 0.00019252128734305096,
+      "loss": 0.2122,
+      "step": 2596
+    },
+    {
+      "epoch": 0.18738049713193117,
+      "grad_norm": 0.133869469165802,
+      "learning_rate": 0.0001925184009236542,
+      "loss": 0.163,
+      "step": 2597
+    },
+    {
+      "epoch": 0.18745264980699158,
+      "grad_norm": 0.08174880594015121,
+      "learning_rate": 0.00019251551450425749,
+      "loss": 0.1994,
+      "step": 2598
+    },
+    {
+      "epoch": 0.18752480248205203,
+      "grad_norm": 0.08731729537248611,
+      "learning_rate": 0.00019251262808486075,
+      "loss": 0.1421,
+      "step": 2599
+    },
+    {
+      "epoch": 0.18759695515711244,
+      "grad_norm": 0.11166815459728241,
+      "learning_rate": 0.000192509741665464,
+      "loss": 0.1765,
+      "step": 2600
+    },
+    {
+      "epoch": 0.1876691078321729,
+      "grad_norm": 0.1146954819560051,
+      "learning_rate": 0.00019250685524606727,
+      "loss": 0.1676,
+      "step": 2601
+    },
+    {
+      "epoch": 0.1877412605072333,
+      "grad_norm": 0.09924327582120895,
+      "learning_rate": 0.0001925039688266705,
+      "loss": 0.1573,
+      "step": 2602
+    },
+    {
+      "epoch": 0.18781341318229372,
+      "grad_norm": 0.10343486070632935,
+      "learning_rate": 0.00019250108240727377,
+      "loss": 0.1352,
+      "step": 2603
+    },
+    {
+      "epoch": 0.18788556585735416,
+      "grad_norm": 0.08584459125995636,
+      "learning_rate": 0.00019249819598787704,
+      "loss": 0.1924,
+      "step": 2604
+    },
+    {
+      "epoch": 0.18795771853241458,
+      "grad_norm": 0.10155855119228363,
+      "learning_rate": 0.00019249530956848033,
+      "loss": 0.1907,
+      "step": 2605
+    },
+    {
+      "epoch": 0.18802987120747502,
+      "grad_norm": 0.10088808834552765,
+      "learning_rate": 0.0001924924231490836,
+      "loss": 0.1457,
+      "step": 2606
+    },
+    {
+      "epoch": 0.18810202388253544,
+      "grad_norm": 0.0985383465886116,
+      "learning_rate": 0.00019248953672968682,
+      "loss": 0.1897,
+      "step": 2607
+    },
+    {
+      "epoch": 0.18817417655759588,
+      "grad_norm": 0.10378298163414001,
+      "learning_rate": 0.0001924866503102901,
+      "loss": 0.1524,
+      "step": 2608
+    },
+    {
+      "epoch": 0.1882463292326563,
+      "grad_norm": 0.12473829090595245,
+      "learning_rate": 0.00019248376389089335,
+      "loss": 0.1566,
+      "step": 2609
+    },
+    {
+      "epoch": 0.18831848190771672,
+      "grad_norm": 0.1098468080163002,
+      "learning_rate": 0.0001924808774714966,
+      "loss": 0.2182,
+      "step": 2610
+    },
+    {
+      "epoch": 0.18839063458277716,
+      "grad_norm": 0.09675396233797073,
+      "learning_rate": 0.00019247799105209988,
+      "loss": 0.1436,
+      "step": 2611
+    },
+    {
+      "epoch": 0.18846278725783758,
+      "grad_norm": 0.08573716878890991,
+      "learning_rate": 0.00019247510463270314,
+      "loss": 0.1762,
+      "step": 2612
+    },
+    {
+      "epoch": 0.18853493993289802,
+      "grad_norm": 0.08802764117717743,
+      "learning_rate": 0.0001924722182133064,
+      "loss": 0.1698,
+      "step": 2613
+    },
+    {
+      "epoch": 0.18860709260795844,
+      "grad_norm": 0.09013257920742035,
+      "learning_rate": 0.00019246933179390967,
+      "loss": 0.1848,
+      "step": 2614
+    },
+    {
+      "epoch": 0.18867924528301888,
+      "grad_norm": 0.08657081425189972,
+      "learning_rate": 0.00019246644537451293,
+      "loss": 0.1726,
+      "step": 2615
+    },
+    {
+      "epoch": 0.1887513979580793,
+      "grad_norm": 0.10190070420503616,
+      "learning_rate": 0.0001924635589551162,
+      "loss": 0.186,
+      "step": 2616
+    },
+    {
+      "epoch": 0.18882355063313971,
+      "grad_norm": 0.09923119097948074,
+      "learning_rate": 0.00019246067253571945,
+      "loss": 0.1401,
+      "step": 2617
+    },
+    {
+      "epoch": 0.18889570330820016,
+      "grad_norm": 0.09834610670804977,
+      "learning_rate": 0.0001924577861163227,
+      "loss": 0.184,
+      "step": 2618
+    },
+    {
+      "epoch": 0.18896785598326057,
+      "grad_norm": 0.11239048838615417,
+      "learning_rate": 0.00019245489969692598,
+      "loss": 0.1629,
+      "step": 2619
+    },
+    {
+      "epoch": 0.18904000865832102,
+      "grad_norm": 0.14011728763580322,
+      "learning_rate": 0.00019245201327752924,
+      "loss": 0.1499,
+      "step": 2620
+    },
+    {
+      "epoch": 0.18911216133338143,
+      "grad_norm": 0.10546540468931198,
+      "learning_rate": 0.0001924491268581325,
+      "loss": 0.1378,
+      "step": 2621
+    },
+    {
+      "epoch": 0.18918431400844185,
+      "grad_norm": 0.10639491677284241,
+      "learning_rate": 0.00019244624043873577,
+      "loss": 0.1846,
+      "step": 2622
+    },
+    {
+      "epoch": 0.1892564666835023,
+      "grad_norm": 0.12428712844848633,
+      "learning_rate": 0.000192443354019339,
+      "loss": 0.1778,
+      "step": 2623
+    },
+    {
+      "epoch": 0.1893286193585627,
+      "grad_norm": 0.09785983711481094,
+      "learning_rate": 0.00019244046759994227,
+      "loss": 0.1766,
+      "step": 2624
+    },
+    {
+      "epoch": 0.18940077203362315,
+      "grad_norm": 0.12053379416465759,
+      "learning_rate": 0.00019243758118054553,
+      "loss": 0.1428,
+      "step": 2625
+    },
+    {
+      "epoch": 0.18947292470868357,
+      "grad_norm": 0.11321604996919632,
+      "learning_rate": 0.00019243469476114882,
+      "loss": 0.1368,
+      "step": 2626
+    },
+    {
+      "epoch": 0.18954507738374402,
+      "grad_norm": 0.10080399364233017,
+      "learning_rate": 0.00019243180834175208,
+      "loss": 0.1684,
+      "step": 2627
+    },
+    {
+      "epoch": 0.18961723005880443,
+      "grad_norm": 0.12170656770467758,
+      "learning_rate": 0.00019242892192235532,
+      "loss": 0.164,
+      "step": 2628
+    },
+    {
+      "epoch": 0.18968938273386485,
+      "grad_norm": 0.12450167536735535,
+      "learning_rate": 0.00019242603550295858,
+      "loss": 0.1932,
+      "step": 2629
+    },
+    {
+      "epoch": 0.1897615354089253,
+      "grad_norm": 0.1402718424797058,
+      "learning_rate": 0.00019242314908356184,
+      "loss": 0.192,
+      "step": 2630
+    },
+    {
+      "epoch": 0.1898336880839857,
+      "grad_norm": 0.38308095932006836,
+      "learning_rate": 0.0001924202626641651,
+      "loss": 0.1575,
+      "step": 2631
+    },
+    {
+      "epoch": 0.18990584075904615,
+      "grad_norm": 0.11162221431732178,
+      "learning_rate": 0.00019241737624476837,
+      "loss": 0.1695,
+      "step": 2632
+    },
+    {
+      "epoch": 0.18997799343410657,
+      "grad_norm": 0.16652381420135498,
+      "learning_rate": 0.00019241448982537163,
+      "loss": 0.1193,
+      "step": 2633
+    },
+    {
+      "epoch": 0.19005014610916698,
+      "grad_norm": 0.16206100583076477,
+      "learning_rate": 0.0001924116034059749,
+      "loss": 0.2161,
+      "step": 2634
+    },
+    {
+      "epoch": 0.19012229878422743,
+      "grad_norm": 0.1144791916012764,
+      "learning_rate": 0.00019240871698657816,
+      "loss": 0.1752,
+      "step": 2635
+    },
+    {
+      "epoch": 0.19019445145928784,
+      "grad_norm": 0.14860175549983978,
+      "learning_rate": 0.00019240583056718142,
+      "loss": 0.1665,
+      "step": 2636
+    },
+    {
+      "epoch": 0.1902666041343483,
+      "grad_norm": 0.09630924463272095,
+      "learning_rate": 0.00019240294414778469,
+      "loss": 0.1827,
+      "step": 2637
+    },
+    {
+      "epoch": 0.1903387568094087,
+      "grad_norm": 0.1070321798324585,
+      "learning_rate": 0.00019240005772838795,
+      "loss": 0.1331,
+      "step": 2638
+    },
+    {
+      "epoch": 0.19041090948446915,
+      "grad_norm": 0.10391581058502197,
+      "learning_rate": 0.00019239717130899118,
+      "loss": 0.1732,
+      "step": 2639
+    },
+    {
+      "epoch": 0.19048306215952956,
+      "grad_norm": 0.10819260776042938,
+      "learning_rate": 0.00019239428488959447,
+      "loss": 0.1693,
+      "step": 2640
+    },
+    {
+      "epoch": 0.19055521483458998,
+      "grad_norm": 0.12624964118003845,
+      "learning_rate": 0.00019239139847019774,
+      "loss": 0.1855,
+      "step": 2641
+    },
+    {
+      "epoch": 0.19062736750965042,
+      "grad_norm": 0.11198997497558594,
+      "learning_rate": 0.000192388512050801,
+      "loss": 0.1963,
+      "step": 2642
+    },
+    {
+      "epoch": 0.19069952018471084,
+      "grad_norm": 0.09690425544977188,
+      "learning_rate": 0.00019238562563140426,
+      "loss": 0.1878,
+      "step": 2643
+    },
+    {
+      "epoch": 0.19077167285977129,
+      "grad_norm": 0.19683675467967987,
+      "learning_rate": 0.00019238273921200753,
+      "loss": 0.1794,
+      "step": 2644
+    },
+    {
+      "epoch": 0.1908438255348317,
+      "grad_norm": 0.10952795296907425,
+      "learning_rate": 0.00019237985279261076,
+      "loss": 0.1849,
+      "step": 2645
+    },
+    {
+      "epoch": 0.19091597820989215,
+      "grad_norm": 0.09718668460845947,
+      "learning_rate": 0.00019237696637321402,
+      "loss": 0.1835,
+      "step": 2646
+    },
+    {
+      "epoch": 0.19098813088495256,
+      "grad_norm": 0.11417441070079803,
+      "learning_rate": 0.00019237407995381731,
+      "loss": 0.1974,
+      "step": 2647
+    },
+    {
+      "epoch": 0.19106028356001298,
+      "grad_norm": 0.09637610614299774,
+      "learning_rate": 0.00019237119353442058,
+      "loss": 0.178,
+      "step": 2648
+    },
+    {
+      "epoch": 0.19113243623507342,
+      "grad_norm": 0.08902224153280258,
+      "learning_rate": 0.00019236830711502384,
+      "loss": 0.1692,
+      "step": 2649
+    },
+    {
+      "epoch": 0.19120458891013384,
+      "grad_norm": 0.12328056246042252,
+      "learning_rate": 0.00019236542069562708,
+      "loss": 0.1091,
+      "step": 2650
+    },
+    {
+      "epoch": 0.19127674158519428,
+      "grad_norm": 0.09165138751268387,
+      "learning_rate": 0.00019236253427623034,
+      "loss": 0.165,
+      "step": 2651
+    },
+    {
+      "epoch": 0.1913488942602547,
+      "grad_norm": 0.10049431025981903,
+      "learning_rate": 0.0001923596478568336,
+      "loss": 0.151,
+      "step": 2652
+    },
+    {
+      "epoch": 0.19142104693531511,
+      "grad_norm": 0.10753556340932846,
+      "learning_rate": 0.00019235676143743686,
+      "loss": 0.145,
+      "step": 2653
+    },
+    {
+      "epoch": 0.19149319961037556,
+      "grad_norm": 0.11923553794622421,
+      "learning_rate": 0.00019235387501804013,
+      "loss": 0.1713,
+      "step": 2654
+    },
+    {
+      "epoch": 0.19156535228543597,
+      "grad_norm": 0.12246944010257721,
+      "learning_rate": 0.0001923509885986434,
+      "loss": 0.1532,
+      "step": 2655
+    },
+    {
+      "epoch": 0.19163750496049642,
+      "grad_norm": 0.14121495187282562,
+      "learning_rate": 0.00019234810217924665,
+      "loss": 0.2189,
+      "step": 2656
+    },
+    {
+      "epoch": 0.19170965763555683,
+      "grad_norm": 0.11362232267856598,
+      "learning_rate": 0.00019234521575984992,
+      "loss": 0.2029,
+      "step": 2657
+    },
+    {
+      "epoch": 0.19178181031061728,
+      "grad_norm": 0.1333397775888443,
+      "learning_rate": 0.00019234232934045318,
+      "loss": 0.1472,
+      "step": 2658
+    },
+    {
+      "epoch": 0.1918539629856777,
+      "grad_norm": 0.10232511907815933,
+      "learning_rate": 0.00019233944292105644,
+      "loss": 0.1862,
+      "step": 2659
+    },
+    {
+      "epoch": 0.1919261156607381,
+      "grad_norm": 0.10242033749818802,
+      "learning_rate": 0.0001923365565016597,
+      "loss": 0.1907,
+      "step": 2660
+    },
+    {
+      "epoch": 0.19199826833579856,
+      "grad_norm": 0.12704692780971527,
+      "learning_rate": 0.00019233367008226294,
+      "loss": 0.1682,
+      "step": 2661
+    },
+    {
+      "epoch": 0.19207042101085897,
+      "grad_norm": 0.10353595018386841,
+      "learning_rate": 0.00019233078366286623,
+      "loss": 0.1588,
+      "step": 2662
+    },
+    {
+      "epoch": 0.19214257368591942,
+      "grad_norm": 0.1133752167224884,
+      "learning_rate": 0.0001923278972434695,
+      "loss": 0.1579,
+      "step": 2663
+    },
+    {
+      "epoch": 0.19221472636097983,
+      "grad_norm": 0.10782555490732193,
+      "learning_rate": 0.00019232501082407276,
+      "loss": 0.1505,
+      "step": 2664
+    },
+    {
+      "epoch": 0.19228687903604025,
+      "grad_norm": 0.1164044663310051,
+      "learning_rate": 0.00019232212440467602,
+      "loss": 0.1738,
+      "step": 2665
+    },
+    {
+      "epoch": 0.1923590317111007,
+      "grad_norm": 0.09039617329835892,
+      "learning_rate": 0.00019231923798527926,
+      "loss": 0.1875,
+      "step": 2666
+    },
+    {
+      "epoch": 0.1924311843861611,
+      "grad_norm": 0.1016000509262085,
+      "learning_rate": 0.00019231635156588252,
+      "loss": 0.1345,
+      "step": 2667
+    },
+    {
+      "epoch": 0.19250333706122155,
+      "grad_norm": 0.09287573397159576,
+      "learning_rate": 0.00019231346514648578,
+      "loss": 0.1379,
+      "step": 2668
+    },
+    {
+      "epoch": 0.19257548973628197,
+      "grad_norm": 0.12193844467401505,
+      "learning_rate": 0.00019231057872708907,
+      "loss": 0.1735,
+      "step": 2669
+    },
+    {
+      "epoch": 0.1926476424113424,
+      "grad_norm": 0.12390395998954773,
+      "learning_rate": 0.00019230769230769233,
+      "loss": 0.1557,
+      "step": 2670
+    },
+    {
+      "epoch": 0.19271979508640283,
+      "grad_norm": 0.09519024193286896,
+      "learning_rate": 0.00019230480588829557,
+      "loss": 0.1581,
+      "step": 2671
+    },
+    {
+      "epoch": 0.19279194776146324,
+      "grad_norm": 0.12861637771129608,
+      "learning_rate": 0.00019230191946889883,
+      "loss": 0.1445,
+      "step": 2672
+    },
+    {
+      "epoch": 0.1928641004365237,
+      "grad_norm": 0.1024019792675972,
+      "learning_rate": 0.0001922990330495021,
+      "loss": 0.1357,
+      "step": 2673
+    },
+    {
+      "epoch": 0.1929362531115841,
+      "grad_norm": 0.10311330854892731,
+      "learning_rate": 0.00019229614663010536,
+      "loss": 0.1821,
+      "step": 2674
+    },
+    {
+      "epoch": 0.19300840578664455,
+      "grad_norm": 0.10143375396728516,
+      "learning_rate": 0.00019229326021070862,
+      "loss": 0.1418,
+      "step": 2675
+    },
+    {
+      "epoch": 0.19308055846170497,
+      "grad_norm": 0.11127184331417084,
+      "learning_rate": 0.00019229037379131189,
+      "loss": 0.193,
+      "step": 2676
+    },
+    {
+      "epoch": 0.1931527111367654,
+      "grad_norm": 0.1024523377418518,
+      "learning_rate": 0.00019228748737191515,
+      "loss": 0.1124,
+      "step": 2677
+    },
+    {
+      "epoch": 0.19322486381182583,
+      "grad_norm": 0.10198451578617096,
+      "learning_rate": 0.0001922846009525184,
+      "loss": 0.1819,
+      "step": 2678
+    },
+    {
+      "epoch": 0.19329701648688624,
+      "grad_norm": 0.13140185177326202,
+      "learning_rate": 0.00019228171453312167,
+      "loss": 0.1434,
+      "step": 2679
+    },
+    {
+      "epoch": 0.19336916916194669,
+      "grad_norm": 0.08785970509052277,
+      "learning_rate": 0.00019227882811372494,
+      "loss": 0.1228,
+      "step": 2680
+    },
+    {
+      "epoch": 0.1934413218370071,
+      "grad_norm": 0.08911599218845367,
+      "learning_rate": 0.0001922759416943282,
+      "loss": 0.1648,
+      "step": 2681
+    },
+    {
+      "epoch": 0.19351347451206755,
+      "grad_norm": 0.1131206527352333,
+      "learning_rate": 0.00019227305527493144,
+      "loss": 0.2027,
+      "step": 2682
+    },
+    {
+      "epoch": 0.19358562718712796,
+      "grad_norm": 0.09311474859714508,
+      "learning_rate": 0.00019227016885553473,
+      "loss": 0.133,
+      "step": 2683
+    },
+    {
+      "epoch": 0.19365777986218838,
+      "grad_norm": 0.14921562373638153,
+      "learning_rate": 0.000192267282436138,
+      "loss": 0.1894,
+      "step": 2684
+    },
+    {
+      "epoch": 0.19372993253724882,
+      "grad_norm": 0.1435394287109375,
+      "learning_rate": 0.00019226439601674125,
+      "loss": 0.1447,
+      "step": 2685
+    },
+    {
+      "epoch": 0.19380208521230924,
+      "grad_norm": 0.11073862016201019,
+      "learning_rate": 0.00019226150959734451,
+      "loss": 0.1524,
+      "step": 2686
+    },
+    {
+      "epoch": 0.19387423788736968,
+      "grad_norm": 0.11298488080501556,
+      "learning_rate": 0.00019225862317794775,
+      "loss": 0.1576,
+      "step": 2687
+    },
+    {
+      "epoch": 0.1939463905624301,
+      "grad_norm": 0.11842662841081619,
+      "learning_rate": 0.000192255736758551,
+      "loss": 0.1702,
+      "step": 2688
+    },
+    {
+      "epoch": 0.19401854323749054,
+      "grad_norm": 0.1126062273979187,
+      "learning_rate": 0.00019225285033915428,
+      "loss": 0.1704,
+      "step": 2689
+    },
+    {
+      "epoch": 0.19409069591255096,
+      "grad_norm": 0.11057586222887039,
+      "learning_rate": 0.00019224996391975757,
+      "loss": 0.1566,
+      "step": 2690
+    },
+    {
+      "epoch": 0.19416284858761138,
+      "grad_norm": 0.11670485138893127,
+      "learning_rate": 0.00019224707750036083,
+      "loss": 0.1672,
+      "step": 2691
+    },
+    {
+      "epoch": 0.19423500126267182,
+      "grad_norm": 0.21490761637687683,
+      "learning_rate": 0.00019224419108096406,
+      "loss": 0.2064,
+      "step": 2692
+    },
+    {
+      "epoch": 0.19430715393773224,
+      "grad_norm": 0.11341680586338043,
+      "learning_rate": 0.00019224130466156733,
+      "loss": 0.1794,
+      "step": 2693
+    },
+    {
+      "epoch": 0.19437930661279268,
+      "grad_norm": 0.09577616304159164,
+      "learning_rate": 0.0001922384182421706,
+      "loss": 0.1553,
+      "step": 2694
+    },
+    {
+      "epoch": 0.1944514592878531,
+      "grad_norm": 0.10448767989873886,
+      "learning_rate": 0.00019223553182277385,
+      "loss": 0.188,
+      "step": 2695
+    },
+    {
+      "epoch": 0.1945236119629135,
+      "grad_norm": 0.11948369443416595,
+      "learning_rate": 0.00019223264540337712,
+      "loss": 0.1362,
+      "step": 2696
+    },
+    {
+      "epoch": 0.19459576463797396,
+      "grad_norm": 0.11946840584278107,
+      "learning_rate": 0.00019222975898398038,
+      "loss": 0.142,
+      "step": 2697
+    },
+    {
+      "epoch": 0.19466791731303437,
+      "grad_norm": 0.11634260416030884,
+      "learning_rate": 0.00019222687256458364,
+      "loss": 0.1802,
+      "step": 2698
+    },
+    {
+      "epoch": 0.19474006998809482,
+      "grad_norm": 0.09894415736198425,
+      "learning_rate": 0.0001922239861451869,
+      "loss": 0.171,
+      "step": 2699
+    },
+    {
+      "epoch": 0.19481222266315523,
+      "grad_norm": 0.15403254330158234,
+      "learning_rate": 0.00019222109972579017,
+      "loss": 0.1629,
+      "step": 2700
+    },
+    {
+      "epoch": 0.19488437533821568,
+      "grad_norm": 0.16679127514362335,
+      "learning_rate": 0.00019221821330639343,
+      "loss": 0.1866,
+      "step": 2701
+    },
+    {
+      "epoch": 0.1949565280132761,
+      "grad_norm": 0.11218617111444473,
+      "learning_rate": 0.0001922153268869967,
+      "loss": 0.1852,
+      "step": 2702
+    },
+    {
+      "epoch": 0.1950286806883365,
+      "grad_norm": 0.18190835416316986,
+      "learning_rate": 0.00019221244046759993,
+      "loss": 0.1637,
+      "step": 2703
+    },
+    {
+      "epoch": 0.19510083336339695,
+      "grad_norm": 0.08974639326334,
+      "learning_rate": 0.00019220955404820322,
+      "loss": 0.2067,
+      "step": 2704
+    },
+    {
+      "epoch": 0.19517298603845737,
+      "grad_norm": 0.1249222680926323,
+      "learning_rate": 0.00019220666762880648,
+      "loss": 0.18,
+      "step": 2705
+    },
+    {
+      "epoch": 0.1952451387135178,
+      "grad_norm": 0.09737565368413925,
+      "learning_rate": 0.00019220378120940975,
+      "loss": 0.1286,
+      "step": 2706
+    },
+    {
+      "epoch": 0.19531729138857823,
+      "grad_norm": 0.10697299987077713,
+      "learning_rate": 0.000192200894790013,
+      "loss": 0.1476,
+      "step": 2707
+    },
+    {
+      "epoch": 0.19538944406363867,
+      "grad_norm": 0.10189025849103928,
+      "learning_rate": 0.00019219800837061624,
+      "loss": 0.1714,
+      "step": 2708
+    },
+    {
+      "epoch": 0.1954615967386991,
+      "grad_norm": 0.11121237277984619,
+      "learning_rate": 0.0001921951219512195,
+      "loss": 0.1894,
+      "step": 2709
+    },
+    {
+      "epoch": 0.1955337494137595,
+      "grad_norm": 0.1351558119058609,
+      "learning_rate": 0.00019219223553182277,
+      "loss": 0.1662,
+      "step": 2710
+    },
+    {
+      "epoch": 0.19560590208881995,
+      "grad_norm": 0.10621657222509384,
+      "learning_rate": 0.00019218934911242606,
+      "loss": 0.1292,
+      "step": 2711
+    },
+    {
+      "epoch": 0.19567805476388037,
+      "grad_norm": 0.11616257578134537,
+      "learning_rate": 0.00019218646269302932,
+      "loss": 0.1725,
+      "step": 2712
+    },
+    {
+      "epoch": 0.1957502074389408,
+      "grad_norm": 0.09937336295843124,
+      "learning_rate": 0.00019218357627363256,
+      "loss": 0.1548,
+      "step": 2713
+    },
+    {
+      "epoch": 0.19582236011400123,
+      "grad_norm": 0.097499780356884,
+      "learning_rate": 0.00019218068985423582,
+      "loss": 0.1249,
+      "step": 2714
+    },
+    {
+      "epoch": 0.19589451278906164,
+      "grad_norm": 0.11946720629930496,
+      "learning_rate": 0.00019217780343483908,
+      "loss": 0.159,
+      "step": 2715
+    },
+    {
+      "epoch": 0.1959666654641221,
+      "grad_norm": 0.1145441010594368,
+      "learning_rate": 0.00019217491701544235,
+      "loss": 0.1583,
+      "step": 2716
+    },
+    {
+      "epoch": 0.1960388181391825,
+      "grad_norm": 0.12353675067424774,
+      "learning_rate": 0.0001921720305960456,
+      "loss": 0.1819,
+      "step": 2717
+    },
+    {
+      "epoch": 0.19611097081424295,
+      "grad_norm": 0.1489514261484146,
+      "learning_rate": 0.00019216914417664887,
+      "loss": 0.1741,
+      "step": 2718
+    },
+    {
+      "epoch": 0.19618312348930336,
+      "grad_norm": 0.08473455905914307,
+      "learning_rate": 0.00019216625775725214,
+      "loss": 0.1341,
+      "step": 2719
+    },
+    {
+      "epoch": 0.1962552761643638,
+      "grad_norm": 0.10539857298135757,
+      "learning_rate": 0.0001921633713378554,
+      "loss": 0.145,
+      "step": 2720
+    },
+    {
+      "epoch": 0.19632742883942422,
+      "grad_norm": 0.10995670408010483,
+      "learning_rate": 0.00019216048491845866,
+      "loss": 0.1665,
+      "step": 2721
+    },
+    {
+      "epoch": 0.19639958151448464,
+      "grad_norm": 0.12223058193922043,
+      "learning_rate": 0.00019215759849906193,
+      "loss": 0.1956,
+      "step": 2722
+    },
+    {
+      "epoch": 0.19647173418954508,
+      "grad_norm": 0.11050442606210709,
+      "learning_rate": 0.0001921547120796652,
+      "loss": 0.1682,
+      "step": 2723
+    },
+    {
+      "epoch": 0.1965438868646055,
+      "grad_norm": 0.11254192888736725,
+      "learning_rate": 0.00019215182566026842,
+      "loss": 0.1601,
+      "step": 2724
+    },
+    {
+      "epoch": 0.19661603953966594,
+      "grad_norm": 0.12485778331756592,
+      "learning_rate": 0.00019214893924087171,
+      "loss": 0.1618,
+      "step": 2725
+    },
+    {
+      "epoch": 0.19668819221472636,
+      "grad_norm": 0.09796317666769028,
+      "learning_rate": 0.00019214605282147498,
+      "loss": 0.1907,
+      "step": 2726
+    },
+    {
+      "epoch": 0.19676034488978678,
+      "grad_norm": 0.1087847352027893,
+      "learning_rate": 0.00019214316640207824,
+      "loss": 0.1331,
+      "step": 2727
+    },
+    {
+      "epoch": 0.19683249756484722,
+      "grad_norm": 0.10561531782150269,
+      "learning_rate": 0.0001921402799826815,
+      "loss": 0.1408,
+      "step": 2728
+    },
+    {
+      "epoch": 0.19690465023990764,
+      "grad_norm": 0.09105509519577026,
+      "learning_rate": 0.00019213739356328474,
+      "loss": 0.207,
+      "step": 2729
+    },
+    {
+      "epoch": 0.19697680291496808,
+      "grad_norm": 0.10997123271226883,
+      "learning_rate": 0.000192134507143888,
+      "loss": 0.1938,
+      "step": 2730
+    },
+    {
+      "epoch": 0.1970489555900285,
+      "grad_norm": 0.11604651063680649,
+      "learning_rate": 0.00019213162072449126,
+      "loss": 0.1932,
+      "step": 2731
+    },
+    {
+      "epoch": 0.19712110826508894,
+      "grad_norm": 0.0898117646574974,
+      "learning_rate": 0.00019212873430509455,
+      "loss": 0.1623,
+      "step": 2732
+    },
+    {
+      "epoch": 0.19719326094014936,
+      "grad_norm": 0.1296977400779724,
+      "learning_rate": 0.00019212584788569782,
+      "loss": 0.2296,
+      "step": 2733
+    },
+    {
+      "epoch": 0.19726541361520977,
+      "grad_norm": 0.09977329522371292,
+      "learning_rate": 0.00019212296146630105,
+      "loss": 0.1267,
+      "step": 2734
+    },
+    {
+      "epoch": 0.19733756629027022,
+      "grad_norm": 0.11098989844322205,
+      "learning_rate": 0.00019212007504690432,
+      "loss": 0.1853,
+      "step": 2735
+    },
+    {
+      "epoch": 0.19740971896533063,
+      "grad_norm": 0.12152834236621857,
+      "learning_rate": 0.00019211718862750758,
+      "loss": 0.1582,
+      "step": 2736
+    },
+    {
+      "epoch": 0.19748187164039108,
+      "grad_norm": 0.12584428489208221,
+      "learning_rate": 0.00019211430220811084,
+      "loss": 0.1826,
+      "step": 2737
+    },
+    {
+      "epoch": 0.1975540243154515,
+      "grad_norm": 0.11646226048469543,
+      "learning_rate": 0.0001921114157887141,
+      "loss": 0.1609,
+      "step": 2738
+    },
+    {
+      "epoch": 0.19762617699051194,
+      "grad_norm": 0.10537946224212646,
+      "learning_rate": 0.00019210852936931737,
+      "loss": 0.2103,
+      "step": 2739
+    },
+    {
+      "epoch": 0.19769832966557235,
+      "grad_norm": 0.11018376052379608,
+      "learning_rate": 0.00019210564294992063,
+      "loss": 0.1759,
+      "step": 2740
+    },
+    {
+      "epoch": 0.19777048234063277,
+      "grad_norm": 0.15066130459308624,
+      "learning_rate": 0.0001921027565305239,
+      "loss": 0.1411,
+      "step": 2741
+    },
+    {
+      "epoch": 0.1978426350156932,
+      "grad_norm": 0.10171657055616379,
+      "learning_rate": 0.00019209987011112716,
+      "loss": 0.1499,
+      "step": 2742
+    },
+    {
+      "epoch": 0.19791478769075363,
+      "grad_norm": 0.08922936022281647,
+      "learning_rate": 0.00019209698369173042,
+      "loss": 0.1409,
+      "step": 2743
+    },
+    {
+      "epoch": 0.19798694036581407,
+      "grad_norm": 0.1321311891078949,
+      "learning_rate": 0.00019209409727233368,
+      "loss": 0.2057,
+      "step": 2744
+    },
+    {
+      "epoch": 0.1980590930408745,
+      "grad_norm": 0.14305776357650757,
+      "learning_rate": 0.00019209121085293692,
+      "loss": 0.1298,
+      "step": 2745
+    },
+    {
+      "epoch": 0.1981312457159349,
+      "grad_norm": 0.08078758418560028,
+      "learning_rate": 0.0001920883244335402,
+      "loss": 0.1526,
+      "step": 2746
+    },
+    {
+      "epoch": 0.19820339839099535,
+      "grad_norm": 0.11699184775352478,
+      "learning_rate": 0.00019208543801414347,
+      "loss": 0.1661,
+      "step": 2747
+    },
+    {
+      "epoch": 0.19827555106605577,
+      "grad_norm": 0.11144742369651794,
+      "learning_rate": 0.00019208255159474673,
+      "loss": 0.1587,
+      "step": 2748
+    },
+    {
+      "epoch": 0.1983477037411162,
+      "grad_norm": 0.13285599648952484,
+      "learning_rate": 0.00019207966517535,
+      "loss": 0.227,
+      "step": 2749
+    },
+    {
+      "epoch": 0.19841985641617663,
+      "grad_norm": 0.11385666579008102,
+      "learning_rate": 0.00019207677875595323,
+      "loss": 0.1164,
+      "step": 2750
+    },
+    {
+      "epoch": 0.19849200909123707,
+      "grad_norm": 0.17403168976306915,
+      "learning_rate": 0.0001920738923365565,
+      "loss": 0.1651,
+      "step": 2751
+    },
+    {
+      "epoch": 0.1985641617662975,
+      "grad_norm": 0.10947877913713455,
+      "learning_rate": 0.00019207100591715976,
+      "loss": 0.1555,
+      "step": 2752
+    },
+    {
+      "epoch": 0.1986363144413579,
+      "grad_norm": 0.13744814693927765,
+      "learning_rate": 0.00019206811949776305,
+      "loss": 0.2367,
+      "step": 2753
+    },
+    {
+      "epoch": 0.19870846711641835,
+      "grad_norm": 0.09754101932048798,
+      "learning_rate": 0.0001920652330783663,
+      "loss": 0.1478,
+      "step": 2754
+    },
+    {
+      "epoch": 0.19878061979147876,
+      "grad_norm": 0.09394462406635284,
+      "learning_rate": 0.00019206234665896955,
+      "loss": 0.1497,
+      "step": 2755
+    },
+    {
+      "epoch": 0.1988527724665392,
+      "grad_norm": 0.11369165033102036,
+      "learning_rate": 0.0001920594602395728,
+      "loss": 0.2044,
+      "step": 2756
+    },
+    {
+      "epoch": 0.19892492514159962,
+      "grad_norm": 0.10330383479595184,
+      "learning_rate": 0.00019205657382017607,
+      "loss": 0.1497,
+      "step": 2757
+    },
+    {
+      "epoch": 0.19899707781666004,
+      "grad_norm": 0.10162612050771713,
+      "learning_rate": 0.00019205368740077934,
+      "loss": 0.1602,
+      "step": 2758
+    },
+    {
+      "epoch": 0.19906923049172048,
+      "grad_norm": 0.09283189475536346,
+      "learning_rate": 0.0001920508009813826,
+      "loss": 0.1232,
+      "step": 2759
+    },
+    {
+      "epoch": 0.1991413831667809,
+      "grad_norm": 0.09310007095336914,
+      "learning_rate": 0.00019204791456198586,
+      "loss": 0.1668,
+      "step": 2760
+    },
+    {
+      "epoch": 0.19921353584184134,
+      "grad_norm": 0.09826725721359253,
+      "learning_rate": 0.00019204502814258912,
+      "loss": 0.2198,
+      "step": 2761
+    },
+    {
+      "epoch": 0.19928568851690176,
+      "grad_norm": 0.08991654962301254,
+      "learning_rate": 0.0001920421417231924,
+      "loss": 0.1652,
+      "step": 2762
+    },
+    {
+      "epoch": 0.1993578411919622,
+      "grad_norm": 0.10206000506877899,
+      "learning_rate": 0.00019203925530379565,
+      "loss": 0.1718,
+      "step": 2763
+    },
+    {
+      "epoch": 0.19942999386702262,
+      "grad_norm": 0.11106408387422562,
+      "learning_rate": 0.00019203636888439891,
+      "loss": 0.1797,
+      "step": 2764
+    },
+    {
+      "epoch": 0.19950214654208304,
+      "grad_norm": 0.10109609365463257,
+      "learning_rate": 0.00019203348246500218,
+      "loss": 0.1481,
+      "step": 2765
+    },
+    {
+      "epoch": 0.19957429921714348,
+      "grad_norm": 0.11432026326656342,
+      "learning_rate": 0.00019203059604560544,
+      "loss": 0.1739,
+      "step": 2766
+    },
+    {
+      "epoch": 0.1996464518922039,
+      "grad_norm": 0.1216094046831131,
+      "learning_rate": 0.0001920277096262087,
+      "loss": 0.1703,
+      "step": 2767
+    },
+    {
+      "epoch": 0.19971860456726434,
+      "grad_norm": 0.1027732789516449,
+      "learning_rate": 0.00019202482320681197,
+      "loss": 0.1404,
+      "step": 2768
+    },
+    {
+      "epoch": 0.19979075724232476,
+      "grad_norm": 0.08884584158658981,
+      "learning_rate": 0.00019202193678741523,
+      "loss": 0.1589,
+      "step": 2769
+    },
+    {
+      "epoch": 0.1998629099173852,
+      "grad_norm": 0.18108291923999786,
+      "learning_rate": 0.0001920190503680185,
+      "loss": 0.2151,
+      "step": 2770
+    },
+    {
+      "epoch": 0.19993506259244562,
+      "grad_norm": 0.0910463035106659,
+      "learning_rate": 0.00019201616394862175,
+      "loss": 0.1887,
+      "step": 2771
+    },
+    {
+      "epoch": 0.20000721526750603,
+      "grad_norm": 0.09141043573617935,
+      "learning_rate": 0.000192013277529225,
+      "loss": 0.1428,
+      "step": 2772
+    },
+    {
+      "epoch": 0.20007936794256648,
+      "grad_norm": 0.10917165875434875,
+      "learning_rate": 0.00019201039110982825,
+      "loss": 0.1422,
+      "step": 2773
+    },
+    {
+      "epoch": 0.2001515206176269,
+      "grad_norm": 0.10315337777137756,
+      "learning_rate": 0.00019200750469043154,
+      "loss": 0.1784,
+      "step": 2774
+    },
+    {
+      "epoch": 0.20022367329268734,
+      "grad_norm": 0.10837302356958389,
+      "learning_rate": 0.0001920046182710348,
+      "loss": 0.1691,
+      "step": 2775
+    },
+    {
+      "epoch": 0.20029582596774775,
+      "grad_norm": 0.13153184950351715,
+      "learning_rate": 0.00019200173185163807,
+      "loss": 0.1985,
+      "step": 2776
+    },
+    {
+      "epoch": 0.20036797864280817,
+      "grad_norm": 0.10565356910228729,
+      "learning_rate": 0.0001919988454322413,
+      "loss": 0.1602,
+      "step": 2777
+    },
+    {
+      "epoch": 0.20044013131786861,
+      "grad_norm": 0.10361316055059433,
+      "learning_rate": 0.00019199595901284457,
+      "loss": 0.1682,
+      "step": 2778
+    },
+    {
+      "epoch": 0.20051228399292903,
+      "grad_norm": 0.12748175859451294,
+      "learning_rate": 0.00019199307259344783,
+      "loss": 0.1404,
+      "step": 2779
+    },
+    {
+      "epoch": 0.20058443666798947,
+      "grad_norm": 0.09716325998306274,
+      "learning_rate": 0.0001919901861740511,
+      "loss": 0.111,
+      "step": 2780
+    },
+    {
+      "epoch": 0.2006565893430499,
+      "grad_norm": 0.17050409317016602,
+      "learning_rate": 0.00019198729975465438,
+      "loss": 0.1974,
+      "step": 2781
+    },
+    {
+      "epoch": 0.20072874201811033,
+      "grad_norm": 0.09248002618551254,
+      "learning_rate": 0.00019198441333525762,
+      "loss": 0.1602,
+      "step": 2782
+    },
+    {
+      "epoch": 0.20080089469317075,
+      "grad_norm": 0.08953045308589935,
+      "learning_rate": 0.00019198152691586088,
+      "loss": 0.1698,
+      "step": 2783
+    },
+    {
+      "epoch": 0.20087304736823117,
+      "grad_norm": 0.12317956984043121,
+      "learning_rate": 0.00019197864049646415,
+      "loss": 0.2052,
+      "step": 2784
+    },
+    {
+      "epoch": 0.2009452000432916,
+      "grad_norm": 0.10364929586648941,
+      "learning_rate": 0.0001919757540770674,
+      "loss": 0.1787,
+      "step": 2785
+    },
+    {
+      "epoch": 0.20101735271835203,
+      "grad_norm": 0.1174645721912384,
+      "learning_rate": 0.00019197286765767067,
+      "loss": 0.2013,
+      "step": 2786
+    },
+    {
+      "epoch": 0.20108950539341247,
+      "grad_norm": 0.12904039025306702,
+      "learning_rate": 0.00019196998123827393,
+      "loss": 0.1326,
+      "step": 2787
+    },
+    {
+      "epoch": 0.2011616580684729,
+      "grad_norm": 0.1159813329577446,
+      "learning_rate": 0.0001919670948188772,
+      "loss": 0.1674,
+      "step": 2788
+    },
+    {
+      "epoch": 0.2012338107435333,
+      "grad_norm": 0.12107376009225845,
+      "learning_rate": 0.00019196420839948046,
+      "loss": 0.1459,
+      "step": 2789
+    },
+    {
+      "epoch": 0.20130596341859375,
+      "grad_norm": 0.11082377284765244,
+      "learning_rate": 0.00019196132198008372,
+      "loss": 0.1312,
+      "step": 2790
+    },
+    {
+      "epoch": 0.20137811609365416,
+      "grad_norm": 0.09276112914085388,
+      "learning_rate": 0.00019195843556068699,
+      "loss": 0.1522,
+      "step": 2791
+    },
+    {
+      "epoch": 0.2014502687687146,
+      "grad_norm": 0.09536813199520111,
+      "learning_rate": 0.00019195554914129025,
+      "loss": 0.1426,
+      "step": 2792
+    },
+    {
+      "epoch": 0.20152242144377502,
+      "grad_norm": 0.1258544921875,
+      "learning_rate": 0.00019195266272189348,
+      "loss": 0.1246,
+      "step": 2793
+    },
+    {
+      "epoch": 0.20159457411883547,
+      "grad_norm": 0.10447549819946289,
+      "learning_rate": 0.00019194977630249675,
+      "loss": 0.1726,
+      "step": 2794
+    },
+    {
+      "epoch": 0.20166672679389588,
+      "grad_norm": 0.10352654755115509,
+      "learning_rate": 0.00019194688988310004,
+      "loss": 0.1757,
+      "step": 2795
+    },
+    {
+      "epoch": 0.2017388794689563,
+      "grad_norm": 0.12527719140052795,
+      "learning_rate": 0.0001919440034637033,
+      "loss": 0.2088,
+      "step": 2796
+    },
+    {
+      "epoch": 0.20181103214401674,
+      "grad_norm": 0.09983530640602112,
+      "learning_rate": 0.00019194111704430656,
+      "loss": 0.161,
+      "step": 2797
+    },
+    {
+      "epoch": 0.20188318481907716,
+      "grad_norm": 0.09162452071905136,
+      "learning_rate": 0.0001919382306249098,
+      "loss": 0.18,
+      "step": 2798
+    },
+    {
+      "epoch": 0.2019553374941376,
+      "grad_norm": 0.109310083091259,
+      "learning_rate": 0.00019193534420551306,
+      "loss": 0.161,
+      "step": 2799
+    },
+    {
+      "epoch": 0.20202749016919802,
+      "grad_norm": 0.09814836084842682,
+      "learning_rate": 0.00019193245778611632,
+      "loss": 0.162,
+      "step": 2800
+    },
+    {
+      "epoch": 0.20209964284425846,
+      "grad_norm": 0.13046738505363464,
+      "learning_rate": 0.0001919295713667196,
+      "loss": 0.1757,
+      "step": 2801
+    },
+    {
+      "epoch": 0.20217179551931888,
+      "grad_norm": 0.11764957755804062,
+      "learning_rate": 0.00019192668494732288,
+      "loss": 0.173,
+      "step": 2802
+    },
+    {
+      "epoch": 0.2022439481943793,
+      "grad_norm": 0.11445638537406921,
+      "learning_rate": 0.0001919237985279261,
+      "loss": 0.1719,
+      "step": 2803
+    },
+    {
+      "epoch": 0.20231610086943974,
+      "grad_norm": 0.11463334411382675,
+      "learning_rate": 0.00019192091210852938,
+      "loss": 0.1545,
+      "step": 2804
+    },
+    {
+      "epoch": 0.20238825354450016,
+      "grad_norm": 0.09536374360322952,
+      "learning_rate": 0.00019191802568913264,
+      "loss": 0.1495,
+      "step": 2805
+    },
+    {
+      "epoch": 0.2024604062195606,
+      "grad_norm": 0.09586406499147415,
+      "learning_rate": 0.0001919151392697359,
+      "loss": 0.1777,
+      "step": 2806
+    },
+    {
+      "epoch": 0.20253255889462102,
+      "grad_norm": 0.10726621001958847,
+      "learning_rate": 0.00019191225285033917,
+      "loss": 0.1317,
+      "step": 2807
+    },
+    {
+      "epoch": 0.20260471156968143,
+      "grad_norm": 0.09684295207262039,
+      "learning_rate": 0.00019190936643094243,
+      "loss": 0.1371,
+      "step": 2808
+    },
+    {
+      "epoch": 0.20267686424474188,
+      "grad_norm": 0.108054518699646,
+      "learning_rate": 0.0001919064800115457,
+      "loss": 0.1505,
+      "step": 2809
+    },
+    {
+      "epoch": 0.2027490169198023,
+      "grad_norm": 0.1165657788515091,
+      "learning_rate": 0.00019190359359214895,
+      "loss": 0.1844,
+      "step": 2810
+    },
+    {
+      "epoch": 0.20282116959486274,
+      "grad_norm": 0.10754991322755814,
+      "learning_rate": 0.00019190070717275222,
+      "loss": 0.161,
+      "step": 2811
+    },
+    {
+      "epoch": 0.20289332226992315,
+      "grad_norm": 0.12590479850769043,
+      "learning_rate": 0.00019189782075335548,
+      "loss": 0.1631,
+      "step": 2812
+    },
+    {
+      "epoch": 0.2029654749449836,
+      "grad_norm": 0.09850574284791946,
+      "learning_rate": 0.00019189493433395874,
+      "loss": 0.1037,
+      "step": 2813
+    },
+    {
+      "epoch": 0.20303762762004401,
+      "grad_norm": 0.1069825142621994,
+      "learning_rate": 0.00019189204791456198,
+      "loss": 0.1669,
+      "step": 2814
+    },
+    {
+      "epoch": 0.20310978029510443,
+      "grad_norm": 0.09667236357927322,
+      "learning_rate": 0.00019188916149516524,
+      "loss": 0.1858,
+      "step": 2815
+    },
+    {
+      "epoch": 0.20318193297016487,
+      "grad_norm": 0.10493231564760208,
+      "learning_rate": 0.00019188627507576853,
+      "loss": 0.1521,
+      "step": 2816
+    },
+    {
+      "epoch": 0.2032540856452253,
+      "grad_norm": 0.11691664159297943,
+      "learning_rate": 0.0001918833886563718,
+      "loss": 0.1328,
+      "step": 2817
+    },
+    {
+      "epoch": 0.20332623832028573,
+      "grad_norm": 0.11871805042028427,
+      "learning_rate": 0.00019188050223697506,
+      "loss": 0.121,
+      "step": 2818
+    },
+    {
+      "epoch": 0.20339839099534615,
+      "grad_norm": 0.10238055884838104,
+      "learning_rate": 0.0001918776158175783,
+      "loss": 0.1265,
+      "step": 2819
+    },
+    {
+      "epoch": 0.20347054367040657,
+      "grad_norm": 0.12179987132549286,
+      "learning_rate": 0.00019187472939818156,
+      "loss": 0.1236,
+      "step": 2820
+    },
+    {
+      "epoch": 0.203542696345467,
+      "grad_norm": 0.13222713768482208,
+      "learning_rate": 0.00019187184297878482,
+      "loss": 0.1727,
+      "step": 2821
+    },
+    {
+      "epoch": 0.20361484902052743,
+      "grad_norm": 0.11437147855758667,
+      "learning_rate": 0.00019186895655938808,
+      "loss": 0.1206,
+      "step": 2822
+    },
+    {
+      "epoch": 0.20368700169558787,
+      "grad_norm": 0.14340893924236298,
+      "learning_rate": 0.00019186607013999137,
+      "loss": 0.1365,
+      "step": 2823
+    },
+    {
+      "epoch": 0.2037591543706483,
+      "grad_norm": 0.08930408209562302,
+      "learning_rate": 0.0001918631837205946,
+      "loss": 0.1495,
+      "step": 2824
+    },
+    {
+      "epoch": 0.20383130704570873,
+      "grad_norm": 0.09873627871274948,
+      "learning_rate": 0.00019186029730119787,
+      "loss": 0.1484,
+      "step": 2825
+    },
+    {
+      "epoch": 0.20390345972076915,
+      "grad_norm": 0.10518117249011993,
+      "learning_rate": 0.00019185741088180113,
+      "loss": 0.1574,
+      "step": 2826
+    },
+    {
+      "epoch": 0.20397561239582956,
+      "grad_norm": 0.12713289260864258,
+      "learning_rate": 0.0001918545244624044,
+      "loss": 0.152,
+      "step": 2827
+    },
+    {
+      "epoch": 0.20404776507089,
+      "grad_norm": 0.11654222011566162,
+      "learning_rate": 0.00019185163804300766,
+      "loss": 0.1762,
+      "step": 2828
+    },
+    {
+      "epoch": 0.20411991774595042,
+      "grad_norm": 0.11516798287630081,
+      "learning_rate": 0.00019184875162361092,
+      "loss": 0.138,
+      "step": 2829
+    },
+    {
+      "epoch": 0.20419207042101087,
+      "grad_norm": 0.08868873864412308,
+      "learning_rate": 0.00019184586520421419,
+      "loss": 0.1865,
+      "step": 2830
+    },
+    {
+      "epoch": 0.20426422309607128,
+      "grad_norm": 0.11917749792337418,
+      "learning_rate": 0.00019184297878481745,
+      "loss": 0.1713,
+      "step": 2831
+    },
+    {
+      "epoch": 0.20433637577113173,
+      "grad_norm": 0.1309102177619934,
+      "learning_rate": 0.0001918400923654207,
+      "loss": 0.1773,
+      "step": 2832
+    },
+    {
+      "epoch": 0.20440852844619214,
+      "grad_norm": 0.10248704999685287,
+      "learning_rate": 0.00019183720594602397,
+      "loss": 0.1367,
+      "step": 2833
+    },
+    {
+      "epoch": 0.20448068112125256,
+      "grad_norm": 0.11289657652378082,
+      "learning_rate": 0.00019183431952662724,
+      "loss": 0.1478,
+      "step": 2834
+    },
+    {
+      "epoch": 0.204552833796313,
+      "grad_norm": 0.10069409757852554,
+      "learning_rate": 0.00019183143310723047,
+      "loss": 0.1606,
+      "step": 2835
+    },
+    {
+      "epoch": 0.20462498647137342,
+      "grad_norm": 0.10064855217933655,
+      "learning_rate": 0.00019182854668783374,
+      "loss": 0.1771,
+      "step": 2836
+    },
+    {
+      "epoch": 0.20469713914643387,
+      "grad_norm": 0.0808321163058281,
+      "learning_rate": 0.00019182566026843703,
+      "loss": 0.1805,
+      "step": 2837
+    },
+    {
+      "epoch": 0.20476929182149428,
+      "grad_norm": 0.09613669663667679,
+      "learning_rate": 0.0001918227738490403,
+      "loss": 0.1979,
+      "step": 2838
+    },
+    {
+      "epoch": 0.2048414444965547,
+      "grad_norm": 0.10829374194145203,
+      "learning_rate": 0.00019181988742964355,
+      "loss": 0.1421,
+      "step": 2839
+    },
+    {
+      "epoch": 0.20491359717161514,
+      "grad_norm": 0.09369208663702011,
+      "learning_rate": 0.0001918170010102468,
+      "loss": 0.1579,
+      "step": 2840
+    },
+    {
+      "epoch": 0.20498574984667556,
+      "grad_norm": 0.12511524558067322,
+      "learning_rate": 0.00019181411459085005,
+      "loss": 0.1656,
+      "step": 2841
+    },
+    {
+      "epoch": 0.205057902521736,
+      "grad_norm": 0.11365848034620285,
+      "learning_rate": 0.0001918112281714533,
+      "loss": 0.1608,
+      "step": 2842
+    },
+    {
+      "epoch": 0.20513005519679642,
+      "grad_norm": 0.12248922139406204,
+      "learning_rate": 0.00019180834175205658,
+      "loss": 0.1933,
+      "step": 2843
+    },
+    {
+      "epoch": 0.20520220787185686,
+      "grad_norm": 0.08641145378351212,
+      "learning_rate": 0.00019180545533265984,
+      "loss": 0.1935,
+      "step": 2844
+    },
+    {
+      "epoch": 0.20527436054691728,
+      "grad_norm": 0.08721951395273209,
+      "learning_rate": 0.0001918025689132631,
+      "loss": 0.1815,
+      "step": 2845
+    },
+    {
+      "epoch": 0.2053465132219777,
+      "grad_norm": 0.11892024427652359,
+      "learning_rate": 0.00019179968249386636,
+      "loss": 0.1964,
+      "step": 2846
+    },
+    {
+      "epoch": 0.20541866589703814,
+      "grad_norm": 0.10294033586978912,
+      "learning_rate": 0.00019179679607446963,
+      "loss": 0.1705,
+      "step": 2847
+    },
+    {
+      "epoch": 0.20549081857209855,
+      "grad_norm": 0.10080970823764801,
+      "learning_rate": 0.0001917939096550729,
+      "loss": 0.1362,
+      "step": 2848
+    },
+    {
+      "epoch": 0.205562971247159,
+      "grad_norm": 0.11637333780527115,
+      "learning_rate": 0.00019179102323567615,
+      "loss": 0.1374,
+      "step": 2849
+    },
+    {
+      "epoch": 0.20563512392221941,
+      "grad_norm": 0.09417811781167984,
+      "learning_rate": 0.00019178813681627942,
+      "loss": 0.1653,
+      "step": 2850
+    },
+    {
+      "epoch": 0.20570727659727983,
+      "grad_norm": 0.08716203272342682,
+      "learning_rate": 0.00019178525039688265,
+      "loss": 0.1416,
+      "step": 2851
+    },
+    {
+      "epoch": 0.20577942927234028,
+      "grad_norm": 0.09685619920492172,
+      "learning_rate": 0.00019178236397748594,
+      "loss": 0.1677,
+      "step": 2852
+    },
+    {
+      "epoch": 0.2058515819474007,
+      "grad_norm": 0.11075788736343384,
+      "learning_rate": 0.0001917794775580892,
+      "loss": 0.1525,
+      "step": 2853
+    },
+    {
+      "epoch": 0.20592373462246114,
+      "grad_norm": 0.10047938674688339,
+      "learning_rate": 0.00019177659113869247,
+      "loss": 0.1717,
+      "step": 2854
+    },
+    {
+      "epoch": 0.20599588729752155,
+      "grad_norm": 0.1073223203420639,
+      "learning_rate": 0.00019177370471929573,
+      "loss": 0.1471,
+      "step": 2855
+    },
+    {
+      "epoch": 0.206068039972582,
+      "grad_norm": 0.11038212478160858,
+      "learning_rate": 0.00019177081829989897,
+      "loss": 0.1768,
+      "step": 2856
+    },
+    {
+      "epoch": 0.2061401926476424,
+      "grad_norm": 0.14250384271144867,
+      "learning_rate": 0.00019176793188050223,
+      "loss": 0.1612,
+      "step": 2857
+    },
+    {
+      "epoch": 0.20621234532270283,
+      "grad_norm": 0.09535922855138779,
+      "learning_rate": 0.0001917650454611055,
+      "loss": 0.1846,
+      "step": 2858
+    },
+    {
+      "epoch": 0.20628449799776327,
+      "grad_norm": 0.08837927877902985,
+      "learning_rate": 0.00019176215904170878,
+      "loss": 0.2123,
+      "step": 2859
+    },
+    {
+      "epoch": 0.2063566506728237,
+      "grad_norm": 0.13702549040317535,
+      "learning_rate": 0.00019175927262231205,
+      "loss": 0.1898,
+      "step": 2860
+    },
+    {
+      "epoch": 0.20642880334788413,
+      "grad_norm": 0.17124445736408234,
+      "learning_rate": 0.00019175638620291528,
+      "loss": 0.1385,
+      "step": 2861
+    },
+    {
+      "epoch": 0.20650095602294455,
+      "grad_norm": 0.11649008840322495,
+      "learning_rate": 0.00019175349978351854,
+      "loss": 0.1569,
+      "step": 2862
+    },
+    {
+      "epoch": 0.20657310869800496,
+      "grad_norm": 0.10121750086545944,
+      "learning_rate": 0.0001917506133641218,
+      "loss": 0.1926,
+      "step": 2863
+    },
+    {
+      "epoch": 0.2066452613730654,
+      "grad_norm": 0.09889732301235199,
+      "learning_rate": 0.00019174772694472507,
+      "loss": 0.1631,
+      "step": 2864
+    },
+    {
+      "epoch": 0.20671741404812582,
+      "grad_norm": 0.10092010349035263,
+      "learning_rate": 0.00019174484052532833,
+      "loss": 0.1761,
+      "step": 2865
+    },
+    {
+      "epoch": 0.20678956672318627,
+      "grad_norm": 0.12512940168380737,
+      "learning_rate": 0.0001917419541059316,
+      "loss": 0.1802,
+      "step": 2866
+    },
+    {
+      "epoch": 0.20686171939824669,
+      "grad_norm": 0.09495396912097931,
+      "learning_rate": 0.00019173906768653486,
+      "loss": 0.1253,
+      "step": 2867
+    },
+    {
+      "epoch": 0.20693387207330713,
+      "grad_norm": 0.16718077659606934,
+      "learning_rate": 0.00019173618126713812,
+      "loss": 0.1576,
+      "step": 2868
+    },
+    {
+      "epoch": 0.20700602474836755,
+      "grad_norm": 0.10851576924324036,
+      "learning_rate": 0.00019173329484774138,
+      "loss": 0.1319,
+      "step": 2869
+    },
+    {
+      "epoch": 0.20707817742342796,
+      "grad_norm": 0.1416240930557251,
+      "learning_rate": 0.00019173040842834465,
+      "loss": 0.1587,
+      "step": 2870
+    },
+    {
+      "epoch": 0.2071503300984884,
+      "grad_norm": 0.11564162373542786,
+      "learning_rate": 0.0001917275220089479,
+      "loss": 0.1991,
+      "step": 2871
+    },
+    {
+      "epoch": 0.20722248277354882,
+      "grad_norm": 0.114931121468544,
+      "learning_rate": 0.00019172463558955117,
+      "loss": 0.1375,
+      "step": 2872
+    },
+    {
+      "epoch": 0.20729463544860927,
+      "grad_norm": 0.12055815756320953,
+      "learning_rate": 0.00019172174917015444,
+      "loss": 0.205,
+      "step": 2873
+    },
+    {
+      "epoch": 0.20736678812366968,
+      "grad_norm": 0.09396502375602722,
+      "learning_rate": 0.0001917188627507577,
+      "loss": 0.1844,
+      "step": 2874
+    },
+    {
+      "epoch": 0.20743894079873013,
+      "grad_norm": 0.1035708636045456,
+      "learning_rate": 0.00019171597633136096,
+      "loss": 0.1642,
+      "step": 2875
+    },
+    {
+      "epoch": 0.20751109347379054,
+      "grad_norm": 0.1005023792386055,
+      "learning_rate": 0.00019171308991196423,
+      "loss": 0.1491,
+      "step": 2876
+    },
+    {
+      "epoch": 0.20758324614885096,
+      "grad_norm": 0.1094493642449379,
+      "learning_rate": 0.0001917102034925675,
+      "loss": 0.2038,
+      "step": 2877
+    },
+    {
+      "epoch": 0.2076553988239114,
+      "grad_norm": 0.0964755117893219,
+      "learning_rate": 0.00019170731707317072,
+      "loss": 0.1884,
+      "step": 2878
+    },
+    {
+      "epoch": 0.20772755149897182,
+      "grad_norm": 0.12796197831630707,
+      "learning_rate": 0.000191704430653774,
+      "loss": 0.1468,
+      "step": 2879
+    },
+    {
+      "epoch": 0.20779970417403226,
+      "grad_norm": 0.12494756281375885,
+      "learning_rate": 0.00019170154423437728,
+      "loss": 0.1768,
+      "step": 2880
+    },
+    {
+      "epoch": 0.20787185684909268,
+      "grad_norm": 0.12739567458629608,
+      "learning_rate": 0.00019169865781498054,
+      "loss": 0.1578,
+      "step": 2881
+    },
+    {
+      "epoch": 0.2079440095241531,
+      "grad_norm": 0.16343368589878082,
+      "learning_rate": 0.0001916957713955838,
+      "loss": 0.219,
+      "step": 2882
+    },
+    {
+      "epoch": 0.20801616219921354,
+      "grad_norm": 0.12768669426441193,
+      "learning_rate": 0.00019169288497618704,
+      "loss": 0.1762,
+      "step": 2883
+    },
+    {
+      "epoch": 0.20808831487427396,
+      "grad_norm": 0.13747148215770721,
+      "learning_rate": 0.0001916899985567903,
+      "loss": 0.1697,
+      "step": 2884
+    },
+    {
+      "epoch": 0.2081604675493344,
+      "grad_norm": 0.10046649724245071,
+      "learning_rate": 0.00019168711213739356,
+      "loss": 0.1539,
+      "step": 2885
+    },
+    {
+      "epoch": 0.20823262022439482,
+      "grad_norm": 0.10763157159090042,
+      "learning_rate": 0.00019168422571799683,
+      "loss": 0.127,
+      "step": 2886
+    },
+    {
+      "epoch": 0.20830477289945526,
+      "grad_norm": 0.13337762653827667,
+      "learning_rate": 0.00019168133929860012,
+      "loss": 0.1887,
+      "step": 2887
+    },
+    {
+      "epoch": 0.20837692557451568,
+      "grad_norm": 0.13826853036880493,
+      "learning_rate": 0.00019167845287920335,
+      "loss": 0.1365,
+      "step": 2888
+    },
+    {
+      "epoch": 0.2084490782495761,
+      "grad_norm": 0.10385999083518982,
+      "learning_rate": 0.00019167556645980662,
+      "loss": 0.1486,
+      "step": 2889
+    },
+    {
+      "epoch": 0.20852123092463654,
+      "grad_norm": 0.09602903574705124,
+      "learning_rate": 0.00019167268004040988,
+      "loss": 0.172,
+      "step": 2890
+    },
+    {
+      "epoch": 0.20859338359969695,
+      "grad_norm": 0.09549222141504288,
+      "learning_rate": 0.00019166979362101314,
+      "loss": 0.1291,
+      "step": 2891
+    },
+    {
+      "epoch": 0.2086655362747574,
+      "grad_norm": 0.09645403176546097,
+      "learning_rate": 0.0001916669072016164,
+      "loss": 0.1772,
+      "step": 2892
+    },
+    {
+      "epoch": 0.2087376889498178,
+      "grad_norm": 0.12109261751174927,
+      "learning_rate": 0.00019166402078221967,
+      "loss": 0.1943,
+      "step": 2893
+    },
+    {
+      "epoch": 0.20880984162487823,
+      "grad_norm": 0.11377090960741043,
+      "learning_rate": 0.00019166113436282293,
+      "loss": 0.1528,
+      "step": 2894
+    },
+    {
+      "epoch": 0.20888199429993867,
+      "grad_norm": 0.10664435476064682,
+      "learning_rate": 0.0001916582479434262,
+      "loss": 0.2075,
+      "step": 2895
+    },
+    {
+      "epoch": 0.2089541469749991,
+      "grad_norm": 0.14281053841114044,
+      "learning_rate": 0.00019165536152402946,
+      "loss": 0.1748,
+      "step": 2896
+    },
+    {
+      "epoch": 0.20902629965005953,
+      "grad_norm": 0.11265065521001816,
+      "learning_rate": 0.00019165247510463272,
+      "loss": 0.1176,
+      "step": 2897
+    },
+    {
+      "epoch": 0.20909845232511995,
+      "grad_norm": 0.10463102161884308,
+      "learning_rate": 0.00019164958868523598,
+      "loss": 0.1907,
+      "step": 2898
+    },
+    {
+      "epoch": 0.2091706050001804,
+      "grad_norm": 0.12577512860298157,
+      "learning_rate": 0.00019164670226583922,
+      "loss": 0.1556,
+      "step": 2899
+    },
+    {
+      "epoch": 0.2092427576752408,
+      "grad_norm": 0.12625141441822052,
+      "learning_rate": 0.00019164381584644248,
+      "loss": 0.1633,
+      "step": 2900
+    },
+    {
+      "epoch": 0.20931491035030123,
+      "grad_norm": 0.1196192130446434,
+      "learning_rate": 0.00019164092942704577,
+      "loss": 0.1529,
+      "step": 2901
+    },
+    {
+      "epoch": 0.20938706302536167,
+      "grad_norm": 0.11164863407611847,
+      "learning_rate": 0.00019163804300764903,
+      "loss": 0.1327,
+      "step": 2902
+    },
+    {
+      "epoch": 0.20945921570042209,
+      "grad_norm": 0.09533857554197311,
+      "learning_rate": 0.0001916351565882523,
+      "loss": 0.1671,
+      "step": 2903
+    },
+    {
+      "epoch": 0.20953136837548253,
+      "grad_norm": 0.10212215781211853,
+      "learning_rate": 0.00019163227016885553,
+      "loss": 0.1618,
+      "step": 2904
+    },
+    {
+      "epoch": 0.20960352105054295,
+      "grad_norm": 0.09262704104185104,
+      "learning_rate": 0.0001916293837494588,
+      "loss": 0.1759,
+      "step": 2905
+    },
+    {
+      "epoch": 0.2096756737256034,
+      "grad_norm": 0.08996018022298813,
+      "learning_rate": 0.00019162649733006206,
+      "loss": 0.1603,
+      "step": 2906
+    },
+    {
+      "epoch": 0.2097478264006638,
+      "grad_norm": 0.1032821536064148,
+      "learning_rate": 0.00019162361091066532,
+      "loss": 0.2387,
+      "step": 2907
+    },
+    {
+      "epoch": 0.20981997907572422,
+      "grad_norm": 0.0967317521572113,
+      "learning_rate": 0.0001916207244912686,
+      "loss": 0.1469,
+      "step": 2908
+    },
+    {
+      "epoch": 0.20989213175078467,
+      "grad_norm": 0.09922664612531662,
+      "learning_rate": 0.00019161783807187185,
+      "loss": 0.1919,
+      "step": 2909
+    },
+    {
+      "epoch": 0.20996428442584508,
+      "grad_norm": 0.08808860182762146,
+      "learning_rate": 0.0001916149516524751,
+      "loss": 0.1554,
+      "step": 2910
+    },
+    {
+      "epoch": 0.21003643710090553,
+      "grad_norm": 0.11175331473350525,
+      "learning_rate": 0.00019161206523307837,
+      "loss": 0.2047,
+      "step": 2911
+    },
+    {
+      "epoch": 0.21010858977596594,
+      "grad_norm": 0.1385175883769989,
+      "learning_rate": 0.00019160917881368164,
+      "loss": 0.1589,
+      "step": 2912
+    },
+    {
+      "epoch": 0.21018074245102636,
+      "grad_norm": 0.11769337952136993,
+      "learning_rate": 0.0001916062923942849,
+      "loss": 0.1651,
+      "step": 2913
+    },
+    {
+      "epoch": 0.2102528951260868,
+      "grad_norm": 0.10524415969848633,
+      "learning_rate": 0.00019160340597488816,
+      "loss": 0.1827,
+      "step": 2914
+    },
+    {
+      "epoch": 0.21032504780114722,
+      "grad_norm": 0.15402990579605103,
+      "learning_rate": 0.00019160051955549143,
+      "loss": 0.145,
+      "step": 2915
+    },
+    {
+      "epoch": 0.21039720047620766,
+      "grad_norm": 0.08767557144165039,
+      "learning_rate": 0.0001915976331360947,
+      "loss": 0.1558,
+      "step": 2916
+    },
+    {
+      "epoch": 0.21046935315126808,
+      "grad_norm": 0.13662488758563995,
+      "learning_rate": 0.00019159474671669795,
+      "loss": 0.1743,
+      "step": 2917
+    },
+    {
+      "epoch": 0.21054150582632852,
+      "grad_norm": 0.09334705770015717,
+      "learning_rate": 0.00019159186029730121,
+      "loss": 0.1683,
+      "step": 2918
+    },
+    {
+      "epoch": 0.21061365850138894,
+      "grad_norm": 0.19569367170333862,
+      "learning_rate": 0.00019158897387790448,
+      "loss": 0.1536,
+      "step": 2919
+    },
+    {
+      "epoch": 0.21068581117644936,
+      "grad_norm": 0.09817244112491608,
+      "learning_rate": 0.0001915860874585077,
+      "loss": 0.1578,
+      "step": 2920
+    },
+    {
+      "epoch": 0.2107579638515098,
+      "grad_norm": 0.10119809955358505,
+      "learning_rate": 0.00019158320103911098,
+      "loss": 0.1348,
+      "step": 2921
+    },
+    {
+      "epoch": 0.21083011652657022,
+      "grad_norm": 0.11312271654605865,
+      "learning_rate": 0.00019158031461971427,
+      "loss": 0.1169,
+      "step": 2922
+    },
+    {
+      "epoch": 0.21090226920163066,
+      "grad_norm": 0.09860809892416,
+      "learning_rate": 0.00019157742820031753,
+      "loss": 0.1417,
+      "step": 2923
+    },
+    {
+      "epoch": 0.21097442187669108,
+      "grad_norm": 0.17511418461799622,
+      "learning_rate": 0.0001915745417809208,
+      "loss": 0.1677,
+      "step": 2924
+    },
+    {
+      "epoch": 0.2110465745517515,
+      "grad_norm": 0.1460758000612259,
+      "learning_rate": 0.00019157165536152403,
+      "loss": 0.1962,
+      "step": 2925
+    },
+    {
+      "epoch": 0.21111872722681194,
+      "grad_norm": 0.10037209093570709,
+      "learning_rate": 0.0001915687689421273,
+      "loss": 0.1742,
+      "step": 2926
+    },
+    {
+      "epoch": 0.21119087990187235,
+      "grad_norm": 0.08584155887365341,
+      "learning_rate": 0.00019156588252273055,
+      "loss": 0.1463,
+      "step": 2927
+    },
+    {
+      "epoch": 0.2112630325769328,
+      "grad_norm": 0.14209772646427155,
+      "learning_rate": 0.00019156299610333382,
+      "loss": 0.1466,
+      "step": 2928
+    },
+    {
+      "epoch": 0.2113351852519932,
+      "grad_norm": 0.1284438967704773,
+      "learning_rate": 0.0001915601096839371,
+      "loss": 0.1588,
+      "step": 2929
+    },
+    {
+      "epoch": 0.21140733792705366,
+      "grad_norm": 0.09972860664129257,
+      "learning_rate": 0.00019155722326454034,
+      "loss": 0.1931,
+      "step": 2930
+    },
+    {
+      "epoch": 0.21147949060211407,
+      "grad_norm": 0.14195489883422852,
+      "learning_rate": 0.0001915543368451436,
+      "loss": 0.186,
+      "step": 2931
+    },
+    {
+      "epoch": 0.2115516432771745,
+      "grad_norm": 0.10553430765867233,
+      "learning_rate": 0.00019155145042574687,
+      "loss": 0.1062,
+      "step": 2932
+    },
+    {
+      "epoch": 0.21162379595223493,
+      "grad_norm": 0.1190626472234726,
+      "learning_rate": 0.00019154856400635013,
+      "loss": 0.1828,
+      "step": 2933
+    },
+    {
+      "epoch": 0.21169594862729535,
+      "grad_norm": 0.1097407415509224,
+      "learning_rate": 0.0001915456775869534,
+      "loss": 0.1551,
+      "step": 2934
+    },
+    {
+      "epoch": 0.2117681013023558,
+      "grad_norm": 0.11319176852703094,
+      "learning_rate": 0.00019154279116755666,
+      "loss": 0.1491,
+      "step": 2935
+    },
+    {
+      "epoch": 0.2118402539774162,
+      "grad_norm": 0.10741961747407913,
+      "learning_rate": 0.00019153990474815992,
+      "loss": 0.1723,
+      "step": 2936
+    },
+    {
+      "epoch": 0.21191240665247665,
+      "grad_norm": 0.1034005731344223,
+      "learning_rate": 0.00019153701832876318,
+      "loss": 0.184,
+      "step": 2937
+    },
+    {
+      "epoch": 0.21198455932753707,
+      "grad_norm": 0.12362311035394669,
+      "learning_rate": 0.00019153413190936645,
+      "loss": 0.1185,
+      "step": 2938
+    },
+    {
+      "epoch": 0.21205671200259749,
+      "grad_norm": 0.11626561731100082,
+      "learning_rate": 0.0001915312454899697,
+      "loss": 0.1414,
+      "step": 2939
+    },
+    {
+      "epoch": 0.21212886467765793,
+      "grad_norm": 0.10941056162118912,
+      "learning_rate": 0.00019152835907057297,
+      "loss": 0.1416,
+      "step": 2940
+    },
+    {
+      "epoch": 0.21220101735271835,
+      "grad_norm": 0.086311936378479,
+      "learning_rate": 0.0001915254726511762,
+      "loss": 0.1494,
+      "step": 2941
+    },
+    {
+      "epoch": 0.2122731700277788,
+      "grad_norm": 0.1236078068614006,
+      "learning_rate": 0.00019152258623177947,
+      "loss": 0.1859,
+      "step": 2942
+    },
+    {
+      "epoch": 0.2123453227028392,
+      "grad_norm": 0.14309728145599365,
+      "learning_rate": 0.00019151969981238276,
+      "loss": 0.2143,
+      "step": 2943
+    },
+    {
+      "epoch": 0.21241747537789962,
+      "grad_norm": 0.10707279294729233,
+      "learning_rate": 0.00019151681339298602,
+      "loss": 0.1604,
+      "step": 2944
+    },
+    {
+      "epoch": 0.21248962805296007,
+      "grad_norm": 0.11479876190423965,
+      "learning_rate": 0.00019151392697358929,
+      "loss": 0.1564,
+      "step": 2945
+    },
+    {
+      "epoch": 0.21256178072802048,
+      "grad_norm": 0.12447728961706161,
+      "learning_rate": 0.00019151104055419252,
+      "loss": 0.1724,
+      "step": 2946
+    },
+    {
+      "epoch": 0.21263393340308093,
+      "grad_norm": 0.12344259768724442,
+      "learning_rate": 0.00019150815413479578,
+      "loss": 0.1851,
+      "step": 2947
+    },
+    {
+      "epoch": 0.21270608607814134,
+      "grad_norm": 0.10869882255792618,
+      "learning_rate": 0.00019150526771539905,
+      "loss": 0.1996,
+      "step": 2948
+    },
+    {
+      "epoch": 0.2127782387532018,
+      "grad_norm": 0.08875145763158798,
+      "learning_rate": 0.0001915023812960023,
+      "loss": 0.1327,
+      "step": 2949
+    },
+    {
+      "epoch": 0.2128503914282622,
+      "grad_norm": 0.09537830203771591,
+      "learning_rate": 0.0001914994948766056,
+      "loss": 0.1982,
+      "step": 2950
+    },
+    {
+      "epoch": 0.21292254410332262,
+      "grad_norm": 0.1370457410812378,
+      "learning_rate": 0.00019149660845720884,
+      "loss": 0.1879,
+      "step": 2951
+    },
+    {
+      "epoch": 0.21299469677838306,
+      "grad_norm": 0.09743336588144302,
+      "learning_rate": 0.0001914937220378121,
+      "loss": 0.1397,
+      "step": 2952
+    },
+    {
+      "epoch": 0.21306684945344348,
+      "grad_norm": 0.09189710021018982,
+      "learning_rate": 0.00019149083561841536,
+      "loss": 0.1825,
+      "step": 2953
+    },
+    {
+      "epoch": 0.21313900212850392,
+      "grad_norm": 0.14623673260211945,
+      "learning_rate": 0.00019148794919901862,
+      "loss": 0.1614,
+      "step": 2954
+    },
+    {
+      "epoch": 0.21321115480356434,
+      "grad_norm": 0.12172803282737732,
+      "learning_rate": 0.0001914850627796219,
+      "loss": 0.223,
+      "step": 2955
+    },
+    {
+      "epoch": 0.21328330747862476,
+      "grad_norm": 0.11884385347366333,
+      "learning_rate": 0.00019148217636022515,
+      "loss": 0.1287,
+      "step": 2956
+    },
+    {
+      "epoch": 0.2133554601536852,
+      "grad_norm": 0.10667689889669418,
+      "learning_rate": 0.00019147928994082841,
+      "loss": 0.1682,
+      "step": 2957
+    },
+    {
+      "epoch": 0.21342761282874562,
+      "grad_norm": 0.09498683363199234,
+      "learning_rate": 0.00019147640352143168,
+      "loss": 0.1062,
+      "step": 2958
+    },
+    {
+      "epoch": 0.21349976550380606,
+      "grad_norm": 0.09856303781270981,
+      "learning_rate": 0.00019147351710203494,
+      "loss": 0.1967,
+      "step": 2959
+    },
+    {
+      "epoch": 0.21357191817886648,
+      "grad_norm": 0.11182020604610443,
+      "learning_rate": 0.0001914706306826382,
+      "loss": 0.207,
+      "step": 2960
+    },
+    {
+      "epoch": 0.21364407085392692,
+      "grad_norm": 0.11299645900726318,
+      "learning_rate": 0.00019146774426324147,
+      "loss": 0.2056,
+      "step": 2961
+    },
+    {
+      "epoch": 0.21371622352898734,
+      "grad_norm": 0.11871636658906937,
+      "learning_rate": 0.0001914648578438447,
+      "loss": 0.1811,
+      "step": 2962
+    },
+    {
+      "epoch": 0.21378837620404775,
+      "grad_norm": 0.10775674134492874,
+      "learning_rate": 0.00019146197142444796,
+      "loss": 0.1377,
+      "step": 2963
+    },
+    {
+      "epoch": 0.2138605288791082,
+      "grad_norm": 0.1358514130115509,
+      "learning_rate": 0.00019145908500505125,
+      "loss": 0.174,
+      "step": 2964
+    },
+    {
+      "epoch": 0.2139326815541686,
+      "grad_norm": 0.11490422487258911,
+      "learning_rate": 0.00019145619858565452,
+      "loss": 0.1962,
+      "step": 2965
+    },
+    {
+      "epoch": 0.21400483422922906,
+      "grad_norm": 0.1141917034983635,
+      "learning_rate": 0.00019145331216625778,
+      "loss": 0.1818,
+      "step": 2966
+    },
+    {
+      "epoch": 0.21407698690428947,
+      "grad_norm": 0.12067859619855881,
+      "learning_rate": 0.00019145042574686102,
+      "loss": 0.1473,
+      "step": 2967
+    },
+    {
+      "epoch": 0.21414913957934992,
+      "grad_norm": 0.10996080935001373,
+      "learning_rate": 0.00019144753932746428,
+      "loss": 0.1726,
+      "step": 2968
+    },
+    {
+      "epoch": 0.21422129225441033,
+      "grad_norm": 0.13538621366024017,
+      "learning_rate": 0.00019144465290806754,
+      "loss": 0.1607,
+      "step": 2969
+    },
+    {
+      "epoch": 0.21429344492947075,
+      "grad_norm": 0.09215865284204483,
+      "learning_rate": 0.0001914417664886708,
+      "loss": 0.1534,
+      "step": 2970
+    },
+    {
+      "epoch": 0.2143655976045312,
+      "grad_norm": 0.09540208429098129,
+      "learning_rate": 0.0001914388800692741,
+      "loss": 0.1916,
+      "step": 2971
+    },
+    {
+      "epoch": 0.2144377502795916,
+      "grad_norm": 0.10014095902442932,
+      "learning_rate": 0.00019143599364987733,
+      "loss": 0.1561,
+      "step": 2972
+    },
+    {
+      "epoch": 0.21450990295465205,
+      "grad_norm": 0.09593141078948975,
+      "learning_rate": 0.0001914331072304806,
+      "loss": 0.1565,
+      "step": 2973
+    },
+    {
+      "epoch": 0.21458205562971247,
+      "grad_norm": 0.10834231972694397,
+      "learning_rate": 0.00019143022081108386,
+      "loss": 0.154,
+      "step": 2974
+    },
+    {
+      "epoch": 0.2146542083047729,
+      "grad_norm": 0.11025810986757278,
+      "learning_rate": 0.00019142733439168712,
+      "loss": 0.2079,
+      "step": 2975
+    },
+    {
+      "epoch": 0.21472636097983333,
+      "grad_norm": 0.13344484567642212,
+      "learning_rate": 0.00019142444797229038,
+      "loss": 0.1311,
+      "step": 2976
+    },
+    {
+      "epoch": 0.21479851365489375,
+      "grad_norm": 0.13370724022388458,
+      "learning_rate": 0.00019142156155289364,
+      "loss": 0.2002,
+      "step": 2977
+    },
+    {
+      "epoch": 0.2148706663299542,
+      "grad_norm": 0.12002386152744293,
+      "learning_rate": 0.0001914186751334969,
+      "loss": 0.1451,
+      "step": 2978
+    },
+    {
+      "epoch": 0.2149428190050146,
+      "grad_norm": 0.128787562251091,
+      "learning_rate": 0.00019141578871410017,
+      "loss": 0.2031,
+      "step": 2979
+    },
+    {
+      "epoch": 0.21501497168007505,
+      "grad_norm": 0.11301092058420181,
+      "learning_rate": 0.00019141290229470343,
+      "loss": 0.1534,
+      "step": 2980
+    },
+    {
+      "epoch": 0.21508712435513547,
+      "grad_norm": 0.10730845481157303,
+      "learning_rate": 0.0001914100158753067,
+      "loss": 0.1985,
+      "step": 2981
+    },
+    {
+      "epoch": 0.21515927703019588,
+      "grad_norm": 0.10858388245105743,
+      "learning_rate": 0.00019140712945590996,
+      "loss": 0.1915,
+      "step": 2982
+    },
+    {
+      "epoch": 0.21523142970525633,
+      "grad_norm": 0.12959133088588715,
+      "learning_rate": 0.0001914042430365132,
+      "loss": 0.1395,
+      "step": 2983
+    },
+    {
+      "epoch": 0.21530358238031674,
+      "grad_norm": 0.13987712562084198,
+      "learning_rate": 0.00019140135661711646,
+      "loss": 0.1777,
+      "step": 2984
+    },
+    {
+      "epoch": 0.2153757350553772,
+      "grad_norm": 0.107297882437706,
+      "learning_rate": 0.00019139847019771975,
+      "loss": 0.153,
+      "step": 2985
+    },
+    {
+      "epoch": 0.2154478877304376,
+      "grad_norm": 0.0874083861708641,
+      "learning_rate": 0.000191395583778323,
+      "loss": 0.1361,
+      "step": 2986
+    },
+    {
+      "epoch": 0.21552004040549802,
+      "grad_norm": 0.08346877247095108,
+      "learning_rate": 0.00019139269735892627,
+      "loss": 0.1945,
+      "step": 2987
+    },
+    {
+      "epoch": 0.21559219308055846,
+      "grad_norm": 0.09217635542154312,
+      "learning_rate": 0.0001913898109395295,
+      "loss": 0.1037,
+      "step": 2988
+    },
+    {
+      "epoch": 0.21566434575561888,
+      "grad_norm": 0.09559168666601181,
+      "learning_rate": 0.00019138692452013277,
+      "loss": 0.1419,
+      "step": 2989
+    },
+    {
+      "epoch": 0.21573649843067932,
+      "grad_norm": 0.13188686966896057,
+      "learning_rate": 0.00019138403810073604,
+      "loss": 0.1728,
+      "step": 2990
+    },
+    {
+      "epoch": 0.21580865110573974,
+      "grad_norm": 0.13026142120361328,
+      "learning_rate": 0.0001913811516813393,
+      "loss": 0.1051,
+      "step": 2991
+    },
+    {
+      "epoch": 0.21588080378080018,
+      "grad_norm": 0.12346373498439789,
+      "learning_rate": 0.0001913782652619426,
+      "loss": 0.174,
+      "step": 2992
+    },
+    {
+      "epoch": 0.2159529564558606,
+      "grad_norm": 0.10167650878429413,
+      "learning_rate": 0.00019137537884254585,
+      "loss": 0.182,
+      "step": 2993
+    },
+    {
+      "epoch": 0.21602510913092102,
+      "grad_norm": 0.12183064967393875,
+      "learning_rate": 0.0001913724924231491,
+      "loss": 0.1568,
+      "step": 2994
+    },
+    {
+      "epoch": 0.21609726180598146,
+      "grad_norm": 0.1322290003299713,
+      "learning_rate": 0.00019136960600375235,
+      "loss": 0.1851,
+      "step": 2995
+    },
+    {
+      "epoch": 0.21616941448104188,
+      "grad_norm": 0.11985625326633453,
+      "learning_rate": 0.0001913667195843556,
+      "loss": 0.1265,
+      "step": 2996
+    },
+    {
+      "epoch": 0.21624156715610232,
+      "grad_norm": 0.12829148769378662,
+      "learning_rate": 0.00019136383316495888,
+      "loss": 0.1566,
+      "step": 2997
+    },
+    {
+      "epoch": 0.21631371983116274,
+      "grad_norm": 0.10552884638309479,
+      "learning_rate": 0.00019136094674556214,
+      "loss": 0.1533,
+      "step": 2998
+    },
+    {
+      "epoch": 0.21638587250622318,
+      "grad_norm": 0.12362322956323624,
+      "learning_rate": 0.0001913580603261654,
+      "loss": 0.2138,
+      "step": 2999
+    },
+    {
+      "epoch": 0.2164580251812836,
+      "grad_norm": 0.09862411767244339,
+      "learning_rate": 0.00019135517390676867,
+      "loss": 0.1708,
+      "step": 3000
+    },
+    {
+      "epoch": 0.216530177856344,
+      "grad_norm": 0.1280146986246109,
+      "learning_rate": 0.00019135228748737193,
+      "loss": 0.1904,
+      "step": 3001
+    },
+    {
+      "epoch": 0.21660233053140446,
+      "grad_norm": 0.1173025518655777,
+      "learning_rate": 0.0001913494010679752,
+      "loss": 0.1402,
+      "step": 3002
+    },
+    {
+      "epoch": 0.21667448320646487,
+      "grad_norm": 0.1203983724117279,
+      "learning_rate": 0.00019134651464857845,
+      "loss": 0.1818,
+      "step": 3003
+    },
+    {
+      "epoch": 0.21674663588152532,
+      "grad_norm": 0.12568947672843933,
+      "learning_rate": 0.00019134362822918172,
+      "loss": 0.1846,
+      "step": 3004
+    },
+    {
+      "epoch": 0.21681878855658573,
+      "grad_norm": 0.12149185687303543,
+      "learning_rate": 0.00019134074180978495,
+      "loss": 0.208,
+      "step": 3005
+    },
+    {
+      "epoch": 0.21689094123164615,
+      "grad_norm": 0.12697440385818481,
+      "learning_rate": 0.00019133785539038824,
+      "loss": 0.2275,
+      "step": 3006
+    },
+    {
+      "epoch": 0.2169630939067066,
+      "grad_norm": 0.10044363886117935,
+      "learning_rate": 0.0001913349689709915,
+      "loss": 0.1716,
+      "step": 3007
+    },
+    {
+      "epoch": 0.217035246581767,
+      "grad_norm": 0.11203659325838089,
+      "learning_rate": 0.00019133208255159477,
+      "loss": 0.1759,
+      "step": 3008
+    },
+    {
+      "epoch": 0.21710739925682745,
+      "grad_norm": 0.1191219836473465,
+      "learning_rate": 0.00019132919613219803,
+      "loss": 0.1691,
+      "step": 3009
+    },
+    {
+      "epoch": 0.21717955193188787,
+      "grad_norm": 0.12754131853580475,
+      "learning_rate": 0.00019132630971280127,
+      "loss": 0.126,
+      "step": 3010
+    },
+    {
+      "epoch": 0.21725170460694831,
+      "grad_norm": 0.13199105858802795,
+      "learning_rate": 0.00019132342329340453,
+      "loss": 0.1483,
+      "step": 3011
+    },
+    {
+      "epoch": 0.21732385728200873,
+      "grad_norm": 0.09693431109189987,
+      "learning_rate": 0.0001913205368740078,
+      "loss": 0.1428,
+      "step": 3012
+    },
+    {
+      "epoch": 0.21739600995706915,
+      "grad_norm": 0.1105511337518692,
+      "learning_rate": 0.00019131765045461108,
+      "loss": 0.1549,
+      "step": 3013
+    },
+    {
+      "epoch": 0.2174681626321296,
+      "grad_norm": 0.11166469007730484,
+      "learning_rate": 0.00019131476403521435,
+      "loss": 0.1292,
+      "step": 3014
+    },
+    {
+      "epoch": 0.21754031530719,
+      "grad_norm": 0.1035347580909729,
+      "learning_rate": 0.00019131187761581758,
+      "loss": 0.146,
+      "step": 3015
+    },
+    {
+      "epoch": 0.21761246798225045,
+      "grad_norm": 0.12998312711715698,
+      "learning_rate": 0.00019130899119642084,
+      "loss": 0.2104,
+      "step": 3016
+    },
+    {
+      "epoch": 0.21768462065731087,
+      "grad_norm": 0.10929346084594727,
+      "learning_rate": 0.0001913061047770241,
+      "loss": 0.186,
+      "step": 3017
+    },
+    {
+      "epoch": 0.21775677333237128,
+      "grad_norm": 0.11475662142038345,
+      "learning_rate": 0.00019130321835762737,
+      "loss": 0.1326,
+      "step": 3018
+    },
+    {
+      "epoch": 0.21782892600743173,
+      "grad_norm": 0.10569846630096436,
+      "learning_rate": 0.00019130033193823063,
+      "loss": 0.1797,
+      "step": 3019
+    },
+    {
+      "epoch": 0.21790107868249214,
+      "grad_norm": 0.09804397821426392,
+      "learning_rate": 0.0001912974455188339,
+      "loss": 0.1646,
+      "step": 3020
+    },
+    {
+      "epoch": 0.2179732313575526,
+      "grad_norm": 0.10693866014480591,
+      "learning_rate": 0.00019129455909943716,
+      "loss": 0.1377,
+      "step": 3021
+    },
+    {
+      "epoch": 0.218045384032613,
+      "grad_norm": 0.12969979643821716,
+      "learning_rate": 0.00019129167268004042,
+      "loss": 0.1416,
+      "step": 3022
+    },
+    {
+      "epoch": 0.21811753670767345,
+      "grad_norm": 0.11125360429286957,
+      "learning_rate": 0.00019128878626064369,
+      "loss": 0.1604,
+      "step": 3023
+    },
+    {
+      "epoch": 0.21818968938273386,
+      "grad_norm": 0.0953187644481659,
+      "learning_rate": 0.00019128589984124695,
+      "loss": 0.1974,
+      "step": 3024
+    },
+    {
+      "epoch": 0.21826184205779428,
+      "grad_norm": 0.11042159795761108,
+      "learning_rate": 0.0001912830134218502,
+      "loss": 0.1408,
+      "step": 3025
+    },
+    {
+      "epoch": 0.21833399473285472,
+      "grad_norm": 0.10423165559768677,
+      "learning_rate": 0.00019128012700245345,
+      "loss": 0.1304,
+      "step": 3026
+    },
+    {
+      "epoch": 0.21840614740791514,
+      "grad_norm": 0.12057065218687057,
+      "learning_rate": 0.00019127724058305674,
+      "loss": 0.2244,
+      "step": 3027
+    },
+    {
+      "epoch": 0.21847830008297559,
+      "grad_norm": 0.09170324355363846,
+      "learning_rate": 0.00019127435416366,
+      "loss": 0.2083,
+      "step": 3028
+    },
+    {
+      "epoch": 0.218550452758036,
+      "grad_norm": 0.12476850301027298,
+      "learning_rate": 0.00019127146774426326,
+      "loss": 0.1567,
+      "step": 3029
+    },
+    {
+      "epoch": 0.21862260543309645,
+      "grad_norm": 0.09885885566473007,
+      "learning_rate": 0.00019126858132486653,
+      "loss": 0.195,
+      "step": 3030
+    },
+    {
+      "epoch": 0.21869475810815686,
+      "grad_norm": 0.12118660658597946,
+      "learning_rate": 0.00019126569490546976,
+      "loss": 0.1311,
+      "step": 3031
+    },
+    {
+      "epoch": 0.21876691078321728,
+      "grad_norm": 0.13478021323680878,
+      "learning_rate": 0.00019126280848607302,
+      "loss": 0.1493,
+      "step": 3032
+    },
+    {
+      "epoch": 0.21883906345827772,
+      "grad_norm": 0.11218003183603287,
+      "learning_rate": 0.0001912599220666763,
+      "loss": 0.1607,
+      "step": 3033
+    },
+    {
+      "epoch": 0.21891121613333814,
+      "grad_norm": 0.12066203355789185,
+      "learning_rate": 0.00019125703564727958,
+      "loss": 0.1581,
+      "step": 3034
+    },
+    {
+      "epoch": 0.21898336880839858,
+      "grad_norm": 0.12402987480163574,
+      "learning_rate": 0.00019125414922788284,
+      "loss": 0.2097,
+      "step": 3035
+    },
+    {
+      "epoch": 0.219055521483459,
+      "grad_norm": 0.1115858182311058,
+      "learning_rate": 0.00019125126280848608,
+      "loss": 0.1534,
+      "step": 3036
+    },
+    {
+      "epoch": 0.21912767415851941,
+      "grad_norm": 0.09497125446796417,
+      "learning_rate": 0.00019124837638908934,
+      "loss": 0.1503,
+      "step": 3037
+    },
+    {
+      "epoch": 0.21919982683357986,
+      "grad_norm": 0.11514826864004135,
+      "learning_rate": 0.0001912454899696926,
+      "loss": 0.1594,
+      "step": 3038
+    },
+    {
+      "epoch": 0.21927197950864027,
+      "grad_norm": 0.0928627997636795,
+      "learning_rate": 0.00019124260355029586,
+      "loss": 0.193,
+      "step": 3039
+    },
+    {
+      "epoch": 0.21934413218370072,
+      "grad_norm": 0.113450787961483,
+      "learning_rate": 0.00019123971713089913,
+      "loss": 0.1353,
+      "step": 3040
+    },
+    {
+      "epoch": 0.21941628485876113,
+      "grad_norm": 0.09100908041000366,
+      "learning_rate": 0.0001912368307115024,
+      "loss": 0.1948,
+      "step": 3041
+    },
+    {
+      "epoch": 0.21948843753382158,
+      "grad_norm": 0.08781091123819351,
+      "learning_rate": 0.00019123394429210565,
+      "loss": 0.1401,
+      "step": 3042
+    },
+    {
+      "epoch": 0.219560590208882,
+      "grad_norm": 0.08657009154558182,
+      "learning_rate": 0.00019123105787270892,
+      "loss": 0.1571,
+      "step": 3043
+    },
+    {
+      "epoch": 0.2196327428839424,
+      "grad_norm": 0.10702019184827805,
+      "learning_rate": 0.00019122817145331218,
+      "loss": 0.1428,
+      "step": 3044
+    },
+    {
+      "epoch": 0.21970489555900286,
+      "grad_norm": 0.10281525552272797,
+      "learning_rate": 0.00019122528503391544,
+      "loss": 0.177,
+      "step": 3045
+    },
+    {
+      "epoch": 0.21977704823406327,
+      "grad_norm": 0.08736959099769592,
+      "learning_rate": 0.0001912223986145187,
+      "loss": 0.1571,
+      "step": 3046
+    },
+    {
+      "epoch": 0.21984920090912372,
+      "grad_norm": 0.11643321067094803,
+      "learning_rate": 0.00019121951219512194,
+      "loss": 0.1386,
+      "step": 3047
+    },
+    {
+      "epoch": 0.21992135358418413,
+      "grad_norm": 0.11699894815683365,
+      "learning_rate": 0.0001912166257757252,
+      "loss": 0.1531,
+      "step": 3048
+    },
+    {
+      "epoch": 0.21999350625924455,
+      "grad_norm": 0.12016183882951736,
+      "learning_rate": 0.0001912137393563285,
+      "loss": 0.1519,
+      "step": 3049
+    },
+    {
+      "epoch": 0.220065658934305,
+      "grad_norm": 0.09290867298841476,
+      "learning_rate": 0.00019121085293693176,
+      "loss": 0.1796,
+      "step": 3050
+    },
+    {
+      "epoch": 0.2201378116093654,
+      "grad_norm": 0.1268221139907837,
+      "learning_rate": 0.00019120796651753502,
+      "loss": 0.1367,
+      "step": 3051
+    },
+    {
+      "epoch": 0.22020996428442585,
+      "grad_norm": 0.10724075883626938,
+      "learning_rate": 0.00019120508009813826,
+      "loss": 0.1374,
+      "step": 3052
+    },
+    {
+      "epoch": 0.22028211695948627,
+      "grad_norm": 0.10854970663785934,
+      "learning_rate": 0.00019120219367874152,
+      "loss": 0.1701,
+      "step": 3053
+    },
+    {
+      "epoch": 0.2203542696345467,
+      "grad_norm": 0.12508173286914825,
+      "learning_rate": 0.00019119930725934478,
+      "loss": 0.1551,
+      "step": 3054
+    },
+    {
+      "epoch": 0.22042642230960713,
+      "grad_norm": 0.1480616331100464,
+      "learning_rate": 0.00019119642083994804,
+      "loss": 0.1589,
+      "step": 3055
+    },
+    {
+      "epoch": 0.22049857498466754,
+      "grad_norm": 0.11518403887748718,
+      "learning_rate": 0.00019119353442055133,
+      "loss": 0.2207,
+      "step": 3056
+    },
+    {
+      "epoch": 0.220570727659728,
+      "grad_norm": 0.14292766153812408,
+      "learning_rate": 0.00019119064800115457,
+      "loss": 0.1321,
+      "step": 3057
+    },
+    {
+      "epoch": 0.2206428803347884,
+      "grad_norm": 0.09787577390670776,
+      "learning_rate": 0.00019118776158175783,
+      "loss": 0.2041,
+      "step": 3058
+    },
+    {
+      "epoch": 0.22071503300984885,
+      "grad_norm": 0.12522047758102417,
+      "learning_rate": 0.0001911848751623611,
+      "loss": 0.1175,
+      "step": 3059
+    },
+    {
+      "epoch": 0.22078718568490927,
+      "grad_norm": 0.11362268030643463,
+      "learning_rate": 0.00019118198874296436,
+      "loss": 0.1269,
+      "step": 3060
+    },
+    {
+      "epoch": 0.2208593383599697,
+      "grad_norm": 0.1254872977733612,
+      "learning_rate": 0.00019117910232356762,
+      "loss": 0.122,
+      "step": 3061
+    },
+    {
+      "epoch": 0.22093149103503013,
+      "grad_norm": 0.12300896644592285,
+      "learning_rate": 0.00019117621590417088,
+      "loss": 0.1664,
+      "step": 3062
+    },
+    {
+      "epoch": 0.22100364371009054,
+      "grad_norm": 0.09971299767494202,
+      "learning_rate": 0.00019117332948477415,
+      "loss": 0.1542,
+      "step": 3063
+    },
+    {
+      "epoch": 0.22107579638515099,
+      "grad_norm": 0.09395595639944077,
+      "learning_rate": 0.0001911704430653774,
+      "loss": 0.1641,
+      "step": 3064
+    },
+    {
+      "epoch": 0.2211479490602114,
+      "grad_norm": 0.13465718924999237,
+      "learning_rate": 0.00019116755664598067,
+      "loss": 0.145,
+      "step": 3065
+    },
+    {
+      "epoch": 0.22122010173527185,
+      "grad_norm": 0.11642010509967804,
+      "learning_rate": 0.00019116467022658394,
+      "loss": 0.1488,
+      "step": 3066
+    },
+    {
+      "epoch": 0.22129225441033226,
+      "grad_norm": 0.12988223135471344,
+      "learning_rate": 0.0001911617838071872,
+      "loss": 0.2116,
+      "step": 3067
+    },
+    {
+      "epoch": 0.22136440708539268,
+      "grad_norm": 0.10000425577163696,
+      "learning_rate": 0.00019115889738779044,
+      "loss": 0.1726,
+      "step": 3068
+    },
+    {
+      "epoch": 0.22143655976045312,
+      "grad_norm": 0.11322981864213943,
+      "learning_rate": 0.0001911560109683937,
+      "loss": 0.2174,
+      "step": 3069
+    },
+    {
+      "epoch": 0.22150871243551354,
+      "grad_norm": 0.16038878262043,
+      "learning_rate": 0.000191153124548997,
+      "loss": 0.1741,
+      "step": 3070
+    },
+    {
+      "epoch": 0.22158086511057398,
+      "grad_norm": 0.11883338540792465,
+      "learning_rate": 0.00019115023812960025,
+      "loss": 0.1211,
+      "step": 3071
+    },
+    {
+      "epoch": 0.2216530177856344,
+      "grad_norm": 0.11840777099132538,
+      "learning_rate": 0.00019114735171020351,
+      "loss": 0.1558,
+      "step": 3072
+    },
+    {
+      "epoch": 0.22172517046069484,
+      "grad_norm": 0.09917975962162018,
+      "learning_rate": 0.00019114446529080675,
+      "loss": 0.1485,
+      "step": 3073
+    },
+    {
+      "epoch": 0.22179732313575526,
+      "grad_norm": 0.11423740535974503,
+      "learning_rate": 0.00019114157887141,
+      "loss": 0.1703,
+      "step": 3074
+    },
+    {
+      "epoch": 0.22186947581081568,
+      "grad_norm": 0.15673841536045074,
+      "learning_rate": 0.00019113869245201328,
+      "loss": 0.1398,
+      "step": 3075
+    },
+    {
+      "epoch": 0.22194162848587612,
+      "grad_norm": 0.09600669890642166,
+      "learning_rate": 0.00019113580603261654,
+      "loss": 0.1484,
+      "step": 3076
+    },
+    {
+      "epoch": 0.22201378116093654,
+      "grad_norm": 0.10209406912326813,
+      "learning_rate": 0.00019113291961321983,
+      "loss": 0.1613,
+      "step": 3077
+    },
+    {
+      "epoch": 0.22208593383599698,
+      "grad_norm": 0.10075556486845016,
+      "learning_rate": 0.00019113003319382306,
+      "loss": 0.1599,
+      "step": 3078
+    },
+    {
+      "epoch": 0.2221580865110574,
+      "grad_norm": 0.11392730474472046,
+      "learning_rate": 0.00019112714677442633,
+      "loss": 0.1592,
+      "step": 3079
+    },
+    {
+      "epoch": 0.2222302391861178,
+      "grad_norm": 0.11423641443252563,
+      "learning_rate": 0.0001911242603550296,
+      "loss": 0.1579,
+      "step": 3080
+    },
+    {
+      "epoch": 0.22230239186117826,
+      "grad_norm": 0.11044574528932571,
+      "learning_rate": 0.00019112137393563285,
+      "loss": 0.1136,
+      "step": 3081
+    },
+    {
+      "epoch": 0.22237454453623867,
+      "grad_norm": 0.10218565165996552,
+      "learning_rate": 0.00019111848751623612,
+      "loss": 0.1612,
+      "step": 3082
+    },
+    {
+      "epoch": 0.22244669721129912,
+      "grad_norm": 0.11987396329641342,
+      "learning_rate": 0.00019111560109683938,
+      "loss": 0.1751,
+      "step": 3083
+    },
+    {
+      "epoch": 0.22251884988635953,
+      "grad_norm": 0.12039399892091751,
+      "learning_rate": 0.00019111271467744264,
+      "loss": 0.1596,
+      "step": 3084
+    },
+    {
+      "epoch": 0.22259100256141998,
+      "grad_norm": 0.11418411880731583,
+      "learning_rate": 0.0001911098282580459,
+      "loss": 0.1349,
+      "step": 3085
+    },
+    {
+      "epoch": 0.2226631552364804,
+      "grad_norm": 0.10532049834728241,
+      "learning_rate": 0.00019110694183864917,
+      "loss": 0.1326,
+      "step": 3086
+    },
+    {
+      "epoch": 0.2227353079115408,
+      "grad_norm": 0.09723195433616638,
+      "learning_rate": 0.00019110405541925243,
+      "loss": 0.1468,
+      "step": 3087
+    },
+    {
+      "epoch": 0.22280746058660125,
+      "grad_norm": 0.09533675760030746,
+      "learning_rate": 0.0001911011689998557,
+      "loss": 0.1551,
+      "step": 3088
+    },
+    {
+      "epoch": 0.22287961326166167,
+      "grad_norm": 0.13404034078121185,
+      "learning_rate": 0.00019109828258045893,
+      "loss": 0.1659,
+      "step": 3089
+    },
+    {
+      "epoch": 0.2229517659367221,
+      "grad_norm": 0.10431547462940216,
+      "learning_rate": 0.0001910953961610622,
+      "loss": 0.1477,
+      "step": 3090
+    },
+    {
+      "epoch": 0.22302391861178253,
+      "grad_norm": 0.11172360181808472,
+      "learning_rate": 0.00019109250974166548,
+      "loss": 0.121,
+      "step": 3091
+    },
+    {
+      "epoch": 0.22309607128684297,
+      "grad_norm": 0.12391429394483566,
+      "learning_rate": 0.00019108962332226875,
+      "loss": 0.2049,
+      "step": 3092
+    },
+    {
+      "epoch": 0.2231682239619034,
+      "grad_norm": 0.10060824453830719,
+      "learning_rate": 0.000191086736902872,
+      "loss": 0.1771,
+      "step": 3093
+    },
+    {
+      "epoch": 0.2232403766369638,
+      "grad_norm": 0.10428790748119354,
+      "learning_rate": 0.00019108385048347524,
+      "loss": 0.175,
+      "step": 3094
+    },
+    {
+      "epoch": 0.22331252931202425,
+      "grad_norm": 0.09861437231302261,
+      "learning_rate": 0.0001910809640640785,
+      "loss": 0.1792,
+      "step": 3095
+    },
+    {
+      "epoch": 0.22338468198708467,
+      "grad_norm": 0.10132397711277008,
+      "learning_rate": 0.00019107807764468177,
+      "loss": 0.1169,
+      "step": 3096
+    },
+    {
+      "epoch": 0.2234568346621451,
+      "grad_norm": 0.11215358972549438,
+      "learning_rate": 0.00019107519122528503,
+      "loss": 0.1656,
+      "step": 3097
+    },
+    {
+      "epoch": 0.22352898733720553,
+      "grad_norm": 0.10548488795757294,
+      "learning_rate": 0.00019107230480588832,
+      "loss": 0.2087,
+      "step": 3098
+    },
+    {
+      "epoch": 0.22360114001226594,
+      "grad_norm": 0.10882903635501862,
+      "learning_rate": 0.00019106941838649156,
+      "loss": 0.1485,
+      "step": 3099
+    },
+    {
+      "epoch": 0.22367329268732639,
+      "grad_norm": 0.12354625761508942,
+      "learning_rate": 0.00019106653196709482,
+      "loss": 0.1761,
+      "step": 3100
+    },
+    {
+      "epoch": 0.2237454453623868,
+      "grad_norm": 0.09949934482574463,
+      "learning_rate": 0.00019106364554769808,
+      "loss": 0.1178,
+      "step": 3101
+    },
+    {
+      "epoch": 0.22381759803744725,
+      "grad_norm": 0.151101216673851,
+      "learning_rate": 0.00019106075912830135,
+      "loss": 0.1846,
+      "step": 3102
+    },
+    {
+      "epoch": 0.22388975071250766,
+      "grad_norm": 0.11479262262582779,
+      "learning_rate": 0.0001910578727089046,
+      "loss": 0.2069,
+      "step": 3103
+    },
+    {
+      "epoch": 0.2239619033875681,
+      "grad_norm": 0.10903146862983704,
+      "learning_rate": 0.00019105498628950787,
+      "loss": 0.1742,
+      "step": 3104
+    },
+    {
+      "epoch": 0.22403405606262852,
+      "grad_norm": 0.08431587368249893,
+      "learning_rate": 0.00019105209987011114,
+      "loss": 0.156,
+      "step": 3105
+    },
+    {
+      "epoch": 0.22410620873768894,
+      "grad_norm": 0.10032767057418823,
+      "learning_rate": 0.0001910492134507144,
+      "loss": 0.1504,
+      "step": 3106
+    },
+    {
+      "epoch": 0.22417836141274938,
+      "grad_norm": 0.1023092269897461,
+      "learning_rate": 0.00019104632703131766,
+      "loss": 0.1612,
+      "step": 3107
+    },
+    {
+      "epoch": 0.2242505140878098,
+      "grad_norm": 0.0954068973660469,
+      "learning_rate": 0.00019104344061192093,
+      "loss": 0.1334,
+      "step": 3108
+    },
+    {
+      "epoch": 0.22432266676287024,
+      "grad_norm": 0.11150647699832916,
+      "learning_rate": 0.0001910405541925242,
+      "loss": 0.1877,
+      "step": 3109
+    },
+    {
+      "epoch": 0.22439481943793066,
+      "grad_norm": 0.10485085099935532,
+      "learning_rate": 0.00019103766777312745,
+      "loss": 0.1624,
+      "step": 3110
+    },
+    {
+      "epoch": 0.22446697211299108,
+      "grad_norm": 0.10941198468208313,
+      "learning_rate": 0.0001910347813537307,
+      "loss": 0.1276,
+      "step": 3111
+    },
+    {
+      "epoch": 0.22453912478805152,
+      "grad_norm": 0.1089855507016182,
+      "learning_rate": 0.00019103189493433398,
+      "loss": 0.1347,
+      "step": 3112
+    },
+    {
+      "epoch": 0.22461127746311194,
+      "grad_norm": 0.11594249308109283,
+      "learning_rate": 0.00019102900851493724,
+      "loss": 0.1606,
+      "step": 3113
+    },
+    {
+      "epoch": 0.22468343013817238,
+      "grad_norm": 0.10764208436012268,
+      "learning_rate": 0.0001910261220955405,
+      "loss": 0.1326,
+      "step": 3114
+    },
+    {
+      "epoch": 0.2247555828132328,
+      "grad_norm": 0.124549500644207,
+      "learning_rate": 0.00019102323567614377,
+      "loss": 0.1678,
+      "step": 3115
+    },
+    {
+      "epoch": 0.22482773548829324,
+      "grad_norm": 0.1057669147849083,
+      "learning_rate": 0.000191020349256747,
+      "loss": 0.1681,
+      "step": 3116
+    },
+    {
+      "epoch": 0.22489988816335366,
+      "grad_norm": 0.12275435030460358,
+      "learning_rate": 0.00019101746283735026,
+      "loss": 0.1619,
+      "step": 3117
+    },
+    {
+      "epoch": 0.22497204083841407,
+      "grad_norm": 0.09398017078638077,
+      "learning_rate": 0.00019101457641795353,
+      "loss": 0.1691,
+      "step": 3118
+    },
+    {
+      "epoch": 0.22504419351347452,
+      "grad_norm": 0.09353601932525635,
+      "learning_rate": 0.00019101168999855682,
+      "loss": 0.1682,
+      "step": 3119
+    },
+    {
+      "epoch": 0.22511634618853493,
+      "grad_norm": 0.09263613820075989,
+      "learning_rate": 0.00019100880357916008,
+      "loss": 0.18,
+      "step": 3120
+    },
+    {
+      "epoch": 0.22518849886359538,
+      "grad_norm": 0.11538559198379517,
+      "learning_rate": 0.00019100591715976332,
+      "loss": 0.1233,
+      "step": 3121
+    },
+    {
+      "epoch": 0.2252606515386558,
+      "grad_norm": 0.09187686443328857,
+      "learning_rate": 0.00019100303074036658,
+      "loss": 0.1298,
+      "step": 3122
+    },
+    {
+      "epoch": 0.22533280421371624,
+      "grad_norm": 0.11960215121507645,
+      "learning_rate": 0.00019100014432096984,
+      "loss": 0.2008,
+      "step": 3123
+    },
+    {
+      "epoch": 0.22540495688877665,
+      "grad_norm": 0.10731208324432373,
+      "learning_rate": 0.0001909972579015731,
+      "loss": 0.1744,
+      "step": 3124
+    },
+    {
+      "epoch": 0.22547710956383707,
+      "grad_norm": 0.10698284208774567,
+      "learning_rate": 0.00019099437148217637,
+      "loss": 0.1521,
+      "step": 3125
+    },
+    {
+      "epoch": 0.2255492622388975,
+      "grad_norm": 0.12406127899885178,
+      "learning_rate": 0.00019099148506277963,
+      "loss": 0.1518,
+      "step": 3126
+    },
+    {
+      "epoch": 0.22562141491395793,
+      "grad_norm": 0.1567186415195465,
+      "learning_rate": 0.0001909885986433829,
+      "loss": 0.2005,
+      "step": 3127
+    },
+    {
+      "epoch": 0.22569356758901837,
+      "grad_norm": 0.10155721008777618,
+      "learning_rate": 0.00019098571222398616,
+      "loss": 0.1554,
+      "step": 3128
+    },
+    {
+      "epoch": 0.2257657202640788,
+      "grad_norm": 0.15428893268108368,
+      "learning_rate": 0.00019098282580458942,
+      "loss": 0.1672,
+      "step": 3129
+    },
+    {
+      "epoch": 0.2258378729391392,
+      "grad_norm": 0.11552326381206512,
+      "learning_rate": 0.00019097993938519268,
+      "loss": 0.14,
+      "step": 3130
+    },
+    {
+      "epoch": 0.22591002561419965,
+      "grad_norm": 0.1283775120973587,
+      "learning_rate": 0.00019097705296579595,
+      "loss": 0.2078,
+      "step": 3131
+    },
+    {
+      "epoch": 0.22598217828926007,
+      "grad_norm": 0.09192639589309692,
+      "learning_rate": 0.00019097416654639918,
+      "loss": 0.1366,
+      "step": 3132
+    },
+    {
+      "epoch": 0.2260543309643205,
+      "grad_norm": 0.09906932711601257,
+      "learning_rate": 0.00019097128012700247,
+      "loss": 0.1406,
+      "step": 3133
+    },
+    {
+      "epoch": 0.22612648363938093,
+      "grad_norm": 0.1220705658197403,
+      "learning_rate": 0.00019096839370760573,
+      "loss": 0.187,
+      "step": 3134
+    },
+    {
+      "epoch": 0.22619863631444137,
+      "grad_norm": 0.1256432682275772,
+      "learning_rate": 0.000190965507288209,
+      "loss": 0.1441,
+      "step": 3135
+    },
+    {
+      "epoch": 0.2262707889895018,
+      "grad_norm": 0.10777739435434341,
+      "learning_rate": 0.00019096262086881226,
+      "loss": 0.1595,
+      "step": 3136
+    },
+    {
+      "epoch": 0.2263429416645622,
+      "grad_norm": 0.09372089058160782,
+      "learning_rate": 0.0001909597344494155,
+      "loss": 0.1858,
+      "step": 3137
+    },
+    {
+      "epoch": 0.22641509433962265,
+      "grad_norm": 0.10302763432264328,
+      "learning_rate": 0.00019095684803001876,
+      "loss": 0.1532,
+      "step": 3138
+    },
+    {
+      "epoch": 0.22648724701468306,
+      "grad_norm": 0.10827448219060898,
+      "learning_rate": 0.00019095396161062202,
+      "loss": 0.1759,
+      "step": 3139
+    },
+    {
+      "epoch": 0.2265593996897435,
+      "grad_norm": 0.10127770155668259,
+      "learning_rate": 0.0001909510751912253,
+      "loss": 0.1424,
+      "step": 3140
+    },
+    {
+      "epoch": 0.22663155236480392,
+      "grad_norm": 0.1250356286764145,
+      "learning_rate": 0.00019094818877182857,
+      "loss": 0.14,
+      "step": 3141
+    },
+    {
+      "epoch": 0.22670370503986434,
+      "grad_norm": 0.09610163420438766,
+      "learning_rate": 0.0001909453023524318,
+      "loss": 0.1763,
+      "step": 3142
+    },
+    {
+      "epoch": 0.22677585771492478,
+      "grad_norm": 0.1547233760356903,
+      "learning_rate": 0.00019094241593303507,
+      "loss": 0.1494,
+      "step": 3143
+    },
+    {
+      "epoch": 0.2268480103899852,
+      "grad_norm": 0.10779442638158798,
+      "learning_rate": 0.00019093952951363834,
+      "loss": 0.1992,
+      "step": 3144
+    },
+    {
+      "epoch": 0.22692016306504564,
+      "grad_norm": 0.13055872917175293,
+      "learning_rate": 0.0001909366430942416,
+      "loss": 0.195,
+      "step": 3145
+    },
+    {
+      "epoch": 0.22699231574010606,
+      "grad_norm": 0.1157553419470787,
+      "learning_rate": 0.00019093375667484486,
+      "loss": 0.1614,
+      "step": 3146
+    },
+    {
+      "epoch": 0.2270644684151665,
+      "grad_norm": 0.17626427114009857,
+      "learning_rate": 0.00019093087025544812,
+      "loss": 0.1898,
+      "step": 3147
+    },
+    {
+      "epoch": 0.22713662109022692,
+      "grad_norm": 0.0990791916847229,
+      "learning_rate": 0.0001909279838360514,
+      "loss": 0.2164,
+      "step": 3148
+    },
+    {
+      "epoch": 0.22720877376528734,
+      "grad_norm": 0.19372622668743134,
+      "learning_rate": 0.00019092509741665465,
+      "loss": 0.1583,
+      "step": 3149
+    },
+    {
+      "epoch": 0.22728092644034778,
+      "grad_norm": 0.1041846051812172,
+      "learning_rate": 0.00019092221099725791,
+      "loss": 0.1766,
+      "step": 3150
+    },
+    {
+      "epoch": 0.2273530791154082,
+      "grad_norm": 0.1104101613163948,
+      "learning_rate": 0.00019091932457786118,
+      "loss": 0.1466,
+      "step": 3151
+    },
+    {
+      "epoch": 0.22742523179046864,
+      "grad_norm": 0.09540078043937683,
+      "learning_rate": 0.00019091643815846444,
+      "loss": 0.1366,
+      "step": 3152
+    },
+    {
+      "epoch": 0.22749738446552906,
+      "grad_norm": 0.14345785975456238,
+      "learning_rate": 0.00019091355173906768,
+      "loss": 0.1839,
+      "step": 3153
+    },
+    {
+      "epoch": 0.2275695371405895,
+      "grad_norm": 0.10636702924966812,
+      "learning_rate": 0.00019091066531967097,
+      "loss": 0.1484,
+      "step": 3154
+    },
+    {
+      "epoch": 0.22764168981564992,
+      "grad_norm": 0.11496289819478989,
+      "learning_rate": 0.00019090777890027423,
+      "loss": 0.1803,
+      "step": 3155
+    },
+    {
+      "epoch": 0.22771384249071033,
+      "grad_norm": 0.13613425195217133,
+      "learning_rate": 0.0001909048924808775,
+      "loss": 0.1839,
+      "step": 3156
+    },
+    {
+      "epoch": 0.22778599516577078,
+      "grad_norm": 0.10940530151128769,
+      "learning_rate": 0.00019090200606148075,
+      "loss": 0.1476,
+      "step": 3157
+    },
+    {
+      "epoch": 0.2278581478408312,
+      "grad_norm": 0.08880080282688141,
+      "learning_rate": 0.000190899119642084,
+      "loss": 0.1605,
+      "step": 3158
+    },
+    {
+      "epoch": 0.22793030051589164,
+      "grad_norm": 0.1113625019788742,
+      "learning_rate": 0.00019089623322268725,
+      "loss": 0.1393,
+      "step": 3159
+    },
+    {
+      "epoch": 0.22800245319095205,
+      "grad_norm": 0.12891051173210144,
+      "learning_rate": 0.00019089334680329052,
+      "loss": 0.194,
+      "step": 3160
+    },
+    {
+      "epoch": 0.22807460586601247,
+      "grad_norm": 0.0975043773651123,
+      "learning_rate": 0.0001908904603838938,
+      "loss": 0.1996,
+      "step": 3161
+    },
+    {
+      "epoch": 0.2281467585410729,
+      "grad_norm": 0.09029150754213333,
+      "learning_rate": 0.00019088757396449707,
+      "loss": 0.1742,
+      "step": 3162
+    },
+    {
+      "epoch": 0.22821891121613333,
+      "grad_norm": 0.07928082346916199,
+      "learning_rate": 0.0001908846875451003,
+      "loss": 0.1659,
+      "step": 3163
+    },
+    {
+      "epoch": 0.22829106389119377,
+      "grad_norm": 0.10646059364080429,
+      "learning_rate": 0.00019088180112570357,
+      "loss": 0.1513,
+      "step": 3164
+    },
+    {
+      "epoch": 0.2283632165662542,
+      "grad_norm": 0.10814210027456284,
+      "learning_rate": 0.00019087891470630683,
+      "loss": 0.1667,
+      "step": 3165
+    },
+    {
+      "epoch": 0.22843536924131463,
+      "grad_norm": 0.09975995868444443,
+      "learning_rate": 0.0001908760282869101,
+      "loss": 0.1305,
+      "step": 3166
+    },
+    {
+      "epoch": 0.22850752191637505,
+      "grad_norm": 0.12126446515321732,
+      "learning_rate": 0.00019087314186751336,
+      "loss": 0.2065,
+      "step": 3167
+    },
+    {
+      "epoch": 0.22857967459143547,
+      "grad_norm": 0.09508046507835388,
+      "learning_rate": 0.00019087025544811662,
+      "loss": 0.1939,
+      "step": 3168
+    },
+    {
+      "epoch": 0.2286518272664959,
+      "grad_norm": 0.11263048648834229,
+      "learning_rate": 0.00019086736902871988,
+      "loss": 0.1556,
+      "step": 3169
+    },
+    {
+      "epoch": 0.22872397994155633,
+      "grad_norm": 0.1208372488617897,
+      "learning_rate": 0.00019086448260932314,
+      "loss": 0.1911,
+      "step": 3170
+    },
+    {
+      "epoch": 0.22879613261661677,
+      "grad_norm": 0.11846572160720825,
+      "learning_rate": 0.0001908615961899264,
+      "loss": 0.1708,
+      "step": 3171
+    },
+    {
+      "epoch": 0.2288682852916772,
+      "grad_norm": 0.1360793560743332,
+      "learning_rate": 0.00019085870977052967,
+      "loss": 0.1534,
+      "step": 3172
+    },
+    {
+      "epoch": 0.2289404379667376,
+      "grad_norm": 0.09993666410446167,
+      "learning_rate": 0.00019085582335113293,
+      "loss": 0.1468,
+      "step": 3173
+    },
+    {
+      "epoch": 0.22901259064179805,
+      "grad_norm": 0.09834518283605576,
+      "learning_rate": 0.00019085293693173617,
+      "loss": 0.1472,
+      "step": 3174
+    },
+    {
+      "epoch": 0.22908474331685846,
+      "grad_norm": 0.08449302613735199,
+      "learning_rate": 0.00019085005051233946,
+      "loss": 0.1319,
+      "step": 3175
+    },
+    {
+      "epoch": 0.2291568959919189,
+      "grad_norm": 0.12296770513057709,
+      "learning_rate": 0.00019084716409294272,
+      "loss": 0.1943,
+      "step": 3176
+    },
+    {
+      "epoch": 0.22922904866697932,
+      "grad_norm": 0.10122761875391006,
+      "learning_rate": 0.00019084427767354599,
+      "loss": 0.1603,
+      "step": 3177
+    },
+    {
+      "epoch": 0.22930120134203977,
+      "grad_norm": 0.10421742498874664,
+      "learning_rate": 0.00019084139125414925,
+      "loss": 0.1439,
+      "step": 3178
+    },
+    {
+      "epoch": 0.22937335401710018,
+      "grad_norm": 0.11106200516223907,
+      "learning_rate": 0.00019083850483475248,
+      "loss": 0.1647,
+      "step": 3179
+    },
+    {
+      "epoch": 0.2294455066921606,
+      "grad_norm": 0.08921913802623749,
+      "learning_rate": 0.00019083561841535575,
+      "loss": 0.1295,
+      "step": 3180
+    },
+    {
+      "epoch": 0.22951765936722104,
+      "grad_norm": 0.10120546072721481,
+      "learning_rate": 0.000190832731995959,
+      "loss": 0.1552,
+      "step": 3181
+    },
+    {
+      "epoch": 0.22958981204228146,
+      "grad_norm": 0.09767742455005646,
+      "learning_rate": 0.0001908298455765623,
+      "loss": 0.1868,
+      "step": 3182
+    },
+    {
+      "epoch": 0.2296619647173419,
+      "grad_norm": 0.11393865942955017,
+      "learning_rate": 0.00019082695915716556,
+      "loss": 0.1778,
+      "step": 3183
+    },
+    {
+      "epoch": 0.22973411739240232,
+      "grad_norm": 0.12658797204494476,
+      "learning_rate": 0.0001908240727377688,
+      "loss": 0.1707,
+      "step": 3184
+    },
+    {
+      "epoch": 0.22980627006746276,
+      "grad_norm": 0.112017422914505,
+      "learning_rate": 0.00019082118631837206,
+      "loss": 0.1774,
+      "step": 3185
+    },
+    {
+      "epoch": 0.22987842274252318,
+      "grad_norm": 0.11038339883089066,
+      "learning_rate": 0.00019081829989897532,
+      "loss": 0.1414,
+      "step": 3186
+    },
+    {
+      "epoch": 0.2299505754175836,
+      "grad_norm": 0.10965468734502792,
+      "learning_rate": 0.0001908154134795786,
+      "loss": 0.1886,
+      "step": 3187
+    },
+    {
+      "epoch": 0.23002272809264404,
+      "grad_norm": 0.08305226266384125,
+      "learning_rate": 0.00019081252706018185,
+      "loss": 0.193,
+      "step": 3188
+    },
+    {
+      "epoch": 0.23009488076770446,
+      "grad_norm": 0.10485181957483292,
+      "learning_rate": 0.0001908096406407851,
+      "loss": 0.1421,
+      "step": 3189
+    },
+    {
+      "epoch": 0.2301670334427649,
+      "grad_norm": 0.09514374285936356,
+      "learning_rate": 0.00019080675422138838,
+      "loss": 0.1807,
+      "step": 3190
+    },
+    {
+      "epoch": 0.23023918611782532,
+      "grad_norm": 0.09134036302566528,
+      "learning_rate": 0.00019080386780199164,
+      "loss": 0.1273,
+      "step": 3191
+    },
+    {
+      "epoch": 0.23031133879288573,
+      "grad_norm": 0.11478359252214432,
+      "learning_rate": 0.0001908009813825949,
+      "loss": 0.1358,
+      "step": 3192
+    },
+    {
+      "epoch": 0.23038349146794618,
+      "grad_norm": 0.11170293390750885,
+      "learning_rate": 0.00019079809496319817,
+      "loss": 0.2004,
+      "step": 3193
+    },
+    {
+      "epoch": 0.2304556441430066,
+      "grad_norm": 0.13690118491649628,
+      "learning_rate": 0.00019079520854380143,
+      "loss": 0.1525,
+      "step": 3194
+    },
+    {
+      "epoch": 0.23052779681806704,
+      "grad_norm": 0.1377672702074051,
+      "learning_rate": 0.00019079232212440466,
+      "loss": 0.187,
+      "step": 3195
+    },
+    {
+      "epoch": 0.23059994949312745,
+      "grad_norm": 0.1272856742143631,
+      "learning_rate": 0.00019078943570500795,
+      "loss": 0.208,
+      "step": 3196
+    },
+    {
+      "epoch": 0.2306721021681879,
+      "grad_norm": 0.13685926795005798,
+      "learning_rate": 0.00019078654928561122,
+      "loss": 0.1613,
+      "step": 3197
+    },
+    {
+      "epoch": 0.23074425484324831,
+      "grad_norm": 0.11943011730909348,
+      "learning_rate": 0.00019078366286621448,
+      "loss": 0.1984,
+      "step": 3198
+    },
+    {
+      "epoch": 0.23081640751830873,
+      "grad_norm": 0.12155202031135559,
+      "learning_rate": 0.00019078077644681774,
+      "loss": 0.1649,
+      "step": 3199
+    },
+    {
+      "epoch": 0.23088856019336917,
+      "grad_norm": 0.09924157708883286,
+      "learning_rate": 0.00019077789002742098,
+      "loss": 0.1882,
+      "step": 3200
+    },
+    {
+      "epoch": 0.2309607128684296,
+      "grad_norm": 0.09386096149682999,
+      "learning_rate": 0.00019077500360802424,
+      "loss": 0.1452,
+      "step": 3201
+    },
+    {
+      "epoch": 0.23103286554349003,
+      "grad_norm": 0.11926762759685516,
+      "learning_rate": 0.0001907721171886275,
+      "loss": 0.13,
+      "step": 3202
+    },
+    {
+      "epoch": 0.23110501821855045,
+      "grad_norm": 0.11140086501836777,
+      "learning_rate": 0.0001907692307692308,
+      "loss": 0.1827,
+      "step": 3203
+    },
+    {
+      "epoch": 0.23117717089361087,
+      "grad_norm": 0.11237042397260666,
+      "learning_rate": 0.00019076634434983406,
+      "loss": 0.2004,
+      "step": 3204
+    },
+    {
+      "epoch": 0.2312493235686713,
+      "grad_norm": 0.09138461947441101,
+      "learning_rate": 0.0001907634579304373,
+      "loss": 0.1688,
+      "step": 3205
+    },
+    {
+      "epoch": 0.23132147624373173,
+      "grad_norm": 0.09387119114398956,
+      "learning_rate": 0.00019076057151104056,
+      "loss": 0.1438,
+      "step": 3206
+    },
+    {
+      "epoch": 0.23139362891879217,
+      "grad_norm": 0.1088188886642456,
+      "learning_rate": 0.00019075768509164382,
+      "loss": 0.2102,
+      "step": 3207
+    },
+    {
+      "epoch": 0.2314657815938526,
+      "grad_norm": 0.09382703900337219,
+      "learning_rate": 0.00019075479867224708,
+      "loss": 0.1768,
+      "step": 3208
+    },
+    {
+      "epoch": 0.23153793426891303,
+      "grad_norm": 0.09567832946777344,
+      "learning_rate": 0.00019075191225285034,
+      "loss": 0.1488,
+      "step": 3209
+    },
+    {
+      "epoch": 0.23161008694397345,
+      "grad_norm": 0.11694233864545822,
+      "learning_rate": 0.0001907490258334536,
+      "loss": 0.1808,
+      "step": 3210
+    },
+    {
+      "epoch": 0.23168223961903386,
+      "grad_norm": 0.14368030428886414,
+      "learning_rate": 0.00019074613941405687,
+      "loss": 0.1594,
+      "step": 3211
+    },
+    {
+      "epoch": 0.2317543922940943,
+      "grad_norm": 0.12889264523983002,
+      "learning_rate": 0.00019074325299466013,
+      "loss": 0.1941,
+      "step": 3212
+    },
+    {
+      "epoch": 0.23182654496915472,
+      "grad_norm": 0.09148585796356201,
+      "learning_rate": 0.0001907403665752634,
+      "loss": 0.1295,
+      "step": 3213
+    },
+    {
+      "epoch": 0.23189869764421517,
+      "grad_norm": 0.13587994873523712,
+      "learning_rate": 0.00019073748015586666,
+      "loss": 0.1788,
+      "step": 3214
+    },
+    {
+      "epoch": 0.23197085031927558,
+      "grad_norm": 0.12354405224323273,
+      "learning_rate": 0.00019073459373646992,
+      "loss": 0.1216,
+      "step": 3215
+    },
+    {
+      "epoch": 0.23204300299433603,
+      "grad_norm": 0.11031404882669449,
+      "learning_rate": 0.00019073170731707319,
+      "loss": 0.1764,
+      "step": 3216
+    },
+    {
+      "epoch": 0.23211515566939644,
+      "grad_norm": 0.14193016290664673,
+      "learning_rate": 0.00019072882089767645,
+      "loss": 0.1294,
+      "step": 3217
+    },
+    {
+      "epoch": 0.23218730834445686,
+      "grad_norm": 0.15411879122257233,
+      "learning_rate": 0.0001907259344782797,
+      "loss": 0.2023,
+      "step": 3218
+    },
+    {
+      "epoch": 0.2322594610195173,
+      "grad_norm": 0.11790381371974945,
+      "learning_rate": 0.00019072304805888297,
+      "loss": 0.1778,
+      "step": 3219
+    },
+    {
+      "epoch": 0.23233161369457772,
+      "grad_norm": 0.1162402480840683,
+      "learning_rate": 0.00019072016163948624,
+      "loss": 0.1825,
+      "step": 3220
+    },
+    {
+      "epoch": 0.23240376636963817,
+      "grad_norm": 0.11275645345449448,
+      "learning_rate": 0.0001907172752200895,
+      "loss": 0.1703,
+      "step": 3221
+    },
+    {
+      "epoch": 0.23247591904469858,
+      "grad_norm": 0.13524194061756134,
+      "learning_rate": 0.00019071438880069274,
+      "loss": 0.1836,
+      "step": 3222
+    },
+    {
+      "epoch": 0.232548071719759,
+      "grad_norm": 0.09859395027160645,
+      "learning_rate": 0.000190711502381296,
+      "loss": 0.2146,
+      "step": 3223
+    },
+    {
+      "epoch": 0.23262022439481944,
+      "grad_norm": 0.09172052145004272,
+      "learning_rate": 0.0001907086159618993,
+      "loss": 0.1919,
+      "step": 3224
+    },
+    {
+      "epoch": 0.23269237706987986,
+      "grad_norm": 0.10333575308322906,
+      "learning_rate": 0.00019070572954250255,
+      "loss": 0.1809,
+      "step": 3225
+    },
+    {
+      "epoch": 0.2327645297449403,
+      "grad_norm": 0.11006314307451248,
+      "learning_rate": 0.00019070284312310581,
+      "loss": 0.1058,
+      "step": 3226
+    },
+    {
+      "epoch": 0.23283668242000072,
+      "grad_norm": 0.09686331450939178,
+      "learning_rate": 0.00019069995670370905,
+      "loss": 0.1585,
+      "step": 3227
+    },
+    {
+      "epoch": 0.23290883509506116,
+      "grad_norm": 0.11920811980962753,
+      "learning_rate": 0.0001906970702843123,
+      "loss": 0.2086,
+      "step": 3228
+    },
+    {
+      "epoch": 0.23298098777012158,
+      "grad_norm": 0.12744984030723572,
+      "learning_rate": 0.00019069418386491558,
+      "loss": 0.1615,
+      "step": 3229
+    },
+    {
+      "epoch": 0.233053140445182,
+      "grad_norm": 0.10401850193738937,
+      "learning_rate": 0.00019069129744551884,
+      "loss": 0.1448,
+      "step": 3230
+    },
+    {
+      "epoch": 0.23312529312024244,
+      "grad_norm": 0.1329621970653534,
+      "learning_rate": 0.00019068841102612213,
+      "loss": 0.1623,
+      "step": 3231
+    },
+    {
+      "epoch": 0.23319744579530285,
+      "grad_norm": 0.14025752246379852,
+      "learning_rate": 0.00019068552460672536,
+      "loss": 0.1652,
+      "step": 3232
+    },
+    {
+      "epoch": 0.2332695984703633,
+      "grad_norm": 0.11895523220300674,
+      "learning_rate": 0.00019068263818732863,
+      "loss": 0.142,
+      "step": 3233
+    },
+    {
+      "epoch": 0.23334175114542371,
+      "grad_norm": 0.11003698408603668,
+      "learning_rate": 0.0001906797517679319,
+      "loss": 0.1355,
+      "step": 3234
+    },
+    {
+      "epoch": 0.23341390382048413,
+      "grad_norm": 0.1019037514925003,
+      "learning_rate": 0.00019067686534853515,
+      "loss": 0.1667,
+      "step": 3235
+    },
+    {
+      "epoch": 0.23348605649554457,
+      "grad_norm": 0.14654062688350677,
+      "learning_rate": 0.00019067397892913842,
+      "loss": 0.1235,
+      "step": 3236
+    },
+    {
+      "epoch": 0.233558209170605,
+      "grad_norm": 0.10918853431940079,
+      "learning_rate": 0.00019067109250974168,
+      "loss": 0.1502,
+      "step": 3237
+    },
+    {
+      "epoch": 0.23363036184566544,
+      "grad_norm": 0.13547173142433167,
+      "learning_rate": 0.00019066820609034492,
+      "loss": 0.1639,
+      "step": 3238
+    },
+    {
+      "epoch": 0.23370251452072585,
+      "grad_norm": 0.10551789402961731,
+      "learning_rate": 0.0001906653196709482,
+      "loss": 0.1407,
+      "step": 3239
+    },
+    {
+      "epoch": 0.2337746671957863,
+      "grad_norm": 0.1089998185634613,
+      "learning_rate": 0.00019066243325155147,
+      "loss": 0.1574,
+      "step": 3240
+    },
+    {
+      "epoch": 0.2338468198708467,
+      "grad_norm": 0.14473126828670502,
+      "learning_rate": 0.00019065954683215473,
+      "loss": 0.1954,
+      "step": 3241
+    },
+    {
+      "epoch": 0.23391897254590713,
+      "grad_norm": 0.14464539289474487,
+      "learning_rate": 0.000190656660412758,
+      "loss": 0.2045,
+      "step": 3242
+    },
+    {
+      "epoch": 0.23399112522096757,
+      "grad_norm": 0.1070796549320221,
+      "learning_rate": 0.00019065377399336123,
+      "loss": 0.1938,
+      "step": 3243
+    },
+    {
+      "epoch": 0.234063277896028,
+      "grad_norm": 0.0994974672794342,
+      "learning_rate": 0.0001906508875739645,
+      "loss": 0.1438,
+      "step": 3244
+    },
+    {
+      "epoch": 0.23413543057108843,
+      "grad_norm": 0.14842146635055542,
+      "learning_rate": 0.00019064800115456776,
+      "loss": 0.1839,
+      "step": 3245
+    },
+    {
+      "epoch": 0.23420758324614885,
+      "grad_norm": 0.1003551185131073,
+      "learning_rate": 0.00019064511473517105,
+      "loss": 0.1168,
+      "step": 3246
+    },
+    {
+      "epoch": 0.2342797359212093,
+      "grad_norm": 0.11478232592344284,
+      "learning_rate": 0.0001906422283157743,
+      "loss": 0.1729,
+      "step": 3247
+    },
+    {
+      "epoch": 0.2343518885962697,
+      "grad_norm": 0.09958324581384659,
+      "learning_rate": 0.00019063934189637754,
+      "loss": 0.1219,
+      "step": 3248
+    },
+    {
+      "epoch": 0.23442404127133012,
+      "grad_norm": 0.10597258061170578,
+      "learning_rate": 0.0001906364554769808,
+      "loss": 0.1747,
+      "step": 3249
+    },
+    {
+      "epoch": 0.23449619394639057,
+      "grad_norm": 0.14413249492645264,
+      "learning_rate": 0.00019063356905758407,
+      "loss": 0.1426,
+      "step": 3250
+    },
+    {
+      "epoch": 0.23456834662145098,
+      "grad_norm": 0.1292029619216919,
+      "learning_rate": 0.00019063068263818733,
+      "loss": 0.1527,
+      "step": 3251
+    },
+    {
+      "epoch": 0.23464049929651143,
+      "grad_norm": 0.1137336865067482,
+      "learning_rate": 0.0001906277962187906,
+      "loss": 0.1778,
+      "step": 3252
+    },
+    {
+      "epoch": 0.23471265197157185,
+      "grad_norm": 0.1345532238483429,
+      "learning_rate": 0.00019062490979939386,
+      "loss": 0.1664,
+      "step": 3253
+    },
+    {
+      "epoch": 0.23478480464663226,
+      "grad_norm": 0.14330095052719116,
+      "learning_rate": 0.00019062202337999712,
+      "loss": 0.1812,
+      "step": 3254
+    },
+    {
+      "epoch": 0.2348569573216927,
+      "grad_norm": 0.10712531208992004,
+      "learning_rate": 0.00019061913696060038,
+      "loss": 0.2,
+      "step": 3255
+    },
+    {
+      "epoch": 0.23492910999675312,
+      "grad_norm": 0.14136013388633728,
+      "learning_rate": 0.00019061625054120365,
+      "loss": 0.1447,
+      "step": 3256
+    },
+    {
+      "epoch": 0.23500126267181357,
+      "grad_norm": 0.11800672113895416,
+      "learning_rate": 0.0001906133641218069,
+      "loss": 0.162,
+      "step": 3257
+    },
+    {
+      "epoch": 0.23507341534687398,
+      "grad_norm": 0.10882166773080826,
+      "learning_rate": 0.00019061047770241017,
+      "loss": 0.1226,
+      "step": 3258
+    },
+    {
+      "epoch": 0.23514556802193443,
+      "grad_norm": 0.13625170290470123,
+      "learning_rate": 0.0001906075912830134,
+      "loss": 0.17,
+      "step": 3259
+    },
+    {
+      "epoch": 0.23521772069699484,
+      "grad_norm": 0.10706209391355515,
+      "learning_rate": 0.0001906047048636167,
+      "loss": 0.1387,
+      "step": 3260
+    },
+    {
+      "epoch": 0.23528987337205526,
+      "grad_norm": 0.10762275755405426,
+      "learning_rate": 0.00019060181844421996,
+      "loss": 0.1385,
+      "step": 3261
+    },
+    {
+      "epoch": 0.2353620260471157,
+      "grad_norm": 0.11794031411409378,
+      "learning_rate": 0.00019059893202482323,
+      "loss": 0.1782,
+      "step": 3262
+    },
+    {
+      "epoch": 0.23543417872217612,
+      "grad_norm": 0.10584767907857895,
+      "learning_rate": 0.0001905960456054265,
+      "loss": 0.1208,
+      "step": 3263
+    },
+    {
+      "epoch": 0.23550633139723656,
+      "grad_norm": 0.0948035717010498,
+      "learning_rate": 0.00019059315918602972,
+      "loss": 0.1484,
+      "step": 3264
+    },
+    {
+      "epoch": 0.23557848407229698,
+      "grad_norm": 0.12207180261611938,
+      "learning_rate": 0.000190590272766633,
+      "loss": 0.1708,
+      "step": 3265
+    },
+    {
+      "epoch": 0.2356506367473574,
+      "grad_norm": 0.1034015640616417,
+      "learning_rate": 0.00019058738634723625,
+      "loss": 0.1828,
+      "step": 3266
+    },
+    {
+      "epoch": 0.23572278942241784,
+      "grad_norm": 0.11126287281513214,
+      "learning_rate": 0.00019058449992783954,
+      "loss": 0.203,
+      "step": 3267
+    },
+    {
+      "epoch": 0.23579494209747826,
+      "grad_norm": 0.09588288515806198,
+      "learning_rate": 0.0001905816135084428,
+      "loss": 0.1663,
+      "step": 3268
+    },
+    {
+      "epoch": 0.2358670947725387,
+      "grad_norm": 0.10574699938297272,
+      "learning_rate": 0.00019057872708904604,
+      "loss": 0.147,
+      "step": 3269
+    },
+    {
+      "epoch": 0.23593924744759912,
+      "grad_norm": 0.1389194130897522,
+      "learning_rate": 0.0001905758406696493,
+      "loss": 0.1739,
+      "step": 3270
+    },
+    {
+      "epoch": 0.23601140012265956,
+      "grad_norm": 0.09925451129674911,
+      "learning_rate": 0.00019057295425025256,
+      "loss": 0.1984,
+      "step": 3271
+    },
+    {
+      "epoch": 0.23608355279771998,
+      "grad_norm": 0.12089280039072037,
+      "learning_rate": 0.00019057006783085583,
+      "loss": 0.1985,
+      "step": 3272
+    },
+    {
+      "epoch": 0.2361557054727804,
+      "grad_norm": 0.1208878830075264,
+      "learning_rate": 0.0001905671814114591,
+      "loss": 0.1474,
+      "step": 3273
+    },
+    {
+      "epoch": 0.23622785814784084,
+      "grad_norm": 0.11745128035545349,
+      "learning_rate": 0.00019056429499206235,
+      "loss": 0.1903,
+      "step": 3274
+    },
+    {
+      "epoch": 0.23630001082290125,
+      "grad_norm": 0.0823647603392601,
+      "learning_rate": 0.00019056140857266562,
+      "loss": 0.1603,
+      "step": 3275
+    },
+    {
+      "epoch": 0.2363721634979617,
+      "grad_norm": 0.10410257428884506,
+      "learning_rate": 0.00019055852215326888,
+      "loss": 0.1805,
+      "step": 3276
+    },
+    {
+      "epoch": 0.2364443161730221,
+      "grad_norm": 0.10450460016727448,
+      "learning_rate": 0.00019055563573387214,
+      "loss": 0.1593,
+      "step": 3277
+    },
+    {
+      "epoch": 0.23651646884808256,
+      "grad_norm": 0.0935407504439354,
+      "learning_rate": 0.0001905527493144754,
+      "loss": 0.1638,
+      "step": 3278
+    },
+    {
+      "epoch": 0.23658862152314297,
+      "grad_norm": 0.0930311381816864,
+      "learning_rate": 0.00019054986289507867,
+      "loss": 0.1332,
+      "step": 3279
+    },
+    {
+      "epoch": 0.2366607741982034,
+      "grad_norm": 0.09465755522251129,
+      "learning_rate": 0.0001905469764756819,
+      "loss": 0.1857,
+      "step": 3280
+    },
+    {
+      "epoch": 0.23673292687326383,
+      "grad_norm": 0.09868931770324707,
+      "learning_rate": 0.0001905440900562852,
+      "loss": 0.155,
+      "step": 3281
+    },
+    {
+      "epoch": 0.23680507954832425,
+      "grad_norm": 0.11090152710676193,
+      "learning_rate": 0.00019054120363688846,
+      "loss": 0.1929,
+      "step": 3282
+    },
+    {
+      "epoch": 0.2368772322233847,
+      "grad_norm": 0.10099887102842331,
+      "learning_rate": 0.00019053831721749172,
+      "loss": 0.1279,
+      "step": 3283
+    },
+    {
+      "epoch": 0.2369493848984451,
+      "grad_norm": 0.116920106112957,
+      "learning_rate": 0.00019053543079809498,
+      "loss": 0.1571,
+      "step": 3284
+    },
+    {
+      "epoch": 0.23702153757350553,
+      "grad_norm": 0.11076688766479492,
+      "learning_rate": 0.00019053254437869822,
+      "loss": 0.1592,
+      "step": 3285
+    },
+    {
+      "epoch": 0.23709369024856597,
+      "grad_norm": 0.1265278160572052,
+      "learning_rate": 0.00019052965795930148,
+      "loss": 0.1657,
+      "step": 3286
+    },
+    {
+      "epoch": 0.23716584292362639,
+      "grad_norm": 0.1301831156015396,
+      "learning_rate": 0.00019052677153990474,
+      "loss": 0.1941,
+      "step": 3287
+    },
+    {
+      "epoch": 0.23723799559868683,
+      "grad_norm": 0.1072435975074768,
+      "learning_rate": 0.00019052388512050803,
+      "loss": 0.1303,
+      "step": 3288
+    },
+    {
+      "epoch": 0.23731014827374725,
+      "grad_norm": 0.13425888121128082,
+      "learning_rate": 0.0001905209987011113,
+      "loss": 0.2139,
+      "step": 3289
+    },
+    {
+      "epoch": 0.2373823009488077,
+      "grad_norm": 0.09891893714666367,
+      "learning_rate": 0.00019051811228171453,
+      "loss": 0.1291,
+      "step": 3290
+    },
+    {
+      "epoch": 0.2374544536238681,
+      "grad_norm": 0.11196228861808777,
+      "learning_rate": 0.0001905152258623178,
+      "loss": 0.1684,
+      "step": 3291
+    },
+    {
+      "epoch": 0.23752660629892852,
+      "grad_norm": 0.0930328220129013,
+      "learning_rate": 0.00019051233944292106,
+      "loss": 0.1454,
+      "step": 3292
+    },
+    {
+      "epoch": 0.23759875897398897,
+      "grad_norm": 0.10707025229930878,
+      "learning_rate": 0.00019050945302352432,
+      "loss": 0.2065,
+      "step": 3293
+    },
+    {
+      "epoch": 0.23767091164904938,
+      "grad_norm": 0.11718625575304031,
+      "learning_rate": 0.00019050656660412758,
+      "loss": 0.1971,
+      "step": 3294
+    },
+    {
+      "epoch": 0.23774306432410983,
+      "grad_norm": 0.09377551078796387,
+      "learning_rate": 0.00019050368018473085,
+      "loss": 0.1582,
+      "step": 3295
+    },
+    {
+      "epoch": 0.23781521699917024,
+      "grad_norm": 0.101633220911026,
+      "learning_rate": 0.0001905007937653341,
+      "loss": 0.1712,
+      "step": 3296
+    },
+    {
+      "epoch": 0.23788736967423066,
+      "grad_norm": 0.09976430237293243,
+      "learning_rate": 0.00019049790734593737,
+      "loss": 0.1484,
+      "step": 3297
+    },
+    {
+      "epoch": 0.2379595223492911,
+      "grad_norm": 0.11419283598661423,
+      "learning_rate": 0.00019049502092654064,
+      "loss": 0.1674,
+      "step": 3298
+    },
+    {
+      "epoch": 0.23803167502435152,
+      "grad_norm": 0.10907099395990372,
+      "learning_rate": 0.0001904921345071439,
+      "loss": 0.1734,
+      "step": 3299
+    },
+    {
+      "epoch": 0.23810382769941196,
+      "grad_norm": 0.09745454788208008,
+      "learning_rate": 0.00019048924808774716,
+      "loss": 0.139,
+      "step": 3300
+    },
+    {
+      "epoch": 0.23817598037447238,
+      "grad_norm": 0.09598489105701447,
+      "learning_rate": 0.0001904863616683504,
+      "loss": 0.1545,
+      "step": 3301
+    },
+    {
+      "epoch": 0.23824813304953282,
+      "grad_norm": 0.10601425915956497,
+      "learning_rate": 0.0001904834752489537,
+      "loss": 0.1595,
+      "step": 3302
+    },
+    {
+      "epoch": 0.23832028572459324,
+      "grad_norm": 0.10679695010185242,
+      "learning_rate": 0.00019048058882955695,
+      "loss": 0.1384,
+      "step": 3303
+    },
+    {
+      "epoch": 0.23839243839965366,
+      "grad_norm": 0.10327916592359543,
+      "learning_rate": 0.00019047770241016021,
+      "loss": 0.1657,
+      "step": 3304
+    },
+    {
+      "epoch": 0.2384645910747141,
+      "grad_norm": 0.09387264400720596,
+      "learning_rate": 0.00019047481599076348,
+      "loss": 0.1098,
+      "step": 3305
+    },
+    {
+      "epoch": 0.23853674374977452,
+      "grad_norm": 0.1033811941742897,
+      "learning_rate": 0.0001904719295713667,
+      "loss": 0.1819,
+      "step": 3306
+    },
+    {
+      "epoch": 0.23860889642483496,
+      "grad_norm": 0.09968467056751251,
+      "learning_rate": 0.00019046904315196998,
+      "loss": 0.1609,
+      "step": 3307
+    },
+    {
+      "epoch": 0.23868104909989538,
+      "grad_norm": 0.0858912244439125,
+      "learning_rate": 0.00019046615673257324,
+      "loss": 0.0986,
+      "step": 3308
+    },
+    {
+      "epoch": 0.23875320177495582,
+      "grad_norm": 0.09396117925643921,
+      "learning_rate": 0.00019046327031317653,
+      "loss": 0.1328,
+      "step": 3309
+    },
+    {
+      "epoch": 0.23882535445001624,
+      "grad_norm": 0.11658573895692825,
+      "learning_rate": 0.0001904603838937798,
+      "loss": 0.1457,
+      "step": 3310
+    },
+    {
+      "epoch": 0.23889750712507665,
+      "grad_norm": 0.08398326486349106,
+      "learning_rate": 0.00019045749747438303,
+      "loss": 0.1687,
+      "step": 3311
+    },
+    {
+      "epoch": 0.2389696598001371,
+      "grad_norm": 0.10841450840234756,
+      "learning_rate": 0.0001904546110549863,
+      "loss": 0.1672,
+      "step": 3312
+    },
+    {
+      "epoch": 0.2390418124751975,
+      "grad_norm": 0.1065574437379837,
+      "learning_rate": 0.00019045172463558955,
+      "loss": 0.1538,
+      "step": 3313
+    },
+    {
+      "epoch": 0.23911396515025796,
+      "grad_norm": 0.1297336369752884,
+      "learning_rate": 0.00019044883821619282,
+      "loss": 0.1549,
+      "step": 3314
+    },
+    {
+      "epoch": 0.23918611782531837,
+      "grad_norm": 0.11699077486991882,
+      "learning_rate": 0.00019044595179679608,
+      "loss": 0.1442,
+      "step": 3315
+    },
+    {
+      "epoch": 0.2392582705003788,
+      "grad_norm": 0.14201852679252625,
+      "learning_rate": 0.00019044306537739934,
+      "loss": 0.1751,
+      "step": 3316
+    },
+    {
+      "epoch": 0.23933042317543923,
+      "grad_norm": 0.13381065428256989,
+      "learning_rate": 0.0001904401789580026,
+      "loss": 0.1672,
+      "step": 3317
+    },
+    {
+      "epoch": 0.23940257585049965,
+      "grad_norm": 0.10775529593229294,
+      "learning_rate": 0.00019043729253860587,
+      "loss": 0.1859,
+      "step": 3318
+    },
+    {
+      "epoch": 0.2394747285255601,
+      "grad_norm": 0.12804460525512695,
+      "learning_rate": 0.00019043440611920913,
+      "loss": 0.1927,
+      "step": 3319
+    },
+    {
+      "epoch": 0.2395468812006205,
+      "grad_norm": 0.13988925516605377,
+      "learning_rate": 0.0001904315196998124,
+      "loss": 0.2239,
+      "step": 3320
+    },
+    {
+      "epoch": 0.23961903387568095,
+      "grad_norm": 0.12417380511760712,
+      "learning_rate": 0.00019042863328041566,
+      "loss": 0.2235,
+      "step": 3321
+    },
+    {
+      "epoch": 0.23969118655074137,
+      "grad_norm": 0.12068769335746765,
+      "learning_rate": 0.0001904257468610189,
+      "loss": 0.1781,
+      "step": 3322
+    },
+    {
+      "epoch": 0.23976333922580179,
+      "grad_norm": 0.1000375971198082,
+      "learning_rate": 0.00019042286044162218,
+      "loss": 0.1527,
+      "step": 3323
+    },
+    {
+      "epoch": 0.23983549190086223,
+      "grad_norm": 0.0985485389828682,
+      "learning_rate": 0.00019041997402222545,
+      "loss": 0.1428,
+      "step": 3324
+    },
+    {
+      "epoch": 0.23990764457592265,
+      "grad_norm": 0.1019006073474884,
+      "learning_rate": 0.0001904170876028287,
+      "loss": 0.1431,
+      "step": 3325
+    },
+    {
+      "epoch": 0.2399797972509831,
+      "grad_norm": 0.11141661554574966,
+      "learning_rate": 0.00019041420118343197,
+      "loss": 0.1982,
+      "step": 3326
+    },
+    {
+      "epoch": 0.2400519499260435,
+      "grad_norm": 0.1347435712814331,
+      "learning_rate": 0.0001904113147640352,
+      "loss": 0.1341,
+      "step": 3327
+    },
+    {
+      "epoch": 0.24012410260110392,
+      "grad_norm": 0.09408620744943619,
+      "learning_rate": 0.00019040842834463847,
+      "loss": 0.1067,
+      "step": 3328
+    },
+    {
+      "epoch": 0.24019625527616437,
+      "grad_norm": 0.10930059850215912,
+      "learning_rate": 0.00019040554192524173,
+      "loss": 0.1691,
+      "step": 3329
+    },
+    {
+      "epoch": 0.24026840795122478,
+      "grad_norm": 0.0910833477973938,
+      "learning_rate": 0.00019040265550584502,
+      "loss": 0.2006,
+      "step": 3330
+    },
+    {
+      "epoch": 0.24034056062628523,
+      "grad_norm": 0.12467349320650101,
+      "learning_rate": 0.00019039976908644829,
+      "loss": 0.1754,
+      "step": 3331
+    },
+    {
+      "epoch": 0.24041271330134564,
+      "grad_norm": 0.11248049885034561,
+      "learning_rate": 0.00019039688266705152,
+      "loss": 0.1977,
+      "step": 3332
+    },
+    {
+      "epoch": 0.2404848659764061,
+      "grad_norm": 0.08451106399297714,
+      "learning_rate": 0.00019039399624765478,
+      "loss": 0.1796,
+      "step": 3333
+    },
+    {
+      "epoch": 0.2405570186514665,
+      "grad_norm": 0.0945352241396904,
+      "learning_rate": 0.00019039110982825805,
+      "loss": 0.2041,
+      "step": 3334
+    },
+    {
+      "epoch": 0.24062917132652692,
+      "grad_norm": 0.10887964814901352,
+      "learning_rate": 0.0001903882234088613,
+      "loss": 0.1232,
+      "step": 3335
+    },
+    {
+      "epoch": 0.24070132400158736,
+      "grad_norm": 0.09830755740404129,
+      "learning_rate": 0.00019038533698946457,
+      "loss": 0.1501,
+      "step": 3336
+    },
+    {
+      "epoch": 0.24077347667664778,
+      "grad_norm": 0.11321946978569031,
+      "learning_rate": 0.00019038245057006786,
+      "loss": 0.1731,
+      "step": 3337
+    },
+    {
+      "epoch": 0.24084562935170822,
+      "grad_norm": 0.09077171236276627,
+      "learning_rate": 0.0001903795641506711,
+      "loss": 0.1461,
+      "step": 3338
+    },
+    {
+      "epoch": 0.24091778202676864,
+      "grad_norm": 0.0952875092625618,
+      "learning_rate": 0.00019037667773127436,
+      "loss": 0.1817,
+      "step": 3339
+    },
+    {
+      "epoch": 0.24098993470182908,
+      "grad_norm": 0.12886834144592285,
+      "learning_rate": 0.00019037379131187762,
+      "loss": 0.1984,
+      "step": 3340
+    },
+    {
+      "epoch": 0.2410620873768895,
+      "grad_norm": 0.11286803334951401,
+      "learning_rate": 0.0001903709048924809,
+      "loss": 0.1888,
+      "step": 3341
+    },
+    {
+      "epoch": 0.24113424005194992,
+      "grad_norm": 0.11777891218662262,
+      "learning_rate": 0.00019036801847308415,
+      "loss": 0.2311,
+      "step": 3342
+    },
+    {
+      "epoch": 0.24120639272701036,
+      "grad_norm": 0.10411883145570755,
+      "learning_rate": 0.0001903651320536874,
+      "loss": 0.1985,
+      "step": 3343
+    },
+    {
+      "epoch": 0.24127854540207078,
+      "grad_norm": 0.12743018567562103,
+      "learning_rate": 0.00019036224563429068,
+      "loss": 0.1463,
+      "step": 3344
+    },
+    {
+      "epoch": 0.24135069807713122,
+      "grad_norm": 0.12642966210842133,
+      "learning_rate": 0.00019035935921489394,
+      "loss": 0.1371,
+      "step": 3345
+    },
+    {
+      "epoch": 0.24142285075219164,
+      "grad_norm": 0.09358017146587372,
+      "learning_rate": 0.0001903564727954972,
+      "loss": 0.1312,
+      "step": 3346
+    },
+    {
+      "epoch": 0.24149500342725205,
+      "grad_norm": 0.10470343381166458,
+      "learning_rate": 0.00019035358637610047,
+      "loss": 0.16,
+      "step": 3347
+    },
+    {
+      "epoch": 0.2415671561023125,
+      "grad_norm": 0.10156462341547012,
+      "learning_rate": 0.00019035069995670373,
+      "loss": 0.228,
+      "step": 3348
+    },
+    {
+      "epoch": 0.2416393087773729,
+      "grad_norm": 0.1206686943769455,
+      "learning_rate": 0.00019034781353730696,
+      "loss": 0.1816,
+      "step": 3349
+    },
+    {
+      "epoch": 0.24171146145243336,
+      "grad_norm": 0.11973816901445389,
+      "learning_rate": 0.00019034492711791023,
+      "loss": 0.1634,
+      "step": 3350
+    },
+    {
+      "epoch": 0.24178361412749377,
+      "grad_norm": 0.10858598351478577,
+      "learning_rate": 0.00019034204069851352,
+      "loss": 0.1473,
+      "step": 3351
+    },
+    {
+      "epoch": 0.24185576680255422,
+      "grad_norm": 0.10533450543880463,
+      "learning_rate": 0.00019033915427911678,
+      "loss": 0.1859,
+      "step": 3352
+    },
+    {
+      "epoch": 0.24192791947761463,
+      "grad_norm": 0.12177108228206635,
+      "learning_rate": 0.00019033626785972004,
+      "loss": 0.1484,
+      "step": 3353
+    },
+    {
+      "epoch": 0.24200007215267505,
+      "grad_norm": 0.14595696330070496,
+      "learning_rate": 0.00019033338144032328,
+      "loss": 0.1656,
+      "step": 3354
+    },
+    {
+      "epoch": 0.2420722248277355,
+      "grad_norm": 0.12721535563468933,
+      "learning_rate": 0.00019033049502092654,
+      "loss": 0.1897,
+      "step": 3355
+    },
+    {
+      "epoch": 0.2421443775027959,
+      "grad_norm": 0.11117599159479141,
+      "learning_rate": 0.0001903276086015298,
+      "loss": 0.1826,
+      "step": 3356
+    },
+    {
+      "epoch": 0.24221653017785635,
+      "grad_norm": 0.13842089474201202,
+      "learning_rate": 0.00019032472218213307,
+      "loss": 0.1662,
+      "step": 3357
+    },
+    {
+      "epoch": 0.24228868285291677,
+      "grad_norm": 0.12665043771266937,
+      "learning_rate": 0.00019032183576273636,
+      "loss": 0.1141,
+      "step": 3358
+    },
+    {
+      "epoch": 0.2423608355279772,
+      "grad_norm": 0.13461072742938995,
+      "learning_rate": 0.0001903189493433396,
+      "loss": 0.1817,
+      "step": 3359
+    },
+    {
+      "epoch": 0.24243298820303763,
+      "grad_norm": 0.12616859376430511,
+      "learning_rate": 0.00019031606292394286,
+      "loss": 0.1765,
+      "step": 3360
+    },
+    {
+      "epoch": 0.24250514087809805,
+      "grad_norm": 0.12274335324764252,
+      "learning_rate": 0.00019031317650454612,
+      "loss": 0.212,
+      "step": 3361
+    },
+    {
+      "epoch": 0.2425772935531585,
+      "grad_norm": 0.09972148388624191,
+      "learning_rate": 0.00019031029008514938,
+      "loss": 0.1656,
+      "step": 3362
+    },
+    {
+      "epoch": 0.2426494462282189,
+      "grad_norm": 0.1198907345533371,
+      "learning_rate": 0.00019030740366575264,
+      "loss": 0.1293,
+      "step": 3363
+    },
+    {
+      "epoch": 0.24272159890327935,
+      "grad_norm": 0.10844511538743973,
+      "learning_rate": 0.0001903045172463559,
+      "loss": 0.1771,
+      "step": 3364
+    },
+    {
+      "epoch": 0.24279375157833977,
+      "grad_norm": 0.10141333192586899,
+      "learning_rate": 0.00019030163082695917,
+      "loss": 0.1704,
+      "step": 3365
+    },
+    {
+      "epoch": 0.24286590425340018,
+      "grad_norm": 0.09217889606952667,
+      "learning_rate": 0.00019029874440756243,
+      "loss": 0.1328,
+      "step": 3366
+    },
+    {
+      "epoch": 0.24293805692846063,
+      "grad_norm": 0.10080622881650925,
+      "learning_rate": 0.0001902958579881657,
+      "loss": 0.1322,
+      "step": 3367
+    },
+    {
+      "epoch": 0.24301020960352104,
+      "grad_norm": 0.10517612099647522,
+      "learning_rate": 0.00019029297156876896,
+      "loss": 0.1661,
+      "step": 3368
+    },
+    {
+      "epoch": 0.2430823622785815,
+      "grad_norm": 0.11337990313768387,
+      "learning_rate": 0.00019029008514937222,
+      "loss": 0.1677,
+      "step": 3369
+    },
+    {
+      "epoch": 0.2431545149536419,
+      "grad_norm": 0.11382631957530975,
+      "learning_rate": 0.00019028719872997546,
+      "loss": 0.1728,
+      "step": 3370
+    },
+    {
+      "epoch": 0.24322666762870235,
+      "grad_norm": 0.12541048228740692,
+      "learning_rate": 0.00019028431231057872,
+      "loss": 0.1693,
+      "step": 3371
+    },
+    {
+      "epoch": 0.24329882030376276,
+      "grad_norm": 0.12730221450328827,
+      "learning_rate": 0.000190281425891182,
+      "loss": 0.1598,
+      "step": 3372
+    },
+    {
+      "epoch": 0.24337097297882318,
+      "grad_norm": 0.10680249333381653,
+      "learning_rate": 0.00019027853947178527,
+      "loss": 0.1749,
+      "step": 3373
+    },
+    {
+      "epoch": 0.24344312565388362,
+      "grad_norm": 0.1114056333899498,
+      "learning_rate": 0.00019027565305238854,
+      "loss": 0.1434,
+      "step": 3374
+    },
+    {
+      "epoch": 0.24351527832894404,
+      "grad_norm": 0.1178140789270401,
+      "learning_rate": 0.00019027276663299177,
+      "loss": 0.1457,
+      "step": 3375
+    },
+    {
+      "epoch": 0.24358743100400448,
+      "grad_norm": 0.10966756194829941,
+      "learning_rate": 0.00019026988021359504,
+      "loss": 0.1997,
+      "step": 3376
+    },
+    {
+      "epoch": 0.2436595836790649,
+      "grad_norm": 0.10562735795974731,
+      "learning_rate": 0.0001902669937941983,
+      "loss": 0.2047,
+      "step": 3377
+    },
+    {
+      "epoch": 0.24373173635412532,
+      "grad_norm": 0.09306290000677109,
+      "learning_rate": 0.00019026410737480156,
+      "loss": 0.1157,
+      "step": 3378
+    },
+    {
+      "epoch": 0.24380388902918576,
+      "grad_norm": 0.10318143665790558,
+      "learning_rate": 0.00019026122095540485,
+      "loss": 0.0993,
+      "step": 3379
+    },
+    {
+      "epoch": 0.24387604170424618,
+      "grad_norm": 0.15777446329593658,
+      "learning_rate": 0.0001902583345360081,
+      "loss": 0.2304,
+      "step": 3380
+    },
+    {
+      "epoch": 0.24394819437930662,
+      "grad_norm": 0.10680831223726273,
+      "learning_rate": 0.00019025544811661135,
+      "loss": 0.1504,
+      "step": 3381
+    },
+    {
+      "epoch": 0.24402034705436704,
+      "grad_norm": 0.12741363048553467,
+      "learning_rate": 0.0001902525616972146,
+      "loss": 0.1984,
+      "step": 3382
+    },
+    {
+      "epoch": 0.24409249972942748,
+      "grad_norm": 0.10081455856561661,
+      "learning_rate": 0.00019024967527781788,
+      "loss": 0.1591,
+      "step": 3383
+    },
+    {
+      "epoch": 0.2441646524044879,
+      "grad_norm": 0.11780191957950592,
+      "learning_rate": 0.00019024678885842114,
+      "loss": 0.1666,
+      "step": 3384
+    },
+    {
+      "epoch": 0.2442368050795483,
+      "grad_norm": 0.08748682588338852,
+      "learning_rate": 0.0001902439024390244,
+      "loss": 0.1822,
+      "step": 3385
+    },
+    {
+      "epoch": 0.24430895775460876,
+      "grad_norm": 0.09807809442281723,
+      "learning_rate": 0.00019024101601962766,
+      "loss": 0.184,
+      "step": 3386
+    },
+    {
+      "epoch": 0.24438111042966917,
+      "grad_norm": 0.10223960131406784,
+      "learning_rate": 0.00019023812960023093,
+      "loss": 0.1693,
+      "step": 3387
+    },
+    {
+      "epoch": 0.24445326310472962,
+      "grad_norm": 0.08662978559732437,
+      "learning_rate": 0.0001902352431808342,
+      "loss": 0.1766,
+      "step": 3388
+    },
+    {
+      "epoch": 0.24452541577979003,
+      "grad_norm": 0.08942607790231705,
+      "learning_rate": 0.00019023235676143745,
+      "loss": 0.1274,
+      "step": 3389
+    },
+    {
+      "epoch": 0.24459756845485045,
+      "grad_norm": 0.1172434389591217,
+      "learning_rate": 0.00019022947034204072,
+      "loss": 0.1108,
+      "step": 3390
+    },
+    {
+      "epoch": 0.2446697211299109,
+      "grad_norm": 0.1019260361790657,
+      "learning_rate": 0.00019022658392264395,
+      "loss": 0.1431,
+      "step": 3391
+    },
+    {
+      "epoch": 0.2447418738049713,
+      "grad_norm": 0.12758377194404602,
+      "learning_rate": 0.00019022369750324722,
+      "loss": 0.2125,
+      "step": 3392
+    },
+    {
+      "epoch": 0.24481402648003175,
+      "grad_norm": 0.12854115664958954,
+      "learning_rate": 0.0001902208110838505,
+      "loss": 0.175,
+      "step": 3393
+    },
+    {
+      "epoch": 0.24488617915509217,
+      "grad_norm": 0.08560032397508621,
+      "learning_rate": 0.00019021792466445377,
+      "loss": 0.1714,
+      "step": 3394
+    },
+    {
+      "epoch": 0.24495833183015261,
+      "grad_norm": 0.10468869656324387,
+      "learning_rate": 0.00019021503824505703,
+      "loss": 0.1782,
+      "step": 3395
+    },
+    {
+      "epoch": 0.24503048450521303,
+      "grad_norm": 0.1147155836224556,
+      "learning_rate": 0.00019021215182566027,
+      "loss": 0.1654,
+      "step": 3396
+    },
+    {
+      "epoch": 0.24510263718027345,
+      "grad_norm": 0.11898034811019897,
+      "learning_rate": 0.00019020926540626353,
+      "loss": 0.1781,
+      "step": 3397
+    },
+    {
+      "epoch": 0.2451747898553339,
+      "grad_norm": 0.09727160632610321,
+      "learning_rate": 0.0001902063789868668,
+      "loss": 0.1412,
+      "step": 3398
+    },
+    {
+      "epoch": 0.2452469425303943,
+      "grad_norm": 0.14130552113056183,
+      "learning_rate": 0.00019020349256747006,
+      "loss": 0.2089,
+      "step": 3399
+    },
+    {
+      "epoch": 0.24531909520545475,
+      "grad_norm": 0.15899445116519928,
+      "learning_rate": 0.00019020060614807335,
+      "loss": 0.1687,
+      "step": 3400
+    },
+    {
+      "epoch": 0.24539124788051517,
+      "grad_norm": 0.15017631649971008,
+      "learning_rate": 0.00019019771972867658,
+      "loss": 0.165,
+      "step": 3401
+    },
+    {
+      "epoch": 0.2454634005555756,
+      "grad_norm": 0.10212711244821548,
+      "learning_rate": 0.00019019483330927984,
+      "loss": 0.1294,
+      "step": 3402
+    },
+    {
+      "epoch": 0.24553555323063603,
+      "grad_norm": 0.11916355043649673,
+      "learning_rate": 0.0001901919468898831,
+      "loss": 0.1724,
+      "step": 3403
+    },
+    {
+      "epoch": 0.24560770590569644,
+      "grad_norm": 0.09966582804918289,
+      "learning_rate": 0.00019018906047048637,
+      "loss": 0.1529,
+      "step": 3404
+    },
+    {
+      "epoch": 0.2456798585807569,
+      "grad_norm": 0.10150911659002304,
+      "learning_rate": 0.00019018617405108963,
+      "loss": 0.1836,
+      "step": 3405
+    },
+    {
+      "epoch": 0.2457520112558173,
+      "grad_norm": 0.10047971457242966,
+      "learning_rate": 0.0001901832876316929,
+      "loss": 0.1581,
+      "step": 3406
+    },
+    {
+      "epoch": 0.24582416393087775,
+      "grad_norm": 0.11941426247358322,
+      "learning_rate": 0.00019018040121229616,
+      "loss": 0.1423,
+      "step": 3407
+    },
+    {
+      "epoch": 0.24589631660593816,
+      "grad_norm": 0.09558165073394775,
+      "learning_rate": 0.00019017751479289942,
+      "loss": 0.1712,
+      "step": 3408
+    },
+    {
+      "epoch": 0.24596846928099858,
+      "grad_norm": 0.10940393060445786,
+      "learning_rate": 0.00019017462837350269,
+      "loss": 0.1649,
+      "step": 3409
+    },
+    {
+      "epoch": 0.24604062195605902,
+      "grad_norm": 0.10541975498199463,
+      "learning_rate": 0.00019017174195410595,
+      "loss": 0.1764,
+      "step": 3410
+    },
+    {
+      "epoch": 0.24611277463111944,
+      "grad_norm": 0.11231281608343124,
+      "learning_rate": 0.0001901688555347092,
+      "loss": 0.163,
+      "step": 3411
+    },
+    {
+      "epoch": 0.24618492730617988,
+      "grad_norm": 0.0928051769733429,
+      "learning_rate": 0.00019016596911531245,
+      "loss": 0.1887,
+      "step": 3412
+    },
+    {
+      "epoch": 0.2462570799812403,
+      "grad_norm": 0.11161345988512039,
+      "learning_rate": 0.0001901630826959157,
+      "loss": 0.1469,
+      "step": 3413
+    },
+    {
+      "epoch": 0.24632923265630075,
+      "grad_norm": 0.11575374752283096,
+      "learning_rate": 0.000190160196276519,
+      "loss": 0.1466,
+      "step": 3414
+    },
+    {
+      "epoch": 0.24640138533136116,
+      "grad_norm": 0.10162404179573059,
+      "learning_rate": 0.00019015730985712226,
+      "loss": 0.1193,
+      "step": 3415
+    },
+    {
+      "epoch": 0.24647353800642158,
+      "grad_norm": 0.10334353148937225,
+      "learning_rate": 0.00019015442343772553,
+      "loss": 0.1586,
+      "step": 3416
+    },
+    {
+      "epoch": 0.24654569068148202,
+      "grad_norm": 0.08884553611278534,
+      "learning_rate": 0.00019015153701832876,
+      "loss": 0.1715,
+      "step": 3417
+    },
+    {
+      "epoch": 0.24661784335654244,
+      "grad_norm": 0.1290484517812729,
+      "learning_rate": 0.00019014865059893202,
+      "loss": 0.1596,
+      "step": 3418
+    },
+    {
+      "epoch": 0.24668999603160288,
+      "grad_norm": 0.12575063109397888,
+      "learning_rate": 0.0001901457641795353,
+      "loss": 0.1052,
+      "step": 3419
+    },
+    {
+      "epoch": 0.2467621487066633,
+      "grad_norm": 0.10406794399023056,
+      "learning_rate": 0.00019014287776013855,
+      "loss": 0.1728,
+      "step": 3420
+    },
+    {
+      "epoch": 0.24683430138172371,
+      "grad_norm": 0.109982430934906,
+      "learning_rate": 0.00019013999134074184,
+      "loss": 0.1586,
+      "step": 3421
+    },
+    {
+      "epoch": 0.24690645405678416,
+      "grad_norm": 0.11729655414819717,
+      "learning_rate": 0.00019013710492134508,
+      "loss": 0.1731,
+      "step": 3422
+    },
+    {
+      "epoch": 0.24697860673184457,
+      "grad_norm": 0.08943957835435867,
+      "learning_rate": 0.00019013421850194834,
+      "loss": 0.1607,
+      "step": 3423
+    },
+    {
+      "epoch": 0.24705075940690502,
+      "grad_norm": 0.13747897744178772,
+      "learning_rate": 0.0001901313320825516,
+      "loss": 0.1816,
+      "step": 3424
+    },
+    {
+      "epoch": 0.24712291208196543,
+      "grad_norm": 0.1077960953116417,
+      "learning_rate": 0.00019012844566315486,
+      "loss": 0.1282,
+      "step": 3425
+    },
+    {
+      "epoch": 0.24719506475702588,
+      "grad_norm": 0.10450281202793121,
+      "learning_rate": 0.00019012555924375813,
+      "loss": 0.19,
+      "step": 3426
+    },
+    {
+      "epoch": 0.2472672174320863,
+      "grad_norm": 0.10756547749042511,
+      "learning_rate": 0.0001901226728243614,
+      "loss": 0.1612,
+      "step": 3427
+    },
+    {
+      "epoch": 0.2473393701071467,
+      "grad_norm": 0.08175303786993027,
+      "learning_rate": 0.00019011978640496465,
+      "loss": 0.1479,
+      "step": 3428
+    },
+    {
+      "epoch": 0.24741152278220715,
+      "grad_norm": 0.10902395844459534,
+      "learning_rate": 0.00019011689998556792,
+      "loss": 0.1431,
+      "step": 3429
+    },
+    {
+      "epoch": 0.24748367545726757,
+      "grad_norm": 0.10441279411315918,
+      "learning_rate": 0.00019011401356617118,
+      "loss": 0.1727,
+      "step": 3430
+    },
+    {
+      "epoch": 0.24755582813232802,
+      "grad_norm": 0.08806274086236954,
+      "learning_rate": 0.00019011112714677444,
+      "loss": 0.1463,
+      "step": 3431
+    },
+    {
+      "epoch": 0.24762798080738843,
+      "grad_norm": 0.141118586063385,
+      "learning_rate": 0.0001901082407273777,
+      "loss": 0.1821,
+      "step": 3432
+    },
+    {
+      "epoch": 0.24770013348244888,
+      "grad_norm": 0.10650867223739624,
+      "learning_rate": 0.00019010535430798094,
+      "loss": 0.1422,
+      "step": 3433
+    },
+    {
+      "epoch": 0.2477722861575093,
+      "grad_norm": 0.11610765755176544,
+      "learning_rate": 0.0001901024678885842,
+      "loss": 0.1737,
+      "step": 3434
+    },
+    {
+      "epoch": 0.2478444388325697,
+      "grad_norm": 0.10462258756160736,
+      "learning_rate": 0.00019009958146918747,
+      "loss": 0.1156,
+      "step": 3435
+    },
+    {
+      "epoch": 0.24791659150763015,
+      "grad_norm": 0.11078470945358276,
+      "learning_rate": 0.00019009669504979076,
+      "loss": 0.1429,
+      "step": 3436
+    },
+    {
+      "epoch": 0.24798874418269057,
+      "grad_norm": 0.13857358694076538,
+      "learning_rate": 0.00019009380863039402,
+      "loss": 0.2146,
+      "step": 3437
+    },
+    {
+      "epoch": 0.248060896857751,
+      "grad_norm": 0.18643376231193542,
+      "learning_rate": 0.00019009092221099726,
+      "loss": 0.1982,
+      "step": 3438
+    },
+    {
+      "epoch": 0.24813304953281143,
+      "grad_norm": 0.0913734957575798,
+      "learning_rate": 0.00019008803579160052,
+      "loss": 0.1216,
+      "step": 3439
+    },
+    {
+      "epoch": 0.24820520220787184,
+      "grad_norm": 0.09168455004692078,
+      "learning_rate": 0.00019008514937220378,
+      "loss": 0.1903,
+      "step": 3440
+    },
+    {
+      "epoch": 0.2482773548829323,
+      "grad_norm": 0.10040730237960815,
+      "learning_rate": 0.00019008226295280704,
+      "loss": 0.148,
+      "step": 3441
+    },
+    {
+      "epoch": 0.2483495075579927,
+      "grad_norm": 0.10486883670091629,
+      "learning_rate": 0.0001900793765334103,
+      "loss": 0.1377,
+      "step": 3442
+    },
+    {
+      "epoch": 0.24842166023305315,
+      "grad_norm": 0.1028137058019638,
+      "learning_rate": 0.00019007649011401357,
+      "loss": 0.148,
+      "step": 3443
+    },
+    {
+      "epoch": 0.24849381290811356,
+      "grad_norm": 0.11558540910482407,
+      "learning_rate": 0.00019007360369461683,
+      "loss": 0.1617,
+      "step": 3444
+    },
+    {
+      "epoch": 0.248565965583174,
+      "grad_norm": 0.09808561950922012,
+      "learning_rate": 0.0001900707172752201,
+      "loss": 0.1421,
+      "step": 3445
+    },
+    {
+      "epoch": 0.24863811825823443,
+      "grad_norm": 0.13802699744701385,
+      "learning_rate": 0.00019006783085582336,
+      "loss": 0.1876,
+      "step": 3446
+    },
+    {
+      "epoch": 0.24871027093329484,
+      "grad_norm": 0.10349813848733902,
+      "learning_rate": 0.00019006494443642662,
+      "loss": 0.1674,
+      "step": 3447
+    },
+    {
+      "epoch": 0.24878242360835529,
+      "grad_norm": 0.10739285498857498,
+      "learning_rate": 0.00019006205801702988,
+      "loss": 0.1963,
+      "step": 3448
+    },
+    {
+      "epoch": 0.2488545762834157,
+      "grad_norm": 0.10991799831390381,
+      "learning_rate": 0.00019005917159763315,
+      "loss": 0.181,
+      "step": 3449
+    },
+    {
+      "epoch": 0.24892672895847615,
+      "grad_norm": 0.10631420463323593,
+      "learning_rate": 0.0001900562851782364,
+      "loss": 0.182,
+      "step": 3450
+    },
+    {
+      "epoch": 0.24899888163353656,
+      "grad_norm": 0.1767328828573227,
+      "learning_rate": 0.00019005339875883967,
+      "loss": 0.1831,
+      "step": 3451
+    },
+    {
+      "epoch": 0.24907103430859698,
+      "grad_norm": 0.09794510155916214,
+      "learning_rate": 0.00019005051233944294,
+      "loss": 0.1805,
+      "step": 3452
+    },
+    {
+      "epoch": 0.24914318698365742,
+      "grad_norm": 0.12249629199504852,
+      "learning_rate": 0.0001900476259200462,
+      "loss": 0.1658,
+      "step": 3453
+    },
+    {
+      "epoch": 0.24921533965871784,
+      "grad_norm": 0.13031835854053497,
+      "learning_rate": 0.00019004473950064946,
+      "loss": 0.1184,
+      "step": 3454
+    },
+    {
+      "epoch": 0.24928749233377828,
+      "grad_norm": 0.12428729981184006,
+      "learning_rate": 0.0001900418530812527,
+      "loss": 0.1604,
+      "step": 3455
+    },
+    {
+      "epoch": 0.2493596450088387,
+      "grad_norm": 0.09246498346328735,
+      "learning_rate": 0.00019003896666185596,
+      "loss": 0.1555,
+      "step": 3456
+    },
+    {
+      "epoch": 0.24943179768389914,
+      "grad_norm": 0.09440509974956512,
+      "learning_rate": 0.00019003608024245925,
+      "loss": 0.2146,
+      "step": 3457
+    },
+    {
+      "epoch": 0.24950395035895956,
+      "grad_norm": 0.08850990235805511,
+      "learning_rate": 0.00019003319382306251,
+      "loss": 0.2196,
+      "step": 3458
+    },
+    {
+      "epoch": 0.24957610303401997,
+      "grad_norm": 0.09511906653642654,
+      "learning_rate": 0.00019003030740366578,
+      "loss": 0.1117,
+      "step": 3459
+    },
+    {
+      "epoch": 0.24964825570908042,
+      "grad_norm": 0.11032702773809433,
+      "learning_rate": 0.000190027420984269,
+      "loss": 0.1305,
+      "step": 3460
+    },
+    {
+      "epoch": 0.24972040838414084,
+      "grad_norm": 0.10022285580635071,
+      "learning_rate": 0.00019002453456487228,
+      "loss": 0.1479,
+      "step": 3461
+    },
+    {
+      "epoch": 0.24979256105920128,
+      "grad_norm": 0.09211345762014389,
+      "learning_rate": 0.00019002164814547554,
+      "loss": 0.1409,
+      "step": 3462
+    },
+    {
+      "epoch": 0.2498647137342617,
+      "grad_norm": 0.10632354021072388,
+      "learning_rate": 0.0001900187617260788,
+      "loss": 0.1489,
+      "step": 3463
+    },
+    {
+      "epoch": 0.24993686640932214,
+      "grad_norm": 0.10797733068466187,
+      "learning_rate": 0.0001900158753066821,
+      "loss": 0.1376,
+      "step": 3464
+    },
+    {
+      "epoch": 0.25000901908438256,
+      "grad_norm": 0.12420342862606049,
+      "learning_rate": 0.00019001298888728533,
+      "loss": 0.2198,
+      "step": 3465
+    },
+    {
+      "epoch": 0.250081171759443,
+      "grad_norm": 0.1036577820777893,
+      "learning_rate": 0.0001900101024678886,
+      "loss": 0.1677,
+      "step": 3466
+    },
+    {
+      "epoch": 0.2501533244345034,
+      "grad_norm": 0.11751674860715866,
+      "learning_rate": 0.00019000721604849185,
+      "loss": 0.1746,
+      "step": 3467
+    },
+    {
+      "epoch": 0.25022547710956383,
+      "grad_norm": 0.11562912166118622,
+      "learning_rate": 0.00019000432962909512,
+      "loss": 0.166,
+      "step": 3468
+    },
+    {
+      "epoch": 0.2502976297846243,
+      "grad_norm": 0.11643790453672409,
+      "learning_rate": 0.00019000144320969838,
+      "loss": 0.167,
+      "step": 3469
+    },
+    {
+      "epoch": 0.2503697824596847,
+      "grad_norm": 0.12111453711986542,
+      "learning_rate": 0.00018999855679030164,
+      "loss": 0.1769,
+      "step": 3470
+    },
+    {
+      "epoch": 0.2504419351347451,
+      "grad_norm": 0.11380564421415329,
+      "learning_rate": 0.0001899956703709049,
+      "loss": 0.1852,
+      "step": 3471
+    },
+    {
+      "epoch": 0.25051408780980555,
+      "grad_norm": 0.1100783571600914,
+      "learning_rate": 0.00018999278395150817,
+      "loss": 0.1419,
+      "step": 3472
+    },
+    {
+      "epoch": 0.250586240484866,
+      "grad_norm": 0.12145480513572693,
+      "learning_rate": 0.00018998989753211143,
+      "loss": 0.1455,
+      "step": 3473
+    },
+    {
+      "epoch": 0.2506583931599264,
+      "grad_norm": 0.13141202926635742,
+      "learning_rate": 0.0001899870111127147,
+      "loss": 0.157,
+      "step": 3474
+    },
+    {
+      "epoch": 0.25073054583498683,
+      "grad_norm": 0.09900374710559845,
+      "learning_rate": 0.00018998412469331796,
+      "loss": 0.1379,
+      "step": 3475
+    },
+    {
+      "epoch": 0.2508026985100473,
+      "grad_norm": 0.09456542879343033,
+      "learning_rate": 0.0001899812382739212,
+      "loss": 0.1831,
+      "step": 3476
+    },
+    {
+      "epoch": 0.25087485118510766,
+      "grad_norm": 0.11174774914979935,
+      "learning_rate": 0.00018997835185452446,
+      "loss": 0.1477,
+      "step": 3477
+    },
+    {
+      "epoch": 0.2509470038601681,
+      "grad_norm": 0.1130797490477562,
+      "learning_rate": 0.00018997546543512775,
+      "loss": 0.1874,
+      "step": 3478
+    },
+    {
+      "epoch": 0.25101915653522855,
+      "grad_norm": 0.09996349364519119,
+      "learning_rate": 0.000189972579015731,
+      "loss": 0.1372,
+      "step": 3479
+    },
+    {
+      "epoch": 0.251091309210289,
+      "grad_norm": 0.09237122535705566,
+      "learning_rate": 0.00018996969259633427,
+      "loss": 0.131,
+      "step": 3480
+    },
+    {
+      "epoch": 0.2511634618853494,
+      "grad_norm": 0.11162920296192169,
+      "learning_rate": 0.0001899668061769375,
+      "loss": 0.1687,
+      "step": 3481
+    },
+    {
+      "epoch": 0.2512356145604098,
+      "grad_norm": 0.10475852340459824,
+      "learning_rate": 0.00018996391975754077,
+      "loss": 0.1336,
+      "step": 3482
+    },
+    {
+      "epoch": 0.25130776723547027,
+      "grad_norm": 0.09986816346645355,
+      "learning_rate": 0.00018996103333814403,
+      "loss": 0.1376,
+      "step": 3483
+    },
+    {
+      "epoch": 0.25137991991053066,
+      "grad_norm": 0.1363130658864975,
+      "learning_rate": 0.0001899581469187473,
+      "loss": 0.1732,
+      "step": 3484
+    },
+    {
+      "epoch": 0.2514520725855911,
+      "grad_norm": 0.12526144087314606,
+      "learning_rate": 0.00018995526049935059,
+      "loss": 0.1946,
+      "step": 3485
+    },
+    {
+      "epoch": 0.25152422526065155,
+      "grad_norm": 0.09262970089912415,
+      "learning_rate": 0.00018995237407995382,
+      "loss": 0.1687,
+      "step": 3486
+    },
+    {
+      "epoch": 0.251596377935712,
+      "grad_norm": 0.10301724821329117,
+      "learning_rate": 0.00018994948766055708,
+      "loss": 0.1549,
+      "step": 3487
+    },
+    {
+      "epoch": 0.2516685306107724,
+      "grad_norm": 0.11157647520303726,
+      "learning_rate": 0.00018994660124116035,
+      "loss": 0.1551,
+      "step": 3488
+    },
+    {
+      "epoch": 0.2517406832858328,
+      "grad_norm": 0.11049095541238785,
+      "learning_rate": 0.0001899437148217636,
+      "loss": 0.1465,
+      "step": 3489
+    },
+    {
+      "epoch": 0.25181283596089327,
+      "grad_norm": 0.1106208935379982,
+      "learning_rate": 0.00018994082840236687,
+      "loss": 0.1204,
+      "step": 3490
+    },
+    {
+      "epoch": 0.25188498863595365,
+      "grad_norm": 0.1438845843076706,
+      "learning_rate": 0.00018993794198297014,
+      "loss": 0.1313,
+      "step": 3491
+    },
+    {
+      "epoch": 0.2519571413110141,
+      "grad_norm": 0.11887115240097046,
+      "learning_rate": 0.0001899350555635734,
+      "loss": 0.1527,
+      "step": 3492
+    },
+    {
+      "epoch": 0.25202929398607454,
+      "grad_norm": 0.1293410360813141,
+      "learning_rate": 0.00018993216914417666,
+      "loss": 0.1478,
+      "step": 3493
+    },
+    {
+      "epoch": 0.252101446661135,
+      "grad_norm": 0.10676635801792145,
+      "learning_rate": 0.00018992928272477992,
+      "loss": 0.1604,
+      "step": 3494
+    },
+    {
+      "epoch": 0.2521735993361954,
+      "grad_norm": 0.13810612261295319,
+      "learning_rate": 0.0001899263963053832,
+      "loss": 0.1081,
+      "step": 3495
+    },
+    {
+      "epoch": 0.2522457520112558,
+      "grad_norm": 0.12804998457431793,
+      "learning_rate": 0.00018992350988598645,
+      "loss": 0.151,
+      "step": 3496
+    },
+    {
+      "epoch": 0.25231790468631626,
+      "grad_norm": 0.09013412892818451,
+      "learning_rate": 0.0001899206234665897,
+      "loss": 0.1803,
+      "step": 3497
+    },
+    {
+      "epoch": 0.25239005736137665,
+      "grad_norm": 0.11779167503118515,
+      "learning_rate": 0.00018991773704719295,
+      "loss": 0.2159,
+      "step": 3498
+    },
+    {
+      "epoch": 0.2524622100364371,
+      "grad_norm": 0.1196313127875328,
+      "learning_rate": 0.00018991485062779624,
+      "loss": 0.2082,
+      "step": 3499
+    },
+    {
+      "epoch": 0.25253436271149754,
+      "grad_norm": 0.10573788732290268,
+      "learning_rate": 0.0001899119642083995,
+      "loss": 0.1439,
+      "step": 3500
+    },
+    {
+      "epoch": 0.252606515386558,
+      "grad_norm": 0.16237285733222961,
+      "learning_rate": 0.00018990907778900277,
+      "loss": 0.1524,
+      "step": 3501
+    },
+    {
+      "epoch": 0.25267866806161837,
+      "grad_norm": 0.101177878677845,
+      "learning_rate": 0.000189906191369606,
+      "loss": 0.1778,
+      "step": 3502
+    },
+    {
+      "epoch": 0.2527508207366788,
+      "grad_norm": 0.14791998267173767,
+      "learning_rate": 0.00018990330495020926,
+      "loss": 0.207,
+      "step": 3503
+    },
+    {
+      "epoch": 0.25282297341173926,
+      "grad_norm": 0.12691575288772583,
+      "learning_rate": 0.00018990041853081253,
+      "loss": 0.15,
+      "step": 3504
+    },
+    {
+      "epoch": 0.25289512608679965,
+      "grad_norm": 0.10003713518381119,
+      "learning_rate": 0.0001898975321114158,
+      "loss": 0.13,
+      "step": 3505
+    },
+    {
+      "epoch": 0.2529672787618601,
+      "grad_norm": 0.08510126918554306,
+      "learning_rate": 0.00018989464569201908,
+      "loss": 0.1463,
+      "step": 3506
+    },
+    {
+      "epoch": 0.25303943143692054,
+      "grad_norm": 0.087553009390831,
+      "learning_rate": 0.00018989175927262232,
+      "loss": 0.1488,
+      "step": 3507
+    },
+    {
+      "epoch": 0.2531115841119809,
+      "grad_norm": 0.11553581058979034,
+      "learning_rate": 0.00018988887285322558,
+      "loss": 0.1294,
+      "step": 3508
+    },
+    {
+      "epoch": 0.25318373678704137,
+      "grad_norm": 0.1350739449262619,
+      "learning_rate": 0.00018988598643382884,
+      "loss": 0.1414,
+      "step": 3509
+    },
+    {
+      "epoch": 0.2532558894621018,
+      "grad_norm": 0.1024918481707573,
+      "learning_rate": 0.0001898831000144321,
+      "loss": 0.1101,
+      "step": 3510
+    },
+    {
+      "epoch": 0.25332804213716226,
+      "grad_norm": 0.11271020770072937,
+      "learning_rate": 0.00018988021359503537,
+      "loss": 0.1579,
+      "step": 3511
+    },
+    {
+      "epoch": 0.25340019481222265,
+      "grad_norm": 0.1149035319685936,
+      "learning_rate": 0.00018987732717563863,
+      "loss": 0.1487,
+      "step": 3512
+    },
+    {
+      "epoch": 0.2534723474872831,
+      "grad_norm": 0.13088317215442657,
+      "learning_rate": 0.0001898744407562419,
+      "loss": 0.1902,
+      "step": 3513
+    },
+    {
+      "epoch": 0.25354450016234353,
+      "grad_norm": 0.09780467301607132,
+      "learning_rate": 0.00018987155433684516,
+      "loss": 0.1394,
+      "step": 3514
+    },
+    {
+      "epoch": 0.2536166528374039,
+      "grad_norm": 0.11135496944189072,
+      "learning_rate": 0.00018986866791744842,
+      "loss": 0.1806,
+      "step": 3515
+    },
+    {
+      "epoch": 0.25368880551246437,
+      "grad_norm": 0.10907930880784988,
+      "learning_rate": 0.00018986578149805168,
+      "loss": 0.1606,
+      "step": 3516
+    },
+    {
+      "epoch": 0.2537609581875248,
+      "grad_norm": 0.09134551137685776,
+      "learning_rate": 0.00018986289507865495,
+      "loss": 0.1284,
+      "step": 3517
+    },
+    {
+      "epoch": 0.25383311086258525,
+      "grad_norm": 0.15910980105400085,
+      "learning_rate": 0.00018986000865925818,
+      "loss": 0.2182,
+      "step": 3518
+    },
+    {
+      "epoch": 0.25390526353764564,
+      "grad_norm": 0.11735296249389648,
+      "learning_rate": 0.00018985712223986144,
+      "loss": 0.2251,
+      "step": 3519
+    },
+    {
+      "epoch": 0.2539774162127061,
+      "grad_norm": 0.11389101296663284,
+      "learning_rate": 0.00018985423582046473,
+      "loss": 0.1897,
+      "step": 3520
+    },
+    {
+      "epoch": 0.25404956888776653,
+      "grad_norm": 0.11940047144889832,
+      "learning_rate": 0.000189851349401068,
+      "loss": 0.1513,
+      "step": 3521
+    },
+    {
+      "epoch": 0.2541217215628269,
+      "grad_norm": 0.11212513595819473,
+      "learning_rate": 0.00018984846298167126,
+      "loss": 0.1766,
+      "step": 3522
+    },
+    {
+      "epoch": 0.25419387423788736,
+      "grad_norm": 0.12622421979904175,
+      "learning_rate": 0.0001898455765622745,
+      "loss": 0.1799,
+      "step": 3523
+    },
+    {
+      "epoch": 0.2542660269129478,
+      "grad_norm": 0.11228041350841522,
+      "learning_rate": 0.00018984269014287776,
+      "loss": 0.1154,
+      "step": 3524
+    },
+    {
+      "epoch": 0.25433817958800825,
+      "grad_norm": 0.1204824447631836,
+      "learning_rate": 0.00018983980372348102,
+      "loss": 0.1532,
+      "step": 3525
+    },
+    {
+      "epoch": 0.25441033226306864,
+      "grad_norm": 0.13384214043617249,
+      "learning_rate": 0.00018983691730408428,
+      "loss": 0.2145,
+      "step": 3526
+    },
+    {
+      "epoch": 0.2544824849381291,
+      "grad_norm": 0.09223582595586777,
+      "learning_rate": 0.00018983403088468757,
+      "loss": 0.1159,
+      "step": 3527
+    },
+    {
+      "epoch": 0.2545546376131895,
+      "grad_norm": 0.11915893107652664,
+      "learning_rate": 0.0001898311444652908,
+      "loss": 0.1494,
+      "step": 3528
+    },
+    {
+      "epoch": 0.2546267902882499,
+      "grad_norm": 0.09959257394075394,
+      "learning_rate": 0.00018982825804589407,
+      "loss": 0.1784,
+      "step": 3529
+    },
+    {
+      "epoch": 0.25469894296331036,
+      "grad_norm": 0.1257259100675583,
+      "learning_rate": 0.00018982537162649734,
+      "loss": 0.1652,
+      "step": 3530
+    },
+    {
+      "epoch": 0.2547710956383708,
+      "grad_norm": 0.09877978265285492,
+      "learning_rate": 0.0001898224852071006,
+      "loss": 0.1677,
+      "step": 3531
+    },
+    {
+      "epoch": 0.25484324831343125,
+      "grad_norm": 0.10418039560317993,
+      "learning_rate": 0.00018981959878770386,
+      "loss": 0.2078,
+      "step": 3532
+    },
+    {
+      "epoch": 0.25491540098849164,
+      "grad_norm": 0.14371944963932037,
+      "learning_rate": 0.00018981671236830712,
+      "loss": 0.2339,
+      "step": 3533
+    },
+    {
+      "epoch": 0.2549875536635521,
+      "grad_norm": 0.12156295776367188,
+      "learning_rate": 0.0001898138259489104,
+      "loss": 0.1863,
+      "step": 3534
+    },
+    {
+      "epoch": 0.2550597063386125,
+      "grad_norm": 0.13483689725399017,
+      "learning_rate": 0.00018981093952951365,
+      "loss": 0.1332,
+      "step": 3535
+    },
+    {
+      "epoch": 0.2551318590136729,
+      "grad_norm": 0.07853590697050095,
+      "learning_rate": 0.0001898080531101169,
+      "loss": 0.141,
+      "step": 3536
+    },
+    {
+      "epoch": 0.25520401168873336,
+      "grad_norm": 0.10824296623468399,
+      "learning_rate": 0.00018980516669072018,
+      "loss": 0.164,
+      "step": 3537
+    },
+    {
+      "epoch": 0.2552761643637938,
+      "grad_norm": 0.1052640974521637,
+      "learning_rate": 0.00018980228027132344,
+      "loss": 0.124,
+      "step": 3538
+    },
+    {
+      "epoch": 0.2553483170388542,
+      "grad_norm": 0.11124280840158463,
+      "learning_rate": 0.00018979939385192668,
+      "loss": 0.1573,
+      "step": 3539
+    },
+    {
+      "epoch": 0.25542046971391463,
+      "grad_norm": 0.1169007271528244,
+      "learning_rate": 0.00018979650743252994,
+      "loss": 0.1753,
+      "step": 3540
+    },
+    {
+      "epoch": 0.2554926223889751,
+      "grad_norm": 0.10264059901237488,
+      "learning_rate": 0.00018979362101313323,
+      "loss": 0.157,
+      "step": 3541
+    },
+    {
+      "epoch": 0.2555647750640355,
+      "grad_norm": 0.12313199788331985,
+      "learning_rate": 0.0001897907345937365,
+      "loss": 0.1744,
+      "step": 3542
+    },
+    {
+      "epoch": 0.2556369277390959,
+      "grad_norm": 0.10002419352531433,
+      "learning_rate": 0.00018978784817433975,
+      "loss": 0.1586,
+      "step": 3543
+    },
+    {
+      "epoch": 0.25570908041415635,
+      "grad_norm": 0.11884690076112747,
+      "learning_rate": 0.000189784961754943,
+      "loss": 0.1761,
+      "step": 3544
+    },
+    {
+      "epoch": 0.2557812330892168,
+      "grad_norm": 0.11634685844182968,
+      "learning_rate": 0.00018978207533554625,
+      "loss": 0.1918,
+      "step": 3545
+    },
+    {
+      "epoch": 0.2558533857642772,
+      "grad_norm": 0.13089223206043243,
+      "learning_rate": 0.00018977918891614952,
+      "loss": 0.1446,
+      "step": 3546
+    },
+    {
+      "epoch": 0.25592553843933763,
+      "grad_norm": 0.1116749569773674,
+      "learning_rate": 0.00018977630249675278,
+      "loss": 0.1381,
+      "step": 3547
+    },
+    {
+      "epoch": 0.2559976911143981,
+      "grad_norm": 0.12038832157850266,
+      "learning_rate": 0.00018977341607735607,
+      "loss": 0.1312,
+      "step": 3548
+    },
+    {
+      "epoch": 0.2560698437894585,
+      "grad_norm": 0.11651638150215149,
+      "learning_rate": 0.0001897705296579593,
+      "loss": 0.1588,
+      "step": 3549
+    },
+    {
+      "epoch": 0.2561419964645189,
+      "grad_norm": 0.1340457946062088,
+      "learning_rate": 0.00018976764323856257,
+      "loss": 0.1697,
+      "step": 3550
+    },
+    {
+      "epoch": 0.25621414913957935,
+      "grad_norm": 0.11867430806159973,
+      "learning_rate": 0.00018976475681916583,
+      "loss": 0.1801,
+      "step": 3551
+    },
+    {
+      "epoch": 0.2562863018146398,
+      "grad_norm": 0.1400105208158493,
+      "learning_rate": 0.0001897618703997691,
+      "loss": 0.2189,
+      "step": 3552
+    },
+    {
+      "epoch": 0.2563584544897002,
+      "grad_norm": 0.0946824699640274,
+      "learning_rate": 0.00018975898398037236,
+      "loss": 0.1552,
+      "step": 3553
+    },
+    {
+      "epoch": 0.2564306071647606,
+      "grad_norm": 0.08589153736829758,
+      "learning_rate": 0.00018975609756097562,
+      "loss": 0.1707,
+      "step": 3554
+    },
+    {
+      "epoch": 0.25650275983982107,
+      "grad_norm": 0.12594255805015564,
+      "learning_rate": 0.00018975321114157888,
+      "loss": 0.1107,
+      "step": 3555
+    },
+    {
+      "epoch": 0.2565749125148815,
+      "grad_norm": 0.11282055079936981,
+      "learning_rate": 0.00018975032472218214,
+      "loss": 0.1644,
+      "step": 3556
+    },
+    {
+      "epoch": 0.2566470651899419,
+      "grad_norm": 0.12821117043495178,
+      "learning_rate": 0.0001897474383027854,
+      "loss": 0.1475,
+      "step": 3557
+    },
+    {
+      "epoch": 0.25671921786500235,
+      "grad_norm": 0.10788974910974503,
+      "learning_rate": 0.00018974455188338867,
+      "loss": 0.1369,
+      "step": 3558
+    },
+    {
+      "epoch": 0.2567913705400628,
+      "grad_norm": 0.12976844608783722,
+      "learning_rate": 0.00018974166546399193,
+      "loss": 0.1695,
+      "step": 3559
+    },
+    {
+      "epoch": 0.2568635232151232,
+      "grad_norm": 0.11886755377054214,
+      "learning_rate": 0.0001897387790445952,
+      "loss": 0.1728,
+      "step": 3560
+    },
+    {
+      "epoch": 0.2569356758901836,
+      "grad_norm": 0.12633520364761353,
+      "learning_rate": 0.00018973589262519843,
+      "loss": 0.1143,
+      "step": 3561
+    },
+    {
+      "epoch": 0.25700782856524407,
+      "grad_norm": 0.10834062844514847,
+      "learning_rate": 0.00018973300620580172,
+      "loss": 0.1467,
+      "step": 3562
+    },
+    {
+      "epoch": 0.2570799812403045,
+      "grad_norm": 0.14430686831474304,
+      "learning_rate": 0.00018973011978640499,
+      "loss": 0.2205,
+      "step": 3563
+    },
+    {
+      "epoch": 0.2571521339153649,
+      "grad_norm": 0.13141347467899323,
+      "learning_rate": 0.00018972723336700825,
+      "loss": 0.1295,
+      "step": 3564
+    },
+    {
+      "epoch": 0.25722428659042534,
+      "grad_norm": 0.1020764708518982,
+      "learning_rate": 0.0001897243469476115,
+      "loss": 0.1944,
+      "step": 3565
+    },
+    {
+      "epoch": 0.2572964392654858,
+      "grad_norm": 0.1081843450665474,
+      "learning_rate": 0.00018972146052821475,
+      "loss": 0.1463,
+      "step": 3566
+    },
+    {
+      "epoch": 0.2573685919405462,
+      "grad_norm": 0.10524830967187881,
+      "learning_rate": 0.000189718574108818,
+      "loss": 0.1747,
+      "step": 3567
+    },
+    {
+      "epoch": 0.2574407446156066,
+      "grad_norm": 0.10554425418376923,
+      "learning_rate": 0.00018971568768942127,
+      "loss": 0.1796,
+      "step": 3568
+    },
+    {
+      "epoch": 0.25751289729066706,
+      "grad_norm": 0.0920037180185318,
+      "learning_rate": 0.00018971280127002456,
+      "loss": 0.1876,
+      "step": 3569
+    },
+    {
+      "epoch": 0.25758504996572745,
+      "grad_norm": 0.09690480679273605,
+      "learning_rate": 0.00018970991485062783,
+      "loss": 0.1565,
+      "step": 3570
+    },
+    {
+      "epoch": 0.2576572026407879,
+      "grad_norm": 0.10666646808385849,
+      "learning_rate": 0.00018970702843123106,
+      "loss": 0.1836,
+      "step": 3571
+    },
+    {
+      "epoch": 0.25772935531584834,
+      "grad_norm": 0.10854260623455048,
+      "learning_rate": 0.00018970414201183432,
+      "loss": 0.2031,
+      "step": 3572
+    },
+    {
+      "epoch": 0.2578015079909088,
+      "grad_norm": 0.12234905362129211,
+      "learning_rate": 0.0001897012555924376,
+      "loss": 0.177,
+      "step": 3573
+    },
+    {
+      "epoch": 0.2578736606659692,
+      "grad_norm": 0.09497665613889694,
+      "learning_rate": 0.00018969836917304085,
+      "loss": 0.2068,
+      "step": 3574
+    },
+    {
+      "epoch": 0.2579458133410296,
+      "grad_norm": 0.17623476684093475,
+      "learning_rate": 0.0001896954827536441,
+      "loss": 0.1507,
+      "step": 3575
+    },
+    {
+      "epoch": 0.25801796601609006,
+      "grad_norm": 0.09530671685934067,
+      "learning_rate": 0.00018969259633424738,
+      "loss": 0.1634,
+      "step": 3576
+    },
+    {
+      "epoch": 0.25809011869115045,
+      "grad_norm": 0.10053442418575287,
+      "learning_rate": 0.00018968970991485064,
+      "loss": 0.1796,
+      "step": 3577
+    },
+    {
+      "epoch": 0.2581622713662109,
+      "grad_norm": 0.12041661888360977,
+      "learning_rate": 0.0001896868234954539,
+      "loss": 0.1869,
+      "step": 3578
+    },
+    {
+      "epoch": 0.25823442404127134,
+      "grad_norm": 0.10001831501722336,
+      "learning_rate": 0.00018968393707605716,
+      "loss": 0.1732,
+      "step": 3579
+    },
+    {
+      "epoch": 0.2583065767163318,
+      "grad_norm": 0.09121733903884888,
+      "learning_rate": 0.00018968105065666043,
+      "loss": 0.1112,
+      "step": 3580
+    },
+    {
+      "epoch": 0.25837872939139217,
+      "grad_norm": 0.11876653879880905,
+      "learning_rate": 0.0001896781642372637,
+      "loss": 0.1667,
+      "step": 3581
+    },
+    {
+      "epoch": 0.2584508820664526,
+      "grad_norm": 0.10456927120685577,
+      "learning_rate": 0.00018967527781786693,
+      "loss": 0.1559,
+      "step": 3582
+    },
+    {
+      "epoch": 0.25852303474151306,
+      "grad_norm": 0.1214747354388237,
+      "learning_rate": 0.00018967239139847022,
+      "loss": 0.1742,
+      "step": 3583
+    },
+    {
+      "epoch": 0.25859518741657345,
+      "grad_norm": 0.10480388253927231,
+      "learning_rate": 0.00018966950497907348,
+      "loss": 0.1585,
+      "step": 3584
+    },
+    {
+      "epoch": 0.2586673400916339,
+      "grad_norm": 0.09965568035840988,
+      "learning_rate": 0.00018966661855967674,
+      "loss": 0.1745,
+      "step": 3585
+    },
+    {
+      "epoch": 0.25873949276669433,
+      "grad_norm": 0.12093115597963333,
+      "learning_rate": 0.00018966373214028,
+      "loss": 0.1547,
+      "step": 3586
+    },
+    {
+      "epoch": 0.2588116454417548,
+      "grad_norm": 0.1278897076845169,
+      "learning_rate": 0.00018966084572088324,
+      "loss": 0.149,
+      "step": 3587
+    },
+    {
+      "epoch": 0.25888379811681517,
+      "grad_norm": 0.08690732717514038,
+      "learning_rate": 0.0001896579593014865,
+      "loss": 0.165,
+      "step": 3588
+    },
+    {
+      "epoch": 0.2589559507918756,
+      "grad_norm": 0.108178049325943,
+      "learning_rate": 0.00018965507288208977,
+      "loss": 0.1891,
+      "step": 3589
+    },
+    {
+      "epoch": 0.25902810346693605,
+      "grad_norm": 0.09039347618818283,
+      "learning_rate": 0.00018965218646269306,
+      "loss": 0.182,
+      "step": 3590
+    },
+    {
+      "epoch": 0.25910025614199644,
+      "grad_norm": 0.10826592147350311,
+      "learning_rate": 0.00018964930004329632,
+      "loss": 0.1582,
+      "step": 3591
+    },
+    {
+      "epoch": 0.2591724088170569,
+      "grad_norm": 0.09563131630420685,
+      "learning_rate": 0.00018964641362389956,
+      "loss": 0.1596,
+      "step": 3592
+    },
+    {
+      "epoch": 0.25924456149211733,
+      "grad_norm": 0.13261514902114868,
+      "learning_rate": 0.00018964352720450282,
+      "loss": 0.163,
+      "step": 3593
+    },
+    {
+      "epoch": 0.2593167141671778,
+      "grad_norm": 0.12152953445911407,
+      "learning_rate": 0.00018964064078510608,
+      "loss": 0.1316,
+      "step": 3594
+    },
+    {
+      "epoch": 0.25938886684223816,
+      "grad_norm": 0.11141938716173172,
+      "learning_rate": 0.00018963775436570934,
+      "loss": 0.172,
+      "step": 3595
+    },
+    {
+      "epoch": 0.2594610195172986,
+      "grad_norm": 0.13898305594921112,
+      "learning_rate": 0.0001896348679463126,
+      "loss": 0.2022,
+      "step": 3596
+    },
+    {
+      "epoch": 0.25953317219235905,
+      "grad_norm": 0.11607407033443451,
+      "learning_rate": 0.00018963198152691587,
+      "loss": 0.1919,
+      "step": 3597
+    },
+    {
+      "epoch": 0.25960532486741944,
+      "grad_norm": 0.12388519197702408,
+      "learning_rate": 0.00018962909510751913,
+      "loss": 0.161,
+      "step": 3598
+    },
+    {
+      "epoch": 0.2596774775424799,
+      "grad_norm": 0.09459841996431351,
+      "learning_rate": 0.0001896262086881224,
+      "loss": 0.1909,
+      "step": 3599
+    },
+    {
+      "epoch": 0.25974963021754033,
+      "grad_norm": 0.10941751301288605,
+      "learning_rate": 0.00018962332226872566,
+      "loss": 0.159,
+      "step": 3600
+    },
+    {
+      "epoch": 0.2598217828926007,
+      "grad_norm": 0.1132725328207016,
+      "learning_rate": 0.00018962043584932892,
+      "loss": 0.1276,
+      "step": 3601
+    },
+    {
+      "epoch": 0.25989393556766116,
+      "grad_norm": 0.11622916907072067,
+      "learning_rate": 0.00018961754942993218,
+      "loss": 0.1255,
+      "step": 3602
+    },
+    {
+      "epoch": 0.2599660882427216,
+      "grad_norm": 0.10722587257623672,
+      "learning_rate": 0.00018961466301053542,
+      "loss": 0.135,
+      "step": 3603
+    },
+    {
+      "epoch": 0.26003824091778205,
+      "grad_norm": 0.09481862187385559,
+      "learning_rate": 0.0001896117765911387,
+      "loss": 0.156,
+      "step": 3604
+    },
+    {
+      "epoch": 0.26011039359284244,
+      "grad_norm": 0.1511927843093872,
+      "learning_rate": 0.00018960889017174197,
+      "loss": 0.1911,
+      "step": 3605
+    },
+    {
+      "epoch": 0.2601825462679029,
+      "grad_norm": 0.11891227215528488,
+      "learning_rate": 0.00018960600375234524,
+      "loss": 0.1898,
+      "step": 3606
+    },
+    {
+      "epoch": 0.2602546989429633,
+      "grad_norm": 0.11076851189136505,
+      "learning_rate": 0.0001896031173329485,
+      "loss": 0.0932,
+      "step": 3607
+    },
+    {
+      "epoch": 0.2603268516180237,
+      "grad_norm": 0.09624168276786804,
+      "learning_rate": 0.00018960023091355174,
+      "loss": 0.1357,
+      "step": 3608
+    },
+    {
+      "epoch": 0.26039900429308416,
+      "grad_norm": 0.10173476487398148,
+      "learning_rate": 0.000189597344494155,
+      "loss": 0.1637,
+      "step": 3609
+    },
+    {
+      "epoch": 0.2604711569681446,
+      "grad_norm": 0.1195240169763565,
+      "learning_rate": 0.00018959445807475826,
+      "loss": 0.1517,
+      "step": 3610
+    },
+    {
+      "epoch": 0.26054330964320505,
+      "grad_norm": 0.13871614634990692,
+      "learning_rate": 0.00018959157165536155,
+      "loss": 0.2041,
+      "step": 3611
+    },
+    {
+      "epoch": 0.26061546231826543,
+      "grad_norm": 0.13739337027072906,
+      "learning_rate": 0.00018958868523596481,
+      "loss": 0.2188,
+      "step": 3612
+    },
+    {
+      "epoch": 0.2606876149933259,
+      "grad_norm": 0.12913809716701508,
+      "learning_rate": 0.00018958579881656805,
+      "loss": 0.1665,
+      "step": 3613
+    },
+    {
+      "epoch": 0.2607597676683863,
+      "grad_norm": 0.13116084039211273,
+      "learning_rate": 0.0001895829123971713,
+      "loss": 0.1634,
+      "step": 3614
+    },
+    {
+      "epoch": 0.2608319203434467,
+      "grad_norm": 0.12041059136390686,
+      "learning_rate": 0.00018958002597777458,
+      "loss": 0.1619,
+      "step": 3615
+    },
+    {
+      "epoch": 0.26090407301850715,
+      "grad_norm": 0.13146141171455383,
+      "learning_rate": 0.00018957713955837784,
+      "loss": 0.1554,
+      "step": 3616
+    },
+    {
+      "epoch": 0.2609762256935676,
+      "grad_norm": 0.10757823288440704,
+      "learning_rate": 0.0001895742531389811,
+      "loss": 0.2041,
+      "step": 3617
+    },
+    {
+      "epoch": 0.26104837836862804,
+      "grad_norm": 0.11519914865493774,
+      "learning_rate": 0.00018957136671958436,
+      "loss": 0.1857,
+      "step": 3618
+    },
+    {
+      "epoch": 0.26112053104368843,
+      "grad_norm": 0.12142335623502731,
+      "learning_rate": 0.00018956848030018763,
+      "loss": 0.1589,
+      "step": 3619
+    },
+    {
+      "epoch": 0.2611926837187489,
+      "grad_norm": 0.1048182025551796,
+      "learning_rate": 0.0001895655938807909,
+      "loss": 0.1479,
+      "step": 3620
+    },
+    {
+      "epoch": 0.2612648363938093,
+      "grad_norm": 0.09723210334777832,
+      "learning_rate": 0.00018956270746139415,
+      "loss": 0.1832,
+      "step": 3621
+    },
+    {
+      "epoch": 0.2613369890688697,
+      "grad_norm": 0.10473082959651947,
+      "learning_rate": 0.00018955982104199742,
+      "loss": 0.1649,
+      "step": 3622
+    },
+    {
+      "epoch": 0.26140914174393015,
+      "grad_norm": 0.11956755071878433,
+      "learning_rate": 0.00018955693462260068,
+      "loss": 0.1179,
+      "step": 3623
+    },
+    {
+      "epoch": 0.2614812944189906,
+      "grad_norm": 0.10788846760988235,
+      "learning_rate": 0.00018955404820320392,
+      "loss": 0.1832,
+      "step": 3624
+    },
+    {
+      "epoch": 0.26155344709405104,
+      "grad_norm": 0.10262494534254074,
+      "learning_rate": 0.0001895511617838072,
+      "loss": 0.1945,
+      "step": 3625
+    },
+    {
+      "epoch": 0.2616255997691114,
+      "grad_norm": 0.12115581333637238,
+      "learning_rate": 0.00018954827536441047,
+      "loss": 0.2295,
+      "step": 3626
+    },
+    {
+      "epoch": 0.26169775244417187,
+      "grad_norm": 0.13347704708576202,
+      "learning_rate": 0.00018954538894501373,
+      "loss": 0.1444,
+      "step": 3627
+    },
+    {
+      "epoch": 0.2617699051192323,
+      "grad_norm": 0.09329728782176971,
+      "learning_rate": 0.000189542502525617,
+      "loss": 0.1849,
+      "step": 3628
+    },
+    {
+      "epoch": 0.2618420577942927,
+      "grad_norm": 0.1016915887594223,
+      "learning_rate": 0.00018953961610622023,
+      "loss": 0.1416,
+      "step": 3629
+    },
+    {
+      "epoch": 0.26191421046935315,
+      "grad_norm": 0.1248478889465332,
+      "learning_rate": 0.0001895367296868235,
+      "loss": 0.1706,
+      "step": 3630
+    },
+    {
+      "epoch": 0.2619863631444136,
+      "grad_norm": 0.09999848157167435,
+      "learning_rate": 0.00018953384326742676,
+      "loss": 0.1512,
+      "step": 3631
+    },
+    {
+      "epoch": 0.262058515819474,
+      "grad_norm": 0.1144399642944336,
+      "learning_rate": 0.00018953095684803002,
+      "loss": 0.1192,
+      "step": 3632
+    },
+    {
+      "epoch": 0.2621306684945344,
+      "grad_norm": 0.1034519225358963,
+      "learning_rate": 0.0001895280704286333,
+      "loss": 0.166,
+      "step": 3633
+    },
+    {
+      "epoch": 0.26220282116959487,
+      "grad_norm": 0.14921246469020844,
+      "learning_rate": 0.00018952518400923654,
+      "loss": 0.1258,
+      "step": 3634
+    },
+    {
+      "epoch": 0.2622749738446553,
+      "grad_norm": 0.10548774152994156,
+      "learning_rate": 0.0001895222975898398,
+      "loss": 0.1511,
+      "step": 3635
+    },
+    {
+      "epoch": 0.2623471265197157,
+      "grad_norm": 0.12472370266914368,
+      "learning_rate": 0.00018951941117044307,
+      "loss": 0.1733,
+      "step": 3636
+    },
+    {
+      "epoch": 0.26241927919477614,
+      "grad_norm": 0.10761553794145584,
+      "learning_rate": 0.00018951652475104633,
+      "loss": 0.194,
+      "step": 3637
+    },
+    {
+      "epoch": 0.2624914318698366,
+      "grad_norm": 0.11572976410388947,
+      "learning_rate": 0.0001895136383316496,
+      "loss": 0.1905,
+      "step": 3638
+    },
+    {
+      "epoch": 0.262563584544897,
+      "grad_norm": 0.1269570291042328,
+      "learning_rate": 0.00018951075191225286,
+      "loss": 0.1376,
+      "step": 3639
+    },
+    {
+      "epoch": 0.2626357372199574,
+      "grad_norm": 0.10389404743909836,
+      "learning_rate": 0.00018950786549285612,
+      "loss": 0.1459,
+      "step": 3640
+    },
+    {
+      "epoch": 0.26270788989501787,
+      "grad_norm": 0.10264139622449875,
+      "learning_rate": 0.00018950497907345938,
+      "loss": 0.1984,
+      "step": 3641
+    },
+    {
+      "epoch": 0.2627800425700783,
+      "grad_norm": 0.1579146385192871,
+      "learning_rate": 0.00018950209265406265,
+      "loss": 0.163,
+      "step": 3642
+    },
+    {
+      "epoch": 0.2628521952451387,
+      "grad_norm": 0.12138903141021729,
+      "learning_rate": 0.0001894992062346659,
+      "loss": 0.1668,
+      "step": 3643
+    },
+    {
+      "epoch": 0.26292434792019914,
+      "grad_norm": 0.11000584810972214,
+      "learning_rate": 0.00018949631981526917,
+      "loss": 0.1787,
+      "step": 3644
+    },
+    {
+      "epoch": 0.2629965005952596,
+      "grad_norm": 0.11004672199487686,
+      "learning_rate": 0.0001894934333958724,
+      "loss": 0.1586,
+      "step": 3645
+    },
+    {
+      "epoch": 0.26306865327032,
+      "grad_norm": 0.13046477735042572,
+      "learning_rate": 0.00018949054697647567,
+      "loss": 0.196,
+      "step": 3646
+    },
+    {
+      "epoch": 0.2631408059453804,
+      "grad_norm": 0.1197001039981842,
+      "learning_rate": 0.00018948766055707896,
+      "loss": 0.1421,
+      "step": 3647
+    },
+    {
+      "epoch": 0.26321295862044086,
+      "grad_norm": 0.11334479600191116,
+      "learning_rate": 0.00018948477413768223,
+      "loss": 0.1675,
+      "step": 3648
+    },
+    {
+      "epoch": 0.2632851112955013,
+      "grad_norm": 0.11990846693515778,
+      "learning_rate": 0.0001894818877182855,
+      "loss": 0.1294,
+      "step": 3649
+    },
+    {
+      "epoch": 0.2633572639705617,
+      "grad_norm": 0.11262289434671402,
+      "learning_rate": 0.00018947900129888872,
+      "loss": 0.1453,
+      "step": 3650
+    },
+    {
+      "epoch": 0.26342941664562214,
+      "grad_norm": 0.1628699153661728,
+      "learning_rate": 0.000189476114879492,
+      "loss": 0.2236,
+      "step": 3651
+    },
+    {
+      "epoch": 0.2635015693206826,
+      "grad_norm": 0.13269752264022827,
+      "learning_rate": 0.00018947322846009525,
+      "loss": 0.2132,
+      "step": 3652
+    },
+    {
+      "epoch": 0.26357372199574297,
+      "grad_norm": 0.09555750340223312,
+      "learning_rate": 0.0001894703420406985,
+      "loss": 0.1869,
+      "step": 3653
+    },
+    {
+      "epoch": 0.2636458746708034,
+      "grad_norm": 0.1075914055109024,
+      "learning_rate": 0.0001894674556213018,
+      "loss": 0.1991,
+      "step": 3654
+    },
+    {
+      "epoch": 0.26371802734586386,
+      "grad_norm": 0.10003098845481873,
+      "learning_rate": 0.00018946456920190504,
+      "loss": 0.1083,
+      "step": 3655
+    },
+    {
+      "epoch": 0.2637901800209243,
+      "grad_norm": 0.11860654503107071,
+      "learning_rate": 0.0001894616827825083,
+      "loss": 0.2015,
+      "step": 3656
+    },
+    {
+      "epoch": 0.2638623326959847,
+      "grad_norm": 0.09559154510498047,
+      "learning_rate": 0.00018945879636311156,
+      "loss": 0.1851,
+      "step": 3657
+    },
+    {
+      "epoch": 0.26393448537104514,
+      "grad_norm": 0.10533902049064636,
+      "learning_rate": 0.00018945590994371483,
+      "loss": 0.163,
+      "step": 3658
+    },
+    {
+      "epoch": 0.2640066380461056,
+      "grad_norm": 0.10047412663698196,
+      "learning_rate": 0.0001894530235243181,
+      "loss": 0.1725,
+      "step": 3659
+    },
+    {
+      "epoch": 0.26407879072116597,
+      "grad_norm": 0.08557490259408951,
+      "learning_rate": 0.00018945013710492135,
+      "loss": 0.1207,
+      "step": 3660
+    },
+    {
+      "epoch": 0.2641509433962264,
+      "grad_norm": 0.10779914259910583,
+      "learning_rate": 0.00018944725068552462,
+      "loss": 0.1795,
+      "step": 3661
+    },
+    {
+      "epoch": 0.26422309607128686,
+      "grad_norm": 0.1896720826625824,
+      "learning_rate": 0.00018944436426612788,
+      "loss": 0.162,
+      "step": 3662
+    },
+    {
+      "epoch": 0.26429524874634724,
+      "grad_norm": 0.11711643636226654,
+      "learning_rate": 0.00018944147784673114,
+      "loss": 0.2044,
+      "step": 3663
+    },
+    {
+      "epoch": 0.2643674014214077,
+      "grad_norm": 0.0934373065829277,
+      "learning_rate": 0.0001894385914273344,
+      "loss": 0.1858,
+      "step": 3664
+    },
+    {
+      "epoch": 0.26443955409646813,
+      "grad_norm": 0.12046694755554199,
+      "learning_rate": 0.00018943570500793767,
+      "loss": 0.1324,
+      "step": 3665
+    },
+    {
+      "epoch": 0.2645117067715286,
+      "grad_norm": 0.11609145253896713,
+      "learning_rate": 0.0001894328185885409,
+      "loss": 0.1798,
+      "step": 3666
+    },
+    {
+      "epoch": 0.26458385944658896,
+      "grad_norm": 0.15844784677028656,
+      "learning_rate": 0.00018942993216914417,
+      "loss": 0.1913,
+      "step": 3667
+    },
+    {
+      "epoch": 0.2646560121216494,
+      "grad_norm": 0.09626557677984238,
+      "learning_rate": 0.00018942704574974746,
+      "loss": 0.1481,
+      "step": 3668
+    },
+    {
+      "epoch": 0.26472816479670985,
+      "grad_norm": 0.10835087299346924,
+      "learning_rate": 0.00018942415933035072,
+      "loss": 0.1756,
+      "step": 3669
+    },
+    {
+      "epoch": 0.26480031747177024,
+      "grad_norm": 0.10344719886779785,
+      "learning_rate": 0.00018942127291095398,
+      "loss": 0.1515,
+      "step": 3670
+    },
+    {
+      "epoch": 0.2648724701468307,
+      "grad_norm": 0.09639471769332886,
+      "learning_rate": 0.00018941838649155722,
+      "loss": 0.1462,
+      "step": 3671
+    },
+    {
+      "epoch": 0.26494462282189113,
+      "grad_norm": 0.08890553563833237,
+      "learning_rate": 0.00018941550007216048,
+      "loss": 0.1329,
+      "step": 3672
+    },
+    {
+      "epoch": 0.2650167754969516,
+      "grad_norm": 0.10019133985042572,
+      "learning_rate": 0.00018941261365276374,
+      "loss": 0.1715,
+      "step": 3673
+    },
+    {
+      "epoch": 0.26508892817201196,
+      "grad_norm": 0.106917604804039,
+      "learning_rate": 0.000189409727233367,
+      "loss": 0.1643,
+      "step": 3674
+    },
+    {
+      "epoch": 0.2651610808470724,
+      "grad_norm": 0.12348364293575287,
+      "learning_rate": 0.0001894068408139703,
+      "loss": 0.1493,
+      "step": 3675
+    },
+    {
+      "epoch": 0.26523323352213285,
+      "grad_norm": 0.11973647773265839,
+      "learning_rate": 0.00018940395439457353,
+      "loss": 0.1608,
+      "step": 3676
+    },
+    {
+      "epoch": 0.26530538619719324,
+      "grad_norm": 0.12044582515954971,
+      "learning_rate": 0.0001894010679751768,
+      "loss": 0.1581,
+      "step": 3677
+    },
+    {
+      "epoch": 0.2653775388722537,
+      "grad_norm": 0.11655533313751221,
+      "learning_rate": 0.00018939818155578006,
+      "loss": 0.1468,
+      "step": 3678
+    },
+    {
+      "epoch": 0.2654496915473141,
+      "grad_norm": 0.13247819244861603,
+      "learning_rate": 0.00018939529513638332,
+      "loss": 0.2026,
+      "step": 3679
+    },
+    {
+      "epoch": 0.26552184422237457,
+      "grad_norm": 0.10399710386991501,
+      "learning_rate": 0.00018939240871698658,
+      "loss": 0.1578,
+      "step": 3680
+    },
+    {
+      "epoch": 0.26559399689743496,
+      "grad_norm": 0.14507174491882324,
+      "learning_rate": 0.00018938952229758985,
+      "loss": 0.156,
+      "step": 3681
+    },
+    {
+      "epoch": 0.2656661495724954,
+      "grad_norm": 0.10088624060153961,
+      "learning_rate": 0.0001893866358781931,
+      "loss": 0.1309,
+      "step": 3682
+    },
+    {
+      "epoch": 0.26573830224755585,
+      "grad_norm": 0.10670887678861618,
+      "learning_rate": 0.00018938374945879637,
+      "loss": 0.186,
+      "step": 3683
+    },
+    {
+      "epoch": 0.26581045492261623,
+      "grad_norm": 0.10915090143680573,
+      "learning_rate": 0.00018938086303939964,
+      "loss": 0.1492,
+      "step": 3684
+    },
+    {
+      "epoch": 0.2658826075976767,
+      "grad_norm": 0.11927489191293716,
+      "learning_rate": 0.0001893779766200029,
+      "loss": 0.1882,
+      "step": 3685
+    },
+    {
+      "epoch": 0.2659547602727371,
+      "grad_norm": 0.09340450912714005,
+      "learning_rate": 0.00018937509020060616,
+      "loss": 0.1327,
+      "step": 3686
+    },
+    {
+      "epoch": 0.26602691294779757,
+      "grad_norm": 0.1078324019908905,
+      "learning_rate": 0.00018937220378120942,
+      "loss": 0.1279,
+      "step": 3687
+    },
+    {
+      "epoch": 0.26609906562285796,
+      "grad_norm": 0.09562065452337265,
+      "learning_rate": 0.00018936931736181266,
+      "loss": 0.1378,
+      "step": 3688
+    },
+    {
+      "epoch": 0.2661712182979184,
+      "grad_norm": 0.10261200368404388,
+      "learning_rate": 0.00018936643094241595,
+      "loss": 0.1695,
+      "step": 3689
+    },
+    {
+      "epoch": 0.26624337097297884,
+      "grad_norm": 0.10890012234449387,
+      "learning_rate": 0.00018936354452301921,
+      "loss": 0.1438,
+      "step": 3690
+    },
+    {
+      "epoch": 0.26631552364803923,
+      "grad_norm": 0.09240952134132385,
+      "learning_rate": 0.00018936065810362248,
+      "loss": 0.1076,
+      "step": 3691
+    },
+    {
+      "epoch": 0.2663876763230997,
+      "grad_norm": 0.1261812299489975,
+      "learning_rate": 0.00018935777168422574,
+      "loss": 0.166,
+      "step": 3692
+    },
+    {
+      "epoch": 0.2664598289981601,
+      "grad_norm": 0.12737056612968445,
+      "learning_rate": 0.00018935488526482898,
+      "loss": 0.1715,
+      "step": 3693
+    },
+    {
+      "epoch": 0.2665319816732205,
+      "grad_norm": 0.12897031009197235,
+      "learning_rate": 0.00018935199884543224,
+      "loss": 0.1753,
+      "step": 3694
+    },
+    {
+      "epoch": 0.26660413434828095,
+      "grad_norm": 0.11706625670194626,
+      "learning_rate": 0.0001893491124260355,
+      "loss": 0.1702,
+      "step": 3695
+    },
+    {
+      "epoch": 0.2666762870233414,
+      "grad_norm": 0.11923673748970032,
+      "learning_rate": 0.0001893462260066388,
+      "loss": 0.1451,
+      "step": 3696
+    },
+    {
+      "epoch": 0.26674843969840184,
+      "grad_norm": 0.09781758487224579,
+      "learning_rate": 0.00018934333958724205,
+      "loss": 0.1429,
+      "step": 3697
+    },
+    {
+      "epoch": 0.26682059237346223,
+      "grad_norm": 0.1293449103832245,
+      "learning_rate": 0.0001893404531678453,
+      "loss": 0.14,
+      "step": 3698
+    },
+    {
+      "epoch": 0.2668927450485227,
+      "grad_norm": 0.10683408379554749,
+      "learning_rate": 0.00018933756674844855,
+      "loss": 0.1595,
+      "step": 3699
+    },
+    {
+      "epoch": 0.2669648977235831,
+      "grad_norm": 0.10115299373865128,
+      "learning_rate": 0.00018933468032905182,
+      "loss": 0.1624,
+      "step": 3700
+    },
+    {
+      "epoch": 0.2670370503986435,
+      "grad_norm": 0.08999868482351303,
+      "learning_rate": 0.00018933179390965508,
+      "loss": 0.1804,
+      "step": 3701
+    },
+    {
+      "epoch": 0.26710920307370395,
+      "grad_norm": 0.09947782754898071,
+      "learning_rate": 0.00018932890749025834,
+      "loss": 0.1512,
+      "step": 3702
+    },
+    {
+      "epoch": 0.2671813557487644,
+      "grad_norm": 0.11422078311443329,
+      "learning_rate": 0.0001893260210708616,
+      "loss": 0.1508,
+      "step": 3703
+    },
+    {
+      "epoch": 0.26725350842382484,
+      "grad_norm": 0.10992544144392014,
+      "learning_rate": 0.00018932313465146487,
+      "loss": 0.1943,
+      "step": 3704
+    },
+    {
+      "epoch": 0.2673256610988852,
+      "grad_norm": 0.09266640245914459,
+      "learning_rate": 0.00018932024823206813,
+      "loss": 0.1889,
+      "step": 3705
+    },
+    {
+      "epoch": 0.26739781377394567,
+      "grad_norm": 0.11159055680036545,
+      "learning_rate": 0.0001893173618126714,
+      "loss": 0.1612,
+      "step": 3706
+    },
+    {
+      "epoch": 0.2674699664490061,
+      "grad_norm": 0.11246234178543091,
+      "learning_rate": 0.00018931447539327466,
+      "loss": 0.1306,
+      "step": 3707
+    },
+    {
+      "epoch": 0.2675421191240665,
+      "grad_norm": 0.11322403699159622,
+      "learning_rate": 0.00018931158897387792,
+      "loss": 0.1338,
+      "step": 3708
+    },
+    {
+      "epoch": 0.26761427179912695,
+      "grad_norm": 0.11647389084100723,
+      "learning_rate": 0.00018930870255448115,
+      "loss": 0.1287,
+      "step": 3709
+    },
+    {
+      "epoch": 0.2676864244741874,
+      "grad_norm": 0.10262130945920944,
+      "learning_rate": 0.00018930581613508444,
+      "loss": 0.1796,
+      "step": 3710
+    },
+    {
+      "epoch": 0.26775857714924783,
+      "grad_norm": 0.13843820989131927,
+      "learning_rate": 0.0001893029297156877,
+      "loss": 0.2134,
+      "step": 3711
+    },
+    {
+      "epoch": 0.2678307298243082,
+      "grad_norm": 0.09976322203874588,
+      "learning_rate": 0.00018930004329629097,
+      "loss": 0.1382,
+      "step": 3712
+    },
+    {
+      "epoch": 0.26790288249936867,
+      "grad_norm": 0.10742239654064178,
+      "learning_rate": 0.00018929715687689423,
+      "loss": 0.1685,
+      "step": 3713
+    },
+    {
+      "epoch": 0.2679750351744291,
+      "grad_norm": 0.13246366381645203,
+      "learning_rate": 0.00018929427045749747,
+      "loss": 0.2164,
+      "step": 3714
+    },
+    {
+      "epoch": 0.2680471878494895,
+      "grad_norm": 0.12941241264343262,
+      "learning_rate": 0.00018929138403810073,
+      "loss": 0.1481,
+      "step": 3715
+    },
+    {
+      "epoch": 0.26811934052454994,
+      "grad_norm": 0.09542439877986908,
+      "learning_rate": 0.000189288497618704,
+      "loss": 0.1766,
+      "step": 3716
+    },
+    {
+      "epoch": 0.2681914931996104,
+      "grad_norm": 0.09997952729463577,
+      "learning_rate": 0.00018928561119930729,
+      "loss": 0.145,
+      "step": 3717
+    },
+    {
+      "epoch": 0.26826364587467083,
+      "grad_norm": 0.09770691394805908,
+      "learning_rate": 0.00018928272477991055,
+      "loss": 0.1748,
+      "step": 3718
+    },
+    {
+      "epoch": 0.2683357985497312,
+      "grad_norm": 0.10574066638946533,
+      "learning_rate": 0.00018927983836051378,
+      "loss": 0.1815,
+      "step": 3719
+    },
+    {
+      "epoch": 0.26840795122479166,
+      "grad_norm": 0.1131848618388176,
+      "learning_rate": 0.00018927695194111705,
+      "loss": 0.1126,
+      "step": 3720
+    },
+    {
+      "epoch": 0.2684801038998521,
+      "grad_norm": 0.1036604568362236,
+      "learning_rate": 0.0001892740655217203,
+      "loss": 0.1304,
+      "step": 3721
+    },
+    {
+      "epoch": 0.2685522565749125,
+      "grad_norm": 0.111257404088974,
+      "learning_rate": 0.00018927117910232357,
+      "loss": 0.1506,
+      "step": 3722
+    },
+    {
+      "epoch": 0.26862440924997294,
+      "grad_norm": 0.0995304137468338,
+      "learning_rate": 0.00018926829268292684,
+      "loss": 0.1737,
+      "step": 3723
+    },
+    {
+      "epoch": 0.2686965619250334,
+      "grad_norm": 0.11207863688468933,
+      "learning_rate": 0.0001892654062635301,
+      "loss": 0.1918,
+      "step": 3724
+    },
+    {
+      "epoch": 0.26876871460009377,
+      "grad_norm": 0.139298215508461,
+      "learning_rate": 0.00018926251984413336,
+      "loss": 0.174,
+      "step": 3725
+    },
+    {
+      "epoch": 0.2688408672751542,
+      "grad_norm": 0.12480378895998001,
+      "learning_rate": 0.00018925963342473662,
+      "loss": 0.1476,
+      "step": 3726
+    },
+    {
+      "epoch": 0.26891301995021466,
+      "grad_norm": 0.11126791685819626,
+      "learning_rate": 0.0001892567470053399,
+      "loss": 0.1864,
+      "step": 3727
+    },
+    {
+      "epoch": 0.2689851726252751,
+      "grad_norm": 0.13878969848155975,
+      "learning_rate": 0.00018925386058594315,
+      "loss": 0.1637,
+      "step": 3728
+    },
+    {
+      "epoch": 0.2690573253003355,
+      "grad_norm": 0.11918002367019653,
+      "learning_rate": 0.0001892509741665464,
+      "loss": 0.182,
+      "step": 3729
+    },
+    {
+      "epoch": 0.26912947797539594,
+      "grad_norm": 0.1296512633562088,
+      "learning_rate": 0.00018924808774714965,
+      "loss": 0.1888,
+      "step": 3730
+    },
+    {
+      "epoch": 0.2692016306504564,
+      "grad_norm": 0.10598678141832352,
+      "learning_rate": 0.00018924520132775294,
+      "loss": 0.195,
+      "step": 3731
+    },
+    {
+      "epoch": 0.26927378332551677,
+      "grad_norm": 0.14444386959075928,
+      "learning_rate": 0.0001892423149083562,
+      "loss": 0.148,
+      "step": 3732
+    },
+    {
+      "epoch": 0.2693459360005772,
+      "grad_norm": 0.10955000668764114,
+      "learning_rate": 0.00018923942848895947,
+      "loss": 0.1118,
+      "step": 3733
+    },
+    {
+      "epoch": 0.26941808867563766,
+      "grad_norm": 0.13034574687480927,
+      "learning_rate": 0.00018923654206956273,
+      "loss": 0.1556,
+      "step": 3734
+    },
+    {
+      "epoch": 0.2694902413506981,
+      "grad_norm": 0.12457529455423355,
+      "learning_rate": 0.00018923365565016596,
+      "loss": 0.1635,
+      "step": 3735
+    },
+    {
+      "epoch": 0.2695623940257585,
+      "grad_norm": 0.13422954082489014,
+      "learning_rate": 0.00018923076923076923,
+      "loss": 0.1447,
+      "step": 3736
+    },
+    {
+      "epoch": 0.26963454670081893,
+      "grad_norm": 0.10142651200294495,
+      "learning_rate": 0.0001892278828113725,
+      "loss": 0.2051,
+      "step": 3737
+    },
+    {
+      "epoch": 0.2697066993758794,
+      "grad_norm": 0.13308723270893097,
+      "learning_rate": 0.00018922499639197578,
+      "loss": 0.2012,
+      "step": 3738
+    },
+    {
+      "epoch": 0.26977885205093977,
+      "grad_norm": 0.12129965424537659,
+      "learning_rate": 0.00018922210997257904,
+      "loss": 0.1591,
+      "step": 3739
+    },
+    {
+      "epoch": 0.2698510047260002,
+      "grad_norm": 0.1246509999036789,
+      "learning_rate": 0.00018921922355318228,
+      "loss": 0.1245,
+      "step": 3740
+    },
+    {
+      "epoch": 0.26992315740106065,
+      "grad_norm": 0.12997084856033325,
+      "learning_rate": 0.00018921633713378554,
+      "loss": 0.1159,
+      "step": 3741
+    },
+    {
+      "epoch": 0.2699953100761211,
+      "grad_norm": 0.10414636880159378,
+      "learning_rate": 0.0001892134507143888,
+      "loss": 0.1973,
+      "step": 3742
+    },
+    {
+      "epoch": 0.2700674627511815,
+      "grad_norm": 0.097514308989048,
+      "learning_rate": 0.00018921056429499207,
+      "loss": 0.1326,
+      "step": 3743
+    },
+    {
+      "epoch": 0.27013961542624193,
+      "grad_norm": 0.09992020577192307,
+      "learning_rate": 0.00018920767787559533,
+      "loss": 0.1957,
+      "step": 3744
+    },
+    {
+      "epoch": 0.2702117681013024,
+      "grad_norm": 0.10262467712163925,
+      "learning_rate": 0.0001892047914561986,
+      "loss": 0.1407,
+      "step": 3745
+    },
+    {
+      "epoch": 0.27028392077636276,
+      "grad_norm": 0.0982523113489151,
+      "learning_rate": 0.00018920190503680186,
+      "loss": 0.1374,
+      "step": 3746
+    },
+    {
+      "epoch": 0.2703560734514232,
+      "grad_norm": 0.10197173804044724,
+      "learning_rate": 0.00018919901861740512,
+      "loss": 0.1541,
+      "step": 3747
+    },
+    {
+      "epoch": 0.27042822612648365,
+      "grad_norm": 0.12074144929647446,
+      "learning_rate": 0.00018919613219800838,
+      "loss": 0.1624,
+      "step": 3748
+    },
+    {
+      "epoch": 0.2705003788015441,
+      "grad_norm": 0.11587917059659958,
+      "learning_rate": 0.00018919324577861164,
+      "loss": 0.2164,
+      "step": 3749
+    },
+    {
+      "epoch": 0.2705725314766045,
+      "grad_norm": 0.11887791752815247,
+      "learning_rate": 0.0001891903593592149,
+      "loss": 0.172,
+      "step": 3750
+    },
+    {
+      "epoch": 0.2706446841516649,
+      "grad_norm": 0.127192422747612,
+      "learning_rate": 0.00018918747293981814,
+      "loss": 0.1608,
+      "step": 3751
+    },
+    {
+      "epoch": 0.27071683682672537,
+      "grad_norm": 0.12658534944057465,
+      "learning_rate": 0.00018918458652042143,
+      "loss": 0.1763,
+      "step": 3752
+    },
+    {
+      "epoch": 0.27078898950178576,
+      "grad_norm": 0.11674284189939499,
+      "learning_rate": 0.0001891817001010247,
+      "loss": 0.1629,
+      "step": 3753
+    },
+    {
+      "epoch": 0.2708611421768462,
+      "grad_norm": 0.09319207817316055,
+      "learning_rate": 0.00018917881368162796,
+      "loss": 0.1449,
+      "step": 3754
+    },
+    {
+      "epoch": 0.27093329485190665,
+      "grad_norm": 0.11137358844280243,
+      "learning_rate": 0.00018917592726223122,
+      "loss": 0.1375,
+      "step": 3755
+    },
+    {
+      "epoch": 0.27100544752696704,
+      "grad_norm": 0.11353585869073868,
+      "learning_rate": 0.00018917304084283446,
+      "loss": 0.1249,
+      "step": 3756
+    },
+    {
+      "epoch": 0.2710776002020275,
+      "grad_norm": 0.09032389521598816,
+      "learning_rate": 0.00018917015442343772,
+      "loss": 0.1456,
+      "step": 3757
+    },
+    {
+      "epoch": 0.2711497528770879,
+      "grad_norm": 0.14036047458648682,
+      "learning_rate": 0.00018916726800404098,
+      "loss": 0.1609,
+      "step": 3758
+    },
+    {
+      "epoch": 0.27122190555214837,
+      "grad_norm": 0.13396713137626648,
+      "learning_rate": 0.00018916438158464427,
+      "loss": 0.1756,
+      "step": 3759
+    },
+    {
+      "epoch": 0.27129405822720876,
+      "grad_norm": 0.0830783098936081,
+      "learning_rate": 0.00018916149516524754,
+      "loss": 0.1528,
+      "step": 3760
+    },
+    {
+      "epoch": 0.2713662109022692,
+      "grad_norm": 0.12058752775192261,
+      "learning_rate": 0.00018915860874585077,
+      "loss": 0.1757,
+      "step": 3761
+    },
+    {
+      "epoch": 0.27143836357732964,
+      "grad_norm": 0.12686319649219513,
+      "learning_rate": 0.00018915572232645404,
+      "loss": 0.1608,
+      "step": 3762
+    },
+    {
+      "epoch": 0.27151051625239003,
+      "grad_norm": 0.12383957207202911,
+      "learning_rate": 0.0001891528359070573,
+      "loss": 0.1136,
+      "step": 3763
+    },
+    {
+      "epoch": 0.2715826689274505,
+      "grad_norm": 0.14094068109989166,
+      "learning_rate": 0.00018914994948766056,
+      "loss": 0.1892,
+      "step": 3764
+    },
+    {
+      "epoch": 0.2716548216025109,
+      "grad_norm": 0.10473047941923141,
+      "learning_rate": 0.00018914706306826382,
+      "loss": 0.1918,
+      "step": 3765
+    },
+    {
+      "epoch": 0.27172697427757136,
+      "grad_norm": 0.13546206057071686,
+      "learning_rate": 0.0001891441766488671,
+      "loss": 0.1736,
+      "step": 3766
+    },
+    {
+      "epoch": 0.27179912695263175,
+      "grad_norm": 0.1025552973151207,
+      "learning_rate": 0.00018914129022947035,
+      "loss": 0.1631,
+      "step": 3767
+    },
+    {
+      "epoch": 0.2718712796276922,
+      "grad_norm": 0.1197451502084732,
+      "learning_rate": 0.0001891384038100736,
+      "loss": 0.22,
+      "step": 3768
+    },
+    {
+      "epoch": 0.27194343230275264,
+      "grad_norm": 0.10611274093389511,
+      "learning_rate": 0.00018913551739067688,
+      "loss": 0.1648,
+      "step": 3769
+    },
+    {
+      "epoch": 0.27201558497781303,
+      "grad_norm": 0.12307115644216537,
+      "learning_rate": 0.00018913263097128014,
+      "loss": 0.1849,
+      "step": 3770
+    },
+    {
+      "epoch": 0.2720877376528735,
+      "grad_norm": 0.10913196206092834,
+      "learning_rate": 0.0001891297445518834,
+      "loss": 0.1617,
+      "step": 3771
+    },
+    {
+      "epoch": 0.2721598903279339,
+      "grad_norm": 0.09902161359786987,
+      "learning_rate": 0.00018912685813248664,
+      "loss": 0.1149,
+      "step": 3772
+    },
+    {
+      "epoch": 0.27223204300299436,
+      "grad_norm": 0.11324041336774826,
+      "learning_rate": 0.00018912397171308993,
+      "loss": 0.1259,
+      "step": 3773
+    },
+    {
+      "epoch": 0.27230419567805475,
+      "grad_norm": 0.10454663634300232,
+      "learning_rate": 0.0001891210852936932,
+      "loss": 0.1425,
+      "step": 3774
+    },
+    {
+      "epoch": 0.2723763483531152,
+      "grad_norm": 0.10768377780914307,
+      "learning_rate": 0.00018911819887429645,
+      "loss": 0.1837,
+      "step": 3775
+    },
+    {
+      "epoch": 0.27244850102817564,
+      "grad_norm": 0.11168798059225082,
+      "learning_rate": 0.00018911531245489972,
+      "loss": 0.1619,
+      "step": 3776
+    },
+    {
+      "epoch": 0.272520653703236,
+      "grad_norm": 0.11275070905685425,
+      "learning_rate": 0.00018911242603550295,
+      "loss": 0.1425,
+      "step": 3777
+    },
+    {
+      "epoch": 0.27259280637829647,
+      "grad_norm": 0.10287392884492874,
+      "learning_rate": 0.00018910953961610622,
+      "loss": 0.1216,
+      "step": 3778
+    },
+    {
+      "epoch": 0.2726649590533569,
+      "grad_norm": 0.11645212024450302,
+      "learning_rate": 0.00018910665319670948,
+      "loss": 0.1777,
+      "step": 3779
+    },
+    {
+      "epoch": 0.27273711172841736,
+      "grad_norm": 0.11161747574806213,
+      "learning_rate": 0.00018910376677731277,
+      "loss": 0.1717,
+      "step": 3780
+    },
+    {
+      "epoch": 0.27280926440347775,
+      "grad_norm": 0.11371717602014542,
+      "learning_rate": 0.00018910088035791603,
+      "loss": 0.1827,
+      "step": 3781
+    },
+    {
+      "epoch": 0.2728814170785382,
+      "grad_norm": 0.1188749298453331,
+      "learning_rate": 0.00018909799393851927,
+      "loss": 0.1374,
+      "step": 3782
+    },
+    {
+      "epoch": 0.27295356975359863,
+      "grad_norm": 0.1261162906885147,
+      "learning_rate": 0.00018909510751912253,
+      "loss": 0.1285,
+      "step": 3783
+    },
+    {
+      "epoch": 0.273025722428659,
+      "grad_norm": 0.120500348508358,
+      "learning_rate": 0.0001890922210997258,
+      "loss": 0.15,
+      "step": 3784
+    },
+    {
+      "epoch": 0.27309787510371947,
+      "grad_norm": 0.12924271821975708,
+      "learning_rate": 0.00018908933468032906,
+      "loss": 0.177,
+      "step": 3785
+    },
+    {
+      "epoch": 0.2731700277787799,
+      "grad_norm": 0.1256682574748993,
+      "learning_rate": 0.00018908644826093232,
+      "loss": 0.1975,
+      "step": 3786
+    },
+    {
+      "epoch": 0.2732421804538403,
+      "grad_norm": 0.10796575993299484,
+      "learning_rate": 0.00018908356184153558,
+      "loss": 0.1844,
+      "step": 3787
+    },
+    {
+      "epoch": 0.27331433312890074,
+      "grad_norm": 0.13851940631866455,
+      "learning_rate": 0.00018908067542213884,
+      "loss": 0.1671,
+      "step": 3788
+    },
+    {
+      "epoch": 0.2733864858039612,
+      "grad_norm": 0.11101972311735153,
+      "learning_rate": 0.0001890777890027421,
+      "loss": 0.1249,
+      "step": 3789
+    },
+    {
+      "epoch": 0.27345863847902163,
+      "grad_norm": 0.11235811561346054,
+      "learning_rate": 0.00018907490258334537,
+      "loss": 0.1172,
+      "step": 3790
+    },
+    {
+      "epoch": 0.273530791154082,
+      "grad_norm": 0.08978880196809769,
+      "learning_rate": 0.00018907201616394863,
+      "loss": 0.1713,
+      "step": 3791
+    },
+    {
+      "epoch": 0.27360294382914246,
+      "grad_norm": 0.11776753515005112,
+      "learning_rate": 0.0001890691297445519,
+      "loss": 0.1518,
+      "step": 3792
+    },
+    {
+      "epoch": 0.2736750965042029,
+      "grad_norm": 0.11837328225374222,
+      "learning_rate": 0.00018906624332515516,
+      "loss": 0.1939,
+      "step": 3793
+    },
+    {
+      "epoch": 0.2737472491792633,
+      "grad_norm": 0.1469045728445053,
+      "learning_rate": 0.00018906335690575842,
+      "loss": 0.1756,
+      "step": 3794
+    },
+    {
+      "epoch": 0.27381940185432374,
+      "grad_norm": 0.12482008337974548,
+      "learning_rate": 0.00018906047048636168,
+      "loss": 0.1506,
+      "step": 3795
+    },
+    {
+      "epoch": 0.2738915545293842,
+      "grad_norm": 0.12701426446437836,
+      "learning_rate": 0.00018905758406696495,
+      "loss": 0.1602,
+      "step": 3796
+    },
+    {
+      "epoch": 0.27396370720444463,
+      "grad_norm": 0.10903041064739227,
+      "learning_rate": 0.0001890546976475682,
+      "loss": 0.1266,
+      "step": 3797
+    },
+    {
+      "epoch": 0.274035859879505,
+      "grad_norm": 0.09815473854541779,
+      "learning_rate": 0.00018905181122817147,
+      "loss": 0.1691,
+      "step": 3798
+    },
+    {
+      "epoch": 0.27410801255456546,
+      "grad_norm": 0.09265413880348206,
+      "learning_rate": 0.0001890489248087747,
+      "loss": 0.149,
+      "step": 3799
+    },
+    {
+      "epoch": 0.2741801652296259,
+      "grad_norm": 0.09038549661636353,
+      "learning_rate": 0.00018904603838937797,
+      "loss": 0.1739,
+      "step": 3800
+    },
+    {
+      "epoch": 0.2742523179046863,
+      "grad_norm": 0.13240946829319,
+      "learning_rate": 0.00018904315196998126,
+      "loss": 0.1932,
+      "step": 3801
+    },
+    {
+      "epoch": 0.27432447057974674,
+      "grad_norm": 0.11062067002058029,
+      "learning_rate": 0.00018904026555058453,
+      "loss": 0.1538,
+      "step": 3802
+    },
+    {
+      "epoch": 0.2743966232548072,
+      "grad_norm": 0.1425238698720932,
+      "learning_rate": 0.0001890373791311878,
+      "loss": 0.145,
+      "step": 3803
+    },
+    {
+      "epoch": 0.2744687759298676,
+      "grad_norm": 0.1070200502872467,
+      "learning_rate": 0.00018903449271179102,
+      "loss": 0.1648,
+      "step": 3804
+    },
+    {
+      "epoch": 0.274540928604928,
+      "grad_norm": 0.11594266444444656,
+      "learning_rate": 0.0001890316062923943,
+      "loss": 0.2021,
+      "step": 3805
+    },
+    {
+      "epoch": 0.27461308127998846,
+      "grad_norm": 0.10769648104906082,
+      "learning_rate": 0.00018902871987299755,
+      "loss": 0.1322,
+      "step": 3806
+    },
+    {
+      "epoch": 0.2746852339550489,
+      "grad_norm": 0.1301393359899521,
+      "learning_rate": 0.0001890258334536008,
+      "loss": 0.181,
+      "step": 3807
+    },
+    {
+      "epoch": 0.2747573866301093,
+      "grad_norm": 0.11476437002420425,
+      "learning_rate": 0.0001890229470342041,
+      "loss": 0.1417,
+      "step": 3808
+    },
+    {
+      "epoch": 0.27482953930516973,
+      "grad_norm": 0.09497443586587906,
+      "learning_rate": 0.00018902006061480734,
+      "loss": 0.1801,
+      "step": 3809
+    },
+    {
+      "epoch": 0.2749016919802302,
+      "grad_norm": 0.09713861346244812,
+      "learning_rate": 0.0001890171741954106,
+      "loss": 0.1347,
+      "step": 3810
+    },
+    {
+      "epoch": 0.2749738446552906,
+      "grad_norm": 0.13135769963264465,
+      "learning_rate": 0.00018901428777601386,
+      "loss": 0.1606,
+      "step": 3811
+    },
+    {
+      "epoch": 0.275045997330351,
+      "grad_norm": 0.09864150732755661,
+      "learning_rate": 0.00018901140135661713,
+      "loss": 0.1105,
+      "step": 3812
+    },
+    {
+      "epoch": 0.27511815000541145,
+      "grad_norm": 0.1272011399269104,
+      "learning_rate": 0.0001890085149372204,
+      "loss": 0.128,
+      "step": 3813
+    },
+    {
+      "epoch": 0.2751903026804719,
+      "grad_norm": 0.1033303365111351,
+      "learning_rate": 0.00018900562851782365,
+      "loss": 0.1681,
+      "step": 3814
+    },
+    {
+      "epoch": 0.2752624553555323,
+      "grad_norm": 0.14046432077884674,
+      "learning_rate": 0.00018900274209842692,
+      "loss": 0.1473,
+      "step": 3815
+    },
+    {
+      "epoch": 0.27533460803059273,
+      "grad_norm": 0.10199149698019028,
+      "learning_rate": 0.00018899985567903018,
+      "loss": 0.1824,
+      "step": 3816
+    },
+    {
+      "epoch": 0.2754067607056532,
+      "grad_norm": 0.1071721762418747,
+      "learning_rate": 0.00018899696925963344,
+      "loss": 0.1544,
+      "step": 3817
+    },
+    {
+      "epoch": 0.27547891338071356,
+      "grad_norm": 0.08951333910226822,
+      "learning_rate": 0.0001889940828402367,
+      "loss": 0.2149,
+      "step": 3818
+    },
+    {
+      "epoch": 0.275551066055774,
+      "grad_norm": 0.11420239508152008,
+      "learning_rate": 0.00018899119642083997,
+      "loss": 0.1265,
+      "step": 3819
+    },
+    {
+      "epoch": 0.27562321873083445,
+      "grad_norm": 0.12129448354244232,
+      "learning_rate": 0.0001889883100014432,
+      "loss": 0.1559,
+      "step": 3820
+    },
+    {
+      "epoch": 0.2756953714058949,
+      "grad_norm": 0.15682537853717804,
+      "learning_rate": 0.00018898542358204647,
+      "loss": 0.1485,
+      "step": 3821
+    },
+    {
+      "epoch": 0.2757675240809553,
+      "grad_norm": 0.10432837903499603,
+      "learning_rate": 0.00018898253716264973,
+      "loss": 0.1827,
+      "step": 3822
+    },
+    {
+      "epoch": 0.27583967675601573,
+      "grad_norm": 0.12009520828723907,
+      "learning_rate": 0.00018897965074325302,
+      "loss": 0.1801,
+      "step": 3823
+    },
+    {
+      "epoch": 0.27591182943107617,
+      "grad_norm": 0.12202169746160507,
+      "learning_rate": 0.00018897676432385628,
+      "loss": 0.1873,
+      "step": 3824
+    },
+    {
+      "epoch": 0.27598398210613656,
+      "grad_norm": 0.12997078895568848,
+      "learning_rate": 0.00018897387790445952,
+      "loss": 0.1842,
+      "step": 3825
+    },
+    {
+      "epoch": 0.276056134781197,
+      "grad_norm": 0.09522189944982529,
+      "learning_rate": 0.00018897099148506278,
+      "loss": 0.1388,
+      "step": 3826
+    },
+    {
+      "epoch": 0.27612828745625745,
+      "grad_norm": 0.09096496552228928,
+      "learning_rate": 0.00018896810506566604,
+      "loss": 0.1958,
+      "step": 3827
+    },
+    {
+      "epoch": 0.2762004401313179,
+      "grad_norm": 0.11269880086183548,
+      "learning_rate": 0.0001889652186462693,
+      "loss": 0.203,
+      "step": 3828
+    },
+    {
+      "epoch": 0.2762725928063783,
+      "grad_norm": 0.10250306874513626,
+      "learning_rate": 0.00018896233222687257,
+      "loss": 0.1931,
+      "step": 3829
+    },
+    {
+      "epoch": 0.2763447454814387,
+      "grad_norm": 0.10111036151647568,
+      "learning_rate": 0.00018895944580747583,
+      "loss": 0.1257,
+      "step": 3830
+    },
+    {
+      "epoch": 0.27641689815649917,
+      "grad_norm": 0.09969083219766617,
+      "learning_rate": 0.0001889565593880791,
+      "loss": 0.1363,
+      "step": 3831
+    },
+    {
+      "epoch": 0.27648905083155956,
+      "grad_norm": 0.12945561110973358,
+      "learning_rate": 0.00018895367296868236,
+      "loss": 0.1663,
+      "step": 3832
+    },
+    {
+      "epoch": 0.27656120350662,
+      "grad_norm": 0.11256492137908936,
+      "learning_rate": 0.00018895078654928562,
+      "loss": 0.2091,
+      "step": 3833
+    },
+    {
+      "epoch": 0.27663335618168045,
+      "grad_norm": 0.11610375344753265,
+      "learning_rate": 0.00018894790012988888,
+      "loss": 0.1667,
+      "step": 3834
+    },
+    {
+      "epoch": 0.2767055088567409,
+      "grad_norm": 0.11551795899868011,
+      "learning_rate": 0.00018894501371049215,
+      "loss": 0.1446,
+      "step": 3835
+    },
+    {
+      "epoch": 0.2767776615318013,
+      "grad_norm": 0.10832656919956207,
+      "learning_rate": 0.00018894212729109538,
+      "loss": 0.202,
+      "step": 3836
+    },
+    {
+      "epoch": 0.2768498142068617,
+      "grad_norm": 0.09568379819393158,
+      "learning_rate": 0.00018893924087169867,
+      "loss": 0.1885,
+      "step": 3837
+    },
+    {
+      "epoch": 0.27692196688192217,
+      "grad_norm": 0.09933038800954819,
+      "learning_rate": 0.00018893635445230194,
+      "loss": 0.1824,
+      "step": 3838
+    },
+    {
+      "epoch": 0.27699411955698255,
+      "grad_norm": 0.08672601729631424,
+      "learning_rate": 0.0001889334680329052,
+      "loss": 0.1698,
+      "step": 3839
+    },
+    {
+      "epoch": 0.277066272232043,
+      "grad_norm": 0.10415983200073242,
+      "learning_rate": 0.00018893058161350846,
+      "loss": 0.16,
+      "step": 3840
+    },
+    {
+      "epoch": 0.27713842490710344,
+      "grad_norm": 0.11694998294115067,
+      "learning_rate": 0.0001889276951941117,
+      "loss": 0.1328,
+      "step": 3841
+    },
+    {
+      "epoch": 0.2772105775821639,
+      "grad_norm": 0.1068284884095192,
+      "learning_rate": 0.00018892480877471496,
+      "loss": 0.1654,
+      "step": 3842
+    },
+    {
+      "epoch": 0.2772827302572243,
+      "grad_norm": 0.11396096646785736,
+      "learning_rate": 0.00018892192235531822,
+      "loss": 0.153,
+      "step": 3843
+    },
+    {
+      "epoch": 0.2773548829322847,
+      "grad_norm": 0.1199285089969635,
+      "learning_rate": 0.00018891903593592151,
+      "loss": 0.157,
+      "step": 3844
+    },
+    {
+      "epoch": 0.27742703560734516,
+      "grad_norm": 0.12460009753704071,
+      "learning_rate": 0.00018891614951652478,
+      "loss": 0.1268,
+      "step": 3845
+    },
+    {
+      "epoch": 0.27749918828240555,
+      "grad_norm": 0.12122397124767303,
+      "learning_rate": 0.000188913263097128,
+      "loss": 0.1929,
+      "step": 3846
+    },
+    {
+      "epoch": 0.277571340957466,
+      "grad_norm": 0.12009304016828537,
+      "learning_rate": 0.00018891037667773128,
+      "loss": 0.1462,
+      "step": 3847
+    },
+    {
+      "epoch": 0.27764349363252644,
+      "grad_norm": 0.1184767559170723,
+      "learning_rate": 0.00018890749025833454,
+      "loss": 0.1794,
+      "step": 3848
+    },
+    {
+      "epoch": 0.2777156463075868,
+      "grad_norm": 0.11086839437484741,
+      "learning_rate": 0.0001889046038389378,
+      "loss": 0.2225,
+      "step": 3849
+    },
+    {
+      "epoch": 0.27778779898264727,
+      "grad_norm": 0.1261485517024994,
+      "learning_rate": 0.00018890171741954106,
+      "loss": 0.1344,
+      "step": 3850
+    },
+    {
+      "epoch": 0.2778599516577077,
+      "grad_norm": 0.10495270788669586,
+      "learning_rate": 0.00018889883100014433,
+      "loss": 0.1623,
+      "step": 3851
+    },
+    {
+      "epoch": 0.27793210433276816,
+      "grad_norm": 0.10916092246770859,
+      "learning_rate": 0.0001888959445807476,
+      "loss": 0.1888,
+      "step": 3852
+    },
+    {
+      "epoch": 0.27800425700782855,
+      "grad_norm": 0.12027977406978607,
+      "learning_rate": 0.00018889305816135085,
+      "loss": 0.1618,
+      "step": 3853
+    },
+    {
+      "epoch": 0.278076409682889,
+      "grad_norm": 0.09191562980413437,
+      "learning_rate": 0.00018889017174195412,
+      "loss": 0.1426,
+      "step": 3854
+    },
+    {
+      "epoch": 0.27814856235794944,
+      "grad_norm": 0.0947212353348732,
+      "learning_rate": 0.00018888728532255738,
+      "loss": 0.12,
+      "step": 3855
+    },
+    {
+      "epoch": 0.2782207150330098,
+      "grad_norm": 0.12886327505111694,
+      "learning_rate": 0.00018888439890316064,
+      "loss": 0.1192,
+      "step": 3856
+    },
+    {
+      "epoch": 0.27829286770807027,
+      "grad_norm": 0.09498050063848495,
+      "learning_rate": 0.00018888151248376388,
+      "loss": 0.1316,
+      "step": 3857
+    },
+    {
+      "epoch": 0.2783650203831307,
+      "grad_norm": 0.1054006963968277,
+      "learning_rate": 0.00018887862606436717,
+      "loss": 0.1113,
+      "step": 3858
+    },
+    {
+      "epoch": 0.27843717305819116,
+      "grad_norm": 0.1302202194929123,
+      "learning_rate": 0.00018887573964497043,
+      "loss": 0.1784,
+      "step": 3859
+    },
+    {
+      "epoch": 0.27850932573325154,
+      "grad_norm": 0.13806942105293274,
+      "learning_rate": 0.0001888728532255737,
+      "loss": 0.1905,
+      "step": 3860
+    },
+    {
+      "epoch": 0.278581478408312,
+      "grad_norm": 0.10921463370323181,
+      "learning_rate": 0.00018886996680617696,
+      "loss": 0.1385,
+      "step": 3861
+    },
+    {
+      "epoch": 0.27865363108337243,
+      "grad_norm": 0.10545797646045685,
+      "learning_rate": 0.0001888670803867802,
+      "loss": 0.1844,
+      "step": 3862
+    },
+    {
+      "epoch": 0.2787257837584328,
+      "grad_norm": 0.10389558225870132,
+      "learning_rate": 0.00018886419396738346,
+      "loss": 0.1429,
+      "step": 3863
+    },
+    {
+      "epoch": 0.27879793643349327,
+      "grad_norm": 0.11613049358129501,
+      "learning_rate": 0.00018886130754798672,
+      "loss": 0.1973,
+      "step": 3864
+    },
+    {
+      "epoch": 0.2788700891085537,
+      "grad_norm": 0.09906083345413208,
+      "learning_rate": 0.00018885842112859,
+      "loss": 0.1599,
+      "step": 3865
+    },
+    {
+      "epoch": 0.27894224178361415,
+      "grad_norm": 0.15463513135910034,
+      "learning_rate": 0.00018885553470919327,
+      "loss": 0.1318,
+      "step": 3866
+    },
+    {
+      "epoch": 0.27901439445867454,
+      "grad_norm": 0.09531057626008987,
+      "learning_rate": 0.0001888526482897965,
+      "loss": 0.1453,
+      "step": 3867
+    },
+    {
+      "epoch": 0.279086547133735,
+      "grad_norm": 0.09789328277111053,
+      "learning_rate": 0.00018884976187039977,
+      "loss": 0.1928,
+      "step": 3868
+    },
+    {
+      "epoch": 0.27915869980879543,
+      "grad_norm": 0.12422343343496323,
+      "learning_rate": 0.00018884687545100303,
+      "loss": 0.1469,
+      "step": 3869
+    },
+    {
+      "epoch": 0.2792308524838558,
+      "grad_norm": 0.10871326178312302,
+      "learning_rate": 0.0001888439890316063,
+      "loss": 0.14,
+      "step": 3870
+    },
+    {
+      "epoch": 0.27930300515891626,
+      "grad_norm": 0.10778743028640747,
+      "learning_rate": 0.00018884110261220956,
+      "loss": 0.1523,
+      "step": 3871
+    },
+    {
+      "epoch": 0.2793751578339767,
+      "grad_norm": 0.08779001981019974,
+      "learning_rate": 0.00018883821619281282,
+      "loss": 0.1802,
+      "step": 3872
+    },
+    {
+      "epoch": 0.27944731050903715,
+      "grad_norm": 0.10399382561445236,
+      "learning_rate": 0.00018883532977341608,
+      "loss": 0.162,
+      "step": 3873
+    },
+    {
+      "epoch": 0.27951946318409754,
+      "grad_norm": 0.1193578690290451,
+      "learning_rate": 0.00018883244335401935,
+      "loss": 0.1275,
+      "step": 3874
+    },
+    {
+      "epoch": 0.279591615859158,
+      "grad_norm": 0.0955120399594307,
+      "learning_rate": 0.0001888295569346226,
+      "loss": 0.1615,
+      "step": 3875
+    },
+    {
+      "epoch": 0.2796637685342184,
+      "grad_norm": 0.13648675382137299,
+      "learning_rate": 0.00018882667051522587,
+      "loss": 0.1867,
+      "step": 3876
+    },
+    {
+      "epoch": 0.2797359212092788,
+      "grad_norm": 0.11467991769313812,
+      "learning_rate": 0.00018882378409582914,
+      "loss": 0.1679,
+      "step": 3877
+    },
+    {
+      "epoch": 0.27980807388433926,
+      "grad_norm": 0.10961662977933884,
+      "learning_rate": 0.00018882089767643237,
+      "loss": 0.1773,
+      "step": 3878
+    },
+    {
+      "epoch": 0.2798802265593997,
+      "grad_norm": 0.11874514073133469,
+      "learning_rate": 0.00018881801125703566,
+      "loss": 0.1586,
+      "step": 3879
+    },
+    {
+      "epoch": 0.2799523792344601,
+      "grad_norm": 0.13788311183452606,
+      "learning_rate": 0.00018881512483763892,
+      "loss": 0.2045,
+      "step": 3880
+    },
+    {
+      "epoch": 0.28002453190952054,
+      "grad_norm": 0.1315164566040039,
+      "learning_rate": 0.0001888122384182422,
+      "loss": 0.1471,
+      "step": 3881
+    },
+    {
+      "epoch": 0.280096684584581,
+      "grad_norm": 0.10409369319677353,
+      "learning_rate": 0.00018880935199884545,
+      "loss": 0.1833,
+      "step": 3882
+    },
+    {
+      "epoch": 0.2801688372596414,
+      "grad_norm": 0.14403219521045685,
+      "learning_rate": 0.00018880646557944869,
+      "loss": 0.166,
+      "step": 3883
+    },
+    {
+      "epoch": 0.2802409899347018,
+      "grad_norm": 0.13879208266735077,
+      "learning_rate": 0.00018880357916005195,
+      "loss": 0.1389,
+      "step": 3884
+    },
+    {
+      "epoch": 0.28031314260976226,
+      "grad_norm": 0.12207477539777756,
+      "learning_rate": 0.0001888006927406552,
+      "loss": 0.1949,
+      "step": 3885
+    },
+    {
+      "epoch": 0.2803852952848227,
+      "grad_norm": 0.10924966633319855,
+      "learning_rate": 0.0001887978063212585,
+      "loss": 0.1432,
+      "step": 3886
+    },
+    {
+      "epoch": 0.2804574479598831,
+      "grad_norm": 0.11043348908424377,
+      "learning_rate": 0.00018879491990186177,
+      "loss": 0.1715,
+      "step": 3887
+    },
+    {
+      "epoch": 0.28052960063494353,
+      "grad_norm": 0.12954466044902802,
+      "learning_rate": 0.000188792033482465,
+      "loss": 0.1936,
+      "step": 3888
+    },
+    {
+      "epoch": 0.280601753310004,
+      "grad_norm": 0.12958849966526031,
+      "learning_rate": 0.00018878914706306826,
+      "loss": 0.1475,
+      "step": 3889
+    },
+    {
+      "epoch": 0.2806739059850644,
+      "grad_norm": 0.08727343380451202,
+      "learning_rate": 0.00018878626064367153,
+      "loss": 0.1755,
+      "step": 3890
+    },
+    {
+      "epoch": 0.2807460586601248,
+      "grad_norm": 0.09146881103515625,
+      "learning_rate": 0.0001887833742242748,
+      "loss": 0.1693,
+      "step": 3891
+    },
+    {
+      "epoch": 0.28081821133518525,
+      "grad_norm": 0.1067313477396965,
+      "learning_rate": 0.00018878048780487805,
+      "loss": 0.1402,
+      "step": 3892
+    },
+    {
+      "epoch": 0.2808903640102457,
+      "grad_norm": 0.11091002076864243,
+      "learning_rate": 0.00018877760138548132,
+      "loss": 0.143,
+      "step": 3893
+    },
+    {
+      "epoch": 0.2809625166853061,
+      "grad_norm": 0.11535555124282837,
+      "learning_rate": 0.00018877471496608458,
+      "loss": 0.1776,
+      "step": 3894
+    },
+    {
+      "epoch": 0.28103466936036653,
+      "grad_norm": 0.12002919614315033,
+      "learning_rate": 0.00018877182854668784,
+      "loss": 0.1982,
+      "step": 3895
+    },
+    {
+      "epoch": 0.281106822035427,
+      "grad_norm": 0.11106644570827484,
+      "learning_rate": 0.0001887689421272911,
+      "loss": 0.1349,
+      "step": 3896
+    },
+    {
+      "epoch": 0.2811789747104874,
+      "grad_norm": 0.11119067668914795,
+      "learning_rate": 0.00018876605570789437,
+      "loss": 0.1834,
+      "step": 3897
+    },
+    {
+      "epoch": 0.2812511273855478,
+      "grad_norm": 0.10117504000663757,
+      "learning_rate": 0.00018876316928849763,
+      "loss": 0.1251,
+      "step": 3898
+    },
+    {
+      "epoch": 0.28132328006060825,
+      "grad_norm": 0.08032572269439697,
+      "learning_rate": 0.00018876028286910087,
+      "loss": 0.1559,
+      "step": 3899
+    },
+    {
+      "epoch": 0.2813954327356687,
+      "grad_norm": 0.1198943629860878,
+      "learning_rate": 0.00018875739644970416,
+      "loss": 0.1174,
+      "step": 3900
+    },
+    {
+      "epoch": 0.2814675854107291,
+      "grad_norm": 0.11611675471067429,
+      "learning_rate": 0.00018875451003030742,
+      "loss": 0.2006,
+      "step": 3901
+    },
+    {
+      "epoch": 0.2815397380857895,
+      "grad_norm": 0.14057551324367523,
+      "learning_rate": 0.00018875162361091068,
+      "loss": 0.1047,
+      "step": 3902
+    },
+    {
+      "epoch": 0.28161189076084997,
+      "grad_norm": 0.09256484359502792,
+      "learning_rate": 0.00018874873719151394,
+      "loss": 0.1624,
+      "step": 3903
+    },
+    {
+      "epoch": 0.2816840434359104,
+      "grad_norm": 0.10634186118841171,
+      "learning_rate": 0.0001887458507721172,
+      "loss": 0.1478,
+      "step": 3904
+    },
+    {
+      "epoch": 0.2817561961109708,
+      "grad_norm": 0.11302176862955093,
+      "learning_rate": 0.00018874296435272044,
+      "loss": 0.1519,
+      "step": 3905
+    },
+    {
+      "epoch": 0.28182834878603125,
+      "grad_norm": 0.12342038750648499,
+      "learning_rate": 0.0001887400779333237,
+      "loss": 0.1651,
+      "step": 3906
+    },
+    {
+      "epoch": 0.2819005014610917,
+      "grad_norm": 0.10243307799100876,
+      "learning_rate": 0.000188737191513927,
+      "loss": 0.1709,
+      "step": 3907
+    },
+    {
+      "epoch": 0.2819726541361521,
+      "grad_norm": 0.14496873319149017,
+      "learning_rate": 0.00018873430509453026,
+      "loss": 0.1658,
+      "step": 3908
+    },
+    {
+      "epoch": 0.2820448068112125,
+      "grad_norm": 0.123151034116745,
+      "learning_rate": 0.00018873141867513352,
+      "loss": 0.1899,
+      "step": 3909
+    },
+    {
+      "epoch": 0.28211695948627297,
+      "grad_norm": 0.11360703408718109,
+      "learning_rate": 0.00018872853225573676,
+      "loss": 0.1467,
+      "step": 3910
+    },
+    {
+      "epoch": 0.28218911216133336,
+      "grad_norm": 0.09966452419757843,
+      "learning_rate": 0.00018872564583634002,
+      "loss": 0.1606,
+      "step": 3911
+    },
+    {
+      "epoch": 0.2822612648363938,
+      "grad_norm": 0.1278088390827179,
+      "learning_rate": 0.00018872275941694328,
+      "loss": 0.1878,
+      "step": 3912
+    },
+    {
+      "epoch": 0.28233341751145424,
+      "grad_norm": 0.08595690876245499,
+      "learning_rate": 0.00018871987299754655,
+      "loss": 0.1312,
+      "step": 3913
+    },
+    {
+      "epoch": 0.2824055701865147,
+      "grad_norm": 0.10899940878152847,
+      "learning_rate": 0.00018871698657814984,
+      "loss": 0.1792,
+      "step": 3914
+    },
+    {
+      "epoch": 0.2824777228615751,
+      "grad_norm": 0.11173483729362488,
+      "learning_rate": 0.00018871410015875307,
+      "loss": 0.1327,
+      "step": 3915
+    },
+    {
+      "epoch": 0.2825498755366355,
+      "grad_norm": 0.08659325540065765,
+      "learning_rate": 0.00018871121373935634,
+      "loss": 0.1844,
+      "step": 3916
+    },
+    {
+      "epoch": 0.28262202821169596,
+      "grad_norm": 0.10769529640674591,
+      "learning_rate": 0.0001887083273199596,
+      "loss": 0.1749,
+      "step": 3917
+    },
+    {
+      "epoch": 0.28269418088675635,
+      "grad_norm": 0.10912948846817017,
+      "learning_rate": 0.00018870544090056286,
+      "loss": 0.2171,
+      "step": 3918
+    },
+    {
+      "epoch": 0.2827663335618168,
+      "grad_norm": 0.0865086242556572,
+      "learning_rate": 0.00018870255448116612,
+      "loss": 0.1299,
+      "step": 3919
+    },
+    {
+      "epoch": 0.28283848623687724,
+      "grad_norm": 0.11570220440626144,
+      "learning_rate": 0.0001886996680617694,
+      "loss": 0.1479,
+      "step": 3920
+    },
+    {
+      "epoch": 0.2829106389119377,
+      "grad_norm": 0.11451957374811172,
+      "learning_rate": 0.00018869678164237265,
+      "loss": 0.2033,
+      "step": 3921
+    },
+    {
+      "epoch": 0.2829827915869981,
+      "grad_norm": 0.10973011702299118,
+      "learning_rate": 0.0001886938952229759,
+      "loss": 0.1507,
+      "step": 3922
+    },
+    {
+      "epoch": 0.2830549442620585,
+      "grad_norm": 0.10192826390266418,
+      "learning_rate": 0.00018869100880357918,
+      "loss": 0.1661,
+      "step": 3923
+    },
+    {
+      "epoch": 0.28312709693711896,
+      "grad_norm": 0.11305376887321472,
+      "learning_rate": 0.00018868812238418244,
+      "loss": 0.1885,
+      "step": 3924
+    },
+    {
+      "epoch": 0.28319924961217935,
+      "grad_norm": 0.11453882604837418,
+      "learning_rate": 0.0001886852359647857,
+      "loss": 0.1129,
+      "step": 3925
+    },
+    {
+      "epoch": 0.2832714022872398,
+      "grad_norm": 0.14879728853702545,
+      "learning_rate": 0.00018868234954538894,
+      "loss": 0.1632,
+      "step": 3926
+    },
+    {
+      "epoch": 0.28334355496230024,
+      "grad_norm": 0.10178165137767792,
+      "learning_rate": 0.0001886794631259922,
+      "loss": 0.1971,
+      "step": 3927
+    },
+    {
+      "epoch": 0.2834157076373607,
+      "grad_norm": 0.1239686906337738,
+      "learning_rate": 0.0001886765767065955,
+      "loss": 0.1824,
+      "step": 3928
+    },
+    {
+      "epoch": 0.28348786031242107,
+      "grad_norm": 0.11629156023263931,
+      "learning_rate": 0.00018867369028719875,
+      "loss": 0.2027,
+      "step": 3929
+    },
+    {
+      "epoch": 0.2835600129874815,
+      "grad_norm": 0.1069471687078476,
+      "learning_rate": 0.00018867080386780202,
+      "loss": 0.2064,
+      "step": 3930
+    },
+    {
+      "epoch": 0.28363216566254196,
+      "grad_norm": 0.12007010728120804,
+      "learning_rate": 0.00018866791744840525,
+      "loss": 0.1326,
+      "step": 3931
+    },
+    {
+      "epoch": 0.28370431833760235,
+      "grad_norm": 0.0960017666220665,
+      "learning_rate": 0.00018866503102900852,
+      "loss": 0.1152,
+      "step": 3932
+    },
+    {
+      "epoch": 0.2837764710126628,
+      "grad_norm": 0.12916745245456696,
+      "learning_rate": 0.00018866214460961178,
+      "loss": 0.1586,
+      "step": 3933
+    },
+    {
+      "epoch": 0.28384862368772323,
+      "grad_norm": 0.10532090812921524,
+      "learning_rate": 0.00018865925819021504,
+      "loss": 0.1473,
+      "step": 3934
+    },
+    {
+      "epoch": 0.2839207763627837,
+      "grad_norm": 0.11855144798755646,
+      "learning_rate": 0.00018865637177081833,
+      "loss": 0.1472,
+      "step": 3935
+    },
+    {
+      "epoch": 0.28399292903784407,
+      "grad_norm": 0.09988211840391159,
+      "learning_rate": 0.00018865348535142157,
+      "loss": 0.1344,
+      "step": 3936
+    },
+    {
+      "epoch": 0.2840650817129045,
+      "grad_norm": 0.2618612051010132,
+      "learning_rate": 0.00018865059893202483,
+      "loss": 0.1355,
+      "step": 3937
+    },
+    {
+      "epoch": 0.28413723438796495,
+      "grad_norm": 0.1132231205701828,
+      "learning_rate": 0.0001886477125126281,
+      "loss": 0.1693,
+      "step": 3938
+    },
+    {
+      "epoch": 0.28420938706302534,
+      "grad_norm": 0.12109064310789108,
+      "learning_rate": 0.00018864482609323136,
+      "loss": 0.1847,
+      "step": 3939
+    },
+    {
+      "epoch": 0.2842815397380858,
+      "grad_norm": 0.13896405696868896,
+      "learning_rate": 0.00018864193967383462,
+      "loss": 0.1732,
+      "step": 3940
+    },
+    {
+      "epoch": 0.28435369241314623,
+      "grad_norm": 0.11479993909597397,
+      "learning_rate": 0.00018863905325443788,
+      "loss": 0.1557,
+      "step": 3941
+    },
+    {
+      "epoch": 0.2844258450882066,
+      "grad_norm": 0.10381763428449631,
+      "learning_rate": 0.00018863616683504114,
+      "loss": 0.1921,
+      "step": 3942
+    },
+    {
+      "epoch": 0.28449799776326706,
+      "grad_norm": 0.11239650100469589,
+      "learning_rate": 0.0001886332804156444,
+      "loss": 0.1338,
+      "step": 3943
+    },
+    {
+      "epoch": 0.2845701504383275,
+      "grad_norm": 0.11402308195829391,
+      "learning_rate": 0.00018863039399624767,
+      "loss": 0.1415,
+      "step": 3944
+    },
+    {
+      "epoch": 0.28464230311338795,
+      "grad_norm": 0.19553448259830475,
+      "learning_rate": 0.00018862750757685093,
+      "loss": 0.131,
+      "step": 3945
+    },
+    {
+      "epoch": 0.28471445578844834,
+      "grad_norm": 0.44547462463378906,
+      "learning_rate": 0.0001886246211574542,
+      "loss": 0.1869,
+      "step": 3946
+    },
+    {
+      "epoch": 0.2847866084635088,
+      "grad_norm": 0.4798113703727722,
+      "learning_rate": 0.00018862173473805743,
+      "loss": 0.1679,
+      "step": 3947
+    },
+    {
+      "epoch": 0.2848587611385692,
+      "grad_norm": 0.15806955099105835,
+      "learning_rate": 0.0001886188483186607,
+      "loss": 0.1831,
+      "step": 3948
+    },
+    {
+      "epoch": 0.2849309138136296,
+      "grad_norm": 0.24062255024909973,
+      "learning_rate": 0.00018861596189926399,
+      "loss": 0.1884,
+      "step": 3949
+    },
+    {
+      "epoch": 0.28500306648869006,
+      "grad_norm": 0.3865458071231842,
+      "learning_rate": 0.00018861307547986725,
+      "loss": 0.1539,
+      "step": 3950
+    },
+    {
+      "epoch": 0.2850752191637505,
+      "grad_norm": 0.41536572575569153,
+      "learning_rate": 0.0001886101890604705,
+      "loss": 0.12,
+      "step": 3951
+    },
+    {
+      "epoch": 0.28514737183881095,
+      "grad_norm": 0.26495084166526794,
+      "learning_rate": 0.00018860730264107375,
+      "loss": 0.1189,
+      "step": 3952
+    },
+    {
+      "epoch": 0.28521952451387134,
+      "grad_norm": 0.14461775124073029,
+      "learning_rate": 0.000188604416221677,
+      "loss": 0.1612,
+      "step": 3953
+    },
+    {
+      "epoch": 0.2852916771889318,
+      "grad_norm": 0.3562706410884857,
+      "learning_rate": 0.00018860152980228027,
+      "loss": 0.2264,
+      "step": 3954
+    },
+    {
+      "epoch": 0.2853638298639922,
+      "grad_norm": 0.1219395101070404,
+      "learning_rate": 0.00018859864338288354,
+      "loss": 0.1723,
+      "step": 3955
+    },
+    {
+      "epoch": 0.2854359825390526,
+      "grad_norm": 0.14860716462135315,
+      "learning_rate": 0.00018859575696348683,
+      "loss": 0.1644,
+      "step": 3956
+    },
+    {
+      "epoch": 0.28550813521411306,
+      "grad_norm": 0.5113199353218079,
+      "learning_rate": 0.00018859287054409006,
+      "loss": 0.1648,
+      "step": 3957
+    },
+    {
+      "epoch": 0.2855802878891735,
+      "grad_norm": 0.12189047038555145,
+      "learning_rate": 0.00018858998412469332,
+      "loss": 0.2147,
+      "step": 3958
+    },
+    {
+      "epoch": 0.28565244056423394,
+      "grad_norm": 0.10717660188674927,
+      "learning_rate": 0.0001885870977052966,
+      "loss": 0.157,
+      "step": 3959
+    },
+    {
+      "epoch": 0.28572459323929433,
+      "grad_norm": 0.18845148384571075,
+      "learning_rate": 0.00018858421128589985,
+      "loss": 0.1717,
+      "step": 3960
+    },
+    {
+      "epoch": 0.2857967459143548,
+      "grad_norm": 0.10378044098615646,
+      "learning_rate": 0.0001885813248665031,
+      "loss": 0.1588,
+      "step": 3961
+    },
+    {
+      "epoch": 0.2858688985894152,
+      "grad_norm": 0.11192762851715088,
+      "learning_rate": 0.00018857843844710638,
+      "loss": 0.186,
+      "step": 3962
+    },
+    {
+      "epoch": 0.2859410512644756,
+      "grad_norm": 0.10364487767219543,
+      "learning_rate": 0.00018857555202770964,
+      "loss": 0.1886,
+      "step": 3963
+    },
+    {
+      "epoch": 0.28601320393953605,
+      "grad_norm": 0.11991937458515167,
+      "learning_rate": 0.0001885726656083129,
+      "loss": 0.1597,
+      "step": 3964
+    },
+    {
+      "epoch": 0.2860853566145965,
+      "grad_norm": 0.11884426325559616,
+      "learning_rate": 0.00018856977918891616,
+      "loss": 0.1297,
+      "step": 3965
+    },
+    {
+      "epoch": 0.28615750928965694,
+      "grad_norm": 0.16260336339473724,
+      "learning_rate": 0.00018856689276951943,
+      "loss": 0.1478,
+      "step": 3966
+    },
+    {
+      "epoch": 0.28622966196471733,
+      "grad_norm": 0.12283408641815186,
+      "learning_rate": 0.0001885640063501227,
+      "loss": 0.1809,
+      "step": 3967
+    },
+    {
+      "epoch": 0.2863018146397778,
+      "grad_norm": 0.1470719277858734,
+      "learning_rate": 0.00018856111993072593,
+      "loss": 0.1567,
+      "step": 3968
+    },
+    {
+      "epoch": 0.2863739673148382,
+      "grad_norm": 0.13070467114448547,
+      "learning_rate": 0.0001885582335113292,
+      "loss": 0.1354,
+      "step": 3969
+    },
+    {
+      "epoch": 0.2864461199898986,
+      "grad_norm": 0.13749848306179047,
+      "learning_rate": 0.00018855534709193248,
+      "loss": 0.1292,
+      "step": 3970
+    },
+    {
+      "epoch": 0.28651827266495905,
+      "grad_norm": 0.09411299228668213,
+      "learning_rate": 0.00018855246067253574,
+      "loss": 0.1691,
+      "step": 3971
+    },
+    {
+      "epoch": 0.2865904253400195,
+      "grad_norm": 0.12227313220500946,
+      "learning_rate": 0.000188549574253139,
+      "loss": 0.1288,
+      "step": 3972
+    },
+    {
+      "epoch": 0.2866625780150799,
+      "grad_norm": 0.10804979503154755,
+      "learning_rate": 0.00018854668783374224,
+      "loss": 0.1803,
+      "step": 3973
+    },
+    {
+      "epoch": 0.2867347306901403,
+      "grad_norm": 0.10891221463680267,
+      "learning_rate": 0.0001885438014143455,
+      "loss": 0.1358,
+      "step": 3974
+    },
+    {
+      "epoch": 0.28680688336520077,
+      "grad_norm": 0.10148027539253235,
+      "learning_rate": 0.00018854091499494877,
+      "loss": 0.1354,
+      "step": 3975
+    },
+    {
+      "epoch": 0.2868790360402612,
+      "grad_norm": 0.11599002033472061,
+      "learning_rate": 0.00018853802857555203,
+      "loss": 0.1266,
+      "step": 3976
+    },
+    {
+      "epoch": 0.2869511887153216,
+      "grad_norm": 0.10220354050397873,
+      "learning_rate": 0.00018853514215615532,
+      "loss": 0.1739,
+      "step": 3977
+    },
+    {
+      "epoch": 0.28702334139038205,
+      "grad_norm": 0.13676996529102325,
+      "learning_rate": 0.00018853225573675856,
+      "loss": 0.1635,
+      "step": 3978
+    },
+    {
+      "epoch": 0.2870954940654425,
+      "grad_norm": 0.108549565076828,
+      "learning_rate": 0.00018852936931736182,
+      "loss": 0.1409,
+      "step": 3979
+    },
+    {
+      "epoch": 0.2871676467405029,
+      "grad_norm": 0.11575684696435928,
+      "learning_rate": 0.00018852648289796508,
+      "loss": 0.1629,
+      "step": 3980
+    },
+    {
+      "epoch": 0.2872397994155633,
+      "grad_norm": 0.10409197956323624,
+      "learning_rate": 0.00018852359647856834,
+      "loss": 0.1423,
+      "step": 3981
+    },
+    {
+      "epoch": 0.28731195209062377,
+      "grad_norm": 0.12796637415885925,
+      "learning_rate": 0.0001885207100591716,
+      "loss": 0.1709,
+      "step": 3982
+    },
+    {
+      "epoch": 0.2873841047656842,
+      "grad_norm": 0.13579745590686798,
+      "learning_rate": 0.00018851782363977487,
+      "loss": 0.1449,
+      "step": 3983
+    },
+    {
+      "epoch": 0.2874562574407446,
+      "grad_norm": 0.16057123243808746,
+      "learning_rate": 0.00018851493722037813,
+      "loss": 0.1708,
+      "step": 3984
+    },
+    {
+      "epoch": 0.28752841011580504,
+      "grad_norm": 0.12024898827075958,
+      "learning_rate": 0.0001885120508009814,
+      "loss": 0.1512,
+      "step": 3985
+    },
+    {
+      "epoch": 0.2876005627908655,
+      "grad_norm": 0.13466006517410278,
+      "learning_rate": 0.00018850916438158466,
+      "loss": 0.1547,
+      "step": 3986
+    },
+    {
+      "epoch": 0.2876727154659259,
+      "grad_norm": 0.2027030736207962,
+      "learning_rate": 0.00018850627796218792,
+      "loss": 0.2007,
+      "step": 3987
+    },
+    {
+      "epoch": 0.2877448681409863,
+      "grad_norm": 0.10973954200744629,
+      "learning_rate": 0.00018850339154279118,
+      "loss": 0.1728,
+      "step": 3988
+    },
+    {
+      "epoch": 0.28781702081604676,
+      "grad_norm": 0.10212751477956772,
+      "learning_rate": 0.00018850050512339442,
+      "loss": 0.203,
+      "step": 3989
+    },
+    {
+      "epoch": 0.2878891734911072,
+      "grad_norm": 0.10394073277711868,
+      "learning_rate": 0.00018849761870399768,
+      "loss": 0.195,
+      "step": 3990
+    },
+    {
+      "epoch": 0.2879613261661676,
+      "grad_norm": 0.15195779502391815,
+      "learning_rate": 0.00018849473228460097,
+      "loss": 0.1347,
+      "step": 3991
+    },
+    {
+      "epoch": 0.28803347884122804,
+      "grad_norm": 0.1137838140130043,
+      "learning_rate": 0.00018849184586520424,
+      "loss": 0.1627,
+      "step": 3992
+    },
+    {
+      "epoch": 0.2881056315162885,
+      "grad_norm": 1.0346198081970215,
+      "learning_rate": 0.0001884889594458075,
+      "loss": 0.1806,
+      "step": 3993
+    },
+    {
+      "epoch": 0.2881777841913489,
+      "grad_norm": 0.12991052865982056,
+      "learning_rate": 0.00018848607302641074,
+      "loss": 0.1341,
+      "step": 3994
+    },
+    {
+      "epoch": 0.2882499368664093,
+      "grad_norm": 0.11583830416202545,
+      "learning_rate": 0.000188483186607014,
+      "loss": 0.135,
+      "step": 3995
+    },
+    {
+      "epoch": 0.28832208954146976,
+      "grad_norm": 2.006385087966919,
+      "learning_rate": 0.00018848030018761726,
+      "loss": 0.1573,
+      "step": 3996
+    },
+    {
+      "epoch": 0.2883942422165302,
+      "grad_norm": 0.24502481520175934,
+      "learning_rate": 0.00018847741376822052,
+      "loss": 0.1416,
+      "step": 3997
+    },
+    {
+      "epoch": 0.2884663948915906,
+      "grad_norm": 0.6196821928024292,
+      "learning_rate": 0.00018847452734882381,
+      "loss": 0.1717,
+      "step": 3998
+    },
+    {
+      "epoch": 0.28853854756665104,
+      "grad_norm": 0.23879577219486237,
+      "learning_rate": 0.00018847164092942705,
+      "loss": 0.1534,
+      "step": 3999
+    },
+    {
+      "epoch": 0.2886107002417115,
+      "grad_norm": 0.13388881087303162,
+      "learning_rate": 0.0001884687545100303,
+      "loss": 0.1506,
+      "step": 4000
+    },
+    {
+      "epoch": 0.28868285291677187,
+      "grad_norm": 0.11309870332479477,
+      "learning_rate": 0.00018846586809063358,
+      "loss": 0.1596,
+      "step": 4001
+    },
+    {
+      "epoch": 0.2887550055918323,
+      "grad_norm": 0.12458118051290512,
+      "learning_rate": 0.00018846298167123684,
+      "loss": 0.1693,
+      "step": 4002
+    },
+    {
+      "epoch": 0.28882715826689276,
+      "grad_norm": 0.1627064347267151,
+      "learning_rate": 0.0001884600952518401,
+      "loss": 0.1911,
+      "step": 4003
+    },
+    {
+      "epoch": 0.28889931094195315,
+      "grad_norm": 0.13531793653964996,
+      "learning_rate": 0.00018845720883244336,
+      "loss": 0.1798,
+      "step": 4004
+    },
+    {
+      "epoch": 0.2889714636170136,
+      "grad_norm": 0.235118567943573,
+      "learning_rate": 0.00018845432241304663,
+      "loss": 0.1795,
+      "step": 4005
+    },
+    {
+      "epoch": 0.28904361629207403,
+      "grad_norm": 0.13340897858142853,
+      "learning_rate": 0.0001884514359936499,
+      "loss": 0.13,
+      "step": 4006
+    },
+    {
+      "epoch": 0.2891157689671345,
+      "grad_norm": 0.13717308640480042,
+      "learning_rate": 0.00018844854957425315,
+      "loss": 0.1247,
+      "step": 4007
+    },
+    {
+      "epoch": 0.28918792164219487,
+      "grad_norm": 0.12918154895305634,
+      "learning_rate": 0.00018844566315485642,
+      "loss": 0.1828,
+      "step": 4008
+    },
+    {
+      "epoch": 0.2892600743172553,
+      "grad_norm": 0.12834350764751434,
+      "learning_rate": 0.00018844277673545968,
+      "loss": 0.1492,
+      "step": 4009
+    },
+    {
+      "epoch": 0.28933222699231576,
+      "grad_norm": 0.1537393033504486,
+      "learning_rate": 0.00018843989031606291,
+      "loss": 0.1459,
+      "step": 4010
+    },
+    {
+      "epoch": 0.28940437966737614,
+      "grad_norm": 0.2367478758096695,
+      "learning_rate": 0.00018843700389666618,
+      "loss": 0.1588,
+      "step": 4011
+    },
+    {
+      "epoch": 0.2894765323424366,
+      "grad_norm": 0.1759425699710846,
+      "learning_rate": 0.00018843411747726947,
+      "loss": 0.1786,
+      "step": 4012
+    },
+    {
+      "epoch": 0.28954868501749703,
+      "grad_norm": 0.12869958579540253,
+      "learning_rate": 0.00018843123105787273,
+      "loss": 0.17,
+      "step": 4013
+    },
+    {
+      "epoch": 0.2896208376925575,
+      "grad_norm": 0.13942141830921173,
+      "learning_rate": 0.000188428344638476,
+      "loss": 0.1487,
+      "step": 4014
+    },
+    {
+      "epoch": 0.28969299036761786,
+      "grad_norm": 0.13664652407169342,
+      "learning_rate": 0.00018842545821907923,
+      "loss": 0.1811,
+      "step": 4015
+    },
+    {
+      "epoch": 0.2897651430426783,
+      "grad_norm": 0.15287500619888306,
+      "learning_rate": 0.0001884225717996825,
+      "loss": 0.2197,
+      "step": 4016
+    },
+    {
+      "epoch": 0.28983729571773875,
+      "grad_norm": 0.11458779126405716,
+      "learning_rate": 0.00018841968538028576,
+      "loss": 0.1378,
+      "step": 4017
+    },
+    {
+      "epoch": 0.28990944839279914,
+      "grad_norm": 0.1235150545835495,
+      "learning_rate": 0.00018841679896088902,
+      "loss": 0.156,
+      "step": 4018
+    },
+    {
+      "epoch": 0.2899816010678596,
+      "grad_norm": 0.17336517572402954,
+      "learning_rate": 0.00018841391254149228,
+      "loss": 0.159,
+      "step": 4019
+    },
+    {
+      "epoch": 0.29005375374292003,
+      "grad_norm": 0.1811176836490631,
+      "learning_rate": 0.00018841102612209554,
+      "loss": 0.1585,
+      "step": 4020
+    },
+    {
+      "epoch": 0.2901259064179805,
+      "grad_norm": 0.11761893332004547,
+      "learning_rate": 0.0001884081397026988,
+      "loss": 0.1588,
+      "step": 4021
+    },
+    {
+      "epoch": 0.29019805909304086,
+      "grad_norm": 0.12895290553569794,
+      "learning_rate": 0.00018840525328330207,
+      "loss": 0.1059,
+      "step": 4022
+    },
+    {
+      "epoch": 0.2902702117681013,
+      "grad_norm": 0.1484558880329132,
+      "learning_rate": 0.00018840236686390533,
+      "loss": 0.1516,
+      "step": 4023
+    },
+    {
+      "epoch": 0.29034236444316175,
+      "grad_norm": 0.11516424268484116,
+      "learning_rate": 0.0001883994804445086,
+      "loss": 0.1209,
+      "step": 4024
+    },
+    {
+      "epoch": 0.29041451711822214,
+      "grad_norm": 0.17815934121608734,
+      "learning_rate": 0.00018839659402511186,
+      "loss": 0.1315,
+      "step": 4025
+    },
+    {
+      "epoch": 0.2904866697932826,
+      "grad_norm": 0.12998010218143463,
+      "learning_rate": 0.00018839370760571512,
+      "loss": 0.1297,
+      "step": 4026
+    },
+    {
+      "epoch": 0.290558822468343,
+      "grad_norm": 0.15977595746517181,
+      "learning_rate": 0.00018839082118631838,
+      "loss": 0.1891,
+      "step": 4027
+    },
+    {
+      "epoch": 0.29063097514340347,
+      "grad_norm": 0.12049231678247452,
+      "learning_rate": 0.00018838793476692165,
+      "loss": 0.1191,
+      "step": 4028
+    },
+    {
+      "epoch": 0.29070312781846386,
+      "grad_norm": 0.14954523742198944,
+      "learning_rate": 0.0001883850483475249,
+      "loss": 0.1315,
+      "step": 4029
+    },
+    {
+      "epoch": 0.2907752804935243,
+      "grad_norm": 0.1778295636177063,
+      "learning_rate": 0.00018838216192812817,
+      "loss": 0.1761,
+      "step": 4030
+    },
+    {
+      "epoch": 0.29084743316858475,
+      "grad_norm": 0.1837385594844818,
+      "learning_rate": 0.00018837927550873144,
+      "loss": 0.168,
+      "step": 4031
+    },
+    {
+      "epoch": 0.29091958584364513,
+      "grad_norm": 0.12899182736873627,
+      "learning_rate": 0.00018837638908933467,
+      "loss": 0.1394,
+      "step": 4032
+    },
+    {
+      "epoch": 0.2909917385187056,
+      "grad_norm": 0.15641450881958008,
+      "learning_rate": 0.00018837350266993793,
+      "loss": 0.1565,
+      "step": 4033
+    },
+    {
+      "epoch": 0.291063891193766,
+      "grad_norm": 0.12071597576141357,
+      "learning_rate": 0.00018837061625054122,
+      "loss": 0.1602,
+      "step": 4034
+    },
+    {
+      "epoch": 0.2911360438688264,
+      "grad_norm": 0.12751080095767975,
+      "learning_rate": 0.0001883677298311445,
+      "loss": 0.1713,
+      "step": 4035
+    },
+    {
+      "epoch": 0.29120819654388685,
+      "grad_norm": 0.13059987127780914,
+      "learning_rate": 0.00018836484341174775,
+      "loss": 0.1678,
+      "step": 4036
+    },
+    {
+      "epoch": 0.2912803492189473,
+      "grad_norm": 0.11258640140295029,
+      "learning_rate": 0.000188361956992351,
+      "loss": 0.1422,
+      "step": 4037
+    },
+    {
+      "epoch": 0.29135250189400774,
+      "grad_norm": 0.12477847933769226,
+      "learning_rate": 0.00018835907057295425,
+      "loss": 0.1751,
+      "step": 4038
+    },
+    {
+      "epoch": 0.29142465456906813,
+      "grad_norm": 0.10548390448093414,
+      "learning_rate": 0.0001883561841535575,
+      "loss": 0.1278,
+      "step": 4039
+    },
+    {
+      "epoch": 0.2914968072441286,
+      "grad_norm": 0.11356746405363083,
+      "learning_rate": 0.00018835329773416078,
+      "loss": 0.186,
+      "step": 4040
+    },
+    {
+      "epoch": 0.291568959919189,
+      "grad_norm": 0.11771997064352036,
+      "learning_rate": 0.00018835041131476407,
+      "loss": 0.1914,
+      "step": 4041
+    },
+    {
+      "epoch": 0.2916411125942494,
+      "grad_norm": 0.10375703871250153,
+      "learning_rate": 0.0001883475248953673,
+      "loss": 0.1668,
+      "step": 4042
+    },
+    {
+      "epoch": 0.29171326526930985,
+      "grad_norm": 0.1297137439250946,
+      "learning_rate": 0.00018834463847597056,
+      "loss": 0.1757,
+      "step": 4043
+    },
+    {
+      "epoch": 0.2917854179443703,
+      "grad_norm": 0.122823566198349,
+      "learning_rate": 0.00018834175205657383,
+      "loss": 0.1312,
+      "step": 4044
+    },
+    {
+      "epoch": 0.29185757061943074,
+      "grad_norm": 0.11158803850412369,
+      "learning_rate": 0.0001883388656371771,
+      "loss": 0.1748,
+      "step": 4045
+    },
+    {
+      "epoch": 0.29192972329449113,
+      "grad_norm": 0.12954041361808777,
+      "learning_rate": 0.00018833597921778035,
+      "loss": 0.1422,
+      "step": 4046
+    },
+    {
+      "epoch": 0.29200187596955157,
+      "grad_norm": 0.11176861077547073,
+      "learning_rate": 0.00018833309279838362,
+      "loss": 0.1899,
+      "step": 4047
+    },
+    {
+      "epoch": 0.292074028644612,
+      "grad_norm": 0.13838274776935577,
+      "learning_rate": 0.00018833020637898688,
+      "loss": 0.1687,
+      "step": 4048
+    },
+    {
+      "epoch": 0.2921461813196724,
+      "grad_norm": 0.11651098728179932,
+      "learning_rate": 0.00018832731995959014,
+      "loss": 0.1347,
+      "step": 4049
+    },
+    {
+      "epoch": 0.29221833399473285,
+      "grad_norm": 0.1457776576280594,
+      "learning_rate": 0.0001883244335401934,
+      "loss": 0.1436,
+      "step": 4050
+    },
+    {
+      "epoch": 0.2922904866697933,
+      "grad_norm": 0.16298915445804596,
+      "learning_rate": 0.00018832154712079667,
+      "loss": 0.1437,
+      "step": 4051
+    },
+    {
+      "epoch": 0.29236263934485374,
+      "grad_norm": 0.12889905273914337,
+      "learning_rate": 0.00018831866070139993,
+      "loss": 0.1518,
+      "step": 4052
+    },
+    {
+      "epoch": 0.2924347920199141,
+      "grad_norm": 0.102033831179142,
+      "learning_rate": 0.00018831577428200317,
+      "loss": 0.1563,
+      "step": 4053
+    },
+    {
+      "epoch": 0.29250694469497457,
+      "grad_norm": 0.1378796100616455,
+      "learning_rate": 0.00018831288786260643,
+      "loss": 0.16,
+      "step": 4054
+    },
+    {
+      "epoch": 0.292579097370035,
+      "grad_norm": 0.12615390121936798,
+      "learning_rate": 0.00018831000144320972,
+      "loss": 0.1797,
+      "step": 4055
+    },
+    {
+      "epoch": 0.2926512500450954,
+      "grad_norm": 0.12225451320409775,
+      "learning_rate": 0.00018830711502381298,
+      "loss": 0.1335,
+      "step": 4056
+    },
+    {
+      "epoch": 0.29272340272015585,
+      "grad_norm": 0.1552281379699707,
+      "learning_rate": 0.00018830422860441625,
+      "loss": 0.1351,
+      "step": 4057
+    },
+    {
+      "epoch": 0.2927955553952163,
+      "grad_norm": 0.17103096842765808,
+      "learning_rate": 0.00018830134218501948,
+      "loss": 0.1857,
+      "step": 4058
+    },
+    {
+      "epoch": 0.29286770807027673,
+      "grad_norm": 0.14693373441696167,
+      "learning_rate": 0.00018829845576562274,
+      "loss": 0.1428,
+      "step": 4059
+    },
+    {
+      "epoch": 0.2929398607453371,
+      "grad_norm": 0.13027967512607574,
+      "learning_rate": 0.000188295569346226,
+      "loss": 0.1567,
+      "step": 4060
+    },
+    {
+      "epoch": 0.29301201342039757,
+      "grad_norm": 0.09253823757171631,
+      "learning_rate": 0.00018829268292682927,
+      "loss": 0.1417,
+      "step": 4061
+    },
+    {
+      "epoch": 0.293084166095458,
+      "grad_norm": 0.10826113075017929,
+      "learning_rate": 0.00018828979650743256,
+      "loss": 0.1642,
+      "step": 4062
+    },
+    {
+      "epoch": 0.2931563187705184,
+      "grad_norm": 0.12423907965421677,
+      "learning_rate": 0.0001882869100880358,
+      "loss": 0.159,
+      "step": 4063
+    },
+    {
+      "epoch": 0.29322847144557884,
+      "grad_norm": 0.10775986313819885,
+      "learning_rate": 0.00018828402366863906,
+      "loss": 0.1565,
+      "step": 4064
+    },
+    {
+      "epoch": 0.2933006241206393,
+      "grad_norm": 0.12115251272916794,
+      "learning_rate": 0.00018828113724924232,
+      "loss": 0.1536,
+      "step": 4065
+    },
+    {
+      "epoch": 0.2933727767956997,
+      "grad_norm": 0.10422796756029129,
+      "learning_rate": 0.00018827825082984558,
+      "loss": 0.1733,
+      "step": 4066
+    },
+    {
+      "epoch": 0.2934449294707601,
+      "grad_norm": 0.1318529099225998,
+      "learning_rate": 0.00018827536441044885,
+      "loss": 0.187,
+      "step": 4067
+    },
+    {
+      "epoch": 0.29351708214582056,
+      "grad_norm": 0.10229651629924774,
+      "learning_rate": 0.0001882724779910521,
+      "loss": 0.149,
+      "step": 4068
+    },
+    {
+      "epoch": 0.293589234820881,
+      "grad_norm": 0.17193888127803802,
+      "learning_rate": 0.00018826959157165537,
+      "loss": 0.2011,
+      "step": 4069
+    },
+    {
+      "epoch": 0.2936613874959414,
+      "grad_norm": 0.11080505698919296,
+      "learning_rate": 0.00018826670515225864,
+      "loss": 0.1973,
+      "step": 4070
+    },
+    {
+      "epoch": 0.29373354017100184,
+      "grad_norm": 0.164872944355011,
+      "learning_rate": 0.0001882638187328619,
+      "loss": 0.177,
+      "step": 4071
+    },
+    {
+      "epoch": 0.2938056928460623,
+      "grad_norm": 0.12402703613042831,
+      "learning_rate": 0.00018826093231346516,
+      "loss": 0.1304,
+      "step": 4072
+    },
+    {
+      "epoch": 0.29387784552112267,
+      "grad_norm": 0.12431920319795609,
+      "learning_rate": 0.00018825804589406842,
+      "loss": 0.1985,
+      "step": 4073
+    },
+    {
+      "epoch": 0.2939499981961831,
+      "grad_norm": 0.11424025893211365,
+      "learning_rate": 0.00018825515947467166,
+      "loss": 0.166,
+      "step": 4074
+    },
+    {
+      "epoch": 0.29402215087124356,
+      "grad_norm": 0.1481512039899826,
+      "learning_rate": 0.00018825227305527492,
+      "loss": 0.1308,
+      "step": 4075
+    },
+    {
+      "epoch": 0.294094303546304,
+      "grad_norm": 0.13233418762683868,
+      "learning_rate": 0.0001882493866358782,
+      "loss": 0.1528,
+      "step": 4076
+    },
+    {
+      "epoch": 0.2941664562213644,
+      "grad_norm": 0.1373155415058136,
+      "learning_rate": 0.00018824650021648148,
+      "loss": 0.1981,
+      "step": 4077
+    },
+    {
+      "epoch": 0.29423860889642484,
+      "grad_norm": 0.15324561297893524,
+      "learning_rate": 0.00018824361379708474,
+      "loss": 0.1932,
+      "step": 4078
+    },
+    {
+      "epoch": 0.2943107615714853,
+      "grad_norm": 0.10070636123418808,
+      "learning_rate": 0.00018824072737768798,
+      "loss": 0.1665,
+      "step": 4079
+    },
+    {
+      "epoch": 0.29438291424654567,
+      "grad_norm": 0.14961004257202148,
+      "learning_rate": 0.00018823784095829124,
+      "loss": 0.166,
+      "step": 4080
+    },
+    {
+      "epoch": 0.2944550669216061,
+      "grad_norm": 0.11874378472566605,
+      "learning_rate": 0.0001882349545388945,
+      "loss": 0.1582,
+      "step": 4081
+    },
+    {
+      "epoch": 0.29452721959666656,
+      "grad_norm": 0.11802510917186737,
+      "learning_rate": 0.00018823206811949776,
+      "loss": 0.1271,
+      "step": 4082
+    },
+    {
+      "epoch": 0.294599372271727,
+      "grad_norm": 0.1156727597117424,
+      "learning_rate": 0.00018822918170010105,
+      "loss": 0.1494,
+      "step": 4083
+    },
+    {
+      "epoch": 0.2946715249467874,
+      "grad_norm": 0.13092243671417236,
+      "learning_rate": 0.0001882262952807043,
+      "loss": 0.1736,
+      "step": 4084
+    },
+    {
+      "epoch": 0.29474367762184783,
+      "grad_norm": 0.1102990210056305,
+      "learning_rate": 0.00018822340886130755,
+      "loss": 0.1765,
+      "step": 4085
+    },
+    {
+      "epoch": 0.2948158302969083,
+      "grad_norm": 0.13617543876171112,
+      "learning_rate": 0.00018822052244191082,
+      "loss": 0.1995,
+      "step": 4086
+    },
+    {
+      "epoch": 0.29488798297196867,
+      "grad_norm": 0.17690233886241913,
+      "learning_rate": 0.00018821763602251408,
+      "loss": 0.1253,
+      "step": 4087
+    },
+    {
+      "epoch": 0.2949601356470291,
+      "grad_norm": 0.1378060132265091,
+      "learning_rate": 0.00018821474960311734,
+      "loss": 0.1225,
+      "step": 4088
+    },
+    {
+      "epoch": 0.29503228832208955,
+      "grad_norm": 0.10630947351455688,
+      "learning_rate": 0.0001882118631837206,
+      "loss": 0.0867,
+      "step": 4089
+    },
+    {
+      "epoch": 0.29510444099714994,
+      "grad_norm": 0.17138361930847168,
+      "learning_rate": 0.00018820897676432387,
+      "loss": 0.1546,
+      "step": 4090
+    },
+    {
+      "epoch": 0.2951765936722104,
+      "grad_norm": 0.1295410692691803,
+      "learning_rate": 0.00018820609034492713,
+      "loss": 0.194,
+      "step": 4091
+    },
+    {
+      "epoch": 0.29524874634727083,
+      "grad_norm": 0.13298285007476807,
+      "learning_rate": 0.0001882032039255304,
+      "loss": 0.1998,
+      "step": 4092
+    },
+    {
+      "epoch": 0.2953208990223313,
+      "grad_norm": 0.11028098315000534,
+      "learning_rate": 0.00018820031750613366,
+      "loss": 0.1298,
+      "step": 4093
+    },
+    {
+      "epoch": 0.29539305169739166,
+      "grad_norm": 0.11302671581506729,
+      "learning_rate": 0.00018819743108673692,
+      "loss": 0.107,
+      "step": 4094
+    },
+    {
+      "epoch": 0.2954652043724521,
+      "grad_norm": 0.12494118511676788,
+      "learning_rate": 0.00018819454466734015,
+      "loss": 0.1429,
+      "step": 4095
+    },
+    {
+      "epoch": 0.29553735704751255,
+      "grad_norm": 0.15362714231014252,
+      "learning_rate": 0.00018819165824794342,
+      "loss": 0.1242,
+      "step": 4096
+    },
+    {
+      "epoch": 0.29560950972257294,
+      "grad_norm": 0.12694497406482697,
+      "learning_rate": 0.0001881887718285467,
+      "loss": 0.1348,
+      "step": 4097
+    },
+    {
+      "epoch": 0.2956816623976334,
+      "grad_norm": 0.10499429702758789,
+      "learning_rate": 0.00018818588540914997,
+      "loss": 0.1069,
+      "step": 4098
+    },
+    {
+      "epoch": 0.2957538150726938,
+      "grad_norm": 0.11628948152065277,
+      "learning_rate": 0.00018818299898975323,
+      "loss": 0.1158,
+      "step": 4099
+    },
+    {
+      "epoch": 0.29582596774775427,
+      "grad_norm": 0.09497885406017303,
+      "learning_rate": 0.00018818011257035647,
+      "loss": 0.2063,
+      "step": 4100
+    },
+    {
+      "epoch": 0.29589812042281466,
+      "grad_norm": 0.11997506767511368,
+      "learning_rate": 0.00018817722615095973,
+      "loss": 0.1884,
+      "step": 4101
+    },
+    {
+      "epoch": 0.2959702730978751,
+      "grad_norm": 0.11558733880519867,
+      "learning_rate": 0.000188174339731563,
+      "loss": 0.1529,
+      "step": 4102
+    },
+    {
+      "epoch": 0.29604242577293555,
+      "grad_norm": 0.11531398445367813,
+      "learning_rate": 0.00018817145331216626,
+      "loss": 0.1241,
+      "step": 4103
+    },
+    {
+      "epoch": 0.29611457844799594,
+      "grad_norm": 0.11987558007240295,
+      "learning_rate": 0.00018816856689276955,
+      "loss": 0.1177,
+      "step": 4104
+    },
+    {
+      "epoch": 0.2961867311230564,
+      "grad_norm": 0.13128049671649933,
+      "learning_rate": 0.00018816568047337278,
+      "loss": 0.1799,
+      "step": 4105
+    },
+    {
+      "epoch": 0.2962588837981168,
+      "grad_norm": 0.1083531603217125,
+      "learning_rate": 0.00018816279405397605,
+      "loss": 0.1408,
+      "step": 4106
+    },
+    {
+      "epoch": 0.29633103647317727,
+      "grad_norm": 0.14563891291618347,
+      "learning_rate": 0.0001881599076345793,
+      "loss": 0.1904,
+      "step": 4107
+    },
+    {
+      "epoch": 0.29640318914823766,
+      "grad_norm": 0.1262306571006775,
+      "learning_rate": 0.00018815702121518257,
+      "loss": 0.175,
+      "step": 4108
+    },
+    {
+      "epoch": 0.2964753418232981,
+      "grad_norm": 0.11391209810972214,
+      "learning_rate": 0.00018815413479578584,
+      "loss": 0.1828,
+      "step": 4109
+    },
+    {
+      "epoch": 0.29654749449835854,
+      "grad_norm": 0.1466747671365738,
+      "learning_rate": 0.0001881512483763891,
+      "loss": 0.1566,
+      "step": 4110
+    },
+    {
+      "epoch": 0.29661964717341893,
+      "grad_norm": 0.09921132773160934,
+      "learning_rate": 0.00018814836195699236,
+      "loss": 0.2183,
+      "step": 4111
+    },
+    {
+      "epoch": 0.2966917998484794,
+      "grad_norm": 0.1014072522521019,
+      "learning_rate": 0.00018814547553759562,
+      "loss": 0.1362,
+      "step": 4112
+    },
+    {
+      "epoch": 0.2967639525235398,
+      "grad_norm": 0.1419476717710495,
+      "learning_rate": 0.0001881425891181989,
+      "loss": 0.1797,
+      "step": 4113
+    },
+    {
+      "epoch": 0.29683610519860026,
+      "grad_norm": 0.14102686941623688,
+      "learning_rate": 0.00018813970269880215,
+      "loss": 0.1651,
+      "step": 4114
+    },
+    {
+      "epoch": 0.29690825787366065,
+      "grad_norm": 0.11882190406322479,
+      "learning_rate": 0.0001881368162794054,
+      "loss": 0.1252,
+      "step": 4115
+    },
+    {
+      "epoch": 0.2969804105487211,
+      "grad_norm": 0.10842740535736084,
+      "learning_rate": 0.00018813392986000865,
+      "loss": 0.1866,
+      "step": 4116
+    },
+    {
+      "epoch": 0.29705256322378154,
+      "grad_norm": 0.12704412639141083,
+      "learning_rate": 0.0001881310434406119,
+      "loss": 0.1764,
+      "step": 4117
+    },
+    {
+      "epoch": 0.29712471589884193,
+      "grad_norm": 0.09052251279354095,
+      "learning_rate": 0.0001881281570212152,
+      "loss": 0.1465,
+      "step": 4118
+    },
+    {
+      "epoch": 0.2971968685739024,
+      "grad_norm": 0.10798463970422745,
+      "learning_rate": 0.00018812527060181846,
+      "loss": 0.1193,
+      "step": 4119
+    },
+    {
+      "epoch": 0.2972690212489628,
+      "grad_norm": 0.149445578455925,
+      "learning_rate": 0.00018812238418242173,
+      "loss": 0.1578,
+      "step": 4120
+    },
+    {
+      "epoch": 0.2973411739240232,
+      "grad_norm": 0.10103411972522736,
+      "learning_rate": 0.00018811949776302496,
+      "loss": 0.1184,
+      "step": 4121
+    },
+    {
+      "epoch": 0.29741332659908365,
+      "grad_norm": 0.14422334730625153,
+      "learning_rate": 0.00018811661134362823,
+      "loss": 0.1892,
+      "step": 4122
+    },
+    {
+      "epoch": 0.2974854792741441,
+      "grad_norm": 0.14310158789157867,
+      "learning_rate": 0.0001881137249242315,
+      "loss": 0.1733,
+      "step": 4123
+    },
+    {
+      "epoch": 0.29755763194920454,
+      "grad_norm": 0.1744057536125183,
+      "learning_rate": 0.00018811083850483475,
+      "loss": 0.1628,
+      "step": 4124
+    },
+    {
+      "epoch": 0.2976297846242649,
+      "grad_norm": 0.16436494886875153,
+      "learning_rate": 0.00018810795208543804,
+      "loss": 0.1426,
+      "step": 4125
+    },
+    {
+      "epoch": 0.29770193729932537,
+      "grad_norm": 0.10014980286359787,
+      "learning_rate": 0.00018810506566604128,
+      "loss": 0.1185,
+      "step": 4126
+    },
+    {
+      "epoch": 0.2977740899743858,
+      "grad_norm": 0.12728647887706757,
+      "learning_rate": 0.00018810217924664454,
+      "loss": 0.1495,
+      "step": 4127
+    },
+    {
+      "epoch": 0.2978462426494462,
+      "grad_norm": 0.09811389446258545,
+      "learning_rate": 0.0001880992928272478,
+      "loss": 0.1469,
+      "step": 4128
+    },
+    {
+      "epoch": 0.29791839532450665,
+      "grad_norm": 0.10743153095245361,
+      "learning_rate": 0.00018809640640785107,
+      "loss": 0.1497,
+      "step": 4129
+    },
+    {
+      "epoch": 0.2979905479995671,
+      "grad_norm": 0.11010617017745972,
+      "learning_rate": 0.00018809351998845433,
+      "loss": 0.1747,
+      "step": 4130
+    },
+    {
+      "epoch": 0.29806270067462753,
+      "grad_norm": 0.11767455190420151,
+      "learning_rate": 0.0001880906335690576,
+      "loss": 0.1553,
+      "step": 4131
+    },
+    {
+      "epoch": 0.2981348533496879,
+      "grad_norm": 0.13255800306797028,
+      "learning_rate": 0.00018808774714966086,
+      "loss": 0.1649,
+      "step": 4132
+    },
+    {
+      "epoch": 0.29820700602474837,
+      "grad_norm": 0.10767663270235062,
+      "learning_rate": 0.00018808486073026412,
+      "loss": 0.2007,
+      "step": 4133
+    },
+    {
+      "epoch": 0.2982791586998088,
+      "grad_norm": 0.11623447388410568,
+      "learning_rate": 0.00018808197431086738,
+      "loss": 0.1141,
+      "step": 4134
+    },
+    {
+      "epoch": 0.2983513113748692,
+      "grad_norm": 0.12203383445739746,
+      "learning_rate": 0.00018807908789147064,
+      "loss": 0.1472,
+      "step": 4135
+    },
+    {
+      "epoch": 0.29842346404992964,
+      "grad_norm": 0.1434355527162552,
+      "learning_rate": 0.0001880762014720739,
+      "loss": 0.2322,
+      "step": 4136
+    },
+    {
+      "epoch": 0.2984956167249901,
+      "grad_norm": 0.11646952480077744,
+      "learning_rate": 0.00018807331505267717,
+      "loss": 0.1597,
+      "step": 4137
+    },
+    {
+      "epoch": 0.29856776940005053,
+      "grad_norm": 0.12211567163467407,
+      "learning_rate": 0.0001880704286332804,
+      "loss": 0.1774,
+      "step": 4138
+    },
+    {
+      "epoch": 0.2986399220751109,
+      "grad_norm": 0.09796212613582611,
+      "learning_rate": 0.0001880675422138837,
+      "loss": 0.1474,
+      "step": 4139
+    },
+    {
+      "epoch": 0.29871207475017136,
+      "grad_norm": 0.11343994736671448,
+      "learning_rate": 0.00018806465579448696,
+      "loss": 0.1741,
+      "step": 4140
+    },
+    {
+      "epoch": 0.2987842274252318,
+      "grad_norm": 0.10891371220350266,
+      "learning_rate": 0.00018806176937509022,
+      "loss": 0.1269,
+      "step": 4141
+    },
+    {
+      "epoch": 0.2988563801002922,
+      "grad_norm": 0.1293962597846985,
+      "learning_rate": 0.00018805888295569348,
+      "loss": 0.2021,
+      "step": 4142
+    },
+    {
+      "epoch": 0.29892853277535264,
+      "grad_norm": 0.10513212531805038,
+      "learning_rate": 0.00018805599653629672,
+      "loss": 0.1557,
+      "step": 4143
+    },
+    {
+      "epoch": 0.2990006854504131,
+      "grad_norm": 0.10921186208724976,
+      "learning_rate": 0.00018805311011689998,
+      "loss": 0.1242,
+      "step": 4144
+    },
+    {
+      "epoch": 0.29907283812547353,
+      "grad_norm": 0.11698474735021591,
+      "learning_rate": 0.00018805022369750325,
+      "loss": 0.1721,
+      "step": 4145
+    },
+    {
+      "epoch": 0.2991449908005339,
+      "grad_norm": 0.10917928814888,
+      "learning_rate": 0.00018804733727810654,
+      "loss": 0.1267,
+      "step": 4146
+    },
+    {
+      "epoch": 0.29921714347559436,
+      "grad_norm": 0.10929599404335022,
+      "learning_rate": 0.0001880444508587098,
+      "loss": 0.1566,
+      "step": 4147
+    },
+    {
+      "epoch": 0.2992892961506548,
+      "grad_norm": 0.12578366696834564,
+      "learning_rate": 0.00018804156443931304,
+      "loss": 0.1473,
+      "step": 4148
+    },
+    {
+      "epoch": 0.2993614488257152,
+      "grad_norm": 0.11903616040945053,
+      "learning_rate": 0.0001880386780199163,
+      "loss": 0.1594,
+      "step": 4149
+    },
+    {
+      "epoch": 0.29943360150077564,
+      "grad_norm": 0.1281002014875412,
+      "learning_rate": 0.00018803579160051956,
+      "loss": 0.1956,
+      "step": 4150
+    },
+    {
+      "epoch": 0.2995057541758361,
+      "grad_norm": 0.1199919581413269,
+      "learning_rate": 0.00018803290518112282,
+      "loss": 0.1469,
+      "step": 4151
+    },
+    {
+      "epoch": 0.29957790685089647,
+      "grad_norm": 0.11935439705848694,
+      "learning_rate": 0.0001880300187617261,
+      "loss": 0.1396,
+      "step": 4152
+    },
+    {
+      "epoch": 0.2996500595259569,
+      "grad_norm": 0.11172063648700714,
+      "learning_rate": 0.00018802713234232935,
+      "loss": 0.147,
+      "step": 4153
+    },
+    {
+      "epoch": 0.29972221220101736,
+      "grad_norm": 0.11805318295955658,
+      "learning_rate": 0.0001880242459229326,
+      "loss": 0.1475,
+      "step": 4154
+    },
+    {
+      "epoch": 0.2997943648760778,
+      "grad_norm": 0.12204427272081375,
+      "learning_rate": 0.00018802135950353588,
+      "loss": 0.1779,
+      "step": 4155
+    },
+    {
+      "epoch": 0.2998665175511382,
+      "grad_norm": 0.10106804221868515,
+      "learning_rate": 0.00018801847308413914,
+      "loss": 0.1628,
+      "step": 4156
+    },
+    {
+      "epoch": 0.29993867022619863,
+      "grad_norm": 0.1267825812101364,
+      "learning_rate": 0.0001880155866647424,
+      "loss": 0.1184,
+      "step": 4157
+    },
+    {
+      "epoch": 0.3000108229012591,
+      "grad_norm": 0.11733009666204453,
+      "learning_rate": 0.00018801270024534566,
+      "loss": 0.1502,
+      "step": 4158
+    },
+    {
+      "epoch": 0.30008297557631947,
+      "grad_norm": 0.13267752528190613,
+      "learning_rate": 0.0001880098138259489,
+      "loss": 0.1694,
+      "step": 4159
+    },
+    {
+      "epoch": 0.3001551282513799,
+      "grad_norm": 0.14454156160354614,
+      "learning_rate": 0.0001880069274065522,
+      "loss": 0.1879,
+      "step": 4160
+    },
+    {
+      "epoch": 0.30022728092644035,
+      "grad_norm": 0.10851837694644928,
+      "learning_rate": 0.00018800404098715545,
+      "loss": 0.1671,
+      "step": 4161
+    },
+    {
+      "epoch": 0.3002994336015008,
+      "grad_norm": 0.12852200865745544,
+      "learning_rate": 0.00018800115456775872,
+      "loss": 0.1832,
+      "step": 4162
+    },
+    {
+      "epoch": 0.3003715862765612,
+      "grad_norm": 0.11237096041440964,
+      "learning_rate": 0.00018799826814836198,
+      "loss": 0.1271,
+      "step": 4163
+    },
+    {
+      "epoch": 0.30044373895162163,
+      "grad_norm": 0.11820393800735474,
+      "learning_rate": 0.00018799538172896522,
+      "loss": 0.1859,
+      "step": 4164
+    },
+    {
+      "epoch": 0.3005158916266821,
+      "grad_norm": 0.13072800636291504,
+      "learning_rate": 0.00018799249530956848,
+      "loss": 0.2141,
+      "step": 4165
+    },
+    {
+      "epoch": 0.30058804430174246,
+      "grad_norm": 0.11412015557289124,
+      "learning_rate": 0.00018798960889017174,
+      "loss": 0.1604,
+      "step": 4166
+    },
+    {
+      "epoch": 0.3006601969768029,
+      "grad_norm": 0.10667741298675537,
+      "learning_rate": 0.00018798672247077503,
+      "loss": 0.1529,
+      "step": 4167
+    },
+    {
+      "epoch": 0.30073234965186335,
+      "grad_norm": 0.1238722950220108,
+      "learning_rate": 0.0001879838360513783,
+      "loss": 0.1632,
+      "step": 4168
+    },
+    {
+      "epoch": 0.3008045023269238,
+      "grad_norm": 0.1128937378525734,
+      "learning_rate": 0.00018798094963198153,
+      "loss": 0.1618,
+      "step": 4169
+    },
+    {
+      "epoch": 0.3008766550019842,
+      "grad_norm": 0.09414263069629669,
+      "learning_rate": 0.0001879780632125848,
+      "loss": 0.1546,
+      "step": 4170
+    },
+    {
+      "epoch": 0.3009488076770446,
+      "grad_norm": 0.12279564142227173,
+      "learning_rate": 0.00018797517679318806,
+      "loss": 0.1514,
+      "step": 4171
+    },
+    {
+      "epoch": 0.30102096035210507,
+      "grad_norm": 0.11160296201705933,
+      "learning_rate": 0.00018797229037379132,
+      "loss": 0.1497,
+      "step": 4172
+    },
+    {
+      "epoch": 0.30109311302716546,
+      "grad_norm": 0.12374529242515564,
+      "learning_rate": 0.00018796940395439458,
+      "loss": 0.2017,
+      "step": 4173
+    },
+    {
+      "epoch": 0.3011652657022259,
+      "grad_norm": 0.10067132115364075,
+      "learning_rate": 0.00018796651753499784,
+      "loss": 0.1761,
+      "step": 4174
+    },
+    {
+      "epoch": 0.30123741837728635,
+      "grad_norm": 0.15694798529148102,
+      "learning_rate": 0.0001879636311156011,
+      "loss": 0.1506,
+      "step": 4175
+    },
+    {
+      "epoch": 0.3013095710523468,
+      "grad_norm": 0.09987813234329224,
+      "learning_rate": 0.00018796074469620437,
+      "loss": 0.1449,
+      "step": 4176
+    },
+    {
+      "epoch": 0.3013817237274072,
+      "grad_norm": 0.11210080236196518,
+      "learning_rate": 0.00018795785827680763,
+      "loss": 0.1088,
+      "step": 4177
+    },
+    {
+      "epoch": 0.3014538764024676,
+      "grad_norm": 0.14822888374328613,
+      "learning_rate": 0.0001879549718574109,
+      "loss": 0.1419,
+      "step": 4178
+    },
+    {
+      "epoch": 0.30152602907752807,
+      "grad_norm": 0.1467704325914383,
+      "learning_rate": 0.00018795208543801416,
+      "loss": 0.1969,
+      "step": 4179
+    },
+    {
+      "epoch": 0.30159818175258846,
+      "grad_norm": 0.11436786502599716,
+      "learning_rate": 0.0001879491990186174,
+      "loss": 0.1627,
+      "step": 4180
+    },
+    {
+      "epoch": 0.3016703344276489,
+      "grad_norm": 0.13047438859939575,
+      "learning_rate": 0.00018794631259922068,
+      "loss": 0.134,
+      "step": 4181
+    },
+    {
+      "epoch": 0.30174248710270934,
+      "grad_norm": 0.12848497927188873,
+      "learning_rate": 0.00018794342617982395,
+      "loss": 0.1177,
+      "step": 4182
+    },
+    {
+      "epoch": 0.30181463977776973,
+      "grad_norm": 0.12149526923894882,
+      "learning_rate": 0.0001879405397604272,
+      "loss": 0.2049,
+      "step": 4183
+    },
+    {
+      "epoch": 0.3018867924528302,
+      "grad_norm": 0.130044087767601,
+      "learning_rate": 0.00018793765334103047,
+      "loss": 0.1751,
+      "step": 4184
+    },
+    {
+      "epoch": 0.3019589451278906,
+      "grad_norm": 0.12724927067756653,
+      "learning_rate": 0.0001879347669216337,
+      "loss": 0.1456,
+      "step": 4185
+    },
+    {
+      "epoch": 0.30203109780295107,
+      "grad_norm": 0.09408117085695267,
+      "learning_rate": 0.00018793188050223697,
+      "loss": 0.1419,
+      "step": 4186
+    },
+    {
+      "epoch": 0.30210325047801145,
+      "grad_norm": 0.13772109150886536,
+      "learning_rate": 0.00018792899408284024,
+      "loss": 0.1553,
+      "step": 4187
+    },
+    {
+      "epoch": 0.3021754031530719,
+      "grad_norm": 0.11674950271844864,
+      "learning_rate": 0.00018792610766344353,
+      "loss": 0.152,
+      "step": 4188
+    },
+    {
+      "epoch": 0.30224755582813234,
+      "grad_norm": 0.1148289367556572,
+      "learning_rate": 0.0001879232212440468,
+      "loss": 0.1891,
+      "step": 4189
+    },
+    {
+      "epoch": 0.30231970850319273,
+      "grad_norm": 0.14175260066986084,
+      "learning_rate": 0.00018792033482465002,
+      "loss": 0.1409,
+      "step": 4190
+    },
+    {
+      "epoch": 0.3023918611782532,
+      "grad_norm": 0.09403803199529648,
+      "learning_rate": 0.0001879174484052533,
+      "loss": 0.1584,
+      "step": 4191
+    },
+    {
+      "epoch": 0.3024640138533136,
+      "grad_norm": 0.13788004219532013,
+      "learning_rate": 0.00018791456198585655,
+      "loss": 0.1308,
+      "step": 4192
+    },
+    {
+      "epoch": 0.30253616652837406,
+      "grad_norm": 0.0977247878909111,
+      "learning_rate": 0.0001879116755664598,
+      "loss": 0.1536,
+      "step": 4193
+    },
+    {
+      "epoch": 0.30260831920343445,
+      "grad_norm": 0.1072482168674469,
+      "learning_rate": 0.00018790878914706308,
+      "loss": 0.2268,
+      "step": 4194
+    },
+    {
+      "epoch": 0.3026804718784949,
+      "grad_norm": 0.115860216319561,
+      "learning_rate": 0.00018790590272766634,
+      "loss": 0.1732,
+      "step": 4195
+    },
+    {
+      "epoch": 0.30275262455355534,
+      "grad_norm": 0.11402580887079239,
+      "learning_rate": 0.0001879030163082696,
+      "loss": 0.1448,
+      "step": 4196
+    },
+    {
+      "epoch": 0.3028247772286157,
+      "grad_norm": 0.1256372034549713,
+      "learning_rate": 0.00018790012988887286,
+      "loss": 0.1335,
+      "step": 4197
+    },
+    {
+      "epoch": 0.30289692990367617,
+      "grad_norm": 0.11045928299427032,
+      "learning_rate": 0.00018789724346947613,
+      "loss": 0.1777,
+      "step": 4198
+    },
+    {
+      "epoch": 0.3029690825787366,
+      "grad_norm": 0.10243780165910721,
+      "learning_rate": 0.0001878943570500794,
+      "loss": 0.1781,
+      "step": 4199
+    },
+    {
+      "epoch": 0.30304123525379706,
+      "grad_norm": 0.09597732871770859,
+      "learning_rate": 0.00018789147063068265,
+      "loss": 0.1631,
+      "step": 4200
+    },
+    {
+      "epoch": 0.30311338792885745,
+      "grad_norm": 0.11148419231176376,
+      "learning_rate": 0.0001878885842112859,
+      "loss": 0.1736,
+      "step": 4201
+    },
+    {
+      "epoch": 0.3031855406039179,
+      "grad_norm": 0.11032175272703171,
+      "learning_rate": 0.00018788569779188918,
+      "loss": 0.1119,
+      "step": 4202
+    },
+    {
+      "epoch": 0.30325769327897834,
+      "grad_norm": 0.09522320330142975,
+      "learning_rate": 0.00018788281137249244,
+      "loss": 0.1259,
+      "step": 4203
+    },
+    {
+      "epoch": 0.3033298459540387,
+      "grad_norm": 0.11687321960926056,
+      "learning_rate": 0.0001878799249530957,
+      "loss": 0.1904,
+      "step": 4204
+    },
+    {
+      "epoch": 0.30340199862909917,
+      "grad_norm": 0.10842274129390717,
+      "learning_rate": 0.00018787703853369897,
+      "loss": 0.1194,
+      "step": 4205
+    },
+    {
+      "epoch": 0.3034741513041596,
+      "grad_norm": 0.11892799288034439,
+      "learning_rate": 0.0001878741521143022,
+      "loss": 0.2106,
+      "step": 4206
+    },
+    {
+      "epoch": 0.30354630397922006,
+      "grad_norm": 0.10375183075666428,
+      "learning_rate": 0.00018787126569490547,
+      "loss": 0.136,
+      "step": 4207
+    },
+    {
+      "epoch": 0.30361845665428044,
+      "grad_norm": 0.14736315608024597,
+      "learning_rate": 0.00018786837927550873,
+      "loss": 0.18,
+      "step": 4208
+    },
+    {
+      "epoch": 0.3036906093293409,
+      "grad_norm": 0.1258639693260193,
+      "learning_rate": 0.00018786549285611202,
+      "loss": 0.1328,
+      "step": 4209
+    },
+    {
+      "epoch": 0.30376276200440133,
+      "grad_norm": 0.13408122956752777,
+      "learning_rate": 0.00018786260643671528,
+      "loss": 0.122,
+      "step": 4210
+    },
+    {
+      "epoch": 0.3038349146794617,
+      "grad_norm": 0.1173291951417923,
+      "learning_rate": 0.00018785972001731852,
+      "loss": 0.1513,
+      "step": 4211
+    },
+    {
+      "epoch": 0.30390706735452216,
+      "grad_norm": 0.10346346348524094,
+      "learning_rate": 0.00018785683359792178,
+      "loss": 0.1413,
+      "step": 4212
+    },
+    {
+      "epoch": 0.3039792200295826,
+      "grad_norm": 0.21302101016044617,
+      "learning_rate": 0.00018785394717852504,
+      "loss": 0.1854,
+      "step": 4213
+    },
+    {
+      "epoch": 0.304051372704643,
+      "grad_norm": 0.16414475440979004,
+      "learning_rate": 0.0001878510607591283,
+      "loss": 0.182,
+      "step": 4214
+    },
+    {
+      "epoch": 0.30412352537970344,
+      "grad_norm": 0.1431964635848999,
+      "learning_rate": 0.00018784817433973157,
+      "loss": 0.1724,
+      "step": 4215
+    },
+    {
+      "epoch": 0.3041956780547639,
+      "grad_norm": 0.11071380227804184,
+      "learning_rate": 0.00018784528792033483,
+      "loss": 0.1084,
+      "step": 4216
+    },
+    {
+      "epoch": 0.30426783072982433,
+      "grad_norm": 0.10251831263303757,
+      "learning_rate": 0.0001878424015009381,
+      "loss": 0.131,
+      "step": 4217
+    },
+    {
+      "epoch": 0.3043399834048847,
+      "grad_norm": 0.1020776629447937,
+      "learning_rate": 0.00018783951508154136,
+      "loss": 0.1733,
+      "step": 4218
+    },
+    {
+      "epoch": 0.30441213607994516,
+      "grad_norm": 0.10754634439945221,
+      "learning_rate": 0.00018783662866214462,
+      "loss": 0.1544,
+      "step": 4219
+    },
+    {
+      "epoch": 0.3044842887550056,
+      "grad_norm": 0.12796087563037872,
+      "learning_rate": 0.00018783374224274788,
+      "loss": 0.1478,
+      "step": 4220
+    },
+    {
+      "epoch": 0.304556441430066,
+      "grad_norm": 0.10638104379177094,
+      "learning_rate": 0.00018783085582335115,
+      "loss": 0.1373,
+      "step": 4221
+    },
+    {
+      "epoch": 0.30462859410512644,
+      "grad_norm": 0.09254088252782822,
+      "learning_rate": 0.00018782796940395438,
+      "loss": 0.2043,
+      "step": 4222
+    },
+    {
+      "epoch": 0.3047007467801869,
+      "grad_norm": 0.144930899143219,
+      "learning_rate": 0.00018782508298455765,
+      "loss": 0.1252,
+      "step": 4223
+    },
+    {
+      "epoch": 0.3047728994552473,
+      "grad_norm": 0.0938224047422409,
+      "learning_rate": 0.00018782219656516094,
+      "loss": 0.1678,
+      "step": 4224
+    },
+    {
+      "epoch": 0.3048450521303077,
+      "grad_norm": 0.11682211607694626,
+      "learning_rate": 0.0001878193101457642,
+      "loss": 0.1498,
+      "step": 4225
+    },
+    {
+      "epoch": 0.30491720480536816,
+      "grad_norm": 0.10960418730974197,
+      "learning_rate": 0.00018781642372636746,
+      "loss": 0.1359,
+      "step": 4226
+    },
+    {
+      "epoch": 0.3049893574804286,
+      "grad_norm": 0.12347632646560669,
+      "learning_rate": 0.0001878135373069707,
+      "loss": 0.1275,
+      "step": 4227
+    },
+    {
+      "epoch": 0.305061510155489,
+      "grad_norm": 0.14290280640125275,
+      "learning_rate": 0.00018781065088757396,
+      "loss": 0.1448,
+      "step": 4228
+    },
+    {
+      "epoch": 0.30513366283054943,
+      "grad_norm": 0.1424276977777481,
+      "learning_rate": 0.00018780776446817722,
+      "loss": 0.1475,
+      "step": 4229
+    },
+    {
+      "epoch": 0.3052058155056099,
+      "grad_norm": 0.11387716233730316,
+      "learning_rate": 0.0001878048780487805,
+      "loss": 0.1347,
+      "step": 4230
+    },
+    {
+      "epoch": 0.3052779681806703,
+      "grad_norm": 0.10514809191226959,
+      "learning_rate": 0.00018780199162938378,
+      "loss": 0.1378,
+      "step": 4231
+    },
+    {
+      "epoch": 0.3053501208557307,
+      "grad_norm": 0.15583384037017822,
+      "learning_rate": 0.000187799105209987,
+      "loss": 0.1515,
+      "step": 4232
+    },
+    {
+      "epoch": 0.30542227353079116,
+      "grad_norm": 0.11437677592039108,
+      "learning_rate": 0.00018779621879059028,
+      "loss": 0.184,
+      "step": 4233
+    },
+    {
+      "epoch": 0.3054944262058516,
+      "grad_norm": 0.10012994706630707,
+      "learning_rate": 0.00018779333237119354,
+      "loss": 0.1504,
+      "step": 4234
+    },
+    {
+      "epoch": 0.305566578880912,
+      "grad_norm": 0.11126403510570526,
+      "learning_rate": 0.0001877904459517968,
+      "loss": 0.1747,
+      "step": 4235
+    },
+    {
+      "epoch": 0.30563873155597243,
+      "grad_norm": 0.1091986745595932,
+      "learning_rate": 0.00018778755953240006,
+      "loss": 0.1769,
+      "step": 4236
+    },
+    {
+      "epoch": 0.3057108842310329,
+      "grad_norm": 0.09372757375240326,
+      "learning_rate": 0.00018778467311300333,
+      "loss": 0.1175,
+      "step": 4237
+    },
+    {
+      "epoch": 0.3057830369060933,
+      "grad_norm": 0.08955243974924088,
+      "learning_rate": 0.0001877817866936066,
+      "loss": 0.1118,
+      "step": 4238
+    },
+    {
+      "epoch": 0.3058551895811537,
+      "grad_norm": 0.12780213356018066,
+      "learning_rate": 0.00018777890027420985,
+      "loss": 0.1612,
+      "step": 4239
+    },
+    {
+      "epoch": 0.30592734225621415,
+      "grad_norm": 0.12503564357757568,
+      "learning_rate": 0.00018777601385481312,
+      "loss": 0.1476,
+      "step": 4240
+    },
+    {
+      "epoch": 0.3059994949312746,
+      "grad_norm": 0.12395293265581131,
+      "learning_rate": 0.00018777312743541638,
+      "loss": 0.1497,
+      "step": 4241
+    },
+    {
+      "epoch": 0.306071647606335,
+      "grad_norm": 0.13468153774738312,
+      "learning_rate": 0.00018777024101601964,
+      "loss": 0.1438,
+      "step": 4242
+    },
+    {
+      "epoch": 0.30614380028139543,
+      "grad_norm": 0.12461975961923599,
+      "learning_rate": 0.00018776735459662288,
+      "loss": 0.1979,
+      "step": 4243
+    },
+    {
+      "epoch": 0.3062159529564559,
+      "grad_norm": 0.1354757845401764,
+      "learning_rate": 0.00018776446817722614,
+      "loss": 0.1441,
+      "step": 4244
+    },
+    {
+      "epoch": 0.30628810563151626,
+      "grad_norm": 0.12288720160722733,
+      "learning_rate": 0.00018776158175782943,
+      "loss": 0.1898,
+      "step": 4245
+    },
+    {
+      "epoch": 0.3063602583065767,
+      "grad_norm": 0.13993801176548004,
+      "learning_rate": 0.0001877586953384327,
+      "loss": 0.1258,
+      "step": 4246
+    },
+    {
+      "epoch": 0.30643241098163715,
+      "grad_norm": 0.1167026087641716,
+      "learning_rate": 0.00018775580891903596,
+      "loss": 0.1559,
+      "step": 4247
+    },
+    {
+      "epoch": 0.3065045636566976,
+      "grad_norm": 0.12541060149669647,
+      "learning_rate": 0.0001877529224996392,
+      "loss": 0.1728,
+      "step": 4248
+    },
+    {
+      "epoch": 0.306576716331758,
+      "grad_norm": 0.1256195604801178,
+      "learning_rate": 0.00018775003608024245,
+      "loss": 0.1408,
+      "step": 4249
+    },
+    {
+      "epoch": 0.3066488690068184,
+      "grad_norm": 0.14515994489192963,
+      "learning_rate": 0.00018774714966084572,
+      "loss": 0.1522,
+      "step": 4250
+    },
+    {
+      "epoch": 0.30672102168187887,
+      "grad_norm": 0.10798481106758118,
+      "learning_rate": 0.00018774426324144898,
+      "loss": 0.1522,
+      "step": 4251
+    },
+    {
+      "epoch": 0.30679317435693926,
+      "grad_norm": 0.11664534360170364,
+      "learning_rate": 0.00018774137682205227,
+      "loss": 0.1785,
+      "step": 4252
+    },
+    {
+      "epoch": 0.3068653270319997,
+      "grad_norm": 0.10946546494960785,
+      "learning_rate": 0.00018773849040265553,
+      "loss": 0.1899,
+      "step": 4253
+    },
+    {
+      "epoch": 0.30693747970706015,
+      "grad_norm": 0.09712180495262146,
+      "learning_rate": 0.00018773560398325877,
+      "loss": 0.1512,
+      "step": 4254
+    },
+    {
+      "epoch": 0.3070096323821206,
+      "grad_norm": 0.1304282695055008,
+      "learning_rate": 0.00018773271756386203,
+      "loss": 0.1517,
+      "step": 4255
+    },
+    {
+      "epoch": 0.307081785057181,
+      "grad_norm": 0.09476155787706375,
+      "learning_rate": 0.0001877298311444653,
+      "loss": 0.1469,
+      "step": 4256
+    },
+    {
+      "epoch": 0.3071539377322414,
+      "grad_norm": 0.12522584199905396,
+      "learning_rate": 0.00018772694472506856,
+      "loss": 0.1569,
+      "step": 4257
+    },
+    {
+      "epoch": 0.30722609040730187,
+      "grad_norm": 0.12130534648895264,
+      "learning_rate": 0.00018772405830567182,
+      "loss": 0.163,
+      "step": 4258
+    },
+    {
+      "epoch": 0.30729824308236225,
+      "grad_norm": 0.0961449146270752,
+      "learning_rate": 0.00018772117188627508,
+      "loss": 0.1792,
+      "step": 4259
+    },
+    {
+      "epoch": 0.3073703957574227,
+      "grad_norm": 0.11592444032430649,
+      "learning_rate": 0.00018771828546687835,
+      "loss": 0.1572,
+      "step": 4260
+    },
+    {
+      "epoch": 0.30744254843248314,
+      "grad_norm": 0.11982754617929459,
+      "learning_rate": 0.0001877153990474816,
+      "loss": 0.201,
+      "step": 4261
+    },
+    {
+      "epoch": 0.3075147011075436,
+      "grad_norm": 0.09742818772792816,
+      "learning_rate": 0.00018771251262808487,
+      "loss": 0.1445,
+      "step": 4262
+    },
+    {
+      "epoch": 0.307586853782604,
+      "grad_norm": 0.11305420100688934,
+      "learning_rate": 0.00018770962620868814,
+      "loss": 0.1776,
+      "step": 4263
+    },
+    {
+      "epoch": 0.3076590064576644,
+      "grad_norm": 0.12675850093364716,
+      "learning_rate": 0.0001877067397892914,
+      "loss": 0.2095,
+      "step": 4264
+    },
+    {
+      "epoch": 0.30773115913272486,
+      "grad_norm": 0.12010957300662994,
+      "learning_rate": 0.00018770385336989463,
+      "loss": 0.1617,
+      "step": 4265
+    },
+    {
+      "epoch": 0.30780331180778525,
+      "grad_norm": 0.11646157503128052,
+      "learning_rate": 0.00018770096695049792,
+      "loss": 0.1465,
+      "step": 4266
+    },
+    {
+      "epoch": 0.3078754644828457,
+      "grad_norm": 0.09521958976984024,
+      "learning_rate": 0.0001876980805311012,
+      "loss": 0.1499,
+      "step": 4267
+    },
+    {
+      "epoch": 0.30794761715790614,
+      "grad_norm": 0.13130347430706024,
+      "learning_rate": 0.00018769519411170445,
+      "loss": 0.1362,
+      "step": 4268
+    },
+    {
+      "epoch": 0.3080197698329666,
+      "grad_norm": 0.1242891252040863,
+      "learning_rate": 0.0001876923076923077,
+      "loss": 0.1916,
+      "step": 4269
+    },
+    {
+      "epoch": 0.30809192250802697,
+      "grad_norm": 0.13037200272083282,
+      "learning_rate": 0.00018768942127291095,
+      "loss": 0.1363,
+      "step": 4270
+    },
+    {
+      "epoch": 0.3081640751830874,
+      "grad_norm": 0.10391830652952194,
+      "learning_rate": 0.0001876865348535142,
+      "loss": 0.1717,
+      "step": 4271
+    },
+    {
+      "epoch": 0.30823622785814786,
+      "grad_norm": 0.10626058280467987,
+      "learning_rate": 0.00018768364843411748,
+      "loss": 0.1713,
+      "step": 4272
+    },
+    {
+      "epoch": 0.30830838053320825,
+      "grad_norm": 0.08672137558460236,
+      "learning_rate": 0.00018768076201472077,
+      "loss": 0.1517,
+      "step": 4273
+    },
+    {
+      "epoch": 0.3083805332082687,
+      "grad_norm": 0.10902436077594757,
+      "learning_rate": 0.00018767787559532403,
+      "loss": 0.1841,
+      "step": 4274
+    },
+    {
+      "epoch": 0.30845268588332914,
+      "grad_norm": 0.10874659568071365,
+      "learning_rate": 0.00018767498917592726,
+      "loss": 0.115,
+      "step": 4275
+    },
+    {
+      "epoch": 0.3085248385583895,
+      "grad_norm": 0.11341419816017151,
+      "learning_rate": 0.00018767210275653053,
+      "loss": 0.2206,
+      "step": 4276
+    },
+    {
+      "epoch": 0.30859699123344997,
+      "grad_norm": 0.13141369819641113,
+      "learning_rate": 0.0001876692163371338,
+      "loss": 0.1607,
+      "step": 4277
+    },
+    {
+      "epoch": 0.3086691439085104,
+      "grad_norm": 0.14365047216415405,
+      "learning_rate": 0.00018766632991773705,
+      "loss": 0.1518,
+      "step": 4278
+    },
+    {
+      "epoch": 0.30874129658357086,
+      "grad_norm": 0.10291138291358948,
+      "learning_rate": 0.00018766344349834032,
+      "loss": 0.1475,
+      "step": 4279
+    },
+    {
+      "epoch": 0.30881344925863125,
+      "grad_norm": 0.13778726756572723,
+      "learning_rate": 0.00018766055707894358,
+      "loss": 0.1579,
+      "step": 4280
+    },
+    {
+      "epoch": 0.3088856019336917,
+      "grad_norm": 0.10906017571687698,
+      "learning_rate": 0.00018765767065954684,
+      "loss": 0.169,
+      "step": 4281
+    },
+    {
+      "epoch": 0.30895775460875213,
+      "grad_norm": 0.1113433763384819,
+      "learning_rate": 0.0001876547842401501,
+      "loss": 0.1795,
+      "step": 4282
+    },
+    {
+      "epoch": 0.3090299072838125,
+      "grad_norm": 0.14330008625984192,
+      "learning_rate": 0.00018765189782075337,
+      "loss": 0.1835,
+      "step": 4283
+    },
+    {
+      "epoch": 0.30910205995887297,
+      "grad_norm": 0.11915328353643417,
+      "learning_rate": 0.00018764901140135663,
+      "loss": 0.1822,
+      "step": 4284
+    },
+    {
+      "epoch": 0.3091742126339334,
+      "grad_norm": 0.13964983820915222,
+      "learning_rate": 0.0001876461249819599,
+      "loss": 0.1775,
+      "step": 4285
+    },
+    {
+      "epoch": 0.30924636530899385,
+      "grad_norm": 0.12686780095100403,
+      "learning_rate": 0.00018764323856256313,
+      "loss": 0.1364,
+      "step": 4286
+    },
+    {
+      "epoch": 0.30931851798405424,
+      "grad_norm": 0.11067734658718109,
+      "learning_rate": 0.00018764035214316642,
+      "loss": 0.1721,
+      "step": 4287
+    },
+    {
+      "epoch": 0.3093906706591147,
+      "grad_norm": 0.09381629526615143,
+      "learning_rate": 0.00018763746572376968,
+      "loss": 0.1681,
+      "step": 4288
+    },
+    {
+      "epoch": 0.30946282333417513,
+      "grad_norm": 0.0986705869436264,
+      "learning_rate": 0.00018763457930437294,
+      "loss": 0.1756,
+      "step": 4289
+    },
+    {
+      "epoch": 0.3095349760092355,
+      "grad_norm": 0.09282074123620987,
+      "learning_rate": 0.0001876316928849762,
+      "loss": 0.1794,
+      "step": 4290
+    },
+    {
+      "epoch": 0.30960712868429596,
+      "grad_norm": 0.12449940294027328,
+      "learning_rate": 0.00018762880646557944,
+      "loss": 0.1104,
+      "step": 4291
+    },
+    {
+      "epoch": 0.3096792813593564,
+      "grad_norm": 0.10148288309574127,
+      "learning_rate": 0.0001876259200461827,
+      "loss": 0.116,
+      "step": 4292
+    },
+    {
+      "epoch": 0.30975143403441685,
+      "grad_norm": 0.1262681633234024,
+      "learning_rate": 0.00018762303362678597,
+      "loss": 0.146,
+      "step": 4293
+    },
+    {
+      "epoch": 0.30982358670947724,
+      "grad_norm": 0.13044936954975128,
+      "learning_rate": 0.00018762014720738926,
+      "loss": 0.1289,
+      "step": 4294
+    },
+    {
+      "epoch": 0.3098957393845377,
+      "grad_norm": 0.08965712040662766,
+      "learning_rate": 0.00018761726078799252,
+      "loss": 0.1356,
+      "step": 4295
+    },
+    {
+      "epoch": 0.3099678920595981,
+      "grad_norm": 0.14585722982883453,
+      "learning_rate": 0.00018761437436859576,
+      "loss": 0.1672,
+      "step": 4296
+    },
+    {
+      "epoch": 0.3100400447346585,
+      "grad_norm": 0.13562092185020447,
+      "learning_rate": 0.00018761148794919902,
+      "loss": 0.1959,
+      "step": 4297
+    },
+    {
+      "epoch": 0.31011219740971896,
+      "grad_norm": 0.09663914144039154,
+      "learning_rate": 0.00018760860152980228,
+      "loss": 0.15,
+      "step": 4298
+    },
+    {
+      "epoch": 0.3101843500847794,
+      "grad_norm": 0.09218167513608932,
+      "learning_rate": 0.00018760571511040555,
+      "loss": 0.149,
+      "step": 4299
+    },
+    {
+      "epoch": 0.31025650275983985,
+      "grad_norm": 0.10205177962779999,
+      "learning_rate": 0.0001876028286910088,
+      "loss": 0.1779,
+      "step": 4300
+    },
+    {
+      "epoch": 0.31032865543490024,
+      "grad_norm": 0.12574578821659088,
+      "learning_rate": 0.00018759994227161207,
+      "loss": 0.172,
+      "step": 4301
+    },
+    {
+      "epoch": 0.3104008081099607,
+      "grad_norm": 0.12000858038663864,
+      "learning_rate": 0.00018759705585221534,
+      "loss": 0.1089,
+      "step": 4302
+    },
+    {
+      "epoch": 0.3104729607850211,
+      "grad_norm": 0.1477157026529312,
+      "learning_rate": 0.0001875941694328186,
+      "loss": 0.1796,
+      "step": 4303
+    },
+    {
+      "epoch": 0.3105451134600815,
+      "grad_norm": 0.1305723488330841,
+      "learning_rate": 0.00018759128301342186,
+      "loss": 0.1477,
+      "step": 4304
+    },
+    {
+      "epoch": 0.31061726613514196,
+      "grad_norm": 0.10859954357147217,
+      "learning_rate": 0.00018758839659402512,
+      "loss": 0.1771,
+      "step": 4305
+    },
+    {
+      "epoch": 0.3106894188102024,
+      "grad_norm": 0.0939202830195427,
+      "learning_rate": 0.0001875855101746284,
+      "loss": 0.1413,
+      "step": 4306
+    },
+    {
+      "epoch": 0.3107615714852628,
+      "grad_norm": 0.22470423579216003,
+      "learning_rate": 0.00018758262375523162,
+      "loss": 0.1255,
+      "step": 4307
+    },
+    {
+      "epoch": 0.31083372416032323,
+      "grad_norm": 0.10599032789468765,
+      "learning_rate": 0.0001875797373358349,
+      "loss": 0.19,
+      "step": 4308
+    },
+    {
+      "epoch": 0.3109058768353837,
+      "grad_norm": 0.11063870042562485,
+      "learning_rate": 0.00018757685091643818,
+      "loss": 0.1338,
+      "step": 4309
+    },
+    {
+      "epoch": 0.3109780295104441,
+      "grad_norm": 0.12211477011442184,
+      "learning_rate": 0.00018757396449704144,
+      "loss": 0.1426,
+      "step": 4310
+    },
+    {
+      "epoch": 0.3110501821855045,
+      "grad_norm": 0.11353323608636856,
+      "learning_rate": 0.0001875710780776447,
+      "loss": 0.1478,
+      "step": 4311
+    },
+    {
+      "epoch": 0.31112233486056495,
+      "grad_norm": 0.11034446209669113,
+      "learning_rate": 0.00018756819165824794,
+      "loss": 0.1592,
+      "step": 4312
+    },
+    {
+      "epoch": 0.3111944875356254,
+      "grad_norm": 0.12288372963666916,
+      "learning_rate": 0.0001875653052388512,
+      "loss": 0.1839,
+      "step": 4313
+    },
+    {
+      "epoch": 0.3112666402106858,
+      "grad_norm": 0.10573863238096237,
+      "learning_rate": 0.00018756241881945446,
+      "loss": 0.1885,
+      "step": 4314
+    },
+    {
+      "epoch": 0.31133879288574623,
+      "grad_norm": 0.15434828400611877,
+      "learning_rate": 0.00018755953240005775,
+      "loss": 0.1639,
+      "step": 4315
+    },
+    {
+      "epoch": 0.3114109455608067,
+      "grad_norm": 0.1456459015607834,
+      "learning_rate": 0.00018755664598066102,
+      "loss": 0.2052,
+      "step": 4316
+    },
+    {
+      "epoch": 0.3114830982358671,
+      "grad_norm": 0.1549510806798935,
+      "learning_rate": 0.00018755375956126425,
+      "loss": 0.1343,
+      "step": 4317
+    },
+    {
+      "epoch": 0.3115552509109275,
+      "grad_norm": 0.11198796331882477,
+      "learning_rate": 0.00018755087314186752,
+      "loss": 0.1419,
+      "step": 4318
+    },
+    {
+      "epoch": 0.31162740358598795,
+      "grad_norm": 0.1253986954689026,
+      "learning_rate": 0.00018754798672247078,
+      "loss": 0.1541,
+      "step": 4319
+    },
+    {
+      "epoch": 0.3116995562610484,
+      "grad_norm": 0.1058746874332428,
+      "learning_rate": 0.00018754510030307404,
+      "loss": 0.2107,
+      "step": 4320
+    },
+    {
+      "epoch": 0.3117717089361088,
+      "grad_norm": 0.1150989979505539,
+      "learning_rate": 0.0001875422138836773,
+      "loss": 0.2103,
+      "step": 4321
+    },
+    {
+      "epoch": 0.3118438616111692,
+      "grad_norm": 0.09693560004234314,
+      "learning_rate": 0.00018753932746428057,
+      "loss": 0.1357,
+      "step": 4322
+    },
+    {
+      "epoch": 0.31191601428622967,
+      "grad_norm": 0.1558159440755844,
+      "learning_rate": 0.00018753644104488383,
+      "loss": 0.1587,
+      "step": 4323
+    },
+    {
+      "epoch": 0.3119881669612901,
+      "grad_norm": 0.1341172456741333,
+      "learning_rate": 0.0001875335546254871,
+      "loss": 0.1185,
+      "step": 4324
+    },
+    {
+      "epoch": 0.3120603196363505,
+      "grad_norm": 0.11654019355773926,
+      "learning_rate": 0.00018753066820609036,
+      "loss": 0.1479,
+      "step": 4325
+    },
+    {
+      "epoch": 0.31213247231141095,
+      "grad_norm": 0.1901504397392273,
+      "learning_rate": 0.00018752778178669362,
+      "loss": 0.1115,
+      "step": 4326
+    },
+    {
+      "epoch": 0.3122046249864714,
+      "grad_norm": 0.1711183488368988,
+      "learning_rate": 0.00018752489536729688,
+      "loss": 0.1711,
+      "step": 4327
+    },
+    {
+      "epoch": 0.3122767776615318,
+      "grad_norm": 0.12417960911989212,
+      "learning_rate": 0.00018752200894790012,
+      "loss": 0.1292,
+      "step": 4328
+    },
+    {
+      "epoch": 0.3123489303365922,
+      "grad_norm": 0.12994439899921417,
+      "learning_rate": 0.0001875191225285034,
+      "loss": 0.1137,
+      "step": 4329
+    },
+    {
+      "epoch": 0.31242108301165267,
+      "grad_norm": 0.12198687344789505,
+      "learning_rate": 0.00018751623610910667,
+      "loss": 0.1479,
+      "step": 4330
+    },
+    {
+      "epoch": 0.3124932356867131,
+      "grad_norm": 0.1255475878715515,
+      "learning_rate": 0.00018751334968970993,
+      "loss": 0.2342,
+      "step": 4331
+    },
+    {
+      "epoch": 0.3125653883617735,
+      "grad_norm": 0.11571196466684341,
+      "learning_rate": 0.0001875104632703132,
+      "loss": 0.1098,
+      "step": 4332
+    },
+    {
+      "epoch": 0.31263754103683394,
+      "grad_norm": 0.15391094982624054,
+      "learning_rate": 0.00018750757685091643,
+      "loss": 0.1983,
+      "step": 4333
+    },
+    {
+      "epoch": 0.3127096937118944,
+      "grad_norm": 0.10191630572080612,
+      "learning_rate": 0.0001875046904315197,
+      "loss": 0.1628,
+      "step": 4334
+    },
+    {
+      "epoch": 0.3127818463869548,
+      "grad_norm": 0.13359738886356354,
+      "learning_rate": 0.00018750180401212296,
+      "loss": 0.1369,
+      "step": 4335
+    },
+    {
+      "epoch": 0.3128539990620152,
+      "grad_norm": 0.1361343115568161,
+      "learning_rate": 0.00018749891759272625,
+      "loss": 0.1614,
+      "step": 4336
+    },
+    {
+      "epoch": 0.31292615173707566,
+      "grad_norm": 0.11173868179321289,
+      "learning_rate": 0.0001874960311733295,
+      "loss": 0.1474,
+      "step": 4337
+    },
+    {
+      "epoch": 0.31299830441213605,
+      "grad_norm": 0.1364803910255432,
+      "learning_rate": 0.00018749314475393275,
+      "loss": 0.1616,
+      "step": 4338
+    },
+    {
+      "epoch": 0.3130704570871965,
+      "grad_norm": 0.1421414315700531,
+      "learning_rate": 0.000187490258334536,
+      "loss": 0.1945,
+      "step": 4339
+    },
+    {
+      "epoch": 0.31314260976225694,
+      "grad_norm": 0.11378195881843567,
+      "learning_rate": 0.00018748737191513927,
+      "loss": 0.1365,
+      "step": 4340
+    },
+    {
+      "epoch": 0.3132147624373174,
+      "grad_norm": 0.10104478150606155,
+      "learning_rate": 0.00018748448549574254,
+      "loss": 0.0923,
+      "step": 4341
+    },
+    {
+      "epoch": 0.3132869151123778,
+      "grad_norm": 0.14223428070545197,
+      "learning_rate": 0.0001874815990763458,
+      "loss": 0.2485,
+      "step": 4342
+    },
+    {
+      "epoch": 0.3133590677874382,
+      "grad_norm": 0.11533410847187042,
+      "learning_rate": 0.00018747871265694906,
+      "loss": 0.1847,
+      "step": 4343
+    },
+    {
+      "epoch": 0.31343122046249866,
+      "grad_norm": 0.09127724170684814,
+      "learning_rate": 0.00018747582623755232,
+      "loss": 0.162,
+      "step": 4344
+    },
+    {
+      "epoch": 0.31350337313755905,
+      "grad_norm": 0.10632152855396271,
+      "learning_rate": 0.0001874729398181556,
+      "loss": 0.1239,
+      "step": 4345
+    },
+    {
+      "epoch": 0.3135755258126195,
+      "grad_norm": 0.20816880464553833,
+      "learning_rate": 0.00018747005339875885,
+      "loss": 0.1647,
+      "step": 4346
+    },
+    {
+      "epoch": 0.31364767848767994,
+      "grad_norm": 0.12501870095729828,
+      "learning_rate": 0.0001874671669793621,
+      "loss": 0.1703,
+      "step": 4347
+    },
+    {
+      "epoch": 0.3137198311627404,
+      "grad_norm": 0.11986255645751953,
+      "learning_rate": 0.00018746428055996538,
+      "loss": 0.1496,
+      "step": 4348
+    },
+    {
+      "epoch": 0.31379198383780077,
+      "grad_norm": 0.13503970205783844,
+      "learning_rate": 0.0001874613941405686,
+      "loss": 0.1829,
+      "step": 4349
+    },
+    {
+      "epoch": 0.3138641365128612,
+      "grad_norm": 0.16656002402305603,
+      "learning_rate": 0.0001874585077211719,
+      "loss": 0.1814,
+      "step": 4350
+    },
+    {
+      "epoch": 0.31393628918792166,
+      "grad_norm": 0.12464180588722229,
+      "learning_rate": 0.00018745562130177516,
+      "loss": 0.112,
+      "step": 4351
+    },
+    {
+      "epoch": 0.31400844186298205,
+      "grad_norm": 0.13612239062786102,
+      "learning_rate": 0.00018745273488237843,
+      "loss": 0.1877,
+      "step": 4352
+    },
+    {
+      "epoch": 0.3140805945380425,
+      "grad_norm": 0.11188091337680817,
+      "learning_rate": 0.0001874498484629817,
+      "loss": 0.1682,
+      "step": 4353
+    },
+    {
+      "epoch": 0.31415274721310293,
+      "grad_norm": 0.1170855462551117,
+      "learning_rate": 0.00018744696204358493,
+      "loss": 0.1764,
+      "step": 4354
+    },
+    {
+      "epoch": 0.3142248998881634,
+      "grad_norm": 0.10502547025680542,
+      "learning_rate": 0.0001874440756241882,
+      "loss": 0.1801,
+      "step": 4355
+    },
+    {
+      "epoch": 0.31429705256322377,
+      "grad_norm": 0.12084412574768066,
+      "learning_rate": 0.00018744118920479145,
+      "loss": 0.1332,
+      "step": 4356
+    },
+    {
+      "epoch": 0.3143692052382842,
+      "grad_norm": 0.11734385043382645,
+      "learning_rate": 0.00018743830278539474,
+      "loss": 0.1831,
+      "step": 4357
+    },
+    {
+      "epoch": 0.31444135791334465,
+      "grad_norm": 0.10435345768928528,
+      "learning_rate": 0.000187435416365998,
+      "loss": 0.1646,
+      "step": 4358
+    },
+    {
+      "epoch": 0.31451351058840504,
+      "grad_norm": 0.11109847575426102,
+      "learning_rate": 0.00018743252994660124,
+      "loss": 0.1603,
+      "step": 4359
+    },
+    {
+      "epoch": 0.3145856632634655,
+      "grad_norm": 0.12884214520454407,
+      "learning_rate": 0.0001874296435272045,
+      "loss": 0.1502,
+      "step": 4360
+    },
+    {
+      "epoch": 0.31465781593852593,
+      "grad_norm": 0.14958150684833527,
+      "learning_rate": 0.00018742675710780777,
+      "loss": 0.1851,
+      "step": 4361
+    },
+    {
+      "epoch": 0.3147299686135864,
+      "grad_norm": 0.11282835155725479,
+      "learning_rate": 0.00018742387068841103,
+      "loss": 0.1913,
+      "step": 4362
+    },
+    {
+      "epoch": 0.31480212128864676,
+      "grad_norm": 0.15314820408821106,
+      "learning_rate": 0.0001874209842690143,
+      "loss": 0.167,
+      "step": 4363
+    },
+    {
+      "epoch": 0.3148742739637072,
+      "grad_norm": 0.09965931624174118,
+      "learning_rate": 0.00018741809784961756,
+      "loss": 0.1009,
+      "step": 4364
+    },
+    {
+      "epoch": 0.31494642663876765,
+      "grad_norm": 0.11220397800207138,
+      "learning_rate": 0.00018741521143022082,
+      "loss": 0.1251,
+      "step": 4365
+    },
+    {
+      "epoch": 0.31501857931382804,
+      "grad_norm": 0.11934738606214523,
+      "learning_rate": 0.00018741232501082408,
+      "loss": 0.1763,
+      "step": 4366
+    },
+    {
+      "epoch": 0.3150907319888885,
+      "grad_norm": 0.10464148968458176,
+      "learning_rate": 0.00018740943859142734,
+      "loss": 0.13,
+      "step": 4367
+    },
+    {
+      "epoch": 0.31516288466394893,
+      "grad_norm": 0.11204349994659424,
+      "learning_rate": 0.0001874065521720306,
+      "loss": 0.1716,
+      "step": 4368
+    },
+    {
+      "epoch": 0.3152350373390093,
+      "grad_norm": 0.11725107580423355,
+      "learning_rate": 0.00018740366575263387,
+      "loss": 0.1576,
+      "step": 4369
+    },
+    {
+      "epoch": 0.31530719001406976,
+      "grad_norm": 0.09801217913627625,
+      "learning_rate": 0.00018740077933323713,
+      "loss": 0.145,
+      "step": 4370
+    },
+    {
+      "epoch": 0.3153793426891302,
+      "grad_norm": 0.11340101063251495,
+      "learning_rate": 0.0001873978929138404,
+      "loss": 0.1408,
+      "step": 4371
+    },
+    {
+      "epoch": 0.31545149536419065,
+      "grad_norm": 0.11271858215332031,
+      "learning_rate": 0.00018739500649444366,
+      "loss": 0.1525,
+      "step": 4372
+    },
+    {
+      "epoch": 0.31552364803925104,
+      "grad_norm": 0.14412888884544373,
+      "learning_rate": 0.00018739212007504692,
+      "loss": 0.1077,
+      "step": 4373
+    },
+    {
+      "epoch": 0.3155958007143115,
+      "grad_norm": 0.14130175113677979,
+      "learning_rate": 0.00018738923365565018,
+      "loss": 0.1687,
+      "step": 4374
+    },
+    {
+      "epoch": 0.3156679533893719,
+      "grad_norm": 0.1080450788140297,
+      "learning_rate": 0.00018738634723625345,
+      "loss": 0.155,
+      "step": 4375
+    },
+    {
+      "epoch": 0.3157401060644323,
+      "grad_norm": 0.1382875144481659,
+      "learning_rate": 0.00018738346081685668,
+      "loss": 0.1675,
+      "step": 4376
+    },
+    {
+      "epoch": 0.31581225873949276,
+      "grad_norm": 0.10196302831172943,
+      "learning_rate": 0.00018738057439745995,
+      "loss": 0.1696,
+      "step": 4377
+    },
+    {
+      "epoch": 0.3158844114145532,
+      "grad_norm": 0.1086023822426796,
+      "learning_rate": 0.00018737768797806324,
+      "loss": 0.1256,
+      "step": 4378
+    },
+    {
+      "epoch": 0.31595656408961365,
+      "grad_norm": 0.10549721866846085,
+      "learning_rate": 0.0001873748015586665,
+      "loss": 0.1294,
+      "step": 4379
+    },
+    {
+      "epoch": 0.31602871676467403,
+      "grad_norm": 0.10880477726459503,
+      "learning_rate": 0.00018737191513926976,
+      "loss": 0.1322,
+      "step": 4380
+    },
+    {
+      "epoch": 0.3161008694397345,
+      "grad_norm": 0.12142103910446167,
+      "learning_rate": 0.000187369028719873,
+      "loss": 0.18,
+      "step": 4381
+    },
+    {
+      "epoch": 0.3161730221147949,
+      "grad_norm": 0.10394002497196198,
+      "learning_rate": 0.00018736614230047626,
+      "loss": 0.134,
+      "step": 4382
+    },
+    {
+      "epoch": 0.3162451747898553,
+      "grad_norm": 0.12098486721515656,
+      "learning_rate": 0.00018736325588107952,
+      "loss": 0.1417,
+      "step": 4383
+    },
+    {
+      "epoch": 0.31631732746491575,
+      "grad_norm": 0.16527141630649567,
+      "learning_rate": 0.0001873603694616828,
+      "loss": 0.1611,
+      "step": 4384
+    },
+    {
+      "epoch": 0.3163894801399762,
+      "grad_norm": 0.13548526167869568,
+      "learning_rate": 0.00018735748304228608,
+      "loss": 0.1987,
+      "step": 4385
+    },
+    {
+      "epoch": 0.31646163281503664,
+      "grad_norm": 0.10220380872488022,
+      "learning_rate": 0.0001873545966228893,
+      "loss": 0.1748,
+      "step": 4386
+    },
+    {
+      "epoch": 0.31653378549009703,
+      "grad_norm": 0.11551573127508163,
+      "learning_rate": 0.00018735171020349258,
+      "loss": 0.1566,
+      "step": 4387
+    },
+    {
+      "epoch": 0.3166059381651575,
+      "grad_norm": 0.111538365483284,
+      "learning_rate": 0.00018734882378409584,
+      "loss": 0.1734,
+      "step": 4388
+    },
+    {
+      "epoch": 0.3166780908402179,
+      "grad_norm": 0.1431824266910553,
+      "learning_rate": 0.0001873459373646991,
+      "loss": 0.16,
+      "step": 4389
+    },
+    {
+      "epoch": 0.3167502435152783,
+      "grad_norm": 0.11930502206087112,
+      "learning_rate": 0.00018734305094530236,
+      "loss": 0.1276,
+      "step": 4390
+    },
+    {
+      "epoch": 0.31682239619033875,
+      "grad_norm": 0.1125396341085434,
+      "learning_rate": 0.00018734016452590563,
+      "loss": 0.1517,
+      "step": 4391
+    },
+    {
+      "epoch": 0.3168945488653992,
+      "grad_norm": 0.1314370483160019,
+      "learning_rate": 0.0001873372781065089,
+      "loss": 0.1755,
+      "step": 4392
+    },
+    {
+      "epoch": 0.31696670154045964,
+      "grad_norm": 0.1070464625954628,
+      "learning_rate": 0.00018733439168711215,
+      "loss": 0.1557,
+      "step": 4393
+    },
+    {
+      "epoch": 0.31703885421552,
+      "grad_norm": 0.1102706640958786,
+      "learning_rate": 0.00018733150526771542,
+      "loss": 0.1643,
+      "step": 4394
+    },
+    {
+      "epoch": 0.31711100689058047,
+      "grad_norm": 0.13626664876937866,
+      "learning_rate": 0.00018732861884831868,
+      "loss": 0.1673,
+      "step": 4395
+    },
+    {
+      "epoch": 0.3171831595656409,
+      "grad_norm": 0.14753460884094238,
+      "learning_rate": 0.00018732573242892194,
+      "loss": 0.1865,
+      "step": 4396
+    },
+    {
+      "epoch": 0.3172553122407013,
+      "grad_norm": 0.08600473403930664,
+      "learning_rate": 0.00018732284600952518,
+      "loss": 0.164,
+      "step": 4397
+    },
+    {
+      "epoch": 0.31732746491576175,
+      "grad_norm": 0.08636843413114548,
+      "learning_rate": 0.00018731995959012844,
+      "loss": 0.1115,
+      "step": 4398
+    },
+    {
+      "epoch": 0.3173996175908222,
+      "grad_norm": 0.1168600469827652,
+      "learning_rate": 0.00018731707317073173,
+      "loss": 0.1659,
+      "step": 4399
+    },
+    {
+      "epoch": 0.3174717702658826,
+      "grad_norm": 0.1677643358707428,
+      "learning_rate": 0.000187314186751335,
+      "loss": 0.2004,
+      "step": 4400
+    },
+    {
+      "epoch": 0.317543922940943,
+      "grad_norm": 0.13780860602855682,
+      "learning_rate": 0.00018731130033193826,
+      "loss": 0.1379,
+      "step": 4401
+    },
+    {
+      "epoch": 0.31761607561600347,
+      "grad_norm": 0.09523440152406693,
+      "learning_rate": 0.0001873084139125415,
+      "loss": 0.1681,
+      "step": 4402
+    },
+    {
+      "epoch": 0.3176882282910639,
+      "grad_norm": 0.09516411274671555,
+      "learning_rate": 0.00018730552749314476,
+      "loss": 0.1347,
+      "step": 4403
+    },
+    {
+      "epoch": 0.3177603809661243,
+      "grad_norm": 0.11729392409324646,
+      "learning_rate": 0.00018730264107374802,
+      "loss": 0.1378,
+      "step": 4404
+    },
+    {
+      "epoch": 0.31783253364118474,
+      "grad_norm": 0.10459394752979279,
+      "learning_rate": 0.00018729975465435128,
+      "loss": 0.1994,
+      "step": 4405
+    },
+    {
+      "epoch": 0.3179046863162452,
+      "grad_norm": 0.09694515913724899,
+      "learning_rate": 0.00018729686823495457,
+      "loss": 0.0951,
+      "step": 4406
+    },
+    {
+      "epoch": 0.3179768389913056,
+      "grad_norm": 0.10586842894554138,
+      "learning_rate": 0.0001872939818155578,
+      "loss": 0.1866,
+      "step": 4407
+    },
+    {
+      "epoch": 0.318048991666366,
+      "grad_norm": 0.10234203189611435,
+      "learning_rate": 0.00018729109539616107,
+      "loss": 0.1486,
+      "step": 4408
+    },
+    {
+      "epoch": 0.31812114434142647,
+      "grad_norm": 0.10403174161911011,
+      "learning_rate": 0.00018728820897676433,
+      "loss": 0.1718,
+      "step": 4409
+    },
+    {
+      "epoch": 0.3181932970164869,
+      "grad_norm": 0.11329304426908493,
+      "learning_rate": 0.0001872853225573676,
+      "loss": 0.1913,
+      "step": 4410
+    },
+    {
+      "epoch": 0.3182654496915473,
+      "grad_norm": 0.16083496809005737,
+      "learning_rate": 0.00018728243613797086,
+      "loss": 0.1544,
+      "step": 4411
+    },
+    {
+      "epoch": 0.31833760236660774,
+      "grad_norm": 0.14580407738685608,
+      "learning_rate": 0.00018727954971857412,
+      "loss": 0.151,
+      "step": 4412
+    },
+    {
+      "epoch": 0.3184097550416682,
+      "grad_norm": 0.13236120343208313,
+      "learning_rate": 0.00018727666329917736,
+      "loss": 0.1948,
+      "step": 4413
+    },
+    {
+      "epoch": 0.3184819077167286,
+      "grad_norm": 0.14633306860923767,
+      "learning_rate": 0.00018727377687978065,
+      "loss": 0.1695,
+      "step": 4414
+    },
+    {
+      "epoch": 0.318554060391789,
+      "grad_norm": 0.14905905723571777,
+      "learning_rate": 0.0001872708904603839,
+      "loss": 0.1486,
+      "step": 4415
+    },
+    {
+      "epoch": 0.31862621306684946,
+      "grad_norm": 0.14201393723487854,
+      "learning_rate": 0.00018726800404098717,
+      "loss": 0.161,
+      "step": 4416
+    },
+    {
+      "epoch": 0.3186983657419099,
+      "grad_norm": 0.12765076756477356,
+      "learning_rate": 0.00018726511762159044,
+      "loss": 0.1649,
+      "step": 4417
+    },
+    {
+      "epoch": 0.3187705184169703,
+      "grad_norm": 0.093240886926651,
+      "learning_rate": 0.00018726223120219367,
+      "loss": 0.1571,
+      "step": 4418
+    },
+    {
+      "epoch": 0.31884267109203074,
+      "grad_norm": 0.11160070449113846,
+      "learning_rate": 0.00018725934478279693,
+      "loss": 0.1692,
+      "step": 4419
+    },
+    {
+      "epoch": 0.3189148237670912,
+      "grad_norm": 0.10448220372200012,
+      "learning_rate": 0.0001872564583634002,
+      "loss": 0.1845,
+      "step": 4420
+    },
+    {
+      "epoch": 0.31898697644215157,
+      "grad_norm": 0.11860977113246918,
+      "learning_rate": 0.0001872535719440035,
+      "loss": 0.2073,
+      "step": 4421
+    },
+    {
+      "epoch": 0.319059129117212,
+      "grad_norm": 0.1062135323882103,
+      "learning_rate": 0.00018725068552460675,
+      "loss": 0.1474,
+      "step": 4422
+    },
+    {
+      "epoch": 0.31913128179227246,
+      "grad_norm": 0.11262713372707367,
+      "learning_rate": 0.00018724779910521,
+      "loss": 0.1567,
+      "step": 4423
+    },
+    {
+      "epoch": 0.3192034344673329,
+      "grad_norm": 0.09404128044843674,
+      "learning_rate": 0.00018724491268581325,
+      "loss": 0.1307,
+      "step": 4424
+    },
+    {
+      "epoch": 0.3192755871423933,
+      "grad_norm": 0.1066492572426796,
+      "learning_rate": 0.0001872420262664165,
+      "loss": 0.1214,
+      "step": 4425
+    },
+    {
+      "epoch": 0.31934773981745374,
+      "grad_norm": 0.11451132595539093,
+      "learning_rate": 0.00018723913984701978,
+      "loss": 0.1679,
+      "step": 4426
+    },
+    {
+      "epoch": 0.3194198924925142,
+      "grad_norm": 0.1330243945121765,
+      "learning_rate": 0.00018723625342762304,
+      "loss": 0.1941,
+      "step": 4427
+    },
+    {
+      "epoch": 0.31949204516757457,
+      "grad_norm": 0.10715876519680023,
+      "learning_rate": 0.0001872333670082263,
+      "loss": 0.1643,
+      "step": 4428
+    },
+    {
+      "epoch": 0.319564197842635,
+      "grad_norm": 0.09910948574542999,
+      "learning_rate": 0.00018723048058882956,
+      "loss": 0.2107,
+      "step": 4429
+    },
+    {
+      "epoch": 0.31963635051769546,
+      "grad_norm": 0.1132940798997879,
+      "learning_rate": 0.00018722759416943283,
+      "loss": 0.1718,
+      "step": 4430
+    },
+    {
+      "epoch": 0.31970850319275584,
+      "grad_norm": 0.12930969893932343,
+      "learning_rate": 0.0001872247077500361,
+      "loss": 0.2033,
+      "step": 4431
+    },
+    {
+      "epoch": 0.3197806558678163,
+      "grad_norm": 0.10411523282527924,
+      "learning_rate": 0.00018722182133063935,
+      "loss": 0.1732,
+      "step": 4432
+    },
+    {
+      "epoch": 0.31985280854287673,
+      "grad_norm": 0.09339303523302078,
+      "learning_rate": 0.00018721893491124262,
+      "loss": 0.1742,
+      "step": 4433
+    },
+    {
+      "epoch": 0.3199249612179372,
+      "grad_norm": 0.13078728318214417,
+      "learning_rate": 0.00018721604849184585,
+      "loss": 0.1439,
+      "step": 4434
+    },
+    {
+      "epoch": 0.31999711389299756,
+      "grad_norm": 0.15128646790981293,
+      "learning_rate": 0.00018721316207244914,
+      "loss": 0.2077,
+      "step": 4435
+    },
+    {
+      "epoch": 0.320069266568058,
+      "grad_norm": 0.1379484236240387,
+      "learning_rate": 0.0001872102756530524,
+      "loss": 0.1768,
+      "step": 4436
+    },
+    {
+      "epoch": 0.32014141924311845,
+      "grad_norm": 0.09854443371295929,
+      "learning_rate": 0.00018720738923365567,
+      "loss": 0.1505,
+      "step": 4437
+    },
+    {
+      "epoch": 0.32021357191817884,
+      "grad_norm": 0.1198776438832283,
+      "learning_rate": 0.00018720450281425893,
+      "loss": 0.1648,
+      "step": 4438
+    },
+    {
+      "epoch": 0.3202857245932393,
+      "grad_norm": 0.1183851808309555,
+      "learning_rate": 0.00018720161639486217,
+      "loss": 0.1767,
+      "step": 4439
+    },
+    {
+      "epoch": 0.32035787726829973,
+      "grad_norm": 0.11320844292640686,
+      "learning_rate": 0.00018719872997546543,
+      "loss": 0.1562,
+      "step": 4440
+    },
+    {
+      "epoch": 0.3204300299433602,
+      "grad_norm": 0.11692800372838974,
+      "learning_rate": 0.0001871958435560687,
+      "loss": 0.1715,
+      "step": 4441
+    },
+    {
+      "epoch": 0.32050218261842056,
+      "grad_norm": 0.11715764552354813,
+      "learning_rate": 0.00018719295713667198,
+      "loss": 0.1428,
+      "step": 4442
+    },
+    {
+      "epoch": 0.320574335293481,
+      "grad_norm": 0.10514655709266663,
+      "learning_rate": 0.00018719007071727524,
+      "loss": 0.13,
+      "step": 4443
+    },
+    {
+      "epoch": 0.32064648796854145,
+      "grad_norm": 0.10924189537763596,
+      "learning_rate": 0.00018718718429787848,
+      "loss": 0.1128,
+      "step": 4444
+    },
+    {
+      "epoch": 0.32071864064360184,
+      "grad_norm": 0.0940491333603859,
+      "learning_rate": 0.00018718429787848174,
+      "loss": 0.1718,
+      "step": 4445
+    },
+    {
+      "epoch": 0.3207907933186623,
+      "grad_norm": 0.10690921545028687,
+      "learning_rate": 0.000187181411459085,
+      "loss": 0.1282,
+      "step": 4446
+    },
+    {
+      "epoch": 0.3208629459937227,
+      "grad_norm": 0.09734002500772476,
+      "learning_rate": 0.00018717852503968827,
+      "loss": 0.1351,
+      "step": 4447
+    },
+    {
+      "epoch": 0.32093509866878317,
+      "grad_norm": 0.11302919685840607,
+      "learning_rate": 0.00018717563862029153,
+      "loss": 0.1471,
+      "step": 4448
+    },
+    {
+      "epoch": 0.32100725134384356,
+      "grad_norm": 0.1114354357123375,
+      "learning_rate": 0.0001871727522008948,
+      "loss": 0.1741,
+      "step": 4449
+    },
+    {
+      "epoch": 0.321079404018904,
+      "grad_norm": 0.09100287407636642,
+      "learning_rate": 0.00018716986578149806,
+      "loss": 0.1366,
+      "step": 4450
+    },
+    {
+      "epoch": 0.32115155669396445,
+      "grad_norm": 0.11997717618942261,
+      "learning_rate": 0.00018716697936210132,
+      "loss": 0.2065,
+      "step": 4451
+    },
+    {
+      "epoch": 0.32122370936902483,
+      "grad_norm": 0.11886655539274216,
+      "learning_rate": 0.00018716409294270458,
+      "loss": 0.1109,
+      "step": 4452
+    },
+    {
+      "epoch": 0.3212958620440853,
+      "grad_norm": 0.13030387461185455,
+      "learning_rate": 0.00018716120652330785,
+      "loss": 0.1574,
+      "step": 4453
+    },
+    {
+      "epoch": 0.3213680147191457,
+      "grad_norm": 0.11604657024145126,
+      "learning_rate": 0.0001871583201039111,
+      "loss": 0.1803,
+      "step": 4454
+    },
+    {
+      "epoch": 0.32144016739420617,
+      "grad_norm": 0.13347670435905457,
+      "learning_rate": 0.00018715543368451435,
+      "loss": 0.19,
+      "step": 4455
+    },
+    {
+      "epoch": 0.32151232006926656,
+      "grad_norm": 0.09421198815107346,
+      "learning_rate": 0.00018715254726511764,
+      "loss": 0.1779,
+      "step": 4456
+    },
+    {
+      "epoch": 0.321584472744327,
+      "grad_norm": 0.11267413198947906,
+      "learning_rate": 0.0001871496608457209,
+      "loss": 0.1803,
+      "step": 4457
+    },
+    {
+      "epoch": 0.32165662541938744,
+      "grad_norm": 0.12656359374523163,
+      "learning_rate": 0.00018714677442632416,
+      "loss": 0.1604,
+      "step": 4458
+    },
+    {
+      "epoch": 0.32172877809444783,
+      "grad_norm": 0.1371208280324936,
+      "learning_rate": 0.00018714388800692742,
+      "loss": 0.1612,
+      "step": 4459
+    },
+    {
+      "epoch": 0.3218009307695083,
+      "grad_norm": 0.08244256675243378,
+      "learning_rate": 0.00018714100158753066,
+      "loss": 0.1081,
+      "step": 4460
+    },
+    {
+      "epoch": 0.3218730834445687,
+      "grad_norm": 0.10778731852769852,
+      "learning_rate": 0.00018713811516813392,
+      "loss": 0.2065,
+      "step": 4461
+    },
+    {
+      "epoch": 0.3219452361196291,
+      "grad_norm": 0.11527123302221298,
+      "learning_rate": 0.00018713522874873719,
+      "loss": 0.1305,
+      "step": 4462
+    },
+    {
+      "epoch": 0.32201738879468955,
+      "grad_norm": 0.10875330120325089,
+      "learning_rate": 0.00018713234232934048,
+      "loss": 0.1715,
+      "step": 4463
+    },
+    {
+      "epoch": 0.32208954146975,
+      "grad_norm": 0.09810987859964371,
+      "learning_rate": 0.00018712945590994374,
+      "loss": 0.1827,
+      "step": 4464
+    },
+    {
+      "epoch": 0.32216169414481044,
+      "grad_norm": 0.1507887840270996,
+      "learning_rate": 0.00018712656949054697,
+      "loss": 0.1881,
+      "step": 4465
+    },
+    {
+      "epoch": 0.32223384681987083,
+      "grad_norm": 0.10882745683193207,
+      "learning_rate": 0.00018712368307115024,
+      "loss": 0.1538,
+      "step": 4466
+    },
+    {
+      "epoch": 0.3223059994949313,
+      "grad_norm": 0.14391076564788818,
+      "learning_rate": 0.0001871207966517535,
+      "loss": 0.1758,
+      "step": 4467
+    },
+    {
+      "epoch": 0.3223781521699917,
+      "grad_norm": 0.10498519986867905,
+      "learning_rate": 0.00018711791023235676,
+      "loss": 0.1741,
+      "step": 4468
+    },
+    {
+      "epoch": 0.3224503048450521,
+      "grad_norm": 0.11114256083965302,
+      "learning_rate": 0.00018711502381296003,
+      "loss": 0.1855,
+      "step": 4469
+    },
+    {
+      "epoch": 0.32252245752011255,
+      "grad_norm": 0.1319652646780014,
+      "learning_rate": 0.0001871121373935633,
+      "loss": 0.1523,
+      "step": 4470
+    },
+    {
+      "epoch": 0.322594610195173,
+      "grad_norm": 0.13710534572601318,
+      "learning_rate": 0.00018710925097416655,
+      "loss": 0.144,
+      "step": 4471
+    },
+    {
+      "epoch": 0.32266676287023344,
+      "grad_norm": 0.11060652136802673,
+      "learning_rate": 0.00018710636455476982,
+      "loss": 0.1513,
+      "step": 4472
+    },
+    {
+      "epoch": 0.3227389155452938,
+      "grad_norm": 0.13275280594825745,
+      "learning_rate": 0.00018710347813537308,
+      "loss": 0.1882,
+      "step": 4473
+    },
+    {
+      "epoch": 0.32281106822035427,
+      "grad_norm": 0.09653960913419724,
+      "learning_rate": 0.00018710059171597634,
+      "loss": 0.1658,
+      "step": 4474
+    },
+    {
+      "epoch": 0.3228832208954147,
+      "grad_norm": 0.11430240422487259,
+      "learning_rate": 0.0001870977052965796,
+      "loss": 0.1951,
+      "step": 4475
+    },
+    {
+      "epoch": 0.3229553735704751,
+      "grad_norm": 0.12474402785301208,
+      "learning_rate": 0.00018709481887718287,
+      "loss": 0.1901,
+      "step": 4476
+    },
+    {
+      "epoch": 0.32302752624553555,
+      "grad_norm": 0.138519287109375,
+      "learning_rate": 0.00018709193245778613,
+      "loss": 0.1691,
+      "step": 4477
+    },
+    {
+      "epoch": 0.323099678920596,
+      "grad_norm": 0.13189628720283508,
+      "learning_rate": 0.0001870890460383894,
+      "loss": 0.1214,
+      "step": 4478
+    },
+    {
+      "epoch": 0.32317183159565643,
+      "grad_norm": 0.12156566232442856,
+      "learning_rate": 0.00018708615961899266,
+      "loss": 0.1632,
+      "step": 4479
+    },
+    {
+      "epoch": 0.3232439842707168,
+      "grad_norm": 0.1292414367198944,
+      "learning_rate": 0.00018708327319959592,
+      "loss": 0.1777,
+      "step": 4480
+    },
+    {
+      "epoch": 0.32331613694577727,
+      "grad_norm": 0.1226365715265274,
+      "learning_rate": 0.00018708038678019918,
+      "loss": 0.1383,
+      "step": 4481
+    },
+    {
+      "epoch": 0.3233882896208377,
+      "grad_norm": 0.11465481668710709,
+      "learning_rate": 0.00018707750036080242,
+      "loss": 0.1404,
+      "step": 4482
+    },
+    {
+      "epoch": 0.3234604422958981,
+      "grad_norm": 0.12336615473031998,
+      "learning_rate": 0.00018707461394140568,
+      "loss": 0.1984,
+      "step": 4483
+    },
+    {
+      "epoch": 0.32353259497095854,
+      "grad_norm": 0.10501537472009659,
+      "learning_rate": 0.00018707172752200897,
+      "loss": 0.1069,
+      "step": 4484
+    },
+    {
+      "epoch": 0.323604747646019,
+      "grad_norm": 0.12265104800462723,
+      "learning_rate": 0.00018706884110261223,
+      "loss": 0.1748,
+      "step": 4485
+    },
+    {
+      "epoch": 0.32367690032107943,
+      "grad_norm": 0.1346326619386673,
+      "learning_rate": 0.0001870659546832155,
+      "loss": 0.1089,
+      "step": 4486
+    },
+    {
+      "epoch": 0.3237490529961398,
+      "grad_norm": 0.1227821558713913,
+      "learning_rate": 0.00018706306826381873,
+      "loss": 0.1666,
+      "step": 4487
+    },
+    {
+      "epoch": 0.32382120567120026,
+      "grad_norm": 0.1063651442527771,
+      "learning_rate": 0.000187060181844422,
+      "loss": 0.1539,
+      "step": 4488
+    },
+    {
+      "epoch": 0.3238933583462607,
+      "grad_norm": 0.09898777306079865,
+      "learning_rate": 0.00018705729542502526,
+      "loss": 0.1503,
+      "step": 4489
+    },
+    {
+      "epoch": 0.3239655110213211,
+      "grad_norm": 0.12139619141817093,
+      "learning_rate": 0.00018705440900562852,
+      "loss": 0.1826,
+      "step": 4490
+    },
+    {
+      "epoch": 0.32403766369638154,
+      "grad_norm": 0.11227799952030182,
+      "learning_rate": 0.0001870515225862318,
+      "loss": 0.2026,
+      "step": 4491
+    },
+    {
+      "epoch": 0.324109816371442,
+      "grad_norm": 0.09174603223800659,
+      "learning_rate": 0.00018704863616683505,
+      "loss": 0.1523,
+      "step": 4492
+    },
+    {
+      "epoch": 0.32418196904650237,
+      "grad_norm": 0.08484648168087006,
+      "learning_rate": 0.0001870457497474383,
+      "loss": 0.149,
+      "step": 4493
+    },
+    {
+      "epoch": 0.3242541217215628,
+      "grad_norm": 0.13810379803180695,
+      "learning_rate": 0.00018704286332804157,
+      "loss": 0.1605,
+      "step": 4494
+    },
+    {
+      "epoch": 0.32432627439662326,
+      "grad_norm": 0.10621599107980728,
+      "learning_rate": 0.00018703997690864484,
+      "loss": 0.1376,
+      "step": 4495
+    },
+    {
+      "epoch": 0.3243984270716837,
+      "grad_norm": 0.11542864143848419,
+      "learning_rate": 0.0001870370904892481,
+      "loss": 0.1821,
+      "step": 4496
+    },
+    {
+      "epoch": 0.3244705797467441,
+      "grad_norm": 0.0962272360920906,
+      "learning_rate": 0.00018703420406985136,
+      "loss": 0.1415,
+      "step": 4497
+    },
+    {
+      "epoch": 0.32454273242180454,
+      "grad_norm": 0.13142697513103485,
+      "learning_rate": 0.00018703131765045462,
+      "loss": 0.1624,
+      "step": 4498
+    },
+    {
+      "epoch": 0.324614885096865,
+      "grad_norm": 0.1143009141087532,
+      "learning_rate": 0.0001870284312310579,
+      "loss": 0.1921,
+      "step": 4499
+    },
+    {
+      "epoch": 0.32468703777192537,
+      "grad_norm": 0.10246312618255615,
+      "learning_rate": 0.00018702554481166115,
+      "loss": 0.1578,
+      "step": 4500
+    },
+    {
+      "epoch": 0.3247591904469858,
+      "grad_norm": 0.11688818782567978,
+      "learning_rate": 0.0001870226583922644,
+      "loss": 0.1536,
+      "step": 4501
+    },
+    {
+      "epoch": 0.32483134312204626,
+      "grad_norm": 0.1425468921661377,
+      "learning_rate": 0.00018701977197286768,
+      "loss": 0.1644,
+      "step": 4502
+    },
+    {
+      "epoch": 0.3249034957971067,
+      "grad_norm": 0.10924214869737625,
+      "learning_rate": 0.0001870168855534709,
+      "loss": 0.2426,
+      "step": 4503
+    },
+    {
+      "epoch": 0.3249756484721671,
+      "grad_norm": 0.11056023836135864,
+      "learning_rate": 0.00018701399913407417,
+      "loss": 0.1529,
+      "step": 4504
+    },
+    {
+      "epoch": 0.32504780114722753,
+      "grad_norm": 0.08694283664226532,
+      "learning_rate": 0.00018701111271467746,
+      "loss": 0.1748,
+      "step": 4505
+    },
+    {
+      "epoch": 0.325119953822288,
+      "grad_norm": 0.10328497737646103,
+      "learning_rate": 0.00018700822629528073,
+      "loss": 0.141,
+      "step": 4506
+    },
+    {
+      "epoch": 0.32519210649734837,
+      "grad_norm": 0.1171845868229866,
+      "learning_rate": 0.000187005339875884,
+      "loss": 0.1579,
+      "step": 4507
+    },
+    {
+      "epoch": 0.3252642591724088,
+      "grad_norm": 0.09219054132699966,
+      "learning_rate": 0.00018700245345648723,
+      "loss": 0.1664,
+      "step": 4508
+    },
+    {
+      "epoch": 0.32533641184746925,
+      "grad_norm": 0.1340274214744568,
+      "learning_rate": 0.0001869995670370905,
+      "loss": 0.1391,
+      "step": 4509
+    },
+    {
+      "epoch": 0.3254085645225297,
+      "grad_norm": 0.1330026537179947,
+      "learning_rate": 0.00018699668061769375,
+      "loss": 0.1515,
+      "step": 4510
+    },
+    {
+      "epoch": 0.3254807171975901,
+      "grad_norm": 0.10810331255197525,
+      "learning_rate": 0.00018699379419829702,
+      "loss": 0.1008,
+      "step": 4511
+    },
+    {
+      "epoch": 0.32555286987265053,
+      "grad_norm": 0.10727111250162125,
+      "learning_rate": 0.0001869909077789003,
+      "loss": 0.142,
+      "step": 4512
+    },
+    {
+      "epoch": 0.325625022547711,
+      "grad_norm": 0.14349889755249023,
+      "learning_rate": 0.00018698802135950354,
+      "loss": 0.1649,
+      "step": 4513
+    },
+    {
+      "epoch": 0.32569717522277136,
+      "grad_norm": 0.09554945677518845,
+      "learning_rate": 0.0001869851349401068,
+      "loss": 0.1674,
+      "step": 4514
+    },
+    {
+      "epoch": 0.3257693278978318,
+      "grad_norm": 0.09096377342939377,
+      "learning_rate": 0.00018698224852071007,
+      "loss": 0.1298,
+      "step": 4515
+    },
+    {
+      "epoch": 0.32584148057289225,
+      "grad_norm": 0.1272154152393341,
+      "learning_rate": 0.00018697936210131333,
+      "loss": 0.1946,
+      "step": 4516
+    },
+    {
+      "epoch": 0.3259136332479527,
+      "grad_norm": 0.10733834654092789,
+      "learning_rate": 0.0001869764756819166,
+      "loss": 0.1779,
+      "step": 4517
+    },
+    {
+      "epoch": 0.3259857859230131,
+      "grad_norm": 0.10842615365982056,
+      "learning_rate": 0.00018697358926251986,
+      "loss": 0.1721,
+      "step": 4518
+    },
+    {
+      "epoch": 0.3260579385980735,
+      "grad_norm": 0.10522483289241791,
+      "learning_rate": 0.00018697070284312312,
+      "loss": 0.1295,
+      "step": 4519
+    },
+    {
+      "epoch": 0.32613009127313397,
+      "grad_norm": 0.1101028248667717,
+      "learning_rate": 0.00018696781642372638,
+      "loss": 0.1372,
+      "step": 4520
+    },
+    {
+      "epoch": 0.32620224394819436,
+      "grad_norm": 0.10599458962678909,
+      "learning_rate": 0.00018696493000432964,
+      "loss": 0.1429,
+      "step": 4521
+    },
+    {
+      "epoch": 0.3262743966232548,
+      "grad_norm": 0.10401427745819092,
+      "learning_rate": 0.0001869620435849329,
+      "loss": 0.1497,
+      "step": 4522
+    },
+    {
+      "epoch": 0.32634654929831525,
+      "grad_norm": 0.11688731610774994,
+      "learning_rate": 0.00018695915716553617,
+      "loss": 0.1729,
+      "step": 4523
+    },
+    {
+      "epoch": 0.32641870197337564,
+      "grad_norm": 0.11261221021413803,
+      "learning_rate": 0.0001869562707461394,
+      "loss": 0.2113,
+      "step": 4524
+    },
+    {
+      "epoch": 0.3264908546484361,
+      "grad_norm": 0.1077963262796402,
+      "learning_rate": 0.00018695338432674267,
+      "loss": 0.1665,
+      "step": 4525
+    },
+    {
+      "epoch": 0.3265630073234965,
+      "grad_norm": 0.11620333045721054,
+      "learning_rate": 0.00018695049790734596,
+      "loss": 0.1312,
+      "step": 4526
+    },
+    {
+      "epoch": 0.32663515999855697,
+      "grad_norm": 0.10986893624067307,
+      "learning_rate": 0.00018694761148794922,
+      "loss": 0.1637,
+      "step": 4527
+    },
+    {
+      "epoch": 0.32670731267361736,
+      "grad_norm": 0.12115433067083359,
+      "learning_rate": 0.00018694472506855248,
+      "loss": 0.1028,
+      "step": 4528
+    },
+    {
+      "epoch": 0.3267794653486778,
+      "grad_norm": 0.0897984430193901,
+      "learning_rate": 0.00018694183864915572,
+      "loss": 0.13,
+      "step": 4529
+    },
+    {
+      "epoch": 0.32685161802373824,
+      "grad_norm": 0.11984968930482864,
+      "learning_rate": 0.00018693895222975898,
+      "loss": 0.1606,
+      "step": 4530
+    },
+    {
+      "epoch": 0.32692377069879863,
+      "grad_norm": 0.09984946250915527,
+      "learning_rate": 0.00018693606581036225,
+      "loss": 0.1656,
+      "step": 4531
+    },
+    {
+      "epoch": 0.3269959233738591,
+      "grad_norm": 0.11888393014669418,
+      "learning_rate": 0.0001869331793909655,
+      "loss": 0.1907,
+      "step": 4532
+    },
+    {
+      "epoch": 0.3270680760489195,
+      "grad_norm": 0.11285512894392014,
+      "learning_rate": 0.0001869302929715688,
+      "loss": 0.1239,
+      "step": 4533
+    },
+    {
+      "epoch": 0.32714022872397996,
+      "grad_norm": 0.12616856396198273,
+      "learning_rate": 0.00018692740655217204,
+      "loss": 0.1966,
+      "step": 4534
+    },
+    {
+      "epoch": 0.32721238139904035,
+      "grad_norm": 0.12473762035369873,
+      "learning_rate": 0.0001869245201327753,
+      "loss": 0.1923,
+      "step": 4535
+    },
+    {
+      "epoch": 0.3272845340741008,
+      "grad_norm": 0.09554195404052734,
+      "learning_rate": 0.00018692163371337856,
+      "loss": 0.143,
+      "step": 4536
+    },
+    {
+      "epoch": 0.32735668674916124,
+      "grad_norm": 0.10972704738378525,
+      "learning_rate": 0.00018691874729398182,
+      "loss": 0.106,
+      "step": 4537
+    },
+    {
+      "epoch": 0.32742883942422163,
+      "grad_norm": 0.10398998856544495,
+      "learning_rate": 0.0001869158608745851,
+      "loss": 0.174,
+      "step": 4538
+    },
+    {
+      "epoch": 0.3275009920992821,
+      "grad_norm": 0.1194574236869812,
+      "learning_rate": 0.00018691297445518835,
+      "loss": 0.1593,
+      "step": 4539
+    },
+    {
+      "epoch": 0.3275731447743425,
+      "grad_norm": 0.09499070048332214,
+      "learning_rate": 0.0001869100880357916,
+      "loss": 0.1614,
+      "step": 4540
+    },
+    {
+      "epoch": 0.32764529744940296,
+      "grad_norm": 0.11292389035224915,
+      "learning_rate": 0.00018690720161639488,
+      "loss": 0.1857,
+      "step": 4541
+    },
+    {
+      "epoch": 0.32771745012446335,
+      "grad_norm": 0.10467950254678726,
+      "learning_rate": 0.00018690431519699814,
+      "loss": 0.1776,
+      "step": 4542
+    },
+    {
+      "epoch": 0.3277896027995238,
+      "grad_norm": 0.13810518383979797,
+      "learning_rate": 0.0001869014287776014,
+      "loss": 0.1982,
+      "step": 4543
+    },
+    {
+      "epoch": 0.32786175547458424,
+      "grad_norm": 0.09907660633325577,
+      "learning_rate": 0.00018689854235820466,
+      "loss": 0.1903,
+      "step": 4544
+    },
+    {
+      "epoch": 0.3279339081496446,
+      "grad_norm": 0.09725797176361084,
+      "learning_rate": 0.0001868956559388079,
+      "loss": 0.1079,
+      "step": 4545
+    },
+    {
+      "epoch": 0.32800606082470507,
+      "grad_norm": 0.09951073676347733,
+      "learning_rate": 0.00018689276951941116,
+      "loss": 0.1275,
+      "step": 4546
+    },
+    {
+      "epoch": 0.3280782134997655,
+      "grad_norm": 0.11599034070968628,
+      "learning_rate": 0.00018688988310001445,
+      "loss": 0.1522,
+      "step": 4547
+    },
+    {
+      "epoch": 0.32815036617482596,
+      "grad_norm": 0.11120975017547607,
+      "learning_rate": 0.00018688699668061772,
+      "loss": 0.2065,
+      "step": 4548
+    },
+    {
+      "epoch": 0.32822251884988635,
+      "grad_norm": 0.1209036186337471,
+      "learning_rate": 0.00018688411026122098,
+      "loss": 0.1301,
+      "step": 4549
+    },
+    {
+      "epoch": 0.3282946715249468,
+      "grad_norm": 0.13418681919574738,
+      "learning_rate": 0.00018688122384182421,
+      "loss": 0.143,
+      "step": 4550
+    },
+    {
+      "epoch": 0.32836682420000723,
+      "grad_norm": 0.12810713052749634,
+      "learning_rate": 0.00018687833742242748,
+      "loss": 0.185,
+      "step": 4551
+    },
+    {
+      "epoch": 0.3284389768750676,
+      "grad_norm": 0.12670108675956726,
+      "learning_rate": 0.00018687545100303074,
+      "loss": 0.1891,
+      "step": 4552
+    },
+    {
+      "epoch": 0.32851112955012807,
+      "grad_norm": 0.09912601858377457,
+      "learning_rate": 0.000186872564583634,
+      "loss": 0.189,
+      "step": 4553
+    },
+    {
+      "epoch": 0.3285832822251885,
+      "grad_norm": 0.10828938335180283,
+      "learning_rate": 0.0001868696781642373,
+      "loss": 0.1458,
+      "step": 4554
+    },
+    {
+      "epoch": 0.3286554349002489,
+      "grad_norm": 0.12141595780849457,
+      "learning_rate": 0.00018686679174484053,
+      "loss": 0.1673,
+      "step": 4555
+    },
+    {
+      "epoch": 0.32872758757530934,
+      "grad_norm": 0.09101445972919464,
+      "learning_rate": 0.0001868639053254438,
+      "loss": 0.177,
+      "step": 4556
+    },
+    {
+      "epoch": 0.3287997402503698,
+      "grad_norm": 0.10998333990573883,
+      "learning_rate": 0.00018686101890604706,
+      "loss": 0.1473,
+      "step": 4557
+    },
+    {
+      "epoch": 0.32887189292543023,
+      "grad_norm": 0.10678628832101822,
+      "learning_rate": 0.00018685813248665032,
+      "loss": 0.1801,
+      "step": 4558
+    },
+    {
+      "epoch": 0.3289440456004906,
+      "grad_norm": 0.12848390638828278,
+      "learning_rate": 0.00018685524606725358,
+      "loss": 0.184,
+      "step": 4559
+    },
+    {
+      "epoch": 0.32901619827555106,
+      "grad_norm": 0.10749983787536621,
+      "learning_rate": 0.00018685235964785684,
+      "loss": 0.1449,
+      "step": 4560
+    },
+    {
+      "epoch": 0.3290883509506115,
+      "grad_norm": 0.1044686958193779,
+      "learning_rate": 0.0001868494732284601,
+      "loss": 0.1846,
+      "step": 4561
+    },
+    {
+      "epoch": 0.3291605036256719,
+      "grad_norm": 0.1108483299612999,
+      "learning_rate": 0.00018684658680906337,
+      "loss": 0.184,
+      "step": 4562
+    },
+    {
+      "epoch": 0.32923265630073234,
+      "grad_norm": 0.13553328812122345,
+      "learning_rate": 0.00018684370038966663,
+      "loss": 0.1676,
+      "step": 4563
+    },
+    {
+      "epoch": 0.3293048089757928,
+      "grad_norm": 0.10779878497123718,
+      "learning_rate": 0.0001868408139702699,
+      "loss": 0.111,
+      "step": 4564
+    },
+    {
+      "epoch": 0.32937696165085323,
+      "grad_norm": 0.10668893903493881,
+      "learning_rate": 0.00018683792755087316,
+      "loss": 0.1699,
+      "step": 4565
+    },
+    {
+      "epoch": 0.3294491143259136,
+      "grad_norm": 0.10057191550731659,
+      "learning_rate": 0.0001868350411314764,
+      "loss": 0.1336,
+      "step": 4566
+    },
+    {
+      "epoch": 0.32952126700097406,
+      "grad_norm": 0.10133133828639984,
+      "learning_rate": 0.00018683215471207966,
+      "loss": 0.1269,
+      "step": 4567
+    },
+    {
+      "epoch": 0.3295934196760345,
+      "grad_norm": 0.11621999740600586,
+      "learning_rate": 0.00018682926829268295,
+      "loss": 0.1926,
+      "step": 4568
+    },
+    {
+      "epoch": 0.3296655723510949,
+      "grad_norm": 0.10976468026638031,
+      "learning_rate": 0.0001868263818732862,
+      "loss": 0.1501,
+      "step": 4569
+    },
+    {
+      "epoch": 0.32973772502615534,
+      "grad_norm": 0.1117042601108551,
+      "learning_rate": 0.00018682349545388947,
+      "loss": 0.1374,
+      "step": 4570
+    },
+    {
+      "epoch": 0.3298098777012158,
+      "grad_norm": 0.12507273256778717,
+      "learning_rate": 0.0001868206090344927,
+      "loss": 0.1484,
+      "step": 4571
+    },
+    {
+      "epoch": 0.3298820303762762,
+      "grad_norm": 0.1241648867726326,
+      "learning_rate": 0.00018681772261509597,
+      "loss": 0.1712,
+      "step": 4572
+    },
+    {
+      "epoch": 0.3299541830513366,
+      "grad_norm": 0.1172139123082161,
+      "learning_rate": 0.00018681483619569923,
+      "loss": 0.1523,
+      "step": 4573
+    },
+    {
+      "epoch": 0.33002633572639706,
+      "grad_norm": 0.14421804249286652,
+      "learning_rate": 0.0001868119497763025,
+      "loss": 0.1611,
+      "step": 4574
+    },
+    {
+      "epoch": 0.3300984884014575,
+      "grad_norm": 0.12426183372735977,
+      "learning_rate": 0.0001868090633569058,
+      "loss": 0.1317,
+      "step": 4575
+    },
+    {
+      "epoch": 0.3301706410765179,
+      "grad_norm": 0.1330544501543045,
+      "learning_rate": 0.00018680617693750902,
+      "loss": 0.1573,
+      "step": 4576
+    },
+    {
+      "epoch": 0.33024279375157833,
+      "grad_norm": 0.10799801349639893,
+      "learning_rate": 0.0001868032905181123,
+      "loss": 0.18,
+      "step": 4577
+    },
+    {
+      "epoch": 0.3303149464266388,
+      "grad_norm": 0.11260545998811722,
+      "learning_rate": 0.00018680040409871555,
+      "loss": 0.1513,
+      "step": 4578
+    },
+    {
+      "epoch": 0.3303870991016992,
+      "grad_norm": 0.12739793956279755,
+      "learning_rate": 0.0001867975176793188,
+      "loss": 0.1609,
+      "step": 4579
+    },
+    {
+      "epoch": 0.3304592517767596,
+      "grad_norm": 0.10804455727338791,
+      "learning_rate": 0.00018679463125992208,
+      "loss": 0.1712,
+      "step": 4580
+    },
+    {
+      "epoch": 0.33053140445182005,
+      "grad_norm": 0.1295986771583557,
+      "learning_rate": 0.00018679174484052534,
+      "loss": 0.1722,
+      "step": 4581
+    },
+    {
+      "epoch": 0.3306035571268805,
+      "grad_norm": 0.10611939430236816,
+      "learning_rate": 0.0001867888584211286,
+      "loss": 0.1396,
+      "step": 4582
+    },
+    {
+      "epoch": 0.3306757098019409,
+      "grad_norm": 0.1253766566514969,
+      "learning_rate": 0.00018678597200173186,
+      "loss": 0.1414,
+      "step": 4583
+    },
+    {
+      "epoch": 0.33074786247700133,
+      "grad_norm": 0.10080047696828842,
+      "learning_rate": 0.00018678308558233513,
+      "loss": 0.1893,
+      "step": 4584
+    },
+    {
+      "epoch": 0.3308200151520618,
+      "grad_norm": 0.1438768357038498,
+      "learning_rate": 0.0001867801991629384,
+      "loss": 0.187,
+      "step": 4585
+    },
+    {
+      "epoch": 0.33089216782712216,
+      "grad_norm": 0.1150486096739769,
+      "learning_rate": 0.00018677731274354165,
+      "loss": 0.1851,
+      "step": 4586
+    },
+    {
+      "epoch": 0.3309643205021826,
+      "grad_norm": 0.11817629635334015,
+      "learning_rate": 0.0001867744263241449,
+      "loss": 0.1796,
+      "step": 4587
+    },
+    {
+      "epoch": 0.33103647317724305,
+      "grad_norm": 0.11434055864810944,
+      "learning_rate": 0.00018677153990474815,
+      "loss": 0.1736,
+      "step": 4588
+    },
+    {
+      "epoch": 0.3311086258523035,
+      "grad_norm": 0.12488023936748505,
+      "learning_rate": 0.00018676865348535144,
+      "loss": 0.1656,
+      "step": 4589
+    },
+    {
+      "epoch": 0.3311807785273639,
+      "grad_norm": 0.11203234642744064,
+      "learning_rate": 0.0001867657670659547,
+      "loss": 0.1106,
+      "step": 4590
+    },
+    {
+      "epoch": 0.33125293120242433,
+      "grad_norm": 0.1347704976797104,
+      "learning_rate": 0.00018676288064655797,
+      "loss": 0.1662,
+      "step": 4591
+    },
+    {
+      "epoch": 0.33132508387748477,
+      "grad_norm": 0.11631891131401062,
+      "learning_rate": 0.0001867599942271612,
+      "loss": 0.1512,
+      "step": 4592
+    },
+    {
+      "epoch": 0.33139723655254516,
+      "grad_norm": 0.1219472661614418,
+      "learning_rate": 0.00018675710780776447,
+      "loss": 0.1522,
+      "step": 4593
+    },
+    {
+      "epoch": 0.3314693892276056,
+      "grad_norm": 0.09370163828134537,
+      "learning_rate": 0.00018675422138836773,
+      "loss": 0.1296,
+      "step": 4594
+    },
+    {
+      "epoch": 0.33154154190266605,
+      "grad_norm": 0.12474989145994186,
+      "learning_rate": 0.000186751334968971,
+      "loss": 0.169,
+      "step": 4595
+    },
+    {
+      "epoch": 0.3316136945777265,
+      "grad_norm": 0.10887616872787476,
+      "learning_rate": 0.00018674844854957428,
+      "loss": 0.1326,
+      "step": 4596
+    },
+    {
+      "epoch": 0.3316858472527869,
+      "grad_norm": 0.14505040645599365,
+      "learning_rate": 0.00018674556213017752,
+      "loss": 0.2107,
+      "step": 4597
+    },
+    {
+      "epoch": 0.3317579999278473,
+      "grad_norm": 0.10402046889066696,
+      "learning_rate": 0.00018674267571078078,
+      "loss": 0.1095,
+      "step": 4598
+    },
+    {
+      "epoch": 0.33183015260290777,
+      "grad_norm": 0.10427999496459961,
+      "learning_rate": 0.00018673978929138404,
+      "loss": 0.145,
+      "step": 4599
+    },
+    {
+      "epoch": 0.33190230527796816,
+      "grad_norm": 0.1119854524731636,
+      "learning_rate": 0.0001867369028719873,
+      "loss": 0.1437,
+      "step": 4600
+    },
+    {
+      "epoch": 0.3319744579530286,
+      "grad_norm": 0.12867394089698792,
+      "learning_rate": 0.00018673401645259057,
+      "loss": 0.1505,
+      "step": 4601
+    },
+    {
+      "epoch": 0.33204661062808905,
+      "grad_norm": 0.11001761257648468,
+      "learning_rate": 0.00018673113003319383,
+      "loss": 0.1703,
+      "step": 4602
+    },
+    {
+      "epoch": 0.3321187633031495,
+      "grad_norm": 0.11156858503818512,
+      "learning_rate": 0.0001867282436137971,
+      "loss": 0.1937,
+      "step": 4603
+    },
+    {
+      "epoch": 0.3321909159782099,
+      "grad_norm": 0.11975778639316559,
+      "learning_rate": 0.00018672535719440036,
+      "loss": 0.1671,
+      "step": 4604
+    },
+    {
+      "epoch": 0.3322630686532703,
+      "grad_norm": 0.12916690111160278,
+      "learning_rate": 0.00018672247077500362,
+      "loss": 0.1152,
+      "step": 4605
+    },
+    {
+      "epoch": 0.33233522132833077,
+      "grad_norm": 0.10326454043388367,
+      "learning_rate": 0.00018671958435560688,
+      "loss": 0.1481,
+      "step": 4606
+    },
+    {
+      "epoch": 0.33240737400339115,
+      "grad_norm": 0.10354454070329666,
+      "learning_rate": 0.00018671669793621015,
+      "loss": 0.1201,
+      "step": 4607
+    },
+    {
+      "epoch": 0.3324795266784516,
+      "grad_norm": 0.13755939900875092,
+      "learning_rate": 0.0001867138115168134,
+      "loss": 0.1908,
+      "step": 4608
+    },
+    {
+      "epoch": 0.33255167935351204,
+      "grad_norm": 0.1091679260134697,
+      "learning_rate": 0.00018671092509741665,
+      "loss": 0.1395,
+      "step": 4609
+    },
+    {
+      "epoch": 0.3326238320285725,
+      "grad_norm": 0.10698618739843369,
+      "learning_rate": 0.0001867080386780199,
+      "loss": 0.1705,
+      "step": 4610
+    },
+    {
+      "epoch": 0.3326959847036329,
+      "grad_norm": 0.12497153133153915,
+      "learning_rate": 0.0001867051522586232,
+      "loss": 0.1841,
+      "step": 4611
+    },
+    {
+      "epoch": 0.3327681373786933,
+      "grad_norm": 0.10098680853843689,
+      "learning_rate": 0.00018670226583922646,
+      "loss": 0.1874,
+      "step": 4612
+    },
+    {
+      "epoch": 0.33284029005375376,
+      "grad_norm": 0.0964135080575943,
+      "learning_rate": 0.00018669937941982972,
+      "loss": 0.1674,
+      "step": 4613
+    },
+    {
+      "epoch": 0.33291244272881415,
+      "grad_norm": 0.10805868357419968,
+      "learning_rate": 0.00018669649300043296,
+      "loss": 0.1431,
+      "step": 4614
+    },
+    {
+      "epoch": 0.3329845954038746,
+      "grad_norm": 0.11034578830003738,
+      "learning_rate": 0.00018669360658103622,
+      "loss": 0.1658,
+      "step": 4615
+    },
+    {
+      "epoch": 0.33305674807893504,
+      "grad_norm": 0.09507668018341064,
+      "learning_rate": 0.00018669072016163949,
+      "loss": 0.1955,
+      "step": 4616
+    },
+    {
+      "epoch": 0.3331289007539954,
+      "grad_norm": 0.09780636429786682,
+      "learning_rate": 0.00018668783374224275,
+      "loss": 0.1609,
+      "step": 4617
+    },
+    {
+      "epoch": 0.33320105342905587,
+      "grad_norm": 0.09780305624008179,
+      "learning_rate": 0.00018668494732284604,
+      "loss": 0.1506,
+      "step": 4618
+    },
+    {
+      "epoch": 0.3332732061041163,
+      "grad_norm": 0.10436023026704788,
+      "learning_rate": 0.00018668206090344928,
+      "loss": 0.1397,
+      "step": 4619
+    },
+    {
+      "epoch": 0.33334535877917676,
+      "grad_norm": 0.13126035034656525,
+      "learning_rate": 0.00018667917448405254,
+      "loss": 0.1509,
+      "step": 4620
+    },
+    {
+      "epoch": 0.33341751145423715,
+      "grad_norm": 0.10076587647199631,
+      "learning_rate": 0.0001866762880646558,
+      "loss": 0.1777,
+      "step": 4621
+    },
+    {
+      "epoch": 0.3334896641292976,
+      "grad_norm": 0.10679809749126434,
+      "learning_rate": 0.00018667340164525906,
+      "loss": 0.1579,
+      "step": 4622
+    },
+    {
+      "epoch": 0.33356181680435804,
+      "grad_norm": 0.0949108749628067,
+      "learning_rate": 0.00018667051522586233,
+      "loss": 0.2041,
+      "step": 4623
+    },
+    {
+      "epoch": 0.3336339694794184,
+      "grad_norm": 0.1757906824350357,
+      "learning_rate": 0.0001866676288064656,
+      "loss": 0.1807,
+      "step": 4624
+    },
+    {
+      "epoch": 0.33370612215447887,
+      "grad_norm": 0.11894873529672623,
+      "learning_rate": 0.00018666474238706885,
+      "loss": 0.1407,
+      "step": 4625
+    },
+    {
+      "epoch": 0.3337782748295393,
+      "grad_norm": 0.12828341126441956,
+      "learning_rate": 0.00018666185596767212,
+      "loss": 0.2167,
+      "step": 4626
+    },
+    {
+      "epoch": 0.33385042750459976,
+      "grad_norm": 0.10132212191820145,
+      "learning_rate": 0.00018665896954827538,
+      "loss": 0.1045,
+      "step": 4627
+    },
+    {
+      "epoch": 0.33392258017966014,
+      "grad_norm": 0.1280662566423416,
+      "learning_rate": 0.00018665608312887864,
+      "loss": 0.1172,
+      "step": 4628
+    },
+    {
+      "epoch": 0.3339947328547206,
+      "grad_norm": 0.09809806197881699,
+      "learning_rate": 0.0001866531967094819,
+      "loss": 0.0899,
+      "step": 4629
+    },
+    {
+      "epoch": 0.33406688552978103,
+      "grad_norm": 0.13381366431713104,
+      "learning_rate": 0.00018665031029008514,
+      "loss": 0.1653,
+      "step": 4630
+    },
+    {
+      "epoch": 0.3341390382048414,
+      "grad_norm": 0.12938711047172546,
+      "learning_rate": 0.0001866474238706884,
+      "loss": 0.1243,
+      "step": 4631
+    },
+    {
+      "epoch": 0.33421119087990186,
+      "grad_norm": 0.21836337447166443,
+      "learning_rate": 0.0001866445374512917,
+      "loss": 0.1444,
+      "step": 4632
+    },
+    {
+      "epoch": 0.3342833435549623,
+      "grad_norm": 0.10769309848546982,
+      "learning_rate": 0.00018664165103189496,
+      "loss": 0.09,
+      "step": 4633
+    },
+    {
+      "epoch": 0.33435549623002275,
+      "grad_norm": 0.13226410746574402,
+      "learning_rate": 0.00018663876461249822,
+      "loss": 0.159,
+      "step": 4634
+    },
+    {
+      "epoch": 0.33442764890508314,
+      "grad_norm": 0.1118188351392746,
+      "learning_rate": 0.00018663587819310145,
+      "loss": 0.1398,
+      "step": 4635
+    },
+    {
+      "epoch": 0.3344998015801436,
+      "grad_norm": 0.11711820214986801,
+      "learning_rate": 0.00018663299177370472,
+      "loss": 0.1487,
+      "step": 4636
+    },
+    {
+      "epoch": 0.33457195425520403,
+      "grad_norm": 0.10661720484495163,
+      "learning_rate": 0.00018663010535430798,
+      "loss": 0.0891,
+      "step": 4637
+    },
+    {
+      "epoch": 0.3346441069302644,
+      "grad_norm": 0.11856275051832199,
+      "learning_rate": 0.00018662721893491124,
+      "loss": 0.1348,
+      "step": 4638
+    },
+    {
+      "epoch": 0.33471625960532486,
+      "grad_norm": 0.1158871129155159,
+      "learning_rate": 0.00018662433251551453,
+      "loss": 0.158,
+      "step": 4639
+    },
+    {
+      "epoch": 0.3347884122803853,
+      "grad_norm": 0.11150803416967392,
+      "learning_rate": 0.00018662144609611777,
+      "loss": 0.16,
+      "step": 4640
+    },
+    {
+      "epoch": 0.33486056495544575,
+      "grad_norm": 0.09448430687189102,
+      "learning_rate": 0.00018661855967672103,
+      "loss": 0.1971,
+      "step": 4641
+    },
+    {
+      "epoch": 0.33493271763050614,
+      "grad_norm": 0.10407508164644241,
+      "learning_rate": 0.0001866156732573243,
+      "loss": 0.1422,
+      "step": 4642
+    },
+    {
+      "epoch": 0.3350048703055666,
+      "grad_norm": 0.1328696757555008,
+      "learning_rate": 0.00018661278683792756,
+      "loss": 0.1603,
+      "step": 4643
+    },
+    {
+      "epoch": 0.335077022980627,
+      "grad_norm": 0.11169770359992981,
+      "learning_rate": 0.00018660990041853082,
+      "loss": 0.2227,
+      "step": 4644
+    },
+    {
+      "epoch": 0.3351491756556874,
+      "grad_norm": 0.12166395038366318,
+      "learning_rate": 0.00018660701399913408,
+      "loss": 0.129,
+      "step": 4645
+    },
+    {
+      "epoch": 0.33522132833074786,
+      "grad_norm": 0.17166155576705933,
+      "learning_rate": 0.00018660412757973735,
+      "loss": 0.1548,
+      "step": 4646
+    },
+    {
+      "epoch": 0.3352934810058083,
+      "grad_norm": 0.11148369312286377,
+      "learning_rate": 0.0001866012411603406,
+      "loss": 0.1503,
+      "step": 4647
+    },
+    {
+      "epoch": 0.3353656336808687,
+      "grad_norm": 0.09874880313873291,
+      "learning_rate": 0.00018659835474094387,
+      "loss": 0.1783,
+      "step": 4648
+    },
+    {
+      "epoch": 0.33543778635592914,
+      "grad_norm": 0.11467466503381729,
+      "learning_rate": 0.00018659546832154714,
+      "loss": 0.1721,
+      "step": 4649
+    },
+    {
+      "epoch": 0.3355099390309896,
+      "grad_norm": 0.14663182199001312,
+      "learning_rate": 0.0001865925819021504,
+      "loss": 0.1608,
+      "step": 4650
+    },
+    {
+      "epoch": 0.33558209170605,
+      "grad_norm": 0.10331515222787857,
+      "learning_rate": 0.00018658969548275363,
+      "loss": 0.1141,
+      "step": 4651
+    },
+    {
+      "epoch": 0.3356542443811104,
+      "grad_norm": 0.11265163868665695,
+      "learning_rate": 0.0001865868090633569,
+      "loss": 0.1506,
+      "step": 4652
+    },
+    {
+      "epoch": 0.33572639705617086,
+      "grad_norm": 0.12390236556529999,
+      "learning_rate": 0.0001865839226439602,
+      "loss": 0.2137,
+      "step": 4653
+    },
+    {
+      "epoch": 0.3357985497312313,
+      "grad_norm": 0.11463173478841782,
+      "learning_rate": 0.00018658103622456345,
+      "loss": 0.18,
+      "step": 4654
+    },
+    {
+      "epoch": 0.3358707024062917,
+      "grad_norm": 0.10468676686286926,
+      "learning_rate": 0.0001865781498051667,
+      "loss": 0.1382,
+      "step": 4655
+    },
+    {
+      "epoch": 0.33594285508135213,
+      "grad_norm": 0.12385626882314682,
+      "learning_rate": 0.00018657526338576995,
+      "loss": 0.1558,
+      "step": 4656
+    },
+    {
+      "epoch": 0.3360150077564126,
+      "grad_norm": 0.11396743357181549,
+      "learning_rate": 0.0001865723769663732,
+      "loss": 0.1245,
+      "step": 4657
+    },
+    {
+      "epoch": 0.336087160431473,
+      "grad_norm": 0.10428833216428757,
+      "learning_rate": 0.00018656949054697647,
+      "loss": 0.134,
+      "step": 4658
+    },
+    {
+      "epoch": 0.3361593131065334,
+      "grad_norm": 0.11879660189151764,
+      "learning_rate": 0.00018656660412757974,
+      "loss": 0.2264,
+      "step": 4659
+    },
+    {
+      "epoch": 0.33623146578159385,
+      "grad_norm": 0.09917629510164261,
+      "learning_rate": 0.00018656371770818303,
+      "loss": 0.1545,
+      "step": 4660
+    },
+    {
+      "epoch": 0.3363036184566543,
+      "grad_norm": 0.11942600458860397,
+      "learning_rate": 0.00018656083128878626,
+      "loss": 0.1021,
+      "step": 4661
+    },
+    {
+      "epoch": 0.3363757711317147,
+      "grad_norm": 0.11812159419059753,
+      "learning_rate": 0.00018655794486938953,
+      "loss": 0.1416,
+      "step": 4662
+    },
+    {
+      "epoch": 0.33644792380677513,
+      "grad_norm": 0.1421084702014923,
+      "learning_rate": 0.0001865550584499928,
+      "loss": 0.1365,
+      "step": 4663
+    },
+    {
+      "epoch": 0.3365200764818356,
+      "grad_norm": 0.142777681350708,
+      "learning_rate": 0.00018655217203059605,
+      "loss": 0.1786,
+      "step": 4664
+    },
+    {
+      "epoch": 0.336592229156896,
+      "grad_norm": 0.11653783172369003,
+      "learning_rate": 0.00018654928561119932,
+      "loss": 0.1993,
+      "step": 4665
+    },
+    {
+      "epoch": 0.3366643818319564,
+      "grad_norm": 0.13131196796894073,
+      "learning_rate": 0.00018654639919180258,
+      "loss": 0.1696,
+      "step": 4666
+    },
+    {
+      "epoch": 0.33673653450701685,
+      "grad_norm": 0.10151226073503494,
+      "learning_rate": 0.00018654351277240584,
+      "loss": 0.2048,
+      "step": 4667
+    },
+    {
+      "epoch": 0.3368086871820773,
+      "grad_norm": 0.10702992230653763,
+      "learning_rate": 0.0001865406263530091,
+      "loss": 0.1293,
+      "step": 4668
+    },
+    {
+      "epoch": 0.3368808398571377,
+      "grad_norm": 0.09864121675491333,
+      "learning_rate": 0.00018653773993361237,
+      "loss": 0.1224,
+      "step": 4669
+    },
+    {
+      "epoch": 0.3369529925321981,
+      "grad_norm": 0.121495820581913,
+      "learning_rate": 0.00018653485351421563,
+      "loss": 0.1651,
+      "step": 4670
+    },
+    {
+      "epoch": 0.33702514520725857,
+      "grad_norm": 0.10459835827350616,
+      "learning_rate": 0.0001865319670948189,
+      "loss": 0.158,
+      "step": 4671
+    },
+    {
+      "epoch": 0.337097297882319,
+      "grad_norm": 0.11049709469079971,
+      "learning_rate": 0.00018652908067542213,
+      "loss": 0.1203,
+      "step": 4672
+    },
+    {
+      "epoch": 0.3371694505573794,
+      "grad_norm": 0.10447237640619278,
+      "learning_rate": 0.0001865261942560254,
+      "loss": 0.1551,
+      "step": 4673
+    },
+    {
+      "epoch": 0.33724160323243985,
+      "grad_norm": 0.11688422411680222,
+      "learning_rate": 0.00018652330783662868,
+      "loss": 0.1868,
+      "step": 4674
+    },
+    {
+      "epoch": 0.3373137559075003,
+      "grad_norm": 0.1138431653380394,
+      "learning_rate": 0.00018652042141723194,
+      "loss": 0.1653,
+      "step": 4675
+    },
+    {
+      "epoch": 0.3373859085825607,
+      "grad_norm": 0.11060623079538345,
+      "learning_rate": 0.0001865175349978352,
+      "loss": 0.1488,
+      "step": 4676
+    },
+    {
+      "epoch": 0.3374580612576211,
+      "grad_norm": 0.12014255672693253,
+      "learning_rate": 0.00018651464857843844,
+      "loss": 0.16,
+      "step": 4677
+    },
+    {
+      "epoch": 0.33753021393268157,
+      "grad_norm": 0.1315694898366928,
+      "learning_rate": 0.0001865117621590417,
+      "loss": 0.1181,
+      "step": 4678
+    },
+    {
+      "epoch": 0.33760236660774195,
+      "grad_norm": 0.11970925331115723,
+      "learning_rate": 0.00018650887573964497,
+      "loss": 0.1441,
+      "step": 4679
+    },
+    {
+      "epoch": 0.3376745192828024,
+      "grad_norm": 0.11607395112514496,
+      "learning_rate": 0.00018650598932024823,
+      "loss": 0.1245,
+      "step": 4680
+    },
+    {
+      "epoch": 0.33774667195786284,
+      "grad_norm": 0.09468183666467667,
+      "learning_rate": 0.00018650310290085152,
+      "loss": 0.1351,
+      "step": 4681
+    },
+    {
+      "epoch": 0.3378188246329233,
+      "grad_norm": 0.1312519758939743,
+      "learning_rate": 0.00018650021648145476,
+      "loss": 0.1716,
+      "step": 4682
+    },
+    {
+      "epoch": 0.3378909773079837,
+      "grad_norm": 0.10811814665794373,
+      "learning_rate": 0.00018649733006205802,
+      "loss": 0.1249,
+      "step": 4683
+    },
+    {
+      "epoch": 0.3379631299830441,
+      "grad_norm": 0.11948755383491516,
+      "learning_rate": 0.00018649444364266128,
+      "loss": 0.1462,
+      "step": 4684
+    },
+    {
+      "epoch": 0.33803528265810456,
+      "grad_norm": 0.11248423159122467,
+      "learning_rate": 0.00018649155722326455,
+      "loss": 0.1231,
+      "step": 4685
+    },
+    {
+      "epoch": 0.33810743533316495,
+      "grad_norm": 0.10225424915552139,
+      "learning_rate": 0.0001864886708038678,
+      "loss": 0.1394,
+      "step": 4686
+    },
+    {
+      "epoch": 0.3381795880082254,
+      "grad_norm": 0.15257570147514343,
+      "learning_rate": 0.00018648578438447107,
+      "loss": 0.24,
+      "step": 4687
+    },
+    {
+      "epoch": 0.33825174068328584,
+      "grad_norm": 0.11088695377111435,
+      "learning_rate": 0.00018648289796507434,
+      "loss": 0.2021,
+      "step": 4688
+    },
+    {
+      "epoch": 0.3383238933583463,
+      "grad_norm": 0.13657927513122559,
+      "learning_rate": 0.0001864800115456776,
+      "loss": 0.1179,
+      "step": 4689
+    },
+    {
+      "epoch": 0.33839604603340667,
+      "grad_norm": 0.1033518984913826,
+      "learning_rate": 0.00018647712512628086,
+      "loss": 0.1355,
+      "step": 4690
+    },
+    {
+      "epoch": 0.3384681987084671,
+      "grad_norm": 0.13475528359413147,
+      "learning_rate": 0.00018647423870688412,
+      "loss": 0.1449,
+      "step": 4691
+    },
+    {
+      "epoch": 0.33854035138352756,
+      "grad_norm": 0.13904184103012085,
+      "learning_rate": 0.0001864713522874874,
+      "loss": 0.1383,
+      "step": 4692
+    },
+    {
+      "epoch": 0.33861250405858795,
+      "grad_norm": 0.10915260761976242,
+      "learning_rate": 0.00018646846586809062,
+      "loss": 0.114,
+      "step": 4693
+    },
+    {
+      "epoch": 0.3386846567336484,
+      "grad_norm": 0.11143431812524796,
+      "learning_rate": 0.00018646557944869389,
+      "loss": 0.1838,
+      "step": 4694
+    },
+    {
+      "epoch": 0.33875680940870884,
+      "grad_norm": 0.10445766896009445,
+      "learning_rate": 0.00018646269302929718,
+      "loss": 0.1545,
+      "step": 4695
+    },
+    {
+      "epoch": 0.3388289620837693,
+      "grad_norm": 0.1109762191772461,
+      "learning_rate": 0.00018645980660990044,
+      "loss": 0.1145,
+      "step": 4696
+    },
+    {
+      "epoch": 0.33890111475882967,
+      "grad_norm": 0.09423503279685974,
+      "learning_rate": 0.0001864569201905037,
+      "loss": 0.1932,
+      "step": 4697
+    },
+    {
+      "epoch": 0.3389732674338901,
+      "grad_norm": 0.09955920279026031,
+      "learning_rate": 0.00018645403377110694,
+      "loss": 0.1493,
+      "step": 4698
+    },
+    {
+      "epoch": 0.33904542010895056,
+      "grad_norm": 0.11547043174505234,
+      "learning_rate": 0.0001864511473517102,
+      "loss": 0.1392,
+      "step": 4699
+    },
+    {
+      "epoch": 0.33911757278401095,
+      "grad_norm": 0.11824516952037811,
+      "learning_rate": 0.00018644826093231346,
+      "loss": 0.1677,
+      "step": 4700
+    },
+    {
+      "epoch": 0.3391897254590714,
+      "grad_norm": 0.10412485152482986,
+      "learning_rate": 0.00018644537451291673,
+      "loss": 0.1393,
+      "step": 4701
+    },
+    {
+      "epoch": 0.33926187813413183,
+      "grad_norm": 0.10375717282295227,
+      "learning_rate": 0.00018644248809352002,
+      "loss": 0.1385,
+      "step": 4702
+    },
+    {
+      "epoch": 0.3393340308091923,
+      "grad_norm": 0.10352824628353119,
+      "learning_rate": 0.00018643960167412325,
+      "loss": 0.1416,
+      "step": 4703
+    },
+    {
+      "epoch": 0.33940618348425267,
+      "grad_norm": 0.10737777501344681,
+      "learning_rate": 0.00018643671525472652,
+      "loss": 0.1398,
+      "step": 4704
+    },
+    {
+      "epoch": 0.3394783361593131,
+      "grad_norm": 0.1165878176689148,
+      "learning_rate": 0.00018643382883532978,
+      "loss": 0.1748,
+      "step": 4705
+    },
+    {
+      "epoch": 0.33955048883437355,
+      "grad_norm": 0.10642968118190765,
+      "learning_rate": 0.00018643094241593304,
+      "loss": 0.1579,
+      "step": 4706
+    },
+    {
+      "epoch": 0.33962264150943394,
+      "grad_norm": 0.11588136106729507,
+      "learning_rate": 0.0001864280559965363,
+      "loss": 0.1507,
+      "step": 4707
+    },
+    {
+      "epoch": 0.3396947941844944,
+      "grad_norm": 0.0921846553683281,
+      "learning_rate": 0.00018642516957713957,
+      "loss": 0.1795,
+      "step": 4708
+    },
+    {
+      "epoch": 0.33976694685955483,
+      "grad_norm": 0.1140107586979866,
+      "learning_rate": 0.00018642228315774283,
+      "loss": 0.1674,
+      "step": 4709
+    },
+    {
+      "epoch": 0.3398390995346152,
+      "grad_norm": 0.09212062507867813,
+      "learning_rate": 0.0001864193967383461,
+      "loss": 0.1469,
+      "step": 4710
+    },
+    {
+      "epoch": 0.33991125220967566,
+      "grad_norm": 0.11071064323186874,
+      "learning_rate": 0.00018641651031894936,
+      "loss": 0.1464,
+      "step": 4711
+    },
+    {
+      "epoch": 0.3399834048847361,
+      "grad_norm": 0.09902884811162949,
+      "learning_rate": 0.00018641362389955262,
+      "loss": 0.1397,
+      "step": 4712
+    },
+    {
+      "epoch": 0.34005555755979655,
+      "grad_norm": 0.10399525612592697,
+      "learning_rate": 0.00018641073748015588,
+      "loss": 0.1804,
+      "step": 4713
+    },
+    {
+      "epoch": 0.34012771023485694,
+      "grad_norm": 0.1262943297624588,
+      "learning_rate": 0.00018640785106075914,
+      "loss": 0.15,
+      "step": 4714
+    },
+    {
+      "epoch": 0.3401998629099174,
+      "grad_norm": 0.10293149948120117,
+      "learning_rate": 0.00018640496464136238,
+      "loss": 0.1787,
+      "step": 4715
+    },
+    {
+      "epoch": 0.3402720155849778,
+      "grad_norm": 0.11217711120843887,
+      "learning_rate": 0.00018640207822196567,
+      "loss": 0.1346,
+      "step": 4716
+    },
+    {
+      "epoch": 0.3403441682600382,
+      "grad_norm": 0.12639397382736206,
+      "learning_rate": 0.00018639919180256893,
+      "loss": 0.1688,
+      "step": 4717
+    },
+    {
+      "epoch": 0.34041632093509866,
+      "grad_norm": 0.1675584316253662,
+      "learning_rate": 0.0001863963053831722,
+      "loss": 0.1863,
+      "step": 4718
+    },
+    {
+      "epoch": 0.3404884736101591,
+      "grad_norm": 0.12408801913261414,
+      "learning_rate": 0.00018639341896377546,
+      "loss": 0.2211,
+      "step": 4719
+    },
+    {
+      "epoch": 0.34056062628521955,
+      "grad_norm": 0.13109129667282104,
+      "learning_rate": 0.0001863905325443787,
+      "loss": 0.1582,
+      "step": 4720
+    },
+    {
+      "epoch": 0.34063277896027994,
+      "grad_norm": 0.10773653537034988,
+      "learning_rate": 0.00018638764612498196,
+      "loss": 0.1646,
+      "step": 4721
+    },
+    {
+      "epoch": 0.3407049316353404,
+      "grad_norm": 0.13233451545238495,
+      "learning_rate": 0.00018638475970558522,
+      "loss": 0.1902,
+      "step": 4722
+    },
+    {
+      "epoch": 0.3407770843104008,
+      "grad_norm": 0.12965644896030426,
+      "learning_rate": 0.0001863818732861885,
+      "loss": 0.1846,
+      "step": 4723
+    },
+    {
+      "epoch": 0.3408492369854612,
+      "grad_norm": 0.10962004214525223,
+      "learning_rate": 0.00018637898686679177,
+      "loss": 0.1629,
+      "step": 4724
+    },
+    {
+      "epoch": 0.34092138966052166,
+      "grad_norm": 0.0985800251364708,
+      "learning_rate": 0.000186376100447395,
+      "loss": 0.1614,
+      "step": 4725
+    },
+    {
+      "epoch": 0.3409935423355821,
+      "grad_norm": 0.10292194038629532,
+      "learning_rate": 0.00018637321402799827,
+      "loss": 0.1586,
+      "step": 4726
+    },
+    {
+      "epoch": 0.34106569501064254,
+      "grad_norm": 0.08835239708423615,
+      "learning_rate": 0.00018637032760860154,
+      "loss": 0.1115,
+      "step": 4727
+    },
+    {
+      "epoch": 0.34113784768570293,
+      "grad_norm": 0.09853382408618927,
+      "learning_rate": 0.0001863674411892048,
+      "loss": 0.1714,
+      "step": 4728
+    },
+    {
+      "epoch": 0.3412100003607634,
+      "grad_norm": 0.11794517189264297,
+      "learning_rate": 0.00018636455476980806,
+      "loss": 0.143,
+      "step": 4729
+    },
+    {
+      "epoch": 0.3412821530358238,
+      "grad_norm": 0.11417117714881897,
+      "learning_rate": 0.00018636166835041132,
+      "loss": 0.1719,
+      "step": 4730
+    },
+    {
+      "epoch": 0.3413543057108842,
+      "grad_norm": 0.11145757138729095,
+      "learning_rate": 0.0001863587819310146,
+      "loss": 0.1621,
+      "step": 4731
+    },
+    {
+      "epoch": 0.34142645838594465,
+      "grad_norm": 0.12171374261379242,
+      "learning_rate": 0.00018635589551161785,
+      "loss": 0.1081,
+      "step": 4732
+    },
+    {
+      "epoch": 0.3414986110610051,
+      "grad_norm": 0.14451739192008972,
+      "learning_rate": 0.0001863530090922211,
+      "loss": 0.1434,
+      "step": 4733
+    },
+    {
+      "epoch": 0.34157076373606554,
+      "grad_norm": 0.12776045501232147,
+      "learning_rate": 0.00018635012267282438,
+      "loss": 0.1579,
+      "step": 4734
+    },
+    {
+      "epoch": 0.34164291641112593,
+      "grad_norm": 0.13398566842079163,
+      "learning_rate": 0.00018634723625342764,
+      "loss": 0.1978,
+      "step": 4735
+    },
+    {
+      "epoch": 0.3417150690861864,
+      "grad_norm": 0.15044716000556946,
+      "learning_rate": 0.00018634434983403087,
+      "loss": 0.1786,
+      "step": 4736
+    },
+    {
+      "epoch": 0.3417872217612468,
+      "grad_norm": 0.10238110274076462,
+      "learning_rate": 0.00018634146341463416,
+      "loss": 0.161,
+      "step": 4737
+    },
+    {
+      "epoch": 0.3418593744363072,
+      "grad_norm": 0.13163591921329498,
+      "learning_rate": 0.00018633857699523743,
+      "loss": 0.2066,
+      "step": 4738
+    },
+    {
+      "epoch": 0.34193152711136765,
+      "grad_norm": 0.12700612843036652,
+      "learning_rate": 0.0001863356905758407,
+      "loss": 0.1646,
+      "step": 4739
+    },
+    {
+      "epoch": 0.3420036797864281,
+      "grad_norm": 0.09891757369041443,
+      "learning_rate": 0.00018633280415644395,
+      "loss": 0.1366,
+      "step": 4740
+    },
+    {
+      "epoch": 0.3420758324614885,
+      "grad_norm": 0.10575391352176666,
+      "learning_rate": 0.0001863299177370472,
+      "loss": 0.1348,
+      "step": 4741
+    },
+    {
+      "epoch": 0.3421479851365489,
+      "grad_norm": 0.10323916375637054,
+      "learning_rate": 0.00018632703131765045,
+      "loss": 0.1507,
+      "step": 4742
+    },
+    {
+      "epoch": 0.34222013781160937,
+      "grad_norm": 0.13781608641147614,
+      "learning_rate": 0.00018632414489825371,
+      "loss": 0.115,
+      "step": 4743
+    },
+    {
+      "epoch": 0.3422922904866698,
+      "grad_norm": 0.07461650669574738,
+      "learning_rate": 0.000186321258478857,
+      "loss": 0.1986,
+      "step": 4744
+    },
+    {
+      "epoch": 0.3423644431617302,
+      "grad_norm": 0.11913321167230606,
+      "learning_rate": 0.00018631837205946027,
+      "loss": 0.1754,
+      "step": 4745
+    },
+    {
+      "epoch": 0.34243659583679065,
+      "grad_norm": 0.1495174616575241,
+      "learning_rate": 0.0001863154856400635,
+      "loss": 0.1403,
+      "step": 4746
+    },
+    {
+      "epoch": 0.3425087485118511,
+      "grad_norm": 0.10709115117788315,
+      "learning_rate": 0.00018631259922066677,
+      "loss": 0.1544,
+      "step": 4747
+    },
+    {
+      "epoch": 0.3425809011869115,
+      "grad_norm": 0.10365457087755203,
+      "learning_rate": 0.00018630971280127003,
+      "loss": 0.1524,
+      "step": 4748
+    },
+    {
+      "epoch": 0.3426530538619719,
+      "grad_norm": 0.13240467011928558,
+      "learning_rate": 0.0001863068263818733,
+      "loss": 0.1697,
+      "step": 4749
+    },
+    {
+      "epoch": 0.34272520653703237,
+      "grad_norm": 0.15050604939460754,
+      "learning_rate": 0.00018630393996247656,
+      "loss": 0.1907,
+      "step": 4750
+    },
+    {
+      "epoch": 0.3427973592120928,
+      "grad_norm": 0.09507954865694046,
+      "learning_rate": 0.00018630105354307982,
+      "loss": 0.1742,
+      "step": 4751
+    },
+    {
+      "epoch": 0.3428695118871532,
+      "grad_norm": 0.12495091557502747,
+      "learning_rate": 0.00018629816712368308,
+      "loss": 0.1267,
+      "step": 4752
+    },
+    {
+      "epoch": 0.34294166456221364,
+      "grad_norm": 0.09712611883878708,
+      "learning_rate": 0.00018629528070428634,
+      "loss": 0.1199,
+      "step": 4753
+    },
+    {
+      "epoch": 0.3430138172372741,
+      "grad_norm": 0.21932922303676605,
+      "learning_rate": 0.0001862923942848896,
+      "loss": 0.1807,
+      "step": 4754
+    },
+    {
+      "epoch": 0.3430859699123345,
+      "grad_norm": 0.1500842273235321,
+      "learning_rate": 0.00018628950786549287,
+      "loss": 0.1607,
+      "step": 4755
+    },
+    {
+      "epoch": 0.3431581225873949,
+      "grad_norm": 0.11836665123701096,
+      "learning_rate": 0.00018628662144609613,
+      "loss": 0.1686,
+      "step": 4756
+    },
+    {
+      "epoch": 0.34323027526245536,
+      "grad_norm": 0.14937469363212585,
+      "learning_rate": 0.00018628373502669937,
+      "loss": 0.1744,
+      "step": 4757
+    },
+    {
+      "epoch": 0.3433024279375158,
+      "grad_norm": 0.1330706626176834,
+      "learning_rate": 0.00018628084860730266,
+      "loss": 0.1232,
+      "step": 4758
+    },
+    {
+      "epoch": 0.3433745806125762,
+      "grad_norm": 0.17299704253673553,
+      "learning_rate": 0.00018627796218790592,
+      "loss": 0.2073,
+      "step": 4759
+    },
+    {
+      "epoch": 0.34344673328763664,
+      "grad_norm": 0.1117033064365387,
+      "learning_rate": 0.00018627507576850918,
+      "loss": 0.1562,
+      "step": 4760
+    },
+    {
+      "epoch": 0.3435188859626971,
+      "grad_norm": 0.12291192263364792,
+      "learning_rate": 0.00018627218934911245,
+      "loss": 0.2009,
+      "step": 4761
+    },
+    {
+      "epoch": 0.3435910386377575,
+      "grad_norm": 0.17998120188713074,
+      "learning_rate": 0.00018626930292971568,
+      "loss": 0.1629,
+      "step": 4762
+    },
+    {
+      "epoch": 0.3436631913128179,
+      "grad_norm": 0.13401764631271362,
+      "learning_rate": 0.00018626641651031895,
+      "loss": 0.1175,
+      "step": 4763
+    },
+    {
+      "epoch": 0.34373534398787836,
+      "grad_norm": 0.09569345414638519,
+      "learning_rate": 0.0001862635300909222,
+      "loss": 0.1052,
+      "step": 4764
+    },
+    {
+      "epoch": 0.3438074966629388,
+      "grad_norm": 0.21904654800891876,
+      "learning_rate": 0.0001862606436715255,
+      "loss": 0.1244,
+      "step": 4765
+    },
+    {
+      "epoch": 0.3438796493379992,
+      "grad_norm": 0.11808110773563385,
+      "learning_rate": 0.00018625775725212876,
+      "loss": 0.1374,
+      "step": 4766
+    },
+    {
+      "epoch": 0.34395180201305964,
+      "grad_norm": 0.12179733067750931,
+      "learning_rate": 0.000186254870832732,
+      "loss": 0.1814,
+      "step": 4767
+    },
+    {
+      "epoch": 0.3440239546881201,
+      "grad_norm": 0.16868746280670166,
+      "learning_rate": 0.00018625198441333526,
+      "loss": 0.1689,
+      "step": 4768
+    },
+    {
+      "epoch": 0.34409610736318047,
+      "grad_norm": 0.17062485218048096,
+      "learning_rate": 0.00018624909799393852,
+      "loss": 0.2181,
+      "step": 4769
+    },
+    {
+      "epoch": 0.3441682600382409,
+      "grad_norm": 0.13485071063041687,
+      "learning_rate": 0.0001862462115745418,
+      "loss": 0.1138,
+      "step": 4770
+    },
+    {
+      "epoch": 0.34424041271330136,
+      "grad_norm": 0.13490718603134155,
+      "learning_rate": 0.00018624332515514505,
+      "loss": 0.1584,
+      "step": 4771
+    },
+    {
+      "epoch": 0.34431256538836175,
+      "grad_norm": 0.10937722772359848,
+      "learning_rate": 0.0001862404387357483,
+      "loss": 0.1474,
+      "step": 4772
+    },
+    {
+      "epoch": 0.3443847180634222,
+      "grad_norm": 0.11925653368234634,
+      "learning_rate": 0.00018623755231635158,
+      "loss": 0.1239,
+      "step": 4773
+    },
+    {
+      "epoch": 0.34445687073848263,
+      "grad_norm": 0.12353496998548508,
+      "learning_rate": 0.00018623466589695484,
+      "loss": 0.1302,
+      "step": 4774
+    },
+    {
+      "epoch": 0.3445290234135431,
+      "grad_norm": 0.10578591376543045,
+      "learning_rate": 0.0001862317794775581,
+      "loss": 0.1839,
+      "step": 4775
+    },
+    {
+      "epoch": 0.34460117608860347,
+      "grad_norm": 0.12525229156017303,
+      "learning_rate": 0.00018622889305816136,
+      "loss": 0.2055,
+      "step": 4776
+    },
+    {
+      "epoch": 0.3446733287636639,
+      "grad_norm": 0.09258376061916351,
+      "learning_rate": 0.00018622600663876463,
+      "loss": 0.1966,
+      "step": 4777
+    },
+    {
+      "epoch": 0.34474548143872435,
+      "grad_norm": 0.09338001906871796,
+      "learning_rate": 0.00018622312021936786,
+      "loss": 0.1321,
+      "step": 4778
+    },
+    {
+      "epoch": 0.34481763411378474,
+      "grad_norm": 0.10230891406536102,
+      "learning_rate": 0.00018622023379997115,
+      "loss": 0.1247,
+      "step": 4779
+    },
+    {
+      "epoch": 0.3448897867888452,
+      "grad_norm": 0.12309479713439941,
+      "learning_rate": 0.00018621734738057442,
+      "loss": 0.1612,
+      "step": 4780
+    },
+    {
+      "epoch": 0.34496193946390563,
+      "grad_norm": 0.12477600574493408,
+      "learning_rate": 0.00018621446096117768,
+      "loss": 0.1614,
+      "step": 4781
+    },
+    {
+      "epoch": 0.3450340921389661,
+      "grad_norm": 0.137281596660614,
+      "learning_rate": 0.00018621157454178094,
+      "loss": 0.1415,
+      "step": 4782
+    },
+    {
+      "epoch": 0.34510624481402646,
+      "grad_norm": 0.14565874636173248,
+      "learning_rate": 0.00018620868812238418,
+      "loss": 0.1686,
+      "step": 4783
+    },
+    {
+      "epoch": 0.3451783974890869,
+      "grad_norm": 0.13980144262313843,
+      "learning_rate": 0.00018620580170298744,
+      "loss": 0.1582,
+      "step": 4784
+    },
+    {
+      "epoch": 0.34525055016414735,
+      "grad_norm": 0.11648563295602798,
+      "learning_rate": 0.0001862029152835907,
+      "loss": 0.1464,
+      "step": 4785
+    },
+    {
+      "epoch": 0.34532270283920774,
+      "grad_norm": 0.09814045578241348,
+      "learning_rate": 0.000186200028864194,
+      "loss": 0.1122,
+      "step": 4786
+    },
+    {
+      "epoch": 0.3453948555142682,
+      "grad_norm": 0.11648702621459961,
+      "learning_rate": 0.00018619714244479726,
+      "loss": 0.1814,
+      "step": 4787
+    },
+    {
+      "epoch": 0.34546700818932863,
+      "grad_norm": 0.09737619012594223,
+      "learning_rate": 0.0001861942560254005,
+      "loss": 0.0984,
+      "step": 4788
+    },
+    {
+      "epoch": 0.34553916086438907,
+      "grad_norm": 0.10784975439310074,
+      "learning_rate": 0.00018619136960600376,
+      "loss": 0.1379,
+      "step": 4789
+    },
+    {
+      "epoch": 0.34561131353944946,
+      "grad_norm": 0.1196584552526474,
+      "learning_rate": 0.00018618848318660702,
+      "loss": 0.2279,
+      "step": 4790
+    },
+    {
+      "epoch": 0.3456834662145099,
+      "grad_norm": 0.10751967877149582,
+      "learning_rate": 0.00018618559676721028,
+      "loss": 0.1506,
+      "step": 4791
+    },
+    {
+      "epoch": 0.34575561888957035,
+      "grad_norm": 0.15543009340763092,
+      "learning_rate": 0.00018618271034781354,
+      "loss": 0.2072,
+      "step": 4792
+    },
+    {
+      "epoch": 0.34582777156463074,
+      "grad_norm": 0.12283115088939667,
+      "learning_rate": 0.0001861798239284168,
+      "loss": 0.1385,
+      "step": 4793
+    },
+    {
+      "epoch": 0.3458999242396912,
+      "grad_norm": 0.11519939452409744,
+      "learning_rate": 0.00018617693750902007,
+      "loss": 0.1916,
+      "step": 4794
+    },
+    {
+      "epoch": 0.3459720769147516,
+      "grad_norm": 0.11674375087022781,
+      "learning_rate": 0.00018617405108962333,
+      "loss": 0.1635,
+      "step": 4795
+    },
+    {
+      "epoch": 0.34604422958981207,
+      "grad_norm": 0.1331339031457901,
+      "learning_rate": 0.0001861711646702266,
+      "loss": 0.1459,
+      "step": 4796
+    },
+    {
+      "epoch": 0.34611638226487246,
+      "grad_norm": 0.11357157677412033,
+      "learning_rate": 0.00018616827825082986,
+      "loss": 0.1733,
+      "step": 4797
+    },
+    {
+      "epoch": 0.3461885349399329,
+      "grad_norm": 0.10810473561286926,
+      "learning_rate": 0.00018616539183143312,
+      "loss": 0.208,
+      "step": 4798
+    },
+    {
+      "epoch": 0.34626068761499335,
+      "grad_norm": 0.1509094536304474,
+      "learning_rate": 0.00018616250541203636,
+      "loss": 0.1517,
+      "step": 4799
+    },
+    {
+      "epoch": 0.34633284029005373,
+      "grad_norm": 0.10608479380607605,
+      "learning_rate": 0.00018615961899263965,
+      "loss": 0.1997,
+      "step": 4800
+    },
+    {
+      "epoch": 0.3464049929651142,
+      "grad_norm": 0.12887756526470184,
+      "learning_rate": 0.0001861567325732429,
+      "loss": 0.1386,
+      "step": 4801
+    },
+    {
+      "epoch": 0.3464771456401746,
+      "grad_norm": 0.1120765283703804,
+      "learning_rate": 0.00018615384615384617,
+      "loss": 0.099,
+      "step": 4802
+    },
+    {
+      "epoch": 0.346549298315235,
+      "grad_norm": 0.12339556962251663,
+      "learning_rate": 0.00018615095973444944,
+      "loss": 0.1679,
+      "step": 4803
+    },
+    {
+      "epoch": 0.34662145099029545,
+      "grad_norm": 0.09008228033781052,
+      "learning_rate": 0.00018614807331505267,
+      "loss": 0.1299,
+      "step": 4804
+    },
+    {
+      "epoch": 0.3466936036653559,
+      "grad_norm": 0.11359957605600357,
+      "learning_rate": 0.00018614518689565593,
+      "loss": 0.1226,
+      "step": 4805
+    },
+    {
+      "epoch": 0.34676575634041634,
+      "grad_norm": 0.10467953234910965,
+      "learning_rate": 0.0001861423004762592,
+      "loss": 0.2249,
+      "step": 4806
+    },
+    {
+      "epoch": 0.34683790901547673,
+      "grad_norm": 0.1343858242034912,
+      "learning_rate": 0.00018613941405686246,
+      "loss": 0.1881,
+      "step": 4807
+    },
+    {
+      "epoch": 0.3469100616905372,
+      "grad_norm": 0.15307852625846863,
+      "learning_rate": 0.00018613652763746575,
+      "loss": 0.1139,
+      "step": 4808
+    },
+    {
+      "epoch": 0.3469822143655976,
+      "grad_norm": 0.11613091826438904,
+      "learning_rate": 0.00018613364121806899,
+      "loss": 0.2056,
+      "step": 4809
+    },
+    {
+      "epoch": 0.347054367040658,
+      "grad_norm": 0.11698072403669357,
+      "learning_rate": 0.00018613075479867225,
+      "loss": 0.1305,
+      "step": 4810
+    },
+    {
+      "epoch": 0.34712651971571845,
+      "grad_norm": 0.09609155356884003,
+      "learning_rate": 0.0001861278683792755,
+      "loss": 0.1496,
+      "step": 4811
+    },
+    {
+      "epoch": 0.3471986723907789,
+      "grad_norm": 0.11466329544782639,
+      "learning_rate": 0.00018612498195987878,
+      "loss": 0.1632,
+      "step": 4812
+    },
+    {
+      "epoch": 0.34727082506583934,
+      "grad_norm": 0.11565911769866943,
+      "learning_rate": 0.00018612209554048204,
+      "loss": 0.1647,
+      "step": 4813
+    },
+    {
+      "epoch": 0.3473429777408997,
+      "grad_norm": 0.12109330296516418,
+      "learning_rate": 0.0001861192091210853,
+      "loss": 0.1659,
+      "step": 4814
+    },
+    {
+      "epoch": 0.34741513041596017,
+      "grad_norm": 0.12918664515018463,
+      "learning_rate": 0.00018611632270168856,
+      "loss": 0.1974,
+      "step": 4815
+    },
+    {
+      "epoch": 0.3474872830910206,
+      "grad_norm": 0.1266615092754364,
+      "learning_rate": 0.00018611343628229183,
+      "loss": 0.1636,
+      "step": 4816
+    },
+    {
+      "epoch": 0.347559435766081,
+      "grad_norm": 0.12520161271095276,
+      "learning_rate": 0.0001861105498628951,
+      "loss": 0.157,
+      "step": 4817
+    },
+    {
+      "epoch": 0.34763158844114145,
+      "grad_norm": 0.10957998037338257,
+      "learning_rate": 0.00018610766344349835,
+      "loss": 0.1716,
+      "step": 4818
+    },
+    {
+      "epoch": 0.3477037411162019,
+      "grad_norm": 0.1544824093580246,
+      "learning_rate": 0.00018610477702410162,
+      "loss": 0.1614,
+      "step": 4819
+    },
+    {
+      "epoch": 0.34777589379126234,
+      "grad_norm": 0.11876598745584488,
+      "learning_rate": 0.00018610189060470488,
+      "loss": 0.1301,
+      "step": 4820
+    },
+    {
+      "epoch": 0.3478480464663227,
+      "grad_norm": 0.13351014256477356,
+      "learning_rate": 0.00018609900418530811,
+      "loss": 0.1828,
+      "step": 4821
+    },
+    {
+      "epoch": 0.34792019914138317,
+      "grad_norm": 0.10807938128709793,
+      "learning_rate": 0.0001860961177659114,
+      "loss": 0.1599,
+      "step": 4822
+    },
+    {
+      "epoch": 0.3479923518164436,
+      "grad_norm": 0.16282778978347778,
+      "learning_rate": 0.00018609323134651467,
+      "loss": 0.1566,
+      "step": 4823
+    },
+    {
+      "epoch": 0.348064504491504,
+      "grad_norm": 0.09783818572759628,
+      "learning_rate": 0.00018609034492711793,
+      "loss": 0.1339,
+      "step": 4824
+    },
+    {
+      "epoch": 0.34813665716656444,
+      "grad_norm": 0.15799443423748016,
+      "learning_rate": 0.0001860874585077212,
+      "loss": 0.1464,
+      "step": 4825
+    },
+    {
+      "epoch": 0.3482088098416249,
+      "grad_norm": 0.13499031960964203,
+      "learning_rate": 0.00018608457208832443,
+      "loss": 0.1794,
+      "step": 4826
+    },
+    {
+      "epoch": 0.34828096251668533,
+      "grad_norm": 0.12276072800159454,
+      "learning_rate": 0.0001860816856689277,
+      "loss": 0.1579,
+      "step": 4827
+    },
+    {
+      "epoch": 0.3483531151917457,
+      "grad_norm": 0.10281578451395035,
+      "learning_rate": 0.00018607879924953095,
+      "loss": 0.1766,
+      "step": 4828
+    },
+    {
+      "epoch": 0.34842526786680617,
+      "grad_norm": 0.10461385548114777,
+      "learning_rate": 0.00018607591283013424,
+      "loss": 0.189,
+      "step": 4829
+    },
+    {
+      "epoch": 0.3484974205418666,
+      "grad_norm": 0.12931039929389954,
+      "learning_rate": 0.0001860730264107375,
+      "loss": 0.1358,
+      "step": 4830
+    },
+    {
+      "epoch": 0.348569573216927,
+      "grad_norm": 0.1350351870059967,
+      "learning_rate": 0.00018607013999134074,
+      "loss": 0.1733,
+      "step": 4831
+    },
+    {
+      "epoch": 0.34864172589198744,
+      "grad_norm": 0.0907088965177536,
+      "learning_rate": 0.000186067253571944,
+      "loss": 0.1813,
+      "step": 4832
+    },
+    {
+      "epoch": 0.3487138785670479,
+      "grad_norm": 0.1410246193408966,
+      "learning_rate": 0.00018606436715254727,
+      "loss": 0.1817,
+      "step": 4833
+    },
+    {
+      "epoch": 0.3487860312421083,
+      "grad_norm": 0.10278214514255524,
+      "learning_rate": 0.00018606148073315053,
+      "loss": 0.1448,
+      "step": 4834
+    },
+    {
+      "epoch": 0.3488581839171687,
+      "grad_norm": 0.10022161900997162,
+      "learning_rate": 0.0001860585943137538,
+      "loss": 0.1403,
+      "step": 4835
+    },
+    {
+      "epoch": 0.34893033659222916,
+      "grad_norm": 0.09270060807466507,
+      "learning_rate": 0.00018605570789435706,
+      "loss": 0.1678,
+      "step": 4836
+    },
+    {
+      "epoch": 0.3490024892672896,
+      "grad_norm": 0.09166289865970612,
+      "learning_rate": 0.00018605282147496032,
+      "loss": 0.1091,
+      "step": 4837
+    },
+    {
+      "epoch": 0.34907464194235,
+      "grad_norm": 0.12794873118400574,
+      "learning_rate": 0.00018604993505556358,
+      "loss": 0.1895,
+      "step": 4838
+    },
+    {
+      "epoch": 0.34914679461741044,
+      "grad_norm": 0.11265773326158524,
+      "learning_rate": 0.00018604704863616685,
+      "loss": 0.158,
+      "step": 4839
+    },
+    {
+      "epoch": 0.3492189472924709,
+      "grad_norm": 0.1151571273803711,
+      "learning_rate": 0.0001860441622167701,
+      "loss": 0.1398,
+      "step": 4840
+    },
+    {
+      "epoch": 0.34929109996753127,
+      "grad_norm": 0.15330448746681213,
+      "learning_rate": 0.00018604127579737337,
+      "loss": 0.1456,
+      "step": 4841
+    },
+    {
+      "epoch": 0.3493632526425917,
+      "grad_norm": 0.1384754776954651,
+      "learning_rate": 0.0001860383893779766,
+      "loss": 0.1718,
+      "step": 4842
+    },
+    {
+      "epoch": 0.34943540531765216,
+      "grad_norm": 0.125054270029068,
+      "learning_rate": 0.0001860355029585799,
+      "loss": 0.1426,
+      "step": 4843
+    },
+    {
+      "epoch": 0.3495075579927126,
+      "grad_norm": 0.11646536737680435,
+      "learning_rate": 0.00018603261653918316,
+      "loss": 0.1718,
+      "step": 4844
+    },
+    {
+      "epoch": 0.349579710667773,
+      "grad_norm": 0.11603065580129623,
+      "learning_rate": 0.00018602973011978642,
+      "loss": 0.1669,
+      "step": 4845
+    },
+    {
+      "epoch": 0.34965186334283344,
+      "grad_norm": 0.11983395367860794,
+      "learning_rate": 0.0001860268437003897,
+      "loss": 0.1548,
+      "step": 4846
+    },
+    {
+      "epoch": 0.3497240160178939,
+      "grad_norm": 0.09510248154401779,
+      "learning_rate": 0.00018602395728099292,
+      "loss": 0.1876,
+      "step": 4847
+    },
+    {
+      "epoch": 0.34979616869295427,
+      "grad_norm": 0.11014638096094131,
+      "learning_rate": 0.00018602107086159619,
+      "loss": 0.153,
+      "step": 4848
+    },
+    {
+      "epoch": 0.3498683213680147,
+      "grad_norm": 0.11330801248550415,
+      "learning_rate": 0.00018601818444219945,
+      "loss": 0.1617,
+      "step": 4849
+    },
+    {
+      "epoch": 0.34994047404307516,
+      "grad_norm": 0.12342897802591324,
+      "learning_rate": 0.00018601529802280274,
+      "loss": 0.1629,
+      "step": 4850
+    },
+    {
+      "epoch": 0.3500126267181356,
+      "grad_norm": 0.11456283926963806,
+      "learning_rate": 0.000186012411603406,
+      "loss": 0.1526,
+      "step": 4851
+    },
+    {
+      "epoch": 0.350084779393196,
+      "grad_norm": 0.11536956578493118,
+      "learning_rate": 0.00018600952518400924,
+      "loss": 0.2178,
+      "step": 4852
+    },
+    {
+      "epoch": 0.35015693206825643,
+      "grad_norm": 0.09011700004339218,
+      "learning_rate": 0.0001860066387646125,
+      "loss": 0.1739,
+      "step": 4853
+    },
+    {
+      "epoch": 0.3502290847433169,
+      "grad_norm": 0.1924978345632553,
+      "learning_rate": 0.00018600375234521576,
+      "loss": 0.1629,
+      "step": 4854
+    },
+    {
+      "epoch": 0.35030123741837726,
+      "grad_norm": 0.11129790544509888,
+      "learning_rate": 0.00018600086592581903,
+      "loss": 0.139,
+      "step": 4855
+    },
+    {
+      "epoch": 0.3503733900934377,
+      "grad_norm": 0.13411857187747955,
+      "learning_rate": 0.0001859979795064223,
+      "loss": 0.1842,
+      "step": 4856
+    },
+    {
+      "epoch": 0.35044554276849815,
+      "grad_norm": 0.1135907843708992,
+      "learning_rate": 0.00018599509308702555,
+      "loss": 0.1568,
+      "step": 4857
+    },
+    {
+      "epoch": 0.3505176954435586,
+      "grad_norm": 0.11942192167043686,
+      "learning_rate": 0.00018599220666762882,
+      "loss": 0.1048,
+      "step": 4858
+    },
+    {
+      "epoch": 0.350589848118619,
+      "grad_norm": 0.11281799525022507,
+      "learning_rate": 0.00018598932024823208,
+      "loss": 0.16,
+      "step": 4859
+    },
+    {
+      "epoch": 0.35066200079367943,
+      "grad_norm": 0.13526779413223267,
+      "learning_rate": 0.00018598643382883534,
+      "loss": 0.1716,
+      "step": 4860
+    },
+    {
+      "epoch": 0.3507341534687399,
+      "grad_norm": 0.0978962704539299,
+      "learning_rate": 0.0001859835474094386,
+      "loss": 0.1704,
+      "step": 4861
+    },
+    {
+      "epoch": 0.35080630614380026,
+      "grad_norm": 0.1300303190946579,
+      "learning_rate": 0.00018598066099004187,
+      "loss": 0.1538,
+      "step": 4862
+    },
+    {
+      "epoch": 0.3508784588188607,
+      "grad_norm": 0.10521839559078217,
+      "learning_rate": 0.0001859777745706451,
+      "loss": 0.1361,
+      "step": 4863
+    },
+    {
+      "epoch": 0.35095061149392115,
+      "grad_norm": 0.13387595117092133,
+      "learning_rate": 0.0001859748881512484,
+      "loss": 0.202,
+      "step": 4864
+    },
+    {
+      "epoch": 0.35102276416898154,
+      "grad_norm": 0.11061757802963257,
+      "learning_rate": 0.00018597200173185166,
+      "loss": 0.1007,
+      "step": 4865
+    },
+    {
+      "epoch": 0.351094916844042,
+      "grad_norm": 0.12662525475025177,
+      "learning_rate": 0.00018596911531245492,
+      "loss": 0.1581,
+      "step": 4866
+    },
+    {
+      "epoch": 0.3511670695191024,
+      "grad_norm": 0.11428170651197433,
+      "learning_rate": 0.00018596622889305818,
+      "loss": 0.1513,
+      "step": 4867
+    },
+    {
+      "epoch": 0.35123922219416287,
+      "grad_norm": 0.09728476405143738,
+      "learning_rate": 0.00018596334247366142,
+      "loss": 0.1806,
+      "step": 4868
+    },
+    {
+      "epoch": 0.35131137486922326,
+      "grad_norm": 0.11465656012296677,
+      "learning_rate": 0.00018596045605426468,
+      "loss": 0.1413,
+      "step": 4869
+    },
+    {
+      "epoch": 0.3513835275442837,
+      "grad_norm": 0.14767242968082428,
+      "learning_rate": 0.00018595756963486794,
+      "loss": 0.1908,
+      "step": 4870
+    },
+    {
+      "epoch": 0.35145568021934415,
+      "grad_norm": 0.15032465755939484,
+      "learning_rate": 0.00018595468321547123,
+      "loss": 0.1018,
+      "step": 4871
+    },
+    {
+      "epoch": 0.35152783289440453,
+      "grad_norm": 0.11787731200456619,
+      "learning_rate": 0.0001859517967960745,
+      "loss": 0.1997,
+      "step": 4872
+    },
+    {
+      "epoch": 0.351599985569465,
+      "grad_norm": 0.14257623255252838,
+      "learning_rate": 0.00018594891037667773,
+      "loss": 0.1838,
+      "step": 4873
+    },
+    {
+      "epoch": 0.3516721382445254,
+      "grad_norm": 0.10114888846874237,
+      "learning_rate": 0.000185946023957281,
+      "loss": 0.1321,
+      "step": 4874
+    },
+    {
+      "epoch": 0.35174429091958587,
+      "grad_norm": 0.1027207151055336,
+      "learning_rate": 0.00018594313753788426,
+      "loss": 0.1783,
+      "step": 4875
+    },
+    {
+      "epoch": 0.35181644359464626,
+      "grad_norm": 0.11875282227993011,
+      "learning_rate": 0.00018594025111848752,
+      "loss": 0.2001,
+      "step": 4876
+    },
+    {
+      "epoch": 0.3518885962697067,
+      "grad_norm": 0.10851060599088669,
+      "learning_rate": 0.00018593736469909078,
+      "loss": 0.1755,
+      "step": 4877
+    },
+    {
+      "epoch": 0.35196074894476714,
+      "grad_norm": 0.1544347107410431,
+      "learning_rate": 0.00018593447827969405,
+      "loss": 0.1528,
+      "step": 4878
+    },
+    {
+      "epoch": 0.35203290161982753,
+      "grad_norm": 0.12431411445140839,
+      "learning_rate": 0.0001859315918602973,
+      "loss": 0.1784,
+      "step": 4879
+    },
+    {
+      "epoch": 0.352105054294888,
+      "grad_norm": 0.10366930812597275,
+      "learning_rate": 0.00018592870544090057,
+      "loss": 0.1516,
+      "step": 4880
+    },
+    {
+      "epoch": 0.3521772069699484,
+      "grad_norm": 0.11398233473300934,
+      "learning_rate": 0.00018592581902150384,
+      "loss": 0.1817,
+      "step": 4881
+    },
+    {
+      "epoch": 0.35224935964500886,
+      "grad_norm": 0.14207541942596436,
+      "learning_rate": 0.0001859229326021071,
+      "loss": 0.1364,
+      "step": 4882
+    },
+    {
+      "epoch": 0.35232151232006925,
+      "grad_norm": 0.10279354453086853,
+      "learning_rate": 0.00018592004618271036,
+      "loss": 0.1584,
+      "step": 4883
+    },
+    {
+      "epoch": 0.3523936649951297,
+      "grad_norm": 0.11522091180086136,
+      "learning_rate": 0.0001859171597633136,
+      "loss": 0.1577,
+      "step": 4884
+    },
+    {
+      "epoch": 0.35246581767019014,
+      "grad_norm": 0.13115240633487701,
+      "learning_rate": 0.0001859142733439169,
+      "loss": 0.1583,
+      "step": 4885
+    },
+    {
+      "epoch": 0.35253797034525053,
+      "grad_norm": 0.12494537979364395,
+      "learning_rate": 0.00018591138692452015,
+      "loss": 0.2014,
+      "step": 4886
+    },
+    {
+      "epoch": 0.352610123020311,
+      "grad_norm": 0.11419171094894409,
+      "learning_rate": 0.0001859085005051234,
+      "loss": 0.1129,
+      "step": 4887
+    },
+    {
+      "epoch": 0.3526822756953714,
+      "grad_norm": 0.10616102814674377,
+      "learning_rate": 0.00018590561408572668,
+      "loss": 0.1373,
+      "step": 4888
+    },
+    {
+      "epoch": 0.35275442837043186,
+      "grad_norm": 0.12825287878513336,
+      "learning_rate": 0.0001859027276663299,
+      "loss": 0.1472,
+      "step": 4889
+    },
+    {
+      "epoch": 0.35282658104549225,
+      "grad_norm": 0.10453600436449051,
+      "learning_rate": 0.00018589984124693317,
+      "loss": 0.1502,
+      "step": 4890
+    },
+    {
+      "epoch": 0.3528987337205527,
+      "grad_norm": 0.11971019953489304,
+      "learning_rate": 0.00018589695482753644,
+      "loss": 0.1735,
+      "step": 4891
+    },
+    {
+      "epoch": 0.35297088639561314,
+      "grad_norm": 0.12282633781433105,
+      "learning_rate": 0.00018589406840813973,
+      "loss": 0.1804,
+      "step": 4892
+    },
+    {
+      "epoch": 0.3530430390706735,
+      "grad_norm": 0.11366037279367447,
+      "learning_rate": 0.000185891181988743,
+      "loss": 0.1439,
+      "step": 4893
+    },
+    {
+      "epoch": 0.35311519174573397,
+      "grad_norm": 0.10512767732143402,
+      "learning_rate": 0.00018588829556934623,
+      "loss": 0.1536,
+      "step": 4894
+    },
+    {
+      "epoch": 0.3531873444207944,
+      "grad_norm": 0.11639894545078278,
+      "learning_rate": 0.0001858854091499495,
+      "loss": 0.1007,
+      "step": 4895
+    },
+    {
+      "epoch": 0.3532594970958548,
+      "grad_norm": 0.16471442580223083,
+      "learning_rate": 0.00018588252273055275,
+      "loss": 0.1693,
+      "step": 4896
+    },
+    {
+      "epoch": 0.35333164977091525,
+      "grad_norm": 0.1491038203239441,
+      "learning_rate": 0.00018587963631115602,
+      "loss": 0.1459,
+      "step": 4897
+    },
+    {
+      "epoch": 0.3534038024459757,
+      "grad_norm": 0.10994590073823929,
+      "learning_rate": 0.00018587674989175928,
+      "loss": 0.1671,
+      "step": 4898
+    },
+    {
+      "epoch": 0.35347595512103613,
+      "grad_norm": 0.11387878656387329,
+      "learning_rate": 0.00018587386347236254,
+      "loss": 0.089,
+      "step": 4899
+    },
+    {
+      "epoch": 0.3535481077960965,
+      "grad_norm": 0.11015894263982773,
+      "learning_rate": 0.0001858709770529658,
+      "loss": 0.1384,
+      "step": 4900
+    },
+    {
+      "epoch": 0.35362026047115697,
+      "grad_norm": 0.11075104773044586,
+      "learning_rate": 0.00018586809063356907,
+      "loss": 0.1223,
+      "step": 4901
+    },
+    {
+      "epoch": 0.3536924131462174,
+      "grad_norm": 0.13021506369113922,
+      "learning_rate": 0.00018586520421417233,
+      "loss": 0.159,
+      "step": 4902
+    },
+    {
+      "epoch": 0.3537645658212778,
+      "grad_norm": 0.16010746359825134,
+      "learning_rate": 0.0001858623177947756,
+      "loss": 0.1793,
+      "step": 4903
+    },
+    {
+      "epoch": 0.35383671849633824,
+      "grad_norm": 0.12425374984741211,
+      "learning_rate": 0.00018585943137537886,
+      "loss": 0.1644,
+      "step": 4904
+    },
+    {
+      "epoch": 0.3539088711713987,
+      "grad_norm": 0.17232443392276764,
+      "learning_rate": 0.0001858565449559821,
+      "loss": 0.1742,
+      "step": 4905
+    },
+    {
+      "epoch": 0.35398102384645913,
+      "grad_norm": 0.12240596860647202,
+      "learning_rate": 0.00018585365853658538,
+      "loss": 0.1562,
+      "step": 4906
+    },
+    {
+      "epoch": 0.3540531765215195,
+      "grad_norm": 0.11358831077814102,
+      "learning_rate": 0.00018585077211718864,
+      "loss": 0.1793,
+      "step": 4907
+    },
+    {
+      "epoch": 0.35412532919657996,
+      "grad_norm": 0.12762053310871124,
+      "learning_rate": 0.0001858478856977919,
+      "loss": 0.1609,
+      "step": 4908
+    },
+    {
+      "epoch": 0.3541974818716404,
+      "grad_norm": 0.11020836979150772,
+      "learning_rate": 0.00018584499927839517,
+      "loss": 0.1631,
+      "step": 4909
+    },
+    {
+      "epoch": 0.3542696345467008,
+      "grad_norm": 0.14324946701526642,
+      "learning_rate": 0.0001858421128589984,
+      "loss": 0.139,
+      "step": 4910
+    },
+    {
+      "epoch": 0.35434178722176124,
+      "grad_norm": 0.09634080529212952,
+      "learning_rate": 0.00018583922643960167,
+      "loss": 0.1669,
+      "step": 4911
+    },
+    {
+      "epoch": 0.3544139398968217,
+      "grad_norm": 0.11602307856082916,
+      "learning_rate": 0.00018583634002020493,
+      "loss": 0.1946,
+      "step": 4912
+    },
+    {
+      "epoch": 0.3544860925718821,
+      "grad_norm": 0.09764589369297028,
+      "learning_rate": 0.00018583345360080822,
+      "loss": 0.1496,
+      "step": 4913
+    },
+    {
+      "epoch": 0.3545582452469425,
+      "grad_norm": 0.1315620392560959,
+      "learning_rate": 0.00018583056718141148,
+      "loss": 0.1426,
+      "step": 4914
+    },
+    {
+      "epoch": 0.35463039792200296,
+      "grad_norm": 0.10375626385211945,
+      "learning_rate": 0.00018582768076201472,
+      "loss": 0.1783,
+      "step": 4915
+    },
+    {
+      "epoch": 0.3547025505970634,
+      "grad_norm": 0.13591431081295013,
+      "learning_rate": 0.00018582479434261798,
+      "loss": 0.1795,
+      "step": 4916
+    },
+    {
+      "epoch": 0.3547747032721238,
+      "grad_norm": 0.11436531692743301,
+      "learning_rate": 0.00018582190792322125,
+      "loss": 0.1377,
+      "step": 4917
+    },
+    {
+      "epoch": 0.35484685594718424,
+      "grad_norm": 0.13248689472675323,
+      "learning_rate": 0.0001858190215038245,
+      "loss": 0.1669,
+      "step": 4918
+    },
+    {
+      "epoch": 0.3549190086222447,
+      "grad_norm": 0.10617823898792267,
+      "learning_rate": 0.00018581613508442777,
+      "loss": 0.1722,
+      "step": 4919
+    },
+    {
+      "epoch": 0.3549911612973051,
+      "grad_norm": 0.13824215531349182,
+      "learning_rate": 0.00018581324866503104,
+      "loss": 0.1169,
+      "step": 4920
+    },
+    {
+      "epoch": 0.3550633139723655,
+      "grad_norm": 0.12319400161504745,
+      "learning_rate": 0.0001858103622456343,
+      "loss": 0.1784,
+      "step": 4921
+    },
+    {
+      "epoch": 0.35513546664742596,
+      "grad_norm": 0.12189222872257233,
+      "learning_rate": 0.00018580747582623756,
+      "loss": 0.1435,
+      "step": 4922
+    },
+    {
+      "epoch": 0.3552076193224864,
+      "grad_norm": 0.13751690089702606,
+      "learning_rate": 0.00018580458940684082,
+      "loss": 0.1789,
+      "step": 4923
+    },
+    {
+      "epoch": 0.3552797719975468,
+      "grad_norm": 0.11428214609622955,
+      "learning_rate": 0.0001858017029874441,
+      "loss": 0.2127,
+      "step": 4924
+    },
+    {
+      "epoch": 0.35535192467260723,
+      "grad_norm": 0.10802461951971054,
+      "learning_rate": 0.00018579881656804735,
+      "loss": 0.1921,
+      "step": 4925
+    },
+    {
+      "epoch": 0.3554240773476677,
+      "grad_norm": 0.12341997027397156,
+      "learning_rate": 0.00018579593014865059,
+      "loss": 0.1457,
+      "step": 4926
+    },
+    {
+      "epoch": 0.35549623002272807,
+      "grad_norm": 0.1213202252984047,
+      "learning_rate": 0.00018579304372925388,
+      "loss": 0.1195,
+      "step": 4927
+    },
+    {
+      "epoch": 0.3555683826977885,
+      "grad_norm": 0.11273522675037384,
+      "learning_rate": 0.00018579015730985714,
+      "loss": 0.1636,
+      "step": 4928
+    },
+    {
+      "epoch": 0.35564053537284895,
+      "grad_norm": 0.10054554790258408,
+      "learning_rate": 0.0001857872708904604,
+      "loss": 0.1404,
+      "step": 4929
+    },
+    {
+      "epoch": 0.3557126880479094,
+      "grad_norm": 0.12420728802680969,
+      "learning_rate": 0.00018578438447106366,
+      "loss": 0.1534,
+      "step": 4930
+    },
+    {
+      "epoch": 0.3557848407229698,
+      "grad_norm": 0.11710210889577866,
+      "learning_rate": 0.0001857814980516669,
+      "loss": 0.1284,
+      "step": 4931
+    },
+    {
+      "epoch": 0.35585699339803023,
+      "grad_norm": 0.11963210999965668,
+      "learning_rate": 0.00018577861163227016,
+      "loss": 0.166,
+      "step": 4932
+    },
+    {
+      "epoch": 0.3559291460730907,
+      "grad_norm": 0.12775014340877533,
+      "learning_rate": 0.00018577572521287343,
+      "loss": 0.1592,
+      "step": 4933
+    },
+    {
+      "epoch": 0.35600129874815106,
+      "grad_norm": 0.16686271131038666,
+      "learning_rate": 0.00018577283879347672,
+      "loss": 0.1688,
+      "step": 4934
+    },
+    {
+      "epoch": 0.3560734514232115,
+      "grad_norm": 0.15730160474777222,
+      "learning_rate": 0.00018576995237407998,
+      "loss": 0.214,
+      "step": 4935
+    },
+    {
+      "epoch": 0.35614560409827195,
+      "grad_norm": 0.13442355394363403,
+      "learning_rate": 0.00018576706595468321,
+      "loss": 0.136,
+      "step": 4936
+    },
+    {
+      "epoch": 0.3562177567733324,
+      "grad_norm": 0.11043615639209747,
+      "learning_rate": 0.00018576417953528648,
+      "loss": 0.1737,
+      "step": 4937
+    },
+    {
+      "epoch": 0.3562899094483928,
+      "grad_norm": 0.10412963479757309,
+      "learning_rate": 0.00018576129311588974,
+      "loss": 0.2177,
+      "step": 4938
+    },
+    {
+      "epoch": 0.3563620621234532,
+      "grad_norm": 0.1325763612985611,
+      "learning_rate": 0.000185758406696493,
+      "loss": 0.1786,
+      "step": 4939
+    },
+    {
+      "epoch": 0.35643421479851367,
+      "grad_norm": 0.12171344459056854,
+      "learning_rate": 0.00018575552027709627,
+      "loss": 0.1551,
+      "step": 4940
+    },
+    {
+      "epoch": 0.35650636747357406,
+      "grad_norm": 0.11393021047115326,
+      "learning_rate": 0.00018575263385769953,
+      "loss": 0.1474,
+      "step": 4941
+    },
+    {
+      "epoch": 0.3565785201486345,
+      "grad_norm": 0.11627914011478424,
+      "learning_rate": 0.0001857497474383028,
+      "loss": 0.1918,
+      "step": 4942
+    },
+    {
+      "epoch": 0.35665067282369495,
+      "grad_norm": 0.11078419536352158,
+      "learning_rate": 0.00018574686101890606,
+      "loss": 0.1807,
+      "step": 4943
+    },
+    {
+      "epoch": 0.3567228254987554,
+      "grad_norm": 0.11737526208162308,
+      "learning_rate": 0.00018574397459950932,
+      "loss": 0.1269,
+      "step": 4944
+    },
+    {
+      "epoch": 0.3567949781738158,
+      "grad_norm": 0.12382479012012482,
+      "learning_rate": 0.00018574108818011258,
+      "loss": 0.1615,
+      "step": 4945
+    },
+    {
+      "epoch": 0.3568671308488762,
+      "grad_norm": 0.12704169750213623,
+      "learning_rate": 0.00018573820176071584,
+      "loss": 0.1949,
+      "step": 4946
+    },
+    {
+      "epoch": 0.35693928352393667,
+      "grad_norm": 0.17338639497756958,
+      "learning_rate": 0.0001857353153413191,
+      "loss": 0.2353,
+      "step": 4947
+    },
+    {
+      "epoch": 0.35701143619899706,
+      "grad_norm": 0.10502665489912033,
+      "learning_rate": 0.00018573242892192237,
+      "loss": 0.1754,
+      "step": 4948
+    },
+    {
+      "epoch": 0.3570835888740575,
+      "grad_norm": 0.1206195130944252,
+      "learning_rate": 0.00018572954250252563,
+      "loss": 0.1575,
+      "step": 4949
+    },
+    {
+      "epoch": 0.35715574154911794,
+      "grad_norm": 0.1553194224834442,
+      "learning_rate": 0.0001857266560831289,
+      "loss": 0.1897,
+      "step": 4950
+    },
+    {
+      "epoch": 0.3572278942241784,
+      "grad_norm": 0.102202408015728,
+      "learning_rate": 0.00018572376966373216,
+      "loss": 0.1697,
+      "step": 4951
+    },
+    {
+      "epoch": 0.3573000468992388,
+      "grad_norm": 0.10409127920866013,
+      "learning_rate": 0.00018572088324433542,
+      "loss": 0.1864,
+      "step": 4952
+    },
+    {
+      "epoch": 0.3573721995742992,
+      "grad_norm": 0.10515742003917694,
+      "learning_rate": 0.00018571799682493866,
+      "loss": 0.1261,
+      "step": 4953
+    },
+    {
+      "epoch": 0.35744435224935966,
+      "grad_norm": 0.13130493462085724,
+      "learning_rate": 0.00018571511040554192,
+      "loss": 0.1506,
+      "step": 4954
+    },
+    {
+      "epoch": 0.35751650492442005,
+      "grad_norm": 0.1422574669122696,
+      "learning_rate": 0.0001857122239861452,
+      "loss": 0.1385,
+      "step": 4955
+    },
+    {
+      "epoch": 0.3575886575994805,
+      "grad_norm": 0.13222919404506683,
+      "learning_rate": 0.00018570933756674847,
+      "loss": 0.142,
+      "step": 4956
+    },
+    {
+      "epoch": 0.35766081027454094,
+      "grad_norm": 0.14456702768802643,
+      "learning_rate": 0.00018570645114735174,
+      "loss": 0.1845,
+      "step": 4957
+    },
+    {
+      "epoch": 0.35773296294960133,
+      "grad_norm": 0.10612176358699799,
+      "learning_rate": 0.00018570356472795497,
+      "loss": 0.1913,
+      "step": 4958
+    },
+    {
+      "epoch": 0.3578051156246618,
+      "grad_norm": 0.12707382440567017,
+      "learning_rate": 0.00018570067830855823,
+      "loss": 0.1965,
+      "step": 4959
+    },
+    {
+      "epoch": 0.3578772682997222,
+      "grad_norm": 0.11566483974456787,
+      "learning_rate": 0.0001856977918891615,
+      "loss": 0.1653,
+      "step": 4960
+    },
+    {
+      "epoch": 0.35794942097478266,
+      "grad_norm": 0.12770824134349823,
+      "learning_rate": 0.00018569490546976476,
+      "loss": 0.171,
+      "step": 4961
+    },
+    {
+      "epoch": 0.35802157364984305,
+      "grad_norm": 0.13529135286808014,
+      "learning_rate": 0.00018569201905036805,
+      "loss": 0.1826,
+      "step": 4962
+    },
+    {
+      "epoch": 0.3580937263249035,
+      "grad_norm": 0.162160724401474,
+      "learning_rate": 0.0001856891326309713,
+      "loss": 0.1945,
+      "step": 4963
+    },
+    {
+      "epoch": 0.35816587899996394,
+      "grad_norm": 0.1113724559545517,
+      "learning_rate": 0.00018568624621157455,
+      "loss": 0.1742,
+      "step": 4964
+    },
+    {
+      "epoch": 0.3582380316750243,
+      "grad_norm": 0.13558605313301086,
+      "learning_rate": 0.0001856833597921778,
+      "loss": 0.1912,
+      "step": 4965
+    },
+    {
+      "epoch": 0.35831018435008477,
+      "grad_norm": 0.22403933107852936,
+      "learning_rate": 0.00018568047337278108,
+      "loss": 0.1738,
+      "step": 4966
+    },
+    {
+      "epoch": 0.3583823370251452,
+      "grad_norm": 0.10644828528165817,
+      "learning_rate": 0.00018567758695338434,
+      "loss": 0.158,
+      "step": 4967
+    },
+    {
+      "epoch": 0.35845448970020566,
+      "grad_norm": 0.12324364483356476,
+      "learning_rate": 0.0001856747005339876,
+      "loss": 0.1726,
+      "step": 4968
+    },
+    {
+      "epoch": 0.35852664237526605,
+      "grad_norm": 0.1327822208404541,
+      "learning_rate": 0.00018567181411459086,
+      "loss": 0.1701,
+      "step": 4969
+    },
+    {
+      "epoch": 0.3585987950503265,
+      "grad_norm": 0.10459893196821213,
+      "learning_rate": 0.00018566892769519413,
+      "loss": 0.1236,
+      "step": 4970
+    },
+    {
+      "epoch": 0.35867094772538693,
+      "grad_norm": 0.1374405175447464,
+      "learning_rate": 0.0001856660412757974,
+      "loss": 0.1443,
+      "step": 4971
+    },
+    {
+      "epoch": 0.3587431004004473,
+      "grad_norm": 0.11382637917995453,
+      "learning_rate": 0.00018566315485640065,
+      "loss": 0.1148,
+      "step": 4972
+    },
+    {
+      "epoch": 0.35881525307550777,
+      "grad_norm": 0.14948365092277527,
+      "learning_rate": 0.00018566026843700392,
+      "loss": 0.1815,
+      "step": 4973
+    },
+    {
+      "epoch": 0.3588874057505682,
+      "grad_norm": 0.12349545955657959,
+      "learning_rate": 0.00018565738201760715,
+      "loss": 0.1543,
+      "step": 4974
+    },
+    {
+      "epoch": 0.35895955842562866,
+      "grad_norm": 0.15273398160934448,
+      "learning_rate": 0.00018565449559821041,
+      "loss": 0.1415,
+      "step": 4975
+    },
+    {
+      "epoch": 0.35903171110068904,
+      "grad_norm": 0.16976992785930634,
+      "learning_rate": 0.0001856516091788137,
+      "loss": 0.1221,
+      "step": 4976
+    },
+    {
+      "epoch": 0.3591038637757495,
+      "grad_norm": 0.1320040225982666,
+      "learning_rate": 0.00018564872275941697,
+      "loss": 0.1559,
+      "step": 4977
+    },
+    {
+      "epoch": 0.35917601645080993,
+      "grad_norm": 0.1515548974275589,
+      "learning_rate": 0.00018564583634002023,
+      "loss": 0.1812,
+      "step": 4978
+    },
+    {
+      "epoch": 0.3592481691258703,
+      "grad_norm": 0.1064198911190033,
+      "learning_rate": 0.00018564294992062347,
+      "loss": 0.1855,
+      "step": 4979
+    },
+    {
+      "epoch": 0.35932032180093076,
+      "grad_norm": 0.10020937025547028,
+      "learning_rate": 0.00018564006350122673,
+      "loss": 0.1363,
+      "step": 4980
+    },
+    {
+      "epoch": 0.3593924744759912,
+      "grad_norm": 0.11039954423904419,
+      "learning_rate": 0.00018563717708183,
+      "loss": 0.227,
+      "step": 4981
+    },
+    {
+      "epoch": 0.35946462715105165,
+      "grad_norm": 0.11901956051588058,
+      "learning_rate": 0.00018563429066243325,
+      "loss": 0.1784,
+      "step": 4982
+    },
+    {
+      "epoch": 0.35953677982611204,
+      "grad_norm": 0.13375921547412872,
+      "learning_rate": 0.00018563140424303654,
+      "loss": 0.1272,
+      "step": 4983
+    },
+    {
+      "epoch": 0.3596089325011725,
+      "grad_norm": 0.12381310015916824,
+      "learning_rate": 0.00018562851782363978,
+      "loss": 0.1631,
+      "step": 4984
+    },
+    {
+      "epoch": 0.35968108517623293,
+      "grad_norm": 0.11995392292737961,
+      "learning_rate": 0.00018562563140424304,
+      "loss": 0.1261,
+      "step": 4985
+    },
+    {
+      "epoch": 0.3597532378512933,
+      "grad_norm": 0.14222565293312073,
+      "learning_rate": 0.0001856227449848463,
+      "loss": 0.1373,
+      "step": 4986
+    },
+    {
+      "epoch": 0.35982539052635376,
+      "grad_norm": 0.09233032912015915,
+      "learning_rate": 0.00018561985856544957,
+      "loss": 0.1233,
+      "step": 4987
+    },
+    {
+      "epoch": 0.3598975432014142,
+      "grad_norm": 0.11498374491930008,
+      "learning_rate": 0.00018561697214605283,
+      "loss": 0.1335,
+      "step": 4988
+    },
+    {
+      "epoch": 0.3599696958764746,
+      "grad_norm": 0.1523871123790741,
+      "learning_rate": 0.0001856140857266561,
+      "loss": 0.1507,
+      "step": 4989
+    },
+    {
+      "epoch": 0.36004184855153504,
+      "grad_norm": 0.1467117816209793,
+      "learning_rate": 0.00018561119930725936,
+      "loss": 0.1534,
+      "step": 4990
+    },
+    {
+      "epoch": 0.3601140012265955,
+      "grad_norm": 0.12076793611049652,
+      "learning_rate": 0.00018560831288786262,
+      "loss": 0.1503,
+      "step": 4991
+    },
+    {
+      "epoch": 0.3601861539016559,
+      "grad_norm": 0.13146507740020752,
+      "learning_rate": 0.00018560542646846588,
+      "loss": 0.1526,
+      "step": 4992
+    },
+    {
+      "epoch": 0.3602583065767163,
+      "grad_norm": 0.12270855158567429,
+      "learning_rate": 0.00018560254004906915,
+      "loss": 0.1191,
+      "step": 4993
+    },
+    {
+      "epoch": 0.36033045925177676,
+      "grad_norm": 0.13664329051971436,
+      "learning_rate": 0.0001855996536296724,
+      "loss": 0.2025,
+      "step": 4994
+    },
+    {
+      "epoch": 0.3604026119268372,
+      "grad_norm": 0.15141281485557556,
+      "learning_rate": 0.00018559676721027565,
+      "loss": 0.1622,
+      "step": 4995
+    },
+    {
+      "epoch": 0.3604747646018976,
+      "grad_norm": 0.11014848947525024,
+      "learning_rate": 0.0001855938807908789,
+      "loss": 0.1817,
+      "step": 4996
+    },
+    {
+      "epoch": 0.36054691727695803,
+      "grad_norm": 0.13574273884296417,
+      "learning_rate": 0.00018559099437148217,
+      "loss": 0.1963,
+      "step": 4997
+    },
+    {
+      "epoch": 0.3606190699520185,
+      "grad_norm": 0.13352210819721222,
+      "learning_rate": 0.00018558810795208546,
+      "loss": 0.1491,
+      "step": 4998
+    },
+    {
+      "epoch": 0.3606912226270789,
+      "grad_norm": 0.09718064963817596,
+      "learning_rate": 0.00018558522153268872,
+      "loss": 0.1291,
+      "step": 4999
+    },
+    {
+      "epoch": 0.3607633753021393,
+      "grad_norm": 0.1510242223739624,
+      "learning_rate": 0.00018558233511329196,
+      "loss": 0.2136,
+      "step": 5000
+    },
+    {
+      "epoch": 0.36083552797719975,
+      "grad_norm": 0.13630354404449463,
+      "learning_rate": 0.00018557944869389522,
+      "loss": 0.1878,
+      "step": 5001
+    },
+    {
+      "epoch": 0.3609076806522602,
+      "grad_norm": 0.10373663902282715,
+      "learning_rate": 0.00018557656227449849,
+      "loss": 0.1361,
+      "step": 5002
+    },
+    {
+      "epoch": 0.3609798333273206,
+      "grad_norm": 0.11246156692504883,
+      "learning_rate": 0.00018557367585510175,
+      "loss": 0.2004,
+      "step": 5003
+    },
+    {
+      "epoch": 0.36105198600238103,
+      "grad_norm": 0.11527900397777557,
+      "learning_rate": 0.000185570789435705,
+      "loss": 0.154,
+      "step": 5004
+    },
+    {
+      "epoch": 0.3611241386774415,
+      "grad_norm": 0.15830467641353607,
+      "learning_rate": 0.00018556790301630828,
+      "loss": 0.1936,
+      "step": 5005
+    },
+    {
+      "epoch": 0.3611962913525019,
+      "grad_norm": 0.09405852109193802,
+      "learning_rate": 0.00018556501659691154,
+      "loss": 0.1425,
+      "step": 5006
+    },
+    {
+      "epoch": 0.3612684440275623,
+      "grad_norm": 0.13238678872585297,
+      "learning_rate": 0.0001855621301775148,
+      "loss": 0.1399,
+      "step": 5007
+    },
+    {
+      "epoch": 0.36134059670262275,
+      "grad_norm": 0.10964048653841019,
+      "learning_rate": 0.00018555924375811806,
+      "loss": 0.1785,
+      "step": 5008
+    },
+    {
+      "epoch": 0.3614127493776832,
+      "grad_norm": 0.1466868370771408,
+      "learning_rate": 0.00018555635733872133,
+      "loss": 0.1134,
+      "step": 5009
+    },
+    {
+      "epoch": 0.3614849020527436,
+      "grad_norm": 0.11806122213602066,
+      "learning_rate": 0.0001855534709193246,
+      "loss": 0.1418,
+      "step": 5010
+    },
+    {
+      "epoch": 0.36155705472780403,
+      "grad_norm": 0.13827867805957794,
+      "learning_rate": 0.00018555058449992783,
+      "loss": 0.1562,
+      "step": 5011
+    },
+    {
+      "epoch": 0.36162920740286447,
+      "grad_norm": 0.10188999772071838,
+      "learning_rate": 0.00018554769808053112,
+      "loss": 0.2064,
+      "step": 5012
+    },
+    {
+      "epoch": 0.3617013600779249,
+      "grad_norm": 0.14065970480442047,
+      "learning_rate": 0.00018554481166113438,
+      "loss": 0.1469,
+      "step": 5013
+    },
+    {
+      "epoch": 0.3617735127529853,
+      "grad_norm": 0.11012709140777588,
+      "learning_rate": 0.00018554192524173764,
+      "loss": 0.164,
+      "step": 5014
+    },
+    {
+      "epoch": 0.36184566542804575,
+      "grad_norm": 0.1569322943687439,
+      "learning_rate": 0.0001855390388223409,
+      "loss": 0.1547,
+      "step": 5015
+    },
+    {
+      "epoch": 0.3619178181031062,
+      "grad_norm": 0.1648615151643753,
+      "learning_rate": 0.00018553615240294414,
+      "loss": 0.1572,
+      "step": 5016
+    },
+    {
+      "epoch": 0.3619899707781666,
+      "grad_norm": 0.13935476541519165,
+      "learning_rate": 0.0001855332659835474,
+      "loss": 0.129,
+      "step": 5017
+    },
+    {
+      "epoch": 0.362062123453227,
+      "grad_norm": 0.1299179047346115,
+      "learning_rate": 0.00018553037956415067,
+      "loss": 0.144,
+      "step": 5018
+    },
+    {
+      "epoch": 0.36213427612828747,
+      "grad_norm": 0.14577659964561462,
+      "learning_rate": 0.00018552749314475396,
+      "loss": 0.1673,
+      "step": 5019
+    },
+    {
+      "epoch": 0.36220642880334786,
+      "grad_norm": 0.11624746024608612,
+      "learning_rate": 0.00018552460672535722,
+      "loss": 0.1483,
+      "step": 5020
+    },
+    {
+      "epoch": 0.3622785814784083,
+      "grad_norm": 0.1287081092596054,
+      "learning_rate": 0.00018552172030596045,
+      "loss": 0.1829,
+      "step": 5021
+    },
+    {
+      "epoch": 0.36235073415346875,
+      "grad_norm": 0.14396774768829346,
+      "learning_rate": 0.00018551883388656372,
+      "loss": 0.1286,
+      "step": 5022
+    },
+    {
+      "epoch": 0.3624228868285292,
+      "grad_norm": 0.1085737869143486,
+      "learning_rate": 0.00018551594746716698,
+      "loss": 0.1543,
+      "step": 5023
+    },
+    {
+      "epoch": 0.3624950395035896,
+      "grad_norm": 0.14155034720897675,
+      "learning_rate": 0.00018551306104777024,
+      "loss": 0.1076,
+      "step": 5024
+    },
+    {
+      "epoch": 0.36256719217865,
+      "grad_norm": 0.12421038001775742,
+      "learning_rate": 0.0001855101746283735,
+      "loss": 0.1596,
+      "step": 5025
+    },
+    {
+      "epoch": 0.36263934485371047,
+      "grad_norm": 0.1154010072350502,
+      "learning_rate": 0.00018550728820897677,
+      "loss": 0.167,
+      "step": 5026
+    },
+    {
+      "epoch": 0.36271149752877085,
+      "grad_norm": 0.13365091383457184,
+      "learning_rate": 0.00018550440178958003,
+      "loss": 0.1402,
+      "step": 5027
+    },
+    {
+      "epoch": 0.3627836502038313,
+      "grad_norm": 0.2421489953994751,
+      "learning_rate": 0.0001855015153701833,
+      "loss": 0.128,
+      "step": 5028
+    },
+    {
+      "epoch": 0.36285580287889174,
+      "grad_norm": 0.11368130892515182,
+      "learning_rate": 0.00018549862895078656,
+      "loss": 0.1508,
+      "step": 5029
+    },
+    {
+      "epoch": 0.3629279555539522,
+      "grad_norm": 0.10695904493331909,
+      "learning_rate": 0.00018549574253138982,
+      "loss": 0.1816,
+      "step": 5030
+    },
+    {
+      "epoch": 0.3630001082290126,
+      "grad_norm": 0.09516868740320206,
+      "learning_rate": 0.00018549285611199308,
+      "loss": 0.1551,
+      "step": 5031
+    },
+    {
+      "epoch": 0.363072260904073,
+      "grad_norm": 0.14686326682567596,
+      "learning_rate": 0.00018548996969259632,
+      "loss": 0.1383,
+      "step": 5032
+    },
+    {
+      "epoch": 0.36314441357913346,
+      "grad_norm": 0.13058914244174957,
+      "learning_rate": 0.0001854870832731996,
+      "loss": 0.1851,
+      "step": 5033
+    },
+    {
+      "epoch": 0.36321656625419385,
+      "grad_norm": 0.15616953372955322,
+      "learning_rate": 0.00018548419685380287,
+      "loss": 0.1557,
+      "step": 5034
+    },
+    {
+      "epoch": 0.3632887189292543,
+      "grad_norm": 0.09751254320144653,
+      "learning_rate": 0.00018548131043440614,
+      "loss": 0.1456,
+      "step": 5035
+    },
+    {
+      "epoch": 0.36336087160431474,
+      "grad_norm": 0.15860705077648163,
+      "learning_rate": 0.0001854784240150094,
+      "loss": 0.1678,
+      "step": 5036
+    },
+    {
+      "epoch": 0.3634330242793752,
+      "grad_norm": 0.1270139217376709,
+      "learning_rate": 0.00018547553759561263,
+      "loss": 0.1407,
+      "step": 5037
+    },
+    {
+      "epoch": 0.36350517695443557,
+      "grad_norm": 0.1260344237089157,
+      "learning_rate": 0.0001854726511762159,
+      "loss": 0.1758,
+      "step": 5038
+    },
+    {
+      "epoch": 0.363577329629496,
+      "grad_norm": 0.12336471676826477,
+      "learning_rate": 0.00018546976475681916,
+      "loss": 0.1415,
+      "step": 5039
+    },
+    {
+      "epoch": 0.36364948230455646,
+      "grad_norm": 0.12689828872680664,
+      "learning_rate": 0.00018546687833742245,
+      "loss": 0.1465,
+      "step": 5040
+    },
+    {
+      "epoch": 0.36372163497961685,
+      "grad_norm": 0.12668170034885406,
+      "learning_rate": 0.0001854639919180257,
+      "loss": 0.1252,
+      "step": 5041
+    },
+    {
+      "epoch": 0.3637937876546773,
+      "grad_norm": 0.12117216736078262,
+      "learning_rate": 0.00018546110549862895,
+      "loss": 0.1832,
+      "step": 5042
+    },
+    {
+      "epoch": 0.36386594032973774,
+      "grad_norm": 0.12453142553567886,
+      "learning_rate": 0.0001854582190792322,
+      "loss": 0.1583,
+      "step": 5043
+    },
+    {
+      "epoch": 0.3639380930047982,
+      "grad_norm": 0.10939580947160721,
+      "learning_rate": 0.00018545533265983547,
+      "loss": 0.1806,
+      "step": 5044
+    },
+    {
+      "epoch": 0.36401024567985857,
+      "grad_norm": 0.13718275725841522,
+      "learning_rate": 0.00018545244624043874,
+      "loss": 0.1396,
+      "step": 5045
+    },
+    {
+      "epoch": 0.364082398354919,
+      "grad_norm": 0.13322681188583374,
+      "learning_rate": 0.000185449559821042,
+      "loss": 0.1287,
+      "step": 5046
+    },
+    {
+      "epoch": 0.36415455102997946,
+      "grad_norm": 0.13685956597328186,
+      "learning_rate": 0.00018544667340164526,
+      "loss": 0.189,
+      "step": 5047
+    },
+    {
+      "epoch": 0.36422670370503984,
+      "grad_norm": 0.1251964420080185,
+      "learning_rate": 0.00018544378698224853,
+      "loss": 0.1731,
+      "step": 5048
+    },
+    {
+      "epoch": 0.3642988563801003,
+      "grad_norm": 0.14998053014278412,
+      "learning_rate": 0.0001854409005628518,
+      "loss": 0.1574,
+      "step": 5049
+    },
+    {
+      "epoch": 0.36437100905516073,
+      "grad_norm": 0.13014808297157288,
+      "learning_rate": 0.00018543801414345505,
+      "loss": 0.1192,
+      "step": 5050
+    },
+    {
+      "epoch": 0.3644431617302211,
+      "grad_norm": 0.10887446254491806,
+      "learning_rate": 0.00018543512772405832,
+      "loss": 0.1638,
+      "step": 5051
+    },
+    {
+      "epoch": 0.36451531440528157,
+      "grad_norm": 0.10679549723863602,
+      "learning_rate": 0.00018543224130466158,
+      "loss": 0.1455,
+      "step": 5052
+    },
+    {
+      "epoch": 0.364587467080342,
+      "grad_norm": 0.13491018116474152,
+      "learning_rate": 0.00018542935488526484,
+      "loss": 0.154,
+      "step": 5053
+    },
+    {
+      "epoch": 0.36465961975540245,
+      "grad_norm": 0.10097759962081909,
+      "learning_rate": 0.0001854264684658681,
+      "loss": 0.1674,
+      "step": 5054
+    },
+    {
+      "epoch": 0.36473177243046284,
+      "grad_norm": 0.1123056635260582,
+      "learning_rate": 0.00018542358204647137,
+      "loss": 0.1529,
+      "step": 5055
+    },
+    {
+      "epoch": 0.3648039251055233,
+      "grad_norm": 0.17683255672454834,
+      "learning_rate": 0.00018542069562707463,
+      "loss": 0.1562,
+      "step": 5056
+    },
+    {
+      "epoch": 0.36487607778058373,
+      "grad_norm": 0.1176273375749588,
+      "learning_rate": 0.0001854178092076779,
+      "loss": 0.1714,
+      "step": 5057
+    },
+    {
+      "epoch": 0.3649482304556441,
+      "grad_norm": 0.11856452375650406,
+      "learning_rate": 0.00018541492278828116,
+      "loss": 0.1786,
+      "step": 5058
+    },
+    {
+      "epoch": 0.36502038313070456,
+      "grad_norm": 0.13071638345718384,
+      "learning_rate": 0.0001854120363688844,
+      "loss": 0.1881,
+      "step": 5059
+    },
+    {
+      "epoch": 0.365092535805765,
+      "grad_norm": 0.12175871431827545,
+      "learning_rate": 0.00018540914994948765,
+      "loss": 0.2229,
+      "step": 5060
+    },
+    {
+      "epoch": 0.36516468848082545,
+      "grad_norm": 0.11493963748216629,
+      "learning_rate": 0.00018540626353009094,
+      "loss": 0.1575,
+      "step": 5061
+    },
+    {
+      "epoch": 0.36523684115588584,
+      "grad_norm": 0.13158316910266876,
+      "learning_rate": 0.0001854033771106942,
+      "loss": 0.1481,
+      "step": 5062
+    },
+    {
+      "epoch": 0.3653089938309463,
+      "grad_norm": 0.15244421362876892,
+      "learning_rate": 0.00018540049069129747,
+      "loss": 0.1597,
+      "step": 5063
+    },
+    {
+      "epoch": 0.3653811465060067,
+      "grad_norm": 0.1313554048538208,
+      "learning_rate": 0.0001853976042719007,
+      "loss": 0.1255,
+      "step": 5064
+    },
+    {
+      "epoch": 0.3654532991810671,
+      "grad_norm": 0.11577058583498001,
+      "learning_rate": 0.00018539471785250397,
+      "loss": 0.1504,
+      "step": 5065
+    },
+    {
+      "epoch": 0.36552545185612756,
+      "grad_norm": 0.11609276384115219,
+      "learning_rate": 0.00018539183143310723,
+      "loss": 0.1802,
+      "step": 5066
+    },
+    {
+      "epoch": 0.365597604531188,
+      "grad_norm": 0.1118810847401619,
+      "learning_rate": 0.0001853889450137105,
+      "loss": 0.1399,
+      "step": 5067
+    },
+    {
+      "epoch": 0.36566975720624845,
+      "grad_norm": 0.10551551729440689,
+      "learning_rate": 0.00018538605859431378,
+      "loss": 0.1921,
+      "step": 5068
+    },
+    {
+      "epoch": 0.36574190988130884,
+      "grad_norm": 0.10924666374921799,
+      "learning_rate": 0.00018538317217491702,
+      "loss": 0.1264,
+      "step": 5069
+    },
+    {
+      "epoch": 0.3658140625563693,
+      "grad_norm": 0.10584163665771484,
+      "learning_rate": 0.00018538028575552028,
+      "loss": 0.144,
+      "step": 5070
+    },
+    {
+      "epoch": 0.3658862152314297,
+      "grad_norm": 0.09888424724340439,
+      "learning_rate": 0.00018537739933612355,
+      "loss": 0.1276,
+      "step": 5071
+    },
+    {
+      "epoch": 0.3659583679064901,
+      "grad_norm": 0.11036736518144608,
+      "learning_rate": 0.0001853745129167268,
+      "loss": 0.1714,
+      "step": 5072
+    },
+    {
+      "epoch": 0.36603052058155056,
+      "grad_norm": 0.11193112283945084,
+      "learning_rate": 0.00018537162649733007,
+      "loss": 0.1732,
+      "step": 5073
+    },
+    {
+      "epoch": 0.366102673256611,
+      "grad_norm": 0.12735997140407562,
+      "learning_rate": 0.00018536874007793334,
+      "loss": 0.1406,
+      "step": 5074
+    },
+    {
+      "epoch": 0.36617482593167144,
+      "grad_norm": 0.11917732656002045,
+      "learning_rate": 0.0001853658536585366,
+      "loss": 0.1515,
+      "step": 5075
+    },
+    {
+      "epoch": 0.36624697860673183,
+      "grad_norm": 0.13747093081474304,
+      "learning_rate": 0.00018536296723913986,
+      "loss": 0.156,
+      "step": 5076
+    },
+    {
+      "epoch": 0.3663191312817923,
+      "grad_norm": 0.15208113193511963,
+      "learning_rate": 0.00018536008081974312,
+      "loss": 0.1641,
+      "step": 5077
+    },
+    {
+      "epoch": 0.3663912839568527,
+      "grad_norm": 0.1165727749466896,
+      "learning_rate": 0.0001853571944003464,
+      "loss": 0.1581,
+      "step": 5078
+    },
+    {
+      "epoch": 0.3664634366319131,
+      "grad_norm": 0.13178570568561554,
+      "learning_rate": 0.00018535430798094965,
+      "loss": 0.1601,
+      "step": 5079
+    },
+    {
+      "epoch": 0.36653558930697355,
+      "grad_norm": 0.13907936215400696,
+      "learning_rate": 0.00018535142156155289,
+      "loss": 0.1643,
+      "step": 5080
+    },
+    {
+      "epoch": 0.366607741982034,
+      "grad_norm": 0.1026151031255722,
+      "learning_rate": 0.00018534853514215615,
+      "loss": 0.1198,
+      "step": 5081
+    },
+    {
+      "epoch": 0.3666798946570944,
+      "grad_norm": 0.09718500077724457,
+      "learning_rate": 0.00018534564872275944,
+      "loss": 0.1908,
+      "step": 5082
+    },
+    {
+      "epoch": 0.36675204733215483,
+      "grad_norm": 0.10540282726287842,
+      "learning_rate": 0.0001853427623033627,
+      "loss": 0.1317,
+      "step": 5083
+    },
+    {
+      "epoch": 0.3668242000072153,
+      "grad_norm": 0.1018374040722847,
+      "learning_rate": 0.00018533987588396596,
+      "loss": 0.1423,
+      "step": 5084
+    },
+    {
+      "epoch": 0.3668963526822757,
+      "grad_norm": 0.13188058137893677,
+      "learning_rate": 0.0001853369894645692,
+      "loss": 0.1671,
+      "step": 5085
+    },
+    {
+      "epoch": 0.3669685053573361,
+      "grad_norm": 0.16619445383548737,
+      "learning_rate": 0.00018533410304517246,
+      "loss": 0.164,
+      "step": 5086
+    },
+    {
+      "epoch": 0.36704065803239655,
+      "grad_norm": 0.13738171756267548,
+      "learning_rate": 0.00018533121662577573,
+      "loss": 0.1734,
+      "step": 5087
+    },
+    {
+      "epoch": 0.367112810707457,
+      "grad_norm": 0.13084229826927185,
+      "learning_rate": 0.000185328330206379,
+      "loss": 0.2097,
+      "step": 5088
+    },
+    {
+      "epoch": 0.3671849633825174,
+      "grad_norm": 0.09078751504421234,
+      "learning_rate": 0.00018532544378698228,
+      "loss": 0.168,
+      "step": 5089
+    },
+    {
+      "epoch": 0.3672571160575778,
+      "grad_norm": 0.10640835762023926,
+      "learning_rate": 0.00018532255736758551,
+      "loss": 0.1773,
+      "step": 5090
+    },
+    {
+      "epoch": 0.36732926873263827,
+      "grad_norm": 0.12556225061416626,
+      "learning_rate": 0.00018531967094818878,
+      "loss": 0.13,
+      "step": 5091
+    },
+    {
+      "epoch": 0.3674014214076987,
+      "grad_norm": 0.11766793578863144,
+      "learning_rate": 0.00018531678452879204,
+      "loss": 0.1166,
+      "step": 5092
+    },
+    {
+      "epoch": 0.3674735740827591,
+      "grad_norm": 0.11859668046236038,
+      "learning_rate": 0.0001853138981093953,
+      "loss": 0.1438,
+      "step": 5093
+    },
+    {
+      "epoch": 0.36754572675781955,
+      "grad_norm": 0.13351677358150482,
+      "learning_rate": 0.00018531101168999857,
+      "loss": 0.157,
+      "step": 5094
+    },
+    {
+      "epoch": 0.36761787943288,
+      "grad_norm": 0.1354007124900818,
+      "learning_rate": 0.00018530812527060183,
+      "loss": 0.1691,
+      "step": 5095
+    },
+    {
+      "epoch": 0.3676900321079404,
+      "grad_norm": 0.1143498420715332,
+      "learning_rate": 0.0001853052388512051,
+      "loss": 0.1612,
+      "step": 5096
+    },
+    {
+      "epoch": 0.3677621847830008,
+      "grad_norm": 0.12536217272281647,
+      "learning_rate": 0.00018530235243180836,
+      "loss": 0.1642,
+      "step": 5097
+    },
+    {
+      "epoch": 0.36783433745806127,
+      "grad_norm": 0.13603799045085907,
+      "learning_rate": 0.00018529946601241162,
+      "loss": 0.1423,
+      "step": 5098
+    },
+    {
+      "epoch": 0.3679064901331217,
+      "grad_norm": 0.11363290250301361,
+      "learning_rate": 0.00018529657959301488,
+      "loss": 0.1752,
+      "step": 5099
+    },
+    {
+      "epoch": 0.3679786428081821,
+      "grad_norm": 0.11023396998643875,
+      "learning_rate": 0.00018529369317361814,
+      "loss": 0.1874,
+      "step": 5100
+    },
+    {
+      "epoch": 0.36805079548324254,
+      "grad_norm": 0.11664698272943497,
+      "learning_rate": 0.00018529080675422138,
+      "loss": 0.1488,
+      "step": 5101
+    },
+    {
+      "epoch": 0.368122948158303,
+      "grad_norm": 0.11291544139385223,
+      "learning_rate": 0.00018528792033482464,
+      "loss": 0.1407,
+      "step": 5102
+    },
+    {
+      "epoch": 0.3681951008333634,
+      "grad_norm": 0.1242561861872673,
+      "learning_rate": 0.00018528503391542793,
+      "loss": 0.1325,
+      "step": 5103
+    },
+    {
+      "epoch": 0.3682672535084238,
+      "grad_norm": 0.1404646337032318,
+      "learning_rate": 0.0001852821474960312,
+      "loss": 0.2256,
+      "step": 5104
+    },
+    {
+      "epoch": 0.36833940618348426,
+      "grad_norm": 0.11316098272800446,
+      "learning_rate": 0.00018527926107663446,
+      "loss": 0.1467,
+      "step": 5105
+    },
+    {
+      "epoch": 0.3684115588585447,
+      "grad_norm": 0.10041385143995285,
+      "learning_rate": 0.0001852763746572377,
+      "loss": 0.1365,
+      "step": 5106
+    },
+    {
+      "epoch": 0.3684837115336051,
+      "grad_norm": 0.13179618120193481,
+      "learning_rate": 0.00018527348823784096,
+      "loss": 0.1816,
+      "step": 5107
+    },
+    {
+      "epoch": 0.36855586420866554,
+      "grad_norm": 0.1282452642917633,
+      "learning_rate": 0.00018527060181844422,
+      "loss": 0.143,
+      "step": 5108
+    },
+    {
+      "epoch": 0.368628016883726,
+      "grad_norm": 0.17576515674591064,
+      "learning_rate": 0.00018526771539904748,
+      "loss": 0.1765,
+      "step": 5109
+    },
+    {
+      "epoch": 0.3687001695587864,
+      "grad_norm": 0.13764993846416473,
+      "learning_rate": 0.00018526482897965077,
+      "loss": 0.1637,
+      "step": 5110
+    },
+    {
+      "epoch": 0.3687723222338468,
+      "grad_norm": 0.10974890738725662,
+      "learning_rate": 0.000185261942560254,
+      "loss": 0.1639,
+      "step": 5111
+    },
+    {
+      "epoch": 0.36884447490890726,
+      "grad_norm": 0.13875915110111237,
+      "learning_rate": 0.00018525905614085727,
+      "loss": 0.1499,
+      "step": 5112
+    },
+    {
+      "epoch": 0.36891662758396765,
+      "grad_norm": 0.11761275678873062,
+      "learning_rate": 0.00018525616972146054,
+      "loss": 0.1813,
+      "step": 5113
+    },
+    {
+      "epoch": 0.3689887802590281,
+      "grad_norm": 0.12019307911396027,
+      "learning_rate": 0.0001852532833020638,
+      "loss": 0.0926,
+      "step": 5114
+    },
+    {
+      "epoch": 0.36906093293408854,
+      "grad_norm": 0.12786947190761566,
+      "learning_rate": 0.00018525039688266706,
+      "loss": 0.1878,
+      "step": 5115
+    },
+    {
+      "epoch": 0.369133085609149,
+      "grad_norm": 0.10546452552080154,
+      "learning_rate": 0.00018524751046327032,
+      "loss": 0.1632,
+      "step": 5116
+    },
+    {
+      "epoch": 0.36920523828420937,
+      "grad_norm": 0.13980886340141296,
+      "learning_rate": 0.0001852446240438736,
+      "loss": 0.197,
+      "step": 5117
+    },
+    {
+      "epoch": 0.3692773909592698,
+      "grad_norm": 0.10450896620750427,
+      "learning_rate": 0.00018524173762447685,
+      "loss": 0.2151,
+      "step": 5118
+    },
+    {
+      "epoch": 0.36934954363433026,
+      "grad_norm": 0.12501630187034607,
+      "learning_rate": 0.0001852388512050801,
+      "loss": 0.1527,
+      "step": 5119
+    },
+    {
+      "epoch": 0.36942169630939065,
+      "grad_norm": 0.1291973739862442,
+      "learning_rate": 0.00018523596478568338,
+      "loss": 0.1995,
+      "step": 5120
+    },
+    {
+      "epoch": 0.3694938489844511,
+      "grad_norm": 0.11190078407526016,
+      "learning_rate": 0.00018523307836628664,
+      "loss": 0.1156,
+      "step": 5121
+    },
+    {
+      "epoch": 0.36956600165951153,
+      "grad_norm": 0.1329687386751175,
+      "learning_rate": 0.00018523019194688987,
+      "loss": 0.1712,
+      "step": 5122
+    },
+    {
+      "epoch": 0.369638154334572,
+      "grad_norm": 0.11458615958690643,
+      "learning_rate": 0.00018522730552749314,
+      "loss": 0.1704,
+      "step": 5123
+    },
+    {
+      "epoch": 0.36971030700963237,
+      "grad_norm": 0.1093921810388565,
+      "learning_rate": 0.00018522441910809643,
+      "loss": 0.1724,
+      "step": 5124
+    },
+    {
+      "epoch": 0.3697824596846928,
+      "grad_norm": 0.12858615815639496,
+      "learning_rate": 0.0001852215326886997,
+      "loss": 0.1628,
+      "step": 5125
+    },
+    {
+      "epoch": 0.36985461235975325,
+      "grad_norm": 0.1646309196949005,
+      "learning_rate": 0.00018521864626930295,
+      "loss": 0.1371,
+      "step": 5126
+    },
+    {
+      "epoch": 0.36992676503481364,
+      "grad_norm": 0.12659627199172974,
+      "learning_rate": 0.0001852157598499062,
+      "loss": 0.1148,
+      "step": 5127
+    },
+    {
+      "epoch": 0.3699989177098741,
+      "grad_norm": 0.16643866896629333,
+      "learning_rate": 0.00018521287343050945,
+      "loss": 0.1886,
+      "step": 5128
+    },
+    {
+      "epoch": 0.37007107038493453,
+      "grad_norm": 0.11686883866786957,
+      "learning_rate": 0.00018520998701111271,
+      "loss": 0.1142,
+      "step": 5129
+    },
+    {
+      "epoch": 0.370143223059995,
+      "grad_norm": 0.12929080426692963,
+      "learning_rate": 0.00018520710059171598,
+      "loss": 0.1463,
+      "step": 5130
+    },
+    {
+      "epoch": 0.37021537573505536,
+      "grad_norm": 0.10576673597097397,
+      "learning_rate": 0.00018520421417231927,
+      "loss": 0.1083,
+      "step": 5131
+    },
+    {
+      "epoch": 0.3702875284101158,
+      "grad_norm": 0.1180238425731659,
+      "learning_rate": 0.0001852013277529225,
+      "loss": 0.1625,
+      "step": 5132
+    },
+    {
+      "epoch": 0.37035968108517625,
+      "grad_norm": 0.14105002582073212,
+      "learning_rate": 0.00018519844133352577,
+      "loss": 0.1921,
+      "step": 5133
+    },
+    {
+      "epoch": 0.37043183376023664,
+      "grad_norm": 0.10322585701942444,
+      "learning_rate": 0.00018519555491412903,
+      "loss": 0.1203,
+      "step": 5134
+    },
+    {
+      "epoch": 0.3705039864352971,
+      "grad_norm": 0.12555329501628876,
+      "learning_rate": 0.0001851926684947323,
+      "loss": 0.1714,
+      "step": 5135
+    },
+    {
+      "epoch": 0.3705761391103575,
+      "grad_norm": 0.11362385749816895,
+      "learning_rate": 0.00018518978207533556,
+      "loss": 0.1574,
+      "step": 5136
+    },
+    {
+      "epoch": 0.37064829178541797,
+      "grad_norm": 0.11772707849740982,
+      "learning_rate": 0.00018518689565593882,
+      "loss": 0.1694,
+      "step": 5137
+    },
+    {
+      "epoch": 0.37072044446047836,
+      "grad_norm": 0.1352548599243164,
+      "learning_rate": 0.00018518400923654208,
+      "loss": 0.1703,
+      "step": 5138
+    },
+    {
+      "epoch": 0.3707925971355388,
+      "grad_norm": 0.11592496931552887,
+      "learning_rate": 0.00018518112281714534,
+      "loss": 0.1873,
+      "step": 5139
+    },
+    {
+      "epoch": 0.37086474981059925,
+      "grad_norm": 0.12126876413822174,
+      "learning_rate": 0.0001851782363977486,
+      "loss": 0.1846,
+      "step": 5140
+    },
+    {
+      "epoch": 0.37093690248565964,
+      "grad_norm": 0.09664303064346313,
+      "learning_rate": 0.00018517534997835187,
+      "loss": 0.0948,
+      "step": 5141
+    },
+    {
+      "epoch": 0.3710090551607201,
+      "grad_norm": 0.15003101527690887,
+      "learning_rate": 0.00018517246355895513,
+      "loss": 0.2187,
+      "step": 5142
+    },
+    {
+      "epoch": 0.3710812078357805,
+      "grad_norm": 0.11940673738718033,
+      "learning_rate": 0.00018516957713955837,
+      "loss": 0.1276,
+      "step": 5143
+    },
+    {
+      "epoch": 0.3711533605108409,
+      "grad_norm": 0.10618552565574646,
+      "learning_rate": 0.00018516669072016163,
+      "loss": 0.1542,
+      "step": 5144
+    },
+    {
+      "epoch": 0.37122551318590136,
+      "grad_norm": 0.11178718507289886,
+      "learning_rate": 0.00018516380430076492,
+      "loss": 0.1326,
+      "step": 5145
+    },
+    {
+      "epoch": 0.3712976658609618,
+      "grad_norm": 0.12645266950130463,
+      "learning_rate": 0.00018516091788136818,
+      "loss": 0.1407,
+      "step": 5146
+    },
+    {
+      "epoch": 0.37136981853602224,
+      "grad_norm": 0.11778198182582855,
+      "learning_rate": 0.00018515803146197145,
+      "loss": 0.1773,
+      "step": 5147
+    },
+    {
+      "epoch": 0.37144197121108263,
+      "grad_norm": 0.12361133098602295,
+      "learning_rate": 0.00018515514504257468,
+      "loss": 0.1357,
+      "step": 5148
+    },
+    {
+      "epoch": 0.3715141238861431,
+      "grad_norm": 0.0917186588048935,
+      "learning_rate": 0.00018515225862317795,
+      "loss": 0.1331,
+      "step": 5149
+    },
+    {
+      "epoch": 0.3715862765612035,
+      "grad_norm": 0.11127284169197083,
+      "learning_rate": 0.0001851493722037812,
+      "loss": 0.1188,
+      "step": 5150
+    },
+    {
+      "epoch": 0.3716584292362639,
+      "grad_norm": 0.15068739652633667,
+      "learning_rate": 0.00018514648578438447,
+      "loss": 0.174,
+      "step": 5151
+    },
+    {
+      "epoch": 0.37173058191132435,
+      "grad_norm": 0.10275956988334656,
+      "learning_rate": 0.00018514359936498776,
+      "loss": 0.1613,
+      "step": 5152
+    },
+    {
+      "epoch": 0.3718027345863848,
+      "grad_norm": 0.1003510132431984,
+      "learning_rate": 0.000185140712945591,
+      "loss": 0.1288,
+      "step": 5153
+    },
+    {
+      "epoch": 0.37187488726144524,
+      "grad_norm": 0.09363386780023575,
+      "learning_rate": 0.00018513782652619426,
+      "loss": 0.1305,
+      "step": 5154
+    },
+    {
+      "epoch": 0.37194703993650563,
+      "grad_norm": 0.1276710331439972,
+      "learning_rate": 0.00018513494010679752,
+      "loss": 0.2236,
+      "step": 5155
+    },
+    {
+      "epoch": 0.3720191926115661,
+      "grad_norm": 0.12521874904632568,
+      "learning_rate": 0.00018513205368740079,
+      "loss": 0.152,
+      "step": 5156
+    },
+    {
+      "epoch": 0.3720913452866265,
+      "grad_norm": 0.12947016954421997,
+      "learning_rate": 0.00018512916726800405,
+      "loss": 0.1358,
+      "step": 5157
+    },
+    {
+      "epoch": 0.3721634979616869,
+      "grad_norm": 0.12372040748596191,
+      "learning_rate": 0.0001851262808486073,
+      "loss": 0.1574,
+      "step": 5158
+    },
+    {
+      "epoch": 0.37223565063674735,
+      "grad_norm": 0.18169283866882324,
+      "learning_rate": 0.00018512339442921058,
+      "loss": 0.1858,
+      "step": 5159
+    },
+    {
+      "epoch": 0.3723078033118078,
+      "grad_norm": 0.12034885585308075,
+      "learning_rate": 0.00018512050800981384,
+      "loss": 0.1416,
+      "step": 5160
+    },
+    {
+      "epoch": 0.37237995598686824,
+      "grad_norm": 0.09472742676734924,
+      "learning_rate": 0.0001851176215904171,
+      "loss": 0.1548,
+      "step": 5161
+    },
+    {
+      "epoch": 0.3724521086619286,
+      "grad_norm": 0.09557875245809555,
+      "learning_rate": 0.00018511473517102036,
+      "loss": 0.1493,
+      "step": 5162
+    },
+    {
+      "epoch": 0.37252426133698907,
+      "grad_norm": 0.12181645631790161,
+      "learning_rate": 0.00018511184875162363,
+      "loss": 0.1092,
+      "step": 5163
+    },
+    {
+      "epoch": 0.3725964140120495,
+      "grad_norm": 0.14272871613502502,
+      "learning_rate": 0.00018510896233222686,
+      "loss": 0.2234,
+      "step": 5164
+    },
+    {
+      "epoch": 0.3726685666871099,
+      "grad_norm": 0.11819078773260117,
+      "learning_rate": 0.00018510607591283013,
+      "loss": 0.1381,
+      "step": 5165
+    },
+    {
+      "epoch": 0.37274071936217035,
+      "grad_norm": 0.1666698306798935,
+      "learning_rate": 0.00018510318949343342,
+      "loss": 0.2006,
+      "step": 5166
+    },
+    {
+      "epoch": 0.3728128720372308,
+      "grad_norm": 0.1215706467628479,
+      "learning_rate": 0.00018510030307403668,
+      "loss": 0.1684,
+      "step": 5167
+    },
+    {
+      "epoch": 0.37288502471229124,
+      "grad_norm": 0.1380370706319809,
+      "learning_rate": 0.00018509741665463994,
+      "loss": 0.2058,
+      "step": 5168
+    },
+    {
+      "epoch": 0.3729571773873516,
+      "grad_norm": 0.10384880006313324,
+      "learning_rate": 0.0001850945302352432,
+      "loss": 0.156,
+      "step": 5169
+    },
+    {
+      "epoch": 0.37302933006241207,
+      "grad_norm": 0.11163298040628433,
+      "learning_rate": 0.00018509164381584644,
+      "loss": 0.1436,
+      "step": 5170
+    },
+    {
+      "epoch": 0.3731014827374725,
+      "grad_norm": 0.13944418728351593,
+      "learning_rate": 0.0001850887573964497,
+      "loss": 0.1954,
+      "step": 5171
+    },
+    {
+      "epoch": 0.3731736354125329,
+      "grad_norm": 0.17720527946949005,
+      "learning_rate": 0.00018508587097705297,
+      "loss": 0.1473,
+      "step": 5172
+    },
+    {
+      "epoch": 0.37324578808759334,
+      "grad_norm": 0.10967066138982773,
+      "learning_rate": 0.00018508298455765626,
+      "loss": 0.1882,
+      "step": 5173
+    },
+    {
+      "epoch": 0.3733179407626538,
+      "grad_norm": 0.12297673523426056,
+      "learning_rate": 0.00018508009813825952,
+      "loss": 0.1715,
+      "step": 5174
+    },
+    {
+      "epoch": 0.3733900934377142,
+      "grad_norm": 0.15667521953582764,
+      "learning_rate": 0.00018507721171886275,
+      "loss": 0.1389,
+      "step": 5175
+    },
+    {
+      "epoch": 0.3734622461127746,
+      "grad_norm": 0.12142255902290344,
+      "learning_rate": 0.00018507432529946602,
+      "loss": 0.1701,
+      "step": 5176
+    },
+    {
+      "epoch": 0.37353439878783506,
+      "grad_norm": 0.10766392946243286,
+      "learning_rate": 0.00018507143888006928,
+      "loss": 0.1125,
+      "step": 5177
+    },
+    {
+      "epoch": 0.3736065514628955,
+      "grad_norm": 0.10855721682310104,
+      "learning_rate": 0.00018506855246067254,
+      "loss": 0.1527,
+      "step": 5178
+    },
+    {
+      "epoch": 0.3736787041379559,
+      "grad_norm": 0.09985214471817017,
+      "learning_rate": 0.0001850656660412758,
+      "loss": 0.1422,
+      "step": 5179
+    },
+    {
+      "epoch": 0.37375085681301634,
+      "grad_norm": 0.11852970719337463,
+      "learning_rate": 0.00018506277962187907,
+      "loss": 0.1296,
+      "step": 5180
+    },
+    {
+      "epoch": 0.3738230094880768,
+      "grad_norm": 0.13748237490653992,
+      "learning_rate": 0.00018505989320248233,
+      "loss": 0.1861,
+      "step": 5181
+    },
+    {
+      "epoch": 0.3738951621631372,
+      "grad_norm": 0.10805381834506989,
+      "learning_rate": 0.0001850570067830856,
+      "loss": 0.1693,
+      "step": 5182
+    },
+    {
+      "epoch": 0.3739673148381976,
+      "grad_norm": 0.10942400246858597,
+      "learning_rate": 0.00018505412036368886,
+      "loss": 0.1575,
+      "step": 5183
+    },
+    {
+      "epoch": 0.37403946751325806,
+      "grad_norm": 0.1067863404750824,
+      "learning_rate": 0.00018505123394429212,
+      "loss": 0.1128,
+      "step": 5184
+    },
+    {
+      "epoch": 0.3741116201883185,
+      "grad_norm": 0.13118085265159607,
+      "learning_rate": 0.00018504834752489538,
+      "loss": 0.1486,
+      "step": 5185
+    },
+    {
+      "epoch": 0.3741837728633789,
+      "grad_norm": 0.158247172832489,
+      "learning_rate": 0.00018504546110549862,
+      "loss": 0.1464,
+      "step": 5186
+    },
+    {
+      "epoch": 0.37425592553843934,
+      "grad_norm": 0.12931883335113525,
+      "learning_rate": 0.0001850425746861019,
+      "loss": 0.1421,
+      "step": 5187
+    },
+    {
+      "epoch": 0.3743280782134998,
+      "grad_norm": 0.11958953738212585,
+      "learning_rate": 0.00018503968826670517,
+      "loss": 0.1432,
+      "step": 5188
+    },
+    {
+      "epoch": 0.37440023088856017,
+      "grad_norm": 0.15297405421733856,
+      "learning_rate": 0.00018503680184730844,
+      "loss": 0.1302,
+      "step": 5189
+    },
+    {
+      "epoch": 0.3744723835636206,
+      "grad_norm": 0.19646477699279785,
+      "learning_rate": 0.0001850339154279117,
+      "loss": 0.1448,
+      "step": 5190
+    },
+    {
+      "epoch": 0.37454453623868106,
+      "grad_norm": 0.15121282637119293,
+      "learning_rate": 0.00018503102900851493,
+      "loss": 0.1458,
+      "step": 5191
+    },
+    {
+      "epoch": 0.3746166889137415,
+      "grad_norm": 0.14924101531505585,
+      "learning_rate": 0.0001850281425891182,
+      "loss": 0.1541,
+      "step": 5192
+    },
+    {
+      "epoch": 0.3746888415888019,
+      "grad_norm": 0.12003999948501587,
+      "learning_rate": 0.00018502525616972146,
+      "loss": 0.1964,
+      "step": 5193
+    },
+    {
+      "epoch": 0.37476099426386233,
+      "grad_norm": 0.10451246798038483,
+      "learning_rate": 0.00018502236975032472,
+      "loss": 0.1744,
+      "step": 5194
+    },
+    {
+      "epoch": 0.3748331469389228,
+      "grad_norm": 0.14572416245937347,
+      "learning_rate": 0.000185019483330928,
+      "loss": 0.1611,
+      "step": 5195
+    },
+    {
+      "epoch": 0.37490529961398317,
+      "grad_norm": 0.12013660371303558,
+      "learning_rate": 0.00018501659691153125,
+      "loss": 0.1958,
+      "step": 5196
+    },
+    {
+      "epoch": 0.3749774522890436,
+      "grad_norm": 0.12600034475326538,
+      "learning_rate": 0.0001850137104921345,
+      "loss": 0.1563,
+      "step": 5197
+    },
+    {
+      "epoch": 0.37504960496410406,
+      "grad_norm": 0.11181921511888504,
+      "learning_rate": 0.00018501082407273777,
+      "loss": 0.1529,
+      "step": 5198
+    },
+    {
+      "epoch": 0.3751217576391645,
+      "grad_norm": 0.13128091394901276,
+      "learning_rate": 0.00018500793765334104,
+      "loss": 0.1629,
+      "step": 5199
+    },
+    {
+      "epoch": 0.3751939103142249,
+      "grad_norm": 0.10196653008460999,
+      "learning_rate": 0.0001850050512339443,
+      "loss": 0.1718,
+      "step": 5200
+    },
+    {
+      "epoch": 0.37526606298928533,
+      "grad_norm": 0.10212597995996475,
+      "learning_rate": 0.00018500216481454756,
+      "loss": 0.1602,
+      "step": 5201
+    },
+    {
+      "epoch": 0.3753382156643458,
+      "grad_norm": 0.12853200733661652,
+      "learning_rate": 0.00018499927839515083,
+      "loss": 0.153,
+      "step": 5202
+    },
+    {
+      "epoch": 0.37541036833940616,
+      "grad_norm": 0.11547980457544327,
+      "learning_rate": 0.0001849963919757541,
+      "loss": 0.165,
+      "step": 5203
+    },
+    {
+      "epoch": 0.3754825210144666,
+      "grad_norm": 0.11114273220300674,
+      "learning_rate": 0.00018499350555635735,
+      "loss": 0.1702,
+      "step": 5204
+    },
+    {
+      "epoch": 0.37555467368952705,
+      "grad_norm": 0.10827504843473434,
+      "learning_rate": 0.00018499061913696062,
+      "loss": 0.1513,
+      "step": 5205
+    },
+    {
+      "epoch": 0.37562682636458744,
+      "grad_norm": 0.12006576359272003,
+      "learning_rate": 0.00018498773271756388,
+      "loss": 0.0892,
+      "step": 5206
+    },
+    {
+      "epoch": 0.3756989790396479,
+      "grad_norm": 0.12298358976840973,
+      "learning_rate": 0.00018498484629816711,
+      "loss": 0.1007,
+      "step": 5207
+    },
+    {
+      "epoch": 0.37577113171470833,
+      "grad_norm": 0.12523004412651062,
+      "learning_rate": 0.00018498195987877038,
+      "loss": 0.157,
+      "step": 5208
+    },
+    {
+      "epoch": 0.3758432843897688,
+      "grad_norm": 0.12031126022338867,
+      "learning_rate": 0.00018497907345937367,
+      "loss": 0.1319,
+      "step": 5209
+    },
+    {
+      "epoch": 0.37591543706482916,
+      "grad_norm": 0.15156899392604828,
+      "learning_rate": 0.00018497618703997693,
+      "loss": 0.1494,
+      "step": 5210
+    },
+    {
+      "epoch": 0.3759875897398896,
+      "grad_norm": 0.12533117830753326,
+      "learning_rate": 0.0001849733006205802,
+      "loss": 0.168,
+      "step": 5211
+    },
+    {
+      "epoch": 0.37605974241495005,
+      "grad_norm": 0.11359720677137375,
+      "learning_rate": 0.00018497041420118343,
+      "loss": 0.1936,
+      "step": 5212
+    },
+    {
+      "epoch": 0.37613189509001044,
+      "grad_norm": 0.12386760860681534,
+      "learning_rate": 0.0001849675277817867,
+      "loss": 0.2077,
+      "step": 5213
+    },
+    {
+      "epoch": 0.3762040477650709,
+      "grad_norm": 0.09602401405572891,
+      "learning_rate": 0.00018496464136238995,
+      "loss": 0.138,
+      "step": 5214
+    },
+    {
+      "epoch": 0.3762762004401313,
+      "grad_norm": 0.11211474239826202,
+      "learning_rate": 0.00018496175494299322,
+      "loss": 0.1555,
+      "step": 5215
+    },
+    {
+      "epoch": 0.37634835311519177,
+      "grad_norm": 0.145424485206604,
+      "learning_rate": 0.0001849588685235965,
+      "loss": 0.1402,
+      "step": 5216
+    },
+    {
+      "epoch": 0.37642050579025216,
+      "grad_norm": 0.1361163705587387,
+      "learning_rate": 0.00018495598210419974,
+      "loss": 0.1345,
+      "step": 5217
+    },
+    {
+      "epoch": 0.3764926584653126,
+      "grad_norm": 0.12563641369342804,
+      "learning_rate": 0.000184953095684803,
+      "loss": 0.169,
+      "step": 5218
+    },
+    {
+      "epoch": 0.37656481114037305,
+      "grad_norm": 0.10223568975925446,
+      "learning_rate": 0.00018495020926540627,
+      "loss": 0.144,
+      "step": 5219
+    },
+    {
+      "epoch": 0.37663696381543343,
+      "grad_norm": 0.10732857137918472,
+      "learning_rate": 0.00018494732284600953,
+      "loss": 0.1629,
+      "step": 5220
+    },
+    {
+      "epoch": 0.3767091164904939,
+      "grad_norm": 0.15483683347702026,
+      "learning_rate": 0.0001849444364266128,
+      "loss": 0.1828,
+      "step": 5221
+    },
+    {
+      "epoch": 0.3767812691655543,
+      "grad_norm": 0.12385743111371994,
+      "learning_rate": 0.00018494155000721606,
+      "loss": 0.1336,
+      "step": 5222
+    },
+    {
+      "epoch": 0.37685342184061477,
+      "grad_norm": 0.10738009959459305,
+      "learning_rate": 0.00018493866358781932,
+      "loss": 0.1541,
+      "step": 5223
+    },
+    {
+      "epoch": 0.37692557451567515,
+      "grad_norm": 0.12624196708202362,
+      "learning_rate": 0.00018493577716842258,
+      "loss": 0.1947,
+      "step": 5224
+    },
+    {
+      "epoch": 0.3769977271907356,
+      "grad_norm": 0.1286533772945404,
+      "learning_rate": 0.00018493289074902585,
+      "loss": 0.1794,
+      "step": 5225
+    },
+    {
+      "epoch": 0.37706987986579604,
+      "grad_norm": 0.11351227760314941,
+      "learning_rate": 0.0001849300043296291,
+      "loss": 0.0987,
+      "step": 5226
+    },
+    {
+      "epoch": 0.37714203254085643,
+      "grad_norm": 0.1511540710926056,
+      "learning_rate": 0.00018492711791023237,
+      "loss": 0.139,
+      "step": 5227
+    },
+    {
+      "epoch": 0.3772141852159169,
+      "grad_norm": 0.113191619515419,
+      "learning_rate": 0.0001849242314908356,
+      "loss": 0.177,
+      "step": 5228
+    },
+    {
+      "epoch": 0.3772863378909773,
+      "grad_norm": 0.12065392732620239,
+      "learning_rate": 0.00018492134507143887,
+      "loss": 0.156,
+      "step": 5229
+    },
+    {
+      "epoch": 0.37735849056603776,
+      "grad_norm": 0.09924370795488358,
+      "learning_rate": 0.00018491845865204216,
+      "loss": 0.1436,
+      "step": 5230
+    },
+    {
+      "epoch": 0.37743064324109815,
+      "grad_norm": 0.10744208842515945,
+      "learning_rate": 0.00018491557223264542,
+      "loss": 0.1498,
+      "step": 5231
+    },
+    {
+      "epoch": 0.3775027959161586,
+      "grad_norm": 0.11493775993585587,
+      "learning_rate": 0.0001849126858132487,
+      "loss": 0.1281,
+      "step": 5232
+    },
+    {
+      "epoch": 0.37757494859121904,
+      "grad_norm": 0.15129004418849945,
+      "learning_rate": 0.00018490979939385192,
+      "loss": 0.1879,
+      "step": 5233
+    },
+    {
+      "epoch": 0.37764710126627943,
+      "grad_norm": 0.14441713690757751,
+      "learning_rate": 0.00018490691297445519,
+      "loss": 0.2083,
+      "step": 5234
+    },
+    {
+      "epoch": 0.37771925394133987,
+      "grad_norm": 0.10310018062591553,
+      "learning_rate": 0.00018490402655505845,
+      "loss": 0.1561,
+      "step": 5235
+    },
+    {
+      "epoch": 0.3777914066164003,
+      "grad_norm": 0.1262882649898529,
+      "learning_rate": 0.0001849011401356617,
+      "loss": 0.1585,
+      "step": 5236
+    },
+    {
+      "epoch": 0.3778635592914607,
+      "grad_norm": 0.10732994228601456,
+      "learning_rate": 0.000184898253716265,
+      "loss": 0.1657,
+      "step": 5237
+    },
+    {
+      "epoch": 0.37793571196652115,
+      "grad_norm": 0.11948318034410477,
+      "learning_rate": 0.00018489536729686824,
+      "loss": 0.1835,
+      "step": 5238
+    },
+    {
+      "epoch": 0.3780078646415816,
+      "grad_norm": 0.1175948828458786,
+      "learning_rate": 0.0001848924808774715,
+      "loss": 0.1943,
+      "step": 5239
+    },
+    {
+      "epoch": 0.37808001731664204,
+      "grad_norm": 0.10899997502565384,
+      "learning_rate": 0.00018488959445807476,
+      "loss": 0.1541,
+      "step": 5240
+    },
+    {
+      "epoch": 0.3781521699917024,
+      "grad_norm": 0.0994245633482933,
+      "learning_rate": 0.00018488670803867803,
+      "loss": 0.1894,
+      "step": 5241
+    },
+    {
+      "epoch": 0.37822432266676287,
+      "grad_norm": 0.11324607580900192,
+      "learning_rate": 0.0001848838216192813,
+      "loss": 0.175,
+      "step": 5242
+    },
+    {
+      "epoch": 0.3782964753418233,
+      "grad_norm": 0.09962138533592224,
+      "learning_rate": 0.00018488093519988455,
+      "loss": 0.2107,
+      "step": 5243
+    },
+    {
+      "epoch": 0.3783686280168837,
+      "grad_norm": 0.11725051701068878,
+      "learning_rate": 0.00018487804878048782,
+      "loss": 0.1915,
+      "step": 5244
+    },
+    {
+      "epoch": 0.37844078069194415,
+      "grad_norm": 0.11407988518476486,
+      "learning_rate": 0.00018487516236109108,
+      "loss": 0.1548,
+      "step": 5245
+    },
+    {
+      "epoch": 0.3785129333670046,
+      "grad_norm": 0.09773348271846771,
+      "learning_rate": 0.00018487227594169434,
+      "loss": 0.184,
+      "step": 5246
+    },
+    {
+      "epoch": 0.37858508604206503,
+      "grad_norm": 0.11072361469268799,
+      "learning_rate": 0.0001848693895222976,
+      "loss": 0.16,
+      "step": 5247
+    },
+    {
+      "epoch": 0.3786572387171254,
+      "grad_norm": 0.12344031035900116,
+      "learning_rate": 0.00018486650310290087,
+      "loss": 0.2174,
+      "step": 5248
+    },
+    {
+      "epoch": 0.37872939139218587,
+      "grad_norm": 0.11325129121541977,
+      "learning_rate": 0.0001848636166835041,
+      "loss": 0.1661,
+      "step": 5249
+    },
+    {
+      "epoch": 0.3788015440672463,
+      "grad_norm": 0.10998645424842834,
+      "learning_rate": 0.00018486073026410737,
+      "loss": 0.1149,
+      "step": 5250
+    },
+    {
+      "epoch": 0.3788736967423067,
+      "grad_norm": 0.11722152680158615,
+      "learning_rate": 0.00018485784384471066,
+      "loss": 0.1137,
+      "step": 5251
+    },
+    {
+      "epoch": 0.37894584941736714,
+      "grad_norm": 0.09863753616809845,
+      "learning_rate": 0.00018485495742531392,
+      "loss": 0.181,
+      "step": 5252
+    },
+    {
+      "epoch": 0.3790180020924276,
+      "grad_norm": 0.12387730181217194,
+      "learning_rate": 0.00018485207100591718,
+      "loss": 0.1864,
+      "step": 5253
+    },
+    {
+      "epoch": 0.37909015476748803,
+      "grad_norm": 0.11490000039339066,
+      "learning_rate": 0.00018484918458652042,
+      "loss": 0.1487,
+      "step": 5254
+    },
+    {
+      "epoch": 0.3791623074425484,
+      "grad_norm": 0.14609166979789734,
+      "learning_rate": 0.00018484629816712368,
+      "loss": 0.2246,
+      "step": 5255
+    },
+    {
+      "epoch": 0.37923446011760886,
+      "grad_norm": 0.11078675091266632,
+      "learning_rate": 0.00018484341174772694,
+      "loss": 0.186,
+      "step": 5256
+    },
+    {
+      "epoch": 0.3793066127926693,
+      "grad_norm": 0.09130535274744034,
+      "learning_rate": 0.0001848405253283302,
+      "loss": 0.1847,
+      "step": 5257
+    },
+    {
+      "epoch": 0.3793787654677297,
+      "grad_norm": 0.12202174961566925,
+      "learning_rate": 0.0001848376389089335,
+      "loss": 0.171,
+      "step": 5258
+    },
+    {
+      "epoch": 0.37945091814279014,
+      "grad_norm": 0.09502533078193665,
+      "learning_rate": 0.00018483475248953673,
+      "loss": 0.116,
+      "step": 5259
+    },
+    {
+      "epoch": 0.3795230708178506,
+      "grad_norm": 0.09844629466533661,
+      "learning_rate": 0.00018483186607014,
+      "loss": 0.1335,
+      "step": 5260
+    },
+    {
+      "epoch": 0.379595223492911,
+      "grad_norm": 0.10482223331928253,
+      "learning_rate": 0.00018482897965074326,
+      "loss": 0.156,
+      "step": 5261
+    },
+    {
+      "epoch": 0.3796673761679714,
+      "grad_norm": 0.14621932804584503,
+      "learning_rate": 0.00018482609323134652,
+      "loss": 0.2034,
+      "step": 5262
+    },
+    {
+      "epoch": 0.37973952884303186,
+      "grad_norm": 0.11277309060096741,
+      "learning_rate": 0.00018482320681194978,
+      "loss": 0.1122,
+      "step": 5263
+    },
+    {
+      "epoch": 0.3798116815180923,
+      "grad_norm": 0.09830714017152786,
+      "learning_rate": 0.00018482032039255305,
+      "loss": 0.1383,
+      "step": 5264
+    },
+    {
+      "epoch": 0.3798838341931527,
+      "grad_norm": 0.12473218888044357,
+      "learning_rate": 0.0001848174339731563,
+      "loss": 0.1575,
+      "step": 5265
+    },
+    {
+      "epoch": 0.37995598686821314,
+      "grad_norm": 0.11305204033851624,
+      "learning_rate": 0.00018481454755375957,
+      "loss": 0.1695,
+      "step": 5266
+    },
+    {
+      "epoch": 0.3800281395432736,
+      "grad_norm": 0.13128140568733215,
+      "learning_rate": 0.00018481166113436284,
+      "loss": 0.1735,
+      "step": 5267
+    },
+    {
+      "epoch": 0.38010029221833397,
+      "grad_norm": 0.1158086508512497,
+      "learning_rate": 0.0001848087747149661,
+      "loss": 0.1545,
+      "step": 5268
+    },
+    {
+      "epoch": 0.3801724448933944,
+      "grad_norm": 0.11829610913991928,
+      "learning_rate": 0.00018480588829556936,
+      "loss": 0.168,
+      "step": 5269
+    },
+    {
+      "epoch": 0.38024459756845486,
+      "grad_norm": 0.09383835643529892,
+      "learning_rate": 0.0001848030018761726,
+      "loss": 0.1199,
+      "step": 5270
+    },
+    {
+      "epoch": 0.3803167502435153,
+      "grad_norm": 0.12397849559783936,
+      "learning_rate": 0.00018480011545677586,
+      "loss": 0.1468,
+      "step": 5271
+    },
+    {
+      "epoch": 0.3803889029185757,
+      "grad_norm": 0.11687537282705307,
+      "learning_rate": 0.00018479722903737915,
+      "loss": 0.1096,
+      "step": 5272
+    },
+    {
+      "epoch": 0.38046105559363613,
+      "grad_norm": 0.12818457186222076,
+      "learning_rate": 0.0001847943426179824,
+      "loss": 0.1712,
+      "step": 5273
+    },
+    {
+      "epoch": 0.3805332082686966,
+      "grad_norm": 0.10428598523139954,
+      "learning_rate": 0.00018479145619858568,
+      "loss": 0.1297,
+      "step": 5274
+    },
+    {
+      "epoch": 0.38060536094375697,
+      "grad_norm": 0.08568240702152252,
+      "learning_rate": 0.0001847885697791889,
+      "loss": 0.1753,
+      "step": 5275
+    },
+    {
+      "epoch": 0.3806775136188174,
+      "grad_norm": 0.10244014859199524,
+      "learning_rate": 0.00018478568335979217,
+      "loss": 0.1478,
+      "step": 5276
+    },
+    {
+      "epoch": 0.38074966629387785,
+      "grad_norm": 0.1192367896437645,
+      "learning_rate": 0.00018478279694039544,
+      "loss": 0.1416,
+      "step": 5277
+    },
+    {
+      "epoch": 0.3808218189689383,
+      "grad_norm": 0.1428164690732956,
+      "learning_rate": 0.0001847799105209987,
+      "loss": 0.1437,
+      "step": 5278
+    },
+    {
+      "epoch": 0.3808939716439987,
+      "grad_norm": 0.11545929312705994,
+      "learning_rate": 0.000184777024101602,
+      "loss": 0.1769,
+      "step": 5279
+    },
+    {
+      "epoch": 0.38096612431905913,
+      "grad_norm": 0.12664854526519775,
+      "learning_rate": 0.00018477413768220523,
+      "loss": 0.1852,
+      "step": 5280
+    },
+    {
+      "epoch": 0.3810382769941196,
+      "grad_norm": 0.09942354261875153,
+      "learning_rate": 0.0001847712512628085,
+      "loss": 0.1493,
+      "step": 5281
+    },
+    {
+      "epoch": 0.38111042966917996,
+      "grad_norm": 0.12140817940235138,
+      "learning_rate": 0.00018476836484341175,
+      "loss": 0.1723,
+      "step": 5282
+    },
+    {
+      "epoch": 0.3811825823442404,
+      "grad_norm": 0.11576629430055618,
+      "learning_rate": 0.00018476547842401501,
+      "loss": 0.1425,
+      "step": 5283
+    },
+    {
+      "epoch": 0.38125473501930085,
+      "grad_norm": 0.1314578354358673,
+      "learning_rate": 0.00018476259200461828,
+      "loss": 0.1543,
+      "step": 5284
+    },
+    {
+      "epoch": 0.3813268876943613,
+      "grad_norm": 0.1331385374069214,
+      "learning_rate": 0.00018475970558522154,
+      "loss": 0.1852,
+      "step": 5285
+    },
+    {
+      "epoch": 0.3813990403694217,
+      "grad_norm": 0.10998408496379852,
+      "learning_rate": 0.0001847568191658248,
+      "loss": 0.1247,
+      "step": 5286
+    },
+    {
+      "epoch": 0.3814711930444821,
+      "grad_norm": 0.12362408638000488,
+      "learning_rate": 0.00018475393274642807,
+      "loss": 0.184,
+      "step": 5287
+    },
+    {
+      "epoch": 0.38154334571954257,
+      "grad_norm": 0.09371355921030045,
+      "learning_rate": 0.00018475104632703133,
+      "loss": 0.1363,
+      "step": 5288
+    },
+    {
+      "epoch": 0.38161549839460296,
+      "grad_norm": 0.12054570764303207,
+      "learning_rate": 0.0001847481599076346,
+      "loss": 0.1973,
+      "step": 5289
+    },
+    {
+      "epoch": 0.3816876510696634,
+      "grad_norm": 0.11465035378932953,
+      "learning_rate": 0.00018474527348823786,
+      "loss": 0.1481,
+      "step": 5290
+    },
+    {
+      "epoch": 0.38175980374472385,
+      "grad_norm": 0.11174909770488739,
+      "learning_rate": 0.00018474238706884112,
+      "loss": 0.1507,
+      "step": 5291
+    },
+    {
+      "epoch": 0.3818319564197843,
+      "grad_norm": 0.10161197930574417,
+      "learning_rate": 0.00018473950064944435,
+      "loss": 0.1777,
+      "step": 5292
+    },
+    {
+      "epoch": 0.3819041090948447,
+      "grad_norm": 0.11192326992750168,
+      "learning_rate": 0.00018473661423004764,
+      "loss": 0.1521,
+      "step": 5293
+    },
+    {
+      "epoch": 0.3819762617699051,
+      "grad_norm": 0.11598385125398636,
+      "learning_rate": 0.0001847337278106509,
+      "loss": 0.1865,
+      "step": 5294
+    },
+    {
+      "epoch": 0.38204841444496557,
+      "grad_norm": 0.1161946952342987,
+      "learning_rate": 0.00018473084139125417,
+      "loss": 0.1116,
+      "step": 5295
+    },
+    {
+      "epoch": 0.38212056712002596,
+      "grad_norm": 0.11318299919366837,
+      "learning_rate": 0.00018472795497185743,
+      "loss": 0.1465,
+      "step": 5296
+    },
+    {
+      "epoch": 0.3821927197950864,
+      "grad_norm": 0.12316545844078064,
+      "learning_rate": 0.00018472506855246067,
+      "loss": 0.1831,
+      "step": 5297
+    },
+    {
+      "epoch": 0.38226487247014684,
+      "grad_norm": 0.1300279051065445,
+      "learning_rate": 0.00018472218213306393,
+      "loss": 0.1342,
+      "step": 5298
+    },
+    {
+      "epoch": 0.38233702514520723,
+      "grad_norm": 0.11169512569904327,
+      "learning_rate": 0.0001847192957136672,
+      "loss": 0.1615,
+      "step": 5299
+    },
+    {
+      "epoch": 0.3824091778202677,
+      "grad_norm": 0.1198602169752121,
+      "learning_rate": 0.00018471640929427048,
+      "loss": 0.1483,
+      "step": 5300
+    },
+    {
+      "epoch": 0.3824813304953281,
+      "grad_norm": 0.12193506211042404,
+      "learning_rate": 0.00018471352287487375,
+      "loss": 0.1258,
+      "step": 5301
+    },
+    {
+      "epoch": 0.38255348317038856,
+      "grad_norm": 0.1365336775779724,
+      "learning_rate": 0.00018471063645547698,
+      "loss": 0.178,
+      "step": 5302
+    },
+    {
+      "epoch": 0.38262563584544895,
+      "grad_norm": 0.13387756049633026,
+      "learning_rate": 0.00018470775003608025,
+      "loss": 0.1403,
+      "step": 5303
+    },
+    {
+      "epoch": 0.3826977885205094,
+      "grad_norm": 0.1223507970571518,
+      "learning_rate": 0.0001847048636166835,
+      "loss": 0.1892,
+      "step": 5304
+    },
+    {
+      "epoch": 0.38276994119556984,
+      "grad_norm": 0.11407187581062317,
+      "learning_rate": 0.00018470197719728677,
+      "loss": 0.1332,
+      "step": 5305
+    },
+    {
+      "epoch": 0.38284209387063023,
+      "grad_norm": 0.10814730077981949,
+      "learning_rate": 0.00018469909077789003,
+      "loss": 0.1567,
+      "step": 5306
+    },
+    {
+      "epoch": 0.3829142465456907,
+      "grad_norm": 0.13604643940925598,
+      "learning_rate": 0.0001846962043584933,
+      "loss": 0.1558,
+      "step": 5307
+    },
+    {
+      "epoch": 0.3829863992207511,
+      "grad_norm": 0.12819768488407135,
+      "learning_rate": 0.00018469331793909656,
+      "loss": 0.1514,
+      "step": 5308
+    },
+    {
+      "epoch": 0.38305855189581156,
+      "grad_norm": 0.1057751253247261,
+      "learning_rate": 0.00018469043151969982,
+      "loss": 0.1247,
+      "step": 5309
+    },
+    {
+      "epoch": 0.38313070457087195,
+      "grad_norm": 0.10188581049442291,
+      "learning_rate": 0.0001846875451003031,
+      "loss": 0.2009,
+      "step": 5310
+    },
+    {
+      "epoch": 0.3832028572459324,
+      "grad_norm": 0.10390143096446991,
+      "learning_rate": 0.00018468465868090635,
+      "loss": 0.1475,
+      "step": 5311
+    },
+    {
+      "epoch": 0.38327500992099284,
+      "grad_norm": 0.10660845041275024,
+      "learning_rate": 0.0001846817722615096,
+      "loss": 0.1695,
+      "step": 5312
+    },
+    {
+      "epoch": 0.3833471625960532,
+      "grad_norm": 0.12353482842445374,
+      "learning_rate": 0.00018467888584211285,
+      "loss": 0.1797,
+      "step": 5313
+    },
+    {
+      "epoch": 0.38341931527111367,
+      "grad_norm": 0.12088686227798462,
+      "learning_rate": 0.00018467599942271614,
+      "loss": 0.196,
+      "step": 5314
+    },
+    {
+      "epoch": 0.3834914679461741,
+      "grad_norm": 0.09210559725761414,
+      "learning_rate": 0.0001846731130033194,
+      "loss": 0.1526,
+      "step": 5315
+    },
+    {
+      "epoch": 0.38356362062123456,
+      "grad_norm": 0.1001611053943634,
+      "learning_rate": 0.00018467022658392266,
+      "loss": 0.1851,
+      "step": 5316
+    },
+    {
+      "epoch": 0.38363577329629495,
+      "grad_norm": 0.10381637513637543,
+      "learning_rate": 0.00018466734016452593,
+      "loss": 0.13,
+      "step": 5317
+    },
+    {
+      "epoch": 0.3837079259713554,
+      "grad_norm": 0.13279971480369568,
+      "learning_rate": 0.00018466445374512916,
+      "loss": 0.1296,
+      "step": 5318
+    },
+    {
+      "epoch": 0.38378007864641583,
+      "grad_norm": 0.13534903526306152,
+      "learning_rate": 0.00018466156732573243,
+      "loss": 0.1512,
+      "step": 5319
+    },
+    {
+      "epoch": 0.3838522313214762,
+      "grad_norm": 0.11557264626026154,
+      "learning_rate": 0.0001846586809063357,
+      "loss": 0.161,
+      "step": 5320
+    },
+    {
+      "epoch": 0.38392438399653667,
+      "grad_norm": 0.10474171489477158,
+      "learning_rate": 0.00018465579448693898,
+      "loss": 0.1979,
+      "step": 5321
+    },
+    {
+      "epoch": 0.3839965366715971,
+      "grad_norm": 0.13840435445308685,
+      "learning_rate": 0.00018465290806754224,
+      "loss": 0.1288,
+      "step": 5322
+    },
+    {
+      "epoch": 0.38406868934665755,
+      "grad_norm": 0.12595689296722412,
+      "learning_rate": 0.00018465002164814548,
+      "loss": 0.1164,
+      "step": 5323
+    },
+    {
+      "epoch": 0.38414084202171794,
+      "grad_norm": 0.10718733072280884,
+      "learning_rate": 0.00018464713522874874,
+      "loss": 0.1421,
+      "step": 5324
+    },
+    {
+      "epoch": 0.3842129946967784,
+      "grad_norm": 0.12469929456710815,
+      "learning_rate": 0.000184644248809352,
+      "loss": 0.2049,
+      "step": 5325
+    },
+    {
+      "epoch": 0.38428514737183883,
+      "grad_norm": 0.10900650918483734,
+      "learning_rate": 0.00018464136238995527,
+      "loss": 0.1824,
+      "step": 5326
+    },
+    {
+      "epoch": 0.3843573000468992,
+      "grad_norm": 0.12053513526916504,
+      "learning_rate": 0.00018463847597055853,
+      "loss": 0.1582,
+      "step": 5327
+    },
+    {
+      "epoch": 0.38442945272195966,
+      "grad_norm": 0.14565838873386383,
+      "learning_rate": 0.0001846355895511618,
+      "loss": 0.1341,
+      "step": 5328
+    },
+    {
+      "epoch": 0.3845016053970201,
+      "grad_norm": 0.11942523717880249,
+      "learning_rate": 0.00018463270313176506,
+      "loss": 0.1358,
+      "step": 5329
+    },
+    {
+      "epoch": 0.3845737580720805,
+      "grad_norm": 0.16042739152908325,
+      "learning_rate": 0.00018462981671236832,
+      "loss": 0.1976,
+      "step": 5330
+    },
+    {
+      "epoch": 0.38464591074714094,
+      "grad_norm": 0.13497167825698853,
+      "learning_rate": 0.00018462693029297158,
+      "loss": 0.1564,
+      "step": 5331
+    },
+    {
+      "epoch": 0.3847180634222014,
+      "grad_norm": 0.10613252967596054,
+      "learning_rate": 0.00018462404387357484,
+      "loss": 0.1579,
+      "step": 5332
+    },
+    {
+      "epoch": 0.38479021609726183,
+      "grad_norm": 0.11304750293493271,
+      "learning_rate": 0.0001846211574541781,
+      "loss": 0.1715,
+      "step": 5333
+    },
+    {
+      "epoch": 0.3848623687723222,
+      "grad_norm": 0.12445401400327682,
+      "learning_rate": 0.00018461827103478134,
+      "loss": 0.1411,
+      "step": 5334
+    },
+    {
+      "epoch": 0.38493452144738266,
+      "grad_norm": 0.11099398136138916,
+      "learning_rate": 0.00018461538461538463,
+      "loss": 0.1169,
+      "step": 5335
+    },
+    {
+      "epoch": 0.3850066741224431,
+      "grad_norm": 0.10735882818698883,
+      "learning_rate": 0.0001846124981959879,
+      "loss": 0.132,
+      "step": 5336
+    },
+    {
+      "epoch": 0.3850788267975035,
+      "grad_norm": 0.13184022903442383,
+      "learning_rate": 0.00018460961177659116,
+      "loss": 0.1494,
+      "step": 5337
+    },
+    {
+      "epoch": 0.38515097947256394,
+      "grad_norm": 0.10633637756109238,
+      "learning_rate": 0.00018460672535719442,
+      "loss": 0.1883,
+      "step": 5338
+    },
+    {
+      "epoch": 0.3852231321476244,
+      "grad_norm": 0.10791455209255219,
+      "learning_rate": 0.00018460383893779766,
+      "loss": 0.1568,
+      "step": 5339
+    },
+    {
+      "epoch": 0.3852952848226848,
+      "grad_norm": 0.11143725365400314,
+      "learning_rate": 0.00018460095251840092,
+      "loss": 0.1623,
+      "step": 5340
+    },
+    {
+      "epoch": 0.3853674374977452,
+      "grad_norm": 0.11982785165309906,
+      "learning_rate": 0.00018459806609900418,
+      "loss": 0.1127,
+      "step": 5341
+    },
+    {
+      "epoch": 0.38543959017280566,
+      "grad_norm": 0.0953775942325592,
+      "learning_rate": 0.00018459517967960747,
+      "loss": 0.1433,
+      "step": 5342
+    },
+    {
+      "epoch": 0.3855117428478661,
+      "grad_norm": 0.09541591256856918,
+      "learning_rate": 0.00018459229326021074,
+      "loss": 0.1648,
+      "step": 5343
+    },
+    {
+      "epoch": 0.3855838955229265,
+      "grad_norm": 0.14726461470127106,
+      "learning_rate": 0.00018458940684081397,
+      "loss": 0.1893,
+      "step": 5344
+    },
+    {
+      "epoch": 0.38565604819798693,
+      "grad_norm": 0.1327129751443863,
+      "learning_rate": 0.00018458652042141723,
+      "loss": 0.1897,
+      "step": 5345
+    },
+    {
+      "epoch": 0.3857282008730474,
+      "grad_norm": 0.1486828774213791,
+      "learning_rate": 0.0001845836340020205,
+      "loss": 0.1717,
+      "step": 5346
+    },
+    {
+      "epoch": 0.3858003535481078,
+      "grad_norm": 0.1061648428440094,
+      "learning_rate": 0.00018458074758262376,
+      "loss": 0.0938,
+      "step": 5347
+    },
+    {
+      "epoch": 0.3858725062231682,
+      "grad_norm": 0.1217179223895073,
+      "learning_rate": 0.00018457786116322702,
+      "loss": 0.173,
+      "step": 5348
+    },
+    {
+      "epoch": 0.38594465889822865,
+      "grad_norm": 0.13469278812408447,
+      "learning_rate": 0.00018457497474383029,
+      "loss": 0.1417,
+      "step": 5349
+    },
+    {
+      "epoch": 0.3860168115732891,
+      "grad_norm": 0.10769320279359818,
+      "learning_rate": 0.00018457208832443355,
+      "loss": 0.1652,
+      "step": 5350
+    },
+    {
+      "epoch": 0.3860889642483495,
+      "grad_norm": 0.10926205664873123,
+      "learning_rate": 0.0001845692019050368,
+      "loss": 0.1439,
+      "step": 5351
+    },
+    {
+      "epoch": 0.38616111692340993,
+      "grad_norm": 0.09097305685281754,
+      "learning_rate": 0.00018456631548564008,
+      "loss": 0.1237,
+      "step": 5352
+    },
+    {
+      "epoch": 0.3862332695984704,
+      "grad_norm": 0.16229401528835297,
+      "learning_rate": 0.00018456342906624334,
+      "loss": 0.1723,
+      "step": 5353
+    },
+    {
+      "epoch": 0.3863054222735308,
+      "grad_norm": 0.10649501532316208,
+      "learning_rate": 0.0001845605426468466,
+      "loss": 0.155,
+      "step": 5354
+    },
+    {
+      "epoch": 0.3863775749485912,
+      "grad_norm": 0.10674185305833817,
+      "learning_rate": 0.00018455765622744984,
+      "loss": 0.1761,
+      "step": 5355
+    },
+    {
+      "epoch": 0.38644972762365165,
+      "grad_norm": 0.13681352138519287,
+      "learning_rate": 0.00018455476980805313,
+      "loss": 0.1533,
+      "step": 5356
+    },
+    {
+      "epoch": 0.3865218802987121,
+      "grad_norm": 0.1047702357172966,
+      "learning_rate": 0.0001845518833886564,
+      "loss": 0.1726,
+      "step": 5357
+    },
+    {
+      "epoch": 0.3865940329737725,
+      "grad_norm": 0.09624727070331573,
+      "learning_rate": 0.00018454899696925965,
+      "loss": 0.1286,
+      "step": 5358
+    },
+    {
+      "epoch": 0.3866661856488329,
+      "grad_norm": 0.10974021255970001,
+      "learning_rate": 0.00018454611054986292,
+      "loss": 0.1269,
+      "step": 5359
+    },
+    {
+      "epoch": 0.38673833832389337,
+      "grad_norm": 0.1494600921869278,
+      "learning_rate": 0.00018454322413046615,
+      "loss": 0.1172,
+      "step": 5360
+    },
+    {
+      "epoch": 0.38681049099895376,
+      "grad_norm": 0.11654991656541824,
+      "learning_rate": 0.00018454033771106941,
+      "loss": 0.1575,
+      "step": 5361
+    },
+    {
+      "epoch": 0.3868826436740142,
+      "grad_norm": 0.13498321175575256,
+      "learning_rate": 0.00018453745129167268,
+      "loss": 0.1384,
+      "step": 5362
+    },
+    {
+      "epoch": 0.38695479634907465,
+      "grad_norm": 0.15553444623947144,
+      "learning_rate": 0.00018453456487227597,
+      "loss": 0.1509,
+      "step": 5363
+    },
+    {
+      "epoch": 0.3870269490241351,
+      "grad_norm": 0.13026754558086395,
+      "learning_rate": 0.00018453167845287923,
+      "loss": 0.1682,
+      "step": 5364
+    },
+    {
+      "epoch": 0.3870991016991955,
+      "grad_norm": 0.1101813018321991,
+      "learning_rate": 0.00018452879203348247,
+      "loss": 0.1691,
+      "step": 5365
+    },
+    {
+      "epoch": 0.3871712543742559,
+      "grad_norm": 0.1375681310892105,
+      "learning_rate": 0.00018452590561408573,
+      "loss": 0.1199,
+      "step": 5366
+    },
+    {
+      "epoch": 0.38724340704931637,
+      "grad_norm": 0.1228758916258812,
+      "learning_rate": 0.000184523019194689,
+      "loss": 0.1025,
+      "step": 5367
+    },
+    {
+      "epoch": 0.38731555972437676,
+      "grad_norm": 0.10448736697435379,
+      "learning_rate": 0.00018452013277529225,
+      "loss": 0.1622,
+      "step": 5368
+    },
+    {
+      "epoch": 0.3873877123994372,
+      "grad_norm": 0.11223381757736206,
+      "learning_rate": 0.00018451724635589552,
+      "loss": 0.1847,
+      "step": 5369
+    },
+    {
+      "epoch": 0.38745986507449764,
+      "grad_norm": 0.13099254667758942,
+      "learning_rate": 0.00018451435993649878,
+      "loss": 0.1464,
+      "step": 5370
+    },
+    {
+      "epoch": 0.3875320177495581,
+      "grad_norm": 0.13704586029052734,
+      "learning_rate": 0.00018451147351710204,
+      "loss": 0.0955,
+      "step": 5371
+    },
+    {
+      "epoch": 0.3876041704246185,
+      "grad_norm": 0.12381067126989365,
+      "learning_rate": 0.0001845085870977053,
+      "loss": 0.1466,
+      "step": 5372
+    },
+    {
+      "epoch": 0.3876763230996789,
+      "grad_norm": 0.1215813010931015,
+      "learning_rate": 0.00018450570067830857,
+      "loss": 0.1393,
+      "step": 5373
+    },
+    {
+      "epoch": 0.38774847577473937,
+      "grad_norm": 0.1295686811208725,
+      "learning_rate": 0.00018450281425891183,
+      "loss": 0.1387,
+      "step": 5374
+    },
+    {
+      "epoch": 0.38782062844979975,
+      "grad_norm": 0.1214752271771431,
+      "learning_rate": 0.0001844999278395151,
+      "loss": 0.1621,
+      "step": 5375
+    },
+    {
+      "epoch": 0.3878927811248602,
+      "grad_norm": 0.1196463406085968,
+      "learning_rate": 0.00018449704142011833,
+      "loss": 0.1321,
+      "step": 5376
+    },
+    {
+      "epoch": 0.38796493379992064,
+      "grad_norm": 0.1484265923500061,
+      "learning_rate": 0.00018449415500072162,
+      "loss": 0.1793,
+      "step": 5377
+    },
+    {
+      "epoch": 0.3880370864749811,
+      "grad_norm": 0.12468485534191132,
+      "learning_rate": 0.00018449126858132488,
+      "loss": 0.157,
+      "step": 5378
+    },
+    {
+      "epoch": 0.3881092391500415,
+      "grad_norm": 0.0925653800368309,
+      "learning_rate": 0.00018448838216192815,
+      "loss": 0.1711,
+      "step": 5379
+    },
+    {
+      "epoch": 0.3881813918251019,
+      "grad_norm": 0.11857761442661285,
+      "learning_rate": 0.0001844854957425314,
+      "loss": 0.1254,
+      "step": 5380
+    },
+    {
+      "epoch": 0.38825354450016236,
+      "grad_norm": 0.11172490566968918,
+      "learning_rate": 0.00018448260932313465,
+      "loss": 0.1884,
+      "step": 5381
+    },
+    {
+      "epoch": 0.38832569717522275,
+      "grad_norm": 0.14004208147525787,
+      "learning_rate": 0.0001844797229037379,
+      "loss": 0.1159,
+      "step": 5382
+    },
+    {
+      "epoch": 0.3883978498502832,
+      "grad_norm": 0.11043383181095123,
+      "learning_rate": 0.00018447683648434117,
+      "loss": 0.1471,
+      "step": 5383
+    },
+    {
+      "epoch": 0.38847000252534364,
+      "grad_norm": 0.11880587786436081,
+      "learning_rate": 0.00018447395006494446,
+      "loss": 0.1561,
+      "step": 5384
+    },
+    {
+      "epoch": 0.3885421552004041,
+      "grad_norm": 0.1249169260263443,
+      "learning_rate": 0.00018447106364554772,
+      "loss": 0.1261,
+      "step": 5385
+    },
+    {
+      "epoch": 0.38861430787546447,
+      "grad_norm": 0.1326235830783844,
+      "learning_rate": 0.00018446817722615096,
+      "loss": 0.1132,
+      "step": 5386
+    },
+    {
+      "epoch": 0.3886864605505249,
+      "grad_norm": 0.1248789057135582,
+      "learning_rate": 0.00018446529080675422,
+      "loss": 0.1652,
+      "step": 5387
+    },
+    {
+      "epoch": 0.38875861322558536,
+      "grad_norm": 0.11591673642396927,
+      "learning_rate": 0.00018446240438735749,
+      "loss": 0.1938,
+      "step": 5388
+    },
+    {
+      "epoch": 0.38883076590064575,
+      "grad_norm": 0.11416880041360855,
+      "learning_rate": 0.00018445951796796075,
+      "loss": 0.1469,
+      "step": 5389
+    },
+    {
+      "epoch": 0.3889029185757062,
+      "grad_norm": 0.10494326800107956,
+      "learning_rate": 0.000184456631548564,
+      "loss": 0.1151,
+      "step": 5390
+    },
+    {
+      "epoch": 0.38897507125076664,
+      "grad_norm": 0.11870312690734863,
+      "learning_rate": 0.00018445374512916727,
+      "loss": 0.1586,
+      "step": 5391
+    },
+    {
+      "epoch": 0.389047223925827,
+      "grad_norm": 0.11810563504695892,
+      "learning_rate": 0.00018445085870977054,
+      "loss": 0.1868,
+      "step": 5392
+    },
+    {
+      "epoch": 0.38911937660088747,
+      "grad_norm": 0.10838472098112106,
+      "learning_rate": 0.0001844479722903738,
+      "loss": 0.141,
+      "step": 5393
+    },
+    {
+      "epoch": 0.3891915292759479,
+      "grad_norm": 0.1309295892715454,
+      "learning_rate": 0.00018444508587097706,
+      "loss": 0.1446,
+      "step": 5394
+    },
+    {
+      "epoch": 0.38926368195100836,
+      "grad_norm": 0.13636811077594757,
+      "learning_rate": 0.00018444219945158033,
+      "loss": 0.1854,
+      "step": 5395
+    },
+    {
+      "epoch": 0.38933583462606874,
+      "grad_norm": 0.11778973042964935,
+      "learning_rate": 0.0001844393130321836,
+      "loss": 0.1048,
+      "step": 5396
+    },
+    {
+      "epoch": 0.3894079873011292,
+      "grad_norm": 0.11265967786312103,
+      "learning_rate": 0.00018443642661278685,
+      "loss": 0.1634,
+      "step": 5397
+    },
+    {
+      "epoch": 0.38948013997618963,
+      "grad_norm": 0.12080727517604828,
+      "learning_rate": 0.0001844335401933901,
+      "loss": 0.1749,
+      "step": 5398
+    },
+    {
+      "epoch": 0.38955229265125,
+      "grad_norm": 0.10691976547241211,
+      "learning_rate": 0.00018443065377399338,
+      "loss": 0.1443,
+      "step": 5399
+    },
+    {
+      "epoch": 0.38962444532631046,
+      "grad_norm": 0.11768995970487595,
+      "learning_rate": 0.00018442776735459664,
+      "loss": 0.1771,
+      "step": 5400
+    },
+    {
+      "epoch": 0.3896965980013709,
+      "grad_norm": 0.12330100685358047,
+      "learning_rate": 0.0001844248809351999,
+      "loss": 0.1728,
+      "step": 5401
+    },
+    {
+      "epoch": 0.38976875067643135,
+      "grad_norm": 0.10355494916439056,
+      "learning_rate": 0.00018442199451580317,
+      "loss": 0.1304,
+      "step": 5402
+    },
+    {
+      "epoch": 0.38984090335149174,
+      "grad_norm": 0.11235153675079346,
+      "learning_rate": 0.0001844191080964064,
+      "loss": 0.2203,
+      "step": 5403
+    },
+    {
+      "epoch": 0.3899130560265522,
+      "grad_norm": 0.11451764404773712,
+      "learning_rate": 0.00018441622167700967,
+      "loss": 0.162,
+      "step": 5404
+    },
+    {
+      "epoch": 0.38998520870161263,
+      "grad_norm": 0.11043937504291534,
+      "learning_rate": 0.00018441333525761293,
+      "loss": 0.1765,
+      "step": 5405
+    },
+    {
+      "epoch": 0.390057361376673,
+      "grad_norm": 0.10878059267997742,
+      "learning_rate": 0.00018441044883821622,
+      "loss": 0.186,
+      "step": 5406
+    },
+    {
+      "epoch": 0.39012951405173346,
+      "grad_norm": 0.10789388418197632,
+      "learning_rate": 0.00018440756241881948,
+      "loss": 0.1377,
+      "step": 5407
+    },
+    {
+      "epoch": 0.3902016667267939,
+      "grad_norm": 0.10264996439218521,
+      "learning_rate": 0.00018440467599942272,
+      "loss": 0.1387,
+      "step": 5408
+    },
+    {
+      "epoch": 0.39027381940185435,
+      "grad_norm": 0.0962095707654953,
+      "learning_rate": 0.00018440178958002598,
+      "loss": 0.1335,
+      "step": 5409
+    },
+    {
+      "epoch": 0.39034597207691474,
+      "grad_norm": 0.16185228526592255,
+      "learning_rate": 0.00018439890316062924,
+      "loss": 0.1424,
+      "step": 5410
+    },
+    {
+      "epoch": 0.3904181247519752,
+      "grad_norm": 0.10481355339288712,
+      "learning_rate": 0.0001843960167412325,
+      "loss": 0.1497,
+      "step": 5411
+    },
+    {
+      "epoch": 0.3904902774270356,
+      "grad_norm": 0.10153397917747498,
+      "learning_rate": 0.00018439313032183577,
+      "loss": 0.1463,
+      "step": 5412
+    },
+    {
+      "epoch": 0.390562430102096,
+      "grad_norm": 0.09573136270046234,
+      "learning_rate": 0.00018439024390243903,
+      "loss": 0.1296,
+      "step": 5413
+    },
+    {
+      "epoch": 0.39063458277715646,
+      "grad_norm": 0.09889469295740128,
+      "learning_rate": 0.0001843873574830423,
+      "loss": 0.1502,
+      "step": 5414
+    },
+    {
+      "epoch": 0.3907067354522169,
+      "grad_norm": 0.10378115624189377,
+      "learning_rate": 0.00018438447106364556,
+      "loss": 0.181,
+      "step": 5415
+    },
+    {
+      "epoch": 0.39077888812727735,
+      "grad_norm": 0.12253466248512268,
+      "learning_rate": 0.00018438158464424882,
+      "loss": 0.1107,
+      "step": 5416
+    },
+    {
+      "epoch": 0.39085104080233773,
+      "grad_norm": 0.10994445532560349,
+      "learning_rate": 0.00018437869822485208,
+      "loss": 0.1217,
+      "step": 5417
+    },
+    {
+      "epoch": 0.3909231934773982,
+      "grad_norm": 0.09956198185682297,
+      "learning_rate": 0.00018437581180545535,
+      "loss": 0.1925,
+      "step": 5418
+    },
+    {
+      "epoch": 0.3909953461524586,
+      "grad_norm": 0.10294509679079056,
+      "learning_rate": 0.00018437292538605858,
+      "loss": 0.1733,
+      "step": 5419
+    },
+    {
+      "epoch": 0.391067498827519,
+      "grad_norm": 0.1036074310541153,
+      "learning_rate": 0.00018437003896666187,
+      "loss": 0.1161,
+      "step": 5420
+    },
+    {
+      "epoch": 0.39113965150257946,
+      "grad_norm": 0.14222463965415955,
+      "learning_rate": 0.00018436715254726514,
+      "loss": 0.1602,
+      "step": 5421
+    },
+    {
+      "epoch": 0.3912118041776399,
+      "grad_norm": 0.13478884100914001,
+      "learning_rate": 0.0001843642661278684,
+      "loss": 0.1516,
+      "step": 5422
+    },
+    {
+      "epoch": 0.3912839568527003,
+      "grad_norm": 0.13049808144569397,
+      "learning_rate": 0.00018436137970847166,
+      "loss": 0.1812,
+      "step": 5423
+    },
+    {
+      "epoch": 0.39135610952776073,
+      "grad_norm": 0.13071030378341675,
+      "learning_rate": 0.0001843584932890749,
+      "loss": 0.1483,
+      "step": 5424
+    },
+    {
+      "epoch": 0.3914282622028212,
+      "grad_norm": 0.10672631114721298,
+      "learning_rate": 0.00018435560686967816,
+      "loss": 0.1074,
+      "step": 5425
+    },
+    {
+      "epoch": 0.3915004148778816,
+      "grad_norm": 0.11152239143848419,
+      "learning_rate": 0.00018435272045028142,
+      "loss": 0.1203,
+      "step": 5426
+    },
+    {
+      "epoch": 0.391572567552942,
+      "grad_norm": 0.10995141416788101,
+      "learning_rate": 0.0001843498340308847,
+      "loss": 0.1235,
+      "step": 5427
+    },
+    {
+      "epoch": 0.39164472022800245,
+      "grad_norm": 0.14090102910995483,
+      "learning_rate": 0.00018434694761148798,
+      "loss": 0.1559,
+      "step": 5428
+    },
+    {
+      "epoch": 0.3917168729030629,
+      "grad_norm": 0.10918077081441879,
+      "learning_rate": 0.0001843440611920912,
+      "loss": 0.1567,
+      "step": 5429
+    },
+    {
+      "epoch": 0.3917890255781233,
+      "grad_norm": 0.12291335314512253,
+      "learning_rate": 0.00018434117477269447,
+      "loss": 0.1851,
+      "step": 5430
+    },
+    {
+      "epoch": 0.39186117825318373,
+      "grad_norm": 0.10027393698692322,
+      "learning_rate": 0.00018433828835329774,
+      "loss": 0.1488,
+      "step": 5431
+    },
+    {
+      "epoch": 0.3919333309282442,
+      "grad_norm": 0.1360558718442917,
+      "learning_rate": 0.000184335401933901,
+      "loss": 0.1473,
+      "step": 5432
+    },
+    {
+      "epoch": 0.3920054836033046,
+      "grad_norm": 0.1145196408033371,
+      "learning_rate": 0.00018433251551450426,
+      "loss": 0.1662,
+      "step": 5433
+    },
+    {
+      "epoch": 0.392077636278365,
+      "grad_norm": 0.126168355345726,
+      "learning_rate": 0.00018432962909510753,
+      "loss": 0.1747,
+      "step": 5434
+    },
+    {
+      "epoch": 0.39214978895342545,
+      "grad_norm": 0.13097237050533295,
+      "learning_rate": 0.0001843267426757108,
+      "loss": 0.1333,
+      "step": 5435
+    },
+    {
+      "epoch": 0.3922219416284859,
+      "grad_norm": 0.11811990290880203,
+      "learning_rate": 0.00018432385625631405,
+      "loss": 0.2224,
+      "step": 5436
+    },
+    {
+      "epoch": 0.3922940943035463,
+      "grad_norm": 0.1210411936044693,
+      "learning_rate": 0.00018432096983691732,
+      "loss": 0.1718,
+      "step": 5437
+    },
+    {
+      "epoch": 0.3923662469786067,
+      "grad_norm": 0.12498074769973755,
+      "learning_rate": 0.00018431808341752058,
+      "loss": 0.1645,
+      "step": 5438
+    },
+    {
+      "epoch": 0.39243839965366717,
+      "grad_norm": 0.111318439245224,
+      "learning_rate": 0.00018431519699812384,
+      "loss": 0.1946,
+      "step": 5439
+    },
+    {
+      "epoch": 0.3925105523287276,
+      "grad_norm": 0.1372980922460556,
+      "learning_rate": 0.00018431231057872708,
+      "loss": 0.1617,
+      "step": 5440
+    },
+    {
+      "epoch": 0.392582705003788,
+      "grad_norm": 0.1149701476097107,
+      "learning_rate": 0.00018430942415933037,
+      "loss": 0.1452,
+      "step": 5441
+    },
+    {
+      "epoch": 0.39265485767884845,
+      "grad_norm": 0.12510797381401062,
+      "learning_rate": 0.00018430653773993363,
+      "loss": 0.124,
+      "step": 5442
+    },
+    {
+      "epoch": 0.3927270103539089,
+      "grad_norm": 0.11453134566545486,
+      "learning_rate": 0.0001843036513205369,
+      "loss": 0.1616,
+      "step": 5443
+    },
+    {
+      "epoch": 0.3927991630289693,
+      "grad_norm": 0.11869612336158752,
+      "learning_rate": 0.00018430076490114016,
+      "loss": 0.1863,
+      "step": 5444
+    },
+    {
+      "epoch": 0.3928713157040297,
+      "grad_norm": 0.13181565701961517,
+      "learning_rate": 0.0001842978784817434,
+      "loss": 0.1824,
+      "step": 5445
+    },
+    {
+      "epoch": 0.39294346837909017,
+      "grad_norm": 0.11108796298503876,
+      "learning_rate": 0.00018429499206234665,
+      "loss": 0.1569,
+      "step": 5446
+    },
+    {
+      "epoch": 0.3930156210541506,
+      "grad_norm": 0.11389564722776413,
+      "learning_rate": 0.00018429210564294992,
+      "loss": 0.1683,
+      "step": 5447
+    },
+    {
+      "epoch": 0.393087773729211,
+      "grad_norm": 0.10978323966264725,
+      "learning_rate": 0.0001842892192235532,
+      "loss": 0.1169,
+      "step": 5448
+    },
+    {
+      "epoch": 0.39315992640427144,
+      "grad_norm": 0.1294555515050888,
+      "learning_rate": 0.00018428633280415647,
+      "loss": 0.1374,
+      "step": 5449
+    },
+    {
+      "epoch": 0.3932320790793319,
+      "grad_norm": 0.12295661866664886,
+      "learning_rate": 0.0001842834463847597,
+      "loss": 0.1673,
+      "step": 5450
+    },
+    {
+      "epoch": 0.3933042317543923,
+      "grad_norm": 0.10065015405416489,
+      "learning_rate": 0.00018428055996536297,
+      "loss": 0.158,
+      "step": 5451
+    },
+    {
+      "epoch": 0.3933763844294527,
+      "grad_norm": 0.10121423006057739,
+      "learning_rate": 0.00018427767354596623,
+      "loss": 0.1636,
+      "step": 5452
+    },
+    {
+      "epoch": 0.39344853710451316,
+      "grad_norm": 0.09941934794187546,
+      "learning_rate": 0.0001842747871265695,
+      "loss": 0.1774,
+      "step": 5453
+    },
+    {
+      "epoch": 0.39352068977957355,
+      "grad_norm": 0.17981722950935364,
+      "learning_rate": 0.00018427190070717276,
+      "loss": 0.1931,
+      "step": 5454
+    },
+    {
+      "epoch": 0.393592842454634,
+      "grad_norm": 0.13403883576393127,
+      "learning_rate": 0.00018426901428777602,
+      "loss": 0.1749,
+      "step": 5455
+    },
+    {
+      "epoch": 0.39366499512969444,
+      "grad_norm": 0.11700470745563507,
+      "learning_rate": 0.00018426612786837928,
+      "loss": 0.1459,
+      "step": 5456
+    },
+    {
+      "epoch": 0.3937371478047549,
+      "grad_norm": 0.11984625458717346,
+      "learning_rate": 0.00018426324144898255,
+      "loss": 0.2311,
+      "step": 5457
+    },
+    {
+      "epoch": 0.39380930047981527,
+      "grad_norm": 0.12416800111532211,
+      "learning_rate": 0.0001842603550295858,
+      "loss": 0.1538,
+      "step": 5458
+    },
+    {
+      "epoch": 0.3938814531548757,
+      "grad_norm": 0.10681987553834915,
+      "learning_rate": 0.00018425746861018907,
+      "loss": 0.1577,
+      "step": 5459
+    },
+    {
+      "epoch": 0.39395360582993616,
+      "grad_norm": 0.13483217358589172,
+      "learning_rate": 0.00018425458219079234,
+      "loss": 0.1452,
+      "step": 5460
+    },
+    {
+      "epoch": 0.39402575850499655,
+      "grad_norm": 0.11891290545463562,
+      "learning_rate": 0.00018425169577139557,
+      "loss": 0.1492,
+      "step": 5461
+    },
+    {
+      "epoch": 0.394097911180057,
+      "grad_norm": 0.11302975565195084,
+      "learning_rate": 0.00018424880935199886,
+      "loss": 0.1237,
+      "step": 5462
+    },
+    {
+      "epoch": 0.39417006385511744,
+      "grad_norm": 0.09831945598125458,
+      "learning_rate": 0.00018424592293260212,
+      "loss": 0.1607,
+      "step": 5463
+    },
+    {
+      "epoch": 0.3942422165301779,
+      "grad_norm": 0.10420966893434525,
+      "learning_rate": 0.0001842430365132054,
+      "loss": 0.1171,
+      "step": 5464
+    },
+    {
+      "epoch": 0.39431436920523827,
+      "grad_norm": 0.138570636510849,
+      "learning_rate": 0.00018424015009380865,
+      "loss": 0.1551,
+      "step": 5465
+    },
+    {
+      "epoch": 0.3943865218802987,
+      "grad_norm": 0.11153330653905869,
+      "learning_rate": 0.00018423726367441189,
+      "loss": 0.183,
+      "step": 5466
+    },
+    {
+      "epoch": 0.39445867455535916,
+      "grad_norm": 0.13243813812732697,
+      "learning_rate": 0.00018423437725501515,
+      "loss": 0.1178,
+      "step": 5467
+    },
+    {
+      "epoch": 0.39453082723041955,
+      "grad_norm": 0.10459816455841064,
+      "learning_rate": 0.0001842314908356184,
+      "loss": 0.1906,
+      "step": 5468
+    },
+    {
+      "epoch": 0.39460297990548,
+      "grad_norm": 0.12012185156345367,
+      "learning_rate": 0.0001842286044162217,
+      "loss": 0.1403,
+      "step": 5469
+    },
+    {
+      "epoch": 0.39467513258054043,
+      "grad_norm": 0.1246560662984848,
+      "learning_rate": 0.00018422571799682496,
+      "loss": 0.214,
+      "step": 5470
+    },
+    {
+      "epoch": 0.3947472852556009,
+      "grad_norm": 0.14620047807693481,
+      "learning_rate": 0.0001842228315774282,
+      "loss": 0.1475,
+      "step": 5471
+    },
+    {
+      "epoch": 0.39481943793066127,
+      "grad_norm": 0.13080306351184845,
+      "learning_rate": 0.00018421994515803146,
+      "loss": 0.1292,
+      "step": 5472
+    },
+    {
+      "epoch": 0.3948915906057217,
+      "grad_norm": 0.11931418627500534,
+      "learning_rate": 0.00018421705873863473,
+      "loss": 0.2049,
+      "step": 5473
+    },
+    {
+      "epoch": 0.39496374328078215,
+      "grad_norm": 0.1012091189622879,
+      "learning_rate": 0.000184214172319238,
+      "loss": 0.166,
+      "step": 5474
+    },
+    {
+      "epoch": 0.39503589595584254,
+      "grad_norm": 0.16803866624832153,
+      "learning_rate": 0.00018421128589984125,
+      "loss": 0.1538,
+      "step": 5475
+    },
+    {
+      "epoch": 0.395108048630903,
+      "grad_norm": 0.12577006220817566,
+      "learning_rate": 0.00018420839948044451,
+      "loss": 0.172,
+      "step": 5476
+    },
+    {
+      "epoch": 0.39518020130596343,
+      "grad_norm": 0.13829030096530914,
+      "learning_rate": 0.00018420551306104778,
+      "loss": 0.1718,
+      "step": 5477
+    },
+    {
+      "epoch": 0.3952523539810239,
+      "grad_norm": 0.13298436999320984,
+      "learning_rate": 0.00018420262664165104,
+      "loss": 0.1623,
+      "step": 5478
+    },
+    {
+      "epoch": 0.39532450665608426,
+      "grad_norm": 0.10100710391998291,
+      "learning_rate": 0.0001841997402222543,
+      "loss": 0.1546,
+      "step": 5479
+    },
+    {
+      "epoch": 0.3953966593311447,
+      "grad_norm": 0.12674416601657867,
+      "learning_rate": 0.00018419685380285757,
+      "loss": 0.1538,
+      "step": 5480
+    },
+    {
+      "epoch": 0.39546881200620515,
+      "grad_norm": 0.10350482910871506,
+      "learning_rate": 0.00018419396738346083,
+      "loss": 0.1574,
+      "step": 5481
+    },
+    {
+      "epoch": 0.39554096468126554,
+      "grad_norm": 0.12238260358572006,
+      "learning_rate": 0.00018419108096406407,
+      "loss": 0.178,
+      "step": 5482
+    },
+    {
+      "epoch": 0.395613117356326,
+      "grad_norm": 0.09578686207532883,
+      "learning_rate": 0.00018418819454466736,
+      "loss": 0.1866,
+      "step": 5483
+    },
+    {
+      "epoch": 0.3956852700313864,
+      "grad_norm": 0.1295008659362793,
+      "learning_rate": 0.00018418530812527062,
+      "loss": 0.1224,
+      "step": 5484
+    },
+    {
+      "epoch": 0.3957574227064468,
+      "grad_norm": 0.1356228142976761,
+      "learning_rate": 0.00018418242170587388,
+      "loss": 0.156,
+      "step": 5485
+    },
+    {
+      "epoch": 0.39582957538150726,
+      "grad_norm": 0.13476917147636414,
+      "learning_rate": 0.00018417953528647714,
+      "loss": 0.1875,
+      "step": 5486
+    },
+    {
+      "epoch": 0.3959017280565677,
+      "grad_norm": 0.12495545297861099,
+      "learning_rate": 0.00018417664886708038,
+      "loss": 0.1427,
+      "step": 5487
+    },
+    {
+      "epoch": 0.39597388073162815,
+      "grad_norm": 0.11760874092578888,
+      "learning_rate": 0.00018417376244768364,
+      "loss": 0.1597,
+      "step": 5488
+    },
+    {
+      "epoch": 0.39604603340668854,
+      "grad_norm": 0.1176750659942627,
+      "learning_rate": 0.0001841708760282869,
+      "loss": 0.1463,
+      "step": 5489
+    },
+    {
+      "epoch": 0.396118186081749,
+      "grad_norm": 0.10409785062074661,
+      "learning_rate": 0.0001841679896088902,
+      "loss": 0.1786,
+      "step": 5490
+    },
+    {
+      "epoch": 0.3961903387568094,
+      "grad_norm": 0.11557011306285858,
+      "learning_rate": 0.00018416510318949346,
+      "loss": 0.1325,
+      "step": 5491
+    },
+    {
+      "epoch": 0.3962624914318698,
+      "grad_norm": 0.14132000505924225,
+      "learning_rate": 0.0001841622167700967,
+      "loss": 0.1814,
+      "step": 5492
+    },
+    {
+      "epoch": 0.39633464410693026,
+      "grad_norm": 0.09684260189533234,
+      "learning_rate": 0.00018415933035069996,
+      "loss": 0.224,
+      "step": 5493
+    },
+    {
+      "epoch": 0.3964067967819907,
+      "grad_norm": 0.10463616997003555,
+      "learning_rate": 0.00018415644393130322,
+      "loss": 0.1462,
+      "step": 5494
+    },
+    {
+      "epoch": 0.39647894945705114,
+      "grad_norm": 0.11369778960943222,
+      "learning_rate": 0.00018415355751190648,
+      "loss": 0.1953,
+      "step": 5495
+    },
+    {
+      "epoch": 0.39655110213211153,
+      "grad_norm": 0.11679672449827194,
+      "learning_rate": 0.00018415067109250975,
+      "loss": 0.1567,
+      "step": 5496
+    },
+    {
+      "epoch": 0.396623254807172,
+      "grad_norm": 0.11416541039943695,
+      "learning_rate": 0.000184147784673113,
+      "loss": 0.1684,
+      "step": 5497
+    },
+    {
+      "epoch": 0.3966954074822324,
+      "grad_norm": 0.1330970972776413,
+      "learning_rate": 0.00018414489825371627,
+      "loss": 0.1633,
+      "step": 5498
+    },
+    {
+      "epoch": 0.3967675601572928,
+      "grad_norm": 0.13111475110054016,
+      "learning_rate": 0.00018414201183431953,
+      "loss": 0.1709,
+      "step": 5499
+    },
+    {
+      "epoch": 0.39683971283235325,
+      "grad_norm": 0.1295691430568695,
+      "learning_rate": 0.0001841391254149228,
+      "loss": 0.1488,
+      "step": 5500
+    },
+    {
+      "epoch": 0.3969118655074137,
+      "grad_norm": 0.10770321637392044,
+      "learning_rate": 0.00018413623899552606,
+      "loss": 0.1953,
+      "step": 5501
+    },
+    {
+      "epoch": 0.39698401818247414,
+      "grad_norm": 0.15028440952301025,
+      "learning_rate": 0.00018413335257612932,
+      "loss": 0.1781,
+      "step": 5502
+    },
+    {
+      "epoch": 0.39705617085753453,
+      "grad_norm": 0.09639762341976166,
+      "learning_rate": 0.00018413046615673256,
+      "loss": 0.1141,
+      "step": 5503
+    },
+    {
+      "epoch": 0.397128323532595,
+      "grad_norm": 0.14751236140727997,
+      "learning_rate": 0.00018412757973733585,
+      "loss": 0.1746,
+      "step": 5504
+    },
+    {
+      "epoch": 0.3972004762076554,
+      "grad_norm": 0.13057030737400055,
+      "learning_rate": 0.0001841246933179391,
+      "loss": 0.1534,
+      "step": 5505
+    },
+    {
+      "epoch": 0.3972726288827158,
+      "grad_norm": 0.11408231407403946,
+      "learning_rate": 0.00018412180689854238,
+      "loss": 0.2179,
+      "step": 5506
+    },
+    {
+      "epoch": 0.39734478155777625,
+      "grad_norm": 0.18876004219055176,
+      "learning_rate": 0.00018411892047914564,
+      "loss": 0.1353,
+      "step": 5507
+    },
+    {
+      "epoch": 0.3974169342328367,
+      "grad_norm": 0.11818505823612213,
+      "learning_rate": 0.00018411603405974887,
+      "loss": 0.1653,
+      "step": 5508
+    },
+    {
+      "epoch": 0.39748908690789714,
+      "grad_norm": 0.1076313853263855,
+      "learning_rate": 0.00018411314764035214,
+      "loss": 0.1502,
+      "step": 5509
+    },
+    {
+      "epoch": 0.3975612395829575,
+      "grad_norm": 0.09401322156190872,
+      "learning_rate": 0.0001841102612209554,
+      "loss": 0.197,
+      "step": 5510
+    },
+    {
+      "epoch": 0.39763339225801797,
+      "grad_norm": 0.10382793098688126,
+      "learning_rate": 0.0001841073748015587,
+      "loss": 0.1718,
+      "step": 5511
+    },
+    {
+      "epoch": 0.3977055449330784,
+      "grad_norm": 0.13243991136550903,
+      "learning_rate": 0.00018410448838216195,
+      "loss": 0.1712,
+      "step": 5512
+    },
+    {
+      "epoch": 0.3977776976081388,
+      "grad_norm": 0.13869154453277588,
+      "learning_rate": 0.0001841016019627652,
+      "loss": 0.2437,
+      "step": 5513
+    },
+    {
+      "epoch": 0.39784985028319925,
+      "grad_norm": 0.13590875267982483,
+      "learning_rate": 0.00018409871554336845,
+      "loss": 0.1687,
+      "step": 5514
+    },
+    {
+      "epoch": 0.3979220029582597,
+      "grad_norm": 0.27010488510131836,
+      "learning_rate": 0.00018409582912397171,
+      "loss": 0.1801,
+      "step": 5515
+    },
+    {
+      "epoch": 0.3979941556333201,
+      "grad_norm": 0.11247264593839645,
+      "learning_rate": 0.00018409294270457498,
+      "loss": 0.1467,
+      "step": 5516
+    },
+    {
+      "epoch": 0.3980663083083805,
+      "grad_norm": 0.1487649828195572,
+      "learning_rate": 0.00018409005628517824,
+      "loss": 0.177,
+      "step": 5517
+    },
+    {
+      "epoch": 0.39813846098344097,
+      "grad_norm": 0.13606010377407074,
+      "learning_rate": 0.00018408716986578153,
+      "loss": 0.1343,
+      "step": 5518
+    },
+    {
+      "epoch": 0.3982106136585014,
+      "grad_norm": 0.11267613619565964,
+      "learning_rate": 0.00018408428344638477,
+      "loss": 0.1745,
+      "step": 5519
+    },
+    {
+      "epoch": 0.3982827663335618,
+      "grad_norm": 0.15224646031856537,
+      "learning_rate": 0.00018408139702698803,
+      "loss": 0.1924,
+      "step": 5520
+    },
+    {
+      "epoch": 0.39835491900862224,
+      "grad_norm": 0.13173869252204895,
+      "learning_rate": 0.0001840785106075913,
+      "loss": 0.1866,
+      "step": 5521
+    },
+    {
+      "epoch": 0.3984270716836827,
+      "grad_norm": 0.127375528216362,
+      "learning_rate": 0.00018407562418819455,
+      "loss": 0.147,
+      "step": 5522
+    },
+    {
+      "epoch": 0.3984992243587431,
+      "grad_norm": 0.10252101719379425,
+      "learning_rate": 0.00018407273776879782,
+      "loss": 0.1101,
+      "step": 5523
+    },
+    {
+      "epoch": 0.3985713770338035,
+      "grad_norm": 0.09786207973957062,
+      "learning_rate": 0.00018406985134940108,
+      "loss": 0.1511,
+      "step": 5524
+    },
+    {
+      "epoch": 0.39864352970886396,
+      "grad_norm": 0.15317873656749725,
+      "learning_rate": 0.00018406696493000434,
+      "loss": 0.1469,
+      "step": 5525
+    },
+    {
+      "epoch": 0.3987156823839244,
+      "grad_norm": 0.12719370424747467,
+      "learning_rate": 0.0001840640785106076,
+      "loss": 0.1466,
+      "step": 5526
+    },
+    {
+      "epoch": 0.3987878350589848,
+      "grad_norm": 0.12291129678487778,
+      "learning_rate": 0.00018406119209121087,
+      "loss": 0.1089,
+      "step": 5527
+    },
+    {
+      "epoch": 0.39885998773404524,
+      "grad_norm": 0.13241510093212128,
+      "learning_rate": 0.00018405830567181413,
+      "loss": 0.1651,
+      "step": 5528
+    },
+    {
+      "epoch": 0.3989321404091057,
+      "grad_norm": 0.14325828850269318,
+      "learning_rate": 0.0001840554192524174,
+      "loss": 0.2134,
+      "step": 5529
+    },
+    {
+      "epoch": 0.3990042930841661,
+      "grad_norm": 0.11104754358530045,
+      "learning_rate": 0.00018405253283302063,
+      "loss": 0.1667,
+      "step": 5530
+    },
+    {
+      "epoch": 0.3990764457592265,
+      "grad_norm": 0.13289597630500793,
+      "learning_rate": 0.0001840496464136239,
+      "loss": 0.1048,
+      "step": 5531
+    },
+    {
+      "epoch": 0.39914859843428696,
+      "grad_norm": 0.14496006071567535,
+      "learning_rate": 0.00018404675999422718,
+      "loss": 0.154,
+      "step": 5532
+    },
+    {
+      "epoch": 0.3992207511093474,
+      "grad_norm": 0.14237834513187408,
+      "learning_rate": 0.00018404387357483045,
+      "loss": 0.1515,
+      "step": 5533
+    },
+    {
+      "epoch": 0.3992929037844078,
+      "grad_norm": 0.11830680817365646,
+      "learning_rate": 0.0001840409871554337,
+      "loss": 0.185,
+      "step": 5534
+    },
+    {
+      "epoch": 0.39936505645946824,
+      "grad_norm": 0.11759422719478607,
+      "learning_rate": 0.00018403810073603695,
+      "loss": 0.1492,
+      "step": 5535
+    },
+    {
+      "epoch": 0.3994372091345287,
+      "grad_norm": 0.11616528779268265,
+      "learning_rate": 0.0001840352143166402,
+      "loss": 0.1328,
+      "step": 5536
+    },
+    {
+      "epoch": 0.39950936180958907,
+      "grad_norm": 0.12721532583236694,
+      "learning_rate": 0.00018403232789724347,
+      "loss": 0.1896,
+      "step": 5537
+    },
+    {
+      "epoch": 0.3995815144846495,
+      "grad_norm": 0.12530900537967682,
+      "learning_rate": 0.00018402944147784673,
+      "loss": 0.1531,
+      "step": 5538
+    },
+    {
+      "epoch": 0.39965366715970996,
+      "grad_norm": 0.12472952157258987,
+      "learning_rate": 0.00018402655505845002,
+      "loss": 0.1626,
+      "step": 5539
+    },
+    {
+      "epoch": 0.3997258198347704,
+      "grad_norm": 0.09787862002849579,
+      "learning_rate": 0.00018402366863905326,
+      "loss": 0.1518,
+      "step": 5540
+    },
+    {
+      "epoch": 0.3997979725098308,
+      "grad_norm": 0.10951712727546692,
+      "learning_rate": 0.00018402078221965652,
+      "loss": 0.1368,
+      "step": 5541
+    },
+    {
+      "epoch": 0.39987012518489123,
+      "grad_norm": 0.15396511554718018,
+      "learning_rate": 0.00018401789580025979,
+      "loss": 0.1458,
+      "step": 5542
+    },
+    {
+      "epoch": 0.3999422778599517,
+      "grad_norm": 0.12943795323371887,
+      "learning_rate": 0.00018401500938086305,
+      "loss": 0.1693,
+      "step": 5543
+    },
+    {
+      "epoch": 0.40001443053501207,
+      "grad_norm": 0.1799429953098297,
+      "learning_rate": 0.0001840121229614663,
+      "loss": 0.1878,
+      "step": 5544
+    },
+    {
+      "epoch": 0.4000865832100725,
+      "grad_norm": 0.17684325575828552,
+      "learning_rate": 0.00018400923654206958,
+      "loss": 0.2223,
+      "step": 5545
+    },
+    {
+      "epoch": 0.40015873588513295,
+      "grad_norm": 0.11476756632328033,
+      "learning_rate": 0.00018400635012267284,
+      "loss": 0.1512,
+      "step": 5546
+    },
+    {
+      "epoch": 0.40023088856019334,
+      "grad_norm": 0.18006323277950287,
+      "learning_rate": 0.0001840034637032761,
+      "loss": 0.1732,
+      "step": 5547
+    },
+    {
+      "epoch": 0.4003030412352538,
+      "grad_norm": 0.13941843807697296,
+      "learning_rate": 0.00018400057728387936,
+      "loss": 0.1466,
+      "step": 5548
+    },
+    {
+      "epoch": 0.40037519391031423,
+      "grad_norm": 0.11408531665802002,
+      "learning_rate": 0.00018399769086448263,
+      "loss": 0.1456,
+      "step": 5549
+    },
+    {
+      "epoch": 0.4004473465853747,
+      "grad_norm": 0.12096792459487915,
+      "learning_rate": 0.0001839948044450859,
+      "loss": 0.171,
+      "step": 5550
+    },
+    {
+      "epoch": 0.40051949926043506,
+      "grad_norm": 0.11903735250234604,
+      "learning_rate": 0.00018399191802568913,
+      "loss": 0.1409,
+      "step": 5551
+    },
+    {
+      "epoch": 0.4005916519354955,
+      "grad_norm": 0.12109566479921341,
+      "learning_rate": 0.0001839890316062924,
+      "loss": 0.189,
+      "step": 5552
+    },
+    {
+      "epoch": 0.40066380461055595,
+      "grad_norm": 0.13302576541900635,
+      "learning_rate": 0.00018398614518689568,
+      "loss": 0.1552,
+      "step": 5553
+    },
+    {
+      "epoch": 0.40073595728561634,
+      "grad_norm": 0.10768107324838638,
+      "learning_rate": 0.00018398325876749894,
+      "loss": 0.1365,
+      "step": 5554
+    },
+    {
+      "epoch": 0.4008081099606768,
+      "grad_norm": 0.10121867805719376,
+      "learning_rate": 0.0001839803723481022,
+      "loss": 0.1893,
+      "step": 5555
+    },
+    {
+      "epoch": 0.40088026263573723,
+      "grad_norm": 0.09464319050312042,
+      "learning_rate": 0.00018397748592870544,
+      "loss": 0.1361,
+      "step": 5556
+    },
+    {
+      "epoch": 0.40095241531079767,
+      "grad_norm": 0.14916472136974335,
+      "learning_rate": 0.0001839745995093087,
+      "loss": 0.1604,
+      "step": 5557
+    },
+    {
+      "epoch": 0.40102456798585806,
+      "grad_norm": 0.11772575974464417,
+      "learning_rate": 0.00018397171308991197,
+      "loss": 0.1463,
+      "step": 5558
+    },
+    {
+      "epoch": 0.4010967206609185,
+      "grad_norm": 0.11101650446653366,
+      "learning_rate": 0.00018396882667051523,
+      "loss": 0.163,
+      "step": 5559
+    },
+    {
+      "epoch": 0.40116887333597895,
+      "grad_norm": 0.12537023425102234,
+      "learning_rate": 0.00018396594025111852,
+      "loss": 0.1489,
+      "step": 5560
+    },
+    {
+      "epoch": 0.40124102601103934,
+      "grad_norm": 0.09984667599201202,
+      "learning_rate": 0.00018396305383172175,
+      "loss": 0.1013,
+      "step": 5561
+    },
+    {
+      "epoch": 0.4013131786860998,
+      "grad_norm": 0.09811095148324966,
+      "learning_rate": 0.00018396016741232502,
+      "loss": 0.1849,
+      "step": 5562
+    },
+    {
+      "epoch": 0.4013853313611602,
+      "grad_norm": 0.14147751033306122,
+      "learning_rate": 0.00018395728099292828,
+      "loss": 0.0963,
+      "step": 5563
+    },
+    {
+      "epoch": 0.40145748403622067,
+      "grad_norm": 0.12936724722385406,
+      "learning_rate": 0.00018395439457353154,
+      "loss": 0.1845,
+      "step": 5564
+    },
+    {
+      "epoch": 0.40152963671128106,
+      "grad_norm": 0.10370690375566483,
+      "learning_rate": 0.0001839515081541348,
+      "loss": 0.1504,
+      "step": 5565
+    },
+    {
+      "epoch": 0.4016017893863415,
+      "grad_norm": 0.14071407914161682,
+      "learning_rate": 0.00018394862173473807,
+      "loss": 0.1893,
+      "step": 5566
+    },
+    {
+      "epoch": 0.40167394206140195,
+      "grad_norm": 0.1003267839550972,
+      "learning_rate": 0.00018394573531534133,
+      "loss": 0.1858,
+      "step": 5567
+    },
+    {
+      "epoch": 0.40174609473646233,
+      "grad_norm": 0.16829344630241394,
+      "learning_rate": 0.0001839428488959446,
+      "loss": 0.16,
+      "step": 5568
+    },
+    {
+      "epoch": 0.4018182474115228,
+      "grad_norm": 0.10380838811397552,
+      "learning_rate": 0.00018393996247654786,
+      "loss": 0.1476,
+      "step": 5569
+    },
+    {
+      "epoch": 0.4018904000865832,
+      "grad_norm": 0.11744909733533859,
+      "learning_rate": 0.00018393707605715112,
+      "loss": 0.1264,
+      "step": 5570
+    },
+    {
+      "epoch": 0.40196255276164367,
+      "grad_norm": 0.11899411678314209,
+      "learning_rate": 0.00018393418963775438,
+      "loss": 0.2101,
+      "step": 5571
+    },
+    {
+      "epoch": 0.40203470543670405,
+      "grad_norm": 0.10737913846969604,
+      "learning_rate": 0.00018393130321835762,
+      "loss": 0.2118,
+      "step": 5572
+    },
+    {
+      "epoch": 0.4021068581117645,
+      "grad_norm": 0.11953913420438766,
+      "learning_rate": 0.00018392841679896088,
+      "loss": 0.1424,
+      "step": 5573
+    },
+    {
+      "epoch": 0.40217901078682494,
+      "grad_norm": 0.1752198338508606,
+      "learning_rate": 0.00018392553037956417,
+      "loss": 0.1891,
+      "step": 5574
+    },
+    {
+      "epoch": 0.40225116346188533,
+      "grad_norm": 0.14559204876422882,
+      "learning_rate": 0.00018392264396016744,
+      "loss": 0.1421,
+      "step": 5575
+    },
+    {
+      "epoch": 0.4023233161369458,
+      "grad_norm": 0.0974157378077507,
+      "learning_rate": 0.0001839197575407707,
+      "loss": 0.1336,
+      "step": 5576
+    },
+    {
+      "epoch": 0.4023954688120062,
+      "grad_norm": 0.1417972892522812,
+      "learning_rate": 0.00018391687112137393,
+      "loss": 0.1208,
+      "step": 5577
+    },
+    {
+      "epoch": 0.4024676214870666,
+      "grad_norm": 0.10055826604366302,
+      "learning_rate": 0.0001839139847019772,
+      "loss": 0.1519,
+      "step": 5578
+    },
+    {
+      "epoch": 0.40253977416212705,
+      "grad_norm": 0.09204552322626114,
+      "learning_rate": 0.00018391109828258046,
+      "loss": 0.1635,
+      "step": 5579
+    },
+    {
+      "epoch": 0.4026119268371875,
+      "grad_norm": 0.10245250165462494,
+      "learning_rate": 0.00018390821186318372,
+      "loss": 0.1937,
+      "step": 5580
+    },
+    {
+      "epoch": 0.40268407951224794,
+      "grad_norm": 0.1685457080602646,
+      "learning_rate": 0.000183905325443787,
+      "loss": 0.2262,
+      "step": 5581
+    },
+    {
+      "epoch": 0.4027562321873083,
+      "grad_norm": 0.167040154337883,
+      "learning_rate": 0.00018390243902439025,
+      "loss": 0.2172,
+      "step": 5582
+    },
+    {
+      "epoch": 0.40282838486236877,
+      "grad_norm": 0.21358591318130493,
+      "learning_rate": 0.0001838995526049935,
+      "loss": 0.1522,
+      "step": 5583
+    },
+    {
+      "epoch": 0.4029005375374292,
+      "grad_norm": 0.10421575605869293,
+      "learning_rate": 0.00018389666618559677,
+      "loss": 0.1249,
+      "step": 5584
+    },
+    {
+      "epoch": 0.4029726902124896,
+      "grad_norm": 0.1081615537405014,
+      "learning_rate": 0.00018389377976620004,
+      "loss": 0.1344,
+      "step": 5585
+    },
+    {
+      "epoch": 0.40304484288755005,
+      "grad_norm": 0.1218922883272171,
+      "learning_rate": 0.0001838908933468033,
+      "loss": 0.1531,
+      "step": 5586
+    },
+    {
+      "epoch": 0.4031169955626105,
+      "grad_norm": 0.14666524529457092,
+      "learning_rate": 0.00018388800692740656,
+      "loss": 0.126,
+      "step": 5587
+    },
+    {
+      "epoch": 0.40318914823767094,
+      "grad_norm": 0.08872871100902557,
+      "learning_rate": 0.0001838851205080098,
+      "loss": 0.1376,
+      "step": 5588
+    },
+    {
+      "epoch": 0.4032613009127313,
+      "grad_norm": 0.10605582594871521,
+      "learning_rate": 0.0001838822340886131,
+      "loss": 0.1679,
+      "step": 5589
+    },
+    {
+      "epoch": 0.40333345358779177,
+      "grad_norm": 0.09796331822872162,
+      "learning_rate": 0.00018387934766921635,
+      "loss": 0.1382,
+      "step": 5590
+    },
+    {
+      "epoch": 0.4034056062628522,
+      "grad_norm": 0.11313818395137787,
+      "learning_rate": 0.00018387646124981962,
+      "loss": 0.1829,
+      "step": 5591
+    },
+    {
+      "epoch": 0.4034777589379126,
+      "grad_norm": 0.1263338327407837,
+      "learning_rate": 0.00018387357483042288,
+      "loss": 0.1642,
+      "step": 5592
+    },
+    {
+      "epoch": 0.40354991161297304,
+      "grad_norm": 0.14448325335979462,
+      "learning_rate": 0.00018387068841102611,
+      "loss": 0.1713,
+      "step": 5593
+    },
+    {
+      "epoch": 0.4036220642880335,
+      "grad_norm": 0.11438018083572388,
+      "learning_rate": 0.00018386780199162938,
+      "loss": 0.147,
+      "step": 5594
+    },
+    {
+      "epoch": 0.40369421696309393,
+      "grad_norm": 0.13415935635566711,
+      "learning_rate": 0.00018386491557223264,
+      "loss": 0.1614,
+      "step": 5595
+    },
+    {
+      "epoch": 0.4037663696381543,
+      "grad_norm": 0.14307013154029846,
+      "learning_rate": 0.00018386202915283593,
+      "loss": 0.1311,
+      "step": 5596
+    },
+    {
+      "epoch": 0.40383852231321477,
+      "grad_norm": 0.10783229023218155,
+      "learning_rate": 0.0001838591427334392,
+      "loss": 0.148,
+      "step": 5597
+    },
+    {
+      "epoch": 0.4039106749882752,
+      "grad_norm": 0.11876362562179565,
+      "learning_rate": 0.00018385625631404243,
+      "loss": 0.1107,
+      "step": 5598
+    },
+    {
+      "epoch": 0.4039828276633356,
+      "grad_norm": 0.12312009185552597,
+      "learning_rate": 0.0001838533698946457,
+      "loss": 0.1633,
+      "step": 5599
+    },
+    {
+      "epoch": 0.40405498033839604,
+      "grad_norm": 0.10900390893220901,
+      "learning_rate": 0.00018385048347524895,
+      "loss": 0.136,
+      "step": 5600
+    },
+    {
+      "epoch": 0.4041271330134565,
+      "grad_norm": 0.13774895668029785,
+      "learning_rate": 0.00018384759705585222,
+      "loss": 0.1689,
+      "step": 5601
+    },
+    {
+      "epoch": 0.40419928568851693,
+      "grad_norm": 0.11681775748729706,
+      "learning_rate": 0.00018384471063645548,
+      "loss": 0.1376,
+      "step": 5602
+    },
+    {
+      "epoch": 0.4042714383635773,
+      "grad_norm": 0.13280463218688965,
+      "learning_rate": 0.00018384182421705874,
+      "loss": 0.1585,
+      "step": 5603
+    },
+    {
+      "epoch": 0.40434359103863776,
+      "grad_norm": 0.12719693779945374,
+      "learning_rate": 0.000183838937797662,
+      "loss": 0.1547,
+      "step": 5604
+    },
+    {
+      "epoch": 0.4044157437136982,
+      "grad_norm": 0.12950772047042847,
+      "learning_rate": 0.00018383605137826527,
+      "loss": 0.1568,
+      "step": 5605
+    },
+    {
+      "epoch": 0.4044878963887586,
+      "grad_norm": 0.1172901839017868,
+      "learning_rate": 0.00018383316495886853,
+      "loss": 0.156,
+      "step": 5606
+    },
+    {
+      "epoch": 0.40456004906381904,
+      "grad_norm": 0.12445659935474396,
+      "learning_rate": 0.0001838302785394718,
+      "loss": 0.1376,
+      "step": 5607
+    },
+    {
+      "epoch": 0.4046322017388795,
+      "grad_norm": 0.09501469880342484,
+      "learning_rate": 0.00018382739212007506,
+      "loss": 0.1283,
+      "step": 5608
+    },
+    {
+      "epoch": 0.40470435441393987,
+      "grad_norm": 0.09757381677627563,
+      "learning_rate": 0.0001838245057006783,
+      "loss": 0.1238,
+      "step": 5609
+    },
+    {
+      "epoch": 0.4047765070890003,
+      "grad_norm": 0.11157125234603882,
+      "learning_rate": 0.00018382161928128158,
+      "loss": 0.1144,
+      "step": 5610
+    },
+    {
+      "epoch": 0.40484865976406076,
+      "grad_norm": 0.130250945687294,
+      "learning_rate": 0.00018381873286188485,
+      "loss": 0.2204,
+      "step": 5611
+    },
+    {
+      "epoch": 0.4049208124391212,
+      "grad_norm": 0.12943124771118164,
+      "learning_rate": 0.0001838158464424881,
+      "loss": 0.1603,
+      "step": 5612
+    },
+    {
+      "epoch": 0.4049929651141816,
+      "grad_norm": 0.13775520026683807,
+      "learning_rate": 0.00018381296002309137,
+      "loss": 0.1831,
+      "step": 5613
+    },
+    {
+      "epoch": 0.40506511778924204,
+      "grad_norm": 0.12541238963603973,
+      "learning_rate": 0.0001838100736036946,
+      "loss": 0.1569,
+      "step": 5614
+    },
+    {
+      "epoch": 0.4051372704643025,
+      "grad_norm": 0.13825315237045288,
+      "learning_rate": 0.00018380718718429787,
+      "loss": 0.1495,
+      "step": 5615
+    },
+    {
+      "epoch": 0.40520942313936287,
+      "grad_norm": 0.10173406451940536,
+      "learning_rate": 0.00018380430076490113,
+      "loss": 0.1119,
+      "step": 5616
+    },
+    {
+      "epoch": 0.4052815758144233,
+      "grad_norm": 0.1514287143945694,
+      "learning_rate": 0.00018380141434550442,
+      "loss": 0.1496,
+      "step": 5617
+    },
+    {
+      "epoch": 0.40535372848948376,
+      "grad_norm": 0.1010914295911789,
+      "learning_rate": 0.0001837985279261077,
+      "loss": 0.0856,
+      "step": 5618
+    },
+    {
+      "epoch": 0.4054258811645442,
+      "grad_norm": 0.14536389708518982,
+      "learning_rate": 0.00018379564150671092,
+      "loss": 0.2136,
+      "step": 5619
+    },
+    {
+      "epoch": 0.4054980338396046,
+      "grad_norm": 0.12175793945789337,
+      "learning_rate": 0.00018379275508731419,
+      "loss": 0.1596,
+      "step": 5620
+    },
+    {
+      "epoch": 0.40557018651466503,
+      "grad_norm": 0.1434437483549118,
+      "learning_rate": 0.00018378986866791745,
+      "loss": 0.1478,
+      "step": 5621
+    },
+    {
+      "epoch": 0.4056423391897255,
+      "grad_norm": 0.1122213676571846,
+      "learning_rate": 0.0001837869822485207,
+      "loss": 0.1568,
+      "step": 5622
+    },
+    {
+      "epoch": 0.40571449186478586,
+      "grad_norm": 0.10404715687036514,
+      "learning_rate": 0.00018378409582912397,
+      "loss": 0.1678,
+      "step": 5623
+    },
+    {
+      "epoch": 0.4057866445398463,
+      "grad_norm": 0.11233187466859818,
+      "learning_rate": 0.00018378120940972724,
+      "loss": 0.1707,
+      "step": 5624
+    },
+    {
+      "epoch": 0.40585879721490675,
+      "grad_norm": 0.10880354046821594,
+      "learning_rate": 0.0001837783229903305,
+      "loss": 0.1443,
+      "step": 5625
+    },
+    {
+      "epoch": 0.4059309498899672,
+      "grad_norm": 0.1127571314573288,
+      "learning_rate": 0.00018377543657093376,
+      "loss": 0.2127,
+      "step": 5626
+    },
+    {
+      "epoch": 0.4060031025650276,
+      "grad_norm": 0.11678310483694077,
+      "learning_rate": 0.00018377255015153703,
+      "loss": 0.1568,
+      "step": 5627
+    },
+    {
+      "epoch": 0.40607525524008803,
+      "grad_norm": 0.11706399917602539,
+      "learning_rate": 0.0001837696637321403,
+      "loss": 0.1637,
+      "step": 5628
+    },
+    {
+      "epoch": 0.4061474079151485,
+      "grad_norm": 0.10593684762716293,
+      "learning_rate": 0.00018376677731274355,
+      "loss": 0.1642,
+      "step": 5629
+    },
+    {
+      "epoch": 0.40621956059020886,
+      "grad_norm": 0.14842522144317627,
+      "learning_rate": 0.00018376389089334681,
+      "loss": 0.1577,
+      "step": 5630
+    },
+    {
+      "epoch": 0.4062917132652693,
+      "grad_norm": 0.11014879494905472,
+      "learning_rate": 0.00018376100447395008,
+      "loss": 0.1497,
+      "step": 5631
+    },
+    {
+      "epoch": 0.40636386594032975,
+      "grad_norm": 0.10869240015745163,
+      "learning_rate": 0.00018375811805455334,
+      "loss": 0.1774,
+      "step": 5632
+    },
+    {
+      "epoch": 0.4064360186153902,
+      "grad_norm": 0.12231890857219696,
+      "learning_rate": 0.0001837552316351566,
+      "loss": 0.14,
+      "step": 5633
+    },
+    {
+      "epoch": 0.4065081712904506,
+      "grad_norm": 0.11752889305353165,
+      "learning_rate": 0.00018375234521575987,
+      "loss": 0.1522,
+      "step": 5634
+    },
+    {
+      "epoch": 0.406580323965511,
+      "grad_norm": 0.12622301280498505,
+      "learning_rate": 0.00018374945879636313,
+      "loss": 0.1349,
+      "step": 5635
+    },
+    {
+      "epoch": 0.40665247664057147,
+      "grad_norm": 0.12093006074428558,
+      "learning_rate": 0.00018374657237696637,
+      "loss": 0.1645,
+      "step": 5636
+    },
+    {
+      "epoch": 0.40672462931563186,
+      "grad_norm": 0.10946014523506165,
+      "learning_rate": 0.00018374368595756963,
+      "loss": 0.179,
+      "step": 5637
+    },
+    {
+      "epoch": 0.4067967819906923,
+      "grad_norm": 0.105443075299263,
+      "learning_rate": 0.00018374079953817292,
+      "loss": 0.1336,
+      "step": 5638
+    },
+    {
+      "epoch": 0.40686893466575275,
+      "grad_norm": 0.1109885573387146,
+      "learning_rate": 0.00018373791311877618,
+      "loss": 0.1291,
+      "step": 5639
+    },
+    {
+      "epoch": 0.40694108734081313,
+      "grad_norm": 0.10615464299917221,
+      "learning_rate": 0.00018373502669937944,
+      "loss": 0.1363,
+      "step": 5640
+    },
+    {
+      "epoch": 0.4070132400158736,
+      "grad_norm": 0.12033508718013763,
+      "learning_rate": 0.00018373214027998268,
+      "loss": 0.1899,
+      "step": 5641
+    },
+    {
+      "epoch": 0.407085392690934,
+      "grad_norm": 0.1122654527425766,
+      "learning_rate": 0.00018372925386058594,
+      "loss": 0.1282,
+      "step": 5642
+    },
+    {
+      "epoch": 0.40715754536599447,
+      "grad_norm": 0.1292010247707367,
+      "learning_rate": 0.0001837263674411892,
+      "loss": 0.1536,
+      "step": 5643
+    },
+    {
+      "epoch": 0.40722969804105486,
+      "grad_norm": 0.11770255118608475,
+      "learning_rate": 0.00018372348102179247,
+      "loss": 0.1315,
+      "step": 5644
+    },
+    {
+      "epoch": 0.4073018507161153,
+      "grad_norm": 0.13030600547790527,
+      "learning_rate": 0.00018372059460239576,
+      "loss": 0.1585,
+      "step": 5645
+    },
+    {
+      "epoch": 0.40737400339117574,
+      "grad_norm": 0.10636156797409058,
+      "learning_rate": 0.000183717708182999,
+      "loss": 0.1644,
+      "step": 5646
+    },
+    {
+      "epoch": 0.40744615606623613,
+      "grad_norm": 0.10021941363811493,
+      "learning_rate": 0.00018371482176360226,
+      "loss": 0.1305,
+      "step": 5647
+    },
+    {
+      "epoch": 0.4075183087412966,
+      "grad_norm": 0.10358332842588425,
+      "learning_rate": 0.00018371193534420552,
+      "loss": 0.1459,
+      "step": 5648
+    },
+    {
+      "epoch": 0.407590461416357,
+      "grad_norm": 0.13000337779521942,
+      "learning_rate": 0.00018370904892480878,
+      "loss": 0.1457,
+      "step": 5649
+    },
+    {
+      "epoch": 0.40766261409141746,
+      "grad_norm": 0.15379568934440613,
+      "learning_rate": 0.00018370616250541205,
+      "loss": 0.1913,
+      "step": 5650
+    },
+    {
+      "epoch": 0.40773476676647785,
+      "grad_norm": 0.12629206478595734,
+      "learning_rate": 0.0001837032760860153,
+      "loss": 0.1264,
+      "step": 5651
+    },
+    {
+      "epoch": 0.4078069194415383,
+      "grad_norm": 0.1013764888048172,
+      "learning_rate": 0.00018370038966661857,
+      "loss": 0.1241,
+      "step": 5652
+    },
+    {
+      "epoch": 0.40787907211659874,
+      "grad_norm": 0.1186005175113678,
+      "learning_rate": 0.00018369750324722184,
+      "loss": 0.1681,
+      "step": 5653
+    },
+    {
+      "epoch": 0.40795122479165913,
+      "grad_norm": 0.12923049926757812,
+      "learning_rate": 0.0001836946168278251,
+      "loss": 0.1826,
+      "step": 5654
+    },
+    {
+      "epoch": 0.4080233774667196,
+      "grad_norm": 0.11416444182395935,
+      "learning_rate": 0.00018369173040842836,
+      "loss": 0.1738,
+      "step": 5655
+    },
+    {
+      "epoch": 0.40809553014178,
+      "grad_norm": 0.09962332248687744,
+      "learning_rate": 0.00018368884398903162,
+      "loss": 0.1549,
+      "step": 5656
+    },
+    {
+      "epoch": 0.40816768281684046,
+      "grad_norm": 0.1386914998292923,
+      "learning_rate": 0.00018368595756963486,
+      "loss": 0.1769,
+      "step": 5657
+    },
+    {
+      "epoch": 0.40823983549190085,
+      "grad_norm": 0.12415996193885803,
+      "learning_rate": 0.00018368307115023812,
+      "loss": 0.1594,
+      "step": 5658
+    },
+    {
+      "epoch": 0.4083119881669613,
+      "grad_norm": 0.1531461775302887,
+      "learning_rate": 0.0001836801847308414,
+      "loss": 0.1311,
+      "step": 5659
+    },
+    {
+      "epoch": 0.40838414084202174,
+      "grad_norm": 0.11212441325187683,
+      "learning_rate": 0.00018367729831144468,
+      "loss": 0.1853,
+      "step": 5660
+    },
+    {
+      "epoch": 0.4084562935170821,
+      "grad_norm": 0.12954039871692657,
+      "learning_rate": 0.00018367441189204794,
+      "loss": 0.194,
+      "step": 5661
+    },
+    {
+      "epoch": 0.40852844619214257,
+      "grad_norm": 0.13729546964168549,
+      "learning_rate": 0.00018367152547265117,
+      "loss": 0.1578,
+      "step": 5662
+    },
+    {
+      "epoch": 0.408600598867203,
+      "grad_norm": 0.1417706310749054,
+      "learning_rate": 0.00018366863905325444,
+      "loss": 0.1596,
+      "step": 5663
+    },
+    {
+      "epoch": 0.40867275154226346,
+      "grad_norm": 0.12691785395145416,
+      "learning_rate": 0.0001836657526338577,
+      "loss": 0.1442,
+      "step": 5664
+    },
+    {
+      "epoch": 0.40874490421732385,
+      "grad_norm": 0.11149875819683075,
+      "learning_rate": 0.00018366286621446096,
+      "loss": 0.2039,
+      "step": 5665
+    },
+    {
+      "epoch": 0.4088170568923843,
+      "grad_norm": 0.09372027218341827,
+      "learning_rate": 0.00018365997979506425,
+      "loss": 0.122,
+      "step": 5666
+    },
+    {
+      "epoch": 0.40888920956744473,
+      "grad_norm": 0.13826479017734528,
+      "learning_rate": 0.0001836570933756675,
+      "loss": 0.1548,
+      "step": 5667
+    },
+    {
+      "epoch": 0.4089613622425051,
+      "grad_norm": 0.154812291264534,
+      "learning_rate": 0.00018365420695627075,
+      "loss": 0.2081,
+      "step": 5668
+    },
+    {
+      "epoch": 0.40903351491756557,
+      "grad_norm": 0.10125817358493805,
+      "learning_rate": 0.00018365132053687401,
+      "loss": 0.1397,
+      "step": 5669
+    },
+    {
+      "epoch": 0.409105667592626,
+      "grad_norm": 0.13650555908679962,
+      "learning_rate": 0.00018364843411747728,
+      "loss": 0.1591,
+      "step": 5670
+    },
+    {
+      "epoch": 0.4091778202676864,
+      "grad_norm": 0.1282268911600113,
+      "learning_rate": 0.00018364554769808054,
+      "loss": 0.1726,
+      "step": 5671
+    },
+    {
+      "epoch": 0.40924997294274684,
+      "grad_norm": 0.13484404981136322,
+      "learning_rate": 0.0001836426612786838,
+      "loss": 0.1917,
+      "step": 5672
+    },
+    {
+      "epoch": 0.4093221256178073,
+      "grad_norm": 0.11429597437381744,
+      "learning_rate": 0.00018363977485928707,
+      "loss": 0.1403,
+      "step": 5673
+    },
+    {
+      "epoch": 0.40939427829286773,
+      "grad_norm": 0.15964668989181519,
+      "learning_rate": 0.00018363688843989033,
+      "loss": 0.1883,
+      "step": 5674
+    },
+    {
+      "epoch": 0.4094664309679281,
+      "grad_norm": 0.09609831869602203,
+      "learning_rate": 0.0001836340020204936,
+      "loss": 0.167,
+      "step": 5675
+    },
+    {
+      "epoch": 0.40953858364298856,
+      "grad_norm": 0.12426905333995819,
+      "learning_rate": 0.00018363111560109686,
+      "loss": 0.1825,
+      "step": 5676
+    },
+    {
+      "epoch": 0.409610736318049,
+      "grad_norm": 0.11844122409820557,
+      "learning_rate": 0.00018362822918170012,
+      "loss": 0.1929,
+      "step": 5677
+    },
+    {
+      "epoch": 0.4096828889931094,
+      "grad_norm": 0.1360781341791153,
+      "learning_rate": 0.00018362534276230335,
+      "loss": 0.2015,
+      "step": 5678
+    },
+    {
+      "epoch": 0.40975504166816984,
+      "grad_norm": 0.12205765396356583,
+      "learning_rate": 0.00018362245634290662,
+      "loss": 0.1006,
+      "step": 5679
+    },
+    {
+      "epoch": 0.4098271943432303,
+      "grad_norm": 0.12862655520439148,
+      "learning_rate": 0.0001836195699235099,
+      "loss": 0.1534,
+      "step": 5680
+    },
+    {
+      "epoch": 0.4098993470182907,
+      "grad_norm": 0.10743330419063568,
+      "learning_rate": 0.00018361668350411317,
+      "loss": 0.1926,
+      "step": 5681
+    },
+    {
+      "epoch": 0.4099714996933511,
+      "grad_norm": 0.14362482726573944,
+      "learning_rate": 0.00018361379708471643,
+      "loss": 0.1729,
+      "step": 5682
+    },
+    {
+      "epoch": 0.41004365236841156,
+      "grad_norm": 0.1336904764175415,
+      "learning_rate": 0.00018361091066531967,
+      "loss": 0.172,
+      "step": 5683
+    },
+    {
+      "epoch": 0.410115805043472,
+      "grad_norm": 0.14109814167022705,
+      "learning_rate": 0.00018360802424592293,
+      "loss": 0.1686,
+      "step": 5684
+    },
+    {
+      "epoch": 0.4101879577185324,
+      "grad_norm": 0.12172040343284607,
+      "learning_rate": 0.0001836051378265262,
+      "loss": 0.1594,
+      "step": 5685
+    },
+    {
+      "epoch": 0.41026011039359284,
+      "grad_norm": 0.12343661487102509,
+      "learning_rate": 0.00018360225140712946,
+      "loss": 0.1437,
+      "step": 5686
+    },
+    {
+      "epoch": 0.4103322630686533,
+      "grad_norm": 0.13678564131259918,
+      "learning_rate": 0.00018359936498773275,
+      "loss": 0.2118,
+      "step": 5687
+    },
+    {
+      "epoch": 0.4104044157437137,
+      "grad_norm": 0.1265508383512497,
+      "learning_rate": 0.00018359647856833598,
+      "loss": 0.1364,
+      "step": 5688
+    },
+    {
+      "epoch": 0.4104765684187741,
+      "grad_norm": 0.11329175531864166,
+      "learning_rate": 0.00018359359214893925,
+      "loss": 0.1411,
+      "step": 5689
+    },
+    {
+      "epoch": 0.41054872109383456,
+      "grad_norm": 0.111592598259449,
+      "learning_rate": 0.0001835907057295425,
+      "loss": 0.1482,
+      "step": 5690
+    },
+    {
+      "epoch": 0.410620873768895,
+      "grad_norm": 0.11039689183235168,
+      "learning_rate": 0.00018358781931014577,
+      "loss": 0.2005,
+      "step": 5691
+    },
+    {
+      "epoch": 0.4106930264439554,
+      "grad_norm": 0.11525961756706238,
+      "learning_rate": 0.00018358493289074903,
+      "loss": 0.1999,
+      "step": 5692
+    },
+    {
+      "epoch": 0.41076517911901583,
+      "grad_norm": 0.11791030317544937,
+      "learning_rate": 0.0001835820464713523,
+      "loss": 0.1936,
+      "step": 5693
+    },
+    {
+      "epoch": 0.4108373317940763,
+      "grad_norm": 0.11614886671304703,
+      "learning_rate": 0.00018357916005195556,
+      "loss": 0.1649,
+      "step": 5694
+    },
+    {
+      "epoch": 0.4109094844691367,
+      "grad_norm": 0.12133285403251648,
+      "learning_rate": 0.00018357627363255882,
+      "loss": 0.1373,
+      "step": 5695
+    },
+    {
+      "epoch": 0.4109816371441971,
+      "grad_norm": 0.11625772714614868,
+      "learning_rate": 0.0001835733872131621,
+      "loss": 0.1641,
+      "step": 5696
+    },
+    {
+      "epoch": 0.41105378981925755,
+      "grad_norm": 0.10323289036750793,
+      "learning_rate": 0.00018357050079376535,
+      "loss": 0.1068,
+      "step": 5697
+    },
+    {
+      "epoch": 0.411125942494318,
+      "grad_norm": 0.10496301203966141,
+      "learning_rate": 0.0001835676143743686,
+      "loss": 0.1126,
+      "step": 5698
+    },
+    {
+      "epoch": 0.4111980951693784,
+      "grad_norm": 0.11890524625778198,
+      "learning_rate": 0.00018356472795497185,
+      "loss": 0.1398,
+      "step": 5699
+    },
+    {
+      "epoch": 0.41127024784443883,
+      "grad_norm": 0.12184417992830276,
+      "learning_rate": 0.0001835618415355751,
+      "loss": 0.2108,
+      "step": 5700
+    },
+    {
+      "epoch": 0.4113424005194993,
+      "grad_norm": 0.12418466061353683,
+      "learning_rate": 0.0001835589551161784,
+      "loss": 0.1581,
+      "step": 5701
+    },
+    {
+      "epoch": 0.41141455319455966,
+      "grad_norm": 0.15010544657707214,
+      "learning_rate": 0.00018355606869678166,
+      "loss": 0.1631,
+      "step": 5702
+    },
+    {
+      "epoch": 0.4114867058696201,
+      "grad_norm": 0.22763435542583466,
+      "learning_rate": 0.00018355318227738493,
+      "loss": 0.1373,
+      "step": 5703
+    },
+    {
+      "epoch": 0.41155885854468055,
+      "grad_norm": 0.13446487486362457,
+      "learning_rate": 0.00018355029585798816,
+      "loss": 0.1767,
+      "step": 5704
+    },
+    {
+      "epoch": 0.411631011219741,
+      "grad_norm": 0.12202102690935135,
+      "learning_rate": 0.00018354740943859143,
+      "loss": 0.101,
+      "step": 5705
+    },
+    {
+      "epoch": 0.4117031638948014,
+      "grad_norm": 0.10646074265241623,
+      "learning_rate": 0.0001835445230191947,
+      "loss": 0.1843,
+      "step": 5706
+    },
+    {
+      "epoch": 0.4117753165698618,
+      "grad_norm": 0.1445694863796234,
+      "learning_rate": 0.00018354163659979795,
+      "loss": 0.2066,
+      "step": 5707
+    },
+    {
+      "epoch": 0.41184746924492227,
+      "grad_norm": 0.14221738278865814,
+      "learning_rate": 0.00018353875018040124,
+      "loss": 0.1619,
+      "step": 5708
+    },
+    {
+      "epoch": 0.41191962191998266,
+      "grad_norm": 0.1329052895307541,
+      "learning_rate": 0.00018353586376100448,
+      "loss": 0.1656,
+      "step": 5709
+    },
+    {
+      "epoch": 0.4119917745950431,
+      "grad_norm": 0.1614803969860077,
+      "learning_rate": 0.00018353297734160774,
+      "loss": 0.1685,
+      "step": 5710
+    },
+    {
+      "epoch": 0.41206392727010355,
+      "grad_norm": 0.14487746357917786,
+      "learning_rate": 0.000183530090922211,
+      "loss": 0.1753,
+      "step": 5711
+    },
+    {
+      "epoch": 0.412136079945164,
+      "grad_norm": 0.1476510763168335,
+      "learning_rate": 0.00018352720450281427,
+      "loss": 0.1733,
+      "step": 5712
+    },
+    {
+      "epoch": 0.4122082326202244,
+      "grad_norm": 0.12578529119491577,
+      "learning_rate": 0.00018352431808341753,
+      "loss": 0.1628,
+      "step": 5713
+    },
+    {
+      "epoch": 0.4122803852952848,
+      "grad_norm": 0.13598518073558807,
+      "learning_rate": 0.0001835214316640208,
+      "loss": 0.1586,
+      "step": 5714
+    },
+    {
+      "epoch": 0.41235253797034527,
+      "grad_norm": 0.12721259891986847,
+      "learning_rate": 0.00018351854524462405,
+      "loss": 0.1611,
+      "step": 5715
+    },
+    {
+      "epoch": 0.41242469064540566,
+      "grad_norm": 0.16968463361263275,
+      "learning_rate": 0.00018351565882522732,
+      "loss": 0.1996,
+      "step": 5716
+    },
+    {
+      "epoch": 0.4124968433204661,
+      "grad_norm": 0.10791303962469101,
+      "learning_rate": 0.00018351277240583058,
+      "loss": 0.1612,
+      "step": 5717
+    },
+    {
+      "epoch": 0.41256899599552654,
+      "grad_norm": 0.12305327504873276,
+      "learning_rate": 0.00018350988598643384,
+      "loss": 0.1849,
+      "step": 5718
+    },
+    {
+      "epoch": 0.412641148670587,
+      "grad_norm": 0.10294891148805618,
+      "learning_rate": 0.0001835069995670371,
+      "loss": 0.1834,
+      "step": 5719
+    },
+    {
+      "epoch": 0.4127133013456474,
+      "grad_norm": 0.11937738209962845,
+      "learning_rate": 0.00018350411314764034,
+      "loss": 0.1454,
+      "step": 5720
+    },
+    {
+      "epoch": 0.4127854540207078,
+      "grad_norm": 0.13009479641914368,
+      "learning_rate": 0.0001835012267282436,
+      "loss": 0.1847,
+      "step": 5721
+    },
+    {
+      "epoch": 0.41285760669576826,
+      "grad_norm": 0.1491885632276535,
+      "learning_rate": 0.0001834983403088469,
+      "loss": 0.2028,
+      "step": 5722
+    },
+    {
+      "epoch": 0.41292975937082865,
+      "grad_norm": 0.11253825575113297,
+      "learning_rate": 0.00018349545388945016,
+      "loss": 0.1265,
+      "step": 5723
+    },
+    {
+      "epoch": 0.4130019120458891,
+      "grad_norm": 0.13709759712219238,
+      "learning_rate": 0.00018349256747005342,
+      "loss": 0.1504,
+      "step": 5724
+    },
+    {
+      "epoch": 0.41307406472094954,
+      "grad_norm": 0.11551269143819809,
+      "learning_rate": 0.00018348968105065666,
+      "loss": 0.1652,
+      "step": 5725
+    },
+    {
+      "epoch": 0.41314621739600993,
+      "grad_norm": 0.10509738326072693,
+      "learning_rate": 0.00018348679463125992,
+      "loss": 0.1564,
+      "step": 5726
+    },
+    {
+      "epoch": 0.4132183700710704,
+      "grad_norm": 0.12881092727184296,
+      "learning_rate": 0.00018348390821186318,
+      "loss": 0.1537,
+      "step": 5727
+    },
+    {
+      "epoch": 0.4132905227461308,
+      "grad_norm": 0.11585894227027893,
+      "learning_rate": 0.00018348102179246645,
+      "loss": 0.1681,
+      "step": 5728
+    },
+    {
+      "epoch": 0.41336267542119126,
+      "grad_norm": 0.10582401603460312,
+      "learning_rate": 0.00018347813537306974,
+      "loss": 0.1401,
+      "step": 5729
+    },
+    {
+      "epoch": 0.41343482809625165,
+      "grad_norm": 0.11501099169254303,
+      "learning_rate": 0.00018347524895367297,
+      "loss": 0.1416,
+      "step": 5730
+    },
+    {
+      "epoch": 0.4135069807713121,
+      "grad_norm": 0.11508175730705261,
+      "learning_rate": 0.00018347236253427623,
+      "loss": 0.1236,
+      "step": 5731
+    },
+    {
+      "epoch": 0.41357913344637254,
+      "grad_norm": 0.13451208174228668,
+      "learning_rate": 0.0001834694761148795,
+      "loss": 0.1284,
+      "step": 5732
+    },
+    {
+      "epoch": 0.4136512861214329,
+      "grad_norm": 0.15134069323539734,
+      "learning_rate": 0.00018346658969548276,
+      "loss": 0.1498,
+      "step": 5733
+    },
+    {
+      "epoch": 0.41372343879649337,
+      "grad_norm": 0.12855762243270874,
+      "learning_rate": 0.00018346370327608602,
+      "loss": 0.1303,
+      "step": 5734
+    },
+    {
+      "epoch": 0.4137955914715538,
+      "grad_norm": 0.09979292750358582,
+      "learning_rate": 0.00018346081685668929,
+      "loss": 0.1402,
+      "step": 5735
+    },
+    {
+      "epoch": 0.41386774414661426,
+      "grad_norm": 0.13594497740268707,
+      "learning_rate": 0.00018345793043729255,
+      "loss": 0.2275,
+      "step": 5736
+    },
+    {
+      "epoch": 0.41393989682167465,
+      "grad_norm": 0.1293654441833496,
+      "learning_rate": 0.0001834550440178958,
+      "loss": 0.1622,
+      "step": 5737
+    },
+    {
+      "epoch": 0.4140120494967351,
+      "grad_norm": 0.21570616960525513,
+      "learning_rate": 0.00018345215759849907,
+      "loss": 0.1946,
+      "step": 5738
+    },
+    {
+      "epoch": 0.41408420217179553,
+      "grad_norm": 0.13746745884418488,
+      "learning_rate": 0.00018344927117910234,
+      "loss": 0.1473,
+      "step": 5739
+    },
+    {
+      "epoch": 0.4141563548468559,
+      "grad_norm": 0.12070949375629425,
+      "learning_rate": 0.0001834463847597056,
+      "loss": 0.1335,
+      "step": 5740
+    },
+    {
+      "epoch": 0.41422850752191637,
+      "grad_norm": 0.19424080848693848,
+      "learning_rate": 0.00018344349834030886,
+      "loss": 0.166,
+      "step": 5741
+    },
+    {
+      "epoch": 0.4143006601969768,
+      "grad_norm": 0.12598204612731934,
+      "learning_rate": 0.0001834406119209121,
+      "loss": 0.1249,
+      "step": 5742
+    },
+    {
+      "epoch": 0.41437281287203725,
+      "grad_norm": 0.11859706044197083,
+      "learning_rate": 0.0001834377255015154,
+      "loss": 0.2106,
+      "step": 5743
+    },
+    {
+      "epoch": 0.41444496554709764,
+      "grad_norm": 0.1263197511434555,
+      "learning_rate": 0.00018343483908211865,
+      "loss": 0.163,
+      "step": 5744
+    },
+    {
+      "epoch": 0.4145171182221581,
+      "grad_norm": 0.15331189334392548,
+      "learning_rate": 0.00018343195266272192,
+      "loss": 0.1288,
+      "step": 5745
+    },
+    {
+      "epoch": 0.41458927089721853,
+      "grad_norm": 0.13554920256137848,
+      "learning_rate": 0.00018342906624332518,
+      "loss": 0.1355,
+      "step": 5746
+    },
+    {
+      "epoch": 0.4146614235722789,
+      "grad_norm": 0.12715382874011993,
+      "learning_rate": 0.00018342617982392841,
+      "loss": 0.1382,
+      "step": 5747
+    },
+    {
+      "epoch": 0.41473357624733936,
+      "grad_norm": 0.12478697299957275,
+      "learning_rate": 0.00018342329340453168,
+      "loss": 0.171,
+      "step": 5748
+    },
+    {
+      "epoch": 0.4148057289223998,
+      "grad_norm": 0.09921965003013611,
+      "learning_rate": 0.00018342040698513494,
+      "loss": 0.0962,
+      "step": 5749
+    },
+    {
+      "epoch": 0.41487788159746025,
+      "grad_norm": 0.09717674553394318,
+      "learning_rate": 0.00018341752056573823,
+      "loss": 0.1345,
+      "step": 5750
+    },
+    {
+      "epoch": 0.41495003427252064,
+      "grad_norm": 0.11751259863376617,
+      "learning_rate": 0.0001834146341463415,
+      "loss": 0.1562,
+      "step": 5751
+    },
+    {
+      "epoch": 0.4150221869475811,
+      "grad_norm": 0.11764825880527496,
+      "learning_rate": 0.00018341174772694473,
+      "loss": 0.1871,
+      "step": 5752
+    },
+    {
+      "epoch": 0.41509433962264153,
+      "grad_norm": 0.10763514786958694,
+      "learning_rate": 0.000183408861307548,
+      "loss": 0.157,
+      "step": 5753
+    },
+    {
+      "epoch": 0.4151664922977019,
+      "grad_norm": 0.10569040477275848,
+      "learning_rate": 0.00018340597488815125,
+      "loss": 0.1303,
+      "step": 5754
+    },
+    {
+      "epoch": 0.41523864497276236,
+      "grad_norm": 0.11948921531438828,
+      "learning_rate": 0.00018340308846875452,
+      "loss": 0.1661,
+      "step": 5755
+    },
+    {
+      "epoch": 0.4153107976478228,
+      "grad_norm": 0.13358844816684723,
+      "learning_rate": 0.00018340020204935778,
+      "loss": 0.1455,
+      "step": 5756
+    },
+    {
+      "epoch": 0.4153829503228832,
+      "grad_norm": 0.12435338646173477,
+      "learning_rate": 0.00018339731562996104,
+      "loss": 0.146,
+      "step": 5757
+    },
+    {
+      "epoch": 0.41545510299794364,
+      "grad_norm": 0.13670466840267181,
+      "learning_rate": 0.0001833944292105643,
+      "loss": 0.1405,
+      "step": 5758
+    },
+    {
+      "epoch": 0.4155272556730041,
+      "grad_norm": 0.1171383336186409,
+      "learning_rate": 0.00018339154279116757,
+      "loss": 0.1349,
+      "step": 5759
+    },
+    {
+      "epoch": 0.4155994083480645,
+      "grad_norm": 0.11977862566709518,
+      "learning_rate": 0.00018338865637177083,
+      "loss": 0.1482,
+      "step": 5760
+    },
+    {
+      "epoch": 0.4156715610231249,
+      "grad_norm": 0.11456245929002762,
+      "learning_rate": 0.0001833857699523741,
+      "loss": 0.1671,
+      "step": 5761
+    },
+    {
+      "epoch": 0.41574371369818536,
+      "grad_norm": 0.13899686932563782,
+      "learning_rate": 0.00018338288353297736,
+      "loss": 0.1764,
+      "step": 5762
+    },
+    {
+      "epoch": 0.4158158663732458,
+      "grad_norm": 0.11373434215784073,
+      "learning_rate": 0.0001833799971135806,
+      "loss": 0.1239,
+      "step": 5763
+    },
+    {
+      "epoch": 0.4158880190483062,
+      "grad_norm": 0.14908647537231445,
+      "learning_rate": 0.00018337711069418388,
+      "loss": 0.1531,
+      "step": 5764
+    },
+    {
+      "epoch": 0.41596017172336663,
+      "grad_norm": 0.17516694962978363,
+      "learning_rate": 0.00018337422427478715,
+      "loss": 0.1924,
+      "step": 5765
+    },
+    {
+      "epoch": 0.4160323243984271,
+      "grad_norm": 0.13450179994106293,
+      "learning_rate": 0.0001833713378553904,
+      "loss": 0.1606,
+      "step": 5766
+    },
+    {
+      "epoch": 0.4161044770734875,
+      "grad_norm": 0.12532760202884674,
+      "learning_rate": 0.00018336845143599367,
+      "loss": 0.1399,
+      "step": 5767
+    },
+    {
+      "epoch": 0.4161766297485479,
+      "grad_norm": 0.13363435864448547,
+      "learning_rate": 0.0001833655650165969,
+      "loss": 0.1554,
+      "step": 5768
+    },
+    {
+      "epoch": 0.41624878242360835,
+      "grad_norm": 0.1193813607096672,
+      "learning_rate": 0.00018336267859720017,
+      "loss": 0.1727,
+      "step": 5769
+    },
+    {
+      "epoch": 0.4163209350986688,
+      "grad_norm": 0.10275016725063324,
+      "learning_rate": 0.00018335979217780343,
+      "loss": 0.1379,
+      "step": 5770
+    },
+    {
+      "epoch": 0.4163930877737292,
+      "grad_norm": 0.12169700860977173,
+      "learning_rate": 0.00018335690575840672,
+      "loss": 0.1478,
+      "step": 5771
+    },
+    {
+      "epoch": 0.41646524044878963,
+      "grad_norm": 0.16734255850315094,
+      "learning_rate": 0.00018335401933901,
+      "loss": 0.224,
+      "step": 5772
+    },
+    {
+      "epoch": 0.4165373931238501,
+      "grad_norm": 0.1350734382867813,
+      "learning_rate": 0.00018335113291961322,
+      "loss": 0.1518,
+      "step": 5773
+    },
+    {
+      "epoch": 0.4166095457989105,
+      "grad_norm": 0.12713783979415894,
+      "learning_rate": 0.00018334824650021649,
+      "loss": 0.1748,
+      "step": 5774
+    },
+    {
+      "epoch": 0.4166816984739709,
+      "grad_norm": 0.12002015858888626,
+      "learning_rate": 0.00018334536008081975,
+      "loss": 0.1708,
+      "step": 5775
+    },
+    {
+      "epoch": 0.41675385114903135,
+      "grad_norm": 0.12447904050350189,
+      "learning_rate": 0.000183342473661423,
+      "loss": 0.1616,
+      "step": 5776
+    },
+    {
+      "epoch": 0.4168260038240918,
+      "grad_norm": 0.12837150692939758,
+      "learning_rate": 0.00018333958724202627,
+      "loss": 0.1498,
+      "step": 5777
+    },
+    {
+      "epoch": 0.4168981564991522,
+      "grad_norm": 0.14380308985710144,
+      "learning_rate": 0.00018333670082262954,
+      "loss": 0.1424,
+      "step": 5778
+    },
+    {
+      "epoch": 0.4169703091742126,
+      "grad_norm": 0.11396576464176178,
+      "learning_rate": 0.0001833338144032328,
+      "loss": 0.1573,
+      "step": 5779
+    },
+    {
+      "epoch": 0.41704246184927307,
+      "grad_norm": 0.11688689142465591,
+      "learning_rate": 0.00018333092798383606,
+      "loss": 0.178,
+      "step": 5780
+    },
+    {
+      "epoch": 0.4171146145243335,
+      "grad_norm": 0.13411317765712738,
+      "learning_rate": 0.00018332804156443933,
+      "loss": 0.1631,
+      "step": 5781
+    },
+    {
+      "epoch": 0.4171867671993939,
+      "grad_norm": 0.15105921030044556,
+      "learning_rate": 0.0001833251551450426,
+      "loss": 0.1903,
+      "step": 5782
+    },
+    {
+      "epoch": 0.41725891987445435,
+      "grad_norm": 0.12753167748451233,
+      "learning_rate": 0.00018332226872564585,
+      "loss": 0.1432,
+      "step": 5783
+    },
+    {
+      "epoch": 0.4173310725495148,
+      "grad_norm": 0.11953182518482208,
+      "learning_rate": 0.0001833193823062491,
+      "loss": 0.1778,
+      "step": 5784
+    },
+    {
+      "epoch": 0.4174032252245752,
+      "grad_norm": 0.11744635552167892,
+      "learning_rate": 0.00018331649588685235,
+      "loss": 0.2007,
+      "step": 5785
+    },
+    {
+      "epoch": 0.4174753778996356,
+      "grad_norm": 0.12863267958164215,
+      "learning_rate": 0.00018331360946745564,
+      "loss": 0.1041,
+      "step": 5786
+    },
+    {
+      "epoch": 0.41754753057469607,
+      "grad_norm": 0.11282802373170853,
+      "learning_rate": 0.0001833107230480589,
+      "loss": 0.2177,
+      "step": 5787
+    },
+    {
+      "epoch": 0.41761968324975646,
+      "grad_norm": 0.15572760999202728,
+      "learning_rate": 0.00018330783662866217,
+      "loss": 0.1447,
+      "step": 5788
+    },
+    {
+      "epoch": 0.4176918359248169,
+      "grad_norm": 0.09966544061899185,
+      "learning_rate": 0.0001833049502092654,
+      "loss": 0.105,
+      "step": 5789
+    },
+    {
+      "epoch": 0.41776398859987735,
+      "grad_norm": 0.13301274180412292,
+      "learning_rate": 0.00018330206378986867,
+      "loss": 0.1398,
+      "step": 5790
+    },
+    {
+      "epoch": 0.4178361412749378,
+      "grad_norm": 0.1342577189207077,
+      "learning_rate": 0.00018329917737047193,
+      "loss": 0.1375,
+      "step": 5791
+    },
+    {
+      "epoch": 0.4179082939499982,
+      "grad_norm": 0.10327088832855225,
+      "learning_rate": 0.0001832962909510752,
+      "loss": 0.1565,
+      "step": 5792
+    },
+    {
+      "epoch": 0.4179804466250586,
+      "grad_norm": 0.12060295790433884,
+      "learning_rate": 0.00018329340453167848,
+      "loss": 0.1418,
+      "step": 5793
+    },
+    {
+      "epoch": 0.41805259930011907,
+      "grad_norm": 0.10361672937870026,
+      "learning_rate": 0.00018329051811228172,
+      "loss": 0.1699,
+      "step": 5794
+    },
+    {
+      "epoch": 0.41812475197517945,
+      "grad_norm": 0.1323527842760086,
+      "learning_rate": 0.00018328763169288498,
+      "loss": 0.1728,
+      "step": 5795
+    },
+    {
+      "epoch": 0.4181969046502399,
+      "grad_norm": 0.14689281582832336,
+      "learning_rate": 0.00018328474527348824,
+      "loss": 0.1719,
+      "step": 5796
+    },
+    {
+      "epoch": 0.41826905732530034,
+      "grad_norm": 0.12184374034404755,
+      "learning_rate": 0.0001832818588540915,
+      "loss": 0.1421,
+      "step": 5797
+    },
+    {
+      "epoch": 0.4183412100003608,
+      "grad_norm": 0.13505211472511292,
+      "learning_rate": 0.00018327897243469477,
+      "loss": 0.212,
+      "step": 5798
+    },
+    {
+      "epoch": 0.4184133626754212,
+      "grad_norm": 0.11547620594501495,
+      "learning_rate": 0.00018327608601529803,
+      "loss": 0.1905,
+      "step": 5799
+    },
+    {
+      "epoch": 0.4184855153504816,
+      "grad_norm": 0.12469831854104996,
+      "learning_rate": 0.0001832731995959013,
+      "loss": 0.1484,
+      "step": 5800
+    },
+    {
+      "epoch": 0.41855766802554206,
+      "grad_norm": 0.1516280174255371,
+      "learning_rate": 0.00018327031317650456,
+      "loss": 0.1436,
+      "step": 5801
+    },
+    {
+      "epoch": 0.41862982070060245,
+      "grad_norm": 0.12161953002214432,
+      "learning_rate": 0.00018326742675710782,
+      "loss": 0.1521,
+      "step": 5802
+    },
+    {
+      "epoch": 0.4187019733756629,
+      "grad_norm": 0.13412874937057495,
+      "learning_rate": 0.00018326454033771108,
+      "loss": 0.1139,
+      "step": 5803
+    },
+    {
+      "epoch": 0.41877412605072334,
+      "grad_norm": 0.11358436942100525,
+      "learning_rate": 0.00018326165391831435,
+      "loss": 0.1606,
+      "step": 5804
+    },
+    {
+      "epoch": 0.4188462787257838,
+      "grad_norm": 0.10829130560159683,
+      "learning_rate": 0.00018325876749891758,
+      "loss": 0.1828,
+      "step": 5805
+    },
+    {
+      "epoch": 0.41891843140084417,
+      "grad_norm": 0.12857361137866974,
+      "learning_rate": 0.00018325588107952085,
+      "loss": 0.1265,
+      "step": 5806
+    },
+    {
+      "epoch": 0.4189905840759046,
+      "grad_norm": 0.13641512393951416,
+      "learning_rate": 0.00018325299466012414,
+      "loss": 0.1818,
+      "step": 5807
+    },
+    {
+      "epoch": 0.41906273675096506,
+      "grad_norm": 0.11245698481798172,
+      "learning_rate": 0.0001832501082407274,
+      "loss": 0.1082,
+      "step": 5808
+    },
+    {
+      "epoch": 0.41913488942602545,
+      "grad_norm": 0.11375346779823303,
+      "learning_rate": 0.00018324722182133066,
+      "loss": 0.135,
+      "step": 5809
+    },
+    {
+      "epoch": 0.4192070421010859,
+      "grad_norm": 0.14834176003932953,
+      "learning_rate": 0.0001832443354019339,
+      "loss": 0.1715,
+      "step": 5810
+    },
+    {
+      "epoch": 0.41927919477614634,
+      "grad_norm": 0.12736055254936218,
+      "learning_rate": 0.00018324144898253716,
+      "loss": 0.1759,
+      "step": 5811
+    },
+    {
+      "epoch": 0.4193513474512068,
+      "grad_norm": 0.15465882420539856,
+      "learning_rate": 0.00018323856256314042,
+      "loss": 0.1679,
+      "step": 5812
+    },
+    {
+      "epoch": 0.41942350012626717,
+      "grad_norm": 0.11425981670618057,
+      "learning_rate": 0.00018323567614374369,
+      "loss": 0.1426,
+      "step": 5813
+    },
+    {
+      "epoch": 0.4194956528013276,
+      "grad_norm": 0.11629367619752884,
+      "learning_rate": 0.00018323278972434698,
+      "loss": 0.1339,
+      "step": 5814
+    },
+    {
+      "epoch": 0.41956780547638806,
+      "grad_norm": 0.12361908704042435,
+      "learning_rate": 0.0001832299033049502,
+      "loss": 0.1736,
+      "step": 5815
+    },
+    {
+      "epoch": 0.41963995815144844,
+      "grad_norm": 0.1226155161857605,
+      "learning_rate": 0.00018322701688555347,
+      "loss": 0.1865,
+      "step": 5816
+    },
+    {
+      "epoch": 0.4197121108265089,
+      "grad_norm": 0.11376158148050308,
+      "learning_rate": 0.00018322413046615674,
+      "loss": 0.1796,
+      "step": 5817
+    },
+    {
+      "epoch": 0.41978426350156933,
+      "grad_norm": 0.11067010462284088,
+      "learning_rate": 0.00018322124404676,
+      "loss": 0.0954,
+      "step": 5818
+    },
+    {
+      "epoch": 0.4198564161766297,
+      "grad_norm": 0.128396674990654,
+      "learning_rate": 0.00018321835762736326,
+      "loss": 0.1813,
+      "step": 5819
+    },
+    {
+      "epoch": 0.41992856885169016,
+      "grad_norm": 0.11627837270498276,
+      "learning_rate": 0.00018321547120796653,
+      "loss": 0.1594,
+      "step": 5820
+    },
+    {
+      "epoch": 0.4200007215267506,
+      "grad_norm": 0.09679029881954193,
+      "learning_rate": 0.0001832125847885698,
+      "loss": 0.1316,
+      "step": 5821
+    },
+    {
+      "epoch": 0.42007287420181105,
+      "grad_norm": 0.1351933628320694,
+      "learning_rate": 0.00018320969836917305,
+      "loss": 0.1418,
+      "step": 5822
+    },
+    {
+      "epoch": 0.42014502687687144,
+      "grad_norm": 0.13122516870498657,
+      "learning_rate": 0.00018320681194977631,
+      "loss": 0.1817,
+      "step": 5823
+    },
+    {
+      "epoch": 0.4202171795519319,
+      "grad_norm": 0.11837951838970184,
+      "learning_rate": 0.00018320392553037958,
+      "loss": 0.1491,
+      "step": 5824
+    },
+    {
+      "epoch": 0.42028933222699233,
+      "grad_norm": 0.13323001563549042,
+      "learning_rate": 0.00018320103911098284,
+      "loss": 0.184,
+      "step": 5825
+    },
+    {
+      "epoch": 0.4203614849020527,
+      "grad_norm": 0.11082823574542999,
+      "learning_rate": 0.00018319815269158608,
+      "loss": 0.111,
+      "step": 5826
+    },
+    {
+      "epoch": 0.42043363757711316,
+      "grad_norm": 0.15011513233184814,
+      "learning_rate": 0.00018319526627218934,
+      "loss": 0.1439,
+      "step": 5827
+    },
+    {
+      "epoch": 0.4205057902521736,
+      "grad_norm": 0.1101953387260437,
+      "learning_rate": 0.00018319237985279263,
+      "loss": 0.1607,
+      "step": 5828
+    },
+    {
+      "epoch": 0.42057794292723405,
+      "grad_norm": 0.11777789145708084,
+      "learning_rate": 0.0001831894934333959,
+      "loss": 0.1451,
+      "step": 5829
+    },
+    {
+      "epoch": 0.42065009560229444,
+      "grad_norm": 0.10828852653503418,
+      "learning_rate": 0.00018318660701399916,
+      "loss": 0.1446,
+      "step": 5830
+    },
+    {
+      "epoch": 0.4207222482773549,
+      "grad_norm": 0.1039978563785553,
+      "learning_rate": 0.0001831837205946024,
+      "loss": 0.1701,
+      "step": 5831
+    },
+    {
+      "epoch": 0.4207944009524153,
+      "grad_norm": 0.14535623788833618,
+      "learning_rate": 0.00018318083417520565,
+      "loss": 0.1969,
+      "step": 5832
+    },
+    {
+      "epoch": 0.4208665536274757,
+      "grad_norm": 0.12345319986343384,
+      "learning_rate": 0.00018317794775580892,
+      "loss": 0.1878,
+      "step": 5833
+    },
+    {
+      "epoch": 0.42093870630253616,
+      "grad_norm": 0.12048795819282532,
+      "learning_rate": 0.00018317506133641218,
+      "loss": 0.1764,
+      "step": 5834
+    },
+    {
+      "epoch": 0.4210108589775966,
+      "grad_norm": 0.11864970624446869,
+      "learning_rate": 0.00018317217491701547,
+      "loss": 0.1592,
+      "step": 5835
+    },
+    {
+      "epoch": 0.42108301165265705,
+      "grad_norm": 0.09020841121673584,
+      "learning_rate": 0.0001831692884976187,
+      "loss": 0.1413,
+      "step": 5836
+    },
+    {
+      "epoch": 0.42115516432771744,
+      "grad_norm": 0.10649677366018295,
+      "learning_rate": 0.00018316640207822197,
+      "loss": 0.139,
+      "step": 5837
+    },
+    {
+      "epoch": 0.4212273170027779,
+      "grad_norm": 0.1171664223074913,
+      "learning_rate": 0.00018316351565882523,
+      "loss": 0.152,
+      "step": 5838
+    },
+    {
+      "epoch": 0.4212994696778383,
+      "grad_norm": 0.11515042930841446,
+      "learning_rate": 0.0001831606292394285,
+      "loss": 0.1712,
+      "step": 5839
+    },
+    {
+      "epoch": 0.4213716223528987,
+      "grad_norm": 0.133058562874794,
+      "learning_rate": 0.00018315774282003176,
+      "loss": 0.1491,
+      "step": 5840
+    },
+    {
+      "epoch": 0.42144377502795916,
+      "grad_norm": 0.10233544558286667,
+      "learning_rate": 0.00018315485640063502,
+      "loss": 0.1469,
+      "step": 5841
+    },
+    {
+      "epoch": 0.4215159277030196,
+      "grad_norm": 0.1627454161643982,
+      "learning_rate": 0.00018315196998123828,
+      "loss": 0.1574,
+      "step": 5842
+    },
+    {
+      "epoch": 0.42158808037808004,
+      "grad_norm": 0.15958084166049957,
+      "learning_rate": 0.00018314908356184155,
+      "loss": 0.153,
+      "step": 5843
+    },
+    {
+      "epoch": 0.42166023305314043,
+      "grad_norm": 0.12667101621627808,
+      "learning_rate": 0.0001831461971424448,
+      "loss": 0.1208,
+      "step": 5844
+    },
+    {
+      "epoch": 0.4217323857282009,
+      "grad_norm": 0.11573609709739685,
+      "learning_rate": 0.00018314331072304807,
+      "loss": 0.1218,
+      "step": 5845
+    },
+    {
+      "epoch": 0.4218045384032613,
+      "grad_norm": 0.14142170548439026,
+      "learning_rate": 0.00018314042430365133,
+      "loss": 0.1279,
+      "step": 5846
+    },
+    {
+      "epoch": 0.4218766910783217,
+      "grad_norm": 0.12910237908363342,
+      "learning_rate": 0.00018313753788425457,
+      "loss": 0.1554,
+      "step": 5847
+    },
+    {
+      "epoch": 0.42194884375338215,
+      "grad_norm": 0.11738871037960052,
+      "learning_rate": 0.00018313465146485783,
+      "loss": 0.0915,
+      "step": 5848
+    },
+    {
+      "epoch": 0.4220209964284426,
+      "grad_norm": 0.13453556597232819,
+      "learning_rate": 0.00018313176504546112,
+      "loss": 0.1591,
+      "step": 5849
+    },
+    {
+      "epoch": 0.422093149103503,
+      "grad_norm": 0.10725102573633194,
+      "learning_rate": 0.0001831288786260644,
+      "loss": 0.1826,
+      "step": 5850
+    },
+    {
+      "epoch": 0.42216530177856343,
+      "grad_norm": 0.15016676485538483,
+      "learning_rate": 0.00018312599220666765,
+      "loss": 0.2079,
+      "step": 5851
+    },
+    {
+      "epoch": 0.4222374544536239,
+      "grad_norm": 0.12500222027301788,
+      "learning_rate": 0.00018312310578727089,
+      "loss": 0.1417,
+      "step": 5852
+    },
+    {
+      "epoch": 0.4223096071286843,
+      "grad_norm": 0.1071745902299881,
+      "learning_rate": 0.00018312021936787415,
+      "loss": 0.1672,
+      "step": 5853
+    },
+    {
+      "epoch": 0.4223817598037447,
+      "grad_norm": 0.11144714802503586,
+      "learning_rate": 0.0001831173329484774,
+      "loss": 0.1354,
+      "step": 5854
+    },
+    {
+      "epoch": 0.42245391247880515,
+      "grad_norm": 0.11990606784820557,
+      "learning_rate": 0.00018311444652908067,
+      "loss": 0.1728,
+      "step": 5855
+    },
+    {
+      "epoch": 0.4225260651538656,
+      "grad_norm": 0.12286270409822464,
+      "learning_rate": 0.00018311156010968396,
+      "loss": 0.1637,
+      "step": 5856
+    },
+    {
+      "epoch": 0.422598217828926,
+      "grad_norm": 0.13923144340515137,
+      "learning_rate": 0.0001831086736902872,
+      "loss": 0.223,
+      "step": 5857
+    },
+    {
+      "epoch": 0.4226703705039864,
+      "grad_norm": 0.11241895705461502,
+      "learning_rate": 0.00018310578727089046,
+      "loss": 0.1688,
+      "step": 5858
+    },
+    {
+      "epoch": 0.42274252317904687,
+      "grad_norm": 0.1049077957868576,
+      "learning_rate": 0.00018310290085149373,
+      "loss": 0.1654,
+      "step": 5859
+    },
+    {
+      "epoch": 0.4228146758541073,
+      "grad_norm": 0.12452409416437149,
+      "learning_rate": 0.000183100014432097,
+      "loss": 0.1499,
+      "step": 5860
+    },
+    {
+      "epoch": 0.4228868285291677,
+      "grad_norm": 0.12135022133588791,
+      "learning_rate": 0.00018309712801270025,
+      "loss": 0.1608,
+      "step": 5861
+    },
+    {
+      "epoch": 0.42295898120422815,
+      "grad_norm": 0.09587796032428741,
+      "learning_rate": 0.00018309424159330351,
+      "loss": 0.2024,
+      "step": 5862
+    },
+    {
+      "epoch": 0.4230311338792886,
+      "grad_norm": 0.1125534176826477,
+      "learning_rate": 0.00018309135517390678,
+      "loss": 0.1313,
+      "step": 5863
+    },
+    {
+      "epoch": 0.423103286554349,
+      "grad_norm": 0.12060602754354477,
+      "learning_rate": 0.00018308846875451004,
+      "loss": 0.1382,
+      "step": 5864
+    },
+    {
+      "epoch": 0.4231754392294094,
+      "grad_norm": 0.09139573574066162,
+      "learning_rate": 0.0001830855823351133,
+      "loss": 0.1363,
+      "step": 5865
+    },
+    {
+      "epoch": 0.42324759190446987,
+      "grad_norm": 0.11417364329099655,
+      "learning_rate": 0.00018308269591571657,
+      "loss": 0.1556,
+      "step": 5866
+    },
+    {
+      "epoch": 0.4233197445795303,
+      "grad_norm": 0.09632206708192825,
+      "learning_rate": 0.00018307980949631983,
+      "loss": 0.1635,
+      "step": 5867
+    },
+    {
+      "epoch": 0.4233918972545907,
+      "grad_norm": 0.1438894122838974,
+      "learning_rate": 0.0001830769230769231,
+      "loss": 0.1208,
+      "step": 5868
+    },
+    {
+      "epoch": 0.42346404992965114,
+      "grad_norm": 0.14925217628479004,
+      "learning_rate": 0.00018307403665752633,
+      "loss": 0.1523,
+      "step": 5869
+    },
+    {
+      "epoch": 0.4235362026047116,
+      "grad_norm": 0.13312889635562897,
+      "learning_rate": 0.00018307115023812962,
+      "loss": 0.1306,
+      "step": 5870
+    },
+    {
+      "epoch": 0.423608355279772,
+      "grad_norm": 0.10850357264280319,
+      "learning_rate": 0.00018306826381873288,
+      "loss": 0.1768,
+      "step": 5871
+    },
+    {
+      "epoch": 0.4236805079548324,
+      "grad_norm": 0.10067091882228851,
+      "learning_rate": 0.00018306537739933614,
+      "loss": 0.1211,
+      "step": 5872
+    },
+    {
+      "epoch": 0.42375266062989286,
+      "grad_norm": 0.11367423087358475,
+      "learning_rate": 0.0001830624909799394,
+      "loss": 0.1652,
+      "step": 5873
+    },
+    {
+      "epoch": 0.4238248133049533,
+      "grad_norm": 0.18777737021446228,
+      "learning_rate": 0.00018305960456054264,
+      "loss": 0.12,
+      "step": 5874
+    },
+    {
+      "epoch": 0.4238969659800137,
+      "grad_norm": 0.128363236784935,
+      "learning_rate": 0.0001830567181411459,
+      "loss": 0.1622,
+      "step": 5875
+    },
+    {
+      "epoch": 0.42396911865507414,
+      "grad_norm": 0.12165389209985733,
+      "learning_rate": 0.00018305383172174917,
+      "loss": 0.0883,
+      "step": 5876
+    },
+    {
+      "epoch": 0.4240412713301346,
+      "grad_norm": 0.09363032877445221,
+      "learning_rate": 0.00018305094530235246,
+      "loss": 0.1329,
+      "step": 5877
+    },
+    {
+      "epoch": 0.42411342400519497,
+      "grad_norm": 0.11027435213327408,
+      "learning_rate": 0.00018304805888295572,
+      "loss": 0.1508,
+      "step": 5878
+    },
+    {
+      "epoch": 0.4241855766802554,
+      "grad_norm": 0.10954953730106354,
+      "learning_rate": 0.00018304517246355896,
+      "loss": 0.1681,
+      "step": 5879
+    },
+    {
+      "epoch": 0.42425772935531586,
+      "grad_norm": 0.11906708776950836,
+      "learning_rate": 0.00018304228604416222,
+      "loss": 0.0962,
+      "step": 5880
+    },
+    {
+      "epoch": 0.42432988203037625,
+      "grad_norm": 0.13661254942417145,
+      "learning_rate": 0.00018303939962476548,
+      "loss": 0.1372,
+      "step": 5881
+    },
+    {
+      "epoch": 0.4244020347054367,
+      "grad_norm": 0.12327921390533447,
+      "learning_rate": 0.00018303651320536875,
+      "loss": 0.1481,
+      "step": 5882
+    },
+    {
+      "epoch": 0.42447418738049714,
+      "grad_norm": 0.123286172747612,
+      "learning_rate": 0.000183033626785972,
+      "loss": 0.0811,
+      "step": 5883
+    },
+    {
+      "epoch": 0.4245463400555576,
+      "grad_norm": 0.12807874381542206,
+      "learning_rate": 0.00018303074036657527,
+      "loss": 0.1677,
+      "step": 5884
+    },
+    {
+      "epoch": 0.42461849273061797,
+      "grad_norm": 0.13452769815921783,
+      "learning_rate": 0.00018302785394717853,
+      "loss": 0.0953,
+      "step": 5885
+    },
+    {
+      "epoch": 0.4246906454056784,
+      "grad_norm": 0.13784530758857727,
+      "learning_rate": 0.0001830249675277818,
+      "loss": 0.1868,
+      "step": 5886
+    },
+    {
+      "epoch": 0.42476279808073886,
+      "grad_norm": 0.13271364569664001,
+      "learning_rate": 0.00018302208110838506,
+      "loss": 0.1832,
+      "step": 5887
+    },
+    {
+      "epoch": 0.42483495075579925,
+      "grad_norm": 0.13027092814445496,
+      "learning_rate": 0.00018301919468898832,
+      "loss": 0.1451,
+      "step": 5888
+    },
+    {
+      "epoch": 0.4249071034308597,
+      "grad_norm": 0.13652382791042328,
+      "learning_rate": 0.00018301630826959159,
+      "loss": 0.1992,
+      "step": 5889
+    },
+    {
+      "epoch": 0.42497925610592013,
+      "grad_norm": 0.1736457645893097,
+      "learning_rate": 0.00018301342185019482,
+      "loss": 0.1589,
+      "step": 5890
+    },
+    {
+      "epoch": 0.4250514087809806,
+      "grad_norm": 0.12425713241100311,
+      "learning_rate": 0.0001830105354307981,
+      "loss": 0.1122,
+      "step": 5891
+    },
+    {
+      "epoch": 0.42512356145604097,
+      "grad_norm": 0.13524624705314636,
+      "learning_rate": 0.00018300764901140138,
+      "loss": 0.1785,
+      "step": 5892
+    },
+    {
+      "epoch": 0.4251957141311014,
+      "grad_norm": 0.10107419639825821,
+      "learning_rate": 0.00018300476259200464,
+      "loss": 0.148,
+      "step": 5893
+    },
+    {
+      "epoch": 0.42526786680616185,
+      "grad_norm": 0.1114598885178566,
+      "learning_rate": 0.0001830018761726079,
+      "loss": 0.2023,
+      "step": 5894
+    },
+    {
+      "epoch": 0.42534001948122224,
+      "grad_norm": 0.13687801361083984,
+      "learning_rate": 0.00018299898975321114,
+      "loss": 0.164,
+      "step": 5895
+    },
+    {
+      "epoch": 0.4254121721562827,
+      "grad_norm": 0.15442319214344025,
+      "learning_rate": 0.0001829961033338144,
+      "loss": 0.1804,
+      "step": 5896
+    },
+    {
+      "epoch": 0.42548432483134313,
+      "grad_norm": 0.08131521195173264,
+      "learning_rate": 0.00018299321691441766,
+      "loss": 0.1752,
+      "step": 5897
+    },
+    {
+      "epoch": 0.4255564775064036,
+      "grad_norm": 0.10607467591762543,
+      "learning_rate": 0.00018299033049502095,
+      "loss": 0.177,
+      "step": 5898
+    },
+    {
+      "epoch": 0.42562863018146396,
+      "grad_norm": 0.11774367839097977,
+      "learning_rate": 0.00018298744407562422,
+      "loss": 0.1205,
+      "step": 5899
+    },
+    {
+      "epoch": 0.4257007828565244,
+      "grad_norm": 0.09929922968149185,
+      "learning_rate": 0.00018298455765622745,
+      "loss": 0.1396,
+      "step": 5900
+    },
+    {
+      "epoch": 0.42577293553158485,
+      "grad_norm": 0.13096508383750916,
+      "learning_rate": 0.00018298167123683071,
+      "loss": 0.1574,
+      "step": 5901
+    },
+    {
+      "epoch": 0.42584508820664524,
+      "grad_norm": 0.1007523238658905,
+      "learning_rate": 0.00018297878481743398,
+      "loss": 0.1346,
+      "step": 5902
+    },
+    {
+      "epoch": 0.4259172408817057,
+      "grad_norm": 0.09653882682323456,
+      "learning_rate": 0.00018297589839803724,
+      "loss": 0.1508,
+      "step": 5903
+    },
+    {
+      "epoch": 0.4259893935567661,
+      "grad_norm": 0.1219748705625534,
+      "learning_rate": 0.0001829730119786405,
+      "loss": 0.131,
+      "step": 5904
+    },
+    {
+      "epoch": 0.42606154623182657,
+      "grad_norm": 0.1340920776128769,
+      "learning_rate": 0.00018297012555924377,
+      "loss": 0.1711,
+      "step": 5905
+    },
+    {
+      "epoch": 0.42613369890688696,
+      "grad_norm": 0.11681176722049713,
+      "learning_rate": 0.00018296723913984703,
+      "loss": 0.1012,
+      "step": 5906
+    },
+    {
+      "epoch": 0.4262058515819474,
+      "grad_norm": 0.11355821043252945,
+      "learning_rate": 0.0001829643527204503,
+      "loss": 0.141,
+      "step": 5907
+    },
+    {
+      "epoch": 0.42627800425700785,
+      "grad_norm": 0.10950490087270737,
+      "learning_rate": 0.00018296146630105355,
+      "loss": 0.1598,
+      "step": 5908
+    },
+    {
+      "epoch": 0.42635015693206824,
+      "grad_norm": 0.12313297390937805,
+      "learning_rate": 0.00018295857988165682,
+      "loss": 0.1669,
+      "step": 5909
+    },
+    {
+      "epoch": 0.4264223096071287,
+      "grad_norm": 0.12461540102958679,
+      "learning_rate": 0.00018295569346226008,
+      "loss": 0.1192,
+      "step": 5910
+    },
+    {
+      "epoch": 0.4264944622821891,
+      "grad_norm": 0.10815031081438065,
+      "learning_rate": 0.00018295280704286332,
+      "loss": 0.1571,
+      "step": 5911
+    },
+    {
+      "epoch": 0.4265666149572495,
+      "grad_norm": 0.16719099879264832,
+      "learning_rate": 0.0001829499206234666,
+      "loss": 0.1661,
+      "step": 5912
+    },
+    {
+      "epoch": 0.42663876763230996,
+      "grad_norm": 0.11606849730014801,
+      "learning_rate": 0.00018294703420406987,
+      "loss": 0.1781,
+      "step": 5913
+    },
+    {
+      "epoch": 0.4267109203073704,
+      "grad_norm": 0.12098933756351471,
+      "learning_rate": 0.00018294414778467313,
+      "loss": 0.1538,
+      "step": 5914
+    },
+    {
+      "epoch": 0.42678307298243084,
+      "grad_norm": 0.12739412486553192,
+      "learning_rate": 0.0001829412613652764,
+      "loss": 0.1704,
+      "step": 5915
+    },
+    {
+      "epoch": 0.42685522565749123,
+      "grad_norm": 0.11682642251253128,
+      "learning_rate": 0.00018293837494587963,
+      "loss": 0.1249,
+      "step": 5916
+    },
+    {
+      "epoch": 0.4269273783325517,
+      "grad_norm": 0.13074259459972382,
+      "learning_rate": 0.0001829354885264829,
+      "loss": 0.1812,
+      "step": 5917
+    },
+    {
+      "epoch": 0.4269995310076121,
+      "grad_norm": 0.11174934357404709,
+      "learning_rate": 0.00018293260210708616,
+      "loss": 0.1528,
+      "step": 5918
+    },
+    {
+      "epoch": 0.4270716836826725,
+      "grad_norm": 0.11838796734809875,
+      "learning_rate": 0.00018292971568768945,
+      "loss": 0.1251,
+      "step": 5919
+    },
+    {
+      "epoch": 0.42714383635773295,
+      "grad_norm": 0.16064082086086273,
+      "learning_rate": 0.0001829268292682927,
+      "loss": 0.167,
+      "step": 5920
+    },
+    {
+      "epoch": 0.4272159890327934,
+      "grad_norm": 0.12237231433391571,
+      "learning_rate": 0.00018292394284889595,
+      "loss": 0.1683,
+      "step": 5921
+    },
+    {
+      "epoch": 0.42728814170785384,
+      "grad_norm": 0.10439042747020721,
+      "learning_rate": 0.0001829210564294992,
+      "loss": 0.1037,
+      "step": 5922
+    },
+    {
+      "epoch": 0.42736029438291423,
+      "grad_norm": 0.1152818351984024,
+      "learning_rate": 0.00018291817001010247,
+      "loss": 0.138,
+      "step": 5923
+    },
+    {
+      "epoch": 0.4274324470579747,
+      "grad_norm": 0.13494327664375305,
+      "learning_rate": 0.00018291528359070573,
+      "loss": 0.1809,
+      "step": 5924
+    },
+    {
+      "epoch": 0.4275045997330351,
+      "grad_norm": 0.10945824533700943,
+      "learning_rate": 0.000182912397171309,
+      "loss": 0.1499,
+      "step": 5925
+    },
+    {
+      "epoch": 0.4275767524080955,
+      "grad_norm": 0.11632286757230759,
+      "learning_rate": 0.00018290951075191226,
+      "loss": 0.1306,
+      "step": 5926
+    },
+    {
+      "epoch": 0.42764890508315595,
+      "grad_norm": 0.13610531389713287,
+      "learning_rate": 0.00018290662433251552,
+      "loss": 0.1616,
+      "step": 5927
+    },
+    {
+      "epoch": 0.4277210577582164,
+      "grad_norm": 0.18281017243862152,
+      "learning_rate": 0.00018290373791311879,
+      "loss": 0.1488,
+      "step": 5928
+    },
+    {
+      "epoch": 0.42779321043327684,
+      "grad_norm": 0.13510634005069733,
+      "learning_rate": 0.00018290085149372205,
+      "loss": 0.1588,
+      "step": 5929
+    },
+    {
+      "epoch": 0.4278653631083372,
+      "grad_norm": 0.12557633221149445,
+      "learning_rate": 0.0001828979650743253,
+      "loss": 0.1846,
+      "step": 5930
+    },
+    {
+      "epoch": 0.42793751578339767,
+      "grad_norm": 0.11874547600746155,
+      "learning_rate": 0.00018289507865492857,
+      "loss": 0.1583,
+      "step": 5931
+    },
+    {
+      "epoch": 0.4280096684584581,
+      "grad_norm": 0.10396189242601395,
+      "learning_rate": 0.0001828921922355318,
+      "loss": 0.1409,
+      "step": 5932
+    },
+    {
+      "epoch": 0.4280818211335185,
+      "grad_norm": 0.10849086195230484,
+      "learning_rate": 0.0001828893058161351,
+      "loss": 0.1719,
+      "step": 5933
+    },
+    {
+      "epoch": 0.42815397380857895,
+      "grad_norm": 0.12240342795848846,
+      "learning_rate": 0.00018288641939673836,
+      "loss": 0.1992,
+      "step": 5934
+    },
+    {
+      "epoch": 0.4282261264836394,
+      "grad_norm": 0.10966840386390686,
+      "learning_rate": 0.00018288353297734163,
+      "loss": 0.1009,
+      "step": 5935
+    },
+    {
+      "epoch": 0.42829827915869984,
+      "grad_norm": 0.09783158451318741,
+      "learning_rate": 0.0001828806465579449,
+      "loss": 0.1323,
+      "step": 5936
+    },
+    {
+      "epoch": 0.4283704318337602,
+      "grad_norm": 0.0975455790758133,
+      "learning_rate": 0.00018287776013854813,
+      "loss": 0.202,
+      "step": 5937
+    },
+    {
+      "epoch": 0.42844258450882067,
+      "grad_norm": 0.11488337814807892,
+      "learning_rate": 0.0001828748737191514,
+      "loss": 0.1367,
+      "step": 5938
+    },
+    {
+      "epoch": 0.4285147371838811,
+      "grad_norm": 0.11133381724357605,
+      "learning_rate": 0.00018287198729975465,
+      "loss": 0.1793,
+      "step": 5939
+    },
+    {
+      "epoch": 0.4285868898589415,
+      "grad_norm": 0.10274504870176315,
+      "learning_rate": 0.00018286910088035794,
+      "loss": 0.1471,
+      "step": 5940
+    },
+    {
+      "epoch": 0.42865904253400194,
+      "grad_norm": 0.12160360813140869,
+      "learning_rate": 0.0001828662144609612,
+      "loss": 0.166,
+      "step": 5941
+    },
+    {
+      "epoch": 0.4287311952090624,
+      "grad_norm": 0.10171741247177124,
+      "learning_rate": 0.00018286332804156444,
+      "loss": 0.123,
+      "step": 5942
+    },
+    {
+      "epoch": 0.4288033478841228,
+      "grad_norm": 0.13317373394966125,
+      "learning_rate": 0.0001828604416221677,
+      "loss": 0.1214,
+      "step": 5943
+    },
+    {
+      "epoch": 0.4288755005591832,
+      "grad_norm": 0.10292576253414154,
+      "learning_rate": 0.00018285755520277097,
+      "loss": 0.1328,
+      "step": 5944
+    },
+    {
+      "epoch": 0.42894765323424366,
+      "grad_norm": 0.15207763016223907,
+      "learning_rate": 0.00018285466878337423,
+      "loss": 0.1883,
+      "step": 5945
+    },
+    {
+      "epoch": 0.4290198059093041,
+      "grad_norm": 0.10962386429309845,
+      "learning_rate": 0.0001828517823639775,
+      "loss": 0.1393,
+      "step": 5946
+    },
+    {
+      "epoch": 0.4290919585843645,
+      "grad_norm": 0.12100953608751297,
+      "learning_rate": 0.00018284889594458075,
+      "loss": 0.142,
+      "step": 5947
+    },
+    {
+      "epoch": 0.42916411125942494,
+      "grad_norm": 0.11007843911647797,
+      "learning_rate": 0.00018284600952518402,
+      "loss": 0.1595,
+      "step": 5948
+    },
+    {
+      "epoch": 0.4292362639344854,
+      "grad_norm": 0.15275146067142487,
+      "learning_rate": 0.00018284312310578728,
+      "loss": 0.117,
+      "step": 5949
+    },
+    {
+      "epoch": 0.4293084166095458,
+      "grad_norm": 0.13427838683128357,
+      "learning_rate": 0.00018284023668639054,
+      "loss": 0.1629,
+      "step": 5950
+    },
+    {
+      "epoch": 0.4293805692846062,
+      "grad_norm": 0.1632755845785141,
+      "learning_rate": 0.0001828373502669938,
+      "loss": 0.1591,
+      "step": 5951
+    },
+    {
+      "epoch": 0.42945272195966666,
+      "grad_norm": 0.1164804995059967,
+      "learning_rate": 0.00018283446384759707,
+      "loss": 0.2073,
+      "step": 5952
+    },
+    {
+      "epoch": 0.4295248746347271,
+      "grad_norm": 0.12820562720298767,
+      "learning_rate": 0.0001828315774282003,
+      "loss": 0.1669,
+      "step": 5953
+    },
+    {
+      "epoch": 0.4295970273097875,
+      "grad_norm": 0.13334399461746216,
+      "learning_rate": 0.0001828286910088036,
+      "loss": 0.1461,
+      "step": 5954
+    },
+    {
+      "epoch": 0.42966917998484794,
+      "grad_norm": 0.12655992805957794,
+      "learning_rate": 0.00018282580458940686,
+      "loss": 0.1332,
+      "step": 5955
+    },
+    {
+      "epoch": 0.4297413326599084,
+      "grad_norm": 0.12284820526838303,
+      "learning_rate": 0.00018282291817001012,
+      "loss": 0.154,
+      "step": 5956
+    },
+    {
+      "epoch": 0.42981348533496877,
+      "grad_norm": 0.10828308761119843,
+      "learning_rate": 0.00018282003175061338,
+      "loss": 0.1576,
+      "step": 5957
+    },
+    {
+      "epoch": 0.4298856380100292,
+      "grad_norm": 0.12627825140953064,
+      "learning_rate": 0.00018281714533121662,
+      "loss": 0.1195,
+      "step": 5958
+    },
+    {
+      "epoch": 0.42995779068508966,
+      "grad_norm": 0.13220927119255066,
+      "learning_rate": 0.00018281425891181988,
+      "loss": 0.1654,
+      "step": 5959
+    },
+    {
+      "epoch": 0.4300299433601501,
+      "grad_norm": 0.10761047154664993,
+      "learning_rate": 0.00018281137249242315,
+      "loss": 0.1684,
+      "step": 5960
+    },
+    {
+      "epoch": 0.4301020960352105,
+      "grad_norm": 0.1338089257478714,
+      "learning_rate": 0.00018280848607302644,
+      "loss": 0.163,
+      "step": 5961
+    },
+    {
+      "epoch": 0.43017424871027093,
+      "grad_norm": 0.13918466866016388,
+      "learning_rate": 0.0001828055996536297,
+      "loss": 0.182,
+      "step": 5962
+    },
+    {
+      "epoch": 0.4302464013853314,
+      "grad_norm": 0.11007937788963318,
+      "learning_rate": 0.00018280271323423293,
+      "loss": 0.1725,
+      "step": 5963
+    },
+    {
+      "epoch": 0.43031855406039177,
+      "grad_norm": 0.11286143213510513,
+      "learning_rate": 0.0001827998268148362,
+      "loss": 0.1548,
+      "step": 5964
+    },
+    {
+      "epoch": 0.4303907067354522,
+      "grad_norm": 0.10858482122421265,
+      "learning_rate": 0.00018279694039543946,
+      "loss": 0.1773,
+      "step": 5965
+    },
+    {
+      "epoch": 0.43046285941051265,
+      "grad_norm": 0.13632416725158691,
+      "learning_rate": 0.00018279405397604272,
+      "loss": 0.1765,
+      "step": 5966
+    },
+    {
+      "epoch": 0.4305350120855731,
+      "grad_norm": 0.11998841166496277,
+      "learning_rate": 0.00018279116755664599,
+      "loss": 0.1726,
+      "step": 5967
+    },
+    {
+      "epoch": 0.4306071647606335,
+      "grad_norm": 0.09694402664899826,
+      "learning_rate": 0.00018278828113724925,
+      "loss": 0.1676,
+      "step": 5968
+    },
+    {
+      "epoch": 0.43067931743569393,
+      "grad_norm": 0.11320126056671143,
+      "learning_rate": 0.0001827853947178525,
+      "loss": 0.1574,
+      "step": 5969
+    },
+    {
+      "epoch": 0.4307514701107544,
+      "grad_norm": 0.11799290776252747,
+      "learning_rate": 0.00018278250829845577,
+      "loss": 0.1672,
+      "step": 5970
+    },
+    {
+      "epoch": 0.43082362278581476,
+      "grad_norm": 0.11785417050123215,
+      "learning_rate": 0.00018277962187905904,
+      "loss": 0.1412,
+      "step": 5971
+    },
+    {
+      "epoch": 0.4308957754608752,
+      "grad_norm": 0.11125194281339645,
+      "learning_rate": 0.0001827767354596623,
+      "loss": 0.1512,
+      "step": 5972
+    },
+    {
+      "epoch": 0.43096792813593565,
+      "grad_norm": 0.12699389457702637,
+      "learning_rate": 0.00018277384904026556,
+      "loss": 0.137,
+      "step": 5973
+    },
+    {
+      "epoch": 0.43104008081099604,
+      "grad_norm": 0.11820469051599503,
+      "learning_rate": 0.00018277096262086883,
+      "loss": 0.1464,
+      "step": 5974
+    },
+    {
+      "epoch": 0.4311122334860565,
+      "grad_norm": 0.10742323845624924,
+      "learning_rate": 0.00018276807620147206,
+      "loss": 0.1288,
+      "step": 5975
+    },
+    {
+      "epoch": 0.43118438616111693,
+      "grad_norm": 0.11058198660612106,
+      "learning_rate": 0.00018276518978207535,
+      "loss": 0.218,
+      "step": 5976
+    },
+    {
+      "epoch": 0.43125653883617737,
+      "grad_norm": 0.1054624542593956,
+      "learning_rate": 0.00018276230336267862,
+      "loss": 0.1621,
+      "step": 5977
+    },
+    {
+      "epoch": 0.43132869151123776,
+      "grad_norm": 0.10926330089569092,
+      "learning_rate": 0.00018275941694328188,
+      "loss": 0.192,
+      "step": 5978
+    },
+    {
+      "epoch": 0.4314008441862982,
+      "grad_norm": 0.12095009535551071,
+      "learning_rate": 0.00018275653052388514,
+      "loss": 0.1717,
+      "step": 5979
+    },
+    {
+      "epoch": 0.43147299686135865,
+      "grad_norm": 0.14626653492450714,
+      "learning_rate": 0.00018275364410448838,
+      "loss": 0.1831,
+      "step": 5980
+    },
+    {
+      "epoch": 0.43154514953641904,
+      "grad_norm": 0.11830244213342667,
+      "learning_rate": 0.00018275075768509164,
+      "loss": 0.1632,
+      "step": 5981
+    },
+    {
+      "epoch": 0.4316173022114795,
+      "grad_norm": 0.105450838804245,
+      "learning_rate": 0.0001827478712656949,
+      "loss": 0.1451,
+      "step": 5982
+    },
+    {
+      "epoch": 0.4316894548865399,
+      "grad_norm": 0.12555789947509766,
+      "learning_rate": 0.0001827449848462982,
+      "loss": 0.2005,
+      "step": 5983
+    },
+    {
+      "epoch": 0.43176160756160037,
+      "grad_norm": 0.11613515764474869,
+      "learning_rate": 0.00018274209842690146,
+      "loss": 0.1313,
+      "step": 5984
+    },
+    {
+      "epoch": 0.43183376023666076,
+      "grad_norm": 0.11850589513778687,
+      "learning_rate": 0.0001827392120075047,
+      "loss": 0.172,
+      "step": 5985
+    },
+    {
+      "epoch": 0.4319059129117212,
+      "grad_norm": 0.09376665949821472,
+      "learning_rate": 0.00018273632558810795,
+      "loss": 0.1635,
+      "step": 5986
+    },
+    {
+      "epoch": 0.43197806558678165,
+      "grad_norm": 0.12785853445529938,
+      "learning_rate": 0.00018273343916871122,
+      "loss": 0.1612,
+      "step": 5987
+    },
+    {
+      "epoch": 0.43205021826184203,
+      "grad_norm": 0.16476839780807495,
+      "learning_rate": 0.00018273055274931448,
+      "loss": 0.1906,
+      "step": 5988
+    },
+    {
+      "epoch": 0.4321223709369025,
+      "grad_norm": 0.15365070104599,
+      "learning_rate": 0.00018272766632991774,
+      "loss": 0.1283,
+      "step": 5989
+    },
+    {
+      "epoch": 0.4321945236119629,
+      "grad_norm": 0.10299073159694672,
+      "learning_rate": 0.000182724779910521,
+      "loss": 0.1787,
+      "step": 5990
+    },
+    {
+      "epoch": 0.43226667628702337,
+      "grad_norm": 0.13909496366977692,
+      "learning_rate": 0.00018272189349112427,
+      "loss": 0.1656,
+      "step": 5991
+    },
+    {
+      "epoch": 0.43233882896208375,
+      "grad_norm": 0.10549302399158478,
+      "learning_rate": 0.00018271900707172753,
+      "loss": 0.1566,
+      "step": 5992
+    },
+    {
+      "epoch": 0.4324109816371442,
+      "grad_norm": 0.15905825793743134,
+      "learning_rate": 0.0001827161206523308,
+      "loss": 0.1702,
+      "step": 5993
+    },
+    {
+      "epoch": 0.43248313431220464,
+      "grad_norm": 0.13359205424785614,
+      "learning_rate": 0.00018271323423293406,
+      "loss": 0.1221,
+      "step": 5994
+    },
+    {
+      "epoch": 0.43255528698726503,
+      "grad_norm": 0.14784115552902222,
+      "learning_rate": 0.00018271034781353732,
+      "loss": 0.1176,
+      "step": 5995
+    },
+    {
+      "epoch": 0.4326274396623255,
+      "grad_norm": 0.1282242238521576,
+      "learning_rate": 0.00018270746139414056,
+      "loss": 0.137,
+      "step": 5996
+    },
+    {
+      "epoch": 0.4326995923373859,
+      "grad_norm": 0.11883100867271423,
+      "learning_rate": 0.00018270457497474385,
+      "loss": 0.1569,
+      "step": 5997
+    },
+    {
+      "epoch": 0.43277174501244636,
+      "grad_norm": 0.1250883787870407,
+      "learning_rate": 0.0001827016885553471,
+      "loss": 0.1626,
+      "step": 5998
+    },
+    {
+      "epoch": 0.43284389768750675,
+      "grad_norm": 0.11436107754707336,
+      "learning_rate": 0.00018269880213595037,
+      "loss": 0.1389,
+      "step": 5999
+    },
+    {
+      "epoch": 0.4329160503625672,
+      "grad_norm": 0.1631336808204651,
+      "learning_rate": 0.00018269591571655364,
+      "loss": 0.1491,
+      "step": 6000
+    },
+    {
+      "epoch": 0.43298820303762764,
+      "grad_norm": 0.1344766467809677,
+      "learning_rate": 0.00018269302929715687,
+      "loss": 0.1315,
+      "step": 6001
+    },
+    {
+      "epoch": 0.433060355712688,
+      "grad_norm": 0.10305652767419815,
+      "learning_rate": 0.00018269014287776013,
+      "loss": 0.1788,
+      "step": 6002
+    },
+    {
+      "epoch": 0.43313250838774847,
+      "grad_norm": 0.11222297698259354,
+      "learning_rate": 0.0001826872564583634,
+      "loss": 0.1766,
+      "step": 6003
+    },
+    {
+      "epoch": 0.4332046610628089,
+      "grad_norm": 0.12760218977928162,
+      "learning_rate": 0.0001826843700389667,
+      "loss": 0.1388,
+      "step": 6004
+    },
+    {
+      "epoch": 0.4332768137378693,
+      "grad_norm": 0.1565997153520584,
+      "learning_rate": 0.00018268148361956995,
+      "loss": 0.1935,
+      "step": 6005
+    },
+    {
+      "epoch": 0.43334896641292975,
+      "grad_norm": 0.10120268166065216,
+      "learning_rate": 0.00018267859720017319,
+      "loss": 0.1646,
+      "step": 6006
+    },
+    {
+      "epoch": 0.4334211190879902,
+      "grad_norm": 0.09831411391496658,
+      "learning_rate": 0.00018267571078077645,
+      "loss": 0.1495,
+      "step": 6007
+    },
+    {
+      "epoch": 0.43349327176305064,
+      "grad_norm": 0.0985429584980011,
+      "learning_rate": 0.0001826728243613797,
+      "loss": 0.1229,
+      "step": 6008
+    },
+    {
+      "epoch": 0.433565424438111,
+      "grad_norm": 0.13200056552886963,
+      "learning_rate": 0.00018266993794198297,
+      "loss": 0.1641,
+      "step": 6009
+    },
+    {
+      "epoch": 0.43363757711317147,
+      "grad_norm": 0.1078602522611618,
+      "learning_rate": 0.00018266705152258624,
+      "loss": 0.1545,
+      "step": 6010
+    },
+    {
+      "epoch": 0.4337097297882319,
+      "grad_norm": 0.1463153064250946,
+      "learning_rate": 0.0001826641651031895,
+      "loss": 0.1629,
+      "step": 6011
+    },
+    {
+      "epoch": 0.4337818824632923,
+      "grad_norm": 0.12757614254951477,
+      "learning_rate": 0.00018266127868379276,
+      "loss": 0.1285,
+      "step": 6012
+    },
+    {
+      "epoch": 0.43385403513835274,
+      "grad_norm": 0.10534749180078506,
+      "learning_rate": 0.00018265839226439603,
+      "loss": 0.1294,
+      "step": 6013
+    },
+    {
+      "epoch": 0.4339261878134132,
+      "grad_norm": 0.11838100850582123,
+      "learning_rate": 0.0001826555058449993,
+      "loss": 0.2004,
+      "step": 6014
+    },
+    {
+      "epoch": 0.43399834048847363,
+      "grad_norm": 0.1322600394487381,
+      "learning_rate": 0.00018265261942560255,
+      "loss": 0.15,
+      "step": 6015
+    },
+    {
+      "epoch": 0.434070493163534,
+      "grad_norm": 0.10637901723384857,
+      "learning_rate": 0.00018264973300620581,
+      "loss": 0.1582,
+      "step": 6016
+    },
+    {
+      "epoch": 0.43414264583859447,
+      "grad_norm": 0.1292022168636322,
+      "learning_rate": 0.00018264684658680905,
+      "loss": 0.1546,
+      "step": 6017
+    },
+    {
+      "epoch": 0.4342147985136549,
+      "grad_norm": 0.11601456254720688,
+      "learning_rate": 0.00018264396016741234,
+      "loss": 0.2267,
+      "step": 6018
+    },
+    {
+      "epoch": 0.4342869511887153,
+      "grad_norm": 0.12072426825761795,
+      "learning_rate": 0.0001826410737480156,
+      "loss": 0.197,
+      "step": 6019
+    },
+    {
+      "epoch": 0.43435910386377574,
+      "grad_norm": 0.10457204282283783,
+      "learning_rate": 0.00018263818732861887,
+      "loss": 0.1452,
+      "step": 6020
+    },
+    {
+      "epoch": 0.4344312565388362,
+      "grad_norm": 0.11974366754293442,
+      "learning_rate": 0.00018263530090922213,
+      "loss": 0.1394,
+      "step": 6021
+    },
+    {
+      "epoch": 0.43450340921389663,
+      "grad_norm": 0.12165110558271408,
+      "learning_rate": 0.00018263241448982537,
+      "loss": 0.1692,
+      "step": 6022
+    },
+    {
+      "epoch": 0.434575561888957,
+      "grad_norm": 0.13369393348693848,
+      "learning_rate": 0.00018262952807042863,
+      "loss": 0.1567,
+      "step": 6023
+    },
+    {
+      "epoch": 0.43464771456401746,
+      "grad_norm": 0.1187087744474411,
+      "learning_rate": 0.0001826266416510319,
+      "loss": 0.141,
+      "step": 6024
+    },
+    {
+      "epoch": 0.4347198672390779,
+      "grad_norm": 0.10499470680952072,
+      "learning_rate": 0.00018262375523163518,
+      "loss": 0.1764,
+      "step": 6025
+    },
+    {
+      "epoch": 0.4347920199141383,
+      "grad_norm": 0.10115773975849152,
+      "learning_rate": 0.00018262086881223844,
+      "loss": 0.1727,
+      "step": 6026
+    },
+    {
+      "epoch": 0.43486417258919874,
+      "grad_norm": 0.10905278474092484,
+      "learning_rate": 0.00018261798239284168,
+      "loss": 0.12,
+      "step": 6027
+    },
+    {
+      "epoch": 0.4349363252642592,
+      "grad_norm": 0.1212361603975296,
+      "learning_rate": 0.00018261509597344494,
+      "loss": 0.1583,
+      "step": 6028
+    },
+    {
+      "epoch": 0.4350084779393196,
+      "grad_norm": 0.10275877267122269,
+      "learning_rate": 0.0001826122095540482,
+      "loss": 0.1231,
+      "step": 6029
+    },
+    {
+      "epoch": 0.43508063061438,
+      "grad_norm": 0.120490163564682,
+      "learning_rate": 0.00018260932313465147,
+      "loss": 0.1782,
+      "step": 6030
+    },
+    {
+      "epoch": 0.43515278328944046,
+      "grad_norm": 0.12808160483837128,
+      "learning_rate": 0.00018260643671525473,
+      "loss": 0.1667,
+      "step": 6031
+    },
+    {
+      "epoch": 0.4352249359645009,
+      "grad_norm": 0.13066217303276062,
+      "learning_rate": 0.000182603550295858,
+      "loss": 0.1496,
+      "step": 6032
+    },
+    {
+      "epoch": 0.4352970886395613,
+      "grad_norm": 0.13443118333816528,
+      "learning_rate": 0.00018260066387646126,
+      "loss": 0.1789,
+      "step": 6033
+    },
+    {
+      "epoch": 0.43536924131462174,
+      "grad_norm": 0.10961943864822388,
+      "learning_rate": 0.00018259777745706452,
+      "loss": 0.1879,
+      "step": 6034
+    },
+    {
+      "epoch": 0.4354413939896822,
+      "grad_norm": 0.12362266331911087,
+      "learning_rate": 0.00018259489103766778,
+      "loss": 0.1758,
+      "step": 6035
+    },
+    {
+      "epoch": 0.43551354666474257,
+      "grad_norm": 0.11460345983505249,
+      "learning_rate": 0.00018259200461827105,
+      "loss": 0.1837,
+      "step": 6036
+    },
+    {
+      "epoch": 0.435585699339803,
+      "grad_norm": 0.11664992570877075,
+      "learning_rate": 0.0001825891181988743,
+      "loss": 0.153,
+      "step": 6037
+    },
+    {
+      "epoch": 0.43565785201486346,
+      "grad_norm": 0.11473949253559113,
+      "learning_rate": 0.00018258623177947754,
+      "loss": 0.1829,
+      "step": 6038
+    },
+    {
+      "epoch": 0.4357300046899239,
+      "grad_norm": 0.13825523853302002,
+      "learning_rate": 0.00018258334536008083,
+      "loss": 0.2015,
+      "step": 6039
+    },
+    {
+      "epoch": 0.4358021573649843,
+      "grad_norm": 0.13293814659118652,
+      "learning_rate": 0.0001825804589406841,
+      "loss": 0.145,
+      "step": 6040
+    },
+    {
+      "epoch": 0.43587431004004473,
+      "grad_norm": 0.15453194081783295,
+      "learning_rate": 0.00018257757252128736,
+      "loss": 0.1654,
+      "step": 6041
+    },
+    {
+      "epoch": 0.4359464627151052,
+      "grad_norm": 0.10982448607683182,
+      "learning_rate": 0.00018257468610189062,
+      "loss": 0.1681,
+      "step": 6042
+    },
+    {
+      "epoch": 0.43601861539016556,
+      "grad_norm": 0.12576572597026825,
+      "learning_rate": 0.00018257179968249386,
+      "loss": 0.1335,
+      "step": 6043
+    },
+    {
+      "epoch": 0.436090768065226,
+      "grad_norm": 0.10711273550987244,
+      "learning_rate": 0.00018256891326309712,
+      "loss": 0.1419,
+      "step": 6044
+    },
+    {
+      "epoch": 0.43616292074028645,
+      "grad_norm": 0.13865168392658234,
+      "learning_rate": 0.00018256602684370039,
+      "loss": 0.1557,
+      "step": 6045
+    },
+    {
+      "epoch": 0.4362350734153469,
+      "grad_norm": 0.11145945638418198,
+      "learning_rate": 0.00018256314042430368,
+      "loss": 0.1484,
+      "step": 6046
+    },
+    {
+      "epoch": 0.4363072260904073,
+      "grad_norm": 0.10596998035907745,
+      "learning_rate": 0.00018256025400490694,
+      "loss": 0.1762,
+      "step": 6047
+    },
+    {
+      "epoch": 0.43637937876546773,
+      "grad_norm": 0.13695760071277618,
+      "learning_rate": 0.00018255736758551017,
+      "loss": 0.1452,
+      "step": 6048
+    },
+    {
+      "epoch": 0.4364515314405282,
+      "grad_norm": 0.10145651549100876,
+      "learning_rate": 0.00018255448116611344,
+      "loss": 0.1108,
+      "step": 6049
+    },
+    {
+      "epoch": 0.43652368411558856,
+      "grad_norm": 0.11442878842353821,
+      "learning_rate": 0.0001825515947467167,
+      "loss": 0.1243,
+      "step": 6050
+    },
+    {
+      "epoch": 0.436595836790649,
+      "grad_norm": 0.11627763509750366,
+      "learning_rate": 0.00018254870832731996,
+      "loss": 0.1899,
+      "step": 6051
+    },
+    {
+      "epoch": 0.43666798946570945,
+      "grad_norm": 0.10713426768779755,
+      "learning_rate": 0.00018254582190792323,
+      "loss": 0.1579,
+      "step": 6052
+    },
+    {
+      "epoch": 0.4367401421407699,
+      "grad_norm": 0.13007032871246338,
+      "learning_rate": 0.0001825429354885265,
+      "loss": 0.1612,
+      "step": 6053
+    },
+    {
+      "epoch": 0.4368122948158303,
+      "grad_norm": 0.11363731324672699,
+      "learning_rate": 0.00018254004906912975,
+      "loss": 0.151,
+      "step": 6054
+    },
+    {
+      "epoch": 0.4368844474908907,
+      "grad_norm": 0.13862450420856476,
+      "learning_rate": 0.00018253716264973301,
+      "loss": 0.1464,
+      "step": 6055
+    },
+    {
+      "epoch": 0.43695660016595117,
+      "grad_norm": 0.10363277792930603,
+      "learning_rate": 0.00018253427623033628,
+      "loss": 0.1355,
+      "step": 6056
+    },
+    {
+      "epoch": 0.43702875284101156,
+      "grad_norm": 0.11451687663793564,
+      "learning_rate": 0.00018253138981093954,
+      "loss": 0.1645,
+      "step": 6057
+    },
+    {
+      "epoch": 0.437100905516072,
+      "grad_norm": 0.15152929723262787,
+      "learning_rate": 0.0001825285033915428,
+      "loss": 0.2073,
+      "step": 6058
+    },
+    {
+      "epoch": 0.43717305819113245,
+      "grad_norm": 0.1593613475561142,
+      "learning_rate": 0.00018252561697214604,
+      "loss": 0.1389,
+      "step": 6059
+    },
+    {
+      "epoch": 0.4372452108661929,
+      "grad_norm": 0.17250584065914154,
+      "learning_rate": 0.00018252273055274933,
+      "loss": 0.1642,
+      "step": 6060
+    },
+    {
+      "epoch": 0.4373173635412533,
+      "grad_norm": 0.1319507211446762,
+      "learning_rate": 0.0001825198441333526,
+      "loss": 0.1548,
+      "step": 6061
+    },
+    {
+      "epoch": 0.4373895162163137,
+      "grad_norm": 0.12388844043016434,
+      "learning_rate": 0.00018251695771395586,
+      "loss": 0.1348,
+      "step": 6062
+    },
+    {
+      "epoch": 0.43746166889137417,
+      "grad_norm": 0.1052316427230835,
+      "learning_rate": 0.00018251407129455912,
+      "loss": 0.1584,
+      "step": 6063
+    },
+    {
+      "epoch": 0.43753382156643456,
+      "grad_norm": 0.11350063234567642,
+      "learning_rate": 0.00018251118487516235,
+      "loss": 0.1775,
+      "step": 6064
+    },
+    {
+      "epoch": 0.437605974241495,
+      "grad_norm": 0.10035805404186249,
+      "learning_rate": 0.00018250829845576562,
+      "loss": 0.1664,
+      "step": 6065
+    },
+    {
+      "epoch": 0.43767812691655544,
+      "grad_norm": 0.10310214757919312,
+      "learning_rate": 0.00018250541203636888,
+      "loss": 0.1095,
+      "step": 6066
+    },
+    {
+      "epoch": 0.43775027959161583,
+      "grad_norm": 0.12834404408931732,
+      "learning_rate": 0.00018250252561697217,
+      "loss": 0.1463,
+      "step": 6067
+    },
+    {
+      "epoch": 0.4378224322666763,
+      "grad_norm": 0.09869488328695297,
+      "learning_rate": 0.00018249963919757543,
+      "loss": 0.1348,
+      "step": 6068
+    },
+    {
+      "epoch": 0.4378945849417367,
+      "grad_norm": 0.18246905505657196,
+      "learning_rate": 0.00018249675277817867,
+      "loss": 0.1834,
+      "step": 6069
+    },
+    {
+      "epoch": 0.43796673761679716,
+      "grad_norm": 0.11811268329620361,
+      "learning_rate": 0.00018249386635878193,
+      "loss": 0.1358,
+      "step": 6070
+    },
+    {
+      "epoch": 0.43803889029185755,
+      "grad_norm": 0.1267288476228714,
+      "learning_rate": 0.0001824909799393852,
+      "loss": 0.1811,
+      "step": 6071
+    },
+    {
+      "epoch": 0.438111042966918,
+      "grad_norm": 0.10749071836471558,
+      "learning_rate": 0.00018248809351998846,
+      "loss": 0.1516,
+      "step": 6072
+    },
+    {
+      "epoch": 0.43818319564197844,
+      "grad_norm": 0.10894975066184998,
+      "learning_rate": 0.00018248520710059172,
+      "loss": 0.1908,
+      "step": 6073
+    },
+    {
+      "epoch": 0.43825534831703883,
+      "grad_norm": 0.11814959347248077,
+      "learning_rate": 0.00018248232068119498,
+      "loss": 0.1482,
+      "step": 6074
+    },
+    {
+      "epoch": 0.4383275009920993,
+      "grad_norm": 0.13490252196788788,
+      "learning_rate": 0.00018247943426179825,
+      "loss": 0.1527,
+      "step": 6075
+    },
+    {
+      "epoch": 0.4383996536671597,
+      "grad_norm": 0.12666769325733185,
+      "learning_rate": 0.0001824765478424015,
+      "loss": 0.1583,
+      "step": 6076
+    },
+    {
+      "epoch": 0.43847180634222016,
+      "grad_norm": 0.11454398930072784,
+      "learning_rate": 0.00018247366142300477,
+      "loss": 0.1389,
+      "step": 6077
+    },
+    {
+      "epoch": 0.43854395901728055,
+      "grad_norm": 0.1308351755142212,
+      "learning_rate": 0.00018247077500360803,
+      "loss": 0.1821,
+      "step": 6078
+    },
+    {
+      "epoch": 0.438616111692341,
+      "grad_norm": 0.11633557081222534,
+      "learning_rate": 0.0001824678885842113,
+      "loss": 0.1346,
+      "step": 6079
+    },
+    {
+      "epoch": 0.43868826436740144,
+      "grad_norm": 0.10918557643890381,
+      "learning_rate": 0.00018246500216481453,
+      "loss": 0.1244,
+      "step": 6080
+    },
+    {
+      "epoch": 0.4387604170424618,
+      "grad_norm": 0.10061082988977432,
+      "learning_rate": 0.00018246211574541782,
+      "loss": 0.124,
+      "step": 6081
+    },
+    {
+      "epoch": 0.43883256971752227,
+      "grad_norm": 0.10848748683929443,
+      "learning_rate": 0.00018245922932602109,
+      "loss": 0.16,
+      "step": 6082
+    },
+    {
+      "epoch": 0.4389047223925827,
+      "grad_norm": 0.10849925130605698,
+      "learning_rate": 0.00018245634290662435,
+      "loss": 0.1529,
+      "step": 6083
+    },
+    {
+      "epoch": 0.43897687506764316,
+      "grad_norm": 0.10635923594236374,
+      "learning_rate": 0.0001824534564872276,
+      "loss": 0.1929,
+      "step": 6084
+    },
+    {
+      "epoch": 0.43904902774270355,
+      "grad_norm": 0.11233007907867432,
+      "learning_rate": 0.00018245057006783088,
+      "loss": 0.1928,
+      "step": 6085
+    },
+    {
+      "epoch": 0.439121180417764,
+      "grad_norm": 0.11263249814510345,
+      "learning_rate": 0.0001824476836484341,
+      "loss": 0.1631,
+      "step": 6086
+    },
+    {
+      "epoch": 0.43919333309282443,
+      "grad_norm": 0.09963755309581757,
+      "learning_rate": 0.00018244479722903737,
+      "loss": 0.1246,
+      "step": 6087
+    },
+    {
+      "epoch": 0.4392654857678848,
+      "grad_norm": 0.11192876845598221,
+      "learning_rate": 0.00018244191080964066,
+      "loss": 0.1498,
+      "step": 6088
+    },
+    {
+      "epoch": 0.43933763844294527,
+      "grad_norm": 0.12412458658218384,
+      "learning_rate": 0.00018243902439024393,
+      "loss": 0.1314,
+      "step": 6089
+    },
+    {
+      "epoch": 0.4394097911180057,
+      "grad_norm": 0.14059120416641235,
+      "learning_rate": 0.0001824361379708472,
+      "loss": 0.186,
+      "step": 6090
+    },
+    {
+      "epoch": 0.43948194379306615,
+      "grad_norm": 0.115476593375206,
+      "learning_rate": 0.00018243325155145043,
+      "loss": 0.178,
+      "step": 6091
+    },
+    {
+      "epoch": 0.43955409646812654,
+      "grad_norm": 0.11097398400306702,
+      "learning_rate": 0.0001824303651320537,
+      "loss": 0.1784,
+      "step": 6092
+    },
+    {
+      "epoch": 0.439626249143187,
+      "grad_norm": 0.11092938482761383,
+      "learning_rate": 0.00018242747871265695,
+      "loss": 0.1745,
+      "step": 6093
+    },
+    {
+      "epoch": 0.43969840181824743,
+      "grad_norm": 0.11939523369073868,
+      "learning_rate": 0.00018242459229326021,
+      "loss": 0.1534,
+      "step": 6094
+    },
+    {
+      "epoch": 0.4397705544933078,
+      "grad_norm": 0.12466622143983841,
+      "learning_rate": 0.0001824217058738635,
+      "loss": 0.1458,
+      "step": 6095
+    },
+    {
+      "epoch": 0.43984270716836826,
+      "grad_norm": 0.12432552129030228,
+      "learning_rate": 0.00018241881945446674,
+      "loss": 0.2048,
+      "step": 6096
+    },
+    {
+      "epoch": 0.4399148598434287,
+      "grad_norm": 0.10610135644674301,
+      "learning_rate": 0.00018241593303507,
+      "loss": 0.1668,
+      "step": 6097
+    },
+    {
+      "epoch": 0.4399870125184891,
+      "grad_norm": 0.12429031729698181,
+      "learning_rate": 0.00018241304661567327,
+      "loss": 0.1618,
+      "step": 6098
+    },
+    {
+      "epoch": 0.44005916519354954,
+      "grad_norm": 0.11785822361707687,
+      "learning_rate": 0.00018241016019627653,
+      "loss": 0.1588,
+      "step": 6099
+    },
+    {
+      "epoch": 0.44013131786861,
+      "grad_norm": 0.12341566383838654,
+      "learning_rate": 0.0001824072737768798,
+      "loss": 0.2115,
+      "step": 6100
+    },
+    {
+      "epoch": 0.4402034705436704,
+      "grad_norm": 0.12034764885902405,
+      "learning_rate": 0.00018240438735748305,
+      "loss": 0.1269,
+      "step": 6101
+    },
+    {
+      "epoch": 0.4402756232187308,
+      "grad_norm": 0.14521610736846924,
+      "learning_rate": 0.00018240150093808632,
+      "loss": 0.1682,
+      "step": 6102
+    },
+    {
+      "epoch": 0.44034777589379126,
+      "grad_norm": 0.1044413149356842,
+      "learning_rate": 0.00018239861451868958,
+      "loss": 0.1418,
+      "step": 6103
+    },
+    {
+      "epoch": 0.4404199285688517,
+      "grad_norm": 0.10452799499034882,
+      "learning_rate": 0.00018239572809929284,
+      "loss": 0.1366,
+      "step": 6104
+    },
+    {
+      "epoch": 0.4404920812439121,
+      "grad_norm": 0.08668769896030426,
+      "learning_rate": 0.0001823928416798961,
+      "loss": 0.1589,
+      "step": 6105
+    },
+    {
+      "epoch": 0.44056423391897254,
+      "grad_norm": 0.11417484283447266,
+      "learning_rate": 0.00018238995526049937,
+      "loss": 0.1404,
+      "step": 6106
+    },
+    {
+      "epoch": 0.440636386594033,
+      "grad_norm": 0.10670206695795059,
+      "learning_rate": 0.0001823870688411026,
+      "loss": 0.1395,
+      "step": 6107
+    },
+    {
+      "epoch": 0.4407085392690934,
+      "grad_norm": 0.12471598386764526,
+      "learning_rate": 0.00018238418242170587,
+      "loss": 0.1505,
+      "step": 6108
+    },
+    {
+      "epoch": 0.4407806919441538,
+      "grad_norm": 0.1277986317873001,
+      "learning_rate": 0.00018238129600230916,
+      "loss": 0.1165,
+      "step": 6109
+    },
+    {
+      "epoch": 0.44085284461921426,
+      "grad_norm": 0.1507466435432434,
+      "learning_rate": 0.00018237840958291242,
+      "loss": 0.202,
+      "step": 6110
+    },
+    {
+      "epoch": 0.4409249972942747,
+      "grad_norm": 0.10197729617357254,
+      "learning_rate": 0.00018237552316351568,
+      "loss": 0.1808,
+      "step": 6111
+    },
+    {
+      "epoch": 0.4409971499693351,
+      "grad_norm": 0.1200295016169548,
+      "learning_rate": 0.00018237263674411892,
+      "loss": 0.1685,
+      "step": 6112
+    },
+    {
+      "epoch": 0.44106930264439553,
+      "grad_norm": 0.12193494290113449,
+      "learning_rate": 0.00018236975032472218,
+      "loss": 0.1966,
+      "step": 6113
+    },
+    {
+      "epoch": 0.441141455319456,
+      "grad_norm": 0.11754129081964493,
+      "learning_rate": 0.00018236686390532545,
+      "loss": 0.1081,
+      "step": 6114
+    },
+    {
+      "epoch": 0.4412136079945164,
+      "grad_norm": 0.1152927577495575,
+      "learning_rate": 0.0001823639774859287,
+      "loss": 0.1402,
+      "step": 6115
+    },
+    {
+      "epoch": 0.4412857606695768,
+      "grad_norm": 0.12563861906528473,
+      "learning_rate": 0.000182361091066532,
+      "loss": 0.1531,
+      "step": 6116
+    },
+    {
+      "epoch": 0.44135791334463725,
+      "grad_norm": 0.1496850550174713,
+      "learning_rate": 0.00018235820464713523,
+      "loss": 0.1327,
+      "step": 6117
+    },
+    {
+      "epoch": 0.4414300660196977,
+      "grad_norm": 0.11527600139379501,
+      "learning_rate": 0.0001823553182277385,
+      "loss": 0.1886,
+      "step": 6118
+    },
+    {
+      "epoch": 0.4415022186947581,
+      "grad_norm": 0.12287785112857819,
+      "learning_rate": 0.00018235243180834176,
+      "loss": 0.1504,
+      "step": 6119
+    },
+    {
+      "epoch": 0.44157437136981853,
+      "grad_norm": 0.11043699085712433,
+      "learning_rate": 0.00018234954538894502,
+      "loss": 0.177,
+      "step": 6120
+    },
+    {
+      "epoch": 0.441646524044879,
+      "grad_norm": 0.10308093577623367,
+      "learning_rate": 0.00018234665896954829,
+      "loss": 0.1435,
+      "step": 6121
+    },
+    {
+      "epoch": 0.4417186767199394,
+      "grad_norm": 0.13625788688659668,
+      "learning_rate": 0.00018234377255015155,
+      "loss": 0.1695,
+      "step": 6122
+    },
+    {
+      "epoch": 0.4417908293949998,
+      "grad_norm": 0.17083428800106049,
+      "learning_rate": 0.0001823408861307548,
+      "loss": 0.117,
+      "step": 6123
+    },
+    {
+      "epoch": 0.44186298207006025,
+      "grad_norm": 0.10584522783756256,
+      "learning_rate": 0.00018233799971135807,
+      "loss": 0.1568,
+      "step": 6124
+    },
+    {
+      "epoch": 0.4419351347451207,
+      "grad_norm": 0.11779288947582245,
+      "learning_rate": 0.00018233511329196134,
+      "loss": 0.1604,
+      "step": 6125
+    },
+    {
+      "epoch": 0.4420072874201811,
+      "grad_norm": 0.12526173889636993,
+      "learning_rate": 0.0001823322268725646,
+      "loss": 0.1249,
+      "step": 6126
+    },
+    {
+      "epoch": 0.4420794400952415,
+      "grad_norm": 0.1471685767173767,
+      "learning_rate": 0.00018232934045316786,
+      "loss": 0.1559,
+      "step": 6127
+    },
+    {
+      "epoch": 0.44215159277030197,
+      "grad_norm": 0.14475755393505096,
+      "learning_rate": 0.0001823264540337711,
+      "loss": 0.1228,
+      "step": 6128
+    },
+    {
+      "epoch": 0.44222374544536236,
+      "grad_norm": 0.1287613958120346,
+      "learning_rate": 0.00018232356761437436,
+      "loss": 0.1143,
+      "step": 6129
+    },
+    {
+      "epoch": 0.4422958981204228,
+      "grad_norm": 0.11104518175125122,
+      "learning_rate": 0.00018232068119497765,
+      "loss": 0.152,
+      "step": 6130
+    },
+    {
+      "epoch": 0.44236805079548325,
+      "grad_norm": 0.12088888138532639,
+      "learning_rate": 0.00018231779477558092,
+      "loss": 0.1241,
+      "step": 6131
+    },
+    {
+      "epoch": 0.4424402034705437,
+      "grad_norm": 0.1110999584197998,
+      "learning_rate": 0.00018231490835618418,
+      "loss": 0.1359,
+      "step": 6132
+    },
+    {
+      "epoch": 0.4425123561456041,
+      "grad_norm": 0.10359269380569458,
+      "learning_rate": 0.00018231202193678741,
+      "loss": 0.1584,
+      "step": 6133
+    },
+    {
+      "epoch": 0.4425845088206645,
+      "grad_norm": 0.10348071157932281,
+      "learning_rate": 0.00018230913551739068,
+      "loss": 0.1518,
+      "step": 6134
+    },
+    {
+      "epoch": 0.44265666149572497,
+      "grad_norm": 0.11287201941013336,
+      "learning_rate": 0.00018230624909799394,
+      "loss": 0.118,
+      "step": 6135
+    },
+    {
+      "epoch": 0.44272881417078536,
+      "grad_norm": 0.0962667316198349,
+      "learning_rate": 0.0001823033626785972,
+      "loss": 0.1416,
+      "step": 6136
+    },
+    {
+      "epoch": 0.4428009668458458,
+      "grad_norm": 0.1272866427898407,
+      "learning_rate": 0.0001823004762592005,
+      "loss": 0.1781,
+      "step": 6137
+    },
+    {
+      "epoch": 0.44287311952090624,
+      "grad_norm": 0.10131068527698517,
+      "learning_rate": 0.00018229758983980373,
+      "loss": 0.1632,
+      "step": 6138
+    },
+    {
+      "epoch": 0.4429452721959667,
+      "grad_norm": 0.12730516493320465,
+      "learning_rate": 0.000182294703420407,
+      "loss": 0.1483,
+      "step": 6139
+    },
+    {
+      "epoch": 0.4430174248710271,
+      "grad_norm": 0.14410527050495148,
+      "learning_rate": 0.00018229181700101025,
+      "loss": 0.2395,
+      "step": 6140
+    },
+    {
+      "epoch": 0.4430895775460875,
+      "grad_norm": 0.11683524399995804,
+      "learning_rate": 0.00018228893058161352,
+      "loss": 0.1283,
+      "step": 6141
+    },
+    {
+      "epoch": 0.44316173022114796,
+      "grad_norm": 0.11929294466972351,
+      "learning_rate": 0.00018228604416221678,
+      "loss": 0.1416,
+      "step": 6142
+    },
+    {
+      "epoch": 0.44323388289620835,
+      "grad_norm": 0.10018768161535263,
+      "learning_rate": 0.00018228315774282004,
+      "loss": 0.193,
+      "step": 6143
+    },
+    {
+      "epoch": 0.4433060355712688,
+      "grad_norm": 0.12105677276849747,
+      "learning_rate": 0.0001822802713234233,
+      "loss": 0.1476,
+      "step": 6144
+    },
+    {
+      "epoch": 0.44337818824632924,
+      "grad_norm": 0.11790598183870316,
+      "learning_rate": 0.00018227738490402657,
+      "loss": 0.1682,
+      "step": 6145
+    },
+    {
+      "epoch": 0.4434503409213897,
+      "grad_norm": 0.1322735697031021,
+      "learning_rate": 0.00018227449848462983,
+      "loss": 0.1232,
+      "step": 6146
+    },
+    {
+      "epoch": 0.4435224935964501,
+      "grad_norm": 0.10716860741376877,
+      "learning_rate": 0.0001822716120652331,
+      "loss": 0.1455,
+      "step": 6147
+    },
+    {
+      "epoch": 0.4435946462715105,
+      "grad_norm": 0.10606841742992401,
+      "learning_rate": 0.00018226872564583636,
+      "loss": 0.1664,
+      "step": 6148
+    },
+    {
+      "epoch": 0.44366679894657096,
+      "grad_norm": 0.11973252892494202,
+      "learning_rate": 0.0001822658392264396,
+      "loss": 0.1208,
+      "step": 6149
+    },
+    {
+      "epoch": 0.44373895162163135,
+      "grad_norm": 0.11278528720140457,
+      "learning_rate": 0.00018226295280704286,
+      "loss": 0.1624,
+      "step": 6150
+    },
+    {
+      "epoch": 0.4438111042966918,
+      "grad_norm": 0.11765396595001221,
+      "learning_rate": 0.00018226006638764615,
+      "loss": 0.1465,
+      "step": 6151
+    },
+    {
+      "epoch": 0.44388325697175224,
+      "grad_norm": 0.11731109768152237,
+      "learning_rate": 0.0001822571799682494,
+      "loss": 0.1774,
+      "step": 6152
+    },
+    {
+      "epoch": 0.4439554096468127,
+      "grad_norm": 0.09984837472438812,
+      "learning_rate": 0.00018225429354885267,
+      "loss": 0.1207,
+      "step": 6153
+    },
+    {
+      "epoch": 0.44402756232187307,
+      "grad_norm": 0.13035528361797333,
+      "learning_rate": 0.0001822514071294559,
+      "loss": 0.1497,
+      "step": 6154
+    },
+    {
+      "epoch": 0.4440997149969335,
+      "grad_norm": 0.14929358661174774,
+      "learning_rate": 0.00018224852071005917,
+      "loss": 0.2084,
+      "step": 6155
+    },
+    {
+      "epoch": 0.44417186767199396,
+      "grad_norm": 0.11625222861766815,
+      "learning_rate": 0.00018224563429066243,
+      "loss": 0.1745,
+      "step": 6156
+    },
+    {
+      "epoch": 0.44424402034705435,
+      "grad_norm": 0.1217883750796318,
+      "learning_rate": 0.0001822427478712657,
+      "loss": 0.1987,
+      "step": 6157
+    },
+    {
+      "epoch": 0.4443161730221148,
+      "grad_norm": 0.10432552546262741,
+      "learning_rate": 0.000182239861451869,
+      "loss": 0.1421,
+      "step": 6158
+    },
+    {
+      "epoch": 0.44438832569717523,
+      "grad_norm": 0.12206144630908966,
+      "learning_rate": 0.00018223697503247222,
+      "loss": 0.1148,
+      "step": 6159
+    },
+    {
+      "epoch": 0.4444604783722356,
+      "grad_norm": 0.0915931984782219,
+      "learning_rate": 0.00018223408861307549,
+      "loss": 0.1694,
+      "step": 6160
+    },
+    {
+      "epoch": 0.44453263104729607,
+      "grad_norm": 0.11894541233778,
+      "learning_rate": 0.00018223120219367875,
+      "loss": 0.1654,
+      "step": 6161
+    },
+    {
+      "epoch": 0.4446047837223565,
+      "grad_norm": 0.10530544072389603,
+      "learning_rate": 0.000182228315774282,
+      "loss": 0.0963,
+      "step": 6162
+    },
+    {
+      "epoch": 0.44467693639741696,
+      "grad_norm": 0.14129701256752014,
+      "learning_rate": 0.00018222542935488527,
+      "loss": 0.1353,
+      "step": 6163
+    },
+    {
+      "epoch": 0.44474908907247734,
+      "grad_norm": 0.11482277512550354,
+      "learning_rate": 0.00018222254293548854,
+      "loss": 0.1608,
+      "step": 6164
+    },
+    {
+      "epoch": 0.4448212417475378,
+      "grad_norm": 0.13121692836284637,
+      "learning_rate": 0.0001822196565160918,
+      "loss": 0.1484,
+      "step": 6165
+    },
+    {
+      "epoch": 0.44489339442259823,
+      "grad_norm": 0.13311266899108887,
+      "learning_rate": 0.00018221677009669506,
+      "loss": 0.1761,
+      "step": 6166
+    },
+    {
+      "epoch": 0.4449655470976586,
+      "grad_norm": 0.10540780425071716,
+      "learning_rate": 0.00018221388367729833,
+      "loss": 0.1455,
+      "step": 6167
+    },
+    {
+      "epoch": 0.44503769977271906,
+      "grad_norm": 0.11652028560638428,
+      "learning_rate": 0.0001822109972579016,
+      "loss": 0.1911,
+      "step": 6168
+    },
+    {
+      "epoch": 0.4451098524477795,
+      "grad_norm": 0.11527334898710251,
+      "learning_rate": 0.00018220811083850485,
+      "loss": 0.1621,
+      "step": 6169
+    },
+    {
+      "epoch": 0.44518200512283995,
+      "grad_norm": 0.11917366087436676,
+      "learning_rate": 0.0001822052244191081,
+      "loss": 0.1535,
+      "step": 6170
+    },
+    {
+      "epoch": 0.44525415779790034,
+      "grad_norm": 0.10209325700998306,
+      "learning_rate": 0.00018220233799971135,
+      "loss": 0.1344,
+      "step": 6171
+    },
+    {
+      "epoch": 0.4453263104729608,
+      "grad_norm": 0.10349535197019577,
+      "learning_rate": 0.00018219945158031461,
+      "loss": 0.1509,
+      "step": 6172
+    },
+    {
+      "epoch": 0.44539846314802123,
+      "grad_norm": 0.12221920490264893,
+      "learning_rate": 0.0001821965651609179,
+      "loss": 0.1399,
+      "step": 6173
+    },
+    {
+      "epoch": 0.4454706158230816,
+      "grad_norm": 0.1287602335214615,
+      "learning_rate": 0.00018219367874152117,
+      "loss": 0.1538,
+      "step": 6174
+    },
+    {
+      "epoch": 0.44554276849814206,
+      "grad_norm": 0.13449262082576752,
+      "learning_rate": 0.0001821907923221244,
+      "loss": 0.1422,
+      "step": 6175
+    },
+    {
+      "epoch": 0.4456149211732025,
+      "grad_norm": 0.10923946648836136,
+      "learning_rate": 0.00018218790590272767,
+      "loss": 0.1438,
+      "step": 6176
+    },
+    {
+      "epoch": 0.44568707384826295,
+      "grad_norm": 0.10191056877374649,
+      "learning_rate": 0.00018218501948333093,
+      "loss": 0.1424,
+      "step": 6177
+    },
+    {
+      "epoch": 0.44575922652332334,
+      "grad_norm": 0.11073064804077148,
+      "learning_rate": 0.0001821821330639342,
+      "loss": 0.1396,
+      "step": 6178
+    },
+    {
+      "epoch": 0.4458313791983838,
+      "grad_norm": 0.14307044446468353,
+      "learning_rate": 0.00018217924664453745,
+      "loss": 0.164,
+      "step": 6179
+    },
+    {
+      "epoch": 0.4459035318734442,
+      "grad_norm": 0.1324271410703659,
+      "learning_rate": 0.00018217636022514072,
+      "loss": 0.1488,
+      "step": 6180
+    },
+    {
+      "epoch": 0.4459756845485046,
+      "grad_norm": 0.15620259940624237,
+      "learning_rate": 0.00018217347380574398,
+      "loss": 0.2011,
+      "step": 6181
+    },
+    {
+      "epoch": 0.44604783722356506,
+      "grad_norm": 0.1245202124118805,
+      "learning_rate": 0.00018217058738634724,
+      "loss": 0.1461,
+      "step": 6182
+    },
+    {
+      "epoch": 0.4461199898986255,
+      "grad_norm": 0.12234611064195633,
+      "learning_rate": 0.0001821677009669505,
+      "loss": 0.1931,
+      "step": 6183
+    },
+    {
+      "epoch": 0.44619214257368595,
+      "grad_norm": 0.10709469765424728,
+      "learning_rate": 0.00018216481454755377,
+      "loss": 0.1523,
+      "step": 6184
+    },
+    {
+      "epoch": 0.44626429524874633,
+      "grad_norm": 0.10262808948755264,
+      "learning_rate": 0.00018216192812815703,
+      "loss": 0.1532,
+      "step": 6185
+    },
+    {
+      "epoch": 0.4463364479238068,
+      "grad_norm": 0.12416316568851471,
+      "learning_rate": 0.00018215904170876027,
+      "loss": 0.1458,
+      "step": 6186
+    },
+    {
+      "epoch": 0.4464086005988672,
+      "grad_norm": 0.12517490983009338,
+      "learning_rate": 0.00018215615528936356,
+      "loss": 0.1104,
+      "step": 6187
+    },
+    {
+      "epoch": 0.4464807532739276,
+      "grad_norm": 0.12051703035831451,
+      "learning_rate": 0.00018215326886996682,
+      "loss": 0.1738,
+      "step": 6188
+    },
+    {
+      "epoch": 0.44655290594898805,
+      "grad_norm": 0.1233791932463646,
+      "learning_rate": 0.00018215038245057008,
+      "loss": 0.1313,
+      "step": 6189
+    },
+    {
+      "epoch": 0.4466250586240485,
+      "grad_norm": 0.1406187266111374,
+      "learning_rate": 0.00018214749603117335,
+      "loss": 0.1521,
+      "step": 6190
+    },
+    {
+      "epoch": 0.4466972112991089,
+      "grad_norm": 0.16725574433803558,
+      "learning_rate": 0.00018214460961177658,
+      "loss": 0.1623,
+      "step": 6191
+    },
+    {
+      "epoch": 0.44676936397416933,
+      "grad_norm": 0.11151408404111862,
+      "learning_rate": 0.00018214172319237985,
+      "loss": 0.1811,
+      "step": 6192
+    },
+    {
+      "epoch": 0.4468415166492298,
+      "grad_norm": 0.10352703928947449,
+      "learning_rate": 0.0001821388367729831,
+      "loss": 0.1245,
+      "step": 6193
+    },
+    {
+      "epoch": 0.4469136693242902,
+      "grad_norm": 0.11607158184051514,
+      "learning_rate": 0.0001821359503535864,
+      "loss": 0.1305,
+      "step": 6194
+    },
+    {
+      "epoch": 0.4469858219993506,
+      "grad_norm": 0.13465505838394165,
+      "learning_rate": 0.00018213306393418966,
+      "loss": 0.1492,
+      "step": 6195
+    },
+    {
+      "epoch": 0.44705797467441105,
+      "grad_norm": 0.1132718026638031,
+      "learning_rate": 0.0001821301775147929,
+      "loss": 0.2283,
+      "step": 6196
+    },
+    {
+      "epoch": 0.4471301273494715,
+      "grad_norm": 0.1039915382862091,
+      "learning_rate": 0.00018212729109539616,
+      "loss": 0.1543,
+      "step": 6197
+    },
+    {
+      "epoch": 0.4472022800245319,
+      "grad_norm": 0.18599781394004822,
+      "learning_rate": 0.00018212440467599942,
+      "loss": 0.166,
+      "step": 6198
+    },
+    {
+      "epoch": 0.44727443269959233,
+      "grad_norm": 0.130781888961792,
+      "learning_rate": 0.00018212151825660269,
+      "loss": 0.1512,
+      "step": 6199
+    },
+    {
+      "epoch": 0.44734658537465277,
+      "grad_norm": 0.12227053940296173,
+      "learning_rate": 0.00018211863183720595,
+      "loss": 0.1769,
+      "step": 6200
+    },
+    {
+      "epoch": 0.4474187380497132,
+      "grad_norm": 0.13931097090244293,
+      "learning_rate": 0.0001821157454178092,
+      "loss": 0.143,
+      "step": 6201
+    },
+    {
+      "epoch": 0.4474908907247736,
+      "grad_norm": 0.10793418437242508,
+      "learning_rate": 0.00018211285899841247,
+      "loss": 0.1522,
+      "step": 6202
+    },
+    {
+      "epoch": 0.44756304339983405,
+      "grad_norm": 0.12433990836143494,
+      "learning_rate": 0.00018210997257901574,
+      "loss": 0.1403,
+      "step": 6203
+    },
+    {
+      "epoch": 0.4476351960748945,
+      "grad_norm": 0.10359567403793335,
+      "learning_rate": 0.000182107086159619,
+      "loss": 0.1841,
+      "step": 6204
+    },
+    {
+      "epoch": 0.4477073487499549,
+      "grad_norm": 0.10066763311624527,
+      "learning_rate": 0.00018210419974022226,
+      "loss": 0.1791,
+      "step": 6205
+    },
+    {
+      "epoch": 0.4477795014250153,
+      "grad_norm": 0.10185603052377701,
+      "learning_rate": 0.00018210131332082553,
+      "loss": 0.1273,
+      "step": 6206
+    },
+    {
+      "epoch": 0.44785165410007577,
+      "grad_norm": 0.12718892097473145,
+      "learning_rate": 0.0001820984269014288,
+      "loss": 0.1295,
+      "step": 6207
+    },
+    {
+      "epoch": 0.4479238067751362,
+      "grad_norm": 0.09801424294710159,
+      "learning_rate": 0.00018209554048203205,
+      "loss": 0.102,
+      "step": 6208
+    },
+    {
+      "epoch": 0.4479959594501966,
+      "grad_norm": 0.1023603305220604,
+      "learning_rate": 0.00018209265406263531,
+      "loss": 0.162,
+      "step": 6209
+    },
+    {
+      "epoch": 0.44806811212525705,
+      "grad_norm": 0.11572487652301788,
+      "learning_rate": 0.00018208976764323858,
+      "loss": 0.1378,
+      "step": 6210
+    },
+    {
+      "epoch": 0.4481402648003175,
+      "grad_norm": 0.11289431154727936,
+      "learning_rate": 0.00018208688122384184,
+      "loss": 0.179,
+      "step": 6211
+    },
+    {
+      "epoch": 0.4482124174753779,
+      "grad_norm": 0.10198424011468887,
+      "learning_rate": 0.0001820839948044451,
+      "loss": 0.1729,
+      "step": 6212
+    },
+    {
+      "epoch": 0.4482845701504383,
+      "grad_norm": 0.09329713135957718,
+      "learning_rate": 0.00018208110838504834,
+      "loss": 0.1604,
+      "step": 6213
+    },
+    {
+      "epoch": 0.44835672282549877,
+      "grad_norm": 0.15892958641052246,
+      "learning_rate": 0.0001820782219656516,
+      "loss": 0.1585,
+      "step": 6214
+    },
+    {
+      "epoch": 0.4484288755005592,
+      "grad_norm": 0.11771567165851593,
+      "learning_rate": 0.0001820753355462549,
+      "loss": 0.0995,
+      "step": 6215
+    },
+    {
+      "epoch": 0.4485010281756196,
+      "grad_norm": 0.1459246426820755,
+      "learning_rate": 0.00018207244912685816,
+      "loss": 0.1453,
+      "step": 6216
+    },
+    {
+      "epoch": 0.44857318085068004,
+      "grad_norm": 0.14376023411750793,
+      "learning_rate": 0.00018206956270746142,
+      "loss": 0.2116,
+      "step": 6217
+    },
+    {
+      "epoch": 0.4486453335257405,
+      "grad_norm": 0.12150080502033234,
+      "learning_rate": 0.00018206667628806465,
+      "loss": 0.199,
+      "step": 6218
+    },
+    {
+      "epoch": 0.4487174862008009,
+      "grad_norm": 0.1031125858426094,
+      "learning_rate": 0.00018206378986866792,
+      "loss": 0.17,
+      "step": 6219
+    },
+    {
+      "epoch": 0.4487896388758613,
+      "grad_norm": 0.10577013343572617,
+      "learning_rate": 0.00018206090344927118,
+      "loss": 0.123,
+      "step": 6220
+    },
+    {
+      "epoch": 0.44886179155092176,
+      "grad_norm": 0.11361294984817505,
+      "learning_rate": 0.00018205801702987444,
+      "loss": 0.1037,
+      "step": 6221
+    },
+    {
+      "epoch": 0.44893394422598215,
+      "grad_norm": 0.12185689061880112,
+      "learning_rate": 0.00018205513061047773,
+      "loss": 0.1318,
+      "step": 6222
+    },
+    {
+      "epoch": 0.4490060969010426,
+      "grad_norm": 0.11607586592435837,
+      "learning_rate": 0.00018205224419108097,
+      "loss": 0.1489,
+      "step": 6223
+    },
+    {
+      "epoch": 0.44907824957610304,
+      "grad_norm": 0.10012314468622208,
+      "learning_rate": 0.00018204935777168423,
+      "loss": 0.1555,
+      "step": 6224
+    },
+    {
+      "epoch": 0.4491504022511635,
+      "grad_norm": 0.11083272099494934,
+      "learning_rate": 0.0001820464713522875,
+      "loss": 0.194,
+      "step": 6225
+    },
+    {
+      "epoch": 0.44922255492622387,
+      "grad_norm": 0.15222343802452087,
+      "learning_rate": 0.00018204358493289076,
+      "loss": 0.178,
+      "step": 6226
+    },
+    {
+      "epoch": 0.4492947076012843,
+      "grad_norm": 0.1352849304676056,
+      "learning_rate": 0.00018204069851349402,
+      "loss": 0.1353,
+      "step": 6227
+    },
+    {
+      "epoch": 0.44936686027634476,
+      "grad_norm": 0.13668890297412872,
+      "learning_rate": 0.00018203781209409728,
+      "loss": 0.1146,
+      "step": 6228
+    },
+    {
+      "epoch": 0.44943901295140515,
+      "grad_norm": 0.11314396560192108,
+      "learning_rate": 0.00018203492567470055,
+      "loss": 0.1265,
+      "step": 6229
+    },
+    {
+      "epoch": 0.4495111656264656,
+      "grad_norm": 0.1572897881269455,
+      "learning_rate": 0.0001820320392553038,
+      "loss": 0.1363,
+      "step": 6230
+    },
+    {
+      "epoch": 0.44958331830152604,
+      "grad_norm": 0.10678167641162872,
+      "learning_rate": 0.00018202915283590707,
+      "loss": 0.1649,
+      "step": 6231
+    },
+    {
+      "epoch": 0.4496554709765865,
+      "grad_norm": 0.15983405709266663,
+      "learning_rate": 0.00018202626641651033,
+      "loss": 0.1636,
+      "step": 6232
+    },
+    {
+      "epoch": 0.44972762365164687,
+      "grad_norm": 0.14953090250492096,
+      "learning_rate": 0.0001820233799971136,
+      "loss": 0.1702,
+      "step": 6233
+    },
+    {
+      "epoch": 0.4497997763267073,
+      "grad_norm": 0.12968796491622925,
+      "learning_rate": 0.00018202049357771683,
+      "loss": 0.1762,
+      "step": 6234
+    },
+    {
+      "epoch": 0.44987192900176776,
+      "grad_norm": 0.12254311889410019,
+      "learning_rate": 0.0001820176071583201,
+      "loss": 0.1765,
+      "step": 6235
+    },
+    {
+      "epoch": 0.44994408167682814,
+      "grad_norm": 0.13646142184734344,
+      "learning_rate": 0.0001820147207389234,
+      "loss": 0.1452,
+      "step": 6236
+    },
+    {
+      "epoch": 0.4500162343518886,
+      "grad_norm": 0.10840586572885513,
+      "learning_rate": 0.00018201183431952665,
+      "loss": 0.1542,
+      "step": 6237
+    },
+    {
+      "epoch": 0.45008838702694903,
+      "grad_norm": 0.12204860895872116,
+      "learning_rate": 0.0001820089479001299,
+      "loss": 0.1651,
+      "step": 6238
+    },
+    {
+      "epoch": 0.4501605397020095,
+      "grad_norm": 0.10802742093801498,
+      "learning_rate": 0.00018200606148073315,
+      "loss": 0.1148,
+      "step": 6239
+    },
+    {
+      "epoch": 0.45023269237706987,
+      "grad_norm": 0.11421792954206467,
+      "learning_rate": 0.0001820031750613364,
+      "loss": 0.1786,
+      "step": 6240
+    },
+    {
+      "epoch": 0.4503048450521303,
+      "grad_norm": 0.10538505762815475,
+      "learning_rate": 0.00018200028864193967,
+      "loss": 0.162,
+      "step": 6241
+    },
+    {
+      "epoch": 0.45037699772719075,
+      "grad_norm": 0.1272907853126526,
+      "learning_rate": 0.00018199740222254294,
+      "loss": 0.1742,
+      "step": 6242
+    },
+    {
+      "epoch": 0.45044915040225114,
+      "grad_norm": 0.12079206854104996,
+      "learning_rate": 0.00018199451580314623,
+      "loss": 0.147,
+      "step": 6243
+    },
+    {
+      "epoch": 0.4505213030773116,
+      "grad_norm": 0.11582466959953308,
+      "learning_rate": 0.00018199162938374946,
+      "loss": 0.1636,
+      "step": 6244
+    },
+    {
+      "epoch": 0.45059345575237203,
+      "grad_norm": 0.1340245008468628,
+      "learning_rate": 0.00018198874296435273,
+      "loss": 0.1894,
+      "step": 6245
+    },
+    {
+      "epoch": 0.4506656084274325,
+      "grad_norm": 0.11480450630187988,
+      "learning_rate": 0.000181985856544956,
+      "loss": 0.1789,
+      "step": 6246
+    },
+    {
+      "epoch": 0.45073776110249286,
+      "grad_norm": 0.14045003056526184,
+      "learning_rate": 0.00018198297012555925,
+      "loss": 0.1233,
+      "step": 6247
+    },
+    {
+      "epoch": 0.4508099137775533,
+      "grad_norm": 0.12720069289207458,
+      "learning_rate": 0.00018198008370616251,
+      "loss": 0.1262,
+      "step": 6248
+    },
+    {
+      "epoch": 0.45088206645261375,
+      "grad_norm": 0.1322208046913147,
+      "learning_rate": 0.00018197719728676578,
+      "loss": 0.1568,
+      "step": 6249
+    },
+    {
+      "epoch": 0.45095421912767414,
+      "grad_norm": 0.1560821682214737,
+      "learning_rate": 0.00018197431086736904,
+      "loss": 0.1584,
+      "step": 6250
+    },
+    {
+      "epoch": 0.4510263718027346,
+      "grad_norm": 0.131991907954216,
+      "learning_rate": 0.0001819714244479723,
+      "loss": 0.1221,
+      "step": 6251
+    },
+    {
+      "epoch": 0.451098524477795,
+      "grad_norm": 0.14872147142887115,
+      "learning_rate": 0.00018196853802857557,
+      "loss": 0.1248,
+      "step": 6252
+    },
+    {
+      "epoch": 0.4511706771528554,
+      "grad_norm": 0.14252154529094696,
+      "learning_rate": 0.00018196565160917883,
+      "loss": 0.1378,
+      "step": 6253
+    },
+    {
+      "epoch": 0.45124282982791586,
+      "grad_norm": 0.14059919118881226,
+      "learning_rate": 0.0001819627651897821,
+      "loss": 0.1513,
+      "step": 6254
+    },
+    {
+      "epoch": 0.4513149825029763,
+      "grad_norm": 0.12438001483678818,
+      "learning_rate": 0.00018195987877038533,
+      "loss": 0.1868,
+      "step": 6255
+    },
+    {
+      "epoch": 0.45138713517803675,
+      "grad_norm": 0.12125087529420853,
+      "learning_rate": 0.0001819569923509886,
+      "loss": 0.1459,
+      "step": 6256
+    },
+    {
+      "epoch": 0.45145928785309714,
+      "grad_norm": 0.12559860944747925,
+      "learning_rate": 0.00018195410593159188,
+      "loss": 0.1742,
+      "step": 6257
+    },
+    {
+      "epoch": 0.4515314405281576,
+      "grad_norm": 0.11661777645349503,
+      "learning_rate": 0.00018195121951219514,
+      "loss": 0.1382,
+      "step": 6258
+    },
+    {
+      "epoch": 0.451603593203218,
+      "grad_norm": 0.14414922893047333,
+      "learning_rate": 0.0001819483330927984,
+      "loss": 0.1732,
+      "step": 6259
+    },
+    {
+      "epoch": 0.4516757458782784,
+      "grad_norm": 0.12364254891872406,
+      "learning_rate": 0.00018194544667340164,
+      "loss": 0.2019,
+      "step": 6260
+    },
+    {
+      "epoch": 0.45174789855333886,
+      "grad_norm": 0.10109005868434906,
+      "learning_rate": 0.0001819425602540049,
+      "loss": 0.1286,
+      "step": 6261
+    },
+    {
+      "epoch": 0.4518200512283993,
+      "grad_norm": 0.12184163182973862,
+      "learning_rate": 0.00018193967383460817,
+      "loss": 0.2007,
+      "step": 6262
+    },
+    {
+      "epoch": 0.45189220390345974,
+      "grad_norm": 0.14499284327030182,
+      "learning_rate": 0.00018193678741521143,
+      "loss": 0.1365,
+      "step": 6263
+    },
+    {
+      "epoch": 0.45196435657852013,
+      "grad_norm": 0.12527626752853394,
+      "learning_rate": 0.00018193390099581472,
+      "loss": 0.1302,
+      "step": 6264
+    },
+    {
+      "epoch": 0.4520365092535806,
+      "grad_norm": 0.08568686246871948,
+      "learning_rate": 0.00018193101457641796,
+      "loss": 0.1685,
+      "step": 6265
+    },
+    {
+      "epoch": 0.452108661928641,
+      "grad_norm": 0.14542171359062195,
+      "learning_rate": 0.00018192812815702122,
+      "loss": 0.12,
+      "step": 6266
+    },
+    {
+      "epoch": 0.4521808146037014,
+      "grad_norm": 0.09898033738136292,
+      "learning_rate": 0.00018192524173762448,
+      "loss": 0.1337,
+      "step": 6267
+    },
+    {
+      "epoch": 0.45225296727876185,
+      "grad_norm": 0.09792659431695938,
+      "learning_rate": 0.00018192235531822775,
+      "loss": 0.1224,
+      "step": 6268
+    },
+    {
+      "epoch": 0.4523251199538223,
+      "grad_norm": 0.08936212956905365,
+      "learning_rate": 0.000181919468898831,
+      "loss": 0.1105,
+      "step": 6269
+    },
+    {
+      "epoch": 0.45239727262888274,
+      "grad_norm": 0.10980160534381866,
+      "learning_rate": 0.00018191658247943427,
+      "loss": 0.1328,
+      "step": 6270
+    },
+    {
+      "epoch": 0.45246942530394313,
+      "grad_norm": 0.10068574547767639,
+      "learning_rate": 0.00018191369606003753,
+      "loss": 0.1788,
+      "step": 6271
+    },
+    {
+      "epoch": 0.4525415779790036,
+      "grad_norm": 0.11020131409168243,
+      "learning_rate": 0.0001819108096406408,
+      "loss": 0.1622,
+      "step": 6272
+    },
+    {
+      "epoch": 0.452613730654064,
+      "grad_norm": 0.11048059165477753,
+      "learning_rate": 0.00018190792322124406,
+      "loss": 0.1327,
+      "step": 6273
+    },
+    {
+      "epoch": 0.4526858833291244,
+      "grad_norm": 0.13467931747436523,
+      "learning_rate": 0.00018190503680184732,
+      "loss": 0.1455,
+      "step": 6274
+    },
+    {
+      "epoch": 0.45275803600418485,
+      "grad_norm": 0.12309185415506363,
+      "learning_rate": 0.00018190215038245059,
+      "loss": 0.1491,
+      "step": 6275
+    },
+    {
+      "epoch": 0.4528301886792453,
+      "grad_norm": 0.09846778959035873,
+      "learning_rate": 0.00018189926396305382,
+      "loss": 0.145,
+      "step": 6276
+    },
+    {
+      "epoch": 0.45290234135430574,
+      "grad_norm": 0.12652455270290375,
+      "learning_rate": 0.00018189637754365708,
+      "loss": 0.1435,
+      "step": 6277
+    },
+    {
+      "epoch": 0.4529744940293661,
+      "grad_norm": 0.12107978761196136,
+      "learning_rate": 0.00018189349112426038,
+      "loss": 0.1544,
+      "step": 6278
+    },
+    {
+      "epoch": 0.45304664670442657,
+      "grad_norm": 0.13256511092185974,
+      "learning_rate": 0.00018189060470486364,
+      "loss": 0.1434,
+      "step": 6279
+    },
+    {
+      "epoch": 0.453118799379487,
+      "grad_norm": 0.11256644129753113,
+      "learning_rate": 0.0001818877182854669,
+      "loss": 0.1276,
+      "step": 6280
+    },
+    {
+      "epoch": 0.4531909520545474,
+      "grad_norm": 0.1764122098684311,
+      "learning_rate": 0.00018188483186607014,
+      "loss": 0.1369,
+      "step": 6281
+    },
+    {
+      "epoch": 0.45326310472960785,
+      "grad_norm": 0.1378893405199051,
+      "learning_rate": 0.0001818819454466734,
+      "loss": 0.1603,
+      "step": 6282
+    },
+    {
+      "epoch": 0.4533352574046683,
+      "grad_norm": 0.10006693005561829,
+      "learning_rate": 0.00018187905902727666,
+      "loss": 0.1722,
+      "step": 6283
+    },
+    {
+      "epoch": 0.4534074100797287,
+      "grad_norm": 0.09453818202018738,
+      "learning_rate": 0.00018187617260787993,
+      "loss": 0.1612,
+      "step": 6284
+    },
+    {
+      "epoch": 0.4534795627547891,
+      "grad_norm": 0.11707943677902222,
+      "learning_rate": 0.00018187328618848322,
+      "loss": 0.1738,
+      "step": 6285
+    },
+    {
+      "epoch": 0.45355171542984957,
+      "grad_norm": 0.1276026964187622,
+      "learning_rate": 0.00018187039976908645,
+      "loss": 0.1525,
+      "step": 6286
+    },
+    {
+      "epoch": 0.45362386810491,
+      "grad_norm": 0.1502910554409027,
+      "learning_rate": 0.00018186751334968971,
+      "loss": 0.1586,
+      "step": 6287
+    },
+    {
+      "epoch": 0.4536960207799704,
+      "grad_norm": 0.12908358871936798,
+      "learning_rate": 0.00018186462693029298,
+      "loss": 0.1563,
+      "step": 6288
+    },
+    {
+      "epoch": 0.45376817345503084,
+      "grad_norm": 0.09012867510318756,
+      "learning_rate": 0.00018186174051089624,
+      "loss": 0.1726,
+      "step": 6289
+    },
+    {
+      "epoch": 0.4538403261300913,
+      "grad_norm": 0.15328793227672577,
+      "learning_rate": 0.0001818588540914995,
+      "loss": 0.2016,
+      "step": 6290
+    },
+    {
+      "epoch": 0.4539124788051517,
+      "grad_norm": 0.14295051991939545,
+      "learning_rate": 0.00018185596767210277,
+      "loss": 0.156,
+      "step": 6291
+    },
+    {
+      "epoch": 0.4539846314802121,
+      "grad_norm": 0.11826188117265701,
+      "learning_rate": 0.00018185308125270603,
+      "loss": 0.246,
+      "step": 6292
+    },
+    {
+      "epoch": 0.45405678415527256,
+      "grad_norm": 0.12954123318195343,
+      "learning_rate": 0.0001818501948333093,
+      "loss": 0.1262,
+      "step": 6293
+    },
+    {
+      "epoch": 0.454128936830333,
+      "grad_norm": 0.10843376815319061,
+      "learning_rate": 0.00018184730841391255,
+      "loss": 0.1256,
+      "step": 6294
+    },
+    {
+      "epoch": 0.4542010895053934,
+      "grad_norm": 0.1581542044878006,
+      "learning_rate": 0.00018184442199451582,
+      "loss": 0.178,
+      "step": 6295
+    },
+    {
+      "epoch": 0.45427324218045384,
+      "grad_norm": 0.11956808716058731,
+      "learning_rate": 0.00018184153557511908,
+      "loss": 0.1303,
+      "step": 6296
+    },
+    {
+      "epoch": 0.4543453948555143,
+      "grad_norm": 0.12054760009050369,
+      "learning_rate": 0.00018183864915572232,
+      "loss": 0.1808,
+      "step": 6297
+    },
+    {
+      "epoch": 0.4544175475305747,
+      "grad_norm": 0.13112328946590424,
+      "learning_rate": 0.00018183576273632558,
+      "loss": 0.133,
+      "step": 6298
+    },
+    {
+      "epoch": 0.4544897002056351,
+      "grad_norm": 0.1242629885673523,
+      "learning_rate": 0.00018183287631692887,
+      "loss": 0.1619,
+      "step": 6299
+    },
+    {
+      "epoch": 0.45456185288069556,
+      "grad_norm": 0.1355835199356079,
+      "learning_rate": 0.00018182998989753213,
+      "loss": 0.177,
+      "step": 6300
+    },
+    {
+      "epoch": 0.454634005555756,
+      "grad_norm": 0.11525852233171463,
+      "learning_rate": 0.0001818271034781354,
+      "loss": 0.1728,
+      "step": 6301
+    },
+    {
+      "epoch": 0.4547061582308164,
+      "grad_norm": 0.14136461913585663,
+      "learning_rate": 0.00018182421705873863,
+      "loss": 0.2022,
+      "step": 6302
+    },
+    {
+      "epoch": 0.45477831090587684,
+      "grad_norm": 0.11866612732410431,
+      "learning_rate": 0.0001818213306393419,
+      "loss": 0.1662,
+      "step": 6303
+    },
+    {
+      "epoch": 0.4548504635809373,
+      "grad_norm": 0.12703737616539001,
+      "learning_rate": 0.00018181844421994516,
+      "loss": 0.1795,
+      "step": 6304
+    },
+    {
+      "epoch": 0.45492261625599767,
+      "grad_norm": 0.13970893621444702,
+      "learning_rate": 0.00018181555780054842,
+      "loss": 0.1836,
+      "step": 6305
+    },
+    {
+      "epoch": 0.4549947689310581,
+      "grad_norm": 0.12486319988965988,
+      "learning_rate": 0.0001818126713811517,
+      "loss": 0.193,
+      "step": 6306
+    },
+    {
+      "epoch": 0.45506692160611856,
+      "grad_norm": 0.12619183957576752,
+      "learning_rate": 0.00018180978496175495,
+      "loss": 0.259,
+      "step": 6307
+    },
+    {
+      "epoch": 0.455139074281179,
+      "grad_norm": 0.12064392119646072,
+      "learning_rate": 0.0001818068985423582,
+      "loss": 0.1446,
+      "step": 6308
+    },
+    {
+      "epoch": 0.4552112269562394,
+      "grad_norm": 0.13968324661254883,
+      "learning_rate": 0.00018180401212296147,
+      "loss": 0.1611,
+      "step": 6309
+    },
+    {
+      "epoch": 0.45528337963129983,
+      "grad_norm": 0.11358285695314407,
+      "learning_rate": 0.00018180112570356473,
+      "loss": 0.1626,
+      "step": 6310
+    },
+    {
+      "epoch": 0.4553555323063603,
+      "grad_norm": 0.10314571857452393,
+      "learning_rate": 0.000181798239284168,
+      "loss": 0.1977,
+      "step": 6311
+    },
+    {
+      "epoch": 0.45542768498142067,
+      "grad_norm": 0.11178232729434967,
+      "learning_rate": 0.00018179535286477126,
+      "loss": 0.1358,
+      "step": 6312
+    },
+    {
+      "epoch": 0.4554998376564811,
+      "grad_norm": 0.1190803200006485,
+      "learning_rate": 0.00018179246644537452,
+      "loss": 0.1085,
+      "step": 6313
+    },
+    {
+      "epoch": 0.45557199033154155,
+      "grad_norm": 0.12554095685482025,
+      "learning_rate": 0.00018178958002597779,
+      "loss": 0.1622,
+      "step": 6314
+    },
+    {
+      "epoch": 0.45564414300660194,
+      "grad_norm": 0.12150195240974426,
+      "learning_rate": 0.00018178669360658105,
+      "loss": 0.1676,
+      "step": 6315
+    },
+    {
+      "epoch": 0.4557162956816624,
+      "grad_norm": 0.1563529372215271,
+      "learning_rate": 0.0001817838071871843,
+      "loss": 0.1664,
+      "step": 6316
+    },
+    {
+      "epoch": 0.45578844835672283,
+      "grad_norm": 0.1873168647289276,
+      "learning_rate": 0.00018178092076778757,
+      "loss": 0.185,
+      "step": 6317
+    },
+    {
+      "epoch": 0.4558606010317833,
+      "grad_norm": 0.12532073259353638,
+      "learning_rate": 0.00018177803434839084,
+      "loss": 0.1596,
+      "step": 6318
+    },
+    {
+      "epoch": 0.45593275370684366,
+      "grad_norm": 0.1312006264925003,
+      "learning_rate": 0.00018177514792899407,
+      "loss": 0.1443,
+      "step": 6319
+    },
+    {
+      "epoch": 0.4560049063819041,
+      "grad_norm": 0.10332578420639038,
+      "learning_rate": 0.00018177226150959736,
+      "loss": 0.1757,
+      "step": 6320
+    },
+    {
+      "epoch": 0.45607705905696455,
+      "grad_norm": 0.1040138453245163,
+      "learning_rate": 0.00018176937509020063,
+      "loss": 0.1226,
+      "step": 6321
+    },
+    {
+      "epoch": 0.45614921173202494,
+      "grad_norm": 0.13129954040050507,
+      "learning_rate": 0.0001817664886708039,
+      "loss": 0.1652,
+      "step": 6322
+    },
+    {
+      "epoch": 0.4562213644070854,
+      "grad_norm": 0.11622961610555649,
+      "learning_rate": 0.00018176360225140715,
+      "loss": 0.1849,
+      "step": 6323
+    },
+    {
+      "epoch": 0.4562935170821458,
+      "grad_norm": 0.09446028620004654,
+      "learning_rate": 0.0001817607158320104,
+      "loss": 0.1734,
+      "step": 6324
+    },
+    {
+      "epoch": 0.45636566975720627,
+      "grad_norm": 0.10070005059242249,
+      "learning_rate": 0.00018175782941261365,
+      "loss": 0.1853,
+      "step": 6325
+    },
+    {
+      "epoch": 0.45643782243226666,
+      "grad_norm": 0.14620892703533173,
+      "learning_rate": 0.00018175494299321691,
+      "loss": 0.1377,
+      "step": 6326
+    },
+    {
+      "epoch": 0.4565099751073271,
+      "grad_norm": 0.1088029146194458,
+      "learning_rate": 0.0001817520565738202,
+      "loss": 0.1486,
+      "step": 6327
+    },
+    {
+      "epoch": 0.45658212778238755,
+      "grad_norm": 0.14180849492549896,
+      "learning_rate": 0.00018174917015442347,
+      "loss": 0.1597,
+      "step": 6328
+    },
+    {
+      "epoch": 0.45665428045744794,
+      "grad_norm": 0.12755030393600464,
+      "learning_rate": 0.0001817462837350267,
+      "loss": 0.1483,
+      "step": 6329
+    },
+    {
+      "epoch": 0.4567264331325084,
+      "grad_norm": 0.11482290923595428,
+      "learning_rate": 0.00018174339731562997,
+      "loss": 0.1008,
+      "step": 6330
+    },
+    {
+      "epoch": 0.4567985858075688,
+      "grad_norm": 0.1262357085943222,
+      "learning_rate": 0.00018174051089623323,
+      "loss": 0.1243,
+      "step": 6331
+    },
+    {
+      "epoch": 0.45687073848262927,
+      "grad_norm": 0.14928500354290009,
+      "learning_rate": 0.0001817376244768365,
+      "loss": 0.1893,
+      "step": 6332
+    },
+    {
+      "epoch": 0.45694289115768966,
+      "grad_norm": 0.12983083724975586,
+      "learning_rate": 0.00018173473805743975,
+      "loss": 0.1272,
+      "step": 6333
+    },
+    {
+      "epoch": 0.4570150438327501,
+      "grad_norm": 0.13185936212539673,
+      "learning_rate": 0.00018173185163804302,
+      "loss": 0.1277,
+      "step": 6334
+    },
+    {
+      "epoch": 0.45708719650781054,
+      "grad_norm": 0.10516639798879623,
+      "learning_rate": 0.00018172896521864628,
+      "loss": 0.1488,
+      "step": 6335
+    },
+    {
+      "epoch": 0.45715934918287093,
+      "grad_norm": 0.11697663366794586,
+      "learning_rate": 0.00018172607879924954,
+      "loss": 0.1771,
+      "step": 6336
+    },
+    {
+      "epoch": 0.4572315018579314,
+      "grad_norm": 0.176951602101326,
+      "learning_rate": 0.0001817231923798528,
+      "loss": 0.1402,
+      "step": 6337
+    },
+    {
+      "epoch": 0.4573036545329918,
+      "grad_norm": 0.11368632316589355,
+      "learning_rate": 0.00018172030596045607,
+      "loss": 0.1855,
+      "step": 6338
+    },
+    {
+      "epoch": 0.45737580720805227,
+      "grad_norm": 0.12310828268527985,
+      "learning_rate": 0.00018171741954105933,
+      "loss": 0.1033,
+      "step": 6339
+    },
+    {
+      "epoch": 0.45744795988311265,
+      "grad_norm": 0.10735306143760681,
+      "learning_rate": 0.00018171453312166257,
+      "loss": 0.1652,
+      "step": 6340
+    },
+    {
+      "epoch": 0.4575201125581731,
+      "grad_norm": 0.11295438557863235,
+      "learning_rate": 0.00018171164670226586,
+      "loss": 0.1622,
+      "step": 6341
+    },
+    {
+      "epoch": 0.45759226523323354,
+      "grad_norm": 0.10728729516267776,
+      "learning_rate": 0.00018170876028286912,
+      "loss": 0.1584,
+      "step": 6342
+    },
+    {
+      "epoch": 0.45766441790829393,
+      "grad_norm": 0.14049620926380157,
+      "learning_rate": 0.00018170587386347238,
+      "loss": 0.1725,
+      "step": 6343
+    },
+    {
+      "epoch": 0.4577365705833544,
+      "grad_norm": 0.10544250160455704,
+      "learning_rate": 0.00018170298744407565,
+      "loss": 0.1266,
+      "step": 6344
+    },
+    {
+      "epoch": 0.4578087232584148,
+      "grad_norm": 0.10104487091302872,
+      "learning_rate": 0.00018170010102467888,
+      "loss": 0.1539,
+      "step": 6345
+    },
+    {
+      "epoch": 0.4578808759334752,
+      "grad_norm": 0.12155293673276901,
+      "learning_rate": 0.00018169721460528215,
+      "loss": 0.1659,
+      "step": 6346
+    },
+    {
+      "epoch": 0.45795302860853565,
+      "grad_norm": 0.10418069362640381,
+      "learning_rate": 0.0001816943281858854,
+      "loss": 0.157,
+      "step": 6347
+    },
+    {
+      "epoch": 0.4580251812835961,
+      "grad_norm": 0.12691755592823029,
+      "learning_rate": 0.0001816914417664887,
+      "loss": 0.1556,
+      "step": 6348
+    },
+    {
+      "epoch": 0.45809733395865654,
+      "grad_norm": 0.11559534072875977,
+      "learning_rate": 0.00018168855534709196,
+      "loss": 0.1821,
+      "step": 6349
+    },
+    {
+      "epoch": 0.4581694866337169,
+      "grad_norm": 0.10430330783128738,
+      "learning_rate": 0.0001816856689276952,
+      "loss": 0.1321,
+      "step": 6350
+    },
+    {
+      "epoch": 0.45824163930877737,
+      "grad_norm": 0.11644110828638077,
+      "learning_rate": 0.00018168278250829846,
+      "loss": 0.1651,
+      "step": 6351
+    },
+    {
+      "epoch": 0.4583137919838378,
+      "grad_norm": 0.11053968220949173,
+      "learning_rate": 0.00018167989608890172,
+      "loss": 0.1446,
+      "step": 6352
+    },
+    {
+      "epoch": 0.4583859446588982,
+      "grad_norm": 0.12273887544870377,
+      "learning_rate": 0.00018167700966950499,
+      "loss": 0.1963,
+      "step": 6353
+    },
+    {
+      "epoch": 0.45845809733395865,
+      "grad_norm": 0.1266319304704666,
+      "learning_rate": 0.00018167412325010825,
+      "loss": 0.1561,
+      "step": 6354
+    },
+    {
+      "epoch": 0.4585302500090191,
+      "grad_norm": 0.11079327762126923,
+      "learning_rate": 0.0001816712368307115,
+      "loss": 0.1325,
+      "step": 6355
+    },
+    {
+      "epoch": 0.45860240268407954,
+      "grad_norm": 0.11335281282663345,
+      "learning_rate": 0.00018166835041131477,
+      "loss": 0.1843,
+      "step": 6356
+    },
+    {
+      "epoch": 0.4586745553591399,
+      "grad_norm": 0.11442596465349197,
+      "learning_rate": 0.00018166546399191804,
+      "loss": 0.1203,
+      "step": 6357
+    },
+    {
+      "epoch": 0.45874670803420037,
+      "grad_norm": 0.12477446347475052,
+      "learning_rate": 0.0001816625775725213,
+      "loss": 0.1746,
+      "step": 6358
+    },
+    {
+      "epoch": 0.4588188607092608,
+      "grad_norm": 0.09427320212125778,
+      "learning_rate": 0.00018165969115312456,
+      "loss": 0.1079,
+      "step": 6359
+    },
+    {
+      "epoch": 0.4588910133843212,
+      "grad_norm": 0.11606919765472412,
+      "learning_rate": 0.00018165680473372783,
+      "loss": 0.1729,
+      "step": 6360
+    },
+    {
+      "epoch": 0.45896316605938164,
+      "grad_norm": 0.1418781727552414,
+      "learning_rate": 0.00018165391831433106,
+      "loss": 0.1932,
+      "step": 6361
+    },
+    {
+      "epoch": 0.4590353187344421,
+      "grad_norm": 0.11492908746004105,
+      "learning_rate": 0.00018165103189493435,
+      "loss": 0.1547,
+      "step": 6362
+    },
+    {
+      "epoch": 0.45910747140950253,
+      "grad_norm": 0.10958899557590485,
+      "learning_rate": 0.00018164814547553761,
+      "loss": 0.1327,
+      "step": 6363
+    },
+    {
+      "epoch": 0.4591796240845629,
+      "grad_norm": 0.1356894075870514,
+      "learning_rate": 0.00018164525905614088,
+      "loss": 0.1571,
+      "step": 6364
+    },
+    {
+      "epoch": 0.45925177675962336,
+      "grad_norm": 0.10377617180347443,
+      "learning_rate": 0.00018164237263674414,
+      "loss": 0.2138,
+      "step": 6365
+    },
+    {
+      "epoch": 0.4593239294346838,
+      "grad_norm": 0.13075923919677734,
+      "learning_rate": 0.00018163948621734738,
+      "loss": 0.1748,
+      "step": 6366
+    },
+    {
+      "epoch": 0.4593960821097442,
+      "grad_norm": 0.1446242481470108,
+      "learning_rate": 0.00018163659979795064,
+      "loss": 0.1922,
+      "step": 6367
+    },
+    {
+      "epoch": 0.45946823478480464,
+      "grad_norm": 0.1177244782447815,
+      "learning_rate": 0.0001816337133785539,
+      "loss": 0.139,
+      "step": 6368
+    },
+    {
+      "epoch": 0.4595403874598651,
+      "grad_norm": 0.11971200257539749,
+      "learning_rate": 0.00018163082695915717,
+      "loss": 0.1529,
+      "step": 6369
+    },
+    {
+      "epoch": 0.45961254013492553,
+      "grad_norm": 0.15256580710411072,
+      "learning_rate": 0.00018162794053976046,
+      "loss": 0.1496,
+      "step": 6370
+    },
+    {
+      "epoch": 0.4596846928099859,
+      "grad_norm": 0.11101801693439484,
+      "learning_rate": 0.0001816250541203637,
+      "loss": 0.1439,
+      "step": 6371
+    },
+    {
+      "epoch": 0.45975684548504636,
+      "grad_norm": 0.11491654068231583,
+      "learning_rate": 0.00018162216770096695,
+      "loss": 0.2289,
+      "step": 6372
+    },
+    {
+      "epoch": 0.4598289981601068,
+      "grad_norm": 0.1170630007982254,
+      "learning_rate": 0.00018161928128157022,
+      "loss": 0.1543,
+      "step": 6373
+    },
+    {
+      "epoch": 0.4599011508351672,
+      "grad_norm": 0.11507470905780792,
+      "learning_rate": 0.00018161639486217348,
+      "loss": 0.1691,
+      "step": 6374
+    },
+    {
+      "epoch": 0.45997330351022764,
+      "grad_norm": 0.14252659678459167,
+      "learning_rate": 0.00018161350844277674,
+      "loss": 0.14,
+      "step": 6375
+    },
+    {
+      "epoch": 0.4600454561852881,
+      "grad_norm": 0.11022763699293137,
+      "learning_rate": 0.00018161062202338,
+      "loss": 0.166,
+      "step": 6376
+    },
+    {
+      "epoch": 0.46011760886034847,
+      "grad_norm": 0.150185227394104,
+      "learning_rate": 0.00018160773560398327,
+      "loss": 0.1591,
+      "step": 6377
+    },
+    {
+      "epoch": 0.4601897615354089,
+      "grad_norm": 0.1444399207830429,
+      "learning_rate": 0.00018160484918458653,
+      "loss": 0.1927,
+      "step": 6378
+    },
+    {
+      "epoch": 0.46026191421046936,
+      "grad_norm": 0.11369339376688004,
+      "learning_rate": 0.0001816019627651898,
+      "loss": 0.1781,
+      "step": 6379
+    },
+    {
+      "epoch": 0.4603340668855298,
+      "grad_norm": 0.1188020408153534,
+      "learning_rate": 0.00018159907634579306,
+      "loss": 0.1252,
+      "step": 6380
+    },
+    {
+      "epoch": 0.4604062195605902,
+      "grad_norm": 0.11750219017267227,
+      "learning_rate": 0.00018159618992639632,
+      "loss": 0.1637,
+      "step": 6381
+    },
+    {
+      "epoch": 0.46047837223565063,
+      "grad_norm": 0.1212676465511322,
+      "learning_rate": 0.00018159330350699956,
+      "loss": 0.1853,
+      "step": 6382
+    },
+    {
+      "epoch": 0.4605505249107111,
+      "grad_norm": 0.08860348910093307,
+      "learning_rate": 0.00018159041708760282,
+      "loss": 0.128,
+      "step": 6383
+    },
+    {
+      "epoch": 0.46062267758577147,
+      "grad_norm": 0.10735640674829483,
+      "learning_rate": 0.0001815875306682061,
+      "loss": 0.1735,
+      "step": 6384
+    },
+    {
+      "epoch": 0.4606948302608319,
+      "grad_norm": 0.13036073744297028,
+      "learning_rate": 0.00018158464424880937,
+      "loss": 0.1319,
+      "step": 6385
+    },
+    {
+      "epoch": 0.46076698293589236,
+      "grad_norm": 0.10206615924835205,
+      "learning_rate": 0.00018158175782941264,
+      "loss": 0.1723,
+      "step": 6386
+    },
+    {
+      "epoch": 0.4608391356109528,
+      "grad_norm": 0.10884787887334824,
+      "learning_rate": 0.00018157887141001587,
+      "loss": 0.1322,
+      "step": 6387
+    },
+    {
+      "epoch": 0.4609112882860132,
+      "grad_norm": 0.09827440232038498,
+      "learning_rate": 0.00018157598499061913,
+      "loss": 0.1934,
+      "step": 6388
+    },
+    {
+      "epoch": 0.46098344096107363,
+      "grad_norm": 0.10427073389291763,
+      "learning_rate": 0.0001815730985712224,
+      "loss": 0.1358,
+      "step": 6389
+    },
+    {
+      "epoch": 0.4610555936361341,
+      "grad_norm": 0.11135826259851456,
+      "learning_rate": 0.00018157021215182566,
+      "loss": 0.157,
+      "step": 6390
+    },
+    {
+      "epoch": 0.46112774631119446,
+      "grad_norm": 0.11995387077331543,
+      "learning_rate": 0.00018156732573242895,
+      "loss": 0.1854,
+      "step": 6391
+    },
+    {
+      "epoch": 0.4611998989862549,
+      "grad_norm": 0.12274812161922455,
+      "learning_rate": 0.00018156443931303219,
+      "loss": 0.142,
+      "step": 6392
+    },
+    {
+      "epoch": 0.46127205166131535,
+      "grad_norm": 0.12474401295185089,
+      "learning_rate": 0.00018156155289363545,
+      "loss": 0.1636,
+      "step": 6393
+    },
+    {
+      "epoch": 0.4613442043363758,
+      "grad_norm": 0.1390080600976944,
+      "learning_rate": 0.0001815586664742387,
+      "loss": 0.1685,
+      "step": 6394
+    },
+    {
+      "epoch": 0.4614163570114362,
+      "grad_norm": 0.10171744227409363,
+      "learning_rate": 0.00018155578005484197,
+      "loss": 0.1444,
+      "step": 6395
+    },
+    {
+      "epoch": 0.46148850968649663,
+      "grad_norm": 0.10669015347957611,
+      "learning_rate": 0.00018155289363544524,
+      "loss": 0.1095,
+      "step": 6396
+    },
+    {
+      "epoch": 0.4615606623615571,
+      "grad_norm": 0.1214829832315445,
+      "learning_rate": 0.0001815500072160485,
+      "loss": 0.1458,
+      "step": 6397
+    },
+    {
+      "epoch": 0.46163281503661746,
+      "grad_norm": 0.1343984454870224,
+      "learning_rate": 0.00018154712079665176,
+      "loss": 0.1811,
+      "step": 6398
+    },
+    {
+      "epoch": 0.4617049677116779,
+      "grad_norm": 0.11867804080247879,
+      "learning_rate": 0.00018154423437725503,
+      "loss": 0.1828,
+      "step": 6399
+    },
+    {
+      "epoch": 0.46177712038673835,
+      "grad_norm": 0.149135023355484,
+      "learning_rate": 0.0001815413479578583,
+      "loss": 0.1708,
+      "step": 6400
+    },
+    {
+      "epoch": 0.4618492730617988,
+      "grad_norm": 0.12415354698896408,
+      "learning_rate": 0.00018153846153846155,
+      "loss": 0.1478,
+      "step": 6401
+    },
+    {
+      "epoch": 0.4619214257368592,
+      "grad_norm": 0.147150918841362,
+      "learning_rate": 0.00018153557511906481,
+      "loss": 0.1718,
+      "step": 6402
+    },
+    {
+      "epoch": 0.4619935784119196,
+      "grad_norm": 0.11363677680492401,
+      "learning_rate": 0.00018153268869966805,
+      "loss": 0.1306,
+      "step": 6403
+    },
+    {
+      "epoch": 0.46206573108698007,
+      "grad_norm": 0.11995453387498856,
+      "learning_rate": 0.0001815298022802713,
+      "loss": 0.2005,
+      "step": 6404
+    },
+    {
+      "epoch": 0.46213788376204046,
+      "grad_norm": 0.1330481767654419,
+      "learning_rate": 0.0001815269158608746,
+      "loss": 0.1651,
+      "step": 6405
+    },
+    {
+      "epoch": 0.4622100364371009,
+      "grad_norm": 0.11070237308740616,
+      "learning_rate": 0.00018152402944147787,
+      "loss": 0.1586,
+      "step": 6406
+    },
+    {
+      "epoch": 0.46228218911216135,
+      "grad_norm": 0.12627100944519043,
+      "learning_rate": 0.00018152114302208113,
+      "loss": 0.171,
+      "step": 6407
+    },
+    {
+      "epoch": 0.46235434178722173,
+      "grad_norm": 0.12145937979221344,
+      "learning_rate": 0.00018151825660268437,
+      "loss": 0.1288,
+      "step": 6408
+    },
+    {
+      "epoch": 0.4624264944622822,
+      "grad_norm": 0.11226236820220947,
+      "learning_rate": 0.00018151537018328763,
+      "loss": 0.1718,
+      "step": 6409
+    },
+    {
+      "epoch": 0.4624986471373426,
+      "grad_norm": 0.16444960236549377,
+      "learning_rate": 0.0001815124837638909,
+      "loss": 0.1636,
+      "step": 6410
+    },
+    {
+      "epoch": 0.46257079981240307,
+      "grad_norm": 0.10897719115018845,
+      "learning_rate": 0.00018150959734449415,
+      "loss": 0.178,
+      "step": 6411
+    },
+    {
+      "epoch": 0.46264295248746345,
+      "grad_norm": 0.14687609672546387,
+      "learning_rate": 0.00018150671092509744,
+      "loss": 0.1847,
+      "step": 6412
+    },
+    {
+      "epoch": 0.4627151051625239,
+      "grad_norm": 0.10744399577379227,
+      "learning_rate": 0.00018150382450570068,
+      "loss": 0.1543,
+      "step": 6413
+    },
+    {
+      "epoch": 0.46278725783758434,
+      "grad_norm": 0.10385393351316452,
+      "learning_rate": 0.00018150093808630394,
+      "loss": 0.149,
+      "step": 6414
+    },
+    {
+      "epoch": 0.46285941051264473,
+      "grad_norm": 0.12182513624429703,
+      "learning_rate": 0.0001814980516669072,
+      "loss": 0.1421,
+      "step": 6415
+    },
+    {
+      "epoch": 0.4629315631877052,
+      "grad_norm": 0.14916294813156128,
+      "learning_rate": 0.00018149516524751047,
+      "loss": 0.1855,
+      "step": 6416
+    },
+    {
+      "epoch": 0.4630037158627656,
+      "grad_norm": 0.10458331555128098,
+      "learning_rate": 0.00018149227882811373,
+      "loss": 0.1786,
+      "step": 6417
+    },
+    {
+      "epoch": 0.46307586853782606,
+      "grad_norm": 0.14848539233207703,
+      "learning_rate": 0.000181489392408717,
+      "loss": 0.1478,
+      "step": 6418
+    },
+    {
+      "epoch": 0.46314802121288645,
+      "grad_norm": 0.1410219371318817,
+      "learning_rate": 0.00018148650598932026,
+      "loss": 0.1779,
+      "step": 6419
+    },
+    {
+      "epoch": 0.4632201738879469,
+      "grad_norm": 0.09285683929920197,
+      "learning_rate": 0.00018148361956992352,
+      "loss": 0.1702,
+      "step": 6420
+    },
+    {
+      "epoch": 0.46329232656300734,
+      "grad_norm": 0.1484486162662506,
+      "learning_rate": 0.00018148073315052678,
+      "loss": 0.2162,
+      "step": 6421
+    },
+    {
+      "epoch": 0.46336447923806773,
+      "grad_norm": 0.14052875339984894,
+      "learning_rate": 0.00018147784673113005,
+      "loss": 0.1639,
+      "step": 6422
+    },
+    {
+      "epoch": 0.46343663191312817,
+      "grad_norm": 0.11463950574398041,
+      "learning_rate": 0.0001814749603117333,
+      "loss": 0.169,
+      "step": 6423
+    },
+    {
+      "epoch": 0.4635087845881886,
+      "grad_norm": 0.10958924889564514,
+      "learning_rate": 0.00018147207389233654,
+      "loss": 0.1124,
+      "step": 6424
+    },
+    {
+      "epoch": 0.46358093726324906,
+      "grad_norm": 0.15305006504058838,
+      "learning_rate": 0.0001814691874729398,
+      "loss": 0.1749,
+      "step": 6425
+    },
+    {
+      "epoch": 0.46365308993830945,
+      "grad_norm": 0.10930691659450531,
+      "learning_rate": 0.0001814663010535431,
+      "loss": 0.2099,
+      "step": 6426
+    },
+    {
+      "epoch": 0.4637252426133699,
+      "grad_norm": 0.14171509444713593,
+      "learning_rate": 0.00018146341463414636,
+      "loss": 0.1764,
+      "step": 6427
+    },
+    {
+      "epoch": 0.46379739528843034,
+      "grad_norm": 0.1683446764945984,
+      "learning_rate": 0.00018146052821474962,
+      "loss": 0.1656,
+      "step": 6428
+    },
+    {
+      "epoch": 0.4638695479634907,
+      "grad_norm": 0.12286924570798874,
+      "learning_rate": 0.0001814576417953529,
+      "loss": 0.171,
+      "step": 6429
+    },
+    {
+      "epoch": 0.46394170063855117,
+      "grad_norm": 0.1306712031364441,
+      "learning_rate": 0.00018145475537595612,
+      "loss": 0.1532,
+      "step": 6430
+    },
+    {
+      "epoch": 0.4640138533136116,
+      "grad_norm": 0.13364550471305847,
+      "learning_rate": 0.00018145186895655939,
+      "loss": 0.1449,
+      "step": 6431
+    },
+    {
+      "epoch": 0.46408600598867206,
+      "grad_norm": 0.1290210783481598,
+      "learning_rate": 0.00018144898253716265,
+      "loss": 0.1507,
+      "step": 6432
+    },
+    {
+      "epoch": 0.46415815866373245,
+      "grad_norm": 0.15846581757068634,
+      "learning_rate": 0.00018144609611776594,
+      "loss": 0.1263,
+      "step": 6433
+    },
+    {
+      "epoch": 0.4642303113387929,
+      "grad_norm": 0.11158798635005951,
+      "learning_rate": 0.0001814432096983692,
+      "loss": 0.1785,
+      "step": 6434
+    },
+    {
+      "epoch": 0.46430246401385333,
+      "grad_norm": 0.09691043198108673,
+      "learning_rate": 0.00018144032327897244,
+      "loss": 0.1429,
+      "step": 6435
+    },
+    {
+      "epoch": 0.4643746166889137,
+      "grad_norm": 0.1297903060913086,
+      "learning_rate": 0.0001814374368595757,
+      "loss": 0.1737,
+      "step": 6436
+    },
+    {
+      "epoch": 0.46444676936397417,
+      "grad_norm": 0.10582982748746872,
+      "learning_rate": 0.00018143455044017896,
+      "loss": 0.2156,
+      "step": 6437
+    },
+    {
+      "epoch": 0.4645189220390346,
+      "grad_norm": 0.13353532552719116,
+      "learning_rate": 0.00018143166402078223,
+      "loss": 0.1536,
+      "step": 6438
+    },
+    {
+      "epoch": 0.464591074714095,
+      "grad_norm": 0.13009218871593475,
+      "learning_rate": 0.0001814287776013855,
+      "loss": 0.1437,
+      "step": 6439
+    },
+    {
+      "epoch": 0.46466322738915544,
+      "grad_norm": 0.11011122912168503,
+      "learning_rate": 0.00018142589118198875,
+      "loss": 0.1102,
+      "step": 6440
+    },
+    {
+      "epoch": 0.4647353800642159,
+      "grad_norm": 0.12354083359241486,
+      "learning_rate": 0.00018142300476259201,
+      "loss": 0.1938,
+      "step": 6441
+    },
+    {
+      "epoch": 0.46480753273927633,
+      "grad_norm": 0.09825614094734192,
+      "learning_rate": 0.00018142011834319528,
+      "loss": 0.1406,
+      "step": 6442
+    },
+    {
+      "epoch": 0.4648796854143367,
+      "grad_norm": 0.11126351356506348,
+      "learning_rate": 0.00018141723192379854,
+      "loss": 0.1242,
+      "step": 6443
+    },
+    {
+      "epoch": 0.46495183808939716,
+      "grad_norm": 0.1263757348060608,
+      "learning_rate": 0.0001814143455044018,
+      "loss": 0.1867,
+      "step": 6444
+    },
+    {
+      "epoch": 0.4650239907644576,
+      "grad_norm": 0.10534121096134186,
+      "learning_rate": 0.00018141145908500507,
+      "loss": 0.1539,
+      "step": 6445
+    },
+    {
+      "epoch": 0.465096143439518,
+      "grad_norm": 0.1159055158495903,
+      "learning_rate": 0.0001814085726656083,
+      "loss": 0.1574,
+      "step": 6446
+    },
+    {
+      "epoch": 0.46516829611457844,
+      "grad_norm": 0.13766884803771973,
+      "learning_rate": 0.0001814056862462116,
+      "loss": 0.1988,
+      "step": 6447
+    },
+    {
+      "epoch": 0.4652404487896389,
+      "grad_norm": 0.11503700911998749,
+      "learning_rate": 0.00018140279982681485,
+      "loss": 0.1327,
+      "step": 6448
+    },
+    {
+      "epoch": 0.4653126014646993,
+      "grad_norm": 0.15995432436466217,
+      "learning_rate": 0.00018139991340741812,
+      "loss": 0.144,
+      "step": 6449
+    },
+    {
+      "epoch": 0.4653847541397597,
+      "grad_norm": 0.10858393460512161,
+      "learning_rate": 0.00018139702698802138,
+      "loss": 0.1895,
+      "step": 6450
+    },
+    {
+      "epoch": 0.46545690681482016,
+      "grad_norm": 0.16355015337467194,
+      "learning_rate": 0.00018139414056862462,
+      "loss": 0.171,
+      "step": 6451
+    },
+    {
+      "epoch": 0.4655290594898806,
+      "grad_norm": 0.12066861987113953,
+      "learning_rate": 0.00018139125414922788,
+      "loss": 0.1786,
+      "step": 6452
+    },
+    {
+      "epoch": 0.465601212164941,
+      "grad_norm": 0.13982541859149933,
+      "learning_rate": 0.00018138836772983114,
+      "loss": 0.1929,
+      "step": 6453
+    },
+    {
+      "epoch": 0.46567336484000144,
+      "grad_norm": 0.1383274346590042,
+      "learning_rate": 0.00018138548131043443,
+      "loss": 0.1467,
+      "step": 6454
+    },
+    {
+      "epoch": 0.4657455175150619,
+      "grad_norm": 0.11604558676481247,
+      "learning_rate": 0.0001813825948910377,
+      "loss": 0.1761,
+      "step": 6455
+    },
+    {
+      "epoch": 0.4658176701901223,
+      "grad_norm": 0.13544894754886627,
+      "learning_rate": 0.00018137970847164093,
+      "loss": 0.1297,
+      "step": 6456
+    },
+    {
+      "epoch": 0.4658898228651827,
+      "grad_norm": 0.09898851811885834,
+      "learning_rate": 0.0001813768220522442,
+      "loss": 0.1838,
+      "step": 6457
+    },
+    {
+      "epoch": 0.46596197554024316,
+      "grad_norm": 0.11426880955696106,
+      "learning_rate": 0.00018137393563284746,
+      "loss": 0.1579,
+      "step": 6458
+    },
+    {
+      "epoch": 0.4660341282153036,
+      "grad_norm": 0.09921670705080032,
+      "learning_rate": 0.00018137104921345072,
+      "loss": 0.1243,
+      "step": 6459
+    },
+    {
+      "epoch": 0.466106280890364,
+      "grad_norm": 0.11920057237148285,
+      "learning_rate": 0.00018136816279405398,
+      "loss": 0.1541,
+      "step": 6460
+    },
+    {
+      "epoch": 0.46617843356542443,
+      "grad_norm": 0.12290757894515991,
+      "learning_rate": 0.00018136527637465725,
+      "loss": 0.1505,
+      "step": 6461
+    },
+    {
+      "epoch": 0.4662505862404849,
+      "grad_norm": 0.12962016463279724,
+      "learning_rate": 0.0001813623899552605,
+      "loss": 0.1735,
+      "step": 6462
+    },
+    {
+      "epoch": 0.4663227389155453,
+      "grad_norm": 0.15535619854927063,
+      "learning_rate": 0.00018135950353586377,
+      "loss": 0.1385,
+      "step": 6463
+    },
+    {
+      "epoch": 0.4663948915906057,
+      "grad_norm": 0.12486453354358673,
+      "learning_rate": 0.00018135661711646703,
+      "loss": 0.1254,
+      "step": 6464
+    },
+    {
+      "epoch": 0.46646704426566615,
+      "grad_norm": 0.14467908442020416,
+      "learning_rate": 0.0001813537306970703,
+      "loss": 0.1841,
+      "step": 6465
+    },
+    {
+      "epoch": 0.4665391969407266,
+      "grad_norm": 0.13134662806987762,
+      "learning_rate": 0.00018135084427767356,
+      "loss": 0.1249,
+      "step": 6466
+    },
+    {
+      "epoch": 0.466611349615787,
+      "grad_norm": 0.11720168590545654,
+      "learning_rate": 0.0001813479578582768,
+      "loss": 0.1126,
+      "step": 6467
+    },
+    {
+      "epoch": 0.46668350229084743,
+      "grad_norm": 0.13877685368061066,
+      "learning_rate": 0.00018134507143888009,
+      "loss": 0.1735,
+      "step": 6468
+    },
+    {
+      "epoch": 0.4667556549659079,
+      "grad_norm": 0.11466140300035477,
+      "learning_rate": 0.00018134218501948335,
+      "loss": 0.164,
+      "step": 6469
+    },
+    {
+      "epoch": 0.46682780764096826,
+      "grad_norm": 0.13385191559791565,
+      "learning_rate": 0.0001813392986000866,
+      "loss": 0.1438,
+      "step": 6470
+    },
+    {
+      "epoch": 0.4668999603160287,
+      "grad_norm": 0.1479582041501999,
+      "learning_rate": 0.00018133641218068987,
+      "loss": 0.1393,
+      "step": 6471
+    },
+    {
+      "epoch": 0.46697211299108915,
+      "grad_norm": 0.16987402737140656,
+      "learning_rate": 0.0001813335257612931,
+      "loss": 0.1922,
+      "step": 6472
+    },
+    {
+      "epoch": 0.4670442656661496,
+      "grad_norm": 0.12824693322181702,
+      "learning_rate": 0.00018133063934189637,
+      "loss": 0.1507,
+      "step": 6473
+    },
+    {
+      "epoch": 0.46711641834121,
+      "grad_norm": 0.13783419132232666,
+      "learning_rate": 0.00018132775292249964,
+      "loss": 0.1303,
+      "step": 6474
+    },
+    {
+      "epoch": 0.4671885710162704,
+      "grad_norm": 0.13383013010025024,
+      "learning_rate": 0.00018132486650310293,
+      "loss": 0.1516,
+      "step": 6475
+    },
+    {
+      "epoch": 0.46726072369133087,
+      "grad_norm": 0.11658954620361328,
+      "learning_rate": 0.0001813219800837062,
+      "loss": 0.1503,
+      "step": 6476
+    },
+    {
+      "epoch": 0.46733287636639126,
+      "grad_norm": 0.13576288521289825,
+      "learning_rate": 0.00018131909366430943,
+      "loss": 0.1386,
+      "step": 6477
+    },
+    {
+      "epoch": 0.4674050290414517,
+      "grad_norm": 0.10309716314077377,
+      "learning_rate": 0.0001813162072449127,
+      "loss": 0.1811,
+      "step": 6478
+    },
+    {
+      "epoch": 0.46747718171651215,
+      "grad_norm": 0.09541424363851547,
+      "learning_rate": 0.00018131332082551595,
+      "loss": 0.1468,
+      "step": 6479
+    },
+    {
+      "epoch": 0.4675493343915726,
+      "grad_norm": 0.10171490162611008,
+      "learning_rate": 0.00018131043440611921,
+      "loss": 0.116,
+      "step": 6480
+    },
+    {
+      "epoch": 0.467621487066633,
+      "grad_norm": 0.10852886736392975,
+      "learning_rate": 0.00018130754798672248,
+      "loss": 0.1362,
+      "step": 6481
+    },
+    {
+      "epoch": 0.4676936397416934,
+      "grad_norm": 0.12629874050617218,
+      "learning_rate": 0.00018130466156732574,
+      "loss": 0.1944,
+      "step": 6482
+    },
+    {
+      "epoch": 0.46776579241675387,
+      "grad_norm": 0.11615622043609619,
+      "learning_rate": 0.000181301775147929,
+      "loss": 0.2132,
+      "step": 6483
+    },
+    {
+      "epoch": 0.46783794509181426,
+      "grad_norm": 0.15097269415855408,
+      "learning_rate": 0.00018129888872853227,
+      "loss": 0.1369,
+      "step": 6484
+    },
+    {
+      "epoch": 0.4679100977668747,
+      "grad_norm": 0.11667463183403015,
+      "learning_rate": 0.00018129600230913553,
+      "loss": 0.1446,
+      "step": 6485
+    },
+    {
+      "epoch": 0.46798225044193514,
+      "grad_norm": 0.11408985406160355,
+      "learning_rate": 0.0001812931158897388,
+      "loss": 0.1198,
+      "step": 6486
+    },
+    {
+      "epoch": 0.4680544031169956,
+      "grad_norm": 0.1181301549077034,
+      "learning_rate": 0.00018129022947034205,
+      "loss": 0.1554,
+      "step": 6487
+    },
+    {
+      "epoch": 0.468126555792056,
+      "grad_norm": 0.1423175036907196,
+      "learning_rate": 0.0001812873430509453,
+      "loss": 0.1545,
+      "step": 6488
+    },
+    {
+      "epoch": 0.4681987084671164,
+      "grad_norm": 0.1127098798751831,
+      "learning_rate": 0.00018128445663154858,
+      "loss": 0.1267,
+      "step": 6489
+    },
+    {
+      "epoch": 0.46827086114217686,
+      "grad_norm": 0.15628333389759064,
+      "learning_rate": 0.00018128157021215184,
+      "loss": 0.193,
+      "step": 6490
+    },
+    {
+      "epoch": 0.46834301381723725,
+      "grad_norm": 0.115764319896698,
+      "learning_rate": 0.0001812786837927551,
+      "loss": 0.12,
+      "step": 6491
+    },
+    {
+      "epoch": 0.4684151664922977,
+      "grad_norm": 0.15240737795829773,
+      "learning_rate": 0.00018127579737335837,
+      "loss": 0.1843,
+      "step": 6492
+    },
+    {
+      "epoch": 0.46848731916735814,
+      "grad_norm": 0.13437342643737793,
+      "learning_rate": 0.0001812729109539616,
+      "loss": 0.1126,
+      "step": 6493
+    },
+    {
+      "epoch": 0.4685594718424186,
+      "grad_norm": 0.13212938606739044,
+      "learning_rate": 0.00018127002453456487,
+      "loss": 0.167,
+      "step": 6494
+    },
+    {
+      "epoch": 0.468631624517479,
+      "grad_norm": 0.13624313473701477,
+      "learning_rate": 0.00018126713811516813,
+      "loss": 0.1344,
+      "step": 6495
+    },
+    {
+      "epoch": 0.4687037771925394,
+      "grad_norm": 0.1937403380870819,
+      "learning_rate": 0.00018126425169577142,
+      "loss": 0.1865,
+      "step": 6496
+    },
+    {
+      "epoch": 0.46877592986759986,
+      "grad_norm": 0.1276809275150299,
+      "learning_rate": 0.00018126136527637468,
+      "loss": 0.1516,
+      "step": 6497
+    },
+    {
+      "epoch": 0.46884808254266025,
+      "grad_norm": 0.09707915782928467,
+      "learning_rate": 0.00018125847885697792,
+      "loss": 0.1733,
+      "step": 6498
+    },
+    {
+      "epoch": 0.4689202352177207,
+      "grad_norm": 0.13547371327877045,
+      "learning_rate": 0.00018125559243758118,
+      "loss": 0.145,
+      "step": 6499
+    },
+    {
+      "epoch": 0.46899238789278114,
+      "grad_norm": 0.11191370338201523,
+      "learning_rate": 0.00018125270601818445,
+      "loss": 0.1484,
+      "step": 6500
+    },
+    {
+      "epoch": 0.4690645405678415,
+      "grad_norm": 0.11115206032991409,
+      "learning_rate": 0.0001812498195987877,
+      "loss": 0.1522,
+      "step": 6501
+    },
+    {
+      "epoch": 0.46913669324290197,
+      "grad_norm": 0.11308522522449493,
+      "learning_rate": 0.00018124693317939097,
+      "loss": 0.1649,
+      "step": 6502
+    },
+    {
+      "epoch": 0.4692088459179624,
+      "grad_norm": 0.11458290368318558,
+      "learning_rate": 0.00018124404675999423,
+      "loss": 0.1652,
+      "step": 6503
+    },
+    {
+      "epoch": 0.46928099859302286,
+      "grad_norm": 0.1331714242696762,
+      "learning_rate": 0.0001812411603405975,
+      "loss": 0.1747,
+      "step": 6504
+    },
+    {
+      "epoch": 0.46935315126808325,
+      "grad_norm": 0.11369192600250244,
+      "learning_rate": 0.00018123827392120076,
+      "loss": 0.195,
+      "step": 6505
+    },
+    {
+      "epoch": 0.4694253039431437,
+      "grad_norm": 0.1556098610162735,
+      "learning_rate": 0.00018123538750180402,
+      "loss": 0.1732,
+      "step": 6506
+    },
+    {
+      "epoch": 0.46949745661820413,
+      "grad_norm": 0.11743935942649841,
+      "learning_rate": 0.00018123250108240729,
+      "loss": 0.1762,
+      "step": 6507
+    },
+    {
+      "epoch": 0.4695696092932645,
+      "grad_norm": 0.09582743793725967,
+      "learning_rate": 0.00018122961466301055,
+      "loss": 0.1053,
+      "step": 6508
+    },
+    {
+      "epoch": 0.46964176196832497,
+      "grad_norm": 0.1395568996667862,
+      "learning_rate": 0.00018122672824361378,
+      "loss": 0.1723,
+      "step": 6509
+    },
+    {
+      "epoch": 0.4697139146433854,
+      "grad_norm": 0.1317128688097,
+      "learning_rate": 0.00018122384182421707,
+      "loss": 0.1207,
+      "step": 6510
+    },
+    {
+      "epoch": 0.46978606731844585,
+      "grad_norm": 0.11051097512245178,
+      "learning_rate": 0.00018122095540482034,
+      "loss": 0.1774,
+      "step": 6511
+    },
+    {
+      "epoch": 0.46985821999350624,
+      "grad_norm": 0.14036059379577637,
+      "learning_rate": 0.0001812180689854236,
+      "loss": 0.1344,
+      "step": 6512
+    },
+    {
+      "epoch": 0.4699303726685667,
+      "grad_norm": 0.11173304915428162,
+      "learning_rate": 0.00018121518256602686,
+      "loss": 0.163,
+      "step": 6513
+    },
+    {
+      "epoch": 0.47000252534362713,
+      "grad_norm": 0.12173853814601898,
+      "learning_rate": 0.0001812122961466301,
+      "loss": 0.1832,
+      "step": 6514
+    },
+    {
+      "epoch": 0.4700746780186875,
+      "grad_norm": 0.0984521210193634,
+      "learning_rate": 0.00018120940972723336,
+      "loss": 0.1437,
+      "step": 6515
+    },
+    {
+      "epoch": 0.47014683069374796,
+      "grad_norm": 0.09181611239910126,
+      "learning_rate": 0.00018120652330783663,
+      "loss": 0.1452,
+      "step": 6516
+    },
+    {
+      "epoch": 0.4702189833688084,
+      "grad_norm": 0.1238136738538742,
+      "learning_rate": 0.00018120363688843992,
+      "loss": 0.1546,
+      "step": 6517
+    },
+    {
+      "epoch": 0.47029113604386885,
+      "grad_norm": 0.1385972499847412,
+      "learning_rate": 0.00018120075046904318,
+      "loss": 0.1755,
+      "step": 6518
+    },
+    {
+      "epoch": 0.47036328871892924,
+      "grad_norm": 0.1276998668909073,
+      "learning_rate": 0.00018119786404964641,
+      "loss": 0.2061,
+      "step": 6519
+    },
+    {
+      "epoch": 0.4704354413939897,
+      "grad_norm": 0.14419038593769073,
+      "learning_rate": 0.00018119497763024968,
+      "loss": 0.1492,
+      "step": 6520
+    },
+    {
+      "epoch": 0.47050759406905013,
+      "grad_norm": 0.11534445732831955,
+      "learning_rate": 0.00018119209121085294,
+      "loss": 0.1521,
+      "step": 6521
+    },
+    {
+      "epoch": 0.4705797467441105,
+      "grad_norm": 0.12136315554380417,
+      "learning_rate": 0.0001811892047914562,
+      "loss": 0.1367,
+      "step": 6522
+    },
+    {
+      "epoch": 0.47065189941917096,
+      "grad_norm": 0.12664173543453217,
+      "learning_rate": 0.00018118631837205947,
+      "loss": 0.1368,
+      "step": 6523
+    },
+    {
+      "epoch": 0.4707240520942314,
+      "grad_norm": 0.1297597885131836,
+      "learning_rate": 0.00018118343195266273,
+      "loss": 0.2212,
+      "step": 6524
+    },
+    {
+      "epoch": 0.47079620476929185,
+      "grad_norm": 0.11439337581396103,
+      "learning_rate": 0.000181180545533266,
+      "loss": 0.1603,
+      "step": 6525
+    },
+    {
+      "epoch": 0.47086835744435224,
+      "grad_norm": 0.1221558228135109,
+      "learning_rate": 0.00018117765911386925,
+      "loss": 0.1582,
+      "step": 6526
+    },
+    {
+      "epoch": 0.4709405101194127,
+      "grad_norm": 0.1027345359325409,
+      "learning_rate": 0.00018117477269447252,
+      "loss": 0.1799,
+      "step": 6527
+    },
+    {
+      "epoch": 0.4710126627944731,
+      "grad_norm": 0.11186765134334564,
+      "learning_rate": 0.00018117188627507578,
+      "loss": 0.1518,
+      "step": 6528
+    },
+    {
+      "epoch": 0.4710848154695335,
+      "grad_norm": 0.1288941502571106,
+      "learning_rate": 0.00018116899985567904,
+      "loss": 0.1344,
+      "step": 6529
+    },
+    {
+      "epoch": 0.47115696814459396,
+      "grad_norm": 0.10255924612283707,
+      "learning_rate": 0.00018116611343628228,
+      "loss": 0.107,
+      "step": 6530
+    },
+    {
+      "epoch": 0.4712291208196544,
+      "grad_norm": 0.11037500202655792,
+      "learning_rate": 0.00018116322701688557,
+      "loss": 0.1445,
+      "step": 6531
+    },
+    {
+      "epoch": 0.4713012734947148,
+      "grad_norm": 0.09189187735319138,
+      "learning_rate": 0.00018116034059748883,
+      "loss": 0.1748,
+      "step": 6532
+    },
+    {
+      "epoch": 0.47137342616977523,
+      "grad_norm": 0.13333386182785034,
+      "learning_rate": 0.0001811574541780921,
+      "loss": 0.1718,
+      "step": 6533
+    },
+    {
+      "epoch": 0.4714455788448357,
+      "grad_norm": 0.14279568195343018,
+      "learning_rate": 0.00018115456775869536,
+      "loss": 0.1587,
+      "step": 6534
+    },
+    {
+      "epoch": 0.4715177315198961,
+      "grad_norm": 0.11661846935749054,
+      "learning_rate": 0.0001811516813392986,
+      "loss": 0.1619,
+      "step": 6535
+    },
+    {
+      "epoch": 0.4715898841949565,
+      "grad_norm": 0.1385808289051056,
+      "learning_rate": 0.00018114879491990186,
+      "loss": 0.2019,
+      "step": 6536
+    },
+    {
+      "epoch": 0.47166203687001695,
+      "grad_norm": 0.11933812499046326,
+      "learning_rate": 0.00018114590850050512,
+      "loss": 0.1153,
+      "step": 6537
+    },
+    {
+      "epoch": 0.4717341895450774,
+      "grad_norm": 0.1256553828716278,
+      "learning_rate": 0.0001811430220811084,
+      "loss": 0.1407,
+      "step": 6538
+    },
+    {
+      "epoch": 0.4718063422201378,
+      "grad_norm": 0.14740781486034393,
+      "learning_rate": 0.00018114013566171167,
+      "loss": 0.167,
+      "step": 6539
+    },
+    {
+      "epoch": 0.47187849489519823,
+      "grad_norm": 0.12701353430747986,
+      "learning_rate": 0.0001811372492423149,
+      "loss": 0.1264,
+      "step": 6540
+    },
+    {
+      "epoch": 0.4719506475702587,
+      "grad_norm": 0.1495954990386963,
+      "learning_rate": 0.00018113436282291817,
+      "loss": 0.1844,
+      "step": 6541
+    },
+    {
+      "epoch": 0.4720228002453191,
+      "grad_norm": 0.11055200546979904,
+      "learning_rate": 0.00018113147640352143,
+      "loss": 0.1683,
+      "step": 6542
+    },
+    {
+      "epoch": 0.4720949529203795,
+      "grad_norm": 0.18256966769695282,
+      "learning_rate": 0.0001811285899841247,
+      "loss": 0.1395,
+      "step": 6543
+    },
+    {
+      "epoch": 0.47216710559543995,
+      "grad_norm": 0.1399894505739212,
+      "learning_rate": 0.00018112570356472796,
+      "loss": 0.1742,
+      "step": 6544
+    },
+    {
+      "epoch": 0.4722392582705004,
+      "grad_norm": 0.11353211849927902,
+      "learning_rate": 0.00018112281714533122,
+      "loss": 0.1333,
+      "step": 6545
+    },
+    {
+      "epoch": 0.4723114109455608,
+      "grad_norm": 0.14108607172966003,
+      "learning_rate": 0.00018111993072593449,
+      "loss": 0.2102,
+      "step": 6546
+    },
+    {
+      "epoch": 0.4723835636206212,
+      "grad_norm": 0.12063361704349518,
+      "learning_rate": 0.00018111704430653775,
+      "loss": 0.1504,
+      "step": 6547
+    },
+    {
+      "epoch": 0.47245571629568167,
+      "grad_norm": 0.1718057245016098,
+      "learning_rate": 0.000181114157887141,
+      "loss": 0.19,
+      "step": 6548
+    },
+    {
+      "epoch": 0.4725278689707421,
+      "grad_norm": 0.10668683797121048,
+      "learning_rate": 0.00018111127146774427,
+      "loss": 0.1488,
+      "step": 6549
+    },
+    {
+      "epoch": 0.4726000216458025,
+      "grad_norm": 0.09043119847774506,
+      "learning_rate": 0.00018110838504834754,
+      "loss": 0.1434,
+      "step": 6550
+    },
+    {
+      "epoch": 0.47267217432086295,
+      "grad_norm": 0.10747907310724258,
+      "learning_rate": 0.0001811054986289508,
+      "loss": 0.1596,
+      "step": 6551
+    },
+    {
+      "epoch": 0.4727443269959234,
+      "grad_norm": 0.11078695952892303,
+      "learning_rate": 0.00018110261220955406,
+      "loss": 0.1629,
+      "step": 6552
+    },
+    {
+      "epoch": 0.4728164796709838,
+      "grad_norm": 0.1163090318441391,
+      "learning_rate": 0.00018109972579015733,
+      "loss": 0.178,
+      "step": 6553
+    },
+    {
+      "epoch": 0.4728886323460442,
+      "grad_norm": 0.11894099414348602,
+      "learning_rate": 0.0001810968393707606,
+      "loss": 0.1557,
+      "step": 6554
+    },
+    {
+      "epoch": 0.47296078502110467,
+      "grad_norm": 0.10437697172164917,
+      "learning_rate": 0.00018109395295136385,
+      "loss": 0.1197,
+      "step": 6555
+    },
+    {
+      "epoch": 0.4730329376961651,
+      "grad_norm": 0.10267592966556549,
+      "learning_rate": 0.00018109106653196711,
+      "loss": 0.1778,
+      "step": 6556
+    },
+    {
+      "epoch": 0.4731050903712255,
+      "grad_norm": 0.11590174585580826,
+      "learning_rate": 0.00018108818011257035,
+      "loss": 0.1882,
+      "step": 6557
+    },
+    {
+      "epoch": 0.47317724304628594,
+      "grad_norm": 0.11858008056879044,
+      "learning_rate": 0.00018108529369317361,
+      "loss": 0.1707,
+      "step": 6558
+    },
+    {
+      "epoch": 0.4732493957213464,
+      "grad_norm": 0.11694855242967606,
+      "learning_rate": 0.0001810824072737769,
+      "loss": 0.1728,
+      "step": 6559
+    },
+    {
+      "epoch": 0.4733215483964068,
+      "grad_norm": 0.12493687868118286,
+      "learning_rate": 0.00018107952085438017,
+      "loss": 0.1962,
+      "step": 6560
+    },
+    {
+      "epoch": 0.4733937010714672,
+      "grad_norm": 0.12782999873161316,
+      "learning_rate": 0.00018107663443498343,
+      "loss": 0.1692,
+      "step": 6561
+    },
+    {
+      "epoch": 0.47346585374652767,
+      "grad_norm": 0.12364133447408676,
+      "learning_rate": 0.00018107374801558667,
+      "loss": 0.1231,
+      "step": 6562
+    },
+    {
+      "epoch": 0.47353800642158805,
+      "grad_norm": 0.12436700612306595,
+      "learning_rate": 0.00018107086159618993,
+      "loss": 0.1225,
+      "step": 6563
+    },
+    {
+      "epoch": 0.4736101590966485,
+      "grad_norm": 0.1342078149318695,
+      "learning_rate": 0.0001810679751767932,
+      "loss": 0.1363,
+      "step": 6564
+    },
+    {
+      "epoch": 0.47368231177170894,
+      "grad_norm": 0.15493200719356537,
+      "learning_rate": 0.00018106508875739645,
+      "loss": 0.1801,
+      "step": 6565
+    },
+    {
+      "epoch": 0.4737544644467694,
+      "grad_norm": 0.13285429775714874,
+      "learning_rate": 0.00018106220233799972,
+      "loss": 0.1501,
+      "step": 6566
+    },
+    {
+      "epoch": 0.4738266171218298,
+      "grad_norm": 0.15981805324554443,
+      "learning_rate": 0.00018105931591860298,
+      "loss": 0.2127,
+      "step": 6567
+    },
+    {
+      "epoch": 0.4738987697968902,
+      "grad_norm": 0.12443274259567261,
+      "learning_rate": 0.00018105642949920624,
+      "loss": 0.1572,
+      "step": 6568
+    },
+    {
+      "epoch": 0.47397092247195066,
+      "grad_norm": 0.16868320107460022,
+      "learning_rate": 0.0001810535430798095,
+      "loss": 0.1776,
+      "step": 6569
+    },
+    {
+      "epoch": 0.47404307514701105,
+      "grad_norm": 0.11300229281187057,
+      "learning_rate": 0.00018105065666041277,
+      "loss": 0.1331,
+      "step": 6570
+    },
+    {
+      "epoch": 0.4741152278220715,
+      "grad_norm": 0.1066485121846199,
+      "learning_rate": 0.00018104777024101603,
+      "loss": 0.1507,
+      "step": 6571
+    },
+    {
+      "epoch": 0.47418738049713194,
+      "grad_norm": 0.11548085510730743,
+      "learning_rate": 0.0001810448838216193,
+      "loss": 0.1868,
+      "step": 6572
+    },
+    {
+      "epoch": 0.4742595331721924,
+      "grad_norm": 0.11486897617578506,
+      "learning_rate": 0.00018104199740222253,
+      "loss": 0.152,
+      "step": 6573
+    },
+    {
+      "epoch": 0.47433168584725277,
+      "grad_norm": 0.1293863207101822,
+      "learning_rate": 0.00018103911098282582,
+      "loss": 0.1139,
+      "step": 6574
+    },
+    {
+      "epoch": 0.4744038385223132,
+      "grad_norm": 0.09505106508731842,
+      "learning_rate": 0.00018103622456342908,
+      "loss": 0.1714,
+      "step": 6575
+    },
+    {
+      "epoch": 0.47447599119737366,
+      "grad_norm": 0.11848371475934982,
+      "learning_rate": 0.00018103333814403235,
+      "loss": 0.1933,
+      "step": 6576
+    },
+    {
+      "epoch": 0.47454814387243405,
+      "grad_norm": 0.11045391112565994,
+      "learning_rate": 0.0001810304517246356,
+      "loss": 0.1315,
+      "step": 6577
+    },
+    {
+      "epoch": 0.4746202965474945,
+      "grad_norm": 0.09907637536525726,
+      "learning_rate": 0.00018102756530523884,
+      "loss": 0.134,
+      "step": 6578
+    },
+    {
+      "epoch": 0.47469244922255494,
+      "grad_norm": 0.11193614453077316,
+      "learning_rate": 0.0001810246788858421,
+      "loss": 0.1517,
+      "step": 6579
+    },
+    {
+      "epoch": 0.4747646018976154,
+      "grad_norm": 0.10982697457075119,
+      "learning_rate": 0.00018102179246644537,
+      "loss": 0.1708,
+      "step": 6580
+    },
+    {
+      "epoch": 0.47483675457267577,
+      "grad_norm": 0.12854880094528198,
+      "learning_rate": 0.00018101890604704866,
+      "loss": 0.1961,
+      "step": 6581
+    },
+    {
+      "epoch": 0.4749089072477362,
+      "grad_norm": 0.1719358265399933,
+      "learning_rate": 0.00018101601962765192,
+      "loss": 0.1382,
+      "step": 6582
+    },
+    {
+      "epoch": 0.47498105992279666,
+      "grad_norm": 0.12974786758422852,
+      "learning_rate": 0.00018101313320825516,
+      "loss": 0.137,
+      "step": 6583
+    },
+    {
+      "epoch": 0.47505321259785704,
+      "grad_norm": 0.11269278079271317,
+      "learning_rate": 0.00018101024678885842,
+      "loss": 0.1494,
+      "step": 6584
+    },
+    {
+      "epoch": 0.4751253652729175,
+      "grad_norm": 0.11579824239015579,
+      "learning_rate": 0.00018100736036946169,
+      "loss": 0.1704,
+      "step": 6585
+    },
+    {
+      "epoch": 0.47519751794797793,
+      "grad_norm": 0.11576331406831741,
+      "learning_rate": 0.00018100447395006495,
+      "loss": 0.1707,
+      "step": 6586
+    },
+    {
+      "epoch": 0.4752696706230384,
+      "grad_norm": 0.10914190113544464,
+      "learning_rate": 0.0001810015875306682,
+      "loss": 0.1695,
+      "step": 6587
+    },
+    {
+      "epoch": 0.47534182329809876,
+      "grad_norm": 0.19466876983642578,
+      "learning_rate": 0.00018099870111127147,
+      "loss": 0.1266,
+      "step": 6588
+    },
+    {
+      "epoch": 0.4754139759731592,
+      "grad_norm": 0.1322767287492752,
+      "learning_rate": 0.00018099581469187474,
+      "loss": 0.1713,
+      "step": 6589
+    },
+    {
+      "epoch": 0.47548612864821965,
+      "grad_norm": 0.13432395458221436,
+      "learning_rate": 0.000180992928272478,
+      "loss": 0.1327,
+      "step": 6590
+    },
+    {
+      "epoch": 0.47555828132328004,
+      "grad_norm": 0.11618776619434357,
+      "learning_rate": 0.00018099004185308126,
+      "loss": 0.1659,
+      "step": 6591
+    },
+    {
+      "epoch": 0.4756304339983405,
+      "grad_norm": 0.13345777988433838,
+      "learning_rate": 0.00018098715543368453,
+      "loss": 0.1703,
+      "step": 6592
+    },
+    {
+      "epoch": 0.47570258667340093,
+      "grad_norm": 0.19079522788524628,
+      "learning_rate": 0.0001809842690142878,
+      "loss": 0.1992,
+      "step": 6593
+    },
+    {
+      "epoch": 0.4757747393484613,
+      "grad_norm": 0.16293351352214813,
+      "learning_rate": 0.00018098138259489102,
+      "loss": 0.1404,
+      "step": 6594
+    },
+    {
+      "epoch": 0.47584689202352176,
+      "grad_norm": 0.1540932059288025,
+      "learning_rate": 0.00018097849617549431,
+      "loss": 0.2358,
+      "step": 6595
+    },
+    {
+      "epoch": 0.4759190446985822,
+      "grad_norm": 0.12129596620798111,
+      "learning_rate": 0.00018097560975609758,
+      "loss": 0.1355,
+      "step": 6596
+    },
+    {
+      "epoch": 0.47599119737364265,
+      "grad_norm": 0.12973016500473022,
+      "learning_rate": 0.00018097272333670084,
+      "loss": 0.1235,
+      "step": 6597
+    },
+    {
+      "epoch": 0.47606335004870304,
+      "grad_norm": 0.12584935128688812,
+      "learning_rate": 0.0001809698369173041,
+      "loss": 0.1781,
+      "step": 6598
+    },
+    {
+      "epoch": 0.4761355027237635,
+      "grad_norm": 0.14946849644184113,
+      "learning_rate": 0.00018096695049790734,
+      "loss": 0.1754,
+      "step": 6599
+    },
+    {
+      "epoch": 0.4762076553988239,
+      "grad_norm": 0.11293647438287735,
+      "learning_rate": 0.0001809640640785106,
+      "loss": 0.142,
+      "step": 6600
+    },
+    {
+      "epoch": 0.4762798080738843,
+      "grad_norm": 0.09187892824411392,
+      "learning_rate": 0.00018096117765911387,
+      "loss": 0.1148,
+      "step": 6601
+    },
+    {
+      "epoch": 0.47635196074894476,
+      "grad_norm": 0.14158938825130463,
+      "learning_rate": 0.00018095829123971716,
+      "loss": 0.1556,
+      "step": 6602
+    },
+    {
+      "epoch": 0.4764241134240052,
+      "grad_norm": 0.14010418951511383,
+      "learning_rate": 0.00018095540482032042,
+      "loss": 0.1418,
+      "step": 6603
+    },
+    {
+      "epoch": 0.47649626609906565,
+      "grad_norm": 0.11662571877241135,
+      "learning_rate": 0.00018095251840092365,
+      "loss": 0.1528,
+      "step": 6604
+    },
+    {
+      "epoch": 0.47656841877412603,
+      "grad_norm": 0.18299691379070282,
+      "learning_rate": 0.00018094963198152692,
+      "loss": 0.1936,
+      "step": 6605
+    },
+    {
+      "epoch": 0.4766405714491865,
+      "grad_norm": 0.11245797574520111,
+      "learning_rate": 0.00018094674556213018,
+      "loss": 0.1748,
+      "step": 6606
+    },
+    {
+      "epoch": 0.4767127241242469,
+      "grad_norm": 0.1389348804950714,
+      "learning_rate": 0.00018094385914273344,
+      "loss": 0.1184,
+      "step": 6607
+    },
+    {
+      "epoch": 0.4767848767993073,
+      "grad_norm": 0.12851954996585846,
+      "learning_rate": 0.0001809409727233367,
+      "loss": 0.1642,
+      "step": 6608
+    },
+    {
+      "epoch": 0.47685702947436776,
+      "grad_norm": 0.16452080011367798,
+      "learning_rate": 0.00018093808630393997,
+      "loss": 0.1655,
+      "step": 6609
+    },
+    {
+      "epoch": 0.4769291821494282,
+      "grad_norm": 0.14606648683547974,
+      "learning_rate": 0.00018093519988454323,
+      "loss": 0.1588,
+      "step": 6610
+    },
+    {
+      "epoch": 0.47700133482448864,
+      "grad_norm": 0.13718731701374054,
+      "learning_rate": 0.0001809323134651465,
+      "loss": 0.1402,
+      "step": 6611
+    },
+    {
+      "epoch": 0.47707348749954903,
+      "grad_norm": 0.11560270190238953,
+      "learning_rate": 0.00018092942704574976,
+      "loss": 0.1626,
+      "step": 6612
+    },
+    {
+      "epoch": 0.4771456401746095,
+      "grad_norm": 0.1626240760087967,
+      "learning_rate": 0.00018092654062635302,
+      "loss": 0.1752,
+      "step": 6613
+    },
+    {
+      "epoch": 0.4772177928496699,
+      "grad_norm": 0.15158993005752563,
+      "learning_rate": 0.00018092365420695628,
+      "loss": 0.1806,
+      "step": 6614
+    },
+    {
+      "epoch": 0.4772899455247303,
+      "grad_norm": 0.1264626383781433,
+      "learning_rate": 0.00018092076778755952,
+      "loss": 0.1171,
+      "step": 6615
+    },
+    {
+      "epoch": 0.47736209819979075,
+      "grad_norm": 0.1221628487110138,
+      "learning_rate": 0.0001809178813681628,
+      "loss": 0.1555,
+      "step": 6616
+    },
+    {
+      "epoch": 0.4774342508748512,
+      "grad_norm": 0.12342524528503418,
+      "learning_rate": 0.00018091499494876607,
+      "loss": 0.1459,
+      "step": 6617
+    },
+    {
+      "epoch": 0.47750640354991164,
+      "grad_norm": 0.1038442999124527,
+      "learning_rate": 0.00018091210852936933,
+      "loss": 0.1378,
+      "step": 6618
+    },
+    {
+      "epoch": 0.47757855622497203,
+      "grad_norm": 0.1145162507891655,
+      "learning_rate": 0.0001809092221099726,
+      "loss": 0.1901,
+      "step": 6619
+    },
+    {
+      "epoch": 0.4776507089000325,
+      "grad_norm": 0.12268324196338654,
+      "learning_rate": 0.00018090633569057583,
+      "loss": 0.1643,
+      "step": 6620
+    },
+    {
+      "epoch": 0.4777228615750929,
+      "grad_norm": 0.12261956185102463,
+      "learning_rate": 0.0001809034492711791,
+      "loss": 0.1541,
+      "step": 6621
+    },
+    {
+      "epoch": 0.4777950142501533,
+      "grad_norm": 0.12539663910865784,
+      "learning_rate": 0.00018090056285178236,
+      "loss": 0.1705,
+      "step": 6622
+    },
+    {
+      "epoch": 0.47786716692521375,
+      "grad_norm": 0.14309175312519073,
+      "learning_rate": 0.00018089767643238565,
+      "loss": 0.1829,
+      "step": 6623
+    },
+    {
+      "epoch": 0.4779393196002742,
+      "grad_norm": 0.11351905018091202,
+      "learning_rate": 0.0001808947900129889,
+      "loss": 0.1625,
+      "step": 6624
+    },
+    {
+      "epoch": 0.4780114722753346,
+      "grad_norm": 0.1390398144721985,
+      "learning_rate": 0.00018089190359359215,
+      "loss": 0.167,
+      "step": 6625
+    },
+    {
+      "epoch": 0.478083624950395,
+      "grad_norm": 0.12186425924301147,
+      "learning_rate": 0.0001808890171741954,
+      "loss": 0.1163,
+      "step": 6626
+    },
+    {
+      "epoch": 0.47815577762545547,
+      "grad_norm": 0.11824537813663483,
+      "learning_rate": 0.00018088613075479867,
+      "loss": 0.1289,
+      "step": 6627
+    },
+    {
+      "epoch": 0.4782279303005159,
+      "grad_norm": 0.11230827867984772,
+      "learning_rate": 0.00018088324433540194,
+      "loss": 0.1711,
+      "step": 6628
+    },
+    {
+      "epoch": 0.4783000829755763,
+      "grad_norm": 0.14109791815280914,
+      "learning_rate": 0.0001808803579160052,
+      "loss": 0.1551,
+      "step": 6629
+    },
+    {
+      "epoch": 0.47837223565063675,
+      "grad_norm": 0.12549245357513428,
+      "learning_rate": 0.00018087747149660846,
+      "loss": 0.1696,
+      "step": 6630
+    },
+    {
+      "epoch": 0.4784443883256972,
+      "grad_norm": 0.11947820335626602,
+      "learning_rate": 0.00018087458507721173,
+      "loss": 0.1483,
+      "step": 6631
+    },
+    {
+      "epoch": 0.4785165410007576,
+      "grad_norm": 0.14599832892417908,
+      "learning_rate": 0.000180871698657815,
+      "loss": 0.1937,
+      "step": 6632
+    },
+    {
+      "epoch": 0.478588693675818,
+      "grad_norm": 0.15094219148159027,
+      "learning_rate": 0.00018086881223841825,
+      "loss": 0.1848,
+      "step": 6633
+    },
+    {
+      "epoch": 0.47866084635087847,
+      "grad_norm": 0.10668916255235672,
+      "learning_rate": 0.00018086592581902151,
+      "loss": 0.1225,
+      "step": 6634
+    },
+    {
+      "epoch": 0.4787329990259389,
+      "grad_norm": 0.13218899071216583,
+      "learning_rate": 0.00018086303939962478,
+      "loss": 0.1547,
+      "step": 6635
+    },
+    {
+      "epoch": 0.4788051517009993,
+      "grad_norm": 0.10088010877370834,
+      "learning_rate": 0.000180860152980228,
+      "loss": 0.2136,
+      "step": 6636
+    },
+    {
+      "epoch": 0.47887730437605974,
+      "grad_norm": 0.13138513267040253,
+      "learning_rate": 0.0001808572665608313,
+      "loss": 0.1611,
+      "step": 6637
+    },
+    {
+      "epoch": 0.4789494570511202,
+      "grad_norm": 0.11981962621212006,
+      "learning_rate": 0.00018085438014143457,
+      "loss": 0.1709,
+      "step": 6638
+    },
+    {
+      "epoch": 0.4790216097261806,
+      "grad_norm": 0.10274869203567505,
+      "learning_rate": 0.00018085149372203783,
+      "loss": 0.1538,
+      "step": 6639
+    },
+    {
+      "epoch": 0.479093762401241,
+      "grad_norm": 0.12627063691616058,
+      "learning_rate": 0.0001808486073026411,
+      "loss": 0.1435,
+      "step": 6640
+    },
+    {
+      "epoch": 0.47916591507630146,
+      "grad_norm": 0.1629066914319992,
+      "learning_rate": 0.00018084572088324433,
+      "loss": 0.1687,
+      "step": 6641
+    },
+    {
+      "epoch": 0.4792380677513619,
+      "grad_norm": 0.11205365508794785,
+      "learning_rate": 0.0001808428344638476,
+      "loss": 0.1415,
+      "step": 6642
+    },
+    {
+      "epoch": 0.4793102204264223,
+      "grad_norm": 0.11279461532831192,
+      "learning_rate": 0.00018083994804445085,
+      "loss": 0.1365,
+      "step": 6643
+    },
+    {
+      "epoch": 0.47938237310148274,
+      "grad_norm": 0.14440464973449707,
+      "learning_rate": 0.00018083706162505414,
+      "loss": 0.1621,
+      "step": 6644
+    },
+    {
+      "epoch": 0.4794545257765432,
+      "grad_norm": 0.10292355716228485,
+      "learning_rate": 0.0001808341752056574,
+      "loss": 0.1742,
+      "step": 6645
+    },
+    {
+      "epoch": 0.47952667845160357,
+      "grad_norm": 0.16559121012687683,
+      "learning_rate": 0.00018083128878626064,
+      "loss": 0.1389,
+      "step": 6646
+    },
+    {
+      "epoch": 0.479598831126664,
+      "grad_norm": 0.1599855273962021,
+      "learning_rate": 0.0001808284023668639,
+      "loss": 0.1418,
+      "step": 6647
+    },
+    {
+      "epoch": 0.47967098380172446,
+      "grad_norm": 0.12400344759225845,
+      "learning_rate": 0.00018082551594746717,
+      "loss": 0.1426,
+      "step": 6648
+    },
+    {
+      "epoch": 0.4797431364767849,
+      "grad_norm": 0.20110811293125153,
+      "learning_rate": 0.00018082262952807043,
+      "loss": 0.1622,
+      "step": 6649
+    },
+    {
+      "epoch": 0.4798152891518453,
+      "grad_norm": 0.1190720945596695,
+      "learning_rate": 0.0001808197431086737,
+      "loss": 0.1328,
+      "step": 6650
+    },
+    {
+      "epoch": 0.47988744182690574,
+      "grad_norm": 0.1138315349817276,
+      "learning_rate": 0.00018081685668927696,
+      "loss": 0.136,
+      "step": 6651
+    },
+    {
+      "epoch": 0.4799595945019662,
+      "grad_norm": 0.10997669398784637,
+      "learning_rate": 0.00018081397026988022,
+      "loss": 0.1949,
+      "step": 6652
+    },
+    {
+      "epoch": 0.48003174717702657,
+      "grad_norm": 0.14533071219921112,
+      "learning_rate": 0.00018081108385048348,
+      "loss": 0.2486,
+      "step": 6653
+    },
+    {
+      "epoch": 0.480103899852087,
+      "grad_norm": 0.10433954745531082,
+      "learning_rate": 0.00018080819743108675,
+      "loss": 0.1373,
+      "step": 6654
+    },
+    {
+      "epoch": 0.48017605252714746,
+      "grad_norm": 0.11102496832609177,
+      "learning_rate": 0.00018080531101169,
+      "loss": 0.1293,
+      "step": 6655
+    },
+    {
+      "epoch": 0.48024820520220785,
+      "grad_norm": 0.11135412007570267,
+      "learning_rate": 0.00018080242459229327,
+      "loss": 0.1655,
+      "step": 6656
+    },
+    {
+      "epoch": 0.4803203578772683,
+      "grad_norm": 0.12301556020975113,
+      "learning_rate": 0.00018079953817289653,
+      "loss": 0.1613,
+      "step": 6657
+    },
+    {
+      "epoch": 0.48039251055232873,
+      "grad_norm": 0.11578719317913055,
+      "learning_rate": 0.0001807966517534998,
+      "loss": 0.1921,
+      "step": 6658
+    },
+    {
+      "epoch": 0.4804646632273892,
+      "grad_norm": 0.137067511677742,
+      "learning_rate": 0.00018079376533410306,
+      "loss": 0.1792,
+      "step": 6659
+    },
+    {
+      "epoch": 0.48053681590244957,
+      "grad_norm": 0.1312975287437439,
+      "learning_rate": 0.00018079087891470632,
+      "loss": 0.1593,
+      "step": 6660
+    },
+    {
+      "epoch": 0.48060896857751,
+      "grad_norm": 0.19008010625839233,
+      "learning_rate": 0.00018078799249530959,
+      "loss": 0.1669,
+      "step": 6661
+    },
+    {
+      "epoch": 0.48068112125257045,
+      "grad_norm": 0.10435241460800171,
+      "learning_rate": 0.00018078510607591285,
+      "loss": 0.1198,
+      "step": 6662
+    },
+    {
+      "epoch": 0.48075327392763084,
+      "grad_norm": 0.11011923849582672,
+      "learning_rate": 0.00018078221965651608,
+      "loss": 0.1744,
+      "step": 6663
+    },
+    {
+      "epoch": 0.4808254266026913,
+      "grad_norm": 0.12781675159931183,
+      "learning_rate": 0.00018077933323711935,
+      "loss": 0.1985,
+      "step": 6664
+    },
+    {
+      "epoch": 0.48089757927775173,
+      "grad_norm": 0.13984736800193787,
+      "learning_rate": 0.00018077644681772264,
+      "loss": 0.192,
+      "step": 6665
+    },
+    {
+      "epoch": 0.4809697319528122,
+      "grad_norm": 0.12048052996397018,
+      "learning_rate": 0.0001807735603983259,
+      "loss": 0.1622,
+      "step": 6666
+    },
+    {
+      "epoch": 0.48104188462787256,
+      "grad_norm": 0.10933246463537216,
+      "learning_rate": 0.00018077067397892916,
+      "loss": 0.0976,
+      "step": 6667
+    },
+    {
+      "epoch": 0.481114037302933,
+      "grad_norm": 0.11265300214290619,
+      "learning_rate": 0.0001807677875595324,
+      "loss": 0.1056,
+      "step": 6668
+    },
+    {
+      "epoch": 0.48118618997799345,
+      "grad_norm": 0.12599968910217285,
+      "learning_rate": 0.00018076490114013566,
+      "loss": 0.1385,
+      "step": 6669
+    },
+    {
+      "epoch": 0.48125834265305384,
+      "grad_norm": 0.15781208872795105,
+      "learning_rate": 0.00018076201472073893,
+      "loss": 0.1562,
+      "step": 6670
+    },
+    {
+      "epoch": 0.4813304953281143,
+      "grad_norm": 0.14753875136375427,
+      "learning_rate": 0.0001807591283013422,
+      "loss": 0.1207,
+      "step": 6671
+    },
+    {
+      "epoch": 0.4814026480031747,
+      "grad_norm": 0.1585770696401596,
+      "learning_rate": 0.00018075624188194548,
+      "loss": 0.1114,
+      "step": 6672
+    },
+    {
+      "epoch": 0.48147480067823517,
+      "grad_norm": 0.13444659113883972,
+      "learning_rate": 0.00018075335546254871,
+      "loss": 0.1646,
+      "step": 6673
+    },
+    {
+      "epoch": 0.48154695335329556,
+      "grad_norm": 0.1050247773528099,
+      "learning_rate": 0.00018075046904315198,
+      "loss": 0.1646,
+      "step": 6674
+    },
+    {
+      "epoch": 0.481619106028356,
+      "grad_norm": 0.12049075961112976,
+      "learning_rate": 0.00018074758262375524,
+      "loss": 0.1866,
+      "step": 6675
+    },
+    {
+      "epoch": 0.48169125870341645,
+      "grad_norm": 0.16078917682170868,
+      "learning_rate": 0.0001807446962043585,
+      "loss": 0.1709,
+      "step": 6676
+    },
+    {
+      "epoch": 0.48176341137847684,
+      "grad_norm": 0.1396174281835556,
+      "learning_rate": 0.00018074180978496177,
+      "loss": 0.1639,
+      "step": 6677
+    },
+    {
+      "epoch": 0.4818355640535373,
+      "grad_norm": 0.13774073123931885,
+      "learning_rate": 0.00018073892336556503,
+      "loss": 0.1683,
+      "step": 6678
+    },
+    {
+      "epoch": 0.4819077167285977,
+      "grad_norm": 0.1184118315577507,
+      "learning_rate": 0.0001807360369461683,
+      "loss": 0.1978,
+      "step": 6679
+    },
+    {
+      "epoch": 0.48197986940365817,
+      "grad_norm": 0.11098407953977585,
+      "learning_rate": 0.00018073315052677155,
+      "loss": 0.1413,
+      "step": 6680
+    },
+    {
+      "epoch": 0.48205202207871856,
+      "grad_norm": 0.10108431428670883,
+      "learning_rate": 0.00018073026410737482,
+      "loss": 0.1093,
+      "step": 6681
+    },
+    {
+      "epoch": 0.482124174753779,
+      "grad_norm": 0.1013457179069519,
+      "learning_rate": 0.00018072737768797808,
+      "loss": 0.118,
+      "step": 6682
+    },
+    {
+      "epoch": 0.48219632742883944,
+      "grad_norm": 0.1206645593047142,
+      "learning_rate": 0.00018072449126858134,
+      "loss": 0.1623,
+      "step": 6683
+    },
+    {
+      "epoch": 0.48226848010389983,
+      "grad_norm": 0.09064919501543045,
+      "learning_rate": 0.00018072160484918458,
+      "loss": 0.1879,
+      "step": 6684
+    },
+    {
+      "epoch": 0.4823406327789603,
+      "grad_norm": 0.14269712567329407,
+      "learning_rate": 0.00018071871842978784,
+      "loss": 0.1547,
+      "step": 6685
+    },
+    {
+      "epoch": 0.4824127854540207,
+      "grad_norm": 0.16197986900806427,
+      "learning_rate": 0.00018071583201039113,
+      "loss": 0.1873,
+      "step": 6686
+    },
+    {
+      "epoch": 0.4824849381290811,
+      "grad_norm": 0.1269264966249466,
+      "learning_rate": 0.0001807129455909944,
+      "loss": 0.1053,
+      "step": 6687
+    },
+    {
+      "epoch": 0.48255709080414155,
+      "grad_norm": 0.15127058327198029,
+      "learning_rate": 0.00018071005917159766,
+      "loss": 0.2106,
+      "step": 6688
+    },
+    {
+      "epoch": 0.482629243479202,
+      "grad_norm": 0.13187997043132782,
+      "learning_rate": 0.0001807071727522009,
+      "loss": 0.1533,
+      "step": 6689
+    },
+    {
+      "epoch": 0.48270139615426244,
+      "grad_norm": 0.12021167576313019,
+      "learning_rate": 0.00018070428633280416,
+      "loss": 0.1604,
+      "step": 6690
+    },
+    {
+      "epoch": 0.48277354882932283,
+      "grad_norm": 0.13101136684417725,
+      "learning_rate": 0.00018070139991340742,
+      "loss": 0.1488,
+      "step": 6691
+    },
+    {
+      "epoch": 0.4828457015043833,
+      "grad_norm": 0.09323979914188385,
+      "learning_rate": 0.00018069851349401068,
+      "loss": 0.1079,
+      "step": 6692
+    },
+    {
+      "epoch": 0.4829178541794437,
+      "grad_norm": 0.11968117207288742,
+      "learning_rate": 0.00018069562707461397,
+      "loss": 0.1316,
+      "step": 6693
+    },
+    {
+      "epoch": 0.4829900068545041,
+      "grad_norm": 0.12057603150606155,
+      "learning_rate": 0.0001806927406552172,
+      "loss": 0.135,
+      "step": 6694
+    },
+    {
+      "epoch": 0.48306215952956455,
+      "grad_norm": 0.10026104003190994,
+      "learning_rate": 0.00018068985423582047,
+      "loss": 0.1274,
+      "step": 6695
+    },
+    {
+      "epoch": 0.483134312204625,
+      "grad_norm": 0.08938336372375488,
+      "learning_rate": 0.00018068696781642373,
+      "loss": 0.1454,
+      "step": 6696
+    },
+    {
+      "epoch": 0.48320646487968544,
+      "grad_norm": 0.10528580099344254,
+      "learning_rate": 0.000180684081397027,
+      "loss": 0.163,
+      "step": 6697
+    },
+    {
+      "epoch": 0.4832786175547458,
+      "grad_norm": 0.10674149543046951,
+      "learning_rate": 0.00018068119497763026,
+      "loss": 0.1223,
+      "step": 6698
+    },
+    {
+      "epoch": 0.48335077022980627,
+      "grad_norm": 0.12780793011188507,
+      "learning_rate": 0.00018067830855823352,
+      "loss": 0.1572,
+      "step": 6699
+    },
+    {
+      "epoch": 0.4834229229048667,
+      "grad_norm": 0.11604584753513336,
+      "learning_rate": 0.00018067542213883679,
+      "loss": 0.1203,
+      "step": 6700
+    },
+    {
+      "epoch": 0.4834950755799271,
+      "grad_norm": 0.1347905695438385,
+      "learning_rate": 0.00018067253571944005,
+      "loss": 0.2002,
+      "step": 6701
+    },
+    {
+      "epoch": 0.48356722825498755,
+      "grad_norm": 0.1396912932395935,
+      "learning_rate": 0.0001806696493000433,
+      "loss": 0.171,
+      "step": 6702
+    },
+    {
+      "epoch": 0.483639380930048,
+      "grad_norm": 0.11245424300432205,
+      "learning_rate": 0.00018066676288064657,
+      "loss": 0.1436,
+      "step": 6703
+    },
+    {
+      "epoch": 0.48371153360510843,
+      "grad_norm": 0.12426016479730606,
+      "learning_rate": 0.00018066387646124984,
+      "loss": 0.1466,
+      "step": 6704
+    },
+    {
+      "epoch": 0.4837836862801688,
+      "grad_norm": 0.11430709064006805,
+      "learning_rate": 0.00018066099004185307,
+      "loss": 0.1435,
+      "step": 6705
+    },
+    {
+      "epoch": 0.48385583895522927,
+      "grad_norm": 0.09857946634292603,
+      "learning_rate": 0.00018065810362245634,
+      "loss": 0.1142,
+      "step": 6706
+    },
+    {
+      "epoch": 0.4839279916302897,
+      "grad_norm": 0.111849345266819,
+      "learning_rate": 0.00018065521720305963,
+      "loss": 0.1343,
+      "step": 6707
+    },
+    {
+      "epoch": 0.4840001443053501,
+      "grad_norm": 0.13994166254997253,
+      "learning_rate": 0.0001806523307836629,
+      "loss": 0.1503,
+      "step": 6708
+    },
+    {
+      "epoch": 0.48407229698041054,
+      "grad_norm": 0.14901243150234222,
+      "learning_rate": 0.00018064944436426615,
+      "loss": 0.1474,
+      "step": 6709
+    },
+    {
+      "epoch": 0.484144449655471,
+      "grad_norm": 0.1526750922203064,
+      "learning_rate": 0.0001806465579448694,
+      "loss": 0.1894,
+      "step": 6710
+    },
+    {
+      "epoch": 0.48421660233053143,
+      "grad_norm": 0.19044718146324158,
+      "learning_rate": 0.00018064367152547265,
+      "loss": 0.1503,
+      "step": 6711
+    },
+    {
+      "epoch": 0.4842887550055918,
+      "grad_norm": 0.12334035336971283,
+      "learning_rate": 0.00018064078510607591,
+      "loss": 0.202,
+      "step": 6712
+    },
+    {
+      "epoch": 0.48436090768065226,
+      "grad_norm": 0.11487280577421188,
+      "learning_rate": 0.00018063789868667918,
+      "loss": 0.1566,
+      "step": 6713
+    },
+    {
+      "epoch": 0.4844330603557127,
+      "grad_norm": 0.10991586744785309,
+      "learning_rate": 0.00018063501226728247,
+      "loss": 0.1498,
+      "step": 6714
+    },
+    {
+      "epoch": 0.4845052130307731,
+      "grad_norm": 0.12430540472269058,
+      "learning_rate": 0.0001806321258478857,
+      "loss": 0.1515,
+      "step": 6715
+    },
+    {
+      "epoch": 0.48457736570583354,
+      "grad_norm": 0.14053206145763397,
+      "learning_rate": 0.00018062923942848897,
+      "loss": 0.1809,
+      "step": 6716
+    },
+    {
+      "epoch": 0.484649518380894,
+      "grad_norm": 0.10123886913061142,
+      "learning_rate": 0.00018062635300909223,
+      "loss": 0.1721,
+      "step": 6717
+    },
+    {
+      "epoch": 0.4847216710559544,
+      "grad_norm": 0.18325616419315338,
+      "learning_rate": 0.0001806234665896955,
+      "loss": 0.1395,
+      "step": 6718
+    },
+    {
+      "epoch": 0.4847938237310148,
+      "grad_norm": 0.10586833953857422,
+      "learning_rate": 0.00018062058017029875,
+      "loss": 0.1674,
+      "step": 6719
+    },
+    {
+      "epoch": 0.48486597640607526,
+      "grad_norm": 0.11999451369047165,
+      "learning_rate": 0.00018061769375090202,
+      "loss": 0.2388,
+      "step": 6720
+    },
+    {
+      "epoch": 0.4849381290811357,
+      "grad_norm": 0.12448670715093613,
+      "learning_rate": 0.00018061480733150528,
+      "loss": 0.1352,
+      "step": 6721
+    },
+    {
+      "epoch": 0.4850102817561961,
+      "grad_norm": 0.11156656593084335,
+      "learning_rate": 0.00018061192091210854,
+      "loss": 0.1393,
+      "step": 6722
+    },
+    {
+      "epoch": 0.48508243443125654,
+      "grad_norm": 0.1126633808016777,
+      "learning_rate": 0.0001806090344927118,
+      "loss": 0.1515,
+      "step": 6723
+    },
+    {
+      "epoch": 0.485154587106317,
+      "grad_norm": 0.1979413479566574,
+      "learning_rate": 0.00018060614807331507,
+      "loss": 0.1535,
+      "step": 6724
+    },
+    {
+      "epoch": 0.48522673978137737,
+      "grad_norm": 0.12131483107805252,
+      "learning_rate": 0.00018060326165391833,
+      "loss": 0.1885,
+      "step": 6725
+    },
+    {
+      "epoch": 0.4852988924564378,
+      "grad_norm": 0.1307258903980255,
+      "learning_rate": 0.00018060037523452157,
+      "loss": 0.1509,
+      "step": 6726
+    },
+    {
+      "epoch": 0.48537104513149826,
+      "grad_norm": 0.12998425960540771,
+      "learning_rate": 0.00018059748881512483,
+      "loss": 0.1277,
+      "step": 6727
+    },
+    {
+      "epoch": 0.4854431978065587,
+      "grad_norm": 0.13239721953868866,
+      "learning_rate": 0.00018059460239572812,
+      "loss": 0.2174,
+      "step": 6728
+    },
+    {
+      "epoch": 0.4855153504816191,
+      "grad_norm": 0.16458624601364136,
+      "learning_rate": 0.00018059171597633138,
+      "loss": 0.1372,
+      "step": 6729
+    },
+    {
+      "epoch": 0.48558750315667953,
+      "grad_norm": 0.18854382634162903,
+      "learning_rate": 0.00018058882955693465,
+      "loss": 0.1805,
+      "step": 6730
+    },
+    {
+      "epoch": 0.48565965583174,
+      "grad_norm": 0.1215885728597641,
+      "learning_rate": 0.00018058594313753788,
+      "loss": 0.1563,
+      "step": 6731
+    },
+    {
+      "epoch": 0.48573180850680037,
+      "grad_norm": 0.1417505294084549,
+      "learning_rate": 0.00018058305671814115,
+      "loss": 0.2168,
+      "step": 6732
+    },
+    {
+      "epoch": 0.4858039611818608,
+      "grad_norm": 0.138729989528656,
+      "learning_rate": 0.0001805801702987444,
+      "loss": 0.1613,
+      "step": 6733
+    },
+    {
+      "epoch": 0.48587611385692125,
+      "grad_norm": 0.12912528216838837,
+      "learning_rate": 0.00018057728387934767,
+      "loss": 0.1352,
+      "step": 6734
+    },
+    {
+      "epoch": 0.4859482665319817,
+      "grad_norm": 0.13241249322891235,
+      "learning_rate": 0.00018057439745995096,
+      "loss": 0.1539,
+      "step": 6735
+    },
+    {
+      "epoch": 0.4860204192070421,
+      "grad_norm": 0.09461740404367447,
+      "learning_rate": 0.0001805715110405542,
+      "loss": 0.1591,
+      "step": 6736
+    },
+    {
+      "epoch": 0.48609257188210253,
+      "grad_norm": 0.11101723462343216,
+      "learning_rate": 0.00018056862462115746,
+      "loss": 0.1714,
+      "step": 6737
+    },
+    {
+      "epoch": 0.486164724557163,
+      "grad_norm": 0.11402534693479538,
+      "learning_rate": 0.00018056573820176072,
+      "loss": 0.133,
+      "step": 6738
+    },
+    {
+      "epoch": 0.48623687723222336,
+      "grad_norm": 0.12268409132957458,
+      "learning_rate": 0.00018056285178236399,
+      "loss": 0.171,
+      "step": 6739
+    },
+    {
+      "epoch": 0.4863090299072838,
+      "grad_norm": 0.11872043460607529,
+      "learning_rate": 0.00018055996536296725,
+      "loss": 0.1358,
+      "step": 6740
+    },
+    {
+      "epoch": 0.48638118258234425,
+      "grad_norm": 0.18621380627155304,
+      "learning_rate": 0.0001805570789435705,
+      "loss": 0.1265,
+      "step": 6741
+    },
+    {
+      "epoch": 0.4864533352574047,
+      "grad_norm": 0.14223319292068481,
+      "learning_rate": 0.00018055419252417377,
+      "loss": 0.1406,
+      "step": 6742
+    },
+    {
+      "epoch": 0.4865254879324651,
+      "grad_norm": 0.1255837380886078,
+      "learning_rate": 0.00018055130610477704,
+      "loss": 0.1574,
+      "step": 6743
+    },
+    {
+      "epoch": 0.48659764060752553,
+      "grad_norm": 0.11430840194225311,
+      "learning_rate": 0.0001805484196853803,
+      "loss": 0.1556,
+      "step": 6744
+    },
+    {
+      "epoch": 0.48666979328258597,
+      "grad_norm": 0.14644275605678558,
+      "learning_rate": 0.00018054553326598356,
+      "loss": 0.1527,
+      "step": 6745
+    },
+    {
+      "epoch": 0.48674194595764636,
+      "grad_norm": 0.12583127617835999,
+      "learning_rate": 0.00018054264684658683,
+      "loss": 0.1432,
+      "step": 6746
+    },
+    {
+      "epoch": 0.4868140986327068,
+      "grad_norm": 0.10729315131902695,
+      "learning_rate": 0.00018053976042719006,
+      "loss": 0.0996,
+      "step": 6747
+    },
+    {
+      "epoch": 0.48688625130776725,
+      "grad_norm": 0.10388250648975372,
+      "learning_rate": 0.00018053687400779332,
+      "loss": 0.1448,
+      "step": 6748
+    },
+    {
+      "epoch": 0.48695840398282764,
+      "grad_norm": 0.1295686513185501,
+      "learning_rate": 0.00018053398758839661,
+      "loss": 0.1811,
+      "step": 6749
+    },
+    {
+      "epoch": 0.4870305566578881,
+      "grad_norm": 0.12884283065795898,
+      "learning_rate": 0.00018053110116899988,
+      "loss": 0.1848,
+      "step": 6750
+    },
+    {
+      "epoch": 0.4871027093329485,
+      "grad_norm": 0.16300494968891144,
+      "learning_rate": 0.00018052821474960314,
+      "loss": 0.1611,
+      "step": 6751
+    },
+    {
+      "epoch": 0.48717486200800897,
+      "grad_norm": 0.10789994150400162,
+      "learning_rate": 0.00018052532833020638,
+      "loss": 0.1218,
+      "step": 6752
+    },
+    {
+      "epoch": 0.48724701468306936,
+      "grad_norm": 0.15742021799087524,
+      "learning_rate": 0.00018052244191080964,
+      "loss": 0.165,
+      "step": 6753
+    },
+    {
+      "epoch": 0.4873191673581298,
+      "grad_norm": 0.11617636680603027,
+      "learning_rate": 0.0001805195554914129,
+      "loss": 0.1744,
+      "step": 6754
+    },
+    {
+      "epoch": 0.48739132003319025,
+      "grad_norm": 0.13167405128479004,
+      "learning_rate": 0.00018051666907201617,
+      "loss": 0.1724,
+      "step": 6755
+    },
+    {
+      "epoch": 0.48746347270825063,
+      "grad_norm": 0.12322933971881866,
+      "learning_rate": 0.00018051378265261946,
+      "loss": 0.1461,
+      "step": 6756
+    },
+    {
+      "epoch": 0.4875356253833111,
+      "grad_norm": 0.14227062463760376,
+      "learning_rate": 0.0001805108962332227,
+      "loss": 0.1319,
+      "step": 6757
+    },
+    {
+      "epoch": 0.4876077780583715,
+      "grad_norm": 0.11945953965187073,
+      "learning_rate": 0.00018050800981382595,
+      "loss": 0.1424,
+      "step": 6758
+    },
+    {
+      "epoch": 0.48767993073343197,
+      "grad_norm": 0.11324942857027054,
+      "learning_rate": 0.00018050512339442922,
+      "loss": 0.1273,
+      "step": 6759
+    },
+    {
+      "epoch": 0.48775208340849235,
+      "grad_norm": 0.1219736859202385,
+      "learning_rate": 0.00018050223697503248,
+      "loss": 0.182,
+      "step": 6760
+    },
+    {
+      "epoch": 0.4878242360835528,
+      "grad_norm": 0.15173614025115967,
+      "learning_rate": 0.00018049935055563574,
+      "loss": 0.1324,
+      "step": 6761
+    },
+    {
+      "epoch": 0.48789638875861324,
+      "grad_norm": 0.11266780644655228,
+      "learning_rate": 0.000180496464136239,
+      "loss": 0.1393,
+      "step": 6762
+    },
+    {
+      "epoch": 0.48796854143367363,
+      "grad_norm": 0.12031975388526917,
+      "learning_rate": 0.00018049357771684224,
+      "loss": 0.1619,
+      "step": 6763
+    },
+    {
+      "epoch": 0.4880406941087341,
+      "grad_norm": 0.13273458182811737,
+      "learning_rate": 0.00018049069129744553,
+      "loss": 0.1478,
+      "step": 6764
+    },
+    {
+      "epoch": 0.4881128467837945,
+      "grad_norm": 0.15466207265853882,
+      "learning_rate": 0.0001804878048780488,
+      "loss": 0.1648,
+      "step": 6765
+    },
+    {
+      "epoch": 0.48818499945885496,
+      "grad_norm": 0.12362847477197647,
+      "learning_rate": 0.00018048491845865206,
+      "loss": 0.1551,
+      "step": 6766
+    },
+    {
+      "epoch": 0.48825715213391535,
+      "grad_norm": 0.11828542500734329,
+      "learning_rate": 0.00018048203203925532,
+      "loss": 0.2099,
+      "step": 6767
+    },
+    {
+      "epoch": 0.4883293048089758,
+      "grad_norm": 0.1162853091955185,
+      "learning_rate": 0.00018047914561985856,
+      "loss": 0.1516,
+      "step": 6768
+    },
+    {
+      "epoch": 0.48840145748403624,
+      "grad_norm": 0.1452043503522873,
+      "learning_rate": 0.00018047625920046182,
+      "loss": 0.2206,
+      "step": 6769
+    },
+    {
+      "epoch": 0.4884736101590966,
+      "grad_norm": 0.13743065297603607,
+      "learning_rate": 0.00018047337278106508,
+      "loss": 0.2595,
+      "step": 6770
+    },
+    {
+      "epoch": 0.48854576283415707,
+      "grad_norm": 0.12419189512729645,
+      "learning_rate": 0.00018047048636166837,
+      "loss": 0.1532,
+      "step": 6771
+    },
+    {
+      "epoch": 0.4886179155092175,
+      "grad_norm": 0.10468582808971405,
+      "learning_rate": 0.00018046759994227163,
+      "loss": 0.235,
+      "step": 6772
+    },
+    {
+      "epoch": 0.48869006818427796,
+      "grad_norm": 0.16387949883937836,
+      "learning_rate": 0.00018046471352287487,
+      "loss": 0.1454,
+      "step": 6773
+    },
+    {
+      "epoch": 0.48876222085933835,
+      "grad_norm": 0.11070086807012558,
+      "learning_rate": 0.00018046182710347813,
+      "loss": 0.1507,
+      "step": 6774
+    },
+    {
+      "epoch": 0.4888343735343988,
+      "grad_norm": 0.12430056184530258,
+      "learning_rate": 0.0001804589406840814,
+      "loss": 0.1779,
+      "step": 6775
+    },
+    {
+      "epoch": 0.48890652620945924,
+      "grad_norm": 0.121668241918087,
+      "learning_rate": 0.00018045605426468466,
+      "loss": 0.153,
+      "step": 6776
+    },
+    {
+      "epoch": 0.4889786788845196,
+      "grad_norm": 0.12041906267404556,
+      "learning_rate": 0.00018045316784528792,
+      "loss": 0.126,
+      "step": 6777
+    },
+    {
+      "epoch": 0.48905083155958007,
+      "grad_norm": 0.11366225779056549,
+      "learning_rate": 0.0001804502814258912,
+      "loss": 0.1017,
+      "step": 6778
+    },
+    {
+      "epoch": 0.4891229842346405,
+      "grad_norm": 0.16463147103786469,
+      "learning_rate": 0.00018044739500649445,
+      "loss": 0.1559,
+      "step": 6779
+    },
+    {
+      "epoch": 0.4891951369097009,
+      "grad_norm": 0.13968990743160248,
+      "learning_rate": 0.0001804445085870977,
+      "loss": 0.1147,
+      "step": 6780
+    },
+    {
+      "epoch": 0.48926728958476134,
+      "grad_norm": 0.1277603954076767,
+      "learning_rate": 0.00018044162216770097,
+      "loss": 0.1485,
+      "step": 6781
+    },
+    {
+      "epoch": 0.4893394422598218,
+      "grad_norm": 0.1520412266254425,
+      "learning_rate": 0.00018043873574830424,
+      "loss": 0.1495,
+      "step": 6782
+    },
+    {
+      "epoch": 0.48941159493488223,
+      "grad_norm": 0.11583957821130753,
+      "learning_rate": 0.0001804358493289075,
+      "loss": 0.1676,
+      "step": 6783
+    },
+    {
+      "epoch": 0.4894837476099426,
+      "grad_norm": 0.10783440619707108,
+      "learning_rate": 0.00018043296290951076,
+      "loss": 0.1188,
+      "step": 6784
+    },
+    {
+      "epoch": 0.48955590028500306,
+      "grad_norm": 0.10469064116477966,
+      "learning_rate": 0.00018043007649011403,
+      "loss": 0.1177,
+      "step": 6785
+    },
+    {
+      "epoch": 0.4896280529600635,
+      "grad_norm": 0.11987747251987457,
+      "learning_rate": 0.0001804271900707173,
+      "loss": 0.1044,
+      "step": 6786
+    },
+    {
+      "epoch": 0.4897002056351239,
+      "grad_norm": 0.12509019672870636,
+      "learning_rate": 0.00018042430365132055,
+      "loss": 0.1357,
+      "step": 6787
+    },
+    {
+      "epoch": 0.48977235831018434,
+      "grad_norm": 0.13454003632068634,
+      "learning_rate": 0.00018042141723192381,
+      "loss": 0.1973,
+      "step": 6788
+    },
+    {
+      "epoch": 0.4898445109852448,
+      "grad_norm": 0.1286957859992981,
+      "learning_rate": 0.00018041853081252708,
+      "loss": 0.1868,
+      "step": 6789
+    },
+    {
+      "epoch": 0.48991666366030523,
+      "grad_norm": 0.09743602573871613,
+      "learning_rate": 0.0001804156443931303,
+      "loss": 0.2109,
+      "step": 6790
+    },
+    {
+      "epoch": 0.4899888163353656,
+      "grad_norm": 0.10004373639822006,
+      "learning_rate": 0.00018041275797373358,
+      "loss": 0.1382,
+      "step": 6791
+    },
+    {
+      "epoch": 0.49006096901042606,
+      "grad_norm": 0.09505674242973328,
+      "learning_rate": 0.00018040987155433687,
+      "loss": 0.1378,
+      "step": 6792
+    },
+    {
+      "epoch": 0.4901331216854865,
+      "grad_norm": 0.0980699211359024,
+      "learning_rate": 0.00018040698513494013,
+      "loss": 0.1446,
+      "step": 6793
+    },
+    {
+      "epoch": 0.4902052743605469,
+      "grad_norm": 0.1165425106883049,
+      "learning_rate": 0.0001804040987155434,
+      "loss": 0.1441,
+      "step": 6794
+    },
+    {
+      "epoch": 0.49027742703560734,
+      "grad_norm": 0.12139949202537537,
+      "learning_rate": 0.00018040121229614663,
+      "loss": 0.1555,
+      "step": 6795
+    },
+    {
+      "epoch": 0.4903495797106678,
+      "grad_norm": 0.10587059706449509,
+      "learning_rate": 0.0001803983258767499,
+      "loss": 0.1527,
+      "step": 6796
+    },
+    {
+      "epoch": 0.4904217323857282,
+      "grad_norm": 0.11615040153265,
+      "learning_rate": 0.00018039543945735315,
+      "loss": 0.1349,
+      "step": 6797
+    },
+    {
+      "epoch": 0.4904938850607886,
+      "grad_norm": 0.1317576915025711,
+      "learning_rate": 0.00018039255303795642,
+      "loss": 0.1482,
+      "step": 6798
+    },
+    {
+      "epoch": 0.49056603773584906,
+      "grad_norm": 0.10900890082120895,
+      "learning_rate": 0.0001803896666185597,
+      "loss": 0.1226,
+      "step": 6799
+    },
+    {
+      "epoch": 0.4906381904109095,
+      "grad_norm": 0.12659330666065216,
+      "learning_rate": 0.00018038678019916294,
+      "loss": 0.1639,
+      "step": 6800
+    },
+    {
+      "epoch": 0.4907103430859699,
+      "grad_norm": 0.1328059285879135,
+      "learning_rate": 0.0001803838937797662,
+      "loss": 0.1755,
+      "step": 6801
+    },
+    {
+      "epoch": 0.49078249576103034,
+      "grad_norm": 0.12249781936407089,
+      "learning_rate": 0.00018038100736036947,
+      "loss": 0.1605,
+      "step": 6802
+    },
+    {
+      "epoch": 0.4908546484360908,
+      "grad_norm": 0.11942338198423386,
+      "learning_rate": 0.00018037812094097273,
+      "loss": 0.2028,
+      "step": 6803
+    },
+    {
+      "epoch": 0.4909268011111512,
+      "grad_norm": 0.1126035749912262,
+      "learning_rate": 0.000180375234521576,
+      "loss": 0.1113,
+      "step": 6804
+    },
+    {
+      "epoch": 0.4909989537862116,
+      "grad_norm": 0.11068452149629593,
+      "learning_rate": 0.00018037234810217926,
+      "loss": 0.1557,
+      "step": 6805
+    },
+    {
+      "epoch": 0.49107110646127206,
+      "grad_norm": 0.10908321291208267,
+      "learning_rate": 0.00018036946168278252,
+      "loss": 0.1912,
+      "step": 6806
+    },
+    {
+      "epoch": 0.4911432591363325,
+      "grad_norm": 0.10851013660430908,
+      "learning_rate": 0.00018036657526338578,
+      "loss": 0.1393,
+      "step": 6807
+    },
+    {
+      "epoch": 0.4912154118113929,
+      "grad_norm": 0.11371336877346039,
+      "learning_rate": 0.00018036368884398905,
+      "loss": 0.1267,
+      "step": 6808
+    },
+    {
+      "epoch": 0.49128756448645333,
+      "grad_norm": 0.1305808126926422,
+      "learning_rate": 0.0001803608024245923,
+      "loss": 0.1854,
+      "step": 6809
+    },
+    {
+      "epoch": 0.4913597171615138,
+      "grad_norm": 0.13042595982551575,
+      "learning_rate": 0.00018035791600519557,
+      "loss": 0.1337,
+      "step": 6810
+    },
+    {
+      "epoch": 0.49143186983657416,
+      "grad_norm": 0.11381411552429199,
+      "learning_rate": 0.0001803550295857988,
+      "loss": 0.1338,
+      "step": 6811
+    },
+    {
+      "epoch": 0.4915040225116346,
+      "grad_norm": 0.24770130217075348,
+      "learning_rate": 0.00018035214316640207,
+      "loss": 0.1505,
+      "step": 6812
+    },
+    {
+      "epoch": 0.49157617518669505,
+      "grad_norm": 0.11011113971471786,
+      "learning_rate": 0.00018034925674700536,
+      "loss": 0.1929,
+      "step": 6813
+    },
+    {
+      "epoch": 0.4916483278617555,
+      "grad_norm": 0.09932421892881393,
+      "learning_rate": 0.00018034637032760862,
+      "loss": 0.1531,
+      "step": 6814
+    },
+    {
+      "epoch": 0.4917204805368159,
+      "grad_norm": 0.1148666962981224,
+      "learning_rate": 0.00018034348390821189,
+      "loss": 0.1751,
+      "step": 6815
+    },
+    {
+      "epoch": 0.49179263321187633,
+      "grad_norm": 0.13124980032444,
+      "learning_rate": 0.00018034059748881512,
+      "loss": 0.1797,
+      "step": 6816
+    },
+    {
+      "epoch": 0.4918647858869368,
+      "grad_norm": 0.1289656162261963,
+      "learning_rate": 0.00018033771106941839,
+      "loss": 0.1274,
+      "step": 6817
+    },
+    {
+      "epoch": 0.49193693856199716,
+      "grad_norm": 0.1168791651725769,
+      "learning_rate": 0.00018033482465002165,
+      "loss": 0.163,
+      "step": 6818
+    },
+    {
+      "epoch": 0.4920090912370576,
+      "grad_norm": 0.1393895000219345,
+      "learning_rate": 0.0001803319382306249,
+      "loss": 0.1439,
+      "step": 6819
+    },
+    {
+      "epoch": 0.49208124391211805,
+      "grad_norm": 0.12687747180461884,
+      "learning_rate": 0.0001803290518112282,
+      "loss": 0.1318,
+      "step": 6820
+    },
+    {
+      "epoch": 0.4921533965871785,
+      "grad_norm": 0.13864044845104218,
+      "learning_rate": 0.00018032616539183144,
+      "loss": 0.1782,
+      "step": 6821
+    },
+    {
+      "epoch": 0.4922255492622389,
+      "grad_norm": 0.15509755909442902,
+      "learning_rate": 0.0001803232789724347,
+      "loss": 0.1428,
+      "step": 6822
+    },
+    {
+      "epoch": 0.4922977019372993,
+      "grad_norm": 0.1268070787191391,
+      "learning_rate": 0.00018032039255303796,
+      "loss": 0.1352,
+      "step": 6823
+    },
+    {
+      "epoch": 0.49236985461235977,
+      "grad_norm": 0.14477965235710144,
+      "learning_rate": 0.00018031750613364123,
+      "loss": 0.1877,
+      "step": 6824
+    },
+    {
+      "epoch": 0.49244200728742016,
+      "grad_norm": 0.11024410277605057,
+      "learning_rate": 0.0001803146197142445,
+      "loss": 0.157,
+      "step": 6825
+    },
+    {
+      "epoch": 0.4925141599624806,
+      "grad_norm": 0.1326301097869873,
+      "learning_rate": 0.00018031173329484775,
+      "loss": 0.1775,
+      "step": 6826
+    },
+    {
+      "epoch": 0.49258631263754105,
+      "grad_norm": 0.11130791902542114,
+      "learning_rate": 0.00018030884687545101,
+      "loss": 0.1153,
+      "step": 6827
+    },
+    {
+      "epoch": 0.4926584653126015,
+      "grad_norm": 0.14091289043426514,
+      "learning_rate": 0.00018030596045605428,
+      "loss": 0.1392,
+      "step": 6828
+    },
+    {
+      "epoch": 0.4927306179876619,
+      "grad_norm": 0.1333671510219574,
+      "learning_rate": 0.00018030307403665754,
+      "loss": 0.1282,
+      "step": 6829
+    },
+    {
+      "epoch": 0.4928027706627223,
+      "grad_norm": 0.12756669521331787,
+      "learning_rate": 0.0001803001876172608,
+      "loss": 0.1384,
+      "step": 6830
+    },
+    {
+      "epoch": 0.49287492333778277,
+      "grad_norm": 0.13180451095104218,
+      "learning_rate": 0.00018029730119786407,
+      "loss": 0.1725,
+      "step": 6831
+    },
+    {
+      "epoch": 0.49294707601284316,
+      "grad_norm": 0.12362208217382431,
+      "learning_rate": 0.0001802944147784673,
+      "loss": 0.2103,
+      "step": 6832
+    },
+    {
+      "epoch": 0.4930192286879036,
+      "grad_norm": 0.11759877949953079,
+      "learning_rate": 0.00018029152835907056,
+      "loss": 0.1783,
+      "step": 6833
+    },
+    {
+      "epoch": 0.49309138136296404,
+      "grad_norm": 0.13143250346183777,
+      "learning_rate": 0.00018028864193967385,
+      "loss": 0.2168,
+      "step": 6834
+    },
+    {
+      "epoch": 0.4931635340380245,
+      "grad_norm": 0.20306192338466644,
+      "learning_rate": 0.00018028575552027712,
+      "loss": 0.1445,
+      "step": 6835
+    },
+    {
+      "epoch": 0.4932356867130849,
+      "grad_norm": 0.12542825937271118,
+      "learning_rate": 0.00018028286910088038,
+      "loss": 0.1667,
+      "step": 6836
+    },
+    {
+      "epoch": 0.4933078393881453,
+      "grad_norm": 0.16882474720478058,
+      "learning_rate": 0.00018027998268148362,
+      "loss": 0.1411,
+      "step": 6837
+    },
+    {
+      "epoch": 0.49337999206320576,
+      "grad_norm": 0.12080718576908112,
+      "learning_rate": 0.00018027709626208688,
+      "loss": 0.1399,
+      "step": 6838
+    },
+    {
+      "epoch": 0.49345214473826615,
+      "grad_norm": 0.13088224828243256,
+      "learning_rate": 0.00018027420984269014,
+      "loss": 0.1605,
+      "step": 6839
+    },
+    {
+      "epoch": 0.4935242974133266,
+      "grad_norm": 0.11381285637617111,
+      "learning_rate": 0.0001802713234232934,
+      "loss": 0.1636,
+      "step": 6840
+    },
+    {
+      "epoch": 0.49359645008838704,
+      "grad_norm": 0.13267774879932404,
+      "learning_rate": 0.0001802684370038967,
+      "loss": 0.1716,
+      "step": 6841
+    },
+    {
+      "epoch": 0.49366860276344743,
+      "grad_norm": 0.12944793701171875,
+      "learning_rate": 0.00018026555058449993,
+      "loss": 0.1396,
+      "step": 6842
+    },
+    {
+      "epoch": 0.4937407554385079,
+      "grad_norm": 0.12048157304525375,
+      "learning_rate": 0.0001802626641651032,
+      "loss": 0.1656,
+      "step": 6843
+    },
+    {
+      "epoch": 0.4938129081135683,
+      "grad_norm": 0.14227688312530518,
+      "learning_rate": 0.00018025977774570646,
+      "loss": 0.1323,
+      "step": 6844
+    },
+    {
+      "epoch": 0.49388506078862876,
+      "grad_norm": 0.11630921810865402,
+      "learning_rate": 0.00018025689132630972,
+      "loss": 0.1066,
+      "step": 6845
+    },
+    {
+      "epoch": 0.49395721346368915,
+      "grad_norm": 0.12264031171798706,
+      "learning_rate": 0.00018025400490691298,
+      "loss": 0.1489,
+      "step": 6846
+    },
+    {
+      "epoch": 0.4940293661387496,
+      "grad_norm": 0.0999833270907402,
+      "learning_rate": 0.00018025111848751625,
+      "loss": 0.1859,
+      "step": 6847
+    },
+    {
+      "epoch": 0.49410151881381004,
+      "grad_norm": 0.10908046364784241,
+      "learning_rate": 0.0001802482320681195,
+      "loss": 0.1322,
+      "step": 6848
+    },
+    {
+      "epoch": 0.4941736714888704,
+      "grad_norm": 0.115277960896492,
+      "learning_rate": 0.00018024534564872277,
+      "loss": 0.1943,
+      "step": 6849
+    },
+    {
+      "epoch": 0.49424582416393087,
+      "grad_norm": 0.17231899499893188,
+      "learning_rate": 0.00018024245922932603,
+      "loss": 0.2096,
+      "step": 6850
+    },
+    {
+      "epoch": 0.4943179768389913,
+      "grad_norm": 0.11707253009080887,
+      "learning_rate": 0.0001802395728099293,
+      "loss": 0.2134,
+      "step": 6851
+    },
+    {
+      "epoch": 0.49439012951405176,
+      "grad_norm": 0.10661081969738007,
+      "learning_rate": 0.00018023668639053256,
+      "loss": 0.166,
+      "step": 6852
+    },
+    {
+      "epoch": 0.49446228218911215,
+      "grad_norm": 0.11799009889364243,
+      "learning_rate": 0.0001802337999711358,
+      "loss": 0.1633,
+      "step": 6853
+    },
+    {
+      "epoch": 0.4945344348641726,
+      "grad_norm": 0.11546391248703003,
+      "learning_rate": 0.00018023091355173906,
+      "loss": 0.1618,
+      "step": 6854
+    },
+    {
+      "epoch": 0.49460658753923303,
+      "grad_norm": 0.14451655745506287,
+      "learning_rate": 0.00018022802713234235,
+      "loss": 0.2062,
+      "step": 6855
+    },
+    {
+      "epoch": 0.4946787402142934,
+      "grad_norm": 0.12024658918380737,
+      "learning_rate": 0.0001802251407129456,
+      "loss": 0.1083,
+      "step": 6856
+    },
+    {
+      "epoch": 0.49475089288935387,
+      "grad_norm": 0.10264474898576736,
+      "learning_rate": 0.00018022225429354887,
+      "loss": 0.1322,
+      "step": 6857
+    },
+    {
+      "epoch": 0.4948230455644143,
+      "grad_norm": 0.1129191517829895,
+      "learning_rate": 0.0001802193678741521,
+      "loss": 0.198,
+      "step": 6858
+    },
+    {
+      "epoch": 0.49489519823947475,
+      "grad_norm": 0.14828601479530334,
+      "learning_rate": 0.00018021648145475537,
+      "loss": 0.1402,
+      "step": 6859
+    },
+    {
+      "epoch": 0.49496735091453514,
+      "grad_norm": 0.10332966595888138,
+      "learning_rate": 0.00018021359503535864,
+      "loss": 0.1265,
+      "step": 6860
+    },
+    {
+      "epoch": 0.4950395035895956,
+      "grad_norm": 0.13966286182403564,
+      "learning_rate": 0.0001802107086159619,
+      "loss": 0.1923,
+      "step": 6861
+    },
+    {
+      "epoch": 0.49511165626465603,
+      "grad_norm": 0.13843166828155518,
+      "learning_rate": 0.0001802078221965652,
+      "loss": 0.1386,
+      "step": 6862
+    },
+    {
+      "epoch": 0.4951838089397164,
+      "grad_norm": 0.11983319371938705,
+      "learning_rate": 0.00018020493577716843,
+      "loss": 0.1604,
+      "step": 6863
+    },
+    {
+      "epoch": 0.49525596161477686,
+      "grad_norm": 0.14071129262447357,
+      "learning_rate": 0.0001802020493577717,
+      "loss": 0.1664,
+      "step": 6864
+    },
+    {
+      "epoch": 0.4953281142898373,
+      "grad_norm": 0.10576890408992767,
+      "learning_rate": 0.00018019916293837495,
+      "loss": 0.1891,
+      "step": 6865
+    },
+    {
+      "epoch": 0.49540026696489775,
+      "grad_norm": 0.11082214117050171,
+      "learning_rate": 0.00018019627651897821,
+      "loss": 0.1443,
+      "step": 6866
+    },
+    {
+      "epoch": 0.49547241963995814,
+      "grad_norm": 0.11030527949333191,
+      "learning_rate": 0.00018019339009958148,
+      "loss": 0.1156,
+      "step": 6867
+    },
+    {
+      "epoch": 0.4955445723150186,
+      "grad_norm": 0.17411255836486816,
+      "learning_rate": 0.00018019050368018474,
+      "loss": 0.1127,
+      "step": 6868
+    },
+    {
+      "epoch": 0.495616724990079,
+      "grad_norm": 0.11475282907485962,
+      "learning_rate": 0.000180187617260788,
+      "loss": 0.1339,
+      "step": 6869
+    },
+    {
+      "epoch": 0.4956888776651394,
+      "grad_norm": 0.12999731302261353,
+      "learning_rate": 0.00018018473084139127,
+      "loss": 0.1332,
+      "step": 6870
+    },
+    {
+      "epoch": 0.49576103034019986,
+      "grad_norm": 0.13385990262031555,
+      "learning_rate": 0.00018018184442199453,
+      "loss": 0.1379,
+      "step": 6871
+    },
+    {
+      "epoch": 0.4958331830152603,
+      "grad_norm": 0.13611887395381927,
+      "learning_rate": 0.0001801789580025978,
+      "loss": 0.1593,
+      "step": 6872
+    },
+    {
+      "epoch": 0.4959053356903207,
+      "grad_norm": 0.11021700501441956,
+      "learning_rate": 0.00018017607158320105,
+      "loss": 0.1198,
+      "step": 6873
+    },
+    {
+      "epoch": 0.49597748836538114,
+      "grad_norm": 0.12136051058769226,
+      "learning_rate": 0.0001801731851638043,
+      "loss": 0.1448,
+      "step": 6874
+    },
+    {
+      "epoch": 0.4960496410404416,
+      "grad_norm": 0.11286564916372299,
+      "learning_rate": 0.00018017029874440755,
+      "loss": 0.16,
+      "step": 6875
+    },
+    {
+      "epoch": 0.496121793715502,
+      "grad_norm": 0.11558818817138672,
+      "learning_rate": 0.00018016741232501084,
+      "loss": 0.1311,
+      "step": 6876
+    },
+    {
+      "epoch": 0.4961939463905624,
+      "grad_norm": 0.1303420513868332,
+      "learning_rate": 0.0001801645259056141,
+      "loss": 0.1402,
+      "step": 6877
+    },
+    {
+      "epoch": 0.49626609906562286,
+      "grad_norm": 0.11762979626655579,
+      "learning_rate": 0.00018016163948621737,
+      "loss": 0.2148,
+      "step": 6878
+    },
+    {
+      "epoch": 0.4963382517406833,
+      "grad_norm": 0.1510939747095108,
+      "learning_rate": 0.0001801587530668206,
+      "loss": 0.1713,
+      "step": 6879
+    },
+    {
+      "epoch": 0.4964104044157437,
+      "grad_norm": 0.10852061212062836,
+      "learning_rate": 0.00018015586664742387,
+      "loss": 0.1599,
+      "step": 6880
+    },
+    {
+      "epoch": 0.49648255709080413,
+      "grad_norm": 0.1138058751821518,
+      "learning_rate": 0.00018015298022802713,
+      "loss": 0.1685,
+      "step": 6881
+    },
+    {
+      "epoch": 0.4965547097658646,
+      "grad_norm": 0.11576670408248901,
+      "learning_rate": 0.0001801500938086304,
+      "loss": 0.2199,
+      "step": 6882
+    },
+    {
+      "epoch": 0.496626862440925,
+      "grad_norm": 0.10762116312980652,
+      "learning_rate": 0.00018014720738923368,
+      "loss": 0.1245,
+      "step": 6883
+    },
+    {
+      "epoch": 0.4966990151159854,
+      "grad_norm": 0.10610596090555191,
+      "learning_rate": 0.00018014432096983692,
+      "loss": 0.1297,
+      "step": 6884
+    },
+    {
+      "epoch": 0.49677116779104585,
+      "grad_norm": 0.13391663134098053,
+      "learning_rate": 0.00018014143455044018,
+      "loss": 0.2265,
+      "step": 6885
+    },
+    {
+      "epoch": 0.4968433204661063,
+      "grad_norm": 0.15051336586475372,
+      "learning_rate": 0.00018013854813104345,
+      "loss": 0.1682,
+      "step": 6886
+    },
+    {
+      "epoch": 0.4969154731411667,
+      "grad_norm": 0.12215742468833923,
+      "learning_rate": 0.0001801356617116467,
+      "loss": 0.1466,
+      "step": 6887
+    },
+    {
+      "epoch": 0.49698762581622713,
+      "grad_norm": 0.10626982152462006,
+      "learning_rate": 0.00018013277529224997,
+      "loss": 0.131,
+      "step": 6888
+    },
+    {
+      "epoch": 0.4970597784912876,
+      "grad_norm": 0.10880763083696365,
+      "learning_rate": 0.00018012988887285323,
+      "loss": 0.1352,
+      "step": 6889
+    },
+    {
+      "epoch": 0.497131931166348,
+      "grad_norm": 0.10323496162891388,
+      "learning_rate": 0.0001801270024534565,
+      "loss": 0.1245,
+      "step": 6890
+    },
+    {
+      "epoch": 0.4972040838414084,
+      "grad_norm": 0.1361677497625351,
+      "learning_rate": 0.00018012411603405976,
+      "loss": 0.2063,
+      "step": 6891
+    },
+    {
+      "epoch": 0.49727623651646885,
+      "grad_norm": 0.1083250641822815,
+      "learning_rate": 0.00018012122961466302,
+      "loss": 0.1222,
+      "step": 6892
+    },
+    {
+      "epoch": 0.4973483891915293,
+      "grad_norm": 0.11975391954183578,
+      "learning_rate": 0.00018011834319526629,
+      "loss": 0.1638,
+      "step": 6893
+    },
+    {
+      "epoch": 0.4974205418665897,
+      "grad_norm": 0.09761971235275269,
+      "learning_rate": 0.00018011545677586955,
+      "loss": 0.1286,
+      "step": 6894
+    },
+    {
+      "epoch": 0.4974926945416501,
+      "grad_norm": 0.11344848573207855,
+      "learning_rate": 0.0001801125703564728,
+      "loss": 0.1573,
+      "step": 6895
+    },
+    {
+      "epoch": 0.49756484721671057,
+      "grad_norm": 0.11941707134246826,
+      "learning_rate": 0.00018010968393707605,
+      "loss": 0.1608,
+      "step": 6896
+    },
+    {
+      "epoch": 0.497636999891771,
+      "grad_norm": 0.10896875709295273,
+      "learning_rate": 0.00018010679751767934,
+      "loss": 0.136,
+      "step": 6897
+    },
+    {
+      "epoch": 0.4977091525668314,
+      "grad_norm": 0.1440306007862091,
+      "learning_rate": 0.0001801039110982826,
+      "loss": 0.1656,
+      "step": 6898
+    },
+    {
+      "epoch": 0.49778130524189185,
+      "grad_norm": 0.11497805267572403,
+      "learning_rate": 0.00018010102467888586,
+      "loss": 0.1135,
+      "step": 6899
+    },
+    {
+      "epoch": 0.4978534579169523,
+      "grad_norm": 0.10532939434051514,
+      "learning_rate": 0.00018009813825948913,
+      "loss": 0.1423,
+      "step": 6900
+    },
+    {
+      "epoch": 0.4979256105920127,
+      "grad_norm": 0.11883176863193512,
+      "learning_rate": 0.00018009525184009236,
+      "loss": 0.1556,
+      "step": 6901
+    },
+    {
+      "epoch": 0.4979977632670731,
+      "grad_norm": 0.1338721215724945,
+      "learning_rate": 0.00018009236542069562,
+      "loss": 0.1549,
+      "step": 6902
+    },
+    {
+      "epoch": 0.49806991594213357,
+      "grad_norm": 0.11123886704444885,
+      "learning_rate": 0.0001800894790012989,
+      "loss": 0.1702,
+      "step": 6903
+    },
+    {
+      "epoch": 0.49814206861719396,
+      "grad_norm": 0.12014085054397583,
+      "learning_rate": 0.00018008659258190218,
+      "loss": 0.1508,
+      "step": 6904
+    },
+    {
+      "epoch": 0.4982142212922544,
+      "grad_norm": 0.13090504705905914,
+      "learning_rate": 0.00018008370616250544,
+      "loss": 0.136,
+      "step": 6905
+    },
+    {
+      "epoch": 0.49828637396731484,
+      "grad_norm": 0.11417842656373978,
+      "learning_rate": 0.00018008081974310868,
+      "loss": 0.1126,
+      "step": 6906
+    },
+    {
+      "epoch": 0.4983585266423753,
+      "grad_norm": 0.12810492515563965,
+      "learning_rate": 0.00018007793332371194,
+      "loss": 0.1879,
+      "step": 6907
+    },
+    {
+      "epoch": 0.4984306793174357,
+      "grad_norm": 0.14297088980674744,
+      "learning_rate": 0.0001800750469043152,
+      "loss": 0.1806,
+      "step": 6908
+    },
+    {
+      "epoch": 0.4985028319924961,
+      "grad_norm": 0.10888095945119858,
+      "learning_rate": 0.00018007216048491847,
+      "loss": 0.1546,
+      "step": 6909
+    },
+    {
+      "epoch": 0.49857498466755656,
+      "grad_norm": 0.11205391585826874,
+      "learning_rate": 0.00018006927406552173,
+      "loss": 0.1656,
+      "step": 6910
+    },
+    {
+      "epoch": 0.49864713734261695,
+      "grad_norm": 0.1569240689277649,
+      "learning_rate": 0.000180066387646125,
+      "loss": 0.1952,
+      "step": 6911
+    },
+    {
+      "epoch": 0.4987192900176774,
+      "grad_norm": 0.11143320053815842,
+      "learning_rate": 0.00018006350122672825,
+      "loss": 0.1617,
+      "step": 6912
+    },
+    {
+      "epoch": 0.49879144269273784,
+      "grad_norm": 0.15045265853405,
+      "learning_rate": 0.00018006061480733152,
+      "loss": 0.1575,
+      "step": 6913
+    },
+    {
+      "epoch": 0.4988635953677983,
+      "grad_norm": 0.13224567472934723,
+      "learning_rate": 0.00018005772838793478,
+      "loss": 0.1554,
+      "step": 6914
+    },
+    {
+      "epoch": 0.4989357480428587,
+      "grad_norm": 0.1473943293094635,
+      "learning_rate": 0.00018005484196853804,
+      "loss": 0.1615,
+      "step": 6915
+    },
+    {
+      "epoch": 0.4990079007179191,
+      "grad_norm": 0.13103948533535004,
+      "learning_rate": 0.0001800519555491413,
+      "loss": 0.2071,
+      "step": 6916
+    },
+    {
+      "epoch": 0.49908005339297956,
+      "grad_norm": 0.13195586204528809,
+      "learning_rate": 0.00018004906912974454,
+      "loss": 0.1665,
+      "step": 6917
+    },
+    {
+      "epoch": 0.49915220606803995,
+      "grad_norm": 0.12213563174009323,
+      "learning_rate": 0.00018004618271034783,
+      "loss": 0.1648,
+      "step": 6918
+    },
+    {
+      "epoch": 0.4992243587431004,
+      "grad_norm": 0.1488204449415207,
+      "learning_rate": 0.0001800432962909511,
+      "loss": 0.155,
+      "step": 6919
+    },
+    {
+      "epoch": 0.49929651141816084,
+      "grad_norm": 0.1254352629184723,
+      "learning_rate": 0.00018004040987155436,
+      "loss": 0.1236,
+      "step": 6920
+    },
+    {
+      "epoch": 0.4993686640932213,
+      "grad_norm": 0.12986153364181519,
+      "learning_rate": 0.00018003752345215762,
+      "loss": 0.1576,
+      "step": 6921
+    },
+    {
+      "epoch": 0.49944081676828167,
+      "grad_norm": 0.1283103972673416,
+      "learning_rate": 0.00018003463703276086,
+      "loss": 0.1107,
+      "step": 6922
+    },
+    {
+      "epoch": 0.4995129694433421,
+      "grad_norm": 0.11497905850410461,
+      "learning_rate": 0.00018003175061336412,
+      "loss": 0.1333,
+      "step": 6923
+    },
+    {
+      "epoch": 0.49958512211840256,
+      "grad_norm": 0.1354951113462448,
+      "learning_rate": 0.00018002886419396738,
+      "loss": 0.1466,
+      "step": 6924
+    },
+    {
+      "epoch": 0.49965727479346295,
+      "grad_norm": 0.12747453153133392,
+      "learning_rate": 0.00018002597777457067,
+      "loss": 0.1574,
+      "step": 6925
+    },
+    {
+      "epoch": 0.4997294274685234,
+      "grad_norm": 0.11834773421287537,
+      "learning_rate": 0.00018002309135517394,
+      "loss": 0.1301,
+      "step": 6926
+    },
+    {
+      "epoch": 0.49980158014358383,
+      "grad_norm": 0.13238999247550964,
+      "learning_rate": 0.00018002020493577717,
+      "loss": 0.1679,
+      "step": 6927
+    },
+    {
+      "epoch": 0.4998737328186443,
+      "grad_norm": 0.1121571809053421,
+      "learning_rate": 0.00018001731851638043,
+      "loss": 0.0951,
+      "step": 6928
+    },
+    {
+      "epoch": 0.49994588549370467,
+      "grad_norm": 0.1254463642835617,
+      "learning_rate": 0.0001800144320969837,
+      "loss": 0.1794,
+      "step": 6929
+    },
+    {
+      "epoch": 0.5000180381687651,
+      "grad_norm": 0.13795477151870728,
+      "learning_rate": 0.00018001154567758696,
+      "loss": 0.1449,
+      "step": 6930
+    },
+    {
+      "epoch": 0.5000901908438256,
+      "grad_norm": 0.14214231073856354,
+      "learning_rate": 0.00018000865925819022,
+      "loss": 0.169,
+      "step": 6931
+    },
+    {
+      "epoch": 0.500162343518886,
+      "grad_norm": 0.1185181513428688,
+      "learning_rate": 0.00018000577283879349,
+      "loss": 0.1569,
+      "step": 6932
+    },
+    {
+      "epoch": 0.5002344961939464,
+      "grad_norm": 0.12954311072826385,
+      "learning_rate": 0.00018000288641939675,
+      "loss": 0.1504,
+      "step": 6933
+    },
+    {
+      "epoch": 0.5003066488690068,
+      "grad_norm": 0.11512862145900726,
+      "learning_rate": 0.00018,
+      "loss": 0.1353,
+      "step": 6934
+    },
+    {
+      "epoch": 0.5003788015440672,
+      "grad_norm": 0.11813472956418991,
+      "learning_rate": 0.00017999711358060327,
+      "loss": 0.12,
+      "step": 6935
+    },
+    {
+      "epoch": 0.5004509542191277,
+      "grad_norm": 0.11345507949590683,
+      "learning_rate": 0.00017999422716120654,
+      "loss": 0.1778,
+      "step": 6936
+    },
+    {
+      "epoch": 0.5005231068941881,
+      "grad_norm": 0.116734080016613,
+      "learning_rate": 0.0001799913407418098,
+      "loss": 0.1353,
+      "step": 6937
+    },
+    {
+      "epoch": 0.5005952595692486,
+      "grad_norm": 0.10393017530441284,
+      "learning_rate": 0.00017998845432241304,
+      "loss": 0.1672,
+      "step": 6938
+    },
+    {
+      "epoch": 0.500667412244309,
+      "grad_norm": 0.16826027631759644,
+      "learning_rate": 0.00017998556790301633,
+      "loss": 0.1595,
+      "step": 6939
+    },
+    {
+      "epoch": 0.5007395649193694,
+      "grad_norm": 0.10345373302698135,
+      "learning_rate": 0.0001799826814836196,
+      "loss": 0.188,
+      "step": 6940
+    },
+    {
+      "epoch": 0.5008117175944298,
+      "grad_norm": 0.12687575817108154,
+      "learning_rate": 0.00017997979506422285,
+      "loss": 0.1285,
+      "step": 6941
+    },
+    {
+      "epoch": 0.5008838702694902,
+      "grad_norm": 0.10807101428508759,
+      "learning_rate": 0.00017997690864482611,
+      "loss": 0.1236,
+      "step": 6942
+    },
+    {
+      "epoch": 0.5009560229445507,
+      "grad_norm": 0.1182204857468605,
+      "learning_rate": 0.00017997402222542935,
+      "loss": 0.1638,
+      "step": 6943
+    },
+    {
+      "epoch": 0.5010281756196111,
+      "grad_norm": 0.12244538217782974,
+      "learning_rate": 0.0001799711358060326,
+      "loss": 0.1899,
+      "step": 6944
+    },
+    {
+      "epoch": 0.5011003282946715,
+      "grad_norm": 0.12142857164144516,
+      "learning_rate": 0.00017996824938663588,
+      "loss": 0.1343,
+      "step": 6945
+    },
+    {
+      "epoch": 0.501172480969732,
+      "grad_norm": 0.1048036441206932,
+      "learning_rate": 0.00017996536296723917,
+      "loss": 0.1174,
+      "step": 6946
+    },
+    {
+      "epoch": 0.5012446336447923,
+      "grad_norm": 0.14296436309814453,
+      "learning_rate": 0.00017996247654784243,
+      "loss": 0.1805,
+      "step": 6947
+    },
+    {
+      "epoch": 0.5013167863198528,
+      "grad_norm": 0.1301654428243637,
+      "learning_rate": 0.00017995959012844567,
+      "loss": 0.166,
+      "step": 6948
+    },
+    {
+      "epoch": 0.5013889389949132,
+      "grad_norm": 0.12834221124649048,
+      "learning_rate": 0.00017995670370904893,
+      "loss": 0.1018,
+      "step": 6949
+    },
+    {
+      "epoch": 0.5014610916699737,
+      "grad_norm": 0.11551238596439362,
+      "learning_rate": 0.0001799538172896522,
+      "loss": 0.1263,
+      "step": 6950
+    },
+    {
+      "epoch": 0.5015332443450341,
+      "grad_norm": 0.12239497154951096,
+      "learning_rate": 0.00017995093087025545,
+      "loss": 0.1839,
+      "step": 6951
+    },
+    {
+      "epoch": 0.5016053970200945,
+      "grad_norm": 0.10061843693256378,
+      "learning_rate": 0.00017994804445085872,
+      "loss": 0.1273,
+      "step": 6952
+    },
+    {
+      "epoch": 0.501677549695155,
+      "grad_norm": 0.11200693994760513,
+      "learning_rate": 0.00017994515803146198,
+      "loss": 0.1774,
+      "step": 6953
+    },
+    {
+      "epoch": 0.5017497023702153,
+      "grad_norm": 0.12200941145420074,
+      "learning_rate": 0.00017994227161206524,
+      "loss": 0.1455,
+      "step": 6954
+    },
+    {
+      "epoch": 0.5018218550452758,
+      "grad_norm": 0.11386123299598694,
+      "learning_rate": 0.0001799393851926685,
+      "loss": 0.1651,
+      "step": 6955
+    },
+    {
+      "epoch": 0.5018940077203362,
+      "grad_norm": 0.11359906196594238,
+      "learning_rate": 0.00017993649877327177,
+      "loss": 0.109,
+      "step": 6956
+    },
+    {
+      "epoch": 0.5019661603953967,
+      "grad_norm": 0.12544330954551697,
+      "learning_rate": 0.00017993361235387503,
+      "loss": 0.1447,
+      "step": 6957
+    },
+    {
+      "epoch": 0.5020383130704571,
+      "grad_norm": 0.11329219490289688,
+      "learning_rate": 0.0001799307259344783,
+      "loss": 0.2536,
+      "step": 6958
+    },
+    {
+      "epoch": 0.5021104657455175,
+      "grad_norm": 0.12163042277097702,
+      "learning_rate": 0.00017992783951508153,
+      "loss": 0.1926,
+      "step": 6959
+    },
+    {
+      "epoch": 0.502182618420578,
+      "grad_norm": 0.10154034942388535,
+      "learning_rate": 0.0001799249530956848,
+      "loss": 0.1485,
+      "step": 6960
+    },
+    {
+      "epoch": 0.5022547710956383,
+      "grad_norm": 0.12191421538591385,
+      "learning_rate": 0.00017992206667628808,
+      "loss": 0.1388,
+      "step": 6961
+    },
+    {
+      "epoch": 0.5023269237706988,
+      "grad_norm": 0.12472362816333771,
+      "learning_rate": 0.00017991918025689135,
+      "loss": 0.1511,
+      "step": 6962
+    },
+    {
+      "epoch": 0.5023990764457592,
+      "grad_norm": 0.11926441639661789,
+      "learning_rate": 0.0001799162938374946,
+      "loss": 0.203,
+      "step": 6963
+    },
+    {
+      "epoch": 0.5024712291208197,
+      "grad_norm": 0.09784934669733047,
+      "learning_rate": 0.00017991340741809784,
+      "loss": 0.1425,
+      "step": 6964
+    },
+    {
+      "epoch": 0.5025433817958801,
+      "grad_norm": 0.12471173703670502,
+      "learning_rate": 0.0001799105209987011,
+      "loss": 0.1314,
+      "step": 6965
+    },
+    {
+      "epoch": 0.5026155344709405,
+      "grad_norm": 0.112209253013134,
+      "learning_rate": 0.00017990763457930437,
+      "loss": 0.1505,
+      "step": 6966
+    },
+    {
+      "epoch": 0.502687687146001,
+      "grad_norm": 0.13939471542835236,
+      "learning_rate": 0.00017990474815990763,
+      "loss": 0.146,
+      "step": 6967
+    },
+    {
+      "epoch": 0.5027598398210613,
+      "grad_norm": 0.13189548254013062,
+      "learning_rate": 0.00017990186174051092,
+      "loss": 0.1621,
+      "step": 6968
+    },
+    {
+      "epoch": 0.5028319924961218,
+      "grad_norm": 0.15791858732700348,
+      "learning_rate": 0.00017989897532111416,
+      "loss": 0.1749,
+      "step": 6969
+    },
+    {
+      "epoch": 0.5029041451711822,
+      "grad_norm": 0.1081838309764862,
+      "learning_rate": 0.00017989608890171742,
+      "loss": 0.1829,
+      "step": 6970
+    },
+    {
+      "epoch": 0.5029762978462426,
+      "grad_norm": 0.12584063410758972,
+      "learning_rate": 0.00017989320248232069,
+      "loss": 0.1542,
+      "step": 6971
+    },
+    {
+      "epoch": 0.5030484505213031,
+      "grad_norm": 0.12362273037433624,
+      "learning_rate": 0.00017989031606292395,
+      "loss": 0.1612,
+      "step": 6972
+    },
+    {
+      "epoch": 0.5031206031963635,
+      "grad_norm": 0.10868021845817566,
+      "learning_rate": 0.0001798874296435272,
+      "loss": 0.1536,
+      "step": 6973
+    },
+    {
+      "epoch": 0.503192755871424,
+      "grad_norm": 0.13362200558185577,
+      "learning_rate": 0.00017988454322413047,
+      "loss": 0.1671,
+      "step": 6974
+    },
+    {
+      "epoch": 0.5032649085464843,
+      "grad_norm": 0.12995874881744385,
+      "learning_rate": 0.00017988165680473374,
+      "loss": 0.1807,
+      "step": 6975
+    },
+    {
+      "epoch": 0.5033370612215448,
+      "grad_norm": 0.10249987989664078,
+      "learning_rate": 0.000179878770385337,
+      "loss": 0.174,
+      "step": 6976
+    },
+    {
+      "epoch": 0.5034092138966052,
+      "grad_norm": 0.13581307232379913,
+      "learning_rate": 0.00017987588396594026,
+      "loss": 0.1428,
+      "step": 6977
+    },
+    {
+      "epoch": 0.5034813665716656,
+      "grad_norm": 0.132228821516037,
+      "learning_rate": 0.00017987299754654353,
+      "loss": 0.1549,
+      "step": 6978
+    },
+    {
+      "epoch": 0.5035535192467261,
+      "grad_norm": 0.11819226294755936,
+      "learning_rate": 0.0001798701111271468,
+      "loss": 0.1437,
+      "step": 6979
+    },
+    {
+      "epoch": 0.5036256719217865,
+      "grad_norm": 0.13969078660011292,
+      "learning_rate": 0.00017986722470775002,
+      "loss": 0.192,
+      "step": 6980
+    },
+    {
+      "epoch": 0.503697824596847,
+      "grad_norm": 0.11371377855539322,
+      "learning_rate": 0.0001798643382883533,
+      "loss": 0.1306,
+      "step": 6981
+    },
+    {
+      "epoch": 0.5037699772719073,
+      "grad_norm": 0.11589173227548599,
+      "learning_rate": 0.00017986145186895658,
+      "loss": 0.1906,
+      "step": 6982
+    },
+    {
+      "epoch": 0.5038421299469678,
+      "grad_norm": 0.12938082218170166,
+      "learning_rate": 0.00017985856544955984,
+      "loss": 0.131,
+      "step": 6983
+    },
+    {
+      "epoch": 0.5039142826220282,
+      "grad_norm": 0.13039258122444153,
+      "learning_rate": 0.0001798556790301631,
+      "loss": 0.1316,
+      "step": 6984
+    },
+    {
+      "epoch": 0.5039864352970886,
+      "grad_norm": 0.12052374333143234,
+      "learning_rate": 0.00017985279261076634,
+      "loss": 0.1307,
+      "step": 6985
+    },
+    {
+      "epoch": 0.5040585879721491,
+      "grad_norm": 0.1361314356327057,
+      "learning_rate": 0.0001798499061913696,
+      "loss": 0.1431,
+      "step": 6986
+    },
+    {
+      "epoch": 0.5041307406472095,
+      "grad_norm": 0.1491934359073639,
+      "learning_rate": 0.00017984701977197286,
+      "loss": 0.1424,
+      "step": 6987
+    },
+    {
+      "epoch": 0.50420289332227,
+      "grad_norm": 0.10724689811468124,
+      "learning_rate": 0.00017984413335257613,
+      "loss": 0.131,
+      "step": 6988
+    },
+    {
+      "epoch": 0.5042750459973303,
+      "grad_norm": 0.129995197057724,
+      "learning_rate": 0.00017984124693317942,
+      "loss": 0.1716,
+      "step": 6989
+    },
+    {
+      "epoch": 0.5043471986723908,
+      "grad_norm": 0.1048898696899414,
+      "learning_rate": 0.00017983836051378265,
+      "loss": 0.1915,
+      "step": 6990
+    },
+    {
+      "epoch": 0.5044193513474512,
+      "grad_norm": 0.1751856803894043,
+      "learning_rate": 0.00017983547409438592,
+      "loss": 0.1597,
+      "step": 6991
+    },
+    {
+      "epoch": 0.5044915040225116,
+      "grad_norm": 0.12383807450532913,
+      "learning_rate": 0.00017983258767498918,
+      "loss": 0.1263,
+      "step": 6992
+    },
+    {
+      "epoch": 0.5045636566975721,
+      "grad_norm": 0.13870978355407715,
+      "learning_rate": 0.00017982970125559244,
+      "loss": 0.1651,
+      "step": 6993
+    },
+    {
+      "epoch": 0.5046358093726325,
+      "grad_norm": 0.12926892936229706,
+      "learning_rate": 0.0001798268148361957,
+      "loss": 0.1254,
+      "step": 6994
+    },
+    {
+      "epoch": 0.504707962047693,
+      "grad_norm": 0.11067026853561401,
+      "learning_rate": 0.00017982392841679897,
+      "loss": 0.1536,
+      "step": 6995
+    },
+    {
+      "epoch": 0.5047801147227533,
+      "grad_norm": 0.2897682189941406,
+      "learning_rate": 0.00017982104199740223,
+      "loss": 0.1842,
+      "step": 6996
+    },
+    {
+      "epoch": 0.5048522673978137,
+      "grad_norm": 0.15760228037834167,
+      "learning_rate": 0.0001798181555780055,
+      "loss": 0.1897,
+      "step": 6997
+    },
+    {
+      "epoch": 0.5049244200728742,
+      "grad_norm": 0.10852501541376114,
+      "learning_rate": 0.00017981526915860876,
+      "loss": 0.1853,
+      "step": 6998
+    },
+    {
+      "epoch": 0.5049965727479346,
+      "grad_norm": 0.1096150130033493,
+      "learning_rate": 0.00017981238273921202,
+      "loss": 0.188,
+      "step": 6999
+    },
+    {
+      "epoch": 0.5050687254229951,
+      "grad_norm": 0.15682603418827057,
+      "learning_rate": 0.00017980949631981528,
+      "loss": 0.1283,
+      "step": 7000
+    },
+    {
+      "epoch": 0.5051408780980555,
+      "grad_norm": 0.14422990381717682,
+      "learning_rate": 0.00017980660990041855,
+      "loss": 0.1779,
+      "step": 7001
+    },
+    {
+      "epoch": 0.505213030773116,
+      "grad_norm": 0.11944327503442764,
+      "learning_rate": 0.00017980372348102178,
+      "loss": 0.1191,
+      "step": 7002
+    },
+    {
+      "epoch": 0.5052851834481763,
+      "grad_norm": 0.11260118335485458,
+      "learning_rate": 0.00017980083706162507,
+      "loss": 0.1435,
+      "step": 7003
+    },
+    {
+      "epoch": 0.5053573361232367,
+      "grad_norm": 0.1541164666414261,
+      "learning_rate": 0.00017979795064222833,
+      "loss": 0.1337,
+      "step": 7004
+    },
+    {
+      "epoch": 0.5054294887982972,
+      "grad_norm": 0.12973268330097198,
+      "learning_rate": 0.0001797950642228316,
+      "loss": 0.1456,
+      "step": 7005
+    },
+    {
+      "epoch": 0.5055016414733576,
+      "grad_norm": 0.1538536548614502,
+      "learning_rate": 0.00017979217780343486,
+      "loss": 0.1787,
+      "step": 7006
+    },
+    {
+      "epoch": 0.5055737941484181,
+      "grad_norm": 0.1301107406616211,
+      "learning_rate": 0.0001797892913840381,
+      "loss": 0.1498,
+      "step": 7007
+    },
+    {
+      "epoch": 0.5056459468234785,
+      "grad_norm": 0.12300629168748856,
+      "learning_rate": 0.00017978640496464136,
+      "loss": 0.1831,
+      "step": 7008
+    },
+    {
+      "epoch": 0.5057180994985389,
+      "grad_norm": 0.13967126607894897,
+      "learning_rate": 0.00017978351854524462,
+      "loss": 0.1445,
+      "step": 7009
+    },
+    {
+      "epoch": 0.5057902521735993,
+      "grad_norm": 0.11427458375692368,
+      "learning_rate": 0.0001797806321258479,
+      "loss": 0.1792,
+      "step": 7010
+    },
+    {
+      "epoch": 0.5058624048486597,
+      "grad_norm": 0.10966087132692337,
+      "learning_rate": 0.00017977774570645117,
+      "loss": 0.1518,
+      "step": 7011
+    },
+    {
+      "epoch": 0.5059345575237202,
+      "grad_norm": 0.11489519476890564,
+      "learning_rate": 0.0001797748592870544,
+      "loss": 0.1191,
+      "step": 7012
+    },
+    {
+      "epoch": 0.5060067101987806,
+      "grad_norm": 0.10946142673492432,
+      "learning_rate": 0.00017977197286765767,
+      "loss": 0.1454,
+      "step": 7013
+    },
+    {
+      "epoch": 0.5060788628738411,
+      "grad_norm": 0.14782243967056274,
+      "learning_rate": 0.00017976908644826094,
+      "loss": 0.1324,
+      "step": 7014
+    },
+    {
+      "epoch": 0.5061510155489015,
+      "grad_norm": 0.13172070682048798,
+      "learning_rate": 0.0001797662000288642,
+      "loss": 0.1634,
+      "step": 7015
+    },
+    {
+      "epoch": 0.5062231682239619,
+      "grad_norm": 0.1212749034166336,
+      "learning_rate": 0.00017976331360946746,
+      "loss": 0.1197,
+      "step": 7016
+    },
+    {
+      "epoch": 0.5062953208990223,
+      "grad_norm": 0.1257588416337967,
+      "learning_rate": 0.00017976042719007073,
+      "loss": 0.1632,
+      "step": 7017
+    },
+    {
+      "epoch": 0.5063674735740827,
+      "grad_norm": 0.11557316780090332,
+      "learning_rate": 0.000179757540770674,
+      "loss": 0.122,
+      "step": 7018
+    },
+    {
+      "epoch": 0.5064396262491432,
+      "grad_norm": 0.09845668822526932,
+      "learning_rate": 0.00017975465435127725,
+      "loss": 0.1862,
+      "step": 7019
+    },
+    {
+      "epoch": 0.5065117789242036,
+      "grad_norm": 0.11798904091119766,
+      "learning_rate": 0.00017975176793188051,
+      "loss": 0.1551,
+      "step": 7020
+    },
+    {
+      "epoch": 0.5065839315992641,
+      "grad_norm": 0.16191120445728302,
+      "learning_rate": 0.00017974888151248378,
+      "loss": 0.1648,
+      "step": 7021
+    },
+    {
+      "epoch": 0.5066560842743245,
+      "grad_norm": 0.1420876383781433,
+      "learning_rate": 0.00017974599509308704,
+      "loss": 0.2152,
+      "step": 7022
+    },
+    {
+      "epoch": 0.5067282369493848,
+      "grad_norm": 0.13263604044914246,
+      "learning_rate": 0.00017974310867369028,
+      "loss": 0.179,
+      "step": 7023
+    },
+    {
+      "epoch": 0.5068003896244453,
+      "grad_norm": 0.12817439436912537,
+      "learning_rate": 0.00017974022225429357,
+      "loss": 0.1644,
+      "step": 7024
+    },
+    {
+      "epoch": 0.5068725422995057,
+      "grad_norm": 0.1356990933418274,
+      "learning_rate": 0.00017973733583489683,
+      "loss": 0.2159,
+      "step": 7025
+    },
+    {
+      "epoch": 0.5069446949745662,
+      "grad_norm": 0.15865840017795563,
+      "learning_rate": 0.0001797344494155001,
+      "loss": 0.1347,
+      "step": 7026
+    },
+    {
+      "epoch": 0.5070168476496266,
+      "grad_norm": 0.12952634692192078,
+      "learning_rate": 0.00017973156299610335,
+      "loss": 0.1918,
+      "step": 7027
+    },
+    {
+      "epoch": 0.5070890003246871,
+      "grad_norm": 0.12019264698028564,
+      "learning_rate": 0.0001797286765767066,
+      "loss": 0.1209,
+      "step": 7028
+    },
+    {
+      "epoch": 0.5071611529997475,
+      "grad_norm": 0.13946324586868286,
+      "learning_rate": 0.00017972579015730985,
+      "loss": 0.1278,
+      "step": 7029
+    },
+    {
+      "epoch": 0.5072333056748078,
+      "grad_norm": 0.11112209409475327,
+      "learning_rate": 0.00017972290373791312,
+      "loss": 0.0934,
+      "step": 7030
+    },
+    {
+      "epoch": 0.5073054583498683,
+      "grad_norm": 0.11413145065307617,
+      "learning_rate": 0.0001797200173185164,
+      "loss": 0.1378,
+      "step": 7031
+    },
+    {
+      "epoch": 0.5073776110249287,
+      "grad_norm": 0.12074559926986694,
+      "learning_rate": 0.00017971713089911967,
+      "loss": 0.1683,
+      "step": 7032
+    },
+    {
+      "epoch": 0.5074497636999892,
+      "grad_norm": 0.12333431094884872,
+      "learning_rate": 0.0001797142444797229,
+      "loss": 0.128,
+      "step": 7033
+    },
+    {
+      "epoch": 0.5075219163750496,
+      "grad_norm": 0.10644882917404175,
+      "learning_rate": 0.00017971135806032617,
+      "loss": 0.1364,
+      "step": 7034
+    },
+    {
+      "epoch": 0.5075940690501101,
+      "grad_norm": 0.10675278306007385,
+      "learning_rate": 0.00017970847164092943,
+      "loss": 0.1467,
+      "step": 7035
+    },
+    {
+      "epoch": 0.5076662217251705,
+      "grad_norm": 0.1398714929819107,
+      "learning_rate": 0.0001797055852215327,
+      "loss": 0.1811,
+      "step": 7036
+    },
+    {
+      "epoch": 0.5077383744002308,
+      "grad_norm": 0.11652438342571259,
+      "learning_rate": 0.00017970269880213596,
+      "loss": 0.1778,
+      "step": 7037
+    },
+    {
+      "epoch": 0.5078105270752913,
+      "grad_norm": 0.11117210984230042,
+      "learning_rate": 0.00017969981238273922,
+      "loss": 0.1226,
+      "step": 7038
+    },
+    {
+      "epoch": 0.5078826797503517,
+      "grad_norm": 0.12034925073385239,
+      "learning_rate": 0.00017969692596334248,
+      "loss": 0.2226,
+      "step": 7039
+    },
+    {
+      "epoch": 0.5079548324254122,
+      "grad_norm": 0.17814908921718597,
+      "learning_rate": 0.00017969403954394575,
+      "loss": 0.1634,
+      "step": 7040
+    },
+    {
+      "epoch": 0.5080269851004726,
+      "grad_norm": 0.12198749929666519,
+      "learning_rate": 0.000179691153124549,
+      "loss": 0.1614,
+      "step": 7041
+    },
+    {
+      "epoch": 0.5080991377755331,
+      "grad_norm": 0.10999343544244766,
+      "learning_rate": 0.00017968826670515227,
+      "loss": 0.1414,
+      "step": 7042
+    },
+    {
+      "epoch": 0.5081712904505935,
+      "grad_norm": 0.11487124860286713,
+      "learning_rate": 0.00017968538028575553,
+      "loss": 0.1703,
+      "step": 7043
+    },
+    {
+      "epoch": 0.5082434431256538,
+      "grad_norm": 0.10328925400972366,
+      "learning_rate": 0.00017968249386635877,
+      "loss": 0.147,
+      "step": 7044
+    },
+    {
+      "epoch": 0.5083155958007143,
+      "grad_norm": 0.0999947264790535,
+      "learning_rate": 0.00017967960744696206,
+      "loss": 0.1905,
+      "step": 7045
+    },
+    {
+      "epoch": 0.5083877484757747,
+      "grad_norm": 0.10743480175733566,
+      "learning_rate": 0.00017967672102756532,
+      "loss": 0.1452,
+      "step": 7046
+    },
+    {
+      "epoch": 0.5084599011508352,
+      "grad_norm": 0.11257276684045792,
+      "learning_rate": 0.00017967383460816859,
+      "loss": 0.1336,
+      "step": 7047
+    },
+    {
+      "epoch": 0.5085320538258956,
+      "grad_norm": 0.12533427774906158,
+      "learning_rate": 0.00017967094818877185,
+      "loss": 0.1068,
+      "step": 7048
+    },
+    {
+      "epoch": 0.5086042065009561,
+      "grad_norm": 0.11079025268554688,
+      "learning_rate": 0.00017966806176937508,
+      "loss": 0.1423,
+      "step": 7049
+    },
+    {
+      "epoch": 0.5086763591760165,
+      "grad_norm": 0.12263496220111847,
+      "learning_rate": 0.00017966517534997835,
+      "loss": 0.1511,
+      "step": 7050
+    },
+    {
+      "epoch": 0.5087485118510768,
+      "grad_norm": 0.1146349087357521,
+      "learning_rate": 0.0001796622889305816,
+      "loss": 0.1482,
+      "step": 7051
+    },
+    {
+      "epoch": 0.5088206645261373,
+      "grad_norm": 0.11123580485582352,
+      "learning_rate": 0.0001796594025111849,
+      "loss": 0.1826,
+      "step": 7052
+    },
+    {
+      "epoch": 0.5088928172011977,
+      "grad_norm": 0.13652732968330383,
+      "learning_rate": 0.00017965651609178816,
+      "loss": 0.1803,
+      "step": 7053
+    },
+    {
+      "epoch": 0.5089649698762582,
+      "grad_norm": 0.16410355269908905,
+      "learning_rate": 0.0001796536296723914,
+      "loss": 0.1844,
+      "step": 7054
+    },
+    {
+      "epoch": 0.5090371225513186,
+      "grad_norm": 0.1269945502281189,
+      "learning_rate": 0.00017965074325299466,
+      "loss": 0.1875,
+      "step": 7055
+    },
+    {
+      "epoch": 0.509109275226379,
+      "grad_norm": 0.1545627862215042,
+      "learning_rate": 0.00017964785683359793,
+      "loss": 0.1762,
+      "step": 7056
+    },
+    {
+      "epoch": 0.5091814279014395,
+      "grad_norm": 0.12612615525722504,
+      "learning_rate": 0.0001796449704142012,
+      "loss": 0.1691,
+      "step": 7057
+    },
+    {
+      "epoch": 0.5092535805764998,
+      "grad_norm": 0.14168410003185272,
+      "learning_rate": 0.00017964208399480445,
+      "loss": 0.1414,
+      "step": 7058
+    },
+    {
+      "epoch": 0.5093257332515603,
+      "grad_norm": 0.1303904950618744,
+      "learning_rate": 0.00017963919757540771,
+      "loss": 0.2057,
+      "step": 7059
+    },
+    {
+      "epoch": 0.5093978859266207,
+      "grad_norm": 0.11661367118358612,
+      "learning_rate": 0.00017963631115601098,
+      "loss": 0.1503,
+      "step": 7060
+    },
+    {
+      "epoch": 0.5094700386016812,
+      "grad_norm": 0.13968224823474884,
+      "learning_rate": 0.00017963342473661424,
+      "loss": 0.1054,
+      "step": 7061
+    },
+    {
+      "epoch": 0.5095421912767416,
+      "grad_norm": 0.12871862947940826,
+      "learning_rate": 0.0001796305383172175,
+      "loss": 0.1559,
+      "step": 7062
+    },
+    {
+      "epoch": 0.509614343951802,
+      "grad_norm": 0.16265441477298737,
+      "learning_rate": 0.00017962765189782077,
+      "loss": 0.1363,
+      "step": 7063
+    },
+    {
+      "epoch": 0.5096864966268625,
+      "grad_norm": 0.19330908358097076,
+      "learning_rate": 0.00017962476547842403,
+      "loss": 0.1145,
+      "step": 7064
+    },
+    {
+      "epoch": 0.5097586493019228,
+      "grad_norm": 0.1145443320274353,
+      "learning_rate": 0.00017962187905902726,
+      "loss": 0.1688,
+      "step": 7065
+    },
+    {
+      "epoch": 0.5098308019769833,
+      "grad_norm": 0.16316966712474823,
+      "learning_rate": 0.00017961899263963055,
+      "loss": 0.1873,
+      "step": 7066
+    },
+    {
+      "epoch": 0.5099029546520437,
+      "grad_norm": 0.10494080930948257,
+      "learning_rate": 0.00017961610622023382,
+      "loss": 0.1404,
+      "step": 7067
+    },
+    {
+      "epoch": 0.5099751073271042,
+      "grad_norm": 0.11450763791799545,
+      "learning_rate": 0.00017961321980083708,
+      "loss": 0.1579,
+      "step": 7068
+    },
+    {
+      "epoch": 0.5100472600021646,
+      "grad_norm": 0.1169004738330841,
+      "learning_rate": 0.00017961033338144034,
+      "loss": 0.1023,
+      "step": 7069
+    },
+    {
+      "epoch": 0.510119412677225,
+      "grad_norm": 0.1317950189113617,
+      "learning_rate": 0.00017960744696204358,
+      "loss": 0.2358,
+      "step": 7070
+    },
+    {
+      "epoch": 0.5101915653522854,
+      "grad_norm": 0.14101877808570862,
+      "learning_rate": 0.00017960456054264684,
+      "loss": 0.143,
+      "step": 7071
+    },
+    {
+      "epoch": 0.5102637180273458,
+      "grad_norm": 0.18086402118206024,
+      "learning_rate": 0.0001796016741232501,
+      "loss": 0.1629,
+      "step": 7072
+    },
+    {
+      "epoch": 0.5103358707024063,
+      "grad_norm": 0.1619880646467209,
+      "learning_rate": 0.0001795987877038534,
+      "loss": 0.144,
+      "step": 7073
+    },
+    {
+      "epoch": 0.5104080233774667,
+      "grad_norm": 0.14426441490650177,
+      "learning_rate": 0.00017959590128445666,
+      "loss": 0.1667,
+      "step": 7074
+    },
+    {
+      "epoch": 0.5104801760525272,
+      "grad_norm": 0.12022842466831207,
+      "learning_rate": 0.0001795930148650599,
+      "loss": 0.173,
+      "step": 7075
+    },
+    {
+      "epoch": 0.5105523287275876,
+      "grad_norm": 0.1245194524526596,
+      "learning_rate": 0.00017959012844566316,
+      "loss": 0.1601,
+      "step": 7076
+    },
+    {
+      "epoch": 0.510624481402648,
+      "grad_norm": 0.1720270961523056,
+      "learning_rate": 0.00017958724202626642,
+      "loss": 0.2207,
+      "step": 7077
+    },
+    {
+      "epoch": 0.5106966340777084,
+      "grad_norm": 0.13784779608249664,
+      "learning_rate": 0.00017958435560686968,
+      "loss": 0.1676,
+      "step": 7078
+    },
+    {
+      "epoch": 0.5107687867527688,
+      "grad_norm": 0.10445517301559448,
+      "learning_rate": 0.00017958146918747295,
+      "loss": 0.1448,
+      "step": 7079
+    },
+    {
+      "epoch": 0.5108409394278293,
+      "grad_norm": 0.1254737228155136,
+      "learning_rate": 0.0001795785827680762,
+      "loss": 0.1447,
+      "step": 7080
+    },
+    {
+      "epoch": 0.5109130921028897,
+      "grad_norm": 0.11472035944461823,
+      "learning_rate": 0.00017957569634867947,
+      "loss": 0.1331,
+      "step": 7081
+    },
+    {
+      "epoch": 0.5109852447779502,
+      "grad_norm": 0.11648841202259064,
+      "learning_rate": 0.00017957280992928273,
+      "loss": 0.1631,
+      "step": 7082
+    },
+    {
+      "epoch": 0.5110573974530106,
+      "grad_norm": 0.12026941031217575,
+      "learning_rate": 0.000179569923509886,
+      "loss": 0.1392,
+      "step": 7083
+    },
+    {
+      "epoch": 0.511129550128071,
+      "grad_norm": 0.11940126866102219,
+      "learning_rate": 0.00017956703709048926,
+      "loss": 0.1431,
+      "step": 7084
+    },
+    {
+      "epoch": 0.5112017028031314,
+      "grad_norm": 0.12190019339323044,
+      "learning_rate": 0.00017956415067109252,
+      "loss": 0.1748,
+      "step": 7085
+    },
+    {
+      "epoch": 0.5112738554781918,
+      "grad_norm": 0.1303064376115799,
+      "learning_rate": 0.00017956126425169576,
+      "loss": 0.1483,
+      "step": 7086
+    },
+    {
+      "epoch": 0.5113460081532523,
+      "grad_norm": 0.11201722174882889,
+      "learning_rate": 0.00017955837783229905,
+      "loss": 0.1709,
+      "step": 7087
+    },
+    {
+      "epoch": 0.5114181608283127,
+      "grad_norm": 0.132659912109375,
+      "learning_rate": 0.0001795554914129023,
+      "loss": 0.148,
+      "step": 7088
+    },
+    {
+      "epoch": 0.5114903135033732,
+      "grad_norm": 0.1099841296672821,
+      "learning_rate": 0.00017955260499350557,
+      "loss": 0.2,
+      "step": 7089
+    },
+    {
+      "epoch": 0.5115624661784336,
+      "grad_norm": 0.11605177074670792,
+      "learning_rate": 0.00017954971857410884,
+      "loss": 0.1367,
+      "step": 7090
+    },
+    {
+      "epoch": 0.511634618853494,
+      "grad_norm": 0.13425514101982117,
+      "learning_rate": 0.00017954683215471207,
+      "loss": 0.1885,
+      "step": 7091
+    },
+    {
+      "epoch": 0.5117067715285544,
+      "grad_norm": 0.09016387164592743,
+      "learning_rate": 0.00017954394573531534,
+      "loss": 0.129,
+      "step": 7092
+    },
+    {
+      "epoch": 0.5117789242036148,
+      "grad_norm": 0.11584919691085815,
+      "learning_rate": 0.0001795410593159186,
+      "loss": 0.1891,
+      "step": 7093
+    },
+    {
+      "epoch": 0.5118510768786753,
+      "grad_norm": 0.13602469861507416,
+      "learning_rate": 0.0001795381728965219,
+      "loss": 0.1442,
+      "step": 7094
+    },
+    {
+      "epoch": 0.5119232295537357,
+      "grad_norm": 0.11831388622522354,
+      "learning_rate": 0.00017953528647712515,
+      "loss": 0.1617,
+      "step": 7095
+    },
+    {
+      "epoch": 0.5119953822287961,
+      "grad_norm": 0.12001494318246841,
+      "learning_rate": 0.0001795324000577284,
+      "loss": 0.1257,
+      "step": 7096
+    },
+    {
+      "epoch": 0.5120675349038566,
+      "grad_norm": 0.1519460827112198,
+      "learning_rate": 0.00017952951363833165,
+      "loss": 0.1627,
+      "step": 7097
+    },
+    {
+      "epoch": 0.512139687578917,
+      "grad_norm": 0.12677805125713348,
+      "learning_rate": 0.00017952662721893491,
+      "loss": 0.1374,
+      "step": 7098
+    },
+    {
+      "epoch": 0.5122118402539774,
+      "grad_norm": 0.1298854649066925,
+      "learning_rate": 0.00017952374079953818,
+      "loss": 0.1082,
+      "step": 7099
+    },
+    {
+      "epoch": 0.5122839929290378,
+      "grad_norm": 0.16897009313106537,
+      "learning_rate": 0.00017952085438014144,
+      "loss": 0.2109,
+      "step": 7100
+    },
+    {
+      "epoch": 0.5123561456040983,
+      "grad_norm": 0.13847383856773376,
+      "learning_rate": 0.0001795179679607447,
+      "loss": 0.2166,
+      "step": 7101
+    },
+    {
+      "epoch": 0.5124282982791587,
+      "grad_norm": 0.13180577754974365,
+      "learning_rate": 0.00017951508154134797,
+      "loss": 0.1774,
+      "step": 7102
+    },
+    {
+      "epoch": 0.5125004509542191,
+      "grad_norm": 0.1354549080133438,
+      "learning_rate": 0.00017951219512195123,
+      "loss": 0.1629,
+      "step": 7103
+    },
+    {
+      "epoch": 0.5125726036292796,
+      "grad_norm": 0.10785700380802155,
+      "learning_rate": 0.0001795093087025545,
+      "loss": 0.1226,
+      "step": 7104
+    },
+    {
+      "epoch": 0.51264475630434,
+      "grad_norm": 0.11956942081451416,
+      "learning_rate": 0.00017950642228315775,
+      "loss": 0.1717,
+      "step": 7105
+    },
+    {
+      "epoch": 0.5127169089794004,
+      "grad_norm": 0.11497174948453903,
+      "learning_rate": 0.00017950353586376102,
+      "loss": 0.1623,
+      "step": 7106
+    },
+    {
+      "epoch": 0.5127890616544608,
+      "grad_norm": 0.15201187133789062,
+      "learning_rate": 0.00017950064944436425,
+      "loss": 0.1592,
+      "step": 7107
+    },
+    {
+      "epoch": 0.5128612143295213,
+      "grad_norm": 0.12435279041528702,
+      "learning_rate": 0.00017949776302496754,
+      "loss": 0.1148,
+      "step": 7108
+    },
+    {
+      "epoch": 0.5129333670045817,
+      "grad_norm": 0.12462184578180313,
+      "learning_rate": 0.0001794948766055708,
+      "loss": 0.1231,
+      "step": 7109
+    },
+    {
+      "epoch": 0.5130055196796421,
+      "grad_norm": 0.12461728602647781,
+      "learning_rate": 0.00017949199018617407,
+      "loss": 0.1636,
+      "step": 7110
+    },
+    {
+      "epoch": 0.5130776723547026,
+      "grad_norm": 0.11966323107481003,
+      "learning_rate": 0.00017948910376677733,
+      "loss": 0.1824,
+      "step": 7111
+    },
+    {
+      "epoch": 0.513149825029763,
+      "grad_norm": 0.10968772321939468,
+      "learning_rate": 0.00017948621734738057,
+      "loss": 0.1257,
+      "step": 7112
+    },
+    {
+      "epoch": 0.5132219777048234,
+      "grad_norm": 0.13588903844356537,
+      "learning_rate": 0.00017948333092798383,
+      "loss": 0.187,
+      "step": 7113
+    },
+    {
+      "epoch": 0.5132941303798838,
+      "grad_norm": 0.10114775598049164,
+      "learning_rate": 0.0001794804445085871,
+      "loss": 0.1311,
+      "step": 7114
+    },
+    {
+      "epoch": 0.5133662830549443,
+      "grad_norm": 0.1150478795170784,
+      "learning_rate": 0.00017947755808919038,
+      "loss": 0.1423,
+      "step": 7115
+    },
+    {
+      "epoch": 0.5134384357300047,
+      "grad_norm": 0.13415440917015076,
+      "learning_rate": 0.00017947467166979365,
+      "loss": 0.1541,
+      "step": 7116
+    },
+    {
+      "epoch": 0.5135105884050651,
+      "grad_norm": 0.12662853300571442,
+      "learning_rate": 0.00017947178525039688,
+      "loss": 0.1371,
+      "step": 7117
+    },
+    {
+      "epoch": 0.5135827410801256,
+      "grad_norm": 0.10206128656864166,
+      "learning_rate": 0.00017946889883100014,
+      "loss": 0.1565,
+      "step": 7118
+    },
+    {
+      "epoch": 0.513654893755186,
+      "grad_norm": 0.14350473880767822,
+      "learning_rate": 0.0001794660124116034,
+      "loss": 0.1764,
+      "step": 7119
+    },
+    {
+      "epoch": 0.5137270464302464,
+      "grad_norm": 0.07911955565214157,
+      "learning_rate": 0.00017946312599220667,
+      "loss": 0.1468,
+      "step": 7120
+    },
+    {
+      "epoch": 0.5137991991053068,
+      "grad_norm": 0.12495509535074234,
+      "learning_rate": 0.00017946023957280993,
+      "loss": 0.1332,
+      "step": 7121
+    },
+    {
+      "epoch": 0.5138713517803672,
+      "grad_norm": 0.11479618400335312,
+      "learning_rate": 0.0001794573531534132,
+      "loss": 0.1465,
+      "step": 7122
+    },
+    {
+      "epoch": 0.5139435044554277,
+      "grad_norm": 0.14247792959213257,
+      "learning_rate": 0.00017945446673401646,
+      "loss": 0.1726,
+      "step": 7123
+    },
+    {
+      "epoch": 0.5140156571304881,
+      "grad_norm": 0.1663128286600113,
+      "learning_rate": 0.00017945158031461972,
+      "loss": 0.2307,
+      "step": 7124
+    },
+    {
+      "epoch": 0.5140878098055486,
+      "grad_norm": 0.12039539217948914,
+      "learning_rate": 0.00017944869389522299,
+      "loss": 0.1669,
+      "step": 7125
+    },
+    {
+      "epoch": 0.514159962480609,
+      "grad_norm": 0.11929290741682053,
+      "learning_rate": 0.00017944580747582625,
+      "loss": 0.1681,
+      "step": 7126
+    },
+    {
+      "epoch": 0.5142321151556694,
+      "grad_norm": 0.1053629219532013,
+      "learning_rate": 0.0001794429210564295,
+      "loss": 0.1712,
+      "step": 7127
+    },
+    {
+      "epoch": 0.5143042678307298,
+      "grad_norm": 0.11167839169502258,
+      "learning_rate": 0.00017944003463703277,
+      "loss": 0.1656,
+      "step": 7128
+    },
+    {
+      "epoch": 0.5143764205057902,
+      "grad_norm": 0.11405834555625916,
+      "learning_rate": 0.00017943714821763604,
+      "loss": 0.1653,
+      "step": 7129
+    },
+    {
+      "epoch": 0.5144485731808507,
+      "grad_norm": 0.11706086993217468,
+      "learning_rate": 0.0001794342617982393,
+      "loss": 0.1301,
+      "step": 7130
+    },
+    {
+      "epoch": 0.5145207258559111,
+      "grad_norm": 0.11771078407764435,
+      "learning_rate": 0.00017943137537884256,
+      "loss": 0.1333,
+      "step": 7131
+    },
+    {
+      "epoch": 0.5145928785309716,
+      "grad_norm": 0.1398884356021881,
+      "learning_rate": 0.00017942848895944583,
+      "loss": 0.1846,
+      "step": 7132
+    },
+    {
+      "epoch": 0.5146650312060319,
+      "grad_norm": 0.10110854357481003,
+      "learning_rate": 0.0001794256025400491,
+      "loss": 0.1993,
+      "step": 7133
+    },
+    {
+      "epoch": 0.5147371838810924,
+      "grad_norm": 0.1052713468670845,
+      "learning_rate": 0.00017942271612065232,
+      "loss": 0.0889,
+      "step": 7134
+    },
+    {
+      "epoch": 0.5148093365561528,
+      "grad_norm": 0.10848946869373322,
+      "learning_rate": 0.0001794198297012556,
+      "loss": 0.1714,
+      "step": 7135
+    },
+    {
+      "epoch": 0.5148814892312132,
+      "grad_norm": 0.10938854515552521,
+      "learning_rate": 0.00017941694328185888,
+      "loss": 0.1233,
+      "step": 7136
+    },
+    {
+      "epoch": 0.5149536419062737,
+      "grad_norm": 0.15125346183776855,
+      "learning_rate": 0.00017941405686246214,
+      "loss": 0.1782,
+      "step": 7137
+    },
+    {
+      "epoch": 0.5150257945813341,
+      "grad_norm": 0.11715354770421982,
+      "learning_rate": 0.0001794111704430654,
+      "loss": 0.1256,
+      "step": 7138
+    },
+    {
+      "epoch": 0.5150979472563946,
+      "grad_norm": 0.15763883292675018,
+      "learning_rate": 0.00017940828402366864,
+      "loss": 0.1721,
+      "step": 7139
+    },
+    {
+      "epoch": 0.5151700999314549,
+      "grad_norm": 0.1265951693058014,
+      "learning_rate": 0.0001794053976042719,
+      "loss": 0.1218,
+      "step": 7140
+    },
+    {
+      "epoch": 0.5152422526065153,
+      "grad_norm": 0.1079411506652832,
+      "learning_rate": 0.00017940251118487517,
+      "loss": 0.1693,
+      "step": 7141
+    },
+    {
+      "epoch": 0.5153144052815758,
+      "grad_norm": 0.1086970716714859,
+      "learning_rate": 0.00017939962476547843,
+      "loss": 0.1467,
+      "step": 7142
+    },
+    {
+      "epoch": 0.5153865579566362,
+      "grad_norm": 0.11504565179347992,
+      "learning_rate": 0.00017939673834608172,
+      "loss": 0.121,
+      "step": 7143
+    },
+    {
+      "epoch": 0.5154587106316967,
+      "grad_norm": 0.12443605810403824,
+      "learning_rate": 0.00017939385192668495,
+      "loss": 0.1886,
+      "step": 7144
+    },
+    {
+      "epoch": 0.5155308633067571,
+      "grad_norm": 0.10076568275690079,
+      "learning_rate": 0.00017939096550728822,
+      "loss": 0.1106,
+      "step": 7145
+    },
+    {
+      "epoch": 0.5156030159818176,
+      "grad_norm": 0.12320218235254288,
+      "learning_rate": 0.00017938807908789148,
+      "loss": 0.1613,
+      "step": 7146
+    },
+    {
+      "epoch": 0.5156751686568779,
+      "grad_norm": 0.12105323374271393,
+      "learning_rate": 0.00017938519266849474,
+      "loss": 0.1779,
+      "step": 7147
+    },
+    {
+      "epoch": 0.5157473213319383,
+      "grad_norm": 0.11187251657247543,
+      "learning_rate": 0.000179382306249098,
+      "loss": 0.1617,
+      "step": 7148
+    },
+    {
+      "epoch": 0.5158194740069988,
+      "grad_norm": 0.13889268040657043,
+      "learning_rate": 0.00017937941982970127,
+      "loss": 0.142,
+      "step": 7149
+    },
+    {
+      "epoch": 0.5158916266820592,
+      "grad_norm": 0.15993313491344452,
+      "learning_rate": 0.0001793765334103045,
+      "loss": 0.1478,
+      "step": 7150
+    },
+    {
+      "epoch": 0.5159637793571197,
+      "grad_norm": 0.12263643741607666,
+      "learning_rate": 0.0001793736469909078,
+      "loss": 0.116,
+      "step": 7151
+    },
+    {
+      "epoch": 0.5160359320321801,
+      "grad_norm": 0.11724000424146652,
+      "learning_rate": 0.00017937076057151106,
+      "loss": 0.168,
+      "step": 7152
+    },
+    {
+      "epoch": 0.5161080847072406,
+      "grad_norm": 0.11943333595991135,
+      "learning_rate": 0.00017936787415211432,
+      "loss": 0.1757,
+      "step": 7153
+    },
+    {
+      "epoch": 0.5161802373823009,
+      "grad_norm": 0.12497618049383163,
+      "learning_rate": 0.00017936498773271758,
+      "loss": 0.1531,
+      "step": 7154
+    },
+    {
+      "epoch": 0.5162523900573613,
+      "grad_norm": 0.11900891363620758,
+      "learning_rate": 0.00017936210131332082,
+      "loss": 0.1362,
+      "step": 7155
+    },
+    {
+      "epoch": 0.5163245427324218,
+      "grad_norm": 0.12837271392345428,
+      "learning_rate": 0.00017935921489392408,
+      "loss": 0.148,
+      "step": 7156
+    },
+    {
+      "epoch": 0.5163966954074822,
+      "grad_norm": 0.11471260339021683,
+      "learning_rate": 0.00017935632847452734,
+      "loss": 0.1738,
+      "step": 7157
+    },
+    {
+      "epoch": 0.5164688480825427,
+      "grad_norm": 0.10337173193693161,
+      "learning_rate": 0.00017935344205513063,
+      "loss": 0.1604,
+      "step": 7158
+    },
+    {
+      "epoch": 0.5165410007576031,
+      "grad_norm": 0.12180466949939728,
+      "learning_rate": 0.0001793505556357339,
+      "loss": 0.1699,
+      "step": 7159
+    },
+    {
+      "epoch": 0.5166131534326636,
+      "grad_norm": 0.10087094455957413,
+      "learning_rate": 0.00017934766921633713,
+      "loss": 0.1819,
+      "step": 7160
+    },
+    {
+      "epoch": 0.5166853061077239,
+      "grad_norm": 0.12025085836648941,
+      "learning_rate": 0.0001793447827969404,
+      "loss": 0.1661,
+      "step": 7161
+    },
+    {
+      "epoch": 0.5167574587827843,
+      "grad_norm": 0.11435249447822571,
+      "learning_rate": 0.00017934189637754366,
+      "loss": 0.107,
+      "step": 7162
+    },
+    {
+      "epoch": 0.5168296114578448,
+      "grad_norm": 0.1328279674053192,
+      "learning_rate": 0.00017933900995814692,
+      "loss": 0.1972,
+      "step": 7163
+    },
+    {
+      "epoch": 0.5169017641329052,
+      "grad_norm": 0.11817678064107895,
+      "learning_rate": 0.00017933612353875019,
+      "loss": 0.1716,
+      "step": 7164
+    },
+    {
+      "epoch": 0.5169739168079657,
+      "grad_norm": 0.10996687412261963,
+      "learning_rate": 0.00017933323711935345,
+      "loss": 0.164,
+      "step": 7165
+    },
+    {
+      "epoch": 0.5170460694830261,
+      "grad_norm": 0.1188410222530365,
+      "learning_rate": 0.0001793303506999567,
+      "loss": 0.1349,
+      "step": 7166
+    },
+    {
+      "epoch": 0.5171182221580866,
+      "grad_norm": 0.13090239465236664,
+      "learning_rate": 0.00017932746428055997,
+      "loss": 0.2171,
+      "step": 7167
+    },
+    {
+      "epoch": 0.5171903748331469,
+      "grad_norm": 0.11918402463197708,
+      "learning_rate": 0.00017932457786116324,
+      "loss": 0.1671,
+      "step": 7168
+    },
+    {
+      "epoch": 0.5172625275082073,
+      "grad_norm": 0.12439822405576706,
+      "learning_rate": 0.0001793216914417665,
+      "loss": 0.1678,
+      "step": 7169
+    },
+    {
+      "epoch": 0.5173346801832678,
+      "grad_norm": 0.12405625730752945,
+      "learning_rate": 0.00017931880502236976,
+      "loss": 0.107,
+      "step": 7170
+    },
+    {
+      "epoch": 0.5174068328583282,
+      "grad_norm": 0.10050161182880402,
+      "learning_rate": 0.000179315918602973,
+      "loss": 0.1308,
+      "step": 7171
+    },
+    {
+      "epoch": 0.5174789855333887,
+      "grad_norm": 0.1161796823143959,
+      "learning_rate": 0.0001793130321835763,
+      "loss": 0.1474,
+      "step": 7172
+    },
+    {
+      "epoch": 0.5175511382084491,
+      "grad_norm": 0.1269403100013733,
+      "learning_rate": 0.00017931014576417955,
+      "loss": 0.2059,
+      "step": 7173
+    },
+    {
+      "epoch": 0.5176232908835096,
+      "grad_norm": 0.10315892100334167,
+      "learning_rate": 0.00017930725934478281,
+      "loss": 0.1052,
+      "step": 7174
+    },
+    {
+      "epoch": 0.5176954435585699,
+      "grad_norm": 0.1161118671298027,
+      "learning_rate": 0.00017930437292538608,
+      "loss": 0.1421,
+      "step": 7175
+    },
+    {
+      "epoch": 0.5177675962336303,
+      "grad_norm": 0.1218157634139061,
+      "learning_rate": 0.0001793014865059893,
+      "loss": 0.1585,
+      "step": 7176
+    },
+    {
+      "epoch": 0.5178397489086908,
+      "grad_norm": 0.11405991017818451,
+      "learning_rate": 0.00017929860008659258,
+      "loss": 0.1643,
+      "step": 7177
+    },
+    {
+      "epoch": 0.5179119015837512,
+      "grad_norm": 0.11038383096456528,
+      "learning_rate": 0.00017929571366719584,
+      "loss": 0.1415,
+      "step": 7178
+    },
+    {
+      "epoch": 0.5179840542588117,
+      "grad_norm": 0.12101422995328903,
+      "learning_rate": 0.00017929282724779913,
+      "loss": 0.163,
+      "step": 7179
+    },
+    {
+      "epoch": 0.5180562069338721,
+      "grad_norm": 0.10214401036500931,
+      "learning_rate": 0.0001792899408284024,
+      "loss": 0.1273,
+      "step": 7180
+    },
+    {
+      "epoch": 0.5181283596089326,
+      "grad_norm": 0.08696547150611877,
+      "learning_rate": 0.00017928705440900563,
+      "loss": 0.1835,
+      "step": 7181
+    },
+    {
+      "epoch": 0.5182005122839929,
+      "grad_norm": 0.1108965203166008,
+      "learning_rate": 0.0001792841679896089,
+      "loss": 0.1787,
+      "step": 7182
+    },
+    {
+      "epoch": 0.5182726649590533,
+      "grad_norm": 0.10518892854452133,
+      "learning_rate": 0.00017928128157021215,
+      "loss": 0.1906,
+      "step": 7183
+    },
+    {
+      "epoch": 0.5183448176341138,
+      "grad_norm": 0.11729423701763153,
+      "learning_rate": 0.00017927839515081542,
+      "loss": 0.1725,
+      "step": 7184
+    },
+    {
+      "epoch": 0.5184169703091742,
+      "grad_norm": 0.11098305135965347,
+      "learning_rate": 0.00017927550873141868,
+      "loss": 0.1754,
+      "step": 7185
+    },
+    {
+      "epoch": 0.5184891229842347,
+      "grad_norm": 0.14198029041290283,
+      "learning_rate": 0.00017927262231202194,
+      "loss": 0.1726,
+      "step": 7186
+    },
+    {
+      "epoch": 0.5185612756592951,
+      "grad_norm": 0.12816059589385986,
+      "learning_rate": 0.0001792697358926252,
+      "loss": 0.1231,
+      "step": 7187
+    },
+    {
+      "epoch": 0.5186334283343556,
+      "grad_norm": 0.20600976049900055,
+      "learning_rate": 0.00017926684947322847,
+      "loss": 0.1498,
+      "step": 7188
+    },
+    {
+      "epoch": 0.5187055810094159,
+      "grad_norm": 0.10582345724105835,
+      "learning_rate": 0.00017926396305383173,
+      "loss": 0.2063,
+      "step": 7189
+    },
+    {
+      "epoch": 0.5187777336844763,
+      "grad_norm": 0.11878283321857452,
+      "learning_rate": 0.000179261076634435,
+      "loss": 0.1777,
+      "step": 7190
+    },
+    {
+      "epoch": 0.5188498863595368,
+      "grad_norm": 0.13395068049430847,
+      "learning_rate": 0.00017925819021503826,
+      "loss": 0.1911,
+      "step": 7191
+    },
+    {
+      "epoch": 0.5189220390345972,
+      "grad_norm": 0.12442594021558762,
+      "learning_rate": 0.0001792553037956415,
+      "loss": 0.1398,
+      "step": 7192
+    },
+    {
+      "epoch": 0.5189941917096577,
+      "grad_norm": 0.10257423669099808,
+      "learning_rate": 0.00017925241737624478,
+      "loss": 0.1238,
+      "step": 7193
+    },
+    {
+      "epoch": 0.5190663443847181,
+      "grad_norm": 0.1495160013437271,
+      "learning_rate": 0.00017924953095684805,
+      "loss": 0.1993,
+      "step": 7194
+    },
+    {
+      "epoch": 0.5191384970597784,
+      "grad_norm": 0.11780592799186707,
+      "learning_rate": 0.0001792466445374513,
+      "loss": 0.1729,
+      "step": 7195
+    },
+    {
+      "epoch": 0.5192106497348389,
+      "grad_norm": 0.1251525729894638,
+      "learning_rate": 0.00017924375811805457,
+      "loss": 0.1138,
+      "step": 7196
+    },
+    {
+      "epoch": 0.5192828024098993,
+      "grad_norm": 0.14698529243469238,
+      "learning_rate": 0.0001792408716986578,
+      "loss": 0.151,
+      "step": 7197
+    },
+    {
+      "epoch": 0.5193549550849598,
+      "grad_norm": 0.11803632974624634,
+      "learning_rate": 0.00017923798527926107,
+      "loss": 0.1888,
+      "step": 7198
+    },
+    {
+      "epoch": 0.5194271077600202,
+      "grad_norm": 0.13017745316028595,
+      "learning_rate": 0.00017923509885986433,
+      "loss": 0.1845,
+      "step": 7199
+    },
+    {
+      "epoch": 0.5194992604350807,
+      "grad_norm": 0.11711041629314423,
+      "learning_rate": 0.00017923221244046762,
+      "loss": 0.1322,
+      "step": 7200
+    },
+    {
+      "epoch": 0.5195714131101411,
+      "grad_norm": 0.10870666801929474,
+      "learning_rate": 0.00017922932602107089,
+      "loss": 0.1238,
+      "step": 7201
+    },
+    {
+      "epoch": 0.5196435657852014,
+      "grad_norm": 0.1246069073677063,
+      "learning_rate": 0.00017922643960167412,
+      "loss": 0.1529,
+      "step": 7202
+    },
+    {
+      "epoch": 0.5197157184602619,
+      "grad_norm": 0.11276337504386902,
+      "learning_rate": 0.00017922355318227738,
+      "loss": 0.1522,
+      "step": 7203
+    },
+    {
+      "epoch": 0.5197878711353223,
+      "grad_norm": 0.10751859843730927,
+      "learning_rate": 0.00017922066676288065,
+      "loss": 0.2177,
+      "step": 7204
+    },
+    {
+      "epoch": 0.5198600238103828,
+      "grad_norm": 0.10630340874195099,
+      "learning_rate": 0.0001792177803434839,
+      "loss": 0.163,
+      "step": 7205
+    },
+    {
+      "epoch": 0.5199321764854432,
+      "grad_norm": 0.11413315683603287,
+      "learning_rate": 0.00017921489392408717,
+      "loss": 0.1517,
+      "step": 7206
+    },
+    {
+      "epoch": 0.5200043291605037,
+      "grad_norm": 0.12250448018312454,
+      "learning_rate": 0.00017921200750469044,
+      "loss": 0.1842,
+      "step": 7207
+    },
+    {
+      "epoch": 0.5200764818355641,
+      "grad_norm": 0.09859684854745865,
+      "learning_rate": 0.0001792091210852937,
+      "loss": 0.1106,
+      "step": 7208
+    },
+    {
+      "epoch": 0.5201486345106244,
+      "grad_norm": 0.09322864562273026,
+      "learning_rate": 0.00017920623466589696,
+      "loss": 0.145,
+      "step": 7209
+    },
+    {
+      "epoch": 0.5202207871856849,
+      "grad_norm": 0.11078327894210815,
+      "learning_rate": 0.00017920334824650023,
+      "loss": 0.1852,
+      "step": 7210
+    },
+    {
+      "epoch": 0.5202929398607453,
+      "grad_norm": 0.11507966369390488,
+      "learning_rate": 0.0001792004618271035,
+      "loss": 0.1514,
+      "step": 7211
+    },
+    {
+      "epoch": 0.5203650925358058,
+      "grad_norm": 0.14388643205165863,
+      "learning_rate": 0.00017919757540770675,
+      "loss": 0.1565,
+      "step": 7212
+    },
+    {
+      "epoch": 0.5204372452108662,
+      "grad_norm": 0.10419867932796478,
+      "learning_rate": 0.00017919468898831,
+      "loss": 0.111,
+      "step": 7213
+    },
+    {
+      "epoch": 0.5205093978859267,
+      "grad_norm": 0.14187461137771606,
+      "learning_rate": 0.00017919180256891328,
+      "loss": 0.1281,
+      "step": 7214
+    },
+    {
+      "epoch": 0.5205815505609871,
+      "grad_norm": 0.11850924044847488,
+      "learning_rate": 0.00017918891614951654,
+      "loss": 0.1794,
+      "step": 7215
+    },
+    {
+      "epoch": 0.5206537032360474,
+      "grad_norm": 0.13303731381893158,
+      "learning_rate": 0.0001791860297301198,
+      "loss": 0.1664,
+      "step": 7216
+    },
+    {
+      "epoch": 0.5207258559111079,
+      "grad_norm": 0.14410582184791565,
+      "learning_rate": 0.00017918314331072307,
+      "loss": 0.165,
+      "step": 7217
+    },
+    {
+      "epoch": 0.5207980085861683,
+      "grad_norm": 0.14188916981220245,
+      "learning_rate": 0.0001791802568913263,
+      "loss": 0.1256,
+      "step": 7218
+    },
+    {
+      "epoch": 0.5208701612612288,
+      "grad_norm": 0.15088891983032227,
+      "learning_rate": 0.00017917737047192956,
+      "loss": 0.1743,
+      "step": 7219
+    },
+    {
+      "epoch": 0.5209423139362892,
+      "grad_norm": 0.1348952054977417,
+      "learning_rate": 0.00017917448405253283,
+      "loss": 0.1547,
+      "step": 7220
+    },
+    {
+      "epoch": 0.5210144666113496,
+      "grad_norm": 0.15551945567131042,
+      "learning_rate": 0.00017917159763313612,
+      "loss": 0.1381,
+      "step": 7221
+    },
+    {
+      "epoch": 0.5210866192864101,
+      "grad_norm": 0.12490364164113998,
+      "learning_rate": 0.00017916871121373938,
+      "loss": 0.1634,
+      "step": 7222
+    },
+    {
+      "epoch": 0.5211587719614704,
+      "grad_norm": 0.11290589720010757,
+      "learning_rate": 0.00017916582479434262,
+      "loss": 0.141,
+      "step": 7223
+    },
+    {
+      "epoch": 0.5212309246365309,
+      "grad_norm": 0.10861491411924362,
+      "learning_rate": 0.00017916293837494588,
+      "loss": 0.1162,
+      "step": 7224
+    },
+    {
+      "epoch": 0.5213030773115913,
+      "grad_norm": 0.19713301956653595,
+      "learning_rate": 0.00017916005195554914,
+      "loss": 0.1338,
+      "step": 7225
+    },
+    {
+      "epoch": 0.5213752299866518,
+      "grad_norm": 0.13340701162815094,
+      "learning_rate": 0.0001791571655361524,
+      "loss": 0.171,
+      "step": 7226
+    },
+    {
+      "epoch": 0.5214473826617122,
+      "grad_norm": 0.13060644268989563,
+      "learning_rate": 0.00017915427911675567,
+      "loss": 0.1887,
+      "step": 7227
+    },
+    {
+      "epoch": 0.5215195353367726,
+      "grad_norm": 0.12915129959583282,
+      "learning_rate": 0.00017915139269735893,
+      "loss": 0.1207,
+      "step": 7228
+    },
+    {
+      "epoch": 0.5215916880118331,
+      "grad_norm": 0.11018068343400955,
+      "learning_rate": 0.0001791485062779622,
+      "loss": 0.1748,
+      "step": 7229
+    },
+    {
+      "epoch": 0.5216638406868934,
+      "grad_norm": 0.11852601170539856,
+      "learning_rate": 0.00017914561985856546,
+      "loss": 0.1858,
+      "step": 7230
+    },
+    {
+      "epoch": 0.5217359933619539,
+      "grad_norm": 0.12446949630975723,
+      "learning_rate": 0.00017914273343916872,
+      "loss": 0.1479,
+      "step": 7231
+    },
+    {
+      "epoch": 0.5218081460370143,
+      "grad_norm": 0.15044647455215454,
+      "learning_rate": 0.00017913984701977198,
+      "loss": 0.1515,
+      "step": 7232
+    },
+    {
+      "epoch": 0.5218802987120748,
+      "grad_norm": 0.11914116889238358,
+      "learning_rate": 0.00017913696060037525,
+      "loss": 0.1491,
+      "step": 7233
+    },
+    {
+      "epoch": 0.5219524513871352,
+      "grad_norm": 0.09492188692092896,
+      "learning_rate": 0.0001791340741809785,
+      "loss": 0.1718,
+      "step": 7234
+    },
+    {
+      "epoch": 0.5220246040621956,
+      "grad_norm": 0.13469257950782776,
+      "learning_rate": 0.00017913118776158177,
+      "loss": 0.1585,
+      "step": 7235
+    },
+    {
+      "epoch": 0.5220967567372561,
+      "grad_norm": 0.11164631694555283,
+      "learning_rate": 0.00017912830134218503,
+      "loss": 0.1524,
+      "step": 7236
+    },
+    {
+      "epoch": 0.5221689094123164,
+      "grad_norm": 0.15975221991539001,
+      "learning_rate": 0.0001791254149227883,
+      "loss": 0.219,
+      "step": 7237
+    },
+    {
+      "epoch": 0.5222410620873769,
+      "grad_norm": 0.12551988661289215,
+      "learning_rate": 0.00017912252850339156,
+      "loss": 0.1607,
+      "step": 7238
+    },
+    {
+      "epoch": 0.5223132147624373,
+      "grad_norm": 0.10428985953330994,
+      "learning_rate": 0.00017911964208399482,
+      "loss": 0.1552,
+      "step": 7239
+    },
+    {
+      "epoch": 0.5223853674374977,
+      "grad_norm": 0.11404060572385788,
+      "learning_rate": 0.00017911675566459806,
+      "loss": 0.1778,
+      "step": 7240
+    },
+    {
+      "epoch": 0.5224575201125582,
+      "grad_norm": 0.12849149107933044,
+      "learning_rate": 0.00017911386924520132,
+      "loss": 0.1412,
+      "step": 7241
+    },
+    {
+      "epoch": 0.5225296727876186,
+      "grad_norm": 0.12899062037467957,
+      "learning_rate": 0.0001791109828258046,
+      "loss": 0.1425,
+      "step": 7242
+    },
+    {
+      "epoch": 0.5226018254626791,
+      "grad_norm": 0.11130117624998093,
+      "learning_rate": 0.00017910809640640787,
+      "loss": 0.1233,
+      "step": 7243
+    },
+    {
+      "epoch": 0.5226739781377394,
+      "grad_norm": 0.12267808616161346,
+      "learning_rate": 0.00017910520998701114,
+      "loss": 0.1623,
+      "step": 7244
+    },
+    {
+      "epoch": 0.5227461308127999,
+      "grad_norm": 0.10102290660142899,
+      "learning_rate": 0.00017910232356761437,
+      "loss": 0.1738,
+      "step": 7245
+    },
+    {
+      "epoch": 0.5228182834878603,
+      "grad_norm": 0.10643978416919708,
+      "learning_rate": 0.00017909943714821764,
+      "loss": 0.1551,
+      "step": 7246
+    },
+    {
+      "epoch": 0.5228904361629207,
+      "grad_norm": 0.12526944279670715,
+      "learning_rate": 0.0001790965507288209,
+      "loss": 0.1375,
+      "step": 7247
+    },
+    {
+      "epoch": 0.5229625888379812,
+      "grad_norm": 0.11846707761287689,
+      "learning_rate": 0.00017909366430942416,
+      "loss": 0.1041,
+      "step": 7248
+    },
+    {
+      "epoch": 0.5230347415130416,
+      "grad_norm": 0.15429191291332245,
+      "learning_rate": 0.00017909077789002745,
+      "loss": 0.1766,
+      "step": 7249
+    },
+    {
+      "epoch": 0.5231068941881021,
+      "grad_norm": 0.1419401466846466,
+      "learning_rate": 0.0001790878914706307,
+      "loss": 0.2065,
+      "step": 7250
+    },
+    {
+      "epoch": 0.5231790468631624,
+      "grad_norm": 0.17341232299804688,
+      "learning_rate": 0.00017908500505123395,
+      "loss": 0.1787,
+      "step": 7251
+    },
+    {
+      "epoch": 0.5232511995382229,
+      "grad_norm": 0.11634615063667297,
+      "learning_rate": 0.00017908211863183721,
+      "loss": 0.1473,
+      "step": 7252
+    },
+    {
+      "epoch": 0.5233233522132833,
+      "grad_norm": 0.13800795376300812,
+      "learning_rate": 0.00017907923221244048,
+      "loss": 0.1643,
+      "step": 7253
+    },
+    {
+      "epoch": 0.5233955048883437,
+      "grad_norm": 0.138355553150177,
+      "learning_rate": 0.00017907634579304374,
+      "loss": 0.179,
+      "step": 7254
+    },
+    {
+      "epoch": 0.5234676575634042,
+      "grad_norm": 0.11804528534412384,
+      "learning_rate": 0.000179073459373647,
+      "loss": 0.1722,
+      "step": 7255
+    },
+    {
+      "epoch": 0.5235398102384646,
+      "grad_norm": 0.11212264001369476,
+      "learning_rate": 0.00017907057295425027,
+      "loss": 0.1425,
+      "step": 7256
+    },
+    {
+      "epoch": 0.523611962913525,
+      "grad_norm": 0.1296979933977127,
+      "learning_rate": 0.00017906768653485353,
+      "loss": 0.1733,
+      "step": 7257
+    },
+    {
+      "epoch": 0.5236841155885854,
+      "grad_norm": 0.1127726212143898,
+      "learning_rate": 0.0001790648001154568,
+      "loss": 0.1511,
+      "step": 7258
+    },
+    {
+      "epoch": 0.5237562682636459,
+      "grad_norm": 0.1350761502981186,
+      "learning_rate": 0.00017906191369606005,
+      "loss": 0.1661,
+      "step": 7259
+    },
+    {
+      "epoch": 0.5238284209387063,
+      "grad_norm": 0.10852393507957458,
+      "learning_rate": 0.00017905902727666332,
+      "loss": 0.1762,
+      "step": 7260
+    },
+    {
+      "epoch": 0.5239005736137667,
+      "grad_norm": 0.10514908283948898,
+      "learning_rate": 0.00017905614085726655,
+      "loss": 0.1299,
+      "step": 7261
+    },
+    {
+      "epoch": 0.5239727262888272,
+      "grad_norm": 0.10958047956228256,
+      "learning_rate": 0.00017905325443786982,
+      "loss": 0.1214,
+      "step": 7262
+    },
+    {
+      "epoch": 0.5240448789638876,
+      "grad_norm": 0.09741929918527603,
+      "learning_rate": 0.0001790503680184731,
+      "loss": 0.156,
+      "step": 7263
+    },
+    {
+      "epoch": 0.524117031638948,
+      "grad_norm": 0.10688262432813644,
+      "learning_rate": 0.00017904748159907637,
+      "loss": 0.149,
+      "step": 7264
+    },
+    {
+      "epoch": 0.5241891843140084,
+      "grad_norm": 0.12545837461948395,
+      "learning_rate": 0.00017904459517967963,
+      "loss": 0.1376,
+      "step": 7265
+    },
+    {
+      "epoch": 0.5242613369890688,
+      "grad_norm": 0.09972314536571503,
+      "learning_rate": 0.00017904170876028287,
+      "loss": 0.1856,
+      "step": 7266
+    },
+    {
+      "epoch": 0.5243334896641293,
+      "grad_norm": 0.12121710181236267,
+      "learning_rate": 0.00017903882234088613,
+      "loss": 0.1755,
+      "step": 7267
+    },
+    {
+      "epoch": 0.5244056423391897,
+      "grad_norm": 0.1155657023191452,
+      "learning_rate": 0.0001790359359214894,
+      "loss": 0.1547,
+      "step": 7268
+    },
+    {
+      "epoch": 0.5244777950142502,
+      "grad_norm": 0.12311001867055893,
+      "learning_rate": 0.00017903304950209266,
+      "loss": 0.1646,
+      "step": 7269
+    },
+    {
+      "epoch": 0.5245499476893106,
+      "grad_norm": 0.1461220383644104,
+      "learning_rate": 0.00017903016308269595,
+      "loss": 0.1963,
+      "step": 7270
+    },
+    {
+      "epoch": 0.524622100364371,
+      "grad_norm": 0.1390925645828247,
+      "learning_rate": 0.00017902727666329918,
+      "loss": 0.1499,
+      "step": 7271
+    },
+    {
+      "epoch": 0.5246942530394314,
+      "grad_norm": 0.10540948063135147,
+      "learning_rate": 0.00017902439024390245,
+      "loss": 0.1333,
+      "step": 7272
+    },
+    {
+      "epoch": 0.5247664057144918,
+      "grad_norm": 0.13809522986412048,
+      "learning_rate": 0.0001790215038245057,
+      "loss": 0.1216,
+      "step": 7273
+    },
+    {
+      "epoch": 0.5248385583895523,
+      "grad_norm": 0.13587015867233276,
+      "learning_rate": 0.00017901861740510897,
+      "loss": 0.182,
+      "step": 7274
+    },
+    {
+      "epoch": 0.5249107110646127,
+      "grad_norm": 0.11889909207820892,
+      "learning_rate": 0.00017901573098571223,
+      "loss": 0.1897,
+      "step": 7275
+    },
+    {
+      "epoch": 0.5249828637396732,
+      "grad_norm": 0.1114877238869667,
+      "learning_rate": 0.0001790128445663155,
+      "loss": 0.138,
+      "step": 7276
+    },
+    {
+      "epoch": 0.5250550164147336,
+      "grad_norm": 0.10427363961935043,
+      "learning_rate": 0.00017900995814691876,
+      "loss": 0.2297,
+      "step": 7277
+    },
+    {
+      "epoch": 0.525127169089794,
+      "grad_norm": 0.11984950304031372,
+      "learning_rate": 0.00017900707172752202,
+      "loss": 0.1384,
+      "step": 7278
+    },
+    {
+      "epoch": 0.5251993217648544,
+      "grad_norm": 0.10204624384641647,
+      "learning_rate": 0.00017900418530812529,
+      "loss": 0.133,
+      "step": 7279
+    },
+    {
+      "epoch": 0.5252714744399148,
+      "grad_norm": 0.1155349463224411,
+      "learning_rate": 0.00017900129888872855,
+      "loss": 0.1434,
+      "step": 7280
+    },
+    {
+      "epoch": 0.5253436271149753,
+      "grad_norm": 0.10468247532844543,
+      "learning_rate": 0.0001789984124693318,
+      "loss": 0.165,
+      "step": 7281
+    },
+    {
+      "epoch": 0.5254157797900357,
+      "grad_norm": 0.14522889256477356,
+      "learning_rate": 0.00017899552604993505,
+      "loss": 0.1742,
+      "step": 7282
+    },
+    {
+      "epoch": 0.5254879324650962,
+      "grad_norm": 0.11063766479492188,
+      "learning_rate": 0.0001789926396305383,
+      "loss": 0.159,
+      "step": 7283
+    },
+    {
+      "epoch": 0.5255600851401566,
+      "grad_norm": 0.0959901511669159,
+      "learning_rate": 0.0001789897532111416,
+      "loss": 0.1331,
+      "step": 7284
+    },
+    {
+      "epoch": 0.525632237815217,
+      "grad_norm": 0.10436305403709412,
+      "learning_rate": 0.00017898686679174486,
+      "loss": 0.154,
+      "step": 7285
+    },
+    {
+      "epoch": 0.5257043904902774,
+      "grad_norm": 0.14302986860275269,
+      "learning_rate": 0.00017898398037234813,
+      "loss": 0.153,
+      "step": 7286
+    },
+    {
+      "epoch": 0.5257765431653378,
+      "grad_norm": 0.10951834172010422,
+      "learning_rate": 0.00017898109395295136,
+      "loss": 0.1038,
+      "step": 7287
+    },
+    {
+      "epoch": 0.5258486958403983,
+      "grad_norm": 0.14024408161640167,
+      "learning_rate": 0.00017897820753355462,
+      "loss": 0.1774,
+      "step": 7288
+    },
+    {
+      "epoch": 0.5259208485154587,
+      "grad_norm": 0.14749003946781158,
+      "learning_rate": 0.0001789753211141579,
+      "loss": 0.1897,
+      "step": 7289
+    },
+    {
+      "epoch": 0.5259930011905192,
+      "grad_norm": 0.1298619657754898,
+      "learning_rate": 0.00017897243469476115,
+      "loss": 0.1918,
+      "step": 7290
+    },
+    {
+      "epoch": 0.5260651538655796,
+      "grad_norm": 0.1270139515399933,
+      "learning_rate": 0.00017896954827536444,
+      "loss": 0.1808,
+      "step": 7291
+    },
+    {
+      "epoch": 0.52613730654064,
+      "grad_norm": 0.11911728233098984,
+      "learning_rate": 0.00017896666185596768,
+      "loss": 0.1621,
+      "step": 7292
+    },
+    {
+      "epoch": 0.5262094592157004,
+      "grad_norm": 0.12327376753091812,
+      "learning_rate": 0.00017896377543657094,
+      "loss": 0.1415,
+      "step": 7293
+    },
+    {
+      "epoch": 0.5262816118907608,
+      "grad_norm": 0.10306958854198456,
+      "learning_rate": 0.0001789608890171742,
+      "loss": 0.1708,
+      "step": 7294
+    },
+    {
+      "epoch": 0.5263537645658213,
+      "grad_norm": 0.14238040149211884,
+      "learning_rate": 0.00017895800259777747,
+      "loss": 0.1303,
+      "step": 7295
+    },
+    {
+      "epoch": 0.5264259172408817,
+      "grad_norm": 0.11975669860839844,
+      "learning_rate": 0.00017895511617838073,
+      "loss": 0.1656,
+      "step": 7296
+    },
+    {
+      "epoch": 0.5264980699159422,
+      "grad_norm": 0.11540134996175766,
+      "learning_rate": 0.000178952229758984,
+      "loss": 0.1431,
+      "step": 7297
+    },
+    {
+      "epoch": 0.5265702225910026,
+      "grad_norm": 0.11716841161251068,
+      "learning_rate": 0.00017894934333958725,
+      "loss": 0.1022,
+      "step": 7298
+    },
+    {
+      "epoch": 0.526642375266063,
+      "grad_norm": 0.1370508372783661,
+      "learning_rate": 0.00017894645692019052,
+      "loss": 0.1944,
+      "step": 7299
+    },
+    {
+      "epoch": 0.5267145279411234,
+      "grad_norm": 0.14596012234687805,
+      "learning_rate": 0.00017894357050079378,
+      "loss": 0.1265,
+      "step": 7300
+    },
+    {
+      "epoch": 0.5267866806161838,
+      "grad_norm": 0.1146797239780426,
+      "learning_rate": 0.00017894068408139704,
+      "loss": 0.1364,
+      "step": 7301
+    },
+    {
+      "epoch": 0.5268588332912443,
+      "grad_norm": 0.12097577750682831,
+      "learning_rate": 0.0001789377976620003,
+      "loss": 0.1279,
+      "step": 7302
+    },
+    {
+      "epoch": 0.5269309859663047,
+      "grad_norm": 0.11120638996362686,
+      "learning_rate": 0.00017893491124260354,
+      "loss": 0.1288,
+      "step": 7303
+    },
+    {
+      "epoch": 0.5270031386413652,
+      "grad_norm": 0.11464061588048935,
+      "learning_rate": 0.0001789320248232068,
+      "loss": 0.1699,
+      "step": 7304
+    },
+    {
+      "epoch": 0.5270752913164256,
+      "grad_norm": 0.12002135068178177,
+      "learning_rate": 0.0001789291384038101,
+      "loss": 0.1712,
+      "step": 7305
+    },
+    {
+      "epoch": 0.5271474439914859,
+      "grad_norm": 0.1227092370390892,
+      "learning_rate": 0.00017892625198441336,
+      "loss": 0.2047,
+      "step": 7306
+    },
+    {
+      "epoch": 0.5272195966665464,
+      "grad_norm": 0.11020387709140778,
+      "learning_rate": 0.00017892336556501662,
+      "loss": 0.1425,
+      "step": 7307
+    },
+    {
+      "epoch": 0.5272917493416068,
+      "grad_norm": 0.0997639000415802,
+      "learning_rate": 0.00017892047914561986,
+      "loss": 0.1449,
+      "step": 7308
+    },
+    {
+      "epoch": 0.5273639020166673,
+      "grad_norm": 0.1353057473897934,
+      "learning_rate": 0.00017891759272622312,
+      "loss": 0.167,
+      "step": 7309
+    },
+    {
+      "epoch": 0.5274360546917277,
+      "grad_norm": 0.14489580690860748,
+      "learning_rate": 0.00017891470630682638,
+      "loss": 0.1542,
+      "step": 7310
+    },
+    {
+      "epoch": 0.5275082073667882,
+      "grad_norm": 0.12727104127407074,
+      "learning_rate": 0.00017891181988742964,
+      "loss": 0.1562,
+      "step": 7311
+    },
+    {
+      "epoch": 0.5275803600418486,
+      "grad_norm": 0.11234887689352036,
+      "learning_rate": 0.00017890893346803293,
+      "loss": 0.14,
+      "step": 7312
+    },
+    {
+      "epoch": 0.5276525127169089,
+      "grad_norm": 0.11045292019844055,
+      "learning_rate": 0.00017890604704863617,
+      "loss": 0.1741,
+      "step": 7313
+    },
+    {
+      "epoch": 0.5277246653919694,
+      "grad_norm": 0.1492622047662735,
+      "learning_rate": 0.00017890316062923943,
+      "loss": 0.147,
+      "step": 7314
+    },
+    {
+      "epoch": 0.5277968180670298,
+      "grad_norm": 0.1515076458454132,
+      "learning_rate": 0.0001789002742098427,
+      "loss": 0.2006,
+      "step": 7315
+    },
+    {
+      "epoch": 0.5278689707420903,
+      "grad_norm": 0.1236778125166893,
+      "learning_rate": 0.00017889738779044596,
+      "loss": 0.1518,
+      "step": 7316
+    },
+    {
+      "epoch": 0.5279411234171507,
+      "grad_norm": 0.15923312306404114,
+      "learning_rate": 0.00017889450137104922,
+      "loss": 0.1619,
+      "step": 7317
+    },
+    {
+      "epoch": 0.5280132760922112,
+      "grad_norm": 0.10383324325084686,
+      "learning_rate": 0.00017889161495165249,
+      "loss": 0.1007,
+      "step": 7318
+    },
+    {
+      "epoch": 0.5280854287672715,
+      "grad_norm": 0.11270064115524292,
+      "learning_rate": 0.00017888872853225575,
+      "loss": 0.194,
+      "step": 7319
+    },
+    {
+      "epoch": 0.5281575814423319,
+      "grad_norm": 0.12055972963571548,
+      "learning_rate": 0.000178885842112859,
+      "loss": 0.2315,
+      "step": 7320
+    },
+    {
+      "epoch": 0.5282297341173924,
+      "grad_norm": 0.11434181034564972,
+      "learning_rate": 0.00017888295569346227,
+      "loss": 0.1195,
+      "step": 7321
+    },
+    {
+      "epoch": 0.5283018867924528,
+      "grad_norm": 0.13705787062644958,
+      "learning_rate": 0.00017888006927406554,
+      "loss": 0.1512,
+      "step": 7322
+    },
+    {
+      "epoch": 0.5283740394675133,
+      "grad_norm": 0.09598613530397415,
+      "learning_rate": 0.0001788771828546688,
+      "loss": 0.1748,
+      "step": 7323
+    },
+    {
+      "epoch": 0.5284461921425737,
+      "grad_norm": 0.11793044209480286,
+      "learning_rate": 0.00017887429643527204,
+      "loss": 0.1213,
+      "step": 7324
+    },
+    {
+      "epoch": 0.5285183448176342,
+      "grad_norm": 0.15571290254592896,
+      "learning_rate": 0.0001788714100158753,
+      "loss": 0.1339,
+      "step": 7325
+    },
+    {
+      "epoch": 0.5285904974926945,
+      "grad_norm": 0.11369923502206802,
+      "learning_rate": 0.0001788685235964786,
+      "loss": 0.1997,
+      "step": 7326
+    },
+    {
+      "epoch": 0.5286626501677549,
+      "grad_norm": 0.0989232212305069,
+      "learning_rate": 0.00017886563717708185,
+      "loss": 0.2261,
+      "step": 7327
+    },
+    {
+      "epoch": 0.5287348028428154,
+      "grad_norm": 0.1456446349620819,
+      "learning_rate": 0.00017886275075768511,
+      "loss": 0.1508,
+      "step": 7328
+    },
+    {
+      "epoch": 0.5288069555178758,
+      "grad_norm": 0.13999877870082855,
+      "learning_rate": 0.00017885986433828835,
+      "loss": 0.1501,
+      "step": 7329
+    },
+    {
+      "epoch": 0.5288791081929363,
+      "grad_norm": 0.10752904415130615,
+      "learning_rate": 0.0001788569779188916,
+      "loss": 0.1255,
+      "step": 7330
+    },
+    {
+      "epoch": 0.5289512608679967,
+      "grad_norm": 0.11771759390830994,
+      "learning_rate": 0.00017885409149949488,
+      "loss": 0.1705,
+      "step": 7331
+    },
+    {
+      "epoch": 0.5290234135430572,
+      "grad_norm": 0.1180545911192894,
+      "learning_rate": 0.00017885120508009814,
+      "loss": 0.1774,
+      "step": 7332
+    },
+    {
+      "epoch": 0.5290955662181175,
+      "grad_norm": 0.14889933168888092,
+      "learning_rate": 0.00017884831866070143,
+      "loss": 0.1587,
+      "step": 7333
+    },
+    {
+      "epoch": 0.5291677188931779,
+      "grad_norm": 0.14299236238002777,
+      "learning_rate": 0.00017884543224130466,
+      "loss": 0.1442,
+      "step": 7334
+    },
+    {
+      "epoch": 0.5292398715682384,
+      "grad_norm": 0.1142808049917221,
+      "learning_rate": 0.00017884254582190793,
+      "loss": 0.1215,
+      "step": 7335
+    },
+    {
+      "epoch": 0.5293120242432988,
+      "grad_norm": 0.12313350290060043,
+      "learning_rate": 0.0001788396594025112,
+      "loss": 0.1455,
+      "step": 7336
+    },
+    {
+      "epoch": 0.5293841769183593,
+      "grad_norm": 0.14371417462825775,
+      "learning_rate": 0.00017883677298311445,
+      "loss": 0.1279,
+      "step": 7337
+    },
+    {
+      "epoch": 0.5294563295934197,
+      "grad_norm": 0.10964024811983109,
+      "learning_rate": 0.00017883388656371772,
+      "loss": 0.1436,
+      "step": 7338
+    },
+    {
+      "epoch": 0.5295284822684801,
+      "grad_norm": 0.1285284459590912,
+      "learning_rate": 0.00017883100014432098,
+      "loss": 0.1519,
+      "step": 7339
+    },
+    {
+      "epoch": 0.5296006349435405,
+      "grad_norm": 0.1490326076745987,
+      "learning_rate": 0.00017882811372492424,
+      "loss": 0.1315,
+      "step": 7340
+    },
+    {
+      "epoch": 0.5296727876186009,
+      "grad_norm": 0.13214173913002014,
+      "learning_rate": 0.0001788252273055275,
+      "loss": 0.156,
+      "step": 7341
+    },
+    {
+      "epoch": 0.5297449402936614,
+      "grad_norm": 0.1099911779165268,
+      "learning_rate": 0.00017882234088613077,
+      "loss": 0.1452,
+      "step": 7342
+    },
+    {
+      "epoch": 0.5298170929687218,
+      "grad_norm": 0.14715947210788727,
+      "learning_rate": 0.00017881945446673403,
+      "loss": 0.1482,
+      "step": 7343
+    },
+    {
+      "epoch": 0.5298892456437823,
+      "grad_norm": 0.11125681549310684,
+      "learning_rate": 0.0001788165680473373,
+      "loss": 0.1225,
+      "step": 7344
+    },
+    {
+      "epoch": 0.5299613983188427,
+      "grad_norm": 0.11176412552595139,
+      "learning_rate": 0.00017881368162794056,
+      "loss": 0.1581,
+      "step": 7345
+    },
+    {
+      "epoch": 0.5300335509939031,
+      "grad_norm": 0.13218072056770325,
+      "learning_rate": 0.0001788107952085438,
+      "loss": 0.1549,
+      "step": 7346
+    },
+    {
+      "epoch": 0.5301057036689635,
+      "grad_norm": 0.13086387515068054,
+      "learning_rate": 0.00017880790878914706,
+      "loss": 0.1645,
+      "step": 7347
+    },
+    {
+      "epoch": 0.5301778563440239,
+      "grad_norm": 0.1359982043504715,
+      "learning_rate": 0.00017880502236975035,
+      "loss": 0.1634,
+      "step": 7348
+    },
+    {
+      "epoch": 0.5302500090190844,
+      "grad_norm": 0.0855100080370903,
+      "learning_rate": 0.0001788021359503536,
+      "loss": 0.1359,
+      "step": 7349
+    },
+    {
+      "epoch": 0.5303221616941448,
+      "grad_norm": 0.154966801404953,
+      "learning_rate": 0.00017879924953095687,
+      "loss": 0.2241,
+      "step": 7350
+    },
+    {
+      "epoch": 0.5303943143692053,
+      "grad_norm": 0.1637096107006073,
+      "learning_rate": 0.0001787963631115601,
+      "loss": 0.1386,
+      "step": 7351
+    },
+    {
+      "epoch": 0.5304664670442657,
+      "grad_norm": 0.14011329412460327,
+      "learning_rate": 0.00017879347669216337,
+      "loss": 0.1895,
+      "step": 7352
+    },
+    {
+      "epoch": 0.5305386197193261,
+      "grad_norm": 0.13018305599689484,
+      "learning_rate": 0.00017879059027276663,
+      "loss": 0.1221,
+      "step": 7353
+    },
+    {
+      "epoch": 0.5306107723943865,
+      "grad_norm": 0.13092190027236938,
+      "learning_rate": 0.0001787877038533699,
+      "loss": 0.1585,
+      "step": 7354
+    },
+    {
+      "epoch": 0.5306829250694469,
+      "grad_norm": 0.10489319264888763,
+      "learning_rate": 0.00017878481743397319,
+      "loss": 0.1557,
+      "step": 7355
+    },
+    {
+      "epoch": 0.5307550777445074,
+      "grad_norm": 0.14457325637340546,
+      "learning_rate": 0.00017878193101457642,
+      "loss": 0.1921,
+      "step": 7356
+    },
+    {
+      "epoch": 0.5308272304195678,
+      "grad_norm": 0.12561573088169098,
+      "learning_rate": 0.00017877904459517969,
+      "loss": 0.1537,
+      "step": 7357
+    },
+    {
+      "epoch": 0.5308993830946283,
+      "grad_norm": 0.11675235629081726,
+      "learning_rate": 0.00017877615817578295,
+      "loss": 0.1242,
+      "step": 7358
+    },
+    {
+      "epoch": 0.5309715357696887,
+      "grad_norm": 0.12591566145420074,
+      "learning_rate": 0.0001787732717563862,
+      "loss": 0.1519,
+      "step": 7359
+    },
+    {
+      "epoch": 0.5310436884447491,
+      "grad_norm": 0.10922004282474518,
+      "learning_rate": 0.00017877038533698947,
+      "loss": 0.141,
+      "step": 7360
+    },
+    {
+      "epoch": 0.5311158411198095,
+      "grad_norm": 0.12277299910783768,
+      "learning_rate": 0.00017876749891759274,
+      "loss": 0.1774,
+      "step": 7361
+    },
+    {
+      "epoch": 0.5311879937948699,
+      "grad_norm": 0.14493514597415924,
+      "learning_rate": 0.000178764612498196,
+      "loss": 0.1453,
+      "step": 7362
+    },
+    {
+      "epoch": 0.5312601464699304,
+      "grad_norm": 0.10822132229804993,
+      "learning_rate": 0.00017876172607879926,
+      "loss": 0.1953,
+      "step": 7363
+    },
+    {
+      "epoch": 0.5313322991449908,
+      "grad_norm": 0.12490901350975037,
+      "learning_rate": 0.00017875883965940253,
+      "loss": 0.1105,
+      "step": 7364
+    },
+    {
+      "epoch": 0.5314044518200512,
+      "grad_norm": 0.11226848512887955,
+      "learning_rate": 0.0001787559532400058,
+      "loss": 0.1592,
+      "step": 7365
+    },
+    {
+      "epoch": 0.5314766044951117,
+      "grad_norm": 0.13065274059772491,
+      "learning_rate": 0.00017875306682060905,
+      "loss": 0.1858,
+      "step": 7366
+    },
+    {
+      "epoch": 0.5315487571701721,
+      "grad_norm": 0.14060640335083008,
+      "learning_rate": 0.0001787501804012123,
+      "loss": 0.1615,
+      "step": 7367
+    },
+    {
+      "epoch": 0.5316209098452325,
+      "grad_norm": 0.12865915894508362,
+      "learning_rate": 0.00017874729398181555,
+      "loss": 0.139,
+      "step": 7368
+    },
+    {
+      "epoch": 0.5316930625202929,
+      "grad_norm": 0.12481102347373962,
+      "learning_rate": 0.00017874440756241884,
+      "loss": 0.1293,
+      "step": 7369
+    },
+    {
+      "epoch": 0.5317652151953534,
+      "grad_norm": 0.11680778861045837,
+      "learning_rate": 0.0001787415211430221,
+      "loss": 0.1598,
+      "step": 7370
+    },
+    {
+      "epoch": 0.5318373678704138,
+      "grad_norm": 0.09632851928472519,
+      "learning_rate": 0.00017873863472362537,
+      "loss": 0.1169,
+      "step": 7371
+    },
+    {
+      "epoch": 0.5319095205454742,
+      "grad_norm": 0.13258910179138184,
+      "learning_rate": 0.0001787357483042286,
+      "loss": 0.1608,
+      "step": 7372
+    },
+    {
+      "epoch": 0.5319816732205347,
+      "grad_norm": 0.125102698802948,
+      "learning_rate": 0.00017873286188483186,
+      "loss": 0.1477,
+      "step": 7373
+    },
+    {
+      "epoch": 0.5320538258955951,
+      "grad_norm": 0.10045205801725388,
+      "learning_rate": 0.00017872997546543513,
+      "loss": 0.1474,
+      "step": 7374
+    },
+    {
+      "epoch": 0.5321259785706555,
+      "grad_norm": 0.11710695177316666,
+      "learning_rate": 0.0001787270890460384,
+      "loss": 0.1835,
+      "step": 7375
+    },
+    {
+      "epoch": 0.5321981312457159,
+      "grad_norm": 0.11347135901451111,
+      "learning_rate": 0.00017872420262664168,
+      "loss": 0.1541,
+      "step": 7376
+    },
+    {
+      "epoch": 0.5322702839207764,
+      "grad_norm": 0.09853716194629669,
+      "learning_rate": 0.00017872131620724492,
+      "loss": 0.1138,
+      "step": 7377
+    },
+    {
+      "epoch": 0.5323424365958368,
+      "grad_norm": 0.11942870169878006,
+      "learning_rate": 0.00017871842978784818,
+      "loss": 0.1378,
+      "step": 7378
+    },
+    {
+      "epoch": 0.5324145892708972,
+      "grad_norm": 0.08775313943624496,
+      "learning_rate": 0.00017871554336845144,
+      "loss": 0.1425,
+      "step": 7379
+    },
+    {
+      "epoch": 0.5324867419459577,
+      "grad_norm": 0.13760487735271454,
+      "learning_rate": 0.0001787126569490547,
+      "loss": 0.1515,
+      "step": 7380
+    },
+    {
+      "epoch": 0.532558894621018,
+      "grad_norm": 0.12147516757249832,
+      "learning_rate": 0.00017870977052965797,
+      "loss": 0.1927,
+      "step": 7381
+    },
+    {
+      "epoch": 0.5326310472960785,
+      "grad_norm": 0.11707384139299393,
+      "learning_rate": 0.00017870688411026123,
+      "loss": 0.1447,
+      "step": 7382
+    },
+    {
+      "epoch": 0.5327031999711389,
+      "grad_norm": 0.14905457198619843,
+      "learning_rate": 0.0001787039976908645,
+      "loss": 0.1562,
+      "step": 7383
+    },
+    {
+      "epoch": 0.5327753526461994,
+      "grad_norm": 0.11915255337953568,
+      "learning_rate": 0.00017870111127146776,
+      "loss": 0.1564,
+      "step": 7384
+    },
+    {
+      "epoch": 0.5328475053212598,
+      "grad_norm": 0.12020910531282425,
+      "learning_rate": 0.00017869822485207102,
+      "loss": 0.1396,
+      "step": 7385
+    },
+    {
+      "epoch": 0.5329196579963202,
+      "grad_norm": 0.14836715161800385,
+      "learning_rate": 0.00017869533843267428,
+      "loss": 0.1485,
+      "step": 7386
+    },
+    {
+      "epoch": 0.5329918106713807,
+      "grad_norm": 0.11415567994117737,
+      "learning_rate": 0.00017869245201327755,
+      "loss": 0.1299,
+      "step": 7387
+    },
+    {
+      "epoch": 0.533063963346441,
+      "grad_norm": 0.09554366767406464,
+      "learning_rate": 0.00017868956559388078,
+      "loss": 0.1387,
+      "step": 7388
+    },
+    {
+      "epoch": 0.5331361160215015,
+      "grad_norm": 0.12184850126504898,
+      "learning_rate": 0.00017868667917448404,
+      "loss": 0.1311,
+      "step": 7389
+    },
+    {
+      "epoch": 0.5332082686965619,
+      "grad_norm": 0.10899917781352997,
+      "learning_rate": 0.00017868379275508733,
+      "loss": 0.1887,
+      "step": 7390
+    },
+    {
+      "epoch": 0.5332804213716223,
+      "grad_norm": 0.12960898876190186,
+      "learning_rate": 0.0001786809063356906,
+      "loss": 0.1725,
+      "step": 7391
+    },
+    {
+      "epoch": 0.5333525740466828,
+      "grad_norm": 0.11854162812232971,
+      "learning_rate": 0.00017867801991629386,
+      "loss": 0.1437,
+      "step": 7392
+    },
+    {
+      "epoch": 0.5334247267217432,
+      "grad_norm": 0.15556089580059052,
+      "learning_rate": 0.0001786751334968971,
+      "loss": 0.1601,
+      "step": 7393
+    },
+    {
+      "epoch": 0.5334968793968037,
+      "grad_norm": 0.13274702429771423,
+      "learning_rate": 0.00017867224707750036,
+      "loss": 0.1342,
+      "step": 7394
+    },
+    {
+      "epoch": 0.533569032071864,
+      "grad_norm": 0.1248529702425003,
+      "learning_rate": 0.00017866936065810362,
+      "loss": 0.1499,
+      "step": 7395
+    },
+    {
+      "epoch": 0.5336411847469245,
+      "grad_norm": 0.12986032664775848,
+      "learning_rate": 0.00017866647423870688,
+      "loss": 0.1588,
+      "step": 7396
+    },
+    {
+      "epoch": 0.5337133374219849,
+      "grad_norm": 0.10592981427907944,
+      "learning_rate": 0.00017866358781931017,
+      "loss": 0.1684,
+      "step": 7397
+    },
+    {
+      "epoch": 0.5337854900970453,
+      "grad_norm": 0.12006526440382004,
+      "learning_rate": 0.0001786607013999134,
+      "loss": 0.1408,
+      "step": 7398
+    },
+    {
+      "epoch": 0.5338576427721058,
+      "grad_norm": 0.13237181305885315,
+      "learning_rate": 0.00017865781498051667,
+      "loss": 0.1512,
+      "step": 7399
+    },
+    {
+      "epoch": 0.5339297954471662,
+      "grad_norm": 0.12144875526428223,
+      "learning_rate": 0.00017865492856111994,
+      "loss": 0.1505,
+      "step": 7400
+    },
+    {
+      "epoch": 0.5340019481222267,
+      "grad_norm": 0.14751940965652466,
+      "learning_rate": 0.0001786520421417232,
+      "loss": 0.1466,
+      "step": 7401
+    },
+    {
+      "epoch": 0.534074100797287,
+      "grad_norm": 0.10068172216415405,
+      "learning_rate": 0.00017864915572232646,
+      "loss": 0.0937,
+      "step": 7402
+    },
+    {
+      "epoch": 0.5341462534723475,
+      "grad_norm": 0.13573504984378815,
+      "learning_rate": 0.00017864626930292973,
+      "loss": 0.1119,
+      "step": 7403
+    },
+    {
+      "epoch": 0.5342184061474079,
+      "grad_norm": 0.1610974818468094,
+      "learning_rate": 0.000178643382883533,
+      "loss": 0.1449,
+      "step": 7404
+    },
+    {
+      "epoch": 0.5342905588224683,
+      "grad_norm": 0.12172017246484756,
+      "learning_rate": 0.00017864049646413625,
+      "loss": 0.1585,
+      "step": 7405
+    },
+    {
+      "epoch": 0.5343627114975288,
+      "grad_norm": 0.11596543341875076,
+      "learning_rate": 0.00017863761004473951,
+      "loss": 0.1571,
+      "step": 7406
+    },
+    {
+      "epoch": 0.5344348641725892,
+      "grad_norm": 0.12293829023838043,
+      "learning_rate": 0.00017863472362534278,
+      "loss": 0.1388,
+      "step": 7407
+    },
+    {
+      "epoch": 0.5345070168476497,
+      "grad_norm": 0.12956799566745758,
+      "learning_rate": 0.00017863183720594604,
+      "loss": 0.1046,
+      "step": 7408
+    },
+    {
+      "epoch": 0.53457916952271,
+      "grad_norm": 0.13919049501419067,
+      "learning_rate": 0.00017862895078654928,
+      "loss": 0.1617,
+      "step": 7409
+    },
+    {
+      "epoch": 0.5346513221977705,
+      "grad_norm": 0.12343361228704453,
+      "learning_rate": 0.00017862606436715254,
+      "loss": 0.1887,
+      "step": 7410
+    },
+    {
+      "epoch": 0.5347234748728309,
+      "grad_norm": 0.1412951499223709,
+      "learning_rate": 0.00017862317794775583,
+      "loss": 0.1573,
+      "step": 7411
+    },
+    {
+      "epoch": 0.5347956275478913,
+      "grad_norm": 0.13477957248687744,
+      "learning_rate": 0.0001786202915283591,
+      "loss": 0.1599,
+      "step": 7412
+    },
+    {
+      "epoch": 0.5348677802229518,
+      "grad_norm": 0.14682935178279877,
+      "learning_rate": 0.00017861740510896235,
+      "loss": 0.1528,
+      "step": 7413
+    },
+    {
+      "epoch": 0.5349399328980122,
+      "grad_norm": 0.14483290910720825,
+      "learning_rate": 0.0001786145186895656,
+      "loss": 0.1552,
+      "step": 7414
+    },
+    {
+      "epoch": 0.5350120855730727,
+      "grad_norm": 0.12272778898477554,
+      "learning_rate": 0.00017861163227016885,
+      "loss": 0.1441,
+      "step": 7415
+    },
+    {
+      "epoch": 0.535084238248133,
+      "grad_norm": 0.09411150962114334,
+      "learning_rate": 0.00017860874585077212,
+      "loss": 0.1685,
+      "step": 7416
+    },
+    {
+      "epoch": 0.5351563909231934,
+      "grad_norm": 0.10887883603572845,
+      "learning_rate": 0.00017860585943137538,
+      "loss": 0.1683,
+      "step": 7417
+    },
+    {
+      "epoch": 0.5352285435982539,
+      "grad_norm": 0.1251358836889267,
+      "learning_rate": 0.00017860297301197867,
+      "loss": 0.1843,
+      "step": 7418
+    },
+    {
+      "epoch": 0.5353006962733143,
+      "grad_norm": 0.11616582423448563,
+      "learning_rate": 0.0001786000865925819,
+      "loss": 0.1655,
+      "step": 7419
+    },
+    {
+      "epoch": 0.5353728489483748,
+      "grad_norm": 0.10984798520803452,
+      "learning_rate": 0.00017859720017318517,
+      "loss": 0.1494,
+      "step": 7420
+    },
+    {
+      "epoch": 0.5354450016234352,
+      "grad_norm": 0.12896649539470673,
+      "learning_rate": 0.00017859431375378843,
+      "loss": 0.1559,
+      "step": 7421
+    },
+    {
+      "epoch": 0.5355171542984957,
+      "grad_norm": 0.1121734008193016,
+      "learning_rate": 0.0001785914273343917,
+      "loss": 0.1404,
+      "step": 7422
+    },
+    {
+      "epoch": 0.535589306973556,
+      "grad_norm": 0.12782329320907593,
+      "learning_rate": 0.00017858854091499496,
+      "loss": 0.1403,
+      "step": 7423
+    },
+    {
+      "epoch": 0.5356614596486164,
+      "grad_norm": 0.1383926272392273,
+      "learning_rate": 0.00017858565449559822,
+      "loss": 0.1697,
+      "step": 7424
+    },
+    {
+      "epoch": 0.5357336123236769,
+      "grad_norm": 0.12111084908246994,
+      "learning_rate": 0.00017858276807620148,
+      "loss": 0.1076,
+      "step": 7425
+    },
+    {
+      "epoch": 0.5358057649987373,
+      "grad_norm": 0.1440785825252533,
+      "learning_rate": 0.00017857988165680475,
+      "loss": 0.1437,
+      "step": 7426
+    },
+    {
+      "epoch": 0.5358779176737978,
+      "grad_norm": 0.10769060254096985,
+      "learning_rate": 0.000178576995237408,
+      "loss": 0.1804,
+      "step": 7427
+    },
+    {
+      "epoch": 0.5359500703488582,
+      "grad_norm": 0.13949033617973328,
+      "learning_rate": 0.00017857410881801127,
+      "loss": 0.1988,
+      "step": 7428
+    },
+    {
+      "epoch": 0.5360222230239187,
+      "grad_norm": 0.1220717653632164,
+      "learning_rate": 0.00017857122239861453,
+      "loss": 0.1741,
+      "step": 7429
+    },
+    {
+      "epoch": 0.536094375698979,
+      "grad_norm": 0.1301799863576889,
+      "learning_rate": 0.00017856833597921777,
+      "loss": 0.1792,
+      "step": 7430
+    },
+    {
+      "epoch": 0.5361665283740394,
+      "grad_norm": 0.1295902132987976,
+      "learning_rate": 0.00017856544955982103,
+      "loss": 0.0954,
+      "step": 7431
+    },
+    {
+      "epoch": 0.5362386810490999,
+      "grad_norm": 0.12458403408527374,
+      "learning_rate": 0.00017856256314042432,
+      "loss": 0.1745,
+      "step": 7432
+    },
+    {
+      "epoch": 0.5363108337241603,
+      "grad_norm": 0.14518317580223083,
+      "learning_rate": 0.00017855967672102759,
+      "loss": 0.2067,
+      "step": 7433
+    },
+    {
+      "epoch": 0.5363829863992208,
+      "grad_norm": 0.11274714767932892,
+      "learning_rate": 0.00017855679030163085,
+      "loss": 0.1549,
+      "step": 7434
+    },
+    {
+      "epoch": 0.5364551390742812,
+      "grad_norm": 0.1241229847073555,
+      "learning_rate": 0.00017855390388223408,
+      "loss": 0.1385,
+      "step": 7435
+    },
+    {
+      "epoch": 0.5365272917493417,
+      "grad_norm": 0.09956695884466171,
+      "learning_rate": 0.00017855101746283735,
+      "loss": 0.1277,
+      "step": 7436
+    },
+    {
+      "epoch": 0.536599444424402,
+      "grad_norm": 0.1887374222278595,
+      "learning_rate": 0.0001785481310434406,
+      "loss": 0.1822,
+      "step": 7437
+    },
+    {
+      "epoch": 0.5366715970994624,
+      "grad_norm": 0.09727919101715088,
+      "learning_rate": 0.00017854524462404387,
+      "loss": 0.1651,
+      "step": 7438
+    },
+    {
+      "epoch": 0.5367437497745229,
+      "grad_norm": 0.13736706972122192,
+      "learning_rate": 0.00017854235820464716,
+      "loss": 0.1952,
+      "step": 7439
+    },
+    {
+      "epoch": 0.5368159024495833,
+      "grad_norm": 0.1056545078754425,
+      "learning_rate": 0.0001785394717852504,
+      "loss": 0.1501,
+      "step": 7440
+    },
+    {
+      "epoch": 0.5368880551246438,
+      "grad_norm": 0.12971024215221405,
+      "learning_rate": 0.00017853658536585366,
+      "loss": 0.1367,
+      "step": 7441
+    },
+    {
+      "epoch": 0.5369602077997042,
+      "grad_norm": 0.13022993505001068,
+      "learning_rate": 0.00017853369894645692,
+      "loss": 0.1455,
+      "step": 7442
+    },
+    {
+      "epoch": 0.5370323604747645,
+      "grad_norm": 0.09962315857410431,
+      "learning_rate": 0.0001785308125270602,
+      "loss": 0.1498,
+      "step": 7443
+    },
+    {
+      "epoch": 0.537104513149825,
+      "grad_norm": 0.13888941705226898,
+      "learning_rate": 0.00017852792610766345,
+      "loss": 0.2137,
+      "step": 7444
+    },
+    {
+      "epoch": 0.5371766658248854,
+      "grad_norm": 0.12977571785449982,
+      "learning_rate": 0.00017852503968826671,
+      "loss": 0.1779,
+      "step": 7445
+    },
+    {
+      "epoch": 0.5372488184999459,
+      "grad_norm": 0.14706486463546753,
+      "learning_rate": 0.00017852215326886998,
+      "loss": 0.1396,
+      "step": 7446
+    },
+    {
+      "epoch": 0.5373209711750063,
+      "grad_norm": 0.11621315777301788,
+      "learning_rate": 0.00017851926684947324,
+      "loss": 0.1595,
+      "step": 7447
+    },
+    {
+      "epoch": 0.5373931238500668,
+      "grad_norm": 0.1178140938282013,
+      "learning_rate": 0.0001785163804300765,
+      "loss": 0.1704,
+      "step": 7448
+    },
+    {
+      "epoch": 0.5374652765251272,
+      "grad_norm": 0.12601010501384735,
+      "learning_rate": 0.00017851349401067977,
+      "loss": 0.1725,
+      "step": 7449
+    },
+    {
+      "epoch": 0.5375374292001875,
+      "grad_norm": 0.12485157698392868,
+      "learning_rate": 0.00017851060759128303,
+      "loss": 0.1638,
+      "step": 7450
+    },
+    {
+      "epoch": 0.537609581875248,
+      "grad_norm": 0.14232799410820007,
+      "learning_rate": 0.00017850772117188626,
+      "loss": 0.1912,
+      "step": 7451
+    },
+    {
+      "epoch": 0.5376817345503084,
+      "grad_norm": 0.12679298222064972,
+      "learning_rate": 0.00017850483475248953,
+      "loss": 0.1562,
+      "step": 7452
+    },
+    {
+      "epoch": 0.5377538872253689,
+      "grad_norm": 0.11403336375951767,
+      "learning_rate": 0.00017850194833309282,
+      "loss": 0.1204,
+      "step": 7453
+    },
+    {
+      "epoch": 0.5378260399004293,
+      "grad_norm": 0.11030253022909164,
+      "learning_rate": 0.00017849906191369608,
+      "loss": 0.1351,
+      "step": 7454
+    },
+    {
+      "epoch": 0.5378981925754898,
+      "grad_norm": 0.11188559234142303,
+      "learning_rate": 0.00017849617549429934,
+      "loss": 0.1371,
+      "step": 7455
+    },
+    {
+      "epoch": 0.5379703452505502,
+      "grad_norm": 0.14153379201889038,
+      "learning_rate": 0.00017849328907490258,
+      "loss": 0.1632,
+      "step": 7456
+    },
+    {
+      "epoch": 0.5380424979256105,
+      "grad_norm": 0.12268875539302826,
+      "learning_rate": 0.00017849040265550584,
+      "loss": 0.1593,
+      "step": 7457
+    },
+    {
+      "epoch": 0.538114650600671,
+      "grad_norm": 0.10495118796825409,
+      "learning_rate": 0.0001784875162361091,
+      "loss": 0.1816,
+      "step": 7458
+    },
+    {
+      "epoch": 0.5381868032757314,
+      "grad_norm": 0.15740640461444855,
+      "learning_rate": 0.00017848462981671237,
+      "loss": 0.1475,
+      "step": 7459
+    },
+    {
+      "epoch": 0.5382589559507919,
+      "grad_norm": 0.13259257376194,
+      "learning_rate": 0.00017848174339731566,
+      "loss": 0.1376,
+      "step": 7460
+    },
+    {
+      "epoch": 0.5383311086258523,
+      "grad_norm": 0.1220489889383316,
+      "learning_rate": 0.0001784788569779189,
+      "loss": 0.1532,
+      "step": 7461
+    },
+    {
+      "epoch": 0.5384032613009128,
+      "grad_norm": 0.150324746966362,
+      "learning_rate": 0.00017847597055852216,
+      "loss": 0.1654,
+      "step": 7462
+    },
+    {
+      "epoch": 0.5384754139759732,
+      "grad_norm": 0.12435202300548553,
+      "learning_rate": 0.00017847308413912542,
+      "loss": 0.1377,
+      "step": 7463
+    },
+    {
+      "epoch": 0.5385475666510335,
+      "grad_norm": 0.12755684554576874,
+      "learning_rate": 0.00017847019771972868,
+      "loss": 0.1627,
+      "step": 7464
+    },
+    {
+      "epoch": 0.538619719326094,
+      "grad_norm": 0.11508068442344666,
+      "learning_rate": 0.00017846731130033195,
+      "loss": 0.1258,
+      "step": 7465
+    },
+    {
+      "epoch": 0.5386918720011544,
+      "grad_norm": 0.1059078648686409,
+      "learning_rate": 0.0001784644248809352,
+      "loss": 0.1732,
+      "step": 7466
+    },
+    {
+      "epoch": 0.5387640246762149,
+      "grad_norm": 0.12024451792240143,
+      "learning_rate": 0.00017846153846153847,
+      "loss": 0.1186,
+      "step": 7467
+    },
+    {
+      "epoch": 0.5388361773512753,
+      "grad_norm": 0.12258029729127884,
+      "learning_rate": 0.00017845865204214173,
+      "loss": 0.1833,
+      "step": 7468
+    },
+    {
+      "epoch": 0.5389083300263358,
+      "grad_norm": 0.12107551097869873,
+      "learning_rate": 0.000178455765622745,
+      "loss": 0.0989,
+      "step": 7469
+    },
+    {
+      "epoch": 0.5389804827013962,
+      "grad_norm": 0.11932362616062164,
+      "learning_rate": 0.00017845287920334826,
+      "loss": 0.1489,
+      "step": 7470
+    },
+    {
+      "epoch": 0.5390526353764565,
+      "grad_norm": 0.11584921926259995,
+      "learning_rate": 0.00017844999278395152,
+      "loss": 0.1332,
+      "step": 7471
+    },
+    {
+      "epoch": 0.539124788051517,
+      "grad_norm": 0.1187252476811409,
+      "learning_rate": 0.00017844710636455479,
+      "loss": 0.1562,
+      "step": 7472
+    },
+    {
+      "epoch": 0.5391969407265774,
+      "grad_norm": 0.12253367155790329,
+      "learning_rate": 0.00017844421994515802,
+      "loss": 0.1184,
+      "step": 7473
+    },
+    {
+      "epoch": 0.5392690934016379,
+      "grad_norm": 0.11383675038814545,
+      "learning_rate": 0.0001784413335257613,
+      "loss": 0.1086,
+      "step": 7474
+    },
+    {
+      "epoch": 0.5393412460766983,
+      "grad_norm": 0.12146003544330597,
+      "learning_rate": 0.00017843844710636457,
+      "loss": 0.1607,
+      "step": 7475
+    },
+    {
+      "epoch": 0.5394133987517588,
+      "grad_norm": 0.14561280608177185,
+      "learning_rate": 0.00017843556068696784,
+      "loss": 0.1257,
+      "step": 7476
+    },
+    {
+      "epoch": 0.5394855514268192,
+      "grad_norm": 0.15061399340629578,
+      "learning_rate": 0.0001784326742675711,
+      "loss": 0.1643,
+      "step": 7477
+    },
+    {
+      "epoch": 0.5395577041018795,
+      "grad_norm": 0.13116496801376343,
+      "learning_rate": 0.00017842978784817434,
+      "loss": 0.1475,
+      "step": 7478
+    },
+    {
+      "epoch": 0.53962985677694,
+      "grad_norm": 0.1168837696313858,
+      "learning_rate": 0.0001784269014287776,
+      "loss": 0.1522,
+      "step": 7479
+    },
+    {
+      "epoch": 0.5397020094520004,
+      "grad_norm": 0.11416567862033844,
+      "learning_rate": 0.00017842401500938086,
+      "loss": 0.1505,
+      "step": 7480
+    },
+    {
+      "epoch": 0.5397741621270609,
+      "grad_norm": 0.13860619068145752,
+      "learning_rate": 0.00017842112858998415,
+      "loss": 0.145,
+      "step": 7481
+    },
+    {
+      "epoch": 0.5398463148021213,
+      "grad_norm": 0.10703293234109879,
+      "learning_rate": 0.00017841824217058741,
+      "loss": 0.1425,
+      "step": 7482
+    },
+    {
+      "epoch": 0.5399184674771818,
+      "grad_norm": 0.13385754823684692,
+      "learning_rate": 0.00017841535575119065,
+      "loss": 0.1485,
+      "step": 7483
+    },
+    {
+      "epoch": 0.5399906201522422,
+      "grad_norm": 0.11074848473072052,
+      "learning_rate": 0.0001784124693317939,
+      "loss": 0.1244,
+      "step": 7484
+    },
+    {
+      "epoch": 0.5400627728273025,
+      "grad_norm": 0.13621360063552856,
+      "learning_rate": 0.00017840958291239718,
+      "loss": 0.1685,
+      "step": 7485
+    },
+    {
+      "epoch": 0.540134925502363,
+      "grad_norm": 0.15088190138339996,
+      "learning_rate": 0.00017840669649300044,
+      "loss": 0.2403,
+      "step": 7486
+    },
+    {
+      "epoch": 0.5402070781774234,
+      "grad_norm": 0.1203557550907135,
+      "learning_rate": 0.0001784038100736037,
+      "loss": 0.1638,
+      "step": 7487
+    },
+    {
+      "epoch": 0.5402792308524839,
+      "grad_norm": 0.12648047506809235,
+      "learning_rate": 0.00017840092365420697,
+      "loss": 0.1408,
+      "step": 7488
+    },
+    {
+      "epoch": 0.5403513835275443,
+      "grad_norm": 0.12805725634098053,
+      "learning_rate": 0.00017839803723481023,
+      "loss": 0.1709,
+      "step": 7489
+    },
+    {
+      "epoch": 0.5404235362026047,
+      "grad_norm": 0.12517820298671722,
+      "learning_rate": 0.0001783951508154135,
+      "loss": 0.1697,
+      "step": 7490
+    },
+    {
+      "epoch": 0.5404956888776652,
+      "grad_norm": 0.16566172242164612,
+      "learning_rate": 0.00017839226439601675,
+      "loss": 0.1823,
+      "step": 7491
+    },
+    {
+      "epoch": 0.5405678415527255,
+      "grad_norm": 0.12278152257204056,
+      "learning_rate": 0.00017838937797662002,
+      "loss": 0.1122,
+      "step": 7492
+    },
+    {
+      "epoch": 0.540639994227786,
+      "grad_norm": 0.12212082743644714,
+      "learning_rate": 0.00017838649155722328,
+      "loss": 0.133,
+      "step": 7493
+    },
+    {
+      "epoch": 0.5407121469028464,
+      "grad_norm": 0.13311481475830078,
+      "learning_rate": 0.00017838360513782652,
+      "loss": 0.1068,
+      "step": 7494
+    },
+    {
+      "epoch": 0.5407842995779069,
+      "grad_norm": 0.10868926346302032,
+      "learning_rate": 0.0001783807187184298,
+      "loss": 0.1112,
+      "step": 7495
+    },
+    {
+      "epoch": 0.5408564522529673,
+      "grad_norm": 0.11439421772956848,
+      "learning_rate": 0.00017837783229903307,
+      "loss": 0.1431,
+      "step": 7496
+    },
+    {
+      "epoch": 0.5409286049280277,
+      "grad_norm": 0.1813734471797943,
+      "learning_rate": 0.00017837494587963633,
+      "loss": 0.1313,
+      "step": 7497
+    },
+    {
+      "epoch": 0.5410007576030882,
+      "grad_norm": 0.12070521712303162,
+      "learning_rate": 0.0001783720594602396,
+      "loss": 0.1214,
+      "step": 7498
+    },
+    {
+      "epoch": 0.5410729102781485,
+      "grad_norm": 0.12103161960840225,
+      "learning_rate": 0.00017836917304084283,
+      "loss": 0.1357,
+      "step": 7499
+    },
+    {
+      "epoch": 0.541145062953209,
+      "grad_norm": 0.11779063194990158,
+      "learning_rate": 0.0001783662866214461,
+      "loss": 0.1659,
+      "step": 7500
+    },
+    {
+      "epoch": 0.5412172156282694,
+      "grad_norm": 0.10207480937242508,
+      "learning_rate": 0.00017836340020204936,
+      "loss": 0.1414,
+      "step": 7501
+    },
+    {
+      "epoch": 0.5412893683033299,
+      "grad_norm": 0.15236796438694,
+      "learning_rate": 0.00017836051378265265,
+      "loss": 0.1169,
+      "step": 7502
+    },
+    {
+      "epoch": 0.5413615209783903,
+      "grad_norm": 0.12786640226840973,
+      "learning_rate": 0.0001783576273632559,
+      "loss": 0.1719,
+      "step": 7503
+    },
+    {
+      "epoch": 0.5414336736534507,
+      "grad_norm": 0.12417449802160263,
+      "learning_rate": 0.00017835474094385914,
+      "loss": 0.1625,
+      "step": 7504
+    },
+    {
+      "epoch": 0.5415058263285111,
+      "grad_norm": 0.11570677161216736,
+      "learning_rate": 0.0001783518545244624,
+      "loss": 0.1395,
+      "step": 7505
+    },
+    {
+      "epoch": 0.5415779790035715,
+      "grad_norm": 0.10178045928478241,
+      "learning_rate": 0.00017834896810506567,
+      "loss": 0.1621,
+      "step": 7506
+    },
+    {
+      "epoch": 0.541650131678632,
+      "grad_norm": 0.13424795866012573,
+      "learning_rate": 0.00017834608168566893,
+      "loss": 0.1552,
+      "step": 7507
+    },
+    {
+      "epoch": 0.5417222843536924,
+      "grad_norm": 0.15743964910507202,
+      "learning_rate": 0.0001783431952662722,
+      "loss": 0.1692,
+      "step": 7508
+    },
+    {
+      "epoch": 0.5417944370287529,
+      "grad_norm": 0.1120472252368927,
+      "learning_rate": 0.00017834030884687546,
+      "loss": 0.1583,
+      "step": 7509
+    },
+    {
+      "epoch": 0.5418665897038133,
+      "grad_norm": 0.13689734041690826,
+      "learning_rate": 0.00017833742242747872,
+      "loss": 0.1499,
+      "step": 7510
+    },
+    {
+      "epoch": 0.5419387423788737,
+      "grad_norm": 0.10957453399896622,
+      "learning_rate": 0.00017833453600808199,
+      "loss": 0.143,
+      "step": 7511
+    },
+    {
+      "epoch": 0.5420108950539341,
+      "grad_norm": 0.1020032986998558,
+      "learning_rate": 0.00017833164958868525,
+      "loss": 0.1344,
+      "step": 7512
+    },
+    {
+      "epoch": 0.5420830477289945,
+      "grad_norm": 0.10065440088510513,
+      "learning_rate": 0.0001783287631692885,
+      "loss": 0.1309,
+      "step": 7513
+    },
+    {
+      "epoch": 0.542155200404055,
+      "grad_norm": 0.10141412913799286,
+      "learning_rate": 0.00017832587674989177,
+      "loss": 0.1971,
+      "step": 7514
+    },
+    {
+      "epoch": 0.5422273530791154,
+      "grad_norm": 0.10940968990325928,
+      "learning_rate": 0.000178322990330495,
+      "loss": 0.2016,
+      "step": 7515
+    },
+    {
+      "epoch": 0.5422995057541758,
+      "grad_norm": 0.12552902102470398,
+      "learning_rate": 0.0001783201039110983,
+      "loss": 0.2007,
+      "step": 7516
+    },
+    {
+      "epoch": 0.5423716584292363,
+      "grad_norm": 0.11455725878477097,
+      "learning_rate": 0.00017831721749170156,
+      "loss": 0.1712,
+      "step": 7517
+    },
+    {
+      "epoch": 0.5424438111042967,
+      "grad_norm": 0.12817665934562683,
+      "learning_rate": 0.00017831433107230483,
+      "loss": 0.1344,
+      "step": 7518
+    },
+    {
+      "epoch": 0.5425159637793571,
+      "grad_norm": 0.11515673249959946,
+      "learning_rate": 0.0001783114446529081,
+      "loss": 0.1457,
+      "step": 7519
+    },
+    {
+      "epoch": 0.5425881164544175,
+      "grad_norm": 0.14225012063980103,
+      "learning_rate": 0.00017830855823351132,
+      "loss": 0.2118,
+      "step": 7520
+    },
+    {
+      "epoch": 0.542660269129478,
+      "grad_norm": 0.13224397599697113,
+      "learning_rate": 0.0001783056718141146,
+      "loss": 0.1475,
+      "step": 7521
+    },
+    {
+      "epoch": 0.5427324218045384,
+      "grad_norm": 0.1179685965180397,
+      "learning_rate": 0.00017830278539471785,
+      "loss": 0.167,
+      "step": 7522
+    },
+    {
+      "epoch": 0.5428045744795988,
+      "grad_norm": 0.15331527590751648,
+      "learning_rate": 0.00017829989897532114,
+      "loss": 0.1492,
+      "step": 7523
+    },
+    {
+      "epoch": 0.5428767271546593,
+      "grad_norm": 0.11871089786291122,
+      "learning_rate": 0.0001782970125559244,
+      "loss": 0.1001,
+      "step": 7524
+    },
+    {
+      "epoch": 0.5429488798297197,
+      "grad_norm": 0.10068800300359726,
+      "learning_rate": 0.00017829412613652764,
+      "loss": 0.1689,
+      "step": 7525
+    },
+    {
+      "epoch": 0.5430210325047801,
+      "grad_norm": 0.15152029693126678,
+      "learning_rate": 0.0001782912397171309,
+      "loss": 0.1281,
+      "step": 7526
+    },
+    {
+      "epoch": 0.5430931851798405,
+      "grad_norm": 0.13519147038459778,
+      "learning_rate": 0.00017828835329773416,
+      "loss": 0.1416,
+      "step": 7527
+    },
+    {
+      "epoch": 0.543165337854901,
+      "grad_norm": 0.13669776916503906,
+      "learning_rate": 0.00017828546687833743,
+      "loss": 0.1426,
+      "step": 7528
+    },
+    {
+      "epoch": 0.5432374905299614,
+      "grad_norm": 0.1263340562582016,
+      "learning_rate": 0.0001782825804589407,
+      "loss": 0.1462,
+      "step": 7529
+    },
+    {
+      "epoch": 0.5433096432050218,
+      "grad_norm": 0.1487351357936859,
+      "learning_rate": 0.00017827969403954395,
+      "loss": 0.2028,
+      "step": 7530
+    },
+    {
+      "epoch": 0.5433817958800823,
+      "grad_norm": 0.13854576647281647,
+      "learning_rate": 0.00017827680762014722,
+      "loss": 0.1274,
+      "step": 7531
+    },
+    {
+      "epoch": 0.5434539485551427,
+      "grad_norm": 0.111342653632164,
+      "learning_rate": 0.00017827392120075048,
+      "loss": 0.138,
+      "step": 7532
+    },
+    {
+      "epoch": 0.5435261012302031,
+      "grad_norm": 0.15238115191459656,
+      "learning_rate": 0.00017827103478135374,
+      "loss": 0.1189,
+      "step": 7533
+    },
+    {
+      "epoch": 0.5435982539052635,
+      "grad_norm": 0.1749115288257599,
+      "learning_rate": 0.000178268148361957,
+      "loss": 0.1141,
+      "step": 7534
+    },
+    {
+      "epoch": 0.543670406580324,
+      "grad_norm": 0.13412636518478394,
+      "learning_rate": 0.00017826526194256027,
+      "loss": 0.1245,
+      "step": 7535
+    },
+    {
+      "epoch": 0.5437425592553844,
+      "grad_norm": 0.13132649660110474,
+      "learning_rate": 0.0001782623755231635,
+      "loss": 0.18,
+      "step": 7536
+    },
+    {
+      "epoch": 0.5438147119304448,
+      "grad_norm": 0.11912304908037186,
+      "learning_rate": 0.0001782594891037668,
+      "loss": 0.1517,
+      "step": 7537
+    },
+    {
+      "epoch": 0.5438868646055053,
+      "grad_norm": 0.16066962480545044,
+      "learning_rate": 0.00017825660268437006,
+      "loss": 0.1757,
+      "step": 7538
+    },
+    {
+      "epoch": 0.5439590172805657,
+      "grad_norm": 0.12418557703495026,
+      "learning_rate": 0.00017825371626497332,
+      "loss": 0.0902,
+      "step": 7539
+    },
+    {
+      "epoch": 0.5440311699556261,
+      "grad_norm": 0.11531732976436615,
+      "learning_rate": 0.00017825082984557658,
+      "loss": 0.1782,
+      "step": 7540
+    },
+    {
+      "epoch": 0.5441033226306865,
+      "grad_norm": 0.11655911058187485,
+      "learning_rate": 0.00017824794342617982,
+      "loss": 0.1245,
+      "step": 7541
+    },
+    {
+      "epoch": 0.544175475305747,
+      "grad_norm": 0.11822719871997833,
+      "learning_rate": 0.00017824505700678308,
+      "loss": 0.1253,
+      "step": 7542
+    },
+    {
+      "epoch": 0.5442476279808074,
+      "grad_norm": 0.13040819764137268,
+      "learning_rate": 0.00017824217058738634,
+      "loss": 0.1286,
+      "step": 7543
+    },
+    {
+      "epoch": 0.5443197806558678,
+      "grad_norm": 0.12679016590118408,
+      "learning_rate": 0.0001782392841679896,
+      "loss": 0.1337,
+      "step": 7544
+    },
+    {
+      "epoch": 0.5443919333309283,
+      "grad_norm": 0.11832067370414734,
+      "learning_rate": 0.0001782363977485929,
+      "loss": 0.118,
+      "step": 7545
+    },
+    {
+      "epoch": 0.5444640860059887,
+      "grad_norm": 0.12346930801868439,
+      "learning_rate": 0.00017823351132919613,
+      "loss": 0.1466,
+      "step": 7546
+    },
+    {
+      "epoch": 0.5445362386810491,
+      "grad_norm": 0.1250762641429901,
+      "learning_rate": 0.0001782306249097994,
+      "loss": 0.1432,
+      "step": 7547
+    },
+    {
+      "epoch": 0.5446083913561095,
+      "grad_norm": 0.11732055991888046,
+      "learning_rate": 0.00017822773849040266,
+      "loss": 0.1582,
+      "step": 7548
+    },
+    {
+      "epoch": 0.5446805440311699,
+      "grad_norm": 0.16200515627861023,
+      "learning_rate": 0.00017822485207100592,
+      "loss": 0.1538,
+      "step": 7549
+    },
+    {
+      "epoch": 0.5447526967062304,
+      "grad_norm": 0.15169191360473633,
+      "learning_rate": 0.00017822196565160918,
+      "loss": 0.1744,
+      "step": 7550
+    },
+    {
+      "epoch": 0.5448248493812908,
+      "grad_norm": 0.1553208976984024,
+      "learning_rate": 0.00017821907923221245,
+      "loss": 0.1946,
+      "step": 7551
+    },
+    {
+      "epoch": 0.5448970020563513,
+      "grad_norm": 0.10830380022525787,
+      "learning_rate": 0.0001782161928128157,
+      "loss": 0.1781,
+      "step": 7552
+    },
+    {
+      "epoch": 0.5449691547314117,
+      "grad_norm": 0.11492560058832169,
+      "learning_rate": 0.00017821330639341897,
+      "loss": 0.1468,
+      "step": 7553
+    },
+    {
+      "epoch": 0.545041307406472,
+      "grad_norm": 0.10390572249889374,
+      "learning_rate": 0.00017821041997402224,
+      "loss": 0.1941,
+      "step": 7554
+    },
+    {
+      "epoch": 0.5451134600815325,
+      "grad_norm": 0.1058596521615982,
+      "learning_rate": 0.0001782075335546255,
+      "loss": 0.1417,
+      "step": 7555
+    },
+    {
+      "epoch": 0.5451856127565929,
+      "grad_norm": 0.11372096836566925,
+      "learning_rate": 0.00017820464713522876,
+      "loss": 0.0962,
+      "step": 7556
+    },
+    {
+      "epoch": 0.5452577654316534,
+      "grad_norm": 0.1294596791267395,
+      "learning_rate": 0.000178201760715832,
+      "loss": 0.1438,
+      "step": 7557
+    },
+    {
+      "epoch": 0.5453299181067138,
+      "grad_norm": 0.11079258471727371,
+      "learning_rate": 0.00017819887429643526,
+      "loss": 0.1097,
+      "step": 7558
+    },
+    {
+      "epoch": 0.5454020707817743,
+      "grad_norm": 0.11069349944591522,
+      "learning_rate": 0.00017819598787703855,
+      "loss": 0.1254,
+      "step": 7559
+    },
+    {
+      "epoch": 0.5454742234568347,
+      "grad_norm": 0.12368962913751602,
+      "learning_rate": 0.00017819310145764181,
+      "loss": 0.143,
+      "step": 7560
+    },
+    {
+      "epoch": 0.545546376131895,
+      "grad_norm": 0.1657244712114334,
+      "learning_rate": 0.00017819021503824508,
+      "loss": 0.1468,
+      "step": 7561
+    },
+    {
+      "epoch": 0.5456185288069555,
+      "grad_norm": 0.1279374361038208,
+      "learning_rate": 0.0001781873286188483,
+      "loss": 0.1696,
+      "step": 7562
+    },
+    {
+      "epoch": 0.5456906814820159,
+      "grad_norm": 0.12708182632923126,
+      "learning_rate": 0.00017818444219945158,
+      "loss": 0.1597,
+      "step": 7563
+    },
+    {
+      "epoch": 0.5457628341570764,
+      "grad_norm": 0.11969926208257675,
+      "learning_rate": 0.00017818155578005484,
+      "loss": 0.1275,
+      "step": 7564
+    },
+    {
+      "epoch": 0.5458349868321368,
+      "grad_norm": 0.10338714718818665,
+      "learning_rate": 0.0001781786693606581,
+      "loss": 0.1419,
+      "step": 7565
+    },
+    {
+      "epoch": 0.5459071395071973,
+      "grad_norm": 0.11943601071834564,
+      "learning_rate": 0.0001781757829412614,
+      "loss": 0.1026,
+      "step": 7566
+    },
+    {
+      "epoch": 0.5459792921822576,
+      "grad_norm": 0.10397817194461823,
+      "learning_rate": 0.00017817289652186463,
+      "loss": 0.1125,
+      "step": 7567
+    },
+    {
+      "epoch": 0.546051444857318,
+      "grad_norm": 0.11815197020769119,
+      "learning_rate": 0.0001781700101024679,
+      "loss": 0.1846,
+      "step": 7568
+    },
+    {
+      "epoch": 0.5461235975323785,
+      "grad_norm": 0.11354372650384903,
+      "learning_rate": 0.00017816712368307115,
+      "loss": 0.1755,
+      "step": 7569
+    },
+    {
+      "epoch": 0.5461957502074389,
+      "grad_norm": 0.11804115772247314,
+      "learning_rate": 0.00017816423726367442,
+      "loss": 0.1541,
+      "step": 7570
+    },
+    {
+      "epoch": 0.5462679028824994,
+      "grad_norm": 0.12533968687057495,
+      "learning_rate": 0.00017816135084427768,
+      "loss": 0.1538,
+      "step": 7571
+    },
+    {
+      "epoch": 0.5463400555575598,
+      "grad_norm": 0.09542267769575119,
+      "learning_rate": 0.00017815846442488094,
+      "loss": 0.1612,
+      "step": 7572
+    },
+    {
+      "epoch": 0.5464122082326203,
+      "grad_norm": 0.11053860932588577,
+      "learning_rate": 0.0001781555780054842,
+      "loss": 0.1906,
+      "step": 7573
+    },
+    {
+      "epoch": 0.5464843609076806,
+      "grad_norm": 0.1260577142238617,
+      "learning_rate": 0.00017815269158608747,
+      "loss": 0.184,
+      "step": 7574
+    },
+    {
+      "epoch": 0.546556513582741,
+      "grad_norm": 0.11573726683855057,
+      "learning_rate": 0.00017814980516669073,
+      "loss": 0.143,
+      "step": 7575
+    },
+    {
+      "epoch": 0.5466286662578015,
+      "grad_norm": 0.16125363111495972,
+      "learning_rate": 0.000178146918747294,
+      "loss": 0.1601,
+      "step": 7576
+    },
+    {
+      "epoch": 0.5467008189328619,
+      "grad_norm": 0.1232057511806488,
+      "learning_rate": 0.00017814403232789726,
+      "loss": 0.1597,
+      "step": 7577
+    },
+    {
+      "epoch": 0.5467729716079224,
+      "grad_norm": 0.11699820309877396,
+      "learning_rate": 0.00017814114590850052,
+      "loss": 0.1498,
+      "step": 7578
+    },
+    {
+      "epoch": 0.5468451242829828,
+      "grad_norm": 0.11260601133108139,
+      "learning_rate": 0.00017813825948910376,
+      "loss": 0.1171,
+      "step": 7579
+    },
+    {
+      "epoch": 0.5469172769580433,
+      "grad_norm": 0.10898337513208389,
+      "learning_rate": 0.00017813537306970705,
+      "loss": 0.1352,
+      "step": 7580
+    },
+    {
+      "epoch": 0.5469894296331036,
+      "grad_norm": 0.12603630125522614,
+      "learning_rate": 0.0001781324866503103,
+      "loss": 0.1892,
+      "step": 7581
+    },
+    {
+      "epoch": 0.547061582308164,
+      "grad_norm": 0.1058061271905899,
+      "learning_rate": 0.00017812960023091357,
+      "loss": 0.2125,
+      "step": 7582
+    },
+    {
+      "epoch": 0.5471337349832245,
+      "grad_norm": 0.11850077658891678,
+      "learning_rate": 0.00017812671381151683,
+      "loss": 0.1416,
+      "step": 7583
+    },
+    {
+      "epoch": 0.5472058876582849,
+      "grad_norm": 0.11276722699403763,
+      "learning_rate": 0.00017812382739212007,
+      "loss": 0.1597,
+      "step": 7584
+    },
+    {
+      "epoch": 0.5472780403333454,
+      "grad_norm": 0.15107154846191406,
+      "learning_rate": 0.00017812094097272333,
+      "loss": 0.1779,
+      "step": 7585
+    },
+    {
+      "epoch": 0.5473501930084058,
+      "grad_norm": 0.10331179201602936,
+      "learning_rate": 0.0001781180545533266,
+      "loss": 0.1795,
+      "step": 7586
+    },
+    {
+      "epoch": 0.5474223456834663,
+      "grad_norm": 0.13958831131458282,
+      "learning_rate": 0.00017811516813392989,
+      "loss": 0.1666,
+      "step": 7587
+    },
+    {
+      "epoch": 0.5474944983585266,
+      "grad_norm": 0.11555318534374237,
+      "learning_rate": 0.00017811228171453315,
+      "loss": 0.1879,
+      "step": 7588
+    },
+    {
+      "epoch": 0.547566651033587,
+      "grad_norm": 0.13235783576965332,
+      "learning_rate": 0.00017810939529513638,
+      "loss": 0.1313,
+      "step": 7589
+    },
+    {
+      "epoch": 0.5476388037086475,
+      "grad_norm": 0.14834065735340118,
+      "learning_rate": 0.00017810650887573965,
+      "loss": 0.1452,
+      "step": 7590
+    },
+    {
+      "epoch": 0.5477109563837079,
+      "grad_norm": 0.14114947617053986,
+      "learning_rate": 0.0001781036224563429,
+      "loss": 0.1599,
+      "step": 7591
+    },
+    {
+      "epoch": 0.5477831090587684,
+      "grad_norm": 0.1421668529510498,
+      "learning_rate": 0.00017810073603694617,
+      "loss": 0.1575,
+      "step": 7592
+    },
+    {
+      "epoch": 0.5478552617338288,
+      "grad_norm": 0.1561838686466217,
+      "learning_rate": 0.00017809784961754944,
+      "loss": 0.1774,
+      "step": 7593
+    },
+    {
+      "epoch": 0.5479274144088893,
+      "grad_norm": 0.12360503524541855,
+      "learning_rate": 0.0001780949631981527,
+      "loss": 0.1588,
+      "step": 7594
+    },
+    {
+      "epoch": 0.5479995670839496,
+      "grad_norm": 0.11868561804294586,
+      "learning_rate": 0.00017809207677875596,
+      "loss": 0.1474,
+      "step": 7595
+    },
+    {
+      "epoch": 0.54807171975901,
+      "grad_norm": 0.09299908578395844,
+      "learning_rate": 0.00017808919035935923,
+      "loss": 0.1211,
+      "step": 7596
+    },
+    {
+      "epoch": 0.5481438724340705,
+      "grad_norm": 0.14525295794010162,
+      "learning_rate": 0.0001780863039399625,
+      "loss": 0.2189,
+      "step": 7597
+    },
+    {
+      "epoch": 0.5482160251091309,
+      "grad_norm": 0.1374361664056778,
+      "learning_rate": 0.00017808341752056575,
+      "loss": 0.1785,
+      "step": 7598
+    },
+    {
+      "epoch": 0.5482881777841914,
+      "grad_norm": 0.10330330580472946,
+      "learning_rate": 0.00017808053110116901,
+      "loss": 0.1544,
+      "step": 7599
+    },
+    {
+      "epoch": 0.5483603304592518,
+      "grad_norm": 0.10700894892215729,
+      "learning_rate": 0.00017807764468177225,
+      "loss": 0.1815,
+      "step": 7600
+    },
+    {
+      "epoch": 0.5484324831343123,
+      "grad_norm": 0.11932407319545746,
+      "learning_rate": 0.00017807475826237554,
+      "loss": 0.1836,
+      "step": 7601
+    },
+    {
+      "epoch": 0.5485046358093726,
+      "grad_norm": 0.11152351647615433,
+      "learning_rate": 0.0001780718718429788,
+      "loss": 0.1673,
+      "step": 7602
+    },
+    {
+      "epoch": 0.548576788484433,
+      "grad_norm": 0.11458411812782288,
+      "learning_rate": 0.00017806898542358207,
+      "loss": 0.1754,
+      "step": 7603
+    },
+    {
+      "epoch": 0.5486489411594935,
+      "grad_norm": 0.1215558722615242,
+      "learning_rate": 0.00017806609900418533,
+      "loss": 0.148,
+      "step": 7604
+    },
+    {
+      "epoch": 0.5487210938345539,
+      "grad_norm": 0.1182718351483345,
+      "learning_rate": 0.00017806321258478856,
+      "loss": 0.1805,
+      "step": 7605
+    },
+    {
+      "epoch": 0.5487932465096144,
+      "grad_norm": 0.13227002322673798,
+      "learning_rate": 0.00017806032616539183,
+      "loss": 0.1678,
+      "step": 7606
+    },
+    {
+      "epoch": 0.5488653991846748,
+      "grad_norm": 0.09978866577148438,
+      "learning_rate": 0.0001780574397459951,
+      "loss": 0.1455,
+      "step": 7607
+    },
+    {
+      "epoch": 0.5489375518597353,
+      "grad_norm": 0.1297626942396164,
+      "learning_rate": 0.00017805455332659838,
+      "loss": 0.1643,
+      "step": 7608
+    },
+    {
+      "epoch": 0.5490097045347956,
+      "grad_norm": 0.13307395577430725,
+      "learning_rate": 0.00017805166690720164,
+      "loss": 0.1121,
+      "step": 7609
+    },
+    {
+      "epoch": 0.549081857209856,
+      "grad_norm": 0.09766818583011627,
+      "learning_rate": 0.00017804878048780488,
+      "loss": 0.182,
+      "step": 7610
+    },
+    {
+      "epoch": 0.5491540098849165,
+      "grad_norm": 0.1137261837720871,
+      "learning_rate": 0.00017804589406840814,
+      "loss": 0.142,
+      "step": 7611
+    },
+    {
+      "epoch": 0.5492261625599769,
+      "grad_norm": 0.1196482926607132,
+      "learning_rate": 0.0001780430076490114,
+      "loss": 0.1306,
+      "step": 7612
+    },
+    {
+      "epoch": 0.5492983152350374,
+      "grad_norm": 0.11196993291378021,
+      "learning_rate": 0.00017804012122961467,
+      "loss": 0.1153,
+      "step": 7613
+    },
+    {
+      "epoch": 0.5493704679100978,
+      "grad_norm": 0.10517489910125732,
+      "learning_rate": 0.00017803723481021793,
+      "loss": 0.1291,
+      "step": 7614
+    },
+    {
+      "epoch": 0.5494426205851582,
+      "grad_norm": 0.1116122379899025,
+      "learning_rate": 0.0001780343483908212,
+      "loss": 0.1371,
+      "step": 7615
+    },
+    {
+      "epoch": 0.5495147732602186,
+      "grad_norm": 0.21631592512130737,
+      "learning_rate": 0.00017803146197142446,
+      "loss": 0.1495,
+      "step": 7616
+    },
+    {
+      "epoch": 0.549586925935279,
+      "grad_norm": 0.13187208771705627,
+      "learning_rate": 0.00017802857555202772,
+      "loss": 0.1828,
+      "step": 7617
+    },
+    {
+      "epoch": 0.5496590786103395,
+      "grad_norm": 0.10498936474323273,
+      "learning_rate": 0.00017802568913263098,
+      "loss": 0.1785,
+      "step": 7618
+    },
+    {
+      "epoch": 0.5497312312853999,
+      "grad_norm": 0.10033317655324936,
+      "learning_rate": 0.00017802280271323425,
+      "loss": 0.19,
+      "step": 7619
+    },
+    {
+      "epoch": 0.5498033839604604,
+      "grad_norm": 0.11087717115879059,
+      "learning_rate": 0.0001780199162938375,
+      "loss": 0.1631,
+      "step": 7620
+    },
+    {
+      "epoch": 0.5498755366355208,
+      "grad_norm": 0.15313465893268585,
+      "learning_rate": 0.00017801702987444074,
+      "loss": 0.1384,
+      "step": 7621
+    },
+    {
+      "epoch": 0.5499476893105812,
+      "grad_norm": 0.12790971994400024,
+      "learning_rate": 0.00017801414345504403,
+      "loss": 0.1112,
+      "step": 7622
+    },
+    {
+      "epoch": 0.5500198419856416,
+      "grad_norm": 0.1123102530837059,
+      "learning_rate": 0.0001780112570356473,
+      "loss": 0.1493,
+      "step": 7623
+    },
+    {
+      "epoch": 0.550091994660702,
+      "grad_norm": 0.1404675394296646,
+      "learning_rate": 0.00017800837061625056,
+      "loss": 0.1352,
+      "step": 7624
+    },
+    {
+      "epoch": 0.5501641473357625,
+      "grad_norm": 0.12286730110645294,
+      "learning_rate": 0.00017800548419685382,
+      "loss": 0.1422,
+      "step": 7625
+    },
+    {
+      "epoch": 0.5502363000108229,
+      "grad_norm": 0.15925469994544983,
+      "learning_rate": 0.00017800259777745706,
+      "loss": 0.1666,
+      "step": 7626
+    },
+    {
+      "epoch": 0.5503084526858834,
+      "grad_norm": 0.1199231743812561,
+      "learning_rate": 0.00017799971135806032,
+      "loss": 0.1769,
+      "step": 7627
+    },
+    {
+      "epoch": 0.5503806053609438,
+      "grad_norm": 0.11914505064487457,
+      "learning_rate": 0.00017799682493866358,
+      "loss": 0.1695,
+      "step": 7628
+    },
+    {
+      "epoch": 0.5504527580360041,
+      "grad_norm": 0.13842357695102692,
+      "learning_rate": 0.00017799393851926687,
+      "loss": 0.1644,
+      "step": 7629
+    },
+    {
+      "epoch": 0.5505249107110646,
+      "grad_norm": 0.10801035910844803,
+      "learning_rate": 0.00017799105209987014,
+      "loss": 0.2006,
+      "step": 7630
+    },
+    {
+      "epoch": 0.550597063386125,
+      "grad_norm": 0.12393604218959808,
+      "learning_rate": 0.00017798816568047337,
+      "loss": 0.1494,
+      "step": 7631
+    },
+    {
+      "epoch": 0.5506692160611855,
+      "grad_norm": 0.13326948881149292,
+      "learning_rate": 0.00017798527926107664,
+      "loss": 0.1248,
+      "step": 7632
+    },
+    {
+      "epoch": 0.5507413687362459,
+      "grad_norm": 0.11505146324634552,
+      "learning_rate": 0.0001779823928416799,
+      "loss": 0.1555,
+      "step": 7633
+    },
+    {
+      "epoch": 0.5508135214113064,
+      "grad_norm": 0.1264219731092453,
+      "learning_rate": 0.00017797950642228316,
+      "loss": 0.1262,
+      "step": 7634
+    },
+    {
+      "epoch": 0.5508856740863668,
+      "grad_norm": 0.12235382199287415,
+      "learning_rate": 0.00017797662000288642,
+      "loss": 0.1525,
+      "step": 7635
+    },
+    {
+      "epoch": 0.5509578267614271,
+      "grad_norm": 0.11811922490596771,
+      "learning_rate": 0.0001779737335834897,
+      "loss": 0.1316,
+      "step": 7636
+    },
+    {
+      "epoch": 0.5510299794364876,
+      "grad_norm": 0.12360856682062149,
+      "learning_rate": 0.00017797084716409295,
+      "loss": 0.1944,
+      "step": 7637
+    },
+    {
+      "epoch": 0.551102132111548,
+      "grad_norm": 0.16416537761688232,
+      "learning_rate": 0.00017796796074469621,
+      "loss": 0.1796,
+      "step": 7638
+    },
+    {
+      "epoch": 0.5511742847866085,
+      "grad_norm": 0.15926092863082886,
+      "learning_rate": 0.00017796507432529948,
+      "loss": 0.1256,
+      "step": 7639
+    },
+    {
+      "epoch": 0.5512464374616689,
+      "grad_norm": 0.13481405377388,
+      "learning_rate": 0.00017796218790590274,
+      "loss": 0.1616,
+      "step": 7640
+    },
+    {
+      "epoch": 0.5513185901367293,
+      "grad_norm": 0.1308879256248474,
+      "learning_rate": 0.000177959301486506,
+      "loss": 0.1884,
+      "step": 7641
+    },
+    {
+      "epoch": 0.5513907428117898,
+      "grad_norm": 0.13799186050891876,
+      "learning_rate": 0.00017795641506710924,
+      "loss": 0.1155,
+      "step": 7642
+    },
+    {
+      "epoch": 0.5514628954868501,
+      "grad_norm": 0.11589345335960388,
+      "learning_rate": 0.00017795352864771253,
+      "loss": 0.1686,
+      "step": 7643
+    },
+    {
+      "epoch": 0.5515350481619106,
+      "grad_norm": 0.10249035060405731,
+      "learning_rate": 0.0001779506422283158,
+      "loss": 0.0997,
+      "step": 7644
+    },
+    {
+      "epoch": 0.551607200836971,
+      "grad_norm": 0.12435011565685272,
+      "learning_rate": 0.00017794775580891905,
+      "loss": 0.1612,
+      "step": 7645
+    },
+    {
+      "epoch": 0.5516793535120315,
+      "grad_norm": 0.14909322559833527,
+      "learning_rate": 0.00017794486938952232,
+      "loss": 0.1083,
+      "step": 7646
+    },
+    {
+      "epoch": 0.5517515061870919,
+      "grad_norm": 0.1289183795452118,
+      "learning_rate": 0.00017794198297012555,
+      "loss": 0.1677,
+      "step": 7647
+    },
+    {
+      "epoch": 0.5518236588621523,
+      "grad_norm": 0.10382948070764542,
+      "learning_rate": 0.00017793909655072882,
+      "loss": 0.091,
+      "step": 7648
+    },
+    {
+      "epoch": 0.5518958115372128,
+      "grad_norm": 0.11587633937597275,
+      "learning_rate": 0.00017793621013133208,
+      "loss": 0.132,
+      "step": 7649
+    },
+    {
+      "epoch": 0.5519679642122731,
+      "grad_norm": 0.15552467107772827,
+      "learning_rate": 0.00017793332371193537,
+      "loss": 0.1311,
+      "step": 7650
+    },
+    {
+      "epoch": 0.5520401168873336,
+      "grad_norm": 0.13075612485408783,
+      "learning_rate": 0.00017793043729253863,
+      "loss": 0.1812,
+      "step": 7651
+    },
+    {
+      "epoch": 0.552112269562394,
+      "grad_norm": 0.10492058098316193,
+      "learning_rate": 0.00017792755087314187,
+      "loss": 0.1694,
+      "step": 7652
+    },
+    {
+      "epoch": 0.5521844222374545,
+      "grad_norm": 0.10020974278450012,
+      "learning_rate": 0.00017792466445374513,
+      "loss": 0.1825,
+      "step": 7653
+    },
+    {
+      "epoch": 0.5522565749125149,
+      "grad_norm": 0.12632660567760468,
+      "learning_rate": 0.0001779217780343484,
+      "loss": 0.1432,
+      "step": 7654
+    },
+    {
+      "epoch": 0.5523287275875753,
+      "grad_norm": 0.10315108299255371,
+      "learning_rate": 0.00017791889161495166,
+      "loss": 0.1665,
+      "step": 7655
+    },
+    {
+      "epoch": 0.5524008802626358,
+      "grad_norm": 0.12094546854496002,
+      "learning_rate": 0.00017791600519555492,
+      "loss": 0.1668,
+      "step": 7656
+    },
+    {
+      "epoch": 0.5524730329376961,
+      "grad_norm": 0.09945043921470642,
+      "learning_rate": 0.00017791311877615818,
+      "loss": 0.1524,
+      "step": 7657
+    },
+    {
+      "epoch": 0.5525451856127566,
+      "grad_norm": 0.11824460327625275,
+      "learning_rate": 0.00017791023235676145,
+      "loss": 0.1465,
+      "step": 7658
+    },
+    {
+      "epoch": 0.552617338287817,
+      "grad_norm": 0.11674058437347412,
+      "learning_rate": 0.0001779073459373647,
+      "loss": 0.1636,
+      "step": 7659
+    },
+    {
+      "epoch": 0.5526894909628774,
+      "grad_norm": 0.18679635226726532,
+      "learning_rate": 0.00017790445951796797,
+      "loss": 0.1319,
+      "step": 7660
+    },
+    {
+      "epoch": 0.5527616436379379,
+      "grad_norm": 0.11995816230773926,
+      "learning_rate": 0.00017790157309857123,
+      "loss": 0.1507,
+      "step": 7661
+    },
+    {
+      "epoch": 0.5528337963129983,
+      "grad_norm": 0.11749674379825592,
+      "learning_rate": 0.0001778986866791745,
+      "loss": 0.1657,
+      "step": 7662
+    },
+    {
+      "epoch": 0.5529059489880588,
+      "grad_norm": 0.10846541821956635,
+      "learning_rate": 0.00017789580025977773,
+      "loss": 0.1576,
+      "step": 7663
+    },
+    {
+      "epoch": 0.5529781016631191,
+      "grad_norm": 0.1053551658987999,
+      "learning_rate": 0.00017789291384038102,
+      "loss": 0.1576,
+      "step": 7664
+    },
+    {
+      "epoch": 0.5530502543381796,
+      "grad_norm": 0.1170341745018959,
+      "learning_rate": 0.00017789002742098429,
+      "loss": 0.1297,
+      "step": 7665
+    },
+    {
+      "epoch": 0.55312240701324,
+      "grad_norm": 0.15445485711097717,
+      "learning_rate": 0.00017788714100158755,
+      "loss": 0.1411,
+      "step": 7666
+    },
+    {
+      "epoch": 0.5531945596883004,
+      "grad_norm": 0.11116831749677658,
+      "learning_rate": 0.0001778842545821908,
+      "loss": 0.1496,
+      "step": 7667
+    },
+    {
+      "epoch": 0.5532667123633609,
+      "grad_norm": 0.10674598067998886,
+      "learning_rate": 0.00017788136816279405,
+      "loss": 0.1317,
+      "step": 7668
+    },
+    {
+      "epoch": 0.5533388650384213,
+      "grad_norm": 0.11206822097301483,
+      "learning_rate": 0.0001778784817433973,
+      "loss": 0.1271,
+      "step": 7669
+    },
+    {
+      "epoch": 0.5534110177134818,
+      "grad_norm": 0.18465600907802582,
+      "learning_rate": 0.00017787559532400057,
+      "loss": 0.199,
+      "step": 7670
+    },
+    {
+      "epoch": 0.5534831703885421,
+      "grad_norm": 0.1241191029548645,
+      "learning_rate": 0.00017787270890460386,
+      "loss": 0.1434,
+      "step": 7671
+    },
+    {
+      "epoch": 0.5535553230636026,
+      "grad_norm": 0.1338217556476593,
+      "learning_rate": 0.00017786982248520713,
+      "loss": 0.1697,
+      "step": 7672
+    },
+    {
+      "epoch": 0.553627475738663,
+      "grad_norm": 0.1337876319885254,
+      "learning_rate": 0.00017786693606581036,
+      "loss": 0.1513,
+      "step": 7673
+    },
+    {
+      "epoch": 0.5536996284137234,
+      "grad_norm": 0.11135462671518326,
+      "learning_rate": 0.00017786404964641362,
+      "loss": 0.1336,
+      "step": 7674
+    },
+    {
+      "epoch": 0.5537717810887839,
+      "grad_norm": 0.11627933382987976,
+      "learning_rate": 0.0001778611632270169,
+      "loss": 0.1688,
+      "step": 7675
+    },
+    {
+      "epoch": 0.5538439337638443,
+      "grad_norm": 0.11941682547330856,
+      "learning_rate": 0.00017785827680762015,
+      "loss": 0.1504,
+      "step": 7676
+    },
+    {
+      "epoch": 0.5539160864389048,
+      "grad_norm": 0.14273981750011444,
+      "learning_rate": 0.0001778553903882234,
+      "loss": 0.1497,
+      "step": 7677
+    },
+    {
+      "epoch": 0.5539882391139651,
+      "grad_norm": 0.11758162081241608,
+      "learning_rate": 0.00017785250396882668,
+      "loss": 0.1439,
+      "step": 7678
+    },
+    {
+      "epoch": 0.5540603917890256,
+      "grad_norm": 0.12304052710533142,
+      "learning_rate": 0.00017784961754942994,
+      "loss": 0.1373,
+      "step": 7679
+    },
+    {
+      "epoch": 0.554132544464086,
+      "grad_norm": 0.11368243396282196,
+      "learning_rate": 0.0001778467311300332,
+      "loss": 0.1176,
+      "step": 7680
+    },
+    {
+      "epoch": 0.5542046971391464,
+      "grad_norm": 0.10586858540773392,
+      "learning_rate": 0.00017784384471063647,
+      "loss": 0.1657,
+      "step": 7681
+    },
+    {
+      "epoch": 0.5542768498142069,
+      "grad_norm": 0.12059576064348221,
+      "learning_rate": 0.00017784095829123973,
+      "loss": 0.1499,
+      "step": 7682
+    },
+    {
+      "epoch": 0.5543490024892673,
+      "grad_norm": 0.12533071637153625,
+      "learning_rate": 0.000177838071871843,
+      "loss": 0.2116,
+      "step": 7683
+    },
+    {
+      "epoch": 0.5544211551643278,
+      "grad_norm": 0.12862452864646912,
+      "learning_rate": 0.00017783518545244623,
+      "loss": 0.1547,
+      "step": 7684
+    },
+    {
+      "epoch": 0.5544933078393881,
+      "grad_norm": 0.18502777814865112,
+      "learning_rate": 0.00017783229903304952,
+      "loss": 0.1716,
+      "step": 7685
+    },
+    {
+      "epoch": 0.5545654605144485,
+      "grad_norm": 0.12029103934764862,
+      "learning_rate": 0.00017782941261365278,
+      "loss": 0.1329,
+      "step": 7686
+    },
+    {
+      "epoch": 0.554637613189509,
+      "grad_norm": 0.1410187929868698,
+      "learning_rate": 0.00017782652619425604,
+      "loss": 0.1179,
+      "step": 7687
+    },
+    {
+      "epoch": 0.5547097658645694,
+      "grad_norm": 0.1031469777226448,
+      "learning_rate": 0.0001778236397748593,
+      "loss": 0.1802,
+      "step": 7688
+    },
+    {
+      "epoch": 0.5547819185396299,
+      "grad_norm": 0.12865352630615234,
+      "learning_rate": 0.00017782075335546254,
+      "loss": 0.1739,
+      "step": 7689
+    },
+    {
+      "epoch": 0.5548540712146903,
+      "grad_norm": 0.16174176335334778,
+      "learning_rate": 0.0001778178669360658,
+      "loss": 0.1199,
+      "step": 7690
+    },
+    {
+      "epoch": 0.5549262238897507,
+      "grad_norm": 0.16539667546749115,
+      "learning_rate": 0.00017781498051666907,
+      "loss": 0.1874,
+      "step": 7691
+    },
+    {
+      "epoch": 0.5549983765648111,
+      "grad_norm": 0.1633646935224533,
+      "learning_rate": 0.00017781209409727236,
+      "loss": 0.1506,
+      "step": 7692
+    },
+    {
+      "epoch": 0.5550705292398715,
+      "grad_norm": 0.12402980774641037,
+      "learning_rate": 0.00017780920767787562,
+      "loss": 0.1601,
+      "step": 7693
+    },
+    {
+      "epoch": 0.555142681914932,
+      "grad_norm": 0.12489049881696701,
+      "learning_rate": 0.00017780632125847888,
+      "loss": 0.1614,
+      "step": 7694
+    },
+    {
+      "epoch": 0.5552148345899924,
+      "grad_norm": 0.09997840225696564,
+      "learning_rate": 0.00017780343483908212,
+      "loss": 0.1898,
+      "step": 7695
+    },
+    {
+      "epoch": 0.5552869872650529,
+      "grad_norm": 0.10950327664613724,
+      "learning_rate": 0.00017780054841968538,
+      "loss": 0.1264,
+      "step": 7696
+    },
+    {
+      "epoch": 0.5553591399401133,
+      "grad_norm": 0.14299684762954712,
+      "learning_rate": 0.00017779766200028864,
+      "loss": 0.1656,
+      "step": 7697
+    },
+    {
+      "epoch": 0.5554312926151737,
+      "grad_norm": 0.11332228034734726,
+      "learning_rate": 0.0001777947755808919,
+      "loss": 0.202,
+      "step": 7698
+    },
+    {
+      "epoch": 0.5555034452902341,
+      "grad_norm": 0.11944280564785004,
+      "learning_rate": 0.0001777918891614952,
+      "loss": 0.1409,
+      "step": 7699
+    },
+    {
+      "epoch": 0.5555755979652945,
+      "grad_norm": 0.1238955482840538,
+      "learning_rate": 0.00017778900274209843,
+      "loss": 0.2241,
+      "step": 7700
+    },
+    {
+      "epoch": 0.555647750640355,
+      "grad_norm": 0.12767411768436432,
+      "learning_rate": 0.0001777861163227017,
+      "loss": 0.1316,
+      "step": 7701
+    },
+    {
+      "epoch": 0.5557199033154154,
+      "grad_norm": 0.14630673825740814,
+      "learning_rate": 0.00017778322990330496,
+      "loss": 0.1333,
+      "step": 7702
+    },
+    {
+      "epoch": 0.5557920559904759,
+      "grad_norm": 0.15821218490600586,
+      "learning_rate": 0.00017778034348390822,
+      "loss": 0.1814,
+      "step": 7703
+    },
+    {
+      "epoch": 0.5558642086655363,
+      "grad_norm": 0.1112324520945549,
+      "learning_rate": 0.00017777745706451149,
+      "loss": 0.1542,
+      "step": 7704
+    },
+    {
+      "epoch": 0.5559363613405967,
+      "grad_norm": 0.12217922508716583,
+      "learning_rate": 0.00017777457064511475,
+      "loss": 0.1775,
+      "step": 7705
+    },
+    {
+      "epoch": 0.5560085140156571,
+      "grad_norm": 0.16339953243732452,
+      "learning_rate": 0.000177771684225718,
+      "loss": 0.0958,
+      "step": 7706
+    },
+    {
+      "epoch": 0.5560806666907175,
+      "grad_norm": 0.1335275024175644,
+      "learning_rate": 0.00017776879780632127,
+      "loss": 0.1565,
+      "step": 7707
+    },
+    {
+      "epoch": 0.556152819365778,
+      "grad_norm": 0.11807729303836823,
+      "learning_rate": 0.00017776591138692454,
+      "loss": 0.1073,
+      "step": 7708
+    },
+    {
+      "epoch": 0.5562249720408384,
+      "grad_norm": 0.13649781048297882,
+      "learning_rate": 0.0001777630249675278,
+      "loss": 0.118,
+      "step": 7709
+    },
+    {
+      "epoch": 0.5562971247158989,
+      "grad_norm": 0.11952555924654007,
+      "learning_rate": 0.00017776013854813106,
+      "loss": 0.1918,
+      "step": 7710
+    },
+    {
+      "epoch": 0.5563692773909593,
+      "grad_norm": 0.14145714044570923,
+      "learning_rate": 0.0001777572521287343,
+      "loss": 0.1392,
+      "step": 7711
+    },
+    {
+      "epoch": 0.5564414300660196,
+      "grad_norm": 0.12335800379514694,
+      "learning_rate": 0.00017775436570933756,
+      "loss": 0.1684,
+      "step": 7712
+    },
+    {
+      "epoch": 0.5565135827410801,
+      "grad_norm": 0.16189730167388916,
+      "learning_rate": 0.00017775147928994085,
+      "loss": 0.194,
+      "step": 7713
+    },
+    {
+      "epoch": 0.5565857354161405,
+      "grad_norm": 0.11122278869152069,
+      "learning_rate": 0.00017774859287054411,
+      "loss": 0.1809,
+      "step": 7714
+    },
+    {
+      "epoch": 0.556657888091201,
+      "grad_norm": 0.1196560338139534,
+      "learning_rate": 0.00017774570645114738,
+      "loss": 0.1621,
+      "step": 7715
+    },
+    {
+      "epoch": 0.5567300407662614,
+      "grad_norm": 0.11895806342363358,
+      "learning_rate": 0.0001777428200317506,
+      "loss": 0.1817,
+      "step": 7716
+    },
+    {
+      "epoch": 0.5568021934413219,
+      "grad_norm": 0.12140301614999771,
+      "learning_rate": 0.00017773993361235388,
+      "loss": 0.1284,
+      "step": 7717
+    },
+    {
+      "epoch": 0.5568743461163823,
+      "grad_norm": 0.10380367189645767,
+      "learning_rate": 0.00017773704719295714,
+      "loss": 0.169,
+      "step": 7718
+    },
+    {
+      "epoch": 0.5569464987914426,
+      "grad_norm": 0.12693089246749878,
+      "learning_rate": 0.0001777341607735604,
+      "loss": 0.1258,
+      "step": 7719
+    },
+    {
+      "epoch": 0.5570186514665031,
+      "grad_norm": 0.1187371239066124,
+      "learning_rate": 0.0001777312743541637,
+      "loss": 0.1467,
+      "step": 7720
+    },
+    {
+      "epoch": 0.5570908041415635,
+      "grad_norm": 0.1260175108909607,
+      "learning_rate": 0.00017772838793476693,
+      "loss": 0.172,
+      "step": 7721
+    },
+    {
+      "epoch": 0.557162956816624,
+      "grad_norm": 0.11985334753990173,
+      "learning_rate": 0.0001777255015153702,
+      "loss": 0.1496,
+      "step": 7722
+    },
+    {
+      "epoch": 0.5572351094916844,
+      "grad_norm": 0.11689590662717819,
+      "learning_rate": 0.00017772261509597345,
+      "loss": 0.1327,
+      "step": 7723
+    },
+    {
+      "epoch": 0.5573072621667449,
+      "grad_norm": 0.139571875333786,
+      "learning_rate": 0.00017771972867657672,
+      "loss": 0.1196,
+      "step": 7724
+    },
+    {
+      "epoch": 0.5573794148418053,
+      "grad_norm": 0.158230260014534,
+      "learning_rate": 0.00017771684225717998,
+      "loss": 0.138,
+      "step": 7725
+    },
+    {
+      "epoch": 0.5574515675168656,
+      "grad_norm": 0.14422976970672607,
+      "learning_rate": 0.00017771395583778324,
+      "loss": 0.1767,
+      "step": 7726
+    },
+    {
+      "epoch": 0.5575237201919261,
+      "grad_norm": 0.08607729524374008,
+      "learning_rate": 0.0001777110694183865,
+      "loss": 0.1117,
+      "step": 7727
+    },
+    {
+      "epoch": 0.5575958728669865,
+      "grad_norm": 0.11835624277591705,
+      "learning_rate": 0.00017770818299898977,
+      "loss": 0.1872,
+      "step": 7728
+    },
+    {
+      "epoch": 0.557668025542047,
+      "grad_norm": 0.16040194034576416,
+      "learning_rate": 0.00017770529657959303,
+      "loss": 0.1255,
+      "step": 7729
+    },
+    {
+      "epoch": 0.5577401782171074,
+      "grad_norm": 0.11539598554372787,
+      "learning_rate": 0.0001777024101601963,
+      "loss": 0.1598,
+      "step": 7730
+    },
+    {
+      "epoch": 0.5578123308921679,
+      "grad_norm": 0.1356775462627411,
+      "learning_rate": 0.00017769952374079956,
+      "loss": 0.161,
+      "step": 7731
+    },
+    {
+      "epoch": 0.5578844835672283,
+      "grad_norm": 0.11297253519296646,
+      "learning_rate": 0.0001776966373214028,
+      "loss": 0.1604,
+      "step": 7732
+    },
+    {
+      "epoch": 0.5579566362422886,
+      "grad_norm": 0.11831973493099213,
+      "learning_rate": 0.00017769375090200606,
+      "loss": 0.178,
+      "step": 7733
+    },
+    {
+      "epoch": 0.5580287889173491,
+      "grad_norm": 0.1246437281370163,
+      "learning_rate": 0.00017769086448260935,
+      "loss": 0.1518,
+      "step": 7734
+    },
+    {
+      "epoch": 0.5581009415924095,
+      "grad_norm": 0.15455931425094604,
+      "learning_rate": 0.0001776879780632126,
+      "loss": 0.173,
+      "step": 7735
+    },
+    {
+      "epoch": 0.55817309426747,
+      "grad_norm": 0.11363532394170761,
+      "learning_rate": 0.00017768509164381587,
+      "loss": 0.1335,
+      "step": 7736
+    },
+    {
+      "epoch": 0.5582452469425304,
+      "grad_norm": 0.11740487068891525,
+      "learning_rate": 0.0001776822052244191,
+      "loss": 0.1696,
+      "step": 7737
+    },
+    {
+      "epoch": 0.5583173996175909,
+      "grad_norm": 0.1704363375902176,
+      "learning_rate": 0.00017767931880502237,
+      "loss": 0.1614,
+      "step": 7738
+    },
+    {
+      "epoch": 0.5583895522926513,
+      "grad_norm": 0.11599764227867126,
+      "learning_rate": 0.00017767643238562563,
+      "loss": 0.1438,
+      "step": 7739
+    },
+    {
+      "epoch": 0.5584617049677116,
+      "grad_norm": 0.12581701576709747,
+      "learning_rate": 0.0001776735459662289,
+      "loss": 0.1482,
+      "step": 7740
+    },
+    {
+      "epoch": 0.5585338576427721,
+      "grad_norm": 0.13323472440242767,
+      "learning_rate": 0.00017767065954683216,
+      "loss": 0.1381,
+      "step": 7741
+    },
+    {
+      "epoch": 0.5586060103178325,
+      "grad_norm": 0.1303575187921524,
+      "learning_rate": 0.00017766777312743542,
+      "loss": 0.1571,
+      "step": 7742
+    },
+    {
+      "epoch": 0.558678162992893,
+      "grad_norm": 0.12017255276441574,
+      "learning_rate": 0.00017766488670803868,
+      "loss": 0.1685,
+      "step": 7743
+    },
+    {
+      "epoch": 0.5587503156679534,
+      "grad_norm": 0.12586411833763123,
+      "learning_rate": 0.00017766200028864195,
+      "loss": 0.1704,
+      "step": 7744
+    },
+    {
+      "epoch": 0.5588224683430139,
+      "grad_norm": 0.1185087338089943,
+      "learning_rate": 0.0001776591138692452,
+      "loss": 0.1719,
+      "step": 7745
+    },
+    {
+      "epoch": 0.5588946210180743,
+      "grad_norm": 0.1333579421043396,
+      "learning_rate": 0.00017765622744984847,
+      "loss": 0.1062,
+      "step": 7746
+    },
+    {
+      "epoch": 0.5589667736931346,
+      "grad_norm": 0.1394158899784088,
+      "learning_rate": 0.00017765334103045174,
+      "loss": 0.1787,
+      "step": 7747
+    },
+    {
+      "epoch": 0.5590389263681951,
+      "grad_norm": 0.1150098666548729,
+      "learning_rate": 0.00017765045461105497,
+      "loss": 0.1542,
+      "step": 7748
+    },
+    {
+      "epoch": 0.5591110790432555,
+      "grad_norm": 0.12589210271835327,
+      "learning_rate": 0.00017764756819165826,
+      "loss": 0.1628,
+      "step": 7749
+    },
+    {
+      "epoch": 0.559183231718316,
+      "grad_norm": 0.10655258595943451,
+      "learning_rate": 0.00017764468177226153,
+      "loss": 0.1381,
+      "step": 7750
+    },
+    {
+      "epoch": 0.5592553843933764,
+      "grad_norm": 0.16475246846675873,
+      "learning_rate": 0.0001776417953528648,
+      "loss": 0.1519,
+      "step": 7751
+    },
+    {
+      "epoch": 0.5593275370684369,
+      "grad_norm": 0.1504022479057312,
+      "learning_rate": 0.00017763890893346805,
+      "loss": 0.1332,
+      "step": 7752
+    },
+    {
+      "epoch": 0.5593996897434972,
+      "grad_norm": 0.11250517517328262,
+      "learning_rate": 0.0001776360225140713,
+      "loss": 0.1696,
+      "step": 7753
+    },
+    {
+      "epoch": 0.5594718424185576,
+      "grad_norm": 0.10336173325777054,
+      "learning_rate": 0.00017763313609467455,
+      "loss": 0.174,
+      "step": 7754
+    },
+    {
+      "epoch": 0.5595439950936181,
+      "grad_norm": 0.12454720586538315,
+      "learning_rate": 0.0001776302496752778,
+      "loss": 0.1843,
+      "step": 7755
+    },
+    {
+      "epoch": 0.5596161477686785,
+      "grad_norm": 0.11423910409212112,
+      "learning_rate": 0.0001776273632558811,
+      "loss": 0.1572,
+      "step": 7756
+    },
+    {
+      "epoch": 0.559688300443739,
+      "grad_norm": 0.13247184455394745,
+      "learning_rate": 0.00017762447683648437,
+      "loss": 0.1453,
+      "step": 7757
+    },
+    {
+      "epoch": 0.5597604531187994,
+      "grad_norm": 0.10575006902217865,
+      "learning_rate": 0.0001776215904170876,
+      "loss": 0.0785,
+      "step": 7758
+    },
+    {
+      "epoch": 0.5598326057938598,
+      "grad_norm": 0.11786968261003494,
+      "learning_rate": 0.00017761870399769086,
+      "loss": 0.1604,
+      "step": 7759
+    },
+    {
+      "epoch": 0.5599047584689202,
+      "grad_norm": 0.12994790077209473,
+      "learning_rate": 0.00017761581757829413,
+      "loss": 0.1449,
+      "step": 7760
+    },
+    {
+      "epoch": 0.5599769111439806,
+      "grad_norm": 0.15007157623767853,
+      "learning_rate": 0.0001776129311588974,
+      "loss": 0.1944,
+      "step": 7761
+    },
+    {
+      "epoch": 0.5600490638190411,
+      "grad_norm": 0.18437841534614563,
+      "learning_rate": 0.00017761004473950065,
+      "loss": 0.1149,
+      "step": 7762
+    },
+    {
+      "epoch": 0.5601212164941015,
+      "grad_norm": 0.11152983456850052,
+      "learning_rate": 0.00017760715832010392,
+      "loss": 0.1887,
+      "step": 7763
+    },
+    {
+      "epoch": 0.560193369169162,
+      "grad_norm": 0.12899920344352722,
+      "learning_rate": 0.00017760427190070718,
+      "loss": 0.1235,
+      "step": 7764
+    },
+    {
+      "epoch": 0.5602655218442224,
+      "grad_norm": 0.11892049759626389,
+      "learning_rate": 0.00017760138548131044,
+      "loss": 0.1411,
+      "step": 7765
+    },
+    {
+      "epoch": 0.5603376745192828,
+      "grad_norm": 0.10350506752729416,
+      "learning_rate": 0.0001775984990619137,
+      "loss": 0.1875,
+      "step": 7766
+    },
+    {
+      "epoch": 0.5604098271943432,
+      "grad_norm": 0.11183896660804749,
+      "learning_rate": 0.00017759561264251697,
+      "loss": 0.1592,
+      "step": 7767
+    },
+    {
+      "epoch": 0.5604819798694036,
+      "grad_norm": 0.11698009818792343,
+      "learning_rate": 0.00017759272622312023,
+      "loss": 0.144,
+      "step": 7768
+    },
+    {
+      "epoch": 0.5605541325444641,
+      "grad_norm": 0.11330104619264603,
+      "learning_rate": 0.00017758983980372347,
+      "loss": 0.1357,
+      "step": 7769
+    },
+    {
+      "epoch": 0.5606262852195245,
+      "grad_norm": 0.11938729137182236,
+      "learning_rate": 0.00017758695338432676,
+      "loss": 0.1204,
+      "step": 7770
+    },
+    {
+      "epoch": 0.560698437894585,
+      "grad_norm": 0.11675950139760971,
+      "learning_rate": 0.00017758406696493002,
+      "loss": 0.1239,
+      "step": 7771
+    },
+    {
+      "epoch": 0.5607705905696454,
+      "grad_norm": 0.12216418981552124,
+      "learning_rate": 0.00017758118054553328,
+      "loss": 0.1392,
+      "step": 7772
+    },
+    {
+      "epoch": 0.5608427432447058,
+      "grad_norm": 0.11849422752857208,
+      "learning_rate": 0.00017757829412613655,
+      "loss": 0.1702,
+      "step": 7773
+    },
+    {
+      "epoch": 0.5609148959197662,
+      "grad_norm": 0.12819740176200867,
+      "learning_rate": 0.00017757540770673978,
+      "loss": 0.1424,
+      "step": 7774
+    },
+    {
+      "epoch": 0.5609870485948266,
+      "grad_norm": 0.12744930386543274,
+      "learning_rate": 0.00017757252128734304,
+      "loss": 0.1797,
+      "step": 7775
+    },
+    {
+      "epoch": 0.5610592012698871,
+      "grad_norm": 0.11002054065465927,
+      "learning_rate": 0.0001775696348679463,
+      "loss": 0.1652,
+      "step": 7776
+    },
+    {
+      "epoch": 0.5611313539449475,
+      "grad_norm": 0.11307639628648758,
+      "learning_rate": 0.0001775667484485496,
+      "loss": 0.1562,
+      "step": 7777
+    },
+    {
+      "epoch": 0.561203506620008,
+      "grad_norm": 0.1282927691936493,
+      "learning_rate": 0.00017756386202915286,
+      "loss": 0.1614,
+      "step": 7778
+    },
+    {
+      "epoch": 0.5612756592950684,
+      "grad_norm": 0.10930712521076202,
+      "learning_rate": 0.0001775609756097561,
+      "loss": 0.123,
+      "step": 7779
+    },
+    {
+      "epoch": 0.5613478119701288,
+      "grad_norm": 0.08749040216207504,
+      "learning_rate": 0.00017755808919035936,
+      "loss": 0.1138,
+      "step": 7780
+    },
+    {
+      "epoch": 0.5614199646451892,
+      "grad_norm": 0.1288251429796219,
+      "learning_rate": 0.00017755520277096262,
+      "loss": 0.1957,
+      "step": 7781
+    },
+    {
+      "epoch": 0.5614921173202496,
+      "grad_norm": 0.10661497712135315,
+      "learning_rate": 0.00017755231635156588,
+      "loss": 0.1582,
+      "step": 7782
+    },
+    {
+      "epoch": 0.5615642699953101,
+      "grad_norm": 0.1347241997718811,
+      "learning_rate": 0.00017754942993216915,
+      "loss": 0.1825,
+      "step": 7783
+    },
+    {
+      "epoch": 0.5616364226703705,
+      "grad_norm": 0.1108936294913292,
+      "learning_rate": 0.0001775465435127724,
+      "loss": 0.1479,
+      "step": 7784
+    },
+    {
+      "epoch": 0.561708575345431,
+      "grad_norm": 0.1376233994960785,
+      "learning_rate": 0.00017754365709337567,
+      "loss": 0.157,
+      "step": 7785
+    },
+    {
+      "epoch": 0.5617807280204914,
+      "grad_norm": 0.10962553322315216,
+      "learning_rate": 0.00017754077067397894,
+      "loss": 0.12,
+      "step": 7786
+    },
+    {
+      "epoch": 0.5618528806955518,
+      "grad_norm": 0.1102694571018219,
+      "learning_rate": 0.0001775378842545822,
+      "loss": 0.1318,
+      "step": 7787
+    },
+    {
+      "epoch": 0.5619250333706122,
+      "grad_norm": 0.10344778746366501,
+      "learning_rate": 0.00017753499783518546,
+      "loss": 0.1434,
+      "step": 7788
+    },
+    {
+      "epoch": 0.5619971860456726,
+      "grad_norm": 0.15022890269756317,
+      "learning_rate": 0.00017753211141578873,
+      "loss": 0.1926,
+      "step": 7789
+    },
+    {
+      "epoch": 0.5620693387207331,
+      "grad_norm": 0.1173153966665268,
+      "learning_rate": 0.00017752922499639196,
+      "loss": 0.1327,
+      "step": 7790
+    },
+    {
+      "epoch": 0.5621414913957935,
+      "grad_norm": 0.1331547051668167,
+      "learning_rate": 0.00017752633857699525,
+      "loss": 0.1249,
+      "step": 7791
+    },
+    {
+      "epoch": 0.562213644070854,
+      "grad_norm": 0.11356645822525024,
+      "learning_rate": 0.00017752345215759851,
+      "loss": 0.1685,
+      "step": 7792
+    },
+    {
+      "epoch": 0.5622857967459144,
+      "grad_norm": 0.11696729809045792,
+      "learning_rate": 0.00017752056573820178,
+      "loss": 0.2,
+      "step": 7793
+    },
+    {
+      "epoch": 0.5623579494209748,
+      "grad_norm": 0.15943534672260284,
+      "learning_rate": 0.00017751767931880504,
+      "loss": 0.1918,
+      "step": 7794
+    },
+    {
+      "epoch": 0.5624301020960352,
+      "grad_norm": 0.12938512861728668,
+      "learning_rate": 0.00017751479289940828,
+      "loss": 0.133,
+      "step": 7795
+    },
+    {
+      "epoch": 0.5625022547710956,
+      "grad_norm": 0.11734830588102341,
+      "learning_rate": 0.00017751190648001154,
+      "loss": 0.1261,
+      "step": 7796
+    },
+    {
+      "epoch": 0.562574407446156,
+      "grad_norm": 0.11850757896900177,
+      "learning_rate": 0.0001775090200606148,
+      "loss": 0.1456,
+      "step": 7797
+    },
+    {
+      "epoch": 0.5626465601212165,
+      "grad_norm": 0.11291621625423431,
+      "learning_rate": 0.0001775061336412181,
+      "loss": 0.1857,
+      "step": 7798
+    },
+    {
+      "epoch": 0.5627187127962769,
+      "grad_norm": 0.09668024629354477,
+      "learning_rate": 0.00017750324722182135,
+      "loss": 0.1862,
+      "step": 7799
+    },
+    {
+      "epoch": 0.5627908654713374,
+      "grad_norm": 0.1215052530169487,
+      "learning_rate": 0.0001775003608024246,
+      "loss": 0.1662,
+      "step": 7800
+    },
+    {
+      "epoch": 0.5628630181463978,
+      "grad_norm": 0.1308431327342987,
+      "learning_rate": 0.00017749747438302785,
+      "loss": 0.2088,
+      "step": 7801
+    },
+    {
+      "epoch": 0.5629351708214582,
+      "grad_norm": 0.12154701352119446,
+      "learning_rate": 0.00017749458796363112,
+      "loss": 0.169,
+      "step": 7802
+    },
+    {
+      "epoch": 0.5630073234965186,
+      "grad_norm": 0.15033183991909027,
+      "learning_rate": 0.00017749170154423438,
+      "loss": 0.15,
+      "step": 7803
+    },
+    {
+      "epoch": 0.563079476171579,
+      "grad_norm": 0.11695659905672073,
+      "learning_rate": 0.00017748881512483764,
+      "loss": 0.0921,
+      "step": 7804
+    },
+    {
+      "epoch": 0.5631516288466395,
+      "grad_norm": 0.10494501143693924,
+      "learning_rate": 0.0001774859287054409,
+      "loss": 0.1731,
+      "step": 7805
+    },
+    {
+      "epoch": 0.5632237815216999,
+      "grad_norm": 0.10611262172460556,
+      "learning_rate": 0.00017748304228604417,
+      "loss": 0.1566,
+      "step": 7806
+    },
+    {
+      "epoch": 0.5632959341967604,
+      "grad_norm": 0.15995727479457855,
+      "learning_rate": 0.00017748015586664743,
+      "loss": 0.1528,
+      "step": 7807
+    },
+    {
+      "epoch": 0.5633680868718208,
+      "grad_norm": 0.14674517512321472,
+      "learning_rate": 0.0001774772694472507,
+      "loss": 0.148,
+      "step": 7808
+    },
+    {
+      "epoch": 0.5634402395468812,
+      "grad_norm": 0.13919983804225922,
+      "learning_rate": 0.00017747438302785396,
+      "loss": 0.1798,
+      "step": 7809
+    },
+    {
+      "epoch": 0.5635123922219416,
+      "grad_norm": 0.10498116910457611,
+      "learning_rate": 0.00017747149660845722,
+      "loss": 0.1377,
+      "step": 7810
+    },
+    {
+      "epoch": 0.563584544897002,
+      "grad_norm": 0.14174553751945496,
+      "learning_rate": 0.00017746861018906048,
+      "loss": 0.1471,
+      "step": 7811
+    },
+    {
+      "epoch": 0.5636566975720625,
+      "grad_norm": 0.11989643424749374,
+      "learning_rate": 0.00017746572376966375,
+      "loss": 0.1488,
+      "step": 7812
+    },
+    {
+      "epoch": 0.5637288502471229,
+      "grad_norm": 0.13585850596427917,
+      "learning_rate": 0.000177462837350267,
+      "loss": 0.2016,
+      "step": 7813
+    },
+    {
+      "epoch": 0.5638010029221834,
+      "grad_norm": 0.11506017297506332,
+      "learning_rate": 0.00017745995093087027,
+      "loss": 0.1419,
+      "step": 7814
+    },
+    {
+      "epoch": 0.5638731555972437,
+      "grad_norm": 0.13118226826190948,
+      "learning_rate": 0.00017745706451147353,
+      "loss": 0.1764,
+      "step": 7815
+    },
+    {
+      "epoch": 0.5639453082723042,
+      "grad_norm": 0.1246950775384903,
+      "learning_rate": 0.0001774541780920768,
+      "loss": 0.1055,
+      "step": 7816
+    },
+    {
+      "epoch": 0.5640174609473646,
+      "grad_norm": 0.13198302686214447,
+      "learning_rate": 0.00017745129167268003,
+      "loss": 0.118,
+      "step": 7817
+    },
+    {
+      "epoch": 0.564089613622425,
+      "grad_norm": 0.10069485753774643,
+      "learning_rate": 0.0001774484052532833,
+      "loss": 0.1893,
+      "step": 7818
+    },
+    {
+      "epoch": 0.5641617662974855,
+      "grad_norm": 0.10112564265727997,
+      "learning_rate": 0.00017744551883388659,
+      "loss": 0.1307,
+      "step": 7819
+    },
+    {
+      "epoch": 0.5642339189725459,
+      "grad_norm": 0.12570911645889282,
+      "learning_rate": 0.00017744263241448985,
+      "loss": 0.1566,
+      "step": 7820
+    },
+    {
+      "epoch": 0.5643060716476064,
+      "grad_norm": 0.12907375395298004,
+      "learning_rate": 0.0001774397459950931,
+      "loss": 0.1428,
+      "step": 7821
+    },
+    {
+      "epoch": 0.5643782243226667,
+      "grad_norm": 0.10935437679290771,
+      "learning_rate": 0.00017743685957569635,
+      "loss": 0.1633,
+      "step": 7822
+    },
+    {
+      "epoch": 0.5644503769977272,
+      "grad_norm": 0.11014962941408157,
+      "learning_rate": 0.0001774339731562996,
+      "loss": 0.1644,
+      "step": 7823
+    },
+    {
+      "epoch": 0.5645225296727876,
+      "grad_norm": 0.10696630924940109,
+      "learning_rate": 0.00017743108673690287,
+      "loss": 0.1317,
+      "step": 7824
+    },
+    {
+      "epoch": 0.564594682347848,
+      "grad_norm": 0.11150635778903961,
+      "learning_rate": 0.00017742820031750614,
+      "loss": 0.1669,
+      "step": 7825
+    },
+    {
+      "epoch": 0.5646668350229085,
+      "grad_norm": 0.17067500948905945,
+      "learning_rate": 0.00017742531389810943,
+      "loss": 0.2251,
+      "step": 7826
+    },
+    {
+      "epoch": 0.5647389876979689,
+      "grad_norm": 0.10682345926761627,
+      "learning_rate": 0.00017742242747871266,
+      "loss": 0.1934,
+      "step": 7827
+    },
+    {
+      "epoch": 0.5648111403730294,
+      "grad_norm": 0.10371333360671997,
+      "learning_rate": 0.00017741954105931592,
+      "loss": 0.1336,
+      "step": 7828
+    },
+    {
+      "epoch": 0.5648832930480897,
+      "grad_norm": 0.10731291025876999,
+      "learning_rate": 0.0001774166546399192,
+      "loss": 0.1173,
+      "step": 7829
+    },
+    {
+      "epoch": 0.5649554457231502,
+      "grad_norm": 0.10326001793146133,
+      "learning_rate": 0.00017741376822052245,
+      "loss": 0.1219,
+      "step": 7830
+    },
+    {
+      "epoch": 0.5650275983982106,
+      "grad_norm": 0.1006198599934578,
+      "learning_rate": 0.00017741088180112571,
+      "loss": 0.1796,
+      "step": 7831
+    },
+    {
+      "epoch": 0.565099751073271,
+      "grad_norm": 0.1153801754117012,
+      "learning_rate": 0.00017740799538172898,
+      "loss": 0.1585,
+      "step": 7832
+    },
+    {
+      "epoch": 0.5651719037483315,
+      "grad_norm": 0.11774788796901703,
+      "learning_rate": 0.00017740510896233224,
+      "loss": 0.2245,
+      "step": 7833
+    },
+    {
+      "epoch": 0.5652440564233919,
+      "grad_norm": 0.1453564465045929,
+      "learning_rate": 0.0001774022225429355,
+      "loss": 0.1558,
+      "step": 7834
+    },
+    {
+      "epoch": 0.5653162090984524,
+      "grad_norm": 0.10883069783449173,
+      "learning_rate": 0.00017739933612353877,
+      "loss": 0.1848,
+      "step": 7835
+    },
+    {
+      "epoch": 0.5653883617735127,
+      "grad_norm": 0.1174570843577385,
+      "learning_rate": 0.00017739644970414203,
+      "loss": 0.1599,
+      "step": 7836
+    },
+    {
+      "epoch": 0.5654605144485731,
+      "grad_norm": 0.10084299743175507,
+      "learning_rate": 0.0001773935632847453,
+      "loss": 0.1504,
+      "step": 7837
+    },
+    {
+      "epoch": 0.5655326671236336,
+      "grad_norm": 0.13395412266254425,
+      "learning_rate": 0.00017739067686534853,
+      "loss": 0.1339,
+      "step": 7838
+    },
+    {
+      "epoch": 0.565604819798694,
+      "grad_norm": 0.13974985480308533,
+      "learning_rate": 0.0001773877904459518,
+      "loss": 0.1623,
+      "step": 7839
+    },
+    {
+      "epoch": 0.5656769724737545,
+      "grad_norm": 0.14301472902297974,
+      "learning_rate": 0.00017738490402655508,
+      "loss": 0.1551,
+      "step": 7840
+    },
+    {
+      "epoch": 0.5657491251488149,
+      "grad_norm": 0.13463523983955383,
+      "learning_rate": 0.00017738201760715834,
+      "loss": 0.1478,
+      "step": 7841
+    },
+    {
+      "epoch": 0.5658212778238754,
+      "grad_norm": 0.1306980699300766,
+      "learning_rate": 0.0001773791311877616,
+      "loss": 0.1303,
+      "step": 7842
+    },
+    {
+      "epoch": 0.5658934304989357,
+      "grad_norm": 0.14141131937503815,
+      "learning_rate": 0.00017737624476836484,
+      "loss": 0.1289,
+      "step": 7843
+    },
+    {
+      "epoch": 0.5659655831739961,
+      "grad_norm": 0.183505579829216,
+      "learning_rate": 0.0001773733583489681,
+      "loss": 0.1868,
+      "step": 7844
+    },
+    {
+      "epoch": 0.5660377358490566,
+      "grad_norm": 0.11580534279346466,
+      "learning_rate": 0.00017737047192957137,
+      "loss": 0.1245,
+      "step": 7845
+    },
+    {
+      "epoch": 0.566109888524117,
+      "grad_norm": 0.12077432125806808,
+      "learning_rate": 0.00017736758551017463,
+      "loss": 0.1593,
+      "step": 7846
+    },
+    {
+      "epoch": 0.5661820411991775,
+      "grad_norm": 0.15405291318893433,
+      "learning_rate": 0.00017736469909077792,
+      "loss": 0.153,
+      "step": 7847
+    },
+    {
+      "epoch": 0.5662541938742379,
+      "grad_norm": 0.12104575335979462,
+      "learning_rate": 0.00017736181267138116,
+      "loss": 0.1468,
+      "step": 7848
+    },
+    {
+      "epoch": 0.5663263465492984,
+      "grad_norm": 0.12845638394355774,
+      "learning_rate": 0.00017735892625198442,
+      "loss": 0.1317,
+      "step": 7849
+    },
+    {
+      "epoch": 0.5663984992243587,
+      "grad_norm": 0.10479018837213516,
+      "learning_rate": 0.00017735603983258768,
+      "loss": 0.141,
+      "step": 7850
+    },
+    {
+      "epoch": 0.5664706518994191,
+      "grad_norm": 0.11782775074243546,
+      "learning_rate": 0.00017735315341319094,
+      "loss": 0.158,
+      "step": 7851
+    },
+    {
+      "epoch": 0.5665428045744796,
+      "grad_norm": 0.10476533323526382,
+      "learning_rate": 0.0001773502669937942,
+      "loss": 0.1423,
+      "step": 7852
+    },
+    {
+      "epoch": 0.56661495724954,
+      "grad_norm": 0.1525888293981552,
+      "learning_rate": 0.00017734738057439747,
+      "loss": 0.1121,
+      "step": 7853
+    },
+    {
+      "epoch": 0.5666871099246005,
+      "grad_norm": 0.10566260665655136,
+      "learning_rate": 0.00017734449415500073,
+      "loss": 0.164,
+      "step": 7854
+    },
+    {
+      "epoch": 0.5667592625996609,
+      "grad_norm": 0.12231025099754333,
+      "learning_rate": 0.000177341607735604,
+      "loss": 0.1728,
+      "step": 7855
+    },
+    {
+      "epoch": 0.5668314152747214,
+      "grad_norm": 0.11222410947084427,
+      "learning_rate": 0.00017733872131620726,
+      "loss": 0.1337,
+      "step": 7856
+    },
+    {
+      "epoch": 0.5669035679497817,
+      "grad_norm": 0.13453534245491028,
+      "learning_rate": 0.00017733583489681052,
+      "loss": 0.1505,
+      "step": 7857
+    },
+    {
+      "epoch": 0.5669757206248421,
+      "grad_norm": 0.12299520522356033,
+      "learning_rate": 0.00017733294847741379,
+      "loss": 0.1656,
+      "step": 7858
+    },
+    {
+      "epoch": 0.5670478732999026,
+      "grad_norm": 0.1308135837316513,
+      "learning_rate": 0.00017733006205801702,
+      "loss": 0.1198,
+      "step": 7859
+    },
+    {
+      "epoch": 0.567120025974963,
+      "grad_norm": 0.11543524265289307,
+      "learning_rate": 0.00017732717563862028,
+      "loss": 0.1835,
+      "step": 7860
+    },
+    {
+      "epoch": 0.5671921786500235,
+      "grad_norm": 0.16316701471805573,
+      "learning_rate": 0.00017732428921922357,
+      "loss": 0.1373,
+      "step": 7861
+    },
+    {
+      "epoch": 0.5672643313250839,
+      "grad_norm": 0.1554131805896759,
+      "learning_rate": 0.00017732140279982684,
+      "loss": 0.1163,
+      "step": 7862
+    },
+    {
+      "epoch": 0.5673364840001444,
+      "grad_norm": 0.11200838536024094,
+      "learning_rate": 0.0001773185163804301,
+      "loss": 0.1432,
+      "step": 7863
+    },
+    {
+      "epoch": 0.5674086366752047,
+      "grad_norm": 0.12736523151397705,
+      "learning_rate": 0.00017731562996103334,
+      "loss": 0.1543,
+      "step": 7864
+    },
+    {
+      "epoch": 0.5674807893502651,
+      "grad_norm": 0.13625681400299072,
+      "learning_rate": 0.0001773127435416366,
+      "loss": 0.1217,
+      "step": 7865
+    },
+    {
+      "epoch": 0.5675529420253256,
+      "grad_norm": 0.11235533654689789,
+      "learning_rate": 0.00017730985712223986,
+      "loss": 0.1119,
+      "step": 7866
+    },
+    {
+      "epoch": 0.567625094700386,
+      "grad_norm": 0.11463140696287155,
+      "learning_rate": 0.00017730697070284312,
+      "loss": 0.0992,
+      "step": 7867
+    },
+    {
+      "epoch": 0.5676972473754465,
+      "grad_norm": 0.12850303947925568,
+      "learning_rate": 0.00017730408428344641,
+      "loss": 0.158,
+      "step": 7868
+    },
+    {
+      "epoch": 0.5677694000505069,
+      "grad_norm": 0.10813012719154358,
+      "learning_rate": 0.00017730119786404965,
+      "loss": 0.1255,
+      "step": 7869
+    },
+    {
+      "epoch": 0.5678415527255674,
+      "grad_norm": 0.13995739817619324,
+      "learning_rate": 0.0001772983114446529,
+      "loss": 0.1458,
+      "step": 7870
+    },
+    {
+      "epoch": 0.5679137054006277,
+      "grad_norm": 0.12784218788146973,
+      "learning_rate": 0.00017729542502525618,
+      "loss": 0.1665,
+      "step": 7871
+    },
+    {
+      "epoch": 0.5679858580756881,
+      "grad_norm": 0.10078656673431396,
+      "learning_rate": 0.00017729253860585944,
+      "loss": 0.1704,
+      "step": 7872
+    },
+    {
+      "epoch": 0.5680580107507486,
+      "grad_norm": 0.12124498188495636,
+      "learning_rate": 0.0001772896521864627,
+      "loss": 0.1322,
+      "step": 7873
+    },
+    {
+      "epoch": 0.568130163425809,
+      "grad_norm": 0.11808305233716965,
+      "learning_rate": 0.00017728676576706597,
+      "loss": 0.1296,
+      "step": 7874
+    },
+    {
+      "epoch": 0.5682023161008695,
+      "grad_norm": 0.1439986675977707,
+      "learning_rate": 0.00017728387934766923,
+      "loss": 0.1617,
+      "step": 7875
+    },
+    {
+      "epoch": 0.5682744687759299,
+      "grad_norm": 0.14310899376869202,
+      "learning_rate": 0.0001772809929282725,
+      "loss": 0.1167,
+      "step": 7876
+    },
+    {
+      "epoch": 0.5683466214509902,
+      "grad_norm": 0.16115215420722961,
+      "learning_rate": 0.00017727810650887575,
+      "loss": 0.1566,
+      "step": 7877
+    },
+    {
+      "epoch": 0.5684187741260507,
+      "grad_norm": 0.13886554539203644,
+      "learning_rate": 0.00017727522008947902,
+      "loss": 0.1257,
+      "step": 7878
+    },
+    {
+      "epoch": 0.5684909268011111,
+      "grad_norm": 0.10506069660186768,
+      "learning_rate": 0.00017727233367008228,
+      "loss": 0.1358,
+      "step": 7879
+    },
+    {
+      "epoch": 0.5685630794761716,
+      "grad_norm": 0.14647378027439117,
+      "learning_rate": 0.00017726944725068552,
+      "loss": 0.1508,
+      "step": 7880
+    },
+    {
+      "epoch": 0.568635232151232,
+      "grad_norm": 0.12777476012706757,
+      "learning_rate": 0.00017726656083128878,
+      "loss": 0.1475,
+      "step": 7881
+    },
+    {
+      "epoch": 0.5687073848262925,
+      "grad_norm": 0.11274504661560059,
+      "learning_rate": 0.00017726367441189207,
+      "loss": 0.1705,
+      "step": 7882
+    },
+    {
+      "epoch": 0.5687795375013529,
+      "grad_norm": 0.11977039277553558,
+      "learning_rate": 0.00017726078799249533,
+      "loss": 0.1316,
+      "step": 7883
+    },
+    {
+      "epoch": 0.5688516901764132,
+      "grad_norm": 0.11919888108968735,
+      "learning_rate": 0.0001772579015730986,
+      "loss": 0.1237,
+      "step": 7884
+    },
+    {
+      "epoch": 0.5689238428514737,
+      "grad_norm": 0.14281430840492249,
+      "learning_rate": 0.00017725501515370183,
+      "loss": 0.1786,
+      "step": 7885
+    },
+    {
+      "epoch": 0.5689959955265341,
+      "grad_norm": 0.11657267063856125,
+      "learning_rate": 0.0001772521287343051,
+      "loss": 0.129,
+      "step": 7886
+    },
+    {
+      "epoch": 0.5690681482015946,
+      "grad_norm": 0.1586855798959732,
+      "learning_rate": 0.00017724924231490836,
+      "loss": 0.1367,
+      "step": 7887
+    },
+    {
+      "epoch": 0.569140300876655,
+      "grad_norm": 0.14276975393295288,
+      "learning_rate": 0.00017724635589551162,
+      "loss": 0.1211,
+      "step": 7888
+    },
+    {
+      "epoch": 0.5692124535517155,
+      "grad_norm": 0.1068420559167862,
+      "learning_rate": 0.0001772434694761149,
+      "loss": 0.1461,
+      "step": 7889
+    },
+    {
+      "epoch": 0.5692846062267759,
+      "grad_norm": 0.11131718754768372,
+      "learning_rate": 0.00017724058305671814,
+      "loss": 0.1772,
+      "step": 7890
+    },
+    {
+      "epoch": 0.5693567589018362,
+      "grad_norm": 0.11713793873786926,
+      "learning_rate": 0.0001772376966373214,
+      "loss": 0.1111,
+      "step": 7891
+    },
+    {
+      "epoch": 0.5694289115768967,
+      "grad_norm": 0.09942521899938583,
+      "learning_rate": 0.00017723481021792467,
+      "loss": 0.1052,
+      "step": 7892
+    },
+    {
+      "epoch": 0.5695010642519571,
+      "grad_norm": 0.11540911346673965,
+      "learning_rate": 0.00017723192379852793,
+      "loss": 0.1405,
+      "step": 7893
+    },
+    {
+      "epoch": 0.5695732169270176,
+      "grad_norm": 0.11284588277339935,
+      "learning_rate": 0.0001772290373791312,
+      "loss": 0.1201,
+      "step": 7894
+    },
+    {
+      "epoch": 0.569645369602078,
+      "grad_norm": 0.1278034895658493,
+      "learning_rate": 0.00017722615095973446,
+      "loss": 0.1076,
+      "step": 7895
+    },
+    {
+      "epoch": 0.5697175222771385,
+      "grad_norm": 0.1453760266304016,
+      "learning_rate": 0.00017722326454033772,
+      "loss": 0.2114,
+      "step": 7896
+    },
+    {
+      "epoch": 0.5697896749521989,
+      "grad_norm": 0.106064572930336,
+      "learning_rate": 0.00017722037812094099,
+      "loss": 0.1124,
+      "step": 7897
+    },
+    {
+      "epoch": 0.5698618276272592,
+      "grad_norm": 0.1113254651427269,
+      "learning_rate": 0.00017721749170154425,
+      "loss": 0.137,
+      "step": 7898
+    },
+    {
+      "epoch": 0.5699339803023197,
+      "grad_norm": 0.14589551091194153,
+      "learning_rate": 0.0001772146052821475,
+      "loss": 0.1727,
+      "step": 7899
+    },
+    {
+      "epoch": 0.5700061329773801,
+      "grad_norm": 0.1296132653951645,
+      "learning_rate": 0.00017721171886275077,
+      "loss": 0.15,
+      "step": 7900
+    },
+    {
+      "epoch": 0.5700782856524406,
+      "grad_norm": 0.10612339526414871,
+      "learning_rate": 0.000177208832443354,
+      "loss": 0.1453,
+      "step": 7901
+    },
+    {
+      "epoch": 0.570150438327501,
+      "grad_norm": 0.14799730479717255,
+      "learning_rate": 0.00017720594602395727,
+      "loss": 0.2194,
+      "step": 7902
+    },
+    {
+      "epoch": 0.5702225910025615,
+      "grad_norm": 0.1471415013074875,
+      "learning_rate": 0.00017720305960456056,
+      "loss": 0.1767,
+      "step": 7903
+    },
+    {
+      "epoch": 0.5702947436776219,
+      "grad_norm": 0.1280488222837448,
+      "learning_rate": 0.00017720017318516383,
+      "loss": 0.1058,
+      "step": 7904
+    },
+    {
+      "epoch": 0.5703668963526822,
+      "grad_norm": 0.1329127699136734,
+      "learning_rate": 0.0001771972867657671,
+      "loss": 0.164,
+      "step": 7905
+    },
+    {
+      "epoch": 0.5704390490277427,
+      "grad_norm": 0.11687469482421875,
+      "learning_rate": 0.00017719440034637032,
+      "loss": 0.1635,
+      "step": 7906
+    },
+    {
+      "epoch": 0.5705112017028031,
+      "grad_norm": 0.10324371606111526,
+      "learning_rate": 0.0001771915139269736,
+      "loss": 0.1563,
+      "step": 7907
+    },
+    {
+      "epoch": 0.5705833543778636,
+      "grad_norm": 0.1355886459350586,
+      "learning_rate": 0.00017718862750757685,
+      "loss": 0.2098,
+      "step": 7908
+    },
+    {
+      "epoch": 0.570655507052924,
+      "grad_norm": 0.13116252422332764,
+      "learning_rate": 0.0001771857410881801,
+      "loss": 0.1752,
+      "step": 7909
+    },
+    {
+      "epoch": 0.5707276597279844,
+      "grad_norm": 0.12008823454380035,
+      "learning_rate": 0.0001771828546687834,
+      "loss": 0.1882,
+      "step": 7910
+    },
+    {
+      "epoch": 0.5707998124030449,
+      "grad_norm": 0.1371777504682541,
+      "learning_rate": 0.00017717996824938664,
+      "loss": 0.1357,
+      "step": 7911
+    },
+    {
+      "epoch": 0.5708719650781052,
+      "grad_norm": 0.10057571530342102,
+      "learning_rate": 0.0001771770818299899,
+      "loss": 0.1335,
+      "step": 7912
+    },
+    {
+      "epoch": 0.5709441177531657,
+      "grad_norm": 0.11042293906211853,
+      "learning_rate": 0.00017717419541059316,
+      "loss": 0.1495,
+      "step": 7913
+    },
+    {
+      "epoch": 0.5710162704282261,
+      "grad_norm": 0.12480539083480835,
+      "learning_rate": 0.00017717130899119643,
+      "loss": 0.1661,
+      "step": 7914
+    },
+    {
+      "epoch": 0.5710884231032866,
+      "grad_norm": 0.1366511732339859,
+      "learning_rate": 0.0001771684225717997,
+      "loss": 0.1799,
+      "step": 7915
+    },
+    {
+      "epoch": 0.571160575778347,
+      "grad_norm": 0.12441026419401169,
+      "learning_rate": 0.00017716553615240295,
+      "loss": 0.1783,
+      "step": 7916
+    },
+    {
+      "epoch": 0.5712327284534074,
+      "grad_norm": 0.11140074580907822,
+      "learning_rate": 0.00017716264973300622,
+      "loss": 0.0865,
+      "step": 7917
+    },
+    {
+      "epoch": 0.5713048811284679,
+      "grad_norm": 0.11715801805257797,
+      "learning_rate": 0.00017715976331360948,
+      "loss": 0.1565,
+      "step": 7918
+    },
+    {
+      "epoch": 0.5713770338035282,
+      "grad_norm": 0.09649024903774261,
+      "learning_rate": 0.00017715687689421274,
+      "loss": 0.128,
+      "step": 7919
+    },
+    {
+      "epoch": 0.5714491864785887,
+      "grad_norm": 0.11736408621072769,
+      "learning_rate": 0.000177153990474816,
+      "loss": 0.1478,
+      "step": 7920
+    },
+    {
+      "epoch": 0.5715213391536491,
+      "grad_norm": 0.17529132962226868,
+      "learning_rate": 0.00017715110405541927,
+      "loss": 0.1721,
+      "step": 7921
+    },
+    {
+      "epoch": 0.5715934918287096,
+      "grad_norm": 0.11500423401594162,
+      "learning_rate": 0.00017714821763602253,
+      "loss": 0.1564,
+      "step": 7922
+    },
+    {
+      "epoch": 0.57166564450377,
+      "grad_norm": 0.12164665758609772,
+      "learning_rate": 0.00017714533121662577,
+      "loss": 0.1281,
+      "step": 7923
+    },
+    {
+      "epoch": 0.5717377971788304,
+      "grad_norm": 0.1208866536617279,
+      "learning_rate": 0.00017714244479722906,
+      "loss": 0.1502,
+      "step": 7924
+    },
+    {
+      "epoch": 0.5718099498538909,
+      "grad_norm": 0.11391763389110565,
+      "learning_rate": 0.00017713955837783232,
+      "loss": 0.1341,
+      "step": 7925
+    },
+    {
+      "epoch": 0.5718821025289512,
+      "grad_norm": 0.14047269523143768,
+      "learning_rate": 0.00017713667195843558,
+      "loss": 0.1277,
+      "step": 7926
+    },
+    {
+      "epoch": 0.5719542552040117,
+      "grad_norm": 0.10359933227300644,
+      "learning_rate": 0.00017713378553903885,
+      "loss": 0.1257,
+      "step": 7927
+    },
+    {
+      "epoch": 0.5720264078790721,
+      "grad_norm": 0.12921454012393951,
+      "learning_rate": 0.00017713089911964208,
+      "loss": 0.1704,
+      "step": 7928
+    },
+    {
+      "epoch": 0.5720985605541326,
+      "grad_norm": 0.12614530324935913,
+      "learning_rate": 0.00017712801270024534,
+      "loss": 0.1127,
+      "step": 7929
+    },
+    {
+      "epoch": 0.572170713229193,
+      "grad_norm": 0.1164608746767044,
+      "learning_rate": 0.0001771251262808486,
+      "loss": 0.124,
+      "step": 7930
+    },
+    {
+      "epoch": 0.5722428659042534,
+      "grad_norm": 0.1259773224592209,
+      "learning_rate": 0.0001771222398614519,
+      "loss": 0.1856,
+      "step": 7931
+    },
+    {
+      "epoch": 0.5723150185793139,
+      "grad_norm": 0.12784960865974426,
+      "learning_rate": 0.00017711935344205516,
+      "loss": 0.1383,
+      "step": 7932
+    },
+    {
+      "epoch": 0.5723871712543742,
+      "grad_norm": 0.14621759951114655,
+      "learning_rate": 0.0001771164670226584,
+      "loss": 0.1635,
+      "step": 7933
+    },
+    {
+      "epoch": 0.5724593239294347,
+      "grad_norm": 0.12372894585132599,
+      "learning_rate": 0.00017711358060326166,
+      "loss": 0.1801,
+      "step": 7934
+    },
+    {
+      "epoch": 0.5725314766044951,
+      "grad_norm": 0.1166391670703888,
+      "learning_rate": 0.00017711069418386492,
+      "loss": 0.1538,
+      "step": 7935
+    },
+    {
+      "epoch": 0.5726036292795555,
+      "grad_norm": 0.12102074921131134,
+      "learning_rate": 0.00017710780776446818,
+      "loss": 0.1643,
+      "step": 7936
+    },
+    {
+      "epoch": 0.572675781954616,
+      "grad_norm": 0.12680689990520477,
+      "learning_rate": 0.00017710492134507145,
+      "loss": 0.1837,
+      "step": 7937
+    },
+    {
+      "epoch": 0.5727479346296764,
+      "grad_norm": 0.12156582623720169,
+      "learning_rate": 0.0001771020349256747,
+      "loss": 0.1849,
+      "step": 7938
+    },
+    {
+      "epoch": 0.5728200873047368,
+      "grad_norm": 0.1272418349981308,
+      "learning_rate": 0.00017709914850627797,
+      "loss": 0.1726,
+      "step": 7939
+    },
+    {
+      "epoch": 0.5728922399797972,
+      "grad_norm": 0.11186318844556808,
+      "learning_rate": 0.00017709626208688124,
+      "loss": 0.1104,
+      "step": 7940
+    },
+    {
+      "epoch": 0.5729643926548577,
+      "grad_norm": 0.11623166501522064,
+      "learning_rate": 0.0001770933756674845,
+      "loss": 0.1697,
+      "step": 7941
+    },
+    {
+      "epoch": 0.5730365453299181,
+      "grad_norm": 0.09506198763847351,
+      "learning_rate": 0.00017709048924808776,
+      "loss": 0.1252,
+      "step": 7942
+    },
+    {
+      "epoch": 0.5731086980049785,
+      "grad_norm": 0.1287769079208374,
+      "learning_rate": 0.00017708760282869103,
+      "loss": 0.1801,
+      "step": 7943
+    },
+    {
+      "epoch": 0.573180850680039,
+      "grad_norm": 0.1254848688840866,
+      "learning_rate": 0.00017708471640929426,
+      "loss": 0.0905,
+      "step": 7944
+    },
+    {
+      "epoch": 0.5732530033550994,
+      "grad_norm": 0.13968469202518463,
+      "learning_rate": 0.00017708182998989752,
+      "loss": 0.1201,
+      "step": 7945
+    },
+    {
+      "epoch": 0.5733251560301598,
+      "grad_norm": 0.14913301169872284,
+      "learning_rate": 0.00017707894357050081,
+      "loss": 0.1704,
+      "step": 7946
+    },
+    {
+      "epoch": 0.5733973087052202,
+      "grad_norm": 0.12298578768968582,
+      "learning_rate": 0.00017707605715110408,
+      "loss": 0.1661,
+      "step": 7947
+    },
+    {
+      "epoch": 0.5734694613802807,
+      "grad_norm": 0.13233782351016998,
+      "learning_rate": 0.00017707317073170734,
+      "loss": 0.1633,
+      "step": 7948
+    },
+    {
+      "epoch": 0.5735416140553411,
+      "grad_norm": 0.11294033378362656,
+      "learning_rate": 0.00017707028431231058,
+      "loss": 0.1812,
+      "step": 7949
+    },
+    {
+      "epoch": 0.5736137667304015,
+      "grad_norm": 0.11323747038841248,
+      "learning_rate": 0.00017706739789291384,
+      "loss": 0.1808,
+      "step": 7950
+    },
+    {
+      "epoch": 0.573685919405462,
+      "grad_norm": 0.11524073779582977,
+      "learning_rate": 0.0001770645114735171,
+      "loss": 0.1934,
+      "step": 7951
+    },
+    {
+      "epoch": 0.5737580720805224,
+      "grad_norm": 0.1528632491827011,
+      "learning_rate": 0.00017706162505412036,
+      "loss": 0.2212,
+      "step": 7952
+    },
+    {
+      "epoch": 0.5738302247555828,
+      "grad_norm": 0.11348945647478104,
+      "learning_rate": 0.00017705873863472365,
+      "loss": 0.1783,
+      "step": 7953
+    },
+    {
+      "epoch": 0.5739023774306432,
+      "grad_norm": 0.1193523034453392,
+      "learning_rate": 0.0001770558522153269,
+      "loss": 0.1534,
+      "step": 7954
+    },
+    {
+      "epoch": 0.5739745301057037,
+      "grad_norm": 0.10278993844985962,
+      "learning_rate": 0.00017705296579593015,
+      "loss": 0.1076,
+      "step": 7955
+    },
+    {
+      "epoch": 0.5740466827807641,
+      "grad_norm": 0.22504210472106934,
+      "learning_rate": 0.00017705007937653342,
+      "loss": 0.1806,
+      "step": 7956
+    },
+    {
+      "epoch": 0.5741188354558245,
+      "grad_norm": 0.13772232830524445,
+      "learning_rate": 0.00017704719295713668,
+      "loss": 0.1451,
+      "step": 7957
+    },
+    {
+      "epoch": 0.574190988130885,
+      "grad_norm": 0.12129385769367218,
+      "learning_rate": 0.00017704430653773994,
+      "loss": 0.1349,
+      "step": 7958
+    },
+    {
+      "epoch": 0.5742631408059454,
+      "grad_norm": 0.11908667534589767,
+      "learning_rate": 0.0001770414201183432,
+      "loss": 0.1441,
+      "step": 7959
+    },
+    {
+      "epoch": 0.5743352934810058,
+      "grad_norm": 0.13737215101718903,
+      "learning_rate": 0.00017703853369894647,
+      "loss": 0.1959,
+      "step": 7960
+    },
+    {
+      "epoch": 0.5744074461560662,
+      "grad_norm": 0.10954530537128448,
+      "learning_rate": 0.00017703564727954973,
+      "loss": 0.134,
+      "step": 7961
+    },
+    {
+      "epoch": 0.5744795988311266,
+      "grad_norm": 0.1645643413066864,
+      "learning_rate": 0.000177032760860153,
+      "loss": 0.1874,
+      "step": 7962
+    },
+    {
+      "epoch": 0.5745517515061871,
+      "grad_norm": 0.1279773712158203,
+      "learning_rate": 0.00017702987444075626,
+      "loss": 0.1334,
+      "step": 7963
+    },
+    {
+      "epoch": 0.5746239041812475,
+      "grad_norm": 0.17158547043800354,
+      "learning_rate": 0.00017702698802135952,
+      "loss": 0.1986,
+      "step": 7964
+    },
+    {
+      "epoch": 0.574696056856308,
+      "grad_norm": 0.1652517318725586,
+      "learning_rate": 0.00017702410160196276,
+      "loss": 0.1859,
+      "step": 7965
+    },
+    {
+      "epoch": 0.5747682095313684,
+      "grad_norm": 0.17374536395072937,
+      "learning_rate": 0.00017702121518256602,
+      "loss": 0.1251,
+      "step": 7966
+    },
+    {
+      "epoch": 0.5748403622064288,
+      "grad_norm": 0.1305515170097351,
+      "learning_rate": 0.0001770183287631693,
+      "loss": 0.1499,
+      "step": 7967
+    },
+    {
+      "epoch": 0.5749125148814892,
+      "grad_norm": 0.1515403687953949,
+      "learning_rate": 0.00017701544234377257,
+      "loss": 0.1328,
+      "step": 7968
+    },
+    {
+      "epoch": 0.5749846675565496,
+      "grad_norm": 0.13701438903808594,
+      "learning_rate": 0.00017701255592437583,
+      "loss": 0.1975,
+      "step": 7969
+    },
+    {
+      "epoch": 0.5750568202316101,
+      "grad_norm": 0.12394005060195923,
+      "learning_rate": 0.00017700966950497907,
+      "loss": 0.1507,
+      "step": 7970
+    },
+    {
+      "epoch": 0.5751289729066705,
+      "grad_norm": 0.11440756171941757,
+      "learning_rate": 0.00017700678308558233,
+      "loss": 0.117,
+      "step": 7971
+    },
+    {
+      "epoch": 0.575201125581731,
+      "grad_norm": 0.1346004754304886,
+      "learning_rate": 0.0001770038966661856,
+      "loss": 0.1944,
+      "step": 7972
+    },
+    {
+      "epoch": 0.5752732782567914,
+      "grad_norm": 0.10825420916080475,
+      "learning_rate": 0.00017700101024678886,
+      "loss": 0.1583,
+      "step": 7973
+    },
+    {
+      "epoch": 0.5753454309318518,
+      "grad_norm": 0.1305004507303238,
+      "learning_rate": 0.00017699812382739215,
+      "loss": 0.1128,
+      "step": 7974
+    },
+    {
+      "epoch": 0.5754175836069122,
+      "grad_norm": 0.11992324888706207,
+      "learning_rate": 0.00017699523740799538,
+      "loss": 0.1341,
+      "step": 7975
+    },
+    {
+      "epoch": 0.5754897362819726,
+      "grad_norm": 0.13553780317306519,
+      "learning_rate": 0.00017699235098859865,
+      "loss": 0.1642,
+      "step": 7976
+    },
+    {
+      "epoch": 0.5755618889570331,
+      "grad_norm": 0.13199329376220703,
+      "learning_rate": 0.0001769894645692019,
+      "loss": 0.1994,
+      "step": 7977
+    },
+    {
+      "epoch": 0.5756340416320935,
+      "grad_norm": 0.1123315766453743,
+      "learning_rate": 0.00017698657814980517,
+      "loss": 0.1362,
+      "step": 7978
+    },
+    {
+      "epoch": 0.575706194307154,
+      "grad_norm": 0.12229757755994797,
+      "learning_rate": 0.00017698369173040844,
+      "loss": 0.1647,
+      "step": 7979
+    },
+    {
+      "epoch": 0.5757783469822144,
+      "grad_norm": 0.1358393430709839,
+      "learning_rate": 0.0001769808053110117,
+      "loss": 0.1466,
+      "step": 7980
+    },
+    {
+      "epoch": 0.5758504996572748,
+      "grad_norm": 0.1132424995303154,
+      "learning_rate": 0.00017697791889161496,
+      "loss": 0.1159,
+      "step": 7981
+    },
+    {
+      "epoch": 0.5759226523323352,
+      "grad_norm": 0.15970008075237274,
+      "learning_rate": 0.00017697503247221823,
+      "loss": 0.2145,
+      "step": 7982
+    },
+    {
+      "epoch": 0.5759948050073956,
+      "grad_norm": 0.12362741678953171,
+      "learning_rate": 0.0001769721460528215,
+      "loss": 0.1255,
+      "step": 7983
+    },
+    {
+      "epoch": 0.5760669576824561,
+      "grad_norm": 0.12955516576766968,
+      "learning_rate": 0.00017696925963342475,
+      "loss": 0.1118,
+      "step": 7984
+    },
+    {
+      "epoch": 0.5761391103575165,
+      "grad_norm": 0.14107045531272888,
+      "learning_rate": 0.00017696637321402801,
+      "loss": 0.1415,
+      "step": 7985
+    },
+    {
+      "epoch": 0.576211263032577,
+      "grad_norm": 0.11059821397066116,
+      "learning_rate": 0.00017696348679463125,
+      "loss": 0.1957,
+      "step": 7986
+    },
+    {
+      "epoch": 0.5762834157076374,
+      "grad_norm": 0.13710936903953552,
+      "learning_rate": 0.0001769606003752345,
+      "loss": 0.1357,
+      "step": 7987
+    },
+    {
+      "epoch": 0.5763555683826977,
+      "grad_norm": 0.12946854531764984,
+      "learning_rate": 0.0001769577139558378,
+      "loss": 0.127,
+      "step": 7988
+    },
+    {
+      "epoch": 0.5764277210577582,
+      "grad_norm": 0.11622809618711472,
+      "learning_rate": 0.00017695482753644107,
+      "loss": 0.1296,
+      "step": 7989
+    },
+    {
+      "epoch": 0.5764998737328186,
+      "grad_norm": 0.11052305996417999,
+      "learning_rate": 0.00017695194111704433,
+      "loss": 0.1659,
+      "step": 7990
+    },
+    {
+      "epoch": 0.5765720264078791,
+      "grad_norm": 0.12174511700868607,
+      "learning_rate": 0.00017694905469764756,
+      "loss": 0.1529,
+      "step": 7991
+    },
+    {
+      "epoch": 0.5766441790829395,
+      "grad_norm": 0.1198398694396019,
+      "learning_rate": 0.00017694616827825083,
+      "loss": 0.1802,
+      "step": 7992
+    },
+    {
+      "epoch": 0.576716331758,
+      "grad_norm": 0.1096002608537674,
+      "learning_rate": 0.0001769432818588541,
+      "loss": 0.1478,
+      "step": 7993
+    },
+    {
+      "epoch": 0.5767884844330604,
+      "grad_norm": 0.15888899564743042,
+      "learning_rate": 0.00017694039543945735,
+      "loss": 0.1951,
+      "step": 7994
+    },
+    {
+      "epoch": 0.5768606371081207,
+      "grad_norm": 0.10436540842056274,
+      "learning_rate": 0.00017693750902006064,
+      "loss": 0.1599,
+      "step": 7995
+    },
+    {
+      "epoch": 0.5769327897831812,
+      "grad_norm": 0.14138226211071014,
+      "learning_rate": 0.00017693462260066388,
+      "loss": 0.1255,
+      "step": 7996
+    },
+    {
+      "epoch": 0.5770049424582416,
+      "grad_norm": 0.10548333823680878,
+      "learning_rate": 0.00017693173618126714,
+      "loss": 0.1171,
+      "step": 7997
+    },
+    {
+      "epoch": 0.5770770951333021,
+      "grad_norm": 0.14108656346797943,
+      "learning_rate": 0.0001769288497618704,
+      "loss": 0.1835,
+      "step": 7998
+    },
+    {
+      "epoch": 0.5771492478083625,
+      "grad_norm": 0.128933846950531,
+      "learning_rate": 0.00017692596334247367,
+      "loss": 0.1689,
+      "step": 7999
+    },
+    {
+      "epoch": 0.577221400483423,
+      "grad_norm": 0.10365033894777298,
+      "learning_rate": 0.00017692307692307693,
+      "loss": 0.1361,
+      "step": 8000
+    },
+    {
+      "epoch": 0.5772935531584833,
+      "grad_norm": 0.09977944195270538,
+      "learning_rate": 0.0001769201905036802,
+      "loss": 0.1758,
+      "step": 8001
+    },
+    {
+      "epoch": 0.5773657058335437,
+      "grad_norm": 0.13828906416893005,
+      "learning_rate": 0.00017691730408428346,
+      "loss": 0.1466,
+      "step": 8002
+    },
+    {
+      "epoch": 0.5774378585086042,
+      "grad_norm": 0.09375912696123123,
+      "learning_rate": 0.00017691441766488672,
+      "loss": 0.151,
+      "step": 8003
+    },
+    {
+      "epoch": 0.5775100111836646,
+      "grad_norm": 0.1094922423362732,
+      "learning_rate": 0.00017691153124548998,
+      "loss": 0.1221,
+      "step": 8004
+    },
+    {
+      "epoch": 0.5775821638587251,
+      "grad_norm": 0.12363706529140472,
+      "learning_rate": 0.00017690864482609325,
+      "loss": 0.2183,
+      "step": 8005
+    },
+    {
+      "epoch": 0.5776543165337855,
+      "grad_norm": 0.11408735066652298,
+      "learning_rate": 0.0001769057584066965,
+      "loss": 0.1519,
+      "step": 8006
+    },
+    {
+      "epoch": 0.577726469208846,
+      "grad_norm": 0.11651131510734558,
+      "learning_rate": 0.00017690287198729974,
+      "loss": 0.1517,
+      "step": 8007
+    },
+    {
+      "epoch": 0.5777986218839063,
+      "grad_norm": 0.1508703976869583,
+      "learning_rate": 0.000176899985567903,
+      "loss": 0.1454,
+      "step": 8008
+    },
+    {
+      "epoch": 0.5778707745589667,
+      "grad_norm": 0.13194362819194794,
+      "learning_rate": 0.0001768970991485063,
+      "loss": 0.1315,
+      "step": 8009
+    },
+    {
+      "epoch": 0.5779429272340272,
+      "grad_norm": 0.1377791166305542,
+      "learning_rate": 0.00017689421272910956,
+      "loss": 0.1828,
+      "step": 8010
+    },
+    {
+      "epoch": 0.5780150799090876,
+      "grad_norm": 0.14916688203811646,
+      "learning_rate": 0.00017689132630971282,
+      "loss": 0.1301,
+      "step": 8011
+    },
+    {
+      "epoch": 0.5780872325841481,
+      "grad_norm": 0.12122716754674911,
+      "learning_rate": 0.00017688843989031606,
+      "loss": 0.1358,
+      "step": 8012
+    },
+    {
+      "epoch": 0.5781593852592085,
+      "grad_norm": 0.10407043993473053,
+      "learning_rate": 0.00017688555347091932,
+      "loss": 0.1897,
+      "step": 8013
+    },
+    {
+      "epoch": 0.578231537934269,
+      "grad_norm": 0.13571223616600037,
+      "learning_rate": 0.00017688266705152258,
+      "loss": 0.1735,
+      "step": 8014
+    },
+    {
+      "epoch": 0.5783036906093293,
+      "grad_norm": 0.12706823647022247,
+      "learning_rate": 0.00017687978063212585,
+      "loss": 0.1405,
+      "step": 8015
+    },
+    {
+      "epoch": 0.5783758432843897,
+      "grad_norm": 0.12309744209051132,
+      "learning_rate": 0.00017687689421272914,
+      "loss": 0.1399,
+      "step": 8016
+    },
+    {
+      "epoch": 0.5784479959594502,
+      "grad_norm": 0.11440018564462662,
+      "learning_rate": 0.00017687400779333237,
+      "loss": 0.1656,
+      "step": 8017
+    },
+    {
+      "epoch": 0.5785201486345106,
+      "grad_norm": 0.11489548534154892,
+      "learning_rate": 0.00017687112137393564,
+      "loss": 0.1434,
+      "step": 8018
+    },
+    {
+      "epoch": 0.5785923013095711,
+      "grad_norm": 0.14805598556995392,
+      "learning_rate": 0.0001768682349545389,
+      "loss": 0.2329,
+      "step": 8019
+    },
+    {
+      "epoch": 0.5786644539846315,
+      "grad_norm": 0.13587909936904907,
+      "learning_rate": 0.00017686534853514216,
+      "loss": 0.1575,
+      "step": 8020
+    },
+    {
+      "epoch": 0.578736606659692,
+      "grad_norm": 0.1194760873913765,
+      "learning_rate": 0.00017686246211574542,
+      "loss": 0.1489,
+      "step": 8021
+    },
+    {
+      "epoch": 0.5788087593347523,
+      "grad_norm": 0.1006384789943695,
+      "learning_rate": 0.0001768595756963487,
+      "loss": 0.1388,
+      "step": 8022
+    },
+    {
+      "epoch": 0.5788809120098127,
+      "grad_norm": 0.11905606836080551,
+      "learning_rate": 0.00017685668927695195,
+      "loss": 0.1608,
+      "step": 8023
+    },
+    {
+      "epoch": 0.5789530646848732,
+      "grad_norm": 0.10487359762191772,
+      "learning_rate": 0.0001768538028575552,
+      "loss": 0.1172,
+      "step": 8024
+    },
+    {
+      "epoch": 0.5790252173599336,
+      "grad_norm": 0.13513118028640747,
+      "learning_rate": 0.00017685091643815848,
+      "loss": 0.167,
+      "step": 8025
+    },
+    {
+      "epoch": 0.5790973700349941,
+      "grad_norm": 0.13310585916042328,
+      "learning_rate": 0.00017684803001876174,
+      "loss": 0.1645,
+      "step": 8026
+    },
+    {
+      "epoch": 0.5791695227100545,
+      "grad_norm": 0.12719425559043884,
+      "learning_rate": 0.000176845143599365,
+      "loss": 0.1273,
+      "step": 8027
+    },
+    {
+      "epoch": 0.579241675385115,
+      "grad_norm": 0.153107151389122,
+      "learning_rate": 0.00017684225717996824,
+      "loss": 0.1389,
+      "step": 8028
+    },
+    {
+      "epoch": 0.5793138280601753,
+      "grad_norm": 0.15313667058944702,
+      "learning_rate": 0.0001768393707605715,
+      "loss": 0.1454,
+      "step": 8029
+    },
+    {
+      "epoch": 0.5793859807352357,
+      "grad_norm": 0.12298291176557541,
+      "learning_rate": 0.0001768364843411748,
+      "loss": 0.1811,
+      "step": 8030
+    },
+    {
+      "epoch": 0.5794581334102962,
+      "grad_norm": 0.11076110601425171,
+      "learning_rate": 0.00017683359792177805,
+      "loss": 0.1416,
+      "step": 8031
+    },
+    {
+      "epoch": 0.5795302860853566,
+      "grad_norm": 0.10948219895362854,
+      "learning_rate": 0.00017683071150238132,
+      "loss": 0.1283,
+      "step": 8032
+    },
+    {
+      "epoch": 0.5796024387604171,
+      "grad_norm": 0.11751320958137512,
+      "learning_rate": 0.00017682782508298455,
+      "loss": 0.1924,
+      "step": 8033
+    },
+    {
+      "epoch": 0.5796745914354775,
+      "grad_norm": 0.13950355350971222,
+      "learning_rate": 0.00017682493866358782,
+      "loss": 0.1472,
+      "step": 8034
+    },
+    {
+      "epoch": 0.579746744110538,
+      "grad_norm": 0.13754689693450928,
+      "learning_rate": 0.00017682205224419108,
+      "loss": 0.1325,
+      "step": 8035
+    },
+    {
+      "epoch": 0.5798188967855983,
+      "grad_norm": 0.1298028528690338,
+      "learning_rate": 0.00017681916582479434,
+      "loss": 0.141,
+      "step": 8036
+    },
+    {
+      "epoch": 0.5798910494606587,
+      "grad_norm": 0.11332829296588898,
+      "learning_rate": 0.00017681627940539763,
+      "loss": 0.1247,
+      "step": 8037
+    },
+    {
+      "epoch": 0.5799632021357192,
+      "grad_norm": 0.11405795067548752,
+      "learning_rate": 0.00017681339298600087,
+      "loss": 0.1323,
+      "step": 8038
+    },
+    {
+      "epoch": 0.5800353548107796,
+      "grad_norm": 0.1341920644044876,
+      "learning_rate": 0.00017681050656660413,
+      "loss": 0.174,
+      "step": 8039
+    },
+    {
+      "epoch": 0.5801075074858401,
+      "grad_norm": 0.12394584715366364,
+      "learning_rate": 0.0001768076201472074,
+      "loss": 0.1567,
+      "step": 8040
+    },
+    {
+      "epoch": 0.5801796601609005,
+      "grad_norm": 0.1091771200299263,
+      "learning_rate": 0.00017680473372781066,
+      "loss": 0.1574,
+      "step": 8041
+    },
+    {
+      "epoch": 0.580251812835961,
+      "grad_norm": 0.1096918135881424,
+      "learning_rate": 0.00017680184730841392,
+      "loss": 0.1164,
+      "step": 8042
+    },
+    {
+      "epoch": 0.5803239655110213,
+      "grad_norm": 0.13219282031059265,
+      "learning_rate": 0.00017679896088901718,
+      "loss": 0.1471,
+      "step": 8043
+    },
+    {
+      "epoch": 0.5803961181860817,
+      "grad_norm": 0.14074230194091797,
+      "learning_rate": 0.00017679607446962044,
+      "loss": 0.1854,
+      "step": 8044
+    },
+    {
+      "epoch": 0.5804682708611422,
+      "grad_norm": 0.11911468207836151,
+      "learning_rate": 0.0001767931880502237,
+      "loss": 0.1105,
+      "step": 8045
+    },
+    {
+      "epoch": 0.5805404235362026,
+      "grad_norm": 0.15260189771652222,
+      "learning_rate": 0.00017679030163082697,
+      "loss": 0.1359,
+      "step": 8046
+    },
+    {
+      "epoch": 0.580612576211263,
+      "grad_norm": 0.12161078304052353,
+      "learning_rate": 0.00017678741521143023,
+      "loss": 0.1979,
+      "step": 8047
+    },
+    {
+      "epoch": 0.5806847288863235,
+      "grad_norm": 0.1545887142419815,
+      "learning_rate": 0.0001767845287920335,
+      "loss": 0.1833,
+      "step": 8048
+    },
+    {
+      "epoch": 0.5807568815613839,
+      "grad_norm": 0.10799109935760498,
+      "learning_rate": 0.00017678164237263676,
+      "loss": 0.1778,
+      "step": 8049
+    },
+    {
+      "epoch": 0.5808290342364443,
+      "grad_norm": 0.11913906037807465,
+      "learning_rate": 0.00017677875595324,
+      "loss": 0.1342,
+      "step": 8050
+    },
+    {
+      "epoch": 0.5809011869115047,
+      "grad_norm": 0.14187337458133698,
+      "learning_rate": 0.00017677586953384329,
+      "loss": 0.1379,
+      "step": 8051
+    },
+    {
+      "epoch": 0.5809733395865652,
+      "grad_norm": 0.13161632418632507,
+      "learning_rate": 0.00017677298311444655,
+      "loss": 0.1695,
+      "step": 8052
+    },
+    {
+      "epoch": 0.5810454922616256,
+      "grad_norm": 0.09995569288730621,
+      "learning_rate": 0.0001767700966950498,
+      "loss": 0.1552,
+      "step": 8053
+    },
+    {
+      "epoch": 0.581117644936686,
+      "grad_norm": 0.24291056394577026,
+      "learning_rate": 0.00017676721027565307,
+      "loss": 0.1736,
+      "step": 8054
+    },
+    {
+      "epoch": 0.5811897976117465,
+      "grad_norm": 0.12517669796943665,
+      "learning_rate": 0.0001767643238562563,
+      "loss": 0.1713,
+      "step": 8055
+    },
+    {
+      "epoch": 0.5812619502868069,
+      "grad_norm": 0.12074005603790283,
+      "learning_rate": 0.00017676143743685957,
+      "loss": 0.1906,
+      "step": 8056
+    },
+    {
+      "epoch": 0.5813341029618673,
+      "grad_norm": 0.11038137227296829,
+      "learning_rate": 0.00017675855101746284,
+      "loss": 0.2014,
+      "step": 8057
+    },
+    {
+      "epoch": 0.5814062556369277,
+      "grad_norm": 0.10443025827407837,
+      "learning_rate": 0.00017675566459806613,
+      "loss": 0.1752,
+      "step": 8058
+    },
+    {
+      "epoch": 0.5814784083119882,
+      "grad_norm": 0.10579415410757065,
+      "learning_rate": 0.0001767527781786694,
+      "loss": 0.1253,
+      "step": 8059
+    },
+    {
+      "epoch": 0.5815505609870486,
+      "grad_norm": 0.11671837419271469,
+      "learning_rate": 0.00017674989175927262,
+      "loss": 0.1062,
+      "step": 8060
+    },
+    {
+      "epoch": 0.581622713662109,
+      "grad_norm": 0.11670401692390442,
+      "learning_rate": 0.0001767470053398759,
+      "loss": 0.1615,
+      "step": 8061
+    },
+    {
+      "epoch": 0.5816948663371695,
+      "grad_norm": 0.1220296323299408,
+      "learning_rate": 0.00017674411892047915,
+      "loss": 0.1762,
+      "step": 8062
+    },
+    {
+      "epoch": 0.5817670190122298,
+      "grad_norm": 0.12593242526054382,
+      "learning_rate": 0.0001767412325010824,
+      "loss": 0.1555,
+      "step": 8063
+    },
+    {
+      "epoch": 0.5818391716872903,
+      "grad_norm": 0.11044905334711075,
+      "learning_rate": 0.00017673834608168568,
+      "loss": 0.1762,
+      "step": 8064
+    },
+    {
+      "epoch": 0.5819113243623507,
+      "grad_norm": 0.12676940858364105,
+      "learning_rate": 0.00017673545966228894,
+      "loss": 0.1826,
+      "step": 8065
+    },
+    {
+      "epoch": 0.5819834770374112,
+      "grad_norm": 0.12122169137001038,
+      "learning_rate": 0.0001767325732428922,
+      "loss": 0.1479,
+      "step": 8066
+    },
+    {
+      "epoch": 0.5820556297124716,
+      "grad_norm": 0.11513711512088776,
+      "learning_rate": 0.00017672968682349546,
+      "loss": 0.1332,
+      "step": 8067
+    },
+    {
+      "epoch": 0.582127782387532,
+      "grad_norm": 0.13474337756633759,
+      "learning_rate": 0.00017672680040409873,
+      "loss": 0.1417,
+      "step": 8068
+    },
+    {
+      "epoch": 0.5821999350625925,
+      "grad_norm": 0.11347810924053192,
+      "learning_rate": 0.000176723913984702,
+      "loss": 0.1159,
+      "step": 8069
+    },
+    {
+      "epoch": 0.5822720877376528,
+      "grad_norm": 0.1329052895307541,
+      "learning_rate": 0.00017672102756530525,
+      "loss": 0.1467,
+      "step": 8070
+    },
+    {
+      "epoch": 0.5823442404127133,
+      "grad_norm": 0.1284935027360916,
+      "learning_rate": 0.0001767181411459085,
+      "loss": 0.2003,
+      "step": 8071
+    },
+    {
+      "epoch": 0.5824163930877737,
+      "grad_norm": 0.11546535044908524,
+      "learning_rate": 0.00017671525472651178,
+      "loss": 0.1359,
+      "step": 8072
+    },
+    {
+      "epoch": 0.5824885457628342,
+      "grad_norm": 0.12746301293373108,
+      "learning_rate": 0.00017671236830711504,
+      "loss": 0.1333,
+      "step": 8073
+    },
+    {
+      "epoch": 0.5825606984378946,
+      "grad_norm": 0.13880078494548798,
+      "learning_rate": 0.0001767094818877183,
+      "loss": 0.1537,
+      "step": 8074
+    },
+    {
+      "epoch": 0.582632851112955,
+      "grad_norm": 0.11027500033378601,
+      "learning_rate": 0.00017670659546832157,
+      "loss": 0.1878,
+      "step": 8075
+    },
+    {
+      "epoch": 0.5827050037880155,
+      "grad_norm": 0.12961946427822113,
+      "learning_rate": 0.0001767037090489248,
+      "loss": 0.196,
+      "step": 8076
+    },
+    {
+      "epoch": 0.5827771564630758,
+      "grad_norm": 0.1460336446762085,
+      "learning_rate": 0.00017670082262952807,
+      "loss": 0.2021,
+      "step": 8077
+    },
+    {
+      "epoch": 0.5828493091381363,
+      "grad_norm": 0.13091175258159637,
+      "learning_rate": 0.00017669793621013133,
+      "loss": 0.1604,
+      "step": 8078
+    },
+    {
+      "epoch": 0.5829214618131967,
+      "grad_norm": 0.11825500428676605,
+      "learning_rate": 0.00017669504979073462,
+      "loss": 0.1935,
+      "step": 8079
+    },
+    {
+      "epoch": 0.5829936144882572,
+      "grad_norm": 0.13889840245246887,
+      "learning_rate": 0.00017669216337133788,
+      "loss": 0.132,
+      "step": 8080
+    },
+    {
+      "epoch": 0.5830657671633176,
+      "grad_norm": 0.09815270453691483,
+      "learning_rate": 0.00017668927695194112,
+      "loss": 0.1476,
+      "step": 8081
+    },
+    {
+      "epoch": 0.583137919838378,
+      "grad_norm": 0.12321629375219345,
+      "learning_rate": 0.00017668639053254438,
+      "loss": 0.149,
+      "step": 8082
+    },
+    {
+      "epoch": 0.5832100725134385,
+      "grad_norm": 0.10414538532495499,
+      "learning_rate": 0.00017668350411314764,
+      "loss": 0.1447,
+      "step": 8083
+    },
+    {
+      "epoch": 0.5832822251884988,
+      "grad_norm": 0.1281728446483612,
+      "learning_rate": 0.0001766806176937509,
+      "loss": 0.1721,
+      "step": 8084
+    },
+    {
+      "epoch": 0.5833543778635593,
+      "grad_norm": 0.12013985216617584,
+      "learning_rate": 0.00017667773127435417,
+      "loss": 0.1812,
+      "step": 8085
+    },
+    {
+      "epoch": 0.5834265305386197,
+      "grad_norm": 0.1369110494852066,
+      "learning_rate": 0.00017667484485495743,
+      "loss": 0.1566,
+      "step": 8086
+    },
+    {
+      "epoch": 0.5834986832136801,
+      "grad_norm": 0.12445517629384995,
+      "learning_rate": 0.0001766719584355607,
+      "loss": 0.1898,
+      "step": 8087
+    },
+    {
+      "epoch": 0.5835708358887406,
+      "grad_norm": 0.13069842755794525,
+      "learning_rate": 0.00017666907201616396,
+      "loss": 0.1904,
+      "step": 8088
+    },
+    {
+      "epoch": 0.583642988563801,
+      "grad_norm": 0.16488301753997803,
+      "learning_rate": 0.00017666618559676722,
+      "loss": 0.1951,
+      "step": 8089
+    },
+    {
+      "epoch": 0.5837151412388615,
+      "grad_norm": 0.09553637355566025,
+      "learning_rate": 0.00017666329917737049,
+      "loss": 0.1271,
+      "step": 8090
+    },
+    {
+      "epoch": 0.5837872939139218,
+      "grad_norm": 0.1081295982003212,
+      "learning_rate": 0.00017666041275797375,
+      "loss": 0.1486,
+      "step": 8091
+    },
+    {
+      "epoch": 0.5838594465889823,
+      "grad_norm": 0.10838132351636887,
+      "learning_rate": 0.00017665752633857698,
+      "loss": 0.1583,
+      "step": 8092
+    },
+    {
+      "epoch": 0.5839315992640427,
+      "grad_norm": 0.12616951763629913,
+      "learning_rate": 0.00017665463991918027,
+      "loss": 0.1385,
+      "step": 8093
+    },
+    {
+      "epoch": 0.5840037519391031,
+      "grad_norm": 0.128948375582695,
+      "learning_rate": 0.00017665175349978354,
+      "loss": 0.18,
+      "step": 8094
+    },
+    {
+      "epoch": 0.5840759046141636,
+      "grad_norm": 0.10365475714206696,
+      "learning_rate": 0.0001766488670803868,
+      "loss": 0.1544,
+      "step": 8095
+    },
+    {
+      "epoch": 0.584148057289224,
+      "grad_norm": 0.10040339082479477,
+      "learning_rate": 0.00017664598066099006,
+      "loss": 0.1165,
+      "step": 8096
+    },
+    {
+      "epoch": 0.5842202099642845,
+      "grad_norm": 0.0983489602804184,
+      "learning_rate": 0.0001766430942415933,
+      "loss": 0.1268,
+      "step": 8097
+    },
+    {
+      "epoch": 0.5842923626393448,
+      "grad_norm": 0.13310272991657257,
+      "learning_rate": 0.00017664020782219656,
+      "loss": 0.2079,
+      "step": 8098
+    },
+    {
+      "epoch": 0.5843645153144053,
+      "grad_norm": 0.1181897446513176,
+      "learning_rate": 0.00017663732140279982,
+      "loss": 0.1429,
+      "step": 8099
+    },
+    {
+      "epoch": 0.5844366679894657,
+      "grad_norm": 0.1250694841146469,
+      "learning_rate": 0.00017663443498340311,
+      "loss": 0.1173,
+      "step": 8100
+    },
+    {
+      "epoch": 0.5845088206645261,
+      "grad_norm": 0.14011278748512268,
+      "learning_rate": 0.00017663154856400638,
+      "loss": 0.134,
+      "step": 8101
+    },
+    {
+      "epoch": 0.5845809733395866,
+      "grad_norm": 0.12558667361736298,
+      "learning_rate": 0.0001766286621446096,
+      "loss": 0.1352,
+      "step": 8102
+    },
+    {
+      "epoch": 0.584653126014647,
+      "grad_norm": 0.14681829512119293,
+      "learning_rate": 0.00017662577572521288,
+      "loss": 0.1809,
+      "step": 8103
+    },
+    {
+      "epoch": 0.5847252786897075,
+      "grad_norm": 0.13540738821029663,
+      "learning_rate": 0.00017662288930581614,
+      "loss": 0.1622,
+      "step": 8104
+    },
+    {
+      "epoch": 0.5847974313647678,
+      "grad_norm": 0.13094480335712433,
+      "learning_rate": 0.0001766200028864194,
+      "loss": 0.1603,
+      "step": 8105
+    },
+    {
+      "epoch": 0.5848695840398282,
+      "grad_norm": 0.14726389944553375,
+      "learning_rate": 0.00017661711646702266,
+      "loss": 0.1949,
+      "step": 8106
+    },
+    {
+      "epoch": 0.5849417367148887,
+      "grad_norm": 0.1353321075439453,
+      "learning_rate": 0.00017661423004762593,
+      "loss": 0.1302,
+      "step": 8107
+    },
+    {
+      "epoch": 0.5850138893899491,
+      "grad_norm": 0.13002312183380127,
+      "learning_rate": 0.0001766113436282292,
+      "loss": 0.1433,
+      "step": 8108
+    },
+    {
+      "epoch": 0.5850860420650096,
+      "grad_norm": 0.14790894091129303,
+      "learning_rate": 0.00017660845720883245,
+      "loss": 0.1423,
+      "step": 8109
+    },
+    {
+      "epoch": 0.58515819474007,
+      "grad_norm": 0.11800483614206314,
+      "learning_rate": 0.00017660557078943572,
+      "loss": 0.1602,
+      "step": 8110
+    },
+    {
+      "epoch": 0.5852303474151305,
+      "grad_norm": 0.09049563109874725,
+      "learning_rate": 0.00017660268437003898,
+      "loss": 0.1143,
+      "step": 8111
+    },
+    {
+      "epoch": 0.5853025000901908,
+      "grad_norm": 0.10751941800117493,
+      "learning_rate": 0.00017659979795064224,
+      "loss": 0.176,
+      "step": 8112
+    },
+    {
+      "epoch": 0.5853746527652512,
+      "grad_norm": 0.10051951557397842,
+      "learning_rate": 0.00017659691153124548,
+      "loss": 0.1714,
+      "step": 8113
+    },
+    {
+      "epoch": 0.5854468054403117,
+      "grad_norm": 0.11118445545434952,
+      "learning_rate": 0.00017659402511184877,
+      "loss": 0.1561,
+      "step": 8114
+    },
+    {
+      "epoch": 0.5855189581153721,
+      "grad_norm": 0.09477000683546066,
+      "learning_rate": 0.00017659113869245203,
+      "loss": 0.1209,
+      "step": 8115
+    },
+    {
+      "epoch": 0.5855911107904326,
+      "grad_norm": 0.09346623718738556,
+      "learning_rate": 0.0001765882522730553,
+      "loss": 0.1768,
+      "step": 8116
+    },
+    {
+      "epoch": 0.585663263465493,
+      "grad_norm": 0.1038440689444542,
+      "learning_rate": 0.00017658536585365856,
+      "loss": 0.0868,
+      "step": 8117
+    },
+    {
+      "epoch": 0.5857354161405535,
+      "grad_norm": 0.12548695504665375,
+      "learning_rate": 0.0001765824794342618,
+      "loss": 0.1595,
+      "step": 8118
+    },
+    {
+      "epoch": 0.5858075688156138,
+      "grad_norm": 0.10288131237030029,
+      "learning_rate": 0.00017657959301486506,
+      "loss": 0.1708,
+      "step": 8119
+    },
+    {
+      "epoch": 0.5858797214906742,
+      "grad_norm": 0.12666648626327515,
+      "learning_rate": 0.00017657670659546832,
+      "loss": 0.1288,
+      "step": 8120
+    },
+    {
+      "epoch": 0.5859518741657347,
+      "grad_norm": 0.12510676681995392,
+      "learning_rate": 0.0001765738201760716,
+      "loss": 0.1497,
+      "step": 8121
+    },
+    {
+      "epoch": 0.5860240268407951,
+      "grad_norm": 0.13701088726520538,
+      "learning_rate": 0.00017657093375667487,
+      "loss": 0.121,
+      "step": 8122
+    },
+    {
+      "epoch": 0.5860961795158556,
+      "grad_norm": 0.1612546443939209,
+      "learning_rate": 0.0001765680473372781,
+      "loss": 0.1468,
+      "step": 8123
+    },
+    {
+      "epoch": 0.586168332190916,
+      "grad_norm": 0.1792639046907425,
+      "learning_rate": 0.00017656516091788137,
+      "loss": 0.1617,
+      "step": 8124
+    },
+    {
+      "epoch": 0.5862404848659764,
+      "grad_norm": 0.11944622546434402,
+      "learning_rate": 0.00017656227449848463,
+      "loss": 0.1339,
+      "step": 8125
+    },
+    {
+      "epoch": 0.5863126375410368,
+      "grad_norm": 0.1193036362528801,
+      "learning_rate": 0.0001765593880790879,
+      "loss": 0.1702,
+      "step": 8126
+    },
+    {
+      "epoch": 0.5863847902160972,
+      "grad_norm": 0.1043739914894104,
+      "learning_rate": 0.00017655650165969116,
+      "loss": 0.1453,
+      "step": 8127
+    },
+    {
+      "epoch": 0.5864569428911577,
+      "grad_norm": 0.08376028388738632,
+      "learning_rate": 0.00017655361524029442,
+      "loss": 0.1438,
+      "step": 8128
+    },
+    {
+      "epoch": 0.5865290955662181,
+      "grad_norm": 0.11524992436170578,
+      "learning_rate": 0.00017655072882089768,
+      "loss": 0.17,
+      "step": 8129
+    },
+    {
+      "epoch": 0.5866012482412786,
+      "grad_norm": 0.09101670235395432,
+      "learning_rate": 0.00017654784240150095,
+      "loss": 0.1545,
+      "step": 8130
+    },
+    {
+      "epoch": 0.586673400916339,
+      "grad_norm": 0.12667500972747803,
+      "learning_rate": 0.0001765449559821042,
+      "loss": 0.2263,
+      "step": 8131
+    },
+    {
+      "epoch": 0.5867455535913993,
+      "grad_norm": 0.11871380358934402,
+      "learning_rate": 0.00017654206956270747,
+      "loss": 0.174,
+      "step": 8132
+    },
+    {
+      "epoch": 0.5868177062664598,
+      "grad_norm": 0.09008299559354782,
+      "learning_rate": 0.00017653918314331074,
+      "loss": 0.1534,
+      "step": 8133
+    },
+    {
+      "epoch": 0.5868898589415202,
+      "grad_norm": 0.12811166048049927,
+      "learning_rate": 0.00017653629672391397,
+      "loss": 0.1238,
+      "step": 8134
+    },
+    {
+      "epoch": 0.5869620116165807,
+      "grad_norm": 0.12848630547523499,
+      "learning_rate": 0.00017653341030451724,
+      "loss": 0.226,
+      "step": 8135
+    },
+    {
+      "epoch": 0.5870341642916411,
+      "grad_norm": 0.11928470432758331,
+      "learning_rate": 0.00017653052388512053,
+      "loss": 0.1446,
+      "step": 8136
+    },
+    {
+      "epoch": 0.5871063169667016,
+      "grad_norm": 0.1059202253818512,
+      "learning_rate": 0.0001765276374657238,
+      "loss": 0.1667,
+      "step": 8137
+    },
+    {
+      "epoch": 0.587178469641762,
+      "grad_norm": 0.10207483917474747,
+      "learning_rate": 0.00017652475104632705,
+      "loss": 0.1564,
+      "step": 8138
+    },
+    {
+      "epoch": 0.5872506223168223,
+      "grad_norm": 0.1323583722114563,
+      "learning_rate": 0.0001765218646269303,
+      "loss": 0.1715,
+      "step": 8139
+    },
+    {
+      "epoch": 0.5873227749918828,
+      "grad_norm": 0.14230741560459137,
+      "learning_rate": 0.00017651897820753355,
+      "loss": 0.1531,
+      "step": 8140
+    },
+    {
+      "epoch": 0.5873949276669432,
+      "grad_norm": 0.17423692345619202,
+      "learning_rate": 0.0001765160917881368,
+      "loss": 0.1823,
+      "step": 8141
+    },
+    {
+      "epoch": 0.5874670803420037,
+      "grad_norm": 0.13893862068653107,
+      "learning_rate": 0.00017651320536874008,
+      "loss": 0.1552,
+      "step": 8142
+    },
+    {
+      "epoch": 0.5875392330170641,
+      "grad_norm": 0.13696856796741486,
+      "learning_rate": 0.00017651031894934337,
+      "loss": 0.1557,
+      "step": 8143
+    },
+    {
+      "epoch": 0.5876113856921246,
+      "grad_norm": 0.14471766352653503,
+      "learning_rate": 0.0001765074325299466,
+      "loss": 0.1854,
+      "step": 8144
+    },
+    {
+      "epoch": 0.587683538367185,
+      "grad_norm": 0.124466173350811,
+      "learning_rate": 0.00017650454611054986,
+      "loss": 0.168,
+      "step": 8145
+    },
+    {
+      "epoch": 0.5877556910422453,
+      "grad_norm": 0.12519100308418274,
+      "learning_rate": 0.00017650165969115313,
+      "loss": 0.144,
+      "step": 8146
+    },
+    {
+      "epoch": 0.5878278437173058,
+      "grad_norm": 0.12249601632356644,
+      "learning_rate": 0.0001764987732717564,
+      "loss": 0.1837,
+      "step": 8147
+    },
+    {
+      "epoch": 0.5878999963923662,
+      "grad_norm": 0.10754699259996414,
+      "learning_rate": 0.00017649588685235965,
+      "loss": 0.1643,
+      "step": 8148
+    },
+    {
+      "epoch": 0.5879721490674267,
+      "grad_norm": 0.12035319954156876,
+      "learning_rate": 0.00017649300043296292,
+      "loss": 0.157,
+      "step": 8149
+    },
+    {
+      "epoch": 0.5880443017424871,
+      "grad_norm": 0.12663905322551727,
+      "learning_rate": 0.00017649011401356618,
+      "loss": 0.1624,
+      "step": 8150
+    },
+    {
+      "epoch": 0.5881164544175476,
+      "grad_norm": 0.11968550831079483,
+      "learning_rate": 0.00017648722759416944,
+      "loss": 0.1206,
+      "step": 8151
+    },
+    {
+      "epoch": 0.588188607092608,
+      "grad_norm": 0.11833333969116211,
+      "learning_rate": 0.0001764843411747727,
+      "loss": 0.1622,
+      "step": 8152
+    },
+    {
+      "epoch": 0.5882607597676683,
+      "grad_norm": 0.13024753332138062,
+      "learning_rate": 0.00017648145475537597,
+      "loss": 0.1178,
+      "step": 8153
+    },
+    {
+      "epoch": 0.5883329124427288,
+      "grad_norm": 0.11432848125696182,
+      "learning_rate": 0.00017647856833597923,
+      "loss": 0.138,
+      "step": 8154
+    },
+    {
+      "epoch": 0.5884050651177892,
+      "grad_norm": 0.1305474489927292,
+      "learning_rate": 0.0001764756819165825,
+      "loss": 0.1701,
+      "step": 8155
+    },
+    {
+      "epoch": 0.5884772177928497,
+      "grad_norm": 0.10625302791595459,
+      "learning_rate": 0.00017647279549718573,
+      "loss": 0.1256,
+      "step": 8156
+    },
+    {
+      "epoch": 0.5885493704679101,
+      "grad_norm": 0.155123770236969,
+      "learning_rate": 0.00017646990907778902,
+      "loss": 0.1649,
+      "step": 8157
+    },
+    {
+      "epoch": 0.5886215231429706,
+      "grad_norm": 0.10322414338588715,
+      "learning_rate": 0.00017646702265839228,
+      "loss": 0.1806,
+      "step": 8158
+    },
+    {
+      "epoch": 0.588693675818031,
+      "grad_norm": 0.1403033435344696,
+      "learning_rate": 0.00017646413623899555,
+      "loss": 0.1803,
+      "step": 8159
+    },
+    {
+      "epoch": 0.5887658284930913,
+      "grad_norm": 0.16688910126686096,
+      "learning_rate": 0.0001764612498195988,
+      "loss": 0.1431,
+      "step": 8160
+    },
+    {
+      "epoch": 0.5888379811681518,
+      "grad_norm": 0.12197678536176682,
+      "learning_rate": 0.00017645836340020204,
+      "loss": 0.0995,
+      "step": 8161
+    },
+    {
+      "epoch": 0.5889101338432122,
+      "grad_norm": 0.12910448014736176,
+      "learning_rate": 0.0001764554769808053,
+      "loss": 0.105,
+      "step": 8162
+    },
+    {
+      "epoch": 0.5889822865182727,
+      "grad_norm": 0.12313324213027954,
+      "learning_rate": 0.00017645259056140857,
+      "loss": 0.167,
+      "step": 8163
+    },
+    {
+      "epoch": 0.5890544391933331,
+      "grad_norm": 0.14233951270580292,
+      "learning_rate": 0.00017644970414201186,
+      "loss": 0.1482,
+      "step": 8164
+    },
+    {
+      "epoch": 0.5891265918683936,
+      "grad_norm": 0.11455697566270828,
+      "learning_rate": 0.00017644681772261512,
+      "loss": 0.1856,
+      "step": 8165
+    },
+    {
+      "epoch": 0.589198744543454,
+      "grad_norm": 0.13419370353221893,
+      "learning_rate": 0.00017644393130321836,
+      "loss": 0.1675,
+      "step": 8166
+    },
+    {
+      "epoch": 0.5892708972185143,
+      "grad_norm": 0.1370537281036377,
+      "learning_rate": 0.00017644104488382162,
+      "loss": 0.1323,
+      "step": 8167
+    },
+    {
+      "epoch": 0.5893430498935748,
+      "grad_norm": 0.1115809828042984,
+      "learning_rate": 0.00017643815846442488,
+      "loss": 0.1097,
+      "step": 8168
+    },
+    {
+      "epoch": 0.5894152025686352,
+      "grad_norm": 0.13678720593452454,
+      "learning_rate": 0.00017643527204502815,
+      "loss": 0.1977,
+      "step": 8169
+    },
+    {
+      "epoch": 0.5894873552436957,
+      "grad_norm": 0.11522739380598068,
+      "learning_rate": 0.0001764323856256314,
+      "loss": 0.19,
+      "step": 8170
+    },
+    {
+      "epoch": 0.5895595079187561,
+      "grad_norm": 0.11245250701904297,
+      "learning_rate": 0.00017642949920623467,
+      "loss": 0.1756,
+      "step": 8171
+    },
+    {
+      "epoch": 0.5896316605938166,
+      "grad_norm": 0.13633395731449127,
+      "learning_rate": 0.00017642661278683794,
+      "loss": 0.1782,
+      "step": 8172
+    },
+    {
+      "epoch": 0.589703813268877,
+      "grad_norm": 0.12719576060771942,
+      "learning_rate": 0.0001764237263674412,
+      "loss": 0.1714,
+      "step": 8173
+    },
+    {
+      "epoch": 0.5897759659439373,
+      "grad_norm": 0.107512466609478,
+      "learning_rate": 0.00017642083994804446,
+      "loss": 0.1105,
+      "step": 8174
+    },
+    {
+      "epoch": 0.5898481186189978,
+      "grad_norm": 0.13118325173854828,
+      "learning_rate": 0.00017641795352864772,
+      "loss": 0.1097,
+      "step": 8175
+    },
+    {
+      "epoch": 0.5899202712940582,
+      "grad_norm": 0.1463913917541504,
+      "learning_rate": 0.000176415067109251,
+      "loss": 0.1743,
+      "step": 8176
+    },
+    {
+      "epoch": 0.5899924239691187,
+      "grad_norm": 0.1269274204969406,
+      "learning_rate": 0.00017641218068985422,
+      "loss": 0.138,
+      "step": 8177
+    },
+    {
+      "epoch": 0.5900645766441791,
+      "grad_norm": 0.12935011088848114,
+      "learning_rate": 0.00017640929427045751,
+      "loss": 0.1528,
+      "step": 8178
+    },
+    {
+      "epoch": 0.5901367293192396,
+      "grad_norm": 0.10900428891181946,
+      "learning_rate": 0.00017640640785106078,
+      "loss": 0.1439,
+      "step": 8179
+    },
+    {
+      "epoch": 0.5902088819942999,
+      "grad_norm": 0.13429750502109528,
+      "learning_rate": 0.00017640352143166404,
+      "loss": 0.1801,
+      "step": 8180
+    },
+    {
+      "epoch": 0.5902810346693603,
+      "grad_norm": 0.14954394102096558,
+      "learning_rate": 0.0001764006350122673,
+      "loss": 0.1817,
+      "step": 8181
+    },
+    {
+      "epoch": 0.5903531873444208,
+      "grad_norm": 0.13643895089626312,
+      "learning_rate": 0.00017639774859287054,
+      "loss": 0.1868,
+      "step": 8182
+    },
+    {
+      "epoch": 0.5904253400194812,
+      "grad_norm": 0.14662951231002808,
+      "learning_rate": 0.0001763948621734738,
+      "loss": 0.1059,
+      "step": 8183
+    },
+    {
+      "epoch": 0.5904974926945417,
+      "grad_norm": 0.1294011026620865,
+      "learning_rate": 0.00017639197575407706,
+      "loss": 0.1778,
+      "step": 8184
+    },
+    {
+      "epoch": 0.5905696453696021,
+      "grad_norm": 0.12653610110282898,
+      "learning_rate": 0.00017638908933468035,
+      "loss": 0.1412,
+      "step": 8185
+    },
+    {
+      "epoch": 0.5906417980446625,
+      "grad_norm": 0.12400539964437485,
+      "learning_rate": 0.00017638620291528362,
+      "loss": 0.192,
+      "step": 8186
+    },
+    {
+      "epoch": 0.5907139507197229,
+      "grad_norm": 0.0926087498664856,
+      "learning_rate": 0.00017638331649588685,
+      "loss": 0.1708,
+      "step": 8187
+    },
+    {
+      "epoch": 0.5907861033947833,
+      "grad_norm": 0.1382742077112198,
+      "learning_rate": 0.00017638043007649012,
+      "loss": 0.1597,
+      "step": 8188
+    },
+    {
+      "epoch": 0.5908582560698438,
+      "grad_norm": 0.14681148529052734,
+      "learning_rate": 0.00017637754365709338,
+      "loss": 0.1769,
+      "step": 8189
+    },
+    {
+      "epoch": 0.5909304087449042,
+      "grad_norm": 0.1179690808057785,
+      "learning_rate": 0.00017637465723769664,
+      "loss": 0.1475,
+      "step": 8190
+    },
+    {
+      "epoch": 0.5910025614199647,
+      "grad_norm": 0.10221707075834274,
+      "learning_rate": 0.0001763717708182999,
+      "loss": 0.1271,
+      "step": 8191
+    },
+    {
+      "epoch": 0.5910747140950251,
+      "grad_norm": 0.10415665060281754,
+      "learning_rate": 0.00017636888439890317,
+      "loss": 0.1735,
+      "step": 8192
+    },
+    {
+      "epoch": 0.5911468667700855,
+      "grad_norm": 0.11267755925655365,
+      "learning_rate": 0.00017636599797950643,
+      "loss": 0.1356,
+      "step": 8193
+    },
+    {
+      "epoch": 0.5912190194451459,
+      "grad_norm": 0.10448981821537018,
+      "learning_rate": 0.0001763631115601097,
+      "loss": 0.117,
+      "step": 8194
+    },
+    {
+      "epoch": 0.5912911721202063,
+      "grad_norm": 0.14107589423656464,
+      "learning_rate": 0.00017636022514071296,
+      "loss": 0.1642,
+      "step": 8195
+    },
+    {
+      "epoch": 0.5913633247952668,
+      "grad_norm": 0.12608906626701355,
+      "learning_rate": 0.00017635733872131622,
+      "loss": 0.1218,
+      "step": 8196
+    },
+    {
+      "epoch": 0.5914354774703272,
+      "grad_norm": 0.15684707462787628,
+      "learning_rate": 0.00017635445230191948,
+      "loss": 0.1486,
+      "step": 8197
+    },
+    {
+      "epoch": 0.5915076301453877,
+      "grad_norm": 0.1347835212945938,
+      "learning_rate": 0.00017635156588252272,
+      "loss": 0.1775,
+      "step": 8198
+    },
+    {
+      "epoch": 0.5915797828204481,
+      "grad_norm": 0.13035349547863007,
+      "learning_rate": 0.000176348679463126,
+      "loss": 0.1867,
+      "step": 8199
+    },
+    {
+      "epoch": 0.5916519354955085,
+      "grad_norm": 0.12555575370788574,
+      "learning_rate": 0.00017634579304372927,
+      "loss": 0.1035,
+      "step": 8200
+    },
+    {
+      "epoch": 0.5917240881705689,
+      "grad_norm": 0.13564147055149078,
+      "learning_rate": 0.00017634290662433253,
+      "loss": 0.1022,
+      "step": 8201
+    },
+    {
+      "epoch": 0.5917962408456293,
+      "grad_norm": 0.12636464834213257,
+      "learning_rate": 0.0001763400202049358,
+      "loss": 0.1745,
+      "step": 8202
+    },
+    {
+      "epoch": 0.5918683935206898,
+      "grad_norm": 0.12000800669193268,
+      "learning_rate": 0.00017633713378553903,
+      "loss": 0.1581,
+      "step": 8203
+    },
+    {
+      "epoch": 0.5919405461957502,
+      "grad_norm": 0.1053057610988617,
+      "learning_rate": 0.0001763342473661423,
+      "loss": 0.1535,
+      "step": 8204
+    },
+    {
+      "epoch": 0.5920126988708106,
+      "grad_norm": 0.11557186394929886,
+      "learning_rate": 0.00017633136094674556,
+      "loss": 0.1258,
+      "step": 8205
+    },
+    {
+      "epoch": 0.5920848515458711,
+      "grad_norm": 0.11119802296161652,
+      "learning_rate": 0.00017632847452734885,
+      "loss": 0.1633,
+      "step": 8206
+    },
+    {
+      "epoch": 0.5921570042209315,
+      "grad_norm": 0.11186131834983826,
+      "learning_rate": 0.0001763255881079521,
+      "loss": 0.1286,
+      "step": 8207
+    },
+    {
+      "epoch": 0.5922291568959919,
+      "grad_norm": 0.11805294454097748,
+      "learning_rate": 0.00017632270168855535,
+      "loss": 0.1443,
+      "step": 8208
+    },
+    {
+      "epoch": 0.5923013095710523,
+      "grad_norm": 0.1183740571141243,
+      "learning_rate": 0.0001763198152691586,
+      "loss": 0.1524,
+      "step": 8209
+    },
+    {
+      "epoch": 0.5923734622461128,
+      "grad_norm": 0.11456070095300674,
+      "learning_rate": 0.00017631692884976187,
+      "loss": 0.1872,
+      "step": 8210
+    },
+    {
+      "epoch": 0.5924456149211732,
+      "grad_norm": 0.10778361558914185,
+      "learning_rate": 0.00017631404243036514,
+      "loss": 0.1611,
+      "step": 8211
+    },
+    {
+      "epoch": 0.5925177675962336,
+      "grad_norm": 0.14650307595729828,
+      "learning_rate": 0.0001763111560109684,
+      "loss": 0.1643,
+      "step": 8212
+    },
+    {
+      "epoch": 0.5925899202712941,
+      "grad_norm": 0.10854461044073105,
+      "learning_rate": 0.00017630826959157166,
+      "loss": 0.1605,
+      "step": 8213
+    },
+    {
+      "epoch": 0.5926620729463545,
+      "grad_norm": 0.10862936824560165,
+      "learning_rate": 0.00017630538317217492,
+      "loss": 0.1706,
+      "step": 8214
+    },
+    {
+      "epoch": 0.5927342256214149,
+      "grad_norm": 0.14142727851867676,
+      "learning_rate": 0.0001763024967527782,
+      "loss": 0.1914,
+      "step": 8215
+    },
+    {
+      "epoch": 0.5928063782964753,
+      "grad_norm": 0.14297692477703094,
+      "learning_rate": 0.00017629961033338145,
+      "loss": 0.1399,
+      "step": 8216
+    },
+    {
+      "epoch": 0.5928785309715358,
+      "grad_norm": 0.0961894765496254,
+      "learning_rate": 0.0001762967239139847,
+      "loss": 0.1311,
+      "step": 8217
+    },
+    {
+      "epoch": 0.5929506836465962,
+      "grad_norm": 0.1122874915599823,
+      "learning_rate": 0.00017629383749458798,
+      "loss": 0.1667,
+      "step": 8218
+    },
+    {
+      "epoch": 0.5930228363216566,
+      "grad_norm": 0.12797664105892181,
+      "learning_rate": 0.0001762909510751912,
+      "loss": 0.1759,
+      "step": 8219
+    },
+    {
+      "epoch": 0.5930949889967171,
+      "grad_norm": 0.10802415013313293,
+      "learning_rate": 0.0001762880646557945,
+      "loss": 0.1064,
+      "step": 8220
+    },
+    {
+      "epoch": 0.5931671416717775,
+      "grad_norm": 0.11519049853086472,
+      "learning_rate": 0.00017628517823639777,
+      "loss": 0.1738,
+      "step": 8221
+    },
+    {
+      "epoch": 0.5932392943468379,
+      "grad_norm": 0.1328316330909729,
+      "learning_rate": 0.00017628229181700103,
+      "loss": 0.1477,
+      "step": 8222
+    },
+    {
+      "epoch": 0.5933114470218983,
+      "grad_norm": 0.1109108254313469,
+      "learning_rate": 0.0001762794053976043,
+      "loss": 0.1461,
+      "step": 8223
+    },
+    {
+      "epoch": 0.5933835996969588,
+      "grad_norm": 0.11819633841514587,
+      "learning_rate": 0.00017627651897820753,
+      "loss": 0.1343,
+      "step": 8224
+    },
+    {
+      "epoch": 0.5934557523720192,
+      "grad_norm": 0.12076713889837265,
+      "learning_rate": 0.0001762736325588108,
+      "loss": 0.2058,
+      "step": 8225
+    },
+    {
+      "epoch": 0.5935279050470796,
+      "grad_norm": 0.13241450488567352,
+      "learning_rate": 0.00017627074613941405,
+      "loss": 0.1176,
+      "step": 8226
+    },
+    {
+      "epoch": 0.5936000577221401,
+      "grad_norm": 0.11165394634008408,
+      "learning_rate": 0.00017626785972001734,
+      "loss": 0.1765,
+      "step": 8227
+    },
+    {
+      "epoch": 0.5936722103972005,
+      "grad_norm": 0.1249672994017601,
+      "learning_rate": 0.0001762649733006206,
+      "loss": 0.0977,
+      "step": 8228
+    },
+    {
+      "epoch": 0.5937443630722609,
+      "grad_norm": 0.1294601857662201,
+      "learning_rate": 0.00017626208688122384,
+      "loss": 0.1549,
+      "step": 8229
+    },
+    {
+      "epoch": 0.5938165157473213,
+      "grad_norm": 0.1573733389377594,
+      "learning_rate": 0.0001762592004618271,
+      "loss": 0.2001,
+      "step": 8230
+    },
+    {
+      "epoch": 0.5938886684223817,
+      "grad_norm": 0.12269971519708633,
+      "learning_rate": 0.00017625631404243037,
+      "loss": 0.1411,
+      "step": 8231
+    },
+    {
+      "epoch": 0.5939608210974422,
+      "grad_norm": 0.10967043042182922,
+      "learning_rate": 0.00017625342762303363,
+      "loss": 0.1858,
+      "step": 8232
+    },
+    {
+      "epoch": 0.5940329737725026,
+      "grad_norm": 0.10778573900461197,
+      "learning_rate": 0.0001762505412036369,
+      "loss": 0.1595,
+      "step": 8233
+    },
+    {
+      "epoch": 0.5941051264475631,
+      "grad_norm": 0.13718152046203613,
+      "learning_rate": 0.00017624765478424016,
+      "loss": 0.1512,
+      "step": 8234
+    },
+    {
+      "epoch": 0.5941772791226235,
+      "grad_norm": 0.11807660013437271,
+      "learning_rate": 0.00017624476836484342,
+      "loss": 0.1529,
+      "step": 8235
+    },
+    {
+      "epoch": 0.5942494317976839,
+      "grad_norm": 0.13162270188331604,
+      "learning_rate": 0.00017624188194544668,
+      "loss": 0.1738,
+      "step": 8236
+    },
+    {
+      "epoch": 0.5943215844727443,
+      "grad_norm": 0.09376419335603714,
+      "learning_rate": 0.00017623899552604994,
+      "loss": 0.0917,
+      "step": 8237
+    },
+    {
+      "epoch": 0.5943937371478047,
+      "grad_norm": 0.10288868844509125,
+      "learning_rate": 0.0001762361091066532,
+      "loss": 0.127,
+      "step": 8238
+    },
+    {
+      "epoch": 0.5944658898228652,
+      "grad_norm": 0.11284645646810532,
+      "learning_rate": 0.00017623322268725647,
+      "loss": 0.1761,
+      "step": 8239
+    },
+    {
+      "epoch": 0.5945380424979256,
+      "grad_norm": 0.09281051903963089,
+      "learning_rate": 0.0001762303362678597,
+      "loss": 0.103,
+      "step": 8240
+    },
+    {
+      "epoch": 0.5946101951729861,
+      "grad_norm": 0.10831299424171448,
+      "learning_rate": 0.000176227449848463,
+      "loss": 0.1427,
+      "step": 8241
+    },
+    {
+      "epoch": 0.5946823478480464,
+      "grad_norm": 0.13449856638908386,
+      "learning_rate": 0.00017622456342906626,
+      "loss": 0.1633,
+      "step": 8242
+    },
+    {
+      "epoch": 0.5947545005231069,
+      "grad_norm": 0.13355782628059387,
+      "learning_rate": 0.00017622167700966952,
+      "loss": 0.1517,
+      "step": 8243
+    },
+    {
+      "epoch": 0.5948266531981673,
+      "grad_norm": 0.13006524741649628,
+      "learning_rate": 0.00017621879059027279,
+      "loss": 0.1852,
+      "step": 8244
+    },
+    {
+      "epoch": 0.5948988058732277,
+      "grad_norm": 0.11725495010614395,
+      "learning_rate": 0.00017621590417087602,
+      "loss": 0.1499,
+      "step": 8245
+    },
+    {
+      "epoch": 0.5949709585482882,
+      "grad_norm": 0.12848377227783203,
+      "learning_rate": 0.00017621301775147928,
+      "loss": 0.1685,
+      "step": 8246
+    },
+    {
+      "epoch": 0.5950431112233486,
+      "grad_norm": 0.1165483221411705,
+      "learning_rate": 0.00017621013133208255,
+      "loss": 0.1683,
+      "step": 8247
+    },
+    {
+      "epoch": 0.5951152638984091,
+      "grad_norm": 0.11599836498498917,
+      "learning_rate": 0.00017620724491268584,
+      "loss": 0.2006,
+      "step": 8248
+    },
+    {
+      "epoch": 0.5951874165734694,
+      "grad_norm": 0.12383638322353363,
+      "learning_rate": 0.0001762043584932891,
+      "loss": 0.1609,
+      "step": 8249
+    },
+    {
+      "epoch": 0.5952595692485299,
+      "grad_norm": 0.12446649372577667,
+      "learning_rate": 0.00017620147207389234,
+      "loss": 0.1178,
+      "step": 8250
+    },
+    {
+      "epoch": 0.5953317219235903,
+      "grad_norm": 0.11396703869104385,
+      "learning_rate": 0.0001761985856544956,
+      "loss": 0.186,
+      "step": 8251
+    },
+    {
+      "epoch": 0.5954038745986507,
+      "grad_norm": 0.11467541754245758,
+      "learning_rate": 0.00017619569923509886,
+      "loss": 0.155,
+      "step": 8252
+    },
+    {
+      "epoch": 0.5954760272737112,
+      "grad_norm": 0.26076146960258484,
+      "learning_rate": 0.00017619281281570212,
+      "loss": 0.1501,
+      "step": 8253
+    },
+    {
+      "epoch": 0.5955481799487716,
+      "grad_norm": 0.12669560313224792,
+      "learning_rate": 0.0001761899263963054,
+      "loss": 0.1418,
+      "step": 8254
+    },
+    {
+      "epoch": 0.5956203326238321,
+      "grad_norm": 0.12748664617538452,
+      "learning_rate": 0.00017618703997690865,
+      "loss": 0.1941,
+      "step": 8255
+    },
+    {
+      "epoch": 0.5956924852988924,
+      "grad_norm": 0.18919895589351654,
+      "learning_rate": 0.0001761841535575119,
+      "loss": 0.1552,
+      "step": 8256
+    },
+    {
+      "epoch": 0.5957646379739528,
+      "grad_norm": Infinity,
+      "learning_rate": 0.0001761841535575119,
+      "loss": 0.2004,
+      "step": 8257
+    },
+    {
+      "epoch": 0.5958367906490133,
+      "grad_norm": 0.12094852328300476,
+      "learning_rate": 0.00017618126713811518,
+      "loss": 0.1832,
+      "step": 8258
+    },
+    {
+      "epoch": 0.5959089433240737,
+      "grad_norm": 0.1229955181479454,
+      "learning_rate": 0.00017617838071871844,
+      "loss": 0.0837,
+      "step": 8259
+    },
+    {
+      "epoch": 0.5959810959991342,
+      "grad_norm": 0.11439645290374756,
+      "learning_rate": 0.0001761754942993217,
+      "loss": 0.1419,
+      "step": 8260
+    },
+    {
+      "epoch": 0.5960532486741946,
+      "grad_norm": 0.12709204852581024,
+      "learning_rate": 0.00017617260787992496,
+      "loss": 0.1911,
+      "step": 8261
+    },
+    {
+      "epoch": 0.5961254013492551,
+      "grad_norm": 0.13323001563549042,
+      "learning_rate": 0.00017616972146052823,
+      "loss": 0.2032,
+      "step": 8262
+    },
+    {
+      "epoch": 0.5961975540243154,
+      "grad_norm": 0.12606051564216614,
+      "learning_rate": 0.0001761668350411315,
+      "loss": 0.1947,
+      "step": 8263
+    },
+    {
+      "epoch": 0.5962697066993758,
+      "grad_norm": 0.11966247111558914,
+      "learning_rate": 0.00017616394862173475,
+      "loss": 0.091,
+      "step": 8264
+    },
+    {
+      "epoch": 0.5963418593744363,
+      "grad_norm": 0.12878213822841644,
+      "learning_rate": 0.00017616106220233802,
+      "loss": 0.1934,
+      "step": 8265
+    },
+    {
+      "epoch": 0.5964140120494967,
+      "grad_norm": 0.17161764204502106,
+      "learning_rate": 0.00017615817578294128,
+      "loss": 0.1866,
+      "step": 8266
+    },
+    {
+      "epoch": 0.5964861647245572,
+      "grad_norm": 0.12299740314483643,
+      "learning_rate": 0.00017615528936354454,
+      "loss": 0.135,
+      "step": 8267
+    },
+    {
+      "epoch": 0.5965583173996176,
+      "grad_norm": 0.11680129915475845,
+      "learning_rate": 0.00017615240294414778,
+      "loss": 0.1201,
+      "step": 8268
+    },
+    {
+      "epoch": 0.5966304700746781,
+      "grad_norm": 0.1203378289937973,
+      "learning_rate": 0.00017614951652475104,
+      "loss": 0.1124,
+      "step": 8269
+    },
+    {
+      "epoch": 0.5967026227497384,
+      "grad_norm": 0.11593905836343765,
+      "learning_rate": 0.00017614663010535433,
+      "loss": 0.1568,
+      "step": 8270
+    },
+    {
+      "epoch": 0.5967747754247988,
+      "grad_norm": 0.10731831938028336,
+      "learning_rate": 0.0001761437436859576,
+      "loss": 0.1897,
+      "step": 8271
+    },
+    {
+      "epoch": 0.5968469280998593,
+      "grad_norm": 0.13110333681106567,
+      "learning_rate": 0.00017614085726656086,
+      "loss": 0.1156,
+      "step": 8272
+    },
+    {
+      "epoch": 0.5969190807749197,
+      "grad_norm": Infinity,
+      "learning_rate": 0.00017614085726656086,
+      "loss": 0.2281,
+      "step": 8273
+    },
+    {
+      "epoch": 0.5969912334499802,
+      "grad_norm": 0.11687818914651871,
+      "learning_rate": 0.0001761379708471641,
+      "loss": 0.1353,
+      "step": 8274
+    },
+    {
+      "epoch": 0.5970633861250406,
+      "grad_norm": 0.13382591307163239,
+      "learning_rate": 0.00017613508442776736,
+      "loss": 0.181,
+      "step": 8275
+    },
+    {
+      "epoch": 0.5971355388001011,
+      "grad_norm": 0.12436043471097946,
+      "learning_rate": 0.00017613219800837062,
+      "loss": 0.1696,
+      "step": 8276
+    },
+    {
+      "epoch": 0.5972076914751614,
+      "grad_norm": 0.13329434394836426,
+      "learning_rate": 0.00017612931158897388,
+      "loss": 0.1778,
+      "step": 8277
+    },
+    {
+      "epoch": 0.5972798441502218,
+      "grad_norm": 0.12108474224805832,
+      "learning_rate": 0.00017612642516957717,
+      "loss": 0.1643,
+      "step": 8278
+    },
+    {
+      "epoch": 0.5973519968252823,
+      "grad_norm": Infinity,
+      "learning_rate": 0.00017612642516957717,
+      "loss": 0.1723,
+      "step": 8279
+    },
+    {
+      "epoch": 0.5974241495003427,
+      "grad_norm": 0.10556795448064804,
+      "learning_rate": 0.0001761235387501804,
+      "loss": 0.147,
+      "step": 8280
+    },
+    {
+      "epoch": 0.5974963021754032,
+      "grad_norm": 0.14981569349765778,
+      "learning_rate": 0.00017612065233078367,
+      "loss": 0.1644,
+      "step": 8281
+    },
+    {
+      "epoch": 0.5975684548504636,
+      "grad_norm": 0.13146357238292694,
+      "learning_rate": 0.00017611776591138693,
+      "loss": 0.1755,
+      "step": 8282
+    },
+    {
+      "epoch": 0.5976406075255241,
+      "grad_norm": 1.4089605808258057,
+      "learning_rate": 0.0001761148794919902,
+      "loss": 0.1624,
+      "step": 8283
+    },
+    {
+      "epoch": 0.5977127602005844,
+      "grad_norm": 0.11864148825407028,
+      "learning_rate": 0.00017611199307259346,
+      "loss": 0.113,
+      "step": 8284
+    },
+    {
+      "epoch": 0.5977849128756448,
+      "grad_norm": 0.14449827373027802,
+      "learning_rate": 0.00017610910665319672,
+      "loss": 0.1887,
+      "step": 8285
+    },
+    {
+      "epoch": 0.5978570655507053,
+      "grad_norm": 0.17353913187980652,
+      "learning_rate": 0.00017610622023379998,
+      "loss": 0.1691,
+      "step": 8286
+    },
+    {
+      "epoch": 0.5979292182257657,
+      "grad_norm": 0.13227996230125427,
+      "learning_rate": 0.00017610333381440325,
+      "loss": 0.1468,
+      "step": 8287
+    },
+    {
+      "epoch": 0.5980013709008262,
+      "grad_norm": 0.12186557799577713,
+      "learning_rate": 0.0001761004473950065,
+      "loss": 0.152,
+      "step": 8288
+    },
+    {
+      "epoch": 0.5980735235758866,
+      "grad_norm": 0.11306702345609665,
+      "learning_rate": 0.00017609756097560977,
+      "loss": 0.1818,
+      "step": 8289
+    },
+    {
+      "epoch": 0.5981456762509471,
+      "grad_norm": 0.09941356629133224,
+      "learning_rate": 0.00017609467455621304,
+      "loss": 0.1653,
+      "step": 8290
+    },
+    {
+      "epoch": 0.5982178289260074,
+      "grad_norm": 0.10904739052057266,
+      "learning_rate": 0.00017609178813681627,
+      "loss": 0.1048,
+      "step": 8291
+    },
+    {
+      "epoch": 0.5982899816010678,
+      "grad_norm": 0.11784323304891586,
+      "learning_rate": 0.00017608890171741954,
+      "loss": 0.1864,
+      "step": 8292
+    },
+    {
+      "epoch": 0.5983621342761283,
+      "grad_norm": 0.10877453535795212,
+      "learning_rate": 0.00017608601529802283,
+      "loss": 0.1739,
+      "step": 8293
+    },
+    {
+      "epoch": 0.5984342869511887,
+      "grad_norm": 0.11221017688512802,
+      "learning_rate": 0.0001760831288786261,
+      "loss": 0.1518,
+      "step": 8294
+    },
+    {
+      "epoch": 0.5985064396262492,
+      "grad_norm": 0.11134081333875656,
+      "learning_rate": 0.00017608024245922935,
+      "loss": 0.1703,
+      "step": 8295
+    },
+    {
+      "epoch": 0.5985785923013096,
+      "grad_norm": 0.15727955102920532,
+      "learning_rate": 0.0001760773560398326,
+      "loss": 0.1345,
+      "step": 8296
+    },
+    {
+      "epoch": 0.59865074497637,
+      "grad_norm": 0.15842805802822113,
+      "learning_rate": 0.00017607446962043585,
+      "loss": 0.1875,
+      "step": 8297
+    },
+    {
+      "epoch": 0.5987228976514304,
+      "grad_norm": 0.1713588535785675,
+      "learning_rate": 0.0001760715832010391,
+      "loss": 0.1926,
+      "step": 8298
+    },
+    {
+      "epoch": 0.5987950503264908,
+      "grad_norm": 0.13431398570537567,
+      "learning_rate": 0.00017606869678164238,
+      "loss": 0.1158,
+      "step": 8299
+    },
+    {
+      "epoch": 0.5988672030015513,
+      "grad_norm": 0.14271295070648193,
+      "learning_rate": 0.00017606581036224567,
+      "loss": 0.1277,
+      "step": 8300
+    },
+    {
+      "epoch": 0.5989393556766117,
+      "grad_norm": 0.16294990479946136,
+      "learning_rate": 0.0001760629239428489,
+      "loss": 0.2188,
+      "step": 8301
+    },
+    {
+      "epoch": 0.5990115083516722,
+      "grad_norm": 0.14524459838867188,
+      "learning_rate": 0.00017606003752345216,
+      "loss": 0.1834,
+      "step": 8302
+    },
+    {
+      "epoch": 0.5990836610267326,
+      "grad_norm": 0.14850306510925293,
+      "learning_rate": 0.00017605715110405543,
+      "loss": 0.1632,
+      "step": 8303
+    },
+    {
+      "epoch": 0.5991558137017929,
+      "grad_norm": 0.152170330286026,
+      "learning_rate": 0.0001760542646846587,
+      "loss": 0.1828,
+      "step": 8304
+    },
+    {
+      "epoch": 0.5992279663768534,
+      "grad_norm": 0.13063561916351318,
+      "learning_rate": 0.00017605137826526195,
+      "loss": 0.0932,
+      "step": 8305
+    },
+    {
+      "epoch": 0.5993001190519138,
+      "grad_norm": 0.1113244891166687,
+      "learning_rate": 0.00017604849184586522,
+      "loss": 0.1761,
+      "step": 8306
+    },
+    {
+      "epoch": 0.5993722717269743,
+      "grad_norm": 0.18196213245391846,
+      "learning_rate": 0.00017604560542646848,
+      "loss": 0.1519,
+      "step": 8307
+    },
+    {
+      "epoch": 0.5994444244020347,
+      "grad_norm": 0.39838525652885437,
+      "learning_rate": 0.00017604271900707174,
+      "loss": 0.1334,
+      "step": 8308
+    },
+    {
+      "epoch": 0.5995165770770952,
+      "grad_norm": 0.13521502912044525,
+      "learning_rate": 0.000176039832587675,
+      "loss": 0.1795,
+      "step": 8309
+    },
+    {
+      "epoch": 0.5995887297521556,
+      "grad_norm": 0.10476474463939667,
+      "learning_rate": 0.00017603694616827827,
+      "loss": 0.1904,
+      "step": 8310
+    },
+    {
+      "epoch": 0.5996608824272159,
+      "grad_norm": 0.10485932976007462,
+      "learning_rate": 0.00017603405974888153,
+      "loss": 0.1499,
+      "step": 8311
+    },
+    {
+      "epoch": 0.5997330351022764,
+      "grad_norm": 0.1285116970539093,
+      "learning_rate": 0.00017603117332948477,
+      "loss": 0.1108,
+      "step": 8312
+    },
+    {
+      "epoch": 0.5998051877773368,
+      "grad_norm": 0.49555107951164246,
+      "learning_rate": 0.00017602828691008803,
+      "loss": 0.1378,
+      "step": 8313
+    },
+    {
+      "epoch": 0.5998773404523973,
+      "grad_norm": 0.1335631012916565,
+      "learning_rate": 0.00017602540049069132,
+      "loss": 0.1742,
+      "step": 8314
+    },
+    {
+      "epoch": 0.5999494931274577,
+      "grad_norm": 0.17374007403850555,
+      "learning_rate": 0.00017602251407129458,
+      "loss": 0.1292,
+      "step": 8315
+    },
+    {
+      "epoch": 0.6000216458025182,
+      "grad_norm": 0.11139317601919174,
+      "learning_rate": 0.00017601962765189785,
+      "loss": 0.153,
+      "step": 8316
+    },
+    {
+      "epoch": 0.6000937984775786,
+      "grad_norm": 0.1253328174352646,
+      "learning_rate": 0.00017601674123250108,
+      "loss": 0.1928,
+      "step": 8317
+    },
+    {
+      "epoch": 0.6001659511526389,
+      "grad_norm": 0.17890290915966034,
+      "learning_rate": 0.00017601385481310434,
+      "loss": 0.1429,
+      "step": 8318
+    },
+    {
+      "epoch": 0.6002381038276994,
+      "grad_norm": 0.15463121235370636,
+      "learning_rate": 0.0001760109683937076,
+      "loss": 0.1566,
+      "step": 8319
+    },
+    {
+      "epoch": 0.6003102565027598,
+      "grad_norm": 0.13858912885189056,
+      "learning_rate": 0.00017600808197431087,
+      "loss": 0.1606,
+      "step": 8320
+    },
+    {
+      "epoch": 0.6003824091778203,
+      "grad_norm": 0.17602328956127167,
+      "learning_rate": 0.00017600519555491416,
+      "loss": 0.165,
+      "step": 8321
+    },
+    {
+      "epoch": 0.6004545618528807,
+      "grad_norm": 0.1393021047115326,
+      "learning_rate": 0.0001760023091355174,
+      "loss": 0.1929,
+      "step": 8322
+    },
+    {
+      "epoch": 0.6005267145279412,
+      "grad_norm": 0.14253228902816772,
+      "learning_rate": 0.00017599942271612066,
+      "loss": 0.1145,
+      "step": 8323
+    },
+    {
+      "epoch": 0.6005988672030016,
+      "grad_norm": 0.15090863406658173,
+      "learning_rate": 0.00017599653629672392,
+      "loss": 0.1349,
+      "step": 8324
+    },
+    {
+      "epoch": 0.6006710198780619,
+      "grad_norm": 0.5086891055107117,
+      "learning_rate": 0.00017599364987732718,
+      "loss": 0.1871,
+      "step": 8325
+    },
+    {
+      "epoch": 0.6007431725531224,
+      "grad_norm": 0.11092958599328995,
+      "learning_rate": 0.00017599076345793045,
+      "loss": 0.1257,
+      "step": 8326
+    },
+    {
+      "epoch": 0.6008153252281828,
+      "grad_norm": 0.1473458707332611,
+      "learning_rate": 0.0001759878770385337,
+      "loss": 0.1264,
+      "step": 8327
+    },
+    {
+      "epoch": 0.6008874779032433,
+      "grad_norm": 0.14060139656066895,
+      "learning_rate": 0.00017598499061913695,
+      "loss": 0.1595,
+      "step": 8328
+    },
+    {
+      "epoch": 0.6009596305783037,
+      "grad_norm": 0.14718382060527802,
+      "learning_rate": 0.00017598210419974024,
+      "loss": 0.106,
+      "step": 8329
+    },
+    {
+      "epoch": 0.6010317832533641,
+      "grad_norm": 0.14671294391155243,
+      "learning_rate": 0.0001759792177803435,
+      "loss": 0.1141,
+      "step": 8330
+    },
+    {
+      "epoch": 0.6011039359284246,
+      "grad_norm": 0.17073041200637817,
+      "learning_rate": 0.00017597633136094676,
+      "loss": 0.171,
+      "step": 8331
+    },
+    {
+      "epoch": 0.6011760886034849,
+      "grad_norm": 0.1408713310956955,
+      "learning_rate": 0.00017597344494155003,
+      "loss": 0.1605,
+      "step": 8332
+    },
+    {
+      "epoch": 0.6012482412785454,
+      "grad_norm": 0.1482093334197998,
+      "learning_rate": 0.00017597055852215326,
+      "loss": 0.1601,
+      "step": 8333
+    },
+    {
+      "epoch": 0.6013203939536058,
+      "grad_norm": 0.14968480169773102,
+      "learning_rate": 0.00017596767210275652,
+      "loss": 0.1451,
+      "step": 8334
+    },
+    {
+      "epoch": 0.6013925466286663,
+      "grad_norm": 0.15854620933532715,
+      "learning_rate": 0.0001759647856833598,
+      "loss": 0.1609,
+      "step": 8335
+    },
+    {
+      "epoch": 0.6014646993037267,
+      "grad_norm": 0.1588122546672821,
+      "learning_rate": 0.00017596189926396308,
+      "loss": 0.1193,
+      "step": 8336
+    },
+    {
+      "epoch": 0.6015368519787871,
+      "grad_norm": 0.12753252685070038,
+      "learning_rate": 0.00017595901284456634,
+      "loss": 0.1469,
+      "step": 8337
+    },
+    {
+      "epoch": 0.6016090046538476,
+      "grad_norm": 0.17234788835048676,
+      "learning_rate": 0.00017595612642516958,
+      "loss": 0.1591,
+      "step": 8338
+    },
+    {
+      "epoch": 0.6016811573289079,
+      "grad_norm": 0.12896670401096344,
+      "learning_rate": 0.00017595324000577284,
+      "loss": 0.0929,
+      "step": 8339
+    },
+    {
+      "epoch": 0.6017533100039684,
+      "grad_norm": 0.11914967000484467,
+      "learning_rate": 0.0001759503535863761,
+      "loss": 0.2071,
+      "step": 8340
+    },
+    {
+      "epoch": 0.6018254626790288,
+      "grad_norm": 0.15577170252799988,
+      "learning_rate": 0.00017594746716697936,
+      "loss": 0.1752,
+      "step": 8341
+    },
+    {
+      "epoch": 0.6018976153540893,
+      "grad_norm": 0.16357608139514923,
+      "learning_rate": 0.00017594458074758263,
+      "loss": 0.138,
+      "step": 8342
+    },
+    {
+      "epoch": 0.6019697680291497,
+      "grad_norm": 0.13914862275123596,
+      "learning_rate": 0.0001759416943281859,
+      "loss": 0.1418,
+      "step": 8343
+    },
+    {
+      "epoch": 0.6020419207042101,
+      "grad_norm": 0.14447425305843353,
+      "learning_rate": 0.00017593880790878915,
+      "loss": 0.1567,
+      "step": 8344
+    },
+    {
+      "epoch": 0.6021140733792706,
+      "grad_norm": 0.15893866121768951,
+      "learning_rate": 0.00017593592148939242,
+      "loss": 0.1409,
+      "step": 8345
+    },
+    {
+      "epoch": 0.6021862260543309,
+      "grad_norm": 0.13015730679035187,
+      "learning_rate": 0.00017593303506999568,
+      "loss": 0.1905,
+      "step": 8346
+    },
+    {
+      "epoch": 0.6022583787293914,
+      "grad_norm": 0.12765511870384216,
+      "learning_rate": 0.00017593014865059894,
+      "loss": 0.1418,
+      "step": 8347
+    },
+    {
+      "epoch": 0.6023305314044518,
+      "grad_norm": 0.16375190019607544,
+      "learning_rate": 0.0001759272622312022,
+      "loss": 0.2088,
+      "step": 8348
+    },
+    {
+      "epoch": 0.6024026840795123,
+      "grad_norm": 0.12868079543113708,
+      "learning_rate": 0.00017592437581180544,
+      "loss": 0.1422,
+      "step": 8349
+    },
+    {
+      "epoch": 0.6024748367545727,
+      "grad_norm": 0.126992329955101,
+      "learning_rate": 0.00017592148939240873,
+      "loss": 0.211,
+      "step": 8350
+    },
+    {
+      "epoch": 0.6025469894296331,
+      "grad_norm": 0.11882653087377548,
+      "learning_rate": 0.000175918602973012,
+      "loss": 0.1504,
+      "step": 8351
+    },
+    {
+      "epoch": 0.6026191421046936,
+      "grad_norm": 0.13179273903369904,
+      "learning_rate": 0.00017591571655361526,
+      "loss": 0.1606,
+      "step": 8352
+    },
+    {
+      "epoch": 0.6026912947797539,
+      "grad_norm": 0.15020805597305298,
+      "learning_rate": 0.00017591283013421852,
+      "loss": 0.1343,
+      "step": 8353
+    },
+    {
+      "epoch": 0.6027634474548144,
+      "grad_norm": 0.13423669338226318,
+      "learning_rate": 0.00017590994371482176,
+      "loss": 0.1468,
+      "step": 8354
+    },
+    {
+      "epoch": 0.6028356001298748,
+      "grad_norm": 0.11200478672981262,
+      "learning_rate": 0.00017590705729542502,
+      "loss": 0.1351,
+      "step": 8355
+    },
+    {
+      "epoch": 0.6029077528049352,
+      "grad_norm": 0.11784103512763977,
+      "learning_rate": 0.00017590417087602828,
+      "loss": 0.1908,
+      "step": 8356
+    },
+    {
+      "epoch": 0.6029799054799957,
+      "grad_norm": 0.1697710007429123,
+      "learning_rate": 0.00017590128445663157,
+      "loss": 0.1684,
+      "step": 8357
+    },
+    {
+      "epoch": 0.6030520581550561,
+      "grad_norm": 0.14489956200122833,
+      "learning_rate": 0.00017589839803723483,
+      "loss": 0.1406,
+      "step": 8358
+    },
+    {
+      "epoch": 0.6031242108301166,
+      "grad_norm": 0.16728362441062927,
+      "learning_rate": 0.00017589551161783807,
+      "loss": 0.1702,
+      "step": 8359
+    },
+    {
+      "epoch": 0.6031963635051769,
+      "grad_norm": 0.15663135051727295,
+      "learning_rate": 0.00017589262519844133,
+      "loss": 0.1406,
+      "step": 8360
+    },
+    {
+      "epoch": 0.6032685161802374,
+      "grad_norm": 0.11991741508245468,
+      "learning_rate": 0.0001758897387790446,
+      "loss": 0.1781,
+      "step": 8361
+    },
+    {
+      "epoch": 0.6033406688552978,
+      "grad_norm": 0.12316525727510452,
+      "learning_rate": 0.00017588685235964786,
+      "loss": 0.1411,
+      "step": 8362
+    },
+    {
+      "epoch": 0.6034128215303582,
+      "grad_norm": 0.1290789693593979,
+      "learning_rate": 0.00017588396594025112,
+      "loss": 0.2423,
+      "step": 8363
+    },
+    {
+      "epoch": 0.6034849742054187,
+      "grad_norm": 0.12257073819637299,
+      "learning_rate": 0.00017588107952085438,
+      "loss": 0.0995,
+      "step": 8364
+    },
+    {
+      "epoch": 0.6035571268804791,
+      "grad_norm": 0.15544748306274414,
+      "learning_rate": 0.00017587819310145765,
+      "loss": 0.1324,
+      "step": 8365
+    },
+    {
+      "epoch": 0.6036292795555395,
+      "grad_norm": 0.1269799917936325,
+      "learning_rate": 0.0001758753066820609,
+      "loss": 0.1462,
+      "step": 8366
+    },
+    {
+      "epoch": 0.6037014322305999,
+      "grad_norm": 0.1171223595738411,
+      "learning_rate": 0.00017587242026266417,
+      "loss": 0.1556,
+      "step": 8367
+    },
+    {
+      "epoch": 0.6037735849056604,
+      "grad_norm": 0.13106483221054077,
+      "learning_rate": 0.00017586953384326744,
+      "loss": 0.1869,
+      "step": 8368
+    },
+    {
+      "epoch": 0.6038457375807208,
+      "grad_norm": 0.13471928238868713,
+      "learning_rate": 0.0001758666474238707,
+      "loss": 0.1953,
+      "step": 8369
+    },
+    {
+      "epoch": 0.6039178902557812,
+      "grad_norm": 0.11581786721944809,
+      "learning_rate": 0.00017586376100447393,
+      "loss": 0.16,
+      "step": 8370
+    },
+    {
+      "epoch": 0.6039900429308417,
+      "grad_norm": 0.12391266226768494,
+      "learning_rate": 0.00017586087458507722,
+      "loss": 0.1026,
+      "step": 8371
+    },
+    {
+      "epoch": 0.6040621956059021,
+      "grad_norm": 0.1501830667257309,
+      "learning_rate": 0.0001758579881656805,
+      "loss": 0.1543,
+      "step": 8372
+    },
+    {
+      "epoch": 0.6041343482809625,
+      "grad_norm": 0.13379184901714325,
+      "learning_rate": 0.00017585510174628375,
+      "loss": 0.172,
+      "step": 8373
+    },
+    {
+      "epoch": 0.6042065009560229,
+      "grad_norm": 0.1036381721496582,
+      "learning_rate": 0.00017585221532688701,
+      "loss": 0.1869,
+      "step": 8374
+    },
+    {
+      "epoch": 0.6042786536310834,
+      "grad_norm": 0.14931990206241608,
+      "learning_rate": 0.00017584932890749025,
+      "loss": 0.1331,
+      "step": 8375
+    },
+    {
+      "epoch": 0.6043508063061438,
+      "grad_norm": 0.13077418506145477,
+      "learning_rate": 0.0001758464424880935,
+      "loss": 0.1143,
+      "step": 8376
+    },
+    {
+      "epoch": 0.6044229589812042,
+      "grad_norm": 0.13980217278003693,
+      "learning_rate": 0.00017584355606869678,
+      "loss": 0.1828,
+      "step": 8377
+    },
+    {
+      "epoch": 0.6044951116562647,
+      "grad_norm": 0.13169902563095093,
+      "learning_rate": 0.00017584066964930007,
+      "loss": 0.1714,
+      "step": 8378
+    },
+    {
+      "epoch": 0.6045672643313251,
+      "grad_norm": 0.11600630730390549,
+      "learning_rate": 0.00017583778322990333,
+      "loss": 0.1674,
+      "step": 8379
+    },
+    {
+      "epoch": 0.6046394170063855,
+      "grad_norm": 0.12073885649442673,
+      "learning_rate": 0.00017583489681050656,
+      "loss": 0.1442,
+      "step": 8380
+    },
+    {
+      "epoch": 0.6047115696814459,
+      "grad_norm": 0.13687610626220703,
+      "learning_rate": 0.00017583201039110983,
+      "loss": 0.1384,
+      "step": 8381
+    },
+    {
+      "epoch": 0.6047837223565063,
+      "grad_norm": 0.21396295726299286,
+      "learning_rate": 0.0001758291239717131,
+      "loss": 0.1329,
+      "step": 8382
+    },
+    {
+      "epoch": 0.6048558750315668,
+      "grad_norm": 0.13280992209911346,
+      "learning_rate": 0.00017582623755231635,
+      "loss": 0.1466,
+      "step": 8383
+    },
+    {
+      "epoch": 0.6049280277066272,
+      "grad_norm": 0.16064168512821198,
+      "learning_rate": 0.00017582335113291962,
+      "loss": 0.1553,
+      "step": 8384
+    },
+    {
+      "epoch": 0.6050001803816877,
+      "grad_norm": 0.13958929479122162,
+      "learning_rate": 0.00017582046471352288,
+      "loss": 0.197,
+      "step": 8385
+    },
+    {
+      "epoch": 0.6050723330567481,
+      "grad_norm": 0.12864023447036743,
+      "learning_rate": 0.00017581757829412614,
+      "loss": 0.1839,
+      "step": 8386
+    },
+    {
+      "epoch": 0.6051444857318085,
+      "grad_norm": 0.13165269792079926,
+      "learning_rate": 0.0001758146918747294,
+      "loss": 0.1637,
+      "step": 8387
+    },
+    {
+      "epoch": 0.6052166384068689,
+      "grad_norm": 0.12166374921798706,
+      "learning_rate": 0.00017581180545533267,
+      "loss": 0.1869,
+      "step": 8388
+    },
+    {
+      "epoch": 0.6052887910819293,
+      "grad_norm": 0.17785470187664032,
+      "learning_rate": 0.00017580891903593593,
+      "loss": 0.2323,
+      "step": 8389
+    },
+    {
+      "epoch": 0.6053609437569898,
+      "grad_norm": 0.11101136356592178,
+      "learning_rate": 0.0001758060326165392,
+      "loss": 0.1265,
+      "step": 8390
+    },
+    {
+      "epoch": 0.6054330964320502,
+      "grad_norm": 0.13086219131946564,
+      "learning_rate": 0.00017580314619714246,
+      "loss": 0.1322,
+      "step": 8391
+    },
+    {
+      "epoch": 0.6055052491071107,
+      "grad_norm": 0.11813119053840637,
+      "learning_rate": 0.00017580025977774572,
+      "loss": 0.1202,
+      "step": 8392
+    },
+    {
+      "epoch": 0.6055774017821711,
+      "grad_norm": 0.12252860516309738,
+      "learning_rate": 0.00017579737335834898,
+      "loss": 0.1369,
+      "step": 8393
+    },
+    {
+      "epoch": 0.6056495544572315,
+      "grad_norm": 0.14627180993556976,
+      "learning_rate": 0.00017579448693895224,
+      "loss": 0.1393,
+      "step": 8394
+    },
+    {
+      "epoch": 0.6057217071322919,
+      "grad_norm": 0.14379194378852844,
+      "learning_rate": 0.0001757916005195555,
+      "loss": 0.2139,
+      "step": 8395
+    },
+    {
+      "epoch": 0.6057938598073523,
+      "grad_norm": 0.15863902866840363,
+      "learning_rate": 0.00017578871410015877,
+      "loss": 0.1719,
+      "step": 8396
+    },
+    {
+      "epoch": 0.6058660124824128,
+      "grad_norm": 0.13218453526496887,
+      "learning_rate": 0.000175785827680762,
+      "loss": 0.0966,
+      "step": 8397
+    },
+    {
+      "epoch": 0.6059381651574732,
+      "grad_norm": 0.12433724105358124,
+      "learning_rate": 0.00017578294126136527,
+      "loss": 0.1607,
+      "step": 8398
+    },
+    {
+      "epoch": 0.6060103178325337,
+      "grad_norm": 0.1300356090068817,
+      "learning_rate": 0.00017578005484196856,
+      "loss": 0.1506,
+      "step": 8399
+    },
+    {
+      "epoch": 0.6060824705075941,
+      "grad_norm": 0.11802296340465546,
+      "learning_rate": 0.00017577716842257182,
+      "loss": 0.148,
+      "step": 8400
+    },
+    {
+      "epoch": 0.6061546231826545,
+      "grad_norm": 0.10881169885396957,
+      "learning_rate": 0.00017577428200317509,
+      "loss": 0.1698,
+      "step": 8401
+    },
+    {
+      "epoch": 0.6062267758577149,
+      "grad_norm": 0.1484992504119873,
+      "learning_rate": 0.00017577139558377832,
+      "loss": 0.1707,
+      "step": 8402
+    },
+    {
+      "epoch": 0.6062989285327753,
+      "grad_norm": 0.15537378191947937,
+      "learning_rate": 0.00017576850916438158,
+      "loss": 0.1942,
+      "step": 8403
+    },
+    {
+      "epoch": 0.6063710812078358,
+      "grad_norm": 0.15242907404899597,
+      "learning_rate": 0.00017576562274498485,
+      "loss": 0.1153,
+      "step": 8404
+    },
+    {
+      "epoch": 0.6064432338828962,
+      "grad_norm": 0.12700161337852478,
+      "learning_rate": 0.0001757627363255881,
+      "loss": 0.1304,
+      "step": 8405
+    },
+    {
+      "epoch": 0.6065153865579567,
+      "grad_norm": 0.1762162744998932,
+      "learning_rate": 0.0001757598499061914,
+      "loss": 0.138,
+      "step": 8406
+    },
+    {
+      "epoch": 0.6065875392330171,
+      "grad_norm": 0.13309970498085022,
+      "learning_rate": 0.00017575696348679464,
+      "loss": 0.1119,
+      "step": 8407
+    },
+    {
+      "epoch": 0.6066596919080774,
+      "grad_norm": 0.123813696205616,
+      "learning_rate": 0.0001757540770673979,
+      "loss": 0.1493,
+      "step": 8408
+    },
+    {
+      "epoch": 0.6067318445831379,
+      "grad_norm": 0.11938238888978958,
+      "learning_rate": 0.00017575119064800116,
+      "loss": 0.1189,
+      "step": 8409
+    },
+    {
+      "epoch": 0.6068039972581983,
+      "grad_norm": 0.13266606628894806,
+      "learning_rate": 0.00017574830422860442,
+      "loss": 0.1454,
+      "step": 8410
+    },
+    {
+      "epoch": 0.6068761499332588,
+      "grad_norm": 0.14781904220581055,
+      "learning_rate": 0.0001757454178092077,
+      "loss": 0.1694,
+      "step": 8411
+    },
+    {
+      "epoch": 0.6069483026083192,
+      "grad_norm": 0.10925555229187012,
+      "learning_rate": 0.00017574253138981095,
+      "loss": 0.1823,
+      "step": 8412
+    },
+    {
+      "epoch": 0.6070204552833797,
+      "grad_norm": 0.12646707892417908,
+      "learning_rate": 0.0001757396449704142,
+      "loss": 0.159,
+      "step": 8413
+    },
+    {
+      "epoch": 0.6070926079584401,
+      "grad_norm": 0.1189049780368805,
+      "learning_rate": 0.00017573675855101748,
+      "loss": 0.1093,
+      "step": 8414
+    },
+    {
+      "epoch": 0.6071647606335004,
+      "grad_norm": 0.11102983355522156,
+      "learning_rate": 0.00017573387213162074,
+      "loss": 0.1654,
+      "step": 8415
+    },
+    {
+      "epoch": 0.6072369133085609,
+      "grad_norm": 0.13493704795837402,
+      "learning_rate": 0.000175730985712224,
+      "loss": 0.0933,
+      "step": 8416
+    },
+    {
+      "epoch": 0.6073090659836213,
+      "grad_norm": 0.14110951125621796,
+      "learning_rate": 0.00017572809929282727,
+      "loss": 0.1382,
+      "step": 8417
+    },
+    {
+      "epoch": 0.6073812186586818,
+      "grad_norm": 0.0968608409166336,
+      "learning_rate": 0.0001757252128734305,
+      "loss": 0.119,
+      "step": 8418
+    },
+    {
+      "epoch": 0.6074533713337422,
+      "grad_norm": 0.11105870455503464,
+      "learning_rate": 0.00017572232645403376,
+      "loss": 0.1332,
+      "step": 8419
+    },
+    {
+      "epoch": 0.6075255240088027,
+      "grad_norm": 0.1404876559972763,
+      "learning_rate": 0.00017571944003463705,
+      "loss": 0.1191,
+      "step": 8420
+    },
+    {
+      "epoch": 0.6075976766838631,
+      "grad_norm": 0.11005908250808716,
+      "learning_rate": 0.00017571655361524032,
+      "loss": 0.1517,
+      "step": 8421
+    },
+    {
+      "epoch": 0.6076698293589234,
+      "grad_norm": 0.1336730271577835,
+      "learning_rate": 0.00017571366719584358,
+      "loss": 0.1874,
+      "step": 8422
+    },
+    {
+      "epoch": 0.6077419820339839,
+      "grad_norm": 0.14844858646392822,
+      "learning_rate": 0.00017571078077644682,
+      "loss": 0.1909,
+      "step": 8423
+    },
+    {
+      "epoch": 0.6078141347090443,
+      "grad_norm": 0.15753395855426788,
+      "learning_rate": 0.00017570789435705008,
+      "loss": 0.1399,
+      "step": 8424
+    },
+    {
+      "epoch": 0.6078862873841048,
+      "grad_norm": 0.1073303371667862,
+      "learning_rate": 0.00017570500793765334,
+      "loss": 0.149,
+      "step": 8425
+    },
+    {
+      "epoch": 0.6079584400591652,
+      "grad_norm": 0.11728430539369583,
+      "learning_rate": 0.0001757021215182566,
+      "loss": 0.115,
+      "step": 8426
+    },
+    {
+      "epoch": 0.6080305927342257,
+      "grad_norm": 0.14064405858516693,
+      "learning_rate": 0.0001756992350988599,
+      "loss": 0.1693,
+      "step": 8427
+    },
+    {
+      "epoch": 0.608102745409286,
+      "grad_norm": 0.13664928078651428,
+      "learning_rate": 0.00017569634867946313,
+      "loss": 0.1718,
+      "step": 8428
+    },
+    {
+      "epoch": 0.6081748980843464,
+      "grad_norm": 0.12747839093208313,
+      "learning_rate": 0.0001756934622600664,
+      "loss": 0.1621,
+      "step": 8429
+    },
+    {
+      "epoch": 0.6082470507594069,
+      "grad_norm": 0.1261085867881775,
+      "learning_rate": 0.00017569057584066966,
+      "loss": 0.1809,
+      "step": 8430
+    },
+    {
+      "epoch": 0.6083192034344673,
+      "grad_norm": 0.12232770770788193,
+      "learning_rate": 0.00017568768942127292,
+      "loss": 0.1752,
+      "step": 8431
+    },
+    {
+      "epoch": 0.6083913561095278,
+      "grad_norm": 0.11349008232355118,
+      "learning_rate": 0.00017568480300187618,
+      "loss": 0.1542,
+      "step": 8432
+    },
+    {
+      "epoch": 0.6084635087845882,
+      "grad_norm": 0.1297486126422882,
+      "learning_rate": 0.00017568191658247944,
+      "loss": 0.1297,
+      "step": 8433
+    },
+    {
+      "epoch": 0.6085356614596487,
+      "grad_norm": 0.15281839668750763,
+      "learning_rate": 0.0001756790301630827,
+      "loss": 0.1011,
+      "step": 8434
+    },
+    {
+      "epoch": 0.608607814134709,
+      "grad_norm": 0.1351483166217804,
+      "learning_rate": 0.00017567614374368597,
+      "loss": 0.192,
+      "step": 8435
+    },
+    {
+      "epoch": 0.6086799668097694,
+      "grad_norm": 0.17875918745994568,
+      "learning_rate": 0.00017567325732428923,
+      "loss": 0.1881,
+      "step": 8436
+    },
+    {
+      "epoch": 0.6087521194848299,
+      "grad_norm": 0.148821622133255,
+      "learning_rate": 0.0001756703709048925,
+      "loss": 0.1612,
+      "step": 8437
+    },
+    {
+      "epoch": 0.6088242721598903,
+      "grad_norm": 0.12716643512248993,
+      "learning_rate": 0.00017566748448549576,
+      "loss": 0.1141,
+      "step": 8438
+    },
+    {
+      "epoch": 0.6088964248349508,
+      "grad_norm": 0.12227771431207657,
+      "learning_rate": 0.000175664598066099,
+      "loss": 0.2097,
+      "step": 8439
+    },
+    {
+      "epoch": 0.6089685775100112,
+      "grad_norm": 0.1000041589140892,
+      "learning_rate": 0.00017566171164670226,
+      "loss": 0.1184,
+      "step": 8440
+    },
+    {
+      "epoch": 0.6090407301850717,
+      "grad_norm": 0.09327298402786255,
+      "learning_rate": 0.00017565882522730555,
+      "loss": 0.1192,
+      "step": 8441
+    },
+    {
+      "epoch": 0.609112882860132,
+      "grad_norm": 0.11925908923149109,
+      "learning_rate": 0.0001756559388079088,
+      "loss": 0.1972,
+      "step": 8442
+    },
+    {
+      "epoch": 0.6091850355351924,
+      "grad_norm": 0.11328861862421036,
+      "learning_rate": 0.00017565305238851207,
+      "loss": 0.1384,
+      "step": 8443
+    },
+    {
+      "epoch": 0.6092571882102529,
+      "grad_norm": 0.11418649554252625,
+      "learning_rate": 0.0001756501659691153,
+      "loss": 0.1356,
+      "step": 8444
+    },
+    {
+      "epoch": 0.6093293408853133,
+      "grad_norm": 0.10574861615896225,
+      "learning_rate": 0.00017564727954971857,
+      "loss": 0.15,
+      "step": 8445
+    },
+    {
+      "epoch": 0.6094014935603738,
+      "grad_norm": 0.12365765869617462,
+      "learning_rate": 0.00017564439313032184,
+      "loss": 0.1564,
+      "step": 8446
+    },
+    {
+      "epoch": 0.6094736462354342,
+      "grad_norm": 0.09398072212934494,
+      "learning_rate": 0.0001756415067109251,
+      "loss": 0.1744,
+      "step": 8447
+    },
+    {
+      "epoch": 0.6095457989104947,
+      "grad_norm": 0.12091232091188431,
+      "learning_rate": 0.0001756386202915284,
+      "loss": 0.1232,
+      "step": 8448
+    },
+    {
+      "epoch": 0.609617951585555,
+      "grad_norm": 0.11692681163549423,
+      "learning_rate": 0.00017563573387213162,
+      "loss": 0.1815,
+      "step": 8449
+    },
+    {
+      "epoch": 0.6096901042606154,
+      "grad_norm": 0.12754695117473602,
+      "learning_rate": 0.0001756328474527349,
+      "loss": 0.1337,
+      "step": 8450
+    },
+    {
+      "epoch": 0.6097622569356759,
+      "grad_norm": 0.1740695983171463,
+      "learning_rate": 0.00017562996103333815,
+      "loss": 0.1787,
+      "step": 8451
+    },
+    {
+      "epoch": 0.6098344096107363,
+      "grad_norm": 0.12354660779237747,
+      "learning_rate": 0.0001756270746139414,
+      "loss": 0.1298,
+      "step": 8452
+    },
+    {
+      "epoch": 0.6099065622857968,
+      "grad_norm": 0.11192192882299423,
+      "learning_rate": 0.00017562418819454468,
+      "loss": 0.1585,
+      "step": 8453
+    },
+    {
+      "epoch": 0.6099787149608572,
+      "grad_norm": 0.10584592819213867,
+      "learning_rate": 0.00017562130177514794,
+      "loss": 0.1279,
+      "step": 8454
+    },
+    {
+      "epoch": 0.6100508676359176,
+      "grad_norm": 0.11145295947790146,
+      "learning_rate": 0.0001756184153557512,
+      "loss": 0.1279,
+      "step": 8455
+    },
+    {
+      "epoch": 0.610123020310978,
+      "grad_norm": 0.11601231247186661,
+      "learning_rate": 0.00017561552893635446,
+      "loss": 0.1752,
+      "step": 8456
+    },
+    {
+      "epoch": 0.6101951729860384,
+      "grad_norm": 0.1352606862783432,
+      "learning_rate": 0.00017561264251695773,
+      "loss": 0.1099,
+      "step": 8457
+    },
+    {
+      "epoch": 0.6102673256610989,
+      "grad_norm": 0.11708886921405792,
+      "learning_rate": 0.000175609756097561,
+      "loss": 0.1533,
+      "step": 8458
+    },
+    {
+      "epoch": 0.6103394783361593,
+      "grad_norm": 0.14400361478328705,
+      "learning_rate": 0.00017560686967816425,
+      "loss": 0.1418,
+      "step": 8459
+    },
+    {
+      "epoch": 0.6104116310112198,
+      "grad_norm": 0.11821573227643967,
+      "learning_rate": 0.0001756039832587675,
+      "loss": 0.1228,
+      "step": 8460
+    },
+    {
+      "epoch": 0.6104837836862802,
+      "grad_norm": 0.13207882642745972,
+      "learning_rate": 0.00017560109683937075,
+      "loss": 0.1242,
+      "step": 8461
+    },
+    {
+      "epoch": 0.6105559363613406,
+      "grad_norm": 0.13548435270786285,
+      "learning_rate": 0.00017559821041997404,
+      "loss": 0.1262,
+      "step": 8462
+    },
+    {
+      "epoch": 0.610628089036401,
+      "grad_norm": 0.1567934900522232,
+      "learning_rate": 0.0001755953240005773,
+      "loss": 0.1973,
+      "step": 8463
+    },
+    {
+      "epoch": 0.6107002417114614,
+      "grad_norm": 0.17090696096420288,
+      "learning_rate": 0.00017559243758118057,
+      "loss": 0.1539,
+      "step": 8464
+    },
+    {
+      "epoch": 0.6107723943865219,
+      "grad_norm": 0.1312856376171112,
+      "learning_rate": 0.0001755895511617838,
+      "loss": 0.1151,
+      "step": 8465
+    },
+    {
+      "epoch": 0.6108445470615823,
+      "grad_norm": 0.15751205384731293,
+      "learning_rate": 0.00017558666474238707,
+      "loss": 0.1696,
+      "step": 8466
+    },
+    {
+      "epoch": 0.6109166997366428,
+      "grad_norm": 0.14566873013973236,
+      "learning_rate": 0.00017558377832299033,
+      "loss": 0.1762,
+      "step": 8467
+    },
+    {
+      "epoch": 0.6109888524117032,
+      "grad_norm": 0.12950445711612701,
+      "learning_rate": 0.0001755808919035936,
+      "loss": 0.1188,
+      "step": 8468
+    },
+    {
+      "epoch": 0.6110610050867636,
+      "grad_norm": 0.1337715983390808,
+      "learning_rate": 0.00017557800548419688,
+      "loss": 0.1809,
+      "step": 8469
+    },
+    {
+      "epoch": 0.611133157761824,
+      "grad_norm": 0.13403064012527466,
+      "learning_rate": 0.00017557511906480012,
+      "loss": 0.1583,
+      "step": 8470
+    },
+    {
+      "epoch": 0.6112053104368844,
+      "grad_norm": 0.10216417163610458,
+      "learning_rate": 0.00017557223264540338,
+      "loss": 0.1674,
+      "step": 8471
+    },
+    {
+      "epoch": 0.6112774631119449,
+      "grad_norm": 0.1403636634349823,
+      "learning_rate": 0.00017556934622600664,
+      "loss": 0.1559,
+      "step": 8472
+    },
+    {
+      "epoch": 0.6113496157870053,
+      "grad_norm": 0.2026059478521347,
+      "learning_rate": 0.0001755664598066099,
+      "loss": 0.1158,
+      "step": 8473
+    },
+    {
+      "epoch": 0.6114217684620658,
+      "grad_norm": 0.10862534493207932,
+      "learning_rate": 0.00017556357338721317,
+      "loss": 0.1191,
+      "step": 8474
+    },
+    {
+      "epoch": 0.6114939211371262,
+      "grad_norm": 0.12901028990745544,
+      "learning_rate": 0.00017556068696781643,
+      "loss": 0.1377,
+      "step": 8475
+    },
+    {
+      "epoch": 0.6115660738121866,
+      "grad_norm": 0.11864591389894485,
+      "learning_rate": 0.0001755578005484197,
+      "loss": 0.118,
+      "step": 8476
+    },
+    {
+      "epoch": 0.611638226487247,
+      "grad_norm": 0.11161187291145325,
+      "learning_rate": 0.00017555491412902296,
+      "loss": 0.1901,
+      "step": 8477
+    },
+    {
+      "epoch": 0.6117103791623074,
+      "grad_norm": 0.10867039114236832,
+      "learning_rate": 0.00017555202770962622,
+      "loss": 0.0806,
+      "step": 8478
+    },
+    {
+      "epoch": 0.6117825318373679,
+      "grad_norm": 0.13876743614673615,
+      "learning_rate": 0.00017554914129022948,
+      "loss": 0.1455,
+      "step": 8479
+    },
+    {
+      "epoch": 0.6118546845124283,
+      "grad_norm": 0.10943257063627243,
+      "learning_rate": 0.00017554625487083275,
+      "loss": 0.1715,
+      "step": 8480
+    },
+    {
+      "epoch": 0.6119268371874887,
+      "grad_norm": 0.11604628711938858,
+      "learning_rate": 0.00017554336845143598,
+      "loss": 0.1693,
+      "step": 8481
+    },
+    {
+      "epoch": 0.6119989898625492,
+      "grad_norm": 0.1253431886434555,
+      "learning_rate": 0.00017554048203203925,
+      "loss": 0.1411,
+      "step": 8482
+    },
+    {
+      "epoch": 0.6120711425376096,
+      "grad_norm": 0.1400705724954605,
+      "learning_rate": 0.00017553759561264254,
+      "loss": 0.1572,
+      "step": 8483
+    },
+    {
+      "epoch": 0.61214329521267,
+      "grad_norm": 0.15838080644607544,
+      "learning_rate": 0.0001755347091932458,
+      "loss": 0.1528,
+      "step": 8484
+    },
+    {
+      "epoch": 0.6122154478877304,
+      "grad_norm": 0.11225708574056625,
+      "learning_rate": 0.00017553182277384906,
+      "loss": 0.1533,
+      "step": 8485
+    },
+    {
+      "epoch": 0.6122876005627909,
+      "grad_norm": 0.13094502687454224,
+      "learning_rate": 0.0001755289363544523,
+      "loss": 0.1668,
+      "step": 8486
+    },
+    {
+      "epoch": 0.6123597532378513,
+      "grad_norm": 0.13761380314826965,
+      "learning_rate": 0.00017552604993505556,
+      "loss": 0.1466,
+      "step": 8487
+    },
+    {
+      "epoch": 0.6124319059129117,
+      "grad_norm": 0.12459521740674973,
+      "learning_rate": 0.00017552316351565882,
+      "loss": 0.1122,
+      "step": 8488
+    },
+    {
+      "epoch": 0.6125040585879722,
+      "grad_norm": 0.13551610708236694,
+      "learning_rate": 0.0001755202770962621,
+      "loss": 0.1571,
+      "step": 8489
+    },
+    {
+      "epoch": 0.6125762112630325,
+      "grad_norm": 0.10906567424535751,
+      "learning_rate": 0.00017551739067686538,
+      "loss": 0.1432,
+      "step": 8490
+    },
+    {
+      "epoch": 0.612648363938093,
+      "grad_norm": 0.13732370734214783,
+      "learning_rate": 0.0001755145042574686,
+      "loss": 0.1786,
+      "step": 8491
+    },
+    {
+      "epoch": 0.6127205166131534,
+      "grad_norm": 0.11145670711994171,
+      "learning_rate": 0.00017551161783807188,
+      "loss": 0.1494,
+      "step": 8492
+    },
+    {
+      "epoch": 0.6127926692882139,
+      "grad_norm": 0.13747194409370422,
+      "learning_rate": 0.00017550873141867514,
+      "loss": 0.1684,
+      "step": 8493
+    },
+    {
+      "epoch": 0.6128648219632743,
+      "grad_norm": 0.15329043567180634,
+      "learning_rate": 0.0001755058449992784,
+      "loss": 0.1808,
+      "step": 8494
+    },
+    {
+      "epoch": 0.6129369746383347,
+      "grad_norm": 0.15293771028518677,
+      "learning_rate": 0.00017550295857988166,
+      "loss": 0.1691,
+      "step": 8495
+    },
+    {
+      "epoch": 0.6130091273133952,
+      "grad_norm": 0.10288719832897186,
+      "learning_rate": 0.00017550007216048493,
+      "loss": 0.1448,
+      "step": 8496
+    },
+    {
+      "epoch": 0.6130812799884555,
+      "grad_norm": 0.11011450737714767,
+      "learning_rate": 0.0001754971857410882,
+      "loss": 0.1423,
+      "step": 8497
+    },
+    {
+      "epoch": 0.613153432663516,
+      "grad_norm": 0.1155213788151741,
+      "learning_rate": 0.00017549429932169145,
+      "loss": 0.1385,
+      "step": 8498
+    },
+    {
+      "epoch": 0.6132255853385764,
+      "grad_norm": 0.1479150354862213,
+      "learning_rate": 0.00017549141290229472,
+      "loss": 0.1405,
+      "step": 8499
+    },
+    {
+      "epoch": 0.6132977380136369,
+      "grad_norm": 0.1567593663930893,
+      "learning_rate": 0.00017548852648289798,
+      "loss": 0.1805,
+      "step": 8500
+    },
+    {
+      "epoch": 0.6133698906886973,
+      "grad_norm": 0.1502797156572342,
+      "learning_rate": 0.00017548564006350124,
+      "loss": 0.1652,
+      "step": 8501
+    },
+    {
+      "epoch": 0.6134420433637577,
+      "grad_norm": 0.12881901860237122,
+      "learning_rate": 0.0001754827536441045,
+      "loss": 0.1654,
+      "step": 8502
+    },
+    {
+      "epoch": 0.6135141960388182,
+      "grad_norm": 0.15938179194927216,
+      "learning_rate": 0.00017547986722470774,
+      "loss": 0.1861,
+      "step": 8503
+    },
+    {
+      "epoch": 0.6135863487138785,
+      "grad_norm": 0.15454211831092834,
+      "learning_rate": 0.00017547698080531103,
+      "loss": 0.1632,
+      "step": 8504
+    },
+    {
+      "epoch": 0.613658501388939,
+      "grad_norm": 0.18221557140350342,
+      "learning_rate": 0.0001754740943859143,
+      "loss": 0.1617,
+      "step": 8505
+    },
+    {
+      "epoch": 0.6137306540639994,
+      "grad_norm": 0.10344431549310684,
+      "learning_rate": 0.00017547120796651756,
+      "loss": 0.1188,
+      "step": 8506
+    },
+    {
+      "epoch": 0.6138028067390598,
+      "grad_norm": 0.16359294950962067,
+      "learning_rate": 0.00017546832154712082,
+      "loss": 0.1974,
+      "step": 8507
+    },
+    {
+      "epoch": 0.6138749594141203,
+      "grad_norm": 0.12594661116600037,
+      "learning_rate": 0.00017546543512772406,
+      "loss": 0.1541,
+      "step": 8508
+    },
+    {
+      "epoch": 0.6139471120891807,
+      "grad_norm": 0.13963118195533752,
+      "learning_rate": 0.00017546254870832732,
+      "loss": 0.1701,
+      "step": 8509
+    },
+    {
+      "epoch": 0.6140192647642412,
+      "grad_norm": 0.13694241642951965,
+      "learning_rate": 0.00017545966228893058,
+      "loss": 0.1564,
+      "step": 8510
+    },
+    {
+      "epoch": 0.6140914174393015,
+      "grad_norm": 0.15386688709259033,
+      "learning_rate": 0.00017545677586953387,
+      "loss": 0.1247,
+      "step": 8511
+    },
+    {
+      "epoch": 0.614163570114362,
+      "grad_norm": 0.12560677528381348,
+      "learning_rate": 0.00017545388945013713,
+      "loss": 0.1475,
+      "step": 8512
+    },
+    {
+      "epoch": 0.6142357227894224,
+      "grad_norm": 0.15082158148288727,
+      "learning_rate": 0.00017545100303074037,
+      "loss": 0.154,
+      "step": 8513
+    },
+    {
+      "epoch": 0.6143078754644828,
+      "grad_norm": 0.12800118327140808,
+      "learning_rate": 0.00017544811661134363,
+      "loss": 0.1477,
+      "step": 8514
+    },
+    {
+      "epoch": 0.6143800281395433,
+      "grad_norm": 0.11983068287372589,
+      "learning_rate": 0.0001754452301919469,
+      "loss": 0.1787,
+      "step": 8515
+    },
+    {
+      "epoch": 0.6144521808146037,
+      "grad_norm": 0.11161981523036957,
+      "learning_rate": 0.00017544234377255016,
+      "loss": 0.177,
+      "step": 8516
+    },
+    {
+      "epoch": 0.6145243334896642,
+      "grad_norm": 0.12424928694963455,
+      "learning_rate": 0.00017543945735315342,
+      "loss": 0.1565,
+      "step": 8517
+    },
+    {
+      "epoch": 0.6145964861647245,
+      "grad_norm": 0.11708874255418777,
+      "learning_rate": 0.00017543657093375668,
+      "loss": 0.1713,
+      "step": 8518
+    },
+    {
+      "epoch": 0.614668638839785,
+      "grad_norm": 0.13190799951553345,
+      "learning_rate": 0.00017543368451435995,
+      "loss": 0.1117,
+      "step": 8519
+    },
+    {
+      "epoch": 0.6147407915148454,
+      "grad_norm": 0.1273881196975708,
+      "learning_rate": 0.0001754307980949632,
+      "loss": 0.117,
+      "step": 8520
+    },
+    {
+      "epoch": 0.6148129441899058,
+      "grad_norm": 0.15069957077503204,
+      "learning_rate": 0.00017542791167556647,
+      "loss": 0.1727,
+      "step": 8521
+    },
+    {
+      "epoch": 0.6148850968649663,
+      "grad_norm": 0.12246958911418915,
+      "learning_rate": 0.00017542502525616974,
+      "loss": 0.1283,
+      "step": 8522
+    },
+    {
+      "epoch": 0.6149572495400267,
+      "grad_norm": 0.12573544681072235,
+      "learning_rate": 0.000175422138836773,
+      "loss": 0.1428,
+      "step": 8523
+    },
+    {
+      "epoch": 0.6150294022150872,
+      "grad_norm": 0.15929429233074188,
+      "learning_rate": 0.00017541925241737624,
+      "loss": 0.1073,
+      "step": 8524
+    },
+    {
+      "epoch": 0.6151015548901475,
+      "grad_norm": 0.11118916422128677,
+      "learning_rate": 0.0001754163659979795,
+      "loss": 0.0931,
+      "step": 8525
+    },
+    {
+      "epoch": 0.615173707565208,
+      "grad_norm": 0.10511104762554169,
+      "learning_rate": 0.0001754134795785828,
+      "loss": 0.1519,
+      "step": 8526
+    },
+    {
+      "epoch": 0.6152458602402684,
+      "grad_norm": 0.16426770389080048,
+      "learning_rate": 0.00017541059315918605,
+      "loss": 0.1643,
+      "step": 8527
+    },
+    {
+      "epoch": 0.6153180129153288,
+      "grad_norm": 0.12183352559804916,
+      "learning_rate": 0.00017540770673978931,
+      "loss": 0.1171,
+      "step": 8528
+    },
+    {
+      "epoch": 0.6153901655903893,
+      "grad_norm": 0.10447021573781967,
+      "learning_rate": 0.00017540482032039255,
+      "loss": 0.1498,
+      "step": 8529
+    },
+    {
+      "epoch": 0.6154623182654497,
+      "grad_norm": 0.1412649154663086,
+      "learning_rate": 0.0001754019339009958,
+      "loss": 0.1806,
+      "step": 8530
+    },
+    {
+      "epoch": 0.6155344709405102,
+      "grad_norm": 0.1257966160774231,
+      "learning_rate": 0.00017539904748159908,
+      "loss": 0.2047,
+      "step": 8531
+    },
+    {
+      "epoch": 0.6156066236155705,
+      "grad_norm": 0.12649193406105042,
+      "learning_rate": 0.00017539616106220234,
+      "loss": 0.1712,
+      "step": 8532
+    },
+    {
+      "epoch": 0.615678776290631,
+      "grad_norm": 0.17213580012321472,
+      "learning_rate": 0.00017539327464280563,
+      "loss": 0.1278,
+      "step": 8533
+    },
+    {
+      "epoch": 0.6157509289656914,
+      "grad_norm": 0.13075965642929077,
+      "learning_rate": 0.00017539038822340886,
+      "loss": 0.1334,
+      "step": 8534
+    },
+    {
+      "epoch": 0.6158230816407518,
+      "grad_norm": 0.11880091577768326,
+      "learning_rate": 0.00017538750180401213,
+      "loss": 0.1513,
+      "step": 8535
+    },
+    {
+      "epoch": 0.6158952343158123,
+      "grad_norm": 0.1348184049129486,
+      "learning_rate": 0.0001753846153846154,
+      "loss": 0.1195,
+      "step": 8536
+    },
+    {
+      "epoch": 0.6159673869908727,
+      "grad_norm": 0.12139120697975159,
+      "learning_rate": 0.00017538172896521865,
+      "loss": 0.1647,
+      "step": 8537
+    },
+    {
+      "epoch": 0.6160395396659332,
+      "grad_norm": 0.21920908987522125,
+      "learning_rate": 0.00017537884254582192,
+      "loss": 0.1475,
+      "step": 8538
+    },
+    {
+      "epoch": 0.6161116923409935,
+      "grad_norm": 0.14035917818546295,
+      "learning_rate": 0.00017537595612642518,
+      "loss": 0.1817,
+      "step": 8539
+    },
+    {
+      "epoch": 0.6161838450160539,
+      "grad_norm": 0.11102382093667984,
+      "learning_rate": 0.00017537306970702844,
+      "loss": 0.1153,
+      "step": 8540
+    },
+    {
+      "epoch": 0.6162559976911144,
+      "grad_norm": 0.15595851838588715,
+      "learning_rate": 0.0001753701832876317,
+      "loss": 0.1358,
+      "step": 8541
+    },
+    {
+      "epoch": 0.6163281503661748,
+      "grad_norm": 0.14062592387199402,
+      "learning_rate": 0.00017536729686823497,
+      "loss": 0.2017,
+      "step": 8542
+    },
+    {
+      "epoch": 0.6164003030412353,
+      "grad_norm": 0.14323318004608154,
+      "learning_rate": 0.00017536441044883823,
+      "loss": 0.1508,
+      "step": 8543
+    },
+    {
+      "epoch": 0.6164724557162957,
+      "grad_norm": 0.14219464361667633,
+      "learning_rate": 0.0001753615240294415,
+      "loss": 0.178,
+      "step": 8544
+    },
+    {
+      "epoch": 0.6165446083913562,
+      "grad_norm": 0.1259685605764389,
+      "learning_rate": 0.00017535863761004473,
+      "loss": 0.1548,
+      "step": 8545
+    },
+    {
+      "epoch": 0.6166167610664165,
+      "grad_norm": 0.1387157440185547,
+      "learning_rate": 0.000175355751190648,
+      "loss": 0.175,
+      "step": 8546
+    },
+    {
+      "epoch": 0.6166889137414769,
+      "grad_norm": 0.1125483587384224,
+      "learning_rate": 0.00017535286477125128,
+      "loss": 0.1553,
+      "step": 8547
+    },
+    {
+      "epoch": 0.6167610664165374,
+      "grad_norm": 0.13433030247688293,
+      "learning_rate": 0.00017534997835185455,
+      "loss": 0.2003,
+      "step": 8548
+    },
+    {
+      "epoch": 0.6168332190915978,
+      "grad_norm": 0.13598592579364777,
+      "learning_rate": 0.0001753470919324578,
+      "loss": 0.1419,
+      "step": 8549
+    },
+    {
+      "epoch": 0.6169053717666583,
+      "grad_norm": 0.11717887222766876,
+      "learning_rate": 0.00017534420551306104,
+      "loss": 0.1611,
+      "step": 8550
+    },
+    {
+      "epoch": 0.6169775244417187,
+      "grad_norm": 0.129541277885437,
+      "learning_rate": 0.0001753413190936643,
+      "loss": 0.1321,
+      "step": 8551
+    },
+    {
+      "epoch": 0.617049677116779,
+      "grad_norm": 0.11288454383611679,
+      "learning_rate": 0.00017533843267426757,
+      "loss": 0.1598,
+      "step": 8552
+    },
+    {
+      "epoch": 0.6171218297918395,
+      "grad_norm": 0.1419808268547058,
+      "learning_rate": 0.00017533554625487083,
+      "loss": 0.1494,
+      "step": 8553
+    },
+    {
+      "epoch": 0.6171939824668999,
+      "grad_norm": 0.1335771232843399,
+      "learning_rate": 0.00017533265983547412,
+      "loss": 0.1457,
+      "step": 8554
+    },
+    {
+      "epoch": 0.6172661351419604,
+      "grad_norm": 0.1458192765712738,
+      "learning_rate": 0.00017532977341607736,
+      "loss": 0.1806,
+      "step": 8555
+    },
+    {
+      "epoch": 0.6173382878170208,
+      "grad_norm": 0.11284256726503372,
+      "learning_rate": 0.00017532688699668062,
+      "loss": 0.1395,
+      "step": 8556
+    },
+    {
+      "epoch": 0.6174104404920813,
+      "grad_norm": 0.10957358777523041,
+      "learning_rate": 0.00017532400057728388,
+      "loss": 0.131,
+      "step": 8557
+    },
+    {
+      "epoch": 0.6174825931671417,
+      "grad_norm": 0.1465001404285431,
+      "learning_rate": 0.00017532111415788715,
+      "loss": 0.1585,
+      "step": 8558
+    },
+    {
+      "epoch": 0.617554745842202,
+      "grad_norm": 0.14206071197986603,
+      "learning_rate": 0.0001753182277384904,
+      "loss": 0.14,
+      "step": 8559
+    },
+    {
+      "epoch": 0.6176268985172625,
+      "grad_norm": 0.1402553766965866,
+      "learning_rate": 0.00017531534131909367,
+      "loss": 0.1754,
+      "step": 8560
+    },
+    {
+      "epoch": 0.6176990511923229,
+      "grad_norm": 0.12297656387090683,
+      "learning_rate": 0.00017531245489969694,
+      "loss": 0.1783,
+      "step": 8561
+    },
+    {
+      "epoch": 0.6177712038673834,
+      "grad_norm": 0.1184375211596489,
+      "learning_rate": 0.0001753095684803002,
+      "loss": 0.1654,
+      "step": 8562
+    },
+    {
+      "epoch": 0.6178433565424438,
+      "grad_norm": 0.11259499192237854,
+      "learning_rate": 0.00017530668206090346,
+      "loss": 0.1434,
+      "step": 8563
+    },
+    {
+      "epoch": 0.6179155092175043,
+      "grad_norm": 0.13270528614521027,
+      "learning_rate": 0.00017530379564150672,
+      "loss": 0.131,
+      "step": 8564
+    },
+    {
+      "epoch": 0.6179876618925647,
+      "grad_norm": 0.17860640585422516,
+      "learning_rate": 0.00017530090922211,
+      "loss": 0.1351,
+      "step": 8565
+    },
+    {
+      "epoch": 0.618059814567625,
+      "grad_norm": 0.1279137283563614,
+      "learning_rate": 0.00017529802280271322,
+      "loss": 0.1286,
+      "step": 8566
+    },
+    {
+      "epoch": 0.6181319672426855,
+      "grad_norm": 0.15142486989498138,
+      "learning_rate": 0.0001752951363833165,
+      "loss": 0.1479,
+      "step": 8567
+    },
+    {
+      "epoch": 0.6182041199177459,
+      "grad_norm": 0.13252979516983032,
+      "learning_rate": 0.00017529224996391978,
+      "loss": 0.2062,
+      "step": 8568
+    },
+    {
+      "epoch": 0.6182762725928064,
+      "grad_norm": 0.11797452718019485,
+      "learning_rate": 0.00017528936354452304,
+      "loss": 0.125,
+      "step": 8569
+    },
+    {
+      "epoch": 0.6183484252678668,
+      "grad_norm": 0.12526576220989227,
+      "learning_rate": 0.0001752864771251263,
+      "loss": 0.1176,
+      "step": 8570
+    },
+    {
+      "epoch": 0.6184205779429273,
+      "grad_norm": 0.12063656747341156,
+      "learning_rate": 0.00017528359070572954,
+      "loss": 0.1391,
+      "step": 8571
+    },
+    {
+      "epoch": 0.6184927306179877,
+      "grad_norm": 0.10913331061601639,
+      "learning_rate": 0.0001752807042863328,
+      "loss": 0.1639,
+      "step": 8572
+    },
+    {
+      "epoch": 0.618564883293048,
+      "grad_norm": 0.1257828176021576,
+      "learning_rate": 0.00017527781786693606,
+      "loss": 0.153,
+      "step": 8573
+    },
+    {
+      "epoch": 0.6186370359681085,
+      "grad_norm": 0.13285456597805023,
+      "learning_rate": 0.00017527493144753933,
+      "loss": 0.1729,
+      "step": 8574
+    },
+    {
+      "epoch": 0.6187091886431689,
+      "grad_norm": 0.15405382215976715,
+      "learning_rate": 0.00017527204502814262,
+      "loss": 0.1654,
+      "step": 8575
+    },
+    {
+      "epoch": 0.6187813413182294,
+      "grad_norm": 0.11983431130647659,
+      "learning_rate": 0.00017526915860874585,
+      "loss": 0.1605,
+      "step": 8576
+    },
+    {
+      "epoch": 0.6188534939932898,
+      "grad_norm": 0.1335511952638626,
+      "learning_rate": 0.00017526627218934912,
+      "loss": 0.1281,
+      "step": 8577
+    },
+    {
+      "epoch": 0.6189256466683503,
+      "grad_norm": 0.13076375424861908,
+      "learning_rate": 0.00017526338576995238,
+      "loss": 0.1468,
+      "step": 8578
+    },
+    {
+      "epoch": 0.6189977993434107,
+      "grad_norm": 0.11791105568408966,
+      "learning_rate": 0.00017526049935055564,
+      "loss": 0.133,
+      "step": 8579
+    },
+    {
+      "epoch": 0.619069952018471,
+      "grad_norm": 0.15978433191776276,
+      "learning_rate": 0.0001752576129311589,
+      "loss": 0.1464,
+      "step": 8580
+    },
+    {
+      "epoch": 0.6191421046935315,
+      "grad_norm": 0.15569712221622467,
+      "learning_rate": 0.00017525472651176217,
+      "loss": 0.1407,
+      "step": 8581
+    },
+    {
+      "epoch": 0.6192142573685919,
+      "grad_norm": 0.1182025671005249,
+      "learning_rate": 0.00017525184009236543,
+      "loss": 0.1861,
+      "step": 8582
+    },
+    {
+      "epoch": 0.6192864100436524,
+      "grad_norm": 0.13768304884433746,
+      "learning_rate": 0.0001752489536729687,
+      "loss": 0.1786,
+      "step": 8583
+    },
+    {
+      "epoch": 0.6193585627187128,
+      "grad_norm": 0.10402512550354004,
+      "learning_rate": 0.00017524606725357196,
+      "loss": 0.1489,
+      "step": 8584
+    },
+    {
+      "epoch": 0.6194307153937733,
+      "grad_norm": 0.11849159002304077,
+      "learning_rate": 0.00017524318083417522,
+      "loss": 0.1867,
+      "step": 8585
+    },
+    {
+      "epoch": 0.6195028680688337,
+      "grad_norm": 0.11498197168111801,
+      "learning_rate": 0.00017524029441477848,
+      "loss": 0.1748,
+      "step": 8586
+    },
+    {
+      "epoch": 0.619575020743894,
+      "grad_norm": 0.11893530935049057,
+      "learning_rate": 0.00017523740799538172,
+      "loss": 0.1826,
+      "step": 8587
+    },
+    {
+      "epoch": 0.6196471734189545,
+      "grad_norm": 0.1262560784816742,
+      "learning_rate": 0.00017523452157598498,
+      "loss": 0.1247,
+      "step": 8588
+    },
+    {
+      "epoch": 0.6197193260940149,
+      "grad_norm": 0.10598855465650558,
+      "learning_rate": 0.00017523163515658827,
+      "loss": 0.135,
+      "step": 8589
+    },
+    {
+      "epoch": 0.6197914787690754,
+      "grad_norm": 0.1007552519440651,
+      "learning_rate": 0.00017522874873719153,
+      "loss": 0.1218,
+      "step": 8590
+    },
+    {
+      "epoch": 0.6198636314441358,
+      "grad_norm": 0.12365041673183441,
+      "learning_rate": 0.0001752258623177948,
+      "loss": 0.1183,
+      "step": 8591
+    },
+    {
+      "epoch": 0.6199357841191963,
+      "grad_norm": 0.12275546044111252,
+      "learning_rate": 0.00017522297589839803,
+      "loss": 0.137,
+      "step": 8592
+    },
+    {
+      "epoch": 0.6200079367942567,
+      "grad_norm": 0.1261817216873169,
+      "learning_rate": 0.0001752200894790013,
+      "loss": 0.1373,
+      "step": 8593
+    },
+    {
+      "epoch": 0.620080089469317,
+      "grad_norm": 0.14289404451847076,
+      "learning_rate": 0.00017521720305960456,
+      "loss": 0.1445,
+      "step": 8594
+    },
+    {
+      "epoch": 0.6201522421443775,
+      "grad_norm": 0.1267629861831665,
+      "learning_rate": 0.00017521431664020782,
+      "loss": 0.134,
+      "step": 8595
+    },
+    {
+      "epoch": 0.6202243948194379,
+      "grad_norm": 0.11549125611782074,
+      "learning_rate": 0.0001752114302208111,
+      "loss": 0.174,
+      "step": 8596
+    },
+    {
+      "epoch": 0.6202965474944984,
+      "grad_norm": 0.16596673429012299,
+      "learning_rate": 0.00017520854380141435,
+      "loss": 0.1311,
+      "step": 8597
+    },
+    {
+      "epoch": 0.6203687001695588,
+      "grad_norm": 0.13521885871887207,
+      "learning_rate": 0.0001752056573820176,
+      "loss": 0.1945,
+      "step": 8598
+    },
+    {
+      "epoch": 0.6204408528446193,
+      "grad_norm": 0.11223003268241882,
+      "learning_rate": 0.00017520277096262087,
+      "loss": 0.1864,
+      "step": 8599
+    },
+    {
+      "epoch": 0.6205130055196797,
+      "grad_norm": 0.1500011831521988,
+      "learning_rate": 0.00017519988454322414,
+      "loss": 0.1917,
+      "step": 8600
+    },
+    {
+      "epoch": 0.62058515819474,
+      "grad_norm": 0.1375226080417633,
+      "learning_rate": 0.0001751969981238274,
+      "loss": 0.1331,
+      "step": 8601
+    },
+    {
+      "epoch": 0.6206573108698005,
+      "grad_norm": 0.15558865666389465,
+      "learning_rate": 0.00017519411170443066,
+      "loss": 0.1416,
+      "step": 8602
+    },
+    {
+      "epoch": 0.6207294635448609,
+      "grad_norm": 0.11568037420511246,
+      "learning_rate": 0.00017519122528503392,
+      "loss": 0.1475,
+      "step": 8603
+    },
+    {
+      "epoch": 0.6208016162199214,
+      "grad_norm": 0.13693024218082428,
+      "learning_rate": 0.0001751883388656372,
+      "loss": 0.1409,
+      "step": 8604
+    },
+    {
+      "epoch": 0.6208737688949818,
+      "grad_norm": 0.14244809746742249,
+      "learning_rate": 0.00017518545244624045,
+      "loss": 0.1655,
+      "step": 8605
+    },
+    {
+      "epoch": 0.6209459215700422,
+      "grad_norm": 0.14547933638095856,
+      "learning_rate": 0.0001751825660268437,
+      "loss": 0.188,
+      "step": 8606
+    },
+    {
+      "epoch": 0.6210180742451027,
+      "grad_norm": 0.14529702067375183,
+      "learning_rate": 0.00017517967960744698,
+      "loss": 0.1711,
+      "step": 8607
+    },
+    {
+      "epoch": 0.621090226920163,
+      "grad_norm": 0.11163908988237381,
+      "learning_rate": 0.0001751767931880502,
+      "loss": 0.128,
+      "step": 8608
+    },
+    {
+      "epoch": 0.6211623795952235,
+      "grad_norm": 0.15089009702205658,
+      "learning_rate": 0.00017517390676865347,
+      "loss": 0.1739,
+      "step": 8609
+    },
+    {
+      "epoch": 0.6212345322702839,
+      "grad_norm": 0.10636797547340393,
+      "learning_rate": 0.00017517102034925676,
+      "loss": 0.1474,
+      "step": 8610
+    },
+    {
+      "epoch": 0.6213066849453444,
+      "grad_norm": 0.14714211225509644,
+      "learning_rate": 0.00017516813392986003,
+      "loss": 0.1739,
+      "step": 8611
+    },
+    {
+      "epoch": 0.6213788376204048,
+      "grad_norm": 0.12578116357326508,
+      "learning_rate": 0.0001751652475104633,
+      "loss": 0.1828,
+      "step": 8612
+    },
+    {
+      "epoch": 0.6214509902954652,
+      "grad_norm": 0.12373779714107513,
+      "learning_rate": 0.00017516236109106655,
+      "loss": 0.1375,
+      "step": 8613
+    },
+    {
+      "epoch": 0.6215231429705256,
+      "grad_norm": 0.16616494953632355,
+      "learning_rate": 0.0001751594746716698,
+      "loss": 0.1831,
+      "step": 8614
+    },
+    {
+      "epoch": 0.621595295645586,
+      "grad_norm": 0.14434950053691864,
+      "learning_rate": 0.00017515658825227305,
+      "loss": 0.1662,
+      "step": 8615
+    },
+    {
+      "epoch": 0.6216674483206465,
+      "grad_norm": 0.15726368129253387,
+      "learning_rate": 0.00017515370183287632,
+      "loss": 0.1119,
+      "step": 8616
+    },
+    {
+      "epoch": 0.6217396009957069,
+      "grad_norm": 0.13260503113269806,
+      "learning_rate": 0.0001751508154134796,
+      "loss": 0.1706,
+      "step": 8617
+    },
+    {
+      "epoch": 0.6218117536707674,
+      "grad_norm": 0.13859052956104279,
+      "learning_rate": 0.00017514792899408287,
+      "loss": 0.1466,
+      "step": 8618
+    },
+    {
+      "epoch": 0.6218839063458278,
+      "grad_norm": 0.15725117921829224,
+      "learning_rate": 0.0001751450425746861,
+      "loss": 0.1442,
+      "step": 8619
+    },
+    {
+      "epoch": 0.6219560590208882,
+      "grad_norm": 0.15142501890659332,
+      "learning_rate": 0.00017514215615528937,
+      "loss": 0.1715,
+      "step": 8620
+    },
+    {
+      "epoch": 0.6220282116959486,
+      "grad_norm": 0.12751902639865875,
+      "learning_rate": 0.00017513926973589263,
+      "loss": 0.1523,
+      "step": 8621
+    },
+    {
+      "epoch": 0.622100364371009,
+      "grad_norm": 0.1309266984462738,
+      "learning_rate": 0.0001751363833164959,
+      "loss": 0.2041,
+      "step": 8622
+    },
+    {
+      "epoch": 0.6221725170460695,
+      "grad_norm": 0.13360129296779633,
+      "learning_rate": 0.00017513349689709916,
+      "loss": 0.1556,
+      "step": 8623
+    },
+    {
+      "epoch": 0.6222446697211299,
+      "grad_norm": 0.12475387752056122,
+      "learning_rate": 0.00017513061047770242,
+      "loss": 0.1112,
+      "step": 8624
+    },
+    {
+      "epoch": 0.6223168223961904,
+      "grad_norm": 0.11016663908958435,
+      "learning_rate": 0.00017512772405830568,
+      "loss": 0.1715,
+      "step": 8625
+    },
+    {
+      "epoch": 0.6223889750712508,
+      "grad_norm": 0.12061906605958939,
+      "learning_rate": 0.00017512483763890894,
+      "loss": 0.172,
+      "step": 8626
+    },
+    {
+      "epoch": 0.6224611277463112,
+      "grad_norm": 0.1120067685842514,
+      "learning_rate": 0.0001751219512195122,
+      "loss": 0.1668,
+      "step": 8627
+    },
+    {
+      "epoch": 0.6225332804213716,
+      "grad_norm": 0.10962363332509995,
+      "learning_rate": 0.00017511906480011547,
+      "loss": 0.1077,
+      "step": 8628
+    },
+    {
+      "epoch": 0.622605433096432,
+      "grad_norm": 0.12538926303386688,
+      "learning_rate": 0.00017511617838071873,
+      "loss": 0.1826,
+      "step": 8629
+    },
+    {
+      "epoch": 0.6226775857714925,
+      "grad_norm": 0.13419729471206665,
+      "learning_rate": 0.00017511329196132197,
+      "loss": 0.1384,
+      "step": 8630
+    },
+    {
+      "epoch": 0.6227497384465529,
+      "grad_norm": 0.1244427040219307,
+      "learning_rate": 0.00017511040554192526,
+      "loss": 0.1417,
+      "step": 8631
+    },
+    {
+      "epoch": 0.6228218911216133,
+      "grad_norm": 0.14291410148143768,
+      "learning_rate": 0.00017510751912252852,
+      "loss": 0.1516,
+      "step": 8632
+    },
+    {
+      "epoch": 0.6228940437966738,
+      "grad_norm": 0.09003730118274689,
+      "learning_rate": 0.00017510463270313179,
+      "loss": 0.1391,
+      "step": 8633
+    },
+    {
+      "epoch": 0.6229661964717342,
+      "grad_norm": 0.1254691481590271,
+      "learning_rate": 0.00017510174628373505,
+      "loss": 0.1943,
+      "step": 8634
+    },
+    {
+      "epoch": 0.6230383491467946,
+      "grad_norm": 0.13230058550834656,
+      "learning_rate": 0.00017509885986433828,
+      "loss": 0.1199,
+      "step": 8635
+    },
+    {
+      "epoch": 0.623110501821855,
+      "grad_norm": 0.11702358722686768,
+      "learning_rate": 0.00017509597344494155,
+      "loss": 0.1576,
+      "step": 8636
+    },
+    {
+      "epoch": 0.6231826544969155,
+      "grad_norm": 0.12280598282814026,
+      "learning_rate": 0.0001750930870255448,
+      "loss": 0.1592,
+      "step": 8637
+    },
+    {
+      "epoch": 0.6232548071719759,
+      "grad_norm": 0.1280744969844818,
+      "learning_rate": 0.0001750902006061481,
+      "loss": 0.1572,
+      "step": 8638
+    },
+    {
+      "epoch": 0.6233269598470363,
+      "grad_norm": 0.14713403582572937,
+      "learning_rate": 0.00017508731418675136,
+      "loss": 0.1358,
+      "step": 8639
+    },
+    {
+      "epoch": 0.6233991125220968,
+      "grad_norm": 0.1206742525100708,
+      "learning_rate": 0.0001750844277673546,
+      "loss": 0.1492,
+      "step": 8640
+    },
+    {
+      "epoch": 0.6234712651971572,
+      "grad_norm": 0.1162571832537651,
+      "learning_rate": 0.00017508154134795786,
+      "loss": 0.137,
+      "step": 8641
+    },
+    {
+      "epoch": 0.6235434178722176,
+      "grad_norm": 0.10991784930229187,
+      "learning_rate": 0.00017507865492856112,
+      "loss": 0.1663,
+      "step": 8642
+    },
+    {
+      "epoch": 0.623615570547278,
+      "grad_norm": 0.117439404129982,
+      "learning_rate": 0.0001750757685091644,
+      "loss": 0.1518,
+      "step": 8643
+    },
+    {
+      "epoch": 0.6236877232223385,
+      "grad_norm": 0.12584786117076874,
+      "learning_rate": 0.00017507288208976765,
+      "loss": 0.1558,
+      "step": 8644
+    },
+    {
+      "epoch": 0.6237598758973989,
+      "grad_norm": 0.11283411830663681,
+      "learning_rate": 0.0001750699956703709,
+      "loss": 0.1734,
+      "step": 8645
+    },
+    {
+      "epoch": 0.6238320285724593,
+      "grad_norm": 0.1332118809223175,
+      "learning_rate": 0.00017506710925097418,
+      "loss": 0.116,
+      "step": 8646
+    },
+    {
+      "epoch": 0.6239041812475198,
+      "grad_norm": 0.1143932119011879,
+      "learning_rate": 0.00017506422283157744,
+      "loss": 0.1296,
+      "step": 8647
+    },
+    {
+      "epoch": 0.6239763339225802,
+      "grad_norm": 0.1439419388771057,
+      "learning_rate": 0.0001750613364121807,
+      "loss": 0.1776,
+      "step": 8648
+    },
+    {
+      "epoch": 0.6240484865976406,
+      "grad_norm": 0.10962345451116562,
+      "learning_rate": 0.00017505844999278396,
+      "loss": 0.1534,
+      "step": 8649
+    },
+    {
+      "epoch": 0.624120639272701,
+      "grad_norm": 0.1164950579404831,
+      "learning_rate": 0.00017505556357338723,
+      "loss": 0.1169,
+      "step": 8650
+    },
+    {
+      "epoch": 0.6241927919477614,
+      "grad_norm": 0.11711418628692627,
+      "learning_rate": 0.00017505267715399046,
+      "loss": 0.1254,
+      "step": 8651
+    },
+    {
+      "epoch": 0.6242649446228219,
+      "grad_norm": 0.14272616803646088,
+      "learning_rate": 0.00017504979073459375,
+      "loss": 0.156,
+      "step": 8652
+    },
+    {
+      "epoch": 0.6243370972978823,
+      "grad_norm": 0.11704165488481522,
+      "learning_rate": 0.00017504690431519702,
+      "loss": 0.1106,
+      "step": 8653
+    },
+    {
+      "epoch": 0.6244092499729428,
+      "grad_norm": 0.12983237206935883,
+      "learning_rate": 0.00017504401789580028,
+      "loss": 0.1266,
+      "step": 8654
+    },
+    {
+      "epoch": 0.6244814026480032,
+      "grad_norm": 0.2319333404302597,
+      "learning_rate": 0.00017504113147640354,
+      "loss": 0.1721,
+      "step": 8655
+    },
+    {
+      "epoch": 0.6245535553230636,
+      "grad_norm": 0.13093380630016327,
+      "learning_rate": 0.00017503824505700678,
+      "loss": 0.1971,
+      "step": 8656
+    },
+    {
+      "epoch": 0.624625707998124,
+      "grad_norm": 0.12470812350511551,
+      "learning_rate": 0.00017503535863761004,
+      "loss": 0.1382,
+      "step": 8657
+    },
+    {
+      "epoch": 0.6246978606731844,
+      "grad_norm": 0.1307307928800583,
+      "learning_rate": 0.0001750324722182133,
+      "loss": 0.1292,
+      "step": 8658
+    },
+    {
+      "epoch": 0.6247700133482449,
+      "grad_norm": 0.12445079535245895,
+      "learning_rate": 0.0001750295857988166,
+      "loss": 0.157,
+      "step": 8659
+    },
+    {
+      "epoch": 0.6248421660233053,
+      "grad_norm": 0.16324906051158905,
+      "learning_rate": 0.00017502669937941986,
+      "loss": 0.1094,
+      "step": 8660
+    },
+    {
+      "epoch": 0.6249143186983658,
+      "grad_norm": 0.14993080496788025,
+      "learning_rate": 0.0001750238129600231,
+      "loss": 0.1342,
+      "step": 8661
+    },
+    {
+      "epoch": 0.6249864713734262,
+      "grad_norm": 0.12553778290748596,
+      "learning_rate": 0.00017502092654062636,
+      "loss": 0.1262,
+      "step": 8662
+    },
+    {
+      "epoch": 0.6250586240484866,
+      "grad_norm": 0.11642362177371979,
+      "learning_rate": 0.00017501804012122962,
+      "loss": 0.1321,
+      "step": 8663
+    },
+    {
+      "epoch": 0.625130776723547,
+      "grad_norm": 0.12395073473453522,
+      "learning_rate": 0.00017501515370183288,
+      "loss": 0.1437,
+      "step": 8664
+    },
+    {
+      "epoch": 0.6252029293986074,
+      "grad_norm": 0.14258527755737305,
+      "learning_rate": 0.00017501226728243614,
+      "loss": 0.1619,
+      "step": 8665
+    },
+    {
+      "epoch": 0.6252750820736679,
+      "grad_norm": 0.10943171381950378,
+      "learning_rate": 0.0001750093808630394,
+      "loss": 0.093,
+      "step": 8666
+    },
+    {
+      "epoch": 0.6253472347487283,
+      "grad_norm": 0.09779515862464905,
+      "learning_rate": 0.00017500649444364267,
+      "loss": 0.101,
+      "step": 8667
+    },
+    {
+      "epoch": 0.6254193874237888,
+      "grad_norm": 0.11287999153137207,
+      "learning_rate": 0.00017500360802424593,
+      "loss": 0.1274,
+      "step": 8668
+    },
+    {
+      "epoch": 0.6254915400988492,
+      "grad_norm": 0.13966453075408936,
+      "learning_rate": 0.0001750007216048492,
+      "loss": 0.1559,
+      "step": 8669
+    },
+    {
+      "epoch": 0.6255636927739096,
+      "grad_norm": 0.12718842923641205,
+      "learning_rate": 0.00017499783518545246,
+      "loss": 0.1294,
+      "step": 8670
+    },
+    {
+      "epoch": 0.62563584544897,
+      "grad_norm": 0.11159928888082504,
+      "learning_rate": 0.00017499494876605572,
+      "loss": 0.1533,
+      "step": 8671
+    },
+    {
+      "epoch": 0.6257079981240304,
+      "grad_norm": 0.1387571096420288,
+      "learning_rate": 0.00017499206234665896,
+      "loss": 0.1852,
+      "step": 8672
+    },
+    {
+      "epoch": 0.6257801507990909,
+      "grad_norm": 0.11032717674970627,
+      "learning_rate": 0.00017498917592726225,
+      "loss": 0.1292,
+      "step": 8673
+    },
+    {
+      "epoch": 0.6258523034741513,
+      "grad_norm": 0.11329511553049088,
+      "learning_rate": 0.0001749862895078655,
+      "loss": 0.1227,
+      "step": 8674
+    },
+    {
+      "epoch": 0.6259244561492118,
+      "grad_norm": 0.15416178107261658,
+      "learning_rate": 0.00017498340308846877,
+      "loss": 0.1241,
+      "step": 8675
+    },
+    {
+      "epoch": 0.6259966088242721,
+      "grad_norm": 0.12209072709083557,
+      "learning_rate": 0.00017498051666907204,
+      "loss": 0.1208,
+      "step": 8676
+    },
+    {
+      "epoch": 0.6260687614993325,
+      "grad_norm": 0.11543633043766022,
+      "learning_rate": 0.00017497763024967527,
+      "loss": 0.1848,
+      "step": 8677
+    },
+    {
+      "epoch": 0.626140914174393,
+      "grad_norm": 0.10174732655286789,
+      "learning_rate": 0.00017497474383027854,
+      "loss": 0.1551,
+      "step": 8678
+    },
+    {
+      "epoch": 0.6262130668494534,
+      "grad_norm": 0.11609652638435364,
+      "learning_rate": 0.0001749718574108818,
+      "loss": 0.1391,
+      "step": 8679
+    },
+    {
+      "epoch": 0.6262852195245139,
+      "grad_norm": 0.12952245771884918,
+      "learning_rate": 0.0001749689709914851,
+      "loss": 0.1448,
+      "step": 8680
+    },
+    {
+      "epoch": 0.6263573721995743,
+      "grad_norm": 0.128461092710495,
+      "learning_rate": 0.00017496608457208835,
+      "loss": 0.157,
+      "step": 8681
+    },
+    {
+      "epoch": 0.6264295248746348,
+      "grad_norm": 0.2677597105503082,
+      "learning_rate": 0.0001749631981526916,
+      "loss": 0.1382,
+      "step": 8682
+    },
+    {
+      "epoch": 0.6265016775496951,
+      "grad_norm": 0.1268332302570343,
+      "learning_rate": 0.00017496031173329485,
+      "loss": 0.171,
+      "step": 8683
+    },
+    {
+      "epoch": 0.6265738302247555,
+      "grad_norm": 0.15028905868530273,
+      "learning_rate": 0.0001749574253138981,
+      "loss": 0.1555,
+      "step": 8684
+    },
+    {
+      "epoch": 0.626645982899816,
+      "grad_norm": 0.14424365758895874,
+      "learning_rate": 0.00017495453889450138,
+      "loss": 0.0959,
+      "step": 8685
+    },
+    {
+      "epoch": 0.6267181355748764,
+      "grad_norm": 0.12823861837387085,
+      "learning_rate": 0.00017495165247510464,
+      "loss": 0.1587,
+      "step": 8686
+    },
+    {
+      "epoch": 0.6267902882499369,
+      "grad_norm": 0.12356722354888916,
+      "learning_rate": 0.0001749487660557079,
+      "loss": 0.1459,
+      "step": 8687
+    },
+    {
+      "epoch": 0.6268624409249973,
+      "grad_norm": 0.13832490146160126,
+      "learning_rate": 0.00017494587963631116,
+      "loss": 0.1678,
+      "step": 8688
+    },
+    {
+      "epoch": 0.6269345936000578,
+      "grad_norm": 0.13942967355251312,
+      "learning_rate": 0.00017494299321691443,
+      "loss": 0.1195,
+      "step": 8689
+    },
+    {
+      "epoch": 0.6270067462751181,
+      "grad_norm": 0.09987753629684448,
+      "learning_rate": 0.0001749401067975177,
+      "loss": 0.1307,
+      "step": 8690
+    },
+    {
+      "epoch": 0.6270788989501785,
+      "grad_norm": 0.16269667446613312,
+      "learning_rate": 0.00017493722037812095,
+      "loss": 0.1805,
+      "step": 8691
+    },
+    {
+      "epoch": 0.627151051625239,
+      "grad_norm": 0.1342119574546814,
+      "learning_rate": 0.00017493433395872422,
+      "loss": 0.1414,
+      "step": 8692
+    },
+    {
+      "epoch": 0.6272232043002994,
+      "grad_norm": 0.13867583870887756,
+      "learning_rate": 0.00017493144753932745,
+      "loss": 0.1808,
+      "step": 8693
+    },
+    {
+      "epoch": 0.6272953569753599,
+      "grad_norm": 0.1276274472475052,
+      "learning_rate": 0.00017492856111993074,
+      "loss": 0.1457,
+      "step": 8694
+    },
+    {
+      "epoch": 0.6273675096504203,
+      "grad_norm": 0.11411254107952118,
+      "learning_rate": 0.000174925674700534,
+      "loss": 0.1462,
+      "step": 8695
+    },
+    {
+      "epoch": 0.6274396623254808,
+      "grad_norm": 0.12012795358896255,
+      "learning_rate": 0.00017492278828113727,
+      "loss": 0.1684,
+      "step": 8696
+    },
+    {
+      "epoch": 0.6275118150005411,
+      "grad_norm": 0.11880932003259659,
+      "learning_rate": 0.00017491990186174053,
+      "loss": 0.1186,
+      "step": 8697
+    },
+    {
+      "epoch": 0.6275839676756015,
+      "grad_norm": 0.15673860907554626,
+      "learning_rate": 0.00017491701544234377,
+      "loss": 0.2067,
+      "step": 8698
+    },
+    {
+      "epoch": 0.627656120350662,
+      "grad_norm": 0.13303562998771667,
+      "learning_rate": 0.00017491412902294703,
+      "loss": 0.1834,
+      "step": 8699
+    },
+    {
+      "epoch": 0.6277282730257224,
+      "grad_norm": 0.10776332765817642,
+      "learning_rate": 0.0001749112426035503,
+      "loss": 0.162,
+      "step": 8700
+    },
+    {
+      "epoch": 0.6278004257007829,
+      "grad_norm": 0.12505806982517242,
+      "learning_rate": 0.00017490835618415358,
+      "loss": 0.1254,
+      "step": 8701
+    },
+    {
+      "epoch": 0.6278725783758433,
+      "grad_norm": 0.09923242032527924,
+      "learning_rate": 0.00017490546976475685,
+      "loss": 0.1769,
+      "step": 8702
+    },
+    {
+      "epoch": 0.6279447310509038,
+      "grad_norm": 0.12002786248922348,
+      "learning_rate": 0.00017490258334536008,
+      "loss": 0.1534,
+      "step": 8703
+    },
+    {
+      "epoch": 0.6280168837259641,
+      "grad_norm": 0.15881898999214172,
+      "learning_rate": 0.00017489969692596334,
+      "loss": 0.2111,
+      "step": 8704
+    },
+    {
+      "epoch": 0.6280890364010245,
+      "grad_norm": 0.1061813086271286,
+      "learning_rate": 0.0001748968105065666,
+      "loss": 0.1912,
+      "step": 8705
+    },
+    {
+      "epoch": 0.628161189076085,
+      "grad_norm": 0.11571187525987625,
+      "learning_rate": 0.00017489392408716987,
+      "loss": 0.1243,
+      "step": 8706
+    },
+    {
+      "epoch": 0.6282333417511454,
+      "grad_norm": 0.10322290658950806,
+      "learning_rate": 0.00017489103766777313,
+      "loss": 0.1472,
+      "step": 8707
+    },
+    {
+      "epoch": 0.6283054944262059,
+      "grad_norm": 0.11178483814001083,
+      "learning_rate": 0.0001748881512483764,
+      "loss": 0.1167,
+      "step": 8708
+    },
+    {
+      "epoch": 0.6283776471012663,
+      "grad_norm": 0.12025123834609985,
+      "learning_rate": 0.00017488526482897966,
+      "loss": 0.1446,
+      "step": 8709
+    },
+    {
+      "epoch": 0.6284497997763268,
+      "grad_norm": 0.13027307391166687,
+      "learning_rate": 0.00017488237840958292,
+      "loss": 0.1324,
+      "step": 8710
+    },
+    {
+      "epoch": 0.6285219524513871,
+      "grad_norm": 0.1130695715546608,
+      "learning_rate": 0.00017487949199018618,
+      "loss": 0.1665,
+      "step": 8711
+    },
+    {
+      "epoch": 0.6285941051264475,
+      "grad_norm": 0.13819366693496704,
+      "learning_rate": 0.00017487660557078945,
+      "loss": 0.108,
+      "step": 8712
+    },
+    {
+      "epoch": 0.628666257801508,
+      "grad_norm": 0.11559824645519257,
+      "learning_rate": 0.0001748737191513927,
+      "loss": 0.0957,
+      "step": 8713
+    },
+    {
+      "epoch": 0.6287384104765684,
+      "grad_norm": 0.15998490154743195,
+      "learning_rate": 0.00017487083273199595,
+      "loss": 0.1218,
+      "step": 8714
+    },
+    {
+      "epoch": 0.6288105631516289,
+      "grad_norm": 0.11568142473697662,
+      "learning_rate": 0.00017486794631259924,
+      "loss": 0.1278,
+      "step": 8715
+    },
+    {
+      "epoch": 0.6288827158266893,
+      "grad_norm": 0.10960458219051361,
+      "learning_rate": 0.0001748650598932025,
+      "loss": 0.1357,
+      "step": 8716
+    },
+    {
+      "epoch": 0.6289548685017498,
+      "grad_norm": 0.13815350830554962,
+      "learning_rate": 0.00017486217347380576,
+      "loss": 0.1773,
+      "step": 8717
+    },
+    {
+      "epoch": 0.6290270211768101,
+      "grad_norm": 0.1252228170633316,
+      "learning_rate": 0.00017485928705440902,
+      "loss": 0.1545,
+      "step": 8718
+    },
+    {
+      "epoch": 0.6290991738518705,
+      "grad_norm": 0.11478475481271744,
+      "learning_rate": 0.00017485640063501226,
+      "loss": 0.1353,
+      "step": 8719
+    },
+    {
+      "epoch": 0.629171326526931,
+      "grad_norm": 0.1047094315290451,
+      "learning_rate": 0.00017485351421561552,
+      "loss": 0.1488,
+      "step": 8720
+    },
+    {
+      "epoch": 0.6292434792019914,
+      "grad_norm": 0.11537590622901917,
+      "learning_rate": 0.0001748506277962188,
+      "loss": 0.117,
+      "step": 8721
+    },
+    {
+      "epoch": 0.6293156318770519,
+      "grad_norm": 0.13007521629333496,
+      "learning_rate": 0.00017484774137682205,
+      "loss": 0.1519,
+      "step": 8722
+    },
+    {
+      "epoch": 0.6293877845521123,
+      "grad_norm": 0.14826463162899017,
+      "learning_rate": 0.00017484485495742534,
+      "loss": 0.1953,
+      "step": 8723
+    },
+    {
+      "epoch": 0.6294599372271727,
+      "grad_norm": 0.12364893406629562,
+      "learning_rate": 0.00017484196853802858,
+      "loss": 0.1411,
+      "step": 8724
+    },
+    {
+      "epoch": 0.6295320899022331,
+      "grad_norm": 0.11580374091863632,
+      "learning_rate": 0.00017483908211863184,
+      "loss": 0.1244,
+      "step": 8725
+    },
+    {
+      "epoch": 0.6296042425772935,
+      "grad_norm": 0.10067996382713318,
+      "learning_rate": 0.0001748361956992351,
+      "loss": 0.1506,
+      "step": 8726
+    },
+    {
+      "epoch": 0.629676395252354,
+      "grad_norm": 0.1357460469007492,
+      "learning_rate": 0.00017483330927983836,
+      "loss": 0.1553,
+      "step": 8727
+    },
+    {
+      "epoch": 0.6297485479274144,
+      "grad_norm": 0.12918460369110107,
+      "learning_rate": 0.00017483042286044163,
+      "loss": 0.1572,
+      "step": 8728
+    },
+    {
+      "epoch": 0.6298207006024749,
+      "grad_norm": 0.12484171241521835,
+      "learning_rate": 0.0001748275364410449,
+      "loss": 0.1436,
+      "step": 8729
+    },
+    {
+      "epoch": 0.6298928532775353,
+      "grad_norm": 0.14255495369434357,
+      "learning_rate": 0.00017482465002164815,
+      "loss": 0.1387,
+      "step": 8730
+    },
+    {
+      "epoch": 0.6299650059525957,
+      "grad_norm": 0.10925611853599548,
+      "learning_rate": 0.00017482176360225142,
+      "loss": 0.1287,
+      "step": 8731
+    },
+    {
+      "epoch": 0.6300371586276561,
+      "grad_norm": 0.13331304490566254,
+      "learning_rate": 0.00017481887718285468,
+      "loss": 0.1441,
+      "step": 8732
+    },
+    {
+      "epoch": 0.6301093113027165,
+      "grad_norm": 0.1275538206100464,
+      "learning_rate": 0.00017481599076345794,
+      "loss": 0.1368,
+      "step": 8733
+    },
+    {
+      "epoch": 0.630181463977777,
+      "grad_norm": 0.11866793036460876,
+      "learning_rate": 0.0001748131043440612,
+      "loss": 0.1594,
+      "step": 8734
+    },
+    {
+      "epoch": 0.6302536166528374,
+      "grad_norm": 0.13025593757629395,
+      "learning_rate": 0.00017481021792466447,
+      "loss": 0.13,
+      "step": 8735
+    },
+    {
+      "epoch": 0.6303257693278979,
+      "grad_norm": 0.15139830112457275,
+      "learning_rate": 0.0001748073315052677,
+      "loss": 0.1629,
+      "step": 8736
+    },
+    {
+      "epoch": 0.6303979220029583,
+      "grad_norm": 0.1346716433763504,
+      "learning_rate": 0.000174804445085871,
+      "loss": 0.1279,
+      "step": 8737
+    },
+    {
+      "epoch": 0.6304700746780186,
+      "grad_norm": 0.12959204614162445,
+      "learning_rate": 0.00017480155866647426,
+      "loss": 0.1375,
+      "step": 8738
+    },
+    {
+      "epoch": 0.6305422273530791,
+      "grad_norm": 0.13815361261367798,
+      "learning_rate": 0.00017479867224707752,
+      "loss": 0.1194,
+      "step": 8739
+    },
+    {
+      "epoch": 0.6306143800281395,
+      "grad_norm": 0.11633475869894028,
+      "learning_rate": 0.00017479578582768078,
+      "loss": 0.14,
+      "step": 8740
+    },
+    {
+      "epoch": 0.6306865327032,
+      "grad_norm": 0.14336754381656647,
+      "learning_rate": 0.00017479289940828402,
+      "loss": 0.134,
+      "step": 8741
+    },
+    {
+      "epoch": 0.6307586853782604,
+      "grad_norm": 0.11213643848896027,
+      "learning_rate": 0.00017479001298888728,
+      "loss": 0.1408,
+      "step": 8742
+    },
+    {
+      "epoch": 0.6308308380533209,
+      "grad_norm": 0.11513768136501312,
+      "learning_rate": 0.00017478712656949054,
+      "loss": 0.1488,
+      "step": 8743
+    },
+    {
+      "epoch": 0.6309029907283813,
+      "grad_norm": 0.1266719400882721,
+      "learning_rate": 0.00017478424015009383,
+      "loss": 0.1407,
+      "step": 8744
+    },
+    {
+      "epoch": 0.6309751434034416,
+      "grad_norm": 0.12178874760866165,
+      "learning_rate": 0.0001747813537306971,
+      "loss": 0.1267,
+      "step": 8745
+    },
+    {
+      "epoch": 0.6310472960785021,
+      "grad_norm": 0.11389937251806259,
+      "learning_rate": 0.00017477846731130033,
+      "loss": 0.2012,
+      "step": 8746
+    },
+    {
+      "epoch": 0.6311194487535625,
+      "grad_norm": 0.1470189392566681,
+      "learning_rate": 0.0001747755808919036,
+      "loss": 0.1914,
+      "step": 8747
+    },
+    {
+      "epoch": 0.631191601428623,
+      "grad_norm": 0.10151471942663193,
+      "learning_rate": 0.00017477269447250686,
+      "loss": 0.1635,
+      "step": 8748
+    },
+    {
+      "epoch": 0.6312637541036834,
+      "grad_norm": 0.1164185032248497,
+      "learning_rate": 0.00017476980805311012,
+      "loss": 0.1088,
+      "step": 8749
+    },
+    {
+      "epoch": 0.6313359067787438,
+      "grad_norm": 0.12604768574237823,
+      "learning_rate": 0.00017476692163371338,
+      "loss": 0.1445,
+      "step": 8750
+    },
+    {
+      "epoch": 0.6314080594538043,
+      "grad_norm": 0.12387518584728241,
+      "learning_rate": 0.00017476403521431665,
+      "loss": 0.1584,
+      "step": 8751
+    },
+    {
+      "epoch": 0.6314802121288646,
+      "grad_norm": 0.1140308678150177,
+      "learning_rate": 0.0001747611487949199,
+      "loss": 0.1565,
+      "step": 8752
+    },
+    {
+      "epoch": 0.6315523648039251,
+      "grad_norm": 0.10060878098011017,
+      "learning_rate": 0.00017475826237552317,
+      "loss": 0.1516,
+      "step": 8753
+    },
+    {
+      "epoch": 0.6316245174789855,
+      "grad_norm": 0.12557385861873627,
+      "learning_rate": 0.00017475537595612644,
+      "loss": 0.165,
+      "step": 8754
+    },
+    {
+      "epoch": 0.631696670154046,
+      "grad_norm": 0.1297946721315384,
+      "learning_rate": 0.0001747524895367297,
+      "loss": 0.1414,
+      "step": 8755
+    },
+    {
+      "epoch": 0.6317688228291064,
+      "grad_norm": 0.12463095784187317,
+      "learning_rate": 0.00017474960311733296,
+      "loss": 0.1583,
+      "step": 8756
+    },
+    {
+      "epoch": 0.6318409755041668,
+      "grad_norm": 0.11979036033153534,
+      "learning_rate": 0.0001747467166979362,
+      "loss": 0.1665,
+      "step": 8757
+    },
+    {
+      "epoch": 0.6319131281792273,
+      "grad_norm": 0.12523148953914642,
+      "learning_rate": 0.0001747438302785395,
+      "loss": 0.1585,
+      "step": 8758
+    },
+    {
+      "epoch": 0.6319852808542876,
+      "grad_norm": 0.1275649517774582,
+      "learning_rate": 0.00017474094385914275,
+      "loss": 0.1434,
+      "step": 8759
+    },
+    {
+      "epoch": 0.6320574335293481,
+      "grad_norm": 0.1115640178322792,
+      "learning_rate": 0.000174738057439746,
+      "loss": 0.1405,
+      "step": 8760
+    },
+    {
+      "epoch": 0.6321295862044085,
+      "grad_norm": 0.11878620833158493,
+      "learning_rate": 0.00017473517102034928,
+      "loss": 0.1416,
+      "step": 8761
+    },
+    {
+      "epoch": 0.632201738879469,
+      "grad_norm": 0.1362275630235672,
+      "learning_rate": 0.0001747322846009525,
+      "loss": 0.159,
+      "step": 8762
+    },
+    {
+      "epoch": 0.6322738915545294,
+      "grad_norm": 0.13509535789489746,
+      "learning_rate": 0.00017472939818155578,
+      "loss": 0.1644,
+      "step": 8763
+    },
+    {
+      "epoch": 0.6323460442295898,
+      "grad_norm": 0.12418070435523987,
+      "learning_rate": 0.00017472651176215904,
+      "loss": 0.1594,
+      "step": 8764
+    },
+    {
+      "epoch": 0.6324181969046503,
+      "grad_norm": 0.12860502302646637,
+      "learning_rate": 0.00017472362534276233,
+      "loss": 0.1132,
+      "step": 8765
+    },
+    {
+      "epoch": 0.6324903495797106,
+      "grad_norm": 0.14515362679958344,
+      "learning_rate": 0.0001747207389233656,
+      "loss": 0.1654,
+      "step": 8766
+    },
+    {
+      "epoch": 0.6325625022547711,
+      "grad_norm": 0.12168245762586594,
+      "learning_rate": 0.00017471785250396883,
+      "loss": 0.1178,
+      "step": 8767
+    },
+    {
+      "epoch": 0.6326346549298315,
+      "grad_norm": 0.13350696861743927,
+      "learning_rate": 0.0001747149660845721,
+      "loss": 0.1459,
+      "step": 8768
+    },
+    {
+      "epoch": 0.632706807604892,
+      "grad_norm": 0.12162505090236664,
+      "learning_rate": 0.00017471207966517535,
+      "loss": 0.1625,
+      "step": 8769
+    },
+    {
+      "epoch": 0.6327789602799524,
+      "grad_norm": 0.1261981874704361,
+      "learning_rate": 0.00017470919324577862,
+      "loss": 0.1498,
+      "step": 8770
+    },
+    {
+      "epoch": 0.6328511129550128,
+      "grad_norm": 0.12764649093151093,
+      "learning_rate": 0.00017470630682638188,
+      "loss": 0.1942,
+      "step": 8771
+    },
+    {
+      "epoch": 0.6329232656300733,
+      "grad_norm": 0.12060592323541641,
+      "learning_rate": 0.00017470342040698514,
+      "loss": 0.1494,
+      "step": 8772
+    },
+    {
+      "epoch": 0.6329954183051336,
+      "grad_norm": 0.12116419523954391,
+      "learning_rate": 0.0001747005339875884,
+      "loss": 0.1392,
+      "step": 8773
+    },
+    {
+      "epoch": 0.6330675709801941,
+      "grad_norm": 0.12815189361572266,
+      "learning_rate": 0.00017469764756819167,
+      "loss": 0.1444,
+      "step": 8774
+    },
+    {
+      "epoch": 0.6331397236552545,
+      "grad_norm": 0.15026380121707916,
+      "learning_rate": 0.00017469476114879493,
+      "loss": 0.1727,
+      "step": 8775
+    },
+    {
+      "epoch": 0.633211876330315,
+      "grad_norm": 0.17085008323192596,
+      "learning_rate": 0.0001746918747293982,
+      "loss": 0.1534,
+      "step": 8776
+    },
+    {
+      "epoch": 0.6332840290053754,
+      "grad_norm": 0.14156273007392883,
+      "learning_rate": 0.00017468898831000146,
+      "loss": 0.1646,
+      "step": 8777
+    },
+    {
+      "epoch": 0.6333561816804358,
+      "grad_norm": 0.13254617154598236,
+      "learning_rate": 0.0001746861018906047,
+      "loss": 0.1259,
+      "step": 8778
+    },
+    {
+      "epoch": 0.6334283343554963,
+      "grad_norm": 0.1390049159526825,
+      "learning_rate": 0.00017468321547120798,
+      "loss": 0.1702,
+      "step": 8779
+    },
+    {
+      "epoch": 0.6335004870305566,
+      "grad_norm": 0.1458684653043747,
+      "learning_rate": 0.00017468032905181124,
+      "loss": 0.1466,
+      "step": 8780
+    },
+    {
+      "epoch": 0.6335726397056171,
+      "grad_norm": 0.1377117782831192,
+      "learning_rate": 0.0001746774426324145,
+      "loss": 0.1531,
+      "step": 8781
+    },
+    {
+      "epoch": 0.6336447923806775,
+      "grad_norm": 0.11346470564603806,
+      "learning_rate": 0.00017467455621301777,
+      "loss": 0.1724,
+      "step": 8782
+    },
+    {
+      "epoch": 0.633716945055738,
+      "grad_norm": 0.1253042221069336,
+      "learning_rate": 0.000174671669793621,
+      "loss": 0.1973,
+      "step": 8783
+    },
+    {
+      "epoch": 0.6337890977307984,
+      "grad_norm": 0.11717258393764496,
+      "learning_rate": 0.00017466878337422427,
+      "loss": 0.1256,
+      "step": 8784
+    },
+    {
+      "epoch": 0.6338612504058588,
+      "grad_norm": 0.11658114939928055,
+      "learning_rate": 0.00017466589695482753,
+      "loss": 0.1705,
+      "step": 8785
+    },
+    {
+      "epoch": 0.6339334030809193,
+      "grad_norm": 0.11756215244531631,
+      "learning_rate": 0.00017466301053543082,
+      "loss": 0.1484,
+      "step": 8786
+    },
+    {
+      "epoch": 0.6340055557559796,
+      "grad_norm": 0.11114584654569626,
+      "learning_rate": 0.00017466012411603409,
+      "loss": 0.091,
+      "step": 8787
+    },
+    {
+      "epoch": 0.63407770843104,
+      "grad_norm": 0.09902235120534897,
+      "learning_rate": 0.00017465723769663732,
+      "loss": 0.1089,
+      "step": 8788
+    },
+    {
+      "epoch": 0.6341498611061005,
+      "grad_norm": 0.14965693652629852,
+      "learning_rate": 0.00017465435127724058,
+      "loss": 0.1519,
+      "step": 8789
+    },
+    {
+      "epoch": 0.6342220137811609,
+      "grad_norm": 0.11054068803787231,
+      "learning_rate": 0.00017465146485784385,
+      "loss": 0.1564,
+      "step": 8790
+    },
+    {
+      "epoch": 0.6342941664562214,
+      "grad_norm": 0.10102395713329315,
+      "learning_rate": 0.0001746485784384471,
+      "loss": 0.1928,
+      "step": 8791
+    },
+    {
+      "epoch": 0.6343663191312818,
+      "grad_norm": 0.11846046894788742,
+      "learning_rate": 0.00017464569201905037,
+      "loss": 0.1286,
+      "step": 8792
+    },
+    {
+      "epoch": 0.6344384718063423,
+      "grad_norm": 0.13022750616073608,
+      "learning_rate": 0.00017464280559965364,
+      "loss": 0.1428,
+      "step": 8793
+    },
+    {
+      "epoch": 0.6345106244814026,
+      "grad_norm": 0.11548298597335815,
+      "learning_rate": 0.0001746399191802569,
+      "loss": 0.1116,
+      "step": 8794
+    },
+    {
+      "epoch": 0.634582777156463,
+      "grad_norm": 0.12897050380706787,
+      "learning_rate": 0.00017463703276086016,
+      "loss": 0.1657,
+      "step": 8795
+    },
+    {
+      "epoch": 0.6346549298315235,
+      "grad_norm": 0.11598154902458191,
+      "learning_rate": 0.00017463414634146342,
+      "loss": 0.1812,
+      "step": 8796
+    },
+    {
+      "epoch": 0.6347270825065839,
+      "grad_norm": 0.1394772082567215,
+      "learning_rate": 0.0001746312599220667,
+      "loss": 0.1886,
+      "step": 8797
+    },
+    {
+      "epoch": 0.6347992351816444,
+      "grad_norm": 0.1729123890399933,
+      "learning_rate": 0.00017462837350266995,
+      "loss": 0.1067,
+      "step": 8798
+    },
+    {
+      "epoch": 0.6348713878567048,
+      "grad_norm": 0.12441124767065048,
+      "learning_rate": 0.00017462548708327319,
+      "loss": 0.2004,
+      "step": 8799
+    },
+    {
+      "epoch": 0.6349435405317652,
+      "grad_norm": 0.13342799246311188,
+      "learning_rate": 0.00017462260066387648,
+      "loss": 0.1619,
+      "step": 8800
+    },
+    {
+      "epoch": 0.6350156932068256,
+      "grad_norm": 0.13017302751541138,
+      "learning_rate": 0.00017461971424447974,
+      "loss": 0.1307,
+      "step": 8801
+    },
+    {
+      "epoch": 0.635087845881886,
+      "grad_norm": 0.09042824059724808,
+      "learning_rate": 0.000174616827825083,
+      "loss": 0.1751,
+      "step": 8802
+    },
+    {
+      "epoch": 0.6351599985569465,
+      "grad_norm": 0.13923433423042297,
+      "learning_rate": 0.00017461394140568626,
+      "loss": 0.1526,
+      "step": 8803
+    },
+    {
+      "epoch": 0.6352321512320069,
+      "grad_norm": 0.12554508447647095,
+      "learning_rate": 0.0001746110549862895,
+      "loss": 0.1528,
+      "step": 8804
+    },
+    {
+      "epoch": 0.6353043039070674,
+      "grad_norm": 0.11458244919776917,
+      "learning_rate": 0.00017460816856689276,
+      "loss": 0.1277,
+      "step": 8805
+    },
+    {
+      "epoch": 0.6353764565821278,
+      "grad_norm": 0.1285858452320099,
+      "learning_rate": 0.00017460528214749603,
+      "loss": 0.1692,
+      "step": 8806
+    },
+    {
+      "epoch": 0.6354486092571882,
+      "grad_norm": 0.13324186205863953,
+      "learning_rate": 0.00017460239572809932,
+      "loss": 0.175,
+      "step": 8807
+    },
+    {
+      "epoch": 0.6355207619322486,
+      "grad_norm": 0.12661926448345184,
+      "learning_rate": 0.00017459950930870258,
+      "loss": 0.1376,
+      "step": 8808
+    },
+    {
+      "epoch": 0.635592914607309,
+      "grad_norm": 0.11739029735326767,
+      "learning_rate": 0.00017459662288930582,
+      "loss": 0.1313,
+      "step": 8809
+    },
+    {
+      "epoch": 0.6356650672823695,
+      "grad_norm": 0.10466552525758743,
+      "learning_rate": 0.00017459373646990908,
+      "loss": 0.1541,
+      "step": 8810
+    },
+    {
+      "epoch": 0.6357372199574299,
+      "grad_norm": 0.12793004512786865,
+      "learning_rate": 0.00017459085005051234,
+      "loss": 0.2179,
+      "step": 8811
+    },
+    {
+      "epoch": 0.6358093726324904,
+      "grad_norm": 0.10991460084915161,
+      "learning_rate": 0.0001745879636311156,
+      "loss": 0.1911,
+      "step": 8812
+    },
+    {
+      "epoch": 0.6358815253075508,
+      "grad_norm": 0.12637649476528168,
+      "learning_rate": 0.00017458507721171887,
+      "loss": 0.1155,
+      "step": 8813
+    },
+    {
+      "epoch": 0.6359536779826112,
+      "grad_norm": 0.10572929680347443,
+      "learning_rate": 0.00017458219079232213,
+      "loss": 0.0975,
+      "step": 8814
+    },
+    {
+      "epoch": 0.6360258306576716,
+      "grad_norm": 0.11248910427093506,
+      "learning_rate": 0.0001745793043729254,
+      "loss": 0.1795,
+      "step": 8815
+    },
+    {
+      "epoch": 0.636097983332732,
+      "grad_norm": 0.1279258131980896,
+      "learning_rate": 0.00017457641795352866,
+      "loss": 0.1528,
+      "step": 8816
+    },
+    {
+      "epoch": 0.6361701360077925,
+      "grad_norm": 0.10839273780584335,
+      "learning_rate": 0.00017457353153413192,
+      "loss": 0.1751,
+      "step": 8817
+    },
+    {
+      "epoch": 0.6362422886828529,
+      "grad_norm": 0.13171249628067017,
+      "learning_rate": 0.00017457064511473518,
+      "loss": 0.1549,
+      "step": 8818
+    },
+    {
+      "epoch": 0.6363144413579134,
+      "grad_norm": 0.11827006936073303,
+      "learning_rate": 0.00017456775869533844,
+      "loss": 0.1295,
+      "step": 8819
+    },
+    {
+      "epoch": 0.6363865940329738,
+      "grad_norm": 0.11012952774763107,
+      "learning_rate": 0.00017456487227594168,
+      "loss": 0.1051,
+      "step": 8820
+    },
+    {
+      "epoch": 0.6364587467080342,
+      "grad_norm": 0.11319731920957565,
+      "learning_rate": 0.00017456198585654497,
+      "loss": 0.1582,
+      "step": 8821
+    },
+    {
+      "epoch": 0.6365308993830946,
+      "grad_norm": 0.10944925248622894,
+      "learning_rate": 0.00017455909943714823,
+      "loss": 0.1749,
+      "step": 8822
+    },
+    {
+      "epoch": 0.636603052058155,
+      "grad_norm": 0.10941876471042633,
+      "learning_rate": 0.0001745562130177515,
+      "loss": 0.1563,
+      "step": 8823
+    },
+    {
+      "epoch": 0.6366752047332155,
+      "grad_norm": 0.1071227639913559,
+      "learning_rate": 0.00017455332659835476,
+      "loss": 0.1394,
+      "step": 8824
+    },
+    {
+      "epoch": 0.6367473574082759,
+      "grad_norm": 0.11995568871498108,
+      "learning_rate": 0.000174550440178958,
+      "loss": 0.1243,
+      "step": 8825
+    },
+    {
+      "epoch": 0.6368195100833364,
+      "grad_norm": 0.12740518152713776,
+      "learning_rate": 0.00017454755375956126,
+      "loss": 0.1442,
+      "step": 8826
+    },
+    {
+      "epoch": 0.6368916627583968,
+      "grad_norm": 0.11265741288661957,
+      "learning_rate": 0.00017454466734016452,
+      "loss": 0.1692,
+      "step": 8827
+    },
+    {
+      "epoch": 0.6369638154334571,
+      "grad_norm": 0.1335940957069397,
+      "learning_rate": 0.0001745417809207678,
+      "loss": 0.0964,
+      "step": 8828
+    },
+    {
+      "epoch": 0.6370359681085176,
+      "grad_norm": 0.12412536889314651,
+      "learning_rate": 0.00017453889450137107,
+      "loss": 0.1094,
+      "step": 8829
+    },
+    {
+      "epoch": 0.637108120783578,
+      "grad_norm": 0.11655788123607635,
+      "learning_rate": 0.0001745360080819743,
+      "loss": 0.1524,
+      "step": 8830
+    },
+    {
+      "epoch": 0.6371802734586385,
+      "grad_norm": 0.11918830871582031,
+      "learning_rate": 0.00017453312166257757,
+      "loss": 0.1182,
+      "step": 8831
+    },
+    {
+      "epoch": 0.6372524261336989,
+      "grad_norm": 0.14078155159950256,
+      "learning_rate": 0.00017453023524318084,
+      "loss": 0.1709,
+      "step": 8832
+    },
+    {
+      "epoch": 0.6373245788087594,
+      "grad_norm": 0.11668020486831665,
+      "learning_rate": 0.0001745273488237841,
+      "loss": 0.0906,
+      "step": 8833
+    },
+    {
+      "epoch": 0.6373967314838198,
+      "grad_norm": 0.10997610539197922,
+      "learning_rate": 0.00017452446240438736,
+      "loss": 0.155,
+      "step": 8834
+    },
+    {
+      "epoch": 0.6374688841588801,
+      "grad_norm": 0.11578045785427094,
+      "learning_rate": 0.00017452157598499062,
+      "loss": 0.1799,
+      "step": 8835
+    },
+    {
+      "epoch": 0.6375410368339406,
+      "grad_norm": 0.12486160546541214,
+      "learning_rate": 0.0001745186895655939,
+      "loss": 0.1332,
+      "step": 8836
+    },
+    {
+      "epoch": 0.637613189509001,
+      "grad_norm": 0.17098915576934814,
+      "learning_rate": 0.00017451580314619715,
+      "loss": 0.1367,
+      "step": 8837
+    },
+    {
+      "epoch": 0.6376853421840615,
+      "grad_norm": 0.13832706212997437,
+      "learning_rate": 0.0001745129167268004,
+      "loss": 0.1714,
+      "step": 8838
+    },
+    {
+      "epoch": 0.6377574948591219,
+      "grad_norm": 0.17727001011371613,
+      "learning_rate": 0.00017451003030740368,
+      "loss": 0.1732,
+      "step": 8839
+    },
+    {
+      "epoch": 0.6378296475341824,
+      "grad_norm": 0.1226031705737114,
+      "learning_rate": 0.00017450714388800694,
+      "loss": 0.1622,
+      "step": 8840
+    },
+    {
+      "epoch": 0.6379018002092428,
+      "grad_norm": 0.11801004409790039,
+      "learning_rate": 0.0001745042574686102,
+      "loss": 0.1565,
+      "step": 8841
+    },
+    {
+      "epoch": 0.6379739528843031,
+      "grad_norm": 0.14041762053966522,
+      "learning_rate": 0.00017450137104921346,
+      "loss": 0.162,
+      "step": 8842
+    },
+    {
+      "epoch": 0.6380461055593636,
+      "grad_norm": 0.10397140681743622,
+      "learning_rate": 0.00017449848462981673,
+      "loss": 0.1237,
+      "step": 8843
+    },
+    {
+      "epoch": 0.638118258234424,
+      "grad_norm": 0.1438121199607849,
+      "learning_rate": 0.00017449559821042,
+      "loss": 0.1204,
+      "step": 8844
+    },
+    {
+      "epoch": 0.6381904109094845,
+      "grad_norm": 0.1460319459438324,
+      "learning_rate": 0.00017449271179102325,
+      "loss": 0.181,
+      "step": 8845
+    },
+    {
+      "epoch": 0.6382625635845449,
+      "grad_norm": 0.10840224474668503,
+      "learning_rate": 0.00017448982537162652,
+      "loss": 0.1114,
+      "step": 8846
+    },
+    {
+      "epoch": 0.6383347162596054,
+      "grad_norm": 0.15132613480091095,
+      "learning_rate": 0.00017448693895222975,
+      "loss": 0.1205,
+      "step": 8847
+    },
+    {
+      "epoch": 0.6384068689346658,
+      "grad_norm": 0.0996752679347992,
+      "learning_rate": 0.00017448405253283302,
+      "loss": 0.1632,
+      "step": 8848
+    },
+    {
+      "epoch": 0.6384790216097261,
+      "grad_norm": 0.131254181265831,
+      "learning_rate": 0.0001744811661134363,
+      "loss": 0.1482,
+      "step": 8849
+    },
+    {
+      "epoch": 0.6385511742847866,
+      "grad_norm": 0.11335877329111099,
+      "learning_rate": 0.00017447827969403957,
+      "loss": 0.1318,
+      "step": 8850
+    },
+    {
+      "epoch": 0.638623326959847,
+      "grad_norm": 0.131666362285614,
+      "learning_rate": 0.00017447539327464283,
+      "loss": 0.1272,
+      "step": 8851
+    },
+    {
+      "epoch": 0.6386954796349075,
+      "grad_norm": 0.12124770134687424,
+      "learning_rate": 0.00017447250685524607,
+      "loss": 0.2281,
+      "step": 8852
+    },
+    {
+      "epoch": 0.6387676323099679,
+      "grad_norm": 0.11714229732751846,
+      "learning_rate": 0.00017446962043584933,
+      "loss": 0.1716,
+      "step": 8853
+    },
+    {
+      "epoch": 0.6388397849850284,
+      "grad_norm": 0.13586623966693878,
+      "learning_rate": 0.0001744667340164526,
+      "loss": 0.1225,
+      "step": 8854
+    },
+    {
+      "epoch": 0.6389119376600888,
+      "grad_norm": 0.18940232694149017,
+      "learning_rate": 0.00017446384759705586,
+      "loss": 0.1713,
+      "step": 8855
+    },
+    {
+      "epoch": 0.6389840903351491,
+      "grad_norm": 0.13319925963878632,
+      "learning_rate": 0.00017446096117765915,
+      "loss": 0.1439,
+      "step": 8856
+    },
+    {
+      "epoch": 0.6390562430102096,
+      "grad_norm": 0.10407208651304245,
+      "learning_rate": 0.00017445807475826238,
+      "loss": 0.1557,
+      "step": 8857
+    },
+    {
+      "epoch": 0.63912839568527,
+      "grad_norm": 0.11361634731292725,
+      "learning_rate": 0.00017445518833886564,
+      "loss": 0.1159,
+      "step": 8858
+    },
+    {
+      "epoch": 0.6392005483603305,
+      "grad_norm": 0.1038358137011528,
+      "learning_rate": 0.0001744523019194689,
+      "loss": 0.143,
+      "step": 8859
+    },
+    {
+      "epoch": 0.6392727010353909,
+      "grad_norm": 0.10394181311130524,
+      "learning_rate": 0.00017444941550007217,
+      "loss": 0.0907,
+      "step": 8860
+    },
+    {
+      "epoch": 0.6393448537104514,
+      "grad_norm": 0.12115704268217087,
+      "learning_rate": 0.00017444652908067543,
+      "loss": 0.1268,
+      "step": 8861
+    },
+    {
+      "epoch": 0.6394170063855117,
+      "grad_norm": 0.1325000524520874,
+      "learning_rate": 0.0001744436426612787,
+      "loss": 0.1396,
+      "step": 8862
+    },
+    {
+      "epoch": 0.6394891590605721,
+      "grad_norm": 0.14049799740314484,
+      "learning_rate": 0.00017444075624188196,
+      "loss": 0.1837,
+      "step": 8863
+    },
+    {
+      "epoch": 0.6395613117356326,
+      "grad_norm": 0.1779290735721588,
+      "learning_rate": 0.00017443786982248522,
+      "loss": 0.1752,
+      "step": 8864
+    },
+    {
+      "epoch": 0.639633464410693,
+      "grad_norm": 0.12939026951789856,
+      "learning_rate": 0.00017443498340308848,
+      "loss": 0.0974,
+      "step": 8865
+    },
+    {
+      "epoch": 0.6397056170857535,
+      "grad_norm": 0.11895648390054703,
+      "learning_rate": 0.00017443209698369175,
+      "loss": 0.1321,
+      "step": 8866
+    },
+    {
+      "epoch": 0.6397777697608139,
+      "grad_norm": 0.13495993614196777,
+      "learning_rate": 0.000174429210564295,
+      "loss": 0.1731,
+      "step": 8867
+    },
+    {
+      "epoch": 0.6398499224358744,
+      "grad_norm": 0.12926140427589417,
+      "learning_rate": 0.00017442632414489825,
+      "loss": 0.1683,
+      "step": 8868
+    },
+    {
+      "epoch": 0.6399220751109347,
+      "grad_norm": 0.12302546203136444,
+      "learning_rate": 0.0001744234377255015,
+      "loss": 0.1318,
+      "step": 8869
+    },
+    {
+      "epoch": 0.6399942277859951,
+      "grad_norm": 0.12506909668445587,
+      "learning_rate": 0.0001744205513061048,
+      "loss": 0.178,
+      "step": 8870
+    },
+    {
+      "epoch": 0.6400663804610556,
+      "grad_norm": 0.15225814282894135,
+      "learning_rate": 0.00017441766488670806,
+      "loss": 0.1534,
+      "step": 8871
+    },
+    {
+      "epoch": 0.640138533136116,
+      "grad_norm": 0.12990593910217285,
+      "learning_rate": 0.00017441477846731133,
+      "loss": 0.1337,
+      "step": 8872
+    },
+    {
+      "epoch": 0.6402106858111765,
+      "grad_norm": 0.1329309344291687,
+      "learning_rate": 0.00017441189204791456,
+      "loss": 0.1332,
+      "step": 8873
+    },
+    {
+      "epoch": 0.6402828384862369,
+      "grad_norm": 0.1620033234357834,
+      "learning_rate": 0.00017440900562851782,
+      "loss": 0.1796,
+      "step": 8874
+    },
+    {
+      "epoch": 0.6403549911612973,
+      "grad_norm": 0.11278960108757019,
+      "learning_rate": 0.0001744061192091211,
+      "loss": 0.1394,
+      "step": 8875
+    },
+    {
+      "epoch": 0.6404271438363577,
+      "grad_norm": 0.12775515019893646,
+      "learning_rate": 0.00017440323278972435,
+      "loss": 0.1686,
+      "step": 8876
+    },
+    {
+      "epoch": 0.6404992965114181,
+      "grad_norm": 0.13999012112617493,
+      "learning_rate": 0.00017440034637032764,
+      "loss": 0.1519,
+      "step": 8877
+    },
+    {
+      "epoch": 0.6405714491864786,
+      "grad_norm": 0.13053064048290253,
+      "learning_rate": 0.00017439745995093088,
+      "loss": 0.1398,
+      "step": 8878
+    },
+    {
+      "epoch": 0.640643601861539,
+      "grad_norm": 0.11259409785270691,
+      "learning_rate": 0.00017439457353153414,
+      "loss": 0.1681,
+      "step": 8879
+    },
+    {
+      "epoch": 0.6407157545365995,
+      "grad_norm": 0.12789352238178253,
+      "learning_rate": 0.0001743916871121374,
+      "loss": 0.1797,
+      "step": 8880
+    },
+    {
+      "epoch": 0.6407879072116599,
+      "grad_norm": 0.11534351855516434,
+      "learning_rate": 0.00017438880069274066,
+      "loss": 0.1227,
+      "step": 8881
+    },
+    {
+      "epoch": 0.6408600598867203,
+      "grad_norm": 0.16358792781829834,
+      "learning_rate": 0.00017438591427334393,
+      "loss": 0.1178,
+      "step": 8882
+    },
+    {
+      "epoch": 0.6409322125617807,
+      "grad_norm": 0.1316683292388916,
+      "learning_rate": 0.0001743830278539472,
+      "loss": 0.1611,
+      "step": 8883
+    },
+    {
+      "epoch": 0.6410043652368411,
+      "grad_norm": 0.14574764668941498,
+      "learning_rate": 0.00017438014143455045,
+      "loss": 0.1929,
+      "step": 8884
+    },
+    {
+      "epoch": 0.6410765179119016,
+      "grad_norm": 0.12572930753231049,
+      "learning_rate": 0.00017437725501515372,
+      "loss": 0.1585,
+      "step": 8885
+    },
+    {
+      "epoch": 0.641148670586962,
+      "grad_norm": 0.17408978939056396,
+      "learning_rate": 0.00017437436859575698,
+      "loss": 0.1606,
+      "step": 8886
+    },
+    {
+      "epoch": 0.6412208232620225,
+      "grad_norm": 0.10904928296804428,
+      "learning_rate": 0.00017437148217636024,
+      "loss": 0.1288,
+      "step": 8887
+    },
+    {
+      "epoch": 0.6412929759370829,
+      "grad_norm": 0.1295899897813797,
+      "learning_rate": 0.0001743685957569635,
+      "loss": 0.1674,
+      "step": 8888
+    },
+    {
+      "epoch": 0.6413651286121433,
+      "grad_norm": 0.13774964213371277,
+      "learning_rate": 0.00017436570933756674,
+      "loss": 0.1628,
+      "step": 8889
+    },
+    {
+      "epoch": 0.6414372812872037,
+      "grad_norm": 0.10904145985841751,
+      "learning_rate": 0.00017436282291817,
+      "loss": 0.1532,
+      "step": 8890
+    },
+    {
+      "epoch": 0.6415094339622641,
+      "grad_norm": 0.17066548764705658,
+      "learning_rate": 0.0001743599364987733,
+      "loss": 0.1818,
+      "step": 8891
+    },
+    {
+      "epoch": 0.6415815866373246,
+      "grad_norm": 0.09667755663394928,
+      "learning_rate": 0.00017435705007937656,
+      "loss": 0.1036,
+      "step": 8892
+    },
+    {
+      "epoch": 0.641653739312385,
+      "grad_norm": 0.09876111894845963,
+      "learning_rate": 0.00017435416365997982,
+      "loss": 0.1071,
+      "step": 8893
+    },
+    {
+      "epoch": 0.6417258919874455,
+      "grad_norm": 0.12817133963108063,
+      "learning_rate": 0.00017435127724058306,
+      "loss": 0.152,
+      "step": 8894
+    },
+    {
+      "epoch": 0.6417980446625059,
+      "grad_norm": 0.12401299178600311,
+      "learning_rate": 0.00017434839082118632,
+      "loss": 0.1233,
+      "step": 8895
+    },
+    {
+      "epoch": 0.6418701973375663,
+      "grad_norm": 0.1025814563035965,
+      "learning_rate": 0.00017434550440178958,
+      "loss": 0.1495,
+      "step": 8896
+    },
+    {
+      "epoch": 0.6419423500126267,
+      "grad_norm": 0.15205642580986023,
+      "learning_rate": 0.00017434261798239284,
+      "loss": 0.1213,
+      "step": 8897
+    },
+    {
+      "epoch": 0.6420145026876871,
+      "grad_norm": 0.11617343127727509,
+      "learning_rate": 0.00017433973156299613,
+      "loss": 0.19,
+      "step": 8898
+    },
+    {
+      "epoch": 0.6420866553627476,
+      "grad_norm": 0.1051785945892334,
+      "learning_rate": 0.00017433684514359937,
+      "loss": 0.0935,
+      "step": 8899
+    },
+    {
+      "epoch": 0.642158808037808,
+      "grad_norm": 0.13081860542297363,
+      "learning_rate": 0.00017433395872420263,
+      "loss": 0.1629,
+      "step": 8900
+    },
+    {
+      "epoch": 0.6422309607128684,
+      "grad_norm": 0.10840879380702972,
+      "learning_rate": 0.0001743310723048059,
+      "loss": 0.1534,
+      "step": 8901
+    },
+    {
+      "epoch": 0.6423031133879289,
+      "grad_norm": 0.1379835158586502,
+      "learning_rate": 0.00017432818588540916,
+      "loss": 0.1478,
+      "step": 8902
+    },
+    {
+      "epoch": 0.6423752660629893,
+      "grad_norm": 0.14373107254505157,
+      "learning_rate": 0.00017432529946601242,
+      "loss": 0.142,
+      "step": 8903
+    },
+    {
+      "epoch": 0.6424474187380497,
+      "grad_norm": 0.13795924186706543,
+      "learning_rate": 0.00017432241304661568,
+      "loss": 0.131,
+      "step": 8904
+    },
+    {
+      "epoch": 0.6425195714131101,
+      "grad_norm": 0.13038359582424164,
+      "learning_rate": 0.00017431952662721895,
+      "loss": 0.2,
+      "step": 8905
+    },
+    {
+      "epoch": 0.6425917240881706,
+      "grad_norm": 0.13648727536201477,
+      "learning_rate": 0.0001743166402078222,
+      "loss": 0.116,
+      "step": 8906
+    },
+    {
+      "epoch": 0.642663876763231,
+      "grad_norm": 0.1238555833697319,
+      "learning_rate": 0.00017431375378842547,
+      "loss": 0.1402,
+      "step": 8907
+    },
+    {
+      "epoch": 0.6427360294382914,
+      "grad_norm": 0.09869746118783951,
+      "learning_rate": 0.00017431086736902874,
+      "loss": 0.1483,
+      "step": 8908
+    },
+    {
+      "epoch": 0.6428081821133519,
+      "grad_norm": 0.13061855733394623,
+      "learning_rate": 0.000174307980949632,
+      "loss": 0.1738,
+      "step": 8909
+    },
+    {
+      "epoch": 0.6428803347884123,
+      "grad_norm": 0.12091786414384842,
+      "learning_rate": 0.00017430509453023523,
+      "loss": 0.126,
+      "step": 8910
+    },
+    {
+      "epoch": 0.6429524874634727,
+      "grad_norm": 0.13954919576644897,
+      "learning_rate": 0.0001743022081108385,
+      "loss": 0.1315,
+      "step": 8911
+    },
+    {
+      "epoch": 0.6430246401385331,
+      "grad_norm": 0.1259092539548874,
+      "learning_rate": 0.0001742993216914418,
+      "loss": 0.1744,
+      "step": 8912
+    },
+    {
+      "epoch": 0.6430967928135936,
+      "grad_norm": 0.0919126346707344,
+      "learning_rate": 0.00017429643527204505,
+      "loss": 0.139,
+      "step": 8913
+    },
+    {
+      "epoch": 0.643168945488654,
+      "grad_norm": 0.11809264123439789,
+      "learning_rate": 0.00017429354885264831,
+      "loss": 0.2004,
+      "step": 8914
+    },
+    {
+      "epoch": 0.6432410981637144,
+      "grad_norm": 0.1017296239733696,
+      "learning_rate": 0.00017429066243325155,
+      "loss": 0.1515,
+      "step": 8915
+    },
+    {
+      "epoch": 0.6433132508387749,
+      "grad_norm": 0.10862316936254501,
+      "learning_rate": 0.0001742877760138548,
+      "loss": 0.1471,
+      "step": 8916
+    },
+    {
+      "epoch": 0.6433854035138353,
+      "grad_norm": 0.1337960660457611,
+      "learning_rate": 0.00017428488959445808,
+      "loss": 0.1852,
+      "step": 8917
+    },
+    {
+      "epoch": 0.6434575561888957,
+      "grad_norm": 0.12023504823446274,
+      "learning_rate": 0.00017428200317506134,
+      "loss": 0.2022,
+      "step": 8918
+    },
+    {
+      "epoch": 0.6435297088639561,
+      "grad_norm": 0.1266653686761856,
+      "learning_rate": 0.0001742791167556646,
+      "loss": 0.1578,
+      "step": 8919
+    },
+    {
+      "epoch": 0.6436018615390166,
+      "grad_norm": 0.09703958034515381,
+      "learning_rate": 0.00017427623033626786,
+      "loss": 0.1417,
+      "step": 8920
+    },
+    {
+      "epoch": 0.643674014214077,
+      "grad_norm": 0.10218293219804764,
+      "learning_rate": 0.00017427334391687113,
+      "loss": 0.1621,
+      "step": 8921
+    },
+    {
+      "epoch": 0.6437461668891374,
+      "grad_norm": 0.10678768157958984,
+      "learning_rate": 0.0001742704574974744,
+      "loss": 0.122,
+      "step": 8922
+    },
+    {
+      "epoch": 0.6438183195641979,
+      "grad_norm": 0.1419648379087448,
+      "learning_rate": 0.00017426757107807765,
+      "loss": 0.2476,
+      "step": 8923
+    },
+    {
+      "epoch": 0.6438904722392582,
+      "grad_norm": 0.10171731561422348,
+      "learning_rate": 0.00017426468465868092,
+      "loss": 0.1387,
+      "step": 8924
+    },
+    {
+      "epoch": 0.6439626249143187,
+      "grad_norm": 0.1244988664984703,
+      "learning_rate": 0.00017426179823928418,
+      "loss": 0.1465,
+      "step": 8925
+    },
+    {
+      "epoch": 0.6440347775893791,
+      "grad_norm": 0.09062091261148453,
+      "learning_rate": 0.00017425891181988741,
+      "loss": 0.1233,
+      "step": 8926
+    },
+    {
+      "epoch": 0.6441069302644395,
+      "grad_norm": 0.1125463992357254,
+      "learning_rate": 0.0001742560254004907,
+      "loss": 0.1311,
+      "step": 8927
+    },
+    {
+      "epoch": 0.6441790829395,
+      "grad_norm": 0.12110617756843567,
+      "learning_rate": 0.00017425313898109397,
+      "loss": 0.1166,
+      "step": 8928
+    },
+    {
+      "epoch": 0.6442512356145604,
+      "grad_norm": 0.153212770819664,
+      "learning_rate": 0.00017425025256169723,
+      "loss": 0.1534,
+      "step": 8929
+    },
+    {
+      "epoch": 0.6443233882896209,
+      "grad_norm": 0.15019434690475464,
+      "learning_rate": 0.0001742473661423005,
+      "loss": 0.1215,
+      "step": 8930
+    },
+    {
+      "epoch": 0.6443955409646812,
+      "grad_norm": 0.13318707048892975,
+      "learning_rate": 0.00017424447972290373,
+      "loss": 0.1641,
+      "step": 8931
+    },
+    {
+      "epoch": 0.6444676936397417,
+      "grad_norm": 0.1360812485218048,
+      "learning_rate": 0.000174241593303507,
+      "loss": 0.1578,
+      "step": 8932
+    },
+    {
+      "epoch": 0.6445398463148021,
+      "grad_norm": 0.15063872933387756,
+      "learning_rate": 0.00017423870688411025,
+      "loss": 0.1781,
+      "step": 8933
+    },
+    {
+      "epoch": 0.6446119989898625,
+      "grad_norm": 0.11997667700052261,
+      "learning_rate": 0.00017423582046471355,
+      "loss": 0.1412,
+      "step": 8934
+    },
+    {
+      "epoch": 0.644684151664923,
+      "grad_norm": 0.11618088185787201,
+      "learning_rate": 0.0001742329340453168,
+      "loss": 0.1667,
+      "step": 8935
+    },
+    {
+      "epoch": 0.6447563043399834,
+      "grad_norm": 0.1366327404975891,
+      "learning_rate": 0.00017423004762592004,
+      "loss": 0.1746,
+      "step": 8936
+    },
+    {
+      "epoch": 0.6448284570150439,
+      "grad_norm": 0.11012442409992218,
+      "learning_rate": 0.0001742271612065233,
+      "loss": 0.1424,
+      "step": 8937
+    },
+    {
+      "epoch": 0.6449006096901042,
+      "grad_norm": 0.10074211657047272,
+      "learning_rate": 0.00017422427478712657,
+      "loss": 0.1463,
+      "step": 8938
+    },
+    {
+      "epoch": 0.6449727623651647,
+      "grad_norm": 0.09732308983802795,
+      "learning_rate": 0.00017422138836772983,
+      "loss": 0.2021,
+      "step": 8939
+    },
+    {
+      "epoch": 0.6450449150402251,
+      "grad_norm": 0.1292310208082199,
+      "learning_rate": 0.0001742185019483331,
+      "loss": 0.158,
+      "step": 8940
+    },
+    {
+      "epoch": 0.6451170677152855,
+      "grad_norm": 0.11873973906040192,
+      "learning_rate": 0.00017421561552893636,
+      "loss": 0.1379,
+      "step": 8941
+    },
+    {
+      "epoch": 0.645189220390346,
+      "grad_norm": 0.12597502768039703,
+      "learning_rate": 0.00017421272910953962,
+      "loss": 0.1949,
+      "step": 8942
+    },
+    {
+      "epoch": 0.6452613730654064,
+      "grad_norm": 0.1288425624370575,
+      "learning_rate": 0.00017420984269014288,
+      "loss": 0.1049,
+      "step": 8943
+    },
+    {
+      "epoch": 0.6453335257404669,
+      "grad_norm": 0.11511608958244324,
+      "learning_rate": 0.00017420695627074615,
+      "loss": 0.185,
+      "step": 8944
+    },
+    {
+      "epoch": 0.6454056784155272,
+      "grad_norm": 0.11685147881507874,
+      "learning_rate": 0.0001742040698513494,
+      "loss": 0.1677,
+      "step": 8945
+    },
+    {
+      "epoch": 0.6454778310905877,
+      "grad_norm": 0.13405561447143555,
+      "learning_rate": 0.00017420118343195267,
+      "loss": 0.1731,
+      "step": 8946
+    },
+    {
+      "epoch": 0.6455499837656481,
+      "grad_norm": 0.11815590411424637,
+      "learning_rate": 0.0001741982970125559,
+      "loss": 0.1125,
+      "step": 8947
+    },
+    {
+      "epoch": 0.6456221364407085,
+      "grad_norm": 0.15212565660476685,
+      "learning_rate": 0.0001741954105931592,
+      "loss": 0.1492,
+      "step": 8948
+    },
+    {
+      "epoch": 0.645694289115769,
+      "grad_norm": 0.13262511789798737,
+      "learning_rate": 0.00017419252417376246,
+      "loss": 0.1658,
+      "step": 8949
+    },
+    {
+      "epoch": 0.6457664417908294,
+      "grad_norm": 0.12056512385606766,
+      "learning_rate": 0.00017418963775436572,
+      "loss": 0.1551,
+      "step": 8950
+    },
+    {
+      "epoch": 0.6458385944658899,
+      "grad_norm": 0.14746679365634918,
+      "learning_rate": 0.000174186751334969,
+      "loss": 0.1721,
+      "step": 8951
+    },
+    {
+      "epoch": 0.6459107471409502,
+      "grad_norm": 0.11994044482707977,
+      "learning_rate": 0.00017418386491557222,
+      "loss": 0.1043,
+      "step": 8952
+    },
+    {
+      "epoch": 0.6459828998160106,
+      "grad_norm": 0.1168675571680069,
+      "learning_rate": 0.00017418097849617549,
+      "loss": 0.1317,
+      "step": 8953
+    },
+    {
+      "epoch": 0.6460550524910711,
+      "grad_norm": 0.14804407954216003,
+      "learning_rate": 0.00017417809207677875,
+      "loss": 0.1971,
+      "step": 8954
+    },
+    {
+      "epoch": 0.6461272051661315,
+      "grad_norm": 0.13889257609844208,
+      "learning_rate": 0.00017417520565738204,
+      "loss": 0.1625,
+      "step": 8955
+    },
+    {
+      "epoch": 0.646199357841192,
+      "grad_norm": 0.13769084215164185,
+      "learning_rate": 0.0001741723192379853,
+      "loss": 0.1459,
+      "step": 8956
+    },
+    {
+      "epoch": 0.6462715105162524,
+      "grad_norm": 0.13194069266319275,
+      "learning_rate": 0.00017416943281858854,
+      "loss": 0.1474,
+      "step": 8957
+    },
+    {
+      "epoch": 0.6463436631913129,
+      "grad_norm": 0.14091116189956665,
+      "learning_rate": 0.0001741665463991918,
+      "loss": 0.1615,
+      "step": 8958
+    },
+    {
+      "epoch": 0.6464158158663732,
+      "grad_norm": 0.15515783429145813,
+      "learning_rate": 0.00017416365997979506,
+      "loss": 0.1721,
+      "step": 8959
+    },
+    {
+      "epoch": 0.6464879685414336,
+      "grad_norm": 0.12409847974777222,
+      "learning_rate": 0.00017416077356039833,
+      "loss": 0.1612,
+      "step": 8960
+    },
+    {
+      "epoch": 0.6465601212164941,
+      "grad_norm": 0.12766936421394348,
+      "learning_rate": 0.0001741578871410016,
+      "loss": 0.0999,
+      "step": 8961
+    },
+    {
+      "epoch": 0.6466322738915545,
+      "grad_norm": 0.11388033628463745,
+      "learning_rate": 0.00017415500072160488,
+      "loss": 0.156,
+      "step": 8962
+    },
+    {
+      "epoch": 0.646704426566615,
+      "grad_norm": 0.12239213287830353,
+      "learning_rate": 0.00017415211430220812,
+      "loss": 0.1523,
+      "step": 8963
+    },
+    {
+      "epoch": 0.6467765792416754,
+      "grad_norm": 0.11749175935983658,
+      "learning_rate": 0.00017414922788281138,
+      "loss": 0.1223,
+      "step": 8964
+    },
+    {
+      "epoch": 0.6468487319167359,
+      "grad_norm": 0.15253546833992004,
+      "learning_rate": 0.00017414634146341464,
+      "loss": 0.1802,
+      "step": 8965
+    },
+    {
+      "epoch": 0.6469208845917962,
+      "grad_norm": 0.32105064392089844,
+      "learning_rate": 0.0001741434550440179,
+      "loss": 0.1777,
+      "step": 8966
+    },
+    {
+      "epoch": 0.6469930372668566,
+      "grad_norm": 0.11545057594776154,
+      "learning_rate": 0.00017414056862462117,
+      "loss": 0.1665,
+      "step": 8967
+    },
+    {
+      "epoch": 0.6470651899419171,
+      "grad_norm": 0.13333509862422943,
+      "learning_rate": 0.00017413768220522443,
+      "loss": 0.1473,
+      "step": 8968
+    },
+    {
+      "epoch": 0.6471373426169775,
+      "grad_norm": 0.12783385813236237,
+      "learning_rate": 0.0001741347957858277,
+      "loss": 0.1507,
+      "step": 8969
+    },
+    {
+      "epoch": 0.647209495292038,
+      "grad_norm": 0.1804548054933548,
+      "learning_rate": 0.00017413190936643096,
+      "loss": 0.1961,
+      "step": 8970
+    },
+    {
+      "epoch": 0.6472816479670984,
+      "grad_norm": 0.11268200725317001,
+      "learning_rate": 0.00017412902294703422,
+      "loss": 0.1626,
+      "step": 8971
+    },
+    {
+      "epoch": 0.6473538006421589,
+      "grad_norm": 0.127872034907341,
+      "learning_rate": 0.00017412613652763748,
+      "loss": 0.1689,
+      "step": 8972
+    },
+    {
+      "epoch": 0.6474259533172192,
+      "grad_norm": 0.15813906490802765,
+      "learning_rate": 0.00017412325010824074,
+      "loss": 0.1737,
+      "step": 8973
+    },
+    {
+      "epoch": 0.6474981059922796,
+      "grad_norm": 0.15613089501857758,
+      "learning_rate": 0.00017412036368884398,
+      "loss": 0.1417,
+      "step": 8974
+    },
+    {
+      "epoch": 0.6475702586673401,
+      "grad_norm": 0.16650298237800598,
+      "learning_rate": 0.00017411747726944724,
+      "loss": 0.142,
+      "step": 8975
+    },
+    {
+      "epoch": 0.6476424113424005,
+      "grad_norm": 0.10688599199056625,
+      "learning_rate": 0.00017411459085005053,
+      "loss": 0.1724,
+      "step": 8976
+    },
+    {
+      "epoch": 0.647714564017461,
+      "grad_norm": 0.14942897856235504,
+      "learning_rate": 0.0001741117044306538,
+      "loss": 0.1122,
+      "step": 8977
+    },
+    {
+      "epoch": 0.6477867166925214,
+      "grad_norm": 0.13502287864685059,
+      "learning_rate": 0.00017410881801125706,
+      "loss": 0.1792,
+      "step": 8978
+    },
+    {
+      "epoch": 0.6478588693675819,
+      "grad_norm": 0.10939890146255493,
+      "learning_rate": 0.0001741059315918603,
+      "loss": 0.1645,
+      "step": 8979
+    },
+    {
+      "epoch": 0.6479310220426422,
+      "grad_norm": 0.1340005099773407,
+      "learning_rate": 0.00017410304517246356,
+      "loss": 0.169,
+      "step": 8980
+    },
+    {
+      "epoch": 0.6480031747177026,
+      "grad_norm": 0.11436860263347626,
+      "learning_rate": 0.00017410015875306682,
+      "loss": 0.1316,
+      "step": 8981
+    },
+    {
+      "epoch": 0.6480753273927631,
+      "grad_norm": 0.10513529926538467,
+      "learning_rate": 0.00017409727233367008,
+      "loss": 0.1081,
+      "step": 8982
+    },
+    {
+      "epoch": 0.6481474800678235,
+      "grad_norm": 0.13902191817760468,
+      "learning_rate": 0.00017409438591427337,
+      "loss": 0.1259,
+      "step": 8983
+    },
+    {
+      "epoch": 0.648219632742884,
+      "grad_norm": 0.13189220428466797,
+      "learning_rate": 0.0001740914994948766,
+      "loss": 0.1382,
+      "step": 8984
+    },
+    {
+      "epoch": 0.6482917854179444,
+      "grad_norm": 0.1712450236082077,
+      "learning_rate": 0.00017408861307547987,
+      "loss": 0.165,
+      "step": 8985
+    },
+    {
+      "epoch": 0.6483639380930047,
+      "grad_norm": 0.15805469453334808,
+      "learning_rate": 0.00017408572665608314,
+      "loss": 0.1587,
+      "step": 8986
+    },
+    {
+      "epoch": 0.6484360907680652,
+      "grad_norm": 0.18048928678035736,
+      "learning_rate": 0.0001740828402366864,
+      "loss": 0.1628,
+      "step": 8987
+    },
+    {
+      "epoch": 0.6485082434431256,
+      "grad_norm": 0.14291101694107056,
+      "learning_rate": 0.00017407995381728966,
+      "loss": 0.144,
+      "step": 8988
+    },
+    {
+      "epoch": 0.6485803961181861,
+      "grad_norm": 0.1614530086517334,
+      "learning_rate": 0.00017407706739789292,
+      "loss": 0.1113,
+      "step": 8989
+    },
+    {
+      "epoch": 0.6486525487932465,
+      "grad_norm": 0.14932601153850555,
+      "learning_rate": 0.0001740741809784962,
+      "loss": 0.1439,
+      "step": 8990
+    },
+    {
+      "epoch": 0.648724701468307,
+      "grad_norm": 0.1545875370502472,
+      "learning_rate": 0.00017407129455909945,
+      "loss": 0.1403,
+      "step": 8991
+    },
+    {
+      "epoch": 0.6487968541433674,
+      "grad_norm": 0.14962558448314667,
+      "learning_rate": 0.0001740684081397027,
+      "loss": 0.1506,
+      "step": 8992
+    },
+    {
+      "epoch": 0.6488690068184277,
+      "grad_norm": 0.11427868902683258,
+      "learning_rate": 0.00017406552172030598,
+      "loss": 0.1574,
+      "step": 8993
+    },
+    {
+      "epoch": 0.6489411594934882,
+      "grad_norm": 0.13557592034339905,
+      "learning_rate": 0.00017406263530090924,
+      "loss": 0.1226,
+      "step": 8994
+    },
+    {
+      "epoch": 0.6490133121685486,
+      "grad_norm": 0.12659603357315063,
+      "learning_rate": 0.00017405974888151247,
+      "loss": 0.1653,
+      "step": 8995
+    },
+    {
+      "epoch": 0.6490854648436091,
+      "grad_norm": 0.11794587969779968,
+      "learning_rate": 0.00017405686246211574,
+      "loss": 0.1686,
+      "step": 8996
+    },
+    {
+      "epoch": 0.6491576175186695,
+      "grad_norm": 0.14208902418613434,
+      "learning_rate": 0.00017405397604271903,
+      "loss": 0.1705,
+      "step": 8997
+    },
+    {
+      "epoch": 0.64922977019373,
+      "grad_norm": 0.1434546709060669,
+      "learning_rate": 0.0001740510896233223,
+      "loss": 0.1735,
+      "step": 8998
+    },
+    {
+      "epoch": 0.6493019228687904,
+      "grad_norm": 0.11880837380886078,
+      "learning_rate": 0.00017404820320392555,
+      "loss": 0.1294,
+      "step": 8999
+    },
+    {
+      "epoch": 0.6493740755438507,
+      "grad_norm": 0.11917240172624588,
+      "learning_rate": 0.0001740453167845288,
+      "loss": 0.1591,
+      "step": 9000
+    },
+    {
+      "epoch": 0.6494462282189112,
+      "grad_norm": 0.13792914152145386,
+      "learning_rate": 0.00017404243036513205,
+      "loss": 0.1422,
+      "step": 9001
+    },
+    {
+      "epoch": 0.6495183808939716,
+      "grad_norm": 0.1305783987045288,
+      "learning_rate": 0.00017403954394573532,
+      "loss": 0.144,
+      "step": 9002
+    },
+    {
+      "epoch": 0.6495905335690321,
+      "grad_norm": 0.1063060387969017,
+      "learning_rate": 0.00017403665752633858,
+      "loss": 0.154,
+      "step": 9003
+    },
+    {
+      "epoch": 0.6496626862440925,
+      "grad_norm": 0.11954107135534286,
+      "learning_rate": 0.00017403377110694187,
+      "loss": 0.1124,
+      "step": 9004
+    },
+    {
+      "epoch": 0.649734838919153,
+      "grad_norm": 0.1425926238298416,
+      "learning_rate": 0.0001740308846875451,
+      "loss": 0.1338,
+      "step": 9005
+    },
+    {
+      "epoch": 0.6498069915942134,
+      "grad_norm": 0.11237387359142303,
+      "learning_rate": 0.00017402799826814837,
+      "loss": 0.1435,
+      "step": 9006
+    },
+    {
+      "epoch": 0.6498791442692737,
+      "grad_norm": 0.12347462028265,
+      "learning_rate": 0.00017402511184875163,
+      "loss": 0.1599,
+      "step": 9007
+    },
+    {
+      "epoch": 0.6499512969443342,
+      "grad_norm": 0.10732217133045197,
+      "learning_rate": 0.0001740222254293549,
+      "loss": 0.1328,
+      "step": 9008
+    },
+    {
+      "epoch": 0.6500234496193946,
+      "grad_norm": 0.24294838309288025,
+      "learning_rate": 0.00017401933900995816,
+      "loss": 0.148,
+      "step": 9009
+    },
+    {
+      "epoch": 0.6500956022944551,
+      "grad_norm": 0.10149053484201431,
+      "learning_rate": 0.00017401645259056142,
+      "loss": 0.1174,
+      "step": 9010
+    },
+    {
+      "epoch": 0.6501677549695155,
+      "grad_norm": 0.11952211707830429,
+      "learning_rate": 0.00017401356617116468,
+      "loss": 0.1687,
+      "step": 9011
+    },
+    {
+      "epoch": 0.650239907644576,
+      "grad_norm": 0.13148994743824005,
+      "learning_rate": 0.00017401067975176794,
+      "loss": 0.1652,
+      "step": 9012
+    },
+    {
+      "epoch": 0.6503120603196364,
+      "grad_norm": 0.12399965524673462,
+      "learning_rate": 0.0001740077933323712,
+      "loss": 0.1636,
+      "step": 9013
+    },
+    {
+      "epoch": 0.6503842129946967,
+      "grad_norm": 0.114295594394207,
+      "learning_rate": 0.00017400490691297447,
+      "loss": 0.1434,
+      "step": 9014
+    },
+    {
+      "epoch": 0.6504563656697572,
+      "grad_norm": 0.13758529722690582,
+      "learning_rate": 0.00017400202049357773,
+      "loss": 0.1302,
+      "step": 9015
+    },
+    {
+      "epoch": 0.6505285183448176,
+      "grad_norm": 0.12450076639652252,
+      "learning_rate": 0.00017399913407418097,
+      "loss": 0.2087,
+      "step": 9016
+    },
+    {
+      "epoch": 0.6506006710198781,
+      "grad_norm": 0.12183558940887451,
+      "learning_rate": 0.00017399624765478423,
+      "loss": 0.1517,
+      "step": 9017
+    },
+    {
+      "epoch": 0.6506728236949385,
+      "grad_norm": 0.1189873218536377,
+      "learning_rate": 0.00017399336123538752,
+      "loss": 0.1552,
+      "step": 9018
+    },
+    {
+      "epoch": 0.650744976369999,
+      "grad_norm": 0.12722237408161163,
+      "learning_rate": 0.00017399047481599078,
+      "loss": 0.1544,
+      "step": 9019
+    },
+    {
+      "epoch": 0.6508171290450594,
+      "grad_norm": 0.17670875787734985,
+      "learning_rate": 0.00017398758839659405,
+      "loss": 0.1583,
+      "step": 9020
+    },
+    {
+      "epoch": 0.6508892817201197,
+      "grad_norm": 0.12485279142856598,
+      "learning_rate": 0.00017398470197719728,
+      "loss": 0.1588,
+      "step": 9021
+    },
+    {
+      "epoch": 0.6509614343951802,
+      "grad_norm": 0.15111367404460907,
+      "learning_rate": 0.00017398181555780055,
+      "loss": 0.1757,
+      "step": 9022
+    },
+    {
+      "epoch": 0.6510335870702406,
+      "grad_norm": 0.1123456209897995,
+      "learning_rate": 0.0001739789291384038,
+      "loss": 0.1306,
+      "step": 9023
+    },
+    {
+      "epoch": 0.6511057397453011,
+      "grad_norm": 0.13833723962306976,
+      "learning_rate": 0.00017397604271900707,
+      "loss": 0.1329,
+      "step": 9024
+    },
+    {
+      "epoch": 0.6511778924203615,
+      "grad_norm": 0.11428412050008774,
+      "learning_rate": 0.00017397315629961036,
+      "loss": 0.1491,
+      "step": 9025
+    },
+    {
+      "epoch": 0.651250045095422,
+      "grad_norm": 0.12801140546798706,
+      "learning_rate": 0.0001739702698802136,
+      "loss": 0.172,
+      "step": 9026
+    },
+    {
+      "epoch": 0.6513221977704824,
+      "grad_norm": 0.10705477744340897,
+      "learning_rate": 0.00017396738346081686,
+      "loss": 0.1163,
+      "step": 9027
+    },
+    {
+      "epoch": 0.6513943504455427,
+      "grad_norm": 0.14140252768993378,
+      "learning_rate": 0.00017396449704142012,
+      "loss": 0.1914,
+      "step": 9028
+    },
+    {
+      "epoch": 0.6514665031206032,
+      "grad_norm": 0.11550770699977875,
+      "learning_rate": 0.0001739616106220234,
+      "loss": 0.1464,
+      "step": 9029
+    },
+    {
+      "epoch": 0.6515386557956636,
+      "grad_norm": 0.11913446336984634,
+      "learning_rate": 0.00017395872420262665,
+      "loss": 0.1302,
+      "step": 9030
+    },
+    {
+      "epoch": 0.6516108084707241,
+      "grad_norm": 0.12009747326374054,
+      "learning_rate": 0.0001739558377832299,
+      "loss": 0.1749,
+      "step": 9031
+    },
+    {
+      "epoch": 0.6516829611457845,
+      "grad_norm": 0.11971908062696457,
+      "learning_rate": 0.00017395295136383318,
+      "loss": 0.133,
+      "step": 9032
+    },
+    {
+      "epoch": 0.651755113820845,
+      "grad_norm": 0.13815897703170776,
+      "learning_rate": 0.00017395006494443644,
+      "loss": 0.144,
+      "step": 9033
+    },
+    {
+      "epoch": 0.6518272664959054,
+      "grad_norm": 0.12359744310379028,
+      "learning_rate": 0.0001739471785250397,
+      "loss": 0.1585,
+      "step": 9034
+    },
+    {
+      "epoch": 0.6518994191709657,
+      "grad_norm": 0.13417810201644897,
+      "learning_rate": 0.00017394429210564296,
+      "loss": 0.1407,
+      "step": 9035
+    },
+    {
+      "epoch": 0.6519715718460262,
+      "grad_norm": 0.12911564111709595,
+      "learning_rate": 0.00017394140568624623,
+      "loss": 0.1524,
+      "step": 9036
+    },
+    {
+      "epoch": 0.6520437245210866,
+      "grad_norm": 0.1165081039071083,
+      "learning_rate": 0.00017393851926684946,
+      "loss": 0.1445,
+      "step": 9037
+    },
+    {
+      "epoch": 0.652115877196147,
+      "grad_norm": 0.14567390084266663,
+      "learning_rate": 0.00017393563284745273,
+      "loss": 0.1455,
+      "step": 9038
+    },
+    {
+      "epoch": 0.6521880298712075,
+      "grad_norm": 0.12246451526880264,
+      "learning_rate": 0.00017393274642805602,
+      "loss": 0.1476,
+      "step": 9039
+    },
+    {
+      "epoch": 0.6522601825462679,
+      "grad_norm": 0.13728214800357819,
+      "learning_rate": 0.00017392986000865928,
+      "loss": 0.1845,
+      "step": 9040
+    },
+    {
+      "epoch": 0.6523323352213284,
+      "grad_norm": 0.1250610500574112,
+      "learning_rate": 0.00017392697358926254,
+      "loss": 0.0996,
+      "step": 9041
+    },
+    {
+      "epoch": 0.6524044878963887,
+      "grad_norm": 0.12806789577007294,
+      "learning_rate": 0.00017392408716986578,
+      "loss": 0.1788,
+      "step": 9042
+    },
+    {
+      "epoch": 0.6524766405714492,
+      "grad_norm": 0.1574259102344513,
+      "learning_rate": 0.00017392120075046904,
+      "loss": 0.1679,
+      "step": 9043
+    },
+    {
+      "epoch": 0.6525487932465096,
+      "grad_norm": 0.09644627571105957,
+      "learning_rate": 0.0001739183143310723,
+      "loss": 0.1305,
+      "step": 9044
+    },
+    {
+      "epoch": 0.65262094592157,
+      "grad_norm": 0.18874594569206238,
+      "learning_rate": 0.00017391542791167557,
+      "loss": 0.1565,
+      "step": 9045
+    },
+    {
+      "epoch": 0.6526930985966305,
+      "grad_norm": 0.1280926764011383,
+      "learning_rate": 0.00017391254149227886,
+      "loss": 0.1642,
+      "step": 9046
+    },
+    {
+      "epoch": 0.6527652512716909,
+      "grad_norm": 0.12547607719898224,
+      "learning_rate": 0.0001739096550728821,
+      "loss": 0.1266,
+      "step": 9047
+    },
+    {
+      "epoch": 0.6528374039467513,
+      "grad_norm": 0.15735046565532684,
+      "learning_rate": 0.00017390676865348536,
+      "loss": 0.1283,
+      "step": 9048
+    },
+    {
+      "epoch": 0.6529095566218117,
+      "grad_norm": 0.1462177336215973,
+      "learning_rate": 0.00017390388223408862,
+      "loss": 0.1425,
+      "step": 9049
+    },
+    {
+      "epoch": 0.6529817092968722,
+      "grad_norm": 0.12556686997413635,
+      "learning_rate": 0.00017390099581469188,
+      "loss": 0.1284,
+      "step": 9050
+    },
+    {
+      "epoch": 0.6530538619719326,
+      "grad_norm": 0.12326043099164963,
+      "learning_rate": 0.00017389810939529514,
+      "loss": 0.1789,
+      "step": 9051
+    },
+    {
+      "epoch": 0.653126014646993,
+      "grad_norm": 0.13321638107299805,
+      "learning_rate": 0.0001738952229758984,
+      "loss": 0.1553,
+      "step": 9052
+    },
+    {
+      "epoch": 0.6531981673220535,
+      "grad_norm": 0.12841051816940308,
+      "learning_rate": 0.00017389233655650167,
+      "loss": 0.1218,
+      "step": 9053
+    },
+    {
+      "epoch": 0.6532703199971139,
+      "grad_norm": 0.1277436763048172,
+      "learning_rate": 0.00017388945013710493,
+      "loss": 0.1623,
+      "step": 9054
+    },
+    {
+      "epoch": 0.6533424726721743,
+      "grad_norm": 0.13014984130859375,
+      "learning_rate": 0.0001738865637177082,
+      "loss": 0.1872,
+      "step": 9055
+    },
+    {
+      "epoch": 0.6534146253472347,
+      "grad_norm": 0.08620619028806686,
+      "learning_rate": 0.00017388367729831146,
+      "loss": 0.1381,
+      "step": 9056
+    },
+    {
+      "epoch": 0.6534867780222952,
+      "grad_norm": 0.12672410905361176,
+      "learning_rate": 0.00017388079087891472,
+      "loss": 0.1348,
+      "step": 9057
+    },
+    {
+      "epoch": 0.6535589306973556,
+      "grad_norm": 0.1097164899110794,
+      "learning_rate": 0.00017387790445951796,
+      "loss": 0.1504,
+      "step": 9058
+    },
+    {
+      "epoch": 0.653631083372416,
+      "grad_norm": 0.11512299627065659,
+      "learning_rate": 0.00017387501804012122,
+      "loss": 0.1701,
+      "step": 9059
+    },
+    {
+      "epoch": 0.6537032360474765,
+      "grad_norm": 0.11480879038572311,
+      "learning_rate": 0.0001738721316207245,
+      "loss": 0.1297,
+      "step": 9060
+    },
+    {
+      "epoch": 0.6537753887225369,
+      "grad_norm": 0.11241624504327774,
+      "learning_rate": 0.00017386924520132777,
+      "loss": 0.1218,
+      "step": 9061
+    },
+    {
+      "epoch": 0.6538475413975973,
+      "grad_norm": 0.12165973335504532,
+      "learning_rate": 0.00017386635878193104,
+      "loss": 0.1612,
+      "step": 9062
+    },
+    {
+      "epoch": 0.6539196940726577,
+      "grad_norm": 0.12013527750968933,
+      "learning_rate": 0.00017386347236253427,
+      "loss": 0.1698,
+      "step": 9063
+    },
+    {
+      "epoch": 0.6539918467477182,
+      "grad_norm": 0.12046132236719131,
+      "learning_rate": 0.00017386058594313754,
+      "loss": 0.1403,
+      "step": 9064
+    },
+    {
+      "epoch": 0.6540639994227786,
+      "grad_norm": 0.19139885902404785,
+      "learning_rate": 0.0001738576995237408,
+      "loss": 0.1486,
+      "step": 9065
+    },
+    {
+      "epoch": 0.654136152097839,
+      "grad_norm": 0.10963544249534607,
+      "learning_rate": 0.00017385481310434406,
+      "loss": 0.1256,
+      "step": 9066
+    },
+    {
+      "epoch": 0.6542083047728995,
+      "grad_norm": 0.12036189436912537,
+      "learning_rate": 0.00017385192668494735,
+      "loss": 0.1456,
+      "step": 9067
+    },
+    {
+      "epoch": 0.6542804574479599,
+      "grad_norm": 0.15908774733543396,
+      "learning_rate": 0.0001738490402655506,
+      "loss": 0.1481,
+      "step": 9068
+    },
+    {
+      "epoch": 0.6543526101230203,
+      "grad_norm": 0.10817126929759979,
+      "learning_rate": 0.00017384615384615385,
+      "loss": 0.1329,
+      "step": 9069
+    },
+    {
+      "epoch": 0.6544247627980807,
+      "grad_norm": 0.15718476474285126,
+      "learning_rate": 0.0001738432674267571,
+      "loss": 0.176,
+      "step": 9070
+    },
+    {
+      "epoch": 0.6544969154731411,
+      "grad_norm": 0.11672906577587128,
+      "learning_rate": 0.00017384038100736038,
+      "loss": 0.1484,
+      "step": 9071
+    },
+    {
+      "epoch": 0.6545690681482016,
+      "grad_norm": 0.10921874642372131,
+      "learning_rate": 0.00017383749458796364,
+      "loss": 0.1706,
+      "step": 9072
+    },
+    {
+      "epoch": 0.654641220823262,
+      "grad_norm": 0.11707517504692078,
+      "learning_rate": 0.0001738346081685669,
+      "loss": 0.1011,
+      "step": 9073
+    },
+    {
+      "epoch": 0.6547133734983225,
+      "grad_norm": 0.12148593366146088,
+      "learning_rate": 0.00017383172174917016,
+      "loss": 0.1497,
+      "step": 9074
+    },
+    {
+      "epoch": 0.6547855261733829,
+      "grad_norm": 0.12721076607704163,
+      "learning_rate": 0.00017382883532977343,
+      "loss": 0.1131,
+      "step": 9075
+    },
+    {
+      "epoch": 0.6548576788484433,
+      "grad_norm": 0.1254507154226303,
+      "learning_rate": 0.0001738259489103767,
+      "loss": 0.1319,
+      "step": 9076
+    },
+    {
+      "epoch": 0.6549298315235037,
+      "grad_norm": 0.1127161830663681,
+      "learning_rate": 0.00017382306249097995,
+      "loss": 0.1671,
+      "step": 9077
+    },
+    {
+      "epoch": 0.6550019841985641,
+      "grad_norm": 0.1015736311674118,
+      "learning_rate": 0.00017382017607158322,
+      "loss": 0.1358,
+      "step": 9078
+    },
+    {
+      "epoch": 0.6550741368736246,
+      "grad_norm": 0.12445895373821259,
+      "learning_rate": 0.00017381728965218648,
+      "loss": 0.1781,
+      "step": 9079
+    },
+    {
+      "epoch": 0.655146289548685,
+      "grad_norm": 0.11472073197364807,
+      "learning_rate": 0.00017381440323278971,
+      "loss": 0.1472,
+      "step": 9080
+    },
+    {
+      "epoch": 0.6552184422237455,
+      "grad_norm": 0.1182158887386322,
+      "learning_rate": 0.000173811516813393,
+      "loss": 0.1353,
+      "step": 9081
+    },
+    {
+      "epoch": 0.6552905948988059,
+      "grad_norm": 0.10132770985364914,
+      "learning_rate": 0.00017380863039399627,
+      "loss": 0.0996,
+      "step": 9082
+    },
+    {
+      "epoch": 0.6553627475738663,
+      "grad_norm": 0.13250702619552612,
+      "learning_rate": 0.00017380574397459953,
+      "loss": 0.1399,
+      "step": 9083
+    },
+    {
+      "epoch": 0.6554349002489267,
+      "grad_norm": 0.13307896256446838,
+      "learning_rate": 0.0001738028575552028,
+      "loss": 0.1609,
+      "step": 9084
+    },
+    {
+      "epoch": 0.6555070529239871,
+      "grad_norm": 0.10817299783229828,
+      "learning_rate": 0.00017379997113580603,
+      "loss": 0.1663,
+      "step": 9085
+    },
+    {
+      "epoch": 0.6555792055990476,
+      "grad_norm": 0.12967552244663239,
+      "learning_rate": 0.0001737970847164093,
+      "loss": 0.195,
+      "step": 9086
+    },
+    {
+      "epoch": 0.655651358274108,
+      "grad_norm": 0.1627797782421112,
+      "learning_rate": 0.00017379419829701256,
+      "loss": 0.1271,
+      "step": 9087
+    },
+    {
+      "epoch": 0.6557235109491685,
+      "grad_norm": 0.39876589179039,
+      "learning_rate": 0.00017379131187761585,
+      "loss": 0.1646,
+      "step": 9088
+    },
+    {
+      "epoch": 0.6557956636242289,
+      "grad_norm": 0.12808945775032043,
+      "learning_rate": 0.0001737884254582191,
+      "loss": 0.1721,
+      "step": 9089
+    },
+    {
+      "epoch": 0.6558678162992893,
+      "grad_norm": 0.10217764228582382,
+      "learning_rate": 0.00017378553903882234,
+      "loss": 0.1559,
+      "step": 9090
+    },
+    {
+      "epoch": 0.6559399689743497,
+      "grad_norm": 0.13511773943901062,
+      "learning_rate": 0.0001737826526194256,
+      "loss": 0.1269,
+      "step": 9091
+    },
+    {
+      "epoch": 0.6560121216494101,
+      "grad_norm": 0.12675148248672485,
+      "learning_rate": 0.00017377976620002887,
+      "loss": 0.1626,
+      "step": 9092
+    },
+    {
+      "epoch": 0.6560842743244706,
+      "grad_norm": 0.1097366064786911,
+      "learning_rate": 0.00017377687978063213,
+      "loss": 0.171,
+      "step": 9093
+    },
+    {
+      "epoch": 0.656156426999531,
+      "grad_norm": 0.1200672835111618,
+      "learning_rate": 0.0001737739933612354,
+      "loss": 0.1106,
+      "step": 9094
+    },
+    {
+      "epoch": 0.6562285796745915,
+      "grad_norm": 0.09111147373914719,
+      "learning_rate": 0.00017377110694183866,
+      "loss": 0.1084,
+      "step": 9095
+    },
+    {
+      "epoch": 0.6563007323496519,
+      "grad_norm": 0.13823656737804413,
+      "learning_rate": 0.00017376822052244192,
+      "loss": 0.1241,
+      "step": 9096
+    },
+    {
+      "epoch": 0.6563728850247122,
+      "grad_norm": 0.1446205973625183,
+      "learning_rate": 0.00017376533410304518,
+      "loss": 0.1689,
+      "step": 9097
+    },
+    {
+      "epoch": 0.6564450376997727,
+      "grad_norm": 0.13478872179985046,
+      "learning_rate": 0.00017376244768364845,
+      "loss": 0.1042,
+      "step": 9098
+    },
+    {
+      "epoch": 0.6565171903748331,
+      "grad_norm": 0.12716075778007507,
+      "learning_rate": 0.0001737595612642517,
+      "loss": 0.14,
+      "step": 9099
+    },
+    {
+      "epoch": 0.6565893430498936,
+      "grad_norm": 0.1401834487915039,
+      "learning_rate": 0.00017375667484485497,
+      "loss": 0.1949,
+      "step": 9100
+    },
+    {
+      "epoch": 0.656661495724954,
+      "grad_norm": 0.1363379806280136,
+      "learning_rate": 0.0001737537884254582,
+      "loss": 0.1919,
+      "step": 9101
+    },
+    {
+      "epoch": 0.6567336484000145,
+      "grad_norm": 0.12148359417915344,
+      "learning_rate": 0.0001737509020060615,
+      "loss": 0.0931,
+      "step": 9102
+    },
+    {
+      "epoch": 0.6568058010750749,
+      "grad_norm": 0.17094339430332184,
+      "learning_rate": 0.00017374801558666476,
+      "loss": 0.1411,
+      "step": 9103
+    },
+    {
+      "epoch": 0.6568779537501352,
+      "grad_norm": 0.11943087726831436,
+      "learning_rate": 0.00017374512916726802,
+      "loss": 0.2096,
+      "step": 9104
+    },
+    {
+      "epoch": 0.6569501064251957,
+      "grad_norm": 0.1346716731786728,
+      "learning_rate": 0.0001737422427478713,
+      "loss": 0.1242,
+      "step": 9105
+    },
+    {
+      "epoch": 0.6570222591002561,
+      "grad_norm": 0.14123597741127014,
+      "learning_rate": 0.00017373935632847452,
+      "loss": 0.1607,
+      "step": 9106
+    },
+    {
+      "epoch": 0.6570944117753166,
+      "grad_norm": 0.12001238763332367,
+      "learning_rate": 0.0001737364699090778,
+      "loss": 0.0955,
+      "step": 9107
+    },
+    {
+      "epoch": 0.657166564450377,
+      "grad_norm": 0.1153496503829956,
+      "learning_rate": 0.00017373358348968105,
+      "loss": 0.1066,
+      "step": 9108
+    },
+    {
+      "epoch": 0.6572387171254375,
+      "grad_norm": 0.1329270452260971,
+      "learning_rate": 0.00017373069707028434,
+      "loss": 0.213,
+      "step": 9109
+    },
+    {
+      "epoch": 0.6573108698004978,
+      "grad_norm": 0.13615714013576508,
+      "learning_rate": 0.0001737278106508876,
+      "loss": 0.1278,
+      "step": 9110
+    },
+    {
+      "epoch": 0.6573830224755582,
+      "grad_norm": 0.14342476427555084,
+      "learning_rate": 0.00017372492423149084,
+      "loss": 0.1512,
+      "step": 9111
+    },
+    {
+      "epoch": 0.6574551751506187,
+      "grad_norm": 0.1282559186220169,
+      "learning_rate": 0.0001737220378120941,
+      "loss": 0.1891,
+      "step": 9112
+    },
+    {
+      "epoch": 0.6575273278256791,
+      "grad_norm": 0.14406467974185944,
+      "learning_rate": 0.00017371915139269736,
+      "loss": 0.1585,
+      "step": 9113
+    },
+    {
+      "epoch": 0.6575994805007396,
+      "grad_norm": 0.22428575158119202,
+      "learning_rate": 0.00017371626497330063,
+      "loss": 0.1143,
+      "step": 9114
+    },
+    {
+      "epoch": 0.6576716331758,
+      "grad_norm": 0.13172686100006104,
+      "learning_rate": 0.0001737133785539039,
+      "loss": 0.1947,
+      "step": 9115
+    },
+    {
+      "epoch": 0.6577437858508605,
+      "grad_norm": 0.11411601305007935,
+      "learning_rate": 0.00017371049213450715,
+      "loss": 0.1748,
+      "step": 9116
+    },
+    {
+      "epoch": 0.6578159385259208,
+      "grad_norm": 0.12008052319288254,
+      "learning_rate": 0.00017370760571511042,
+      "loss": 0.1556,
+      "step": 9117
+    },
+    {
+      "epoch": 0.6578880912009812,
+      "grad_norm": 0.26494452357292175,
+      "learning_rate": 0.00017370471929571368,
+      "loss": 0.1572,
+      "step": 9118
+    },
+    {
+      "epoch": 0.6579602438760417,
+      "grad_norm": 0.11839305609464645,
+      "learning_rate": 0.00017370183287631694,
+      "loss": 0.1664,
+      "step": 9119
+    },
+    {
+      "epoch": 0.6580323965511021,
+      "grad_norm": 0.10700162500143051,
+      "learning_rate": 0.0001736989464569202,
+      "loss": 0.1318,
+      "step": 9120
+    },
+    {
+      "epoch": 0.6581045492261626,
+      "grad_norm": 0.11779207736253738,
+      "learning_rate": 0.00017369606003752347,
+      "loss": 0.1172,
+      "step": 9121
+    },
+    {
+      "epoch": 0.658176701901223,
+      "grad_norm": 0.32551857829093933,
+      "learning_rate": 0.0001736931736181267,
+      "loss": 0.1116,
+      "step": 9122
+    },
+    {
+      "epoch": 0.6582488545762835,
+      "grad_norm": 0.11007951945066452,
+      "learning_rate": 0.00017369028719872997,
+      "loss": 0.1587,
+      "step": 9123
+    },
+    {
+      "epoch": 0.6583210072513438,
+      "grad_norm": 0.10492239147424698,
+      "learning_rate": 0.00017368740077933326,
+      "loss": 0.1242,
+      "step": 9124
+    },
+    {
+      "epoch": 0.6583931599264042,
+      "grad_norm": 0.12001994997262955,
+      "learning_rate": 0.00017368451435993652,
+      "loss": 0.166,
+      "step": 9125
+    },
+    {
+      "epoch": 0.6584653126014647,
+      "grad_norm": 0.14339768886566162,
+      "learning_rate": 0.00017368162794053978,
+      "loss": 0.1323,
+      "step": 9126
+    },
+    {
+      "epoch": 0.6585374652765251,
+      "grad_norm": 0.12687118351459503,
+      "learning_rate": 0.00017367874152114302,
+      "loss": 0.1348,
+      "step": 9127
+    },
+    {
+      "epoch": 0.6586096179515856,
+      "grad_norm": 0.1434631049633026,
+      "learning_rate": 0.00017367585510174628,
+      "loss": 0.1581,
+      "step": 9128
+    },
+    {
+      "epoch": 0.658681770626646,
+      "grad_norm": 0.11574066430330276,
+      "learning_rate": 0.00017367296868234954,
+      "loss": 0.1262,
+      "step": 9129
+    },
+    {
+      "epoch": 0.6587539233017065,
+      "grad_norm": 0.13802607357501984,
+      "learning_rate": 0.0001736700822629528,
+      "loss": 0.1637,
+      "step": 9130
+    },
+    {
+      "epoch": 0.6588260759767668,
+      "grad_norm": 0.12027553468942642,
+      "learning_rate": 0.0001736671958435561,
+      "loss": 0.1266,
+      "step": 9131
+    },
+    {
+      "epoch": 0.6588982286518272,
+      "grad_norm": 0.13257108628749847,
+      "learning_rate": 0.00017366430942415933,
+      "loss": 0.2181,
+      "step": 9132
+    },
+    {
+      "epoch": 0.6589703813268877,
+      "grad_norm": 0.15435339510440826,
+      "learning_rate": 0.0001736614230047626,
+      "loss": 0.1721,
+      "step": 9133
+    },
+    {
+      "epoch": 0.6590425340019481,
+      "grad_norm": 0.12928619980812073,
+      "learning_rate": 0.00017365853658536586,
+      "loss": 0.1218,
+      "step": 9134
+    },
+    {
+      "epoch": 0.6591146866770086,
+      "grad_norm": 0.13578961789608002,
+      "learning_rate": 0.00017365565016596912,
+      "loss": 0.1625,
+      "step": 9135
+    },
+    {
+      "epoch": 0.659186839352069,
+      "grad_norm": 0.11385858058929443,
+      "learning_rate": 0.00017365276374657238,
+      "loss": 0.177,
+      "step": 9136
+    },
+    {
+      "epoch": 0.6592589920271295,
+      "grad_norm": 0.11839604377746582,
+      "learning_rate": 0.00017364987732717565,
+      "loss": 0.1303,
+      "step": 9137
+    },
+    {
+      "epoch": 0.6593311447021898,
+      "grad_norm": 0.15037642419338226,
+      "learning_rate": 0.0001736469909077789,
+      "loss": 0.2064,
+      "step": 9138
+    },
+    {
+      "epoch": 0.6594032973772502,
+      "grad_norm": 0.15002579987049103,
+      "learning_rate": 0.00017364410448838217,
+      "loss": 0.116,
+      "step": 9139
+    },
+    {
+      "epoch": 0.6594754500523107,
+      "grad_norm": 0.14399875700473785,
+      "learning_rate": 0.00017364121806898544,
+      "loss": 0.1374,
+      "step": 9140
+    },
+    {
+      "epoch": 0.6595476027273711,
+      "grad_norm": 0.161982923746109,
+      "learning_rate": 0.0001736383316495887,
+      "loss": 0.1634,
+      "step": 9141
+    },
+    {
+      "epoch": 0.6596197554024316,
+      "grad_norm": 0.11750172823667526,
+      "learning_rate": 0.00017363544523019196,
+      "loss": 0.1982,
+      "step": 9142
+    },
+    {
+      "epoch": 0.659691908077492,
+      "grad_norm": 0.1385720819234848,
+      "learning_rate": 0.0001736325588107952,
+      "loss": 0.1265,
+      "step": 9143
+    },
+    {
+      "epoch": 0.6597640607525525,
+      "grad_norm": 0.11476393043994904,
+      "learning_rate": 0.00017362967239139846,
+      "loss": 0.1465,
+      "step": 9144
+    },
+    {
+      "epoch": 0.6598362134276128,
+      "grad_norm": 0.15320426225662231,
+      "learning_rate": 0.00017362678597200175,
+      "loss": 0.1539,
+      "step": 9145
+    },
+    {
+      "epoch": 0.6599083661026732,
+      "grad_norm": 0.1807917058467865,
+      "learning_rate": 0.000173623899552605,
+      "loss": 0.1649,
+      "step": 9146
+    },
+    {
+      "epoch": 0.6599805187777337,
+      "grad_norm": 0.12906044721603394,
+      "learning_rate": 0.00017362101313320828,
+      "loss": 0.1785,
+      "step": 9147
+    },
+    {
+      "epoch": 0.6600526714527941,
+      "grad_norm": 0.12782162427902222,
+      "learning_rate": 0.0001736181267138115,
+      "loss": 0.223,
+      "step": 9148
+    },
+    {
+      "epoch": 0.6601248241278546,
+      "grad_norm": 0.11752726882696152,
+      "learning_rate": 0.00017361524029441477,
+      "loss": 0.1313,
+      "step": 9149
+    },
+    {
+      "epoch": 0.660196976802915,
+      "grad_norm": 0.13953953981399536,
+      "learning_rate": 0.00017361235387501804,
+      "loss": 0.1489,
+      "step": 9150
+    },
+    {
+      "epoch": 0.6602691294779754,
+      "grad_norm": 0.14270111918449402,
+      "learning_rate": 0.0001736094674556213,
+      "loss": 0.1692,
+      "step": 9151
+    },
+    {
+      "epoch": 0.6603412821530358,
+      "grad_norm": 0.1215793639421463,
+      "learning_rate": 0.0001736065810362246,
+      "loss": 0.1602,
+      "step": 9152
+    },
+    {
+      "epoch": 0.6604134348280962,
+      "grad_norm": 0.11785700917243958,
+      "learning_rate": 0.00017360369461682783,
+      "loss": 0.1667,
+      "step": 9153
+    },
+    {
+      "epoch": 0.6604855875031567,
+      "grad_norm": 0.13058754801750183,
+      "learning_rate": 0.0001736008081974311,
+      "loss": 0.1413,
+      "step": 9154
+    },
+    {
+      "epoch": 0.6605577401782171,
+      "grad_norm": 0.14015406370162964,
+      "learning_rate": 0.00017359792177803435,
+      "loss": 0.1907,
+      "step": 9155
+    },
+    {
+      "epoch": 0.6606298928532776,
+      "grad_norm": 0.13626953959465027,
+      "learning_rate": 0.00017359503535863762,
+      "loss": 0.1652,
+      "step": 9156
+    },
+    {
+      "epoch": 0.660702045528338,
+      "grad_norm": 0.1171763464808464,
+      "learning_rate": 0.00017359214893924088,
+      "loss": 0.1636,
+      "step": 9157
+    },
+    {
+      "epoch": 0.6607741982033984,
+      "grad_norm": 0.13445504009723663,
+      "learning_rate": 0.00017358926251984414,
+      "loss": 0.1257,
+      "step": 9158
+    },
+    {
+      "epoch": 0.6608463508784588,
+      "grad_norm": 0.11587665975093842,
+      "learning_rate": 0.0001735863761004474,
+      "loss": 0.1388,
+      "step": 9159
+    },
+    {
+      "epoch": 0.6609185035535192,
+      "grad_norm": 0.12831035256385803,
+      "learning_rate": 0.00017358348968105067,
+      "loss": 0.1398,
+      "step": 9160
+    },
+    {
+      "epoch": 0.6609906562285797,
+      "grad_norm": 0.1297786980867386,
+      "learning_rate": 0.00017358060326165393,
+      "loss": 0.1714,
+      "step": 9161
+    },
+    {
+      "epoch": 0.6610628089036401,
+      "grad_norm": 0.11348583549261093,
+      "learning_rate": 0.0001735777168422572,
+      "loss": 0.2014,
+      "step": 9162
+    },
+    {
+      "epoch": 0.6611349615787006,
+      "grad_norm": 0.10949632525444031,
+      "learning_rate": 0.00017357483042286046,
+      "loss": 0.1695,
+      "step": 9163
+    },
+    {
+      "epoch": 0.661207114253761,
+      "grad_norm": 0.10830409079790115,
+      "learning_rate": 0.0001735719440034637,
+      "loss": 0.1603,
+      "step": 9164
+    },
+    {
+      "epoch": 0.6612792669288214,
+      "grad_norm": 0.12362265586853027,
+      "learning_rate": 0.00017356905758406695,
+      "loss": 0.1288,
+      "step": 9165
+    },
+    {
+      "epoch": 0.6613514196038818,
+      "grad_norm": 0.14946235716342926,
+      "learning_rate": 0.00017356617116467024,
+      "loss": 0.1215,
+      "step": 9166
+    },
+    {
+      "epoch": 0.6614235722789422,
+      "grad_norm": 0.10959585756063461,
+      "learning_rate": 0.0001735632847452735,
+      "loss": 0.1563,
+      "step": 9167
+    },
+    {
+      "epoch": 0.6614957249540027,
+      "grad_norm": 0.11575577408075333,
+      "learning_rate": 0.00017356039832587677,
+      "loss": 0.1416,
+      "step": 9168
+    },
+    {
+      "epoch": 0.6615678776290631,
+      "grad_norm": 0.14225371181964874,
+      "learning_rate": 0.00017355751190648,
+      "loss": 0.112,
+      "step": 9169
+    },
+    {
+      "epoch": 0.6616400303041235,
+      "grad_norm": 0.1122872456908226,
+      "learning_rate": 0.00017355462548708327,
+      "loss": 0.1053,
+      "step": 9170
+    },
+    {
+      "epoch": 0.661712182979184,
+      "grad_norm": 0.12862779200077057,
+      "learning_rate": 0.00017355173906768653,
+      "loss": 0.1571,
+      "step": 9171
+    },
+    {
+      "epoch": 0.6617843356542443,
+      "grad_norm": 0.09694743156433105,
+      "learning_rate": 0.0001735488526482898,
+      "loss": 0.1853,
+      "step": 9172
+    },
+    {
+      "epoch": 0.6618564883293048,
+      "grad_norm": 0.12143151462078094,
+      "learning_rate": 0.00017354596622889309,
+      "loss": 0.1153,
+      "step": 9173
+    },
+    {
+      "epoch": 0.6619286410043652,
+      "grad_norm": 0.1144392341375351,
+      "learning_rate": 0.00017354307980949632,
+      "loss": 0.1797,
+      "step": 9174
+    },
+    {
+      "epoch": 0.6620007936794257,
+      "grad_norm": 0.11424224823713303,
+      "learning_rate": 0.00017354019339009958,
+      "loss": 0.1396,
+      "step": 9175
+    },
+    {
+      "epoch": 0.6620729463544861,
+      "grad_norm": 0.10566970705986023,
+      "learning_rate": 0.00017353730697070285,
+      "loss": 0.1309,
+      "step": 9176
+    },
+    {
+      "epoch": 0.6621450990295465,
+      "grad_norm": 0.11206743121147156,
+      "learning_rate": 0.0001735344205513061,
+      "loss": 0.1465,
+      "step": 9177
+    },
+    {
+      "epoch": 0.662217251704607,
+      "grad_norm": 0.12112920731306076,
+      "learning_rate": 0.00017353153413190937,
+      "loss": 0.1493,
+      "step": 9178
+    },
+    {
+      "epoch": 0.6622894043796673,
+      "grad_norm": 0.12534743547439575,
+      "learning_rate": 0.00017352864771251264,
+      "loss": 0.1683,
+      "step": 9179
+    },
+    {
+      "epoch": 0.6623615570547278,
+      "grad_norm": 0.1410726010799408,
+      "learning_rate": 0.0001735257612931159,
+      "loss": 0.1698,
+      "step": 9180
+    },
+    {
+      "epoch": 0.6624337097297882,
+      "grad_norm": 0.14639095962047577,
+      "learning_rate": 0.00017352287487371916,
+      "loss": 0.15,
+      "step": 9181
+    },
+    {
+      "epoch": 0.6625058624048487,
+      "grad_norm": 0.13366194069385529,
+      "learning_rate": 0.00017351998845432242,
+      "loss": 0.1467,
+      "step": 9182
+    },
+    {
+      "epoch": 0.6625780150799091,
+      "grad_norm": 0.1295410394668579,
+      "learning_rate": 0.0001735171020349257,
+      "loss": 0.1067,
+      "step": 9183
+    },
+    {
+      "epoch": 0.6626501677549695,
+      "grad_norm": 0.13306112587451935,
+      "learning_rate": 0.00017351421561552895,
+      "loss": 0.1788,
+      "step": 9184
+    },
+    {
+      "epoch": 0.66272232043003,
+      "grad_norm": 0.12532058358192444,
+      "learning_rate": 0.0001735113291961322,
+      "loss": 0.188,
+      "step": 9185
+    },
+    {
+      "epoch": 0.6627944731050903,
+      "grad_norm": 0.1394512951374054,
+      "learning_rate": 0.00017350844277673545,
+      "loss": 0.1339,
+      "step": 9186
+    },
+    {
+      "epoch": 0.6628666257801508,
+      "grad_norm": 0.1411266326904297,
+      "learning_rate": 0.00017350555635733874,
+      "loss": 0.1815,
+      "step": 9187
+    },
+    {
+      "epoch": 0.6629387784552112,
+      "grad_norm": 0.14007483422756195,
+      "learning_rate": 0.000173502669937942,
+      "loss": 0.1358,
+      "step": 9188
+    },
+    {
+      "epoch": 0.6630109311302717,
+      "grad_norm": 0.11793815344572067,
+      "learning_rate": 0.00017349978351854526,
+      "loss": 0.1542,
+      "step": 9189
+    },
+    {
+      "epoch": 0.6630830838053321,
+      "grad_norm": 0.12423793226480484,
+      "learning_rate": 0.00017349689709914853,
+      "loss": 0.1299,
+      "step": 9190
+    },
+    {
+      "epoch": 0.6631552364803925,
+      "grad_norm": 0.10026775300502777,
+      "learning_rate": 0.00017349401067975176,
+      "loss": 0.1334,
+      "step": 9191
+    },
+    {
+      "epoch": 0.663227389155453,
+      "grad_norm": 0.12117689102888107,
+      "learning_rate": 0.00017349112426035503,
+      "loss": 0.1354,
+      "step": 9192
+    },
+    {
+      "epoch": 0.6632995418305133,
+      "grad_norm": 0.10784576833248138,
+      "learning_rate": 0.0001734882378409583,
+      "loss": 0.144,
+      "step": 9193
+    },
+    {
+      "epoch": 0.6633716945055738,
+      "grad_norm": 0.12422692775726318,
+      "learning_rate": 0.00017348535142156158,
+      "loss": 0.2063,
+      "step": 9194
+    },
+    {
+      "epoch": 0.6634438471806342,
+      "grad_norm": 0.11072385311126709,
+      "learning_rate": 0.00017348246500216484,
+      "loss": 0.1621,
+      "step": 9195
+    },
+    {
+      "epoch": 0.6635159998556946,
+      "grad_norm": 0.11303888261318207,
+      "learning_rate": 0.00017347957858276808,
+      "loss": 0.1967,
+      "step": 9196
+    },
+    {
+      "epoch": 0.6635881525307551,
+      "grad_norm": 0.1248398870229721,
+      "learning_rate": 0.00017347669216337134,
+      "loss": 0.135,
+      "step": 9197
+    },
+    {
+      "epoch": 0.6636603052058155,
+      "grad_norm": 0.15116511285305023,
+      "learning_rate": 0.0001734738057439746,
+      "loss": 0.1321,
+      "step": 9198
+    },
+    {
+      "epoch": 0.663732457880876,
+      "grad_norm": 0.14752434194087982,
+      "learning_rate": 0.00017347091932457787,
+      "loss": 0.1263,
+      "step": 9199
+    },
+    {
+      "epoch": 0.6638046105559363,
+      "grad_norm": 0.14145156741142273,
+      "learning_rate": 0.00017346803290518113,
+      "loss": 0.1105,
+      "step": 9200
+    },
+    {
+      "epoch": 0.6638767632309968,
+      "grad_norm": 0.12625358998775482,
+      "learning_rate": 0.0001734651464857844,
+      "loss": 0.1207,
+      "step": 9201
+    },
+    {
+      "epoch": 0.6639489159060572,
+      "grad_norm": 0.1078922376036644,
+      "learning_rate": 0.00017346226006638766,
+      "loss": 0.1343,
+      "step": 9202
+    },
+    {
+      "epoch": 0.6640210685811176,
+      "grad_norm": 0.11718551069498062,
+      "learning_rate": 0.00017345937364699092,
+      "loss": 0.1425,
+      "step": 9203
+    },
+    {
+      "epoch": 0.6640932212561781,
+      "grad_norm": 0.11266859620809555,
+      "learning_rate": 0.00017345648722759418,
+      "loss": 0.1679,
+      "step": 9204
+    },
+    {
+      "epoch": 0.6641653739312385,
+      "grad_norm": 0.0987582877278328,
+      "learning_rate": 0.00017345360080819744,
+      "loss": 0.1503,
+      "step": 9205
+    },
+    {
+      "epoch": 0.664237526606299,
+      "grad_norm": 0.11114714294672012,
+      "learning_rate": 0.0001734507143888007,
+      "loss": 0.1958,
+      "step": 9206
+    },
+    {
+      "epoch": 0.6643096792813593,
+      "grad_norm": 0.13825024664402008,
+      "learning_rate": 0.00017344782796940394,
+      "loss": 0.1484,
+      "step": 9207
+    },
+    {
+      "epoch": 0.6643818319564198,
+      "grad_norm": 0.1525411456823349,
+      "learning_rate": 0.00017344494155000723,
+      "loss": 0.1482,
+      "step": 9208
+    },
+    {
+      "epoch": 0.6644539846314802,
+      "grad_norm": 0.11406046152114868,
+      "learning_rate": 0.0001734420551306105,
+      "loss": 0.1675,
+      "step": 9209
+    },
+    {
+      "epoch": 0.6645261373065406,
+      "grad_norm": 0.12549228966236115,
+      "learning_rate": 0.00017343916871121376,
+      "loss": 0.1388,
+      "step": 9210
+    },
+    {
+      "epoch": 0.6645982899816011,
+      "grad_norm": 0.10623666644096375,
+      "learning_rate": 0.00017343628229181702,
+      "loss": 0.1478,
+      "step": 9211
+    },
+    {
+      "epoch": 0.6646704426566615,
+      "grad_norm": 0.11086919903755188,
+      "learning_rate": 0.00017343339587242026,
+      "loss": 0.1588,
+      "step": 9212
+    },
+    {
+      "epoch": 0.664742595331722,
+      "grad_norm": 0.14076271653175354,
+      "learning_rate": 0.00017343050945302352,
+      "loss": 0.1376,
+      "step": 9213
+    },
+    {
+      "epoch": 0.6648147480067823,
+      "grad_norm": 0.14605247974395752,
+      "learning_rate": 0.00017342762303362678,
+      "loss": 0.1864,
+      "step": 9214
+    },
+    {
+      "epoch": 0.6648869006818428,
+      "grad_norm": 0.11169461905956268,
+      "learning_rate": 0.00017342473661423007,
+      "loss": 0.1709,
+      "step": 9215
+    },
+    {
+      "epoch": 0.6649590533569032,
+      "grad_norm": 0.1111663356423378,
+      "learning_rate": 0.00017342185019483334,
+      "loss": 0.1568,
+      "step": 9216
+    },
+    {
+      "epoch": 0.6650312060319636,
+      "grad_norm": 0.11549469828605652,
+      "learning_rate": 0.00017341896377543657,
+      "loss": 0.1018,
+      "step": 9217
+    },
+    {
+      "epoch": 0.6651033587070241,
+      "grad_norm": 0.12490471452474594,
+      "learning_rate": 0.00017341607735603984,
+      "loss": 0.1482,
+      "step": 9218
+    },
+    {
+      "epoch": 0.6651755113820845,
+      "grad_norm": 0.14729900658130646,
+      "learning_rate": 0.0001734131909366431,
+      "loss": 0.1523,
+      "step": 9219
+    },
+    {
+      "epoch": 0.665247664057145,
+      "grad_norm": 0.12454581260681152,
+      "learning_rate": 0.00017341030451724636,
+      "loss": 0.1901,
+      "step": 9220
+    },
+    {
+      "epoch": 0.6653198167322053,
+      "grad_norm": 0.14527574181556702,
+      "learning_rate": 0.00017340741809784962,
+      "loss": 0.1269,
+      "step": 9221
+    },
+    {
+      "epoch": 0.6653919694072657,
+      "grad_norm": 0.11533478647470474,
+      "learning_rate": 0.0001734045316784529,
+      "loss": 0.1096,
+      "step": 9222
+    },
+    {
+      "epoch": 0.6654641220823262,
+      "grad_norm": 0.10546639561653137,
+      "learning_rate": 0.00017340164525905615,
+      "loss": 0.1381,
+      "step": 9223
+    },
+    {
+      "epoch": 0.6655362747573866,
+      "grad_norm": 0.16379734873771667,
+      "learning_rate": 0.0001733987588396594,
+      "loss": 0.1197,
+      "step": 9224
+    },
+    {
+      "epoch": 0.6656084274324471,
+      "grad_norm": 0.1349085569381714,
+      "learning_rate": 0.00017339587242026268,
+      "loss": 0.1796,
+      "step": 9225
+    },
+    {
+      "epoch": 0.6656805801075075,
+      "grad_norm": 0.11026155203580856,
+      "learning_rate": 0.00017339298600086594,
+      "loss": 0.1436,
+      "step": 9226
+    },
+    {
+      "epoch": 0.665752732782568,
+      "grad_norm": 0.11905506253242493,
+      "learning_rate": 0.0001733900995814692,
+      "loss": 0.146,
+      "step": 9227
+    },
+    {
+      "epoch": 0.6658248854576283,
+      "grad_norm": 0.11506408452987671,
+      "learning_rate": 0.00017338721316207244,
+      "loss": 0.0963,
+      "step": 9228
+    },
+    {
+      "epoch": 0.6658970381326887,
+      "grad_norm": 0.11228253692388535,
+      "learning_rate": 0.00017338432674267573,
+      "loss": 0.1216,
+      "step": 9229
+    },
+    {
+      "epoch": 0.6659691908077492,
+      "grad_norm": 0.12005262076854706,
+      "learning_rate": 0.000173381440323279,
+      "loss": 0.1218,
+      "step": 9230
+    },
+    {
+      "epoch": 0.6660413434828096,
+      "grad_norm": 0.15088176727294922,
+      "learning_rate": 0.00017337855390388225,
+      "loss": 0.2027,
+      "step": 9231
+    },
+    {
+      "epoch": 0.6661134961578701,
+      "grad_norm": 0.11610926687717438,
+      "learning_rate": 0.00017337566748448552,
+      "loss": 0.1593,
+      "step": 9232
+    },
+    {
+      "epoch": 0.6661856488329305,
+      "grad_norm": 0.12422781437635422,
+      "learning_rate": 0.00017337278106508875,
+      "loss": 0.1488,
+      "step": 9233
+    },
+    {
+      "epoch": 0.6662578015079909,
+      "grad_norm": 0.12029799818992615,
+      "learning_rate": 0.00017336989464569201,
+      "loss": 0.1523,
+      "step": 9234
+    },
+    {
+      "epoch": 0.6663299541830513,
+      "grad_norm": 0.12359961867332458,
+      "learning_rate": 0.00017336700822629528,
+      "loss": 0.1469,
+      "step": 9235
+    },
+    {
+      "epoch": 0.6664021068581117,
+      "grad_norm": 0.10292413085699081,
+      "learning_rate": 0.00017336412180689857,
+      "loss": 0.1257,
+      "step": 9236
+    },
+    {
+      "epoch": 0.6664742595331722,
+      "grad_norm": 0.09860517829656601,
+      "learning_rate": 0.00017336123538750183,
+      "loss": 0.1214,
+      "step": 9237
+    },
+    {
+      "epoch": 0.6665464122082326,
+      "grad_norm": 0.10806264728307724,
+      "learning_rate": 0.00017335834896810507,
+      "loss": 0.1466,
+      "step": 9238
+    },
+    {
+      "epoch": 0.6666185648832931,
+      "grad_norm": 0.1720741242170334,
+      "learning_rate": 0.00017335546254870833,
+      "loss": 0.2037,
+      "step": 9239
+    },
+    {
+      "epoch": 0.6666907175583535,
+      "grad_norm": 0.1357303261756897,
+      "learning_rate": 0.0001733525761293116,
+      "loss": 0.1505,
+      "step": 9240
+    },
+    {
+      "epoch": 0.6667628702334139,
+      "grad_norm": 0.11138720065355301,
+      "learning_rate": 0.00017334968970991486,
+      "loss": 0.117,
+      "step": 9241
+    },
+    {
+      "epoch": 0.6668350229084743,
+      "grad_norm": 0.1529517024755478,
+      "learning_rate": 0.00017334680329051812,
+      "loss": 0.1293,
+      "step": 9242
+    },
+    {
+      "epoch": 0.6669071755835347,
+      "grad_norm": 0.1390070766210556,
+      "learning_rate": 0.00017334391687112138,
+      "loss": 0.1433,
+      "step": 9243
+    },
+    {
+      "epoch": 0.6669793282585952,
+      "grad_norm": 0.1554955691099167,
+      "learning_rate": 0.00017334103045172464,
+      "loss": 0.1329,
+      "step": 9244
+    },
+    {
+      "epoch": 0.6670514809336556,
+      "grad_norm": 0.13039268553256989,
+      "learning_rate": 0.0001733381440323279,
+      "loss": 0.1956,
+      "step": 9245
+    },
+    {
+      "epoch": 0.6671236336087161,
+      "grad_norm": 0.1186772882938385,
+      "learning_rate": 0.00017333525761293117,
+      "loss": 0.1512,
+      "step": 9246
+    },
+    {
+      "epoch": 0.6671957862837765,
+      "grad_norm": 0.12815089523792267,
+      "learning_rate": 0.00017333237119353443,
+      "loss": 0.1297,
+      "step": 9247
+    },
+    {
+      "epoch": 0.6672679389588368,
+      "grad_norm": 0.13035452365875244,
+      "learning_rate": 0.0001733294847741377,
+      "loss": 0.1845,
+      "step": 9248
+    },
+    {
+      "epoch": 0.6673400916338973,
+      "grad_norm": 0.12902598083019257,
+      "learning_rate": 0.00017332659835474093,
+      "loss": 0.125,
+      "step": 9249
+    },
+    {
+      "epoch": 0.6674122443089577,
+      "grad_norm": 0.13073696196079254,
+      "learning_rate": 0.00017332371193534422,
+      "loss": 0.1342,
+      "step": 9250
+    },
+    {
+      "epoch": 0.6674843969840182,
+      "grad_norm": 0.12306298315525055,
+      "learning_rate": 0.00017332082551594748,
+      "loss": 0.1453,
+      "step": 9251
+    },
+    {
+      "epoch": 0.6675565496590786,
+      "grad_norm": 0.1435564160346985,
+      "learning_rate": 0.00017331793909655075,
+      "loss": 0.2049,
+      "step": 9252
+    },
+    {
+      "epoch": 0.6676287023341391,
+      "grad_norm": 0.1250120848417282,
+      "learning_rate": 0.000173315052677154,
+      "loss": 0.1394,
+      "step": 9253
+    },
+    {
+      "epoch": 0.6677008550091995,
+      "grad_norm": 0.1147419810295105,
+      "learning_rate": 0.00017331216625775725,
+      "loss": 0.1613,
+      "step": 9254
+    },
+    {
+      "epoch": 0.6677730076842598,
+      "grad_norm": 0.15764589607715607,
+      "learning_rate": 0.0001733092798383605,
+      "loss": 0.1241,
+      "step": 9255
+    },
+    {
+      "epoch": 0.6678451603593203,
+      "grad_norm": 0.16571545600891113,
+      "learning_rate": 0.00017330639341896377,
+      "loss": 0.1939,
+      "step": 9256
+    },
+    {
+      "epoch": 0.6679173130343807,
+      "grad_norm": 0.1149231493473053,
+      "learning_rate": 0.00017330350699956706,
+      "loss": 0.133,
+      "step": 9257
+    },
+    {
+      "epoch": 0.6679894657094412,
+      "grad_norm": 0.15089252591133118,
+      "learning_rate": 0.00017330062058017033,
+      "loss": 0.1276,
+      "step": 9258
+    },
+    {
+      "epoch": 0.6680616183845016,
+      "grad_norm": 0.11390020698308945,
+      "learning_rate": 0.00017329773416077356,
+      "loss": 0.1727,
+      "step": 9259
+    },
+    {
+      "epoch": 0.6681337710595621,
+      "grad_norm": 0.14585581421852112,
+      "learning_rate": 0.00017329484774137682,
+      "loss": 0.153,
+      "step": 9260
+    },
+    {
+      "epoch": 0.6682059237346225,
+      "grad_norm": 0.10556942224502563,
+      "learning_rate": 0.0001732919613219801,
+      "loss": 0.1383,
+      "step": 9261
+    },
+    {
+      "epoch": 0.6682780764096828,
+      "grad_norm": 0.1368727684020996,
+      "learning_rate": 0.00017328907490258335,
+      "loss": 0.1672,
+      "step": 9262
+    },
+    {
+      "epoch": 0.6683502290847433,
+      "grad_norm": 0.10373331606388092,
+      "learning_rate": 0.0001732861884831866,
+      "loss": 0.1535,
+      "step": 9263
+    },
+    {
+      "epoch": 0.6684223817598037,
+      "grad_norm": 0.1099395751953125,
+      "learning_rate": 0.00017328330206378988,
+      "loss": 0.1044,
+      "step": 9264
+    },
+    {
+      "epoch": 0.6684945344348642,
+      "grad_norm": 0.16567417979240417,
+      "learning_rate": 0.00017328041564439314,
+      "loss": 0.2068,
+      "step": 9265
+    },
+    {
+      "epoch": 0.6685666871099246,
+      "grad_norm": 0.12957066297531128,
+      "learning_rate": 0.0001732775292249964,
+      "loss": 0.1651,
+      "step": 9266
+    },
+    {
+      "epoch": 0.6686388397849851,
+      "grad_norm": 0.1365097612142563,
+      "learning_rate": 0.00017327464280559966,
+      "loss": 0.1454,
+      "step": 9267
+    },
+    {
+      "epoch": 0.6687109924600455,
+      "grad_norm": 0.11914641410112381,
+      "learning_rate": 0.00017327175638620293,
+      "loss": 0.1368,
+      "step": 9268
+    },
+    {
+      "epoch": 0.6687831451351058,
+      "grad_norm": 0.13403435051441193,
+      "learning_rate": 0.0001732688699668062,
+      "loss": 0.1677,
+      "step": 9269
+    },
+    {
+      "epoch": 0.6688552978101663,
+      "grad_norm": 0.12896324694156647,
+      "learning_rate": 0.00017326598354740943,
+      "loss": 0.1832,
+      "step": 9270
+    },
+    {
+      "epoch": 0.6689274504852267,
+      "grad_norm": 0.13894294202327728,
+      "learning_rate": 0.00017326309712801272,
+      "loss": 0.1239,
+      "step": 9271
+    },
+    {
+      "epoch": 0.6689996031602872,
+      "grad_norm": 0.1334586888551712,
+      "learning_rate": 0.00017326021070861598,
+      "loss": 0.1633,
+      "step": 9272
+    },
+    {
+      "epoch": 0.6690717558353476,
+      "grad_norm": 0.11441192030906677,
+      "learning_rate": 0.00017325732428921924,
+      "loss": 0.1351,
+      "step": 9273
+    },
+    {
+      "epoch": 0.6691439085104081,
+      "grad_norm": 0.12247227877378464,
+      "learning_rate": 0.0001732544378698225,
+      "loss": 0.1434,
+      "step": 9274
+    },
+    {
+      "epoch": 0.6692160611854685,
+      "grad_norm": 0.10820969194173813,
+      "learning_rate": 0.00017325155145042574,
+      "loss": 0.131,
+      "step": 9275
+    },
+    {
+      "epoch": 0.6692882138605288,
+      "grad_norm": 0.11150060594081879,
+      "learning_rate": 0.000173248665031029,
+      "loss": 0.1278,
+      "step": 9276
+    },
+    {
+      "epoch": 0.6693603665355893,
+      "grad_norm": 0.11507835239171982,
+      "learning_rate": 0.00017324577861163227,
+      "loss": 0.1424,
+      "step": 9277
+    },
+    {
+      "epoch": 0.6694325192106497,
+      "grad_norm": 0.14963656663894653,
+      "learning_rate": 0.00017324289219223556,
+      "loss": 0.2403,
+      "step": 9278
+    },
+    {
+      "epoch": 0.6695046718857102,
+      "grad_norm": 0.15234751999378204,
+      "learning_rate": 0.00017324000577283882,
+      "loss": 0.1937,
+      "step": 9279
+    },
+    {
+      "epoch": 0.6695768245607706,
+      "grad_norm": 0.12259795516729355,
+      "learning_rate": 0.00017323711935344206,
+      "loss": 0.1788,
+      "step": 9280
+    },
+    {
+      "epoch": 0.669648977235831,
+      "grad_norm": 0.11492430418729782,
+      "learning_rate": 0.00017323423293404532,
+      "loss": 0.103,
+      "step": 9281
+    },
+    {
+      "epoch": 0.6697211299108915,
+      "grad_norm": 0.13131506741046906,
+      "learning_rate": 0.00017323134651464858,
+      "loss": 0.1534,
+      "step": 9282
+    },
+    {
+      "epoch": 0.6697932825859518,
+      "grad_norm": 0.14066502451896667,
+      "learning_rate": 0.00017322846009525184,
+      "loss": 0.1237,
+      "step": 9283
+    },
+    {
+      "epoch": 0.6698654352610123,
+      "grad_norm": 0.12199169397354126,
+      "learning_rate": 0.0001732255736758551,
+      "loss": 0.1902,
+      "step": 9284
+    },
+    {
+      "epoch": 0.6699375879360727,
+      "grad_norm": 0.14018568396568298,
+      "learning_rate": 0.00017322268725645837,
+      "loss": 0.1269,
+      "step": 9285
+    },
+    {
+      "epoch": 0.6700097406111332,
+      "grad_norm": 0.14004381000995636,
+      "learning_rate": 0.00017321980083706163,
+      "loss": 0.1613,
+      "step": 9286
+    },
+    {
+      "epoch": 0.6700818932861936,
+      "grad_norm": 0.11310358345508575,
+      "learning_rate": 0.0001732169144176649,
+      "loss": 0.1388,
+      "step": 9287
+    },
+    {
+      "epoch": 0.670154045961254,
+      "grad_norm": 0.11956499516963959,
+      "learning_rate": 0.00017321402799826816,
+      "loss": 0.1646,
+      "step": 9288
+    },
+    {
+      "epoch": 0.6702261986363145,
+      "grad_norm": 0.13938401639461517,
+      "learning_rate": 0.00017321114157887142,
+      "loss": 0.1247,
+      "step": 9289
+    },
+    {
+      "epoch": 0.6702983513113748,
+      "grad_norm": 0.12736207246780396,
+      "learning_rate": 0.00017320825515947468,
+      "loss": 0.1567,
+      "step": 9290
+    },
+    {
+      "epoch": 0.6703705039864353,
+      "grad_norm": 0.1147407665848732,
+      "learning_rate": 0.00017320536874007792,
+      "loss": 0.1721,
+      "step": 9291
+    },
+    {
+      "epoch": 0.6704426566614957,
+      "grad_norm": 0.12425895780324936,
+      "learning_rate": 0.0001732024823206812,
+      "loss": 0.1532,
+      "step": 9292
+    },
+    {
+      "epoch": 0.6705148093365562,
+      "grad_norm": 0.13098202645778656,
+      "learning_rate": 0.00017319959590128447,
+      "loss": 0.1793,
+      "step": 9293
+    },
+    {
+      "epoch": 0.6705869620116166,
+      "grad_norm": 0.13009867072105408,
+      "learning_rate": 0.00017319670948188774,
+      "loss": 0.1198,
+      "step": 9294
+    },
+    {
+      "epoch": 0.670659114686677,
+      "grad_norm": 0.14394155144691467,
+      "learning_rate": 0.000173193823062491,
+      "loss": 0.1742,
+      "step": 9295
+    },
+    {
+      "epoch": 0.6707312673617374,
+      "grad_norm": 0.14598698914051056,
+      "learning_rate": 0.00017319093664309423,
+      "loss": 0.1242,
+      "step": 9296
+    },
+    {
+      "epoch": 0.6708034200367978,
+      "grad_norm": 0.14718224108219147,
+      "learning_rate": 0.0001731880502236975,
+      "loss": 0.1691,
+      "step": 9297
+    },
+    {
+      "epoch": 0.6708755727118583,
+      "grad_norm": 0.1442996710538864,
+      "learning_rate": 0.00017318516380430076,
+      "loss": 0.167,
+      "step": 9298
+    },
+    {
+      "epoch": 0.6709477253869187,
+      "grad_norm": 0.15796516835689545,
+      "learning_rate": 0.00017318227738490405,
+      "loss": 0.1633,
+      "step": 9299
+    },
+    {
+      "epoch": 0.6710198780619792,
+      "grad_norm": 0.13265594840049744,
+      "learning_rate": 0.0001731793909655073,
+      "loss": 0.1334,
+      "step": 9300
+    },
+    {
+      "epoch": 0.6710920307370396,
+      "grad_norm": 0.10171502083539963,
+      "learning_rate": 0.00017317650454611055,
+      "loss": 0.1917,
+      "step": 9301
+    },
+    {
+      "epoch": 0.6711641834121,
+      "grad_norm": 0.11402595788240433,
+      "learning_rate": 0.0001731736181267138,
+      "loss": 0.1237,
+      "step": 9302
+    },
+    {
+      "epoch": 0.6712363360871604,
+      "grad_norm": 0.11267706751823425,
+      "learning_rate": 0.00017317073170731708,
+      "loss": 0.1131,
+      "step": 9303
+    },
+    {
+      "epoch": 0.6713084887622208,
+      "grad_norm": 0.1277383267879486,
+      "learning_rate": 0.00017316784528792034,
+      "loss": 0.1939,
+      "step": 9304
+    },
+    {
+      "epoch": 0.6713806414372813,
+      "grad_norm": 0.11373007297515869,
+      "learning_rate": 0.0001731649588685236,
+      "loss": 0.1375,
+      "step": 9305
+    },
+    {
+      "epoch": 0.6714527941123417,
+      "grad_norm": 0.12875738739967346,
+      "learning_rate": 0.00017316207244912686,
+      "loss": 0.1629,
+      "step": 9306
+    },
+    {
+      "epoch": 0.6715249467874022,
+      "grad_norm": 0.1210503950715065,
+      "learning_rate": 0.00017315918602973013,
+      "loss": 0.1695,
+      "step": 9307
+    },
+    {
+      "epoch": 0.6715970994624626,
+      "grad_norm": 0.12265054881572723,
+      "learning_rate": 0.0001731562996103334,
+      "loss": 0.1549,
+      "step": 9308
+    },
+    {
+      "epoch": 0.671669252137523,
+      "grad_norm": 0.16889280080795288,
+      "learning_rate": 0.00017315341319093665,
+      "loss": 0.1612,
+      "step": 9309
+    },
+    {
+      "epoch": 0.6717414048125834,
+      "grad_norm": 0.10836353152990341,
+      "learning_rate": 0.00017315052677153992,
+      "loss": 0.0958,
+      "step": 9310
+    },
+    {
+      "epoch": 0.6718135574876438,
+      "grad_norm": 0.13138365745544434,
+      "learning_rate": 0.00017314764035214318,
+      "loss": 0.189,
+      "step": 9311
+    },
+    {
+      "epoch": 0.6718857101627043,
+      "grad_norm": 0.1403338760137558,
+      "learning_rate": 0.00017314475393274644,
+      "loss": 0.1797,
+      "step": 9312
+    },
+    {
+      "epoch": 0.6719578628377647,
+      "grad_norm": 0.11391955614089966,
+      "learning_rate": 0.00017314186751334968,
+      "loss": 0.1524,
+      "step": 9313
+    },
+    {
+      "epoch": 0.6720300155128252,
+      "grad_norm": 0.1363377422094345,
+      "learning_rate": 0.00017313898109395297,
+      "loss": 0.1743,
+      "step": 9314
+    },
+    {
+      "epoch": 0.6721021681878856,
+      "grad_norm": 0.11498959362506866,
+      "learning_rate": 0.00017313609467455623,
+      "loss": 0.1127,
+      "step": 9315
+    },
+    {
+      "epoch": 0.672174320862946,
+      "grad_norm": 0.15003925561904907,
+      "learning_rate": 0.0001731332082551595,
+      "loss": 0.1359,
+      "step": 9316
+    },
+    {
+      "epoch": 0.6722464735380064,
+      "grad_norm": 0.2588501572608948,
+      "learning_rate": 0.00017313032183576276,
+      "loss": 0.1256,
+      "step": 9317
+    },
+    {
+      "epoch": 0.6723186262130668,
+      "grad_norm": 0.11945448815822601,
+      "learning_rate": 0.000173127435416366,
+      "loss": 0.1448,
+      "step": 9318
+    },
+    {
+      "epoch": 0.6723907788881273,
+      "grad_norm": 0.1260116547346115,
+      "learning_rate": 0.00017312454899696925,
+      "loss": 0.1636,
+      "step": 9319
+    },
+    {
+      "epoch": 0.6724629315631877,
+      "grad_norm": 0.15767249464988708,
+      "learning_rate": 0.00017312166257757252,
+      "loss": 0.1541,
+      "step": 9320
+    },
+    {
+      "epoch": 0.6725350842382481,
+      "grad_norm": 0.10866338014602661,
+      "learning_rate": 0.0001731187761581758,
+      "loss": 0.1175,
+      "step": 9321
+    },
+    {
+      "epoch": 0.6726072369133086,
+      "grad_norm": 0.11621398478746414,
+      "learning_rate": 0.00017311588973877907,
+      "loss": 0.1795,
+      "step": 9322
+    },
+    {
+      "epoch": 0.672679389588369,
+      "grad_norm": 0.12204501032829285,
+      "learning_rate": 0.0001731130033193823,
+      "loss": 0.1531,
+      "step": 9323
+    },
+    {
+      "epoch": 0.6727515422634294,
+      "grad_norm": 0.1150672510266304,
+      "learning_rate": 0.00017311011689998557,
+      "loss": 0.1294,
+      "step": 9324
+    },
+    {
+      "epoch": 0.6728236949384898,
+      "grad_norm": 0.1237521544098854,
+      "learning_rate": 0.00017310723048058883,
+      "loss": 0.1397,
+      "step": 9325
+    },
+    {
+      "epoch": 0.6728958476135503,
+      "grad_norm": 0.10224903374910355,
+      "learning_rate": 0.0001731043440611921,
+      "loss": 0.1182,
+      "step": 9326
+    },
+    {
+      "epoch": 0.6729680002886107,
+      "grad_norm": 0.10159409791231155,
+      "learning_rate": 0.00017310145764179536,
+      "loss": 0.1351,
+      "step": 9327
+    },
+    {
+      "epoch": 0.6730401529636711,
+      "grad_norm": 0.10354913026094437,
+      "learning_rate": 0.00017309857122239862,
+      "loss": 0.1667,
+      "step": 9328
+    },
+    {
+      "epoch": 0.6731123056387316,
+      "grad_norm": 0.13224786520004272,
+      "learning_rate": 0.00017309568480300188,
+      "loss": 0.1567,
+      "step": 9329
+    },
+    {
+      "epoch": 0.673184458313792,
+      "grad_norm": 0.1250360608100891,
+      "learning_rate": 0.00017309279838360515,
+      "loss": 0.1538,
+      "step": 9330
+    },
+    {
+      "epoch": 0.6732566109888524,
+      "grad_norm": 0.11552729457616806,
+      "learning_rate": 0.0001730899119642084,
+      "loss": 0.1454,
+      "step": 9331
+    },
+    {
+      "epoch": 0.6733287636639128,
+      "grad_norm": 0.12813526391983032,
+      "learning_rate": 0.00017308702554481167,
+      "loss": 0.1015,
+      "step": 9332
+    },
+    {
+      "epoch": 0.6734009163389733,
+      "grad_norm": 0.1277819573879242,
+      "learning_rate": 0.00017308413912541494,
+      "loss": 0.1689,
+      "step": 9333
+    },
+    {
+      "epoch": 0.6734730690140337,
+      "grad_norm": 0.11043453216552734,
+      "learning_rate": 0.00017308125270601817,
+      "loss": 0.1426,
+      "step": 9334
+    },
+    {
+      "epoch": 0.6735452216890941,
+      "grad_norm": 0.12549757957458496,
+      "learning_rate": 0.00017307836628662146,
+      "loss": 0.1157,
+      "step": 9335
+    },
+    {
+      "epoch": 0.6736173743641546,
+      "grad_norm": 0.12948933243751526,
+      "learning_rate": 0.00017307547986722472,
+      "loss": 0.122,
+      "step": 9336
+    },
+    {
+      "epoch": 0.673689527039215,
+      "grad_norm": 0.12379142642021179,
+      "learning_rate": 0.000173072593447828,
+      "loss": 0.1467,
+      "step": 9337
+    },
+    {
+      "epoch": 0.6737616797142754,
+      "grad_norm": 0.12186525762081146,
+      "learning_rate": 0.00017306970702843125,
+      "loss": 0.0807,
+      "step": 9338
+    },
+    {
+      "epoch": 0.6738338323893358,
+      "grad_norm": 0.12237106263637543,
+      "learning_rate": 0.00017306682060903449,
+      "loss": 0.1665,
+      "step": 9339
+    },
+    {
+      "epoch": 0.6739059850643963,
+      "grad_norm": 0.1131465956568718,
+      "learning_rate": 0.00017306393418963775,
+      "loss": 0.1921,
+      "step": 9340
+    },
+    {
+      "epoch": 0.6739781377394567,
+      "grad_norm": 0.11823917180299759,
+      "learning_rate": 0.000173061047770241,
+      "loss": 0.1777,
+      "step": 9341
+    },
+    {
+      "epoch": 0.6740502904145171,
+      "grad_norm": 0.11592920869588852,
+      "learning_rate": 0.0001730581613508443,
+      "loss": 0.1456,
+      "step": 9342
+    },
+    {
+      "epoch": 0.6741224430895776,
+      "grad_norm": 0.13973605632781982,
+      "learning_rate": 0.00017305527493144756,
+      "loss": 0.1273,
+      "step": 9343
+    },
+    {
+      "epoch": 0.674194595764638,
+      "grad_norm": 0.13213014602661133,
+      "learning_rate": 0.0001730523885120508,
+      "loss": 0.135,
+      "step": 9344
+    },
+    {
+      "epoch": 0.6742667484396984,
+      "grad_norm": 0.1320028156042099,
+      "learning_rate": 0.00017304950209265406,
+      "loss": 0.1549,
+      "step": 9345
+    },
+    {
+      "epoch": 0.6743389011147588,
+      "grad_norm": 0.126231849193573,
+      "learning_rate": 0.00017304661567325733,
+      "loss": 0.1158,
+      "step": 9346
+    },
+    {
+      "epoch": 0.6744110537898192,
+      "grad_norm": 0.12511806190013885,
+      "learning_rate": 0.0001730437292538606,
+      "loss": 0.1436,
+      "step": 9347
+    },
+    {
+      "epoch": 0.6744832064648797,
+      "grad_norm": 0.12003560364246368,
+      "learning_rate": 0.00017304084283446385,
+      "loss": 0.124,
+      "step": 9348
+    },
+    {
+      "epoch": 0.6745553591399401,
+      "grad_norm": 0.14016613364219666,
+      "learning_rate": 0.00017303795641506712,
+      "loss": 0.1371,
+      "step": 9349
+    },
+    {
+      "epoch": 0.6746275118150006,
+      "grad_norm": 0.14404606819152832,
+      "learning_rate": 0.00017303506999567038,
+      "loss": 0.1867,
+      "step": 9350
+    },
+    {
+      "epoch": 0.674699664490061,
+      "grad_norm": 0.1769256889820099,
+      "learning_rate": 0.00017303218357627364,
+      "loss": 0.1762,
+      "step": 9351
+    },
+    {
+      "epoch": 0.6747718171651214,
+      "grad_norm": 0.11872179061174393,
+      "learning_rate": 0.0001730292971568769,
+      "loss": 0.1419,
+      "step": 9352
+    },
+    {
+      "epoch": 0.6748439698401818,
+      "grad_norm": 0.16135835647583008,
+      "learning_rate": 0.00017302641073748017,
+      "loss": 0.1975,
+      "step": 9353
+    },
+    {
+      "epoch": 0.6749161225152422,
+      "grad_norm": 0.1160866841673851,
+      "learning_rate": 0.00017302352431808343,
+      "loss": 0.1376,
+      "step": 9354
+    },
+    {
+      "epoch": 0.6749882751903027,
+      "grad_norm": 0.12021545320749283,
+      "learning_rate": 0.00017302063789868667,
+      "loss": 0.1097,
+      "step": 9355
+    },
+    {
+      "epoch": 0.6750604278653631,
+      "grad_norm": 0.12414583563804626,
+      "learning_rate": 0.00017301775147928996,
+      "loss": 0.1826,
+      "step": 9356
+    },
+    {
+      "epoch": 0.6751325805404236,
+      "grad_norm": 0.10644211620092392,
+      "learning_rate": 0.00017301486505989322,
+      "loss": 0.195,
+      "step": 9357
+    },
+    {
+      "epoch": 0.6752047332154839,
+      "grad_norm": 0.11322013288736343,
+      "learning_rate": 0.00017301197864049648,
+      "loss": 0.1603,
+      "step": 9358
+    },
+    {
+      "epoch": 0.6752768858905444,
+      "grad_norm": 0.11740998178720474,
+      "learning_rate": 0.00017300909222109974,
+      "loss": 0.173,
+      "step": 9359
+    },
+    {
+      "epoch": 0.6753490385656048,
+      "grad_norm": 0.13309647142887115,
+      "learning_rate": 0.00017300620580170298,
+      "loss": 0.0993,
+      "step": 9360
+    },
+    {
+      "epoch": 0.6754211912406652,
+      "grad_norm": 0.12983696162700653,
+      "learning_rate": 0.00017300331938230624,
+      "loss": 0.141,
+      "step": 9361
+    },
+    {
+      "epoch": 0.6754933439157257,
+      "grad_norm": 0.11454816907644272,
+      "learning_rate": 0.0001730004329629095,
+      "loss": 0.1624,
+      "step": 9362
+    },
+    {
+      "epoch": 0.6755654965907861,
+      "grad_norm": 0.1439279317855835,
+      "learning_rate": 0.0001729975465435128,
+      "loss": 0.1423,
+      "step": 9363
+    },
+    {
+      "epoch": 0.6756376492658466,
+      "grad_norm": 0.11750374734401703,
+      "learning_rate": 0.00017299466012411606,
+      "loss": 0.1883,
+      "step": 9364
+    },
+    {
+      "epoch": 0.6757098019409069,
+      "grad_norm": 0.10431762784719467,
+      "learning_rate": 0.0001729917737047193,
+      "loss": 0.1434,
+      "step": 9365
+    },
+    {
+      "epoch": 0.6757819546159674,
+      "grad_norm": 0.13653522729873657,
+      "learning_rate": 0.00017298888728532256,
+      "loss": 0.0969,
+      "step": 9366
+    },
+    {
+      "epoch": 0.6758541072910278,
+      "grad_norm": 0.14985524117946625,
+      "learning_rate": 0.00017298600086592582,
+      "loss": 0.1314,
+      "step": 9367
+    },
+    {
+      "epoch": 0.6759262599660882,
+      "grad_norm": 0.1311166137456894,
+      "learning_rate": 0.00017298311444652908,
+      "loss": 0.1129,
+      "step": 9368
+    },
+    {
+      "epoch": 0.6759984126411487,
+      "grad_norm": 0.1330045461654663,
+      "learning_rate": 0.00017298022802713235,
+      "loss": 0.1555,
+      "step": 9369
+    },
+    {
+      "epoch": 0.6760705653162091,
+      "grad_norm": 0.14036214351654053,
+      "learning_rate": 0.0001729773416077356,
+      "loss": 0.1691,
+      "step": 9370
+    },
+    {
+      "epoch": 0.6761427179912696,
+      "grad_norm": 0.14099359512329102,
+      "learning_rate": 0.00017297445518833887,
+      "loss": 0.2007,
+      "step": 9371
+    },
+    {
+      "epoch": 0.6762148706663299,
+      "grad_norm": 0.12093814462423325,
+      "learning_rate": 0.00017297156876894214,
+      "loss": 0.1089,
+      "step": 9372
+    },
+    {
+      "epoch": 0.6762870233413903,
+      "grad_norm": 0.14383164048194885,
+      "learning_rate": 0.0001729686823495454,
+      "loss": 0.1937,
+      "step": 9373
+    },
+    {
+      "epoch": 0.6763591760164508,
+      "grad_norm": 0.12347964197397232,
+      "learning_rate": 0.00017296579593014866,
+      "loss": 0.1303,
+      "step": 9374
+    },
+    {
+      "epoch": 0.6764313286915112,
+      "grad_norm": 0.10055216401815414,
+      "learning_rate": 0.00017296290951075192,
+      "loss": 0.1261,
+      "step": 9375
+    },
+    {
+      "epoch": 0.6765034813665717,
+      "grad_norm": 0.11191868782043457,
+      "learning_rate": 0.00017296002309135516,
+      "loss": 0.1554,
+      "step": 9376
+    },
+    {
+      "epoch": 0.6765756340416321,
+      "grad_norm": 0.115901418030262,
+      "learning_rate": 0.00017295713667195845,
+      "loss": 0.1105,
+      "step": 9377
+    },
+    {
+      "epoch": 0.6766477867166926,
+      "grad_norm": 0.14895713329315186,
+      "learning_rate": 0.0001729542502525617,
+      "loss": 0.1164,
+      "step": 9378
+    },
+    {
+      "epoch": 0.6767199393917529,
+      "grad_norm": 0.11927609145641327,
+      "learning_rate": 0.00017295136383316498,
+      "loss": 0.1252,
+      "step": 9379
+    },
+    {
+      "epoch": 0.6767920920668133,
+      "grad_norm": 0.14049996435642242,
+      "learning_rate": 0.00017294847741376824,
+      "loss": 0.1873,
+      "step": 9380
+    },
+    {
+      "epoch": 0.6768642447418738,
+      "grad_norm": 0.12153515964746475,
+      "learning_rate": 0.00017294559099437147,
+      "loss": 0.1326,
+      "step": 9381
+    },
+    {
+      "epoch": 0.6769363974169342,
+      "grad_norm": 0.10713379085063934,
+      "learning_rate": 0.00017294270457497474,
+      "loss": 0.1187,
+      "step": 9382
+    },
+    {
+      "epoch": 0.6770085500919947,
+      "grad_norm": 0.1224794015288353,
+      "learning_rate": 0.000172939818155578,
+      "loss": 0.2151,
+      "step": 9383
+    },
+    {
+      "epoch": 0.6770807027670551,
+      "grad_norm": 0.11354351788759232,
+      "learning_rate": 0.0001729369317361813,
+      "loss": 0.1237,
+      "step": 9384
+    },
+    {
+      "epoch": 0.6771528554421156,
+      "grad_norm": 0.13472294807434082,
+      "learning_rate": 0.00017293404531678455,
+      "loss": 0.1396,
+      "step": 9385
+    },
+    {
+      "epoch": 0.6772250081171759,
+      "grad_norm": 0.1110520288348198,
+      "learning_rate": 0.0001729311588973878,
+      "loss": 0.1332,
+      "step": 9386
+    },
+    {
+      "epoch": 0.6772971607922363,
+      "grad_norm": 0.13699764013290405,
+      "learning_rate": 0.00017292827247799105,
+      "loss": 0.1321,
+      "step": 9387
+    },
+    {
+      "epoch": 0.6773693134672968,
+      "grad_norm": 0.11775174736976624,
+      "learning_rate": 0.00017292538605859432,
+      "loss": 0.1417,
+      "step": 9388
+    },
+    {
+      "epoch": 0.6774414661423572,
+      "grad_norm": 0.13674022257328033,
+      "learning_rate": 0.00017292249963919758,
+      "loss": 0.1393,
+      "step": 9389
+    },
+    {
+      "epoch": 0.6775136188174177,
+      "grad_norm": 0.12976695597171783,
+      "learning_rate": 0.00017291961321980084,
+      "loss": 0.1223,
+      "step": 9390
+    },
+    {
+      "epoch": 0.6775857714924781,
+      "grad_norm": 0.11459603160619736,
+      "learning_rate": 0.0001729167268004041,
+      "loss": 0.1396,
+      "step": 9391
+    },
+    {
+      "epoch": 0.6776579241675386,
+      "grad_norm": 0.11260157078504562,
+      "learning_rate": 0.00017291384038100737,
+      "loss": 0.1094,
+      "step": 9392
+    },
+    {
+      "epoch": 0.6777300768425989,
+      "grad_norm": 0.15651853382587433,
+      "learning_rate": 0.00017291095396161063,
+      "loss": 0.1238,
+      "step": 9393
+    },
+    {
+      "epoch": 0.6778022295176593,
+      "grad_norm": 0.11085481941699982,
+      "learning_rate": 0.0001729080675422139,
+      "loss": 0.111,
+      "step": 9394
+    },
+    {
+      "epoch": 0.6778743821927198,
+      "grad_norm": 0.13176412880420685,
+      "learning_rate": 0.00017290518112281716,
+      "loss": 0.125,
+      "step": 9395
+    },
+    {
+      "epoch": 0.6779465348677802,
+      "grad_norm": 0.12989777326583862,
+      "learning_rate": 0.00017290229470342042,
+      "loss": 0.1401,
+      "step": 9396
+    },
+    {
+      "epoch": 0.6780186875428407,
+      "grad_norm": 0.11334443837404251,
+      "learning_rate": 0.00017289940828402365,
+      "loss": 0.1895,
+      "step": 9397
+    },
+    {
+      "epoch": 0.6780908402179011,
+      "grad_norm": 0.12922954559326172,
+      "learning_rate": 0.00017289652186462694,
+      "loss": 0.1404,
+      "step": 9398
+    },
+    {
+      "epoch": 0.6781629928929616,
+      "grad_norm": 0.12999506294727325,
+      "learning_rate": 0.0001728936354452302,
+      "loss": 0.1793,
+      "step": 9399
+    },
+    {
+      "epoch": 0.6782351455680219,
+      "grad_norm": 0.14302140474319458,
+      "learning_rate": 0.00017289074902583347,
+      "loss": 0.1557,
+      "step": 9400
+    },
+    {
+      "epoch": 0.6783072982430823,
+      "grad_norm": 0.13245822489261627,
+      "learning_rate": 0.00017288786260643673,
+      "loss": 0.1292,
+      "step": 9401
+    },
+    {
+      "epoch": 0.6783794509181428,
+      "grad_norm": 0.15688206255435944,
+      "learning_rate": 0.00017288497618703997,
+      "loss": 0.1519,
+      "step": 9402
+    },
+    {
+      "epoch": 0.6784516035932032,
+      "grad_norm": 0.12463241070508957,
+      "learning_rate": 0.00017288208976764323,
+      "loss": 0.1406,
+      "step": 9403
+    },
+    {
+      "epoch": 0.6785237562682637,
+      "grad_norm": 0.12490706145763397,
+      "learning_rate": 0.0001728792033482465,
+      "loss": 0.1746,
+      "step": 9404
+    },
+    {
+      "epoch": 0.6785959089433241,
+      "grad_norm": 0.1181960180401802,
+      "learning_rate": 0.00017287631692884978,
+      "loss": 0.1423,
+      "step": 9405
+    },
+    {
+      "epoch": 0.6786680616183846,
+      "grad_norm": 0.13778024911880493,
+      "learning_rate": 0.00017287343050945305,
+      "loss": 0.1425,
+      "step": 9406
+    },
+    {
+      "epoch": 0.6787402142934449,
+      "grad_norm": 0.12254858762025833,
+      "learning_rate": 0.00017287054409005628,
+      "loss": 0.1267,
+      "step": 9407
+    },
+    {
+      "epoch": 0.6788123669685053,
+      "grad_norm": 0.14109854400157928,
+      "learning_rate": 0.00017286765767065955,
+      "loss": 0.1313,
+      "step": 9408
+    },
+    {
+      "epoch": 0.6788845196435658,
+      "grad_norm": 0.1068730428814888,
+      "learning_rate": 0.0001728647712512628,
+      "loss": 0.1553,
+      "step": 9409
+    },
+    {
+      "epoch": 0.6789566723186262,
+      "grad_norm": 0.137517049908638,
+      "learning_rate": 0.00017286188483186607,
+      "loss": 0.1921,
+      "step": 9410
+    },
+    {
+      "epoch": 0.6790288249936867,
+      "grad_norm": 0.14153388142585754,
+      "learning_rate": 0.00017285899841246934,
+      "loss": 0.1704,
+      "step": 9411
+    },
+    {
+      "epoch": 0.6791009776687471,
+      "grad_norm": 0.1350041925907135,
+      "learning_rate": 0.0001728561119930726,
+      "loss": 0.1646,
+      "step": 9412
+    },
+    {
+      "epoch": 0.6791731303438076,
+      "grad_norm": 0.12162260711193085,
+      "learning_rate": 0.00017285322557367586,
+      "loss": 0.1823,
+      "step": 9413
+    },
+    {
+      "epoch": 0.6792452830188679,
+      "grad_norm": 0.1428827941417694,
+      "learning_rate": 0.00017285033915427912,
+      "loss": 0.1548,
+      "step": 9414
+    },
+    {
+      "epoch": 0.6793174356939283,
+      "grad_norm": 0.11298346519470215,
+      "learning_rate": 0.0001728474527348824,
+      "loss": 0.1114,
+      "step": 9415
+    },
+    {
+      "epoch": 0.6793895883689888,
+      "grad_norm": 0.14177198708057404,
+      "learning_rate": 0.00017284456631548565,
+      "loss": 0.1665,
+      "step": 9416
+    },
+    {
+      "epoch": 0.6794617410440492,
+      "grad_norm": 0.14046108722686768,
+      "learning_rate": 0.0001728416798960889,
+      "loss": 0.1998,
+      "step": 9417
+    },
+    {
+      "epoch": 0.6795338937191097,
+      "grad_norm": 0.13591402769088745,
+      "learning_rate": 0.00017283879347669218,
+      "loss": 0.1313,
+      "step": 9418
+    },
+    {
+      "epoch": 0.6796060463941701,
+      "grad_norm": 0.1478928178548813,
+      "learning_rate": 0.00017283590705729544,
+      "loss": 0.1595,
+      "step": 9419
+    },
+    {
+      "epoch": 0.6796781990692304,
+      "grad_norm": 0.10648253560066223,
+      "learning_rate": 0.0001728330206378987,
+      "loss": 0.1338,
+      "step": 9420
+    },
+    {
+      "epoch": 0.6797503517442909,
+      "grad_norm": 0.1144508346915245,
+      "learning_rate": 0.00017283013421850196,
+      "loss": 0.1517,
+      "step": 9421
+    },
+    {
+      "epoch": 0.6798225044193513,
+      "grad_norm": 0.12182658165693283,
+      "learning_rate": 0.00017282724779910523,
+      "loss": 0.1726,
+      "step": 9422
+    },
+    {
+      "epoch": 0.6798946570944118,
+      "grad_norm": 0.09788574278354645,
+      "learning_rate": 0.0001728243613797085,
+      "loss": 0.1191,
+      "step": 9423
+    },
+    {
+      "epoch": 0.6799668097694722,
+      "grad_norm": 0.12478946894407272,
+      "learning_rate": 0.00017282147496031173,
+      "loss": 0.1847,
+      "step": 9424
+    },
+    {
+      "epoch": 0.6800389624445327,
+      "grad_norm": 0.13396452367305756,
+      "learning_rate": 0.000172818588540915,
+      "loss": 0.1459,
+      "step": 9425
+    },
+    {
+      "epoch": 0.6801111151195931,
+      "grad_norm": 0.14490275084972382,
+      "learning_rate": 0.00017281570212151828,
+      "loss": 0.1488,
+      "step": 9426
+    },
+    {
+      "epoch": 0.6801832677946534,
+      "grad_norm": 0.11920656263828278,
+      "learning_rate": 0.00017281281570212154,
+      "loss": 0.1479,
+      "step": 9427
+    },
+    {
+      "epoch": 0.6802554204697139,
+      "grad_norm": 0.13172681629657745,
+      "learning_rate": 0.0001728099292827248,
+      "loss": 0.1396,
+      "step": 9428
+    },
+    {
+      "epoch": 0.6803275731447743,
+      "grad_norm": 0.11047541350126266,
+      "learning_rate": 0.00017280704286332804,
+      "loss": 0.1122,
+      "step": 9429
+    },
+    {
+      "epoch": 0.6803997258198348,
+      "grad_norm": 0.10893700271844864,
+      "learning_rate": 0.0001728041564439313,
+      "loss": 0.1294,
+      "step": 9430
+    },
+    {
+      "epoch": 0.6804718784948952,
+      "grad_norm": 0.12578247487545013,
+      "learning_rate": 0.00017280127002453457,
+      "loss": 0.1333,
+      "step": 9431
+    },
+    {
+      "epoch": 0.6805440311699557,
+      "grad_norm": 0.13303852081298828,
+      "learning_rate": 0.00017279838360513783,
+      "loss": 0.1471,
+      "step": 9432
+    },
+    {
+      "epoch": 0.6806161838450161,
+      "grad_norm": 0.13911806046962738,
+      "learning_rate": 0.00017279549718574112,
+      "loss": 0.1831,
+      "step": 9433
+    },
+    {
+      "epoch": 0.6806883365200764,
+      "grad_norm": 0.10774588584899902,
+      "learning_rate": 0.00017279261076634436,
+      "loss": 0.1762,
+      "step": 9434
+    },
+    {
+      "epoch": 0.6807604891951369,
+      "grad_norm": 0.12354011088609695,
+      "learning_rate": 0.00017278972434694762,
+      "loss": 0.2078,
+      "step": 9435
+    },
+    {
+      "epoch": 0.6808326418701973,
+      "grad_norm": 0.1513359546661377,
+      "learning_rate": 0.00017278683792755088,
+      "loss": 0.1283,
+      "step": 9436
+    },
+    {
+      "epoch": 0.6809047945452578,
+      "grad_norm": 0.13830235600471497,
+      "learning_rate": 0.00017278395150815414,
+      "loss": 0.1261,
+      "step": 9437
+    },
+    {
+      "epoch": 0.6809769472203182,
+      "grad_norm": 0.13394580781459808,
+      "learning_rate": 0.0001727810650887574,
+      "loss": 0.179,
+      "step": 9438
+    },
+    {
+      "epoch": 0.6810490998953787,
+      "grad_norm": 0.11969741433858871,
+      "learning_rate": 0.00017277817866936067,
+      "loss": 0.1396,
+      "step": 9439
+    },
+    {
+      "epoch": 0.6811212525704391,
+      "grad_norm": 0.13217154145240784,
+      "learning_rate": 0.00017277529224996393,
+      "loss": 0.1401,
+      "step": 9440
+    },
+    {
+      "epoch": 0.6811934052454994,
+      "grad_norm": 0.11761936545372009,
+      "learning_rate": 0.0001727724058305672,
+      "loss": 0.1316,
+      "step": 9441
+    },
+    {
+      "epoch": 0.6812655579205599,
+      "grad_norm": 0.1200450211763382,
+      "learning_rate": 0.00017276951941117046,
+      "loss": 0.1436,
+      "step": 9442
+    },
+    {
+      "epoch": 0.6813377105956203,
+      "grad_norm": 0.1316228061914444,
+      "learning_rate": 0.00017276663299177372,
+      "loss": 0.1564,
+      "step": 9443
+    },
+    {
+      "epoch": 0.6814098632706808,
+      "grad_norm": 0.12965944409370422,
+      "learning_rate": 0.00017276374657237698,
+      "loss": 0.1458,
+      "step": 9444
+    },
+    {
+      "epoch": 0.6814820159457412,
+      "grad_norm": 0.13953819870948792,
+      "learning_rate": 0.00017276086015298022,
+      "loss": 0.1425,
+      "step": 9445
+    },
+    {
+      "epoch": 0.6815541686208016,
+      "grad_norm": 0.1284487396478653,
+      "learning_rate": 0.00017275797373358348,
+      "loss": 0.1378,
+      "step": 9446
+    },
+    {
+      "epoch": 0.6816263212958621,
+      "grad_norm": 0.09896153956651688,
+      "learning_rate": 0.00017275508731418677,
+      "loss": 0.1547,
+      "step": 9447
+    },
+    {
+      "epoch": 0.6816984739709224,
+      "grad_norm": 0.11616410315036774,
+      "learning_rate": 0.00017275220089479004,
+      "loss": 0.116,
+      "step": 9448
+    },
+    {
+      "epoch": 0.6817706266459829,
+      "grad_norm": 0.150199756026268,
+      "learning_rate": 0.0001727493144753933,
+      "loss": 0.1362,
+      "step": 9449
+    },
+    {
+      "epoch": 0.6818427793210433,
+      "grad_norm": 0.15000593662261963,
+      "learning_rate": 0.00017274642805599653,
+      "loss": 0.1047,
+      "step": 9450
+    },
+    {
+      "epoch": 0.6819149319961038,
+      "grad_norm": 0.1399821788072586,
+      "learning_rate": 0.0001727435416365998,
+      "loss": 0.1229,
+      "step": 9451
+    },
+    {
+      "epoch": 0.6819870846711642,
+      "grad_norm": 0.12325285375118256,
+      "learning_rate": 0.00017274065521720306,
+      "loss": 0.1374,
+      "step": 9452
+    },
+    {
+      "epoch": 0.6820592373462246,
+      "grad_norm": 0.10625096410512924,
+      "learning_rate": 0.00017273776879780632,
+      "loss": 0.0805,
+      "step": 9453
+    },
+    {
+      "epoch": 0.6821313900212851,
+      "grad_norm": 0.1238563284277916,
+      "learning_rate": 0.00017273488237840961,
+      "loss": 0.1878,
+      "step": 9454
+    },
+    {
+      "epoch": 0.6822035426963454,
+      "grad_norm": 0.14863990247249603,
+      "learning_rate": 0.00017273199595901285,
+      "loss": 0.1547,
+      "step": 9455
+    },
+    {
+      "epoch": 0.6822756953714059,
+      "grad_norm": 0.14600786566734314,
+      "learning_rate": 0.0001727291095396161,
+      "loss": 0.187,
+      "step": 9456
+    },
+    {
+      "epoch": 0.6823478480464663,
+      "grad_norm": 0.13600049912929535,
+      "learning_rate": 0.00017272622312021938,
+      "loss": 0.1425,
+      "step": 9457
+    },
+    {
+      "epoch": 0.6824200007215268,
+      "grad_norm": 0.14593887329101562,
+      "learning_rate": 0.00017272333670082264,
+      "loss": 0.1396,
+      "step": 9458
+    },
+    {
+      "epoch": 0.6824921533965872,
+      "grad_norm": 0.10805844515562057,
+      "learning_rate": 0.0001727204502814259,
+      "loss": 0.1403,
+      "step": 9459
+    },
+    {
+      "epoch": 0.6825643060716476,
+      "grad_norm": 0.13881978392601013,
+      "learning_rate": 0.00017271756386202916,
+      "loss": 0.1365,
+      "step": 9460
+    },
+    {
+      "epoch": 0.6826364587467081,
+      "grad_norm": 0.11945115774869919,
+      "learning_rate": 0.00017271467744263243,
+      "loss": 0.1848,
+      "step": 9461
+    },
+    {
+      "epoch": 0.6827086114217684,
+      "grad_norm": 0.10048525780439377,
+      "learning_rate": 0.0001727117910232357,
+      "loss": 0.1184,
+      "step": 9462
+    },
+    {
+      "epoch": 0.6827807640968289,
+      "grad_norm": 0.1330692172050476,
+      "learning_rate": 0.00017270890460383895,
+      "loss": 0.124,
+      "step": 9463
+    },
+    {
+      "epoch": 0.6828529167718893,
+      "grad_norm": 0.1183803454041481,
+      "learning_rate": 0.00017270601818444222,
+      "loss": 0.1459,
+      "step": 9464
+    },
+    {
+      "epoch": 0.6829250694469498,
+      "grad_norm": 0.1102006658911705,
+      "learning_rate": 0.00017270313176504548,
+      "loss": 0.1913,
+      "step": 9465
+    },
+    {
+      "epoch": 0.6829972221220102,
+      "grad_norm": 0.1035265251994133,
+      "learning_rate": 0.00017270024534564871,
+      "loss": 0.16,
+      "step": 9466
+    },
+    {
+      "epoch": 0.6830693747970706,
+      "grad_norm": 0.13262054324150085,
+      "learning_rate": 0.00017269735892625198,
+      "loss": 0.1612,
+      "step": 9467
+    },
+    {
+      "epoch": 0.6831415274721311,
+      "grad_norm": 0.12814365327358246,
+      "learning_rate": 0.00017269447250685527,
+      "loss": 0.1059,
+      "step": 9468
+    },
+    {
+      "epoch": 0.6832136801471914,
+      "grad_norm": 0.10215715318918228,
+      "learning_rate": 0.00017269158608745853,
+      "loss": 0.1637,
+      "step": 9469
+    },
+    {
+      "epoch": 0.6832858328222519,
+      "grad_norm": 0.13353413343429565,
+      "learning_rate": 0.0001726886996680618,
+      "loss": 0.1213,
+      "step": 9470
+    },
+    {
+      "epoch": 0.6833579854973123,
+      "grad_norm": 0.1456976979970932,
+      "learning_rate": 0.00017268581324866503,
+      "loss": 0.1989,
+      "step": 9471
+    },
+    {
+      "epoch": 0.6834301381723727,
+      "grad_norm": 0.11505065858364105,
+      "learning_rate": 0.0001726829268292683,
+      "loss": 0.1887,
+      "step": 9472
+    },
+    {
+      "epoch": 0.6835022908474332,
+      "grad_norm": 0.11727729439735413,
+      "learning_rate": 0.00017268004040987156,
+      "loss": 0.1246,
+      "step": 9473
+    },
+    {
+      "epoch": 0.6835744435224936,
+      "grad_norm": 0.1266585737466812,
+      "learning_rate": 0.00017267715399047482,
+      "loss": 0.1695,
+      "step": 9474
+    },
+    {
+      "epoch": 0.6836465961975541,
+      "grad_norm": 0.13724775612354279,
+      "learning_rate": 0.0001726742675710781,
+      "loss": 0.1464,
+      "step": 9475
+    },
+    {
+      "epoch": 0.6837187488726144,
+      "grad_norm": 0.11454111337661743,
+      "learning_rate": 0.00017267138115168134,
+      "loss": 0.1337,
+      "step": 9476
+    },
+    {
+      "epoch": 0.6837909015476749,
+      "grad_norm": 0.13361407816410065,
+      "learning_rate": 0.0001726684947322846,
+      "loss": 0.1239,
+      "step": 9477
+    },
+    {
+      "epoch": 0.6838630542227353,
+      "grad_norm": 0.1123400554060936,
+      "learning_rate": 0.00017266560831288787,
+      "loss": 0.1107,
+      "step": 9478
+    },
+    {
+      "epoch": 0.6839352068977957,
+      "grad_norm": 0.12301890552043915,
+      "learning_rate": 0.00017266272189349113,
+      "loss": 0.1448,
+      "step": 9479
+    },
+    {
+      "epoch": 0.6840073595728562,
+      "grad_norm": 0.13098479807376862,
+      "learning_rate": 0.0001726598354740944,
+      "loss": 0.1417,
+      "step": 9480
+    },
+    {
+      "epoch": 0.6840795122479166,
+      "grad_norm": 0.12436211854219437,
+      "learning_rate": 0.00017265694905469766,
+      "loss": 0.1333,
+      "step": 9481
+    },
+    {
+      "epoch": 0.684151664922977,
+      "grad_norm": 0.11635185778141022,
+      "learning_rate": 0.00017265406263530092,
+      "loss": 0.2084,
+      "step": 9482
+    },
+    {
+      "epoch": 0.6842238175980374,
+      "grad_norm": 0.14020775258541107,
+      "learning_rate": 0.00017265117621590418,
+      "loss": 0.15,
+      "step": 9483
+    },
+    {
+      "epoch": 0.6842959702730979,
+      "grad_norm": 0.14138399064540863,
+      "learning_rate": 0.00017264828979650745,
+      "loss": 0.2069,
+      "step": 9484
+    },
+    {
+      "epoch": 0.6843681229481583,
+      "grad_norm": 0.13020771741867065,
+      "learning_rate": 0.0001726454033771107,
+      "loss": 0.1489,
+      "step": 9485
+    },
+    {
+      "epoch": 0.6844402756232187,
+      "grad_norm": 0.11580926924943924,
+      "learning_rate": 0.00017264251695771397,
+      "loss": 0.1375,
+      "step": 9486
+    },
+    {
+      "epoch": 0.6845124282982792,
+      "grad_norm": 0.12390075623989105,
+      "learning_rate": 0.0001726396305383172,
+      "loss": 0.1811,
+      "step": 9487
+    },
+    {
+      "epoch": 0.6845845809733396,
+      "grad_norm": 0.11783546954393387,
+      "learning_rate": 0.00017263674411892047,
+      "loss": 0.13,
+      "step": 9488
+    },
+    {
+      "epoch": 0.6846567336484,
+      "grad_norm": 0.13768181204795837,
+      "learning_rate": 0.00017263385769952376,
+      "loss": 0.1394,
+      "step": 9489
+    },
+    {
+      "epoch": 0.6847288863234604,
+      "grad_norm": 0.13023586571216583,
+      "learning_rate": 0.00017263097128012702,
+      "loss": 0.1207,
+      "step": 9490
+    },
+    {
+      "epoch": 0.6848010389985209,
+      "grad_norm": 0.12852199375629425,
+      "learning_rate": 0.0001726280848607303,
+      "loss": 0.1538,
+      "step": 9491
+    },
+    {
+      "epoch": 0.6848731916735813,
+      "grad_norm": 0.15153183043003082,
+      "learning_rate": 0.00017262519844133352,
+      "loss": 0.1209,
+      "step": 9492
+    },
+    {
+      "epoch": 0.6849453443486417,
+      "grad_norm": 0.12875881791114807,
+      "learning_rate": 0.00017262231202193679,
+      "loss": 0.1907,
+      "step": 9493
+    },
+    {
+      "epoch": 0.6850174970237022,
+      "grad_norm": 0.11426623910665512,
+      "learning_rate": 0.00017261942560254005,
+      "loss": 0.1444,
+      "step": 9494
+    },
+    {
+      "epoch": 0.6850896496987626,
+      "grad_norm": 0.15419016778469086,
+      "learning_rate": 0.0001726165391831433,
+      "loss": 0.1739,
+      "step": 9495
+    },
+    {
+      "epoch": 0.685161802373823,
+      "grad_norm": 0.09922267496585846,
+      "learning_rate": 0.0001726136527637466,
+      "loss": 0.0887,
+      "step": 9496
+    },
+    {
+      "epoch": 0.6852339550488834,
+      "grad_norm": 0.12559925019741058,
+      "learning_rate": 0.00017261076634434984,
+      "loss": 0.1384,
+      "step": 9497
+    },
+    {
+      "epoch": 0.6853061077239438,
+      "grad_norm": 0.16568002104759216,
+      "learning_rate": 0.0001726078799249531,
+      "loss": 0.1957,
+      "step": 9498
+    },
+    {
+      "epoch": 0.6853782603990043,
+      "grad_norm": 0.13108040392398834,
+      "learning_rate": 0.00017260499350555636,
+      "loss": 0.1209,
+      "step": 9499
+    },
+    {
+      "epoch": 0.6854504130740647,
+      "grad_norm": 0.12693406641483307,
+      "learning_rate": 0.00017260210708615963,
+      "loss": 0.1452,
+      "step": 9500
+    },
+    {
+      "epoch": 0.6855225657491252,
+      "grad_norm": 0.15336719155311584,
+      "learning_rate": 0.0001725992206667629,
+      "loss": 0.1389,
+      "step": 9501
+    },
+    {
+      "epoch": 0.6855947184241856,
+      "grad_norm": 0.11107275635004044,
+      "learning_rate": 0.00017259633424736615,
+      "loss": 0.0965,
+      "step": 9502
+    },
+    {
+      "epoch": 0.685666871099246,
+      "grad_norm": 0.13186608254909515,
+      "learning_rate": 0.0001725934478279694,
+      "loss": 0.1351,
+      "step": 9503
+    },
+    {
+      "epoch": 0.6857390237743064,
+      "grad_norm": 0.11895376443862915,
+      "learning_rate": 0.00017259056140857268,
+      "loss": 0.0987,
+      "step": 9504
+    },
+    {
+      "epoch": 0.6858111764493668,
+      "grad_norm": 0.11779962480068207,
+      "learning_rate": 0.00017258767498917594,
+      "loss": 0.1417,
+      "step": 9505
+    },
+    {
+      "epoch": 0.6858833291244273,
+      "grad_norm": 0.12343606352806091,
+      "learning_rate": 0.0001725847885697792,
+      "loss": 0.1554,
+      "step": 9506
+    },
+    {
+      "epoch": 0.6859554817994877,
+      "grad_norm": 0.13738082349300385,
+      "learning_rate": 0.00017258190215038247,
+      "loss": 0.198,
+      "step": 9507
+    },
+    {
+      "epoch": 0.6860276344745482,
+      "grad_norm": 0.13566620647907257,
+      "learning_rate": 0.0001725790157309857,
+      "loss": 0.1304,
+      "step": 9508
+    },
+    {
+      "epoch": 0.6860997871496086,
+      "grad_norm": 0.12368319928646088,
+      "learning_rate": 0.00017257612931158897,
+      "loss": 0.1367,
+      "step": 9509
+    },
+    {
+      "epoch": 0.686171939824669,
+      "grad_norm": 0.16261732578277588,
+      "learning_rate": 0.00017257324289219223,
+      "loss": 0.18,
+      "step": 9510
+    },
+    {
+      "epoch": 0.6862440924997294,
+      "grad_norm": 0.11897819489240646,
+      "learning_rate": 0.00017257035647279552,
+      "loss": 0.1463,
+      "step": 9511
+    },
+    {
+      "epoch": 0.6863162451747898,
+      "grad_norm": 0.10302698612213135,
+      "learning_rate": 0.00017256747005339878,
+      "loss": 0.1263,
+      "step": 9512
+    },
+    {
+      "epoch": 0.6863883978498503,
+      "grad_norm": 0.10292990505695343,
+      "learning_rate": 0.00017256458363400202,
+      "loss": 0.1433,
+      "step": 9513
+    },
+    {
+      "epoch": 0.6864605505249107,
+      "grad_norm": 0.13708685338497162,
+      "learning_rate": 0.00017256169721460528,
+      "loss": 0.1819,
+      "step": 9514
+    },
+    {
+      "epoch": 0.6865327031999712,
+      "grad_norm": 0.1115337535738945,
+      "learning_rate": 0.00017255881079520854,
+      "loss": 0.1542,
+      "step": 9515
+    },
+    {
+      "epoch": 0.6866048558750316,
+      "grad_norm": 0.15068112313747406,
+      "learning_rate": 0.0001725559243758118,
+      "loss": 0.1547,
+      "step": 9516
+    },
+    {
+      "epoch": 0.686677008550092,
+      "grad_norm": 0.13998079299926758,
+      "learning_rate": 0.00017255303795641507,
+      "loss": 0.1274,
+      "step": 9517
+    },
+    {
+      "epoch": 0.6867491612251524,
+      "grad_norm": 0.1232718825340271,
+      "learning_rate": 0.00017255015153701833,
+      "loss": 0.1221,
+      "step": 9518
+    },
+    {
+      "epoch": 0.6868213139002128,
+      "grad_norm": 0.11890007555484772,
+      "learning_rate": 0.0001725472651176216,
+      "loss": 0.1534,
+      "step": 9519
+    },
+    {
+      "epoch": 0.6868934665752733,
+      "grad_norm": 0.12487497925758362,
+      "learning_rate": 0.00017254437869822486,
+      "loss": 0.1456,
+      "step": 9520
+    },
+    {
+      "epoch": 0.6869656192503337,
+      "grad_norm": 0.13111813366413116,
+      "learning_rate": 0.00017254149227882812,
+      "loss": 0.124,
+      "step": 9521
+    },
+    {
+      "epoch": 0.6870377719253942,
+      "grad_norm": 0.1577240377664566,
+      "learning_rate": 0.00017253860585943138,
+      "loss": 0.1544,
+      "step": 9522
+    },
+    {
+      "epoch": 0.6871099246004546,
+      "grad_norm": 0.14386539161205292,
+      "learning_rate": 0.00017253571944003465,
+      "loss": 0.1515,
+      "step": 9523
+    },
+    {
+      "epoch": 0.687182077275515,
+      "grad_norm": 0.12887921929359436,
+      "learning_rate": 0.00017253283302063788,
+      "loss": 0.1457,
+      "step": 9524
+    },
+    {
+      "epoch": 0.6872542299505754,
+      "grad_norm": 0.1316738724708557,
+      "learning_rate": 0.00017252994660124117,
+      "loss": 0.1257,
+      "step": 9525
+    },
+    {
+      "epoch": 0.6873263826256358,
+      "grad_norm": 0.12598678469657898,
+      "learning_rate": 0.00017252706018184444,
+      "loss": 0.1165,
+      "step": 9526
+    },
+    {
+      "epoch": 0.6873985353006963,
+      "grad_norm": 0.11530796438455582,
+      "learning_rate": 0.0001725241737624477,
+      "loss": 0.1135,
+      "step": 9527
+    },
+    {
+      "epoch": 0.6874706879757567,
+      "grad_norm": 0.1386406421661377,
+      "learning_rate": 0.00017252128734305096,
+      "loss": 0.1847,
+      "step": 9528
+    },
+    {
+      "epoch": 0.6875428406508172,
+      "grad_norm": 0.12268256396055222,
+      "learning_rate": 0.00017251840092365422,
+      "loss": 0.2082,
+      "step": 9529
+    },
+    {
+      "epoch": 0.6876149933258776,
+      "grad_norm": 0.15390999615192413,
+      "learning_rate": 0.00017251551450425746,
+      "loss": 0.128,
+      "step": 9530
+    },
+    {
+      "epoch": 0.6876871460009379,
+      "grad_norm": 0.10604900121688843,
+      "learning_rate": 0.00017251262808486072,
+      "loss": 0.106,
+      "step": 9531
+    },
+    {
+      "epoch": 0.6877592986759984,
+      "grad_norm": 0.1284235268831253,
+      "learning_rate": 0.000172509741665464,
+      "loss": 0.1408,
+      "step": 9532
+    },
+    {
+      "epoch": 0.6878314513510588,
+      "grad_norm": 0.1350274533033371,
+      "learning_rate": 0.00017250685524606728,
+      "loss": 0.1856,
+      "step": 9533
+    },
+    {
+      "epoch": 0.6879036040261193,
+      "grad_norm": 0.11651486158370972,
+      "learning_rate": 0.00017250396882667054,
+      "loss": 0.1913,
+      "step": 9534
+    },
+    {
+      "epoch": 0.6879757567011797,
+      "grad_norm": 0.15933158993721008,
+      "learning_rate": 0.00017250108240727377,
+      "loss": 0.1646,
+      "step": 9535
+    },
+    {
+      "epoch": 0.6880479093762402,
+      "grad_norm": 0.10325729101896286,
+      "learning_rate": 0.00017249819598787704,
+      "loss": 0.1412,
+      "step": 9536
+    },
+    {
+      "epoch": 0.6881200620513006,
+      "grad_norm": 0.12476102262735367,
+      "learning_rate": 0.0001724953095684803,
+      "loss": 0.14,
+      "step": 9537
+    },
+    {
+      "epoch": 0.6881922147263609,
+      "grad_norm": 0.10627015680074692,
+      "learning_rate": 0.00017249242314908356,
+      "loss": 0.1665,
+      "step": 9538
+    },
+    {
+      "epoch": 0.6882643674014214,
+      "grad_norm": 0.10776442289352417,
+      "learning_rate": 0.00017248953672968685,
+      "loss": 0.1371,
+      "step": 9539
+    },
+    {
+      "epoch": 0.6883365200764818,
+      "grad_norm": 0.13613662123680115,
+      "learning_rate": 0.0001724866503102901,
+      "loss": 0.1821,
+      "step": 9540
+    },
+    {
+      "epoch": 0.6884086727515423,
+      "grad_norm": 0.11171852797269821,
+      "learning_rate": 0.00017248376389089335,
+      "loss": 0.1647,
+      "step": 9541
+    },
+    {
+      "epoch": 0.6884808254266027,
+      "grad_norm": 0.11853030323982239,
+      "learning_rate": 0.00017248087747149662,
+      "loss": 0.1447,
+      "step": 9542
+    },
+    {
+      "epoch": 0.6885529781016632,
+      "grad_norm": 0.11910616606473923,
+      "learning_rate": 0.00017247799105209988,
+      "loss": 0.1787,
+      "step": 9543
+    },
+    {
+      "epoch": 0.6886251307767235,
+      "grad_norm": 0.11468103528022766,
+      "learning_rate": 0.00017247510463270314,
+      "loss": 0.1179,
+      "step": 9544
+    },
+    {
+      "epoch": 0.6886972834517839,
+      "grad_norm": 0.1218499168753624,
+      "learning_rate": 0.0001724722182133064,
+      "loss": 0.1822,
+      "step": 9545
+    },
+    {
+      "epoch": 0.6887694361268444,
+      "grad_norm": 0.1380220502614975,
+      "learning_rate": 0.00017246933179390967,
+      "loss": 0.142,
+      "step": 9546
+    },
+    {
+      "epoch": 0.6888415888019048,
+      "grad_norm": 0.1014731153845787,
+      "learning_rate": 0.00017246644537451293,
+      "loss": 0.158,
+      "step": 9547
+    },
+    {
+      "epoch": 0.6889137414769653,
+      "grad_norm": 0.12388917803764343,
+      "learning_rate": 0.0001724635589551162,
+      "loss": 0.1516,
+      "step": 9548
+    },
+    {
+      "epoch": 0.6889858941520257,
+      "grad_norm": 0.13061605393886566,
+      "learning_rate": 0.00017246067253571946,
+      "loss": 0.1361,
+      "step": 9549
+    },
+    {
+      "epoch": 0.6890580468270862,
+      "grad_norm": 0.11290262639522552,
+      "learning_rate": 0.00017245778611632272,
+      "loss": 0.1292,
+      "step": 9550
+    },
+    {
+      "epoch": 0.6891301995021465,
+      "grad_norm": 0.10076184570789337,
+      "learning_rate": 0.00017245489969692595,
+      "loss": 0.1734,
+      "step": 9551
+    },
+    {
+      "epoch": 0.6892023521772069,
+      "grad_norm": 0.1263117641210556,
+      "learning_rate": 0.00017245201327752922,
+      "loss": 0.1593,
+      "step": 9552
+    },
+    {
+      "epoch": 0.6892745048522674,
+      "grad_norm": 0.13651815056800842,
+      "learning_rate": 0.0001724491268581325,
+      "loss": 0.1683,
+      "step": 9553
+    },
+    {
+      "epoch": 0.6893466575273278,
+      "grad_norm": 0.13438211381435394,
+      "learning_rate": 0.00017244624043873577,
+      "loss": 0.1387,
+      "step": 9554
+    },
+    {
+      "epoch": 0.6894188102023883,
+      "grad_norm": 0.11674962937831879,
+      "learning_rate": 0.00017244335401933903,
+      "loss": 0.1842,
+      "step": 9555
+    },
+    {
+      "epoch": 0.6894909628774487,
+      "grad_norm": 0.14424452185630798,
+      "learning_rate": 0.00017244046759994227,
+      "loss": 0.1309,
+      "step": 9556
+    },
+    {
+      "epoch": 0.6895631155525092,
+      "grad_norm": 0.1262129247188568,
+      "learning_rate": 0.00017243758118054553,
+      "loss": 0.1803,
+      "step": 9557
+    },
+    {
+      "epoch": 0.6896352682275695,
+      "grad_norm": 0.13307301700115204,
+      "learning_rate": 0.0001724346947611488,
+      "loss": 0.1321,
+      "step": 9558
+    },
+    {
+      "epoch": 0.6897074209026299,
+      "grad_norm": 0.13330549001693726,
+      "learning_rate": 0.00017243180834175206,
+      "loss": 0.1213,
+      "step": 9559
+    },
+    {
+      "epoch": 0.6897795735776904,
+      "grad_norm": 0.11524797230958939,
+      "learning_rate": 0.00017242892192235535,
+      "loss": 0.1487,
+      "step": 9560
+    },
+    {
+      "epoch": 0.6898517262527508,
+      "grad_norm": 0.12419763952493668,
+      "learning_rate": 0.00017242603550295858,
+      "loss": 0.1586,
+      "step": 9561
+    },
+    {
+      "epoch": 0.6899238789278113,
+      "grad_norm": 0.1287110149860382,
+      "learning_rate": 0.00017242314908356185,
+      "loss": 0.1274,
+      "step": 9562
+    },
+    {
+      "epoch": 0.6899960316028717,
+      "grad_norm": 0.15064340829849243,
+      "learning_rate": 0.0001724202626641651,
+      "loss": 0.1192,
+      "step": 9563
+    },
+    {
+      "epoch": 0.6900681842779322,
+      "grad_norm": 0.11350834369659424,
+      "learning_rate": 0.00017241737624476837,
+      "loss": 0.2087,
+      "step": 9564
+    },
+    {
+      "epoch": 0.6901403369529925,
+      "grad_norm": 0.11448984593153,
+      "learning_rate": 0.00017241448982537164,
+      "loss": 0.178,
+      "step": 9565
+    },
+    {
+      "epoch": 0.6902124896280529,
+      "grad_norm": 0.09771303087472916,
+      "learning_rate": 0.0001724116034059749,
+      "loss": 0.143,
+      "step": 9566
+    },
+    {
+      "epoch": 0.6902846423031134,
+      "grad_norm": 0.12573154270648956,
+      "learning_rate": 0.00017240871698657816,
+      "loss": 0.1395,
+      "step": 9567
+    },
+    {
+      "epoch": 0.6903567949781738,
+      "grad_norm": 0.10742811858654022,
+      "learning_rate": 0.00017240583056718142,
+      "loss": 0.1624,
+      "step": 9568
+    },
+    {
+      "epoch": 0.6904289476532343,
+      "grad_norm": 0.13596831262111664,
+      "learning_rate": 0.0001724029441477847,
+      "loss": 0.131,
+      "step": 9569
+    },
+    {
+      "epoch": 0.6905011003282947,
+      "grad_norm": 0.12550032138824463,
+      "learning_rate": 0.00017240005772838795,
+      "loss": 0.1523,
+      "step": 9570
+    },
+    {
+      "epoch": 0.6905732530033551,
+      "grad_norm": 0.1494235396385193,
+      "learning_rate": 0.0001723971713089912,
+      "loss": 0.1294,
+      "step": 9571
+    },
+    {
+      "epoch": 0.6906454056784155,
+      "grad_norm": 0.11433825641870499,
+      "learning_rate": 0.00017239428488959445,
+      "loss": 0.0807,
+      "step": 9572
+    },
+    {
+      "epoch": 0.6907175583534759,
+      "grad_norm": 0.12817634642124176,
+      "learning_rate": 0.0001723913984701977,
+      "loss": 0.1258,
+      "step": 9573
+    },
+    {
+      "epoch": 0.6907897110285364,
+      "grad_norm": 0.1334555745124817,
+      "learning_rate": 0.000172388512050801,
+      "loss": 0.1566,
+      "step": 9574
+    },
+    {
+      "epoch": 0.6908618637035968,
+      "grad_norm": 0.1532028317451477,
+      "learning_rate": 0.00017238562563140426,
+      "loss": 0.1331,
+      "step": 9575
+    },
+    {
+      "epoch": 0.6909340163786573,
+      "grad_norm": 0.11318008601665497,
+      "learning_rate": 0.00017238273921200753,
+      "loss": 0.1324,
+      "step": 9576
+    },
+    {
+      "epoch": 0.6910061690537177,
+      "grad_norm": 0.13026483356952667,
+      "learning_rate": 0.00017237985279261076,
+      "loss": 0.1817,
+      "step": 9577
+    },
+    {
+      "epoch": 0.6910783217287781,
+      "grad_norm": 0.1172034814953804,
+      "learning_rate": 0.00017237696637321403,
+      "loss": 0.1744,
+      "step": 9578
+    },
+    {
+      "epoch": 0.6911504744038385,
+      "grad_norm": 0.11258107423782349,
+      "learning_rate": 0.0001723740799538173,
+      "loss": 0.1678,
+      "step": 9579
+    },
+    {
+      "epoch": 0.6912226270788989,
+      "grad_norm": 0.12172319740056992,
+      "learning_rate": 0.00017237119353442055,
+      "loss": 0.1469,
+      "step": 9580
+    },
+    {
+      "epoch": 0.6912947797539594,
+      "grad_norm": 0.1422765851020813,
+      "learning_rate": 0.00017236830711502384,
+      "loss": 0.1624,
+      "step": 9581
+    },
+    {
+      "epoch": 0.6913669324290198,
+      "grad_norm": 0.10180949419736862,
+      "learning_rate": 0.00017236542069562708,
+      "loss": 0.1255,
+      "step": 9582
+    },
+    {
+      "epoch": 0.6914390851040803,
+      "grad_norm": 0.11146809905767441,
+      "learning_rate": 0.00017236253427623034,
+      "loss": 0.1089,
+      "step": 9583
+    },
+    {
+      "epoch": 0.6915112377791407,
+      "grad_norm": 0.10429064929485321,
+      "learning_rate": 0.0001723596478568336,
+      "loss": 0.1667,
+      "step": 9584
+    },
+    {
+      "epoch": 0.6915833904542011,
+      "grad_norm": 0.12864717841148376,
+      "learning_rate": 0.00017235676143743687,
+      "loss": 0.1105,
+      "step": 9585
+    },
+    {
+      "epoch": 0.6916555431292615,
+      "grad_norm": 0.16162557899951935,
+      "learning_rate": 0.00017235387501804013,
+      "loss": 0.1241,
+      "step": 9586
+    },
+    {
+      "epoch": 0.6917276958043219,
+      "grad_norm": 0.14276309311389923,
+      "learning_rate": 0.0001723509885986434,
+      "loss": 0.1546,
+      "step": 9587
+    },
+    {
+      "epoch": 0.6917998484793824,
+      "grad_norm": 0.11883494257926941,
+      "learning_rate": 0.00017234810217924666,
+      "loss": 0.1405,
+      "step": 9588
+    },
+    {
+      "epoch": 0.6918720011544428,
+      "grad_norm": 0.16758319735527039,
+      "learning_rate": 0.00017234521575984992,
+      "loss": 0.1938,
+      "step": 9589
+    },
+    {
+      "epoch": 0.6919441538295033,
+      "grad_norm": 0.12192453444004059,
+      "learning_rate": 0.00017234232934045318,
+      "loss": 0.1321,
+      "step": 9590
+    },
+    {
+      "epoch": 0.6920163065045637,
+      "grad_norm": 0.12941285967826843,
+      "learning_rate": 0.00017233944292105644,
+      "loss": 0.1712,
+      "step": 9591
+    },
+    {
+      "epoch": 0.6920884591796241,
+      "grad_norm": 0.11735109984874725,
+      "learning_rate": 0.0001723365565016597,
+      "loss": 0.1034,
+      "step": 9592
+    },
+    {
+      "epoch": 0.6921606118546845,
+      "grad_norm": 0.11214487999677658,
+      "learning_rate": 0.00017233367008226294,
+      "loss": 0.1359,
+      "step": 9593
+    },
+    {
+      "epoch": 0.6922327645297449,
+      "grad_norm": 0.12300753593444824,
+      "learning_rate": 0.0001723307836628662,
+      "loss": 0.1582,
+      "step": 9594
+    },
+    {
+      "epoch": 0.6923049172048054,
+      "grad_norm": 0.13960377871990204,
+      "learning_rate": 0.0001723278972434695,
+      "loss": 0.2027,
+      "step": 9595
+    },
+    {
+      "epoch": 0.6923770698798658,
+      "grad_norm": 0.10137218236923218,
+      "learning_rate": 0.00017232501082407276,
+      "loss": 0.0833,
+      "step": 9596
+    },
+    {
+      "epoch": 0.6924492225549262,
+      "grad_norm": 0.11234530061483383,
+      "learning_rate": 0.00017232212440467602,
+      "loss": 0.1472,
+      "step": 9597
+    },
+    {
+      "epoch": 0.6925213752299867,
+      "grad_norm": 0.11832275986671448,
+      "learning_rate": 0.00017231923798527926,
+      "loss": 0.1347,
+      "step": 9598
+    },
+    {
+      "epoch": 0.6925935279050471,
+      "grad_norm": 0.11702826619148254,
+      "learning_rate": 0.00017231635156588252,
+      "loss": 0.1819,
+      "step": 9599
+    },
+    {
+      "epoch": 0.6926656805801075,
+      "grad_norm": 0.12916803359985352,
+      "learning_rate": 0.00017231346514648578,
+      "loss": 0.1322,
+      "step": 9600
+    },
+    {
+      "epoch": 0.6927378332551679,
+      "grad_norm": 0.13468345999717712,
+      "learning_rate": 0.00017231057872708905,
+      "loss": 0.2088,
+      "step": 9601
+    },
+    {
+      "epoch": 0.6928099859302284,
+      "grad_norm": 0.13211590051651,
+      "learning_rate": 0.00017230769230769234,
+      "loss": 0.1602,
+      "step": 9602
+    },
+    {
+      "epoch": 0.6928821386052888,
+      "grad_norm": 0.13652805984020233,
+      "learning_rate": 0.00017230480588829557,
+      "loss": 0.1893,
+      "step": 9603
+    },
+    {
+      "epoch": 0.6929542912803492,
+      "grad_norm": 0.14025311172008514,
+      "learning_rate": 0.00017230191946889884,
+      "loss": 0.1307,
+      "step": 9604
+    },
+    {
+      "epoch": 0.6930264439554097,
+      "grad_norm": 0.10820788890123367,
+      "learning_rate": 0.0001722990330495021,
+      "loss": 0.1834,
+      "step": 9605
+    },
+    {
+      "epoch": 0.69309859663047,
+      "grad_norm": 0.14675457775592804,
+      "learning_rate": 0.00017229614663010536,
+      "loss": 0.1804,
+      "step": 9606
+    },
+    {
+      "epoch": 0.6931707493055305,
+      "grad_norm": 0.11689016968011856,
+      "learning_rate": 0.00017229326021070862,
+      "loss": 0.1217,
+      "step": 9607
+    },
+    {
+      "epoch": 0.6932429019805909,
+      "grad_norm": 0.24402357637882233,
+      "learning_rate": 0.0001722903737913119,
+      "loss": 0.1643,
+      "step": 9608
+    },
+    {
+      "epoch": 0.6933150546556514,
+      "grad_norm": 0.11813085526227951,
+      "learning_rate": 0.00017228748737191515,
+      "loss": 0.1675,
+      "step": 9609
+    },
+    {
+      "epoch": 0.6933872073307118,
+      "grad_norm": 0.10200902819633484,
+      "learning_rate": 0.0001722846009525184,
+      "loss": 0.1088,
+      "step": 9610
+    },
+    {
+      "epoch": 0.6934593600057722,
+      "grad_norm": 0.11175123602151871,
+      "learning_rate": 0.00017228171453312168,
+      "loss": 0.1709,
+      "step": 9611
+    },
+    {
+      "epoch": 0.6935315126808327,
+      "grad_norm": 0.10914164036512375,
+      "learning_rate": 0.00017227882811372494,
+      "loss": 0.1564,
+      "step": 9612
+    },
+    {
+      "epoch": 0.693603665355893,
+      "grad_norm": 0.1380518078804016,
+      "learning_rate": 0.0001722759416943282,
+      "loss": 0.1299,
+      "step": 9613
+    },
+    {
+      "epoch": 0.6936758180309535,
+      "grad_norm": 0.12954658269882202,
+      "learning_rate": 0.00017227305527493144,
+      "loss": 0.1477,
+      "step": 9614
+    },
+    {
+      "epoch": 0.6937479707060139,
+      "grad_norm": 0.13407106697559357,
+      "learning_rate": 0.0001722701688555347,
+      "loss": 0.1991,
+      "step": 9615
+    },
+    {
+      "epoch": 0.6938201233810743,
+      "grad_norm": 0.1745564341545105,
+      "learning_rate": 0.000172267282436138,
+      "loss": 0.1809,
+      "step": 9616
+    },
+    {
+      "epoch": 0.6938922760561348,
+      "grad_norm": 0.112023264169693,
+      "learning_rate": 0.00017226439601674125,
+      "loss": 0.2297,
+      "step": 9617
+    },
+    {
+      "epoch": 0.6939644287311952,
+      "grad_norm": 0.1244281604886055,
+      "learning_rate": 0.00017226150959734452,
+      "loss": 0.1197,
+      "step": 9618
+    },
+    {
+      "epoch": 0.6940365814062557,
+      "grad_norm": 0.11674293130636215,
+      "learning_rate": 0.00017225862317794775,
+      "loss": 0.1706,
+      "step": 9619
+    },
+    {
+      "epoch": 0.694108734081316,
+      "grad_norm": 0.10764952003955841,
+      "learning_rate": 0.00017225573675855101,
+      "loss": 0.1818,
+      "step": 9620
+    },
+    {
+      "epoch": 0.6941808867563765,
+      "grad_norm": 0.1448487639427185,
+      "learning_rate": 0.00017225285033915428,
+      "loss": 0.1375,
+      "step": 9621
+    },
+    {
+      "epoch": 0.6942530394314369,
+      "grad_norm": 0.14177432656288147,
+      "learning_rate": 0.00017224996391975754,
+      "loss": 0.1386,
+      "step": 9622
+    },
+    {
+      "epoch": 0.6943251921064973,
+      "grad_norm": 0.11783361434936523,
+      "learning_rate": 0.00017224707750036083,
+      "loss": 0.1665,
+      "step": 9623
+    },
+    {
+      "epoch": 0.6943973447815578,
+      "grad_norm": 0.137073814868927,
+      "learning_rate": 0.00017224419108096407,
+      "loss": 0.1446,
+      "step": 9624
+    },
+    {
+      "epoch": 0.6944694974566182,
+      "grad_norm": 0.12243133783340454,
+      "learning_rate": 0.00017224130466156733,
+      "loss": 0.1175,
+      "step": 9625
+    },
+    {
+      "epoch": 0.6945416501316787,
+      "grad_norm": 0.13991667330265045,
+      "learning_rate": 0.0001722384182421706,
+      "loss": 0.1561,
+      "step": 9626
+    },
+    {
+      "epoch": 0.694613802806739,
+      "grad_norm": 0.14223317801952362,
+      "learning_rate": 0.00017223553182277386,
+      "loss": 0.1893,
+      "step": 9627
+    },
+    {
+      "epoch": 0.6946859554817995,
+      "grad_norm": 0.12393701821565628,
+      "learning_rate": 0.00017223264540337712,
+      "loss": 0.2033,
+      "step": 9628
+    },
+    {
+      "epoch": 0.6947581081568599,
+      "grad_norm": 0.16162846982479095,
+      "learning_rate": 0.00017222975898398038,
+      "loss": 0.1483,
+      "step": 9629
+    },
+    {
+      "epoch": 0.6948302608319203,
+      "grad_norm": 0.10883046686649323,
+      "learning_rate": 0.00017222687256458364,
+      "loss": 0.1971,
+      "step": 9630
+    },
+    {
+      "epoch": 0.6949024135069808,
+      "grad_norm": 0.13543400168418884,
+      "learning_rate": 0.0001722239861451869,
+      "loss": 0.1575,
+      "step": 9631
+    },
+    {
+      "epoch": 0.6949745661820412,
+      "grad_norm": 0.11675167083740234,
+      "learning_rate": 0.00017222109972579017,
+      "loss": 0.1327,
+      "step": 9632
+    },
+    {
+      "epoch": 0.6950467188571017,
+      "grad_norm": 0.12616172432899475,
+      "learning_rate": 0.00017221821330639343,
+      "loss": 0.1708,
+      "step": 9633
+    },
+    {
+      "epoch": 0.695118871532162,
+      "grad_norm": 0.12379202991724014,
+      "learning_rate": 0.0001722153268869967,
+      "loss": 0.1934,
+      "step": 9634
+    },
+    {
+      "epoch": 0.6951910242072225,
+      "grad_norm": 0.12510265409946442,
+      "learning_rate": 0.00017221244046759993,
+      "loss": 0.1411,
+      "step": 9635
+    },
+    {
+      "epoch": 0.6952631768822829,
+      "grad_norm": 0.1382853239774704,
+      "learning_rate": 0.0001722095540482032,
+      "loss": 0.1537,
+      "step": 9636
+    },
+    {
+      "epoch": 0.6953353295573433,
+      "grad_norm": 0.14490236341953278,
+      "learning_rate": 0.00017220666762880648,
+      "loss": 0.1318,
+      "step": 9637
+    },
+    {
+      "epoch": 0.6954074822324038,
+      "grad_norm": 0.20690834522247314,
+      "learning_rate": 0.00017220378120940975,
+      "loss": 0.1717,
+      "step": 9638
+    },
+    {
+      "epoch": 0.6954796349074642,
+      "grad_norm": 0.14243018627166748,
+      "learning_rate": 0.000172200894790013,
+      "loss": 0.1559,
+      "step": 9639
+    },
+    {
+      "epoch": 0.6955517875825247,
+      "grad_norm": 0.1079765185713768,
+      "learning_rate": 0.00017219800837061625,
+      "loss": 0.1484,
+      "step": 9640
+    },
+    {
+      "epoch": 0.695623940257585,
+      "grad_norm": 0.12757904827594757,
+      "learning_rate": 0.0001721951219512195,
+      "loss": 0.1159,
+      "step": 9641
+    },
+    {
+      "epoch": 0.6956960929326454,
+      "grad_norm": 0.09857811033725739,
+      "learning_rate": 0.00017219223553182277,
+      "loss": 0.1791,
+      "step": 9642
+    },
+    {
+      "epoch": 0.6957682456077059,
+      "grad_norm": 0.10688071697950363,
+      "learning_rate": 0.00017218934911242603,
+      "loss": 0.1583,
+      "step": 9643
+    },
+    {
+      "epoch": 0.6958403982827663,
+      "grad_norm": 0.1281883716583252,
+      "learning_rate": 0.00017218646269302932,
+      "loss": 0.1197,
+      "step": 9644
+    },
+    {
+      "epoch": 0.6959125509578268,
+      "grad_norm": 0.11293533444404602,
+      "learning_rate": 0.00017218357627363256,
+      "loss": 0.1473,
+      "step": 9645
+    },
+    {
+      "epoch": 0.6959847036328872,
+      "grad_norm": 0.09393231570720673,
+      "learning_rate": 0.00017218068985423582,
+      "loss": 0.1865,
+      "step": 9646
+    },
+    {
+      "epoch": 0.6960568563079477,
+      "grad_norm": 0.10949627310037613,
+      "learning_rate": 0.0001721778034348391,
+      "loss": 0.1177,
+      "step": 9647
+    },
+    {
+      "epoch": 0.696129008983008,
+      "grad_norm": 0.10585692524909973,
+      "learning_rate": 0.00017217491701544235,
+      "loss": 0.162,
+      "step": 9648
+    },
+    {
+      "epoch": 0.6962011616580684,
+      "grad_norm": 0.20258155465126038,
+      "learning_rate": 0.0001721720305960456,
+      "loss": 0.1325,
+      "step": 9649
+    },
+    {
+      "epoch": 0.6962733143331289,
+      "grad_norm": 0.11715184152126312,
+      "learning_rate": 0.00017216914417664888,
+      "loss": 0.0812,
+      "step": 9650
+    },
+    {
+      "epoch": 0.6963454670081893,
+      "grad_norm": 0.12601949274539948,
+      "learning_rate": 0.00017216625775725214,
+      "loss": 0.1534,
+      "step": 9651
+    },
+    {
+      "epoch": 0.6964176196832498,
+      "grad_norm": 0.12976346909999847,
+      "learning_rate": 0.0001721633713378554,
+      "loss": 0.1311,
+      "step": 9652
+    },
+    {
+      "epoch": 0.6964897723583102,
+      "grad_norm": 0.11151674389839172,
+      "learning_rate": 0.00017216048491845866,
+      "loss": 0.1125,
+      "step": 9653
+    },
+    {
+      "epoch": 0.6965619250333707,
+      "grad_norm": 0.11534590274095535,
+      "learning_rate": 0.00017215759849906193,
+      "loss": 0.1359,
+      "step": 9654
+    },
+    {
+      "epoch": 0.696634077708431,
+      "grad_norm": 0.11569371819496155,
+      "learning_rate": 0.0001721547120796652,
+      "loss": 0.1609,
+      "step": 9655
+    },
+    {
+      "epoch": 0.6967062303834914,
+      "grad_norm": 0.10117107629776001,
+      "learning_rate": 0.00017215182566026845,
+      "loss": 0.1113,
+      "step": 9656
+    },
+    {
+      "epoch": 0.6967783830585519,
+      "grad_norm": 0.20143665373325348,
+      "learning_rate": 0.0001721489392408717,
+      "loss": 0.2259,
+      "step": 9657
+    },
+    {
+      "epoch": 0.6968505357336123,
+      "grad_norm": 0.13160905241966248,
+      "learning_rate": 0.00017214605282147498,
+      "loss": 0.1889,
+      "step": 9658
+    },
+    {
+      "epoch": 0.6969226884086728,
+      "grad_norm": 0.12414603680372238,
+      "learning_rate": 0.00017214316640207824,
+      "loss": 0.1555,
+      "step": 9659
+    },
+    {
+      "epoch": 0.6969948410837332,
+      "grad_norm": 0.12692929804325104,
+      "learning_rate": 0.0001721402799826815,
+      "loss": 0.135,
+      "step": 9660
+    },
+    {
+      "epoch": 0.6970669937587937,
+      "grad_norm": 0.14018724858760834,
+      "learning_rate": 0.00017213739356328477,
+      "loss": 0.1293,
+      "step": 9661
+    },
+    {
+      "epoch": 0.697139146433854,
+      "grad_norm": 0.12804187834262848,
+      "learning_rate": 0.000172134507143888,
+      "loss": 0.1518,
+      "step": 9662
+    },
+    {
+      "epoch": 0.6972112991089144,
+      "grad_norm": 0.11388224363327026,
+      "learning_rate": 0.00017213162072449127,
+      "loss": 0.1431,
+      "step": 9663
+    },
+    {
+      "epoch": 0.6972834517839749,
+      "grad_norm": 0.12438242137432098,
+      "learning_rate": 0.00017212873430509453,
+      "loss": 0.2015,
+      "step": 9664
+    },
+    {
+      "epoch": 0.6973556044590353,
+      "grad_norm": 0.1338079273700714,
+      "learning_rate": 0.00017212584788569782,
+      "loss": 0.1611,
+      "step": 9665
+    },
+    {
+      "epoch": 0.6974277571340958,
+      "grad_norm": 0.14716395735740662,
+      "learning_rate": 0.00017212296146630108,
+      "loss": 0.1185,
+      "step": 9666
+    },
+    {
+      "epoch": 0.6974999098091562,
+      "grad_norm": 0.12072800099849701,
+      "learning_rate": 0.00017212007504690432,
+      "loss": 0.1504,
+      "step": 9667
+    },
+    {
+      "epoch": 0.6975720624842165,
+      "grad_norm": 0.11952561885118484,
+      "learning_rate": 0.00017211718862750758,
+      "loss": 0.2027,
+      "step": 9668
+    },
+    {
+      "epoch": 0.697644215159277,
+      "grad_norm": 0.14002487063407898,
+      "learning_rate": 0.00017211430220811084,
+      "loss": 0.1722,
+      "step": 9669
+    },
+    {
+      "epoch": 0.6977163678343374,
+      "grad_norm": 0.17951540648937225,
+      "learning_rate": 0.0001721114157887141,
+      "loss": 0.1605,
+      "step": 9670
+    },
+    {
+      "epoch": 0.6977885205093979,
+      "grad_norm": 0.12686780095100403,
+      "learning_rate": 0.00017210852936931737,
+      "loss": 0.1333,
+      "step": 9671
+    },
+    {
+      "epoch": 0.6978606731844583,
+      "grad_norm": 0.1033802255988121,
+      "learning_rate": 0.00017210564294992063,
+      "loss": 0.1672,
+      "step": 9672
+    },
+    {
+      "epoch": 0.6979328258595188,
+      "grad_norm": 0.13104183971881866,
+      "learning_rate": 0.0001721027565305239,
+      "loss": 0.1433,
+      "step": 9673
+    },
+    {
+      "epoch": 0.6980049785345792,
+      "grad_norm": 0.12744154036045074,
+      "learning_rate": 0.00017209987011112716,
+      "loss": 0.1628,
+      "step": 9674
+    },
+    {
+      "epoch": 0.6980771312096395,
+      "grad_norm": 0.12353894114494324,
+      "learning_rate": 0.00017209698369173042,
+      "loss": 0.15,
+      "step": 9675
+    },
+    {
+      "epoch": 0.6981492838847,
+      "grad_norm": 0.13690710067749023,
+      "learning_rate": 0.00017209409727233368,
+      "loss": 0.1135,
+      "step": 9676
+    },
+    {
+      "epoch": 0.6982214365597604,
+      "grad_norm": 0.1367052048444748,
+      "learning_rate": 0.00017209121085293695,
+      "loss": 0.1601,
+      "step": 9677
+    },
+    {
+      "epoch": 0.6982935892348209,
+      "grad_norm": 0.10972071439027786,
+      "learning_rate": 0.00017208832443354018,
+      "loss": 0.1109,
+      "step": 9678
+    },
+    {
+      "epoch": 0.6983657419098813,
+      "grad_norm": 0.19588278234004974,
+      "learning_rate": 0.00017208543801414347,
+      "loss": 0.1736,
+      "step": 9679
+    },
+    {
+      "epoch": 0.6984378945849418,
+      "grad_norm": 0.14738193154335022,
+      "learning_rate": 0.00017208255159474674,
+      "loss": 0.1543,
+      "step": 9680
+    },
+    {
+      "epoch": 0.6985100472600022,
+      "grad_norm": 0.1534094214439392,
+      "learning_rate": 0.00017207966517535,
+      "loss": 0.1204,
+      "step": 9681
+    },
+    {
+      "epoch": 0.6985821999350625,
+      "grad_norm": 0.18039028346538544,
+      "learning_rate": 0.00017207677875595326,
+      "loss": 0.1981,
+      "step": 9682
+    },
+    {
+      "epoch": 0.698654352610123,
+      "grad_norm": 0.12097857892513275,
+      "learning_rate": 0.0001720738923365565,
+      "loss": 0.1441,
+      "step": 9683
+    },
+    {
+      "epoch": 0.6987265052851834,
+      "grad_norm": 0.13416697084903717,
+      "learning_rate": 0.00017207100591715976,
+      "loss": 0.1491,
+      "step": 9684
+    },
+    {
+      "epoch": 0.6987986579602439,
+      "grad_norm": 0.17525966465473175,
+      "learning_rate": 0.00017206811949776302,
+      "loss": 0.1977,
+      "step": 9685
+    },
+    {
+      "epoch": 0.6988708106353043,
+      "grad_norm": 0.1118546798825264,
+      "learning_rate": 0.0001720652330783663,
+      "loss": 0.1563,
+      "step": 9686
+    },
+    {
+      "epoch": 0.6989429633103648,
+      "grad_norm": 0.1599108874797821,
+      "learning_rate": 0.00017206234665896958,
+      "loss": 0.2525,
+      "step": 9687
+    },
+    {
+      "epoch": 0.6990151159854252,
+      "grad_norm": 0.10533977299928665,
+      "learning_rate": 0.0001720594602395728,
+      "loss": 0.1827,
+      "step": 9688
+    },
+    {
+      "epoch": 0.6990872686604855,
+      "grad_norm": 0.1353236883878708,
+      "learning_rate": 0.00017205657382017608,
+      "loss": 0.1175,
+      "step": 9689
+    },
+    {
+      "epoch": 0.699159421335546,
+      "grad_norm": 0.21189481019973755,
+      "learning_rate": 0.00017205368740077934,
+      "loss": 0.1894,
+      "step": 9690
+    },
+    {
+      "epoch": 0.6992315740106064,
+      "grad_norm": 0.13929972052574158,
+      "learning_rate": 0.0001720508009813826,
+      "loss": 0.1996,
+      "step": 9691
+    },
+    {
+      "epoch": 0.6993037266856669,
+      "grad_norm": 0.1412792205810547,
+      "learning_rate": 0.00017204791456198586,
+      "loss": 0.1683,
+      "step": 9692
+    },
+    {
+      "epoch": 0.6993758793607273,
+      "grad_norm": 0.11568436771631241,
+      "learning_rate": 0.00017204502814258913,
+      "loss": 0.1659,
+      "step": 9693
+    },
+    {
+      "epoch": 0.6994480320357878,
+      "grad_norm": 0.11832127720117569,
+      "learning_rate": 0.0001720421417231924,
+      "loss": 0.1163,
+      "step": 9694
+    },
+    {
+      "epoch": 0.6995201847108482,
+      "grad_norm": 0.12044627219438553,
+      "learning_rate": 0.00017203925530379565,
+      "loss": 0.1622,
+      "step": 9695
+    },
+    {
+      "epoch": 0.6995923373859085,
+      "grad_norm": 0.11714223027229309,
+      "learning_rate": 0.00017203636888439892,
+      "loss": 0.1213,
+      "step": 9696
+    },
+    {
+      "epoch": 0.699664490060969,
+      "grad_norm": 0.11178726702928543,
+      "learning_rate": 0.00017203348246500218,
+      "loss": 0.1417,
+      "step": 9697
+    },
+    {
+      "epoch": 0.6997366427360294,
+      "grad_norm": 0.10355254262685776,
+      "learning_rate": 0.00017203059604560544,
+      "loss": 0.1369,
+      "step": 9698
+    },
+    {
+      "epoch": 0.6998087954110899,
+      "grad_norm": 0.15807823836803436,
+      "learning_rate": 0.00017202770962620868,
+      "loss": 0.1503,
+      "step": 9699
+    },
+    {
+      "epoch": 0.6998809480861503,
+      "grad_norm": 0.09935374557971954,
+      "learning_rate": 0.00017202482320681194,
+      "loss": 0.1474,
+      "step": 9700
+    },
+    {
+      "epoch": 0.6999531007612108,
+      "grad_norm": 0.10886482894420624,
+      "learning_rate": 0.00017202193678741523,
+      "loss": 0.1866,
+      "step": 9701
+    },
+    {
+      "epoch": 0.7000252534362712,
+      "grad_norm": 0.11364229768514633,
+      "learning_rate": 0.0001720190503680185,
+      "loss": 0.1196,
+      "step": 9702
+    },
+    {
+      "epoch": 0.7000974061113315,
+      "grad_norm": 0.12444400787353516,
+      "learning_rate": 0.00017201616394862176,
+      "loss": 0.1216,
+      "step": 9703
+    },
+    {
+      "epoch": 0.700169558786392,
+      "grad_norm": 0.14167913794517517,
+      "learning_rate": 0.000172013277529225,
+      "loss": 0.1237,
+      "step": 9704
+    },
+    {
+      "epoch": 0.7002417114614524,
+      "grad_norm": 0.12108553946018219,
+      "learning_rate": 0.00017201039110982825,
+      "loss": 0.1821,
+      "step": 9705
+    },
+    {
+      "epoch": 0.7003138641365129,
+      "grad_norm": 0.1302456557750702,
+      "learning_rate": 0.00017200750469043152,
+      "loss": 0.1074,
+      "step": 9706
+    },
+    {
+      "epoch": 0.7003860168115733,
+      "grad_norm": 0.13411375880241394,
+      "learning_rate": 0.00017200461827103478,
+      "loss": 0.1422,
+      "step": 9707
+    },
+    {
+      "epoch": 0.7004581694866338,
+      "grad_norm": 0.13024677336215973,
+      "learning_rate": 0.00017200173185163807,
+      "loss": 0.1648,
+      "step": 9708
+    },
+    {
+      "epoch": 0.7005303221616942,
+      "grad_norm": 0.12400952726602554,
+      "learning_rate": 0.0001719988454322413,
+      "loss": 0.1654,
+      "step": 9709
+    },
+    {
+      "epoch": 0.7006024748367545,
+      "grad_norm": 0.121695376932621,
+      "learning_rate": 0.00017199595901284457,
+      "loss": 0.1818,
+      "step": 9710
+    },
+    {
+      "epoch": 0.700674627511815,
+      "grad_norm": 0.1169712096452713,
+      "learning_rate": 0.00017199307259344783,
+      "loss": 0.1702,
+      "step": 9711
+    },
+    {
+      "epoch": 0.7007467801868754,
+      "grad_norm": 0.1264851689338684,
+      "learning_rate": 0.0001719901861740511,
+      "loss": 0.13,
+      "step": 9712
+    },
+    {
+      "epoch": 0.7008189328619359,
+      "grad_norm": 0.11403610557317734,
+      "learning_rate": 0.00017198729975465436,
+      "loss": 0.1638,
+      "step": 9713
+    },
+    {
+      "epoch": 0.7008910855369963,
+      "grad_norm": 0.12154081463813782,
+      "learning_rate": 0.00017198441333525762,
+      "loss": 0.1206,
+      "step": 9714
+    },
+    {
+      "epoch": 0.7009632382120567,
+      "grad_norm": 0.12071266770362854,
+      "learning_rate": 0.00017198152691586088,
+      "loss": 0.1472,
+      "step": 9715
+    },
+    {
+      "epoch": 0.7010353908871172,
+      "grad_norm": 0.12383043020963669,
+      "learning_rate": 0.00017197864049646415,
+      "loss": 0.1187,
+      "step": 9716
+    },
+    {
+      "epoch": 0.7011075435621775,
+      "grad_norm": 0.14367400109767914,
+      "learning_rate": 0.0001719757540770674,
+      "loss": 0.1238,
+      "step": 9717
+    },
+    {
+      "epoch": 0.701179696237238,
+      "grad_norm": 0.1460137665271759,
+      "learning_rate": 0.00017197286765767067,
+      "loss": 0.1454,
+      "step": 9718
+    },
+    {
+      "epoch": 0.7012518489122984,
+      "grad_norm": 0.10463149100542068,
+      "learning_rate": 0.00017196998123827394,
+      "loss": 0.1509,
+      "step": 9719
+    },
+    {
+      "epoch": 0.7013240015873589,
+      "grad_norm": 0.1418483406305313,
+      "learning_rate": 0.00017196709481887717,
+      "loss": 0.1841,
+      "step": 9720
+    },
+    {
+      "epoch": 0.7013961542624193,
+      "grad_norm": 0.14183053374290466,
+      "learning_rate": 0.00017196420839948043,
+      "loss": 0.2072,
+      "step": 9721
+    },
+    {
+      "epoch": 0.7014683069374797,
+      "grad_norm": 0.14547039568424225,
+      "learning_rate": 0.00017196132198008372,
+      "loss": 0.1647,
+      "step": 9722
+    },
+    {
+      "epoch": 0.7015404596125402,
+      "grad_norm": 0.11831801384687424,
+      "learning_rate": 0.000171958435560687,
+      "loss": 0.1648,
+      "step": 9723
+    },
+    {
+      "epoch": 0.7016126122876005,
+      "grad_norm": 0.15214595198631287,
+      "learning_rate": 0.00017195554914129025,
+      "loss": 0.1577,
+      "step": 9724
+    },
+    {
+      "epoch": 0.701684764962661,
+      "grad_norm": 0.1515510529279709,
+      "learning_rate": 0.00017195266272189349,
+      "loss": 0.1071,
+      "step": 9725
+    },
+    {
+      "epoch": 0.7017569176377214,
+      "grad_norm": 0.1443483531475067,
+      "learning_rate": 0.00017194977630249675,
+      "loss": 0.1358,
+      "step": 9726
+    },
+    {
+      "epoch": 0.7018290703127819,
+      "grad_norm": 0.11696092784404755,
+      "learning_rate": 0.0001719468898831,
+      "loss": 0.086,
+      "step": 9727
+    },
+    {
+      "epoch": 0.7019012229878423,
+      "grad_norm": 0.1589919775724411,
+      "learning_rate": 0.00017194400346370327,
+      "loss": 0.1566,
+      "step": 9728
+    },
+    {
+      "epoch": 0.7019733756629027,
+      "grad_norm": 0.1331232637166977,
+      "learning_rate": 0.00017194111704430656,
+      "loss": 0.1431,
+      "step": 9729
+    },
+    {
+      "epoch": 0.7020455283379631,
+      "grad_norm": 0.12140674144029617,
+      "learning_rate": 0.0001719382306249098,
+      "loss": 0.1458,
+      "step": 9730
+    },
+    {
+      "epoch": 0.7021176810130235,
+      "grad_norm": 0.16071046888828278,
+      "learning_rate": 0.00017193534420551306,
+      "loss": 0.1976,
+      "step": 9731
+    },
+    {
+      "epoch": 0.702189833688084,
+      "grad_norm": 0.11735766381025314,
+      "learning_rate": 0.00017193245778611633,
+      "loss": 0.1283,
+      "step": 9732
+    },
+    {
+      "epoch": 0.7022619863631444,
+      "grad_norm": 0.12011319398880005,
+      "learning_rate": 0.0001719295713667196,
+      "loss": 0.1559,
+      "step": 9733
+    },
+    {
+      "epoch": 0.7023341390382049,
+      "grad_norm": 0.10947341471910477,
+      "learning_rate": 0.00017192668494732285,
+      "loss": 0.1376,
+      "step": 9734
+    },
+    {
+      "epoch": 0.7024062917132653,
+      "grad_norm": 0.1267535537481308,
+      "learning_rate": 0.00017192379852792612,
+      "loss": 0.1407,
+      "step": 9735
+    },
+    {
+      "epoch": 0.7024784443883257,
+      "grad_norm": 0.10268282145261765,
+      "learning_rate": 0.00017192091210852938,
+      "loss": 0.1855,
+      "step": 9736
+    },
+    {
+      "epoch": 0.7025505970633861,
+      "grad_norm": 0.12966306507587433,
+      "learning_rate": 0.00017191802568913264,
+      "loss": 0.1203,
+      "step": 9737
+    },
+    {
+      "epoch": 0.7026227497384465,
+      "grad_norm": 0.12109366059303284,
+      "learning_rate": 0.0001719151392697359,
+      "loss": 0.1425,
+      "step": 9738
+    },
+    {
+      "epoch": 0.702694902413507,
+      "grad_norm": 0.11503426730632782,
+      "learning_rate": 0.00017191225285033917,
+      "loss": 0.1411,
+      "step": 9739
+    },
+    {
+      "epoch": 0.7027670550885674,
+      "grad_norm": 0.12528328597545624,
+      "learning_rate": 0.00017190936643094243,
+      "loss": 0.1367,
+      "step": 9740
+    },
+    {
+      "epoch": 0.7028392077636278,
+      "grad_norm": 0.11384246498346329,
+      "learning_rate": 0.00017190648001154567,
+      "loss": 0.1325,
+      "step": 9741
+    },
+    {
+      "epoch": 0.7029113604386883,
+      "grad_norm": 0.1350889950990677,
+      "learning_rate": 0.00017190359359214893,
+      "loss": 0.1155,
+      "step": 9742
+    },
+    {
+      "epoch": 0.7029835131137487,
+      "grad_norm": 0.13169564306735992,
+      "learning_rate": 0.00017190070717275222,
+      "loss": 0.1646,
+      "step": 9743
+    },
+    {
+      "epoch": 0.7030556657888091,
+      "grad_norm": 0.12558674812316895,
+      "learning_rate": 0.00017189782075335548,
+      "loss": 0.1683,
+      "step": 9744
+    },
+    {
+      "epoch": 0.7031278184638695,
+      "grad_norm": 0.12045971304178238,
+      "learning_rate": 0.00017189493433395874,
+      "loss": 0.1713,
+      "step": 9745
+    },
+    {
+      "epoch": 0.70319997113893,
+      "grad_norm": 0.10757529735565186,
+      "learning_rate": 0.00017189204791456198,
+      "loss": 0.138,
+      "step": 9746
+    },
+    {
+      "epoch": 0.7032721238139904,
+      "grad_norm": 0.11273236572742462,
+      "learning_rate": 0.00017188916149516524,
+      "loss": 0.0946,
+      "step": 9747
+    },
+    {
+      "epoch": 0.7033442764890508,
+      "grad_norm": 0.100577212870121,
+      "learning_rate": 0.0001718862750757685,
+      "loss": 0.1557,
+      "step": 9748
+    },
+    {
+      "epoch": 0.7034164291641113,
+      "grad_norm": 0.13702300190925598,
+      "learning_rate": 0.00017188338865637177,
+      "loss": 0.149,
+      "step": 9749
+    },
+    {
+      "epoch": 0.7034885818391717,
+      "grad_norm": 0.15205080807209015,
+      "learning_rate": 0.00017188050223697506,
+      "loss": 0.1669,
+      "step": 9750
+    },
+    {
+      "epoch": 0.7035607345142321,
+      "grad_norm": 0.10914731025695801,
+      "learning_rate": 0.0001718776158175783,
+      "loss": 0.1311,
+      "step": 9751
+    },
+    {
+      "epoch": 0.7036328871892925,
+      "grad_norm": 0.11806619167327881,
+      "learning_rate": 0.00017187472939818156,
+      "loss": 0.1528,
+      "step": 9752
+    },
+    {
+      "epoch": 0.703705039864353,
+      "grad_norm": 0.12875840067863464,
+      "learning_rate": 0.00017187184297878482,
+      "loss": 0.1028,
+      "step": 9753
+    },
+    {
+      "epoch": 0.7037771925394134,
+      "grad_norm": 0.12242988497018814,
+      "learning_rate": 0.00017186895655938808,
+      "loss": 0.1797,
+      "step": 9754
+    },
+    {
+      "epoch": 0.7038493452144738,
+      "grad_norm": 0.15106521546840668,
+      "learning_rate": 0.00017186607013999135,
+      "loss": 0.1605,
+      "step": 9755
+    },
+    {
+      "epoch": 0.7039214978895343,
+      "grad_norm": 0.1285814642906189,
+      "learning_rate": 0.0001718631837205946,
+      "loss": 0.1856,
+      "step": 9756
+    },
+    {
+      "epoch": 0.7039936505645947,
+      "grad_norm": 0.12011777609586716,
+      "learning_rate": 0.00017186029730119787,
+      "loss": 0.135,
+      "step": 9757
+    },
+    {
+      "epoch": 0.7040658032396551,
+      "grad_norm": 0.13019108772277832,
+      "learning_rate": 0.00017185741088180114,
+      "loss": 0.1589,
+      "step": 9758
+    },
+    {
+      "epoch": 0.7041379559147155,
+      "grad_norm": 0.12479769438505173,
+      "learning_rate": 0.0001718545244624044,
+      "loss": 0.1187,
+      "step": 9759
+    },
+    {
+      "epoch": 0.704210108589776,
+      "grad_norm": 0.30191943049430847,
+      "learning_rate": 0.00017185163804300766,
+      "loss": 0.1201,
+      "step": 9760
+    },
+    {
+      "epoch": 0.7042822612648364,
+      "grad_norm": 0.12107078731060028,
+      "learning_rate": 0.00017184875162361092,
+      "loss": 0.1464,
+      "step": 9761
+    },
+    {
+      "epoch": 0.7043544139398968,
+      "grad_norm": 0.1304750144481659,
+      "learning_rate": 0.0001718458652042142,
+      "loss": 0.1765,
+      "step": 9762
+    },
+    {
+      "epoch": 0.7044265666149573,
+      "grad_norm": 0.11234626173973083,
+      "learning_rate": 0.00017184297878481742,
+      "loss": 0.1156,
+      "step": 9763
+    },
+    {
+      "epoch": 0.7044987192900177,
+      "grad_norm": 0.11644095182418823,
+      "learning_rate": 0.0001718400923654207,
+      "loss": 0.124,
+      "step": 9764
+    },
+    {
+      "epoch": 0.7045708719650781,
+      "grad_norm": 0.14146173000335693,
+      "learning_rate": 0.00017183720594602398,
+      "loss": 0.1258,
+      "step": 9765
+    },
+    {
+      "epoch": 0.7046430246401385,
+      "grad_norm": 0.13076259195804596,
+      "learning_rate": 0.00017183431952662724,
+      "loss": 0.1368,
+      "step": 9766
+    },
+    {
+      "epoch": 0.704715177315199,
+      "grad_norm": 0.1335371434688568,
+      "learning_rate": 0.0001718314331072305,
+      "loss": 0.162,
+      "step": 9767
+    },
+    {
+      "epoch": 0.7047873299902594,
+      "grad_norm": 0.15889213979244232,
+      "learning_rate": 0.00017182854668783374,
+      "loss": 0.1524,
+      "step": 9768
+    },
+    {
+      "epoch": 0.7048594826653198,
+      "grad_norm": 0.11813731491565704,
+      "learning_rate": 0.000171825660268437,
+      "loss": 0.1364,
+      "step": 9769
+    },
+    {
+      "epoch": 0.7049316353403803,
+      "grad_norm": 0.11796697974205017,
+      "learning_rate": 0.00017182277384904026,
+      "loss": 0.1635,
+      "step": 9770
+    },
+    {
+      "epoch": 0.7050037880154407,
+      "grad_norm": 0.13033068180084229,
+      "learning_rate": 0.00017181988742964355,
+      "loss": 0.1523,
+      "step": 9771
+    },
+    {
+      "epoch": 0.7050759406905011,
+      "grad_norm": 0.10769747197628021,
+      "learning_rate": 0.00017181700101024682,
+      "loss": 0.1449,
+      "step": 9772
+    },
+    {
+      "epoch": 0.7051480933655615,
+      "grad_norm": 0.1245947927236557,
+      "learning_rate": 0.00017181411459085005,
+      "loss": 0.1393,
+      "step": 9773
+    },
+    {
+      "epoch": 0.705220246040622,
+      "grad_norm": 0.11615555733442307,
+      "learning_rate": 0.00017181122817145331,
+      "loss": 0.2104,
+      "step": 9774
+    },
+    {
+      "epoch": 0.7052923987156824,
+      "grad_norm": 0.1361609548330307,
+      "learning_rate": 0.00017180834175205658,
+      "loss": 0.1473,
+      "step": 9775
+    },
+    {
+      "epoch": 0.7053645513907428,
+      "grad_norm": 0.11919081211090088,
+      "learning_rate": 0.00017180545533265984,
+      "loss": 0.1078,
+      "step": 9776
+    },
+    {
+      "epoch": 0.7054367040658033,
+      "grad_norm": 0.11736463755369186,
+      "learning_rate": 0.0001718025689132631,
+      "loss": 0.1614,
+      "step": 9777
+    },
+    {
+      "epoch": 0.7055088567408637,
+      "grad_norm": 0.09773008525371552,
+      "learning_rate": 0.00017179968249386637,
+      "loss": 0.146,
+      "step": 9778
+    },
+    {
+      "epoch": 0.705581009415924,
+      "grad_norm": 0.15151232481002808,
+      "learning_rate": 0.00017179679607446963,
+      "loss": 0.1969,
+      "step": 9779
+    },
+    {
+      "epoch": 0.7056531620909845,
+      "grad_norm": 0.12831969559192657,
+      "learning_rate": 0.0001717939096550729,
+      "loss": 0.1403,
+      "step": 9780
+    },
+    {
+      "epoch": 0.7057253147660449,
+      "grad_norm": 0.11678869277238846,
+      "learning_rate": 0.00017179102323567616,
+      "loss": 0.1455,
+      "step": 9781
+    },
+    {
+      "epoch": 0.7057974674411054,
+      "grad_norm": 0.10794802755117416,
+      "learning_rate": 0.00017178813681627942,
+      "loss": 0.1364,
+      "step": 9782
+    },
+    {
+      "epoch": 0.7058696201161658,
+      "grad_norm": 0.15175952017307281,
+      "learning_rate": 0.00017178525039688268,
+      "loss": 0.1973,
+      "step": 9783
+    },
+    {
+      "epoch": 0.7059417727912263,
+      "grad_norm": 0.13190539181232452,
+      "learning_rate": 0.00017178236397748592,
+      "loss": 0.1294,
+      "step": 9784
+    },
+    {
+      "epoch": 0.7060139254662866,
+      "grad_norm": 0.13258545100688934,
+      "learning_rate": 0.0001717794775580892,
+      "loss": 0.1977,
+      "step": 9785
+    },
+    {
+      "epoch": 0.706086078141347,
+      "grad_norm": 0.11998707056045532,
+      "learning_rate": 0.00017177659113869247,
+      "loss": 0.1372,
+      "step": 9786
+    },
+    {
+      "epoch": 0.7061582308164075,
+      "grad_norm": 0.1670992523431778,
+      "learning_rate": 0.00017177370471929573,
+      "loss": 0.1864,
+      "step": 9787
+    },
+    {
+      "epoch": 0.7062303834914679,
+      "grad_norm": 0.14586926996707916,
+      "learning_rate": 0.000171770818299899,
+      "loss": 0.1557,
+      "step": 9788
+    },
+    {
+      "epoch": 0.7063025361665284,
+      "grad_norm": 0.12285439670085907,
+      "learning_rate": 0.00017176793188050223,
+      "loss": 0.157,
+      "step": 9789
+    },
+    {
+      "epoch": 0.7063746888415888,
+      "grad_norm": 0.10460318624973297,
+      "learning_rate": 0.0001717650454611055,
+      "loss": 0.1386,
+      "step": 9790
+    },
+    {
+      "epoch": 0.7064468415166493,
+      "grad_norm": 0.14111445844173431,
+      "learning_rate": 0.00017176215904170876,
+      "loss": 0.1299,
+      "step": 9791
+    },
+    {
+      "epoch": 0.7065189941917096,
+      "grad_norm": 0.1285100281238556,
+      "learning_rate": 0.00017175927262231205,
+      "loss": 0.153,
+      "step": 9792
+    },
+    {
+      "epoch": 0.70659114686677,
+      "grad_norm": 0.1267320066690445,
+      "learning_rate": 0.0001717563862029153,
+      "loss": 0.1916,
+      "step": 9793
+    },
+    {
+      "epoch": 0.7066632995418305,
+      "grad_norm": 0.14147202670574188,
+      "learning_rate": 0.00017175349978351855,
+      "loss": 0.1354,
+      "step": 9794
+    },
+    {
+      "epoch": 0.7067354522168909,
+      "grad_norm": 0.15187636017799377,
+      "learning_rate": 0.0001717506133641218,
+      "loss": 0.1668,
+      "step": 9795
+    },
+    {
+      "epoch": 0.7068076048919514,
+      "grad_norm": 0.1322856992483139,
+      "learning_rate": 0.00017174772694472507,
+      "loss": 0.2105,
+      "step": 9796
+    },
+    {
+      "epoch": 0.7068797575670118,
+      "grad_norm": 0.11827823519706726,
+      "learning_rate": 0.00017174484052532834,
+      "loss": 0.1454,
+      "step": 9797
+    },
+    {
+      "epoch": 0.7069519102420723,
+      "grad_norm": 0.13164323568344116,
+      "learning_rate": 0.0001717419541059316,
+      "loss": 0.1443,
+      "step": 9798
+    },
+    {
+      "epoch": 0.7070240629171326,
+      "grad_norm": 0.1065685823559761,
+      "learning_rate": 0.00017173906768653486,
+      "loss": 0.1235,
+      "step": 9799
+    },
+    {
+      "epoch": 0.707096215592193,
+      "grad_norm": 0.6572790741920471,
+      "learning_rate": 0.00017173618126713812,
+      "loss": 0.1534,
+      "step": 9800
+    },
+    {
+      "epoch": 0.7071683682672535,
+      "grad_norm": 0.12067247182130814,
+      "learning_rate": 0.0001717332948477414,
+      "loss": 0.185,
+      "step": 9801
+    },
+    {
+      "epoch": 0.7072405209423139,
+      "grad_norm": 0.10997969657182693,
+      "learning_rate": 0.00017173040842834465,
+      "loss": 0.1665,
+      "step": 9802
+    },
+    {
+      "epoch": 0.7073126736173744,
+      "grad_norm": 0.1059444323182106,
+      "learning_rate": 0.0001717275220089479,
+      "loss": 0.1168,
+      "step": 9803
+    },
+    {
+      "epoch": 0.7073848262924348,
+      "grad_norm": 0.10876884311437607,
+      "learning_rate": 0.00017172463558955118,
+      "loss": 0.1466,
+      "step": 9804
+    },
+    {
+      "epoch": 0.7074569789674953,
+      "grad_norm": 0.11825942248106003,
+      "learning_rate": 0.0001717217491701544,
+      "loss": 0.1516,
+      "step": 9805
+    },
+    {
+      "epoch": 0.7075291316425556,
+      "grad_norm": 0.1341393142938614,
+      "learning_rate": 0.0001717188627507577,
+      "loss": 0.1745,
+      "step": 9806
+    },
+    {
+      "epoch": 0.707601284317616,
+      "grad_norm": 0.10476581007242203,
+      "learning_rate": 0.00017171597633136096,
+      "loss": 0.1466,
+      "step": 9807
+    },
+    {
+      "epoch": 0.7076734369926765,
+      "grad_norm": 0.11815085262060165,
+      "learning_rate": 0.00017171308991196423,
+      "loss": 0.1593,
+      "step": 9808
+    },
+    {
+      "epoch": 0.7077455896677369,
+      "grad_norm": 0.12892809510231018,
+      "learning_rate": 0.0001717102034925675,
+      "loss": 0.1365,
+      "step": 9809
+    },
+    {
+      "epoch": 0.7078177423427974,
+      "grad_norm": 0.13001443445682526,
+      "learning_rate": 0.00017170731707317073,
+      "loss": 0.1805,
+      "step": 9810
+    },
+    {
+      "epoch": 0.7078898950178578,
+      "grad_norm": 0.1401710957288742,
+      "learning_rate": 0.000171704430653774,
+      "loss": 0.1604,
+      "step": 9811
+    },
+    {
+      "epoch": 0.7079620476929183,
+      "grad_norm": 0.1300428807735443,
+      "learning_rate": 0.00017170154423437725,
+      "loss": 0.1425,
+      "step": 9812
+    },
+    {
+      "epoch": 0.7080342003679786,
+      "grad_norm": 0.11680950224399567,
+      "learning_rate": 0.00017169865781498054,
+      "loss": 0.1626,
+      "step": 9813
+    },
+    {
+      "epoch": 0.708106353043039,
+      "grad_norm": 0.12791261076927185,
+      "learning_rate": 0.0001716957713955838,
+      "loss": 0.1756,
+      "step": 9814
+    },
+    {
+      "epoch": 0.7081785057180995,
+      "grad_norm": 0.1518712341785431,
+      "learning_rate": 0.00017169288497618704,
+      "loss": 0.1706,
+      "step": 9815
+    },
+    {
+      "epoch": 0.7082506583931599,
+      "grad_norm": 0.12080371379852295,
+      "learning_rate": 0.0001716899985567903,
+      "loss": 0.1604,
+      "step": 9816
+    },
+    {
+      "epoch": 0.7083228110682204,
+      "grad_norm": 0.13024398684501648,
+      "learning_rate": 0.00017168711213739357,
+      "loss": 0.1538,
+      "step": 9817
+    },
+    {
+      "epoch": 0.7083949637432808,
+      "grad_norm": 0.14617067575454712,
+      "learning_rate": 0.00017168422571799683,
+      "loss": 0.1482,
+      "step": 9818
+    },
+    {
+      "epoch": 0.7084671164183413,
+      "grad_norm": 0.11080318689346313,
+      "learning_rate": 0.0001716813392986001,
+      "loss": 0.1428,
+      "step": 9819
+    },
+    {
+      "epoch": 0.7085392690934016,
+      "grad_norm": 0.13892851769924164,
+      "learning_rate": 0.00017167845287920336,
+      "loss": 0.1448,
+      "step": 9820
+    },
+    {
+      "epoch": 0.708611421768462,
+      "grad_norm": 0.18297190964221954,
+      "learning_rate": 0.00017167556645980662,
+      "loss": 0.1453,
+      "step": 9821
+    },
+    {
+      "epoch": 0.7086835744435225,
+      "grad_norm": 0.10961407423019409,
+      "learning_rate": 0.00017167268004040988,
+      "loss": 0.1476,
+      "step": 9822
+    },
+    {
+      "epoch": 0.7087557271185829,
+      "grad_norm": 0.15383893251419067,
+      "learning_rate": 0.00017166979362101314,
+      "loss": 0.1899,
+      "step": 9823
+    },
+    {
+      "epoch": 0.7088278797936434,
+      "grad_norm": 0.11609875410795212,
+      "learning_rate": 0.0001716669072016164,
+      "loss": 0.124,
+      "step": 9824
+    },
+    {
+      "epoch": 0.7089000324687038,
+      "grad_norm": 0.1328735500574112,
+      "learning_rate": 0.00017166402078221967,
+      "loss": 0.1486,
+      "step": 9825
+    },
+    {
+      "epoch": 0.7089721851437643,
+      "grad_norm": 0.1313145011663437,
+      "learning_rate": 0.0001716611343628229,
+      "loss": 0.159,
+      "step": 9826
+    },
+    {
+      "epoch": 0.7090443378188246,
+      "grad_norm": 0.11288633942604065,
+      "learning_rate": 0.0001716582479434262,
+      "loss": 0.1553,
+      "step": 9827
+    },
+    {
+      "epoch": 0.709116490493885,
+      "grad_norm": 0.14243653416633606,
+      "learning_rate": 0.00017165536152402946,
+      "loss": 0.1693,
+      "step": 9828
+    },
+    {
+      "epoch": 0.7091886431689455,
+      "grad_norm": 0.12032989412546158,
+      "learning_rate": 0.00017165247510463272,
+      "loss": 0.1274,
+      "step": 9829
+    },
+    {
+      "epoch": 0.7092607958440059,
+      "grad_norm": 0.13511602580547333,
+      "learning_rate": 0.00017164958868523598,
+      "loss": 0.1661,
+      "step": 9830
+    },
+    {
+      "epoch": 0.7093329485190664,
+      "grad_norm": 0.1368311047554016,
+      "learning_rate": 0.00017164670226583922,
+      "loss": 0.1767,
+      "step": 9831
+    },
+    {
+      "epoch": 0.7094051011941268,
+      "grad_norm": 0.10124894976615906,
+      "learning_rate": 0.00017164381584644248,
+      "loss": 0.1765,
+      "step": 9832
+    },
+    {
+      "epoch": 0.7094772538691873,
+      "grad_norm": 0.11749887466430664,
+      "learning_rate": 0.00017164092942704575,
+      "loss": 0.1343,
+      "step": 9833
+    },
+    {
+      "epoch": 0.7095494065442476,
+      "grad_norm": 0.12168819457292557,
+      "learning_rate": 0.00017163804300764904,
+      "loss": 0.1573,
+      "step": 9834
+    },
+    {
+      "epoch": 0.709621559219308,
+      "grad_norm": 0.11898388713598251,
+      "learning_rate": 0.0001716351565882523,
+      "loss": 0.1558,
+      "step": 9835
+    },
+    {
+      "epoch": 0.7096937118943685,
+      "grad_norm": 0.1410788744688034,
+      "learning_rate": 0.00017163227016885553,
+      "loss": 0.1722,
+      "step": 9836
+    },
+    {
+      "epoch": 0.7097658645694289,
+      "grad_norm": 0.10520344972610474,
+      "learning_rate": 0.0001716293837494588,
+      "loss": 0.0989,
+      "step": 9837
+    },
+    {
+      "epoch": 0.7098380172444894,
+      "grad_norm": 0.1149938628077507,
+      "learning_rate": 0.00017162649733006206,
+      "loss": 0.1037,
+      "step": 9838
+    },
+    {
+      "epoch": 0.7099101699195498,
+      "grad_norm": 0.15595239400863647,
+      "learning_rate": 0.00017162361091066532,
+      "loss": 0.1389,
+      "step": 9839
+    },
+    {
+      "epoch": 0.7099823225946102,
+      "grad_norm": 0.12930183112621307,
+      "learning_rate": 0.0001716207244912686,
+      "loss": 0.0971,
+      "step": 9840
+    },
+    {
+      "epoch": 0.7100544752696706,
+      "grad_norm": 0.11761047691106796,
+      "learning_rate": 0.00017161783807187185,
+      "loss": 0.1513,
+      "step": 9841
+    },
+    {
+      "epoch": 0.710126627944731,
+      "grad_norm": 0.18295948207378387,
+      "learning_rate": 0.0001716149516524751,
+      "loss": 0.1786,
+      "step": 9842
+    },
+    {
+      "epoch": 0.7101987806197915,
+      "grad_norm": 0.15927353501319885,
+      "learning_rate": 0.00017161206523307838,
+      "loss": 0.1635,
+      "step": 9843
+    },
+    {
+      "epoch": 0.7102709332948519,
+      "grad_norm": 0.14733755588531494,
+      "learning_rate": 0.00017160917881368164,
+      "loss": 0.1586,
+      "step": 9844
+    },
+    {
+      "epoch": 0.7103430859699124,
+      "grad_norm": 0.11530192941427231,
+      "learning_rate": 0.0001716062923942849,
+      "loss": 0.1309,
+      "step": 9845
+    },
+    {
+      "epoch": 0.7104152386449728,
+      "grad_norm": 0.5042710304260254,
+      "learning_rate": 0.00017160340597488816,
+      "loss": 0.1704,
+      "step": 9846
+    },
+    {
+      "epoch": 0.7104873913200331,
+      "grad_norm": 0.13949906826019287,
+      "learning_rate": 0.0001716005195554914,
+      "loss": 0.1263,
+      "step": 9847
+    },
+    {
+      "epoch": 0.7105595439950936,
+      "grad_norm": 0.10125601291656494,
+      "learning_rate": 0.0001715976331360947,
+      "loss": 0.1121,
+      "step": 9848
+    },
+    {
+      "epoch": 0.710631696670154,
+      "grad_norm": 0.11898714303970337,
+      "learning_rate": 0.00017159474671669795,
+      "loss": 0.1455,
+      "step": 9849
+    },
+    {
+      "epoch": 0.7107038493452145,
+      "grad_norm": 0.13794389367103577,
+      "learning_rate": 0.00017159186029730122,
+      "loss": 0.1873,
+      "step": 9850
+    },
+    {
+      "epoch": 0.7107760020202749,
+      "grad_norm": 0.11388465762138367,
+      "learning_rate": 0.00017158897387790448,
+      "loss": 0.1974,
+      "step": 9851
+    },
+    {
+      "epoch": 0.7108481546953354,
+      "grad_norm": 0.169645294547081,
+      "learning_rate": 0.00017158608745850771,
+      "loss": 0.1842,
+      "step": 9852
+    },
+    {
+      "epoch": 0.7109203073703958,
+      "grad_norm": 0.11859288066625595,
+      "learning_rate": 0.00017158320103911098,
+      "loss": 0.1438,
+      "step": 9853
+    },
+    {
+      "epoch": 0.7109924600454561,
+      "grad_norm": 0.11447511613368988,
+      "learning_rate": 0.00017158031461971424,
+      "loss": 0.1258,
+      "step": 9854
+    },
+    {
+      "epoch": 0.7110646127205166,
+      "grad_norm": 0.1500857025384903,
+      "learning_rate": 0.00017157742820031753,
+      "loss": 0.1707,
+      "step": 9855
+    },
+    {
+      "epoch": 0.711136765395577,
+      "grad_norm": 0.1037248969078064,
+      "learning_rate": 0.0001715745417809208,
+      "loss": 0.1653,
+      "step": 9856
+    },
+    {
+      "epoch": 0.7112089180706375,
+      "grad_norm": 0.14994950592517853,
+      "learning_rate": 0.00017157165536152403,
+      "loss": 0.1192,
+      "step": 9857
+    },
+    {
+      "epoch": 0.7112810707456979,
+      "grad_norm": 0.11467479914426804,
+      "learning_rate": 0.0001715687689421273,
+      "loss": 0.1513,
+      "step": 9858
+    },
+    {
+      "epoch": 0.7113532234207584,
+      "grad_norm": 0.11612613499164581,
+      "learning_rate": 0.00017156588252273055,
+      "loss": 0.1455,
+      "step": 9859
+    },
+    {
+      "epoch": 0.7114253760958188,
+      "grad_norm": 0.14025089144706726,
+      "learning_rate": 0.00017156299610333382,
+      "loss": 0.1282,
+      "step": 9860
+    },
+    {
+      "epoch": 0.7114975287708791,
+      "grad_norm": 0.10642143338918686,
+      "learning_rate": 0.00017156010968393708,
+      "loss": 0.175,
+      "step": 9861
+    },
+    {
+      "epoch": 0.7115696814459396,
+      "grad_norm": 0.1332758218050003,
+      "learning_rate": 0.00017155722326454034,
+      "loss": 0.1791,
+      "step": 9862
+    },
+    {
+      "epoch": 0.711641834121,
+      "grad_norm": 0.1146327331662178,
+      "learning_rate": 0.0001715543368451436,
+      "loss": 0.1294,
+      "step": 9863
+    },
+    {
+      "epoch": 0.7117139867960605,
+      "grad_norm": 0.1492261290550232,
+      "learning_rate": 0.00017155145042574687,
+      "loss": 0.152,
+      "step": 9864
+    },
+    {
+      "epoch": 0.7117861394711209,
+      "grad_norm": 0.14946997165679932,
+      "learning_rate": 0.00017154856400635013,
+      "loss": 0.1694,
+      "step": 9865
+    },
+    {
+      "epoch": 0.7118582921461813,
+      "grad_norm": 0.14650723338127136,
+      "learning_rate": 0.0001715456775869534,
+      "loss": 0.1241,
+      "step": 9866
+    },
+    {
+      "epoch": 0.7119304448212418,
+      "grad_norm": 0.12067387253046036,
+      "learning_rate": 0.00017154279116755666,
+      "loss": 0.1527,
+      "step": 9867
+    },
+    {
+      "epoch": 0.7120025974963021,
+      "grad_norm": 0.12674711644649506,
+      "learning_rate": 0.0001715399047481599,
+      "loss": 0.1037,
+      "step": 9868
+    },
+    {
+      "epoch": 0.7120747501713626,
+      "grad_norm": 0.12338986992835999,
+      "learning_rate": 0.00017153701832876318,
+      "loss": 0.1904,
+      "step": 9869
+    },
+    {
+      "epoch": 0.712146902846423,
+      "grad_norm": 0.13921062648296356,
+      "learning_rate": 0.00017153413190936645,
+      "loss": 0.1651,
+      "step": 9870
+    },
+    {
+      "epoch": 0.7122190555214835,
+      "grad_norm": 0.1583152711391449,
+      "learning_rate": 0.0001715312454899697,
+      "loss": 0.1568,
+      "step": 9871
+    },
+    {
+      "epoch": 0.7122912081965439,
+      "grad_norm": 0.11268026381731033,
+      "learning_rate": 0.00017152835907057297,
+      "loss": 0.1469,
+      "step": 9872
+    },
+    {
+      "epoch": 0.7123633608716043,
+      "grad_norm": 0.14031006395816803,
+      "learning_rate": 0.00017152547265117624,
+      "loss": 0.207,
+      "step": 9873
+    },
+    {
+      "epoch": 0.7124355135466648,
+      "grad_norm": 0.13599331676959991,
+      "learning_rate": 0.00017152258623177947,
+      "loss": 0.1778,
+      "step": 9874
+    },
+    {
+      "epoch": 0.7125076662217251,
+      "grad_norm": 0.14373207092285156,
+      "learning_rate": 0.00017151969981238273,
+      "loss": 0.1517,
+      "step": 9875
+    },
+    {
+      "epoch": 0.7125798188967856,
+      "grad_norm": 0.14559577405452728,
+      "learning_rate": 0.00017151681339298602,
+      "loss": 0.1573,
+      "step": 9876
+    },
+    {
+      "epoch": 0.712651971571846,
+      "grad_norm": 0.13349908590316772,
+      "learning_rate": 0.0001715139269735893,
+      "loss": 0.1683,
+      "step": 9877
+    },
+    {
+      "epoch": 0.7127241242469065,
+      "grad_norm": 0.13450081646442413,
+      "learning_rate": 0.00017151104055419255,
+      "loss": 0.2079,
+      "step": 9878
+    },
+    {
+      "epoch": 0.7127962769219669,
+      "grad_norm": 0.1484353095293045,
+      "learning_rate": 0.00017150815413479579,
+      "loss": 0.1633,
+      "step": 9879
+    },
+    {
+      "epoch": 0.7128684295970273,
+      "grad_norm": 0.12456122785806656,
+      "learning_rate": 0.00017150526771539905,
+      "loss": 0.1789,
+      "step": 9880
+    },
+    {
+      "epoch": 0.7129405822720878,
+      "grad_norm": 0.11555665731430054,
+      "learning_rate": 0.0001715023812960023,
+      "loss": 0.1746,
+      "step": 9881
+    },
+    {
+      "epoch": 0.7130127349471481,
+      "grad_norm": 0.1268804520368576,
+      "learning_rate": 0.00017149949487660557,
+      "loss": 0.1736,
+      "step": 9882
+    },
+    {
+      "epoch": 0.7130848876222086,
+      "grad_norm": 0.10452228039503098,
+      "learning_rate": 0.00017149660845720886,
+      "loss": 0.1,
+      "step": 9883
+    },
+    {
+      "epoch": 0.713157040297269,
+      "grad_norm": 0.10974579304456711,
+      "learning_rate": 0.0001714937220378121,
+      "loss": 0.154,
+      "step": 9884
+    },
+    {
+      "epoch": 0.7132291929723295,
+      "grad_norm": 0.09949525445699692,
+      "learning_rate": 0.00017149083561841536,
+      "loss": 0.1743,
+      "step": 9885
+    },
+    {
+      "epoch": 0.7133013456473899,
+      "grad_norm": 0.13148930668830872,
+      "learning_rate": 0.00017148794919901863,
+      "loss": 0.1599,
+      "step": 9886
+    },
+    {
+      "epoch": 0.7133734983224503,
+      "grad_norm": 0.14373208582401276,
+      "learning_rate": 0.0001714850627796219,
+      "loss": 0.1374,
+      "step": 9887
+    },
+    {
+      "epoch": 0.7134456509975108,
+      "grad_norm": 0.11444774270057678,
+      "learning_rate": 0.00017148217636022515,
+      "loss": 0.1616,
+      "step": 9888
+    },
+    {
+      "epoch": 0.7135178036725711,
+      "grad_norm": 0.17785054445266724,
+      "learning_rate": 0.00017147928994082842,
+      "loss": 0.1814,
+      "step": 9889
+    },
+    {
+      "epoch": 0.7135899563476316,
+      "grad_norm": 0.11420541256666183,
+      "learning_rate": 0.00017147640352143168,
+      "loss": 0.14,
+      "step": 9890
+    },
+    {
+      "epoch": 0.713662109022692,
+      "grad_norm": 0.12373317778110504,
+      "learning_rate": 0.00017147351710203494,
+      "loss": 0.1239,
+      "step": 9891
+    },
+    {
+      "epoch": 0.7137342616977524,
+      "grad_norm": 0.1315503716468811,
+      "learning_rate": 0.0001714706306826382,
+      "loss": 0.1345,
+      "step": 9892
+    },
+    {
+      "epoch": 0.7138064143728129,
+      "grad_norm": 0.13126040995121002,
+      "learning_rate": 0.00017146774426324147,
+      "loss": 0.1735,
+      "step": 9893
+    },
+    {
+      "epoch": 0.7138785670478733,
+      "grad_norm": 0.11406406760215759,
+      "learning_rate": 0.00017146485784384473,
+      "loss": 0.1266,
+      "step": 9894
+    },
+    {
+      "epoch": 0.7139507197229338,
+      "grad_norm": 0.12042064964771271,
+      "learning_rate": 0.00017146197142444797,
+      "loss": 0.1657,
+      "step": 9895
+    },
+    {
+      "epoch": 0.7140228723979941,
+      "grad_norm": 0.13797765970230103,
+      "learning_rate": 0.00017145908500505123,
+      "loss": 0.1272,
+      "step": 9896
+    },
+    {
+      "epoch": 0.7140950250730546,
+      "grad_norm": 0.12352259457111359,
+      "learning_rate": 0.0001714561985856545,
+      "loss": 0.1665,
+      "step": 9897
+    },
+    {
+      "epoch": 0.714167177748115,
+      "grad_norm": 0.13089880347251892,
+      "learning_rate": 0.00017145331216625778,
+      "loss": 0.1745,
+      "step": 9898
+    },
+    {
+      "epoch": 0.7142393304231754,
+      "grad_norm": 0.13658390939235687,
+      "learning_rate": 0.00017145042574686104,
+      "loss": 0.1538,
+      "step": 9899
+    },
+    {
+      "epoch": 0.7143114830982359,
+      "grad_norm": 0.13549038767814636,
+      "learning_rate": 0.00017144753932746428,
+      "loss": 0.1206,
+      "step": 9900
+    },
+    {
+      "epoch": 0.7143836357732963,
+      "grad_norm": 0.13650107383728027,
+      "learning_rate": 0.00017144465290806754,
+      "loss": 0.1378,
+      "step": 9901
+    },
+    {
+      "epoch": 0.7144557884483568,
+      "grad_norm": 0.12035445123910904,
+      "learning_rate": 0.0001714417664886708,
+      "loss": 0.1522,
+      "step": 9902
+    },
+    {
+      "epoch": 0.7145279411234171,
+      "grad_norm": 0.15744692087173462,
+      "learning_rate": 0.00017143888006927407,
+      "loss": 0.1576,
+      "step": 9903
+    },
+    {
+      "epoch": 0.7146000937984776,
+      "grad_norm": 0.14461341500282288,
+      "learning_rate": 0.00017143599364987733,
+      "loss": 0.2089,
+      "step": 9904
+    },
+    {
+      "epoch": 0.714672246473538,
+      "grad_norm": 0.15803925693035126,
+      "learning_rate": 0.0001714331072304806,
+      "loss": 0.2166,
+      "step": 9905
+    },
+    {
+      "epoch": 0.7147443991485984,
+      "grad_norm": 0.12885110080242157,
+      "learning_rate": 0.00017143022081108386,
+      "loss": 0.1752,
+      "step": 9906
+    },
+    {
+      "epoch": 0.7148165518236589,
+      "grad_norm": 0.1436045616865158,
+      "learning_rate": 0.00017142733439168712,
+      "loss": 0.2196,
+      "step": 9907
+    },
+    {
+      "epoch": 0.7148887044987193,
+      "grad_norm": 0.10944678634405136,
+      "learning_rate": 0.00017142444797229038,
+      "loss": 0.1131,
+      "step": 9908
+    },
+    {
+      "epoch": 0.7149608571737797,
+      "grad_norm": 0.12081919610500336,
+      "learning_rate": 0.00017142156155289365,
+      "loss": 0.1596,
+      "step": 9909
+    },
+    {
+      "epoch": 0.7150330098488401,
+      "grad_norm": 0.10796798765659332,
+      "learning_rate": 0.0001714186751334969,
+      "loss": 0.1817,
+      "step": 9910
+    },
+    {
+      "epoch": 0.7151051625239006,
+      "grad_norm": 0.12811782956123352,
+      "learning_rate": 0.00017141578871410015,
+      "loss": 0.1859,
+      "step": 9911
+    },
+    {
+      "epoch": 0.715177315198961,
+      "grad_norm": 0.12989024817943573,
+      "learning_rate": 0.00017141290229470344,
+      "loss": 0.172,
+      "step": 9912
+    },
+    {
+      "epoch": 0.7152494678740214,
+      "grad_norm": 0.12131687253713608,
+      "learning_rate": 0.0001714100158753067,
+      "loss": 0.1459,
+      "step": 9913
+    },
+    {
+      "epoch": 0.7153216205490819,
+      "grad_norm": 0.12954425811767578,
+      "learning_rate": 0.00017140712945590996,
+      "loss": 0.1669,
+      "step": 9914
+    },
+    {
+      "epoch": 0.7153937732241423,
+      "grad_norm": 0.09515898674726486,
+      "learning_rate": 0.00017140424303651322,
+      "loss": 0.1473,
+      "step": 9915
+    },
+    {
+      "epoch": 0.7154659258992027,
+      "grad_norm": 0.14139306545257568,
+      "learning_rate": 0.00017140135661711646,
+      "loss": 0.1211,
+      "step": 9916
+    },
+    {
+      "epoch": 0.7155380785742631,
+      "grad_norm": 0.118651382625103,
+      "learning_rate": 0.00017139847019771972,
+      "loss": 0.1129,
+      "step": 9917
+    },
+    {
+      "epoch": 0.7156102312493235,
+      "grad_norm": 0.11456239968538284,
+      "learning_rate": 0.00017139558377832299,
+      "loss": 0.1543,
+      "step": 9918
+    },
+    {
+      "epoch": 0.715682383924384,
+      "grad_norm": 0.11746779084205627,
+      "learning_rate": 0.00017139269735892628,
+      "loss": 0.1007,
+      "step": 9919
+    },
+    {
+      "epoch": 0.7157545365994444,
+      "grad_norm": 0.12484867870807648,
+      "learning_rate": 0.00017138981093952954,
+      "loss": 0.1402,
+      "step": 9920
+    },
+    {
+      "epoch": 0.7158266892745049,
+      "grad_norm": 0.12649855017662048,
+      "learning_rate": 0.00017138692452013277,
+      "loss": 0.1495,
+      "step": 9921
+    },
+    {
+      "epoch": 0.7158988419495653,
+      "grad_norm": 0.12505637109279633,
+      "learning_rate": 0.00017138403810073604,
+      "loss": 0.1251,
+      "step": 9922
+    },
+    {
+      "epoch": 0.7159709946246257,
+      "grad_norm": 0.15270036458969116,
+      "learning_rate": 0.0001713811516813393,
+      "loss": 0.1469,
+      "step": 9923
+    },
+    {
+      "epoch": 0.7160431472996861,
+      "grad_norm": 0.14083799719810486,
+      "learning_rate": 0.00017137826526194256,
+      "loss": 0.1705,
+      "step": 9924
+    },
+    {
+      "epoch": 0.7161152999747465,
+      "grad_norm": 0.1292945295572281,
+      "learning_rate": 0.00017137537884254583,
+      "loss": 0.1094,
+      "step": 9925
+    },
+    {
+      "epoch": 0.716187452649807,
+      "grad_norm": 0.11223212629556656,
+      "learning_rate": 0.0001713724924231491,
+      "loss": 0.1134,
+      "step": 9926
+    },
+    {
+      "epoch": 0.7162596053248674,
+      "grad_norm": 0.11963280290365219,
+      "learning_rate": 0.00017136960600375235,
+      "loss": 0.1646,
+      "step": 9927
+    },
+    {
+      "epoch": 0.7163317579999279,
+      "grad_norm": 0.11987181752920151,
+      "learning_rate": 0.00017136671958435562,
+      "loss": 0.1701,
+      "step": 9928
+    },
+    {
+      "epoch": 0.7164039106749883,
+      "grad_norm": 0.1538725346326828,
+      "learning_rate": 0.00017136383316495888,
+      "loss": 0.1631,
+      "step": 9929
+    },
+    {
+      "epoch": 0.7164760633500487,
+      "grad_norm": 0.11311746388673782,
+      "learning_rate": 0.00017136094674556214,
+      "loss": 0.1808,
+      "step": 9930
+    },
+    {
+      "epoch": 0.7165482160251091,
+      "grad_norm": 0.16025066375732422,
+      "learning_rate": 0.0001713580603261654,
+      "loss": 0.1725,
+      "step": 9931
+    },
+    {
+      "epoch": 0.7166203687001695,
+      "grad_norm": 0.09958884865045547,
+      "learning_rate": 0.00017135517390676864,
+      "loss": 0.1903,
+      "step": 9932
+    },
+    {
+      "epoch": 0.71669252137523,
+      "grad_norm": 0.1386653184890747,
+      "learning_rate": 0.00017135228748737193,
+      "loss": 0.151,
+      "step": 9933
+    },
+    {
+      "epoch": 0.7167646740502904,
+      "grad_norm": 0.14528487622737885,
+      "learning_rate": 0.0001713494010679752,
+      "loss": 0.2039,
+      "step": 9934
+    },
+    {
+      "epoch": 0.7168368267253509,
+      "grad_norm": 0.11425530910491943,
+      "learning_rate": 0.00017134651464857846,
+      "loss": 0.1829,
+      "step": 9935
+    },
+    {
+      "epoch": 0.7169089794004113,
+      "grad_norm": 0.11770536750555038,
+      "learning_rate": 0.00017134362822918172,
+      "loss": 0.2005,
+      "step": 9936
+    },
+    {
+      "epoch": 0.7169811320754716,
+      "grad_norm": 0.11351843178272247,
+      "learning_rate": 0.00017134074180978495,
+      "loss": 0.1404,
+      "step": 9937
+    },
+    {
+      "epoch": 0.7170532847505321,
+      "grad_norm": 0.14741669595241547,
+      "learning_rate": 0.00017133785539038822,
+      "loss": 0.1254,
+      "step": 9938
+    },
+    {
+      "epoch": 0.7171254374255925,
+      "grad_norm": 0.10124711692333221,
+      "learning_rate": 0.00017133496897099148,
+      "loss": 0.1792,
+      "step": 9939
+    },
+    {
+      "epoch": 0.717197590100653,
+      "grad_norm": 0.12266574800014496,
+      "learning_rate": 0.00017133208255159477,
+      "loss": 0.1897,
+      "step": 9940
+    },
+    {
+      "epoch": 0.7172697427757134,
+      "grad_norm": 0.15079885721206665,
+      "learning_rate": 0.00017132919613219803,
+      "loss": 0.2157,
+      "step": 9941
+    },
+    {
+      "epoch": 0.7173418954507739,
+      "grad_norm": 0.14475995302200317,
+      "learning_rate": 0.00017132630971280127,
+      "loss": 0.1779,
+      "step": 9942
+    },
+    {
+      "epoch": 0.7174140481258343,
+      "grad_norm": 0.1366308331489563,
+      "learning_rate": 0.00017132342329340453,
+      "loss": 0.1503,
+      "step": 9943
+    },
+    {
+      "epoch": 0.7174862008008946,
+      "grad_norm": 0.13412003219127655,
+      "learning_rate": 0.0001713205368740078,
+      "loss": 0.1522,
+      "step": 9944
+    },
+    {
+      "epoch": 0.7175583534759551,
+      "grad_norm": 0.15304332971572876,
+      "learning_rate": 0.00017131765045461106,
+      "loss": 0.1418,
+      "step": 9945
+    },
+    {
+      "epoch": 0.7176305061510155,
+      "grad_norm": 0.12837323546409607,
+      "learning_rate": 0.00017131476403521432,
+      "loss": 0.155,
+      "step": 9946
+    },
+    {
+      "epoch": 0.717702658826076,
+      "grad_norm": 0.14579154551029205,
+      "learning_rate": 0.00017131187761581758,
+      "loss": 0.152,
+      "step": 9947
+    },
+    {
+      "epoch": 0.7177748115011364,
+      "grad_norm": 0.1218762919306755,
+      "learning_rate": 0.00017130899119642085,
+      "loss": 0.1179,
+      "step": 9948
+    },
+    {
+      "epoch": 0.7178469641761969,
+      "grad_norm": 0.11870580911636353,
+      "learning_rate": 0.0001713061047770241,
+      "loss": 0.1741,
+      "step": 9949
+    },
+    {
+      "epoch": 0.7179191168512573,
+      "grad_norm": 0.1070319414138794,
+      "learning_rate": 0.00017130321835762737,
+      "loss": 0.1778,
+      "step": 9950
+    },
+    {
+      "epoch": 0.7179912695263176,
+      "grad_norm": 0.11091592162847519,
+      "learning_rate": 0.00017130033193823064,
+      "loss": 0.1717,
+      "step": 9951
+    },
+    {
+      "epoch": 0.7180634222013781,
+      "grad_norm": 0.13534674048423767,
+      "learning_rate": 0.0001712974455188339,
+      "loss": 0.1926,
+      "step": 9952
+    },
+    {
+      "epoch": 0.7181355748764385,
+      "grad_norm": 0.10683610290288925,
+      "learning_rate": 0.00017129455909943713,
+      "loss": 0.1329,
+      "step": 9953
+    },
+    {
+      "epoch": 0.718207727551499,
+      "grad_norm": 0.13404878973960876,
+      "learning_rate": 0.00017129167268004042,
+      "loss": 0.1191,
+      "step": 9954
+    },
+    {
+      "epoch": 0.7182798802265594,
+      "grad_norm": 0.1568976789712906,
+      "learning_rate": 0.0001712887862606437,
+      "loss": 0.1917,
+      "step": 9955
+    },
+    {
+      "epoch": 0.7183520329016199,
+      "grad_norm": 0.09924539923667908,
+      "learning_rate": 0.00017128589984124695,
+      "loss": 0.129,
+      "step": 9956
+    },
+    {
+      "epoch": 0.7184241855766803,
+      "grad_norm": 0.14538629353046417,
+      "learning_rate": 0.0001712830134218502,
+      "loss": 0.1502,
+      "step": 9957
+    },
+    {
+      "epoch": 0.7184963382517406,
+      "grad_norm": 0.11760757118463516,
+      "learning_rate": 0.00017128012700245345,
+      "loss": 0.1622,
+      "step": 9958
+    },
+    {
+      "epoch": 0.7185684909268011,
+      "grad_norm": 0.12738563120365143,
+      "learning_rate": 0.0001712772405830567,
+      "loss": 0.1684,
+      "step": 9959
+    },
+    {
+      "epoch": 0.7186406436018615,
+      "grad_norm": 0.1411866843700409,
+      "learning_rate": 0.00017127435416365997,
+      "loss": 0.1755,
+      "step": 9960
+    },
+    {
+      "epoch": 0.718712796276922,
+      "grad_norm": 0.1326630711555481,
+      "learning_rate": 0.00017127146774426326,
+      "loss": 0.1994,
+      "step": 9961
+    },
+    {
+      "epoch": 0.7187849489519824,
+      "grad_norm": 0.12362193316221237,
+      "learning_rate": 0.00017126858132486653,
+      "loss": 0.1716,
+      "step": 9962
+    },
+    {
+      "epoch": 0.7188571016270429,
+      "grad_norm": 0.11543554812669754,
+      "learning_rate": 0.00017126569490546976,
+      "loss": 0.2053,
+      "step": 9963
+    },
+    {
+      "epoch": 0.7189292543021033,
+      "grad_norm": 0.18158745765686035,
+      "learning_rate": 0.00017126280848607303,
+      "loss": 0.1318,
+      "step": 9964
+    },
+    {
+      "epoch": 0.7190014069771636,
+      "grad_norm": 0.17341430485248566,
+      "learning_rate": 0.0001712599220666763,
+      "loss": 0.1776,
+      "step": 9965
+    },
+    {
+      "epoch": 0.7190735596522241,
+      "grad_norm": 0.12372343242168427,
+      "learning_rate": 0.00017125703564727955,
+      "loss": 0.0971,
+      "step": 9966
+    },
+    {
+      "epoch": 0.7191457123272845,
+      "grad_norm": 0.14443086087703705,
+      "learning_rate": 0.00017125414922788281,
+      "loss": 0.1811,
+      "step": 9967
+    },
+    {
+      "epoch": 0.719217865002345,
+      "grad_norm": 0.12786145508289337,
+      "learning_rate": 0.00017125126280848608,
+      "loss": 0.1371,
+      "step": 9968
+    },
+    {
+      "epoch": 0.7192900176774054,
+      "grad_norm": 0.12725846469402313,
+      "learning_rate": 0.00017124837638908934,
+      "loss": 0.1724,
+      "step": 9969
+    },
+    {
+      "epoch": 0.7193621703524659,
+      "grad_norm": 0.14343729615211487,
+      "learning_rate": 0.0001712454899696926,
+      "loss": 0.1309,
+      "step": 9970
+    },
+    {
+      "epoch": 0.7194343230275262,
+      "grad_norm": 0.13157957792282104,
+      "learning_rate": 0.00017124260355029587,
+      "loss": 0.149,
+      "step": 9971
+    },
+    {
+      "epoch": 0.7195064757025866,
+      "grad_norm": 0.12144739925861359,
+      "learning_rate": 0.00017123971713089913,
+      "loss": 0.0983,
+      "step": 9972
+    },
+    {
+      "epoch": 0.7195786283776471,
+      "grad_norm": 0.11445478349924088,
+      "learning_rate": 0.0001712368307115024,
+      "loss": 0.1063,
+      "step": 9973
+    },
+    {
+      "epoch": 0.7196507810527075,
+      "grad_norm": 0.10009689629077911,
+      "learning_rate": 0.00017123394429210563,
+      "loss": 0.1739,
+      "step": 9974
+    },
+    {
+      "epoch": 0.719722933727768,
+      "grad_norm": 0.16738224029541016,
+      "learning_rate": 0.00017123105787270892,
+      "loss": 0.1399,
+      "step": 9975
+    },
+    {
+      "epoch": 0.7197950864028284,
+      "grad_norm": 0.11219814419746399,
+      "learning_rate": 0.00017122817145331218,
+      "loss": 0.1006,
+      "step": 9976
+    },
+    {
+      "epoch": 0.7198672390778889,
+      "grad_norm": 0.10597624629735947,
+      "learning_rate": 0.00017122528503391544,
+      "loss": 0.1562,
+      "step": 9977
+    },
+    {
+      "epoch": 0.7199393917529492,
+      "grad_norm": 0.1117899939417839,
+      "learning_rate": 0.0001712223986145187,
+      "loss": 0.1558,
+      "step": 9978
+    },
+    {
+      "epoch": 0.7200115444280096,
+      "grad_norm": 0.14814738929271698,
+      "learning_rate": 0.00017121951219512194,
+      "loss": 0.2113,
+      "step": 9979
+    },
+    {
+      "epoch": 0.7200836971030701,
+      "grad_norm": 0.1278420090675354,
+      "learning_rate": 0.0001712166257757252,
+      "loss": 0.1927,
+      "step": 9980
+    },
+    {
+      "epoch": 0.7201558497781305,
+      "grad_norm": 0.12248340994119644,
+      "learning_rate": 0.00017121373935632847,
+      "loss": 0.1348,
+      "step": 9981
+    },
+    {
+      "epoch": 0.720228002453191,
+      "grad_norm": 0.1193833276629448,
+      "learning_rate": 0.00017121085293693176,
+      "loss": 0.1439,
+      "step": 9982
+    },
+    {
+      "epoch": 0.7203001551282514,
+      "grad_norm": 0.1236564889550209,
+      "learning_rate": 0.00017120796651753502,
+      "loss": 0.1097,
+      "step": 9983
+    },
+    {
+      "epoch": 0.7203723078033119,
+      "grad_norm": 0.12870290875434875,
+      "learning_rate": 0.00017120508009813826,
+      "loss": 0.1418,
+      "step": 9984
+    },
+    {
+      "epoch": 0.7204444604783722,
+      "grad_norm": 0.13253255188465118,
+      "learning_rate": 0.00017120219367874152,
+      "loss": 0.1525,
+      "step": 9985
+    },
+    {
+      "epoch": 0.7205166131534326,
+      "grad_norm": 0.1435869038105011,
+      "learning_rate": 0.00017119930725934478,
+      "loss": 0.1625,
+      "step": 9986
+    },
+    {
+      "epoch": 0.7205887658284931,
+      "grad_norm": 0.11423452943563461,
+      "learning_rate": 0.00017119642083994805,
+      "loss": 0.1222,
+      "step": 9987
+    },
+    {
+      "epoch": 0.7206609185035535,
+      "grad_norm": 0.11768162250518799,
+      "learning_rate": 0.0001711935344205513,
+      "loss": 0.1332,
+      "step": 9988
+    },
+    {
+      "epoch": 0.720733071178614,
+      "grad_norm": 0.12453066557645798,
+      "learning_rate": 0.00017119064800115457,
+      "loss": 0.1189,
+      "step": 9989
+    },
+    {
+      "epoch": 0.7208052238536744,
+      "grad_norm": 0.14446821808815002,
+      "learning_rate": 0.00017118776158175783,
+      "loss": 0.1886,
+      "step": 9990
+    },
+    {
+      "epoch": 0.7208773765287348,
+      "grad_norm": 0.1353866159915924,
+      "learning_rate": 0.0001711848751623611,
+      "loss": 0.17,
+      "step": 9991
+    },
+    {
+      "epoch": 0.7209495292037952,
+      "grad_norm": 0.12891019880771637,
+      "learning_rate": 0.00017118198874296436,
+      "loss": 0.1298,
+      "step": 9992
+    },
+    {
+      "epoch": 0.7210216818788556,
+      "grad_norm": 0.13144133985042572,
+      "learning_rate": 0.00017117910232356762,
+      "loss": 0.1379,
+      "step": 9993
+    },
+    {
+      "epoch": 0.7210938345539161,
+      "grad_norm": 0.14286787807941437,
+      "learning_rate": 0.0001711762159041709,
+      "loss": 0.1333,
+      "step": 9994
+    },
+    {
+      "epoch": 0.7211659872289765,
+      "grad_norm": 0.1285174936056137,
+      "learning_rate": 0.00017117332948477415,
+      "loss": 0.1381,
+      "step": 9995
+    },
+    {
+      "epoch": 0.721238139904037,
+      "grad_norm": 0.12918663024902344,
+      "learning_rate": 0.0001711704430653774,
+      "loss": 0.1332,
+      "step": 9996
+    },
+    {
+      "epoch": 0.7213102925790974,
+      "grad_norm": 0.13201268017292023,
+      "learning_rate": 0.00017116755664598068,
+      "loss": 0.1679,
+      "step": 9997
+    },
+    {
+      "epoch": 0.7213824452541578,
+      "grad_norm": 0.12824837863445282,
+      "learning_rate": 0.00017116467022658394,
+      "loss": 0.1709,
+      "step": 9998
+    },
+    {
+      "epoch": 0.7214545979292182,
+      "grad_norm": 0.12619931995868683,
+      "learning_rate": 0.0001711617838071872,
+      "loss": 0.1428,
+      "step": 9999
+    },
+    {
+      "epoch": 0.7215267506042786,
+      "grad_norm": 0.14253847301006317,
+      "learning_rate": 0.00017115889738779046,
+      "loss": 0.1476,
+      "step": 10000
+    },
+    {
+      "epoch": 0.7215989032793391,
+      "grad_norm": 0.20578481256961823,
+      "learning_rate": 0.0001711560109683937,
+      "loss": 0.1211,
+      "step": 10001
+    },
+    {
+      "epoch": 0.7216710559543995,
+      "grad_norm": 0.15455134212970734,
+      "learning_rate": 0.00017115312454899696,
+      "loss": 0.1585,
+      "step": 10002
+    },
+    {
+      "epoch": 0.72174320862946,
+      "grad_norm": 0.12016787379980087,
+      "learning_rate": 0.00017115023812960025,
+      "loss": 0.1748,
+      "step": 10003
+    },
+    {
+      "epoch": 0.7218153613045204,
+      "grad_norm": 0.12311887741088867,
+      "learning_rate": 0.00017114735171020352,
+      "loss": 0.1412,
+      "step": 10004
+    },
+    {
+      "epoch": 0.7218875139795808,
+      "grad_norm": 0.15775811672210693,
+      "learning_rate": 0.00017114446529080678,
+      "loss": 0.1747,
+      "step": 10005
+    },
+    {
+      "epoch": 0.7219596666546412,
+      "grad_norm": 0.12297017872333527,
+      "learning_rate": 0.00017114157887141001,
+      "loss": 0.1616,
+      "step": 10006
+    },
+    {
+      "epoch": 0.7220318193297016,
+      "grad_norm": 0.10638998448848724,
+      "learning_rate": 0.00017113869245201328,
+      "loss": 0.1458,
+      "step": 10007
+    },
+    {
+      "epoch": 0.7221039720047621,
+      "grad_norm": 0.13442695140838623,
+      "learning_rate": 0.00017113580603261654,
+      "loss": 0.155,
+      "step": 10008
+    },
+    {
+      "epoch": 0.7221761246798225,
+      "grad_norm": 0.11203104257583618,
+      "learning_rate": 0.0001711329196132198,
+      "loss": 0.1521,
+      "step": 10009
+    },
+    {
+      "epoch": 0.722248277354883,
+      "grad_norm": 0.12732189893722534,
+      "learning_rate": 0.0001711300331938231,
+      "loss": 0.1849,
+      "step": 10010
+    },
+    {
+      "epoch": 0.7223204300299434,
+      "grad_norm": 0.1229824498295784,
+      "learning_rate": 0.00017112714677442633,
+      "loss": 0.1829,
+      "step": 10011
+    },
+    {
+      "epoch": 0.7223925827050038,
+      "grad_norm": 0.1607559472322464,
+      "learning_rate": 0.0001711242603550296,
+      "loss": 0.1613,
+      "step": 10012
+    },
+    {
+      "epoch": 0.7224647353800642,
+      "grad_norm": 0.1528715193271637,
+      "learning_rate": 0.00017112137393563286,
+      "loss": 0.2003,
+      "step": 10013
+    },
+    {
+      "epoch": 0.7225368880551246,
+      "grad_norm": 0.12777936458587646,
+      "learning_rate": 0.00017111848751623612,
+      "loss": 0.1339,
+      "step": 10014
+    },
+    {
+      "epoch": 0.7226090407301851,
+      "grad_norm": 0.12514327466487885,
+      "learning_rate": 0.00017111560109683938,
+      "loss": 0.1781,
+      "step": 10015
+    },
+    {
+      "epoch": 0.7226811934052455,
+      "grad_norm": 0.12802931666374207,
+      "learning_rate": 0.00017111271467744264,
+      "loss": 0.1376,
+      "step": 10016
+    },
+    {
+      "epoch": 0.722753346080306,
+      "grad_norm": 0.12756921350955963,
+      "learning_rate": 0.0001711098282580459,
+      "loss": 0.1606,
+      "step": 10017
+    },
+    {
+      "epoch": 0.7228254987553664,
+      "grad_norm": 0.129941925406456,
+      "learning_rate": 0.00017110694183864917,
+      "loss": 0.1805,
+      "step": 10018
+    },
+    {
+      "epoch": 0.7228976514304268,
+      "grad_norm": 0.11695303022861481,
+      "learning_rate": 0.00017110405541925243,
+      "loss": 0.1748,
+      "step": 10019
+    },
+    {
+      "epoch": 0.7229698041054872,
+      "grad_norm": 0.13325800001621246,
+      "learning_rate": 0.0001711011689998557,
+      "loss": 0.147,
+      "step": 10020
+    },
+    {
+      "epoch": 0.7230419567805476,
+      "grad_norm": 0.14300288259983063,
+      "learning_rate": 0.00017109828258045896,
+      "loss": 0.1306,
+      "step": 10021
+    },
+    {
+      "epoch": 0.7231141094556081,
+      "grad_norm": 0.1265317052602768,
+      "learning_rate": 0.0001710953961610622,
+      "loss": 0.1317,
+      "step": 10022
+    },
+    {
+      "epoch": 0.7231862621306685,
+      "grad_norm": 0.13566988706588745,
+      "learning_rate": 0.00017109250974166546,
+      "loss": 0.175,
+      "step": 10023
+    },
+    {
+      "epoch": 0.7232584148057289,
+      "grad_norm": 0.1730663776397705,
+      "learning_rate": 0.00017108962332226875,
+      "loss": 0.1616,
+      "step": 10024
+    },
+    {
+      "epoch": 0.7233305674807894,
+      "grad_norm": 0.13771170377731323,
+      "learning_rate": 0.000171086736902872,
+      "loss": 0.1969,
+      "step": 10025
+    },
+    {
+      "epoch": 0.7234027201558498,
+      "grad_norm": 0.12104084342718124,
+      "learning_rate": 0.00017108385048347527,
+      "loss": 0.1646,
+      "step": 10026
+    },
+    {
+      "epoch": 0.7234748728309102,
+      "grad_norm": 0.13200148940086365,
+      "learning_rate": 0.0001710809640640785,
+      "loss": 0.2274,
+      "step": 10027
+    },
+    {
+      "epoch": 0.7235470255059706,
+      "grad_norm": 0.11547756940126419,
+      "learning_rate": 0.00017107807764468177,
+      "loss": 0.1619,
+      "step": 10028
+    },
+    {
+      "epoch": 0.723619178181031,
+      "grad_norm": 0.12168179452419281,
+      "learning_rate": 0.00017107519122528503,
+      "loss": 0.1962,
+      "step": 10029
+    },
+    {
+      "epoch": 0.7236913308560915,
+      "grad_norm": 0.13543535768985748,
+      "learning_rate": 0.0001710723048058883,
+      "loss": 0.1609,
+      "step": 10030
+    },
+    {
+      "epoch": 0.7237634835311519,
+      "grad_norm": 0.13941948115825653,
+      "learning_rate": 0.0001710694183864916,
+      "loss": 0.1634,
+      "step": 10031
+    },
+    {
+      "epoch": 0.7238356362062124,
+      "grad_norm": 0.11474108695983887,
+      "learning_rate": 0.00017106653196709482,
+      "loss": 0.1859,
+      "step": 10032
+    },
+    {
+      "epoch": 0.7239077888812727,
+      "grad_norm": 0.12846776843070984,
+      "learning_rate": 0.00017106364554769809,
+      "loss": 0.149,
+      "step": 10033
+    },
+    {
+      "epoch": 0.7239799415563332,
+      "grad_norm": 0.10760863125324249,
+      "learning_rate": 0.00017106075912830135,
+      "loss": 0.1786,
+      "step": 10034
+    },
+    {
+      "epoch": 0.7240520942313936,
+      "grad_norm": 0.12543274462223053,
+      "learning_rate": 0.0001710578727089046,
+      "loss": 0.1386,
+      "step": 10035
+    },
+    {
+      "epoch": 0.724124246906454,
+      "grad_norm": 0.11868203431367874,
+      "learning_rate": 0.00017105498628950788,
+      "loss": 0.1529,
+      "step": 10036
+    },
+    {
+      "epoch": 0.7241963995815145,
+      "grad_norm": 0.11668435484170914,
+      "learning_rate": 0.00017105209987011114,
+      "loss": 0.1325,
+      "step": 10037
+    },
+    {
+      "epoch": 0.7242685522565749,
+      "grad_norm": 0.15729379653930664,
+      "learning_rate": 0.0001710492134507144,
+      "loss": 0.1503,
+      "step": 10038
+    },
+    {
+      "epoch": 0.7243407049316354,
+      "grad_norm": 0.1290607750415802,
+      "learning_rate": 0.00017104632703131766,
+      "loss": 0.1718,
+      "step": 10039
+    },
+    {
+      "epoch": 0.7244128576066957,
+      "grad_norm": 0.12674786150455475,
+      "learning_rate": 0.00017104344061192093,
+      "loss": 0.0857,
+      "step": 10040
+    },
+    {
+      "epoch": 0.7244850102817562,
+      "grad_norm": 0.11376924812793732,
+      "learning_rate": 0.0001710405541925242,
+      "loss": 0.142,
+      "step": 10041
+    },
+    {
+      "epoch": 0.7245571629568166,
+      "grad_norm": 0.11870646476745605,
+      "learning_rate": 0.00017103766777312745,
+      "loss": 0.1461,
+      "step": 10042
+    },
+    {
+      "epoch": 0.724629315631877,
+      "grad_norm": 0.13887138664722443,
+      "learning_rate": 0.0001710347813537307,
+      "loss": 0.129,
+      "step": 10043
+    },
+    {
+      "epoch": 0.7247014683069375,
+      "grad_norm": 0.11356978863477707,
+      "learning_rate": 0.00017103189493433395,
+      "loss": 0.097,
+      "step": 10044
+    },
+    {
+      "epoch": 0.7247736209819979,
+      "grad_norm": 0.1348128318786621,
+      "learning_rate": 0.00017102900851493724,
+      "loss": 0.1446,
+      "step": 10045
+    },
+    {
+      "epoch": 0.7248457736570584,
+      "grad_norm": 0.13882333040237427,
+      "learning_rate": 0.0001710261220955405,
+      "loss": 0.1242,
+      "step": 10046
+    },
+    {
+      "epoch": 0.7249179263321187,
+      "grad_norm": 0.13452178239822388,
+      "learning_rate": 0.00017102323567614377,
+      "loss": 0.1336,
+      "step": 10047
+    },
+    {
+      "epoch": 0.7249900790071792,
+      "grad_norm": 0.10064591467380524,
+      "learning_rate": 0.000171020349256747,
+      "loss": 0.0977,
+      "step": 10048
+    },
+    {
+      "epoch": 0.7250622316822396,
+      "grad_norm": 0.14382019639015198,
+      "learning_rate": 0.00017101746283735027,
+      "loss": 0.1991,
+      "step": 10049
+    },
+    {
+      "epoch": 0.7251343843573,
+      "grad_norm": 0.11695842444896698,
+      "learning_rate": 0.00017101457641795353,
+      "loss": 0.1164,
+      "step": 10050
+    },
+    {
+      "epoch": 0.7252065370323605,
+      "grad_norm": 0.1313917487859726,
+      "learning_rate": 0.0001710116899985568,
+      "loss": 0.1575,
+      "step": 10051
+    },
+    {
+      "epoch": 0.7252786897074209,
+      "grad_norm": 0.16316965222358704,
+      "learning_rate": 0.00017100880357916008,
+      "loss": 0.1643,
+      "step": 10052
+    },
+    {
+      "epoch": 0.7253508423824814,
+      "grad_norm": 0.1231640875339508,
+      "learning_rate": 0.00017100591715976332,
+      "loss": 0.1688,
+      "step": 10053
+    },
+    {
+      "epoch": 0.7254229950575417,
+      "grad_norm": 0.1439533829689026,
+      "learning_rate": 0.00017100303074036658,
+      "loss": 0.1871,
+      "step": 10054
+    },
+    {
+      "epoch": 0.7254951477326022,
+      "grad_norm": 0.12452032417058945,
+      "learning_rate": 0.00017100014432096984,
+      "loss": 0.1418,
+      "step": 10055
+    },
+    {
+      "epoch": 0.7255673004076626,
+      "grad_norm": 0.12324023991823196,
+      "learning_rate": 0.0001709972579015731,
+      "loss": 0.1924,
+      "step": 10056
+    },
+    {
+      "epoch": 0.725639453082723,
+      "grad_norm": 0.13798697292804718,
+      "learning_rate": 0.00017099437148217637,
+      "loss": 0.1033,
+      "step": 10057
+    },
+    {
+      "epoch": 0.7257116057577835,
+      "grad_norm": 0.13696545362472534,
+      "learning_rate": 0.00017099148506277963,
+      "loss": 0.1339,
+      "step": 10058
+    },
+    {
+      "epoch": 0.7257837584328439,
+      "grad_norm": 0.1420350968837738,
+      "learning_rate": 0.0001709885986433829,
+      "loss": 0.2039,
+      "step": 10059
+    },
+    {
+      "epoch": 0.7258559111079044,
+      "grad_norm": 0.1186450943350792,
+      "learning_rate": 0.00017098571222398616,
+      "loss": 0.1643,
+      "step": 10060
+    },
+    {
+      "epoch": 0.7259280637829647,
+      "grad_norm": 0.1391008347272873,
+      "learning_rate": 0.00017098282580458942,
+      "loss": 0.1168,
+      "step": 10061
+    },
+    {
+      "epoch": 0.7260002164580251,
+      "grad_norm": 0.14331486821174622,
+      "learning_rate": 0.00017097993938519268,
+      "loss": 0.212,
+      "step": 10062
+    },
+    {
+      "epoch": 0.7260723691330856,
+      "grad_norm": 0.0992756262421608,
+      "learning_rate": 0.00017097705296579595,
+      "loss": 0.1528,
+      "step": 10063
+    },
+    {
+      "epoch": 0.726144521808146,
+      "grad_norm": 0.14541590213775635,
+      "learning_rate": 0.00017097416654639918,
+      "loss": 0.2204,
+      "step": 10064
+    },
+    {
+      "epoch": 0.7262166744832065,
+      "grad_norm": 0.13561774790287018,
+      "learning_rate": 0.00017097128012700245,
+      "loss": 0.1278,
+      "step": 10065
+    },
+    {
+      "epoch": 0.7262888271582669,
+      "grad_norm": 0.12054046988487244,
+      "learning_rate": 0.00017096839370760574,
+      "loss": 0.1575,
+      "step": 10066
+    },
+    {
+      "epoch": 0.7263609798333274,
+      "grad_norm": 0.12773770093917847,
+      "learning_rate": 0.000170965507288209,
+      "loss": 0.1479,
+      "step": 10067
+    },
+    {
+      "epoch": 0.7264331325083877,
+      "grad_norm": 0.12233958393335342,
+      "learning_rate": 0.00017096262086881226,
+      "loss": 0.1576,
+      "step": 10068
+    },
+    {
+      "epoch": 0.7265052851834481,
+      "grad_norm": 0.12692715227603912,
+      "learning_rate": 0.0001709597344494155,
+      "loss": 0.1406,
+      "step": 10069
+    },
+    {
+      "epoch": 0.7265774378585086,
+      "grad_norm": 0.14090755581855774,
+      "learning_rate": 0.00017095684803001876,
+      "loss": 0.1059,
+      "step": 10070
+    },
+    {
+      "epoch": 0.726649590533569,
+      "grad_norm": 0.13065628707408905,
+      "learning_rate": 0.00017095396161062202,
+      "loss": 0.1638,
+      "step": 10071
+    },
+    {
+      "epoch": 0.7267217432086295,
+      "grad_norm": 0.13230380415916443,
+      "learning_rate": 0.00017095107519122529,
+      "loss": 0.1357,
+      "step": 10072
+    },
+    {
+      "epoch": 0.7267938958836899,
+      "grad_norm": 0.14219319820404053,
+      "learning_rate": 0.00017094818877182858,
+      "loss": 0.2059,
+      "step": 10073
+    },
+    {
+      "epoch": 0.7268660485587504,
+      "grad_norm": 0.12111040204763412,
+      "learning_rate": 0.0001709453023524318,
+      "loss": 0.1343,
+      "step": 10074
+    },
+    {
+      "epoch": 0.7269382012338107,
+      "grad_norm": 0.11151017248630524,
+      "learning_rate": 0.00017094241593303507,
+      "loss": 0.1541,
+      "step": 10075
+    },
+    {
+      "epoch": 0.7270103539088711,
+      "grad_norm": 0.13076148927211761,
+      "learning_rate": 0.00017093952951363834,
+      "loss": 0.1342,
+      "step": 10076
+    },
+    {
+      "epoch": 0.7270825065839316,
+      "grad_norm": 0.15508994460105896,
+      "learning_rate": 0.0001709366430942416,
+      "loss": 0.1355,
+      "step": 10077
+    },
+    {
+      "epoch": 0.727154659258992,
+      "grad_norm": 0.13253220915794373,
+      "learning_rate": 0.00017093375667484486,
+      "loss": 0.1287,
+      "step": 10078
+    },
+    {
+      "epoch": 0.7272268119340525,
+      "grad_norm": 0.1186075359582901,
+      "learning_rate": 0.00017093087025544813,
+      "loss": 0.1317,
+      "step": 10079
+    },
+    {
+      "epoch": 0.7272989646091129,
+      "grad_norm": 0.15678545832633972,
+      "learning_rate": 0.0001709279838360514,
+      "loss": 0.1704,
+      "step": 10080
+    },
+    {
+      "epoch": 0.7273711172841734,
+      "grad_norm": 0.09805598109960556,
+      "learning_rate": 0.00017092509741665465,
+      "loss": 0.1376,
+      "step": 10081
+    },
+    {
+      "epoch": 0.7274432699592337,
+      "grad_norm": 0.13805170357227325,
+      "learning_rate": 0.00017092221099725792,
+      "loss": 0.204,
+      "step": 10082
+    },
+    {
+      "epoch": 0.7275154226342941,
+      "grad_norm": 0.12763634324073792,
+      "learning_rate": 0.00017091932457786118,
+      "loss": 0.1744,
+      "step": 10083
+    },
+    {
+      "epoch": 0.7275875753093546,
+      "grad_norm": 0.1276688426733017,
+      "learning_rate": 0.00017091643815846444,
+      "loss": 0.1901,
+      "step": 10084
+    },
+    {
+      "epoch": 0.727659727984415,
+      "grad_norm": 0.14280658960342407,
+      "learning_rate": 0.00017091355173906768,
+      "loss": 0.144,
+      "step": 10085
+    },
+    {
+      "epoch": 0.7277318806594755,
+      "grad_norm": 0.11498956382274628,
+      "learning_rate": 0.00017091066531967094,
+      "loss": 0.1353,
+      "step": 10086
+    },
+    {
+      "epoch": 0.7278040333345359,
+      "grad_norm": 0.10629608482122421,
+      "learning_rate": 0.00017090777890027423,
+      "loss": 0.145,
+      "step": 10087
+    },
+    {
+      "epoch": 0.7278761860095964,
+      "grad_norm": 0.1334647238254547,
+      "learning_rate": 0.0001709048924808775,
+      "loss": 0.1681,
+      "step": 10088
+    },
+    {
+      "epoch": 0.7279483386846567,
+      "grad_norm": 0.12539075314998627,
+      "learning_rate": 0.00017090200606148076,
+      "loss": 0.1383,
+      "step": 10089
+    },
+    {
+      "epoch": 0.7280204913597171,
+      "grad_norm": 0.13498573005199432,
+      "learning_rate": 0.000170899119642084,
+      "loss": 0.1435,
+      "step": 10090
+    },
+    {
+      "epoch": 0.7280926440347776,
+      "grad_norm": 0.11439192295074463,
+      "learning_rate": 0.00017089623322268725,
+      "loss": 0.1539,
+      "step": 10091
+    },
+    {
+      "epoch": 0.728164796709838,
+      "grad_norm": 0.10842323303222656,
+      "learning_rate": 0.00017089334680329052,
+      "loss": 0.1098,
+      "step": 10092
+    },
+    {
+      "epoch": 0.7282369493848985,
+      "grad_norm": 0.1083604246377945,
+      "learning_rate": 0.00017089046038389378,
+      "loss": 0.1522,
+      "step": 10093
+    },
+    {
+      "epoch": 0.7283091020599589,
+      "grad_norm": 0.15302124619483948,
+      "learning_rate": 0.00017088757396449704,
+      "loss": 0.1659,
+      "step": 10094
+    },
+    {
+      "epoch": 0.7283812547350192,
+      "grad_norm": 0.1261652261018753,
+      "learning_rate": 0.0001708846875451003,
+      "loss": 0.1497,
+      "step": 10095
+    },
+    {
+      "epoch": 0.7284534074100797,
+      "grad_norm": 0.1251983344554901,
+      "learning_rate": 0.00017088180112570357,
+      "loss": 0.1364,
+      "step": 10096
+    },
+    {
+      "epoch": 0.7285255600851401,
+      "grad_norm": 0.13810065388679504,
+      "learning_rate": 0.00017087891470630683,
+      "loss": 0.1888,
+      "step": 10097
+    },
+    {
+      "epoch": 0.7285977127602006,
+      "grad_norm": 0.16389283537864685,
+      "learning_rate": 0.0001708760282869101,
+      "loss": 0.1574,
+      "step": 10098
+    },
+    {
+      "epoch": 0.728669865435261,
+      "grad_norm": 0.14504991471767426,
+      "learning_rate": 0.00017087314186751336,
+      "loss": 0.1818,
+      "step": 10099
+    },
+    {
+      "epoch": 0.7287420181103215,
+      "grad_norm": 0.10631627589464188,
+      "learning_rate": 0.00017087025544811662,
+      "loss": 0.1367,
+      "step": 10100
+    },
+    {
+      "epoch": 0.7288141707853819,
+      "grad_norm": 0.14447440207004547,
+      "learning_rate": 0.00017086736902871988,
+      "loss": 0.1231,
+      "step": 10101
+    },
+    {
+      "epoch": 0.7288863234604422,
+      "grad_norm": 0.11821649223566055,
+      "learning_rate": 0.00017086448260932315,
+      "loss": 0.1434,
+      "step": 10102
+    },
+    {
+      "epoch": 0.7289584761355027,
+      "grad_norm": 0.1422772854566574,
+      "learning_rate": 0.0001708615961899264,
+      "loss": 0.2335,
+      "step": 10103
+    },
+    {
+      "epoch": 0.7290306288105631,
+      "grad_norm": 0.13319611549377441,
+      "learning_rate": 0.00017085870977052967,
+      "loss": 0.1332,
+      "step": 10104
+    },
+    {
+      "epoch": 0.7291027814856236,
+      "grad_norm": 0.13768510520458221,
+      "learning_rate": 0.00017085582335113294,
+      "loss": 0.1334,
+      "step": 10105
+    },
+    {
+      "epoch": 0.729174934160684,
+      "grad_norm": 0.12326176464557648,
+      "learning_rate": 0.0001708529369317362,
+      "loss": 0.1608,
+      "step": 10106
+    },
+    {
+      "epoch": 0.7292470868357445,
+      "grad_norm": 0.10238928347826004,
+      "learning_rate": 0.00017085005051233943,
+      "loss": 0.1859,
+      "step": 10107
+    },
+    {
+      "epoch": 0.7293192395108049,
+      "grad_norm": 0.12593914568424225,
+      "learning_rate": 0.0001708471640929427,
+      "loss": 0.1376,
+      "step": 10108
+    },
+    {
+      "epoch": 0.7293913921858652,
+      "grad_norm": 0.12407062202692032,
+      "learning_rate": 0.000170844277673546,
+      "loss": 0.1306,
+      "step": 10109
+    },
+    {
+      "epoch": 0.7294635448609257,
+      "grad_norm": 0.1254005879163742,
+      "learning_rate": 0.00017084139125414925,
+      "loss": 0.2009,
+      "step": 10110
+    },
+    {
+      "epoch": 0.7295356975359861,
+      "grad_norm": 0.12667208909988403,
+      "learning_rate": 0.0001708385048347525,
+      "loss": 0.1096,
+      "step": 10111
+    },
+    {
+      "epoch": 0.7296078502110466,
+      "grad_norm": 0.11722003668546677,
+      "learning_rate": 0.00017083561841535575,
+      "loss": 0.1369,
+      "step": 10112
+    },
+    {
+      "epoch": 0.729680002886107,
+      "grad_norm": 0.13819009065628052,
+      "learning_rate": 0.000170832731995959,
+      "loss": 0.1957,
+      "step": 10113
+    },
+    {
+      "epoch": 0.7297521555611675,
+      "grad_norm": 0.15020692348480225,
+      "learning_rate": 0.00017082984557656227,
+      "loss": 0.1156,
+      "step": 10114
+    },
+    {
+      "epoch": 0.7298243082362279,
+      "grad_norm": 0.126190185546875,
+      "learning_rate": 0.00017082695915716554,
+      "loss": 0.1554,
+      "step": 10115
+    },
+    {
+      "epoch": 0.7298964609112882,
+      "grad_norm": 0.17837029695510864,
+      "learning_rate": 0.00017082407273776883,
+      "loss": 0.1639,
+      "step": 10116
+    },
+    {
+      "epoch": 0.7299686135863487,
+      "grad_norm": 0.15776686370372772,
+      "learning_rate": 0.00017082118631837206,
+      "loss": 0.1393,
+      "step": 10117
+    },
+    {
+      "epoch": 0.7300407662614091,
+      "grad_norm": 0.14482903480529785,
+      "learning_rate": 0.00017081829989897533,
+      "loss": 0.1971,
+      "step": 10118
+    },
+    {
+      "epoch": 0.7301129189364696,
+      "grad_norm": 0.13167844712734222,
+      "learning_rate": 0.0001708154134795786,
+      "loss": 0.1216,
+      "step": 10119
+    },
+    {
+      "epoch": 0.73018507161153,
+      "grad_norm": 0.12372057139873505,
+      "learning_rate": 0.00017081252706018185,
+      "loss": 0.1852,
+      "step": 10120
+    },
+    {
+      "epoch": 0.7302572242865905,
+      "grad_norm": 0.12731702625751495,
+      "learning_rate": 0.00017080964064078512,
+      "loss": 0.1643,
+      "step": 10121
+    },
+    {
+      "epoch": 0.7303293769616509,
+      "grad_norm": 0.1208278015255928,
+      "learning_rate": 0.00017080675422138838,
+      "loss": 0.1215,
+      "step": 10122
+    },
+    {
+      "epoch": 0.7304015296367112,
+      "grad_norm": 0.11122459918260574,
+      "learning_rate": 0.00017080386780199164,
+      "loss": 0.1746,
+      "step": 10123
+    },
+    {
+      "epoch": 0.7304736823117717,
+      "grad_norm": 0.1511441320180893,
+      "learning_rate": 0.0001708009813825949,
+      "loss": 0.1628,
+      "step": 10124
+    },
+    {
+      "epoch": 0.7305458349868321,
+      "grad_norm": 0.11985379457473755,
+      "learning_rate": 0.00017079809496319817,
+      "loss": 0.1583,
+      "step": 10125
+    },
+    {
+      "epoch": 0.7306179876618926,
+      "grad_norm": 0.12886056303977966,
+      "learning_rate": 0.00017079520854380143,
+      "loss": 0.1738,
+      "step": 10126
+    },
+    {
+      "epoch": 0.730690140336953,
+      "grad_norm": 0.1353933960199356,
+      "learning_rate": 0.0001707923221244047,
+      "loss": 0.1565,
+      "step": 10127
+    },
+    {
+      "epoch": 0.7307622930120135,
+      "grad_norm": 0.1290416270494461,
+      "learning_rate": 0.00017078943570500793,
+      "loss": 0.1337,
+      "step": 10128
+    },
+    {
+      "epoch": 0.7308344456870739,
+      "grad_norm": 0.13235312700271606,
+      "learning_rate": 0.0001707865492856112,
+      "loss": 0.1311,
+      "step": 10129
+    },
+    {
+      "epoch": 0.7309065983621342,
+      "grad_norm": 0.10793448984622955,
+      "learning_rate": 0.00017078366286621448,
+      "loss": 0.1722,
+      "step": 10130
+    },
+    {
+      "epoch": 0.7309787510371947,
+      "grad_norm": 0.1269800215959549,
+      "learning_rate": 0.00017078077644681774,
+      "loss": 0.114,
+      "step": 10131
+    },
+    {
+      "epoch": 0.7310509037122551,
+      "grad_norm": 0.13012513518333435,
+      "learning_rate": 0.000170777890027421,
+      "loss": 0.1407,
+      "step": 10132
+    },
+    {
+      "epoch": 0.7311230563873156,
+      "grad_norm": 0.1555079072713852,
+      "learning_rate": 0.00017077500360802424,
+      "loss": 0.1749,
+      "step": 10133
+    },
+    {
+      "epoch": 0.731195209062376,
+      "grad_norm": 0.12376718968153,
+      "learning_rate": 0.0001707721171886275,
+      "loss": 0.1088,
+      "step": 10134
+    },
+    {
+      "epoch": 0.7312673617374364,
+      "grad_norm": 0.10243170708417892,
+      "learning_rate": 0.00017076923076923077,
+      "loss": 0.1269,
+      "step": 10135
+    },
+    {
+      "epoch": 0.7313395144124969,
+      "grad_norm": 0.14888563752174377,
+      "learning_rate": 0.00017076634434983403,
+      "loss": 0.19,
+      "step": 10136
+    },
+    {
+      "epoch": 0.7314116670875572,
+      "grad_norm": 0.12645818293094635,
+      "learning_rate": 0.00017076345793043732,
+      "loss": 0.1304,
+      "step": 10137
+    },
+    {
+      "epoch": 0.7314838197626177,
+      "grad_norm": 0.13674162328243256,
+      "learning_rate": 0.00017076057151104056,
+      "loss": 0.1357,
+      "step": 10138
+    },
+    {
+      "epoch": 0.7315559724376781,
+      "grad_norm": 0.13103006780147552,
+      "learning_rate": 0.00017075768509164382,
+      "loss": 0.145,
+      "step": 10139
+    },
+    {
+      "epoch": 0.7316281251127386,
+      "grad_norm": 0.10523130744695663,
+      "learning_rate": 0.00017075479867224708,
+      "loss": 0.1585,
+      "step": 10140
+    },
+    {
+      "epoch": 0.731700277787799,
+      "grad_norm": 0.10682960599660873,
+      "learning_rate": 0.00017075191225285035,
+      "loss": 0.1467,
+      "step": 10141
+    },
+    {
+      "epoch": 0.7317724304628594,
+      "grad_norm": 0.12744466960430145,
+      "learning_rate": 0.0001707490258334536,
+      "loss": 0.17,
+      "step": 10142
+    },
+    {
+      "epoch": 0.7318445831379199,
+      "grad_norm": 0.1741938292980194,
+      "learning_rate": 0.00017074613941405687,
+      "loss": 0.1534,
+      "step": 10143
+    },
+    {
+      "epoch": 0.7319167358129802,
+      "grad_norm": 0.11006762832403183,
+      "learning_rate": 0.00017074325299466014,
+      "loss": 0.1436,
+      "step": 10144
+    },
+    {
+      "epoch": 0.7319888884880407,
+      "grad_norm": 0.1253357082605362,
+      "learning_rate": 0.0001707403665752634,
+      "loss": 0.1665,
+      "step": 10145
+    },
+    {
+      "epoch": 0.7320610411631011,
+      "grad_norm": 0.11946944147348404,
+      "learning_rate": 0.00017073748015586666,
+      "loss": 0.1685,
+      "step": 10146
+    },
+    {
+      "epoch": 0.7321331938381616,
+      "grad_norm": 0.3890308737754822,
+      "learning_rate": 0.00017073459373646992,
+      "loss": 0.1891,
+      "step": 10147
+    },
+    {
+      "epoch": 0.732205346513222,
+      "grad_norm": 0.13302887976169586,
+      "learning_rate": 0.0001707317073170732,
+      "loss": 0.1052,
+      "step": 10148
+    },
+    {
+      "epoch": 0.7322774991882824,
+      "grad_norm": 0.1204490065574646,
+      "learning_rate": 0.00017072882089767642,
+      "loss": 0.128,
+      "step": 10149
+    },
+    {
+      "epoch": 0.7323496518633429,
+      "grad_norm": 0.15260469913482666,
+      "learning_rate": 0.00017072593447827969,
+      "loss": 0.1898,
+      "step": 10150
+    },
+    {
+      "epoch": 0.7324218045384032,
+      "grad_norm": 0.15671607851982117,
+      "learning_rate": 0.00017072304805888298,
+      "loss": 0.1479,
+      "step": 10151
+    },
+    {
+      "epoch": 0.7324939572134637,
+      "grad_norm": 0.16737323999404907,
+      "learning_rate": 0.00017072016163948624,
+      "loss": 0.1342,
+      "step": 10152
+    },
+    {
+      "epoch": 0.7325661098885241,
+      "grad_norm": 0.14424218237400055,
+      "learning_rate": 0.0001707172752200895,
+      "loss": 0.1619,
+      "step": 10153
+    },
+    {
+      "epoch": 0.7326382625635846,
+      "grad_norm": 0.12221214920282364,
+      "learning_rate": 0.00017071438880069274,
+      "loss": 0.1382,
+      "step": 10154
+    },
+    {
+      "epoch": 0.732710415238645,
+      "grad_norm": 0.12235883623361588,
+      "learning_rate": 0.000170711502381296,
+      "loss": 0.143,
+      "step": 10155
+    },
+    {
+      "epoch": 0.7327825679137054,
+      "grad_norm": 0.12458932399749756,
+      "learning_rate": 0.00017070861596189926,
+      "loss": 0.1641,
+      "step": 10156
+    },
+    {
+      "epoch": 0.7328547205887658,
+      "grad_norm": 0.12354271113872528,
+      "learning_rate": 0.00017070572954250253,
+      "loss": 0.1511,
+      "step": 10157
+    },
+    {
+      "epoch": 0.7329268732638262,
+      "grad_norm": 0.142563596367836,
+      "learning_rate": 0.00017070284312310582,
+      "loss": 0.1252,
+      "step": 10158
+    },
+    {
+      "epoch": 0.7329990259388867,
+      "grad_norm": 0.13783249258995056,
+      "learning_rate": 0.00017069995670370905,
+      "loss": 0.1016,
+      "step": 10159
+    },
+    {
+      "epoch": 0.7330711786139471,
+      "grad_norm": 0.14102815091609955,
+      "learning_rate": 0.00017069707028431231,
+      "loss": 0.171,
+      "step": 10160
+    },
+    {
+      "epoch": 0.7331433312890075,
+      "grad_norm": 0.1071409359574318,
+      "learning_rate": 0.00017069418386491558,
+      "loss": 0.1769,
+      "step": 10161
+    },
+    {
+      "epoch": 0.733215483964068,
+      "grad_norm": 0.1648252159357071,
+      "learning_rate": 0.00017069129744551884,
+      "loss": 0.1868,
+      "step": 10162
+    },
+    {
+      "epoch": 0.7332876366391284,
+      "grad_norm": 0.11002007126808167,
+      "learning_rate": 0.0001706884110261221,
+      "loss": 0.1612,
+      "step": 10163
+    },
+    {
+      "epoch": 0.7333597893141888,
+      "grad_norm": 0.13496430218219757,
+      "learning_rate": 0.00017068552460672537,
+      "loss": 0.1275,
+      "step": 10164
+    },
+    {
+      "epoch": 0.7334319419892492,
+      "grad_norm": 0.14498379826545715,
+      "learning_rate": 0.00017068263818732863,
+      "loss": 0.1544,
+      "step": 10165
+    },
+    {
+      "epoch": 0.7335040946643097,
+      "grad_norm": 0.1313927322626114,
+      "learning_rate": 0.0001706797517679319,
+      "loss": 0.1489,
+      "step": 10166
+    },
+    {
+      "epoch": 0.7335762473393701,
+      "grad_norm": 0.12810350954532623,
+      "learning_rate": 0.00017067686534853516,
+      "loss": 0.1256,
+      "step": 10167
+    },
+    {
+      "epoch": 0.7336484000144305,
+      "grad_norm": 0.1490558534860611,
+      "learning_rate": 0.00017067397892913842,
+      "loss": 0.1415,
+      "step": 10168
+    },
+    {
+      "epoch": 0.733720552689491,
+      "grad_norm": 0.1265607327222824,
+      "learning_rate": 0.00017067109250974168,
+      "loss": 0.138,
+      "step": 10169
+    },
+    {
+      "epoch": 0.7337927053645514,
+      "grad_norm": 0.14467424154281616,
+      "learning_rate": 0.00017066820609034492,
+      "loss": 0.164,
+      "step": 10170
+    },
+    {
+      "epoch": 0.7338648580396118,
+      "grad_norm": 0.14883024990558624,
+      "learning_rate": 0.00017066531967094818,
+      "loss": 0.1691,
+      "step": 10171
+    },
+    {
+      "epoch": 0.7339370107146722,
+      "grad_norm": 0.11522503942251205,
+      "learning_rate": 0.00017066243325155147,
+      "loss": 0.1184,
+      "step": 10172
+    },
+    {
+      "epoch": 0.7340091633897327,
+      "grad_norm": 0.18318067491054535,
+      "learning_rate": 0.00017065954683215473,
+      "loss": 0.1523,
+      "step": 10173
+    },
+    {
+      "epoch": 0.7340813160647931,
+      "grad_norm": 0.11408477276563644,
+      "learning_rate": 0.000170656660412758,
+      "loss": 0.1236,
+      "step": 10174
+    },
+    {
+      "epoch": 0.7341534687398535,
+      "grad_norm": 0.11548970639705658,
+      "learning_rate": 0.00017065377399336123,
+      "loss": 0.1361,
+      "step": 10175
+    },
+    {
+      "epoch": 0.734225621414914,
+      "grad_norm": 0.11234814673662186,
+      "learning_rate": 0.0001706508875739645,
+      "loss": 0.1877,
+      "step": 10176
+    },
+    {
+      "epoch": 0.7342977740899744,
+      "grad_norm": 0.12228648364543915,
+      "learning_rate": 0.00017064800115456776,
+      "loss": 0.1763,
+      "step": 10177
+    },
+    {
+      "epoch": 0.7343699267650348,
+      "grad_norm": 0.12698303163051605,
+      "learning_rate": 0.00017064511473517102,
+      "loss": 0.1457,
+      "step": 10178
+    },
+    {
+      "epoch": 0.7344420794400952,
+      "grad_norm": 0.10683344304561615,
+      "learning_rate": 0.0001706422283157743,
+      "loss": 0.1815,
+      "step": 10179
+    },
+    {
+      "epoch": 0.7345142321151557,
+      "grad_norm": 0.15215647220611572,
+      "learning_rate": 0.00017063934189637755,
+      "loss": 0.126,
+      "step": 10180
+    },
+    {
+      "epoch": 0.7345863847902161,
+      "grad_norm": 0.11738741397857666,
+      "learning_rate": 0.0001706364554769808,
+      "loss": 0.1444,
+      "step": 10181
+    },
+    {
+      "epoch": 0.7346585374652765,
+      "grad_norm": 0.1048831045627594,
+      "learning_rate": 0.00017063356905758407,
+      "loss": 0.1873,
+      "step": 10182
+    },
+    {
+      "epoch": 0.734730690140337,
+      "grad_norm": 0.1351441740989685,
+      "learning_rate": 0.00017063068263818733,
+      "loss": 0.133,
+      "step": 10183
+    },
+    {
+      "epoch": 0.7348028428153974,
+      "grad_norm": 0.14018692076206207,
+      "learning_rate": 0.0001706277962187906,
+      "loss": 0.1705,
+      "step": 10184
+    },
+    {
+      "epoch": 0.7348749954904578,
+      "grad_norm": 0.10088642686605453,
+      "learning_rate": 0.00017062490979939386,
+      "loss": 0.1964,
+      "step": 10185
+    },
+    {
+      "epoch": 0.7349471481655182,
+      "grad_norm": 0.13162271678447723,
+      "learning_rate": 0.00017062202337999712,
+      "loss": 0.1733,
+      "step": 10186
+    },
+    {
+      "epoch": 0.7350193008405786,
+      "grad_norm": 0.11622099578380585,
+      "learning_rate": 0.0001706191369606004,
+      "loss": 0.1714,
+      "step": 10187
+    },
+    {
+      "epoch": 0.7350914535156391,
+      "grad_norm": 0.11611003428697586,
+      "learning_rate": 0.00017061625054120365,
+      "loss": 0.1416,
+      "step": 10188
+    },
+    {
+      "epoch": 0.7351636061906995,
+      "grad_norm": 0.1257910132408142,
+      "learning_rate": 0.0001706133641218069,
+      "loss": 0.1274,
+      "step": 10189
+    },
+    {
+      "epoch": 0.73523575886576,
+      "grad_norm": 0.1268366575241089,
+      "learning_rate": 0.00017061047770241018,
+      "loss": 0.1629,
+      "step": 10190
+    },
+    {
+      "epoch": 0.7353079115408204,
+      "grad_norm": 0.12874042987823486,
+      "learning_rate": 0.0001706075912830134,
+      "loss": 0.1698,
+      "step": 10191
+    },
+    {
+      "epoch": 0.7353800642158808,
+      "grad_norm": 0.1255289614200592,
+      "learning_rate": 0.00017060470486361667,
+      "loss": 0.1547,
+      "step": 10192
+    },
+    {
+      "epoch": 0.7354522168909412,
+      "grad_norm": 0.11216352880001068,
+      "learning_rate": 0.00017060181844421996,
+      "loss": 0.1335,
+      "step": 10193
+    },
+    {
+      "epoch": 0.7355243695660016,
+      "grad_norm": 0.09830225259065628,
+      "learning_rate": 0.00017059893202482323,
+      "loss": 0.1087,
+      "step": 10194
+    },
+    {
+      "epoch": 0.7355965222410621,
+      "grad_norm": 0.12806928157806396,
+      "learning_rate": 0.0001705960456054265,
+      "loss": 0.1885,
+      "step": 10195
+    },
+    {
+      "epoch": 0.7356686749161225,
+      "grad_norm": 0.11917254328727722,
+      "learning_rate": 0.00017059315918602973,
+      "loss": 0.155,
+      "step": 10196
+    },
+    {
+      "epoch": 0.735740827591183,
+      "grad_norm": 0.11708007752895355,
+      "learning_rate": 0.000170590272766633,
+      "loss": 0.0909,
+      "step": 10197
+    },
+    {
+      "epoch": 0.7358129802662434,
+      "grad_norm": 0.1161891296505928,
+      "learning_rate": 0.00017058738634723625,
+      "loss": 0.1694,
+      "step": 10198
+    },
+    {
+      "epoch": 0.7358851329413038,
+      "grad_norm": 0.12696947157382965,
+      "learning_rate": 0.00017058449992783951,
+      "loss": 0.1794,
+      "step": 10199
+    },
+    {
+      "epoch": 0.7359572856163642,
+      "grad_norm": 0.16883036494255066,
+      "learning_rate": 0.0001705816135084428,
+      "loss": 0.1698,
+      "step": 10200
+    },
+    {
+      "epoch": 0.7360294382914246,
+      "grad_norm": 0.11908348649740219,
+      "learning_rate": 0.00017057872708904604,
+      "loss": 0.1056,
+      "step": 10201
+    },
+    {
+      "epoch": 0.7361015909664851,
+      "grad_norm": 0.10129093378782272,
+      "learning_rate": 0.0001705758406696493,
+      "loss": 0.1728,
+      "step": 10202
+    },
+    {
+      "epoch": 0.7361737436415455,
+      "grad_norm": 0.11746098101139069,
+      "learning_rate": 0.00017057295425025257,
+      "loss": 0.146,
+      "step": 10203
+    },
+    {
+      "epoch": 0.736245896316606,
+      "grad_norm": 0.13154010474681854,
+      "learning_rate": 0.00017057006783085583,
+      "loss": 0.1349,
+      "step": 10204
+    },
+    {
+      "epoch": 0.7363180489916664,
+      "grad_norm": 0.12824952602386475,
+      "learning_rate": 0.0001705671814114591,
+      "loss": 0.1214,
+      "step": 10205
+    },
+    {
+      "epoch": 0.7363902016667268,
+      "grad_norm": 0.10343913733959198,
+      "learning_rate": 0.00017056429499206235,
+      "loss": 0.129,
+      "step": 10206
+    },
+    {
+      "epoch": 0.7364623543417872,
+      "grad_norm": 0.14011070132255554,
+      "learning_rate": 0.00017056140857266562,
+      "loss": 0.19,
+      "step": 10207
+    },
+    {
+      "epoch": 0.7365345070168476,
+      "grad_norm": 0.15326650440692902,
+      "learning_rate": 0.00017055852215326888,
+      "loss": 0.1614,
+      "step": 10208
+    },
+    {
+      "epoch": 0.7366066596919081,
+      "grad_norm": 0.11808323860168457,
+      "learning_rate": 0.00017055563573387214,
+      "loss": 0.1423,
+      "step": 10209
+    },
+    {
+      "epoch": 0.7366788123669685,
+      "grad_norm": 0.12559036910533905,
+      "learning_rate": 0.0001705527493144754,
+      "loss": 0.2252,
+      "step": 10210
+    },
+    {
+      "epoch": 0.736750965042029,
+      "grad_norm": 0.11885204911231995,
+      "learning_rate": 0.00017054986289507867,
+      "loss": 0.1672,
+      "step": 10211
+    },
+    {
+      "epoch": 0.7368231177170894,
+      "grad_norm": 0.11948374658823013,
+      "learning_rate": 0.0001705469764756819,
+      "loss": 0.1679,
+      "step": 10212
+    },
+    {
+      "epoch": 0.7368952703921497,
+      "grad_norm": 0.11641169339418411,
+      "learning_rate": 0.00017054409005628517,
+      "loss": 0.1811,
+      "step": 10213
+    },
+    {
+      "epoch": 0.7369674230672102,
+      "grad_norm": 0.14575567841529846,
+      "learning_rate": 0.00017054120363688846,
+      "loss": 0.1539,
+      "step": 10214
+    },
+    {
+      "epoch": 0.7370395757422706,
+      "grad_norm": 0.12116880714893341,
+      "learning_rate": 0.00017053831721749172,
+      "loss": 0.1403,
+      "step": 10215
+    },
+    {
+      "epoch": 0.7371117284173311,
+      "grad_norm": 0.12820352613925934,
+      "learning_rate": 0.00017053543079809498,
+      "loss": 0.171,
+      "step": 10216
+    },
+    {
+      "epoch": 0.7371838810923915,
+      "grad_norm": 0.12892208993434906,
+      "learning_rate": 0.00017053254437869822,
+      "loss": 0.1217,
+      "step": 10217
+    },
+    {
+      "epoch": 0.737256033767452,
+      "grad_norm": 0.11848312616348267,
+      "learning_rate": 0.00017052965795930148,
+      "loss": 0.1366,
+      "step": 10218
+    },
+    {
+      "epoch": 0.7373281864425123,
+      "grad_norm": 0.10166176408529282,
+      "learning_rate": 0.00017052677153990475,
+      "loss": 0.1273,
+      "step": 10219
+    },
+    {
+      "epoch": 0.7374003391175727,
+      "grad_norm": 0.13539794087409973,
+      "learning_rate": 0.000170523885120508,
+      "loss": 0.1311,
+      "step": 10220
+    },
+    {
+      "epoch": 0.7374724917926332,
+      "grad_norm": 0.10737601667642593,
+      "learning_rate": 0.0001705209987011113,
+      "loss": 0.1346,
+      "step": 10221
+    },
+    {
+      "epoch": 0.7375446444676936,
+      "grad_norm": 0.11928819864988327,
+      "learning_rate": 0.00017051811228171456,
+      "loss": 0.2049,
+      "step": 10222
+    },
+    {
+      "epoch": 0.7376167971427541,
+      "grad_norm": 0.12231762707233429,
+      "learning_rate": 0.0001705152258623178,
+      "loss": 0.1663,
+      "step": 10223
+    },
+    {
+      "epoch": 0.7376889498178145,
+      "grad_norm": 0.12005126476287842,
+      "learning_rate": 0.00017051233944292106,
+      "loss": 0.1627,
+      "step": 10224
+    },
+    {
+      "epoch": 0.737761102492875,
+      "grad_norm": 0.14178934693336487,
+      "learning_rate": 0.00017050945302352432,
+      "loss": 0.1194,
+      "step": 10225
+    },
+    {
+      "epoch": 0.7378332551679353,
+      "grad_norm": 0.12870538234710693,
+      "learning_rate": 0.00017050656660412759,
+      "loss": 0.1542,
+      "step": 10226
+    },
+    {
+      "epoch": 0.7379054078429957,
+      "grad_norm": 0.11229836195707321,
+      "learning_rate": 0.00017050368018473085,
+      "loss": 0.1437,
+      "step": 10227
+    },
+    {
+      "epoch": 0.7379775605180562,
+      "grad_norm": 0.1517128050327301,
+      "learning_rate": 0.0001705007937653341,
+      "loss": 0.1417,
+      "step": 10228
+    },
+    {
+      "epoch": 0.7380497131931166,
+      "grad_norm": 0.12081311643123627,
+      "learning_rate": 0.00017049790734593738,
+      "loss": 0.1452,
+      "step": 10229
+    },
+    {
+      "epoch": 0.7381218658681771,
+      "grad_norm": 0.1069989800453186,
+      "learning_rate": 0.00017049502092654064,
+      "loss": 0.107,
+      "step": 10230
+    },
+    {
+      "epoch": 0.7381940185432375,
+      "grad_norm": 0.11483065038919449,
+      "learning_rate": 0.0001704921345071439,
+      "loss": 0.1321,
+      "step": 10231
+    },
+    {
+      "epoch": 0.738266171218298,
+      "grad_norm": 0.15927647054195404,
+      "learning_rate": 0.00017048924808774716,
+      "loss": 0.1807,
+      "step": 10232
+    },
+    {
+      "epoch": 0.7383383238933583,
+      "grad_norm": 0.12965205311775208,
+      "learning_rate": 0.00017048636166835043,
+      "loss": 0.1774,
+      "step": 10233
+    },
+    {
+      "epoch": 0.7384104765684187,
+      "grad_norm": 0.11923392862081528,
+      "learning_rate": 0.00017048347524895366,
+      "loss": 0.1272,
+      "step": 10234
+    },
+    {
+      "epoch": 0.7384826292434792,
+      "grad_norm": 0.11872682720422745,
+      "learning_rate": 0.00017048058882955695,
+      "loss": 0.1845,
+      "step": 10235
+    },
+    {
+      "epoch": 0.7385547819185396,
+      "grad_norm": 0.1188027635216713,
+      "learning_rate": 0.00017047770241016022,
+      "loss": 0.156,
+      "step": 10236
+    },
+    {
+      "epoch": 0.7386269345936001,
+      "grad_norm": 0.11587867140769958,
+      "learning_rate": 0.00017047481599076348,
+      "loss": 0.1246,
+      "step": 10237
+    },
+    {
+      "epoch": 0.7386990872686605,
+      "grad_norm": 0.10874349623918533,
+      "learning_rate": 0.00017047192957136674,
+      "loss": 0.1273,
+      "step": 10238
+    },
+    {
+      "epoch": 0.738771239943721,
+      "grad_norm": 0.12459474056959152,
+      "learning_rate": 0.00017046904315196998,
+      "loss": 0.2181,
+      "step": 10239
+    },
+    {
+      "epoch": 0.7388433926187813,
+      "grad_norm": 0.11925427615642548,
+      "learning_rate": 0.00017046615673257324,
+      "loss": 0.1334,
+      "step": 10240
+    },
+    {
+      "epoch": 0.7389155452938417,
+      "grad_norm": 0.09950932115316391,
+      "learning_rate": 0.0001704632703131765,
+      "loss": 0.1162,
+      "step": 10241
+    },
+    {
+      "epoch": 0.7389876979689022,
+      "grad_norm": 0.15613147616386414,
+      "learning_rate": 0.0001704603838937798,
+      "loss": 0.1572,
+      "step": 10242
+    },
+    {
+      "epoch": 0.7390598506439626,
+      "grad_norm": 0.10930442810058594,
+      "learning_rate": 0.00017045749747438306,
+      "loss": 0.1569,
+      "step": 10243
+    },
+    {
+      "epoch": 0.7391320033190231,
+      "grad_norm": 0.14993628859519958,
+      "learning_rate": 0.0001704546110549863,
+      "loss": 0.116,
+      "step": 10244
+    },
+    {
+      "epoch": 0.7392041559940835,
+      "grad_norm": 0.12143708020448685,
+      "learning_rate": 0.00017045172463558955,
+      "loss": 0.1969,
+      "step": 10245
+    },
+    {
+      "epoch": 0.739276308669144,
+      "grad_norm": 0.13101693987846375,
+      "learning_rate": 0.00017044883821619282,
+      "loss": 0.1173,
+      "step": 10246
+    },
+    {
+      "epoch": 0.7393484613442043,
+      "grad_norm": 0.14847607910633087,
+      "learning_rate": 0.00017044595179679608,
+      "loss": 0.1377,
+      "step": 10247
+    },
+    {
+      "epoch": 0.7394206140192647,
+      "grad_norm": 0.14123043417930603,
+      "learning_rate": 0.00017044306537739934,
+      "loss": 0.2099,
+      "step": 10248
+    },
+    {
+      "epoch": 0.7394927666943252,
+      "grad_norm": 0.13185706734657288,
+      "learning_rate": 0.0001704401789580026,
+      "loss": 0.1807,
+      "step": 10249
+    },
+    {
+      "epoch": 0.7395649193693856,
+      "grad_norm": 0.12274592369794846,
+      "learning_rate": 0.00017043729253860587,
+      "loss": 0.1357,
+      "step": 10250
+    },
+    {
+      "epoch": 0.7396370720444461,
+      "grad_norm": 0.12145980447530746,
+      "learning_rate": 0.00017043440611920913,
+      "loss": 0.1451,
+      "step": 10251
+    },
+    {
+      "epoch": 0.7397092247195065,
+      "grad_norm": 0.15183459222316742,
+      "learning_rate": 0.0001704315196998124,
+      "loss": 0.1306,
+      "step": 10252
+    },
+    {
+      "epoch": 0.739781377394567,
+      "grad_norm": 0.17286445200443268,
+      "learning_rate": 0.00017042863328041566,
+      "loss": 0.1253,
+      "step": 10253
+    },
+    {
+      "epoch": 0.7398535300696273,
+      "grad_norm": 0.12338947504758835,
+      "learning_rate": 0.00017042574686101892,
+      "loss": 0.1671,
+      "step": 10254
+    },
+    {
+      "epoch": 0.7399256827446877,
+      "grad_norm": 0.13943032920360565,
+      "learning_rate": 0.00017042286044162216,
+      "loss": 0.1527,
+      "step": 10255
+    },
+    {
+      "epoch": 0.7399978354197482,
+      "grad_norm": 0.11988866329193115,
+      "learning_rate": 0.00017041997402222545,
+      "loss": 0.1655,
+      "step": 10256
+    },
+    {
+      "epoch": 0.7400699880948086,
+      "grad_norm": 0.1152622401714325,
+      "learning_rate": 0.0001704170876028287,
+      "loss": 0.1276,
+      "step": 10257
+    },
+    {
+      "epoch": 0.7401421407698691,
+      "grad_norm": 0.15951548516750336,
+      "learning_rate": 0.00017041420118343197,
+      "loss": 0.1386,
+      "step": 10258
+    },
+    {
+      "epoch": 0.7402142934449295,
+      "grad_norm": 0.12054353952407837,
+      "learning_rate": 0.00017041131476403524,
+      "loss": 0.1663,
+      "step": 10259
+    },
+    {
+      "epoch": 0.74028644611999,
+      "grad_norm": 0.11046476662158966,
+      "learning_rate": 0.00017040842834463847,
+      "loss": 0.1466,
+      "step": 10260
+    },
+    {
+      "epoch": 0.7403585987950503,
+      "grad_norm": 0.10996885597705841,
+      "learning_rate": 0.00017040554192524173,
+      "loss": 0.1292,
+      "step": 10261
+    },
+    {
+      "epoch": 0.7404307514701107,
+      "grad_norm": 0.16070950031280518,
+      "learning_rate": 0.000170402655505845,
+      "loss": 0.2011,
+      "step": 10262
+    },
+    {
+      "epoch": 0.7405029041451712,
+      "grad_norm": 0.11673824489116669,
+      "learning_rate": 0.0001703997690864483,
+      "loss": 0.1772,
+      "step": 10263
+    },
+    {
+      "epoch": 0.7405750568202316,
+      "grad_norm": 0.11503014713525772,
+      "learning_rate": 0.00017039688266705155,
+      "loss": 0.1636,
+      "step": 10264
+    },
+    {
+      "epoch": 0.7406472094952921,
+      "grad_norm": 0.1217779591679573,
+      "learning_rate": 0.00017039399624765479,
+      "loss": 0.1235,
+      "step": 10265
+    },
+    {
+      "epoch": 0.7407193621703525,
+      "grad_norm": 0.14887690544128418,
+      "learning_rate": 0.00017039110982825805,
+      "loss": 0.1662,
+      "step": 10266
+    },
+    {
+      "epoch": 0.740791514845413,
+      "grad_norm": 0.15789437294006348,
+      "learning_rate": 0.0001703882234088613,
+      "loss": 0.1578,
+      "step": 10267
+    },
+    {
+      "epoch": 0.7408636675204733,
+      "grad_norm": 0.149520143866539,
+      "learning_rate": 0.00017038533698946457,
+      "loss": 0.1287,
+      "step": 10268
+    },
+    {
+      "epoch": 0.7409358201955337,
+      "grad_norm": 0.1349458545446396,
+      "learning_rate": 0.00017038245057006784,
+      "loss": 0.1216,
+      "step": 10269
+    },
+    {
+      "epoch": 0.7410079728705942,
+      "grad_norm": 0.12127988785505295,
+      "learning_rate": 0.0001703795641506711,
+      "loss": 0.1378,
+      "step": 10270
+    },
+    {
+      "epoch": 0.7410801255456546,
+      "grad_norm": 0.12569235265254974,
+      "learning_rate": 0.00017037667773127436,
+      "loss": 0.1641,
+      "step": 10271
+    },
+    {
+      "epoch": 0.741152278220715,
+      "grad_norm": 0.15909765660762787,
+      "learning_rate": 0.00017037379131187763,
+      "loss": 0.1541,
+      "step": 10272
+    },
+    {
+      "epoch": 0.7412244308957755,
+      "grad_norm": 0.13231971859931946,
+      "learning_rate": 0.0001703709048924809,
+      "loss": 0.1246,
+      "step": 10273
+    },
+    {
+      "epoch": 0.7412965835708359,
+      "grad_norm": 0.13595962524414062,
+      "learning_rate": 0.00017036801847308415,
+      "loss": 0.1856,
+      "step": 10274
+    },
+    {
+      "epoch": 0.7413687362458963,
+      "grad_norm": 0.11069751530885696,
+      "learning_rate": 0.00017036513205368742,
+      "loss": 0.1249,
+      "step": 10275
+    },
+    {
+      "epoch": 0.7414408889209567,
+      "grad_norm": 0.1497296690940857,
+      "learning_rate": 0.00017036224563429065,
+      "loss": 0.131,
+      "step": 10276
+    },
+    {
+      "epoch": 0.7415130415960172,
+      "grad_norm": 0.13204748928546906,
+      "learning_rate": 0.00017035935921489394,
+      "loss": 0.1537,
+      "step": 10277
+    },
+    {
+      "epoch": 0.7415851942710776,
+      "grad_norm": 0.1311349719762802,
+      "learning_rate": 0.0001703564727954972,
+      "loss": 0.1527,
+      "step": 10278
+    },
+    {
+      "epoch": 0.741657346946138,
+      "grad_norm": 0.13212205469608307,
+      "learning_rate": 0.00017035358637610047,
+      "loss": 0.1667,
+      "step": 10279
+    },
+    {
+      "epoch": 0.7417294996211985,
+      "grad_norm": 0.1319914013147354,
+      "learning_rate": 0.00017035069995670373,
+      "loss": 0.1303,
+      "step": 10280
+    },
+    {
+      "epoch": 0.7418016522962588,
+      "grad_norm": 0.14723023772239685,
+      "learning_rate": 0.00017034781353730697,
+      "loss": 0.1533,
+      "step": 10281
+    },
+    {
+      "epoch": 0.7418738049713193,
+      "grad_norm": 0.107989601790905,
+      "learning_rate": 0.00017034492711791023,
+      "loss": 0.1736,
+      "step": 10282
+    },
+    {
+      "epoch": 0.7419459576463797,
+      "grad_norm": 0.1135607436299324,
+      "learning_rate": 0.0001703420406985135,
+      "loss": 0.1548,
+      "step": 10283
+    },
+    {
+      "epoch": 0.7420181103214402,
+      "grad_norm": 0.12351679801940918,
+      "learning_rate": 0.00017033915427911678,
+      "loss": 0.1967,
+      "step": 10284
+    },
+    {
+      "epoch": 0.7420902629965006,
+      "grad_norm": 0.12371385842561722,
+      "learning_rate": 0.00017033626785972004,
+      "loss": 0.1234,
+      "step": 10285
+    },
+    {
+      "epoch": 0.742162415671561,
+      "grad_norm": 0.09827817231416702,
+      "learning_rate": 0.00017033338144032328,
+      "loss": 0.1501,
+      "step": 10286
+    },
+    {
+      "epoch": 0.7422345683466215,
+      "grad_norm": 0.14295990765094757,
+      "learning_rate": 0.00017033049502092654,
+      "loss": 0.1303,
+      "step": 10287
+    },
+    {
+      "epoch": 0.7423067210216818,
+      "grad_norm": 0.11243745684623718,
+      "learning_rate": 0.0001703276086015298,
+      "loss": 0.1327,
+      "step": 10288
+    },
+    {
+      "epoch": 0.7423788736967423,
+      "grad_norm": 0.10786975920200348,
+      "learning_rate": 0.00017032472218213307,
+      "loss": 0.1222,
+      "step": 10289
+    },
+    {
+      "epoch": 0.7424510263718027,
+      "grad_norm": 0.1218876987695694,
+      "learning_rate": 0.00017032183576273633,
+      "loss": 0.1701,
+      "step": 10290
+    },
+    {
+      "epoch": 0.7425231790468632,
+      "grad_norm": 0.11511393636465073,
+      "learning_rate": 0.0001703189493433396,
+      "loss": 0.1492,
+      "step": 10291
+    },
+    {
+      "epoch": 0.7425953317219236,
+      "grad_norm": 0.12704148888587952,
+      "learning_rate": 0.00017031606292394286,
+      "loss": 0.1061,
+      "step": 10292
+    },
+    {
+      "epoch": 0.742667484396984,
+      "grad_norm": 0.17741963267326355,
+      "learning_rate": 0.00017031317650454612,
+      "loss": 0.1653,
+      "step": 10293
+    },
+    {
+      "epoch": 0.7427396370720445,
+      "grad_norm": 0.1387127786874771,
+      "learning_rate": 0.00017031029008514938,
+      "loss": 0.1746,
+      "step": 10294
+    },
+    {
+      "epoch": 0.7428117897471048,
+      "grad_norm": 0.1183629259467125,
+      "learning_rate": 0.00017030740366575265,
+      "loss": 0.1156,
+      "step": 10295
+    },
+    {
+      "epoch": 0.7428839424221653,
+      "grad_norm": 0.13635767996311188,
+      "learning_rate": 0.0001703045172463559,
+      "loss": 0.1429,
+      "step": 10296
+    },
+    {
+      "epoch": 0.7429560950972257,
+      "grad_norm": 0.13225290179252625,
+      "learning_rate": 0.00017030163082695915,
+      "loss": 0.1052,
+      "step": 10297
+    },
+    {
+      "epoch": 0.7430282477722862,
+      "grad_norm": 0.13401448726654053,
+      "learning_rate": 0.0001702987444075624,
+      "loss": 0.1197,
+      "step": 10298
+    },
+    {
+      "epoch": 0.7431004004473466,
+      "grad_norm": 0.1291186511516571,
+      "learning_rate": 0.0001702958579881657,
+      "loss": 0.1424,
+      "step": 10299
+    },
+    {
+      "epoch": 0.743172553122407,
+      "grad_norm": 0.11414799094200134,
+      "learning_rate": 0.00017029297156876896,
+      "loss": 0.2152,
+      "step": 10300
+    },
+    {
+      "epoch": 0.7432447057974675,
+      "grad_norm": 0.10049829632043839,
+      "learning_rate": 0.00017029008514937222,
+      "loss": 0.2259,
+      "step": 10301
+    },
+    {
+      "epoch": 0.7433168584725278,
+      "grad_norm": 0.1451665759086609,
+      "learning_rate": 0.00017028719872997546,
+      "loss": 0.1854,
+      "step": 10302
+    },
+    {
+      "epoch": 0.7433890111475883,
+      "grad_norm": 0.12422867119312286,
+      "learning_rate": 0.00017028431231057872,
+      "loss": 0.1514,
+      "step": 10303
+    },
+    {
+      "epoch": 0.7434611638226487,
+      "grad_norm": 0.12117260694503784,
+      "learning_rate": 0.00017028142589118199,
+      "loss": 0.1644,
+      "step": 10304
+    },
+    {
+      "epoch": 0.7435333164977092,
+      "grad_norm": 0.1230030283331871,
+      "learning_rate": 0.00017027853947178525,
+      "loss": 0.1242,
+      "step": 10305
+    },
+    {
+      "epoch": 0.7436054691727696,
+      "grad_norm": 0.12640705704689026,
+      "learning_rate": 0.00017027565305238854,
+      "loss": 0.1934,
+      "step": 10306
+    },
+    {
+      "epoch": 0.74367762184783,
+      "grad_norm": 0.10742591321468353,
+      "learning_rate": 0.00017027276663299177,
+      "loss": 0.1581,
+      "step": 10307
+    },
+    {
+      "epoch": 0.7437497745228905,
+      "grad_norm": 0.12577266991138458,
+      "learning_rate": 0.00017026988021359504,
+      "loss": 0.1343,
+      "step": 10308
+    },
+    {
+      "epoch": 0.7438219271979508,
+      "grad_norm": 0.1116676852107048,
+      "learning_rate": 0.0001702669937941983,
+      "loss": 0.099,
+      "step": 10309
+    },
+    {
+      "epoch": 0.7438940798730113,
+      "grad_norm": 0.13507837057113647,
+      "learning_rate": 0.00017026410737480156,
+      "loss": 0.1237,
+      "step": 10310
+    },
+    {
+      "epoch": 0.7439662325480717,
+      "grad_norm": 0.15094028413295746,
+      "learning_rate": 0.00017026122095540483,
+      "loss": 0.1319,
+      "step": 10311
+    },
+    {
+      "epoch": 0.7440383852231321,
+      "grad_norm": 0.150686115026474,
+      "learning_rate": 0.0001702583345360081,
+      "loss": 0.1256,
+      "step": 10312
+    },
+    {
+      "epoch": 0.7441105378981926,
+      "grad_norm": 0.11062533408403397,
+      "learning_rate": 0.00017025544811661135,
+      "loss": 0.1243,
+      "step": 10313
+    },
+    {
+      "epoch": 0.744182690573253,
+      "grad_norm": 0.12666769325733185,
+      "learning_rate": 0.00017025256169721461,
+      "loss": 0.1767,
+      "step": 10314
+    },
+    {
+      "epoch": 0.7442548432483135,
+      "grad_norm": 0.12366464734077454,
+      "learning_rate": 0.00017024967527781788,
+      "loss": 0.15,
+      "step": 10315
+    },
+    {
+      "epoch": 0.7443269959233738,
+      "grad_norm": 0.14813701808452606,
+      "learning_rate": 0.00017024678885842114,
+      "loss": 0.1266,
+      "step": 10316
+    },
+    {
+      "epoch": 0.7443991485984343,
+      "grad_norm": 0.12138532847166061,
+      "learning_rate": 0.0001702439024390244,
+      "loss": 0.1147,
+      "step": 10317
+    },
+    {
+      "epoch": 0.7444713012734947,
+      "grad_norm": 0.12905026972293854,
+      "learning_rate": 0.00017024101601962764,
+      "loss": 0.0911,
+      "step": 10318
+    },
+    {
+      "epoch": 0.7445434539485551,
+      "grad_norm": 0.13295404613018036,
+      "learning_rate": 0.0001702381296002309,
+      "loss": 0.1484,
+      "step": 10319
+    },
+    {
+      "epoch": 0.7446156066236156,
+      "grad_norm": 0.15890981256961823,
+      "learning_rate": 0.0001702352431808342,
+      "loss": 0.1794,
+      "step": 10320
+    },
+    {
+      "epoch": 0.744687759298676,
+      "grad_norm": 0.11637790501117706,
+      "learning_rate": 0.00017023235676143746,
+      "loss": 0.1104,
+      "step": 10321
+    },
+    {
+      "epoch": 0.7447599119737365,
+      "grad_norm": 0.1495571881532669,
+      "learning_rate": 0.00017022947034204072,
+      "loss": 0.1615,
+      "step": 10322
+    },
+    {
+      "epoch": 0.7448320646487968,
+      "grad_norm": 0.12739881873130798,
+      "learning_rate": 0.00017022658392264395,
+      "loss": 0.1152,
+      "step": 10323
+    },
+    {
+      "epoch": 0.7449042173238573,
+      "grad_norm": 0.13501571118831635,
+      "learning_rate": 0.00017022369750324722,
+      "loss": 0.1711,
+      "step": 10324
+    },
+    {
+      "epoch": 0.7449763699989177,
+      "grad_norm": 0.1249973401427269,
+      "learning_rate": 0.00017022081108385048,
+      "loss": 0.1366,
+      "step": 10325
+    },
+    {
+      "epoch": 0.7450485226739781,
+      "grad_norm": 0.13027076423168182,
+      "learning_rate": 0.00017021792466445374,
+      "loss": 0.1628,
+      "step": 10326
+    },
+    {
+      "epoch": 0.7451206753490386,
+      "grad_norm": 0.1260368376970291,
+      "learning_rate": 0.00017021503824505703,
+      "loss": 0.1419,
+      "step": 10327
+    },
+    {
+      "epoch": 0.745192828024099,
+      "grad_norm": 0.12586842477321625,
+      "learning_rate": 0.00017021215182566027,
+      "loss": 0.0825,
+      "step": 10328
+    },
+    {
+      "epoch": 0.7452649806991595,
+      "grad_norm": 0.13267827033996582,
+      "learning_rate": 0.00017020926540626353,
+      "loss": 0.1424,
+      "step": 10329
+    },
+    {
+      "epoch": 0.7453371333742198,
+      "grad_norm": 0.1316024363040924,
+      "learning_rate": 0.0001702063789868668,
+      "loss": 0.1184,
+      "step": 10330
+    },
+    {
+      "epoch": 0.7454092860492803,
+      "grad_norm": 0.12148045748472214,
+      "learning_rate": 0.00017020349256747006,
+      "loss": 0.1377,
+      "step": 10331
+    },
+    {
+      "epoch": 0.7454814387243407,
+      "grad_norm": 0.1150650829076767,
+      "learning_rate": 0.00017020060614807332,
+      "loss": 0.209,
+      "step": 10332
+    },
+    {
+      "epoch": 0.7455535913994011,
+      "grad_norm": 0.14753451943397522,
+      "learning_rate": 0.00017019771972867658,
+      "loss": 0.1344,
+      "step": 10333
+    },
+    {
+      "epoch": 0.7456257440744616,
+      "grad_norm": 0.1898769736289978,
+      "learning_rate": 0.00017019483330927985,
+      "loss": 0.1278,
+      "step": 10334
+    },
+    {
+      "epoch": 0.745697896749522,
+      "grad_norm": 0.1436893194913864,
+      "learning_rate": 0.0001701919468898831,
+      "loss": 0.1581,
+      "step": 10335
+    },
+    {
+      "epoch": 0.7457700494245825,
+      "grad_norm": 0.10907220095396042,
+      "learning_rate": 0.00017018906047048637,
+      "loss": 0.1198,
+      "step": 10336
+    },
+    {
+      "epoch": 0.7458422020996428,
+      "grad_norm": 0.09962081164121628,
+      "learning_rate": 0.00017018617405108964,
+      "loss": 0.1721,
+      "step": 10337
+    },
+    {
+      "epoch": 0.7459143547747032,
+      "grad_norm": 0.10710161179304123,
+      "learning_rate": 0.0001701832876316929,
+      "loss": 0.1383,
+      "step": 10338
+    },
+    {
+      "epoch": 0.7459865074497637,
+      "grad_norm": 0.13437522947788239,
+      "learning_rate": 0.00017018040121229616,
+      "loss": 0.1758,
+      "step": 10339
+    },
+    {
+      "epoch": 0.7460586601248241,
+      "grad_norm": 0.19935932755470276,
+      "learning_rate": 0.0001701775147928994,
+      "loss": 0.1387,
+      "step": 10340
+    },
+    {
+      "epoch": 0.7461308127998846,
+      "grad_norm": 0.12037497013807297,
+      "learning_rate": 0.0001701746283735027,
+      "loss": 0.1502,
+      "step": 10341
+    },
+    {
+      "epoch": 0.746202965474945,
+      "grad_norm": 0.17062495648860931,
+      "learning_rate": 0.00017017174195410595,
+      "loss": 0.182,
+      "step": 10342
+    },
+    {
+      "epoch": 0.7462751181500054,
+      "grad_norm": 0.11846476793289185,
+      "learning_rate": 0.0001701688555347092,
+      "loss": 0.1571,
+      "step": 10343
+    },
+    {
+      "epoch": 0.7463472708250658,
+      "grad_norm": 0.12169267237186432,
+      "learning_rate": 0.00017016596911531248,
+      "loss": 0.1633,
+      "step": 10344
+    },
+    {
+      "epoch": 0.7464194235001262,
+      "grad_norm": 0.1215028464794159,
+      "learning_rate": 0.0001701630826959157,
+      "loss": 0.1245,
+      "step": 10345
+    },
+    {
+      "epoch": 0.7464915761751867,
+      "grad_norm": 0.13800732791423798,
+      "learning_rate": 0.00017016019627651897,
+      "loss": 0.1209,
+      "step": 10346
+    },
+    {
+      "epoch": 0.7465637288502471,
+      "grad_norm": 0.1455557644367218,
+      "learning_rate": 0.00017015730985712224,
+      "loss": 0.106,
+      "step": 10347
+    },
+    {
+      "epoch": 0.7466358815253076,
+      "grad_norm": 0.11333151161670685,
+      "learning_rate": 0.00017015442343772553,
+      "loss": 0.1287,
+      "step": 10348
+    },
+    {
+      "epoch": 0.746708034200368,
+      "grad_norm": 0.12079031765460968,
+      "learning_rate": 0.0001701515370183288,
+      "loss": 0.1287,
+      "step": 10349
+    },
+    {
+      "epoch": 0.7467801868754284,
+      "grad_norm": 0.12342115491628647,
+      "learning_rate": 0.00017014865059893203,
+      "loss": 0.1223,
+      "step": 10350
+    },
+    {
+      "epoch": 0.7468523395504888,
+      "grad_norm": 0.1584462970495224,
+      "learning_rate": 0.0001701457641795353,
+      "loss": 0.1633,
+      "step": 10351
+    },
+    {
+      "epoch": 0.7469244922255492,
+      "grad_norm": 0.15562903881072998,
+      "learning_rate": 0.00017014287776013855,
+      "loss": 0.1489,
+      "step": 10352
+    },
+    {
+      "epoch": 0.7469966449006097,
+      "grad_norm": 0.22772414982318878,
+      "learning_rate": 0.00017013999134074181,
+      "loss": 0.2305,
+      "step": 10353
+    },
+    {
+      "epoch": 0.7470687975756701,
+      "grad_norm": 0.15519371628761292,
+      "learning_rate": 0.00017013710492134508,
+      "loss": 0.0979,
+      "step": 10354
+    },
+    {
+      "epoch": 0.7471409502507306,
+      "grad_norm": 0.11990796774625778,
+      "learning_rate": 0.00017013421850194834,
+      "loss": 0.1244,
+      "step": 10355
+    },
+    {
+      "epoch": 0.747213102925791,
+      "grad_norm": 0.13046614825725555,
+      "learning_rate": 0.0001701313320825516,
+      "loss": 0.1048,
+      "step": 10356
+    },
+    {
+      "epoch": 0.7472852556008514,
+      "grad_norm": 0.13911251723766327,
+      "learning_rate": 0.00017012844566315487,
+      "loss": 0.1657,
+      "step": 10357
+    },
+    {
+      "epoch": 0.7473574082759118,
+      "grad_norm": 0.16226904094219208,
+      "learning_rate": 0.00017012555924375813,
+      "loss": 0.1899,
+      "step": 10358
+    },
+    {
+      "epoch": 0.7474295609509722,
+      "grad_norm": 0.14956645667552948,
+      "learning_rate": 0.0001701226728243614,
+      "loss": 0.1472,
+      "step": 10359
+    },
+    {
+      "epoch": 0.7475017136260327,
+      "grad_norm": 0.11555131524801254,
+      "learning_rate": 0.00017011978640496466,
+      "loss": 0.1899,
+      "step": 10360
+    },
+    {
+      "epoch": 0.7475738663010931,
+      "grad_norm": 0.13167062401771545,
+      "learning_rate": 0.0001701168999855679,
+      "loss": 0.1666,
+      "step": 10361
+    },
+    {
+      "epoch": 0.7476460189761536,
+      "grad_norm": 0.1495990753173828,
+      "learning_rate": 0.00017011401356617118,
+      "loss": 0.1483,
+      "step": 10362
+    },
+    {
+      "epoch": 0.747718171651214,
+      "grad_norm": 0.13280245661735535,
+      "learning_rate": 0.00017011112714677444,
+      "loss": 0.1396,
+      "step": 10363
+    },
+    {
+      "epoch": 0.7477903243262743,
+      "grad_norm": 0.14321625232696533,
+      "learning_rate": 0.0001701082407273777,
+      "loss": 0.1656,
+      "step": 10364
+    },
+    {
+      "epoch": 0.7478624770013348,
+      "grad_norm": 0.14525526762008667,
+      "learning_rate": 0.00017010535430798097,
+      "loss": 0.132,
+      "step": 10365
+    },
+    {
+      "epoch": 0.7479346296763952,
+      "grad_norm": 0.12012545019388199,
+      "learning_rate": 0.0001701024678885842,
+      "loss": 0.1542,
+      "step": 10366
+    },
+    {
+      "epoch": 0.7480067823514557,
+      "grad_norm": 0.11860865354537964,
+      "learning_rate": 0.00017009958146918747,
+      "loss": 0.1519,
+      "step": 10367
+    },
+    {
+      "epoch": 0.7480789350265161,
+      "grad_norm": 0.11256777495145798,
+      "learning_rate": 0.00017009669504979073,
+      "loss": 0.1418,
+      "step": 10368
+    },
+    {
+      "epoch": 0.7481510877015766,
+      "grad_norm": 0.12375520169734955,
+      "learning_rate": 0.00017009380863039402,
+      "loss": 0.1606,
+      "step": 10369
+    },
+    {
+      "epoch": 0.748223240376637,
+      "grad_norm": 0.1487514227628708,
+      "learning_rate": 0.00017009092221099728,
+      "loss": 0.1531,
+      "step": 10370
+    },
+    {
+      "epoch": 0.7482953930516973,
+      "grad_norm": 0.1397939920425415,
+      "learning_rate": 0.00017008803579160052,
+      "loss": 0.1043,
+      "step": 10371
+    },
+    {
+      "epoch": 0.7483675457267578,
+      "grad_norm": 0.15424096584320068,
+      "learning_rate": 0.00017008514937220378,
+      "loss": 0.1452,
+      "step": 10372
+    },
+    {
+      "epoch": 0.7484396984018182,
+      "grad_norm": 0.16209803521633148,
+      "learning_rate": 0.00017008226295280705,
+      "loss": 0.1851,
+      "step": 10373
+    },
+    {
+      "epoch": 0.7485118510768787,
+      "grad_norm": 0.10724832862615585,
+      "learning_rate": 0.0001700793765334103,
+      "loss": 0.1372,
+      "step": 10374
+    },
+    {
+      "epoch": 0.7485840037519391,
+      "grad_norm": 0.1410454362630844,
+      "learning_rate": 0.00017007649011401357,
+      "loss": 0.1291,
+      "step": 10375
+    },
+    {
+      "epoch": 0.7486561564269996,
+      "grad_norm": 0.11869491636753082,
+      "learning_rate": 0.00017007360369461683,
+      "loss": 0.1405,
+      "step": 10376
+    },
+    {
+      "epoch": 0.74872830910206,
+      "grad_norm": 0.12462595850229263,
+      "learning_rate": 0.0001700707172752201,
+      "loss": 0.1124,
+      "step": 10377
+    },
+    {
+      "epoch": 0.7488004617771203,
+      "grad_norm": 0.148727685213089,
+      "learning_rate": 0.00017006783085582336,
+      "loss": 0.1731,
+      "step": 10378
+    },
+    {
+      "epoch": 0.7488726144521808,
+      "grad_norm": 0.1503356695175171,
+      "learning_rate": 0.00017006494443642662,
+      "loss": 0.1683,
+      "step": 10379
+    },
+    {
+      "epoch": 0.7489447671272412,
+      "grad_norm": 0.11363843083381653,
+      "learning_rate": 0.0001700620580170299,
+      "loss": 0.1039,
+      "step": 10380
+    },
+    {
+      "epoch": 0.7490169198023017,
+      "grad_norm": 0.10472255200147629,
+      "learning_rate": 0.00017005917159763315,
+      "loss": 0.1464,
+      "step": 10381
+    },
+    {
+      "epoch": 0.7490890724773621,
+      "grad_norm": 0.11832225322723389,
+      "learning_rate": 0.00017005628517823639,
+      "loss": 0.139,
+      "step": 10382
+    },
+    {
+      "epoch": 0.7491612251524226,
+      "grad_norm": 0.16063863039016724,
+      "learning_rate": 0.00017005339875883968,
+      "loss": 0.1847,
+      "step": 10383
+    },
+    {
+      "epoch": 0.749233377827483,
+      "grad_norm": 0.11803101003170013,
+      "learning_rate": 0.00017005051233944294,
+      "loss": 0.1772,
+      "step": 10384
+    },
+    {
+      "epoch": 0.7493055305025433,
+      "grad_norm": 0.11719434708356857,
+      "learning_rate": 0.0001700476259200462,
+      "loss": 0.1745,
+      "step": 10385
+    },
+    {
+      "epoch": 0.7493776831776038,
+      "grad_norm": 0.1297706663608551,
+      "learning_rate": 0.00017004473950064946,
+      "loss": 0.1692,
+      "step": 10386
+    },
+    {
+      "epoch": 0.7494498358526642,
+      "grad_norm": 0.13485105335712433,
+      "learning_rate": 0.0001700418530812527,
+      "loss": 0.1523,
+      "step": 10387
+    },
+    {
+      "epoch": 0.7495219885277247,
+      "grad_norm": 0.15161189436912537,
+      "learning_rate": 0.00017003896666185596,
+      "loss": 0.1668,
+      "step": 10388
+    },
+    {
+      "epoch": 0.7495941412027851,
+      "grad_norm": 0.12917101383209229,
+      "learning_rate": 0.00017003608024245923,
+      "loss": 0.1824,
+      "step": 10389
+    },
+    {
+      "epoch": 0.7496662938778456,
+      "grad_norm": 0.12237484008073807,
+      "learning_rate": 0.00017003319382306252,
+      "loss": 0.1413,
+      "step": 10390
+    },
+    {
+      "epoch": 0.749738446552906,
+      "grad_norm": 0.12186992168426514,
+      "learning_rate": 0.00017003030740366578,
+      "loss": 0.1002,
+      "step": 10391
+    },
+    {
+      "epoch": 0.7498105992279663,
+      "grad_norm": 0.12338005006313324,
+      "learning_rate": 0.00017002742098426901,
+      "loss": 0.1595,
+      "step": 10392
+    },
+    {
+      "epoch": 0.7498827519030268,
+      "grad_norm": 0.12393818795681,
+      "learning_rate": 0.00017002453456487228,
+      "loss": 0.1045,
+      "step": 10393
+    },
+    {
+      "epoch": 0.7499549045780872,
+      "grad_norm": 0.1437109261751175,
+      "learning_rate": 0.00017002164814547554,
+      "loss": 0.134,
+      "step": 10394
+    },
+    {
+      "epoch": 0.7500270572531477,
+      "grad_norm": 0.12134461849927902,
+      "learning_rate": 0.0001700187617260788,
+      "loss": 0.1285,
+      "step": 10395
+    },
+    {
+      "epoch": 0.7500992099282081,
+      "grad_norm": 0.14531710743904114,
+      "learning_rate": 0.00017001587530668207,
+      "loss": 0.1368,
+      "step": 10396
+    },
+    {
+      "epoch": 0.7501713626032686,
+      "grad_norm": 0.10627005249261856,
+      "learning_rate": 0.00017001298888728533,
+      "loss": 0.157,
+      "step": 10397
+    },
+    {
+      "epoch": 0.750243515278329,
+      "grad_norm": 0.15446774661540985,
+      "learning_rate": 0.0001700101024678886,
+      "loss": 0.1568,
+      "step": 10398
+    },
+    {
+      "epoch": 0.7503156679533893,
+      "grad_norm": 0.12825755774974823,
+      "learning_rate": 0.00017000721604849185,
+      "loss": 0.125,
+      "step": 10399
+    },
+    {
+      "epoch": 0.7503878206284498,
+      "grad_norm": 0.12873336672782898,
+      "learning_rate": 0.00017000432962909512,
+      "loss": 0.1751,
+      "step": 10400
+    },
+    {
+      "epoch": 0.7504599733035102,
+      "grad_norm": 0.1349041610956192,
+      "learning_rate": 0.00017000144320969838,
+      "loss": 0.222,
+      "step": 10401
+    },
+    {
+      "epoch": 0.7505321259785707,
+      "grad_norm": 0.11576125025749207,
+      "learning_rate": 0.00016999855679030164,
+      "loss": 0.1801,
+      "step": 10402
+    },
+    {
+      "epoch": 0.7506042786536311,
+      "grad_norm": 0.16132929921150208,
+      "learning_rate": 0.00016999567037090488,
+      "loss": 0.1499,
+      "step": 10403
+    },
+    {
+      "epoch": 0.7506764313286916,
+      "grad_norm": 0.09981893748044968,
+      "learning_rate": 0.00016999278395150817,
+      "loss": 0.1179,
+      "step": 10404
+    },
+    {
+      "epoch": 0.7507485840037519,
+      "grad_norm": 0.1367056667804718,
+      "learning_rate": 0.00016998989753211143,
+      "loss": 0.1519,
+      "step": 10405
+    },
+    {
+      "epoch": 0.7508207366788123,
+      "grad_norm": 0.11750783026218414,
+      "learning_rate": 0.0001699870111127147,
+      "loss": 0.1608,
+      "step": 10406
+    },
+    {
+      "epoch": 0.7508928893538728,
+      "grad_norm": 0.10363408923149109,
+      "learning_rate": 0.00016998412469331796,
+      "loss": 0.1685,
+      "step": 10407
+    },
+    {
+      "epoch": 0.7509650420289332,
+      "grad_norm": 0.12339738011360168,
+      "learning_rate": 0.0001699812382739212,
+      "loss": 0.1216,
+      "step": 10408
+    },
+    {
+      "epoch": 0.7510371947039937,
+      "grad_norm": 0.15196317434310913,
+      "learning_rate": 0.00016997835185452446,
+      "loss": 0.1887,
+      "step": 10409
+    },
+    {
+      "epoch": 0.7511093473790541,
+      "grad_norm": 0.1309935450553894,
+      "learning_rate": 0.00016997546543512772,
+      "loss": 0.1506,
+      "step": 10410
+    },
+    {
+      "epoch": 0.7511815000541145,
+      "grad_norm": 0.1456453651189804,
+      "learning_rate": 0.000169972579015731,
+      "loss": 0.1851,
+      "step": 10411
+    },
+    {
+      "epoch": 0.7512536527291749,
+      "grad_norm": 0.12276160717010498,
+      "learning_rate": 0.00016996969259633427,
+      "loss": 0.1703,
+      "step": 10412
+    },
+    {
+      "epoch": 0.7513258054042353,
+      "grad_norm": 0.12643969058990479,
+      "learning_rate": 0.0001699668061769375,
+      "loss": 0.1499,
+      "step": 10413
+    },
+    {
+      "epoch": 0.7513979580792958,
+      "grad_norm": 0.11966102570295334,
+      "learning_rate": 0.00016996391975754077,
+      "loss": 0.1294,
+      "step": 10414
+    },
+    {
+      "epoch": 0.7514701107543562,
+      "grad_norm": 0.12556810677051544,
+      "learning_rate": 0.00016996103333814403,
+      "loss": 0.1216,
+      "step": 10415
+    },
+    {
+      "epoch": 0.7515422634294167,
+      "grad_norm": 0.12193518131971359,
+      "learning_rate": 0.0001699581469187473,
+      "loss": 0.1218,
+      "step": 10416
+    },
+    {
+      "epoch": 0.7516144161044771,
+      "grad_norm": 0.11967793107032776,
+      "learning_rate": 0.00016995526049935056,
+      "loss": 0.1809,
+      "step": 10417
+    },
+    {
+      "epoch": 0.7516865687795375,
+      "grad_norm": 0.1303376853466034,
+      "learning_rate": 0.00016995237407995382,
+      "loss": 0.156,
+      "step": 10418
+    },
+    {
+      "epoch": 0.7517587214545979,
+      "grad_norm": 0.11569512635469437,
+      "learning_rate": 0.00016994948766055709,
+      "loss": 0.1307,
+      "step": 10419
+    },
+    {
+      "epoch": 0.7518308741296583,
+      "grad_norm": 0.1179572120308876,
+      "learning_rate": 0.00016994660124116035,
+      "loss": 0.1423,
+      "step": 10420
+    },
+    {
+      "epoch": 0.7519030268047188,
+      "grad_norm": 0.13549353182315826,
+      "learning_rate": 0.0001699437148217636,
+      "loss": 0.1471,
+      "step": 10421
+    },
+    {
+      "epoch": 0.7519751794797792,
+      "grad_norm": 0.13920718431472778,
+      "learning_rate": 0.00016994082840236688,
+      "loss": 0.1169,
+      "step": 10422
+    },
+    {
+      "epoch": 0.7520473321548397,
+      "grad_norm": 0.14348579943180084,
+      "learning_rate": 0.00016993794198297014,
+      "loss": 0.1132,
+      "step": 10423
+    },
+    {
+      "epoch": 0.7521194848299001,
+      "grad_norm": 0.12128528952598572,
+      "learning_rate": 0.00016993505556357337,
+      "loss": 0.1564,
+      "step": 10424
+    },
+    {
+      "epoch": 0.7521916375049605,
+      "grad_norm": 0.13051150739192963,
+      "learning_rate": 0.00016993216914417666,
+      "loss": 0.1195,
+      "step": 10425
+    },
+    {
+      "epoch": 0.7522637901800209,
+      "grad_norm": 0.14699678122997284,
+      "learning_rate": 0.00016992928272477993,
+      "loss": 0.1884,
+      "step": 10426
+    },
+    {
+      "epoch": 0.7523359428550813,
+      "grad_norm": 0.12040664255619049,
+      "learning_rate": 0.0001699263963053832,
+      "loss": 0.1212,
+      "step": 10427
+    },
+    {
+      "epoch": 0.7524080955301418,
+      "grad_norm": 0.16848629713058472,
+      "learning_rate": 0.00016992350988598645,
+      "loss": 0.1749,
+      "step": 10428
+    },
+    {
+      "epoch": 0.7524802482052022,
+      "grad_norm": 0.13176479935646057,
+      "learning_rate": 0.0001699206234665897,
+      "loss": 0.178,
+      "step": 10429
+    },
+    {
+      "epoch": 0.7525524008802627,
+      "grad_norm": 0.12817110121250153,
+      "learning_rate": 0.00016991773704719295,
+      "loss": 0.1494,
+      "step": 10430
+    },
+    {
+      "epoch": 0.7526245535553231,
+      "grad_norm": 0.11498573422431946,
+      "learning_rate": 0.00016991485062779621,
+      "loss": 0.1385,
+      "step": 10431
+    },
+    {
+      "epoch": 0.7526967062303835,
+      "grad_norm": 0.12366468459367752,
+      "learning_rate": 0.0001699119642083995,
+      "loss": 0.1304,
+      "step": 10432
+    },
+    {
+      "epoch": 0.7527688589054439,
+      "grad_norm": 0.13612817227840424,
+      "learning_rate": 0.00016990907778900277,
+      "loss": 0.1738,
+      "step": 10433
+    },
+    {
+      "epoch": 0.7528410115805043,
+      "grad_norm": 0.10676132887601852,
+      "learning_rate": 0.000169906191369606,
+      "loss": 0.1567,
+      "step": 10434
+    },
+    {
+      "epoch": 0.7529131642555648,
+      "grad_norm": 0.12812548875808716,
+      "learning_rate": 0.00016990330495020927,
+      "loss": 0.1694,
+      "step": 10435
+    },
+    {
+      "epoch": 0.7529853169306252,
+      "grad_norm": 0.12463195621967316,
+      "learning_rate": 0.00016990041853081253,
+      "loss": 0.167,
+      "step": 10436
+    },
+    {
+      "epoch": 0.7530574696056856,
+      "grad_norm": 0.130820631980896,
+      "learning_rate": 0.0001698975321114158,
+      "loss": 0.1442,
+      "step": 10437
+    },
+    {
+      "epoch": 0.7531296222807461,
+      "grad_norm": 0.21053113043308258,
+      "learning_rate": 0.00016989464569201905,
+      "loss": 0.1621,
+      "step": 10438
+    },
+    {
+      "epoch": 0.7532017749558065,
+      "grad_norm": 0.1375279277563095,
+      "learning_rate": 0.00016989175927262232,
+      "loss": 0.1263,
+      "step": 10439
+    },
+    {
+      "epoch": 0.7532739276308669,
+      "grad_norm": 0.1391715258359909,
+      "learning_rate": 0.00016988887285322558,
+      "loss": 0.1442,
+      "step": 10440
+    },
+    {
+      "epoch": 0.7533460803059273,
+      "grad_norm": 0.09525745362043381,
+      "learning_rate": 0.00016988598643382884,
+      "loss": 0.1582,
+      "step": 10441
+    },
+    {
+      "epoch": 0.7534182329809878,
+      "grad_norm": 0.12608473002910614,
+      "learning_rate": 0.0001698831000144321,
+      "loss": 0.1562,
+      "step": 10442
+    },
+    {
+      "epoch": 0.7534903856560482,
+      "grad_norm": 0.14218218624591827,
+      "learning_rate": 0.00016988021359503537,
+      "loss": 0.1569,
+      "step": 10443
+    },
+    {
+      "epoch": 0.7535625383311086,
+      "grad_norm": 0.1170210987329483,
+      "learning_rate": 0.00016987732717563863,
+      "loss": 0.1118,
+      "step": 10444
+    },
+    {
+      "epoch": 0.7536346910061691,
+      "grad_norm": 0.1179591715335846,
+      "learning_rate": 0.0001698744407562419,
+      "loss": 0.1414,
+      "step": 10445
+    },
+    {
+      "epoch": 0.7537068436812295,
+      "grad_norm": 0.1263728141784668,
+      "learning_rate": 0.00016987155433684516,
+      "loss": 0.1607,
+      "step": 10446
+    },
+    {
+      "epoch": 0.7537789963562899,
+      "grad_norm": 0.12224898487329483,
+      "learning_rate": 0.00016986866791744842,
+      "loss": 0.1155,
+      "step": 10447
+    },
+    {
+      "epoch": 0.7538511490313503,
+      "grad_norm": 0.12241674959659576,
+      "learning_rate": 0.00016986578149805168,
+      "loss": 0.1496,
+      "step": 10448
+    },
+    {
+      "epoch": 0.7539233017064108,
+      "grad_norm": 0.13699893653392792,
+      "learning_rate": 0.00016986289507865495,
+      "loss": 0.1763,
+      "step": 10449
+    },
+    {
+      "epoch": 0.7539954543814712,
+      "grad_norm": 0.12991581857204437,
+      "learning_rate": 0.0001698600086592582,
+      "loss": 0.1727,
+      "step": 10450
+    },
+    {
+      "epoch": 0.7540676070565316,
+      "grad_norm": 0.11489979177713394,
+      "learning_rate": 0.00016985712223986145,
+      "loss": 0.0994,
+      "step": 10451
+    },
+    {
+      "epoch": 0.7541397597315921,
+      "grad_norm": 0.12281400710344315,
+      "learning_rate": 0.0001698542358204647,
+      "loss": 0.0968,
+      "step": 10452
+    },
+    {
+      "epoch": 0.7542119124066525,
+      "grad_norm": 0.1341441124677658,
+      "learning_rate": 0.000169851349401068,
+      "loss": 0.1217,
+      "step": 10453
+    },
+    {
+      "epoch": 0.7542840650817129,
+      "grad_norm": 0.11157387495040894,
+      "learning_rate": 0.00016984846298167126,
+      "loss": 0.1332,
+      "step": 10454
+    },
+    {
+      "epoch": 0.7543562177567733,
+      "grad_norm": 0.13739007711410522,
+      "learning_rate": 0.00016984557656227452,
+      "loss": 0.1817,
+      "step": 10455
+    },
+    {
+      "epoch": 0.7544283704318338,
+      "grad_norm": 0.11979958415031433,
+      "learning_rate": 0.00016984269014287776,
+      "loss": 0.1797,
+      "step": 10456
+    },
+    {
+      "epoch": 0.7545005231068942,
+      "grad_norm": 0.13151156902313232,
+      "learning_rate": 0.00016983980372348102,
+      "loss": 0.1484,
+      "step": 10457
+    },
+    {
+      "epoch": 0.7545726757819546,
+      "grad_norm": 0.10742885619401932,
+      "learning_rate": 0.00016983691730408429,
+      "loss": 0.1095,
+      "step": 10458
+    },
+    {
+      "epoch": 0.7546448284570151,
+      "grad_norm": 0.127263605594635,
+      "learning_rate": 0.00016983403088468755,
+      "loss": 0.1483,
+      "step": 10459
+    },
+    {
+      "epoch": 0.7547169811320755,
+      "grad_norm": 0.13081799447536469,
+      "learning_rate": 0.00016983114446529084,
+      "loss": 0.1322,
+      "step": 10460
+    },
+    {
+      "epoch": 0.7547891338071359,
+      "grad_norm": 0.1259460151195526,
+      "learning_rate": 0.00016982825804589407,
+      "loss": 0.1523,
+      "step": 10461
+    },
+    {
+      "epoch": 0.7548612864821963,
+      "grad_norm": 0.1344456672668457,
+      "learning_rate": 0.00016982537162649734,
+      "loss": 0.1354,
+      "step": 10462
+    },
+    {
+      "epoch": 0.7549334391572567,
+      "grad_norm": 0.0938878208398819,
+      "learning_rate": 0.0001698224852071006,
+      "loss": 0.1613,
+      "step": 10463
+    },
+    {
+      "epoch": 0.7550055918323172,
+      "grad_norm": 0.14028160274028778,
+      "learning_rate": 0.00016981959878770386,
+      "loss": 0.1738,
+      "step": 10464
+    },
+    {
+      "epoch": 0.7550777445073776,
+      "grad_norm": 0.11873999983072281,
+      "learning_rate": 0.00016981671236830713,
+      "loss": 0.171,
+      "step": 10465
+    },
+    {
+      "epoch": 0.7551498971824381,
+      "grad_norm": 0.14979086816310883,
+      "learning_rate": 0.0001698138259489104,
+      "loss": 0.1222,
+      "step": 10466
+    },
+    {
+      "epoch": 0.7552220498574984,
+      "grad_norm": 0.13354110717773438,
+      "learning_rate": 0.00016981093952951365,
+      "loss": 0.1766,
+      "step": 10467
+    },
+    {
+      "epoch": 0.7552942025325589,
+      "grad_norm": 0.1624889224767685,
+      "learning_rate": 0.00016980805311011692,
+      "loss": 0.157,
+      "step": 10468
+    },
+    {
+      "epoch": 0.7553663552076193,
+      "grad_norm": 0.11007406562566757,
+      "learning_rate": 0.00016980516669072018,
+      "loss": 0.1467,
+      "step": 10469
+    },
+    {
+      "epoch": 0.7554385078826797,
+      "grad_norm": 0.1187017410993576,
+      "learning_rate": 0.00016980228027132344,
+      "loss": 0.1675,
+      "step": 10470
+    },
+    {
+      "epoch": 0.7555106605577402,
+      "grad_norm": 0.1325860619544983,
+      "learning_rate": 0.0001697993938519267,
+      "loss": 0.1588,
+      "step": 10471
+    },
+    {
+      "epoch": 0.7555828132328006,
+      "grad_norm": 0.1073029488325119,
+      "learning_rate": 0.00016979650743252994,
+      "loss": 0.1438,
+      "step": 10472
+    },
+    {
+      "epoch": 0.7556549659078611,
+      "grad_norm": 0.1197071373462677,
+      "learning_rate": 0.0001697936210131332,
+      "loss": 0.1207,
+      "step": 10473
+    },
+    {
+      "epoch": 0.7557271185829214,
+      "grad_norm": 0.10033522546291351,
+      "learning_rate": 0.0001697907345937365,
+      "loss": 0.1403,
+      "step": 10474
+    },
+    {
+      "epoch": 0.7557992712579819,
+      "grad_norm": 0.15383054316043854,
+      "learning_rate": 0.00016978784817433976,
+      "loss": 0.1566,
+      "step": 10475
+    },
+    {
+      "epoch": 0.7558714239330423,
+      "grad_norm": 0.1383810043334961,
+      "learning_rate": 0.00016978496175494302,
+      "loss": 0.1637,
+      "step": 10476
+    },
+    {
+      "epoch": 0.7559435766081027,
+      "grad_norm": 0.12929747998714447,
+      "learning_rate": 0.00016978207533554625,
+      "loss": 0.1333,
+      "step": 10477
+    },
+    {
+      "epoch": 0.7560157292831632,
+      "grad_norm": 0.14632758498191833,
+      "learning_rate": 0.00016977918891614952,
+      "loss": 0.1209,
+      "step": 10478
+    },
+    {
+      "epoch": 0.7560878819582236,
+      "grad_norm": 0.12172812968492508,
+      "learning_rate": 0.00016977630249675278,
+      "loss": 0.1562,
+      "step": 10479
+    },
+    {
+      "epoch": 0.7561600346332841,
+      "grad_norm": 0.12071457505226135,
+      "learning_rate": 0.00016977341607735604,
+      "loss": 0.1862,
+      "step": 10480
+    },
+    {
+      "epoch": 0.7562321873083444,
+      "grad_norm": 0.17001500725746155,
+      "learning_rate": 0.0001697705296579593,
+      "loss": 0.18,
+      "step": 10481
+    },
+    {
+      "epoch": 0.7563043399834048,
+      "grad_norm": 0.1405080109834671,
+      "learning_rate": 0.00016976764323856257,
+      "loss": 0.1594,
+      "step": 10482
+    },
+    {
+      "epoch": 0.7563764926584653,
+      "grad_norm": 0.13267336785793304,
+      "learning_rate": 0.00016976475681916583,
+      "loss": 0.1662,
+      "step": 10483
+    },
+    {
+      "epoch": 0.7564486453335257,
+      "grad_norm": 0.11440946161746979,
+      "learning_rate": 0.0001697618703997691,
+      "loss": 0.1922,
+      "step": 10484
+    },
+    {
+      "epoch": 0.7565207980085862,
+      "grad_norm": 0.11692952364683151,
+      "learning_rate": 0.00016975898398037236,
+      "loss": 0.1521,
+      "step": 10485
+    },
+    {
+      "epoch": 0.7565929506836466,
+      "grad_norm": 0.11950668692588806,
+      "learning_rate": 0.00016975609756097562,
+      "loss": 0.1226,
+      "step": 10486
+    },
+    {
+      "epoch": 0.7566651033587071,
+      "grad_norm": 0.18788966536521912,
+      "learning_rate": 0.00016975321114157888,
+      "loss": 0.1664,
+      "step": 10487
+    },
+    {
+      "epoch": 0.7567372560337674,
+      "grad_norm": 0.14929738640785217,
+      "learning_rate": 0.00016975032472218212,
+      "loss": 0.2299,
+      "step": 10488
+    },
+    {
+      "epoch": 0.7568094087088278,
+      "grad_norm": 0.12198912352323532,
+      "learning_rate": 0.0001697474383027854,
+      "loss": 0.1459,
+      "step": 10489
+    },
+    {
+      "epoch": 0.7568815613838883,
+      "grad_norm": 0.12246627360582352,
+      "learning_rate": 0.00016974455188338867,
+      "loss": 0.1054,
+      "step": 10490
+    },
+    {
+      "epoch": 0.7569537140589487,
+      "grad_norm": 0.2314099669456482,
+      "learning_rate": 0.00016974166546399194,
+      "loss": 0.1356,
+      "step": 10491
+    },
+    {
+      "epoch": 0.7570258667340092,
+      "grad_norm": 0.14936254918575287,
+      "learning_rate": 0.0001697387790445952,
+      "loss": 0.1425,
+      "step": 10492
+    },
+    {
+      "epoch": 0.7570980194090696,
+      "grad_norm": 0.25354865193367004,
+      "learning_rate": 0.00016973589262519843,
+      "loss": 0.1332,
+      "step": 10493
+    },
+    {
+      "epoch": 0.7571701720841301,
+      "grad_norm": 0.1566449999809265,
+      "learning_rate": 0.0001697330062058017,
+      "loss": 0.1457,
+      "step": 10494
+    },
+    {
+      "epoch": 0.7572423247591904,
+      "grad_norm": 0.11155090481042862,
+      "learning_rate": 0.00016973011978640496,
+      "loss": 0.1403,
+      "step": 10495
+    },
+    {
+      "epoch": 0.7573144774342508,
+      "grad_norm": 0.16458825767040253,
+      "learning_rate": 0.00016972723336700825,
+      "loss": 0.1304,
+      "step": 10496
+    },
+    {
+      "epoch": 0.7573866301093113,
+      "grad_norm": 0.157064750790596,
+      "learning_rate": 0.0001697243469476115,
+      "loss": 0.164,
+      "step": 10497
+    },
+    {
+      "epoch": 0.7574587827843717,
+      "grad_norm": 0.11798777431249619,
+      "learning_rate": 0.00016972146052821475,
+      "loss": 0.1347,
+      "step": 10498
+    },
+    {
+      "epoch": 0.7575309354594322,
+      "grad_norm": 0.12378858029842377,
+      "learning_rate": 0.000169718574108818,
+      "loss": 0.1091,
+      "step": 10499
+    },
+    {
+      "epoch": 0.7576030881344926,
+      "grad_norm": 0.10340747237205505,
+      "learning_rate": 0.00016971568768942127,
+      "loss": 0.1362,
+      "step": 10500
+    },
+    {
+      "epoch": 0.7576752408095531,
+      "grad_norm": 0.12779733538627625,
+      "learning_rate": 0.00016971280127002454,
+      "loss": 0.1408,
+      "step": 10501
+    },
+    {
+      "epoch": 0.7577473934846134,
+      "grad_norm": 0.11349274218082428,
+      "learning_rate": 0.0001697099148506278,
+      "loss": 0.1894,
+      "step": 10502
+    },
+    {
+      "epoch": 0.7578195461596738,
+      "grad_norm": 0.12187425792217255,
+      "learning_rate": 0.00016970702843123106,
+      "loss": 0.1075,
+      "step": 10503
+    },
+    {
+      "epoch": 0.7578916988347343,
+      "grad_norm": 0.11838438361883163,
+      "learning_rate": 0.00016970414201183433,
+      "loss": 0.1302,
+      "step": 10504
+    },
+    {
+      "epoch": 0.7579638515097947,
+      "grad_norm": 0.16571784019470215,
+      "learning_rate": 0.0001697012555924376,
+      "loss": 0.1419,
+      "step": 10505
+    },
+    {
+      "epoch": 0.7580360041848552,
+      "grad_norm": 0.13391056656837463,
+      "learning_rate": 0.00016969836917304085,
+      "loss": 0.1051,
+      "step": 10506
+    },
+    {
+      "epoch": 0.7581081568599156,
+      "grad_norm": 0.1178104355931282,
+      "learning_rate": 0.00016969548275364411,
+      "loss": 0.123,
+      "step": 10507
+    },
+    {
+      "epoch": 0.7581803095349761,
+      "grad_norm": 0.120695561170578,
+      "learning_rate": 0.00016969259633424738,
+      "loss": 0.138,
+      "step": 10508
+    },
+    {
+      "epoch": 0.7582524622100364,
+      "grad_norm": 0.1247735545039177,
+      "learning_rate": 0.00016968970991485061,
+      "loss": 0.1937,
+      "step": 10509
+    },
+    {
+      "epoch": 0.7583246148850968,
+      "grad_norm": 0.14644338190555573,
+      "learning_rate": 0.0001696868234954539,
+      "loss": 0.1871,
+      "step": 10510
+    },
+    {
+      "epoch": 0.7583967675601573,
+      "grad_norm": 0.13256101310253143,
+      "learning_rate": 0.00016968393707605717,
+      "loss": 0.1359,
+      "step": 10511
+    },
+    {
+      "epoch": 0.7584689202352177,
+      "grad_norm": 0.16652289032936096,
+      "learning_rate": 0.00016968105065666043,
+      "loss": 0.2052,
+      "step": 10512
+    },
+    {
+      "epoch": 0.7585410729102782,
+      "grad_norm": 0.12736444175243378,
+      "learning_rate": 0.0001696781642372637,
+      "loss": 0.2008,
+      "step": 10513
+    },
+    {
+      "epoch": 0.7586132255853386,
+      "grad_norm": 0.15626487135887146,
+      "learning_rate": 0.00016967527781786693,
+      "loss": 0.1391,
+      "step": 10514
+    },
+    {
+      "epoch": 0.7586853782603991,
+      "grad_norm": 0.11295124888420105,
+      "learning_rate": 0.0001696723913984702,
+      "loss": 0.1772,
+      "step": 10515
+    },
+    {
+      "epoch": 0.7587575309354594,
+      "grad_norm": 0.12680451571941376,
+      "learning_rate": 0.00016966950497907345,
+      "loss": 0.1579,
+      "step": 10516
+    },
+    {
+      "epoch": 0.7588296836105198,
+      "grad_norm": 0.13611815869808197,
+      "learning_rate": 0.00016966661855967674,
+      "loss": 0.1583,
+      "step": 10517
+    },
+    {
+      "epoch": 0.7589018362855803,
+      "grad_norm": 0.13193492591381073,
+      "learning_rate": 0.00016966373214028,
+      "loss": 0.126,
+      "step": 10518
+    },
+    {
+      "epoch": 0.7589739889606407,
+      "grad_norm": 0.12841257452964783,
+      "learning_rate": 0.00016966084572088324,
+      "loss": 0.1759,
+      "step": 10519
+    },
+    {
+      "epoch": 0.7590461416357012,
+      "grad_norm": 0.1348455548286438,
+      "learning_rate": 0.0001696579593014865,
+      "loss": 0.1695,
+      "step": 10520
+    },
+    {
+      "epoch": 0.7591182943107616,
+      "grad_norm": 0.18122850358486176,
+      "learning_rate": 0.00016965507288208977,
+      "loss": 0.1138,
+      "step": 10521
+    },
+    {
+      "epoch": 0.759190446985822,
+      "grad_norm": 0.13468194007873535,
+      "learning_rate": 0.00016965218646269303,
+      "loss": 0.1631,
+      "step": 10522
+    },
+    {
+      "epoch": 0.7592625996608824,
+      "grad_norm": 0.10488202422857285,
+      "learning_rate": 0.0001696493000432963,
+      "loss": 0.1398,
+      "step": 10523
+    },
+    {
+      "epoch": 0.7593347523359428,
+      "grad_norm": 0.12960539758205414,
+      "learning_rate": 0.00016964641362389956,
+      "loss": 0.1449,
+      "step": 10524
+    },
+    {
+      "epoch": 0.7594069050110033,
+      "grad_norm": 0.13000430166721344,
+      "learning_rate": 0.00016964352720450282,
+      "loss": 0.226,
+      "step": 10525
+    },
+    {
+      "epoch": 0.7594790576860637,
+      "grad_norm": 0.13472294807434082,
+      "learning_rate": 0.00016964064078510608,
+      "loss": 0.153,
+      "step": 10526
+    },
+    {
+      "epoch": 0.7595512103611242,
+      "grad_norm": 0.12741820514202118,
+      "learning_rate": 0.00016963775436570935,
+      "loss": 0.1445,
+      "step": 10527
+    },
+    {
+      "epoch": 0.7596233630361846,
+      "grad_norm": 0.1185845211148262,
+      "learning_rate": 0.0001696348679463126,
+      "loss": 0.1334,
+      "step": 10528
+    },
+    {
+      "epoch": 0.7596955157112449,
+      "grad_norm": 0.13726027309894562,
+      "learning_rate": 0.00016963198152691587,
+      "loss": 0.1478,
+      "step": 10529
+    },
+    {
+      "epoch": 0.7597676683863054,
+      "grad_norm": 0.13547062873840332,
+      "learning_rate": 0.0001696290951075191,
+      "loss": 0.1653,
+      "step": 10530
+    },
+    {
+      "epoch": 0.7598398210613658,
+      "grad_norm": 0.12183656543493271,
+      "learning_rate": 0.0001696262086881224,
+      "loss": 0.1175,
+      "step": 10531
+    },
+    {
+      "epoch": 0.7599119737364263,
+      "grad_norm": 0.11496282368898392,
+      "learning_rate": 0.00016962332226872566,
+      "loss": 0.1361,
+      "step": 10532
+    },
+    {
+      "epoch": 0.7599841264114867,
+      "grad_norm": 0.10563007742166519,
+      "learning_rate": 0.00016962043584932892,
+      "loss": 0.0923,
+      "step": 10533
+    },
+    {
+      "epoch": 0.7600562790865472,
+      "grad_norm": 0.12999393045902252,
+      "learning_rate": 0.0001696175494299322,
+      "loss": 0.193,
+      "step": 10534
+    },
+    {
+      "epoch": 0.7601284317616076,
+      "grad_norm": 0.12223460525274277,
+      "learning_rate": 0.00016961466301053542,
+      "loss": 0.138,
+      "step": 10535
+    },
+    {
+      "epoch": 0.7602005844366679,
+      "grad_norm": 0.10693982243537903,
+      "learning_rate": 0.00016961177659113869,
+      "loss": 0.1323,
+      "step": 10536
+    },
+    {
+      "epoch": 0.7602727371117284,
+      "grad_norm": 0.15222221612930298,
+      "learning_rate": 0.00016960889017174195,
+      "loss": 0.1668,
+      "step": 10537
+    },
+    {
+      "epoch": 0.7603448897867888,
+      "grad_norm": 0.12483417242765427,
+      "learning_rate": 0.00016960600375234524,
+      "loss": 0.1504,
+      "step": 10538
+    },
+    {
+      "epoch": 0.7604170424618493,
+      "grad_norm": 0.12700700759887695,
+      "learning_rate": 0.0001696031173329485,
+      "loss": 0.1705,
+      "step": 10539
+    },
+    {
+      "epoch": 0.7604891951369097,
+      "grad_norm": 0.1095048263669014,
+      "learning_rate": 0.00016960023091355174,
+      "loss": 0.134,
+      "step": 10540
+    },
+    {
+      "epoch": 0.7605613478119702,
+      "grad_norm": 0.11708509922027588,
+      "learning_rate": 0.000169597344494155,
+      "loss": 0.1653,
+      "step": 10541
+    },
+    {
+      "epoch": 0.7606335004870306,
+      "grad_norm": 0.11617586761713028,
+      "learning_rate": 0.00016959445807475826,
+      "loss": 0.1648,
+      "step": 10542
+    },
+    {
+      "epoch": 0.7607056531620909,
+      "grad_norm": 0.11328215897083282,
+      "learning_rate": 0.00016959157165536153,
+      "loss": 0.1126,
+      "step": 10543
+    },
+    {
+      "epoch": 0.7607778058371514,
+      "grad_norm": 0.13360731303691864,
+      "learning_rate": 0.0001695886852359648,
+      "loss": 0.1805,
+      "step": 10544
+    },
+    {
+      "epoch": 0.7608499585122118,
+      "grad_norm": 0.11923332512378693,
+      "learning_rate": 0.00016958579881656805,
+      "loss": 0.195,
+      "step": 10545
+    },
+    {
+      "epoch": 0.7609221111872723,
+      "grad_norm": 0.11156607419252396,
+      "learning_rate": 0.00016958291239717131,
+      "loss": 0.1654,
+      "step": 10546
+    },
+    {
+      "epoch": 0.7609942638623327,
+      "grad_norm": 0.11855777353048325,
+      "learning_rate": 0.00016958002597777458,
+      "loss": 0.163,
+      "step": 10547
+    },
+    {
+      "epoch": 0.7610664165373932,
+      "grad_norm": 0.12136217206716537,
+      "learning_rate": 0.00016957713955837784,
+      "loss": 0.1644,
+      "step": 10548
+    },
+    {
+      "epoch": 0.7611385692124536,
+      "grad_norm": 0.11538812518119812,
+      "learning_rate": 0.0001695742531389811,
+      "loss": 0.1351,
+      "step": 10549
+    },
+    {
+      "epoch": 0.7612107218875139,
+      "grad_norm": 0.1355658620595932,
+      "learning_rate": 0.00016957136671958437,
+      "loss": 0.1553,
+      "step": 10550
+    },
+    {
+      "epoch": 0.7612828745625744,
+      "grad_norm": 0.12112399190664291,
+      "learning_rate": 0.0001695684803001876,
+      "loss": 0.1589,
+      "step": 10551
+    },
+    {
+      "epoch": 0.7613550272376348,
+      "grad_norm": 0.12598440051078796,
+      "learning_rate": 0.0001695655938807909,
+      "loss": 0.1808,
+      "step": 10552
+    },
+    {
+      "epoch": 0.7614271799126953,
+      "grad_norm": 0.11427775770425797,
+      "learning_rate": 0.00016956270746139416,
+      "loss": 0.1173,
+      "step": 10553
+    },
+    {
+      "epoch": 0.7614993325877557,
+      "grad_norm": 0.10798707604408264,
+      "learning_rate": 0.00016955982104199742,
+      "loss": 0.0849,
+      "step": 10554
+    },
+    {
+      "epoch": 0.7615714852628162,
+      "grad_norm": 0.1420150250196457,
+      "learning_rate": 0.00016955693462260068,
+      "loss": 0.1353,
+      "step": 10555
+    },
+    {
+      "epoch": 0.7616436379378766,
+      "grad_norm": 0.14471231400966644,
+      "learning_rate": 0.00016955404820320392,
+      "loss": 0.1246,
+      "step": 10556
+    },
+    {
+      "epoch": 0.7617157906129369,
+      "grad_norm": 0.1214648112654686,
+      "learning_rate": 0.00016955116178380718,
+      "loss": 0.1547,
+      "step": 10557
+    },
+    {
+      "epoch": 0.7617879432879974,
+      "grad_norm": 0.10652025043964386,
+      "learning_rate": 0.00016954827536441044,
+      "loss": 0.1363,
+      "step": 10558
+    },
+    {
+      "epoch": 0.7618600959630578,
+      "grad_norm": 0.13901583850383759,
+      "learning_rate": 0.00016954538894501373,
+      "loss": 0.1473,
+      "step": 10559
+    },
+    {
+      "epoch": 0.7619322486381183,
+      "grad_norm": 0.1238698959350586,
+      "learning_rate": 0.000169542502525617,
+      "loss": 0.1511,
+      "step": 10560
+    },
+    {
+      "epoch": 0.7620044013131787,
+      "grad_norm": 0.12141493707895279,
+      "learning_rate": 0.00016953961610622023,
+      "loss": 0.117,
+      "step": 10561
+    },
+    {
+      "epoch": 0.7620765539882391,
+      "grad_norm": 0.16087163984775543,
+      "learning_rate": 0.0001695367296868235,
+      "loss": 0.1778,
+      "step": 10562
+    },
+    {
+      "epoch": 0.7621487066632996,
+      "grad_norm": 0.11237693578004837,
+      "learning_rate": 0.00016953384326742676,
+      "loss": 0.1186,
+      "step": 10563
+    },
+    {
+      "epoch": 0.7622208593383599,
+      "grad_norm": 0.12508907914161682,
+      "learning_rate": 0.00016953095684803002,
+      "loss": 0.1679,
+      "step": 10564
+    },
+    {
+      "epoch": 0.7622930120134204,
+      "grad_norm": 0.11945411562919617,
+      "learning_rate": 0.00016952807042863328,
+      "loss": 0.1131,
+      "step": 10565
+    },
+    {
+      "epoch": 0.7623651646884808,
+      "grad_norm": 0.10769058018922806,
+      "learning_rate": 0.00016952518400923655,
+      "loss": 0.1321,
+      "step": 10566
+    },
+    {
+      "epoch": 0.7624373173635413,
+      "grad_norm": 0.12163474410772324,
+      "learning_rate": 0.0001695222975898398,
+      "loss": 0.1591,
+      "step": 10567
+    },
+    {
+      "epoch": 0.7625094700386017,
+      "grad_norm": 0.11006736755371094,
+      "learning_rate": 0.00016951941117044307,
+      "loss": 0.1283,
+      "step": 10568
+    },
+    {
+      "epoch": 0.7625816227136621,
+      "grad_norm": 0.10961829870939255,
+      "learning_rate": 0.00016951652475104633,
+      "loss": 0.2023,
+      "step": 10569
+    },
+    {
+      "epoch": 0.7626537753887226,
+      "grad_norm": 0.12149278819561005,
+      "learning_rate": 0.0001695136383316496,
+      "loss": 0.1072,
+      "step": 10570
+    },
+    {
+      "epoch": 0.7627259280637829,
+      "grad_norm": 0.14779675006866455,
+      "learning_rate": 0.00016951075191225286,
+      "loss": 0.1284,
+      "step": 10571
+    },
+    {
+      "epoch": 0.7627980807388434,
+      "grad_norm": 0.12754885852336884,
+      "learning_rate": 0.00016950786549285612,
+      "loss": 0.1409,
+      "step": 10572
+    },
+    {
+      "epoch": 0.7628702334139038,
+      "grad_norm": 0.12782339751720428,
+      "learning_rate": 0.00016950497907345939,
+      "loss": 0.1769,
+      "step": 10573
+    },
+    {
+      "epoch": 0.7629423860889643,
+      "grad_norm": 0.1684998869895935,
+      "learning_rate": 0.00016950209265406265,
+      "loss": 0.1587,
+      "step": 10574
+    },
+    {
+      "epoch": 0.7630145387640247,
+      "grad_norm": 0.14323611557483673,
+      "learning_rate": 0.0001694992062346659,
+      "loss": 0.128,
+      "step": 10575
+    },
+    {
+      "epoch": 0.7630866914390851,
+      "grad_norm": 0.13388456404209137,
+      "learning_rate": 0.00016949631981526918,
+      "loss": 0.114,
+      "step": 10576
+    },
+    {
+      "epoch": 0.7631588441141456,
+      "grad_norm": 0.15299002826213837,
+      "learning_rate": 0.00016949343339587244,
+      "loss": 0.1677,
+      "step": 10577
+    },
+    {
+      "epoch": 0.7632309967892059,
+      "grad_norm": 0.1399703323841095,
+      "learning_rate": 0.00016949054697647567,
+      "loss": 0.1231,
+      "step": 10578
+    },
+    {
+      "epoch": 0.7633031494642664,
+      "grad_norm": 0.12333228439092636,
+      "learning_rate": 0.00016948766055707894,
+      "loss": 0.1611,
+      "step": 10579
+    },
+    {
+      "epoch": 0.7633753021393268,
+      "grad_norm": 0.11283402144908905,
+      "learning_rate": 0.00016948477413768223,
+      "loss": 0.1616,
+      "step": 10580
+    },
+    {
+      "epoch": 0.7634474548143872,
+      "grad_norm": 0.12879428267478943,
+      "learning_rate": 0.0001694818877182855,
+      "loss": 0.1887,
+      "step": 10581
+    },
+    {
+      "epoch": 0.7635196074894477,
+      "grad_norm": 0.12961862981319427,
+      "learning_rate": 0.00016947900129888875,
+      "loss": 0.1694,
+      "step": 10582
+    },
+    {
+      "epoch": 0.7635917601645081,
+      "grad_norm": 0.11353792995214462,
+      "learning_rate": 0.000169476114879492,
+      "loss": 0.1381,
+      "step": 10583
+    },
+    {
+      "epoch": 0.7636639128395686,
+      "grad_norm": 0.13557705283164978,
+      "learning_rate": 0.00016947322846009525,
+      "loss": 0.1359,
+      "step": 10584
+    },
+    {
+      "epoch": 0.7637360655146289,
+      "grad_norm": 0.13384157419204712,
+      "learning_rate": 0.00016947034204069851,
+      "loss": 0.1468,
+      "step": 10585
+    },
+    {
+      "epoch": 0.7638082181896894,
+      "grad_norm": 0.11125749349594116,
+      "learning_rate": 0.00016946745562130178,
+      "loss": 0.1409,
+      "step": 10586
+    },
+    {
+      "epoch": 0.7638803708647498,
+      "grad_norm": 0.1282130628824234,
+      "learning_rate": 0.00016946456920190507,
+      "loss": 0.1239,
+      "step": 10587
+    },
+    {
+      "epoch": 0.7639525235398102,
+      "grad_norm": 0.14103315770626068,
+      "learning_rate": 0.0001694616827825083,
+      "loss": 0.1109,
+      "step": 10588
+    },
+    {
+      "epoch": 0.7640246762148707,
+      "grad_norm": 0.129958376288414,
+      "learning_rate": 0.00016945879636311157,
+      "loss": 0.1855,
+      "step": 10589
+    },
+    {
+      "epoch": 0.7640968288899311,
+      "grad_norm": 0.15713174641132355,
+      "learning_rate": 0.00016945590994371483,
+      "loss": 0.1798,
+      "step": 10590
+    },
+    {
+      "epoch": 0.7641689815649915,
+      "grad_norm": 0.1206924319267273,
+      "learning_rate": 0.0001694530235243181,
+      "loss": 0.1376,
+      "step": 10591
+    },
+    {
+      "epoch": 0.7642411342400519,
+      "grad_norm": 0.11607380211353302,
+      "learning_rate": 0.00016945013710492135,
+      "loss": 0.0943,
+      "step": 10592
+    },
+    {
+      "epoch": 0.7643132869151124,
+      "grad_norm": 0.11553899198770523,
+      "learning_rate": 0.00016944725068552462,
+      "loss": 0.1038,
+      "step": 10593
+    },
+    {
+      "epoch": 0.7643854395901728,
+      "grad_norm": 0.12019038945436478,
+      "learning_rate": 0.00016944436426612788,
+      "loss": 0.153,
+      "step": 10594
+    },
+    {
+      "epoch": 0.7644575922652332,
+      "grad_norm": 0.14442084729671478,
+      "learning_rate": 0.00016944147784673114,
+      "loss": 0.1657,
+      "step": 10595
+    },
+    {
+      "epoch": 0.7645297449402937,
+      "grad_norm": 0.1325032263994217,
+      "learning_rate": 0.0001694385914273344,
+      "loss": 0.1738,
+      "step": 10596
+    },
+    {
+      "epoch": 0.7646018976153541,
+      "grad_norm": 0.11100459843873978,
+      "learning_rate": 0.00016943570500793767,
+      "loss": 0.1271,
+      "step": 10597
+    },
+    {
+      "epoch": 0.7646740502904145,
+      "grad_norm": 0.1309879720211029,
+      "learning_rate": 0.00016943281858854093,
+      "loss": 0.1298,
+      "step": 10598
+    },
+    {
+      "epoch": 0.7647462029654749,
+      "grad_norm": 0.10606569796800613,
+      "learning_rate": 0.00016942993216914417,
+      "loss": 0.0989,
+      "step": 10599
+    },
+    {
+      "epoch": 0.7648183556405354,
+      "grad_norm": 0.11444351077079773,
+      "learning_rate": 0.00016942704574974743,
+      "loss": 0.1487,
+      "step": 10600
+    },
+    {
+      "epoch": 0.7648905083155958,
+      "grad_norm": 0.1355319619178772,
+      "learning_rate": 0.00016942415933035072,
+      "loss": 0.1473,
+      "step": 10601
+    },
+    {
+      "epoch": 0.7649626609906562,
+      "grad_norm": 0.10285015404224396,
+      "learning_rate": 0.00016942127291095398,
+      "loss": 0.085,
+      "step": 10602
+    },
+    {
+      "epoch": 0.7650348136657167,
+      "grad_norm": 0.1468527615070343,
+      "learning_rate": 0.00016941838649155725,
+      "loss": 0.1947,
+      "step": 10603
+    },
+    {
+      "epoch": 0.7651069663407771,
+      "grad_norm": 0.11259424686431885,
+      "learning_rate": 0.00016941550007216048,
+      "loss": 0.1345,
+      "step": 10604
+    },
+    {
+      "epoch": 0.7651791190158375,
+      "grad_norm": 0.10993609577417374,
+      "learning_rate": 0.00016941261365276375,
+      "loss": 0.1777,
+      "step": 10605
+    },
+    {
+      "epoch": 0.7652512716908979,
+      "grad_norm": 0.11793410778045654,
+      "learning_rate": 0.000169409727233367,
+      "loss": 0.1205,
+      "step": 10606
+    },
+    {
+      "epoch": 0.7653234243659583,
+      "grad_norm": 0.14657844603061676,
+      "learning_rate": 0.00016940684081397027,
+      "loss": 0.1635,
+      "step": 10607
+    },
+    {
+      "epoch": 0.7653955770410188,
+      "grad_norm": 0.1182616651058197,
+      "learning_rate": 0.00016940395439457356,
+      "loss": 0.1611,
+      "step": 10608
+    },
+    {
+      "epoch": 0.7654677297160792,
+      "grad_norm": 0.14213289320468903,
+      "learning_rate": 0.0001694010679751768,
+      "loss": 0.1835,
+      "step": 10609
+    },
+    {
+      "epoch": 0.7655398823911397,
+      "grad_norm": 0.12534356117248535,
+      "learning_rate": 0.00016939818155578006,
+      "loss": 0.1351,
+      "step": 10610
+    },
+    {
+      "epoch": 0.7656120350662001,
+      "grad_norm": 0.11216719448566437,
+      "learning_rate": 0.00016939529513638332,
+      "loss": 0.1542,
+      "step": 10611
+    },
+    {
+      "epoch": 0.7656841877412605,
+      "grad_norm": 0.12680353224277496,
+      "learning_rate": 0.00016939240871698659,
+      "loss": 0.1498,
+      "step": 10612
+    },
+    {
+      "epoch": 0.7657563404163209,
+      "grad_norm": 0.1329881250858307,
+      "learning_rate": 0.00016938952229758985,
+      "loss": 0.1257,
+      "step": 10613
+    },
+    {
+      "epoch": 0.7658284930913813,
+      "grad_norm": 0.12916041910648346,
+      "learning_rate": 0.0001693866358781931,
+      "loss": 0.1436,
+      "step": 10614
+    },
+    {
+      "epoch": 0.7659006457664418,
+      "grad_norm": 0.12449880689382553,
+      "learning_rate": 0.00016938374945879637,
+      "loss": 0.1535,
+      "step": 10615
+    },
+    {
+      "epoch": 0.7659727984415022,
+      "grad_norm": 0.14934857189655304,
+      "learning_rate": 0.00016938086303939964,
+      "loss": 0.175,
+      "step": 10616
+    },
+    {
+      "epoch": 0.7660449511165627,
+      "grad_norm": 0.10569044202566147,
+      "learning_rate": 0.0001693779766200029,
+      "loss": 0.1539,
+      "step": 10617
+    },
+    {
+      "epoch": 0.7661171037916231,
+      "grad_norm": 0.11117196083068848,
+      "learning_rate": 0.00016937509020060616,
+      "loss": 0.1819,
+      "step": 10618
+    },
+    {
+      "epoch": 0.7661892564666835,
+      "grad_norm": 0.15367351472377777,
+      "learning_rate": 0.00016937220378120943,
+      "loss": 0.1558,
+      "step": 10619
+    },
+    {
+      "epoch": 0.7662614091417439,
+      "grad_norm": 0.11300353705883026,
+      "learning_rate": 0.00016936931736181266,
+      "loss": 0.1581,
+      "step": 10620
+    },
+    {
+      "epoch": 0.7663335618168043,
+      "grad_norm": 0.11891332268714905,
+      "learning_rate": 0.00016936643094241593,
+      "loss": 0.1779,
+      "step": 10621
+    },
+    {
+      "epoch": 0.7664057144918648,
+      "grad_norm": 0.11424824595451355,
+      "learning_rate": 0.00016936354452301922,
+      "loss": 0.1732,
+      "step": 10622
+    },
+    {
+      "epoch": 0.7664778671669252,
+      "grad_norm": 0.10762421786785126,
+      "learning_rate": 0.00016936065810362248,
+      "loss": 0.1504,
+      "step": 10623
+    },
+    {
+      "epoch": 0.7665500198419857,
+      "grad_norm": 0.10763785988092422,
+      "learning_rate": 0.00016935777168422574,
+      "loss": 0.1497,
+      "step": 10624
+    },
+    {
+      "epoch": 0.7666221725170461,
+      "grad_norm": 0.13233208656311035,
+      "learning_rate": 0.00016935488526482898,
+      "loss": 0.1385,
+      "step": 10625
+    },
+    {
+      "epoch": 0.7666943251921065,
+      "grad_norm": 0.143177330493927,
+      "learning_rate": 0.00016935199884543224,
+      "loss": 0.2356,
+      "step": 10626
+    },
+    {
+      "epoch": 0.7667664778671669,
+      "grad_norm": 0.10598569363355637,
+      "learning_rate": 0.0001693491124260355,
+      "loss": 0.1418,
+      "step": 10627
+    },
+    {
+      "epoch": 0.7668386305422273,
+      "grad_norm": 0.13420173525810242,
+      "learning_rate": 0.00016934622600663877,
+      "loss": 0.1654,
+      "step": 10628
+    },
+    {
+      "epoch": 0.7669107832172878,
+      "grad_norm": 0.13415329158306122,
+      "learning_rate": 0.00016934333958724206,
+      "loss": 0.163,
+      "step": 10629
+    },
+    {
+      "epoch": 0.7669829358923482,
+      "grad_norm": 0.12192816287279129,
+      "learning_rate": 0.0001693404531678453,
+      "loss": 0.1422,
+      "step": 10630
+    },
+    {
+      "epoch": 0.7670550885674087,
+      "grad_norm": 0.10028716921806335,
+      "learning_rate": 0.00016933756674844855,
+      "loss": 0.1549,
+      "step": 10631
+    },
+    {
+      "epoch": 0.7671272412424691,
+      "grad_norm": 0.14095130562782288,
+      "learning_rate": 0.00016933468032905182,
+      "loss": 0.1345,
+      "step": 10632
+    },
+    {
+      "epoch": 0.7671993939175294,
+      "grad_norm": 0.1579335480928421,
+      "learning_rate": 0.00016933179390965508,
+      "loss": 0.1835,
+      "step": 10633
+    },
+    {
+      "epoch": 0.7672715465925899,
+      "grad_norm": 0.11538061499595642,
+      "learning_rate": 0.00016932890749025834,
+      "loss": 0.1737,
+      "step": 10634
+    },
+    {
+      "epoch": 0.7673436992676503,
+      "grad_norm": 0.12305877357721329,
+      "learning_rate": 0.0001693260210708616,
+      "loss": 0.1435,
+      "step": 10635
+    },
+    {
+      "epoch": 0.7674158519427108,
+      "grad_norm": 0.11222461611032486,
+      "learning_rate": 0.00016932313465146487,
+      "loss": 0.1926,
+      "step": 10636
+    },
+    {
+      "epoch": 0.7674880046177712,
+      "grad_norm": 0.141736701130867,
+      "learning_rate": 0.00016932024823206813,
+      "loss": 0.1649,
+      "step": 10637
+    },
+    {
+      "epoch": 0.7675601572928317,
+      "grad_norm": 0.11970090121030807,
+      "learning_rate": 0.0001693173618126714,
+      "loss": 0.1496,
+      "step": 10638
+    },
+    {
+      "epoch": 0.7676323099678921,
+      "grad_norm": 0.12373533844947815,
+      "learning_rate": 0.00016931447539327466,
+      "loss": 0.1525,
+      "step": 10639
+    },
+    {
+      "epoch": 0.7677044626429524,
+      "grad_norm": 0.10975643992424011,
+      "learning_rate": 0.00016931158897387792,
+      "loss": 0.1319,
+      "step": 10640
+    },
+    {
+      "epoch": 0.7677766153180129,
+      "grad_norm": 0.1512438952922821,
+      "learning_rate": 0.00016930870255448116,
+      "loss": 0.1895,
+      "step": 10641
+    },
+    {
+      "epoch": 0.7678487679930733,
+      "grad_norm": 0.15371336042881012,
+      "learning_rate": 0.00016930581613508442,
+      "loss": 0.1561,
+      "step": 10642
+    },
+    {
+      "epoch": 0.7679209206681338,
+      "grad_norm": 0.16743223369121552,
+      "learning_rate": 0.0001693029297156877,
+      "loss": 0.1434,
+      "step": 10643
+    },
+    {
+      "epoch": 0.7679930733431942,
+      "grad_norm": 0.11173707991838455,
+      "learning_rate": 0.00016930004329629097,
+      "loss": 0.2314,
+      "step": 10644
+    },
+    {
+      "epoch": 0.7680652260182547,
+      "grad_norm": 0.13833162188529968,
+      "learning_rate": 0.00016929715687689424,
+      "loss": 0.1643,
+      "step": 10645
+    },
+    {
+      "epoch": 0.7681373786933151,
+      "grad_norm": 0.14580032229423523,
+      "learning_rate": 0.00016929427045749747,
+      "loss": 0.1791,
+      "step": 10646
+    },
+    {
+      "epoch": 0.7682095313683754,
+      "grad_norm": 0.12167628109455109,
+      "learning_rate": 0.00016929138403810073,
+      "loss": 0.1377,
+      "step": 10647
+    },
+    {
+      "epoch": 0.7682816840434359,
+      "grad_norm": 0.10616665333509445,
+      "learning_rate": 0.000169288497618704,
+      "loss": 0.1351,
+      "step": 10648
+    },
+    {
+      "epoch": 0.7683538367184963,
+      "grad_norm": 0.1406809240579605,
+      "learning_rate": 0.00016928561119930726,
+      "loss": 0.1399,
+      "step": 10649
+    },
+    {
+      "epoch": 0.7684259893935568,
+      "grad_norm": 0.12610119581222534,
+      "learning_rate": 0.00016928272477991055,
+      "loss": 0.1451,
+      "step": 10650
+    },
+    {
+      "epoch": 0.7684981420686172,
+      "grad_norm": 0.12259224057197571,
+      "learning_rate": 0.00016927983836051379,
+      "loss": 0.1398,
+      "step": 10651
+    },
+    {
+      "epoch": 0.7685702947436777,
+      "grad_norm": 0.17353153228759766,
+      "learning_rate": 0.00016927695194111705,
+      "loss": 0.1219,
+      "step": 10652
+    },
+    {
+      "epoch": 0.768642447418738,
+      "grad_norm": 0.12674789130687714,
+      "learning_rate": 0.0001692740655217203,
+      "loss": 0.1321,
+      "step": 10653
+    },
+    {
+      "epoch": 0.7687146000937984,
+      "grad_norm": 0.14105506241321564,
+      "learning_rate": 0.00016927117910232357,
+      "loss": 0.1645,
+      "step": 10654
+    },
+    {
+      "epoch": 0.7687867527688589,
+      "grad_norm": 0.1468188464641571,
+      "learning_rate": 0.00016926829268292684,
+      "loss": 0.1863,
+      "step": 10655
+    },
+    {
+      "epoch": 0.7688589054439193,
+      "grad_norm": 0.10780065506696701,
+      "learning_rate": 0.0001692654062635301,
+      "loss": 0.1524,
+      "step": 10656
+    },
+    {
+      "epoch": 0.7689310581189798,
+      "grad_norm": 0.12532243132591248,
+      "learning_rate": 0.00016926251984413336,
+      "loss": 0.131,
+      "step": 10657
+    },
+    {
+      "epoch": 0.7690032107940402,
+      "grad_norm": 0.14364686608314514,
+      "learning_rate": 0.00016925963342473663,
+      "loss": 0.1368,
+      "step": 10658
+    },
+    {
+      "epoch": 0.7690753634691007,
+      "grad_norm": 0.11589592695236206,
+      "learning_rate": 0.0001692567470053399,
+      "loss": 0.1454,
+      "step": 10659
+    },
+    {
+      "epoch": 0.769147516144161,
+      "grad_norm": 0.12993989884853363,
+      "learning_rate": 0.00016925386058594315,
+      "loss": 0.1301,
+      "step": 10660
+    },
+    {
+      "epoch": 0.7692196688192214,
+      "grad_norm": 0.10442299395799637,
+      "learning_rate": 0.00016925097416654642,
+      "loss": 0.1113,
+      "step": 10661
+    },
+    {
+      "epoch": 0.7692918214942819,
+      "grad_norm": 0.11771980673074722,
+      "learning_rate": 0.00016924808774714965,
+      "loss": 0.1237,
+      "step": 10662
+    },
+    {
+      "epoch": 0.7693639741693423,
+      "grad_norm": 0.10796065628528595,
+      "learning_rate": 0.00016924520132775291,
+      "loss": 0.1148,
+      "step": 10663
+    },
+    {
+      "epoch": 0.7694361268444028,
+      "grad_norm": 0.13033612072467804,
+      "learning_rate": 0.0001692423149083562,
+      "loss": 0.13,
+      "step": 10664
+    },
+    {
+      "epoch": 0.7695082795194632,
+      "grad_norm": 0.12694084644317627,
+      "learning_rate": 0.00016923942848895947,
+      "loss": 0.1824,
+      "step": 10665
+    },
+    {
+      "epoch": 0.7695804321945237,
+      "grad_norm": 0.11404303461313248,
+      "learning_rate": 0.00016923654206956273,
+      "loss": 0.1272,
+      "step": 10666
+    },
+    {
+      "epoch": 0.769652584869584,
+      "grad_norm": 0.11634194105863571,
+      "learning_rate": 0.00016923365565016597,
+      "loss": 0.143,
+      "step": 10667
+    },
+    {
+      "epoch": 0.7697247375446444,
+      "grad_norm": 0.15671080350875854,
+      "learning_rate": 0.00016923076923076923,
+      "loss": 0.2003,
+      "step": 10668
+    },
+    {
+      "epoch": 0.7697968902197049,
+      "grad_norm": 0.13332685828208923,
+      "learning_rate": 0.0001692278828113725,
+      "loss": 0.1697,
+      "step": 10669
+    },
+    {
+      "epoch": 0.7698690428947653,
+      "grad_norm": 0.13197067379951477,
+      "learning_rate": 0.00016922499639197575,
+      "loss": 0.145,
+      "step": 10670
+    },
+    {
+      "epoch": 0.7699411955698258,
+      "grad_norm": 0.12754791975021362,
+      "learning_rate": 0.00016922210997257904,
+      "loss": 0.1259,
+      "step": 10671
+    },
+    {
+      "epoch": 0.7700133482448862,
+      "grad_norm": 0.15722058713436127,
+      "learning_rate": 0.00016921922355318228,
+      "loss": 0.1884,
+      "step": 10672
+    },
+    {
+      "epoch": 0.7700855009199467,
+      "grad_norm": 0.13386334478855133,
+      "learning_rate": 0.00016921633713378554,
+      "loss": 0.1332,
+      "step": 10673
+    },
+    {
+      "epoch": 0.770157653595007,
+      "grad_norm": 0.08790024369955063,
+      "learning_rate": 0.0001692134507143888,
+      "loss": 0.1512,
+      "step": 10674
+    },
+    {
+      "epoch": 0.7702298062700674,
+      "grad_norm": 0.11426102370023727,
+      "learning_rate": 0.00016921056429499207,
+      "loss": 0.1388,
+      "step": 10675
+    },
+    {
+      "epoch": 0.7703019589451279,
+      "grad_norm": 0.12486805021762848,
+      "learning_rate": 0.00016920767787559533,
+      "loss": 0.1354,
+      "step": 10676
+    },
+    {
+      "epoch": 0.7703741116201883,
+      "grad_norm": 0.1404605358839035,
+      "learning_rate": 0.0001692047914561986,
+      "loss": 0.1629,
+      "step": 10677
+    },
+    {
+      "epoch": 0.7704462642952488,
+      "grad_norm": 0.11838164925575256,
+      "learning_rate": 0.00016920190503680186,
+      "loss": 0.1379,
+      "step": 10678
+    },
+    {
+      "epoch": 0.7705184169703092,
+      "grad_norm": 0.11654802411794662,
+      "learning_rate": 0.00016919901861740512,
+      "loss": 0.1385,
+      "step": 10679
+    },
+    {
+      "epoch": 0.7705905696453696,
+      "grad_norm": 0.12266696989536285,
+      "learning_rate": 0.00016919613219800838,
+      "loss": 0.2171,
+      "step": 10680
+    },
+    {
+      "epoch": 0.77066272232043,
+      "grad_norm": 0.15840482711791992,
+      "learning_rate": 0.00016919324577861165,
+      "loss": 0.1579,
+      "step": 10681
+    },
+    {
+      "epoch": 0.7707348749954904,
+      "grad_norm": 0.12705667316913605,
+      "learning_rate": 0.0001691903593592149,
+      "loss": 0.1098,
+      "step": 10682
+    },
+    {
+      "epoch": 0.7708070276705509,
+      "grad_norm": 0.09946004301309586,
+      "learning_rate": 0.00016918747293981817,
+      "loss": 0.1467,
+      "step": 10683
+    },
+    {
+      "epoch": 0.7708791803456113,
+      "grad_norm": 0.12914541363716125,
+      "learning_rate": 0.0001691845865204214,
+      "loss": 0.1541,
+      "step": 10684
+    },
+    {
+      "epoch": 0.7709513330206718,
+      "grad_norm": 0.11290688812732697,
+      "learning_rate": 0.00016918170010102467,
+      "loss": 0.151,
+      "step": 10685
+    },
+    {
+      "epoch": 0.7710234856957322,
+      "grad_norm": 0.32748568058013916,
+      "learning_rate": 0.00016917881368162796,
+      "loss": 0.1804,
+      "step": 10686
+    },
+    {
+      "epoch": 0.7710956383707926,
+      "grad_norm": 0.14599604904651642,
+      "learning_rate": 0.00016917592726223122,
+      "loss": 0.1855,
+      "step": 10687
+    },
+    {
+      "epoch": 0.771167791045853,
+      "grad_norm": 0.11095081269741058,
+      "learning_rate": 0.0001691730408428345,
+      "loss": 0.179,
+      "step": 10688
+    },
+    {
+      "epoch": 0.7712399437209134,
+      "grad_norm": 0.14396479725837708,
+      "learning_rate": 0.00016917015442343772,
+      "loss": 0.1457,
+      "step": 10689
+    },
+    {
+      "epoch": 0.7713120963959739,
+      "grad_norm": 0.11976207047700882,
+      "learning_rate": 0.00016916726800404099,
+      "loss": 0.2047,
+      "step": 10690
+    },
+    {
+      "epoch": 0.7713842490710343,
+      "grad_norm": 0.14696775376796722,
+      "learning_rate": 0.00016916438158464425,
+      "loss": 0.1652,
+      "step": 10691
+    },
+    {
+      "epoch": 0.7714564017460948,
+      "grad_norm": 0.14759984612464905,
+      "learning_rate": 0.0001691614951652475,
+      "loss": 0.1867,
+      "step": 10692
+    },
+    {
+      "epoch": 0.7715285544211552,
+      "grad_norm": 0.15106844902038574,
+      "learning_rate": 0.0001691586087458508,
+      "loss": 0.1715,
+      "step": 10693
+    },
+    {
+      "epoch": 0.7716007070962156,
+      "grad_norm": 0.1324152797460556,
+      "learning_rate": 0.00016915572232645404,
+      "loss": 0.1529,
+      "step": 10694
+    },
+    {
+      "epoch": 0.771672859771276,
+      "grad_norm": 0.1258843094110489,
+      "learning_rate": 0.0001691528359070573,
+      "loss": 0.1704,
+      "step": 10695
+    },
+    {
+      "epoch": 0.7717450124463364,
+      "grad_norm": 0.12931056320667267,
+      "learning_rate": 0.00016914994948766056,
+      "loss": 0.141,
+      "step": 10696
+    },
+    {
+      "epoch": 0.7718171651213969,
+      "grad_norm": 0.1137009933590889,
+      "learning_rate": 0.00016914706306826383,
+      "loss": 0.1317,
+      "step": 10697
+    },
+    {
+      "epoch": 0.7718893177964573,
+      "grad_norm": 0.12198218703269958,
+      "learning_rate": 0.0001691441766488671,
+      "loss": 0.1589,
+      "step": 10698
+    },
+    {
+      "epoch": 0.7719614704715178,
+      "grad_norm": 0.13116715848445892,
+      "learning_rate": 0.00016914129022947035,
+      "loss": 0.1776,
+      "step": 10699
+    },
+    {
+      "epoch": 0.7720336231465782,
+      "grad_norm": 0.15571896731853485,
+      "learning_rate": 0.00016913840381007361,
+      "loss": 0.1644,
+      "step": 10700
+    },
+    {
+      "epoch": 0.7721057758216386,
+      "grad_norm": 0.15015654265880585,
+      "learning_rate": 0.00016913551739067688,
+      "loss": 0.142,
+      "step": 10701
+    },
+    {
+      "epoch": 0.772177928496699,
+      "grad_norm": 0.14481572806835175,
+      "learning_rate": 0.00016913263097128014,
+      "loss": 0.1455,
+      "step": 10702
+    },
+    {
+      "epoch": 0.7722500811717594,
+      "grad_norm": 0.11075858026742935,
+      "learning_rate": 0.0001691297445518834,
+      "loss": 0.1451,
+      "step": 10703
+    },
+    {
+      "epoch": 0.7723222338468199,
+      "grad_norm": 0.14457841217517853,
+      "learning_rate": 0.00016912685813248667,
+      "loss": 0.1397,
+      "step": 10704
+    },
+    {
+      "epoch": 0.7723943865218803,
+      "grad_norm": 0.12623868882656097,
+      "learning_rate": 0.0001691239717130899,
+      "loss": 0.1117,
+      "step": 10705
+    },
+    {
+      "epoch": 0.7724665391969407,
+      "grad_norm": 0.1958254724740982,
+      "learning_rate": 0.00016912108529369317,
+      "loss": 0.192,
+      "step": 10706
+    },
+    {
+      "epoch": 0.7725386918720012,
+      "grad_norm": 0.12673625349998474,
+      "learning_rate": 0.00016911819887429646,
+      "loss": 0.1742,
+      "step": 10707
+    },
+    {
+      "epoch": 0.7726108445470616,
+      "grad_norm": 0.16572904586791992,
+      "learning_rate": 0.00016911531245489972,
+      "loss": 0.1314,
+      "step": 10708
+    },
+    {
+      "epoch": 0.772682997222122,
+      "grad_norm": 0.1162322536110878,
+      "learning_rate": 0.00016911242603550298,
+      "loss": 0.1218,
+      "step": 10709
+    },
+    {
+      "epoch": 0.7727551498971824,
+      "grad_norm": 0.11697287857532501,
+      "learning_rate": 0.00016910953961610622,
+      "loss": 0.1442,
+      "step": 10710
+    },
+    {
+      "epoch": 0.7728273025722429,
+      "grad_norm": 0.11673609167337418,
+      "learning_rate": 0.00016910665319670948,
+      "loss": 0.1322,
+      "step": 10711
+    },
+    {
+      "epoch": 0.7728994552473033,
+      "grad_norm": 0.13033811748027802,
+      "learning_rate": 0.00016910376677731274,
+      "loss": 0.1588,
+      "step": 10712
+    },
+    {
+      "epoch": 0.7729716079223637,
+      "grad_norm": 0.11955799907445908,
+      "learning_rate": 0.000169100880357916,
+      "loss": 0.1487,
+      "step": 10713
+    },
+    {
+      "epoch": 0.7730437605974242,
+      "grad_norm": 0.1232030987739563,
+      "learning_rate": 0.0001690979939385193,
+      "loss": 0.1041,
+      "step": 10714
+    },
+    {
+      "epoch": 0.7731159132724845,
+      "grad_norm": 0.13681338727474213,
+      "learning_rate": 0.00016909510751912253,
+      "loss": 0.1624,
+      "step": 10715
+    },
+    {
+      "epoch": 0.773188065947545,
+      "grad_norm": 0.16010960936546326,
+      "learning_rate": 0.0001690922210997258,
+      "loss": 0.1497,
+      "step": 10716
+    },
+    {
+      "epoch": 0.7732602186226054,
+      "grad_norm": 0.10944380611181259,
+      "learning_rate": 0.00016908933468032906,
+      "loss": 0.0934,
+      "step": 10717
+    },
+    {
+      "epoch": 0.7733323712976659,
+      "grad_norm": 0.15146002173423767,
+      "learning_rate": 0.00016908644826093232,
+      "loss": 0.1349,
+      "step": 10718
+    },
+    {
+      "epoch": 0.7734045239727263,
+      "grad_norm": 0.12636396288871765,
+      "learning_rate": 0.00016908356184153558,
+      "loss": 0.0996,
+      "step": 10719
+    },
+    {
+      "epoch": 0.7734766766477867,
+      "grad_norm": 0.1262812614440918,
+      "learning_rate": 0.00016908067542213885,
+      "loss": 0.1905,
+      "step": 10720
+    },
+    {
+      "epoch": 0.7735488293228472,
+      "grad_norm": 0.12517490983009338,
+      "learning_rate": 0.0001690777890027421,
+      "loss": 0.1575,
+      "step": 10721
+    },
+    {
+      "epoch": 0.7736209819979075,
+      "grad_norm": 0.1311482936143875,
+      "learning_rate": 0.00016907490258334537,
+      "loss": 0.1676,
+      "step": 10722
+    },
+    {
+      "epoch": 0.773693134672968,
+      "grad_norm": 0.11919399350881577,
+      "learning_rate": 0.00016907201616394863,
+      "loss": 0.1316,
+      "step": 10723
+    },
+    {
+      "epoch": 0.7737652873480284,
+      "grad_norm": 0.13207107782363892,
+      "learning_rate": 0.0001690691297445519,
+      "loss": 0.1424,
+      "step": 10724
+    },
+    {
+      "epoch": 0.7738374400230889,
+      "grad_norm": 0.13004319369792938,
+      "learning_rate": 0.00016906624332515516,
+      "loss": 0.1912,
+      "step": 10725
+    },
+    {
+      "epoch": 0.7739095926981493,
+      "grad_norm": 0.1217634528875351,
+      "learning_rate": 0.0001690633569057584,
+      "loss": 0.143,
+      "step": 10726
+    },
+    {
+      "epoch": 0.7739817453732097,
+      "grad_norm": 0.1645490676164627,
+      "learning_rate": 0.00016906047048636166,
+      "loss": 0.1609,
+      "step": 10727
+    },
+    {
+      "epoch": 0.7740538980482702,
+      "grad_norm": 0.15471287071704865,
+      "learning_rate": 0.00016905758406696495,
+      "loss": 0.1037,
+      "step": 10728
+    },
+    {
+      "epoch": 0.7741260507233305,
+      "grad_norm": 0.15667590498924255,
+      "learning_rate": 0.0001690546976475682,
+      "loss": 0.2011,
+      "step": 10729
+    },
+    {
+      "epoch": 0.774198203398391,
+      "grad_norm": 0.1259351372718811,
+      "learning_rate": 0.00016905181122817148,
+      "loss": 0.1646,
+      "step": 10730
+    },
+    {
+      "epoch": 0.7742703560734514,
+      "grad_norm": 0.12652148306369781,
+      "learning_rate": 0.0001690489248087747,
+      "loss": 0.119,
+      "step": 10731
+    },
+    {
+      "epoch": 0.7743425087485118,
+      "grad_norm": 0.1419692188501358,
+      "learning_rate": 0.00016904603838937797,
+      "loss": 0.1098,
+      "step": 10732
+    },
+    {
+      "epoch": 0.7744146614235723,
+      "grad_norm": 0.12805712223052979,
+      "learning_rate": 0.00016904315196998124,
+      "loss": 0.1245,
+      "step": 10733
+    },
+    {
+      "epoch": 0.7744868140986327,
+      "grad_norm": 0.13761387765407562,
+      "learning_rate": 0.0001690402655505845,
+      "loss": 0.1687,
+      "step": 10734
+    },
+    {
+      "epoch": 0.7745589667736932,
+      "grad_norm": 0.1373915672302246,
+      "learning_rate": 0.0001690373791311878,
+      "loss": 0.1557,
+      "step": 10735
+    },
+    {
+      "epoch": 0.7746311194487535,
+      "grad_norm": 0.16077910363674164,
+      "learning_rate": 0.00016903449271179103,
+      "loss": 0.1386,
+      "step": 10736
+    },
+    {
+      "epoch": 0.774703272123814,
+      "grad_norm": 0.16774095594882965,
+      "learning_rate": 0.0001690316062923943,
+      "loss": 0.1649,
+      "step": 10737
+    },
+    {
+      "epoch": 0.7747754247988744,
+      "grad_norm": 0.14782363176345825,
+      "learning_rate": 0.00016902871987299755,
+      "loss": 0.1749,
+      "step": 10738
+    },
+    {
+      "epoch": 0.7748475774739348,
+      "grad_norm": 0.15641121566295624,
+      "learning_rate": 0.00016902583345360081,
+      "loss": 0.1631,
+      "step": 10739
+    },
+    {
+      "epoch": 0.7749197301489953,
+      "grad_norm": 0.12619055807590485,
+      "learning_rate": 0.00016902294703420408,
+      "loss": 0.1684,
+      "step": 10740
+    },
+    {
+      "epoch": 0.7749918828240557,
+      "grad_norm": 0.12410851567983627,
+      "learning_rate": 0.00016902006061480734,
+      "loss": 0.1097,
+      "step": 10741
+    },
+    {
+      "epoch": 0.7750640354991162,
+      "grad_norm": 0.11382365971803665,
+      "learning_rate": 0.0001690171741954106,
+      "loss": 0.1824,
+      "step": 10742
+    },
+    {
+      "epoch": 0.7751361881741765,
+      "grad_norm": 0.11722074449062347,
+      "learning_rate": 0.00016901428777601387,
+      "loss": 0.1302,
+      "step": 10743
+    },
+    {
+      "epoch": 0.775208340849237,
+      "grad_norm": 0.15282313525676727,
+      "learning_rate": 0.00016901140135661713,
+      "loss": 0.1719,
+      "step": 10744
+    },
+    {
+      "epoch": 0.7752804935242974,
+      "grad_norm": 0.09096650779247284,
+      "learning_rate": 0.0001690085149372204,
+      "loss": 0.1648,
+      "step": 10745
+    },
+    {
+      "epoch": 0.7753526461993578,
+      "grad_norm": 0.10087671130895615,
+      "learning_rate": 0.00016900562851782366,
+      "loss": 0.1545,
+      "step": 10746
+    },
+    {
+      "epoch": 0.7754247988744183,
+      "grad_norm": 0.11997248977422714,
+      "learning_rate": 0.0001690027420984269,
+      "loss": 0.1821,
+      "step": 10747
+    },
+    {
+      "epoch": 0.7754969515494787,
+      "grad_norm": 0.10546719282865524,
+      "learning_rate": 0.00016899985567903015,
+      "loss": 0.1186,
+      "step": 10748
+    },
+    {
+      "epoch": 0.7755691042245392,
+      "grad_norm": 0.1350906938314438,
+      "learning_rate": 0.00016899696925963344,
+      "loss": 0.143,
+      "step": 10749
+    },
+    {
+      "epoch": 0.7756412568995995,
+      "grad_norm": 0.12140945345163345,
+      "learning_rate": 0.0001689940828402367,
+      "loss": 0.1197,
+      "step": 10750
+    },
+    {
+      "epoch": 0.77571340957466,
+      "grad_norm": 0.13864918053150177,
+      "learning_rate": 0.00016899119642083997,
+      "loss": 0.1636,
+      "step": 10751
+    },
+    {
+      "epoch": 0.7757855622497204,
+      "grad_norm": 0.12148050963878632,
+      "learning_rate": 0.0001689883100014432,
+      "loss": 0.1389,
+      "step": 10752
+    },
+    {
+      "epoch": 0.7758577149247808,
+      "grad_norm": 0.11241988092660904,
+      "learning_rate": 0.00016898542358204647,
+      "loss": 0.1512,
+      "step": 10753
+    },
+    {
+      "epoch": 0.7759298675998413,
+      "grad_norm": 0.11895349621772766,
+      "learning_rate": 0.00016898253716264973,
+      "loss": 0.0942,
+      "step": 10754
+    },
+    {
+      "epoch": 0.7760020202749017,
+      "grad_norm": 0.10460896044969559,
+      "learning_rate": 0.000168979650743253,
+      "loss": 0.1165,
+      "step": 10755
+    },
+    {
+      "epoch": 0.7760741729499622,
+      "grad_norm": 0.12709875404834747,
+      "learning_rate": 0.00016897676432385628,
+      "loss": 0.1779,
+      "step": 10756
+    },
+    {
+      "epoch": 0.7761463256250225,
+      "grad_norm": 0.12685567140579224,
+      "learning_rate": 0.00016897387790445952,
+      "loss": 0.1404,
+      "step": 10757
+    },
+    {
+      "epoch": 0.776218478300083,
+      "grad_norm": 0.12755173444747925,
+      "learning_rate": 0.00016897099148506278,
+      "loss": 0.1457,
+      "step": 10758
+    },
+    {
+      "epoch": 0.7762906309751434,
+      "grad_norm": 0.12302425503730774,
+      "learning_rate": 0.00016896810506566605,
+      "loss": 0.1253,
+      "step": 10759
+    },
+    {
+      "epoch": 0.7763627836502038,
+      "grad_norm": 0.1417432427406311,
+      "learning_rate": 0.0001689652186462693,
+      "loss": 0.1744,
+      "step": 10760
+    },
+    {
+      "epoch": 0.7764349363252643,
+      "grad_norm": 0.1446855515241623,
+      "learning_rate": 0.00016896233222687257,
+      "loss": 0.1304,
+      "step": 10761
+    },
+    {
+      "epoch": 0.7765070890003247,
+      "grad_norm": 0.13084016740322113,
+      "learning_rate": 0.00016895944580747583,
+      "loss": 0.1248,
+      "step": 10762
+    },
+    {
+      "epoch": 0.7765792416753852,
+      "grad_norm": 0.126628115773201,
+      "learning_rate": 0.0001689565593880791,
+      "loss": 0.176,
+      "step": 10763
+    },
+    {
+      "epoch": 0.7766513943504455,
+      "grad_norm": 0.11866142600774765,
+      "learning_rate": 0.00016895367296868236,
+      "loss": 0.1711,
+      "step": 10764
+    },
+    {
+      "epoch": 0.776723547025506,
+      "grad_norm": 0.10277380049228668,
+      "learning_rate": 0.00016895078654928562,
+      "loss": 0.1761,
+      "step": 10765
+    },
+    {
+      "epoch": 0.7767956997005664,
+      "grad_norm": 0.15225686132907867,
+      "learning_rate": 0.00016894790012988889,
+      "loss": 0.1789,
+      "step": 10766
+    },
+    {
+      "epoch": 0.7768678523756268,
+      "grad_norm": 0.12679462134838104,
+      "learning_rate": 0.00016894501371049215,
+      "loss": 0.1518,
+      "step": 10767
+    },
+    {
+      "epoch": 0.7769400050506873,
+      "grad_norm": 0.1197810247540474,
+      "learning_rate": 0.00016894212729109539,
+      "loss": 0.1386,
+      "step": 10768
+    },
+    {
+      "epoch": 0.7770121577257477,
+      "grad_norm": 0.12745292484760284,
+      "learning_rate": 0.00016893924087169865,
+      "loss": 0.0812,
+      "step": 10769
+    },
+    {
+      "epoch": 0.7770843104008082,
+      "grad_norm": 0.10420835763216019,
+      "learning_rate": 0.00016893635445230194,
+      "loss": 0.1773,
+      "step": 10770
+    },
+    {
+      "epoch": 0.7771564630758685,
+      "grad_norm": 0.13833922147750854,
+      "learning_rate": 0.0001689334680329052,
+      "loss": 0.1549,
+      "step": 10771
+    },
+    {
+      "epoch": 0.7772286157509289,
+      "grad_norm": 0.1223774328827858,
+      "learning_rate": 0.00016893058161350846,
+      "loss": 0.1592,
+      "step": 10772
+    },
+    {
+      "epoch": 0.7773007684259894,
+      "grad_norm": 0.1366223692893982,
+      "learning_rate": 0.0001689276951941117,
+      "loss": 0.1607,
+      "step": 10773
+    },
+    {
+      "epoch": 0.7773729211010498,
+      "grad_norm": 0.14215941727161407,
+      "learning_rate": 0.00016892480877471496,
+      "loss": 0.143,
+      "step": 10774
+    },
+    {
+      "epoch": 0.7774450737761103,
+      "grad_norm": 0.12098676711320877,
+      "learning_rate": 0.00016892192235531823,
+      "loss": 0.1474,
+      "step": 10775
+    },
+    {
+      "epoch": 0.7775172264511707,
+      "grad_norm": 0.14428995549678802,
+      "learning_rate": 0.0001689190359359215,
+      "loss": 0.1557,
+      "step": 10776
+    },
+    {
+      "epoch": 0.777589379126231,
+      "grad_norm": 0.12384141981601715,
+      "learning_rate": 0.00016891614951652478,
+      "loss": 0.1735,
+      "step": 10777
+    },
+    {
+      "epoch": 0.7776615318012915,
+      "grad_norm": 0.1443023979663849,
+      "learning_rate": 0.00016891326309712801,
+      "loss": 0.1317,
+      "step": 10778
+    },
+    {
+      "epoch": 0.7777336844763519,
+      "grad_norm": 0.11942099034786224,
+      "learning_rate": 0.00016891037667773128,
+      "loss": 0.1694,
+      "step": 10779
+    },
+    {
+      "epoch": 0.7778058371514124,
+      "grad_norm": 0.13776026666164398,
+      "learning_rate": 0.00016890749025833454,
+      "loss": 0.147,
+      "step": 10780
+    },
+    {
+      "epoch": 0.7778779898264728,
+      "grad_norm": 0.12703008949756622,
+      "learning_rate": 0.0001689046038389378,
+      "loss": 0.1834,
+      "step": 10781
+    },
+    {
+      "epoch": 0.7779501425015333,
+      "grad_norm": 0.1265055388212204,
+      "learning_rate": 0.00016890171741954107,
+      "loss": 0.1549,
+      "step": 10782
+    },
+    {
+      "epoch": 0.7780222951765937,
+      "grad_norm": 0.12888294458389282,
+      "learning_rate": 0.00016889883100014433,
+      "loss": 0.1315,
+      "step": 10783
+    },
+    {
+      "epoch": 0.778094447851654,
+      "grad_norm": 0.11409862339496613,
+      "learning_rate": 0.0001688959445807476,
+      "loss": 0.1035,
+      "step": 10784
+    },
+    {
+      "epoch": 0.7781666005267145,
+      "grad_norm": 0.12385515868663788,
+      "learning_rate": 0.00016889305816135085,
+      "loss": 0.187,
+      "step": 10785
+    },
+    {
+      "epoch": 0.7782387532017749,
+      "grad_norm": 0.16930937767028809,
+      "learning_rate": 0.00016889017174195412,
+      "loss": 0.1942,
+      "step": 10786
+    },
+    {
+      "epoch": 0.7783109058768354,
+      "grad_norm": 0.11819856613874435,
+      "learning_rate": 0.00016888728532255738,
+      "loss": 0.103,
+      "step": 10787
+    },
+    {
+      "epoch": 0.7783830585518958,
+      "grad_norm": 0.1241755485534668,
+      "learning_rate": 0.00016888439890316064,
+      "loss": 0.127,
+      "step": 10788
+    },
+    {
+      "epoch": 0.7784552112269563,
+      "grad_norm": 0.14113487303256989,
+      "learning_rate": 0.0001688815124837639,
+      "loss": 0.1682,
+      "step": 10789
+    },
+    {
+      "epoch": 0.7785273639020167,
+      "grad_norm": 0.12371841818094254,
+      "learning_rate": 0.00016887862606436714,
+      "loss": 0.1559,
+      "step": 10790
+    },
+    {
+      "epoch": 0.778599516577077,
+      "grad_norm": 0.12854477763175964,
+      "learning_rate": 0.00016887573964497043,
+      "loss": 0.1635,
+      "step": 10791
+    },
+    {
+      "epoch": 0.7786716692521375,
+      "grad_norm": 0.12481623888015747,
+      "learning_rate": 0.0001688728532255737,
+      "loss": 0.1517,
+      "step": 10792
+    },
+    {
+      "epoch": 0.7787438219271979,
+      "grad_norm": 0.12101232260465622,
+      "learning_rate": 0.00016886996680617696,
+      "loss": 0.181,
+      "step": 10793
+    },
+    {
+      "epoch": 0.7788159746022584,
+      "grad_norm": 0.15985621511936188,
+      "learning_rate": 0.00016886708038678022,
+      "loss": 0.1382,
+      "step": 10794
+    },
+    {
+      "epoch": 0.7788881272773188,
+      "grad_norm": 0.12230433523654938,
+      "learning_rate": 0.00016886419396738346,
+      "loss": 0.129,
+      "step": 10795
+    },
+    {
+      "epoch": 0.7789602799523793,
+      "grad_norm": 0.124177947640419,
+      "learning_rate": 0.00016886130754798672,
+      "loss": 0.138,
+      "step": 10796
+    },
+    {
+      "epoch": 0.7790324326274397,
+      "grad_norm": 0.16062316298484802,
+      "learning_rate": 0.00016885842112858998,
+      "loss": 0.1646,
+      "step": 10797
+    },
+    {
+      "epoch": 0.7791045853025,
+      "grad_norm": 0.12008413672447205,
+      "learning_rate": 0.00016885553470919327,
+      "loss": 0.157,
+      "step": 10798
+    },
+    {
+      "epoch": 0.7791767379775605,
+      "grad_norm": 0.1269167959690094,
+      "learning_rate": 0.00016885264828979654,
+      "loss": 0.1238,
+      "step": 10799
+    },
+    {
+      "epoch": 0.7792488906526209,
+      "grad_norm": 0.10606911778450012,
+      "learning_rate": 0.00016884976187039977,
+      "loss": 0.1213,
+      "step": 10800
+    },
+    {
+      "epoch": 0.7793210433276814,
+      "grad_norm": 0.10247175395488739,
+      "learning_rate": 0.00016884687545100303,
+      "loss": 0.1617,
+      "step": 10801
+    },
+    {
+      "epoch": 0.7793931960027418,
+      "grad_norm": 0.107964888215065,
+      "learning_rate": 0.0001688439890316063,
+      "loss": 0.1601,
+      "step": 10802
+    },
+    {
+      "epoch": 0.7794653486778023,
+      "grad_norm": 0.13006651401519775,
+      "learning_rate": 0.00016884110261220956,
+      "loss": 0.1439,
+      "step": 10803
+    },
+    {
+      "epoch": 0.7795375013528627,
+      "grad_norm": 0.1302005499601364,
+      "learning_rate": 0.00016883821619281282,
+      "loss": 0.1269,
+      "step": 10804
+    },
+    {
+      "epoch": 0.779609654027923,
+      "grad_norm": 0.12471070140600204,
+      "learning_rate": 0.00016883532977341609,
+      "loss": 0.1638,
+      "step": 10805
+    },
+    {
+      "epoch": 0.7796818067029835,
+      "grad_norm": 0.10785996168851852,
+      "learning_rate": 0.00016883244335401935,
+      "loss": 0.1934,
+      "step": 10806
+    },
+    {
+      "epoch": 0.7797539593780439,
+      "grad_norm": 0.12432392686605453,
+      "learning_rate": 0.0001688295569346226,
+      "loss": 0.1791,
+      "step": 10807
+    },
+    {
+      "epoch": 0.7798261120531044,
+      "grad_norm": 0.13929963111877441,
+      "learning_rate": 0.00016882667051522587,
+      "loss": 0.1332,
+      "step": 10808
+    },
+    {
+      "epoch": 0.7798982647281648,
+      "grad_norm": 0.12116377800703049,
+      "learning_rate": 0.00016882378409582914,
+      "loss": 0.1829,
+      "step": 10809
+    },
+    {
+      "epoch": 0.7799704174032253,
+      "grad_norm": 0.11945638060569763,
+      "learning_rate": 0.0001688208976764324,
+      "loss": 0.1106,
+      "step": 10810
+    },
+    {
+      "epoch": 0.7800425700782857,
+      "grad_norm": 0.1397213488817215,
+      "learning_rate": 0.00016881801125703564,
+      "loss": 0.1425,
+      "step": 10811
+    },
+    {
+      "epoch": 0.780114722753346,
+      "grad_norm": 0.11561702191829681,
+      "learning_rate": 0.00016881512483763893,
+      "loss": 0.1579,
+      "step": 10812
+    },
+    {
+      "epoch": 0.7801868754284065,
+      "grad_norm": 0.12192624807357788,
+      "learning_rate": 0.0001688122384182422,
+      "loss": 0.1477,
+      "step": 10813
+    },
+    {
+      "epoch": 0.7802590281034669,
+      "grad_norm": 0.12877143919467926,
+      "learning_rate": 0.00016880935199884545,
+      "loss": 0.1478,
+      "step": 10814
+    },
+    {
+      "epoch": 0.7803311807785274,
+      "grad_norm": 0.14012739062309265,
+      "learning_rate": 0.00016880646557944872,
+      "loss": 0.1527,
+      "step": 10815
+    },
+    {
+      "epoch": 0.7804033334535878,
+      "grad_norm": 0.14249911904335022,
+      "learning_rate": 0.00016880357916005195,
+      "loss": 0.1467,
+      "step": 10816
+    },
+    {
+      "epoch": 0.7804754861286483,
+      "grad_norm": 0.1142381876707077,
+      "learning_rate": 0.00016880069274065521,
+      "loss": 0.1031,
+      "step": 10817
+    },
+    {
+      "epoch": 0.7805476388037087,
+      "grad_norm": 0.14810532331466675,
+      "learning_rate": 0.00016879780632125848,
+      "loss": 0.1363,
+      "step": 10818
+    },
+    {
+      "epoch": 0.780619791478769,
+      "grad_norm": 0.11812330037355423,
+      "learning_rate": 0.00016879491990186177,
+      "loss": 0.1941,
+      "step": 10819
+    },
+    {
+      "epoch": 0.7806919441538295,
+      "grad_norm": 0.13109090924263,
+      "learning_rate": 0.00016879203348246503,
+      "loss": 0.1461,
+      "step": 10820
+    },
+    {
+      "epoch": 0.7807640968288899,
+      "grad_norm": 0.1146184429526329,
+      "learning_rate": 0.00016878914706306827,
+      "loss": 0.1562,
+      "step": 10821
+    },
+    {
+      "epoch": 0.7808362495039504,
+      "grad_norm": 0.14675399661064148,
+      "learning_rate": 0.00016878626064367153,
+      "loss": 0.134,
+      "step": 10822
+    },
+    {
+      "epoch": 0.7809084021790108,
+      "grad_norm": 0.13877572119235992,
+      "learning_rate": 0.0001687833742242748,
+      "loss": 0.1441,
+      "step": 10823
+    },
+    {
+      "epoch": 0.7809805548540713,
+      "grad_norm": 0.13332527875900269,
+      "learning_rate": 0.00016878048780487805,
+      "loss": 0.189,
+      "step": 10824
+    },
+    {
+      "epoch": 0.7810527075291317,
+      "grad_norm": 0.11128589510917664,
+      "learning_rate": 0.00016877760138548132,
+      "loss": 0.1317,
+      "step": 10825
+    },
+    {
+      "epoch": 0.781124860204192,
+      "grad_norm": 0.13484609127044678,
+      "learning_rate": 0.00016877471496608458,
+      "loss": 0.1502,
+      "step": 10826
+    },
+    {
+      "epoch": 0.7811970128792525,
+      "grad_norm": 0.1292797327041626,
+      "learning_rate": 0.00016877182854668784,
+      "loss": 0.1171,
+      "step": 10827
+    },
+    {
+      "epoch": 0.7812691655543129,
+      "grad_norm": 0.14283114671707153,
+      "learning_rate": 0.0001687689421272911,
+      "loss": 0.1898,
+      "step": 10828
+    },
+    {
+      "epoch": 0.7813413182293734,
+      "grad_norm": 0.13625703752040863,
+      "learning_rate": 0.00016876605570789437,
+      "loss": 0.1963,
+      "step": 10829
+    },
+    {
+      "epoch": 0.7814134709044338,
+      "grad_norm": 0.11518652737140656,
+      "learning_rate": 0.00016876316928849763,
+      "loss": 0.1691,
+      "step": 10830
+    },
+    {
+      "epoch": 0.7814856235794942,
+      "grad_norm": 0.13726310431957245,
+      "learning_rate": 0.0001687602828691009,
+      "loss": 0.1371,
+      "step": 10831
+    },
+    {
+      "epoch": 0.7815577762545547,
+      "grad_norm": 0.1303400844335556,
+      "learning_rate": 0.00016875739644970413,
+      "loss": 0.1665,
+      "step": 10832
+    },
+    {
+      "epoch": 0.781629928929615,
+      "grad_norm": 0.14024718105793,
+      "learning_rate": 0.00016875451003030742,
+      "loss": 0.1337,
+      "step": 10833
+    },
+    {
+      "epoch": 0.7817020816046755,
+      "grad_norm": 0.116419218480587,
+      "learning_rate": 0.00016875162361091068,
+      "loss": 0.16,
+      "step": 10834
+    },
+    {
+      "epoch": 0.7817742342797359,
+      "grad_norm": 0.1419404000043869,
+      "learning_rate": 0.00016874873719151395,
+      "loss": 0.1306,
+      "step": 10835
+    },
+    {
+      "epoch": 0.7818463869547964,
+      "grad_norm": 0.11938845366239548,
+      "learning_rate": 0.0001687458507721172,
+      "loss": 0.1363,
+      "step": 10836
+    },
+    {
+      "epoch": 0.7819185396298568,
+      "grad_norm": 0.12528158724308014,
+      "learning_rate": 0.00016874296435272045,
+      "loss": 0.1374,
+      "step": 10837
+    },
+    {
+      "epoch": 0.7819906923049172,
+      "grad_norm": 0.11019222438335419,
+      "learning_rate": 0.0001687400779333237,
+      "loss": 0.1364,
+      "step": 10838
+    },
+    {
+      "epoch": 0.7820628449799776,
+      "grad_norm": 0.1004524901509285,
+      "learning_rate": 0.00016873719151392697,
+      "loss": 0.1293,
+      "step": 10839
+    },
+    {
+      "epoch": 0.782134997655038,
+      "grad_norm": 0.13266058266162872,
+      "learning_rate": 0.00016873430509453026,
+      "loss": 0.1314,
+      "step": 10840
+    },
+    {
+      "epoch": 0.7822071503300985,
+      "grad_norm": 0.17364564538002014,
+      "learning_rate": 0.00016873141867513352,
+      "loss": 0.1968,
+      "step": 10841
+    },
+    {
+      "epoch": 0.7822793030051589,
+      "grad_norm": 0.1177988052368164,
+      "learning_rate": 0.00016872853225573676,
+      "loss": 0.1591,
+      "step": 10842
+    },
+    {
+      "epoch": 0.7823514556802194,
+      "grad_norm": 0.18249043822288513,
+      "learning_rate": 0.00016872564583634002,
+      "loss": 0.2081,
+      "step": 10843
+    },
+    {
+      "epoch": 0.7824236083552798,
+      "grad_norm": 0.11414939910173416,
+      "learning_rate": 0.00016872275941694329,
+      "loss": 0.1286,
+      "step": 10844
+    },
+    {
+      "epoch": 0.7824957610303402,
+      "grad_norm": 0.11555025726556778,
+      "learning_rate": 0.00016871987299754655,
+      "loss": 0.1155,
+      "step": 10845
+    },
+    {
+      "epoch": 0.7825679137054006,
+      "grad_norm": 0.12041687965393066,
+      "learning_rate": 0.0001687169865781498,
+      "loss": 0.109,
+      "step": 10846
+    },
+    {
+      "epoch": 0.782640066380461,
+      "grad_norm": 0.12202759832143784,
+      "learning_rate": 0.00016871410015875307,
+      "loss": 0.1369,
+      "step": 10847
+    },
+    {
+      "epoch": 0.7827122190555215,
+      "grad_norm": 0.11626517027616501,
+      "learning_rate": 0.00016871121373935634,
+      "loss": 0.1648,
+      "step": 10848
+    },
+    {
+      "epoch": 0.7827843717305819,
+      "grad_norm": 0.113328717648983,
+      "learning_rate": 0.0001687083273199596,
+      "loss": 0.178,
+      "step": 10849
+    },
+    {
+      "epoch": 0.7828565244056424,
+      "grad_norm": 0.15602675080299377,
+      "learning_rate": 0.00016870544090056286,
+      "loss": 0.1751,
+      "step": 10850
+    },
+    {
+      "epoch": 0.7829286770807028,
+      "grad_norm": 0.13897716999053955,
+      "learning_rate": 0.00016870255448116613,
+      "loss": 0.1409,
+      "step": 10851
+    },
+    {
+      "epoch": 0.7830008297557632,
+      "grad_norm": 0.13525345921516418,
+      "learning_rate": 0.0001686996680617694,
+      "loss": 0.1135,
+      "step": 10852
+    },
+    {
+      "epoch": 0.7830729824308236,
+      "grad_norm": 0.1421322375535965,
+      "learning_rate": 0.00016869678164237263,
+      "loss": 0.1788,
+      "step": 10853
+    },
+    {
+      "epoch": 0.783145135105884,
+      "grad_norm": 0.1331518441438675,
+      "learning_rate": 0.00016869389522297592,
+      "loss": 0.2281,
+      "step": 10854
+    },
+    {
+      "epoch": 0.7832172877809445,
+      "grad_norm": 0.13175399601459503,
+      "learning_rate": 0.00016869100880357918,
+      "loss": 0.1567,
+      "step": 10855
+    },
+    {
+      "epoch": 0.7832894404560049,
+      "grad_norm": 0.13462431728839874,
+      "learning_rate": 0.00016868812238418244,
+      "loss": 0.1574,
+      "step": 10856
+    },
+    {
+      "epoch": 0.7833615931310653,
+      "grad_norm": 0.15298853814601898,
+      "learning_rate": 0.0001686852359647857,
+      "loss": 0.1195,
+      "step": 10857
+    },
+    {
+      "epoch": 0.7834337458061258,
+      "grad_norm": 0.14155510067939758,
+      "learning_rate": 0.00016868234954538894,
+      "loss": 0.1517,
+      "step": 10858
+    },
+    {
+      "epoch": 0.7835058984811862,
+      "grad_norm": 0.12201669067144394,
+      "learning_rate": 0.0001686794631259922,
+      "loss": 0.1525,
+      "step": 10859
+    },
+    {
+      "epoch": 0.7835780511562466,
+      "grad_norm": 0.12332157045602798,
+      "learning_rate": 0.00016867657670659547,
+      "loss": 0.1462,
+      "step": 10860
+    },
+    {
+      "epoch": 0.783650203831307,
+      "grad_norm": 0.13681064546108246,
+      "learning_rate": 0.00016867369028719876,
+      "loss": 0.1072,
+      "step": 10861
+    },
+    {
+      "epoch": 0.7837223565063675,
+      "grad_norm": 0.16225147247314453,
+      "learning_rate": 0.00016867080386780202,
+      "loss": 0.119,
+      "step": 10862
+    },
+    {
+      "epoch": 0.7837945091814279,
+      "grad_norm": 0.12531347572803497,
+      "learning_rate": 0.00016866791744840525,
+      "loss": 0.1352,
+      "step": 10863
+    },
+    {
+      "epoch": 0.7838666618564883,
+      "grad_norm": 0.11706455051898956,
+      "learning_rate": 0.00016866503102900852,
+      "loss": 0.1303,
+      "step": 10864
+    },
+    {
+      "epoch": 0.7839388145315488,
+      "grad_norm": 0.12031789124011993,
+      "learning_rate": 0.00016866214460961178,
+      "loss": 0.2086,
+      "step": 10865
+    },
+    {
+      "epoch": 0.7840109672066092,
+      "grad_norm": 0.12857460975646973,
+      "learning_rate": 0.00016865925819021504,
+      "loss": 0.1688,
+      "step": 10866
+    },
+    {
+      "epoch": 0.7840831198816696,
+      "grad_norm": 0.12610378861427307,
+      "learning_rate": 0.0001686563717708183,
+      "loss": 0.1963,
+      "step": 10867
+    },
+    {
+      "epoch": 0.78415527255673,
+      "grad_norm": 0.12777261435985565,
+      "learning_rate": 0.00016865348535142157,
+      "loss": 0.1576,
+      "step": 10868
+    },
+    {
+      "epoch": 0.7842274252317905,
+      "grad_norm": 0.09360314160585403,
+      "learning_rate": 0.00016865059893202483,
+      "loss": 0.1196,
+      "step": 10869
+    },
+    {
+      "epoch": 0.7842995779068509,
+      "grad_norm": 0.12106601148843765,
+      "learning_rate": 0.0001686477125126281,
+      "loss": 0.0844,
+      "step": 10870
+    },
+    {
+      "epoch": 0.7843717305819113,
+      "grad_norm": 0.12097611278295517,
+      "learning_rate": 0.00016864482609323136,
+      "loss": 0.1942,
+      "step": 10871
+    },
+    {
+      "epoch": 0.7844438832569718,
+      "grad_norm": 0.12000215798616409,
+      "learning_rate": 0.00016864193967383462,
+      "loss": 0.1234,
+      "step": 10872
+    },
+    {
+      "epoch": 0.7845160359320322,
+      "grad_norm": 0.13257797062397003,
+      "learning_rate": 0.00016863905325443788,
+      "loss": 0.1488,
+      "step": 10873
+    },
+    {
+      "epoch": 0.7845881886070926,
+      "grad_norm": 0.11641601473093033,
+      "learning_rate": 0.00016863616683504112,
+      "loss": 0.1159,
+      "step": 10874
+    },
+    {
+      "epoch": 0.784660341282153,
+      "grad_norm": 0.11885194480419159,
+      "learning_rate": 0.00016863328041564438,
+      "loss": 0.1736,
+      "step": 10875
+    },
+    {
+      "epoch": 0.7847324939572135,
+      "grad_norm": 0.12223616987466812,
+      "learning_rate": 0.00016863039399624767,
+      "loss": 0.1634,
+      "step": 10876
+    },
+    {
+      "epoch": 0.7848046466322739,
+      "grad_norm": 0.1331803798675537,
+      "learning_rate": 0.00016862750757685094,
+      "loss": 0.1812,
+      "step": 10877
+    },
+    {
+      "epoch": 0.7848767993073343,
+      "grad_norm": 0.13615085184574127,
+      "learning_rate": 0.0001686246211574542,
+      "loss": 0.1512,
+      "step": 10878
+    },
+    {
+      "epoch": 0.7849489519823948,
+      "grad_norm": 0.12675344944000244,
+      "learning_rate": 0.00016862173473805743,
+      "loss": 0.2002,
+      "step": 10879
+    },
+    {
+      "epoch": 0.7850211046574552,
+      "grad_norm": 0.13016042113304138,
+      "learning_rate": 0.0001686188483186607,
+      "loss": 0.1745,
+      "step": 10880
+    },
+    {
+      "epoch": 0.7850932573325156,
+      "grad_norm": 0.15857744216918945,
+      "learning_rate": 0.00016861596189926396,
+      "loss": 0.1854,
+      "step": 10881
+    },
+    {
+      "epoch": 0.785165410007576,
+      "grad_norm": 0.1491554081439972,
+      "learning_rate": 0.00016861307547986722,
+      "loss": 0.1416,
+      "step": 10882
+    },
+    {
+      "epoch": 0.7852375626826364,
+      "grad_norm": 0.1276445835828781,
+      "learning_rate": 0.0001686101890604705,
+      "loss": 0.1493,
+      "step": 10883
+    },
+    {
+      "epoch": 0.7853097153576969,
+      "grad_norm": 0.1050001010298729,
+      "learning_rate": 0.00016860730264107375,
+      "loss": 0.163,
+      "step": 10884
+    },
+    {
+      "epoch": 0.7853818680327573,
+      "grad_norm": 0.16622294485569,
+      "learning_rate": 0.000168604416221677,
+      "loss": 0.1567,
+      "step": 10885
+    },
+    {
+      "epoch": 0.7854540207078178,
+      "grad_norm": 0.1504763662815094,
+      "learning_rate": 0.00016860152980228027,
+      "loss": 0.1235,
+      "step": 10886
+    },
+    {
+      "epoch": 0.7855261733828782,
+      "grad_norm": 0.11783110350370407,
+      "learning_rate": 0.00016859864338288354,
+      "loss": 0.1368,
+      "step": 10887
+    },
+    {
+      "epoch": 0.7855983260579386,
+      "grad_norm": 0.13893578946590424,
+      "learning_rate": 0.0001685957569634868,
+      "loss": 0.1315,
+      "step": 10888
+    },
+    {
+      "epoch": 0.785670478732999,
+      "grad_norm": 0.12017761915922165,
+      "learning_rate": 0.00016859287054409006,
+      "loss": 0.181,
+      "step": 10889
+    },
+    {
+      "epoch": 0.7857426314080594,
+      "grad_norm": 0.13865232467651367,
+      "learning_rate": 0.00016858998412469333,
+      "loss": 0.1214,
+      "step": 10890
+    },
+    {
+      "epoch": 0.7858147840831199,
+      "grad_norm": 0.1186799556016922,
+      "learning_rate": 0.0001685870977052966,
+      "loss": 0.1217,
+      "step": 10891
+    },
+    {
+      "epoch": 0.7858869367581803,
+      "grad_norm": 0.1285744309425354,
+      "learning_rate": 0.00016858421128589985,
+      "loss": 0.1181,
+      "step": 10892
+    },
+    {
+      "epoch": 0.7859590894332408,
+      "grad_norm": 0.11852288991212845,
+      "learning_rate": 0.00016858132486650311,
+      "loss": 0.1499,
+      "step": 10893
+    },
+    {
+      "epoch": 0.7860312421083012,
+      "grad_norm": 0.12144551426172256,
+      "learning_rate": 0.00016857843844710638,
+      "loss": 0.1436,
+      "step": 10894
+    },
+    {
+      "epoch": 0.7861033947833616,
+      "grad_norm": 0.13382890820503235,
+      "learning_rate": 0.0001685755520277096,
+      "loss": 0.1288,
+      "step": 10895
+    },
+    {
+      "epoch": 0.786175547458422,
+      "grad_norm": 0.12314509600400925,
+      "learning_rate": 0.00016857266560831288,
+      "loss": 0.1561,
+      "step": 10896
+    },
+    {
+      "epoch": 0.7862477001334824,
+      "grad_norm": 0.11607926338911057,
+      "learning_rate": 0.00016856977918891617,
+      "loss": 0.1555,
+      "step": 10897
+    },
+    {
+      "epoch": 0.7863198528085429,
+      "grad_norm": 0.13606394827365875,
+      "learning_rate": 0.00016856689276951943,
+      "loss": 0.1663,
+      "step": 10898
+    },
+    {
+      "epoch": 0.7863920054836033,
+      "grad_norm": 0.16237007081508636,
+      "learning_rate": 0.0001685640063501227,
+      "loss": 0.1275,
+      "step": 10899
+    },
+    {
+      "epoch": 0.7864641581586638,
+      "grad_norm": 0.13067243993282318,
+      "learning_rate": 0.00016856111993072593,
+      "loss": 0.1582,
+      "step": 10900
+    },
+    {
+      "epoch": 0.7865363108337241,
+      "grad_norm": 0.14115722477436066,
+      "learning_rate": 0.0001685582335113292,
+      "loss": 0.1358,
+      "step": 10901
+    },
+    {
+      "epoch": 0.7866084635087846,
+      "grad_norm": 0.09878934174776077,
+      "learning_rate": 0.00016855534709193245,
+      "loss": 0.1509,
+      "step": 10902
+    },
+    {
+      "epoch": 0.786680616183845,
+      "grad_norm": 0.132870152592659,
+      "learning_rate": 0.00016855246067253572,
+      "loss": 0.1154,
+      "step": 10903
+    },
+    {
+      "epoch": 0.7867527688589054,
+      "grad_norm": 0.08644016832113266,
+      "learning_rate": 0.000168549574253139,
+      "loss": 0.1532,
+      "step": 10904
+    },
+    {
+      "epoch": 0.7868249215339659,
+      "grad_norm": 0.12310770153999329,
+      "learning_rate": 0.00016854668783374224,
+      "loss": 0.1308,
+      "step": 10905
+    },
+    {
+      "epoch": 0.7868970742090263,
+      "grad_norm": 0.11368894577026367,
+      "learning_rate": 0.0001685438014143455,
+      "loss": 0.1171,
+      "step": 10906
+    },
+    {
+      "epoch": 0.7869692268840868,
+      "grad_norm": 0.11752180010080338,
+      "learning_rate": 0.00016854091499494877,
+      "loss": 0.1616,
+      "step": 10907
+    },
+    {
+      "epoch": 0.7870413795591471,
+      "grad_norm": 0.1096261516213417,
+      "learning_rate": 0.00016853802857555203,
+      "loss": 0.1141,
+      "step": 10908
+    },
+    {
+      "epoch": 0.7871135322342075,
+      "grad_norm": 0.1345599889755249,
+      "learning_rate": 0.0001685351421561553,
+      "loss": 0.1711,
+      "step": 10909
+    },
+    {
+      "epoch": 0.787185684909268,
+      "grad_norm": 0.11580589413642883,
+      "learning_rate": 0.00016853225573675856,
+      "loss": 0.144,
+      "step": 10910
+    },
+    {
+      "epoch": 0.7872578375843284,
+      "grad_norm": 0.16636911034584045,
+      "learning_rate": 0.00016852936931736182,
+      "loss": 0.1685,
+      "step": 10911
+    },
+    {
+      "epoch": 0.7873299902593889,
+      "grad_norm": 0.12539048492908478,
+      "learning_rate": 0.00016852648289796508,
+      "loss": 0.1341,
+      "step": 10912
+    },
+    {
+      "epoch": 0.7874021429344493,
+      "grad_norm": 0.12115386128425598,
+      "learning_rate": 0.00016852359647856835,
+      "loss": 0.1485,
+      "step": 10913
+    },
+    {
+      "epoch": 0.7874742956095098,
+      "grad_norm": 0.12285510450601578,
+      "learning_rate": 0.0001685207100591716,
+      "loss": 0.1301,
+      "step": 10914
+    },
+    {
+      "epoch": 0.7875464482845701,
+      "grad_norm": 0.11459830403327942,
+      "learning_rate": 0.00016851782363977487,
+      "loss": 0.1605,
+      "step": 10915
+    },
+    {
+      "epoch": 0.7876186009596305,
+      "grad_norm": 0.1197519302368164,
+      "learning_rate": 0.00016851493722037813,
+      "loss": 0.1275,
+      "step": 10916
+    },
+    {
+      "epoch": 0.787690753634691,
+      "grad_norm": 0.13795311748981476,
+      "learning_rate": 0.00016851205080098137,
+      "loss": 0.1568,
+      "step": 10917
+    },
+    {
+      "epoch": 0.7877629063097514,
+      "grad_norm": 0.14670489728450775,
+      "learning_rate": 0.00016850916438158466,
+      "loss": 0.2084,
+      "step": 10918
+    },
+    {
+      "epoch": 0.7878350589848119,
+      "grad_norm": 0.11156128346920013,
+      "learning_rate": 0.00016850627796218792,
+      "loss": 0.1654,
+      "step": 10919
+    },
+    {
+      "epoch": 0.7879072116598723,
+      "grad_norm": 0.13015100359916687,
+      "learning_rate": 0.0001685033915427912,
+      "loss": 0.1147,
+      "step": 10920
+    },
+    {
+      "epoch": 0.7879793643349328,
+      "grad_norm": 0.1453978270292282,
+      "learning_rate": 0.00016850050512339445,
+      "loss": 0.1496,
+      "step": 10921
+    },
+    {
+      "epoch": 0.7880515170099931,
+      "grad_norm": 0.12433165311813354,
+      "learning_rate": 0.00016849761870399769,
+      "loss": 0.1396,
+      "step": 10922
+    },
+    {
+      "epoch": 0.7881236696850535,
+      "grad_norm": 0.1300925463438034,
+      "learning_rate": 0.00016849473228460095,
+      "loss": 0.146,
+      "step": 10923
+    },
+    {
+      "epoch": 0.788195822360114,
+      "grad_norm": 0.15568086504936218,
+      "learning_rate": 0.0001684918458652042,
+      "loss": 0.1706,
+      "step": 10924
+    },
+    {
+      "epoch": 0.7882679750351744,
+      "grad_norm": 0.12890571355819702,
+      "learning_rate": 0.0001684889594458075,
+      "loss": 0.1484,
+      "step": 10925
+    },
+    {
+      "epoch": 0.7883401277102349,
+      "grad_norm": 0.12381922453641891,
+      "learning_rate": 0.00016848607302641076,
+      "loss": 0.1536,
+      "step": 10926
+    },
+    {
+      "epoch": 0.7884122803852953,
+      "grad_norm": 0.10168445855379105,
+      "learning_rate": 0.000168483186607014,
+      "loss": 0.1703,
+      "step": 10927
+    },
+    {
+      "epoch": 0.7884844330603558,
+      "grad_norm": 0.10643867403268814,
+      "learning_rate": 0.00016848030018761726,
+      "loss": 0.149,
+      "step": 10928
+    },
+    {
+      "epoch": 0.7885565857354161,
+      "grad_norm": 0.14253583550453186,
+      "learning_rate": 0.00016847741376822053,
+      "loss": 0.1532,
+      "step": 10929
+    },
+    {
+      "epoch": 0.7886287384104765,
+      "grad_norm": 0.1330222636461258,
+      "learning_rate": 0.0001684745273488238,
+      "loss": 0.1518,
+      "step": 10930
+    },
+    {
+      "epoch": 0.788700891085537,
+      "grad_norm": 0.10836239159107208,
+      "learning_rate": 0.00016847164092942705,
+      "loss": 0.1459,
+      "step": 10931
+    },
+    {
+      "epoch": 0.7887730437605974,
+      "grad_norm": 0.11788875609636307,
+      "learning_rate": 0.00016846875451003031,
+      "loss": 0.1645,
+      "step": 10932
+    },
+    {
+      "epoch": 0.7888451964356579,
+      "grad_norm": 0.13345979154109955,
+      "learning_rate": 0.00016846586809063358,
+      "loss": 0.1267,
+      "step": 10933
+    },
+    {
+      "epoch": 0.7889173491107183,
+      "grad_norm": 0.11228634417057037,
+      "learning_rate": 0.00016846298167123684,
+      "loss": 0.1121,
+      "step": 10934
+    },
+    {
+      "epoch": 0.7889895017857788,
+      "grad_norm": 0.13591991364955902,
+      "learning_rate": 0.0001684600952518401,
+      "loss": 0.1445,
+      "step": 10935
+    },
+    {
+      "epoch": 0.7890616544608391,
+      "grad_norm": 0.12425404042005539,
+      "learning_rate": 0.00016845720883244337,
+      "loss": 0.1689,
+      "step": 10936
+    },
+    {
+      "epoch": 0.7891338071358995,
+      "grad_norm": 0.11064977943897247,
+      "learning_rate": 0.00016845432241304663,
+      "loss": 0.1406,
+      "step": 10937
+    },
+    {
+      "epoch": 0.78920595981096,
+      "grad_norm": 0.12111656367778778,
+      "learning_rate": 0.00016845143599364986,
+      "loss": 0.142,
+      "step": 10938
+    },
+    {
+      "epoch": 0.7892781124860204,
+      "grad_norm": 0.13789787888526917,
+      "learning_rate": 0.00016844854957425315,
+      "loss": 0.1613,
+      "step": 10939
+    },
+    {
+      "epoch": 0.7893502651610809,
+      "grad_norm": 0.12854664027690887,
+      "learning_rate": 0.00016844566315485642,
+      "loss": 0.1634,
+      "step": 10940
+    },
+    {
+      "epoch": 0.7894224178361413,
+      "grad_norm": 0.12651242315769196,
+      "learning_rate": 0.00016844277673545968,
+      "loss": 0.1214,
+      "step": 10941
+    },
+    {
+      "epoch": 0.7894945705112018,
+      "grad_norm": 0.12767834961414337,
+      "learning_rate": 0.00016843989031606294,
+      "loss": 0.1311,
+      "step": 10942
+    },
+    {
+      "epoch": 0.7895667231862621,
+      "grad_norm": 0.13253939151763916,
+      "learning_rate": 0.00016843700389666618,
+      "loss": 0.107,
+      "step": 10943
+    },
+    {
+      "epoch": 0.7896388758613225,
+      "grad_norm": 0.130879208445549,
+      "learning_rate": 0.00016843411747726944,
+      "loss": 0.2173,
+      "step": 10944
+    },
+    {
+      "epoch": 0.789711028536383,
+      "grad_norm": 0.12859420478343964,
+      "learning_rate": 0.0001684312310578727,
+      "loss": 0.158,
+      "step": 10945
+    },
+    {
+      "epoch": 0.7897831812114434,
+      "grad_norm": 0.15572106838226318,
+      "learning_rate": 0.000168428344638476,
+      "loss": 0.1779,
+      "step": 10946
+    },
+    {
+      "epoch": 0.7898553338865039,
+      "grad_norm": 0.1253107190132141,
+      "learning_rate": 0.00016842545821907926,
+      "loss": 0.1792,
+      "step": 10947
+    },
+    {
+      "epoch": 0.7899274865615643,
+      "grad_norm": 0.13750699162483215,
+      "learning_rate": 0.0001684225717996825,
+      "loss": 0.1698,
+      "step": 10948
+    },
+    {
+      "epoch": 0.7899996392366248,
+      "grad_norm": 0.1363247185945511,
+      "learning_rate": 0.00016841968538028576,
+      "loss": 0.1339,
+      "step": 10949
+    },
+    {
+      "epoch": 0.7900717919116851,
+      "grad_norm": 0.11178048700094223,
+      "learning_rate": 0.00016841679896088902,
+      "loss": 0.1553,
+      "step": 10950
+    },
+    {
+      "epoch": 0.7901439445867455,
+      "grad_norm": 0.14459486305713654,
+      "learning_rate": 0.00016841391254149228,
+      "loss": 0.1378,
+      "step": 10951
+    },
+    {
+      "epoch": 0.790216097261806,
+      "grad_norm": 0.11802060902118683,
+      "learning_rate": 0.00016841102612209555,
+      "loss": 0.1492,
+      "step": 10952
+    },
+    {
+      "epoch": 0.7902882499368664,
+      "grad_norm": 0.12663447856903076,
+      "learning_rate": 0.0001684081397026988,
+      "loss": 0.1783,
+      "step": 10953
+    },
+    {
+      "epoch": 0.7903604026119269,
+      "grad_norm": 0.12220313400030136,
+      "learning_rate": 0.00016840525328330207,
+      "loss": 0.1298,
+      "step": 10954
+    },
+    {
+      "epoch": 0.7904325552869873,
+      "grad_norm": 0.12975122034549713,
+      "learning_rate": 0.00016840236686390533,
+      "loss": 0.195,
+      "step": 10955
+    },
+    {
+      "epoch": 0.7905047079620477,
+      "grad_norm": 0.10023821890354156,
+      "learning_rate": 0.0001683994804445086,
+      "loss": 0.1572,
+      "step": 10956
+    },
+    {
+      "epoch": 0.7905768606371081,
+      "grad_norm": 0.10845848172903061,
+      "learning_rate": 0.00016839659402511186,
+      "loss": 0.159,
+      "step": 10957
+    },
+    {
+      "epoch": 0.7906490133121685,
+      "grad_norm": 0.10611424595117569,
+      "learning_rate": 0.00016839370760571512,
+      "loss": 0.1648,
+      "step": 10958
+    },
+    {
+      "epoch": 0.790721165987229,
+      "grad_norm": 0.1480133831501007,
+      "learning_rate": 0.00016839082118631836,
+      "loss": 0.1169,
+      "step": 10959
+    },
+    {
+      "epoch": 0.7907933186622894,
+      "grad_norm": 0.1449739634990692,
+      "learning_rate": 0.00016838793476692165,
+      "loss": 0.2015,
+      "step": 10960
+    },
+    {
+      "epoch": 0.7908654713373499,
+      "grad_norm": 0.1358804553747177,
+      "learning_rate": 0.0001683850483475249,
+      "loss": 0.1521,
+      "step": 10961
+    },
+    {
+      "epoch": 0.7909376240124103,
+      "grad_norm": 0.12293657660484314,
+      "learning_rate": 0.00016838216192812818,
+      "loss": 0.1589,
+      "step": 10962
+    },
+    {
+      "epoch": 0.7910097766874706,
+      "grad_norm": 0.12123416364192963,
+      "learning_rate": 0.00016837927550873144,
+      "loss": 0.1311,
+      "step": 10963
+    },
+    {
+      "epoch": 0.7910819293625311,
+      "grad_norm": 0.17734867334365845,
+      "learning_rate": 0.00016837638908933467,
+      "loss": 0.1805,
+      "step": 10964
+    },
+    {
+      "epoch": 0.7911540820375915,
+      "grad_norm": 0.12703557312488556,
+      "learning_rate": 0.00016837350266993794,
+      "loss": 0.1313,
+      "step": 10965
+    },
+    {
+      "epoch": 0.791226234712652,
+      "grad_norm": 0.13336026668548584,
+      "learning_rate": 0.0001683706162505412,
+      "loss": 0.1366,
+      "step": 10966
+    },
+    {
+      "epoch": 0.7912983873877124,
+      "grad_norm": 0.13004052639007568,
+      "learning_rate": 0.0001683677298311445,
+      "loss": 0.186,
+      "step": 10967
+    },
+    {
+      "epoch": 0.7913705400627729,
+      "grad_norm": 0.1224091425538063,
+      "learning_rate": 0.00016836484341174775,
+      "loss": 0.1608,
+      "step": 10968
+    },
+    {
+      "epoch": 0.7914426927378333,
+      "grad_norm": 0.12723009288311005,
+      "learning_rate": 0.000168361956992351,
+      "loss": 0.1769,
+      "step": 10969
+    },
+    {
+      "epoch": 0.7915148454128936,
+      "grad_norm": 0.11376544088125229,
+      "learning_rate": 0.00016835907057295425,
+      "loss": 0.1576,
+      "step": 10970
+    },
+    {
+      "epoch": 0.7915869980879541,
+      "grad_norm": 0.12775452435016632,
+      "learning_rate": 0.00016835618415355751,
+      "loss": 0.1666,
+      "step": 10971
+    },
+    {
+      "epoch": 0.7916591507630145,
+      "grad_norm": 0.1533483862876892,
+      "learning_rate": 0.00016835329773416078,
+      "loss": 0.1292,
+      "step": 10972
+    },
+    {
+      "epoch": 0.791731303438075,
+      "grad_norm": 0.11418499052524567,
+      "learning_rate": 0.00016835041131476404,
+      "loss": 0.1533,
+      "step": 10973
+    },
+    {
+      "epoch": 0.7918034561131354,
+      "grad_norm": 0.19840049743652344,
+      "learning_rate": 0.0001683475248953673,
+      "loss": 0.1527,
+      "step": 10974
+    },
+    {
+      "epoch": 0.7918756087881959,
+      "grad_norm": 0.1294814646244049,
+      "learning_rate": 0.00016834463847597057,
+      "loss": 0.131,
+      "step": 10975
+    },
+    {
+      "epoch": 0.7919477614632563,
+      "grad_norm": 0.11488209664821625,
+      "learning_rate": 0.00016834175205657383,
+      "loss": 0.1398,
+      "step": 10976
+    },
+    {
+      "epoch": 0.7920199141383166,
+      "grad_norm": 0.13387158513069153,
+      "learning_rate": 0.0001683388656371771,
+      "loss": 0.1738,
+      "step": 10977
+    },
+    {
+      "epoch": 0.7920920668133771,
+      "grad_norm": 0.10368437319993973,
+      "learning_rate": 0.00016833597921778035,
+      "loss": 0.1688,
+      "step": 10978
+    },
+    {
+      "epoch": 0.7921642194884375,
+      "grad_norm": 0.11186818778514862,
+      "learning_rate": 0.00016833309279838362,
+      "loss": 0.1874,
+      "step": 10979
+    },
+    {
+      "epoch": 0.792236372163498,
+      "grad_norm": 0.12419915944337845,
+      "learning_rate": 0.00016833020637898685,
+      "loss": 0.1283,
+      "step": 10980
+    },
+    {
+      "epoch": 0.7923085248385584,
+      "grad_norm": 0.12867562472820282,
+      "learning_rate": 0.00016832731995959014,
+      "loss": 0.1525,
+      "step": 10981
+    },
+    {
+      "epoch": 0.7923806775136188,
+      "grad_norm": 0.12154834717512131,
+      "learning_rate": 0.0001683244335401934,
+      "loss": 0.1687,
+      "step": 10982
+    },
+    {
+      "epoch": 0.7924528301886793,
+      "grad_norm": 0.1212034672498703,
+      "learning_rate": 0.00016832154712079667,
+      "loss": 0.1295,
+      "step": 10983
+    },
+    {
+      "epoch": 0.7925249828637396,
+      "grad_norm": 0.14599169790744781,
+      "learning_rate": 0.00016831866070139993,
+      "loss": 0.1702,
+      "step": 10984
+    },
+    {
+      "epoch": 0.7925971355388001,
+      "grad_norm": 0.13765747845172882,
+      "learning_rate": 0.00016831577428200317,
+      "loss": 0.1584,
+      "step": 10985
+    },
+    {
+      "epoch": 0.7926692882138605,
+      "grad_norm": 0.14818070828914642,
+      "learning_rate": 0.00016831288786260643,
+      "loss": 0.1627,
+      "step": 10986
+    },
+    {
+      "epoch": 0.792741440888921,
+      "grad_norm": 0.1253470629453659,
+      "learning_rate": 0.0001683100014432097,
+      "loss": 0.1414,
+      "step": 10987
+    },
+    {
+      "epoch": 0.7928135935639814,
+      "grad_norm": 0.10206609964370728,
+      "learning_rate": 0.00016830711502381298,
+      "loss": 0.1615,
+      "step": 10988
+    },
+    {
+      "epoch": 0.7928857462390418,
+      "grad_norm": 0.13145264983177185,
+      "learning_rate": 0.00016830422860441625,
+      "loss": 0.1741,
+      "step": 10989
+    },
+    {
+      "epoch": 0.7929578989141023,
+      "grad_norm": 0.1456836611032486,
+      "learning_rate": 0.00016830134218501948,
+      "loss": 0.1373,
+      "step": 10990
+    },
+    {
+      "epoch": 0.7930300515891626,
+      "grad_norm": 0.1337760090827942,
+      "learning_rate": 0.00016829845576562275,
+      "loss": 0.1051,
+      "step": 10991
+    },
+    {
+      "epoch": 0.7931022042642231,
+      "grad_norm": 0.12097540497779846,
+      "learning_rate": 0.000168295569346226,
+      "loss": 0.1076,
+      "step": 10992
+    },
+    {
+      "epoch": 0.7931743569392835,
+      "grad_norm": 0.12801945209503174,
+      "learning_rate": 0.00016829268292682927,
+      "loss": 0.146,
+      "step": 10993
+    },
+    {
+      "epoch": 0.793246509614344,
+      "grad_norm": 0.11971257627010345,
+      "learning_rate": 0.00016828979650743253,
+      "loss": 0.157,
+      "step": 10994
+    },
+    {
+      "epoch": 0.7933186622894044,
+      "grad_norm": 0.13249745965003967,
+      "learning_rate": 0.0001682869100880358,
+      "loss": 0.12,
+      "step": 10995
+    },
+    {
+      "epoch": 0.7933908149644648,
+      "grad_norm": 0.2097712904214859,
+      "learning_rate": 0.00016828402366863906,
+      "loss": 0.1284,
+      "step": 10996
+    },
+    {
+      "epoch": 0.7934629676395253,
+      "grad_norm": 0.12385252863168716,
+      "learning_rate": 0.00016828113724924232,
+      "loss": 0.2047,
+      "step": 10997
+    },
+    {
+      "epoch": 0.7935351203145856,
+      "grad_norm": 0.13884519040584564,
+      "learning_rate": 0.00016827825082984559,
+      "loss": 0.1494,
+      "step": 10998
+    },
+    {
+      "epoch": 0.7936072729896461,
+      "grad_norm": 0.14670537412166595,
+      "learning_rate": 0.00016827536441044885,
+      "loss": 0.1556,
+      "step": 10999
+    },
+    {
+      "epoch": 0.7936794256647065,
+      "grad_norm": 0.11171022057533264,
+      "learning_rate": 0.0001682724779910521,
+      "loss": 0.1081,
+      "step": 11000
+    },
+    {
+      "epoch": 0.793751578339767,
+      "grad_norm": 0.15498478710651398,
+      "learning_rate": 0.00016826959157165535,
+      "loss": 0.1346,
+      "step": 11001
+    },
+    {
+      "epoch": 0.7938237310148274,
+      "grad_norm": 0.13899146020412445,
+      "learning_rate": 0.00016826670515225864,
+      "loss": 0.1592,
+      "step": 11002
+    },
+    {
+      "epoch": 0.7938958836898878,
+      "grad_norm": 0.1698872596025467,
+      "learning_rate": 0.0001682638187328619,
+      "loss": 0.1465,
+      "step": 11003
+    },
+    {
+      "epoch": 0.7939680363649483,
+      "grad_norm": 0.13369418680667877,
+      "learning_rate": 0.00016826093231346516,
+      "loss": 0.1208,
+      "step": 11004
+    },
+    {
+      "epoch": 0.7940401890400086,
+      "grad_norm": 0.15157140791416168,
+      "learning_rate": 0.00016825804589406843,
+      "loss": 0.1777,
+      "step": 11005
+    },
+    {
+      "epoch": 0.7941123417150691,
+      "grad_norm": 0.15633326768875122,
+      "learning_rate": 0.00016825515947467166,
+      "loss": 0.1924,
+      "step": 11006
+    },
+    {
+      "epoch": 0.7941844943901295,
+      "grad_norm": 0.15235872566699982,
+      "learning_rate": 0.00016825227305527493,
+      "loss": 0.1608,
+      "step": 11007
+    },
+    {
+      "epoch": 0.79425664706519,
+      "grad_norm": 0.13778749108314514,
+      "learning_rate": 0.0001682493866358782,
+      "loss": 0.1851,
+      "step": 11008
+    },
+    {
+      "epoch": 0.7943287997402504,
+      "grad_norm": 0.11128794401884079,
+      "learning_rate": 0.00016824650021648148,
+      "loss": 0.1282,
+      "step": 11009
+    },
+    {
+      "epoch": 0.7944009524153108,
+      "grad_norm": 0.11886956542730331,
+      "learning_rate": 0.00016824361379708474,
+      "loss": 0.1332,
+      "step": 11010
+    },
+    {
+      "epoch": 0.7944731050903713,
+      "grad_norm": 0.1259932816028595,
+      "learning_rate": 0.00016824072737768798,
+      "loss": 0.1581,
+      "step": 11011
+    },
+    {
+      "epoch": 0.7945452577654316,
+      "grad_norm": 0.12389802187681198,
+      "learning_rate": 0.00016823784095829124,
+      "loss": 0.135,
+      "step": 11012
+    },
+    {
+      "epoch": 0.7946174104404921,
+      "grad_norm": 0.13104520738124847,
+      "learning_rate": 0.0001682349545388945,
+      "loss": 0.1377,
+      "step": 11013
+    },
+    {
+      "epoch": 0.7946895631155525,
+      "grad_norm": 0.14885981380939484,
+      "learning_rate": 0.00016823206811949777,
+      "loss": 0.1501,
+      "step": 11014
+    },
+    {
+      "epoch": 0.7947617157906129,
+      "grad_norm": 0.14056426286697388,
+      "learning_rate": 0.00016822918170010103,
+      "loss": 0.135,
+      "step": 11015
+    },
+    {
+      "epoch": 0.7948338684656734,
+      "grad_norm": 0.14511126279830933,
+      "learning_rate": 0.0001682262952807043,
+      "loss": 0.1362,
+      "step": 11016
+    },
+    {
+      "epoch": 0.7949060211407338,
+      "grad_norm": 0.11810924112796783,
+      "learning_rate": 0.00016822340886130755,
+      "loss": 0.1169,
+      "step": 11017
+    },
+    {
+      "epoch": 0.7949781738157943,
+      "grad_norm": 0.13882525265216827,
+      "learning_rate": 0.00016822052244191082,
+      "loss": 0.1672,
+      "step": 11018
+    },
+    {
+      "epoch": 0.7950503264908546,
+      "grad_norm": 0.11954337358474731,
+      "learning_rate": 0.00016821763602251408,
+      "loss": 0.1856,
+      "step": 11019
+    },
+    {
+      "epoch": 0.795122479165915,
+      "grad_norm": 0.11523205041885376,
+      "learning_rate": 0.00016821474960311734,
+      "loss": 0.1555,
+      "step": 11020
+    },
+    {
+      "epoch": 0.7951946318409755,
+      "grad_norm": 0.14944764971733093,
+      "learning_rate": 0.0001682118631837206,
+      "loss": 0.168,
+      "step": 11021
+    },
+    {
+      "epoch": 0.7952667845160359,
+      "grad_norm": 0.12593041360378265,
+      "learning_rate": 0.00016820897676432387,
+      "loss": 0.2075,
+      "step": 11022
+    },
+    {
+      "epoch": 0.7953389371910964,
+      "grad_norm": 0.12796074151992798,
+      "learning_rate": 0.00016820609034492713,
+      "loss": 0.1472,
+      "step": 11023
+    },
+    {
+      "epoch": 0.7954110898661568,
+      "grad_norm": 0.13617539405822754,
+      "learning_rate": 0.0001682032039255304,
+      "loss": 0.1355,
+      "step": 11024
+    },
+    {
+      "epoch": 0.7954832425412172,
+      "grad_norm": 0.12220434844493866,
+      "learning_rate": 0.00016820031750613366,
+      "loss": 0.1286,
+      "step": 11025
+    },
+    {
+      "epoch": 0.7955553952162776,
+      "grad_norm": 0.13229826092720032,
+      "learning_rate": 0.00016819743108673692,
+      "loss": 0.1169,
+      "step": 11026
+    },
+    {
+      "epoch": 0.795627547891338,
+      "grad_norm": 0.1340683549642563,
+      "learning_rate": 0.00016819454466734018,
+      "loss": 0.1285,
+      "step": 11027
+    },
+    {
+      "epoch": 0.7956997005663985,
+      "grad_norm": 0.11346662789583206,
+      "learning_rate": 0.00016819165824794342,
+      "loss": 0.1267,
+      "step": 11028
+    },
+    {
+      "epoch": 0.7957718532414589,
+      "grad_norm": 0.1273529827594757,
+      "learning_rate": 0.00016818877182854668,
+      "loss": 0.1172,
+      "step": 11029
+    },
+    {
+      "epoch": 0.7958440059165194,
+      "grad_norm": 0.12470119446516037,
+      "learning_rate": 0.00016818588540914997,
+      "loss": 0.1863,
+      "step": 11030
+    },
+    {
+      "epoch": 0.7959161585915798,
+      "grad_norm": 0.15152983367443085,
+      "learning_rate": 0.00016818299898975324,
+      "loss": 0.1409,
+      "step": 11031
+    },
+    {
+      "epoch": 0.7959883112666402,
+      "grad_norm": 0.13718275725841522,
+      "learning_rate": 0.0001681801125703565,
+      "loss": 0.1661,
+      "step": 11032
+    },
+    {
+      "epoch": 0.7960604639417006,
+      "grad_norm": 0.13387587666511536,
+      "learning_rate": 0.00016817722615095973,
+      "loss": 0.1327,
+      "step": 11033
+    },
+    {
+      "epoch": 0.796132616616761,
+      "grad_norm": 0.13916029036045074,
+      "learning_rate": 0.000168174339731563,
+      "loss": 0.1708,
+      "step": 11034
+    },
+    {
+      "epoch": 0.7962047692918215,
+      "grad_norm": 0.12818798422813416,
+      "learning_rate": 0.00016817145331216626,
+      "loss": 0.1445,
+      "step": 11035
+    },
+    {
+      "epoch": 0.7962769219668819,
+      "grad_norm": 0.11526653915643692,
+      "learning_rate": 0.00016816856689276952,
+      "loss": 0.1111,
+      "step": 11036
+    },
+    {
+      "epoch": 0.7963490746419424,
+      "grad_norm": 0.13559386134147644,
+      "learning_rate": 0.0001681656804733728,
+      "loss": 0.168,
+      "step": 11037
+    },
+    {
+      "epoch": 0.7964212273170028,
+      "grad_norm": 0.10860492289066315,
+      "learning_rate": 0.00016816279405397605,
+      "loss": 0.1433,
+      "step": 11038
+    },
+    {
+      "epoch": 0.7964933799920632,
+      "grad_norm": 0.14890626072883606,
+      "learning_rate": 0.0001681599076345793,
+      "loss": 0.1484,
+      "step": 11039
+    },
+    {
+      "epoch": 0.7965655326671236,
+      "grad_norm": 0.13415803015232086,
+      "learning_rate": 0.00016815702121518257,
+      "loss": 0.1534,
+      "step": 11040
+    },
+    {
+      "epoch": 0.796637685342184,
+      "grad_norm": 0.1619867980480194,
+      "learning_rate": 0.00016815413479578584,
+      "loss": 0.2313,
+      "step": 11041
+    },
+    {
+      "epoch": 0.7967098380172445,
+      "grad_norm": 0.1239086389541626,
+      "learning_rate": 0.0001681512483763891,
+      "loss": 0.18,
+      "step": 11042
+    },
+    {
+      "epoch": 0.7967819906923049,
+      "grad_norm": 0.13515089452266693,
+      "learning_rate": 0.00016814836195699236,
+      "loss": 0.1975,
+      "step": 11043
+    },
+    {
+      "epoch": 0.7968541433673654,
+      "grad_norm": 0.15039147436618805,
+      "learning_rate": 0.00016814547553759563,
+      "loss": 0.1609,
+      "step": 11044
+    },
+    {
+      "epoch": 0.7969262960424258,
+      "grad_norm": 0.12758326530456543,
+      "learning_rate": 0.0001681425891181989,
+      "loss": 0.1529,
+      "step": 11045
+    },
+    {
+      "epoch": 0.7969984487174862,
+      "grad_norm": 0.14930225908756256,
+      "learning_rate": 0.00016813970269880215,
+      "loss": 0.1397,
+      "step": 11046
+    },
+    {
+      "epoch": 0.7970706013925466,
+      "grad_norm": 0.15626980364322662,
+      "learning_rate": 0.00016813681627940541,
+      "loss": 0.1611,
+      "step": 11047
+    },
+    {
+      "epoch": 0.797142754067607,
+      "grad_norm": 0.18415814638137817,
+      "learning_rate": 0.00016813392986000868,
+      "loss": 0.131,
+      "step": 11048
+    },
+    {
+      "epoch": 0.7972149067426675,
+      "grad_norm": 0.11131515353918076,
+      "learning_rate": 0.00016813104344061191,
+      "loss": 0.1164,
+      "step": 11049
+    },
+    {
+      "epoch": 0.7972870594177279,
+      "grad_norm": 0.12445852905511856,
+      "learning_rate": 0.00016812815702121518,
+      "loss": 0.1369,
+      "step": 11050
+    },
+    {
+      "epoch": 0.7973592120927884,
+      "grad_norm": 0.13122273981571198,
+      "learning_rate": 0.00016812527060181847,
+      "loss": 0.1133,
+      "step": 11051
+    },
+    {
+      "epoch": 0.7974313647678488,
+      "grad_norm": 0.12259101122617722,
+      "learning_rate": 0.00016812238418242173,
+      "loss": 0.1821,
+      "step": 11052
+    },
+    {
+      "epoch": 0.7975035174429091,
+      "grad_norm": 0.14598500728607178,
+      "learning_rate": 0.000168119497763025,
+      "loss": 0.1431,
+      "step": 11053
+    },
+    {
+      "epoch": 0.7975756701179696,
+      "grad_norm": 0.1440744400024414,
+      "learning_rate": 0.00016811661134362823,
+      "loss": 0.141,
+      "step": 11054
+    },
+    {
+      "epoch": 0.79764782279303,
+      "grad_norm": 0.12835662066936493,
+      "learning_rate": 0.0001681137249242315,
+      "loss": 0.1309,
+      "step": 11055
+    },
+    {
+      "epoch": 0.7977199754680905,
+      "grad_norm": 0.12210889160633087,
+      "learning_rate": 0.00016811083850483475,
+      "loss": 0.1485,
+      "step": 11056
+    },
+    {
+      "epoch": 0.7977921281431509,
+      "grad_norm": 0.11877714842557907,
+      "learning_rate": 0.00016810795208543802,
+      "loss": 0.1407,
+      "step": 11057
+    },
+    {
+      "epoch": 0.7978642808182114,
+      "grad_norm": 0.11467092484235764,
+      "learning_rate": 0.0001681050656660413,
+      "loss": 0.1181,
+      "step": 11058
+    },
+    {
+      "epoch": 0.7979364334932718,
+      "grad_norm": 0.13680388033390045,
+      "learning_rate": 0.00016810217924664454,
+      "loss": 0.2024,
+      "step": 11059
+    },
+    {
+      "epoch": 0.7980085861683321,
+      "grad_norm": 0.1774633377790451,
+      "learning_rate": 0.0001680992928272478,
+      "loss": 0.1748,
+      "step": 11060
+    },
+    {
+      "epoch": 0.7980807388433926,
+      "grad_norm": 0.11040361225605011,
+      "learning_rate": 0.00016809640640785107,
+      "loss": 0.1621,
+      "step": 11061
+    },
+    {
+      "epoch": 0.798152891518453,
+      "grad_norm": 0.3321816027164459,
+      "learning_rate": 0.00016809351998845433,
+      "loss": 0.1643,
+      "step": 11062
+    },
+    {
+      "epoch": 0.7982250441935135,
+      "grad_norm": 0.1258113533258438,
+      "learning_rate": 0.0001680906335690576,
+      "loss": 0.1304,
+      "step": 11063
+    },
+    {
+      "epoch": 0.7982971968685739,
+      "grad_norm": 0.14619180560112,
+      "learning_rate": 0.00016808774714966086,
+      "loss": 0.1457,
+      "step": 11064
+    },
+    {
+      "epoch": 0.7983693495436344,
+      "grad_norm": 0.11650705337524414,
+      "learning_rate": 0.00016808486073026412,
+      "loss": 0.177,
+      "step": 11065
+    },
+    {
+      "epoch": 0.7984415022186948,
+      "grad_norm": 0.16568635404109955,
+      "learning_rate": 0.00016808197431086738,
+      "loss": 0.126,
+      "step": 11066
+    },
+    {
+      "epoch": 0.7985136548937551,
+      "grad_norm": 0.11353182047605515,
+      "learning_rate": 0.00016807908789147065,
+      "loss": 0.1656,
+      "step": 11067
+    },
+    {
+      "epoch": 0.7985858075688156,
+      "grad_norm": 0.16108182072639465,
+      "learning_rate": 0.0001680762014720739,
+      "loss": 0.1462,
+      "step": 11068
+    },
+    {
+      "epoch": 0.798657960243876,
+      "grad_norm": 0.13784301280975342,
+      "learning_rate": 0.00016807331505267717,
+      "loss": 0.1297,
+      "step": 11069
+    },
+    {
+      "epoch": 0.7987301129189365,
+      "grad_norm": 0.1316288560628891,
+      "learning_rate": 0.0001680704286332804,
+      "loss": 0.177,
+      "step": 11070
+    },
+    {
+      "epoch": 0.7988022655939969,
+      "grad_norm": 0.12894243001937866,
+      "learning_rate": 0.00016806754221388367,
+      "loss": 0.1345,
+      "step": 11071
+    },
+    {
+      "epoch": 0.7988744182690574,
+      "grad_norm": 0.10570898652076721,
+      "learning_rate": 0.00016806465579448693,
+      "loss": 0.1641,
+      "step": 11072
+    },
+    {
+      "epoch": 0.7989465709441178,
+      "grad_norm": 0.16653867065906525,
+      "learning_rate": 0.00016806176937509022,
+      "loss": 0.1412,
+      "step": 11073
+    },
+    {
+      "epoch": 0.7990187236191781,
+      "grad_norm": 0.13186757266521454,
+      "learning_rate": 0.0001680588829556935,
+      "loss": 0.1499,
+      "step": 11074
+    },
+    {
+      "epoch": 0.7990908762942386,
+      "grad_norm": 0.16151700913906097,
+      "learning_rate": 0.00016805599653629672,
+      "loss": 0.1783,
+      "step": 11075
+    },
+    {
+      "epoch": 0.799163028969299,
+      "grad_norm": 0.14202305674552917,
+      "learning_rate": 0.00016805311011689999,
+      "loss": 0.1441,
+      "step": 11076
+    },
+    {
+      "epoch": 0.7992351816443595,
+      "grad_norm": 0.13662859797477722,
+      "learning_rate": 0.00016805022369750325,
+      "loss": 0.1678,
+      "step": 11077
+    },
+    {
+      "epoch": 0.7993073343194199,
+      "grad_norm": 0.13601934909820557,
+      "learning_rate": 0.0001680473372781065,
+      "loss": 0.1353,
+      "step": 11078
+    },
+    {
+      "epoch": 0.7993794869944804,
+      "grad_norm": 0.14189928770065308,
+      "learning_rate": 0.00016804445085870977,
+      "loss": 0.1134,
+      "step": 11079
+    },
+    {
+      "epoch": 0.7994516396695408,
+      "grad_norm": 0.12195149064064026,
+      "learning_rate": 0.00016804156443931304,
+      "loss": 0.1695,
+      "step": 11080
+    },
+    {
+      "epoch": 0.7995237923446011,
+      "grad_norm": 0.12974140048027039,
+      "learning_rate": 0.0001680386780199163,
+      "loss": 0.1421,
+      "step": 11081
+    },
+    {
+      "epoch": 0.7995959450196616,
+      "grad_norm": 0.12792320549488068,
+      "learning_rate": 0.00016803579160051956,
+      "loss": 0.1302,
+      "step": 11082
+    },
+    {
+      "epoch": 0.799668097694722,
+      "grad_norm": 0.14331893622875214,
+      "learning_rate": 0.00016803290518112283,
+      "loss": 0.1269,
+      "step": 11083
+    },
+    {
+      "epoch": 0.7997402503697825,
+      "grad_norm": 0.12498790770769119,
+      "learning_rate": 0.0001680300187617261,
+      "loss": 0.1295,
+      "step": 11084
+    },
+    {
+      "epoch": 0.7998124030448429,
+      "grad_norm": 0.12381463497877121,
+      "learning_rate": 0.00016802713234232935,
+      "loss": 0.185,
+      "step": 11085
+    },
+    {
+      "epoch": 0.7998845557199034,
+      "grad_norm": 0.16269150376319885,
+      "learning_rate": 0.0001680242459229326,
+      "loss": 0.1914,
+      "step": 11086
+    },
+    {
+      "epoch": 0.7999567083949637,
+      "grad_norm": 0.13072358071804047,
+      "learning_rate": 0.00016802135950353588,
+      "loss": 0.1577,
+      "step": 11087
+    },
+    {
+      "epoch": 0.8000288610700241,
+      "grad_norm": 0.15264281630516052,
+      "learning_rate": 0.00016801847308413914,
+      "loss": 0.202,
+      "step": 11088
+    },
+    {
+      "epoch": 0.8001010137450846,
+      "grad_norm": 0.11712154746055603,
+      "learning_rate": 0.0001680155866647424,
+      "loss": 0.1266,
+      "step": 11089
+    },
+    {
+      "epoch": 0.800173166420145,
+      "grad_norm": 0.12803013622760773,
+      "learning_rate": 0.00016801270024534567,
+      "loss": 0.1535,
+      "step": 11090
+    },
+    {
+      "epoch": 0.8002453190952055,
+      "grad_norm": 0.10732483118772507,
+      "learning_rate": 0.0001680098138259489,
+      "loss": 0.1794,
+      "step": 11091
+    },
+    {
+      "epoch": 0.8003174717702659,
+      "grad_norm": 0.12305649369955063,
+      "learning_rate": 0.00016800692740655217,
+      "loss": 0.1655,
+      "step": 11092
+    },
+    {
+      "epoch": 0.8003896244453264,
+      "grad_norm": 0.13291281461715698,
+      "learning_rate": 0.00016800404098715543,
+      "loss": 0.1491,
+      "step": 11093
+    },
+    {
+      "epoch": 0.8004617771203867,
+      "grad_norm": 0.11650607734918594,
+      "learning_rate": 0.00016800115456775872,
+      "loss": 0.1201,
+      "step": 11094
+    },
+    {
+      "epoch": 0.8005339297954471,
+      "grad_norm": 0.13787731528282166,
+      "learning_rate": 0.00016799826814836198,
+      "loss": 0.1982,
+      "step": 11095
+    },
+    {
+      "epoch": 0.8006060824705076,
+      "grad_norm": 0.13782642781734467,
+      "learning_rate": 0.00016799538172896522,
+      "loss": 0.1443,
+      "step": 11096
+    },
+    {
+      "epoch": 0.800678235145568,
+      "grad_norm": 0.1185268759727478,
+      "learning_rate": 0.00016799249530956848,
+      "loss": 0.0963,
+      "step": 11097
+    },
+    {
+      "epoch": 0.8007503878206285,
+      "grad_norm": 0.15925146639347076,
+      "learning_rate": 0.00016798960889017174,
+      "loss": 0.1398,
+      "step": 11098
+    },
+    {
+      "epoch": 0.8008225404956889,
+      "grad_norm": 0.11412061750888824,
+      "learning_rate": 0.000167986722470775,
+      "loss": 0.1256,
+      "step": 11099
+    },
+    {
+      "epoch": 0.8008946931707493,
+      "grad_norm": 0.12418772280216217,
+      "learning_rate": 0.00016798383605137827,
+      "loss": 0.17,
+      "step": 11100
+    },
+    {
+      "epoch": 0.8009668458458097,
+      "grad_norm": 0.14615289866924286,
+      "learning_rate": 0.00016798094963198153,
+      "loss": 0.1543,
+      "step": 11101
+    },
+    {
+      "epoch": 0.8010389985208701,
+      "grad_norm": 0.12303359806537628,
+      "learning_rate": 0.0001679780632125848,
+      "loss": 0.1103,
+      "step": 11102
+    },
+    {
+      "epoch": 0.8011111511959306,
+      "grad_norm": 0.11707977950572968,
+      "learning_rate": 0.00016797517679318806,
+      "loss": 0.1468,
+      "step": 11103
+    },
+    {
+      "epoch": 0.801183303870991,
+      "grad_norm": 0.13978630304336548,
+      "learning_rate": 0.00016797229037379132,
+      "loss": 0.1343,
+      "step": 11104
+    },
+    {
+      "epoch": 0.8012554565460515,
+      "grad_norm": 0.11165934056043625,
+      "learning_rate": 0.00016796940395439458,
+      "loss": 0.1898,
+      "step": 11105
+    },
+    {
+      "epoch": 0.8013276092211119,
+      "grad_norm": 0.1523934006690979,
+      "learning_rate": 0.00016796651753499785,
+      "loss": 0.1602,
+      "step": 11106
+    },
+    {
+      "epoch": 0.8013997618961723,
+      "grad_norm": 0.16679495573043823,
+      "learning_rate": 0.00016796363111560108,
+      "loss": 0.1846,
+      "step": 11107
+    },
+    {
+      "epoch": 0.8014719145712327,
+      "grad_norm": 0.11543601751327515,
+      "learning_rate": 0.00016796074469620437,
+      "loss": 0.1513,
+      "step": 11108
+    },
+    {
+      "epoch": 0.8015440672462931,
+      "grad_norm": 0.12580257654190063,
+      "learning_rate": 0.00016795785827680763,
+      "loss": 0.1397,
+      "step": 11109
+    },
+    {
+      "epoch": 0.8016162199213536,
+      "grad_norm": 0.16165702044963837,
+      "learning_rate": 0.0001679549718574109,
+      "loss": 0.1716,
+      "step": 11110
+    },
+    {
+      "epoch": 0.801688372596414,
+      "grad_norm": 0.18446248769760132,
+      "learning_rate": 0.00016795208543801416,
+      "loss": 0.1899,
+      "step": 11111
+    },
+    {
+      "epoch": 0.8017605252714745,
+      "grad_norm": 0.11582392454147339,
+      "learning_rate": 0.0001679491990186174,
+      "loss": 0.1481,
+      "step": 11112
+    },
+    {
+      "epoch": 0.8018326779465349,
+      "grad_norm": 0.11042899638414383,
+      "learning_rate": 0.00016794631259922066,
+      "loss": 0.1733,
+      "step": 11113
+    },
+    {
+      "epoch": 0.8019048306215953,
+      "grad_norm": 0.1459999531507492,
+      "learning_rate": 0.00016794342617982392,
+      "loss": 0.1677,
+      "step": 11114
+    },
+    {
+      "epoch": 0.8019769832966557,
+      "grad_norm": 0.12890112400054932,
+      "learning_rate": 0.0001679405397604272,
+      "loss": 0.1541,
+      "step": 11115
+    },
+    {
+      "epoch": 0.8020491359717161,
+      "grad_norm": 0.1220753863453865,
+      "learning_rate": 0.00016793765334103048,
+      "loss": 0.1718,
+      "step": 11116
+    },
+    {
+      "epoch": 0.8021212886467766,
+      "grad_norm": 0.122266486287117,
+      "learning_rate": 0.0001679347669216337,
+      "loss": 0.144,
+      "step": 11117
+    },
+    {
+      "epoch": 0.802193441321837,
+      "grad_norm": 0.12399003654718399,
+      "learning_rate": 0.00016793188050223697,
+      "loss": 0.137,
+      "step": 11118
+    },
+    {
+      "epoch": 0.8022655939968975,
+      "grad_norm": 0.15937049686908722,
+      "learning_rate": 0.00016792899408284024,
+      "loss": 0.1457,
+      "step": 11119
+    },
+    {
+      "epoch": 0.8023377466719579,
+      "grad_norm": 0.10514838248491287,
+      "learning_rate": 0.0001679261076634435,
+      "loss": 0.1775,
+      "step": 11120
+    },
+    {
+      "epoch": 0.8024098993470183,
+      "grad_norm": 0.1084451898932457,
+      "learning_rate": 0.00016792322124404676,
+      "loss": 0.152,
+      "step": 11121
+    },
+    {
+      "epoch": 0.8024820520220787,
+      "grad_norm": 0.11995755881071091,
+      "learning_rate": 0.00016792033482465003,
+      "loss": 0.1656,
+      "step": 11122
+    },
+    {
+      "epoch": 0.8025542046971391,
+      "grad_norm": 0.11566449701786041,
+      "learning_rate": 0.0001679174484052533,
+      "loss": 0.1407,
+      "step": 11123
+    },
+    {
+      "epoch": 0.8026263573721996,
+      "grad_norm": 0.1932581067085266,
+      "learning_rate": 0.00016791456198585655,
+      "loss": 0.2038,
+      "step": 11124
+    },
+    {
+      "epoch": 0.80269851004726,
+      "grad_norm": 0.12143067270517349,
+      "learning_rate": 0.00016791167556645981,
+      "loss": 0.1021,
+      "step": 11125
+    },
+    {
+      "epoch": 0.8027706627223204,
+      "grad_norm": 0.11793456971645355,
+      "learning_rate": 0.00016790878914706308,
+      "loss": 0.1047,
+      "step": 11126
+    },
+    {
+      "epoch": 0.8028428153973809,
+      "grad_norm": 0.13719895482063293,
+      "learning_rate": 0.00016790590272766634,
+      "loss": 0.1541,
+      "step": 11127
+    },
+    {
+      "epoch": 0.8029149680724413,
+      "grad_norm": 0.12765516340732574,
+      "learning_rate": 0.00016790301630826958,
+      "loss": 0.1213,
+      "step": 11128
+    },
+    {
+      "epoch": 0.8029871207475017,
+      "grad_norm": 0.1204436868429184,
+      "learning_rate": 0.00016790012988887287,
+      "loss": 0.1423,
+      "step": 11129
+    },
+    {
+      "epoch": 0.8030592734225621,
+      "grad_norm": 0.10415612161159515,
+      "learning_rate": 0.00016789724346947613,
+      "loss": 0.1272,
+      "step": 11130
+    },
+    {
+      "epoch": 0.8031314260976226,
+      "grad_norm": 0.12991686165332794,
+      "learning_rate": 0.0001678943570500794,
+      "loss": 0.1381,
+      "step": 11131
+    },
+    {
+      "epoch": 0.803203578772683,
+      "grad_norm": 0.14686962962150574,
+      "learning_rate": 0.00016789147063068265,
+      "loss": 0.159,
+      "step": 11132
+    },
+    {
+      "epoch": 0.8032757314477434,
+      "grad_norm": 0.11568587273359299,
+      "learning_rate": 0.0001678885842112859,
+      "loss": 0.1534,
+      "step": 11133
+    },
+    {
+      "epoch": 0.8033478841228039,
+      "grad_norm": 0.13723793625831604,
+      "learning_rate": 0.00016788569779188915,
+      "loss": 0.1367,
+      "step": 11134
+    },
+    {
+      "epoch": 0.8034200367978643,
+      "grad_norm": 0.12334664165973663,
+      "learning_rate": 0.00016788281137249242,
+      "loss": 0.1297,
+      "step": 11135
+    },
+    {
+      "epoch": 0.8034921894729247,
+      "grad_norm": 0.1709556132555008,
+      "learning_rate": 0.0001678799249530957,
+      "loss": 0.1469,
+      "step": 11136
+    },
+    {
+      "epoch": 0.8035643421479851,
+      "grad_norm": 0.12486976385116577,
+      "learning_rate": 0.00016787703853369897,
+      "loss": 0.1848,
+      "step": 11137
+    },
+    {
+      "epoch": 0.8036364948230456,
+      "grad_norm": 0.12414836883544922,
+      "learning_rate": 0.00016787415211430223,
+      "loss": 0.1363,
+      "step": 11138
+    },
+    {
+      "epoch": 0.803708647498106,
+      "grad_norm": 0.1451658457517624,
+      "learning_rate": 0.00016787126569490547,
+      "loss": 0.1283,
+      "step": 11139
+    },
+    {
+      "epoch": 0.8037808001731664,
+      "grad_norm": 0.12030114978551865,
+      "learning_rate": 0.00016786837927550873,
+      "loss": 0.0882,
+      "step": 11140
+    },
+    {
+      "epoch": 0.8038529528482269,
+      "grad_norm": 0.13571010529994965,
+      "learning_rate": 0.000167865492856112,
+      "loss": 0.1278,
+      "step": 11141
+    },
+    {
+      "epoch": 0.8039251055232873,
+      "grad_norm": 0.12487173825502396,
+      "learning_rate": 0.00016786260643671526,
+      "loss": 0.1593,
+      "step": 11142
+    },
+    {
+      "epoch": 0.8039972581983477,
+      "grad_norm": 0.12655773758888245,
+      "learning_rate": 0.00016785972001731855,
+      "loss": 0.1221,
+      "step": 11143
+    },
+    {
+      "epoch": 0.8040694108734081,
+      "grad_norm": 0.1122177243232727,
+      "learning_rate": 0.00016785683359792178,
+      "loss": 0.1716,
+      "step": 11144
+    },
+    {
+      "epoch": 0.8041415635484686,
+      "grad_norm": 0.11251802742481232,
+      "learning_rate": 0.00016785394717852505,
+      "loss": 0.117,
+      "step": 11145
+    },
+    {
+      "epoch": 0.804213716223529,
+      "grad_norm": 0.11069377511739731,
+      "learning_rate": 0.0001678510607591283,
+      "loss": 0.1195,
+      "step": 11146
+    },
+    {
+      "epoch": 0.8042858688985894,
+      "grad_norm": 0.14175282418727875,
+      "learning_rate": 0.00016784817433973157,
+      "loss": 0.1554,
+      "step": 11147
+    },
+    {
+      "epoch": 0.8043580215736499,
+      "grad_norm": 0.14564311504364014,
+      "learning_rate": 0.00016784528792033483,
+      "loss": 0.2149,
+      "step": 11148
+    },
+    {
+      "epoch": 0.8044301742487102,
+      "grad_norm": 0.1261538416147232,
+      "learning_rate": 0.0001678424015009381,
+      "loss": 0.1294,
+      "step": 11149
+    },
+    {
+      "epoch": 0.8045023269237707,
+      "grad_norm": 0.16583268344402313,
+      "learning_rate": 0.00016783951508154136,
+      "loss": 0.1325,
+      "step": 11150
+    },
+    {
+      "epoch": 0.8045744795988311,
+      "grad_norm": 0.17326302826404572,
+      "learning_rate": 0.00016783662866214462,
+      "loss": 0.1394,
+      "step": 11151
+    },
+    {
+      "epoch": 0.8046466322738915,
+      "grad_norm": 0.10651998221874237,
+      "learning_rate": 0.00016783374224274789,
+      "loss": 0.1718,
+      "step": 11152
+    },
+    {
+      "epoch": 0.804718784948952,
+      "grad_norm": 0.155874565243721,
+      "learning_rate": 0.00016783085582335115,
+      "loss": 0.1603,
+      "step": 11153
+    },
+    {
+      "epoch": 0.8047909376240124,
+      "grad_norm": 0.1414341926574707,
+      "learning_rate": 0.0001678279694039544,
+      "loss": 0.1659,
+      "step": 11154
+    },
+    {
+      "epoch": 0.8048630902990729,
+      "grad_norm": 0.10812345892190933,
+      "learning_rate": 0.00016782508298455765,
+      "loss": 0.1377,
+      "step": 11155
+    },
+    {
+      "epoch": 0.8049352429741332,
+      "grad_norm": 0.11542447656393051,
+      "learning_rate": 0.0001678221965651609,
+      "loss": 0.1732,
+      "step": 11156
+    },
+    {
+      "epoch": 0.8050073956491937,
+      "grad_norm": 0.1083124428987503,
+      "learning_rate": 0.0001678193101457642,
+      "loss": 0.0919,
+      "step": 11157
+    },
+    {
+      "epoch": 0.8050795483242541,
+      "grad_norm": 0.14099064469337463,
+      "learning_rate": 0.00016781642372636746,
+      "loss": 0.151,
+      "step": 11158
+    },
+    {
+      "epoch": 0.8051517009993145,
+      "grad_norm": 0.13833631575107574,
+      "learning_rate": 0.00016781353730697073,
+      "loss": 0.1222,
+      "step": 11159
+    },
+    {
+      "epoch": 0.805223853674375,
+      "grad_norm": 0.11046667397022247,
+      "learning_rate": 0.00016781065088757396,
+      "loss": 0.1266,
+      "step": 11160
+    },
+    {
+      "epoch": 0.8052960063494354,
+      "grad_norm": 0.11911126971244812,
+      "learning_rate": 0.00016780776446817723,
+      "loss": 0.1513,
+      "step": 11161
+    },
+    {
+      "epoch": 0.8053681590244959,
+      "grad_norm": 0.11474090814590454,
+      "learning_rate": 0.0001678048780487805,
+      "loss": 0.1932,
+      "step": 11162
+    },
+    {
+      "epoch": 0.8054403116995562,
+      "grad_norm": 0.13939101994037628,
+      "learning_rate": 0.00016780199162938375,
+      "loss": 0.1614,
+      "step": 11163
+    },
+    {
+      "epoch": 0.8055124643746167,
+      "grad_norm": 0.12426722049713135,
+      "learning_rate": 0.00016779910520998704,
+      "loss": 0.1221,
+      "step": 11164
+    },
+    {
+      "epoch": 0.8055846170496771,
+      "grad_norm": 0.13918867707252502,
+      "learning_rate": 0.00016779621879059028,
+      "loss": 0.1437,
+      "step": 11165
+    },
+    {
+      "epoch": 0.8056567697247375,
+      "grad_norm": 0.14279721677303314,
+      "learning_rate": 0.00016779333237119354,
+      "loss": 0.1442,
+      "step": 11166
+    },
+    {
+      "epoch": 0.805728922399798,
+      "grad_norm": 0.09675594419240952,
+      "learning_rate": 0.0001677904459517968,
+      "loss": 0.0808,
+      "step": 11167
+    },
+    {
+      "epoch": 0.8058010750748584,
+      "grad_norm": 0.14310872554779053,
+      "learning_rate": 0.00016778755953240007,
+      "loss": 0.1837,
+      "step": 11168
+    },
+    {
+      "epoch": 0.8058732277499189,
+      "grad_norm": 0.12977318465709686,
+      "learning_rate": 0.00016778467311300333,
+      "loss": 0.1405,
+      "step": 11169
+    },
+    {
+      "epoch": 0.8059453804249792,
+      "grad_norm": 0.15566326677799225,
+      "learning_rate": 0.0001677817866936066,
+      "loss": 0.1557,
+      "step": 11170
+    },
+    {
+      "epoch": 0.8060175331000397,
+      "grad_norm": 0.12260245531797409,
+      "learning_rate": 0.00016777890027420985,
+      "loss": 0.152,
+      "step": 11171
+    },
+    {
+      "epoch": 0.8060896857751001,
+      "grad_norm": 0.1256342977285385,
+      "learning_rate": 0.00016777601385481312,
+      "loss": 0.1045,
+      "step": 11172
+    },
+    {
+      "epoch": 0.8061618384501605,
+      "grad_norm": 0.13613678514957428,
+      "learning_rate": 0.00016777312743541638,
+      "loss": 0.1467,
+      "step": 11173
+    },
+    {
+      "epoch": 0.806233991125221,
+      "grad_norm": 0.11526962369680405,
+      "learning_rate": 0.00016777024101601964,
+      "loss": 0.1202,
+      "step": 11174
+    },
+    {
+      "epoch": 0.8063061438002814,
+      "grad_norm": 0.1624159812927246,
+      "learning_rate": 0.0001677673545966229,
+      "loss": 0.1944,
+      "step": 11175
+    },
+    {
+      "epoch": 0.8063782964753419,
+      "grad_norm": 0.12479659169912338,
+      "learning_rate": 0.00016776446817722614,
+      "loss": 0.2056,
+      "step": 11176
+    },
+    {
+      "epoch": 0.8064504491504022,
+      "grad_norm": 0.1443111151456833,
+      "learning_rate": 0.0001677615817578294,
+      "loss": 0.1321,
+      "step": 11177
+    },
+    {
+      "epoch": 0.8065226018254626,
+      "grad_norm": 0.1252567321062088,
+      "learning_rate": 0.0001677586953384327,
+      "loss": 0.1626,
+      "step": 11178
+    },
+    {
+      "epoch": 0.8065947545005231,
+      "grad_norm": 0.11821672320365906,
+      "learning_rate": 0.00016775580891903596,
+      "loss": 0.1475,
+      "step": 11179
+    },
+    {
+      "epoch": 0.8066669071755835,
+      "grad_norm": 0.12168832868337631,
+      "learning_rate": 0.00016775292249963922,
+      "loss": 0.1922,
+      "step": 11180
+    },
+    {
+      "epoch": 0.806739059850644,
+      "grad_norm": 0.15275044739246368,
+      "learning_rate": 0.00016775003608024246,
+      "loss": 0.1887,
+      "step": 11181
+    },
+    {
+      "epoch": 0.8068112125257044,
+      "grad_norm": 0.10763371735811234,
+      "learning_rate": 0.00016774714966084572,
+      "loss": 0.1638,
+      "step": 11182
+    },
+    {
+      "epoch": 0.8068833652007649,
+      "grad_norm": 0.11747167259454727,
+      "learning_rate": 0.00016774426324144898,
+      "loss": 0.1281,
+      "step": 11183
+    },
+    {
+      "epoch": 0.8069555178758252,
+      "grad_norm": 0.1161046177148819,
+      "learning_rate": 0.00016774137682205225,
+      "loss": 0.1831,
+      "step": 11184
+    },
+    {
+      "epoch": 0.8070276705508856,
+      "grad_norm": 0.12537787854671478,
+      "learning_rate": 0.00016773849040265554,
+      "loss": 0.143,
+      "step": 11185
+    },
+    {
+      "epoch": 0.8070998232259461,
+      "grad_norm": 0.1212264746427536,
+      "learning_rate": 0.00016773560398325877,
+      "loss": 0.1711,
+      "step": 11186
+    },
+    {
+      "epoch": 0.8071719759010065,
+      "grad_norm": 0.12325877696275711,
+      "learning_rate": 0.00016773271756386203,
+      "loss": 0.1536,
+      "step": 11187
+    },
+    {
+      "epoch": 0.807244128576067,
+      "grad_norm": 0.11776801943778992,
+      "learning_rate": 0.0001677298311444653,
+      "loss": 0.1784,
+      "step": 11188
+    },
+    {
+      "epoch": 0.8073162812511274,
+      "grad_norm": 0.12928734719753265,
+      "learning_rate": 0.00016772694472506856,
+      "loss": 0.1933,
+      "step": 11189
+    },
+    {
+      "epoch": 0.8073884339261879,
+      "grad_norm": 0.1151927262544632,
+      "learning_rate": 0.00016772405830567182,
+      "loss": 0.1473,
+      "step": 11190
+    },
+    {
+      "epoch": 0.8074605866012482,
+      "grad_norm": 0.16673599183559418,
+      "learning_rate": 0.00016772117188627509,
+      "loss": 0.1819,
+      "step": 11191
+    },
+    {
+      "epoch": 0.8075327392763086,
+      "grad_norm": 0.11783953756093979,
+      "learning_rate": 0.00016771828546687835,
+      "loss": 0.1417,
+      "step": 11192
+    },
+    {
+      "epoch": 0.8076048919513691,
+      "grad_norm": 0.123351089656353,
+      "learning_rate": 0.0001677153990474816,
+      "loss": 0.1522,
+      "step": 11193
+    },
+    {
+      "epoch": 0.8076770446264295,
+      "grad_norm": 0.1466146856546402,
+      "learning_rate": 0.00016771251262808487,
+      "loss": 0.1501,
+      "step": 11194
+    },
+    {
+      "epoch": 0.80774919730149,
+      "grad_norm": 0.1161920353770256,
+      "learning_rate": 0.00016770962620868814,
+      "loss": 0.1799,
+      "step": 11195
+    },
+    {
+      "epoch": 0.8078213499765504,
+      "grad_norm": 0.11827120929956436,
+      "learning_rate": 0.0001677067397892914,
+      "loss": 0.1913,
+      "step": 11196
+    },
+    {
+      "epoch": 0.8078935026516109,
+      "grad_norm": 0.14861279726028442,
+      "learning_rate": 0.00016770385336989464,
+      "loss": 0.1656,
+      "step": 11197
+    },
+    {
+      "epoch": 0.8079656553266712,
+      "grad_norm": 0.12918280065059662,
+      "learning_rate": 0.0001677009669504979,
+      "loss": 0.1735,
+      "step": 11198
+    },
+    {
+      "epoch": 0.8080378080017316,
+      "grad_norm": 0.11571779102087021,
+      "learning_rate": 0.0001676980805311012,
+      "loss": 0.1383,
+      "step": 11199
+    },
+    {
+      "epoch": 0.8081099606767921,
+      "grad_norm": 0.11656660586595535,
+      "learning_rate": 0.00016769519411170445,
+      "loss": 0.1501,
+      "step": 11200
+    },
+    {
+      "epoch": 0.8081821133518525,
+      "grad_norm": 0.12739737331867218,
+      "learning_rate": 0.00016769230769230772,
+      "loss": 0.1368,
+      "step": 11201
+    },
+    {
+      "epoch": 0.808254266026913,
+      "grad_norm": 0.14029301702976227,
+      "learning_rate": 0.00016768942127291095,
+      "loss": 0.1191,
+      "step": 11202
+    },
+    {
+      "epoch": 0.8083264187019734,
+      "grad_norm": 0.10776422917842865,
+      "learning_rate": 0.00016768653485351421,
+      "loss": 0.1448,
+      "step": 11203
+    },
+    {
+      "epoch": 0.8083985713770339,
+      "grad_norm": 0.12312967330217361,
+      "learning_rate": 0.00016768364843411748,
+      "loss": 0.1387,
+      "step": 11204
+    },
+    {
+      "epoch": 0.8084707240520942,
+      "grad_norm": 0.1375785768032074,
+      "learning_rate": 0.00016768076201472074,
+      "loss": 0.1581,
+      "step": 11205
+    },
+    {
+      "epoch": 0.8085428767271546,
+      "grad_norm": 0.11886519193649292,
+      "learning_rate": 0.00016767787559532403,
+      "loss": 0.1384,
+      "step": 11206
+    },
+    {
+      "epoch": 0.8086150294022151,
+      "grad_norm": 0.1250735968351364,
+      "learning_rate": 0.00016767498917592727,
+      "loss": 0.1567,
+      "step": 11207
+    },
+    {
+      "epoch": 0.8086871820772755,
+      "grad_norm": 0.1060570776462555,
+      "learning_rate": 0.00016767210275653053,
+      "loss": 0.175,
+      "step": 11208
+    },
+    {
+      "epoch": 0.808759334752336,
+      "grad_norm": 0.11892693489789963,
+      "learning_rate": 0.0001676692163371338,
+      "loss": 0.1379,
+      "step": 11209
+    },
+    {
+      "epoch": 0.8088314874273964,
+      "grad_norm": 0.10399611294269562,
+      "learning_rate": 0.00016766632991773705,
+      "loss": 0.116,
+      "step": 11210
+    },
+    {
+      "epoch": 0.8089036401024567,
+      "grad_norm": 0.10794928669929504,
+      "learning_rate": 0.00016766344349834032,
+      "loss": 0.1365,
+      "step": 11211
+    },
+    {
+      "epoch": 0.8089757927775172,
+      "grad_norm": 0.14975190162658691,
+      "learning_rate": 0.00016766055707894358,
+      "loss": 0.1608,
+      "step": 11212
+    },
+    {
+      "epoch": 0.8090479454525776,
+      "grad_norm": 0.11688051372766495,
+      "learning_rate": 0.00016765767065954684,
+      "loss": 0.1273,
+      "step": 11213
+    },
+    {
+      "epoch": 0.8091200981276381,
+      "grad_norm": 0.11928575485944748,
+      "learning_rate": 0.0001676547842401501,
+      "loss": 0.1428,
+      "step": 11214
+    },
+    {
+      "epoch": 0.8091922508026985,
+      "grad_norm": 0.13377560675144196,
+      "learning_rate": 0.00016765189782075337,
+      "loss": 0.1409,
+      "step": 11215
+    },
+    {
+      "epoch": 0.809264403477759,
+      "grad_norm": 0.13069269061088562,
+      "learning_rate": 0.00016764901140135663,
+      "loss": 0.1968,
+      "step": 11216
+    },
+    {
+      "epoch": 0.8093365561528194,
+      "grad_norm": 0.11220591515302658,
+      "learning_rate": 0.0001676461249819599,
+      "loss": 0.1061,
+      "step": 11217
+    },
+    {
+      "epoch": 0.8094087088278797,
+      "grad_norm": 0.15183371305465698,
+      "learning_rate": 0.00016764323856256313,
+      "loss": 0.1728,
+      "step": 11218
+    },
+    {
+      "epoch": 0.8094808615029402,
+      "grad_norm": 0.13244639337062836,
+      "learning_rate": 0.0001676403521431664,
+      "loss": 0.1345,
+      "step": 11219
+    },
+    {
+      "epoch": 0.8095530141780006,
+      "grad_norm": 0.1347954273223877,
+      "learning_rate": 0.00016763746572376968,
+      "loss": 0.1207,
+      "step": 11220
+    },
+    {
+      "epoch": 0.8096251668530611,
+      "grad_norm": 0.11808697134256363,
+      "learning_rate": 0.00016763457930437295,
+      "loss": 0.1422,
+      "step": 11221
+    },
+    {
+      "epoch": 0.8096973195281215,
+      "grad_norm": 0.1248321384191513,
+      "learning_rate": 0.0001676316928849762,
+      "loss": 0.141,
+      "step": 11222
+    },
+    {
+      "epoch": 0.809769472203182,
+      "grad_norm": 0.12465531378984451,
+      "learning_rate": 0.00016762880646557945,
+      "loss": 0.1774,
+      "step": 11223
+    },
+    {
+      "epoch": 0.8098416248782424,
+      "grad_norm": 0.12220194935798645,
+      "learning_rate": 0.0001676259200461827,
+      "loss": 0.1766,
+      "step": 11224
+    },
+    {
+      "epoch": 0.8099137775533027,
+      "grad_norm": 0.14990898966789246,
+      "learning_rate": 0.00016762303362678597,
+      "loss": 0.1763,
+      "step": 11225
+    },
+    {
+      "epoch": 0.8099859302283632,
+      "grad_norm": 0.15053509175777435,
+      "learning_rate": 0.00016762014720738923,
+      "loss": 0.1366,
+      "step": 11226
+    },
+    {
+      "epoch": 0.8100580829034236,
+      "grad_norm": 0.13961613178253174,
+      "learning_rate": 0.00016761726078799252,
+      "loss": 0.1554,
+      "step": 11227
+    },
+    {
+      "epoch": 0.8101302355784841,
+      "grad_norm": 0.14634525775909424,
+      "learning_rate": 0.00016761437436859576,
+      "loss": 0.1336,
+      "step": 11228
+    },
+    {
+      "epoch": 0.8102023882535445,
+      "grad_norm": 0.10927371680736542,
+      "learning_rate": 0.00016761148794919902,
+      "loss": 0.1568,
+      "step": 11229
+    },
+    {
+      "epoch": 0.810274540928605,
+      "grad_norm": 0.13111984729766846,
+      "learning_rate": 0.00016760860152980229,
+      "loss": 0.103,
+      "step": 11230
+    },
+    {
+      "epoch": 0.8103466936036654,
+      "grad_norm": 0.12341060489416122,
+      "learning_rate": 0.00016760571511040555,
+      "loss": 0.11,
+      "step": 11231
+    },
+    {
+      "epoch": 0.8104188462787257,
+      "grad_norm": 0.1280965507030487,
+      "learning_rate": 0.0001676028286910088,
+      "loss": 0.0989,
+      "step": 11232
+    },
+    {
+      "epoch": 0.8104909989537862,
+      "grad_norm": 0.12683764100074768,
+      "learning_rate": 0.00016759994227161207,
+      "loss": 0.1371,
+      "step": 11233
+    },
+    {
+      "epoch": 0.8105631516288466,
+      "grad_norm": 0.16215945780277252,
+      "learning_rate": 0.00016759705585221534,
+      "loss": 0.1319,
+      "step": 11234
+    },
+    {
+      "epoch": 0.8106353043039071,
+      "grad_norm": 0.11233913153409958,
+      "learning_rate": 0.0001675941694328186,
+      "loss": 0.1328,
+      "step": 11235
+    },
+    {
+      "epoch": 0.8107074569789675,
+      "grad_norm": 0.12480802834033966,
+      "learning_rate": 0.00016759128301342186,
+      "loss": 0.1461,
+      "step": 11236
+    },
+    {
+      "epoch": 0.810779609654028,
+      "grad_norm": 0.14508230984210968,
+      "learning_rate": 0.00016758839659402513,
+      "loss": 0.1261,
+      "step": 11237
+    },
+    {
+      "epoch": 0.8108517623290884,
+      "grad_norm": 0.11494122445583344,
+      "learning_rate": 0.0001675855101746284,
+      "loss": 0.1382,
+      "step": 11238
+    },
+    {
+      "epoch": 0.8109239150041487,
+      "grad_norm": 0.10430081188678741,
+      "learning_rate": 0.00016758262375523162,
+      "loss": 0.1159,
+      "step": 11239
+    },
+    {
+      "epoch": 0.8109960676792092,
+      "grad_norm": 0.11663941293954849,
+      "learning_rate": 0.0001675797373358349,
+      "loss": 0.1413,
+      "step": 11240
+    },
+    {
+      "epoch": 0.8110682203542696,
+      "grad_norm": 0.12681062519550323,
+      "learning_rate": 0.00016757685091643818,
+      "loss": 0.1445,
+      "step": 11241
+    },
+    {
+      "epoch": 0.8111403730293301,
+      "grad_norm": 0.12679167091846466,
+      "learning_rate": 0.00016757396449704144,
+      "loss": 0.1555,
+      "step": 11242
+    },
+    {
+      "epoch": 0.8112125257043905,
+      "grad_norm": 0.1359633356332779,
+      "learning_rate": 0.0001675710780776447,
+      "loss": 0.1371,
+      "step": 11243
+    },
+    {
+      "epoch": 0.811284678379451,
+      "grad_norm": 0.10687170177698135,
+      "learning_rate": 0.00016756819165824794,
+      "loss": 0.1833,
+      "step": 11244
+    },
+    {
+      "epoch": 0.8113568310545114,
+      "grad_norm": 0.12260755151510239,
+      "learning_rate": 0.0001675653052388512,
+      "loss": 0.1535,
+      "step": 11245
+    },
+    {
+      "epoch": 0.8114289837295717,
+      "grad_norm": 0.11183297634124756,
+      "learning_rate": 0.00016756241881945447,
+      "loss": 0.1613,
+      "step": 11246
+    },
+    {
+      "epoch": 0.8115011364046322,
+      "grad_norm": 0.12028773874044418,
+      "learning_rate": 0.00016755953240005773,
+      "loss": 0.1303,
+      "step": 11247
+    },
+    {
+      "epoch": 0.8115732890796926,
+      "grad_norm": 0.11625336855649948,
+      "learning_rate": 0.00016755664598066102,
+      "loss": 0.1652,
+      "step": 11248
+    },
+    {
+      "epoch": 0.8116454417547531,
+      "grad_norm": 0.12436410784721375,
+      "learning_rate": 0.00016755375956126425,
+      "loss": 0.1558,
+      "step": 11249
+    },
+    {
+      "epoch": 0.8117175944298135,
+      "grad_norm": 0.13242529332637787,
+      "learning_rate": 0.00016755087314186752,
+      "loss": 0.1258,
+      "step": 11250
+    },
+    {
+      "epoch": 0.811789747104874,
+      "grad_norm": 0.12705907225608826,
+      "learning_rate": 0.00016754798672247078,
+      "loss": 0.124,
+      "step": 11251
+    },
+    {
+      "epoch": 0.8118618997799344,
+      "grad_norm": 0.12154010683298111,
+      "learning_rate": 0.00016754510030307404,
+      "loss": 0.168,
+      "step": 11252
+    },
+    {
+      "epoch": 0.8119340524549947,
+      "grad_norm": 0.12976540625095367,
+      "learning_rate": 0.0001675422138836773,
+      "loss": 0.1559,
+      "step": 11253
+    },
+    {
+      "epoch": 0.8120062051300552,
+      "grad_norm": 0.11038821190595627,
+      "learning_rate": 0.00016753932746428057,
+      "loss": 0.1713,
+      "step": 11254
+    },
+    {
+      "epoch": 0.8120783578051156,
+      "grad_norm": 0.11370693147182465,
+      "learning_rate": 0.00016753644104488383,
+      "loss": 0.1736,
+      "step": 11255
+    },
+    {
+      "epoch": 0.8121505104801761,
+      "grad_norm": 0.12086302787065506,
+      "learning_rate": 0.0001675335546254871,
+      "loss": 0.1648,
+      "step": 11256
+    },
+    {
+      "epoch": 0.8122226631552365,
+      "grad_norm": 0.1379534900188446,
+      "learning_rate": 0.00016753066820609036,
+      "loss": 0.1458,
+      "step": 11257
+    },
+    {
+      "epoch": 0.812294815830297,
+      "grad_norm": 0.1315435767173767,
+      "learning_rate": 0.00016752778178669362,
+      "loss": 0.1855,
+      "step": 11258
+    },
+    {
+      "epoch": 0.8123669685053574,
+      "grad_norm": 0.10974664986133575,
+      "learning_rate": 0.00016752489536729688,
+      "loss": 0.1462,
+      "step": 11259
+    },
+    {
+      "epoch": 0.8124391211804177,
+      "grad_norm": 0.11963661760091782,
+      "learning_rate": 0.00016752200894790015,
+      "loss": 0.1593,
+      "step": 11260
+    },
+    {
+      "epoch": 0.8125112738554782,
+      "grad_norm": 0.12325388193130493,
+      "learning_rate": 0.00016751912252850338,
+      "loss": 0.1379,
+      "step": 11261
+    },
+    {
+      "epoch": 0.8125834265305386,
+      "grad_norm": 0.11024509370326996,
+      "learning_rate": 0.00016751623610910667,
+      "loss": 0.1094,
+      "step": 11262
+    },
+    {
+      "epoch": 0.812655579205599,
+      "grad_norm": 0.11602330207824707,
+      "learning_rate": 0.00016751334968970993,
+      "loss": 0.1302,
+      "step": 11263
+    },
+    {
+      "epoch": 0.8127277318806595,
+      "grad_norm": 0.1416502147912979,
+      "learning_rate": 0.0001675104632703132,
+      "loss": 0.1638,
+      "step": 11264
+    },
+    {
+      "epoch": 0.8127998845557199,
+      "grad_norm": 0.12808161973953247,
+      "learning_rate": 0.00016750757685091646,
+      "loss": 0.1488,
+      "step": 11265
+    },
+    {
+      "epoch": 0.8128720372307804,
+      "grad_norm": 0.14989076554775238,
+      "learning_rate": 0.0001675046904315197,
+      "loss": 0.174,
+      "step": 11266
+    },
+    {
+      "epoch": 0.8129441899058407,
+      "grad_norm": 0.11965425312519073,
+      "learning_rate": 0.00016750180401212296,
+      "loss": 0.1437,
+      "step": 11267
+    },
+    {
+      "epoch": 0.8130163425809012,
+      "grad_norm": 0.12085084617137909,
+      "learning_rate": 0.00016749891759272622,
+      "loss": 0.1232,
+      "step": 11268
+    },
+    {
+      "epoch": 0.8130884952559616,
+      "grad_norm": 0.1350867748260498,
+      "learning_rate": 0.00016749603117332949,
+      "loss": 0.1333,
+      "step": 11269
+    },
+    {
+      "epoch": 0.813160647931022,
+      "grad_norm": 0.14008502662181854,
+      "learning_rate": 0.00016749314475393278,
+      "loss": 0.1382,
+      "step": 11270
+    },
+    {
+      "epoch": 0.8132328006060825,
+      "grad_norm": 0.12756000459194183,
+      "learning_rate": 0.000167490258334536,
+      "loss": 0.1764,
+      "step": 11271
+    },
+    {
+      "epoch": 0.8133049532811429,
+      "grad_norm": 0.12092864513397217,
+      "learning_rate": 0.00016748737191513927,
+      "loss": 0.1201,
+      "step": 11272
+    },
+    {
+      "epoch": 0.8133771059562033,
+      "grad_norm": 0.1111254170536995,
+      "learning_rate": 0.00016748448549574254,
+      "loss": 0.1212,
+      "step": 11273
+    },
+    {
+      "epoch": 0.8134492586312637,
+      "grad_norm": 0.11691894382238388,
+      "learning_rate": 0.0001674815990763458,
+      "loss": 0.129,
+      "step": 11274
+    },
+    {
+      "epoch": 0.8135214113063242,
+      "grad_norm": 0.1301519274711609,
+      "learning_rate": 0.00016747871265694906,
+      "loss": 0.1261,
+      "step": 11275
+    },
+    {
+      "epoch": 0.8135935639813846,
+      "grad_norm": 0.12759649753570557,
+      "learning_rate": 0.00016747582623755233,
+      "loss": 0.1058,
+      "step": 11276
+    },
+    {
+      "epoch": 0.813665716656445,
+      "grad_norm": 0.15787038207054138,
+      "learning_rate": 0.0001674729398181556,
+      "loss": 0.1682,
+      "step": 11277
+    },
+    {
+      "epoch": 0.8137378693315055,
+      "grad_norm": 0.1614495813846588,
+      "learning_rate": 0.00016747005339875885,
+      "loss": 0.1503,
+      "step": 11278
+    },
+    {
+      "epoch": 0.8138100220065659,
+      "grad_norm": 0.1081974133849144,
+      "learning_rate": 0.00016746716697936211,
+      "loss": 0.1154,
+      "step": 11279
+    },
+    {
+      "epoch": 0.8138821746816263,
+      "grad_norm": 0.1459926962852478,
+      "learning_rate": 0.00016746428055996538,
+      "loss": 0.1193,
+      "step": 11280
+    },
+    {
+      "epoch": 0.8139543273566867,
+      "grad_norm": 0.10666241496801376,
+      "learning_rate": 0.00016746139414056864,
+      "loss": 0.1682,
+      "step": 11281
+    },
+    {
+      "epoch": 0.8140264800317472,
+      "grad_norm": 0.11136288940906525,
+      "learning_rate": 0.00016745850772117188,
+      "loss": 0.1232,
+      "step": 11282
+    },
+    {
+      "epoch": 0.8140986327068076,
+      "grad_norm": 0.10956153273582458,
+      "learning_rate": 0.00016745562130177514,
+      "loss": 0.1283,
+      "step": 11283
+    },
+    {
+      "epoch": 0.814170785381868,
+      "grad_norm": 0.12416128814220428,
+      "learning_rate": 0.00016745273488237843,
+      "loss": 0.0944,
+      "step": 11284
+    },
+    {
+      "epoch": 0.8142429380569285,
+      "grad_norm": 0.12514308094978333,
+      "learning_rate": 0.0001674498484629817,
+      "loss": 0.1457,
+      "step": 11285
+    },
+    {
+      "epoch": 0.8143150907319889,
+      "grad_norm": 0.11458826810121536,
+      "learning_rate": 0.00016744696204358496,
+      "loss": 0.1193,
+      "step": 11286
+    },
+    {
+      "epoch": 0.8143872434070493,
+      "grad_norm": 0.1273362785577774,
+      "learning_rate": 0.0001674440756241882,
+      "loss": 0.1279,
+      "step": 11287
+    },
+    {
+      "epoch": 0.8144593960821097,
+      "grad_norm": 0.13258153200149536,
+      "learning_rate": 0.00016744118920479145,
+      "loss": 0.1405,
+      "step": 11288
+    },
+    {
+      "epoch": 0.8145315487571702,
+      "grad_norm": 0.15388639271259308,
+      "learning_rate": 0.00016743830278539472,
+      "loss": 0.175,
+      "step": 11289
+    },
+    {
+      "epoch": 0.8146037014322306,
+      "grad_norm": 0.1363607496023178,
+      "learning_rate": 0.00016743541636599798,
+      "loss": 0.1533,
+      "step": 11290
+    },
+    {
+      "epoch": 0.814675854107291,
+      "grad_norm": 0.09938452392816544,
+      "learning_rate": 0.00016743252994660127,
+      "loss": 0.1546,
+      "step": 11291
+    },
+    {
+      "epoch": 0.8147480067823515,
+      "grad_norm": 0.1165052130818367,
+      "learning_rate": 0.0001674296435272045,
+      "loss": 0.1516,
+      "step": 11292
+    },
+    {
+      "epoch": 0.8148201594574119,
+      "grad_norm": 0.12615390121936798,
+      "learning_rate": 0.00016742675710780777,
+      "loss": 0.1686,
+      "step": 11293
+    },
+    {
+      "epoch": 0.8148923121324723,
+      "grad_norm": 0.15578638017177582,
+      "learning_rate": 0.00016742387068841103,
+      "loss": 0.1546,
+      "step": 11294
+    },
+    {
+      "epoch": 0.8149644648075327,
+      "grad_norm": 0.12379761785268784,
+      "learning_rate": 0.0001674209842690143,
+      "loss": 0.1009,
+      "step": 11295
+    },
+    {
+      "epoch": 0.8150366174825932,
+      "grad_norm": 0.13307251036167145,
+      "learning_rate": 0.00016741809784961756,
+      "loss": 0.1382,
+      "step": 11296
+    },
+    {
+      "epoch": 0.8151087701576536,
+      "grad_norm": 0.12157807499170303,
+      "learning_rate": 0.00016741521143022082,
+      "loss": 0.1465,
+      "step": 11297
+    },
+    {
+      "epoch": 0.815180922832714,
+      "grad_norm": 0.1452769786119461,
+      "learning_rate": 0.00016741232501082408,
+      "loss": 0.206,
+      "step": 11298
+    },
+    {
+      "epoch": 0.8152530755077745,
+      "grad_norm": 0.11464543640613556,
+      "learning_rate": 0.00016740943859142735,
+      "loss": 0.1673,
+      "step": 11299
+    },
+    {
+      "epoch": 0.8153252281828349,
+      "grad_norm": 0.1436835080385208,
+      "learning_rate": 0.0001674065521720306,
+      "loss": 0.1452,
+      "step": 11300
+    },
+    {
+      "epoch": 0.8153973808578953,
+      "grad_norm": 0.12474244832992554,
+      "learning_rate": 0.00016740366575263387,
+      "loss": 0.1306,
+      "step": 11301
+    },
+    {
+      "epoch": 0.8154695335329557,
+      "grad_norm": 0.10895528644323349,
+      "learning_rate": 0.00016740077933323713,
+      "loss": 0.0865,
+      "step": 11302
+    },
+    {
+      "epoch": 0.8155416862080161,
+      "grad_norm": 0.11682059615850449,
+      "learning_rate": 0.00016739789291384037,
+      "loss": 0.1051,
+      "step": 11303
+    },
+    {
+      "epoch": 0.8156138388830766,
+      "grad_norm": 0.11779388785362244,
+      "learning_rate": 0.00016739500649444363,
+      "loss": 0.1642,
+      "step": 11304
+    },
+    {
+      "epoch": 0.815685991558137,
+      "grad_norm": 0.13912788033485413,
+      "learning_rate": 0.00016739212007504692,
+      "loss": 0.1451,
+      "step": 11305
+    },
+    {
+      "epoch": 0.8157581442331975,
+      "grad_norm": 0.14120379090309143,
+      "learning_rate": 0.00016738923365565019,
+      "loss": 0.1531,
+      "step": 11306
+    },
+    {
+      "epoch": 0.8158302969082579,
+      "grad_norm": 0.13223904371261597,
+      "learning_rate": 0.00016738634723625345,
+      "loss": 0.1145,
+      "step": 11307
+    },
+    {
+      "epoch": 0.8159024495833183,
+      "grad_norm": 0.10849042236804962,
+      "learning_rate": 0.00016738346081685669,
+      "loss": 0.1252,
+      "step": 11308
+    },
+    {
+      "epoch": 0.8159746022583787,
+      "grad_norm": 0.1092374175786972,
+      "learning_rate": 0.00016738057439745995,
+      "loss": 0.1861,
+      "step": 11309
+    },
+    {
+      "epoch": 0.8160467549334391,
+      "grad_norm": 0.15699417889118195,
+      "learning_rate": 0.0001673776879780632,
+      "loss": 0.1783,
+      "step": 11310
+    },
+    {
+      "epoch": 0.8161189076084996,
+      "grad_norm": 0.12624722719192505,
+      "learning_rate": 0.00016737480155866647,
+      "loss": 0.1394,
+      "step": 11311
+    },
+    {
+      "epoch": 0.81619106028356,
+      "grad_norm": 0.1225733831524849,
+      "learning_rate": 0.00016737191513926976,
+      "loss": 0.1113,
+      "step": 11312
+    },
+    {
+      "epoch": 0.8162632129586205,
+      "grad_norm": 0.13231535255908966,
+      "learning_rate": 0.000167369028719873,
+      "loss": 0.1254,
+      "step": 11313
+    },
+    {
+      "epoch": 0.8163353656336809,
+      "grad_norm": 0.10797318071126938,
+      "learning_rate": 0.00016736614230047626,
+      "loss": 0.1623,
+      "step": 11314
+    },
+    {
+      "epoch": 0.8164075183087413,
+      "grad_norm": 0.12934650480747223,
+      "learning_rate": 0.00016736325588107953,
+      "loss": 0.1713,
+      "step": 11315
+    },
+    {
+      "epoch": 0.8164796709838017,
+      "grad_norm": 0.14138473570346832,
+      "learning_rate": 0.0001673603694616828,
+      "loss": 0.1406,
+      "step": 11316
+    },
+    {
+      "epoch": 0.8165518236588621,
+      "grad_norm": 0.11887970566749573,
+      "learning_rate": 0.00016735748304228605,
+      "loss": 0.1198,
+      "step": 11317
+    },
+    {
+      "epoch": 0.8166239763339226,
+      "grad_norm": 0.15465225279331207,
+      "learning_rate": 0.00016735459662288931,
+      "loss": 0.1481,
+      "step": 11318
+    },
+    {
+      "epoch": 0.816696129008983,
+      "grad_norm": 0.11133931577205658,
+      "learning_rate": 0.00016735171020349258,
+      "loss": 0.1649,
+      "step": 11319
+    },
+    {
+      "epoch": 0.8167682816840435,
+      "grad_norm": 0.14071118831634521,
+      "learning_rate": 0.00016734882378409584,
+      "loss": 0.2198,
+      "step": 11320
+    },
+    {
+      "epoch": 0.8168404343591039,
+      "grad_norm": 0.1234261766076088,
+      "learning_rate": 0.0001673459373646991,
+      "loss": 0.1704,
+      "step": 11321
+    },
+    {
+      "epoch": 0.8169125870341643,
+      "grad_norm": 0.13197477161884308,
+      "learning_rate": 0.00016734305094530237,
+      "loss": 0.1667,
+      "step": 11322
+    },
+    {
+      "epoch": 0.8169847397092247,
+      "grad_norm": 0.15567070245742798,
+      "learning_rate": 0.00016734016452590563,
+      "loss": 0.1706,
+      "step": 11323
+    },
+    {
+      "epoch": 0.8170568923842851,
+      "grad_norm": 0.14438800513744354,
+      "learning_rate": 0.00016733727810650886,
+      "loss": 0.1418,
+      "step": 11324
+    },
+    {
+      "epoch": 0.8171290450593456,
+      "grad_norm": 0.12666374444961548,
+      "learning_rate": 0.00016733439168711213,
+      "loss": 0.1437,
+      "step": 11325
+    },
+    {
+      "epoch": 0.817201197734406,
+      "grad_norm": 0.10681997239589691,
+      "learning_rate": 0.00016733150526771542,
+      "loss": 0.1826,
+      "step": 11326
+    },
+    {
+      "epoch": 0.8172733504094665,
+      "grad_norm": 0.13707345724105835,
+      "learning_rate": 0.00016732861884831868,
+      "loss": 0.1495,
+      "step": 11327
+    },
+    {
+      "epoch": 0.8173455030845269,
+      "grad_norm": 0.10398998111486435,
+      "learning_rate": 0.00016732573242892194,
+      "loss": 0.1334,
+      "step": 11328
+    },
+    {
+      "epoch": 0.8174176557595872,
+      "grad_norm": 0.13627657294273376,
+      "learning_rate": 0.00016732284600952518,
+      "loss": 0.1568,
+      "step": 11329
+    },
+    {
+      "epoch": 0.8174898084346477,
+      "grad_norm": 0.11443864554166794,
+      "learning_rate": 0.00016731995959012844,
+      "loss": 0.1403,
+      "step": 11330
+    },
+    {
+      "epoch": 0.8175619611097081,
+      "grad_norm": 0.13510550558567047,
+      "learning_rate": 0.0001673170731707317,
+      "loss": 0.138,
+      "step": 11331
+    },
+    {
+      "epoch": 0.8176341137847686,
+      "grad_norm": 0.13009688258171082,
+      "learning_rate": 0.00016731418675133497,
+      "loss": 0.2187,
+      "step": 11332
+    },
+    {
+      "epoch": 0.817706266459829,
+      "grad_norm": 0.12749722599983215,
+      "learning_rate": 0.00016731130033193826,
+      "loss": 0.1697,
+      "step": 11333
+    },
+    {
+      "epoch": 0.8177784191348895,
+      "grad_norm": 0.13540560007095337,
+      "learning_rate": 0.0001673084139125415,
+      "loss": 0.1496,
+      "step": 11334
+    },
+    {
+      "epoch": 0.8178505718099498,
+      "grad_norm": 0.12042059749364853,
+      "learning_rate": 0.00016730552749314476,
+      "loss": 0.1515,
+      "step": 11335
+    },
+    {
+      "epoch": 0.8179227244850102,
+      "grad_norm": 0.11170211434364319,
+      "learning_rate": 0.00016730264107374802,
+      "loss": 0.1372,
+      "step": 11336
+    },
+    {
+      "epoch": 0.8179948771600707,
+      "grad_norm": 0.12397082149982452,
+      "learning_rate": 0.00016729975465435128,
+      "loss": 0.18,
+      "step": 11337
+    },
+    {
+      "epoch": 0.8180670298351311,
+      "grad_norm": 0.15444037318229675,
+      "learning_rate": 0.00016729686823495455,
+      "loss": 0.1394,
+      "step": 11338
+    },
+    {
+      "epoch": 0.8181391825101916,
+      "grad_norm": 0.16244472563266754,
+      "learning_rate": 0.0001672939818155578,
+      "loss": 0.2063,
+      "step": 11339
+    },
+    {
+      "epoch": 0.818211335185252,
+      "grad_norm": 0.14282941818237305,
+      "learning_rate": 0.00016729109539616107,
+      "loss": 0.1592,
+      "step": 11340
+    },
+    {
+      "epoch": 0.8182834878603125,
+      "grad_norm": 0.12612587213516235,
+      "learning_rate": 0.00016728820897676433,
+      "loss": 0.117,
+      "step": 11341
+    },
+    {
+      "epoch": 0.8183556405353728,
+      "grad_norm": 0.1341152936220169,
+      "learning_rate": 0.0001672853225573676,
+      "loss": 0.0801,
+      "step": 11342
+    },
+    {
+      "epoch": 0.8184277932104332,
+      "grad_norm": 0.12467809021472931,
+      "learning_rate": 0.00016728243613797086,
+      "loss": 0.1567,
+      "step": 11343
+    },
+    {
+      "epoch": 0.8184999458854937,
+      "grad_norm": 0.1068568080663681,
+      "learning_rate": 0.00016727954971857412,
+      "loss": 0.1679,
+      "step": 11344
+    },
+    {
+      "epoch": 0.8185720985605541,
+      "grad_norm": 0.1270045042037964,
+      "learning_rate": 0.00016727666329917736,
+      "loss": 0.1398,
+      "step": 11345
+    },
+    {
+      "epoch": 0.8186442512356146,
+      "grad_norm": 0.1394347995519638,
+      "learning_rate": 0.00016727377687978062,
+      "loss": 0.1676,
+      "step": 11346
+    },
+    {
+      "epoch": 0.818716403910675,
+      "grad_norm": 0.10702545195817947,
+      "learning_rate": 0.0001672708904603839,
+      "loss": 0.1303,
+      "step": 11347
+    },
+    {
+      "epoch": 0.8187885565857355,
+      "grad_norm": 0.10880977660417557,
+      "learning_rate": 0.00016726800404098717,
+      "loss": 0.1486,
+      "step": 11348
+    },
+    {
+      "epoch": 0.8188607092607958,
+      "grad_norm": 0.12881873548030853,
+      "learning_rate": 0.00016726511762159044,
+      "loss": 0.1323,
+      "step": 11349
+    },
+    {
+      "epoch": 0.8189328619358562,
+      "grad_norm": 0.13381995260715485,
+      "learning_rate": 0.00016726223120219367,
+      "loss": 0.1607,
+      "step": 11350
+    },
+    {
+      "epoch": 0.8190050146109167,
+      "grad_norm": 0.12776945531368256,
+      "learning_rate": 0.00016725934478279694,
+      "loss": 0.1351,
+      "step": 11351
+    },
+    {
+      "epoch": 0.8190771672859771,
+      "grad_norm": 0.10766884684562683,
+      "learning_rate": 0.0001672564583634002,
+      "loss": 0.1314,
+      "step": 11352
+    },
+    {
+      "epoch": 0.8191493199610376,
+      "grad_norm": 0.10411593317985535,
+      "learning_rate": 0.00016725357194400346,
+      "loss": 0.1241,
+      "step": 11353
+    },
+    {
+      "epoch": 0.819221472636098,
+      "grad_norm": 0.11986301839351654,
+      "learning_rate": 0.00016725068552460675,
+      "loss": 0.1641,
+      "step": 11354
+    },
+    {
+      "epoch": 0.8192936253111585,
+      "grad_norm": 0.11862561106681824,
+      "learning_rate": 0.00016724779910521,
+      "loss": 0.0899,
+      "step": 11355
+    },
+    {
+      "epoch": 0.8193657779862188,
+      "grad_norm": 0.09624575823545456,
+      "learning_rate": 0.00016724491268581325,
+      "loss": 0.1614,
+      "step": 11356
+    },
+    {
+      "epoch": 0.8194379306612792,
+      "grad_norm": 0.11138183623552322,
+      "learning_rate": 0.00016724202626641651,
+      "loss": 0.1067,
+      "step": 11357
+    },
+    {
+      "epoch": 0.8195100833363397,
+      "grad_norm": 0.14141838252544403,
+      "learning_rate": 0.00016723913984701978,
+      "loss": 0.2167,
+      "step": 11358
+    },
+    {
+      "epoch": 0.8195822360114001,
+      "grad_norm": 0.13359762728214264,
+      "learning_rate": 0.00016723625342762304,
+      "loss": 0.1533,
+      "step": 11359
+    },
+    {
+      "epoch": 0.8196543886864606,
+      "grad_norm": 0.14311687648296356,
+      "learning_rate": 0.0001672333670082263,
+      "loss": 0.0928,
+      "step": 11360
+    },
+    {
+      "epoch": 0.819726541361521,
+      "grad_norm": 0.12119587510824203,
+      "learning_rate": 0.00016723048058882957,
+      "loss": 0.1239,
+      "step": 11361
+    },
+    {
+      "epoch": 0.8197986940365815,
+      "grad_norm": 0.1338701993227005,
+      "learning_rate": 0.00016722759416943283,
+      "loss": 0.1138,
+      "step": 11362
+    },
+    {
+      "epoch": 0.8198708467116418,
+      "grad_norm": 0.14802776277065277,
+      "learning_rate": 0.0001672247077500361,
+      "loss": 0.1847,
+      "step": 11363
+    },
+    {
+      "epoch": 0.8199429993867022,
+      "grad_norm": 0.10998402535915375,
+      "learning_rate": 0.00016722182133063935,
+      "loss": 0.1656,
+      "step": 11364
+    },
+    {
+      "epoch": 0.8200151520617627,
+      "grad_norm": 0.0966625064611435,
+      "learning_rate": 0.00016721893491124262,
+      "loss": 0.1347,
+      "step": 11365
+    },
+    {
+      "epoch": 0.8200873047368231,
+      "grad_norm": 0.12330837547779083,
+      "learning_rate": 0.00016721604849184588,
+      "loss": 0.1189,
+      "step": 11366
+    },
+    {
+      "epoch": 0.8201594574118836,
+      "grad_norm": 0.14066612720489502,
+      "learning_rate": 0.00016721316207244912,
+      "loss": 0.1443,
+      "step": 11367
+    },
+    {
+      "epoch": 0.820231610086944,
+      "grad_norm": 0.12989702820777893,
+      "learning_rate": 0.0001672102756530524,
+      "loss": 0.1289,
+      "step": 11368
+    },
+    {
+      "epoch": 0.8203037627620045,
+      "grad_norm": 0.11941897124052048,
+      "learning_rate": 0.00016720738923365567,
+      "loss": 0.1422,
+      "step": 11369
+    },
+    {
+      "epoch": 0.8203759154370648,
+      "grad_norm": 0.14209255576133728,
+      "learning_rate": 0.00016720450281425893,
+      "loss": 0.133,
+      "step": 11370
+    },
+    {
+      "epoch": 0.8204480681121252,
+      "grad_norm": 0.11604241281747818,
+      "learning_rate": 0.0001672016163948622,
+      "loss": 0.1468,
+      "step": 11371
+    },
+    {
+      "epoch": 0.8205202207871857,
+      "grad_norm": 0.12015185505151749,
+      "learning_rate": 0.00016719872997546543,
+      "loss": 0.1473,
+      "step": 11372
+    },
+    {
+      "epoch": 0.8205923734622461,
+      "grad_norm": 0.11163683235645294,
+      "learning_rate": 0.0001671958435560687,
+      "loss": 0.1324,
+      "step": 11373
+    },
+    {
+      "epoch": 0.8206645261373066,
+      "grad_norm": 0.1218007430434227,
+      "learning_rate": 0.00016719295713667196,
+      "loss": 0.165,
+      "step": 11374
+    },
+    {
+      "epoch": 0.820736678812367,
+      "grad_norm": 0.12326497584581375,
+      "learning_rate": 0.00016719007071727525,
+      "loss": 0.1737,
+      "step": 11375
+    },
+    {
+      "epoch": 0.8208088314874274,
+      "grad_norm": 0.11788228154182434,
+      "learning_rate": 0.0001671871842978785,
+      "loss": 0.1724,
+      "step": 11376
+    },
+    {
+      "epoch": 0.8208809841624878,
+      "grad_norm": 0.13680443167686462,
+      "learning_rate": 0.00016718429787848175,
+      "loss": 0.1258,
+      "step": 11377
+    },
+    {
+      "epoch": 0.8209531368375482,
+      "grad_norm": 0.14765697717666626,
+      "learning_rate": 0.000167181411459085,
+      "loss": 0.1183,
+      "step": 11378
+    },
+    {
+      "epoch": 0.8210252895126087,
+      "grad_norm": 0.12750886380672455,
+      "learning_rate": 0.00016717852503968827,
+      "loss": 0.1449,
+      "step": 11379
+    },
+    {
+      "epoch": 0.8210974421876691,
+      "grad_norm": 0.1314268410205841,
+      "learning_rate": 0.00016717563862029153,
+      "loss": 0.1475,
+      "step": 11380
+    },
+    {
+      "epoch": 0.8211695948627296,
+      "grad_norm": 0.09203343838453293,
+      "learning_rate": 0.0001671727522008948,
+      "loss": 0.1635,
+      "step": 11381
+    },
+    {
+      "epoch": 0.82124174753779,
+      "grad_norm": 0.1281193345785141,
+      "learning_rate": 0.00016716986578149806,
+      "loss": 0.1168,
+      "step": 11382
+    },
+    {
+      "epoch": 0.8213139002128504,
+      "grad_norm": 0.12138880789279938,
+      "learning_rate": 0.00016716697936210132,
+      "loss": 0.1024,
+      "step": 11383
+    },
+    {
+      "epoch": 0.8213860528879108,
+      "grad_norm": 0.12816603481769562,
+      "learning_rate": 0.00016716409294270459,
+      "loss": 0.187,
+      "step": 11384
+    },
+    {
+      "epoch": 0.8214582055629712,
+      "grad_norm": 0.12044396996498108,
+      "learning_rate": 0.00016716120652330785,
+      "loss": 0.1667,
+      "step": 11385
+    },
+    {
+      "epoch": 0.8215303582380317,
+      "grad_norm": 0.12165230512619019,
+      "learning_rate": 0.0001671583201039111,
+      "loss": 0.1448,
+      "step": 11386
+    },
+    {
+      "epoch": 0.8216025109130921,
+      "grad_norm": 0.12377709150314331,
+      "learning_rate": 0.00016715543368451437,
+      "loss": 0.1234,
+      "step": 11387
+    },
+    {
+      "epoch": 0.8216746635881526,
+      "grad_norm": 0.1418270766735077,
+      "learning_rate": 0.0001671525472651176,
+      "loss": 0.1433,
+      "step": 11388
+    },
+    {
+      "epoch": 0.821746816263213,
+      "grad_norm": 0.14677253365516663,
+      "learning_rate": 0.0001671496608457209,
+      "loss": 0.1389,
+      "step": 11389
+    },
+    {
+      "epoch": 0.8218189689382734,
+      "grad_norm": 0.13841110467910767,
+      "learning_rate": 0.00016714677442632416,
+      "loss": 0.1414,
+      "step": 11390
+    },
+    {
+      "epoch": 0.8218911216133338,
+      "grad_norm": 0.12224303185939789,
+      "learning_rate": 0.00016714388800692743,
+      "loss": 0.1198,
+      "step": 11391
+    },
+    {
+      "epoch": 0.8219632742883942,
+      "grad_norm": 0.11889313906431198,
+      "learning_rate": 0.0001671410015875307,
+      "loss": 0.149,
+      "step": 11392
+    },
+    {
+      "epoch": 0.8220354269634547,
+      "grad_norm": 0.12684161961078644,
+      "learning_rate": 0.00016713811516813393,
+      "loss": 0.1097,
+      "step": 11393
+    },
+    {
+      "epoch": 0.8221075796385151,
+      "grad_norm": 0.14017455279827118,
+      "learning_rate": 0.0001671352287487372,
+      "loss": 0.1165,
+      "step": 11394
+    },
+    {
+      "epoch": 0.8221797323135756,
+      "grad_norm": 0.13586123287677765,
+      "learning_rate": 0.00016713234232934045,
+      "loss": 0.1928,
+      "step": 11395
+    },
+    {
+      "epoch": 0.822251884988636,
+      "grad_norm": 0.12471065670251846,
+      "learning_rate": 0.00016712945590994374,
+      "loss": 0.1518,
+      "step": 11396
+    },
+    {
+      "epoch": 0.8223240376636963,
+      "grad_norm": 0.1188042163848877,
+      "learning_rate": 0.000167126569490547,
+      "loss": 0.0972,
+      "step": 11397
+    },
+    {
+      "epoch": 0.8223961903387568,
+      "grad_norm": 0.1473436951637268,
+      "learning_rate": 0.00016712368307115024,
+      "loss": 0.2051,
+      "step": 11398
+    },
+    {
+      "epoch": 0.8224683430138172,
+      "grad_norm": 0.12210531532764435,
+      "learning_rate": 0.0001671207966517535,
+      "loss": 0.1327,
+      "step": 11399
+    },
+    {
+      "epoch": 0.8225404956888777,
+      "grad_norm": 0.13445596396923065,
+      "learning_rate": 0.00016711791023235677,
+      "loss": 0.1217,
+      "step": 11400
+    },
+    {
+      "epoch": 0.8226126483639381,
+      "grad_norm": 0.12706753611564636,
+      "learning_rate": 0.00016711502381296003,
+      "loss": 0.1666,
+      "step": 11401
+    },
+    {
+      "epoch": 0.8226848010389985,
+      "grad_norm": 0.1364869624376297,
+      "learning_rate": 0.0001671121373935633,
+      "loss": 0.1012,
+      "step": 11402
+    },
+    {
+      "epoch": 0.822756953714059,
+      "grad_norm": 0.1339399367570877,
+      "learning_rate": 0.00016710925097416655,
+      "loss": 0.1085,
+      "step": 11403
+    },
+    {
+      "epoch": 0.8228291063891193,
+      "grad_norm": 0.12054028362035751,
+      "learning_rate": 0.00016710636455476982,
+      "loss": 0.1091,
+      "step": 11404
+    },
+    {
+      "epoch": 0.8229012590641798,
+      "grad_norm": 0.12239770591259003,
+      "learning_rate": 0.00016710347813537308,
+      "loss": 0.112,
+      "step": 11405
+    },
+    {
+      "epoch": 0.8229734117392402,
+      "grad_norm": 0.12391000986099243,
+      "learning_rate": 0.00016710059171597634,
+      "loss": 0.1344,
+      "step": 11406
+    },
+    {
+      "epoch": 0.8230455644143007,
+      "grad_norm": 0.13942229747772217,
+      "learning_rate": 0.0001670977052965796,
+      "loss": 0.1902,
+      "step": 11407
+    },
+    {
+      "epoch": 0.8231177170893611,
+      "grad_norm": 0.12491654604673386,
+      "learning_rate": 0.00016709481887718287,
+      "loss": 0.1334,
+      "step": 11408
+    },
+    {
+      "epoch": 0.8231898697644215,
+      "grad_norm": 0.12403670698404312,
+      "learning_rate": 0.0001670919324577861,
+      "loss": 0.1935,
+      "step": 11409
+    },
+    {
+      "epoch": 0.823262022439482,
+      "grad_norm": 0.1195942834019661,
+      "learning_rate": 0.0001670890460383894,
+      "loss": 0.1291,
+      "step": 11410
+    },
+    {
+      "epoch": 0.8233341751145423,
+      "grad_norm": 0.12322898209095001,
+      "learning_rate": 0.00016708615961899266,
+      "loss": 0.1439,
+      "step": 11411
+    },
+    {
+      "epoch": 0.8234063277896028,
+      "grad_norm": 0.15479105710983276,
+      "learning_rate": 0.00016708327319959592,
+      "loss": 0.1983,
+      "step": 11412
+    },
+    {
+      "epoch": 0.8234784804646632,
+      "grad_norm": 0.14281509816646576,
+      "learning_rate": 0.00016708038678019918,
+      "loss": 0.1374,
+      "step": 11413
+    },
+    {
+      "epoch": 0.8235506331397237,
+      "grad_norm": 0.18304792046546936,
+      "learning_rate": 0.00016707750036080242,
+      "loss": 0.1163,
+      "step": 11414
+    },
+    {
+      "epoch": 0.8236227858147841,
+      "grad_norm": 0.1353597342967987,
+      "learning_rate": 0.00016707461394140568,
+      "loss": 0.1208,
+      "step": 11415
+    },
+    {
+      "epoch": 0.8236949384898445,
+      "grad_norm": 0.11527366191148758,
+      "learning_rate": 0.00016707172752200895,
+      "loss": 0.1905,
+      "step": 11416
+    },
+    {
+      "epoch": 0.823767091164905,
+      "grad_norm": 0.13008643686771393,
+      "learning_rate": 0.00016706884110261224,
+      "loss": 0.1522,
+      "step": 11417
+    },
+    {
+      "epoch": 0.8238392438399653,
+      "grad_norm": 0.1226491928100586,
+      "learning_rate": 0.0001670659546832155,
+      "loss": 0.1924,
+      "step": 11418
+    },
+    {
+      "epoch": 0.8239113965150258,
+      "grad_norm": 0.10746552050113678,
+      "learning_rate": 0.00016706306826381873,
+      "loss": 0.1586,
+      "step": 11419
+    },
+    {
+      "epoch": 0.8239835491900862,
+      "grad_norm": 0.11757387965917587,
+      "learning_rate": 0.000167060181844422,
+      "loss": 0.1338,
+      "step": 11420
+    },
+    {
+      "epoch": 0.8240557018651467,
+      "grad_norm": 0.12384447455406189,
+      "learning_rate": 0.00016705729542502526,
+      "loss": 0.0816,
+      "step": 11421
+    },
+    {
+      "epoch": 0.8241278545402071,
+      "grad_norm": 0.1322738528251648,
+      "learning_rate": 0.00016705440900562852,
+      "loss": 0.1374,
+      "step": 11422
+    },
+    {
+      "epoch": 0.8242000072152675,
+      "grad_norm": 0.12896503508090973,
+      "learning_rate": 0.00016705152258623179,
+      "loss": 0.1012,
+      "step": 11423
+    },
+    {
+      "epoch": 0.824272159890328,
+      "grad_norm": 0.10323493182659149,
+      "learning_rate": 0.00016704863616683505,
+      "loss": 0.1385,
+      "step": 11424
+    },
+    {
+      "epoch": 0.8243443125653883,
+      "grad_norm": 0.12220212072134018,
+      "learning_rate": 0.0001670457497474383,
+      "loss": 0.1403,
+      "step": 11425
+    },
+    {
+      "epoch": 0.8244164652404488,
+      "grad_norm": 0.10760347545146942,
+      "learning_rate": 0.00016704286332804157,
+      "loss": 0.1465,
+      "step": 11426
+    },
+    {
+      "epoch": 0.8244886179155092,
+      "grad_norm": 0.11672066152095795,
+      "learning_rate": 0.00016703997690864484,
+      "loss": 0.1387,
+      "step": 11427
+    },
+    {
+      "epoch": 0.8245607705905696,
+      "grad_norm": 0.11714489012956619,
+      "learning_rate": 0.0001670370904892481,
+      "loss": 0.1338,
+      "step": 11428
+    },
+    {
+      "epoch": 0.8246329232656301,
+      "grad_norm": 0.14161768555641174,
+      "learning_rate": 0.00016703420406985136,
+      "loss": 0.1463,
+      "step": 11429
+    },
+    {
+      "epoch": 0.8247050759406905,
+      "grad_norm": 0.17559358477592468,
+      "learning_rate": 0.0001670313176504546,
+      "loss": 0.1412,
+      "step": 11430
+    },
+    {
+      "epoch": 0.824777228615751,
+      "grad_norm": 0.10626249015331268,
+      "learning_rate": 0.0001670284312310579,
+      "loss": 0.1907,
+      "step": 11431
+    },
+    {
+      "epoch": 0.8248493812908113,
+      "grad_norm": 0.1279340535402298,
+      "learning_rate": 0.00016702554481166115,
+      "loss": 0.1475,
+      "step": 11432
+    },
+    {
+      "epoch": 0.8249215339658718,
+      "grad_norm": 0.10294477641582489,
+      "learning_rate": 0.00016702265839226441,
+      "loss": 0.1131,
+      "step": 11433
+    },
+    {
+      "epoch": 0.8249936866409322,
+      "grad_norm": 0.13072919845581055,
+      "learning_rate": 0.00016701977197286768,
+      "loss": 0.1605,
+      "step": 11434
+    },
+    {
+      "epoch": 0.8250658393159926,
+      "grad_norm": 0.15092572569847107,
+      "learning_rate": 0.0001670168855534709,
+      "loss": 0.1565,
+      "step": 11435
+    },
+    {
+      "epoch": 0.8251379919910531,
+      "grad_norm": 0.13632875680923462,
+      "learning_rate": 0.00016701399913407418,
+      "loss": 0.1183,
+      "step": 11436
+    },
+    {
+      "epoch": 0.8252101446661135,
+      "grad_norm": 0.10911676287651062,
+      "learning_rate": 0.00016701111271467744,
+      "loss": 0.1335,
+      "step": 11437
+    },
+    {
+      "epoch": 0.825282297341174,
+      "grad_norm": 0.15608882904052734,
+      "learning_rate": 0.00016700822629528073,
+      "loss": 0.134,
+      "step": 11438
+    },
+    {
+      "epoch": 0.8253544500162343,
+      "grad_norm": 0.11503724753856659,
+      "learning_rate": 0.000167005339875884,
+      "loss": 0.1257,
+      "step": 11439
+    },
+    {
+      "epoch": 0.8254266026912948,
+      "grad_norm": 0.1044754907488823,
+      "learning_rate": 0.00016700245345648723,
+      "loss": 0.1657,
+      "step": 11440
+    },
+    {
+      "epoch": 0.8254987553663552,
+      "grad_norm": 0.1386835128068924,
+      "learning_rate": 0.0001669995670370905,
+      "loss": 0.1441,
+      "step": 11441
+    },
+    {
+      "epoch": 0.8255709080414156,
+      "grad_norm": 0.11585705727338791,
+      "learning_rate": 0.00016699668061769375,
+      "loss": 0.1287,
+      "step": 11442
+    },
+    {
+      "epoch": 0.8256430607164761,
+      "grad_norm": 0.14055432379245758,
+      "learning_rate": 0.00016699379419829702,
+      "loss": 0.1557,
+      "step": 11443
+    },
+    {
+      "epoch": 0.8257152133915365,
+      "grad_norm": 0.14461514353752136,
+      "learning_rate": 0.00016699090777890028,
+      "loss": 0.2014,
+      "step": 11444
+    },
+    {
+      "epoch": 0.825787366066597,
+      "grad_norm": 0.11708839982748032,
+      "learning_rate": 0.00016698802135950354,
+      "loss": 0.136,
+      "step": 11445
+    },
+    {
+      "epoch": 0.8258595187416573,
+      "grad_norm": 0.14736014604568481,
+      "learning_rate": 0.0001669851349401068,
+      "loss": 0.2258,
+      "step": 11446
+    },
+    {
+      "epoch": 0.8259316714167177,
+      "grad_norm": 0.13729187846183777,
+      "learning_rate": 0.00016698224852071007,
+      "loss": 0.1322,
+      "step": 11447
+    },
+    {
+      "epoch": 0.8260038240917782,
+      "grad_norm": 0.17847642302513123,
+      "learning_rate": 0.00016697936210131333,
+      "loss": 0.158,
+      "step": 11448
+    },
+    {
+      "epoch": 0.8260759767668386,
+      "grad_norm": 0.12275891751050949,
+      "learning_rate": 0.0001669764756819166,
+      "loss": 0.1545,
+      "step": 11449
+    },
+    {
+      "epoch": 0.8261481294418991,
+      "grad_norm": 0.12103191018104553,
+      "learning_rate": 0.00016697358926251986,
+      "loss": 0.1496,
+      "step": 11450
+    },
+    {
+      "epoch": 0.8262202821169595,
+      "grad_norm": 0.12052446603775024,
+      "learning_rate": 0.0001669707028431231,
+      "loss": 0.1507,
+      "step": 11451
+    },
+    {
+      "epoch": 0.8262924347920199,
+      "grad_norm": 0.13198426365852356,
+      "learning_rate": 0.00016696781642372638,
+      "loss": 0.1461,
+      "step": 11452
+    },
+    {
+      "epoch": 0.8263645874670803,
+      "grad_norm": 0.11974554508924484,
+      "learning_rate": 0.00016696493000432965,
+      "loss": 0.1452,
+      "step": 11453
+    },
+    {
+      "epoch": 0.8264367401421407,
+      "grad_norm": 0.14511948823928833,
+      "learning_rate": 0.0001669620435849329,
+      "loss": 0.1544,
+      "step": 11454
+    },
+    {
+      "epoch": 0.8265088928172012,
+      "grad_norm": 0.12905672192573547,
+      "learning_rate": 0.00016695915716553617,
+      "loss": 0.2065,
+      "step": 11455
+    },
+    {
+      "epoch": 0.8265810454922616,
+      "grad_norm": 0.12299144268035889,
+      "learning_rate": 0.0001669562707461394,
+      "loss": 0.1579,
+      "step": 11456
+    },
+    {
+      "epoch": 0.8266531981673221,
+      "grad_norm": 0.11316408962011337,
+      "learning_rate": 0.00016695338432674267,
+      "loss": 0.1802,
+      "step": 11457
+    },
+    {
+      "epoch": 0.8267253508423825,
+      "grad_norm": 0.11111404746770859,
+      "learning_rate": 0.00016695049790734593,
+      "loss": 0.1292,
+      "step": 11458
+    },
+    {
+      "epoch": 0.8267975035174429,
+      "grad_norm": 0.11728944629430771,
+      "learning_rate": 0.0001669476114879492,
+      "loss": 0.1598,
+      "step": 11459
+    },
+    {
+      "epoch": 0.8268696561925033,
+      "grad_norm": 0.13308461010456085,
+      "learning_rate": 0.0001669447250685525,
+      "loss": 0.147,
+      "step": 11460
+    },
+    {
+      "epoch": 0.8269418088675637,
+      "grad_norm": 0.12469086796045303,
+      "learning_rate": 0.00016694183864915572,
+      "loss": 0.1544,
+      "step": 11461
+    },
+    {
+      "epoch": 0.8270139615426242,
+      "grad_norm": 0.1318177431821823,
+      "learning_rate": 0.00016693895222975899,
+      "loss": 0.1395,
+      "step": 11462
+    },
+    {
+      "epoch": 0.8270861142176846,
+      "grad_norm": 0.10883750021457672,
+      "learning_rate": 0.00016693606581036225,
+      "loss": 0.1476,
+      "step": 11463
+    },
+    {
+      "epoch": 0.8271582668927451,
+      "grad_norm": 0.11540371179580688,
+      "learning_rate": 0.0001669331793909655,
+      "loss": 0.1527,
+      "step": 11464
+    },
+    {
+      "epoch": 0.8272304195678055,
+      "grad_norm": 0.11190015822649002,
+      "learning_rate": 0.00016693029297156877,
+      "loss": 0.1358,
+      "step": 11465
+    },
+    {
+      "epoch": 0.8273025722428659,
+      "grad_norm": 0.14315339922904968,
+      "learning_rate": 0.00016692740655217204,
+      "loss": 0.1627,
+      "step": 11466
+    },
+    {
+      "epoch": 0.8273747249179263,
+      "grad_norm": 0.11390077322721481,
+      "learning_rate": 0.0001669245201327753,
+      "loss": 0.1291,
+      "step": 11467
+    },
+    {
+      "epoch": 0.8274468775929867,
+      "grad_norm": 0.11460059136152267,
+      "learning_rate": 0.00016692163371337856,
+      "loss": 0.163,
+      "step": 11468
+    },
+    {
+      "epoch": 0.8275190302680472,
+      "grad_norm": 0.12776412069797516,
+      "learning_rate": 0.00016691874729398183,
+      "loss": 0.1456,
+      "step": 11469
+    },
+    {
+      "epoch": 0.8275911829431076,
+      "grad_norm": 0.1419527381658554,
+      "learning_rate": 0.0001669158608745851,
+      "loss": 0.1697,
+      "step": 11470
+    },
+    {
+      "epoch": 0.8276633356181681,
+      "grad_norm": 0.1077413484454155,
+      "learning_rate": 0.00016691297445518835,
+      "loss": 0.0857,
+      "step": 11471
+    },
+    {
+      "epoch": 0.8277354882932285,
+      "grad_norm": 0.11748011410236359,
+      "learning_rate": 0.0001669100880357916,
+      "loss": 0.1341,
+      "step": 11472
+    },
+    {
+      "epoch": 0.8278076409682888,
+      "grad_norm": 0.12573198974132538,
+      "learning_rate": 0.00016690720161639485,
+      "loss": 0.1627,
+      "step": 11473
+    },
+    {
+      "epoch": 0.8278797936433493,
+      "grad_norm": 0.1180257648229599,
+      "learning_rate": 0.00016690431519699814,
+      "loss": 0.1353,
+      "step": 11474
+    },
+    {
+      "epoch": 0.8279519463184097,
+      "grad_norm": 0.11512475460767746,
+      "learning_rate": 0.0001669014287776014,
+      "loss": 0.1908,
+      "step": 11475
+    },
+    {
+      "epoch": 0.8280240989934702,
+      "grad_norm": 0.11620759218931198,
+      "learning_rate": 0.00016689854235820467,
+      "loss": 0.185,
+      "step": 11476
+    },
+    {
+      "epoch": 0.8280962516685306,
+      "grad_norm": 0.11351757496595383,
+      "learning_rate": 0.0001668956559388079,
+      "loss": 0.1665,
+      "step": 11477
+    },
+    {
+      "epoch": 0.8281684043435911,
+      "grad_norm": 0.11081854999065399,
+      "learning_rate": 0.00016689276951941116,
+      "loss": 0.1247,
+      "step": 11478
+    },
+    {
+      "epoch": 0.8282405570186515,
+      "grad_norm": 0.12272868305444717,
+      "learning_rate": 0.00016688988310001443,
+      "loss": 0.177,
+      "step": 11479
+    },
+    {
+      "epoch": 0.8283127096937118,
+      "grad_norm": 0.15450416505336761,
+      "learning_rate": 0.0001668869966806177,
+      "loss": 0.1583,
+      "step": 11480
+    },
+    {
+      "epoch": 0.8283848623687723,
+      "grad_norm": 0.12492774426937103,
+      "learning_rate": 0.00016688411026122098,
+      "loss": 0.1731,
+      "step": 11481
+    },
+    {
+      "epoch": 0.8284570150438327,
+      "grad_norm": 0.1554734855890274,
+      "learning_rate": 0.00016688122384182422,
+      "loss": 0.1951,
+      "step": 11482
+    },
+    {
+      "epoch": 0.8285291677188932,
+      "grad_norm": 0.12958884239196777,
+      "learning_rate": 0.00016687833742242748,
+      "loss": 0.1956,
+      "step": 11483
+    },
+    {
+      "epoch": 0.8286013203939536,
+      "grad_norm": 0.11311990767717361,
+      "learning_rate": 0.00016687545100303074,
+      "loss": 0.1696,
+      "step": 11484
+    },
+    {
+      "epoch": 0.8286734730690141,
+      "grad_norm": 0.14542226493358612,
+      "learning_rate": 0.000166872564583634,
+      "loss": 0.1311,
+      "step": 11485
+    },
+    {
+      "epoch": 0.8287456257440745,
+      "grad_norm": 0.13559548556804657,
+      "learning_rate": 0.00016686967816423727,
+      "loss": 0.1058,
+      "step": 11486
+    },
+    {
+      "epoch": 0.8288177784191348,
+      "grad_norm": 0.13337862491607666,
+      "learning_rate": 0.00016686679174484053,
+      "loss": 0.1734,
+      "step": 11487
+    },
+    {
+      "epoch": 0.8288899310941953,
+      "grad_norm": 0.10532380640506744,
+      "learning_rate": 0.0001668639053254438,
+      "loss": 0.1618,
+      "step": 11488
+    },
+    {
+      "epoch": 0.8289620837692557,
+      "grad_norm": 0.1238148957490921,
+      "learning_rate": 0.00016686101890604706,
+      "loss": 0.1336,
+      "step": 11489
+    },
+    {
+      "epoch": 0.8290342364443162,
+      "grad_norm": 0.11113140732049942,
+      "learning_rate": 0.00016685813248665032,
+      "loss": 0.1732,
+      "step": 11490
+    },
+    {
+      "epoch": 0.8291063891193766,
+      "grad_norm": 0.11272302269935608,
+      "learning_rate": 0.00016685524606725358,
+      "loss": 0.1699,
+      "step": 11491
+    },
+    {
+      "epoch": 0.8291785417944371,
+      "grad_norm": 0.14038799703121185,
+      "learning_rate": 0.00016685235964785685,
+      "loss": 0.1912,
+      "step": 11492
+    },
+    {
+      "epoch": 0.8292506944694975,
+      "grad_norm": 0.15837141871452332,
+      "learning_rate": 0.0001668494732284601,
+      "loss": 0.19,
+      "step": 11493
+    },
+    {
+      "epoch": 0.8293228471445578,
+      "grad_norm": 0.11943506449460983,
+      "learning_rate": 0.00016684658680906334,
+      "loss": 0.1677,
+      "step": 11494
+    },
+    {
+      "epoch": 0.8293949998196183,
+      "grad_norm": 0.11513236910104752,
+      "learning_rate": 0.00016684370038966663,
+      "loss": 0.2095,
+      "step": 11495
+    },
+    {
+      "epoch": 0.8294671524946787,
+      "grad_norm": 0.12636685371398926,
+      "learning_rate": 0.0001668408139702699,
+      "loss": 0.1642,
+      "step": 11496
+    },
+    {
+      "epoch": 0.8295393051697392,
+      "grad_norm": 0.13415803015232086,
+      "learning_rate": 0.00016683792755087316,
+      "loss": 0.1485,
+      "step": 11497
+    },
+    {
+      "epoch": 0.8296114578447996,
+      "grad_norm": 0.11816070973873138,
+      "learning_rate": 0.00016683504113147642,
+      "loss": 0.1304,
+      "step": 11498
+    },
+    {
+      "epoch": 0.8296836105198601,
+      "grad_norm": 0.13376086950302124,
+      "learning_rate": 0.00016683215471207966,
+      "loss": 0.14,
+      "step": 11499
+    },
+    {
+      "epoch": 0.8297557631949205,
+      "grad_norm": 0.12860022485256195,
+      "learning_rate": 0.00016682926829268292,
+      "loss": 0.0953,
+      "step": 11500
+    },
+    {
+      "epoch": 0.8298279158699808,
+      "grad_norm": 0.1409020721912384,
+      "learning_rate": 0.00016682638187328619,
+      "loss": 0.1394,
+      "step": 11501
+    },
+    {
+      "epoch": 0.8299000685450413,
+      "grad_norm": 0.14115962386131287,
+      "learning_rate": 0.00016682349545388948,
+      "loss": 0.1678,
+      "step": 11502
+    },
+    {
+      "epoch": 0.8299722212201017,
+      "grad_norm": 0.17172449827194214,
+      "learning_rate": 0.00016682060903449274,
+      "loss": 0.1547,
+      "step": 11503
+    },
+    {
+      "epoch": 0.8300443738951622,
+      "grad_norm": 0.12612241506576538,
+      "learning_rate": 0.00016681772261509597,
+      "loss": 0.0994,
+      "step": 11504
+    },
+    {
+      "epoch": 0.8301165265702226,
+      "grad_norm": 0.11162726581096649,
+      "learning_rate": 0.00016681483619569924,
+      "loss": 0.142,
+      "step": 11505
+    },
+    {
+      "epoch": 0.8301886792452831,
+      "grad_norm": 0.12488222122192383,
+      "learning_rate": 0.0001668119497763025,
+      "loss": 0.1436,
+      "step": 11506
+    },
+    {
+      "epoch": 0.8302608319203435,
+      "grad_norm": 0.12031426280736923,
+      "learning_rate": 0.00016680906335690576,
+      "loss": 0.1167,
+      "step": 11507
+    },
+    {
+      "epoch": 0.8303329845954038,
+      "grad_norm": 0.11629743129014969,
+      "learning_rate": 0.00016680617693750903,
+      "loss": 0.1639,
+      "step": 11508
+    },
+    {
+      "epoch": 0.8304051372704643,
+      "grad_norm": 0.13165660202503204,
+      "learning_rate": 0.0001668032905181123,
+      "loss": 0.194,
+      "step": 11509
+    },
+    {
+      "epoch": 0.8304772899455247,
+      "grad_norm": 0.12788671255111694,
+      "learning_rate": 0.00016680040409871555,
+      "loss": 0.1567,
+      "step": 11510
+    },
+    {
+      "epoch": 0.8305494426205852,
+      "grad_norm": 0.11516663432121277,
+      "learning_rate": 0.00016679751767931881,
+      "loss": 0.1759,
+      "step": 11511
+    },
+    {
+      "epoch": 0.8306215952956456,
+      "grad_norm": 0.133749857544899,
+      "learning_rate": 0.00016679463125992208,
+      "loss": 0.1222,
+      "step": 11512
+    },
+    {
+      "epoch": 0.830693747970706,
+      "grad_norm": 0.1398380994796753,
+      "learning_rate": 0.00016679174484052534,
+      "loss": 0.1513,
+      "step": 11513
+    },
+    {
+      "epoch": 0.8307659006457664,
+      "grad_norm": 0.12789134681224823,
+      "learning_rate": 0.0001667888584211286,
+      "loss": 0.1611,
+      "step": 11514
+    },
+    {
+      "epoch": 0.8308380533208268,
+      "grad_norm": 0.1294834166765213,
+      "learning_rate": 0.00016678597200173184,
+      "loss": 0.1273,
+      "step": 11515
+    },
+    {
+      "epoch": 0.8309102059958873,
+      "grad_norm": 0.11569526791572571,
+      "learning_rate": 0.00016678308558233513,
+      "loss": 0.1147,
+      "step": 11516
+    },
+    {
+      "epoch": 0.8309823586709477,
+      "grad_norm": 0.1409730315208435,
+      "learning_rate": 0.0001667801991629384,
+      "loss": 0.2149,
+      "step": 11517
+    },
+    {
+      "epoch": 0.8310545113460082,
+      "grad_norm": 0.10316772013902664,
+      "learning_rate": 0.00016677731274354165,
+      "loss": 0.1332,
+      "step": 11518
+    },
+    {
+      "epoch": 0.8311266640210686,
+      "grad_norm": 0.1244623139500618,
+      "learning_rate": 0.00016677442632414492,
+      "loss": 0.1979,
+      "step": 11519
+    },
+    {
+      "epoch": 0.831198816696129,
+      "grad_norm": 0.11210250854492188,
+      "learning_rate": 0.00016677153990474815,
+      "loss": 0.1677,
+      "step": 11520
+    },
+    {
+      "epoch": 0.8312709693711894,
+      "grad_norm": 0.14468421041965485,
+      "learning_rate": 0.00016676865348535142,
+      "loss": 0.1433,
+      "step": 11521
+    },
+    {
+      "epoch": 0.8313431220462498,
+      "grad_norm": 0.13782362639904022,
+      "learning_rate": 0.00016676576706595468,
+      "loss": 0.2326,
+      "step": 11522
+    },
+    {
+      "epoch": 0.8314152747213103,
+      "grad_norm": 0.12275179475545883,
+      "learning_rate": 0.00016676288064655797,
+      "loss": 0.1579,
+      "step": 11523
+    },
+    {
+      "epoch": 0.8314874273963707,
+      "grad_norm": 0.12560491263866425,
+      "learning_rate": 0.00016675999422716123,
+      "loss": 0.1762,
+      "step": 11524
+    },
+    {
+      "epoch": 0.8315595800714312,
+      "grad_norm": 0.11914512515068054,
+      "learning_rate": 0.00016675710780776447,
+      "loss": 0.176,
+      "step": 11525
+    },
+    {
+      "epoch": 0.8316317327464916,
+      "grad_norm": 0.12210630625486374,
+      "learning_rate": 0.00016675422138836773,
+      "loss": 0.1536,
+      "step": 11526
+    },
+    {
+      "epoch": 0.831703885421552,
+      "grad_norm": 0.15344761312007904,
+      "learning_rate": 0.000166751334968971,
+      "loss": 0.1431,
+      "step": 11527
+    },
+    {
+      "epoch": 0.8317760380966124,
+      "grad_norm": 0.12144190818071365,
+      "learning_rate": 0.00016674844854957426,
+      "loss": 0.1842,
+      "step": 11528
+    },
+    {
+      "epoch": 0.8318481907716728,
+      "grad_norm": 0.12379223108291626,
+      "learning_rate": 0.00016674556213017752,
+      "loss": 0.1751,
+      "step": 11529
+    },
+    {
+      "epoch": 0.8319203434467333,
+      "grad_norm": 0.1101490706205368,
+      "learning_rate": 0.00016674267571078078,
+      "loss": 0.1572,
+      "step": 11530
+    },
+    {
+      "epoch": 0.8319924961217937,
+      "grad_norm": 0.13809335231781006,
+      "learning_rate": 0.00016673978929138405,
+      "loss": 0.2072,
+      "step": 11531
+    },
+    {
+      "epoch": 0.8320646487968542,
+      "grad_norm": 0.11991512775421143,
+      "learning_rate": 0.0001667369028719873,
+      "loss": 0.1055,
+      "step": 11532
+    },
+    {
+      "epoch": 0.8321368014719146,
+      "grad_norm": 0.11620807647705078,
+      "learning_rate": 0.00016673401645259057,
+      "loss": 0.1341,
+      "step": 11533
+    },
+    {
+      "epoch": 0.832208954146975,
+      "grad_norm": 0.16210775077342987,
+      "learning_rate": 0.00016673113003319383,
+      "loss": 0.2117,
+      "step": 11534
+    },
+    {
+      "epoch": 0.8322811068220354,
+      "grad_norm": 0.1260756105184555,
+      "learning_rate": 0.0001667282436137971,
+      "loss": 0.1628,
+      "step": 11535
+    },
+    {
+      "epoch": 0.8323532594970958,
+      "grad_norm": 0.10844235867261887,
+      "learning_rate": 0.00016672535719440033,
+      "loss": 0.1438,
+      "step": 11536
+    },
+    {
+      "epoch": 0.8324254121721563,
+      "grad_norm": 0.11905061453580856,
+      "learning_rate": 0.00016672247077500362,
+      "loss": 0.1055,
+      "step": 11537
+    },
+    {
+      "epoch": 0.8324975648472167,
+      "grad_norm": 0.15732115507125854,
+      "learning_rate": 0.00016671958435560689,
+      "loss": 0.1399,
+      "step": 11538
+    },
+    {
+      "epoch": 0.8325697175222772,
+      "grad_norm": 0.11067745834589005,
+      "learning_rate": 0.00016671669793621015,
+      "loss": 0.1822,
+      "step": 11539
+    },
+    {
+      "epoch": 0.8326418701973376,
+      "grad_norm": 0.14369891583919525,
+      "learning_rate": 0.0001667138115168134,
+      "loss": 0.1583,
+      "step": 11540
+    },
+    {
+      "epoch": 0.832714022872398,
+      "grad_norm": 0.11254069209098816,
+      "learning_rate": 0.00016671092509741665,
+      "loss": 0.1024,
+      "step": 11541
+    },
+    {
+      "epoch": 0.8327861755474584,
+      "grad_norm": 0.13814480602741241,
+      "learning_rate": 0.0001667080386780199,
+      "loss": 0.141,
+      "step": 11542
+    },
+    {
+      "epoch": 0.8328583282225188,
+      "grad_norm": 0.1405809223651886,
+      "learning_rate": 0.00016670515225862317,
+      "loss": 0.2155,
+      "step": 11543
+    },
+    {
+      "epoch": 0.8329304808975793,
+      "grad_norm": 0.13315117359161377,
+      "learning_rate": 0.00016670226583922646,
+      "loss": 0.1662,
+      "step": 11544
+    },
+    {
+      "epoch": 0.8330026335726397,
+      "grad_norm": 0.14672383666038513,
+      "learning_rate": 0.00016669937941982973,
+      "loss": 0.1382,
+      "step": 11545
+    },
+    {
+      "epoch": 0.8330747862477001,
+      "grad_norm": 0.11861515045166016,
+      "learning_rate": 0.00016669649300043296,
+      "loss": 0.1208,
+      "step": 11546
+    },
+    {
+      "epoch": 0.8331469389227606,
+      "grad_norm": 0.14986343681812286,
+      "learning_rate": 0.00016669360658103623,
+      "loss": 0.097,
+      "step": 11547
+    },
+    {
+      "epoch": 0.833219091597821,
+      "grad_norm": 0.13064922392368317,
+      "learning_rate": 0.0001666907201616395,
+      "loss": 0.1685,
+      "step": 11548
+    },
+    {
+      "epoch": 0.8332912442728814,
+      "grad_norm": 0.12011728435754776,
+      "learning_rate": 0.00016668783374224275,
+      "loss": 0.1227,
+      "step": 11549
+    },
+    {
+      "epoch": 0.8333633969479418,
+      "grad_norm": 0.11312943696975708,
+      "learning_rate": 0.00016668494732284601,
+      "loss": 0.0844,
+      "step": 11550
+    },
+    {
+      "epoch": 0.8334355496230023,
+      "grad_norm": 0.12065355479717255,
+      "learning_rate": 0.00016668206090344928,
+      "loss": 0.112,
+      "step": 11551
+    },
+    {
+      "epoch": 0.8335077022980627,
+      "grad_norm": 0.13878248631954193,
+      "learning_rate": 0.00016667917448405254,
+      "loss": 0.1753,
+      "step": 11552
+    },
+    {
+      "epoch": 0.8335798549731231,
+      "grad_norm": 0.16975809633731842,
+      "learning_rate": 0.0001666762880646558,
+      "loss": 0.1151,
+      "step": 11553
+    },
+    {
+      "epoch": 0.8336520076481836,
+      "grad_norm": 0.13543464243412018,
+      "learning_rate": 0.00016667340164525907,
+      "loss": 0.1407,
+      "step": 11554
+    },
+    {
+      "epoch": 0.833724160323244,
+      "grad_norm": 0.12031517177820206,
+      "learning_rate": 0.00016667051522586233,
+      "loss": 0.1399,
+      "step": 11555
+    },
+    {
+      "epoch": 0.8337963129983044,
+      "grad_norm": 0.14035508036613464,
+      "learning_rate": 0.0001666676288064656,
+      "loss": 0.1424,
+      "step": 11556
+    },
+    {
+      "epoch": 0.8338684656733648,
+      "grad_norm": 0.14016559720039368,
+      "learning_rate": 0.00016666474238706883,
+      "loss": 0.125,
+      "step": 11557
+    },
+    {
+      "epoch": 0.8339406183484253,
+      "grad_norm": 0.1287499964237213,
+      "learning_rate": 0.00016666185596767212,
+      "loss": 0.1863,
+      "step": 11558
+    },
+    {
+      "epoch": 0.8340127710234857,
+      "grad_norm": 0.1210627481341362,
+      "learning_rate": 0.00016665896954827538,
+      "loss": 0.1541,
+      "step": 11559
+    },
+    {
+      "epoch": 0.8340849236985461,
+      "grad_norm": 0.14031276106834412,
+      "learning_rate": 0.00016665608312887864,
+      "loss": 0.1555,
+      "step": 11560
+    },
+    {
+      "epoch": 0.8341570763736066,
+      "grad_norm": 0.1310780644416809,
+      "learning_rate": 0.0001666531967094819,
+      "loss": 0.1365,
+      "step": 11561
+    },
+    {
+      "epoch": 0.834229229048667,
+      "grad_norm": 0.14506569504737854,
+      "learning_rate": 0.00016665031029008514,
+      "loss": 0.1649,
+      "step": 11562
+    },
+    {
+      "epoch": 0.8343013817237274,
+      "grad_norm": 0.1415039449930191,
+      "learning_rate": 0.0001666474238706884,
+      "loss": 0.1915,
+      "step": 11563
+    },
+    {
+      "epoch": 0.8343735343987878,
+      "grad_norm": 0.10610788315534592,
+      "learning_rate": 0.00016664453745129167,
+      "loss": 0.1244,
+      "step": 11564
+    },
+    {
+      "epoch": 0.8344456870738483,
+      "grad_norm": 0.10591522604227066,
+      "learning_rate": 0.00016664165103189496,
+      "loss": 0.1504,
+      "step": 11565
+    },
+    {
+      "epoch": 0.8345178397489087,
+      "grad_norm": 0.11307922005653381,
+      "learning_rate": 0.00016663876461249822,
+      "loss": 0.1631,
+      "step": 11566
+    },
+    {
+      "epoch": 0.8345899924239691,
+      "grad_norm": 0.11754350364208221,
+      "learning_rate": 0.00016663587819310146,
+      "loss": 0.1145,
+      "step": 11567
+    },
+    {
+      "epoch": 0.8346621450990296,
+      "grad_norm": 0.1252533346414566,
+      "learning_rate": 0.00016663299177370472,
+      "loss": 0.1688,
+      "step": 11568
+    },
+    {
+      "epoch": 0.83473429777409,
+      "grad_norm": 0.14636370539665222,
+      "learning_rate": 0.00016663010535430798,
+      "loss": 0.1494,
+      "step": 11569
+    },
+    {
+      "epoch": 0.8348064504491504,
+      "grad_norm": 0.1493082493543625,
+      "learning_rate": 0.00016662721893491125,
+      "loss": 0.1801,
+      "step": 11570
+    },
+    {
+      "epoch": 0.8348786031242108,
+      "grad_norm": 0.13885556161403656,
+      "learning_rate": 0.0001666243325155145,
+      "loss": 0.1435,
+      "step": 11571
+    },
+    {
+      "epoch": 0.8349507557992712,
+      "grad_norm": 0.11697115749120712,
+      "learning_rate": 0.00016662144609611777,
+      "loss": 0.1327,
+      "step": 11572
+    },
+    {
+      "epoch": 0.8350229084743317,
+      "grad_norm": 0.12583328783512115,
+      "learning_rate": 0.00016661855967672103,
+      "loss": 0.1206,
+      "step": 11573
+    },
+    {
+      "epoch": 0.8350950611493921,
+      "grad_norm": 0.11333484947681427,
+      "learning_rate": 0.0001666156732573243,
+      "loss": 0.1397,
+      "step": 11574
+    },
+    {
+      "epoch": 0.8351672138244526,
+      "grad_norm": 0.1309988647699356,
+      "learning_rate": 0.00016661278683792756,
+      "loss": 0.1506,
+      "step": 11575
+    },
+    {
+      "epoch": 0.8352393664995129,
+      "grad_norm": 0.13478970527648926,
+      "learning_rate": 0.00016660990041853082,
+      "loss": 0.1074,
+      "step": 11576
+    },
+    {
+      "epoch": 0.8353115191745734,
+      "grad_norm": 0.13597935438156128,
+      "learning_rate": 0.00016660701399913409,
+      "loss": 0.1678,
+      "step": 11577
+    },
+    {
+      "epoch": 0.8353836718496338,
+      "grad_norm": 0.11729282885789871,
+      "learning_rate": 0.00016660412757973732,
+      "loss": 0.1036,
+      "step": 11578
+    },
+    {
+      "epoch": 0.8354558245246942,
+      "grad_norm": 0.12911324203014374,
+      "learning_rate": 0.0001666012411603406,
+      "loss": 0.1369,
+      "step": 11579
+    },
+    {
+      "epoch": 0.8355279771997547,
+      "grad_norm": 0.12044049799442291,
+      "learning_rate": 0.00016659835474094387,
+      "loss": 0.1524,
+      "step": 11580
+    },
+    {
+      "epoch": 0.8356001298748151,
+      "grad_norm": 0.12708896398544312,
+      "learning_rate": 0.00016659546832154714,
+      "loss": 0.1234,
+      "step": 11581
+    },
+    {
+      "epoch": 0.8356722825498756,
+      "grad_norm": 0.15077067911624908,
+      "learning_rate": 0.0001665925819021504,
+      "loss": 0.1612,
+      "step": 11582
+    },
+    {
+      "epoch": 0.8357444352249359,
+      "grad_norm": 0.13292542099952698,
+      "learning_rate": 0.00016658969548275364,
+      "loss": 0.1269,
+      "step": 11583
+    },
+    {
+      "epoch": 0.8358165878999964,
+      "grad_norm": 0.10909565538167953,
+      "learning_rate": 0.0001665868090633569,
+      "loss": 0.1611,
+      "step": 11584
+    },
+    {
+      "epoch": 0.8358887405750568,
+      "grad_norm": 0.11976361274719238,
+      "learning_rate": 0.00016658392264396016,
+      "loss": 0.2101,
+      "step": 11585
+    },
+    {
+      "epoch": 0.8359608932501172,
+      "grad_norm": 0.13796712458133698,
+      "learning_rate": 0.00016658103622456345,
+      "loss": 0.1632,
+      "step": 11586
+    },
+    {
+      "epoch": 0.8360330459251777,
+      "grad_norm": 0.1254863739013672,
+      "learning_rate": 0.00016657814980516671,
+      "loss": 0.2033,
+      "step": 11587
+    },
+    {
+      "epoch": 0.8361051986002381,
+      "grad_norm": 0.11761124432086945,
+      "learning_rate": 0.00016657526338576995,
+      "loss": 0.1263,
+      "step": 11588
+    },
+    {
+      "epoch": 0.8361773512752986,
+      "grad_norm": 0.11324866116046906,
+      "learning_rate": 0.00016657237696637321,
+      "loss": 0.1696,
+      "step": 11589
+    },
+    {
+      "epoch": 0.8362495039503589,
+      "grad_norm": 0.11937769502401352,
+      "learning_rate": 0.00016656949054697648,
+      "loss": 0.1739,
+      "step": 11590
+    },
+    {
+      "epoch": 0.8363216566254194,
+      "grad_norm": 0.12692390382289886,
+      "learning_rate": 0.00016656660412757974,
+      "loss": 0.1929,
+      "step": 11591
+    },
+    {
+      "epoch": 0.8363938093004798,
+      "grad_norm": 0.11676450073719025,
+      "learning_rate": 0.000166563717708183,
+      "loss": 0.0951,
+      "step": 11592
+    },
+    {
+      "epoch": 0.8364659619755402,
+      "grad_norm": 0.1368885040283203,
+      "learning_rate": 0.00016656083128878627,
+      "loss": 0.0942,
+      "step": 11593
+    },
+    {
+      "epoch": 0.8365381146506007,
+      "grad_norm": 0.11106109619140625,
+      "learning_rate": 0.00016655794486938953,
+      "loss": 0.1062,
+      "step": 11594
+    },
+    {
+      "epoch": 0.8366102673256611,
+      "grad_norm": 0.13033699989318848,
+      "learning_rate": 0.0001665550584499928,
+      "loss": 0.1791,
+      "step": 11595
+    },
+    {
+      "epoch": 0.8366824200007216,
+      "grad_norm": 0.12796767055988312,
+      "learning_rate": 0.00016655217203059605,
+      "loss": 0.1027,
+      "step": 11596
+    },
+    {
+      "epoch": 0.8367545726757819,
+      "grad_norm": 0.13466742634773254,
+      "learning_rate": 0.00016654928561119932,
+      "loss": 0.1332,
+      "step": 11597
+    },
+    {
+      "epoch": 0.8368267253508423,
+      "grad_norm": 0.12785984575748444,
+      "learning_rate": 0.00016654639919180258,
+      "loss": 0.1346,
+      "step": 11598
+    },
+    {
+      "epoch": 0.8368988780259028,
+      "grad_norm": 0.12983115017414093,
+      "learning_rate": 0.00016654351277240584,
+      "loss": 0.1502,
+      "step": 11599
+    },
+    {
+      "epoch": 0.8369710307009632,
+      "grad_norm": 0.14171342551708221,
+      "learning_rate": 0.0001665406263530091,
+      "loss": 0.1731,
+      "step": 11600
+    },
+    {
+      "epoch": 0.8370431833760237,
+      "grad_norm": 0.10917267203330994,
+      "learning_rate": 0.00016653773993361237,
+      "loss": 0.1408,
+      "step": 11601
+    },
+    {
+      "epoch": 0.8371153360510841,
+      "grad_norm": 0.13165688514709473,
+      "learning_rate": 0.00016653485351421563,
+      "loss": 0.1488,
+      "step": 11602
+    },
+    {
+      "epoch": 0.8371874887261446,
+      "grad_norm": 0.12978290021419525,
+      "learning_rate": 0.0001665319670948189,
+      "loss": 0.1539,
+      "step": 11603
+    },
+    {
+      "epoch": 0.8372596414012049,
+      "grad_norm": 0.1484910249710083,
+      "learning_rate": 0.00016652908067542216,
+      "loss": 0.163,
+      "step": 11604
+    },
+    {
+      "epoch": 0.8373317940762653,
+      "grad_norm": 0.11736516654491425,
+      "learning_rate": 0.0001665261942560254,
+      "loss": 0.1658,
+      "step": 11605
+    },
+    {
+      "epoch": 0.8374039467513258,
+      "grad_norm": 0.14045096933841705,
+      "learning_rate": 0.00016652330783662866,
+      "loss": 0.1278,
+      "step": 11606
+    },
+    {
+      "epoch": 0.8374760994263862,
+      "grad_norm": 0.13117900490760803,
+      "learning_rate": 0.00016652042141723195,
+      "loss": 0.1536,
+      "step": 11607
+    },
+    {
+      "epoch": 0.8375482521014467,
+      "grad_norm": 0.1257036328315735,
+      "learning_rate": 0.0001665175349978352,
+      "loss": 0.1652,
+      "step": 11608
+    },
+    {
+      "epoch": 0.8376204047765071,
+      "grad_norm": 0.12943686544895172,
+      "learning_rate": 0.00016651464857843847,
+      "loss": 0.1552,
+      "step": 11609
+    },
+    {
+      "epoch": 0.8376925574515676,
+      "grad_norm": 0.13568373024463654,
+      "learning_rate": 0.0001665117621590417,
+      "loss": 0.1832,
+      "step": 11610
+    },
+    {
+      "epoch": 0.8377647101266279,
+      "grad_norm": 0.13962949812412262,
+      "learning_rate": 0.00016650887573964497,
+      "loss": 0.1481,
+      "step": 11611
+    },
+    {
+      "epoch": 0.8378368628016883,
+      "grad_norm": 0.14716729521751404,
+      "learning_rate": 0.00016650598932024823,
+      "loss": 0.1601,
+      "step": 11612
+    },
+    {
+      "epoch": 0.8379090154767488,
+      "grad_norm": 0.13211050629615784,
+      "learning_rate": 0.0001665031029008515,
+      "loss": 0.1902,
+      "step": 11613
+    },
+    {
+      "epoch": 0.8379811681518092,
+      "grad_norm": 0.15353256464004517,
+      "learning_rate": 0.0001665002164814548,
+      "loss": 0.1344,
+      "step": 11614
+    },
+    {
+      "epoch": 0.8380533208268697,
+      "grad_norm": 0.12700878083705902,
+      "learning_rate": 0.00016649733006205802,
+      "loss": 0.1139,
+      "step": 11615
+    },
+    {
+      "epoch": 0.8381254735019301,
+      "grad_norm": 0.11437607556581497,
+      "learning_rate": 0.00016649444364266129,
+      "loss": 0.1528,
+      "step": 11616
+    },
+    {
+      "epoch": 0.8381976261769906,
+      "grad_norm": 0.12418381124734879,
+      "learning_rate": 0.00016649155722326455,
+      "loss": 0.1254,
+      "step": 11617
+    },
+    {
+      "epoch": 0.8382697788520509,
+      "grad_norm": 0.1047118529677391,
+      "learning_rate": 0.0001664886708038678,
+      "loss": 0.0956,
+      "step": 11618
+    },
+    {
+      "epoch": 0.8383419315271113,
+      "grad_norm": 0.1184152364730835,
+      "learning_rate": 0.00016648578438447107,
+      "loss": 0.1315,
+      "step": 11619
+    },
+    {
+      "epoch": 0.8384140842021718,
+      "grad_norm": 0.10887156426906586,
+      "learning_rate": 0.00016648289796507434,
+      "loss": 0.1824,
+      "step": 11620
+    },
+    {
+      "epoch": 0.8384862368772322,
+      "grad_norm": 0.14598292112350464,
+      "learning_rate": 0.0001664800115456776,
+      "loss": 0.1581,
+      "step": 11621
+    },
+    {
+      "epoch": 0.8385583895522927,
+      "grad_norm": 0.1101701483130455,
+      "learning_rate": 0.00016647712512628086,
+      "loss": 0.1703,
+      "step": 11622
+    },
+    {
+      "epoch": 0.8386305422273531,
+      "grad_norm": 0.0960790365934372,
+      "learning_rate": 0.00016647423870688413,
+      "loss": 0.1462,
+      "step": 11623
+    },
+    {
+      "epoch": 0.8387026949024136,
+      "grad_norm": 0.12851935625076294,
+      "learning_rate": 0.0001664713522874874,
+      "loss": 0.1799,
+      "step": 11624
+    },
+    {
+      "epoch": 0.8387748475774739,
+      "grad_norm": 0.11664525419473648,
+      "learning_rate": 0.00016646846586809065,
+      "loss": 0.1433,
+      "step": 11625
+    },
+    {
+      "epoch": 0.8388470002525343,
+      "grad_norm": 0.13805916905403137,
+      "learning_rate": 0.0001664655794486939,
+      "loss": 0.1731,
+      "step": 11626
+    },
+    {
+      "epoch": 0.8389191529275948,
+      "grad_norm": 0.12190663814544678,
+      "learning_rate": 0.00016646269302929715,
+      "loss": 0.1282,
+      "step": 11627
+    },
+    {
+      "epoch": 0.8389913056026552,
+      "grad_norm": 0.13862162828445435,
+      "learning_rate": 0.00016645980660990044,
+      "loss": 0.1863,
+      "step": 11628
+    },
+    {
+      "epoch": 0.8390634582777157,
+      "grad_norm": 0.14046546816825867,
+      "learning_rate": 0.0001664569201905037,
+      "loss": 0.1539,
+      "step": 11629
+    },
+    {
+      "epoch": 0.8391356109527761,
+      "grad_norm": 0.13328999280929565,
+      "learning_rate": 0.00016645403377110697,
+      "loss": 0.1191,
+      "step": 11630
+    },
+    {
+      "epoch": 0.8392077636278366,
+      "grad_norm": 0.13714025914669037,
+      "learning_rate": 0.0001664511473517102,
+      "loss": 0.1423,
+      "step": 11631
+    },
+    {
+      "epoch": 0.8392799163028969,
+      "grad_norm": 0.16360893845558167,
+      "learning_rate": 0.00016644826093231347,
+      "loss": 0.2118,
+      "step": 11632
+    },
+    {
+      "epoch": 0.8393520689779573,
+      "grad_norm": 0.13132081925868988,
+      "learning_rate": 0.00016644537451291673,
+      "loss": 0.17,
+      "step": 11633
+    },
+    {
+      "epoch": 0.8394242216530178,
+      "grad_norm": 0.12430932372808456,
+      "learning_rate": 0.00016644248809352,
+      "loss": 0.2066,
+      "step": 11634
+    },
+    {
+      "epoch": 0.8394963743280782,
+      "grad_norm": 0.12707607448101044,
+      "learning_rate": 0.00016643960167412328,
+      "loss": 0.1372,
+      "step": 11635
+    },
+    {
+      "epoch": 0.8395685270031387,
+      "grad_norm": 0.14341352880001068,
+      "learning_rate": 0.00016643671525472652,
+      "loss": 0.1295,
+      "step": 11636
+    },
+    {
+      "epoch": 0.8396406796781991,
+      "grad_norm": 0.1361696720123291,
+      "learning_rate": 0.00016643382883532978,
+      "loss": 0.1591,
+      "step": 11637
+    },
+    {
+      "epoch": 0.8397128323532594,
+      "grad_norm": 0.1292620152235031,
+      "learning_rate": 0.00016643094241593304,
+      "loss": 0.159,
+      "step": 11638
+    },
+    {
+      "epoch": 0.8397849850283199,
+      "grad_norm": 0.11830408871173859,
+      "learning_rate": 0.0001664280559965363,
+      "loss": 0.1404,
+      "step": 11639
+    },
+    {
+      "epoch": 0.8398571377033803,
+      "grad_norm": 0.12927722930908203,
+      "learning_rate": 0.00016642516957713957,
+      "loss": 0.1789,
+      "step": 11640
+    },
+    {
+      "epoch": 0.8399292903784408,
+      "grad_norm": 0.14580398797988892,
+      "learning_rate": 0.00016642228315774283,
+      "loss": 0.1429,
+      "step": 11641
+    },
+    {
+      "epoch": 0.8400014430535012,
+      "grad_norm": 0.09831997752189636,
+      "learning_rate": 0.0001664193967383461,
+      "loss": 0.1347,
+      "step": 11642
+    },
+    {
+      "epoch": 0.8400735957285617,
+      "grad_norm": 0.11972676217556,
+      "learning_rate": 0.00016641651031894936,
+      "loss": 0.1382,
+      "step": 11643
+    },
+    {
+      "epoch": 0.8401457484036221,
+      "grad_norm": 0.11414449661970139,
+      "learning_rate": 0.00016641362389955262,
+      "loss": 0.163,
+      "step": 11644
+    },
+    {
+      "epoch": 0.8402179010786824,
+      "grad_norm": 0.1057526171207428,
+      "learning_rate": 0.00016641073748015588,
+      "loss": 0.0997,
+      "step": 11645
+    },
+    {
+      "epoch": 0.8402900537537429,
+      "grad_norm": 0.13895364105701447,
+      "learning_rate": 0.00016640785106075915,
+      "loss": 0.137,
+      "step": 11646
+    },
+    {
+      "epoch": 0.8403622064288033,
+      "grad_norm": 0.10537075996398926,
+      "learning_rate": 0.00016640496464136238,
+      "loss": 0.1089,
+      "step": 11647
+    },
+    {
+      "epoch": 0.8404343591038638,
+      "grad_norm": 0.12381959706544876,
+      "learning_rate": 0.00016640207822196564,
+      "loss": 0.1533,
+      "step": 11648
+    },
+    {
+      "epoch": 0.8405065117789242,
+      "grad_norm": 0.1299159824848175,
+      "learning_rate": 0.00016639919180256893,
+      "loss": 0.1418,
+      "step": 11649
+    },
+    {
+      "epoch": 0.8405786644539847,
+      "grad_norm": 0.14879825711250305,
+      "learning_rate": 0.0001663963053831722,
+      "loss": 0.1463,
+      "step": 11650
+    },
+    {
+      "epoch": 0.8406508171290451,
+      "grad_norm": 0.14904411137104034,
+      "learning_rate": 0.00016639341896377546,
+      "loss": 0.1493,
+      "step": 11651
+    },
+    {
+      "epoch": 0.8407229698041054,
+      "grad_norm": 0.14877448976039886,
+      "learning_rate": 0.0001663905325443787,
+      "loss": 0.1724,
+      "step": 11652
+    },
+    {
+      "epoch": 0.8407951224791659,
+      "grad_norm": 0.11669496446847916,
+      "learning_rate": 0.00016638764612498196,
+      "loss": 0.1217,
+      "step": 11653
+    },
+    {
+      "epoch": 0.8408672751542263,
+      "grad_norm": 0.13598738610744476,
+      "learning_rate": 0.00016638475970558522,
+      "loss": 0.1389,
+      "step": 11654
+    },
+    {
+      "epoch": 0.8409394278292868,
+      "grad_norm": 0.12701286375522614,
+      "learning_rate": 0.00016638187328618849,
+      "loss": 0.1995,
+      "step": 11655
+    },
+    {
+      "epoch": 0.8410115805043472,
+      "grad_norm": 0.13989871740341187,
+      "learning_rate": 0.00016637898686679175,
+      "loss": 0.119,
+      "step": 11656
+    },
+    {
+      "epoch": 0.8410837331794077,
+      "grad_norm": 0.12764325737953186,
+      "learning_rate": 0.000166376100447395,
+      "loss": 0.1129,
+      "step": 11657
+    },
+    {
+      "epoch": 0.8411558858544681,
+      "grad_norm": 0.1132327988743782,
+      "learning_rate": 0.00016637321402799827,
+      "loss": 0.187,
+      "step": 11658
+    },
+    {
+      "epoch": 0.8412280385295284,
+      "grad_norm": 0.13291585445404053,
+      "learning_rate": 0.00016637032760860154,
+      "loss": 0.1254,
+      "step": 11659
+    },
+    {
+      "epoch": 0.8413001912045889,
+      "grad_norm": 0.11555839329957962,
+      "learning_rate": 0.0001663674411892048,
+      "loss": 0.1212,
+      "step": 11660
+    },
+    {
+      "epoch": 0.8413723438796493,
+      "grad_norm": 0.15152710676193237,
+      "learning_rate": 0.00016636455476980806,
+      "loss": 0.1276,
+      "step": 11661
+    },
+    {
+      "epoch": 0.8414444965547098,
+      "grad_norm": 0.14820121228694916,
+      "learning_rate": 0.00016636166835041133,
+      "loss": 0.143,
+      "step": 11662
+    },
+    {
+      "epoch": 0.8415166492297702,
+      "grad_norm": 0.1499357670545578,
+      "learning_rate": 0.00016635878193101456,
+      "loss": 0.1566,
+      "step": 11663
+    },
+    {
+      "epoch": 0.8415888019048307,
+      "grad_norm": 0.16589221358299255,
+      "learning_rate": 0.00016635589551161785,
+      "loss": 0.1108,
+      "step": 11664
+    },
+    {
+      "epoch": 0.8416609545798911,
+      "grad_norm": 0.14025495946407318,
+      "learning_rate": 0.00016635300909222111,
+      "loss": 0.1645,
+      "step": 11665
+    },
+    {
+      "epoch": 0.8417331072549514,
+      "grad_norm": 0.10733349621295929,
+      "learning_rate": 0.00016635012267282438,
+      "loss": 0.1862,
+      "step": 11666
+    },
+    {
+      "epoch": 0.8418052599300119,
+      "grad_norm": 0.11976485699415207,
+      "learning_rate": 0.00016634723625342764,
+      "loss": 0.1642,
+      "step": 11667
+    },
+    {
+      "epoch": 0.8418774126050723,
+      "grad_norm": 0.1251676231622696,
+      "learning_rate": 0.00016634434983403088,
+      "loss": 0.1462,
+      "step": 11668
+    },
+    {
+      "epoch": 0.8419495652801328,
+      "grad_norm": 0.14736083149909973,
+      "learning_rate": 0.00016634146341463414,
+      "loss": 0.1838,
+      "step": 11669
+    },
+    {
+      "epoch": 0.8420217179551932,
+      "grad_norm": 0.11634893715381622,
+      "learning_rate": 0.0001663385769952374,
+      "loss": 0.1775,
+      "step": 11670
+    },
+    {
+      "epoch": 0.8420938706302536,
+      "grad_norm": 0.12968191504478455,
+      "learning_rate": 0.0001663356905758407,
+      "loss": 0.1387,
+      "step": 11671
+    },
+    {
+      "epoch": 0.8421660233053141,
+      "grad_norm": 0.12363044917583466,
+      "learning_rate": 0.00016633280415644395,
+      "loss": 0.1612,
+      "step": 11672
+    },
+    {
+      "epoch": 0.8422381759803744,
+      "grad_norm": 0.11333352327346802,
+      "learning_rate": 0.0001663299177370472,
+      "loss": 0.1801,
+      "step": 11673
+    },
+    {
+      "epoch": 0.8423103286554349,
+      "grad_norm": 0.11646721512079239,
+      "learning_rate": 0.00016632703131765045,
+      "loss": 0.1249,
+      "step": 11674
+    },
+    {
+      "epoch": 0.8423824813304953,
+      "grad_norm": 0.13021302223205566,
+      "learning_rate": 0.00016632414489825372,
+      "loss": 0.1557,
+      "step": 11675
+    },
+    {
+      "epoch": 0.8424546340055558,
+      "grad_norm": 0.13064372539520264,
+      "learning_rate": 0.00016632125847885698,
+      "loss": 0.1488,
+      "step": 11676
+    },
+    {
+      "epoch": 0.8425267866806162,
+      "grad_norm": 0.11225259304046631,
+      "learning_rate": 0.00016631837205946024,
+      "loss": 0.1922,
+      "step": 11677
+    },
+    {
+      "epoch": 0.8425989393556766,
+      "grad_norm": 0.1373172402381897,
+      "learning_rate": 0.0001663154856400635,
+      "loss": 0.1163,
+      "step": 11678
+    },
+    {
+      "epoch": 0.8426710920307371,
+      "grad_norm": 0.14084011316299438,
+      "learning_rate": 0.00016631259922066677,
+      "loss": 0.1572,
+      "step": 11679
+    },
+    {
+      "epoch": 0.8427432447057974,
+      "grad_norm": 0.12305203080177307,
+      "learning_rate": 0.00016630971280127003,
+      "loss": 0.159,
+      "step": 11680
+    },
+    {
+      "epoch": 0.8428153973808579,
+      "grad_norm": 0.11790595948696136,
+      "learning_rate": 0.0001663068263818733,
+      "loss": 0.2064,
+      "step": 11681
+    },
+    {
+      "epoch": 0.8428875500559183,
+      "grad_norm": 0.15226507186889648,
+      "learning_rate": 0.00016630393996247656,
+      "loss": 0.1463,
+      "step": 11682
+    },
+    {
+      "epoch": 0.8429597027309788,
+      "grad_norm": 0.12697234749794006,
+      "learning_rate": 0.00016630105354307982,
+      "loss": 0.1668,
+      "step": 11683
+    },
+    {
+      "epoch": 0.8430318554060392,
+      "grad_norm": 0.12504594027996063,
+      "learning_rate": 0.00016629816712368306,
+      "loss": 0.1444,
+      "step": 11684
+    },
+    {
+      "epoch": 0.8431040080810996,
+      "grad_norm": 0.0949978157877922,
+      "learning_rate": 0.00016629528070428635,
+      "loss": 0.1314,
+      "step": 11685
+    },
+    {
+      "epoch": 0.8431761607561601,
+      "grad_norm": 0.11104533821344376,
+      "learning_rate": 0.0001662923942848896,
+      "loss": 0.1454,
+      "step": 11686
+    },
+    {
+      "epoch": 0.8432483134312204,
+      "grad_norm": 0.1141478419303894,
+      "learning_rate": 0.00016628950786549287,
+      "loss": 0.0956,
+      "step": 11687
+    },
+    {
+      "epoch": 0.8433204661062809,
+      "grad_norm": 0.14451053738594055,
+      "learning_rate": 0.00016628662144609613,
+      "loss": 0.1338,
+      "step": 11688
+    },
+    {
+      "epoch": 0.8433926187813413,
+      "grad_norm": 0.1483115404844284,
+      "learning_rate": 0.00016628373502669937,
+      "loss": 0.1578,
+      "step": 11689
+    },
+    {
+      "epoch": 0.8434647714564018,
+      "grad_norm": 0.12317361682653427,
+      "learning_rate": 0.00016628084860730263,
+      "loss": 0.1108,
+      "step": 11690
+    },
+    {
+      "epoch": 0.8435369241314622,
+      "grad_norm": 0.15117180347442627,
+      "learning_rate": 0.0001662779621879059,
+      "loss": 0.1499,
+      "step": 11691
+    },
+    {
+      "epoch": 0.8436090768065226,
+      "grad_norm": 0.11195392906665802,
+      "learning_rate": 0.00016627507576850919,
+      "loss": 0.1197,
+      "step": 11692
+    },
+    {
+      "epoch": 0.8436812294815831,
+      "grad_norm": 0.10647310316562653,
+      "learning_rate": 0.00016627218934911245,
+      "loss": 0.097,
+      "step": 11693
+    },
+    {
+      "epoch": 0.8437533821566434,
+      "grad_norm": 0.11704418808221817,
+      "learning_rate": 0.00016626930292971568,
+      "loss": 0.161,
+      "step": 11694
+    },
+    {
+      "epoch": 0.8438255348317039,
+      "grad_norm": 0.12431292235851288,
+      "learning_rate": 0.00016626641651031895,
+      "loss": 0.1694,
+      "step": 11695
+    },
+    {
+      "epoch": 0.8438976875067643,
+      "grad_norm": 0.12180831283330917,
+      "learning_rate": 0.0001662635300909222,
+      "loss": 0.1201,
+      "step": 11696
+    },
+    {
+      "epoch": 0.8439698401818247,
+      "grad_norm": 0.1429813951253891,
+      "learning_rate": 0.00016626064367152547,
+      "loss": 0.1343,
+      "step": 11697
+    },
+    {
+      "epoch": 0.8440419928568852,
+      "grad_norm": 0.1192222610116005,
+      "learning_rate": 0.00016625775725212874,
+      "loss": 0.0985,
+      "step": 11698
+    },
+    {
+      "epoch": 0.8441141455319456,
+      "grad_norm": 0.10998225212097168,
+      "learning_rate": 0.000166254870832732,
+      "loss": 0.1364,
+      "step": 11699
+    },
+    {
+      "epoch": 0.844186298207006,
+      "grad_norm": 0.12994202971458435,
+      "learning_rate": 0.00016625198441333526,
+      "loss": 0.1249,
+      "step": 11700
+    },
+    {
+      "epoch": 0.8442584508820664,
+      "grad_norm": 0.11415591090917587,
+      "learning_rate": 0.00016624909799393853,
+      "loss": 0.1583,
+      "step": 11701
+    },
+    {
+      "epoch": 0.8443306035571269,
+      "grad_norm": 0.14631566405296326,
+      "learning_rate": 0.0001662462115745418,
+      "loss": 0.1399,
+      "step": 11702
+    },
+    {
+      "epoch": 0.8444027562321873,
+      "grad_norm": 0.1491636484861374,
+      "learning_rate": 0.00016624332515514505,
+      "loss": 0.1385,
+      "step": 11703
+    },
+    {
+      "epoch": 0.8444749089072477,
+      "grad_norm": 0.12752553820610046,
+      "learning_rate": 0.00016624043873574831,
+      "loss": 0.1757,
+      "step": 11704
+    },
+    {
+      "epoch": 0.8445470615823082,
+      "grad_norm": 0.12609128654003143,
+      "learning_rate": 0.00016623755231635158,
+      "loss": 0.1507,
+      "step": 11705
+    },
+    {
+      "epoch": 0.8446192142573686,
+      "grad_norm": 0.10666986554861069,
+      "learning_rate": 0.00016623466589695484,
+      "loss": 0.1555,
+      "step": 11706
+    },
+    {
+      "epoch": 0.844691366932429,
+      "grad_norm": 0.11576931178569794,
+      "learning_rate": 0.0001662317794775581,
+      "loss": 0.1805,
+      "step": 11707
+    },
+    {
+      "epoch": 0.8447635196074894,
+      "grad_norm": 0.12209343910217285,
+      "learning_rate": 0.00016622889305816137,
+      "loss": 0.1064,
+      "step": 11708
+    },
+    {
+      "epoch": 0.8448356722825499,
+      "grad_norm": 0.10738017410039902,
+      "learning_rate": 0.00016622600663876463,
+      "loss": 0.1554,
+      "step": 11709
+    },
+    {
+      "epoch": 0.8449078249576103,
+      "grad_norm": 0.14066986739635468,
+      "learning_rate": 0.0001662231202193679,
+      "loss": 0.1137,
+      "step": 11710
+    },
+    {
+      "epoch": 0.8449799776326707,
+      "grad_norm": 0.14349329471588135,
+      "learning_rate": 0.00016622023379997113,
+      "loss": 0.1877,
+      "step": 11711
+    },
+    {
+      "epoch": 0.8450521303077312,
+      "grad_norm": 0.12119587510824203,
+      "learning_rate": 0.0001662173473805744,
+      "loss": 0.185,
+      "step": 11712
+    },
+    {
+      "epoch": 0.8451242829827916,
+      "grad_norm": 0.12557916343212128,
+      "learning_rate": 0.00016621446096117768,
+      "loss": 0.1786,
+      "step": 11713
+    },
+    {
+      "epoch": 0.845196435657852,
+      "grad_norm": 0.12383130192756653,
+      "learning_rate": 0.00016621157454178094,
+      "loss": 0.1696,
+      "step": 11714
+    },
+    {
+      "epoch": 0.8452685883329124,
+      "grad_norm": 0.10766246169805527,
+      "learning_rate": 0.0001662086881223842,
+      "loss": 0.1514,
+      "step": 11715
+    },
+    {
+      "epoch": 0.8453407410079729,
+      "grad_norm": 0.14209994673728943,
+      "learning_rate": 0.00016620580170298744,
+      "loss": 0.1789,
+      "step": 11716
+    },
+    {
+      "epoch": 0.8454128936830333,
+      "grad_norm": 0.11148392409086227,
+      "learning_rate": 0.0001662029152835907,
+      "loss": 0.1402,
+      "step": 11717
+    },
+    {
+      "epoch": 0.8454850463580937,
+      "grad_norm": 0.1409093290567398,
+      "learning_rate": 0.00016620002886419397,
+      "loss": 0.1454,
+      "step": 11718
+    },
+    {
+      "epoch": 0.8455571990331542,
+      "grad_norm": 0.13453932106494904,
+      "learning_rate": 0.00016619714244479723,
+      "loss": 0.1292,
+      "step": 11719
+    },
+    {
+      "epoch": 0.8456293517082146,
+      "grad_norm": 0.13617649674415588,
+      "learning_rate": 0.00016619425602540052,
+      "loss": 0.1129,
+      "step": 11720
+    },
+    {
+      "epoch": 0.845701504383275,
+      "grad_norm": 0.12087401002645493,
+      "learning_rate": 0.00016619136960600376,
+      "loss": 0.1551,
+      "step": 11721
+    },
+    {
+      "epoch": 0.8457736570583354,
+      "grad_norm": 0.1239386722445488,
+      "learning_rate": 0.00016618848318660702,
+      "loss": 0.1647,
+      "step": 11722
+    },
+    {
+      "epoch": 0.8458458097333958,
+      "grad_norm": 0.17587853968143463,
+      "learning_rate": 0.00016618559676721028,
+      "loss": 0.2084,
+      "step": 11723
+    },
+    {
+      "epoch": 0.8459179624084563,
+      "grad_norm": 0.15436212718486786,
+      "learning_rate": 0.00016618271034781355,
+      "loss": 0.1165,
+      "step": 11724
+    },
+    {
+      "epoch": 0.8459901150835167,
+      "grad_norm": 0.13399550318717957,
+      "learning_rate": 0.0001661798239284168,
+      "loss": 0.1525,
+      "step": 11725
+    },
+    {
+      "epoch": 0.8460622677585772,
+      "grad_norm": 0.1814533770084381,
+      "learning_rate": 0.00016617693750902007,
+      "loss": 0.1584,
+      "step": 11726
+    },
+    {
+      "epoch": 0.8461344204336376,
+      "grad_norm": 0.11515140533447266,
+      "learning_rate": 0.00016617405108962333,
+      "loss": 0.1214,
+      "step": 11727
+    },
+    {
+      "epoch": 0.846206573108698,
+      "grad_norm": 0.11948894709348679,
+      "learning_rate": 0.0001661711646702266,
+      "loss": 0.2008,
+      "step": 11728
+    },
+    {
+      "epoch": 0.8462787257837584,
+      "grad_norm": 0.13980422914028168,
+      "learning_rate": 0.00016616827825082986,
+      "loss": 0.1524,
+      "step": 11729
+    },
+    {
+      "epoch": 0.8463508784588188,
+      "grad_norm": 0.10919107496738434,
+      "learning_rate": 0.00016616539183143312,
+      "loss": 0.1528,
+      "step": 11730
+    },
+    {
+      "epoch": 0.8464230311338793,
+      "grad_norm": 0.12131740152835846,
+      "learning_rate": 0.00016616250541203639,
+      "loss": 0.1795,
+      "step": 11731
+    },
+    {
+      "epoch": 0.8464951838089397,
+      "grad_norm": 0.1246170774102211,
+      "learning_rate": 0.00016615961899263962,
+      "loss": 0.1201,
+      "step": 11732
+    },
+    {
+      "epoch": 0.8465673364840002,
+      "grad_norm": 0.11502742022275925,
+      "learning_rate": 0.00016615673257324288,
+      "loss": 0.1408,
+      "step": 11733
+    },
+    {
+      "epoch": 0.8466394891590606,
+      "grad_norm": 0.14159716665744781,
+      "learning_rate": 0.00016615384615384617,
+      "loss": 0.1671,
+      "step": 11734
+    },
+    {
+      "epoch": 0.846711641834121,
+      "grad_norm": 0.12725681066513062,
+      "learning_rate": 0.00016615095973444944,
+      "loss": 0.1122,
+      "step": 11735
+    },
+    {
+      "epoch": 0.8467837945091814,
+      "grad_norm": 0.15508094429969788,
+      "learning_rate": 0.0001661480733150527,
+      "loss": 0.1269,
+      "step": 11736
+    },
+    {
+      "epoch": 0.8468559471842418,
+      "grad_norm": 0.15546204149723053,
+      "learning_rate": 0.00016614518689565594,
+      "loss": 0.1178,
+      "step": 11737
+    },
+    {
+      "epoch": 0.8469280998593023,
+      "grad_norm": 0.12115608155727386,
+      "learning_rate": 0.0001661423004762592,
+      "loss": 0.1976,
+      "step": 11738
+    },
+    {
+      "epoch": 0.8470002525343627,
+      "grad_norm": 0.14080692827701569,
+      "learning_rate": 0.00016613941405686246,
+      "loss": 0.1395,
+      "step": 11739
+    },
+    {
+      "epoch": 0.8470724052094232,
+      "grad_norm": 0.125188946723938,
+      "learning_rate": 0.00016613652763746573,
+      "loss": 0.0898,
+      "step": 11740
+    },
+    {
+      "epoch": 0.8471445578844836,
+      "grad_norm": 0.1310095638036728,
+      "learning_rate": 0.00016613364121806902,
+      "loss": 0.1495,
+      "step": 11741
+    },
+    {
+      "epoch": 0.847216710559544,
+      "grad_norm": 0.13303005695343018,
+      "learning_rate": 0.00016613075479867225,
+      "loss": 0.1362,
+      "step": 11742
+    },
+    {
+      "epoch": 0.8472888632346044,
+      "grad_norm": 0.12238302826881409,
+      "learning_rate": 0.00016612786837927551,
+      "loss": 0.1302,
+      "step": 11743
+    },
+    {
+      "epoch": 0.8473610159096648,
+      "grad_norm": 0.1152992770075798,
+      "learning_rate": 0.00016612498195987878,
+      "loss": 0.1432,
+      "step": 11744
+    },
+    {
+      "epoch": 0.8474331685847253,
+      "grad_norm": 0.12224925309419632,
+      "learning_rate": 0.00016612209554048204,
+      "loss": 0.1798,
+      "step": 11745
+    },
+    {
+      "epoch": 0.8475053212597857,
+      "grad_norm": 0.13379549980163574,
+      "learning_rate": 0.0001661192091210853,
+      "loss": 0.1379,
+      "step": 11746
+    },
+    {
+      "epoch": 0.8475774739348462,
+      "grad_norm": 0.12874086201190948,
+      "learning_rate": 0.00016611632270168857,
+      "loss": 0.1613,
+      "step": 11747
+    },
+    {
+      "epoch": 0.8476496266099066,
+      "grad_norm": 0.1376962661743164,
+      "learning_rate": 0.00016611343628229183,
+      "loss": 0.1632,
+      "step": 11748
+    },
+    {
+      "epoch": 0.847721779284967,
+      "grad_norm": 0.1415809541940689,
+      "learning_rate": 0.0001661105498628951,
+      "loss": 0.1397,
+      "step": 11749
+    },
+    {
+      "epoch": 0.8477939319600274,
+      "grad_norm": 0.12963339686393738,
+      "learning_rate": 0.00016610766344349835,
+      "loss": 0.1394,
+      "step": 11750
+    },
+    {
+      "epoch": 0.8478660846350878,
+      "grad_norm": 0.14303992688655853,
+      "learning_rate": 0.00016610477702410162,
+      "loss": 0.1652,
+      "step": 11751
+    },
+    {
+      "epoch": 0.8479382373101483,
+      "grad_norm": 0.14118750393390656,
+      "learning_rate": 0.00016610189060470488,
+      "loss": 0.1125,
+      "step": 11752
+    },
+    {
+      "epoch": 0.8480103899852087,
+      "grad_norm": 0.1844176948070526,
+      "learning_rate": 0.00016609900418530812,
+      "loss": 0.1899,
+      "step": 11753
+    },
+    {
+      "epoch": 0.8480825426602692,
+      "grad_norm": 0.15066631138324738,
+      "learning_rate": 0.00016609611776591138,
+      "loss": 0.2096,
+      "step": 11754
+    },
+    {
+      "epoch": 0.8481546953353296,
+      "grad_norm": 0.15434785187244415,
+      "learning_rate": 0.00016609323134651467,
+      "loss": 0.0953,
+      "step": 11755
+    },
+    {
+      "epoch": 0.8482268480103899,
+      "grad_norm": 0.1429002583026886,
+      "learning_rate": 0.00016609034492711793,
+      "loss": 0.1368,
+      "step": 11756
+    },
+    {
+      "epoch": 0.8482990006854504,
+      "grad_norm": 0.1369299292564392,
+      "learning_rate": 0.0001660874585077212,
+      "loss": 0.1339,
+      "step": 11757
+    },
+    {
+      "epoch": 0.8483711533605108,
+      "grad_norm": 0.11154896765947342,
+      "learning_rate": 0.00016608457208832443,
+      "loss": 0.1767,
+      "step": 11758
+    },
+    {
+      "epoch": 0.8484433060355713,
+      "grad_norm": 0.11356501281261444,
+      "learning_rate": 0.0001660816856689277,
+      "loss": 0.1901,
+      "step": 11759
+    },
+    {
+      "epoch": 0.8485154587106317,
+      "grad_norm": 0.12390024214982986,
+      "learning_rate": 0.00016607879924953096,
+      "loss": 0.175,
+      "step": 11760
+    },
+    {
+      "epoch": 0.8485876113856922,
+      "grad_norm": 0.1285458654165268,
+      "learning_rate": 0.00016607591283013422,
+      "loss": 0.1187,
+      "step": 11761
+    },
+    {
+      "epoch": 0.8486597640607525,
+      "grad_norm": 0.15921713411808014,
+      "learning_rate": 0.0001660730264107375,
+      "loss": 0.158,
+      "step": 11762
+    },
+    {
+      "epoch": 0.8487319167358129,
+      "grad_norm": 0.12651832401752472,
+      "learning_rate": 0.00016607013999134075,
+      "loss": 0.1487,
+      "step": 11763
+    },
+    {
+      "epoch": 0.8488040694108734,
+      "grad_norm": 0.2776840031147003,
+      "learning_rate": 0.000166067253571944,
+      "loss": 0.171,
+      "step": 11764
+    },
+    {
+      "epoch": 0.8488762220859338,
+      "grad_norm": 0.132423996925354,
+      "learning_rate": 0.00016606436715254727,
+      "loss": 0.1564,
+      "step": 11765
+    },
+    {
+      "epoch": 0.8489483747609943,
+      "grad_norm": 0.17471779882907867,
+      "learning_rate": 0.00016606148073315053,
+      "loss": 0.161,
+      "step": 11766
+    },
+    {
+      "epoch": 0.8490205274360547,
+      "grad_norm": 0.11781930923461914,
+      "learning_rate": 0.0001660585943137538,
+      "loss": 0.1571,
+      "step": 11767
+    },
+    {
+      "epoch": 0.8490926801111152,
+      "grad_norm": 0.15253686904907227,
+      "learning_rate": 0.00016605570789435706,
+      "loss": 0.1931,
+      "step": 11768
+    },
+    {
+      "epoch": 0.8491648327861755,
+      "grad_norm": 0.13133996725082397,
+      "learning_rate": 0.00016605282147496032,
+      "loss": 0.1382,
+      "step": 11769
+    },
+    {
+      "epoch": 0.8492369854612359,
+      "grad_norm": 0.11417736113071442,
+      "learning_rate": 0.00016604993505556359,
+      "loss": 0.1565,
+      "step": 11770
+    },
+    {
+      "epoch": 0.8493091381362964,
+      "grad_norm": 0.12417210638523102,
+      "learning_rate": 0.00016604704863616685,
+      "loss": 0.1294,
+      "step": 11771
+    },
+    {
+      "epoch": 0.8493812908113568,
+      "grad_norm": 0.1240205466747284,
+      "learning_rate": 0.0001660441622167701,
+      "loss": 0.1329,
+      "step": 11772
+    },
+    {
+      "epoch": 0.8494534434864173,
+      "grad_norm": 0.13930866122245789,
+      "learning_rate": 0.00016604127579737337,
+      "loss": 0.1656,
+      "step": 11773
+    },
+    {
+      "epoch": 0.8495255961614777,
+      "grad_norm": 0.10060317814350128,
+      "learning_rate": 0.0001660383893779766,
+      "loss": 0.1443,
+      "step": 11774
+    },
+    {
+      "epoch": 0.8495977488365382,
+      "grad_norm": 0.15756677091121674,
+      "learning_rate": 0.00016603550295857987,
+      "loss": 0.156,
+      "step": 11775
+    },
+    {
+      "epoch": 0.8496699015115985,
+      "grad_norm": 0.13482096791267395,
+      "learning_rate": 0.00016603261653918316,
+      "loss": 0.1377,
+      "step": 11776
+    },
+    {
+      "epoch": 0.8497420541866589,
+      "grad_norm": 0.16339333355426788,
+      "learning_rate": 0.00016602973011978643,
+      "loss": 0.1683,
+      "step": 11777
+    },
+    {
+      "epoch": 0.8498142068617194,
+      "grad_norm": 0.16918936371803284,
+      "learning_rate": 0.0001660268437003897,
+      "loss": 0.133,
+      "step": 11778
+    },
+    {
+      "epoch": 0.8498863595367798,
+      "grad_norm": 0.14417551457881927,
+      "learning_rate": 0.00016602395728099292,
+      "loss": 0.1369,
+      "step": 11779
+    },
+    {
+      "epoch": 0.8499585122118403,
+      "grad_norm": 0.1408488005399704,
+      "learning_rate": 0.0001660210708615962,
+      "loss": 0.196,
+      "step": 11780
+    },
+    {
+      "epoch": 0.8500306648869007,
+      "grad_norm": 0.12176976352930069,
+      "learning_rate": 0.00016601818444219945,
+      "loss": 0.1533,
+      "step": 11781
+    },
+    {
+      "epoch": 0.8501028175619612,
+      "grad_norm": 0.17833276093006134,
+      "learning_rate": 0.00016601529802280271,
+      "loss": 0.1841,
+      "step": 11782
+    },
+    {
+      "epoch": 0.8501749702370215,
+      "grad_norm": 0.11696712672710419,
+      "learning_rate": 0.000166012411603406,
+      "loss": 0.1609,
+      "step": 11783
+    },
+    {
+      "epoch": 0.8502471229120819,
+      "grad_norm": 0.12877492606639862,
+      "learning_rate": 0.00016600952518400924,
+      "loss": 0.1472,
+      "step": 11784
+    },
+    {
+      "epoch": 0.8503192755871424,
+      "grad_norm": 0.12504887580871582,
+      "learning_rate": 0.0001660066387646125,
+      "loss": 0.1627,
+      "step": 11785
+    },
+    {
+      "epoch": 0.8503914282622028,
+      "grad_norm": 0.15413124859333038,
+      "learning_rate": 0.00016600375234521577,
+      "loss": 0.1505,
+      "step": 11786
+    },
+    {
+      "epoch": 0.8504635809372633,
+      "grad_norm": 0.11818953603506088,
+      "learning_rate": 0.00016600086592581903,
+      "loss": 0.1606,
+      "step": 11787
+    },
+    {
+      "epoch": 0.8505357336123237,
+      "grad_norm": 0.14028437435626984,
+      "learning_rate": 0.0001659979795064223,
+      "loss": 0.1177,
+      "step": 11788
+    },
+    {
+      "epoch": 0.8506078862873842,
+      "grad_norm": 0.13598275184631348,
+      "learning_rate": 0.00016599509308702555,
+      "loss": 0.1538,
+      "step": 11789
+    },
+    {
+      "epoch": 0.8506800389624445,
+      "grad_norm": 0.12115483731031418,
+      "learning_rate": 0.00016599220666762882,
+      "loss": 0.119,
+      "step": 11790
+    },
+    {
+      "epoch": 0.8507521916375049,
+      "grad_norm": 0.13980644941329956,
+      "learning_rate": 0.00016598932024823208,
+      "loss": 0.1628,
+      "step": 11791
+    },
+    {
+      "epoch": 0.8508243443125654,
+      "grad_norm": 0.17200279235839844,
+      "learning_rate": 0.00016598643382883534,
+      "loss": 0.1413,
+      "step": 11792
+    },
+    {
+      "epoch": 0.8508964969876258,
+      "grad_norm": 0.12813691794872284,
+      "learning_rate": 0.0001659835474094386,
+      "loss": 0.1447,
+      "step": 11793
+    },
+    {
+      "epoch": 0.8509686496626863,
+      "grad_norm": 0.129331573843956,
+      "learning_rate": 0.00016598066099004187,
+      "loss": 0.1292,
+      "step": 11794
+    },
+    {
+      "epoch": 0.8510408023377467,
+      "grad_norm": 0.2023138403892517,
+      "learning_rate": 0.0001659777745706451,
+      "loss": 0.1565,
+      "step": 11795
+    },
+    {
+      "epoch": 0.8511129550128071,
+      "grad_norm": 0.13821351528167725,
+      "learning_rate": 0.00016597488815124837,
+      "loss": 0.1463,
+      "step": 11796
+    },
+    {
+      "epoch": 0.8511851076878675,
+      "grad_norm": 0.12791946530342102,
+      "learning_rate": 0.00016597200173185166,
+      "loss": 0.1642,
+      "step": 11797
+    },
+    {
+      "epoch": 0.8512572603629279,
+      "grad_norm": 0.10310852527618408,
+      "learning_rate": 0.00016596911531245492,
+      "loss": 0.1367,
+      "step": 11798
+    },
+    {
+      "epoch": 0.8513294130379884,
+      "grad_norm": 0.135204017162323,
+      "learning_rate": 0.00016596622889305818,
+      "loss": 0.142,
+      "step": 11799
+    },
+    {
+      "epoch": 0.8514015657130488,
+      "grad_norm": 0.11695842444896698,
+      "learning_rate": 0.00016596334247366142,
+      "loss": 0.1917,
+      "step": 11800
+    },
+    {
+      "epoch": 0.8514737183881093,
+      "grad_norm": 0.12734819948673248,
+      "learning_rate": 0.00016596045605426468,
+      "loss": 0.117,
+      "step": 11801
+    },
+    {
+      "epoch": 0.8515458710631697,
+      "grad_norm": 0.14392152428627014,
+      "learning_rate": 0.00016595756963486794,
+      "loss": 0.1608,
+      "step": 11802
+    },
+    {
+      "epoch": 0.8516180237382301,
+      "grad_norm": 0.10268928110599518,
+      "learning_rate": 0.0001659546832154712,
+      "loss": 0.2276,
+      "step": 11803
+    },
+    {
+      "epoch": 0.8516901764132905,
+      "grad_norm": 0.11644991487264633,
+      "learning_rate": 0.0001659517967960745,
+      "loss": 0.0831,
+      "step": 11804
+    },
+    {
+      "epoch": 0.8517623290883509,
+      "grad_norm": 0.1109037920832634,
+      "learning_rate": 0.00016594891037667773,
+      "loss": 0.1869,
+      "step": 11805
+    },
+    {
+      "epoch": 0.8518344817634114,
+      "grad_norm": 0.1327962577342987,
+      "learning_rate": 0.000165946023957281,
+      "loss": 0.1389,
+      "step": 11806
+    },
+    {
+      "epoch": 0.8519066344384718,
+      "grad_norm": 0.2042866200208664,
+      "learning_rate": 0.00016594313753788426,
+      "loss": 0.1413,
+      "step": 11807
+    },
+    {
+      "epoch": 0.8519787871135323,
+      "grad_norm": 0.14395737648010254,
+      "learning_rate": 0.00016594025111848752,
+      "loss": 0.1604,
+      "step": 11808
+    },
+    {
+      "epoch": 0.8520509397885927,
+      "grad_norm": 0.11978230625391006,
+      "learning_rate": 0.00016593736469909079,
+      "loss": 0.164,
+      "step": 11809
+    },
+    {
+      "epoch": 0.8521230924636531,
+      "grad_norm": 0.1249191090464592,
+      "learning_rate": 0.00016593447827969405,
+      "loss": 0.1563,
+      "step": 11810
+    },
+    {
+      "epoch": 0.8521952451387135,
+      "grad_norm": 0.13559097051620483,
+      "learning_rate": 0.0001659315918602973,
+      "loss": 0.1385,
+      "step": 11811
+    },
+    {
+      "epoch": 0.8522673978137739,
+      "grad_norm": 0.13980789482593536,
+      "learning_rate": 0.00016592870544090057,
+      "loss": 0.1819,
+      "step": 11812
+    },
+    {
+      "epoch": 0.8523395504888344,
+      "grad_norm": 0.1371464878320694,
+      "learning_rate": 0.00016592581902150384,
+      "loss": 0.125,
+      "step": 11813
+    },
+    {
+      "epoch": 0.8524117031638948,
+      "grad_norm": 0.1271425485610962,
+      "learning_rate": 0.0001659229326021071,
+      "loss": 0.1494,
+      "step": 11814
+    },
+    {
+      "epoch": 0.8524838558389553,
+      "grad_norm": 0.12910963594913483,
+      "learning_rate": 0.00016592004618271036,
+      "loss": 0.1224,
+      "step": 11815
+    },
+    {
+      "epoch": 0.8525560085140157,
+      "grad_norm": 0.177041158080101,
+      "learning_rate": 0.0001659171597633136,
+      "loss": 0.2076,
+      "step": 11816
+    },
+    {
+      "epoch": 0.8526281611890761,
+      "grad_norm": 0.10944493860006332,
+      "learning_rate": 0.00016591427334391686,
+      "loss": 0.1419,
+      "step": 11817
+    },
+    {
+      "epoch": 0.8527003138641365,
+      "grad_norm": 0.13446961343288422,
+      "learning_rate": 0.00016591138692452015,
+      "loss": 0.175,
+      "step": 11818
+    },
+    {
+      "epoch": 0.8527724665391969,
+      "grad_norm": 0.14175055921077728,
+      "learning_rate": 0.00016590850050512341,
+      "loss": 0.1884,
+      "step": 11819
+    },
+    {
+      "epoch": 0.8528446192142574,
+      "grad_norm": 0.1306513547897339,
+      "learning_rate": 0.00016590561408572668,
+      "loss": 0.1185,
+      "step": 11820
+    },
+    {
+      "epoch": 0.8529167718893178,
+      "grad_norm": 0.1797804832458496,
+      "learning_rate": 0.0001659027276663299,
+      "loss": 0.1726,
+      "step": 11821
+    },
+    {
+      "epoch": 0.8529889245643782,
+      "grad_norm": 0.14040648937225342,
+      "learning_rate": 0.00016589984124693318,
+      "loss": 0.139,
+      "step": 11822
+    },
+    {
+      "epoch": 0.8530610772394387,
+      "grad_norm": 0.12760290503501892,
+      "learning_rate": 0.00016589695482753644,
+      "loss": 0.171,
+      "step": 11823
+    },
+    {
+      "epoch": 0.853133229914499,
+      "grad_norm": 0.11597708612680435,
+      "learning_rate": 0.0001658940684081397,
+      "loss": 0.1488,
+      "step": 11824
+    },
+    {
+      "epoch": 0.8532053825895595,
+      "grad_norm": 0.1469823122024536,
+      "learning_rate": 0.000165891181988743,
+      "loss": 0.1322,
+      "step": 11825
+    },
+    {
+      "epoch": 0.8532775352646199,
+      "grad_norm": 0.13939408957958221,
+      "learning_rate": 0.00016588829556934623,
+      "loss": 0.1653,
+      "step": 11826
+    },
+    {
+      "epoch": 0.8533496879396804,
+      "grad_norm": 0.12158030271530151,
+      "learning_rate": 0.0001658854091499495,
+      "loss": 0.1278,
+      "step": 11827
+    },
+    {
+      "epoch": 0.8534218406147408,
+      "grad_norm": 0.13025440275669098,
+      "learning_rate": 0.00016588252273055275,
+      "loss": 0.1598,
+      "step": 11828
+    },
+    {
+      "epoch": 0.8534939932898012,
+      "grad_norm": 0.1255502849817276,
+      "learning_rate": 0.00016587963631115602,
+      "loss": 0.1613,
+      "step": 11829
+    },
+    {
+      "epoch": 0.8535661459648617,
+      "grad_norm": 0.11364217847585678,
+      "learning_rate": 0.00016587674989175928,
+      "loss": 0.1492,
+      "step": 11830
+    },
+    {
+      "epoch": 0.853638298639922,
+      "grad_norm": 0.14769354462623596,
+      "learning_rate": 0.00016587386347236254,
+      "loss": 0.1254,
+      "step": 11831
+    },
+    {
+      "epoch": 0.8537104513149825,
+      "grad_norm": 0.11613823473453522,
+      "learning_rate": 0.0001658709770529658,
+      "loss": 0.0857,
+      "step": 11832
+    },
+    {
+      "epoch": 0.8537826039900429,
+      "grad_norm": 0.12310709059238434,
+      "learning_rate": 0.00016586809063356907,
+      "loss": 0.1717,
+      "step": 11833
+    },
+    {
+      "epoch": 0.8538547566651034,
+      "grad_norm": 0.1325674206018448,
+      "learning_rate": 0.00016586520421417233,
+      "loss": 0.1637,
+      "step": 11834
+    },
+    {
+      "epoch": 0.8539269093401638,
+      "grad_norm": 0.15822456777095795,
+      "learning_rate": 0.0001658623177947756,
+      "loss": 0.152,
+      "step": 11835
+    },
+    {
+      "epoch": 0.8539990620152242,
+      "grad_norm": 0.12222934514284134,
+      "learning_rate": 0.00016585943137537886,
+      "loss": 0.1502,
+      "step": 11836
+    },
+    {
+      "epoch": 0.8540712146902847,
+      "grad_norm": 0.11390216648578644,
+      "learning_rate": 0.00016585654495598212,
+      "loss": 0.1064,
+      "step": 11837
+    },
+    {
+      "epoch": 0.854143367365345,
+      "grad_norm": 0.11588306725025177,
+      "learning_rate": 0.00016585365853658536,
+      "loss": 0.1002,
+      "step": 11838
+    },
+    {
+      "epoch": 0.8542155200404055,
+      "grad_norm": 0.11872265487909317,
+      "learning_rate": 0.00016585077211718865,
+      "loss": 0.1486,
+      "step": 11839
+    },
+    {
+      "epoch": 0.8542876727154659,
+      "grad_norm": 0.12992970645427704,
+      "learning_rate": 0.0001658478856977919,
+      "loss": 0.187,
+      "step": 11840
+    },
+    {
+      "epoch": 0.8543598253905264,
+      "grad_norm": 0.11380898952484131,
+      "learning_rate": 0.00016584499927839517,
+      "loss": 0.1943,
+      "step": 11841
+    },
+    {
+      "epoch": 0.8544319780655868,
+      "grad_norm": 0.1165139377117157,
+      "learning_rate": 0.00016584211285899843,
+      "loss": 0.1595,
+      "step": 11842
+    },
+    {
+      "epoch": 0.8545041307406472,
+      "grad_norm": 0.12911410629749298,
+      "learning_rate": 0.00016583922643960167,
+      "loss": 0.1654,
+      "step": 11843
+    },
+    {
+      "epoch": 0.8545762834157077,
+      "grad_norm": 0.12455809116363525,
+      "learning_rate": 0.00016583634002020493,
+      "loss": 0.1554,
+      "step": 11844
+    },
+    {
+      "epoch": 0.854648436090768,
+      "grad_norm": 0.14468957483768463,
+      "learning_rate": 0.0001658334536008082,
+      "loss": 0.1496,
+      "step": 11845
+    },
+    {
+      "epoch": 0.8547205887658285,
+      "grad_norm": 0.13239078223705292,
+      "learning_rate": 0.00016583056718141149,
+      "loss": 0.16,
+      "step": 11846
+    },
+    {
+      "epoch": 0.8547927414408889,
+      "grad_norm": 0.15964853763580322,
+      "learning_rate": 0.00016582768076201475,
+      "loss": 0.1798,
+      "step": 11847
+    },
+    {
+      "epoch": 0.8548648941159493,
+      "grad_norm": 0.125840961933136,
+      "learning_rate": 0.00016582479434261799,
+      "loss": 0.1847,
+      "step": 11848
+    },
+    {
+      "epoch": 0.8549370467910098,
+      "grad_norm": 0.12420251220464706,
+      "learning_rate": 0.00016582190792322125,
+      "loss": 0.1495,
+      "step": 11849
+    },
+    {
+      "epoch": 0.8550091994660702,
+      "grad_norm": 0.12960167229175568,
+      "learning_rate": 0.0001658190215038245,
+      "loss": 0.171,
+      "step": 11850
+    },
+    {
+      "epoch": 0.8550813521411307,
+      "grad_norm": 0.131041020154953,
+      "learning_rate": 0.00016581613508442777,
+      "loss": 0.1571,
+      "step": 11851
+    },
+    {
+      "epoch": 0.855153504816191,
+      "grad_norm": 0.14004012942314148,
+      "learning_rate": 0.00016581324866503104,
+      "loss": 0.1786,
+      "step": 11852
+    },
+    {
+      "epoch": 0.8552256574912515,
+      "grad_norm": 0.16974465548992157,
+      "learning_rate": 0.0001658103622456343,
+      "loss": 0.1707,
+      "step": 11853
+    },
+    {
+      "epoch": 0.8552978101663119,
+      "grad_norm": 0.12199197709560394,
+      "learning_rate": 0.00016580747582623756,
+      "loss": 0.1511,
+      "step": 11854
+    },
+    {
+      "epoch": 0.8553699628413723,
+      "grad_norm": 0.13741664588451385,
+      "learning_rate": 0.00016580458940684083,
+      "loss": 0.1303,
+      "step": 11855
+    },
+    {
+      "epoch": 0.8554421155164328,
+      "grad_norm": 0.15471656620502472,
+      "learning_rate": 0.0001658017029874441,
+      "loss": 0.1828,
+      "step": 11856
+    },
+    {
+      "epoch": 0.8555142681914932,
+      "grad_norm": 0.13550004363059998,
+      "learning_rate": 0.00016579881656804735,
+      "loss": 0.1784,
+      "step": 11857
+    },
+    {
+      "epoch": 0.8555864208665537,
+      "grad_norm": 0.13504238426685333,
+      "learning_rate": 0.00016579593014865061,
+      "loss": 0.1659,
+      "step": 11858
+    },
+    {
+      "epoch": 0.855658573541614,
+      "grad_norm": 0.11602792143821716,
+      "learning_rate": 0.00016579304372925385,
+      "loss": 0.1613,
+      "step": 11859
+    },
+    {
+      "epoch": 0.8557307262166745,
+      "grad_norm": 0.15487723052501678,
+      "learning_rate": 0.0001657901573098571,
+      "loss": 0.1583,
+      "step": 11860
+    },
+    {
+      "epoch": 0.8558028788917349,
+      "grad_norm": 0.12514373660087585,
+      "learning_rate": 0.0001657872708904604,
+      "loss": 0.1089,
+      "step": 11861
+    },
+    {
+      "epoch": 0.8558750315667953,
+      "grad_norm": 0.12423887848854065,
+      "learning_rate": 0.00016578438447106367,
+      "loss": 0.1917,
+      "step": 11862
+    },
+    {
+      "epoch": 0.8559471842418558,
+      "grad_norm": 0.11663633584976196,
+      "learning_rate": 0.00016578149805166693,
+      "loss": 0.1608,
+      "step": 11863
+    },
+    {
+      "epoch": 0.8560193369169162,
+      "grad_norm": 0.11846447736024857,
+      "learning_rate": 0.00016577861163227016,
+      "loss": 0.1215,
+      "step": 11864
+    },
+    {
+      "epoch": 0.8560914895919767,
+      "grad_norm": 0.10441578924655914,
+      "learning_rate": 0.00016577572521287343,
+      "loss": 0.1784,
+      "step": 11865
+    },
+    {
+      "epoch": 0.856163642267037,
+      "grad_norm": 0.0960303395986557,
+      "learning_rate": 0.0001657728387934767,
+      "loss": 0.1237,
+      "step": 11866
+    },
+    {
+      "epoch": 0.8562357949420975,
+      "grad_norm": 0.15245820581912994,
+      "learning_rate": 0.00016576995237407995,
+      "loss": 0.1516,
+      "step": 11867
+    },
+    {
+      "epoch": 0.8563079476171579,
+      "grad_norm": 0.13254792988300323,
+      "learning_rate": 0.00016576706595468324,
+      "loss": 0.1164,
+      "step": 11868
+    },
+    {
+      "epoch": 0.8563801002922183,
+      "grad_norm": 0.13312913477420807,
+      "learning_rate": 0.00016576417953528648,
+      "loss": 0.1717,
+      "step": 11869
+    },
+    {
+      "epoch": 0.8564522529672788,
+      "grad_norm": 0.1143551617860794,
+      "learning_rate": 0.00016576129311588974,
+      "loss": 0.1451,
+      "step": 11870
+    },
+    {
+      "epoch": 0.8565244056423392,
+      "grad_norm": 0.11746720969676971,
+      "learning_rate": 0.000165758406696493,
+      "loss": 0.1395,
+      "step": 11871
+    },
+    {
+      "epoch": 0.8565965583173997,
+      "grad_norm": 0.10211291909217834,
+      "learning_rate": 0.00016575552027709627,
+      "loss": 0.1794,
+      "step": 11872
+    },
+    {
+      "epoch": 0.85666871099246,
+      "grad_norm": 0.13879451155662537,
+      "learning_rate": 0.00016575263385769953,
+      "loss": 0.1596,
+      "step": 11873
+    },
+    {
+      "epoch": 0.8567408636675204,
+      "grad_norm": 0.10806053131818771,
+      "learning_rate": 0.0001657497474383028,
+      "loss": 0.186,
+      "step": 11874
+    },
+    {
+      "epoch": 0.8568130163425809,
+      "grad_norm": 0.11560015380382538,
+      "learning_rate": 0.00016574686101890606,
+      "loss": 0.1438,
+      "step": 11875
+    },
+    {
+      "epoch": 0.8568851690176413,
+      "grad_norm": 0.13607026636600494,
+      "learning_rate": 0.00016574397459950932,
+      "loss": 0.1432,
+      "step": 11876
+    },
+    {
+      "epoch": 0.8569573216927018,
+      "grad_norm": 0.11837439984083176,
+      "learning_rate": 0.00016574108818011258,
+      "loss": 0.0987,
+      "step": 11877
+    },
+    {
+      "epoch": 0.8570294743677622,
+      "grad_norm": 0.1011228933930397,
+      "learning_rate": 0.00016573820176071585,
+      "loss": 0.1205,
+      "step": 11878
+    },
+    {
+      "epoch": 0.8571016270428227,
+      "grad_norm": 0.12492066621780396,
+      "learning_rate": 0.0001657353153413191,
+      "loss": 0.1546,
+      "step": 11879
+    },
+    {
+      "epoch": 0.857173779717883,
+      "grad_norm": 0.1269485205411911,
+      "learning_rate": 0.00016573242892192234,
+      "loss": 0.1232,
+      "step": 11880
+    },
+    {
+      "epoch": 0.8572459323929434,
+      "grad_norm": 0.12102842330932617,
+      "learning_rate": 0.0001657295425025256,
+      "loss": 0.1611,
+      "step": 11881
+    },
+    {
+      "epoch": 0.8573180850680039,
+      "grad_norm": 0.1312066614627838,
+      "learning_rate": 0.0001657266560831289,
+      "loss": 0.1085,
+      "step": 11882
+    },
+    {
+      "epoch": 0.8573902377430643,
+      "grad_norm": 0.1548612266778946,
+      "learning_rate": 0.00016572376966373216,
+      "loss": 0.2026,
+      "step": 11883
+    },
+    {
+      "epoch": 0.8574623904181248,
+      "grad_norm": 0.12773779034614563,
+      "learning_rate": 0.00016572088324433542,
+      "loss": 0.1643,
+      "step": 11884
+    },
+    {
+      "epoch": 0.8575345430931852,
+      "grad_norm": 0.1022697240114212,
+      "learning_rate": 0.00016571799682493866,
+      "loss": 0.0982,
+      "step": 11885
+    },
+    {
+      "epoch": 0.8576066957682456,
+      "grad_norm": 0.11860981583595276,
+      "learning_rate": 0.00016571511040554192,
+      "loss": 0.1102,
+      "step": 11886
+    },
+    {
+      "epoch": 0.857678848443306,
+      "grad_norm": 0.11475259810686111,
+      "learning_rate": 0.00016571222398614518,
+      "loss": 0.153,
+      "step": 11887
+    },
+    {
+      "epoch": 0.8577510011183664,
+      "grad_norm": 0.12198229879140854,
+      "learning_rate": 0.00016570933756674845,
+      "loss": 0.1282,
+      "step": 11888
+    },
+    {
+      "epoch": 0.8578231537934269,
+      "grad_norm": 0.10108748078346252,
+      "learning_rate": 0.00016570645114735174,
+      "loss": 0.0983,
+      "step": 11889
+    },
+    {
+      "epoch": 0.8578953064684873,
+      "grad_norm": 0.20072031021118164,
+      "learning_rate": 0.00016570356472795497,
+      "loss": 0.1407,
+      "step": 11890
+    },
+    {
+      "epoch": 0.8579674591435478,
+      "grad_norm": 0.11227798461914062,
+      "learning_rate": 0.00016570067830855824,
+      "loss": 0.1458,
+      "step": 11891
+    },
+    {
+      "epoch": 0.8580396118186082,
+      "grad_norm": 0.13460202515125275,
+      "learning_rate": 0.0001656977918891615,
+      "loss": 0.1292,
+      "step": 11892
+    },
+    {
+      "epoch": 0.8581117644936685,
+      "grad_norm": 0.12463308125734329,
+      "learning_rate": 0.00016569490546976476,
+      "loss": 0.0953,
+      "step": 11893
+    },
+    {
+      "epoch": 0.858183917168729,
+      "grad_norm": 0.14697812497615814,
+      "learning_rate": 0.00016569201905036803,
+      "loss": 0.1126,
+      "step": 11894
+    },
+    {
+      "epoch": 0.8582560698437894,
+      "grad_norm": 0.14126932621002197,
+      "learning_rate": 0.0001656891326309713,
+      "loss": 0.1499,
+      "step": 11895
+    },
+    {
+      "epoch": 0.8583282225188499,
+      "grad_norm": 0.11501924693584442,
+      "learning_rate": 0.00016568624621157455,
+      "loss": 0.1156,
+      "step": 11896
+    },
+    {
+      "epoch": 0.8584003751939103,
+      "grad_norm": 0.10844486206769943,
+      "learning_rate": 0.00016568335979217781,
+      "loss": 0.1566,
+      "step": 11897
+    },
+    {
+      "epoch": 0.8584725278689708,
+      "grad_norm": 0.11743530631065369,
+      "learning_rate": 0.00016568047337278108,
+      "loss": 0.1794,
+      "step": 11898
+    },
+    {
+      "epoch": 0.8585446805440312,
+      "grad_norm": 0.1236339583992958,
+      "learning_rate": 0.00016567758695338434,
+      "loss": 0.1827,
+      "step": 11899
+    },
+    {
+      "epoch": 0.8586168332190915,
+      "grad_norm": 0.10101400315761566,
+      "learning_rate": 0.0001656747005339876,
+      "loss": 0.1801,
+      "step": 11900
+    },
+    {
+      "epoch": 0.858688985894152,
+      "grad_norm": 0.12601572275161743,
+      "learning_rate": 0.00016567181411459084,
+      "loss": 0.1398,
+      "step": 11901
+    },
+    {
+      "epoch": 0.8587611385692124,
+      "grad_norm": 0.10722434520721436,
+      "learning_rate": 0.0001656689276951941,
+      "loss": 0.132,
+      "step": 11902
+    },
+    {
+      "epoch": 0.8588332912442729,
+      "grad_norm": 0.12820284068584442,
+      "learning_rate": 0.0001656660412757974,
+      "loss": 0.1443,
+      "step": 11903
+    },
+    {
+      "epoch": 0.8589054439193333,
+      "grad_norm": 0.11345351487398148,
+      "learning_rate": 0.00016566315485640065,
+      "loss": 0.1394,
+      "step": 11904
+    },
+    {
+      "epoch": 0.8589775965943938,
+      "grad_norm": 0.13568373024463654,
+      "learning_rate": 0.00016566026843700392,
+      "loss": 0.1392,
+      "step": 11905
+    },
+    {
+      "epoch": 0.8590497492694542,
+      "grad_norm": 0.1328319013118744,
+      "learning_rate": 0.00016565738201760715,
+      "loss": 0.1457,
+      "step": 11906
+    },
+    {
+      "epoch": 0.8591219019445145,
+      "grad_norm": 0.10983971506357193,
+      "learning_rate": 0.00016565449559821042,
+      "loss": 0.1057,
+      "step": 11907
+    },
+    {
+      "epoch": 0.859194054619575,
+      "grad_norm": 0.12285325676202774,
+      "learning_rate": 0.00016565160917881368,
+      "loss": 0.2251,
+      "step": 11908
+    },
+    {
+      "epoch": 0.8592662072946354,
+      "grad_norm": 0.13944754004478455,
+      "learning_rate": 0.00016564872275941694,
+      "loss": 0.1366,
+      "step": 11909
+    },
+    {
+      "epoch": 0.8593383599696959,
+      "grad_norm": 0.11536892503499985,
+      "learning_rate": 0.00016564583634002023,
+      "loss": 0.1483,
+      "step": 11910
+    },
+    {
+      "epoch": 0.8594105126447563,
+      "grad_norm": 0.1251530647277832,
+      "learning_rate": 0.00016564294992062347,
+      "loss": 0.1474,
+      "step": 11911
+    },
+    {
+      "epoch": 0.8594826653198168,
+      "grad_norm": 0.11108506470918655,
+      "learning_rate": 0.00016564006350122673,
+      "loss": 0.1627,
+      "step": 11912
+    },
+    {
+      "epoch": 0.8595548179948772,
+      "grad_norm": 0.12315988540649414,
+      "learning_rate": 0.00016563717708183,
+      "loss": 0.1291,
+      "step": 11913
+    },
+    {
+      "epoch": 0.8596269706699375,
+      "grad_norm": 0.1117662563920021,
+      "learning_rate": 0.00016563429066243326,
+      "loss": 0.172,
+      "step": 11914
+    },
+    {
+      "epoch": 0.859699123344998,
+      "grad_norm": 0.10717906057834625,
+      "learning_rate": 0.00016563140424303652,
+      "loss": 0.1616,
+      "step": 11915
+    },
+    {
+      "epoch": 0.8597712760200584,
+      "grad_norm": 0.11858413368463516,
+      "learning_rate": 0.00016562851782363978,
+      "loss": 0.196,
+      "step": 11916
+    },
+    {
+      "epoch": 0.8598434286951189,
+      "grad_norm": 0.10697498917579651,
+      "learning_rate": 0.00016562563140424305,
+      "loss": 0.1471,
+      "step": 11917
+    },
+    {
+      "epoch": 0.8599155813701793,
+      "grad_norm": 0.11936324089765549,
+      "learning_rate": 0.0001656227449848463,
+      "loss": 0.1531,
+      "step": 11918
+    },
+    {
+      "epoch": 0.8599877340452398,
+      "grad_norm": 0.16321326792240143,
+      "learning_rate": 0.00016561985856544957,
+      "loss": 0.1923,
+      "step": 11919
+    },
+    {
+      "epoch": 0.8600598867203002,
+      "grad_norm": 0.1054329127073288,
+      "learning_rate": 0.00016561697214605283,
+      "loss": 0.1986,
+      "step": 11920
+    },
+    {
+      "epoch": 0.8601320393953605,
+      "grad_norm": 0.1419740617275238,
+      "learning_rate": 0.0001656140857266561,
+      "loss": 0.1601,
+      "step": 11921
+    },
+    {
+      "epoch": 0.860204192070421,
+      "grad_norm": 0.11228160560131073,
+      "learning_rate": 0.00016561119930725933,
+      "loss": 0.1515,
+      "step": 11922
+    },
+    {
+      "epoch": 0.8602763447454814,
+      "grad_norm": 0.10666772723197937,
+      "learning_rate": 0.0001656083128878626,
+      "loss": 0.1176,
+      "step": 11923
+    },
+    {
+      "epoch": 0.8603484974205419,
+      "grad_norm": 0.145542711019516,
+      "learning_rate": 0.00016560542646846589,
+      "loss": 0.1825,
+      "step": 11924
+    },
+    {
+      "epoch": 0.8604206500956023,
+      "grad_norm": 0.1109357476234436,
+      "learning_rate": 0.00016560254004906915,
+      "loss": 0.1566,
+      "step": 11925
+    },
+    {
+      "epoch": 0.8604928027706628,
+      "grad_norm": 0.13261841237545013,
+      "learning_rate": 0.0001655996536296724,
+      "loss": 0.1368,
+      "step": 11926
+    },
+    {
+      "epoch": 0.8605649554457232,
+      "grad_norm": 0.18109826743602753,
+      "learning_rate": 0.00016559676721027565,
+      "loss": 0.1794,
+      "step": 11927
+    },
+    {
+      "epoch": 0.8606371081207835,
+      "grad_norm": 0.12986978888511658,
+      "learning_rate": 0.0001655938807908789,
+      "loss": 0.1721,
+      "step": 11928
+    },
+    {
+      "epoch": 0.860709260795844,
+      "grad_norm": 0.14286606013774872,
+      "learning_rate": 0.00016559099437148217,
+      "loss": 0.1783,
+      "step": 11929
+    },
+    {
+      "epoch": 0.8607814134709044,
+      "grad_norm": 0.13024945557117462,
+      "learning_rate": 0.00016558810795208544,
+      "loss": 0.1656,
+      "step": 11930
+    },
+    {
+      "epoch": 0.8608535661459649,
+      "grad_norm": 0.1351431906223297,
+      "learning_rate": 0.00016558522153268873,
+      "loss": 0.1338,
+      "step": 11931
+    },
+    {
+      "epoch": 0.8609257188210253,
+      "grad_norm": 0.14224115014076233,
+      "learning_rate": 0.00016558233511329196,
+      "loss": 0.183,
+      "step": 11932
+    },
+    {
+      "epoch": 0.8609978714960858,
+      "grad_norm": 0.1591176688671112,
+      "learning_rate": 0.00016557944869389523,
+      "loss": 0.1574,
+      "step": 11933
+    },
+    {
+      "epoch": 0.8610700241711462,
+      "grad_norm": 0.13040365278720856,
+      "learning_rate": 0.0001655765622744985,
+      "loss": 0.1652,
+      "step": 11934
+    },
+    {
+      "epoch": 0.8611421768462065,
+      "grad_norm": 0.13435451686382294,
+      "learning_rate": 0.00016557367585510175,
+      "loss": 0.1408,
+      "step": 11935
+    },
+    {
+      "epoch": 0.861214329521267,
+      "grad_norm": 0.1458059549331665,
+      "learning_rate": 0.00016557078943570501,
+      "loss": 0.1505,
+      "step": 11936
+    },
+    {
+      "epoch": 0.8612864821963274,
+      "grad_norm": 0.11824122816324234,
+      "learning_rate": 0.00016556790301630828,
+      "loss": 0.1792,
+      "step": 11937
+    },
+    {
+      "epoch": 0.8613586348713879,
+      "grad_norm": 0.11797327548265457,
+      "learning_rate": 0.00016556501659691154,
+      "loss": 0.1492,
+      "step": 11938
+    },
+    {
+      "epoch": 0.8614307875464483,
+      "grad_norm": 0.164879709482193,
+      "learning_rate": 0.0001655621301775148,
+      "loss": 0.1701,
+      "step": 11939
+    },
+    {
+      "epoch": 0.8615029402215088,
+      "grad_norm": 0.13124053180217743,
+      "learning_rate": 0.00016555924375811807,
+      "loss": 0.1505,
+      "step": 11940
+    },
+    {
+      "epoch": 0.8615750928965692,
+      "grad_norm": 0.11650021374225616,
+      "learning_rate": 0.00016555635733872133,
+      "loss": 0.1222,
+      "step": 11941
+    },
+    {
+      "epoch": 0.8616472455716295,
+      "grad_norm": 0.11756842583417892,
+      "learning_rate": 0.0001655534709193246,
+      "loss": 0.1936,
+      "step": 11942
+    },
+    {
+      "epoch": 0.86171939824669,
+      "grad_norm": 0.12020300328731537,
+      "learning_rate": 0.00016555058449992785,
+      "loss": 0.1209,
+      "step": 11943
+    },
+    {
+      "epoch": 0.8617915509217504,
+      "grad_norm": 0.10335661470890045,
+      "learning_rate": 0.0001655476980805311,
+      "loss": 0.1269,
+      "step": 11944
+    },
+    {
+      "epoch": 0.8618637035968109,
+      "grad_norm": 0.10224736481904984,
+      "learning_rate": 0.00016554481166113438,
+      "loss": 0.1017,
+      "step": 11945
+    },
+    {
+      "epoch": 0.8619358562718713,
+      "grad_norm": 0.16791343688964844,
+      "learning_rate": 0.00016554192524173764,
+      "loss": 0.1442,
+      "step": 11946
+    },
+    {
+      "epoch": 0.8620080089469317,
+      "grad_norm": 0.12022384256124496,
+      "learning_rate": 0.0001655390388223409,
+      "loss": 0.1631,
+      "step": 11947
+    },
+    {
+      "epoch": 0.8620801616219921,
+      "grad_norm": 0.11751755326986313,
+      "learning_rate": 0.00016553615240294417,
+      "loss": 0.1516,
+      "step": 11948
+    },
+    {
+      "epoch": 0.8621523142970525,
+      "grad_norm": 0.11795671284198761,
+      "learning_rate": 0.0001655332659835474,
+      "loss": 0.1598,
+      "step": 11949
+    },
+    {
+      "epoch": 0.862224466972113,
+      "grad_norm": 0.11847096681594849,
+      "learning_rate": 0.00016553037956415067,
+      "loss": 0.1389,
+      "step": 11950
+    },
+    {
+      "epoch": 0.8622966196471734,
+      "grad_norm": 0.13842691481113434,
+      "learning_rate": 0.00016552749314475393,
+      "loss": 0.1496,
+      "step": 11951
+    },
+    {
+      "epoch": 0.8623687723222339,
+      "grad_norm": 0.1378527283668518,
+      "learning_rate": 0.00016552460672535722,
+      "loss": 0.1888,
+      "step": 11952
+    },
+    {
+      "epoch": 0.8624409249972943,
+      "grad_norm": 0.11756579577922821,
+      "learning_rate": 0.00016552172030596048,
+      "loss": 0.1009,
+      "step": 11953
+    },
+    {
+      "epoch": 0.8625130776723547,
+      "grad_norm": 0.12039361894130707,
+      "learning_rate": 0.00016551883388656372,
+      "loss": 0.2594,
+      "step": 11954
+    },
+    {
+      "epoch": 0.8625852303474151,
+      "grad_norm": 0.10809768736362457,
+      "learning_rate": 0.00016551594746716698,
+      "loss": 0.1223,
+      "step": 11955
+    },
+    {
+      "epoch": 0.8626573830224755,
+      "grad_norm": 0.11799143254756927,
+      "learning_rate": 0.00016551306104777025,
+      "loss": 0.1588,
+      "step": 11956
+    },
+    {
+      "epoch": 0.862729535697536,
+      "grad_norm": 0.11549495905637741,
+      "learning_rate": 0.0001655101746283735,
+      "loss": 0.1738,
+      "step": 11957
+    },
+    {
+      "epoch": 0.8628016883725964,
+      "grad_norm": 0.13709287345409393,
+      "learning_rate": 0.00016550728820897677,
+      "loss": 0.1597,
+      "step": 11958
+    },
+    {
+      "epoch": 0.8628738410476569,
+      "grad_norm": 0.11733334511518478,
+      "learning_rate": 0.00016550440178958003,
+      "loss": 0.1439,
+      "step": 11959
+    },
+    {
+      "epoch": 0.8629459937227173,
+      "grad_norm": 0.1170649528503418,
+      "learning_rate": 0.0001655015153701833,
+      "loss": 0.1483,
+      "step": 11960
+    },
+    {
+      "epoch": 0.8630181463977777,
+      "grad_norm": 0.11585357040166855,
+      "learning_rate": 0.00016549862895078656,
+      "loss": 0.133,
+      "step": 11961
+    },
+    {
+      "epoch": 0.8630902990728381,
+      "grad_norm": 0.12861251831054688,
+      "learning_rate": 0.00016549574253138982,
+      "loss": 0.1677,
+      "step": 11962
+    },
+    {
+      "epoch": 0.8631624517478985,
+      "grad_norm": 0.14745929837226868,
+      "learning_rate": 0.00016549285611199309,
+      "loss": 0.1699,
+      "step": 11963
+    },
+    {
+      "epoch": 0.863234604422959,
+      "grad_norm": 0.11474446207284927,
+      "learning_rate": 0.00016548996969259635,
+      "loss": 0.1778,
+      "step": 11964
+    },
+    {
+      "epoch": 0.8633067570980194,
+      "grad_norm": 0.1322767734527588,
+      "learning_rate": 0.00016548708327319958,
+      "loss": 0.1542,
+      "step": 11965
+    },
+    {
+      "epoch": 0.8633789097730799,
+      "grad_norm": 0.1421336829662323,
+      "learning_rate": 0.00016548419685380287,
+      "loss": 0.1424,
+      "step": 11966
+    },
+    {
+      "epoch": 0.8634510624481403,
+      "grad_norm": 0.13709428906440735,
+      "learning_rate": 0.00016548131043440614,
+      "loss": 0.1552,
+      "step": 11967
+    },
+    {
+      "epoch": 0.8635232151232007,
+      "grad_norm": 0.13968567550182343,
+      "learning_rate": 0.0001654784240150094,
+      "loss": 0.101,
+      "step": 11968
+    },
+    {
+      "epoch": 0.8635953677982611,
+      "grad_norm": 0.1329333335161209,
+      "learning_rate": 0.00016547553759561266,
+      "loss": 0.1844,
+      "step": 11969
+    },
+    {
+      "epoch": 0.8636675204733215,
+      "grad_norm": 0.12988804280757904,
+      "learning_rate": 0.0001654726511762159,
+      "loss": 0.1629,
+      "step": 11970
+    },
+    {
+      "epoch": 0.863739673148382,
+      "grad_norm": 0.18330612778663635,
+      "learning_rate": 0.00016546976475681916,
+      "loss": 0.144,
+      "step": 11971
+    },
+    {
+      "epoch": 0.8638118258234424,
+      "grad_norm": 0.13274584710597992,
+      "learning_rate": 0.00016546687833742242,
+      "loss": 0.1395,
+      "step": 11972
+    },
+    {
+      "epoch": 0.8638839784985028,
+      "grad_norm": 0.12589463591575623,
+      "learning_rate": 0.00016546399191802571,
+      "loss": 0.165,
+      "step": 11973
+    },
+    {
+      "epoch": 0.8639561311735633,
+      "grad_norm": 0.15424275398254395,
+      "learning_rate": 0.00016546110549862898,
+      "loss": 0.2213,
+      "step": 11974
+    },
+    {
+      "epoch": 0.8640282838486237,
+      "grad_norm": 0.14214026927947998,
+      "learning_rate": 0.00016545821907923221,
+      "loss": 0.1617,
+      "step": 11975
+    },
+    {
+      "epoch": 0.8641004365236841,
+      "grad_norm": 0.12770618498325348,
+      "learning_rate": 0.00016545533265983548,
+      "loss": 0.1558,
+      "step": 11976
+    },
+    {
+      "epoch": 0.8641725891987445,
+      "grad_norm": 0.1013932079076767,
+      "learning_rate": 0.00016545244624043874,
+      "loss": 0.1326,
+      "step": 11977
+    },
+    {
+      "epoch": 0.864244741873805,
+      "grad_norm": 0.1254311501979828,
+      "learning_rate": 0.000165449559821042,
+      "loss": 0.1413,
+      "step": 11978
+    },
+    {
+      "epoch": 0.8643168945488654,
+      "grad_norm": 0.11821615695953369,
+      "learning_rate": 0.00016544667340164527,
+      "loss": 0.1184,
+      "step": 11979
+    },
+    {
+      "epoch": 0.8643890472239258,
+      "grad_norm": 0.11322508752346039,
+      "learning_rate": 0.00016544378698224853,
+      "loss": 0.1394,
+      "step": 11980
+    },
+    {
+      "epoch": 0.8644611998989863,
+      "grad_norm": 0.1489751785993576,
+      "learning_rate": 0.0001654409005628518,
+      "loss": 0.1476,
+      "step": 11981
+    },
+    {
+      "epoch": 0.8645333525740467,
+      "grad_norm": 0.12493439763784409,
+      "learning_rate": 0.00016543801414345505,
+      "loss": 0.1311,
+      "step": 11982
+    },
+    {
+      "epoch": 0.8646055052491071,
+      "grad_norm": 0.12061279267072678,
+      "learning_rate": 0.00016543512772405832,
+      "loss": 0.1578,
+      "step": 11983
+    },
+    {
+      "epoch": 0.8646776579241675,
+      "grad_norm": 0.12649033963680267,
+      "learning_rate": 0.00016543224130466158,
+      "loss": 0.1538,
+      "step": 11984
+    },
+    {
+      "epoch": 0.864749810599228,
+      "grad_norm": 0.12275785952806473,
+      "learning_rate": 0.00016542935488526484,
+      "loss": 0.1375,
+      "step": 11985
+    },
+    {
+      "epoch": 0.8648219632742884,
+      "grad_norm": 0.12534111738204956,
+      "learning_rate": 0.00016542646846586808,
+      "loss": 0.1792,
+      "step": 11986
+    },
+    {
+      "epoch": 0.8648941159493488,
+      "grad_norm": 0.1655583679676056,
+      "learning_rate": 0.00016542358204647137,
+      "loss": 0.1193,
+      "step": 11987
+    },
+    {
+      "epoch": 0.8649662686244093,
+      "grad_norm": 0.13253222405910492,
+      "learning_rate": 0.00016542069562707463,
+      "loss": 0.1462,
+      "step": 11988
+    },
+    {
+      "epoch": 0.8650384212994697,
+      "grad_norm": 0.13312864303588867,
+      "learning_rate": 0.0001654178092076779,
+      "loss": 0.135,
+      "step": 11989
+    },
+    {
+      "epoch": 0.8651105739745301,
+      "grad_norm": 0.09539494663476944,
+      "learning_rate": 0.00016541492278828116,
+      "loss": 0.1916,
+      "step": 11990
+    },
+    {
+      "epoch": 0.8651827266495905,
+      "grad_norm": 0.11006974428892136,
+      "learning_rate": 0.0001654120363688844,
+      "loss": 0.1785,
+      "step": 11991
+    },
+    {
+      "epoch": 0.865254879324651,
+      "grad_norm": 0.11187838017940521,
+      "learning_rate": 0.00016540914994948766,
+      "loss": 0.1586,
+      "step": 11992
+    },
+    {
+      "epoch": 0.8653270319997114,
+      "grad_norm": 0.10977104306221008,
+      "learning_rate": 0.00016540626353009092,
+      "loss": 0.1669,
+      "step": 11993
+    },
+    {
+      "epoch": 0.8653991846747718,
+      "grad_norm": 0.10701952129602432,
+      "learning_rate": 0.0001654033771106942,
+      "loss": 0.1144,
+      "step": 11994
+    },
+    {
+      "epoch": 0.8654713373498323,
+      "grad_norm": 0.11350420862436295,
+      "learning_rate": 0.00016540049069129747,
+      "loss": 0.1286,
+      "step": 11995
+    },
+    {
+      "epoch": 0.8655434900248927,
+      "grad_norm": 0.11541275680065155,
+      "learning_rate": 0.0001653976042719007,
+      "loss": 0.1623,
+      "step": 11996
+    },
+    {
+      "epoch": 0.8656156426999531,
+      "grad_norm": 0.10224005579948425,
+      "learning_rate": 0.00016539471785250397,
+      "loss": 0.0958,
+      "step": 11997
+    },
+    {
+      "epoch": 0.8656877953750135,
+      "grad_norm": 0.1323241889476776,
+      "learning_rate": 0.00016539183143310723,
+      "loss": 0.1257,
+      "step": 11998
+    },
+    {
+      "epoch": 0.865759948050074,
+      "grad_norm": 0.14228209853172302,
+      "learning_rate": 0.0001653889450137105,
+      "loss": 0.145,
+      "step": 11999
+    },
+    {
+      "epoch": 0.8658321007251344,
+      "grad_norm": 0.14026106894016266,
+      "learning_rate": 0.00016538605859431376,
+      "loss": 0.1575,
+      "step": 12000
+    },
+    {
+      "epoch": 0.8659042534001948,
+      "grad_norm": 0.14158804714679718,
+      "learning_rate": 0.00016538317217491702,
+      "loss": 0.2045,
+      "step": 12001
+    },
+    {
+      "epoch": 0.8659764060752553,
+      "grad_norm": 0.1218271553516388,
+      "learning_rate": 0.00016538028575552029,
+      "loss": 0.1815,
+      "step": 12002
+    },
+    {
+      "epoch": 0.8660485587503157,
+      "grad_norm": 0.13607192039489746,
+      "learning_rate": 0.00016537739933612355,
+      "loss": 0.1223,
+      "step": 12003
+    },
+    {
+      "epoch": 0.866120711425376,
+      "grad_norm": 0.11378008872270584,
+      "learning_rate": 0.0001653745129167268,
+      "loss": 0.1253,
+      "step": 12004
+    },
+    {
+      "epoch": 0.8661928641004365,
+      "grad_norm": 0.12672148644924164,
+      "learning_rate": 0.00016537162649733007,
+      "loss": 0.1702,
+      "step": 12005
+    },
+    {
+      "epoch": 0.8662650167754969,
+      "grad_norm": 0.1406373679637909,
+      "learning_rate": 0.00016536874007793334,
+      "loss": 0.1351,
+      "step": 12006
+    },
+    {
+      "epoch": 0.8663371694505574,
+      "grad_norm": 0.16386227309703827,
+      "learning_rate": 0.00016536585365853657,
+      "loss": 0.1901,
+      "step": 12007
+    },
+    {
+      "epoch": 0.8664093221256178,
+      "grad_norm": 0.12637802958488464,
+      "learning_rate": 0.00016536296723913986,
+      "loss": 0.1616,
+      "step": 12008
+    },
+    {
+      "epoch": 0.8664814748006783,
+      "grad_norm": 0.11758668720722198,
+      "learning_rate": 0.00016536008081974313,
+      "loss": 0.151,
+      "step": 12009
+    },
+    {
+      "epoch": 0.8665536274757386,
+      "grad_norm": 0.13361458480358124,
+      "learning_rate": 0.0001653571944003464,
+      "loss": 0.1844,
+      "step": 12010
+    },
+    {
+      "epoch": 0.866625780150799,
+      "grad_norm": 0.14450719952583313,
+      "learning_rate": 0.00016535430798094965,
+      "loss": 0.161,
+      "step": 12011
+    },
+    {
+      "epoch": 0.8666979328258595,
+      "grad_norm": 0.12745462357997894,
+      "learning_rate": 0.0001653514215615529,
+      "loss": 0.166,
+      "step": 12012
+    },
+    {
+      "epoch": 0.8667700855009199,
+      "grad_norm": 0.13964685797691345,
+      "learning_rate": 0.00016534853514215615,
+      "loss": 0.1315,
+      "step": 12013
+    },
+    {
+      "epoch": 0.8668422381759804,
+      "grad_norm": 0.13584551215171814,
+      "learning_rate": 0.0001653456487227594,
+      "loss": 0.2064,
+      "step": 12014
+    },
+    {
+      "epoch": 0.8669143908510408,
+      "grad_norm": 0.14645154774188995,
+      "learning_rate": 0.0001653427623033627,
+      "loss": 0.1317,
+      "step": 12015
+    },
+    {
+      "epoch": 0.8669865435261013,
+      "grad_norm": 0.12439603358507156,
+      "learning_rate": 0.00016533987588396597,
+      "loss": 0.1519,
+      "step": 12016
+    },
+    {
+      "epoch": 0.8670586962011616,
+      "grad_norm": 0.1141846626996994,
+      "learning_rate": 0.0001653369894645692,
+      "loss": 0.1704,
+      "step": 12017
+    },
+    {
+      "epoch": 0.867130848876222,
+      "grad_norm": 0.1145968809723854,
+      "learning_rate": 0.00016533410304517247,
+      "loss": 0.1426,
+      "step": 12018
+    },
+    {
+      "epoch": 0.8672030015512825,
+      "grad_norm": 0.12812818586826324,
+      "learning_rate": 0.00016533121662577573,
+      "loss": 0.1639,
+      "step": 12019
+    },
+    {
+      "epoch": 0.8672751542263429,
+      "grad_norm": 0.12737447023391724,
+      "learning_rate": 0.000165328330206379,
+      "loss": 0.1526,
+      "step": 12020
+    },
+    {
+      "epoch": 0.8673473069014034,
+      "grad_norm": 0.11546266824007034,
+      "learning_rate": 0.00016532544378698225,
+      "loss": 0.1362,
+      "step": 12021
+    },
+    {
+      "epoch": 0.8674194595764638,
+      "grad_norm": 0.14184580743312836,
+      "learning_rate": 0.00016532255736758552,
+      "loss": 0.1934,
+      "step": 12022
+    },
+    {
+      "epoch": 0.8674916122515243,
+      "grad_norm": 0.10528424382209778,
+      "learning_rate": 0.00016531967094818878,
+      "loss": 0.1609,
+      "step": 12023
+    },
+    {
+      "epoch": 0.8675637649265846,
+      "grad_norm": 0.15018628537654877,
+      "learning_rate": 0.00016531678452879204,
+      "loss": 0.1719,
+      "step": 12024
+    },
+    {
+      "epoch": 0.867635917601645,
+      "grad_norm": 0.13803143799304962,
+      "learning_rate": 0.0001653138981093953,
+      "loss": 0.0904,
+      "step": 12025
+    },
+    {
+      "epoch": 0.8677080702767055,
+      "grad_norm": 0.20398986339569092,
+      "learning_rate": 0.00016531101168999857,
+      "loss": 0.1759,
+      "step": 12026
+    },
+    {
+      "epoch": 0.8677802229517659,
+      "grad_norm": 0.14385467767715454,
+      "learning_rate": 0.00016530812527060183,
+      "loss": 0.1553,
+      "step": 12027
+    },
+    {
+      "epoch": 0.8678523756268264,
+      "grad_norm": 0.10514909774065018,
+      "learning_rate": 0.00016530523885120507,
+      "loss": 0.1138,
+      "step": 12028
+    },
+    {
+      "epoch": 0.8679245283018868,
+      "grad_norm": 0.11758711189031601,
+      "learning_rate": 0.00016530235243180836,
+      "loss": 0.1221,
+      "step": 12029
+    },
+    {
+      "epoch": 0.8679966809769473,
+      "grad_norm": 0.10472291707992554,
+      "learning_rate": 0.00016529946601241162,
+      "loss": 0.1683,
+      "step": 12030
+    },
+    {
+      "epoch": 0.8680688336520076,
+      "grad_norm": 0.11128973215818405,
+      "learning_rate": 0.00016529657959301488,
+      "loss": 0.1502,
+      "step": 12031
+    },
+    {
+      "epoch": 0.868140986327068,
+      "grad_norm": 0.12483924627304077,
+      "learning_rate": 0.00016529369317361815,
+      "loss": 0.1499,
+      "step": 12032
+    },
+    {
+      "epoch": 0.8682131390021285,
+      "grad_norm": 0.12955377995967865,
+      "learning_rate": 0.00016529080675422138,
+      "loss": 0.1212,
+      "step": 12033
+    },
+    {
+      "epoch": 0.8682852916771889,
+      "grad_norm": 0.13617177307605743,
+      "learning_rate": 0.00016528792033482464,
+      "loss": 0.1881,
+      "step": 12034
+    },
+    {
+      "epoch": 0.8683574443522494,
+      "grad_norm": 0.136407732963562,
+      "learning_rate": 0.0001652850339154279,
+      "loss": 0.1583,
+      "step": 12035
+    },
+    {
+      "epoch": 0.8684295970273098,
+      "grad_norm": 0.12334705889225006,
+      "learning_rate": 0.0001652821474960312,
+      "loss": 0.1374,
+      "step": 12036
+    },
+    {
+      "epoch": 0.8685017497023703,
+      "grad_norm": 0.13375817239284515,
+      "learning_rate": 0.00016527926107663446,
+      "loss": 0.1291,
+      "step": 12037
+    },
+    {
+      "epoch": 0.8685739023774306,
+      "grad_norm": 0.1542121320962906,
+      "learning_rate": 0.0001652763746572377,
+      "loss": 0.1469,
+      "step": 12038
+    },
+    {
+      "epoch": 0.868646055052491,
+      "grad_norm": 0.12267142534255981,
+      "learning_rate": 0.00016527348823784096,
+      "loss": 0.1082,
+      "step": 12039
+    },
+    {
+      "epoch": 0.8687182077275515,
+      "grad_norm": 0.12496042251586914,
+      "learning_rate": 0.00016527060181844422,
+      "loss": 0.1775,
+      "step": 12040
+    },
+    {
+      "epoch": 0.8687903604026119,
+      "grad_norm": 0.12290967255830765,
+      "learning_rate": 0.00016526771539904749,
+      "loss": 0.1664,
+      "step": 12041
+    },
+    {
+      "epoch": 0.8688625130776724,
+      "grad_norm": 0.10611739009618759,
+      "learning_rate": 0.00016526482897965075,
+      "loss": 0.1673,
+      "step": 12042
+    },
+    {
+      "epoch": 0.8689346657527328,
+      "grad_norm": 0.13516947627067566,
+      "learning_rate": 0.000165261942560254,
+      "loss": 0.1949,
+      "step": 12043
+    },
+    {
+      "epoch": 0.8690068184277933,
+      "grad_norm": 0.11694905161857605,
+      "learning_rate": 0.00016525905614085727,
+      "loss": 0.1935,
+      "step": 12044
+    },
+    {
+      "epoch": 0.8690789711028536,
+      "grad_norm": 0.1327820122241974,
+      "learning_rate": 0.00016525616972146054,
+      "loss": 0.1673,
+      "step": 12045
+    },
+    {
+      "epoch": 0.869151123777914,
+      "grad_norm": 0.1115252748131752,
+      "learning_rate": 0.0001652532833020638,
+      "loss": 0.1141,
+      "step": 12046
+    },
+    {
+      "epoch": 0.8692232764529745,
+      "grad_norm": 0.11399739235639572,
+      "learning_rate": 0.00016525039688266706,
+      "loss": 0.1891,
+      "step": 12047
+    },
+    {
+      "epoch": 0.8692954291280349,
+      "grad_norm": 0.14533619582653046,
+      "learning_rate": 0.00016524751046327033,
+      "loss": 0.1543,
+      "step": 12048
+    },
+    {
+      "epoch": 0.8693675818030954,
+      "grad_norm": 0.13893792033195496,
+      "learning_rate": 0.00016524462404387356,
+      "loss": 0.123,
+      "step": 12049
+    },
+    {
+      "epoch": 0.8694397344781558,
+      "grad_norm": 0.11470438539981842,
+      "learning_rate": 0.00016524173762447682,
+      "loss": 0.2039,
+      "step": 12050
+    },
+    {
+      "epoch": 0.8695118871532163,
+      "grad_norm": 0.14197883009910583,
+      "learning_rate": 0.00016523885120508011,
+      "loss": 0.1669,
+      "step": 12051
+    },
+    {
+      "epoch": 0.8695840398282766,
+      "grad_norm": 0.11966146528720856,
+      "learning_rate": 0.00016523596478568338,
+      "loss": 0.1399,
+      "step": 12052
+    },
+    {
+      "epoch": 0.869656192503337,
+      "grad_norm": 0.15418173372745514,
+      "learning_rate": 0.00016523307836628664,
+      "loss": 0.1202,
+      "step": 12053
+    },
+    {
+      "epoch": 0.8697283451783975,
+      "grad_norm": 0.13532495498657227,
+      "learning_rate": 0.0001652301919468899,
+      "loss": 0.1673,
+      "step": 12054
+    },
+    {
+      "epoch": 0.8698004978534579,
+      "grad_norm": 0.13908350467681885,
+      "learning_rate": 0.00016522730552749314,
+      "loss": 0.131,
+      "step": 12055
+    },
+    {
+      "epoch": 0.8698726505285184,
+      "grad_norm": 0.150166854262352,
+      "learning_rate": 0.0001652244191080964,
+      "loss": 0.1332,
+      "step": 12056
+    },
+    {
+      "epoch": 0.8699448032035788,
+      "grad_norm": 0.13706979155540466,
+      "learning_rate": 0.00016522153268869966,
+      "loss": 0.1606,
+      "step": 12057
+    },
+    {
+      "epoch": 0.8700169558786393,
+      "grad_norm": 0.13957710564136505,
+      "learning_rate": 0.00016521864626930295,
+      "loss": 0.1066,
+      "step": 12058
+    },
+    {
+      "epoch": 0.8700891085536996,
+      "grad_norm": 0.12413793802261353,
+      "learning_rate": 0.00016521575984990622,
+      "loss": 0.1668,
+      "step": 12059
+    },
+    {
+      "epoch": 0.87016126122876,
+      "grad_norm": 0.13130804896354675,
+      "learning_rate": 0.00016521287343050945,
+      "loss": 0.1344,
+      "step": 12060
+    },
+    {
+      "epoch": 0.8702334139038205,
+      "grad_norm": 0.11016803979873657,
+      "learning_rate": 0.00016520998701111272,
+      "loss": 0.163,
+      "step": 12061
+    },
+    {
+      "epoch": 0.8703055665788809,
+      "grad_norm": 0.1339435875415802,
+      "learning_rate": 0.00016520710059171598,
+      "loss": 0.0847,
+      "step": 12062
+    },
+    {
+      "epoch": 0.8703777192539414,
+      "grad_norm": 0.13038188219070435,
+      "learning_rate": 0.00016520421417231924,
+      "loss": 0.1442,
+      "step": 12063
+    },
+    {
+      "epoch": 0.8704498719290018,
+      "grad_norm": 0.16010652482509613,
+      "learning_rate": 0.0001652013277529225,
+      "loss": 0.1278,
+      "step": 12064
+    },
+    {
+      "epoch": 0.8705220246040623,
+      "grad_norm": 0.11903893947601318,
+      "learning_rate": 0.00016519844133352577,
+      "loss": 0.1064,
+      "step": 12065
+    },
+    {
+      "epoch": 0.8705941772791226,
+      "grad_norm": 0.10911662876605988,
+      "learning_rate": 0.00016519555491412903,
+      "loss": 0.1323,
+      "step": 12066
+    },
+    {
+      "epoch": 0.870666329954183,
+      "grad_norm": 0.12734061479568481,
+      "learning_rate": 0.0001651926684947323,
+      "loss": 0.1139,
+      "step": 12067
+    },
+    {
+      "epoch": 0.8707384826292435,
+      "grad_norm": 0.1067054495215416,
+      "learning_rate": 0.00016518978207533556,
+      "loss": 0.1328,
+      "step": 12068
+    },
+    {
+      "epoch": 0.8708106353043039,
+      "grad_norm": 0.11850341409444809,
+      "learning_rate": 0.00016518689565593882,
+      "loss": 0.1604,
+      "step": 12069
+    },
+    {
+      "epoch": 0.8708827879793644,
+      "grad_norm": 0.13869835436344147,
+      "learning_rate": 0.00016518400923654208,
+      "loss": 0.1415,
+      "step": 12070
+    },
+    {
+      "epoch": 0.8709549406544248,
+      "grad_norm": 0.10934671759605408,
+      "learning_rate": 0.00016518112281714532,
+      "loss": 0.185,
+      "step": 12071
+    },
+    {
+      "epoch": 0.8710270933294851,
+      "grad_norm": 0.09486602246761322,
+      "learning_rate": 0.0001651782363977486,
+      "loss": 0.2053,
+      "step": 12072
+    },
+    {
+      "epoch": 0.8710992460045456,
+      "grad_norm": 0.1067877933382988,
+      "learning_rate": 0.00016517534997835187,
+      "loss": 0.1463,
+      "step": 12073
+    },
+    {
+      "epoch": 0.871171398679606,
+      "grad_norm": 0.13438858091831207,
+      "learning_rate": 0.00016517246355895513,
+      "loss": 0.1328,
+      "step": 12074
+    },
+    {
+      "epoch": 0.8712435513546665,
+      "grad_norm": 0.11246223002672195,
+      "learning_rate": 0.0001651695771395584,
+      "loss": 0.1913,
+      "step": 12075
+    },
+    {
+      "epoch": 0.8713157040297269,
+      "grad_norm": 0.15119744837284088,
+      "learning_rate": 0.00016516669072016163,
+      "loss": 0.1686,
+      "step": 12076
+    },
+    {
+      "epoch": 0.8713878567047874,
+      "grad_norm": 0.11665923148393631,
+      "learning_rate": 0.0001651638043007649,
+      "loss": 0.1706,
+      "step": 12077
+    },
+    {
+      "epoch": 0.8714600093798478,
+      "grad_norm": 0.11429120600223541,
+      "learning_rate": 0.00016516091788136816,
+      "loss": 0.1297,
+      "step": 12078
+    },
+    {
+      "epoch": 0.8715321620549081,
+      "grad_norm": 0.11058809608221054,
+      "learning_rate": 0.00016515803146197145,
+      "loss": 0.1065,
+      "step": 12079
+    },
+    {
+      "epoch": 0.8716043147299686,
+      "grad_norm": 0.12536856532096863,
+      "learning_rate": 0.0001651551450425747,
+      "loss": 0.1183,
+      "step": 12080
+    },
+    {
+      "epoch": 0.871676467405029,
+      "grad_norm": 0.11672108620405197,
+      "learning_rate": 0.00016515225862317795,
+      "loss": 0.1308,
+      "step": 12081
+    },
+    {
+      "epoch": 0.8717486200800895,
+      "grad_norm": 0.1285310685634613,
+      "learning_rate": 0.0001651493722037812,
+      "loss": 0.1425,
+      "step": 12082
+    },
+    {
+      "epoch": 0.8718207727551499,
+      "grad_norm": 0.12578968703746796,
+      "learning_rate": 0.00016514648578438447,
+      "loss": 0.1317,
+      "step": 12083
+    },
+    {
+      "epoch": 0.8718929254302104,
+      "grad_norm": 0.14941126108169556,
+      "learning_rate": 0.00016514359936498774,
+      "loss": 0.2002,
+      "step": 12084
+    },
+    {
+      "epoch": 0.8719650781052708,
+      "grad_norm": 0.1489430069923401,
+      "learning_rate": 0.000165140712945591,
+      "loss": 0.1426,
+      "step": 12085
+    },
+    {
+      "epoch": 0.8720372307803311,
+      "grad_norm": 0.12630683183670044,
+      "learning_rate": 0.00016513782652619426,
+      "loss": 0.1756,
+      "step": 12086
+    },
+    {
+      "epoch": 0.8721093834553916,
+      "grad_norm": 0.1323336958885193,
+      "learning_rate": 0.00016513494010679753,
+      "loss": 0.1421,
+      "step": 12087
+    },
+    {
+      "epoch": 0.872181536130452,
+      "grad_norm": 0.13433264195919037,
+      "learning_rate": 0.0001651320536874008,
+      "loss": 0.1435,
+      "step": 12088
+    },
+    {
+      "epoch": 0.8722536888055125,
+      "grad_norm": 0.1453068107366562,
+      "learning_rate": 0.00016512916726800405,
+      "loss": 0.151,
+      "step": 12089
+    },
+    {
+      "epoch": 0.8723258414805729,
+      "grad_norm": 0.14631931483745575,
+      "learning_rate": 0.00016512628084860731,
+      "loss": 0.1296,
+      "step": 12090
+    },
+    {
+      "epoch": 0.8723979941556333,
+      "grad_norm": 0.11547321081161499,
+      "learning_rate": 0.00016512339442921058,
+      "loss": 0.1698,
+      "step": 12091
+    },
+    {
+      "epoch": 0.8724701468306938,
+      "grad_norm": 0.13233087956905365,
+      "learning_rate": 0.0001651205080098138,
+      "loss": 0.1661,
+      "step": 12092
+    },
+    {
+      "epoch": 0.8725422995057541,
+      "grad_norm": 0.13904765248298645,
+      "learning_rate": 0.0001651176215904171,
+      "loss": 0.1652,
+      "step": 12093
+    },
+    {
+      "epoch": 0.8726144521808146,
+      "grad_norm": 0.11811047792434692,
+      "learning_rate": 0.00016511473517102037,
+      "loss": 0.1001,
+      "step": 12094
+    },
+    {
+      "epoch": 0.872686604855875,
+      "grad_norm": 0.1405722051858902,
+      "learning_rate": 0.00016511184875162363,
+      "loss": 0.1107,
+      "step": 12095
+    },
+    {
+      "epoch": 0.8727587575309355,
+      "grad_norm": 0.1355287730693817,
+      "learning_rate": 0.0001651089623322269,
+      "loss": 0.1465,
+      "step": 12096
+    },
+    {
+      "epoch": 0.8728309102059959,
+      "grad_norm": 0.13099505007266998,
+      "learning_rate": 0.00016510607591283013,
+      "loss": 0.1411,
+      "step": 12097
+    },
+    {
+      "epoch": 0.8729030628810563,
+      "grad_norm": 0.1373823583126068,
+      "learning_rate": 0.0001651031894934334,
+      "loss": 0.1878,
+      "step": 12098
+    },
+    {
+      "epoch": 0.8729752155561168,
+      "grad_norm": 0.1262364238500595,
+      "learning_rate": 0.00016510030307403665,
+      "loss": 0.1755,
+      "step": 12099
+    },
+    {
+      "epoch": 0.8730473682311771,
+      "grad_norm": 0.1368231475353241,
+      "learning_rate": 0.00016509741665463994,
+      "loss": 0.1496,
+      "step": 12100
+    },
+    {
+      "epoch": 0.8731195209062376,
+      "grad_norm": 0.1449502855539322,
+      "learning_rate": 0.0001650945302352432,
+      "loss": 0.1357,
+      "step": 12101
+    },
+    {
+      "epoch": 0.873191673581298,
+      "grad_norm": 0.14914466440677643,
+      "learning_rate": 0.00016509164381584644,
+      "loss": 0.1452,
+      "step": 12102
+    },
+    {
+      "epoch": 0.8732638262563585,
+      "grad_norm": 0.13216231763362885,
+      "learning_rate": 0.0001650887573964497,
+      "loss": 0.1819,
+      "step": 12103
+    },
+    {
+      "epoch": 0.8733359789314189,
+      "grad_norm": 0.14980573952198029,
+      "learning_rate": 0.00016508587097705297,
+      "loss": 0.1577,
+      "step": 12104
+    },
+    {
+      "epoch": 0.8734081316064793,
+      "grad_norm": 0.11215804517269135,
+      "learning_rate": 0.00016508298455765623,
+      "loss": 0.1322,
+      "step": 12105
+    },
+    {
+      "epoch": 0.8734802842815398,
+      "grad_norm": 0.11477739363908768,
+      "learning_rate": 0.0001650800981382595,
+      "loss": 0.1375,
+      "step": 12106
+    },
+    {
+      "epoch": 0.8735524369566001,
+      "grad_norm": 0.20511171221733093,
+      "learning_rate": 0.00016507721171886276,
+      "loss": 0.1463,
+      "step": 12107
+    },
+    {
+      "epoch": 0.8736245896316606,
+      "grad_norm": 0.13001227378845215,
+      "learning_rate": 0.00016507432529946602,
+      "loss": 0.1704,
+      "step": 12108
+    },
+    {
+      "epoch": 0.873696742306721,
+      "grad_norm": 0.12199117243289948,
+      "learning_rate": 0.00016507143888006928,
+      "loss": 0.1092,
+      "step": 12109
+    },
+    {
+      "epoch": 0.8737688949817815,
+      "grad_norm": 0.14155781269073486,
+      "learning_rate": 0.00016506855246067255,
+      "loss": 0.1333,
+      "step": 12110
+    },
+    {
+      "epoch": 0.8738410476568419,
+      "grad_norm": 0.13451431691646576,
+      "learning_rate": 0.0001650656660412758,
+      "loss": 0.1795,
+      "step": 12111
+    },
+    {
+      "epoch": 0.8739132003319023,
+      "grad_norm": 0.1325516253709793,
+      "learning_rate": 0.00016506277962187907,
+      "loss": 0.161,
+      "step": 12112
+    },
+    {
+      "epoch": 0.8739853530069628,
+      "grad_norm": 0.11864909529685974,
+      "learning_rate": 0.0001650598932024823,
+      "loss": 0.1461,
+      "step": 12113
+    },
+    {
+      "epoch": 0.8740575056820231,
+      "grad_norm": 0.12976987659931183,
+      "learning_rate": 0.0001650570067830856,
+      "loss": 0.1994,
+      "step": 12114
+    },
+    {
+      "epoch": 0.8741296583570836,
+      "grad_norm": 0.1296241134405136,
+      "learning_rate": 0.00016505412036368886,
+      "loss": 0.1489,
+      "step": 12115
+    },
+    {
+      "epoch": 0.874201811032144,
+      "grad_norm": 0.1189151182770729,
+      "learning_rate": 0.00016505123394429212,
+      "loss": 0.1615,
+      "step": 12116
+    },
+    {
+      "epoch": 0.8742739637072044,
+      "grad_norm": 0.13982558250427246,
+      "learning_rate": 0.00016504834752489539,
+      "loss": 0.1097,
+      "step": 12117
+    },
+    {
+      "epoch": 0.8743461163822649,
+      "grad_norm": 0.1390528380870819,
+      "learning_rate": 0.00016504546110549862,
+      "loss": 0.1315,
+      "step": 12118
+    },
+    {
+      "epoch": 0.8744182690573253,
+      "grad_norm": 0.12935444712638855,
+      "learning_rate": 0.00016504257468610188,
+      "loss": 0.1407,
+      "step": 12119
+    },
+    {
+      "epoch": 0.8744904217323858,
+      "grad_norm": 0.13345961272716522,
+      "learning_rate": 0.00016503968826670515,
+      "loss": 0.1685,
+      "step": 12120
+    },
+    {
+      "epoch": 0.8745625744074461,
+      "grad_norm": 0.1496542990207672,
+      "learning_rate": 0.00016503680184730844,
+      "loss": 0.1529,
+      "step": 12121
+    },
+    {
+      "epoch": 0.8746347270825066,
+      "grad_norm": 0.15997420251369476,
+      "learning_rate": 0.0001650339154279117,
+      "loss": 0.1783,
+      "step": 12122
+    },
+    {
+      "epoch": 0.874706879757567,
+      "grad_norm": 0.13938643038272858,
+      "learning_rate": 0.00016503102900851494,
+      "loss": 0.1333,
+      "step": 12123
+    },
+    {
+      "epoch": 0.8747790324326274,
+      "grad_norm": 0.15764518082141876,
+      "learning_rate": 0.0001650281425891182,
+      "loss": 0.1774,
+      "step": 12124
+    },
+    {
+      "epoch": 0.8748511851076879,
+      "grad_norm": 0.12833885848522186,
+      "learning_rate": 0.00016502525616972146,
+      "loss": 0.1454,
+      "step": 12125
+    },
+    {
+      "epoch": 0.8749233377827483,
+      "grad_norm": 0.1307978481054306,
+      "learning_rate": 0.00016502236975032473,
+      "loss": 0.1146,
+      "step": 12126
+    },
+    {
+      "epoch": 0.8749954904578088,
+      "grad_norm": 0.1131959781050682,
+      "learning_rate": 0.000165019483330928,
+      "loss": 0.1657,
+      "step": 12127
+    },
+    {
+      "epoch": 0.8750676431328691,
+      "grad_norm": 0.11155702918767929,
+      "learning_rate": 0.00016501659691153125,
+      "loss": 0.1831,
+      "step": 12128
+    },
+    {
+      "epoch": 0.8751397958079296,
+      "grad_norm": 0.1364208310842514,
+      "learning_rate": 0.00016501371049213451,
+      "loss": 0.1599,
+      "step": 12129
+    },
+    {
+      "epoch": 0.87521194848299,
+      "grad_norm": 0.11704082787036896,
+      "learning_rate": 0.00016501082407273778,
+      "loss": 0.1455,
+      "step": 12130
+    },
+    {
+      "epoch": 0.8752841011580504,
+      "grad_norm": 0.12343720346689224,
+      "learning_rate": 0.00016500793765334104,
+      "loss": 0.1096,
+      "step": 12131
+    },
+    {
+      "epoch": 0.8753562538331109,
+      "grad_norm": 0.119235560297966,
+      "learning_rate": 0.0001650050512339443,
+      "loss": 0.1918,
+      "step": 12132
+    },
+    {
+      "epoch": 0.8754284065081713,
+      "grad_norm": 0.11352014541625977,
+      "learning_rate": 0.00016500216481454757,
+      "loss": 0.1756,
+      "step": 12133
+    },
+    {
+      "epoch": 0.8755005591832317,
+      "grad_norm": 0.1395050287246704,
+      "learning_rate": 0.0001649992783951508,
+      "loss": 0.1255,
+      "step": 12134
+    },
+    {
+      "epoch": 0.8755727118582921,
+      "grad_norm": 0.11708272248506546,
+      "learning_rate": 0.0001649963919757541,
+      "loss": 0.1755,
+      "step": 12135
+    },
+    {
+      "epoch": 0.8756448645333526,
+      "grad_norm": 0.11024229973554611,
+      "learning_rate": 0.00016499350555635735,
+      "loss": 0.1124,
+      "step": 12136
+    },
+    {
+      "epoch": 0.875717017208413,
+      "grad_norm": 0.12274841964244843,
+      "learning_rate": 0.00016499061913696062,
+      "loss": 0.147,
+      "step": 12137
+    },
+    {
+      "epoch": 0.8757891698834734,
+      "grad_norm": 0.11719904094934464,
+      "learning_rate": 0.00016498773271756388,
+      "loss": 0.1524,
+      "step": 12138
+    },
+    {
+      "epoch": 0.8758613225585339,
+      "grad_norm": 0.13587170839309692,
+      "learning_rate": 0.00016498484629816712,
+      "loss": 0.1441,
+      "step": 12139
+    },
+    {
+      "epoch": 0.8759334752335943,
+      "grad_norm": 0.1277083307504654,
+      "learning_rate": 0.00016498195987877038,
+      "loss": 0.1409,
+      "step": 12140
+    },
+    {
+      "epoch": 0.8760056279086547,
+      "grad_norm": 0.18424269556999207,
+      "learning_rate": 0.00016497907345937364,
+      "loss": 0.1853,
+      "step": 12141
+    },
+    {
+      "epoch": 0.8760777805837151,
+      "grad_norm": 0.13032427430152893,
+      "learning_rate": 0.00016497618703997693,
+      "loss": 0.1719,
+      "step": 12142
+    },
+    {
+      "epoch": 0.8761499332587755,
+      "grad_norm": 0.11301422864198685,
+      "learning_rate": 0.0001649733006205802,
+      "loss": 0.1651,
+      "step": 12143
+    },
+    {
+      "epoch": 0.876222085933836,
+      "grad_norm": 0.1141270324587822,
+      "learning_rate": 0.00016497041420118343,
+      "loss": 0.1389,
+      "step": 12144
+    },
+    {
+      "epoch": 0.8762942386088964,
+      "grad_norm": 0.14169326424598694,
+      "learning_rate": 0.0001649675277817867,
+      "loss": 0.1953,
+      "step": 12145
+    },
+    {
+      "epoch": 0.8763663912839569,
+      "grad_norm": 0.1484052836894989,
+      "learning_rate": 0.00016496464136238996,
+      "loss": 0.1681,
+      "step": 12146
+    },
+    {
+      "epoch": 0.8764385439590173,
+      "grad_norm": 0.12363431602716446,
+      "learning_rate": 0.00016496175494299322,
+      "loss": 0.1425,
+      "step": 12147
+    },
+    {
+      "epoch": 0.8765106966340777,
+      "grad_norm": 0.18553423881530762,
+      "learning_rate": 0.00016495886852359648,
+      "loss": 0.1713,
+      "step": 12148
+    },
+    {
+      "epoch": 0.8765828493091381,
+      "grad_norm": 0.12179651856422424,
+      "learning_rate": 0.00016495598210419975,
+      "loss": 0.1991,
+      "step": 12149
+    },
+    {
+      "epoch": 0.8766550019841985,
+      "grad_norm": 0.11454205960035324,
+      "learning_rate": 0.000164953095684803,
+      "loss": 0.1546,
+      "step": 12150
+    },
+    {
+      "epoch": 0.876727154659259,
+      "grad_norm": 0.1588776707649231,
+      "learning_rate": 0.00016495020926540627,
+      "loss": 0.0914,
+      "step": 12151
+    },
+    {
+      "epoch": 0.8767993073343194,
+      "grad_norm": 0.10719560831785202,
+      "learning_rate": 0.00016494732284600953,
+      "loss": 0.1348,
+      "step": 12152
+    },
+    {
+      "epoch": 0.8768714600093799,
+      "grad_norm": 0.0964108407497406,
+      "learning_rate": 0.0001649444364266128,
+      "loss": 0.1281,
+      "step": 12153
+    },
+    {
+      "epoch": 0.8769436126844403,
+      "grad_norm": 0.13987363874912262,
+      "learning_rate": 0.00016494155000721606,
+      "loss": 0.1637,
+      "step": 12154
+    },
+    {
+      "epoch": 0.8770157653595007,
+      "grad_norm": 0.12305955588817596,
+      "learning_rate": 0.0001649386635878193,
+      "loss": 0.1668,
+      "step": 12155
+    },
+    {
+      "epoch": 0.8770879180345611,
+      "grad_norm": 0.1471739113330841,
+      "learning_rate": 0.00016493577716842259,
+      "loss": 0.1353,
+      "step": 12156
+    },
+    {
+      "epoch": 0.8771600707096215,
+      "grad_norm": 0.12235169112682343,
+      "learning_rate": 0.00016493289074902585,
+      "loss": 0.1549,
+      "step": 12157
+    },
+    {
+      "epoch": 0.877232223384682,
+      "grad_norm": 0.10822771489620209,
+      "learning_rate": 0.0001649300043296291,
+      "loss": 0.1443,
+      "step": 12158
+    },
+    {
+      "epoch": 0.8773043760597424,
+      "grad_norm": 0.15206243097782135,
+      "learning_rate": 0.00016492711791023237,
+      "loss": 0.1538,
+      "step": 12159
+    },
+    {
+      "epoch": 0.8773765287348029,
+      "grad_norm": 0.12351173907518387,
+      "learning_rate": 0.0001649242314908356,
+      "loss": 0.1008,
+      "step": 12160
+    },
+    {
+      "epoch": 0.8774486814098633,
+      "grad_norm": 0.14727401733398438,
+      "learning_rate": 0.00016492134507143887,
+      "loss": 0.1452,
+      "step": 12161
+    },
+    {
+      "epoch": 0.8775208340849237,
+      "grad_norm": 0.1338951289653778,
+      "learning_rate": 0.00016491845865204214,
+      "loss": 0.1469,
+      "step": 12162
+    },
+    {
+      "epoch": 0.8775929867599841,
+      "grad_norm": 0.13076646625995636,
+      "learning_rate": 0.00016491557223264543,
+      "loss": 0.1715,
+      "step": 12163
+    },
+    {
+      "epoch": 0.8776651394350445,
+      "grad_norm": 0.12688609957695007,
+      "learning_rate": 0.0001649126858132487,
+      "loss": 0.1392,
+      "step": 12164
+    },
+    {
+      "epoch": 0.877737292110105,
+      "grad_norm": 0.13240867853164673,
+      "learning_rate": 0.00016490979939385192,
+      "loss": 0.1551,
+      "step": 12165
+    },
+    {
+      "epoch": 0.8778094447851654,
+      "grad_norm": 0.11780638992786407,
+      "learning_rate": 0.0001649069129744552,
+      "loss": 0.131,
+      "step": 12166
+    },
+    {
+      "epoch": 0.8778815974602259,
+      "grad_norm": 0.11551574617624283,
+      "learning_rate": 0.00016490402655505845,
+      "loss": 0.1378,
+      "step": 12167
+    },
+    {
+      "epoch": 0.8779537501352863,
+      "grad_norm": 0.12620681524276733,
+      "learning_rate": 0.0001649011401356617,
+      "loss": 0.1612,
+      "step": 12168
+    },
+    {
+      "epoch": 0.8780259028103466,
+      "grad_norm": 0.1293182671070099,
+      "learning_rate": 0.00016489825371626498,
+      "loss": 0.153,
+      "step": 12169
+    },
+    {
+      "epoch": 0.8780980554854071,
+      "grad_norm": 0.12778621912002563,
+      "learning_rate": 0.00016489536729686824,
+      "loss": 0.1334,
+      "step": 12170
+    },
+    {
+      "epoch": 0.8781702081604675,
+      "grad_norm": 0.145514577627182,
+      "learning_rate": 0.0001648924808774715,
+      "loss": 0.1543,
+      "step": 12171
+    },
+    {
+      "epoch": 0.878242360835528,
+      "grad_norm": 0.132717102766037,
+      "learning_rate": 0.00016488959445807477,
+      "loss": 0.1442,
+      "step": 12172
+    },
+    {
+      "epoch": 0.8783145135105884,
+      "grad_norm": 0.12787772715091705,
+      "learning_rate": 0.00016488670803867803,
+      "loss": 0.1409,
+      "step": 12173
+    },
+    {
+      "epoch": 0.8783866661856489,
+      "grad_norm": 0.13414645195007324,
+      "learning_rate": 0.0001648838216192813,
+      "loss": 0.1746,
+      "step": 12174
+    },
+    {
+      "epoch": 0.8784588188607093,
+      "grad_norm": 0.1296178102493286,
+      "learning_rate": 0.00016488093519988455,
+      "loss": 0.146,
+      "step": 12175
+    },
+    {
+      "epoch": 0.8785309715357696,
+      "grad_norm": 0.16228878498077393,
+      "learning_rate": 0.00016487804878048782,
+      "loss": 0.1699,
+      "step": 12176
+    },
+    {
+      "epoch": 0.8786031242108301,
+      "grad_norm": 0.12227675318717957,
+      "learning_rate": 0.00016487516236109108,
+      "loss": 0.1425,
+      "step": 12177
+    },
+    {
+      "epoch": 0.8786752768858905,
+      "grad_norm": 0.20164965093135834,
+      "learning_rate": 0.00016487227594169434,
+      "loss": 0.1851,
+      "step": 12178
+    },
+    {
+      "epoch": 0.878747429560951,
+      "grad_norm": 0.1277187317609787,
+      "learning_rate": 0.0001648693895222976,
+      "loss": 0.1085,
+      "step": 12179
+    },
+    {
+      "epoch": 0.8788195822360114,
+      "grad_norm": 0.131092831492424,
+      "learning_rate": 0.00016486650310290087,
+      "loss": 0.1552,
+      "step": 12180
+    },
+    {
+      "epoch": 0.8788917349110719,
+      "grad_norm": 0.119987852871418,
+      "learning_rate": 0.00016486361668350413,
+      "loss": 0.1273,
+      "step": 12181
+    },
+    {
+      "epoch": 0.8789638875861323,
+      "grad_norm": 0.13632720708847046,
+      "learning_rate": 0.00016486073026410737,
+      "loss": 0.1183,
+      "step": 12182
+    },
+    {
+      "epoch": 0.8790360402611926,
+      "grad_norm": 0.14824025332927704,
+      "learning_rate": 0.00016485784384471063,
+      "loss": 0.1434,
+      "step": 12183
+    },
+    {
+      "epoch": 0.8791081929362531,
+      "grad_norm": 0.12320008873939514,
+      "learning_rate": 0.00016485495742531392,
+      "loss": 0.128,
+      "step": 12184
+    },
+    {
+      "epoch": 0.8791803456113135,
+      "grad_norm": 0.10985496640205383,
+      "learning_rate": 0.00016485207100591718,
+      "loss": 0.1667,
+      "step": 12185
+    },
+    {
+      "epoch": 0.879252498286374,
+      "grad_norm": 0.12912528216838837,
+      "learning_rate": 0.00016484918458652045,
+      "loss": 0.1368,
+      "step": 12186
+    },
+    {
+      "epoch": 0.8793246509614344,
+      "grad_norm": 0.136422798037529,
+      "learning_rate": 0.00016484629816712368,
+      "loss": 0.114,
+      "step": 12187
+    },
+    {
+      "epoch": 0.8793968036364949,
+      "grad_norm": 0.11968207359313965,
+      "learning_rate": 0.00016484341174772694,
+      "loss": 0.098,
+      "step": 12188
+    },
+    {
+      "epoch": 0.8794689563115553,
+      "grad_norm": 0.11828417330980301,
+      "learning_rate": 0.0001648405253283302,
+      "loss": 0.1117,
+      "step": 12189
+    },
+    {
+      "epoch": 0.8795411089866156,
+      "grad_norm": 0.11578892916440964,
+      "learning_rate": 0.00016483763890893347,
+      "loss": 0.1282,
+      "step": 12190
+    },
+    {
+      "epoch": 0.8796132616616761,
+      "grad_norm": 0.10919444262981415,
+      "learning_rate": 0.00016483475248953676,
+      "loss": 0.175,
+      "step": 12191
+    },
+    {
+      "epoch": 0.8796854143367365,
+      "grad_norm": 0.1338009089231491,
+      "learning_rate": 0.00016483186607014,
+      "loss": 0.163,
+      "step": 12192
+    },
+    {
+      "epoch": 0.879757567011797,
+      "grad_norm": 0.12004167586565018,
+      "learning_rate": 0.00016482897965074326,
+      "loss": 0.1751,
+      "step": 12193
+    },
+    {
+      "epoch": 0.8798297196868574,
+      "grad_norm": 0.14760249853134155,
+      "learning_rate": 0.00016482609323134652,
+      "loss": 0.1675,
+      "step": 12194
+    },
+    {
+      "epoch": 0.8799018723619179,
+      "grad_norm": 0.16406826674938202,
+      "learning_rate": 0.00016482320681194979,
+      "loss": 0.1977,
+      "step": 12195
+    },
+    {
+      "epoch": 0.8799740250369782,
+      "grad_norm": 0.11777998507022858,
+      "learning_rate": 0.00016482032039255305,
+      "loss": 0.1638,
+      "step": 12196
+    },
+    {
+      "epoch": 0.8800461777120386,
+      "grad_norm": 0.12022560834884644,
+      "learning_rate": 0.0001648174339731563,
+      "loss": 0.1686,
+      "step": 12197
+    },
+    {
+      "epoch": 0.8801183303870991,
+      "grad_norm": 0.12894538044929504,
+      "learning_rate": 0.00016481454755375957,
+      "loss": 0.1485,
+      "step": 12198
+    },
+    {
+      "epoch": 0.8801904830621595,
+      "grad_norm": 0.12284479290246964,
+      "learning_rate": 0.00016481166113436284,
+      "loss": 0.1721,
+      "step": 12199
+    },
+    {
+      "epoch": 0.88026263573722,
+      "grad_norm": 0.11004292219877243,
+      "learning_rate": 0.0001648087747149661,
+      "loss": 0.1955,
+      "step": 12200
+    },
+    {
+      "epoch": 0.8803347884122804,
+      "grad_norm": 0.12605997920036316,
+      "learning_rate": 0.00016480588829556936,
+      "loss": 0.1145,
+      "step": 12201
+    },
+    {
+      "epoch": 0.8804069410873409,
+      "grad_norm": 0.125811368227005,
+      "learning_rate": 0.00016480300187617263,
+      "loss": 0.1377,
+      "step": 12202
+    },
+    {
+      "epoch": 0.8804790937624012,
+      "grad_norm": 0.1498311460018158,
+      "learning_rate": 0.00016480011545677586,
+      "loss": 0.1361,
+      "step": 12203
+    },
+    {
+      "epoch": 0.8805512464374616,
+      "grad_norm": 0.12281493097543716,
+      "learning_rate": 0.00016479722903737912,
+      "loss": 0.1218,
+      "step": 12204
+    },
+    {
+      "epoch": 0.8806233991125221,
+      "grad_norm": 0.11560608446598053,
+      "learning_rate": 0.00016479434261798241,
+      "loss": 0.129,
+      "step": 12205
+    },
+    {
+      "epoch": 0.8806955517875825,
+      "grad_norm": 0.13548126816749573,
+      "learning_rate": 0.00016479145619858568,
+      "loss": 0.1461,
+      "step": 12206
+    },
+    {
+      "epoch": 0.880767704462643,
+      "grad_norm": 0.13236747682094574,
+      "learning_rate": 0.00016478856977918894,
+      "loss": 0.1244,
+      "step": 12207
+    },
+    {
+      "epoch": 0.8808398571377034,
+      "grad_norm": 0.13994522392749786,
+      "learning_rate": 0.00016478568335979218,
+      "loss": 0.1875,
+      "step": 12208
+    },
+    {
+      "epoch": 0.8809120098127639,
+      "grad_norm": 0.126194030046463,
+      "learning_rate": 0.00016478279694039544,
+      "loss": 0.1366,
+      "step": 12209
+    },
+    {
+      "epoch": 0.8809841624878242,
+      "grad_norm": 0.11344815045595169,
+      "learning_rate": 0.0001647799105209987,
+      "loss": 0.1843,
+      "step": 12210
+    },
+    {
+      "epoch": 0.8810563151628846,
+      "grad_norm": 0.1344424933195114,
+      "learning_rate": 0.00016477702410160196,
+      "loss": 0.1549,
+      "step": 12211
+    },
+    {
+      "epoch": 0.8811284678379451,
+      "grad_norm": 0.12091745436191559,
+      "learning_rate": 0.00016477413768220525,
+      "loss": 0.1731,
+      "step": 12212
+    },
+    {
+      "epoch": 0.8812006205130055,
+      "grad_norm": 0.1225610002875328,
+      "learning_rate": 0.0001647712512628085,
+      "loss": 0.1256,
+      "step": 12213
+    },
+    {
+      "epoch": 0.881272773188066,
+      "grad_norm": 0.13267618417739868,
+      "learning_rate": 0.00016476836484341175,
+      "loss": 0.122,
+      "step": 12214
+    },
+    {
+      "epoch": 0.8813449258631264,
+      "grad_norm": 0.15878993272781372,
+      "learning_rate": 0.00016476547842401502,
+      "loss": 0.1221,
+      "step": 12215
+    },
+    {
+      "epoch": 0.8814170785381868,
+      "grad_norm": 0.12710832059383392,
+      "learning_rate": 0.00016476259200461828,
+      "loss": 0.1372,
+      "step": 12216
+    },
+    {
+      "epoch": 0.8814892312132472,
+      "grad_norm": 0.12072007358074188,
+      "learning_rate": 0.00016475970558522154,
+      "loss": 0.1506,
+      "step": 12217
+    },
+    {
+      "epoch": 0.8815613838883076,
+      "grad_norm": 0.1832415908575058,
+      "learning_rate": 0.0001647568191658248,
+      "loss": 0.1452,
+      "step": 12218
+    },
+    {
+      "epoch": 0.8816335365633681,
+      "grad_norm": 0.11516207456588745,
+      "learning_rate": 0.00016475393274642807,
+      "loss": 0.1508,
+      "step": 12219
+    },
+    {
+      "epoch": 0.8817056892384285,
+      "grad_norm": 0.12112973630428314,
+      "learning_rate": 0.00016475104632703133,
+      "loss": 0.2082,
+      "step": 12220
+    },
+    {
+      "epoch": 0.881777841913489,
+      "grad_norm": 0.12404236942529678,
+      "learning_rate": 0.0001647481599076346,
+      "loss": 0.1129,
+      "step": 12221
+    },
+    {
+      "epoch": 0.8818499945885494,
+      "grad_norm": 0.12267255783081055,
+      "learning_rate": 0.00016474527348823786,
+      "loss": 0.1512,
+      "step": 12222
+    },
+    {
+      "epoch": 0.8819221472636098,
+      "grad_norm": 0.10327456891536713,
+      "learning_rate": 0.00016474238706884112,
+      "loss": 0.1504,
+      "step": 12223
+    },
+    {
+      "epoch": 0.8819942999386702,
+      "grad_norm": 0.12243549525737762,
+      "learning_rate": 0.00016473950064944436,
+      "loss": 0.1422,
+      "step": 12224
+    },
+    {
+      "epoch": 0.8820664526137306,
+      "grad_norm": 0.10996757447719574,
+      "learning_rate": 0.00016473661423004762,
+      "loss": 0.1667,
+      "step": 12225
+    },
+    {
+      "epoch": 0.8821386052887911,
+      "grad_norm": 0.130668506026268,
+      "learning_rate": 0.0001647337278106509,
+      "loss": 0.1026,
+      "step": 12226
+    },
+    {
+      "epoch": 0.8822107579638515,
+      "grad_norm": 0.12705399096012115,
+      "learning_rate": 0.00016473084139125417,
+      "loss": 0.1611,
+      "step": 12227
+    },
+    {
+      "epoch": 0.882282910638912,
+      "grad_norm": 0.1277860701084137,
+      "learning_rate": 0.00016472795497185743,
+      "loss": 0.1216,
+      "step": 12228
+    },
+    {
+      "epoch": 0.8823550633139724,
+      "grad_norm": 0.11852487176656723,
+      "learning_rate": 0.00016472506855246067,
+      "loss": 0.1557,
+      "step": 12229
+    },
+    {
+      "epoch": 0.8824272159890328,
+      "grad_norm": 0.12020008265972137,
+      "learning_rate": 0.00016472218213306393,
+      "loss": 0.1284,
+      "step": 12230
+    },
+    {
+      "epoch": 0.8824993686640932,
+      "grad_norm": 0.13153263926506042,
+      "learning_rate": 0.0001647192957136672,
+      "loss": 0.1085,
+      "step": 12231
+    },
+    {
+      "epoch": 0.8825715213391536,
+      "grad_norm": 0.15343275666236877,
+      "learning_rate": 0.00016471640929427046,
+      "loss": 0.1608,
+      "step": 12232
+    },
+    {
+      "epoch": 0.8826436740142141,
+      "grad_norm": 0.1277267336845398,
+      "learning_rate": 0.00016471352287487375,
+      "loss": 0.1511,
+      "step": 12233
+    },
+    {
+      "epoch": 0.8827158266892745,
+      "grad_norm": 0.1161343902349472,
+      "learning_rate": 0.00016471063645547699,
+      "loss": 0.1492,
+      "step": 12234
+    },
+    {
+      "epoch": 0.882787979364335,
+      "grad_norm": 0.1242692694067955,
+      "learning_rate": 0.00016470775003608025,
+      "loss": 0.1598,
+      "step": 12235
+    },
+    {
+      "epoch": 0.8828601320393954,
+      "grad_norm": 0.14076781272888184,
+      "learning_rate": 0.0001647048636166835,
+      "loss": 0.1475,
+      "step": 12236
+    },
+    {
+      "epoch": 0.8829322847144558,
+      "grad_norm": 0.1138407289981842,
+      "learning_rate": 0.00016470197719728677,
+      "loss": 0.1583,
+      "step": 12237
+    },
+    {
+      "epoch": 0.8830044373895162,
+      "grad_norm": 0.12430385500192642,
+      "learning_rate": 0.00016469909077789004,
+      "loss": 0.1189,
+      "step": 12238
+    },
+    {
+      "epoch": 0.8830765900645766,
+      "grad_norm": 0.13104628026485443,
+      "learning_rate": 0.0001646962043584933,
+      "loss": 0.1494,
+      "step": 12239
+    },
+    {
+      "epoch": 0.8831487427396371,
+      "grad_norm": 0.12466448545455933,
+      "learning_rate": 0.00016469331793909656,
+      "loss": 0.1471,
+      "step": 12240
+    },
+    {
+      "epoch": 0.8832208954146975,
+      "grad_norm": 0.13318614661693573,
+      "learning_rate": 0.00016469043151969983,
+      "loss": 0.1135,
+      "step": 12241
+    },
+    {
+      "epoch": 0.883293048089758,
+      "grad_norm": 0.1286313533782959,
+      "learning_rate": 0.0001646875451003031,
+      "loss": 0.1148,
+      "step": 12242
+    },
+    {
+      "epoch": 0.8833652007648184,
+      "grad_norm": 0.11111229658126831,
+      "learning_rate": 0.00016468465868090635,
+      "loss": 0.1358,
+      "step": 12243
+    },
+    {
+      "epoch": 0.8834373534398788,
+      "grad_norm": 0.1381540298461914,
+      "learning_rate": 0.00016468177226150961,
+      "loss": 0.1428,
+      "step": 12244
+    },
+    {
+      "epoch": 0.8835095061149392,
+      "grad_norm": 0.11672315746545792,
+      "learning_rate": 0.00016467888584211285,
+      "loss": 0.127,
+      "step": 12245
+    },
+    {
+      "epoch": 0.8835816587899996,
+      "grad_norm": 0.17194171249866486,
+      "learning_rate": 0.0001646759994227161,
+      "loss": 0.1502,
+      "step": 12246
+    },
+    {
+      "epoch": 0.8836538114650601,
+      "grad_norm": 0.1031903326511383,
+      "learning_rate": 0.00016467311300331938,
+      "loss": 0.1147,
+      "step": 12247
+    },
+    {
+      "epoch": 0.8837259641401205,
+      "grad_norm": 0.12334106117486954,
+      "learning_rate": 0.00016467022658392267,
+      "loss": 0.121,
+      "step": 12248
+    },
+    {
+      "epoch": 0.883798116815181,
+      "grad_norm": 0.13263913989067078,
+      "learning_rate": 0.00016466734016452593,
+      "loss": 0.1376,
+      "step": 12249
+    },
+    {
+      "epoch": 0.8838702694902414,
+      "grad_norm": 0.12783348560333252,
+      "learning_rate": 0.00016466445374512916,
+      "loss": 0.133,
+      "step": 12250
+    },
+    {
+      "epoch": 0.8839424221653018,
+      "grad_norm": 0.1204017698764801,
+      "learning_rate": 0.00016466156732573243,
+      "loss": 0.1071,
+      "step": 12251
+    },
+    {
+      "epoch": 0.8840145748403622,
+      "grad_norm": 0.12959232926368713,
+      "learning_rate": 0.0001646586809063357,
+      "loss": 0.1584,
+      "step": 12252
+    },
+    {
+      "epoch": 0.8840867275154226,
+      "grad_norm": 0.1044325977563858,
+      "learning_rate": 0.00016465579448693895,
+      "loss": 0.1868,
+      "step": 12253
+    },
+    {
+      "epoch": 0.884158880190483,
+      "grad_norm": 0.13037028908729553,
+      "learning_rate": 0.00016465290806754222,
+      "loss": 0.1786,
+      "step": 12254
+    },
+    {
+      "epoch": 0.8842310328655435,
+      "grad_norm": 0.10903565585613251,
+      "learning_rate": 0.00016465002164814548,
+      "loss": 0.1822,
+      "step": 12255
+    },
+    {
+      "epoch": 0.8843031855406039,
+      "grad_norm": 0.10615143924951553,
+      "learning_rate": 0.00016464713522874874,
+      "loss": 0.1216,
+      "step": 12256
+    },
+    {
+      "epoch": 0.8843753382156644,
+      "grad_norm": 0.10600007325410843,
+      "learning_rate": 0.000164644248809352,
+      "loss": 0.2071,
+      "step": 12257
+    },
+    {
+      "epoch": 0.8844474908907247,
+      "grad_norm": 0.19849993288516998,
+      "learning_rate": 0.00016464136238995527,
+      "loss": 0.1893,
+      "step": 12258
+    },
+    {
+      "epoch": 0.8845196435657852,
+      "grad_norm": 0.13202063739299774,
+      "learning_rate": 0.00016463847597055853,
+      "loss": 0.198,
+      "step": 12259
+    },
+    {
+      "epoch": 0.8845917962408456,
+      "grad_norm": 0.11806654185056686,
+      "learning_rate": 0.0001646355895511618,
+      "loss": 0.1756,
+      "step": 12260
+    },
+    {
+      "epoch": 0.884663948915906,
+      "grad_norm": 0.11329864710569382,
+      "learning_rate": 0.00016463270313176503,
+      "loss": 0.1644,
+      "step": 12261
+    },
+    {
+      "epoch": 0.8847361015909665,
+      "grad_norm": 0.11153256148099899,
+      "learning_rate": 0.00016462981671236832,
+      "loss": 0.1735,
+      "step": 12262
+    },
+    {
+      "epoch": 0.8848082542660269,
+      "grad_norm": 0.11486808955669403,
+      "learning_rate": 0.00016462693029297158,
+      "loss": 0.1716,
+      "step": 12263
+    },
+    {
+      "epoch": 0.8848804069410874,
+      "grad_norm": 0.13077178597450256,
+      "learning_rate": 0.00016462404387357485,
+      "loss": 0.1743,
+      "step": 12264
+    },
+    {
+      "epoch": 0.8849525596161477,
+      "grad_norm": 0.11764027178287506,
+      "learning_rate": 0.0001646211574541781,
+      "loss": 0.1531,
+      "step": 12265
+    },
+    {
+      "epoch": 0.8850247122912082,
+      "grad_norm": 0.19978061318397522,
+      "learning_rate": 0.00016461827103478134,
+      "loss": 0.1957,
+      "step": 12266
+    },
+    {
+      "epoch": 0.8850968649662686,
+      "grad_norm": 0.13150623440742493,
+      "learning_rate": 0.0001646153846153846,
+      "loss": 0.1832,
+      "step": 12267
+    },
+    {
+      "epoch": 0.885169017641329,
+      "grad_norm": 0.13365094363689423,
+      "learning_rate": 0.00016461249819598787,
+      "loss": 0.1367,
+      "step": 12268
+    },
+    {
+      "epoch": 0.8852411703163895,
+      "grad_norm": 0.143122598528862,
+      "learning_rate": 0.00016460961177659116,
+      "loss": 0.1552,
+      "step": 12269
+    },
+    {
+      "epoch": 0.8853133229914499,
+      "grad_norm": 0.13805808126926422,
+      "learning_rate": 0.00016460672535719442,
+      "loss": 0.1188,
+      "step": 12270
+    },
+    {
+      "epoch": 0.8853854756665104,
+      "grad_norm": 0.12481489777565002,
+      "learning_rate": 0.00016460383893779766,
+      "loss": 0.1625,
+      "step": 12271
+    },
+    {
+      "epoch": 0.8854576283415707,
+      "grad_norm": 0.12569913268089294,
+      "learning_rate": 0.00016460095251840092,
+      "loss": 0.1273,
+      "step": 12272
+    },
+    {
+      "epoch": 0.8855297810166312,
+      "grad_norm": 0.12131577730178833,
+      "learning_rate": 0.00016459806609900418,
+      "loss": 0.1331,
+      "step": 12273
+    },
+    {
+      "epoch": 0.8856019336916916,
+      "grad_norm": 0.14365747570991516,
+      "learning_rate": 0.00016459517967960745,
+      "loss": 0.1433,
+      "step": 12274
+    },
+    {
+      "epoch": 0.885674086366752,
+      "grad_norm": 0.11679968982934952,
+      "learning_rate": 0.0001645922932602107,
+      "loss": 0.1306,
+      "step": 12275
+    },
+    {
+      "epoch": 0.8857462390418125,
+      "grad_norm": 0.1197090595960617,
+      "learning_rate": 0.00016458940684081397,
+      "loss": 0.0986,
+      "step": 12276
+    },
+    {
+      "epoch": 0.8858183917168729,
+      "grad_norm": 0.1241610199213028,
+      "learning_rate": 0.00016458652042141724,
+      "loss": 0.1339,
+      "step": 12277
+    },
+    {
+      "epoch": 0.8858905443919334,
+      "grad_norm": 0.1254902333021164,
+      "learning_rate": 0.0001645836340020205,
+      "loss": 0.1433,
+      "step": 12278
+    },
+    {
+      "epoch": 0.8859626970669937,
+      "grad_norm": 0.14213827252388,
+      "learning_rate": 0.00016458074758262376,
+      "loss": 0.1072,
+      "step": 12279
+    },
+    {
+      "epoch": 0.8860348497420542,
+      "grad_norm": 0.1069590151309967,
+      "learning_rate": 0.00016457786116322703,
+      "loss": 0.1672,
+      "step": 12280
+    },
+    {
+      "epoch": 0.8861070024171146,
+      "grad_norm": 0.12080402672290802,
+      "learning_rate": 0.0001645749747438303,
+      "loss": 0.1717,
+      "step": 12281
+    },
+    {
+      "epoch": 0.886179155092175,
+      "grad_norm": 0.11748300492763519,
+      "learning_rate": 0.00016457208832443355,
+      "loss": 0.0995,
+      "step": 12282
+    },
+    {
+      "epoch": 0.8862513077672355,
+      "grad_norm": 0.11120393127202988,
+      "learning_rate": 0.00016456920190503681,
+      "loss": 0.1702,
+      "step": 12283
+    },
+    {
+      "epoch": 0.8863234604422959,
+      "grad_norm": 0.11398053914308548,
+      "learning_rate": 0.00016456631548564008,
+      "loss": 0.1158,
+      "step": 12284
+    },
+    {
+      "epoch": 0.8863956131173564,
+      "grad_norm": 0.10123290121555328,
+      "learning_rate": 0.00016456342906624334,
+      "loss": 0.1739,
+      "step": 12285
+    },
+    {
+      "epoch": 0.8864677657924167,
+      "grad_norm": 0.11475709825754166,
+      "learning_rate": 0.0001645605426468466,
+      "loss": 0.1122,
+      "step": 12286
+    },
+    {
+      "epoch": 0.8865399184674772,
+      "grad_norm": 0.13295650482177734,
+      "learning_rate": 0.00016455765622744987,
+      "loss": 0.1475,
+      "step": 12287
+    },
+    {
+      "epoch": 0.8866120711425376,
+      "grad_norm": 0.1309804469347,
+      "learning_rate": 0.0001645547698080531,
+      "loss": 0.1352,
+      "step": 12288
+    },
+    {
+      "epoch": 0.886684223817598,
+      "grad_norm": 0.17298905551433563,
+      "learning_rate": 0.00016455188338865636,
+      "loss": 0.1796,
+      "step": 12289
+    },
+    {
+      "epoch": 0.8867563764926585,
+      "grad_norm": 0.11581968516111374,
+      "learning_rate": 0.00016454899696925965,
+      "loss": 0.1089,
+      "step": 12290
+    },
+    {
+      "epoch": 0.8868285291677189,
+      "grad_norm": 0.13596712052822113,
+      "learning_rate": 0.00016454611054986292,
+      "loss": 0.1785,
+      "step": 12291
+    },
+    {
+      "epoch": 0.8869006818427794,
+      "grad_norm": 0.12236089259386063,
+      "learning_rate": 0.00016454322413046618,
+      "loss": 0.108,
+      "step": 12292
+    },
+    {
+      "epoch": 0.8869728345178397,
+      "grad_norm": 0.1264708787202835,
+      "learning_rate": 0.00016454033771106942,
+      "loss": 0.1284,
+      "step": 12293
+    },
+    {
+      "epoch": 0.8870449871929001,
+      "grad_norm": 0.12240929901599884,
+      "learning_rate": 0.00016453745129167268,
+      "loss": 0.1002,
+      "step": 12294
+    },
+    {
+      "epoch": 0.8871171398679606,
+      "grad_norm": 0.12874439358711243,
+      "learning_rate": 0.00016453456487227594,
+      "loss": 0.1123,
+      "step": 12295
+    },
+    {
+      "epoch": 0.887189292543021,
+      "grad_norm": 0.1249275654554367,
+      "learning_rate": 0.0001645316784528792,
+      "loss": 0.1654,
+      "step": 12296
+    },
+    {
+      "epoch": 0.8872614452180815,
+      "grad_norm": 0.1129474937915802,
+      "learning_rate": 0.0001645287920334825,
+      "loss": 0.1105,
+      "step": 12297
+    },
+    {
+      "epoch": 0.8873335978931419,
+      "grad_norm": 0.1413096785545349,
+      "learning_rate": 0.00016452590561408573,
+      "loss": 0.1364,
+      "step": 12298
+    },
+    {
+      "epoch": 0.8874057505682024,
+      "grad_norm": 0.19914685189723969,
+      "learning_rate": 0.000164523019194689,
+      "loss": 0.1652,
+      "step": 12299
+    },
+    {
+      "epoch": 0.8874779032432627,
+      "grad_norm": 0.1189395859837532,
+      "learning_rate": 0.00016452013277529226,
+      "loss": 0.1557,
+      "step": 12300
+    },
+    {
+      "epoch": 0.8875500559183231,
+      "grad_norm": 0.12614993751049042,
+      "learning_rate": 0.00016451724635589552,
+      "loss": 0.1148,
+      "step": 12301
+    },
+    {
+      "epoch": 0.8876222085933836,
+      "grad_norm": 0.12520967423915863,
+      "learning_rate": 0.00016451435993649878,
+      "loss": 0.1256,
+      "step": 12302
+    },
+    {
+      "epoch": 0.887694361268444,
+      "grad_norm": 0.117799773812294,
+      "learning_rate": 0.00016451147351710205,
+      "loss": 0.164,
+      "step": 12303
+    },
+    {
+      "epoch": 0.8877665139435045,
+      "grad_norm": 0.1549709290266037,
+      "learning_rate": 0.0001645085870977053,
+      "loss": 0.1556,
+      "step": 12304
+    },
+    {
+      "epoch": 0.8878386666185649,
+      "grad_norm": 0.1547599583864212,
+      "learning_rate": 0.00016450570067830857,
+      "loss": 0.1336,
+      "step": 12305
+    },
+    {
+      "epoch": 0.8879108192936254,
+      "grad_norm": 0.1339336782693863,
+      "learning_rate": 0.00016450281425891183,
+      "loss": 0.1222,
+      "step": 12306
+    },
+    {
+      "epoch": 0.8879829719686857,
+      "grad_norm": 0.12404187768697739,
+      "learning_rate": 0.0001644999278395151,
+      "loss": 0.1527,
+      "step": 12307
+    },
+    {
+      "epoch": 0.8880551246437461,
+      "grad_norm": 0.12418852001428604,
+      "learning_rate": 0.00016449704142011836,
+      "loss": 0.176,
+      "step": 12308
+    },
+    {
+      "epoch": 0.8881272773188066,
+      "grad_norm": 0.18659363687038422,
+      "learning_rate": 0.0001644941550007216,
+      "loss": 0.1825,
+      "step": 12309
+    },
+    {
+      "epoch": 0.888199429993867,
+      "grad_norm": 0.15691769123077393,
+      "learning_rate": 0.00016449126858132486,
+      "loss": 0.1564,
+      "step": 12310
+    },
+    {
+      "epoch": 0.8882715826689275,
+      "grad_norm": 0.11697795987129211,
+      "learning_rate": 0.00016448838216192815,
+      "loss": 0.1476,
+      "step": 12311
+    },
+    {
+      "epoch": 0.8883437353439879,
+      "grad_norm": 0.12733706831932068,
+      "learning_rate": 0.0001644854957425314,
+      "loss": 0.1746,
+      "step": 12312
+    },
+    {
+      "epoch": 0.8884158880190484,
+      "grad_norm": 0.13768993318080902,
+      "learning_rate": 0.00016448260932313467,
+      "loss": 0.1183,
+      "step": 12313
+    },
+    {
+      "epoch": 0.8884880406941087,
+      "grad_norm": 0.15787924826145172,
+      "learning_rate": 0.0001644797229037379,
+      "loss": 0.1567,
+      "step": 12314
+    },
+    {
+      "epoch": 0.8885601933691691,
+      "grad_norm": 0.15521618723869324,
+      "learning_rate": 0.00016447683648434117,
+      "loss": 0.0995,
+      "step": 12315
+    },
+    {
+      "epoch": 0.8886323460442296,
+      "grad_norm": 0.1418941766023636,
+      "learning_rate": 0.00016447395006494444,
+      "loss": 0.1839,
+      "step": 12316
+    },
+    {
+      "epoch": 0.88870449871929,
+      "grad_norm": 0.13130566477775574,
+      "learning_rate": 0.0001644710636455477,
+      "loss": 0.1611,
+      "step": 12317
+    },
+    {
+      "epoch": 0.8887766513943505,
+      "grad_norm": 0.11680661141872406,
+      "learning_rate": 0.000164468177226151,
+      "loss": 0.1155,
+      "step": 12318
+    },
+    {
+      "epoch": 0.8888488040694109,
+      "grad_norm": 0.12170635908842087,
+      "learning_rate": 0.00016446529080675422,
+      "loss": 0.1103,
+      "step": 12319
+    },
+    {
+      "epoch": 0.8889209567444712,
+      "grad_norm": 0.1398392617702484,
+      "learning_rate": 0.0001644624043873575,
+      "loss": 0.107,
+      "step": 12320
+    },
+    {
+      "epoch": 0.8889931094195317,
+      "grad_norm": 0.12478537112474442,
+      "learning_rate": 0.00016445951796796075,
+      "loss": 0.1589,
+      "step": 12321
+    },
+    {
+      "epoch": 0.8890652620945921,
+      "grad_norm": 0.11158666014671326,
+      "learning_rate": 0.00016445663154856401,
+      "loss": 0.1273,
+      "step": 12322
+    },
+    {
+      "epoch": 0.8891374147696526,
+      "grad_norm": 0.11000625789165497,
+      "learning_rate": 0.00016445374512916728,
+      "loss": 0.1377,
+      "step": 12323
+    },
+    {
+      "epoch": 0.889209567444713,
+      "grad_norm": 0.1148582473397255,
+      "learning_rate": 0.00016445085870977054,
+      "loss": 0.1342,
+      "step": 12324
+    },
+    {
+      "epoch": 0.8892817201197735,
+      "grad_norm": 0.12248571217060089,
+      "learning_rate": 0.0001644479722903738,
+      "loss": 0.136,
+      "step": 12325
+    },
+    {
+      "epoch": 0.8893538727948339,
+      "grad_norm": 0.13426116108894348,
+      "learning_rate": 0.00016444508587097707,
+      "loss": 0.1739,
+      "step": 12326
+    },
+    {
+      "epoch": 0.8894260254698942,
+      "grad_norm": 0.1333639919757843,
+      "learning_rate": 0.00016444219945158033,
+      "loss": 0.1213,
+      "step": 12327
+    },
+    {
+      "epoch": 0.8894981781449547,
+      "grad_norm": 0.12810485064983368,
+      "learning_rate": 0.0001644393130321836,
+      "loss": 0.1312,
+      "step": 12328
+    },
+    {
+      "epoch": 0.8895703308200151,
+      "grad_norm": 0.13391032814979553,
+      "learning_rate": 0.00016443642661278685,
+      "loss": 0.1716,
+      "step": 12329
+    },
+    {
+      "epoch": 0.8896424834950756,
+      "grad_norm": 0.1410336047410965,
+      "learning_rate": 0.0001644335401933901,
+      "loss": 0.15,
+      "step": 12330
+    },
+    {
+      "epoch": 0.889714636170136,
+      "grad_norm": 0.15608654916286469,
+      "learning_rate": 0.00016443065377399335,
+      "loss": 0.1752,
+      "step": 12331
+    },
+    {
+      "epoch": 0.8897867888451965,
+      "grad_norm": 0.11146390438079834,
+      "learning_rate": 0.00016442776735459664,
+      "loss": 0.1621,
+      "step": 12332
+    },
+    {
+      "epoch": 0.8898589415202569,
+      "grad_norm": 0.13638371229171753,
+      "learning_rate": 0.0001644248809351999,
+      "loss": 0.1706,
+      "step": 12333
+    },
+    {
+      "epoch": 0.8899310941953172,
+      "grad_norm": 0.1298755407333374,
+      "learning_rate": 0.00016442199451580317,
+      "loss": 0.147,
+      "step": 12334
+    },
+    {
+      "epoch": 0.8900032468703777,
+      "grad_norm": 0.13941478729248047,
+      "learning_rate": 0.0001644191080964064,
+      "loss": 0.166,
+      "step": 12335
+    },
+    {
+      "epoch": 0.8900753995454381,
+      "grad_norm": 0.13158883154392242,
+      "learning_rate": 0.00016441622167700967,
+      "loss": 0.1479,
+      "step": 12336
+    },
+    {
+      "epoch": 0.8901475522204986,
+      "grad_norm": 0.12248693406581879,
+      "learning_rate": 0.00016441333525761293,
+      "loss": 0.125,
+      "step": 12337
+    },
+    {
+      "epoch": 0.890219704895559,
+      "grad_norm": 0.12258122116327286,
+      "learning_rate": 0.0001644104488382162,
+      "loss": 0.1977,
+      "step": 12338
+    },
+    {
+      "epoch": 0.8902918575706195,
+      "grad_norm": 0.12818323075771332,
+      "learning_rate": 0.00016440756241881948,
+      "loss": 0.1544,
+      "step": 12339
+    },
+    {
+      "epoch": 0.8903640102456799,
+      "grad_norm": 0.12762263417243958,
+      "learning_rate": 0.00016440467599942272,
+      "loss": 0.1938,
+      "step": 12340
+    },
+    {
+      "epoch": 0.8904361629207402,
+      "grad_norm": 0.12197435647249222,
+      "learning_rate": 0.00016440178958002598,
+      "loss": 0.136,
+      "step": 12341
+    },
+    {
+      "epoch": 0.8905083155958007,
+      "grad_norm": 0.12227319926023483,
+      "learning_rate": 0.00016439890316062925,
+      "loss": 0.1331,
+      "step": 12342
+    },
+    {
+      "epoch": 0.8905804682708611,
+      "grad_norm": 0.12339022010564804,
+      "learning_rate": 0.0001643960167412325,
+      "loss": 0.1192,
+      "step": 12343
+    },
+    {
+      "epoch": 0.8906526209459216,
+      "grad_norm": 0.12474383413791656,
+      "learning_rate": 0.00016439313032183577,
+      "loss": 0.1259,
+      "step": 12344
+    },
+    {
+      "epoch": 0.890724773620982,
+      "grad_norm": 0.14406003057956696,
+      "learning_rate": 0.00016439024390243903,
+      "loss": 0.138,
+      "step": 12345
+    },
+    {
+      "epoch": 0.8907969262960425,
+      "grad_norm": 0.13146288692951202,
+      "learning_rate": 0.0001643873574830423,
+      "loss": 0.1627,
+      "step": 12346
+    },
+    {
+      "epoch": 0.8908690789711029,
+      "grad_norm": 0.1251259446144104,
+      "learning_rate": 0.00016438447106364556,
+      "loss": 0.1442,
+      "step": 12347
+    },
+    {
+      "epoch": 0.8909412316461632,
+      "grad_norm": 0.1310272067785263,
+      "learning_rate": 0.00016438158464424882,
+      "loss": 0.1309,
+      "step": 12348
+    },
+    {
+      "epoch": 0.8910133843212237,
+      "grad_norm": 0.13596667349338531,
+      "learning_rate": 0.00016437869822485209,
+      "loss": 0.1625,
+      "step": 12349
+    },
+    {
+      "epoch": 0.8910855369962841,
+      "grad_norm": 0.12340883165597916,
+      "learning_rate": 0.00016437581180545535,
+      "loss": 0.1453,
+      "step": 12350
+    },
+    {
+      "epoch": 0.8911576896713446,
+      "grad_norm": 0.14122344553470612,
+      "learning_rate": 0.00016437292538605858,
+      "loss": 0.1544,
+      "step": 12351
+    },
+    {
+      "epoch": 0.891229842346405,
+      "grad_norm": 0.12983869016170502,
+      "learning_rate": 0.00016437003896666185,
+      "loss": 0.17,
+      "step": 12352
+    },
+    {
+      "epoch": 0.8913019950214655,
+      "grad_norm": 0.12063828855752945,
+      "learning_rate": 0.00016436715254726514,
+      "loss": 0.1771,
+      "step": 12353
+    },
+    {
+      "epoch": 0.8913741476965259,
+      "grad_norm": 0.1242651641368866,
+      "learning_rate": 0.0001643642661278684,
+      "loss": 0.1481,
+      "step": 12354
+    },
+    {
+      "epoch": 0.8914463003715862,
+      "grad_norm": 0.1284290850162506,
+      "learning_rate": 0.00016436137970847166,
+      "loss": 0.1272,
+      "step": 12355
+    },
+    {
+      "epoch": 0.8915184530466467,
+      "grad_norm": 0.12441599369049072,
+      "learning_rate": 0.0001643584932890749,
+      "loss": 0.0934,
+      "step": 12356
+    },
+    {
+      "epoch": 0.8915906057217071,
+      "grad_norm": 0.10706061869859695,
+      "learning_rate": 0.00016435560686967816,
+      "loss": 0.1213,
+      "step": 12357
+    },
+    {
+      "epoch": 0.8916627583967676,
+      "grad_norm": 0.12179552763700485,
+      "learning_rate": 0.00016435272045028142,
+      "loss": 0.1591,
+      "step": 12358
+    },
+    {
+      "epoch": 0.891734911071828,
+      "grad_norm": 0.16151952743530273,
+      "learning_rate": 0.0001643498340308847,
+      "loss": 0.1726,
+      "step": 12359
+    },
+    {
+      "epoch": 0.8918070637468885,
+      "grad_norm": 0.1302734762430191,
+      "learning_rate": 0.00016434694761148798,
+      "loss": 0.1007,
+      "step": 12360
+    },
+    {
+      "epoch": 0.8918792164219489,
+      "grad_norm": 0.14528721570968628,
+      "learning_rate": 0.0001643440611920912,
+      "loss": 0.1522,
+      "step": 12361
+    },
+    {
+      "epoch": 0.8919513690970092,
+      "grad_norm": 0.15876981616020203,
+      "learning_rate": 0.00016434117477269448,
+      "loss": 0.1378,
+      "step": 12362
+    },
+    {
+      "epoch": 0.8920235217720697,
+      "grad_norm": 0.13293051719665527,
+      "learning_rate": 0.00016433828835329774,
+      "loss": 0.1772,
+      "step": 12363
+    },
+    {
+      "epoch": 0.8920956744471301,
+      "grad_norm": 0.10721206665039062,
+      "learning_rate": 0.000164335401933901,
+      "loss": 0.1711,
+      "step": 12364
+    },
+    {
+      "epoch": 0.8921678271221906,
+      "grad_norm": 0.14066430926322937,
+      "learning_rate": 0.00016433251551450427,
+      "loss": 0.1305,
+      "step": 12365
+    },
+    {
+      "epoch": 0.892239979797251,
+      "grad_norm": 0.13327549397945404,
+      "learning_rate": 0.00016432962909510753,
+      "loss": 0.1487,
+      "step": 12366
+    },
+    {
+      "epoch": 0.8923121324723114,
+      "grad_norm": 0.12224187701940536,
+      "learning_rate": 0.0001643267426757108,
+      "loss": 0.1402,
+      "step": 12367
+    },
+    {
+      "epoch": 0.8923842851473719,
+      "grad_norm": 0.11235873401165009,
+      "learning_rate": 0.00016432385625631405,
+      "loss": 0.1545,
+      "step": 12368
+    },
+    {
+      "epoch": 0.8924564378224322,
+      "grad_norm": 0.13327015936374664,
+      "learning_rate": 0.00016432096983691732,
+      "loss": 0.1212,
+      "step": 12369
+    },
+    {
+      "epoch": 0.8925285904974927,
+      "grad_norm": 0.0986461266875267,
+      "learning_rate": 0.00016431808341752058,
+      "loss": 0.1105,
+      "step": 12370
+    },
+    {
+      "epoch": 0.8926007431725531,
+      "grad_norm": 0.17483176290988922,
+      "learning_rate": 0.00016431519699812384,
+      "loss": 0.1472,
+      "step": 12371
+    },
+    {
+      "epoch": 0.8926728958476136,
+      "grad_norm": 0.11941957473754883,
+      "learning_rate": 0.00016431231057872708,
+      "loss": 0.1612,
+      "step": 12372
+    },
+    {
+      "epoch": 0.892745048522674,
+      "grad_norm": 0.1409369558095932,
+      "learning_rate": 0.00016430942415933034,
+      "loss": 0.1794,
+      "step": 12373
+    },
+    {
+      "epoch": 0.8928172011977344,
+      "grad_norm": 0.14732030034065247,
+      "learning_rate": 0.00016430653773993363,
+      "loss": 0.2351,
+      "step": 12374
+    },
+    {
+      "epoch": 0.8928893538727949,
+      "grad_norm": 0.11152256280183792,
+      "learning_rate": 0.0001643036513205369,
+      "loss": 0.1866,
+      "step": 12375
+    },
+    {
+      "epoch": 0.8929615065478552,
+      "grad_norm": 0.13826146721839905,
+      "learning_rate": 0.00016430076490114016,
+      "loss": 0.1083,
+      "step": 12376
+    },
+    {
+      "epoch": 0.8930336592229157,
+      "grad_norm": 0.12121112644672394,
+      "learning_rate": 0.0001642978784817434,
+      "loss": 0.1578,
+      "step": 12377
+    },
+    {
+      "epoch": 0.8931058118979761,
+      "grad_norm": 0.12537214159965515,
+      "learning_rate": 0.00016429499206234666,
+      "loss": 0.1067,
+      "step": 12378
+    },
+    {
+      "epoch": 0.8931779645730366,
+      "grad_norm": 0.12527628242969513,
+      "learning_rate": 0.00016429210564294992,
+      "loss": 0.1585,
+      "step": 12379
+    },
+    {
+      "epoch": 0.893250117248097,
+      "grad_norm": 0.1377083957195282,
+      "learning_rate": 0.00016428921922355318,
+      "loss": 0.1674,
+      "step": 12380
+    },
+    {
+      "epoch": 0.8933222699231574,
+      "grad_norm": 0.11523400247097015,
+      "learning_rate": 0.00016428633280415647,
+      "loss": 0.1275,
+      "step": 12381
+    },
+    {
+      "epoch": 0.8933944225982178,
+      "grad_norm": 0.14673997461795807,
+      "learning_rate": 0.0001642834463847597,
+      "loss": 0.2389,
+      "step": 12382
+    },
+    {
+      "epoch": 0.8934665752732782,
+      "grad_norm": 0.13048113882541656,
+      "learning_rate": 0.00016428055996536297,
+      "loss": 0.2019,
+      "step": 12383
+    },
+    {
+      "epoch": 0.8935387279483387,
+      "grad_norm": 0.13305433094501495,
+      "learning_rate": 0.00016427767354596623,
+      "loss": 0.1961,
+      "step": 12384
+    },
+    {
+      "epoch": 0.8936108806233991,
+      "grad_norm": 0.15378667414188385,
+      "learning_rate": 0.0001642747871265695,
+      "loss": 0.1607,
+      "step": 12385
+    },
+    {
+      "epoch": 0.8936830332984596,
+      "grad_norm": 0.16681982576847076,
+      "learning_rate": 0.00016427190070717276,
+      "loss": 0.219,
+      "step": 12386
+    },
+    {
+      "epoch": 0.89375518597352,
+      "grad_norm": 0.11870287358760834,
+      "learning_rate": 0.00016426901428777602,
+      "loss": 0.1189,
+      "step": 12387
+    },
+    {
+      "epoch": 0.8938273386485804,
+      "grad_norm": 0.1373557448387146,
+      "learning_rate": 0.00016426612786837929,
+      "loss": 0.1105,
+      "step": 12388
+    },
+    {
+      "epoch": 0.8938994913236408,
+      "grad_norm": 0.13363225758075714,
+      "learning_rate": 0.00016426324144898255,
+      "loss": 0.1998,
+      "step": 12389
+    },
+    {
+      "epoch": 0.8939716439987012,
+      "grad_norm": 0.12109125405550003,
+      "learning_rate": 0.0001642603550295858,
+      "loss": 0.1629,
+      "step": 12390
+    },
+    {
+      "epoch": 0.8940437966737617,
+      "grad_norm": 0.1376250982284546,
+      "learning_rate": 0.00016425746861018907,
+      "loss": 0.1867,
+      "step": 12391
+    },
+    {
+      "epoch": 0.8941159493488221,
+      "grad_norm": 0.11796516180038452,
+      "learning_rate": 0.00016425458219079234,
+      "loss": 0.1319,
+      "step": 12392
+    },
+    {
+      "epoch": 0.8941881020238825,
+      "grad_norm": 0.1224747747182846,
+      "learning_rate": 0.00016425169577139557,
+      "loss": 0.1562,
+      "step": 12393
+    },
+    {
+      "epoch": 0.894260254698943,
+      "grad_norm": 0.11128373444080353,
+      "learning_rate": 0.00016424880935199884,
+      "loss": 0.1912,
+      "step": 12394
+    },
+    {
+      "epoch": 0.8943324073740034,
+      "grad_norm": 0.1349061131477356,
+      "learning_rate": 0.00016424592293260213,
+      "loss": 0.1219,
+      "step": 12395
+    },
+    {
+      "epoch": 0.8944045600490638,
+      "grad_norm": 0.12552239000797272,
+      "learning_rate": 0.0001642430365132054,
+      "loss": 0.118,
+      "step": 12396
+    },
+    {
+      "epoch": 0.8944767127241242,
+      "grad_norm": 0.10382308065891266,
+      "learning_rate": 0.00016424015009380865,
+      "loss": 0.1273,
+      "step": 12397
+    },
+    {
+      "epoch": 0.8945488653991847,
+      "grad_norm": 0.1467970311641693,
+      "learning_rate": 0.0001642372636744119,
+      "loss": 0.1296,
+      "step": 12398
+    },
+    {
+      "epoch": 0.8946210180742451,
+      "grad_norm": 0.11753301322460175,
+      "learning_rate": 0.00016423437725501515,
+      "loss": 0.1716,
+      "step": 12399
+    },
+    {
+      "epoch": 0.8946931707493055,
+      "grad_norm": 0.1308908462524414,
+      "learning_rate": 0.0001642314908356184,
+      "loss": 0.1593,
+      "step": 12400
+    },
+    {
+      "epoch": 0.894765323424366,
+      "grad_norm": 0.14492283761501312,
+      "learning_rate": 0.00016422860441622168,
+      "loss": 0.1411,
+      "step": 12401
+    },
+    {
+      "epoch": 0.8948374760994264,
+      "grad_norm": 0.13398638367652893,
+      "learning_rate": 0.00016422571799682497,
+      "loss": 0.1848,
+      "step": 12402
+    },
+    {
+      "epoch": 0.8949096287744868,
+      "grad_norm": 0.15173323452472687,
+      "learning_rate": 0.00016422283157742823,
+      "loss": 0.1408,
+      "step": 12403
+    },
+    {
+      "epoch": 0.8949817814495472,
+      "grad_norm": 0.14614036679267883,
+      "learning_rate": 0.00016421994515803146,
+      "loss": 0.1572,
+      "step": 12404
+    },
+    {
+      "epoch": 0.8950539341246077,
+      "grad_norm": 0.13788336515426636,
+      "learning_rate": 0.00016421705873863473,
+      "loss": 0.1051,
+      "step": 12405
+    },
+    {
+      "epoch": 0.8951260867996681,
+      "grad_norm": 0.1386488974094391,
+      "learning_rate": 0.000164214172319238,
+      "loss": 0.1812,
+      "step": 12406
+    },
+    {
+      "epoch": 0.8951982394747285,
+      "grad_norm": 0.1316649317741394,
+      "learning_rate": 0.00016421128589984125,
+      "loss": 0.1131,
+      "step": 12407
+    },
+    {
+      "epoch": 0.895270392149789,
+      "grad_norm": 0.12185010313987732,
+      "learning_rate": 0.00016420839948044452,
+      "loss": 0.1823,
+      "step": 12408
+    },
+    {
+      "epoch": 0.8953425448248494,
+      "grad_norm": 0.12921541929244995,
+      "learning_rate": 0.00016420551306104778,
+      "loss": 0.1266,
+      "step": 12409
+    },
+    {
+      "epoch": 0.8954146974999098,
+      "grad_norm": 0.12820370495319366,
+      "learning_rate": 0.00016420262664165104,
+      "loss": 0.1816,
+      "step": 12410
+    },
+    {
+      "epoch": 0.8954868501749702,
+      "grad_norm": 0.13184615969657898,
+      "learning_rate": 0.0001641997402222543,
+      "loss": 0.1916,
+      "step": 12411
+    },
+    {
+      "epoch": 0.8955590028500306,
+      "grad_norm": 0.1054464802145958,
+      "learning_rate": 0.00016419685380285757,
+      "loss": 0.0965,
+      "step": 12412
+    },
+    {
+      "epoch": 0.8956311555250911,
+      "grad_norm": 0.12233717739582062,
+      "learning_rate": 0.00016419396738346083,
+      "loss": 0.1234,
+      "step": 12413
+    },
+    {
+      "epoch": 0.8957033082001515,
+      "grad_norm": 0.10888828337192535,
+      "learning_rate": 0.0001641910809640641,
+      "loss": 0.1317,
+      "step": 12414
+    },
+    {
+      "epoch": 0.895775460875212,
+      "grad_norm": 0.13491316139698029,
+      "learning_rate": 0.00016418819454466733,
+      "loss": 0.1156,
+      "step": 12415
+    },
+    {
+      "epoch": 0.8958476135502724,
+      "grad_norm": 0.13821394741535187,
+      "learning_rate": 0.00016418530812527062,
+      "loss": 0.136,
+      "step": 12416
+    },
+    {
+      "epoch": 0.8959197662253328,
+      "grad_norm": 0.12744927406311035,
+      "learning_rate": 0.00016418242170587388,
+      "loss": 0.1185,
+      "step": 12417
+    },
+    {
+      "epoch": 0.8959919189003932,
+      "grad_norm": 0.14690862596035004,
+      "learning_rate": 0.00016417953528647715,
+      "loss": 0.1198,
+      "step": 12418
+    },
+    {
+      "epoch": 0.8960640715754536,
+      "grad_norm": 0.1134641095995903,
+      "learning_rate": 0.0001641766488670804,
+      "loss": 0.1544,
+      "step": 12419
+    },
+    {
+      "epoch": 0.8961362242505141,
+      "grad_norm": 0.13892638683319092,
+      "learning_rate": 0.00016417376244768364,
+      "loss": 0.1552,
+      "step": 12420
+    },
+    {
+      "epoch": 0.8962083769255745,
+      "grad_norm": 0.1294938623905182,
+      "learning_rate": 0.0001641708760282869,
+      "loss": 0.1247,
+      "step": 12421
+    },
+    {
+      "epoch": 0.896280529600635,
+      "grad_norm": 0.12001454830169678,
+      "learning_rate": 0.00016416798960889017,
+      "loss": 0.1152,
+      "step": 12422
+    },
+    {
+      "epoch": 0.8963526822756954,
+      "grad_norm": 0.14264273643493652,
+      "learning_rate": 0.00016416510318949346,
+      "loss": 0.1921,
+      "step": 12423
+    },
+    {
+      "epoch": 0.8964248349507558,
+      "grad_norm": 0.11626090109348297,
+      "learning_rate": 0.00016416221677009672,
+      "loss": 0.1392,
+      "step": 12424
+    },
+    {
+      "epoch": 0.8964969876258162,
+      "grad_norm": 0.13233236968517303,
+      "learning_rate": 0.00016415933035069996,
+      "loss": 0.1176,
+      "step": 12425
+    },
+    {
+      "epoch": 0.8965691403008766,
+      "grad_norm": 0.147277370095253,
+      "learning_rate": 0.00016415644393130322,
+      "loss": 0.1517,
+      "step": 12426
+    },
+    {
+      "epoch": 0.8966412929759371,
+      "grad_norm": 0.12489044666290283,
+      "learning_rate": 0.00016415355751190648,
+      "loss": 0.1451,
+      "step": 12427
+    },
+    {
+      "epoch": 0.8967134456509975,
+      "grad_norm": 0.12134548276662827,
+      "learning_rate": 0.00016415067109250975,
+      "loss": 0.1487,
+      "step": 12428
+    },
+    {
+      "epoch": 0.896785598326058,
+      "grad_norm": 0.15623758733272552,
+      "learning_rate": 0.000164147784673113,
+      "loss": 0.1654,
+      "step": 12429
+    },
+    {
+      "epoch": 0.8968577510011184,
+      "grad_norm": 0.14344453811645508,
+      "learning_rate": 0.00016414489825371627,
+      "loss": 0.1764,
+      "step": 12430
+    },
+    {
+      "epoch": 0.8969299036761788,
+      "grad_norm": 0.13360056281089783,
+      "learning_rate": 0.00016414201183431954,
+      "loss": 0.1335,
+      "step": 12431
+    },
+    {
+      "epoch": 0.8970020563512392,
+      "grad_norm": 0.16808222234249115,
+      "learning_rate": 0.0001641391254149228,
+      "loss": 0.1346,
+      "step": 12432
+    },
+    {
+      "epoch": 0.8970742090262996,
+      "grad_norm": 0.20998232066631317,
+      "learning_rate": 0.00016413623899552606,
+      "loss": 0.2015,
+      "step": 12433
+    },
+    {
+      "epoch": 0.8971463617013601,
+      "grad_norm": 0.12847408652305603,
+      "learning_rate": 0.00016413335257612933,
+      "loss": 0.1336,
+      "step": 12434
+    },
+    {
+      "epoch": 0.8972185143764205,
+      "grad_norm": 0.14319449663162231,
+      "learning_rate": 0.0001641304661567326,
+      "loss": 0.1304,
+      "step": 12435
+    },
+    {
+      "epoch": 0.897290667051481,
+      "grad_norm": 0.1424330621957779,
+      "learning_rate": 0.00016412757973733582,
+      "loss": 0.1457,
+      "step": 12436
+    },
+    {
+      "epoch": 0.8973628197265414,
+      "grad_norm": 0.1303742378950119,
+      "learning_rate": 0.00016412469331793911,
+      "loss": 0.1281,
+      "step": 12437
+    },
+    {
+      "epoch": 0.8974349724016017,
+      "grad_norm": 0.11268772184848785,
+      "learning_rate": 0.00016412180689854238,
+      "loss": 0.1346,
+      "step": 12438
+    },
+    {
+      "epoch": 0.8975071250766622,
+      "grad_norm": 0.13568419218063354,
+      "learning_rate": 0.00016411892047914564,
+      "loss": 0.1778,
+      "step": 12439
+    },
+    {
+      "epoch": 0.8975792777517226,
+      "grad_norm": 0.14853152632713318,
+      "learning_rate": 0.0001641160340597489,
+      "loss": 0.1839,
+      "step": 12440
+    },
+    {
+      "epoch": 0.8976514304267831,
+      "grad_norm": 0.11737915128469467,
+      "learning_rate": 0.00016411314764035214,
+      "loss": 0.1111,
+      "step": 12441
+    },
+    {
+      "epoch": 0.8977235831018435,
+      "grad_norm": 0.17313599586486816,
+      "learning_rate": 0.0001641102612209554,
+      "loss": 0.183,
+      "step": 12442
+    },
+    {
+      "epoch": 0.897795735776904,
+      "grad_norm": 0.13710598647594452,
+      "learning_rate": 0.00016410737480155866,
+      "loss": 0.1558,
+      "step": 12443
+    },
+    {
+      "epoch": 0.8978678884519643,
+      "grad_norm": 0.12724435329437256,
+      "learning_rate": 0.00016410448838216193,
+      "loss": 0.1142,
+      "step": 12444
+    },
+    {
+      "epoch": 0.8979400411270247,
+      "grad_norm": 0.15760374069213867,
+      "learning_rate": 0.00016410160196276522,
+      "loss": 0.1551,
+      "step": 12445
+    },
+    {
+      "epoch": 0.8980121938020852,
+      "grad_norm": 0.16165392100811005,
+      "learning_rate": 0.00016409871554336845,
+      "loss": 0.1822,
+      "step": 12446
+    },
+    {
+      "epoch": 0.8980843464771456,
+      "grad_norm": 0.13458803296089172,
+      "learning_rate": 0.00016409582912397172,
+      "loss": 0.1332,
+      "step": 12447
+    },
+    {
+      "epoch": 0.8981564991522061,
+      "grad_norm": 0.10760502517223358,
+      "learning_rate": 0.00016409294270457498,
+      "loss": 0.1418,
+      "step": 12448
+    },
+    {
+      "epoch": 0.8982286518272665,
+      "grad_norm": 0.1373852640390396,
+      "learning_rate": 0.00016409005628517824,
+      "loss": 0.1644,
+      "step": 12449
+    },
+    {
+      "epoch": 0.898300804502327,
+      "grad_norm": 0.1437990516424179,
+      "learning_rate": 0.0001640871698657815,
+      "loss": 0.1508,
+      "step": 12450
+    },
+    {
+      "epoch": 0.8983729571773873,
+      "grad_norm": 0.11738962680101395,
+      "learning_rate": 0.00016408428344638477,
+      "loss": 0.1662,
+      "step": 12451
+    },
+    {
+      "epoch": 0.8984451098524477,
+      "grad_norm": 0.15426859259605408,
+      "learning_rate": 0.00016408139702698803,
+      "loss": 0.1088,
+      "step": 12452
+    },
+    {
+      "epoch": 0.8985172625275082,
+      "grad_norm": 0.1113508865237236,
+      "learning_rate": 0.0001640785106075913,
+      "loss": 0.1271,
+      "step": 12453
+    },
+    {
+      "epoch": 0.8985894152025686,
+      "grad_norm": 0.150124654173851,
+      "learning_rate": 0.00016407562418819456,
+      "loss": 0.194,
+      "step": 12454
+    },
+    {
+      "epoch": 0.8986615678776291,
+      "grad_norm": 0.13325640559196472,
+      "learning_rate": 0.00016407273776879782,
+      "loss": 0.1425,
+      "step": 12455
+    },
+    {
+      "epoch": 0.8987337205526895,
+      "grad_norm": 0.11264251172542572,
+      "learning_rate": 0.00016406985134940108,
+      "loss": 0.191,
+      "step": 12456
+    },
+    {
+      "epoch": 0.89880587322775,
+      "grad_norm": 0.13700659573078156,
+      "learning_rate": 0.00016406696493000432,
+      "loss": 0.1505,
+      "step": 12457
+    },
+    {
+      "epoch": 0.8988780259028103,
+      "grad_norm": 0.16137053072452545,
+      "learning_rate": 0.00016406407851060758,
+      "loss": 0.1943,
+      "step": 12458
+    },
+    {
+      "epoch": 0.8989501785778707,
+      "grad_norm": 0.12548089027404785,
+      "learning_rate": 0.00016406119209121087,
+      "loss": 0.1418,
+      "step": 12459
+    },
+    {
+      "epoch": 0.8990223312529312,
+      "grad_norm": 0.12392608076334,
+      "learning_rate": 0.00016405830567181413,
+      "loss": 0.0933,
+      "step": 12460
+    },
+    {
+      "epoch": 0.8990944839279916,
+      "grad_norm": 0.12155414372682571,
+      "learning_rate": 0.0001640554192524174,
+      "loss": 0.1841,
+      "step": 12461
+    },
+    {
+      "epoch": 0.8991666366030521,
+      "grad_norm": 0.13689649105072021,
+      "learning_rate": 0.00016405253283302063,
+      "loss": 0.1192,
+      "step": 12462
+    },
+    {
+      "epoch": 0.8992387892781125,
+      "grad_norm": 0.13133807480335236,
+      "learning_rate": 0.0001640496464136239,
+      "loss": 0.1471,
+      "step": 12463
+    },
+    {
+      "epoch": 0.899310941953173,
+      "grad_norm": 0.12057525664567947,
+      "learning_rate": 0.00016404675999422716,
+      "loss": 0.1704,
+      "step": 12464
+    },
+    {
+      "epoch": 0.8993830946282333,
+      "grad_norm": 0.11189797520637512,
+      "learning_rate": 0.00016404387357483042,
+      "loss": 0.1724,
+      "step": 12465
+    },
+    {
+      "epoch": 0.8994552473032937,
+      "grad_norm": 0.11240506917238235,
+      "learning_rate": 0.0001640409871554337,
+      "loss": 0.1414,
+      "step": 12466
+    },
+    {
+      "epoch": 0.8995273999783542,
+      "grad_norm": 0.13948418200016022,
+      "learning_rate": 0.00016403810073603695,
+      "loss": 0.1724,
+      "step": 12467
+    },
+    {
+      "epoch": 0.8995995526534146,
+      "grad_norm": 0.1354711651802063,
+      "learning_rate": 0.0001640352143166402,
+      "loss": 0.1177,
+      "step": 12468
+    },
+    {
+      "epoch": 0.8996717053284751,
+      "grad_norm": 0.1620495766401291,
+      "learning_rate": 0.00016403232789724347,
+      "loss": 0.1898,
+      "step": 12469
+    },
+    {
+      "epoch": 0.8997438580035355,
+      "grad_norm": 0.13859111070632935,
+      "learning_rate": 0.00016402944147784674,
+      "loss": 0.1376,
+      "step": 12470
+    },
+    {
+      "epoch": 0.899816010678596,
+      "grad_norm": 0.1199670135974884,
+      "learning_rate": 0.00016402655505845,
+      "loss": 0.1747,
+      "step": 12471
+    },
+    {
+      "epoch": 0.8998881633536563,
+      "grad_norm": 0.1164129227399826,
+      "learning_rate": 0.00016402366863905326,
+      "loss": 0.1454,
+      "step": 12472
+    },
+    {
+      "epoch": 0.8999603160287167,
+      "grad_norm": 0.11841829866170883,
+      "learning_rate": 0.00016402078221965653,
+      "loss": 0.1219,
+      "step": 12473
+    },
+    {
+      "epoch": 0.9000324687037772,
+      "grad_norm": 0.12008563429117203,
+      "learning_rate": 0.0001640178958002598,
+      "loss": 0.1216,
+      "step": 12474
+    },
+    {
+      "epoch": 0.9001046213788376,
+      "grad_norm": 0.11511898040771484,
+      "learning_rate": 0.00016401500938086305,
+      "loss": 0.1556,
+      "step": 12475
+    },
+    {
+      "epoch": 0.9001767740538981,
+      "grad_norm": 0.11898694932460785,
+      "learning_rate": 0.00016401212296146631,
+      "loss": 0.1861,
+      "step": 12476
+    },
+    {
+      "epoch": 0.9002489267289585,
+      "grad_norm": 0.16332779824733734,
+      "learning_rate": 0.00016400923654206958,
+      "loss": 0.2241,
+      "step": 12477
+    },
+    {
+      "epoch": 0.900321079404019,
+      "grad_norm": 0.13668668270111084,
+      "learning_rate": 0.0001640063501226728,
+      "loss": 0.1629,
+      "step": 12478
+    },
+    {
+      "epoch": 0.9003932320790793,
+      "grad_norm": 0.0982336550951004,
+      "learning_rate": 0.00016400346370327608,
+      "loss": 0.0953,
+      "step": 12479
+    },
+    {
+      "epoch": 0.9004653847541397,
+      "grad_norm": 0.12939897179603577,
+      "learning_rate": 0.00016400057728387937,
+      "loss": 0.1281,
+      "step": 12480
+    },
+    {
+      "epoch": 0.9005375374292002,
+      "grad_norm": 0.16161668300628662,
+      "learning_rate": 0.00016399769086448263,
+      "loss": 0.1457,
+      "step": 12481
+    },
+    {
+      "epoch": 0.9006096901042606,
+      "grad_norm": 0.13126331567764282,
+      "learning_rate": 0.0001639948044450859,
+      "loss": 0.1349,
+      "step": 12482
+    },
+    {
+      "epoch": 0.9006818427793211,
+      "grad_norm": 0.12295763939619064,
+      "learning_rate": 0.00016399191802568913,
+      "loss": 0.1537,
+      "step": 12483
+    },
+    {
+      "epoch": 0.9007539954543815,
+      "grad_norm": 0.13977226614952087,
+      "learning_rate": 0.0001639890316062924,
+      "loss": 0.1223,
+      "step": 12484
+    },
+    {
+      "epoch": 0.900826148129442,
+      "grad_norm": 0.1546591967344284,
+      "learning_rate": 0.00016398614518689565,
+      "loss": 0.1674,
+      "step": 12485
+    },
+    {
+      "epoch": 0.9008983008045023,
+      "grad_norm": 0.09397142380475998,
+      "learning_rate": 0.00016398325876749892,
+      "loss": 0.0949,
+      "step": 12486
+    },
+    {
+      "epoch": 0.9009704534795627,
+      "grad_norm": 0.138092502951622,
+      "learning_rate": 0.0001639803723481022,
+      "loss": 0.2451,
+      "step": 12487
+    },
+    {
+      "epoch": 0.9010426061546232,
+      "grad_norm": 0.12386883795261383,
+      "learning_rate": 0.00016397748592870544,
+      "loss": 0.1326,
+      "step": 12488
+    },
+    {
+      "epoch": 0.9011147588296836,
+      "grad_norm": 0.11555249989032745,
+      "learning_rate": 0.0001639745995093087,
+      "loss": 0.1195,
+      "step": 12489
+    },
+    {
+      "epoch": 0.9011869115047441,
+      "grad_norm": 0.172087162733078,
+      "learning_rate": 0.00016397171308991197,
+      "loss": 0.1241,
+      "step": 12490
+    },
+    {
+      "epoch": 0.9012590641798045,
+      "grad_norm": 0.12430737912654877,
+      "learning_rate": 0.00016396882667051523,
+      "loss": 0.162,
+      "step": 12491
+    },
+    {
+      "epoch": 0.901331216854865,
+      "grad_norm": 0.12657372653484344,
+      "learning_rate": 0.0001639659402511185,
+      "loss": 0.1487,
+      "step": 12492
+    },
+    {
+      "epoch": 0.9014033695299253,
+      "grad_norm": 0.12591242790222168,
+      "learning_rate": 0.00016396305383172176,
+      "loss": 0.1452,
+      "step": 12493
+    },
+    {
+      "epoch": 0.9014755222049857,
+      "grad_norm": 0.11260319501161575,
+      "learning_rate": 0.00016396016741232502,
+      "loss": 0.163,
+      "step": 12494
+    },
+    {
+      "epoch": 0.9015476748800462,
+      "grad_norm": 0.138995960354805,
+      "learning_rate": 0.00016395728099292828,
+      "loss": 0.1362,
+      "step": 12495
+    },
+    {
+      "epoch": 0.9016198275551066,
+      "grad_norm": 0.14916054904460907,
+      "learning_rate": 0.00016395439457353155,
+      "loss": 0.1919,
+      "step": 12496
+    },
+    {
+      "epoch": 0.9016919802301671,
+      "grad_norm": 0.10642482340335846,
+      "learning_rate": 0.0001639515081541348,
+      "loss": 0.1997,
+      "step": 12497
+    },
+    {
+      "epoch": 0.9017641329052275,
+      "grad_norm": 0.1367085725069046,
+      "learning_rate": 0.00016394862173473807,
+      "loss": 0.1338,
+      "step": 12498
+    },
+    {
+      "epoch": 0.9018362855802879,
+      "grad_norm": 0.16044127941131592,
+      "learning_rate": 0.0001639457353153413,
+      "loss": 0.1513,
+      "step": 12499
+    },
+    {
+      "epoch": 0.9019084382553483,
+      "grad_norm": 0.13745981454849243,
+      "learning_rate": 0.00016394284889594457,
+      "loss": 0.1736,
+      "step": 12500
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 69295,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 6.686622915010806e+18,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}