random_RToQ651klcfvYBE4 / trainer_state.json
cutelemonlili's picture
Add files using upload-large-folder tool
8793c16 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 2846,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0007027406886858749,
"grad_norm": 0.8196235352245071,
"learning_rate": 9.999996953719305e-06,
"loss": 0.292,
"step": 1
},
{
"epoch": 0.0014054813773717498,
"grad_norm": 0.7633721725729737,
"learning_rate": 9.999987814880927e-06,
"loss": 0.2801,
"step": 2
},
{
"epoch": 0.0021082220660576245,
"grad_norm": 0.7536242195326497,
"learning_rate": 9.999972583496006e-06,
"loss": 0.2661,
"step": 3
},
{
"epoch": 0.0028109627547434997,
"grad_norm": 0.6258448159582546,
"learning_rate": 9.999951259583096e-06,
"loss": 0.2734,
"step": 4
},
{
"epoch": 0.0035137034434293743,
"grad_norm": 0.4626617791244198,
"learning_rate": 9.999923843168187e-06,
"loss": 0.2375,
"step": 5
},
{
"epoch": 0.004216444132115249,
"grad_norm": 0.419163258426849,
"learning_rate": 9.999890334284681e-06,
"loss": 0.1928,
"step": 6
},
{
"epoch": 0.004919184820801124,
"grad_norm": 0.4683787591367514,
"learning_rate": 9.999850732973412e-06,
"loss": 0.2368,
"step": 7
},
{
"epoch": 0.005621925509486999,
"grad_norm": 0.3631303503379455,
"learning_rate": 9.999805039282636e-06,
"loss": 0.2344,
"step": 8
},
{
"epoch": 0.006324666198172874,
"grad_norm": 0.3548134547290823,
"learning_rate": 9.999753253268027e-06,
"loss": 0.2312,
"step": 9
},
{
"epoch": 0.007027406886858749,
"grad_norm": 0.29528023768627554,
"learning_rate": 9.99969537499269e-06,
"loss": 0.1908,
"step": 10
},
{
"epoch": 0.007730147575544624,
"grad_norm": 0.293101018351405,
"learning_rate": 9.999631404527148e-06,
"loss": 0.2107,
"step": 11
},
{
"epoch": 0.008432888264230498,
"grad_norm": 0.3037162293226832,
"learning_rate": 9.999561341949354e-06,
"loss": 0.2259,
"step": 12
},
{
"epoch": 0.009135628952916374,
"grad_norm": 0.28517045842139394,
"learning_rate": 9.999485187344676e-06,
"loss": 0.2104,
"step": 13
},
{
"epoch": 0.009838369641602248,
"grad_norm": 0.2664237810954005,
"learning_rate": 9.999402940805908e-06,
"loss": 0.2004,
"step": 14
},
{
"epoch": 0.010541110330288124,
"grad_norm": 0.25250087233379465,
"learning_rate": 9.999314602433274e-06,
"loss": 0.1905,
"step": 15
},
{
"epoch": 0.011243851018973999,
"grad_norm": 0.2728513147781514,
"learning_rate": 9.999220172334414e-06,
"loss": 0.2236,
"step": 16
},
{
"epoch": 0.011946591707659873,
"grad_norm": 0.2461154242048915,
"learning_rate": 9.999119650624387e-06,
"loss": 0.1941,
"step": 17
},
{
"epoch": 0.012649332396345749,
"grad_norm": 0.2772064625396051,
"learning_rate": 9.999013037425686e-06,
"loss": 0.225,
"step": 18
},
{
"epoch": 0.013352073085031623,
"grad_norm": 0.24224569728007858,
"learning_rate": 9.998900332868217e-06,
"loss": 0.1699,
"step": 19
},
{
"epoch": 0.014054813773717497,
"grad_norm": 0.2410101823961705,
"learning_rate": 9.998781537089316e-06,
"loss": 0.1569,
"step": 20
},
{
"epoch": 0.014757554462403373,
"grad_norm": 0.2245352136933077,
"learning_rate": 9.998656650233732e-06,
"loss": 0.1706,
"step": 21
},
{
"epoch": 0.015460295151089248,
"grad_norm": 0.2585191374356976,
"learning_rate": 9.998525672453642e-06,
"loss": 0.1967,
"step": 22
},
{
"epoch": 0.016163035839775124,
"grad_norm": 0.24049035698379764,
"learning_rate": 9.998388603908646e-06,
"loss": 0.1774,
"step": 23
},
{
"epoch": 0.016865776528460996,
"grad_norm": 0.26464797223403097,
"learning_rate": 9.998245444765764e-06,
"loss": 0.2316,
"step": 24
},
{
"epoch": 0.017568517217146872,
"grad_norm": 0.23437266277608312,
"learning_rate": 9.998096195199436e-06,
"loss": 0.1889,
"step": 25
},
{
"epoch": 0.018271257905832748,
"grad_norm": 0.237118383487342,
"learning_rate": 9.997940855391525e-06,
"loss": 0.1877,
"step": 26
},
{
"epoch": 0.018973998594518624,
"grad_norm": 0.2272844482088045,
"learning_rate": 9.997779425531315e-06,
"loss": 0.1647,
"step": 27
},
{
"epoch": 0.019676739283204497,
"grad_norm": 0.23000354511879403,
"learning_rate": 9.997611905815508e-06,
"loss": 0.1692,
"step": 28
},
{
"epoch": 0.020379479971890373,
"grad_norm": 0.2417424885980738,
"learning_rate": 9.99743829644823e-06,
"loss": 0.1972,
"step": 29
},
{
"epoch": 0.02108222066057625,
"grad_norm": 0.23540850081951695,
"learning_rate": 9.997258597641027e-06,
"loss": 0.1868,
"step": 30
},
{
"epoch": 0.02178496134926212,
"grad_norm": 0.231961083766718,
"learning_rate": 9.997072809612864e-06,
"loss": 0.1721,
"step": 31
},
{
"epoch": 0.022487702037947997,
"grad_norm": 0.24704113324004168,
"learning_rate": 9.996880932590125e-06,
"loss": 0.1975,
"step": 32
},
{
"epoch": 0.023190442726633873,
"grad_norm": 0.22256713786767082,
"learning_rate": 9.996682966806614e-06,
"loss": 0.1693,
"step": 33
},
{
"epoch": 0.023893183415319746,
"grad_norm": 0.22839438407774368,
"learning_rate": 9.996478912503557e-06,
"loss": 0.1798,
"step": 34
},
{
"epoch": 0.024595924104005622,
"grad_norm": 0.23817396523480716,
"learning_rate": 9.996268769929597e-06,
"loss": 0.1897,
"step": 35
},
{
"epoch": 0.025298664792691498,
"grad_norm": 0.22816683773055027,
"learning_rate": 9.996052539340793e-06,
"loss": 0.1804,
"step": 36
},
{
"epoch": 0.02600140548137737,
"grad_norm": 0.21655992704185173,
"learning_rate": 9.995830221000624e-06,
"loss": 0.1674,
"step": 37
},
{
"epoch": 0.026704146170063246,
"grad_norm": 0.22210870017549025,
"learning_rate": 9.99560181517999e-06,
"loss": 0.1643,
"step": 38
},
{
"epoch": 0.027406886858749122,
"grad_norm": 0.22995517499009716,
"learning_rate": 9.995367322157205e-06,
"loss": 0.1752,
"step": 39
},
{
"epoch": 0.028109627547434995,
"grad_norm": 0.21812115178435476,
"learning_rate": 9.995126742218002e-06,
"loss": 0.1657,
"step": 40
},
{
"epoch": 0.02881236823612087,
"grad_norm": 0.22655141271794238,
"learning_rate": 9.994880075655531e-06,
"loss": 0.1521,
"step": 41
},
{
"epoch": 0.029515108924806747,
"grad_norm": 0.22716747210487356,
"learning_rate": 9.994627322770358e-06,
"loss": 0.1556,
"step": 42
},
{
"epoch": 0.030217849613492623,
"grad_norm": 0.2270435634482395,
"learning_rate": 9.994368483870466e-06,
"loss": 0.1821,
"step": 43
},
{
"epoch": 0.030920590302178495,
"grad_norm": 0.22828849848787125,
"learning_rate": 9.994103559271252e-06,
"loss": 0.1822,
"step": 44
},
{
"epoch": 0.03162333099086437,
"grad_norm": 0.23086195964729814,
"learning_rate": 9.99383254929553e-06,
"loss": 0.1829,
"step": 45
},
{
"epoch": 0.03232607167955025,
"grad_norm": 0.21291074398557586,
"learning_rate": 9.99355545427353e-06,
"loss": 0.158,
"step": 46
},
{
"epoch": 0.03302881236823612,
"grad_norm": 0.24093356679971864,
"learning_rate": 9.993272274542895e-06,
"loss": 0.1752,
"step": 47
},
{
"epoch": 0.03373155305692199,
"grad_norm": 0.24496905333788505,
"learning_rate": 9.992983010448684e-06,
"loss": 0.2069,
"step": 48
},
{
"epoch": 0.03443429374560787,
"grad_norm": 0.2212242562433903,
"learning_rate": 9.992687662343367e-06,
"loss": 0.1622,
"step": 49
},
{
"epoch": 0.035137034434293744,
"grad_norm": 0.2399102843641276,
"learning_rate": 9.992386230586832e-06,
"loss": 0.1544,
"step": 50
},
{
"epoch": 0.035839775122979624,
"grad_norm": 0.21931189736397577,
"learning_rate": 9.992078715546373e-06,
"loss": 0.1557,
"step": 51
},
{
"epoch": 0.036542515811665496,
"grad_norm": 0.21623417760180716,
"learning_rate": 9.991765117596705e-06,
"loss": 0.1685,
"step": 52
},
{
"epoch": 0.03724525650035137,
"grad_norm": 0.22423148457117947,
"learning_rate": 9.99144543711995e-06,
"loss": 0.1631,
"step": 53
},
{
"epoch": 0.03794799718903725,
"grad_norm": 0.23730110023331727,
"learning_rate": 9.991119674505643e-06,
"loss": 0.1617,
"step": 54
},
{
"epoch": 0.03865073787772312,
"grad_norm": 0.2262439499089269,
"learning_rate": 9.990787830150727e-06,
"loss": 0.1688,
"step": 55
},
{
"epoch": 0.03935347856640899,
"grad_norm": 0.24630946071749615,
"learning_rate": 9.990449904459561e-06,
"loss": 0.1641,
"step": 56
},
{
"epoch": 0.04005621925509487,
"grad_norm": 0.21522471788288589,
"learning_rate": 9.99010589784391e-06,
"loss": 0.1608,
"step": 57
},
{
"epoch": 0.040758959943780745,
"grad_norm": 0.2147585763319495,
"learning_rate": 9.989755810722951e-06,
"loss": 0.1586,
"step": 58
},
{
"epoch": 0.04146170063246662,
"grad_norm": 0.21963644259260404,
"learning_rate": 9.989399643523272e-06,
"loss": 0.16,
"step": 59
},
{
"epoch": 0.0421644413211525,
"grad_norm": 0.22762987788904104,
"learning_rate": 9.989037396678863e-06,
"loss": 0.1764,
"step": 60
},
{
"epoch": 0.04286718200983837,
"grad_norm": 0.21720360914211786,
"learning_rate": 9.988669070631128e-06,
"loss": 0.148,
"step": 61
},
{
"epoch": 0.04356992269852424,
"grad_norm": 0.22946020358887756,
"learning_rate": 9.988294665828877e-06,
"loss": 0.1703,
"step": 62
},
{
"epoch": 0.04427266338721012,
"grad_norm": 0.21870913201771858,
"learning_rate": 9.987914182728327e-06,
"loss": 0.1648,
"step": 63
},
{
"epoch": 0.044975404075895994,
"grad_norm": 0.21640414646112682,
"learning_rate": 9.9875276217931e-06,
"loss": 0.1523,
"step": 64
},
{
"epoch": 0.04567814476458187,
"grad_norm": 0.2375488476457923,
"learning_rate": 9.987134983494227e-06,
"loss": 0.1821,
"step": 65
},
{
"epoch": 0.046380885453267746,
"grad_norm": 0.23775475949447883,
"learning_rate": 9.98673626831014e-06,
"loss": 0.175,
"step": 66
},
{
"epoch": 0.04708362614195362,
"grad_norm": 0.21092977907268362,
"learning_rate": 9.986331476726681e-06,
"loss": 0.1481,
"step": 67
},
{
"epoch": 0.04778636683063949,
"grad_norm": 0.23385802238878012,
"learning_rate": 9.985920609237092e-06,
"loss": 0.163,
"step": 68
},
{
"epoch": 0.04848910751932537,
"grad_norm": 0.21614389441149306,
"learning_rate": 9.985503666342022e-06,
"loss": 0.1513,
"step": 69
},
{
"epoch": 0.049191848208011243,
"grad_norm": 0.2206736408632053,
"learning_rate": 9.98508064854952e-06,
"loss": 0.1607,
"step": 70
},
{
"epoch": 0.049894588896697116,
"grad_norm": 0.2175259636726515,
"learning_rate": 9.984651556375039e-06,
"loss": 0.1444,
"step": 71
},
{
"epoch": 0.050597329585382995,
"grad_norm": 0.22336405711053176,
"learning_rate": 9.984216390341428e-06,
"loss": 0.1502,
"step": 72
},
{
"epoch": 0.05130007027406887,
"grad_norm": 0.25464592310349976,
"learning_rate": 9.98377515097895e-06,
"loss": 0.1808,
"step": 73
},
{
"epoch": 0.05200281096275474,
"grad_norm": 0.21579729375418627,
"learning_rate": 9.983327838825256e-06,
"loss": 0.1626,
"step": 74
},
{
"epoch": 0.05270555165144062,
"grad_norm": 0.2327261647575767,
"learning_rate": 9.982874454425402e-06,
"loss": 0.1845,
"step": 75
},
{
"epoch": 0.05340829234012649,
"grad_norm": 0.21603646380301222,
"learning_rate": 9.982414998331842e-06,
"loss": 0.1674,
"step": 76
},
{
"epoch": 0.054111033028812365,
"grad_norm": 0.24229089060879372,
"learning_rate": 9.98194947110443e-06,
"loss": 0.1566,
"step": 77
},
{
"epoch": 0.054813773717498245,
"grad_norm": 0.22567175238557236,
"learning_rate": 9.981477873310416e-06,
"loss": 0.1477,
"step": 78
},
{
"epoch": 0.05551651440618412,
"grad_norm": 0.21628491392420587,
"learning_rate": 9.981000205524449e-06,
"loss": 0.1453,
"step": 79
},
{
"epoch": 0.05621925509486999,
"grad_norm": 0.2542745073788312,
"learning_rate": 9.980516468328571e-06,
"loss": 0.1423,
"step": 80
},
{
"epoch": 0.05692199578355587,
"grad_norm": 0.2314506603657853,
"learning_rate": 9.980026662312224e-06,
"loss": 0.1672,
"step": 81
},
{
"epoch": 0.05762473647224174,
"grad_norm": 0.22609205520582912,
"learning_rate": 9.979530788072241e-06,
"loss": 0.1739,
"step": 82
},
{
"epoch": 0.05832747716092762,
"grad_norm": 0.22879933358531024,
"learning_rate": 9.979028846212852e-06,
"loss": 0.1545,
"step": 83
},
{
"epoch": 0.059030217849613494,
"grad_norm": 0.24080671138893864,
"learning_rate": 9.978520837345678e-06,
"loss": 0.1614,
"step": 84
},
{
"epoch": 0.059732958538299366,
"grad_norm": 0.2226217622178982,
"learning_rate": 9.978006762089734e-06,
"loss": 0.1502,
"step": 85
},
{
"epoch": 0.060435699226985246,
"grad_norm": 0.22238591500797344,
"learning_rate": 9.977486621071425e-06,
"loss": 0.1521,
"step": 86
},
{
"epoch": 0.06113843991567112,
"grad_norm": 0.23349042110108353,
"learning_rate": 9.976960414924558e-06,
"loss": 0.1607,
"step": 87
},
{
"epoch": 0.06184118060435699,
"grad_norm": 0.24122317397923573,
"learning_rate": 9.97642814429031e-06,
"loss": 0.1662,
"step": 88
},
{
"epoch": 0.06254392129304287,
"grad_norm": 0.22749686343984063,
"learning_rate": 9.975889809817268e-06,
"loss": 0.1599,
"step": 89
},
{
"epoch": 0.06324666198172874,
"grad_norm": 0.24338988286933885,
"learning_rate": 9.975345412161395e-06,
"loss": 0.1597,
"step": 90
},
{
"epoch": 0.06394940267041462,
"grad_norm": 0.22918775499744473,
"learning_rate": 9.974794951986047e-06,
"loss": 0.134,
"step": 91
},
{
"epoch": 0.0646521433591005,
"grad_norm": 0.2306481590706656,
"learning_rate": 9.974238429961965e-06,
"loss": 0.1701,
"step": 92
},
{
"epoch": 0.06535488404778636,
"grad_norm": 0.2306619679739121,
"learning_rate": 9.973675846767281e-06,
"loss": 0.1384,
"step": 93
},
{
"epoch": 0.06605762473647224,
"grad_norm": 0.20775973348536772,
"learning_rate": 9.973107203087508e-06,
"loss": 0.1529,
"step": 94
},
{
"epoch": 0.06676036542515812,
"grad_norm": 0.21586950729705037,
"learning_rate": 9.972532499615546e-06,
"loss": 0.1339,
"step": 95
},
{
"epoch": 0.06746310611384398,
"grad_norm": 0.21751744722603925,
"learning_rate": 9.971951737051677e-06,
"loss": 0.1395,
"step": 96
},
{
"epoch": 0.06816584680252986,
"grad_norm": 0.23033563209972832,
"learning_rate": 9.97136491610357e-06,
"loss": 0.1506,
"step": 97
},
{
"epoch": 0.06886858749121574,
"grad_norm": 0.22278831970183996,
"learning_rate": 9.97077203748627e-06,
"loss": 0.1453,
"step": 98
},
{
"epoch": 0.06957132817990162,
"grad_norm": 0.21851911401623492,
"learning_rate": 9.970173101922207e-06,
"loss": 0.1413,
"step": 99
},
{
"epoch": 0.07027406886858749,
"grad_norm": 0.23702554546021742,
"learning_rate": 9.969568110141194e-06,
"loss": 0.1647,
"step": 100
},
{
"epoch": 0.07097680955727337,
"grad_norm": 0.22139898311737885,
"learning_rate": 9.968957062880419e-06,
"loss": 0.1616,
"step": 101
},
{
"epoch": 0.07167955024595925,
"grad_norm": 0.25877384440602486,
"learning_rate": 9.968339960884452e-06,
"loss": 0.1897,
"step": 102
},
{
"epoch": 0.07238229093464511,
"grad_norm": 0.2307325498965304,
"learning_rate": 9.967716804905238e-06,
"loss": 0.1701,
"step": 103
},
{
"epoch": 0.07308503162333099,
"grad_norm": 0.21440421674760093,
"learning_rate": 9.967087595702101e-06,
"loss": 0.1449,
"step": 104
},
{
"epoch": 0.07378777231201687,
"grad_norm": 0.2185591668852122,
"learning_rate": 9.966452334041741e-06,
"loss": 0.1472,
"step": 105
},
{
"epoch": 0.07449051300070274,
"grad_norm": 0.22568804136389564,
"learning_rate": 9.965811020698228e-06,
"loss": 0.1632,
"step": 106
},
{
"epoch": 0.07519325368938862,
"grad_norm": 0.23705429289228355,
"learning_rate": 9.965163656453017e-06,
"loss": 0.1795,
"step": 107
},
{
"epoch": 0.0758959943780745,
"grad_norm": 0.22378762812506117,
"learning_rate": 9.964510242094922e-06,
"loss": 0.157,
"step": 108
},
{
"epoch": 0.07659873506676036,
"grad_norm": 0.22036459897737168,
"learning_rate": 9.963850778420144e-06,
"loss": 0.1566,
"step": 109
},
{
"epoch": 0.07730147575544624,
"grad_norm": 0.23139104245634406,
"learning_rate": 9.96318526623224e-06,
"loss": 0.1501,
"step": 110
},
{
"epoch": 0.07800421644413212,
"grad_norm": 0.2262566736264041,
"learning_rate": 9.962513706342149e-06,
"loss": 0.1491,
"step": 111
},
{
"epoch": 0.07870695713281799,
"grad_norm": 0.22941484515617555,
"learning_rate": 9.961836099568174e-06,
"loss": 0.1529,
"step": 112
},
{
"epoch": 0.07940969782150387,
"grad_norm": 0.23326831739738074,
"learning_rate": 9.961152446735989e-06,
"loss": 0.1569,
"step": 113
},
{
"epoch": 0.08011243851018975,
"grad_norm": 0.21944918447661144,
"learning_rate": 9.960462748678632e-06,
"loss": 0.1398,
"step": 114
},
{
"epoch": 0.08081517919887561,
"grad_norm": 0.2333740167033304,
"learning_rate": 9.959767006236508e-06,
"loss": 0.1637,
"step": 115
},
{
"epoch": 0.08151791988756149,
"grad_norm": 0.21902990886068902,
"learning_rate": 9.959065220257388e-06,
"loss": 0.1415,
"step": 116
},
{
"epoch": 0.08222066057624737,
"grad_norm": 0.24035739744375173,
"learning_rate": 9.958357391596405e-06,
"loss": 0.1667,
"step": 117
},
{
"epoch": 0.08292340126493324,
"grad_norm": 0.23115524164379975,
"learning_rate": 9.957643521116059e-06,
"loss": 0.145,
"step": 118
},
{
"epoch": 0.08362614195361912,
"grad_norm": 0.22853555573702172,
"learning_rate": 9.956923609686212e-06,
"loss": 0.1537,
"step": 119
},
{
"epoch": 0.084328882642305,
"grad_norm": 0.21471991506033805,
"learning_rate": 9.956197658184082e-06,
"loss": 0.1367,
"step": 120
},
{
"epoch": 0.08503162333099086,
"grad_norm": 0.23327343465576786,
"learning_rate": 9.955465667494249e-06,
"loss": 0.149,
"step": 121
},
{
"epoch": 0.08573436401967674,
"grad_norm": 0.22317083001177346,
"learning_rate": 9.954727638508655e-06,
"loss": 0.1475,
"step": 122
},
{
"epoch": 0.08643710470836262,
"grad_norm": 0.24221607461194244,
"learning_rate": 9.953983572126598e-06,
"loss": 0.1771,
"step": 123
},
{
"epoch": 0.08713984539704848,
"grad_norm": 0.2298444986479478,
"learning_rate": 9.953233469254728e-06,
"loss": 0.1561,
"step": 124
},
{
"epoch": 0.08784258608573436,
"grad_norm": 0.2507442588010247,
"learning_rate": 9.95247733080706e-06,
"loss": 0.1669,
"step": 125
},
{
"epoch": 0.08854532677442024,
"grad_norm": 0.2410117127316703,
"learning_rate": 9.951715157704954e-06,
"loss": 0.1491,
"step": 126
},
{
"epoch": 0.08924806746310611,
"grad_norm": 0.2152453624040396,
"learning_rate": 9.950946950877126e-06,
"loss": 0.1318,
"step": 127
},
{
"epoch": 0.08995080815179199,
"grad_norm": 0.22635731209745702,
"learning_rate": 9.950172711259651e-06,
"loss": 0.1549,
"step": 128
},
{
"epoch": 0.09065354884047787,
"grad_norm": 0.22791388386135156,
"learning_rate": 9.949392439795943e-06,
"loss": 0.1672,
"step": 129
},
{
"epoch": 0.09135628952916373,
"grad_norm": 0.23466986226882583,
"learning_rate": 9.948606137436779e-06,
"loss": 0.147,
"step": 130
},
{
"epoch": 0.09205903021784961,
"grad_norm": 0.21036416219175993,
"learning_rate": 9.947813805140274e-06,
"loss": 0.1353,
"step": 131
},
{
"epoch": 0.09276177090653549,
"grad_norm": 0.22305142869794986,
"learning_rate": 9.947015443871894e-06,
"loss": 0.1622,
"step": 132
},
{
"epoch": 0.09346451159522136,
"grad_norm": 0.2314808648466817,
"learning_rate": 9.946211054604455e-06,
"loss": 0.1642,
"step": 133
},
{
"epoch": 0.09416725228390724,
"grad_norm": 0.22465782680381838,
"learning_rate": 9.945400638318113e-06,
"loss": 0.1483,
"step": 134
},
{
"epoch": 0.09486999297259312,
"grad_norm": 0.2282629153473308,
"learning_rate": 9.94458419600037e-06,
"loss": 0.1536,
"step": 135
},
{
"epoch": 0.09557273366127898,
"grad_norm": 0.23225222504892457,
"learning_rate": 9.943761728646072e-06,
"loss": 0.1595,
"step": 136
},
{
"epoch": 0.09627547434996486,
"grad_norm": 0.23235872039660924,
"learning_rate": 9.942933237257406e-06,
"loss": 0.1489,
"step": 137
},
{
"epoch": 0.09697821503865074,
"grad_norm": 0.2240173794457857,
"learning_rate": 9.942098722843898e-06,
"loss": 0.1629,
"step": 138
},
{
"epoch": 0.09768095572733661,
"grad_norm": 0.2293385993843326,
"learning_rate": 9.941258186422413e-06,
"loss": 0.1573,
"step": 139
},
{
"epoch": 0.09838369641602249,
"grad_norm": 0.22833543043080246,
"learning_rate": 9.940411629017159e-06,
"loss": 0.157,
"step": 140
},
{
"epoch": 0.09908643710470837,
"grad_norm": 0.22954043699721072,
"learning_rate": 9.93955905165967e-06,
"loss": 0.1583,
"step": 141
},
{
"epoch": 0.09978917779339423,
"grad_norm": 0.24370925299371685,
"learning_rate": 9.93870045538883e-06,
"loss": 0.1722,
"step": 142
},
{
"epoch": 0.10049191848208011,
"grad_norm": 0.26037339802711335,
"learning_rate": 9.937835841250842e-06,
"loss": 0.1889,
"step": 143
},
{
"epoch": 0.10119465917076599,
"grad_norm": 0.2339384937796873,
"learning_rate": 9.936965210299254e-06,
"loss": 0.1554,
"step": 144
},
{
"epoch": 0.10189739985945186,
"grad_norm": 0.225777869610915,
"learning_rate": 9.936088563594937e-06,
"loss": 0.1599,
"step": 145
},
{
"epoch": 0.10260014054813774,
"grad_norm": 0.21936237668214217,
"learning_rate": 9.935205902206098e-06,
"loss": 0.151,
"step": 146
},
{
"epoch": 0.10330288123682362,
"grad_norm": 0.23864096391826914,
"learning_rate": 9.934317227208269e-06,
"loss": 0.1792,
"step": 147
},
{
"epoch": 0.10400562192550948,
"grad_norm": 0.21456593076447886,
"learning_rate": 9.933422539684314e-06,
"loss": 0.1323,
"step": 148
},
{
"epoch": 0.10470836261419536,
"grad_norm": 0.22678579145421987,
"learning_rate": 9.932521840724418e-06,
"loss": 0.1406,
"step": 149
},
{
"epoch": 0.10541110330288124,
"grad_norm": 0.2194282400478766,
"learning_rate": 9.931615131426094e-06,
"loss": 0.1553,
"step": 150
},
{
"epoch": 0.1061138439915671,
"grad_norm": 0.21406188620009084,
"learning_rate": 9.930702412894179e-06,
"loss": 0.1417,
"step": 151
},
{
"epoch": 0.10681658468025299,
"grad_norm": 0.2106984614143186,
"learning_rate": 9.929783686240833e-06,
"loss": 0.15,
"step": 152
},
{
"epoch": 0.10751932536893886,
"grad_norm": 0.23020276158722305,
"learning_rate": 9.928858952585535e-06,
"loss": 0.1428,
"step": 153
},
{
"epoch": 0.10822206605762473,
"grad_norm": 0.23064426939098223,
"learning_rate": 9.927928213055082e-06,
"loss": 0.1457,
"step": 154
},
{
"epoch": 0.10892480674631061,
"grad_norm": 0.20229950670567504,
"learning_rate": 9.926991468783595e-06,
"loss": 0.1203,
"step": 155
},
{
"epoch": 0.10962754743499649,
"grad_norm": 0.23005316977007173,
"learning_rate": 9.926048720912509e-06,
"loss": 0.1358,
"step": 156
},
{
"epoch": 0.11033028812368235,
"grad_norm": 0.22661609630008242,
"learning_rate": 9.925099970590568e-06,
"loss": 0.164,
"step": 157
},
{
"epoch": 0.11103302881236823,
"grad_norm": 0.22479674585666098,
"learning_rate": 9.924145218973841e-06,
"loss": 0.1616,
"step": 158
},
{
"epoch": 0.11173576950105411,
"grad_norm": 0.2115112827562552,
"learning_rate": 9.923184467225704e-06,
"loss": 0.148,
"step": 159
},
{
"epoch": 0.11243851018973998,
"grad_norm": 0.22170529259594704,
"learning_rate": 9.922217716516843e-06,
"loss": 0.1405,
"step": 160
},
{
"epoch": 0.11314125087842586,
"grad_norm": 0.23329767078664584,
"learning_rate": 9.921244968025257e-06,
"loss": 0.1592,
"step": 161
},
{
"epoch": 0.11384399156711174,
"grad_norm": 0.21070614899950893,
"learning_rate": 9.920266222936252e-06,
"loss": 0.1324,
"step": 162
},
{
"epoch": 0.11454673225579762,
"grad_norm": 0.2158519942207162,
"learning_rate": 9.91928148244244e-06,
"loss": 0.1302,
"step": 163
},
{
"epoch": 0.11524947294448348,
"grad_norm": 0.21880123190836417,
"learning_rate": 9.91829074774374e-06,
"loss": 0.1407,
"step": 164
},
{
"epoch": 0.11595221363316936,
"grad_norm": 0.21140310737644744,
"learning_rate": 9.917294020047375e-06,
"loss": 0.1304,
"step": 165
},
{
"epoch": 0.11665495432185524,
"grad_norm": 0.21777318484711836,
"learning_rate": 9.916291300567868e-06,
"loss": 0.1535,
"step": 166
},
{
"epoch": 0.11735769501054111,
"grad_norm": 0.22948644143342253,
"learning_rate": 9.915282590527048e-06,
"loss": 0.1607,
"step": 167
},
{
"epoch": 0.11806043569922699,
"grad_norm": 0.2171703607606733,
"learning_rate": 9.914267891154037e-06,
"loss": 0.158,
"step": 168
},
{
"epoch": 0.11876317638791287,
"grad_norm": 0.20521522990055951,
"learning_rate": 9.913247203685261e-06,
"loss": 0.124,
"step": 169
},
{
"epoch": 0.11946591707659873,
"grad_norm": 0.21838401487744416,
"learning_rate": 9.912220529364441e-06,
"loss": 0.1383,
"step": 170
},
{
"epoch": 0.12016865776528461,
"grad_norm": 0.21108339798715825,
"learning_rate": 9.911187869442588e-06,
"loss": 0.145,
"step": 171
},
{
"epoch": 0.12087139845397049,
"grad_norm": 0.2058028529178568,
"learning_rate": 9.910149225178018e-06,
"loss": 0.1502,
"step": 172
},
{
"epoch": 0.12157413914265636,
"grad_norm": 0.21637207802325303,
"learning_rate": 9.909104597836324e-06,
"loss": 0.1598,
"step": 173
},
{
"epoch": 0.12227687983134224,
"grad_norm": 0.2070312659649698,
"learning_rate": 9.908053988690403e-06,
"loss": 0.1474,
"step": 174
},
{
"epoch": 0.12297962052002812,
"grad_norm": 0.2025038975642288,
"learning_rate": 9.90699739902043e-06,
"loss": 0.1481,
"step": 175
},
{
"epoch": 0.12368236120871398,
"grad_norm": 0.21329753265510695,
"learning_rate": 9.905934830113878e-06,
"loss": 0.1711,
"step": 176
},
{
"epoch": 0.12438510189739986,
"grad_norm": 0.2111346574233757,
"learning_rate": 9.904866283265498e-06,
"loss": 0.1483,
"step": 177
},
{
"epoch": 0.12508784258608574,
"grad_norm": 0.2176831991004826,
"learning_rate": 9.903791759777326e-06,
"loss": 0.1648,
"step": 178
},
{
"epoch": 0.1257905832747716,
"grad_norm": 0.22920882811026375,
"learning_rate": 9.902711260958682e-06,
"loss": 0.1745,
"step": 179
},
{
"epoch": 0.12649332396345747,
"grad_norm": 0.21471369605806437,
"learning_rate": 9.901624788126169e-06,
"loss": 0.1539,
"step": 180
},
{
"epoch": 0.12719606465214336,
"grad_norm": 0.21051519857310108,
"learning_rate": 9.900532342603669e-06,
"loss": 0.1503,
"step": 181
},
{
"epoch": 0.12789880534082923,
"grad_norm": 0.20679381262892477,
"learning_rate": 9.899433925722334e-06,
"loss": 0.1396,
"step": 182
},
{
"epoch": 0.1286015460295151,
"grad_norm": 0.20530703991119223,
"learning_rate": 9.898329538820606e-06,
"loss": 0.1349,
"step": 183
},
{
"epoch": 0.129304286718201,
"grad_norm": 0.21306675825977314,
"learning_rate": 9.897219183244188e-06,
"loss": 0.1627,
"step": 184
},
{
"epoch": 0.13000702740688685,
"grad_norm": 0.20814819045510163,
"learning_rate": 9.896102860346066e-06,
"loss": 0.1515,
"step": 185
},
{
"epoch": 0.13070976809557272,
"grad_norm": 0.2106214566796909,
"learning_rate": 9.894980571486492e-06,
"loss": 0.1491,
"step": 186
},
{
"epoch": 0.1314125087842586,
"grad_norm": 0.20094442811285093,
"learning_rate": 9.893852318032986e-06,
"loss": 0.1362,
"step": 187
},
{
"epoch": 0.13211524947294448,
"grad_norm": 0.2174422473951083,
"learning_rate": 9.892718101360344e-06,
"loss": 0.1527,
"step": 188
},
{
"epoch": 0.13281799016163034,
"grad_norm": 0.20777230307040195,
"learning_rate": 9.891577922850616e-06,
"loss": 0.1357,
"step": 189
},
{
"epoch": 0.13352073085031624,
"grad_norm": 0.19574126769047104,
"learning_rate": 9.89043178389313e-06,
"loss": 0.1196,
"step": 190
},
{
"epoch": 0.1342234715390021,
"grad_norm": 0.1972211001296908,
"learning_rate": 9.889279685884468e-06,
"loss": 0.1205,
"step": 191
},
{
"epoch": 0.13492621222768797,
"grad_norm": 0.20133638200663645,
"learning_rate": 9.888121630228476e-06,
"loss": 0.1311,
"step": 192
},
{
"epoch": 0.13562895291637386,
"grad_norm": 0.21168354700179234,
"learning_rate": 9.886957618336257e-06,
"loss": 0.1441,
"step": 193
},
{
"epoch": 0.13633169360505973,
"grad_norm": 0.21241172925192037,
"learning_rate": 9.885787651626176e-06,
"loss": 0.1554,
"step": 194
},
{
"epoch": 0.13703443429374562,
"grad_norm": 0.20637900238564963,
"learning_rate": 9.88461173152385e-06,
"loss": 0.1441,
"step": 195
},
{
"epoch": 0.1377371749824315,
"grad_norm": 0.1988669088284257,
"learning_rate": 9.883429859462155e-06,
"loss": 0.1426,
"step": 196
},
{
"epoch": 0.13843991567111735,
"grad_norm": 0.22723086865623662,
"learning_rate": 9.882242036881214e-06,
"loss": 0.1666,
"step": 197
},
{
"epoch": 0.13914265635980325,
"grad_norm": 0.2102824119314862,
"learning_rate": 9.881048265228402e-06,
"loss": 0.1421,
"step": 198
},
{
"epoch": 0.1398453970484891,
"grad_norm": 0.22401715267511088,
"learning_rate": 9.879848545958348e-06,
"loss": 0.1791,
"step": 199
},
{
"epoch": 0.14054813773717498,
"grad_norm": 0.18679889906920316,
"learning_rate": 9.878642880532923e-06,
"loss": 0.1156,
"step": 200
},
{
"epoch": 0.14125087842586087,
"grad_norm": 0.2066788689425233,
"learning_rate": 9.877431270421248e-06,
"loss": 0.1562,
"step": 201
},
{
"epoch": 0.14195361911454674,
"grad_norm": 0.20348877687155706,
"learning_rate": 9.876213717099678e-06,
"loss": 0.1408,
"step": 202
},
{
"epoch": 0.1426563598032326,
"grad_norm": 0.19331306864239647,
"learning_rate": 9.874990222051824e-06,
"loss": 0.1406,
"step": 203
},
{
"epoch": 0.1433591004919185,
"grad_norm": 0.2036648310599538,
"learning_rate": 9.873760786768524e-06,
"loss": 0.1441,
"step": 204
},
{
"epoch": 0.14406184118060436,
"grad_norm": 0.20284146081062035,
"learning_rate": 9.872525412747865e-06,
"loss": 0.1451,
"step": 205
},
{
"epoch": 0.14476458186929023,
"grad_norm": 0.20368570512479053,
"learning_rate": 9.87128410149516e-06,
"loss": 0.1471,
"step": 206
},
{
"epoch": 0.14546732255797612,
"grad_norm": 0.21542058472595663,
"learning_rate": 9.870036854522967e-06,
"loss": 0.1732,
"step": 207
},
{
"epoch": 0.14617006324666199,
"grad_norm": 0.19676695081312096,
"learning_rate": 9.868783673351069e-06,
"loss": 0.1271,
"step": 208
},
{
"epoch": 0.14687280393534785,
"grad_norm": 0.2041993039864764,
"learning_rate": 9.867524559506484e-06,
"loss": 0.1404,
"step": 209
},
{
"epoch": 0.14757554462403374,
"grad_norm": 0.19324770241438533,
"learning_rate": 9.866259514523456e-06,
"loss": 0.1358,
"step": 210
},
{
"epoch": 0.1482782853127196,
"grad_norm": 0.19368703582739905,
"learning_rate": 9.86498853994346e-06,
"loss": 0.1325,
"step": 211
},
{
"epoch": 0.14898102600140548,
"grad_norm": 0.20594190897232956,
"learning_rate": 9.863711637315193e-06,
"loss": 0.156,
"step": 212
},
{
"epoch": 0.14968376669009137,
"grad_norm": 0.1889673369896229,
"learning_rate": 9.862428808194575e-06,
"loss": 0.1254,
"step": 213
},
{
"epoch": 0.15038650737877723,
"grad_norm": 0.20889037560248144,
"learning_rate": 9.86114005414475e-06,
"loss": 0.1507,
"step": 214
},
{
"epoch": 0.1510892480674631,
"grad_norm": 0.19696721813636178,
"learning_rate": 9.859845376736084e-06,
"loss": 0.1425,
"step": 215
},
{
"epoch": 0.151791988756149,
"grad_norm": 0.19637911874328323,
"learning_rate": 9.858544777546153e-06,
"loss": 0.137,
"step": 216
},
{
"epoch": 0.15249472944483486,
"grad_norm": 0.19395324013553517,
"learning_rate": 9.857238258159755e-06,
"loss": 0.1287,
"step": 217
},
{
"epoch": 0.15319747013352072,
"grad_norm": 0.1994022217008307,
"learning_rate": 9.8559258201689e-06,
"loss": 0.1331,
"step": 218
},
{
"epoch": 0.15390021082220662,
"grad_norm": 0.19640068491429194,
"learning_rate": 9.854607465172808e-06,
"loss": 0.1378,
"step": 219
},
{
"epoch": 0.15460295151089248,
"grad_norm": 0.20985156380470357,
"learning_rate": 9.853283194777913e-06,
"loss": 0.1559,
"step": 220
},
{
"epoch": 0.15530569219957835,
"grad_norm": 0.2119242846404433,
"learning_rate": 9.851953010597854e-06,
"loss": 0.1569,
"step": 221
},
{
"epoch": 0.15600843288826424,
"grad_norm": 0.19551383292270935,
"learning_rate": 9.850616914253476e-06,
"loss": 0.1356,
"step": 222
},
{
"epoch": 0.1567111735769501,
"grad_norm": 0.20404309724036893,
"learning_rate": 9.84927490737283e-06,
"loss": 0.1484,
"step": 223
},
{
"epoch": 0.15741391426563597,
"grad_norm": 0.19588159537886093,
"learning_rate": 9.847926991591165e-06,
"loss": 0.1366,
"step": 224
},
{
"epoch": 0.15811665495432187,
"grad_norm": 0.2028467028445028,
"learning_rate": 9.846573168550936e-06,
"loss": 0.1462,
"step": 225
},
{
"epoch": 0.15881939564300773,
"grad_norm": 0.20417151174789697,
"learning_rate": 9.845213439901795e-06,
"loss": 0.144,
"step": 226
},
{
"epoch": 0.1595221363316936,
"grad_norm": 0.1967566987991376,
"learning_rate": 9.843847807300582e-06,
"loss": 0.1321,
"step": 227
},
{
"epoch": 0.1602248770203795,
"grad_norm": 0.19478067310190322,
"learning_rate": 9.842476272411343e-06,
"loss": 0.126,
"step": 228
},
{
"epoch": 0.16092761770906536,
"grad_norm": 0.20851503046840705,
"learning_rate": 9.841098836905306e-06,
"loss": 0.1519,
"step": 229
},
{
"epoch": 0.16163035839775122,
"grad_norm": 0.2063601988212969,
"learning_rate": 9.839715502460894e-06,
"loss": 0.148,
"step": 230
},
{
"epoch": 0.16233309908643712,
"grad_norm": 0.20959979618188648,
"learning_rate": 9.838326270763717e-06,
"loss": 0.1493,
"step": 231
},
{
"epoch": 0.16303583977512298,
"grad_norm": 0.19798913894624784,
"learning_rate": 9.836931143506572e-06,
"loss": 0.1386,
"step": 232
},
{
"epoch": 0.16373858046380885,
"grad_norm": 0.2026930018103244,
"learning_rate": 9.835530122389439e-06,
"loss": 0.1461,
"step": 233
},
{
"epoch": 0.16444132115249474,
"grad_norm": 0.20203104732148083,
"learning_rate": 9.834123209119478e-06,
"loss": 0.1417,
"step": 234
},
{
"epoch": 0.1651440618411806,
"grad_norm": 0.20353340751359844,
"learning_rate": 9.83271040541103e-06,
"loss": 0.1518,
"step": 235
},
{
"epoch": 0.16584680252986647,
"grad_norm": 0.20225220167990685,
"learning_rate": 9.831291712985613e-06,
"loss": 0.1385,
"step": 236
},
{
"epoch": 0.16654954321855236,
"grad_norm": 0.1954351628443142,
"learning_rate": 9.829867133571924e-06,
"loss": 0.1283,
"step": 237
},
{
"epoch": 0.16725228390723823,
"grad_norm": 0.19276658232546576,
"learning_rate": 9.828436668905829e-06,
"loss": 0.1184,
"step": 238
},
{
"epoch": 0.1679550245959241,
"grad_norm": 0.19712257795367794,
"learning_rate": 9.827000320730366e-06,
"loss": 0.1357,
"step": 239
},
{
"epoch": 0.16865776528461,
"grad_norm": 0.21657190170292834,
"learning_rate": 9.825558090795747e-06,
"loss": 0.1666,
"step": 240
},
{
"epoch": 0.16936050597329586,
"grad_norm": 0.2110714527862585,
"learning_rate": 9.82410998085934e-06,
"loss": 0.1461,
"step": 241
},
{
"epoch": 0.17006324666198172,
"grad_norm": 0.20346359785918153,
"learning_rate": 9.822655992685687e-06,
"loss": 0.1485,
"step": 242
},
{
"epoch": 0.17076598735066761,
"grad_norm": 0.19734304130172103,
"learning_rate": 9.821196128046493e-06,
"loss": 0.1448,
"step": 243
},
{
"epoch": 0.17146872803935348,
"grad_norm": 0.1977924111993959,
"learning_rate": 9.819730388720622e-06,
"loss": 0.1357,
"step": 244
},
{
"epoch": 0.17217146872803935,
"grad_norm": 0.20446387822448595,
"learning_rate": 9.818258776494089e-06,
"loss": 0.1465,
"step": 245
},
{
"epoch": 0.17287420941672524,
"grad_norm": 0.19721437896916333,
"learning_rate": 9.816781293160079e-06,
"loss": 0.148,
"step": 246
},
{
"epoch": 0.1735769501054111,
"grad_norm": 0.20360239110052564,
"learning_rate": 9.815297940518917e-06,
"loss": 0.1458,
"step": 247
},
{
"epoch": 0.17427969079409697,
"grad_norm": 0.19176139731960046,
"learning_rate": 9.81380872037809e-06,
"loss": 0.1291,
"step": 248
},
{
"epoch": 0.17498243148278286,
"grad_norm": 0.19864761383472335,
"learning_rate": 9.812313634552233e-06,
"loss": 0.1513,
"step": 249
},
{
"epoch": 0.17568517217146873,
"grad_norm": 0.19259497761665204,
"learning_rate": 9.810812684863123e-06,
"loss": 0.1334,
"step": 250
},
{
"epoch": 0.1763879128601546,
"grad_norm": 0.19754811656485127,
"learning_rate": 9.809305873139685e-06,
"loss": 0.1418,
"step": 251
},
{
"epoch": 0.1770906535488405,
"grad_norm": 0.19322818342606846,
"learning_rate": 9.80779320121799e-06,
"loss": 0.1279,
"step": 252
},
{
"epoch": 0.17779339423752635,
"grad_norm": 0.20081134768596065,
"learning_rate": 9.806274670941247e-06,
"loss": 0.1517,
"step": 253
},
{
"epoch": 0.17849613492621222,
"grad_norm": 0.19940134193131787,
"learning_rate": 9.804750284159802e-06,
"loss": 0.1387,
"step": 254
},
{
"epoch": 0.1791988756148981,
"grad_norm": 0.21370747000293353,
"learning_rate": 9.803220042731143e-06,
"loss": 0.1602,
"step": 255
},
{
"epoch": 0.17990161630358398,
"grad_norm": 0.21025898790340136,
"learning_rate": 9.801683948519885e-06,
"loss": 0.1594,
"step": 256
},
{
"epoch": 0.18060435699226984,
"grad_norm": 0.18892389409061483,
"learning_rate": 9.800142003397774e-06,
"loss": 0.1244,
"step": 257
},
{
"epoch": 0.18130709768095574,
"grad_norm": 0.2012697662437814,
"learning_rate": 9.798594209243697e-06,
"loss": 0.147,
"step": 258
},
{
"epoch": 0.1820098383696416,
"grad_norm": 0.2021221072192875,
"learning_rate": 9.797040567943654e-06,
"loss": 0.1462,
"step": 259
},
{
"epoch": 0.18271257905832747,
"grad_norm": 0.19298081578981383,
"learning_rate": 9.79548108139078e-06,
"loss": 0.1346,
"step": 260
},
{
"epoch": 0.18341531974701336,
"grad_norm": 0.19917438982161378,
"learning_rate": 9.793915751485326e-06,
"loss": 0.1473,
"step": 261
},
{
"epoch": 0.18411806043569923,
"grad_norm": 0.2102293028196818,
"learning_rate": 9.792344580134664e-06,
"loss": 0.1505,
"step": 262
},
{
"epoch": 0.1848208011243851,
"grad_norm": 0.19200363270807924,
"learning_rate": 9.790767569253292e-06,
"loss": 0.1352,
"step": 263
},
{
"epoch": 0.18552354181307099,
"grad_norm": 0.18695723600463093,
"learning_rate": 9.78918472076281e-06,
"loss": 0.1366,
"step": 264
},
{
"epoch": 0.18622628250175685,
"grad_norm": 0.21193646495619203,
"learning_rate": 9.787596036591944e-06,
"loss": 0.1719,
"step": 265
},
{
"epoch": 0.18692902319044272,
"grad_norm": 0.22781719090648606,
"learning_rate": 9.78600151867652e-06,
"loss": 0.1793,
"step": 266
},
{
"epoch": 0.1876317638791286,
"grad_norm": 0.20133373797830542,
"learning_rate": 9.784401168959482e-06,
"loss": 0.1439,
"step": 267
},
{
"epoch": 0.18833450456781448,
"grad_norm": 0.19985220432628734,
"learning_rate": 9.782794989390874e-06,
"loss": 0.1338,
"step": 268
},
{
"epoch": 0.18903724525650034,
"grad_norm": 0.20161732988761927,
"learning_rate": 9.781182981927843e-06,
"loss": 0.1429,
"step": 269
},
{
"epoch": 0.18973998594518623,
"grad_norm": 0.21529341453100231,
"learning_rate": 9.779565148534645e-06,
"loss": 0.1611,
"step": 270
},
{
"epoch": 0.1904427266338721,
"grad_norm": 0.18744741782716104,
"learning_rate": 9.777941491182628e-06,
"loss": 0.1297,
"step": 271
},
{
"epoch": 0.19114546732255797,
"grad_norm": 0.21435105171070906,
"learning_rate": 9.776312011850236e-06,
"loss": 0.1649,
"step": 272
},
{
"epoch": 0.19184820801124386,
"grad_norm": 0.2087279127932158,
"learning_rate": 9.774676712523013e-06,
"loss": 0.1445,
"step": 273
},
{
"epoch": 0.19255094869992972,
"grad_norm": 0.21360378696023616,
"learning_rate": 9.773035595193588e-06,
"loss": 0.1558,
"step": 274
},
{
"epoch": 0.1932536893886156,
"grad_norm": 0.19530485302751618,
"learning_rate": 9.771388661861684e-06,
"loss": 0.141,
"step": 275
},
{
"epoch": 0.19395643007730148,
"grad_norm": 0.21725591674126798,
"learning_rate": 9.76973591453411e-06,
"loss": 0.1645,
"step": 276
},
{
"epoch": 0.19465917076598735,
"grad_norm": 0.20039362730829058,
"learning_rate": 9.768077355224758e-06,
"loss": 0.1466,
"step": 277
},
{
"epoch": 0.19536191145467321,
"grad_norm": 0.19056983335564764,
"learning_rate": 9.766412985954605e-06,
"loss": 0.125,
"step": 278
},
{
"epoch": 0.1960646521433591,
"grad_norm": 0.19897051209571165,
"learning_rate": 9.764742808751705e-06,
"loss": 0.1284,
"step": 279
},
{
"epoch": 0.19676739283204497,
"grad_norm": 0.19416557724599337,
"learning_rate": 9.763066825651186e-06,
"loss": 0.127,
"step": 280
},
{
"epoch": 0.19747013352073084,
"grad_norm": 0.19744282952954895,
"learning_rate": 9.761385038695257e-06,
"loss": 0.1314,
"step": 281
},
{
"epoch": 0.19817287420941673,
"grad_norm": 0.1921261405605897,
"learning_rate": 9.759697449933194e-06,
"loss": 0.1224,
"step": 282
},
{
"epoch": 0.1988756148981026,
"grad_norm": 0.19457240575585416,
"learning_rate": 9.758004061421347e-06,
"loss": 0.1338,
"step": 283
},
{
"epoch": 0.19957835558678846,
"grad_norm": 0.2147071493073892,
"learning_rate": 9.75630487522313e-06,
"loss": 0.1632,
"step": 284
},
{
"epoch": 0.20028109627547436,
"grad_norm": 0.20910620669407437,
"learning_rate": 9.754599893409023e-06,
"loss": 0.1576,
"step": 285
},
{
"epoch": 0.20098383696416022,
"grad_norm": 0.20622194923738604,
"learning_rate": 9.752889118056565e-06,
"loss": 0.1339,
"step": 286
},
{
"epoch": 0.2016865776528461,
"grad_norm": 0.2198904703734805,
"learning_rate": 9.75117255125036e-06,
"loss": 0.1697,
"step": 287
},
{
"epoch": 0.20238931834153198,
"grad_norm": 0.19944534865957894,
"learning_rate": 9.749450195082059e-06,
"loss": 0.1443,
"step": 288
},
{
"epoch": 0.20309205903021785,
"grad_norm": 0.20975886307055028,
"learning_rate": 9.747722051650384e-06,
"loss": 0.1484,
"step": 289
},
{
"epoch": 0.2037947997189037,
"grad_norm": 0.19063963859416158,
"learning_rate": 9.74598812306109e-06,
"loss": 0.126,
"step": 290
},
{
"epoch": 0.2044975404075896,
"grad_norm": 0.20365881393615565,
"learning_rate": 9.744248411426995e-06,
"loss": 0.1462,
"step": 291
},
{
"epoch": 0.20520028109627547,
"grad_norm": 0.19781542664255383,
"learning_rate": 9.742502918867959e-06,
"loss": 0.1353,
"step": 292
},
{
"epoch": 0.20590302178496134,
"grad_norm": 0.20846390034839965,
"learning_rate": 9.740751647510887e-06,
"loss": 0.1501,
"step": 293
},
{
"epoch": 0.20660576247364723,
"grad_norm": 0.2047381478662133,
"learning_rate": 9.73899459948972e-06,
"loss": 0.1434,
"step": 294
},
{
"epoch": 0.2073085031623331,
"grad_norm": 0.20707581525131896,
"learning_rate": 9.737231776945445e-06,
"loss": 0.1502,
"step": 295
},
{
"epoch": 0.20801124385101896,
"grad_norm": 0.19461042545649337,
"learning_rate": 9.735463182026085e-06,
"loss": 0.1384,
"step": 296
},
{
"epoch": 0.20871398453970486,
"grad_norm": 0.19211129683777164,
"learning_rate": 9.733688816886692e-06,
"loss": 0.1235,
"step": 297
},
{
"epoch": 0.20941672522839072,
"grad_norm": 0.20189460923598518,
"learning_rate": 9.731908683689355e-06,
"loss": 0.1392,
"step": 298
},
{
"epoch": 0.2101194659170766,
"grad_norm": 0.1960284011235396,
"learning_rate": 9.730122784603184e-06,
"loss": 0.1338,
"step": 299
},
{
"epoch": 0.21082220660576248,
"grad_norm": 0.20360386032848427,
"learning_rate": 9.728331121804322e-06,
"loss": 0.152,
"step": 300
},
{
"epoch": 0.21152494729444835,
"grad_norm": 0.1913220313684908,
"learning_rate": 9.726533697475929e-06,
"loss": 0.1213,
"step": 301
},
{
"epoch": 0.2122276879831342,
"grad_norm": 0.20219133230425923,
"learning_rate": 9.724730513808191e-06,
"loss": 0.1427,
"step": 302
},
{
"epoch": 0.2129304286718201,
"grad_norm": 0.1997510075877235,
"learning_rate": 9.722921572998311e-06,
"loss": 0.1477,
"step": 303
},
{
"epoch": 0.21363316936050597,
"grad_norm": 0.21592199149838354,
"learning_rate": 9.721106877250501e-06,
"loss": 0.1618,
"step": 304
},
{
"epoch": 0.21433591004919184,
"grad_norm": 0.201402045737912,
"learning_rate": 9.719286428775995e-06,
"loss": 0.1447,
"step": 305
},
{
"epoch": 0.21503865073787773,
"grad_norm": 0.1999081066710887,
"learning_rate": 9.717460229793027e-06,
"loss": 0.1492,
"step": 306
},
{
"epoch": 0.2157413914265636,
"grad_norm": 0.20021974689952046,
"learning_rate": 9.715628282526847e-06,
"loss": 0.1349,
"step": 307
},
{
"epoch": 0.21644413211524946,
"grad_norm": 0.22307301716894617,
"learning_rate": 9.713790589209704e-06,
"loss": 0.138,
"step": 308
},
{
"epoch": 0.21714687280393535,
"grad_norm": 0.2099176996643859,
"learning_rate": 9.71194715208085e-06,
"loss": 0.159,
"step": 309
},
{
"epoch": 0.21784961349262122,
"grad_norm": 0.19926684760193974,
"learning_rate": 9.710097973386531e-06,
"loss": 0.1283,
"step": 310
},
{
"epoch": 0.21855235418130708,
"grad_norm": 0.18998643011163793,
"learning_rate": 9.708243055380002e-06,
"loss": 0.1328,
"step": 311
},
{
"epoch": 0.21925509486999298,
"grad_norm": 0.1948435334852748,
"learning_rate": 9.7063824003215e-06,
"loss": 0.1362,
"step": 312
},
{
"epoch": 0.21995783555867884,
"grad_norm": 0.19690476081622268,
"learning_rate": 9.704516010478254e-06,
"loss": 0.135,
"step": 313
},
{
"epoch": 0.2206605762473647,
"grad_norm": 0.21512619654006312,
"learning_rate": 9.702643888124484e-06,
"loss": 0.1518,
"step": 314
},
{
"epoch": 0.2213633169360506,
"grad_norm": 0.19349420559343616,
"learning_rate": 9.700766035541396e-06,
"loss": 0.137,
"step": 315
},
{
"epoch": 0.22206605762473647,
"grad_norm": 0.20540329866722937,
"learning_rate": 9.698882455017175e-06,
"loss": 0.146,
"step": 316
},
{
"epoch": 0.22276879831342233,
"grad_norm": 0.18979594501917949,
"learning_rate": 9.696993148846985e-06,
"loss": 0.1241,
"step": 317
},
{
"epoch": 0.22347153900210823,
"grad_norm": 0.20895328827535556,
"learning_rate": 9.695098119332972e-06,
"loss": 0.1573,
"step": 318
},
{
"epoch": 0.2241742796907941,
"grad_norm": 0.19760898542367256,
"learning_rate": 9.693197368784253e-06,
"loss": 0.1423,
"step": 319
},
{
"epoch": 0.22487702037947996,
"grad_norm": 0.20879726922066436,
"learning_rate": 9.691290899516912e-06,
"loss": 0.1543,
"step": 320
},
{
"epoch": 0.22557976106816585,
"grad_norm": 0.22558140334577556,
"learning_rate": 9.68937871385401e-06,
"loss": 0.1767,
"step": 321
},
{
"epoch": 0.22628250175685172,
"grad_norm": 0.2097389199573069,
"learning_rate": 9.687460814125564e-06,
"loss": 0.1518,
"step": 322
},
{
"epoch": 0.22698524244553758,
"grad_norm": 0.20844733363565682,
"learning_rate": 9.685537202668562e-06,
"loss": 0.1567,
"step": 323
},
{
"epoch": 0.22768798313422348,
"grad_norm": 0.21367287227702075,
"learning_rate": 9.683607881826946e-06,
"loss": 0.1573,
"step": 324
},
{
"epoch": 0.22839072382290934,
"grad_norm": 0.20264575839381355,
"learning_rate": 9.68167285395162e-06,
"loss": 0.1378,
"step": 325
},
{
"epoch": 0.22909346451159524,
"grad_norm": 0.1934101208788331,
"learning_rate": 9.679732121400435e-06,
"loss": 0.1261,
"step": 326
},
{
"epoch": 0.2297962052002811,
"grad_norm": 0.20154663516858093,
"learning_rate": 9.677785686538201e-06,
"loss": 0.1439,
"step": 327
},
{
"epoch": 0.23049894588896697,
"grad_norm": 0.20180414197445257,
"learning_rate": 9.67583355173667e-06,
"loss": 0.1397,
"step": 328
},
{
"epoch": 0.23120168657765286,
"grad_norm": 0.19528317860910305,
"learning_rate": 9.673875719374546e-06,
"loss": 0.1316,
"step": 329
},
{
"epoch": 0.23190442726633873,
"grad_norm": 0.19058218832434168,
"learning_rate": 9.671912191837468e-06,
"loss": 0.1274,
"step": 330
},
{
"epoch": 0.2326071679550246,
"grad_norm": 0.20644089200241758,
"learning_rate": 9.669942971518019e-06,
"loss": 0.147,
"step": 331
},
{
"epoch": 0.23330990864371048,
"grad_norm": 0.2004778713168609,
"learning_rate": 9.667968060815721e-06,
"loss": 0.1463,
"step": 332
},
{
"epoch": 0.23401264933239635,
"grad_norm": 0.18976951527391892,
"learning_rate": 9.665987462137024e-06,
"loss": 0.1258,
"step": 333
},
{
"epoch": 0.23471539002108222,
"grad_norm": 0.2186800565150584,
"learning_rate": 9.664001177895312e-06,
"loss": 0.1632,
"step": 334
},
{
"epoch": 0.2354181307097681,
"grad_norm": 0.21176010126974537,
"learning_rate": 9.662009210510897e-06,
"loss": 0.1559,
"step": 335
},
{
"epoch": 0.23612087139845397,
"grad_norm": 0.1828126232632296,
"learning_rate": 9.660011562411018e-06,
"loss": 0.1136,
"step": 336
},
{
"epoch": 0.23682361208713984,
"grad_norm": 0.21056870252958412,
"learning_rate": 9.658008236029832e-06,
"loss": 0.1541,
"step": 337
},
{
"epoch": 0.23752635277582573,
"grad_norm": 0.19376524009816404,
"learning_rate": 9.655999233808415e-06,
"loss": 0.1371,
"step": 338
},
{
"epoch": 0.2382290934645116,
"grad_norm": 0.20845650985785977,
"learning_rate": 9.653984558194764e-06,
"loss": 0.1489,
"step": 339
},
{
"epoch": 0.23893183415319746,
"grad_norm": 0.19786123436210268,
"learning_rate": 9.651964211643784e-06,
"loss": 0.1437,
"step": 340
},
{
"epoch": 0.23963457484188336,
"grad_norm": 0.20841139105117382,
"learning_rate": 9.649938196617292e-06,
"loss": 0.1392,
"step": 341
},
{
"epoch": 0.24033731553056922,
"grad_norm": 0.20953143559939524,
"learning_rate": 9.647906515584014e-06,
"loss": 0.1438,
"step": 342
},
{
"epoch": 0.2410400562192551,
"grad_norm": 0.20091785428411826,
"learning_rate": 9.645869171019578e-06,
"loss": 0.1399,
"step": 343
},
{
"epoch": 0.24174279690794098,
"grad_norm": 0.21414348581348486,
"learning_rate": 9.643826165406512e-06,
"loss": 0.1518,
"step": 344
},
{
"epoch": 0.24244553759662685,
"grad_norm": 0.19251491170241253,
"learning_rate": 9.641777501234242e-06,
"loss": 0.1237,
"step": 345
},
{
"epoch": 0.2431482782853127,
"grad_norm": 0.20192819495094447,
"learning_rate": 9.639723180999094e-06,
"loss": 0.1325,
"step": 346
},
{
"epoch": 0.2438510189739986,
"grad_norm": 0.2037958638962277,
"learning_rate": 9.637663207204279e-06,
"loss": 0.15,
"step": 347
},
{
"epoch": 0.24455375966268447,
"grad_norm": 0.2070818514146289,
"learning_rate": 9.635597582359905e-06,
"loss": 0.1581,
"step": 348
},
{
"epoch": 0.24525650035137034,
"grad_norm": 0.19610140854605426,
"learning_rate": 9.633526308982957e-06,
"loss": 0.1405,
"step": 349
},
{
"epoch": 0.24595924104005623,
"grad_norm": 0.20489035281382875,
"learning_rate": 9.631449389597307e-06,
"loss": 0.1428,
"step": 350
},
{
"epoch": 0.2466619817287421,
"grad_norm": 0.19839047651600314,
"learning_rate": 9.629366826733711e-06,
"loss": 0.1388,
"step": 351
},
{
"epoch": 0.24736472241742796,
"grad_norm": 0.22095271721781543,
"learning_rate": 9.627278622929791e-06,
"loss": 0.1924,
"step": 352
},
{
"epoch": 0.24806746310611386,
"grad_norm": 0.1981684269803114,
"learning_rate": 9.625184780730058e-06,
"loss": 0.1369,
"step": 353
},
{
"epoch": 0.24877020379479972,
"grad_norm": 0.19228090837881337,
"learning_rate": 9.623085302685875e-06,
"loss": 0.1194,
"step": 354
},
{
"epoch": 0.2494729444834856,
"grad_norm": 0.1965323606943778,
"learning_rate": 9.620980191355487e-06,
"loss": 0.1341,
"step": 355
},
{
"epoch": 0.2501756851721715,
"grad_norm": 0.20390438067507205,
"learning_rate": 9.618869449303996e-06,
"loss": 0.1494,
"step": 356
},
{
"epoch": 0.25087842586085735,
"grad_norm": 0.19414237665638956,
"learning_rate": 9.616753079103367e-06,
"loss": 0.1315,
"step": 357
},
{
"epoch": 0.2515811665495432,
"grad_norm": 0.2007807844230908,
"learning_rate": 9.614631083332427e-06,
"loss": 0.1497,
"step": 358
},
{
"epoch": 0.2522839072382291,
"grad_norm": 0.18385626509926112,
"learning_rate": 9.61250346457685e-06,
"loss": 0.12,
"step": 359
},
{
"epoch": 0.25298664792691494,
"grad_norm": 0.1866938501234195,
"learning_rate": 9.610370225429164e-06,
"loss": 0.1145,
"step": 360
},
{
"epoch": 0.25368938861560086,
"grad_norm": 0.19613701602091654,
"learning_rate": 9.608231368488752e-06,
"loss": 0.1395,
"step": 361
},
{
"epoch": 0.25439212930428673,
"grad_norm": 0.1975070294858894,
"learning_rate": 9.606086896361835e-06,
"loss": 0.1291,
"step": 362
},
{
"epoch": 0.2550948699929726,
"grad_norm": 0.19986457037038174,
"learning_rate": 9.603936811661478e-06,
"loss": 0.141,
"step": 363
},
{
"epoch": 0.25579761068165846,
"grad_norm": 0.1984029621932959,
"learning_rate": 9.601781117007586e-06,
"loss": 0.1462,
"step": 364
},
{
"epoch": 0.2565003513703443,
"grad_norm": 0.2114157354265033,
"learning_rate": 9.5996198150269e-06,
"loss": 0.1604,
"step": 365
},
{
"epoch": 0.2572030920590302,
"grad_norm": 0.18644955434841323,
"learning_rate": 9.597452908352994e-06,
"loss": 0.1173,
"step": 366
},
{
"epoch": 0.2579058327477161,
"grad_norm": 0.1980270394945538,
"learning_rate": 9.595280399626267e-06,
"loss": 0.1347,
"step": 367
},
{
"epoch": 0.258608573436402,
"grad_norm": 0.19803060122985386,
"learning_rate": 9.59310229149395e-06,
"loss": 0.1472,
"step": 368
},
{
"epoch": 0.25931131412508784,
"grad_norm": 0.202620356663314,
"learning_rate": 9.590918586610094e-06,
"loss": 0.1451,
"step": 369
},
{
"epoch": 0.2600140548137737,
"grad_norm": 0.18949252762903013,
"learning_rate": 9.588729287635571e-06,
"loss": 0.1247,
"step": 370
},
{
"epoch": 0.2607167955024596,
"grad_norm": 0.19196731799718963,
"learning_rate": 9.586534397238068e-06,
"loss": 0.1234,
"step": 371
},
{
"epoch": 0.26141953619114544,
"grad_norm": 0.20011554768120032,
"learning_rate": 9.584333918092085e-06,
"loss": 0.1363,
"step": 372
},
{
"epoch": 0.26212227687983136,
"grad_norm": 0.20189281573908496,
"learning_rate": 9.582127852878935e-06,
"loss": 0.1346,
"step": 373
},
{
"epoch": 0.2628250175685172,
"grad_norm": 0.22068217456284164,
"learning_rate": 9.579916204286734e-06,
"loss": 0.1605,
"step": 374
},
{
"epoch": 0.2635277582572031,
"grad_norm": 0.18483893110201327,
"learning_rate": 9.577698975010402e-06,
"loss": 0.1107,
"step": 375
},
{
"epoch": 0.26423049894588896,
"grad_norm": 0.21970242733137538,
"learning_rate": 9.575476167751663e-06,
"loss": 0.1552,
"step": 376
},
{
"epoch": 0.2649332396345748,
"grad_norm": 0.1900156252906083,
"learning_rate": 9.573247785219033e-06,
"loss": 0.123,
"step": 377
},
{
"epoch": 0.2656359803232607,
"grad_norm": 0.20889692429247103,
"learning_rate": 9.571013830127822e-06,
"loss": 0.1483,
"step": 378
},
{
"epoch": 0.2663387210119466,
"grad_norm": 0.222152647676307,
"learning_rate": 9.568774305200134e-06,
"loss": 0.1689,
"step": 379
},
{
"epoch": 0.2670414617006325,
"grad_norm": 0.19929944282336964,
"learning_rate": 9.566529213164859e-06,
"loss": 0.138,
"step": 380
},
{
"epoch": 0.26774420238931834,
"grad_norm": 0.21023886026179744,
"learning_rate": 9.564278556757667e-06,
"loss": 0.152,
"step": 381
},
{
"epoch": 0.2684469430780042,
"grad_norm": 0.207860068636097,
"learning_rate": 9.56202233872101e-06,
"loss": 0.1567,
"step": 382
},
{
"epoch": 0.2691496837666901,
"grad_norm": 0.21366296691371667,
"learning_rate": 9.559760561804118e-06,
"loss": 0.1598,
"step": 383
},
{
"epoch": 0.26985242445537594,
"grad_norm": 0.2127287901589574,
"learning_rate": 9.557493228762995e-06,
"loss": 0.1595,
"step": 384
},
{
"epoch": 0.27055516514406186,
"grad_norm": 0.20739523623803044,
"learning_rate": 9.555220342360412e-06,
"loss": 0.1532,
"step": 385
},
{
"epoch": 0.2712579058327477,
"grad_norm": 0.19228019977316227,
"learning_rate": 9.552941905365911e-06,
"loss": 0.1317,
"step": 386
},
{
"epoch": 0.2719606465214336,
"grad_norm": 0.21223716747758148,
"learning_rate": 9.550657920555794e-06,
"loss": 0.1667,
"step": 387
},
{
"epoch": 0.27266338721011946,
"grad_norm": 0.20950620808104398,
"learning_rate": 9.548368390713126e-06,
"loss": 0.1539,
"step": 388
},
{
"epoch": 0.2733661278988053,
"grad_norm": 0.1927930929143535,
"learning_rate": 9.546073318627726e-06,
"loss": 0.1238,
"step": 389
},
{
"epoch": 0.27406886858749124,
"grad_norm": 0.21539977246477923,
"learning_rate": 9.543772707096169e-06,
"loss": 0.1607,
"step": 390
},
{
"epoch": 0.2747716092761771,
"grad_norm": 0.19930365256822216,
"learning_rate": 9.541466558921777e-06,
"loss": 0.1268,
"step": 391
},
{
"epoch": 0.275474349964863,
"grad_norm": 0.19207577663061368,
"learning_rate": 9.53915487691462e-06,
"loss": 0.1243,
"step": 392
},
{
"epoch": 0.27617709065354884,
"grad_norm": 0.200856083281485,
"learning_rate": 9.536837663891511e-06,
"loss": 0.1414,
"step": 393
},
{
"epoch": 0.2768798313422347,
"grad_norm": 0.20348209567815814,
"learning_rate": 9.534514922676003e-06,
"loss": 0.147,
"step": 394
},
{
"epoch": 0.27758257203092057,
"grad_norm": 0.2004451061805254,
"learning_rate": 9.532186656098384e-06,
"loss": 0.1372,
"step": 395
},
{
"epoch": 0.2782853127196065,
"grad_norm": 0.21106658019167104,
"learning_rate": 9.529852866995676e-06,
"loss": 0.1617,
"step": 396
},
{
"epoch": 0.27898805340829236,
"grad_norm": 0.18472734978834238,
"learning_rate": 9.52751355821163e-06,
"loss": 0.1162,
"step": 397
},
{
"epoch": 0.2796907940969782,
"grad_norm": 0.21840832563821366,
"learning_rate": 9.525168732596722e-06,
"loss": 0.156,
"step": 398
},
{
"epoch": 0.2803935347856641,
"grad_norm": 0.2001879711688018,
"learning_rate": 9.522818393008148e-06,
"loss": 0.14,
"step": 399
},
{
"epoch": 0.28109627547434995,
"grad_norm": 0.20422669935955853,
"learning_rate": 9.520462542309832e-06,
"loss": 0.1506,
"step": 400
},
{
"epoch": 0.2817990161630358,
"grad_norm": 0.2090478931774373,
"learning_rate": 9.518101183372402e-06,
"loss": 0.1517,
"step": 401
},
{
"epoch": 0.28250175685172174,
"grad_norm": 0.1975863524558095,
"learning_rate": 9.515734319073204e-06,
"loss": 0.1292,
"step": 402
},
{
"epoch": 0.2832044975404076,
"grad_norm": 0.1992333738554787,
"learning_rate": 9.51336195229629e-06,
"loss": 0.143,
"step": 403
},
{
"epoch": 0.2839072382290935,
"grad_norm": 0.1962317374671124,
"learning_rate": 9.510984085932421e-06,
"loss": 0.138,
"step": 404
},
{
"epoch": 0.28460997891777934,
"grad_norm": 0.19745964516787126,
"learning_rate": 9.508600722879055e-06,
"loss": 0.1345,
"step": 405
},
{
"epoch": 0.2853127196064652,
"grad_norm": 0.21527893883972268,
"learning_rate": 9.50621186604035e-06,
"loss": 0.1719,
"step": 406
},
{
"epoch": 0.28601546029515107,
"grad_norm": 0.19916798608408456,
"learning_rate": 9.503817518327157e-06,
"loss": 0.1361,
"step": 407
},
{
"epoch": 0.286718200983837,
"grad_norm": 0.18931692037263517,
"learning_rate": 9.501417682657015e-06,
"loss": 0.1268,
"step": 408
},
{
"epoch": 0.28742094167252286,
"grad_norm": 0.21306312623194376,
"learning_rate": 9.499012361954156e-06,
"loss": 0.1605,
"step": 409
},
{
"epoch": 0.2881236823612087,
"grad_norm": 0.21683945306363328,
"learning_rate": 9.496601559149494e-06,
"loss": 0.1673,
"step": 410
},
{
"epoch": 0.2888264230498946,
"grad_norm": 0.20276428304028518,
"learning_rate": 9.494185277180619e-06,
"loss": 0.1521,
"step": 411
},
{
"epoch": 0.28952916373858045,
"grad_norm": 0.19457282787311822,
"learning_rate": 9.491763518991803e-06,
"loss": 0.1325,
"step": 412
},
{
"epoch": 0.2902319044272663,
"grad_norm": 0.20524955511204399,
"learning_rate": 9.489336287533985e-06,
"loss": 0.1452,
"step": 413
},
{
"epoch": 0.29093464511595224,
"grad_norm": 0.1940453874682033,
"learning_rate": 9.486903585764778e-06,
"loss": 0.1399,
"step": 414
},
{
"epoch": 0.2916373858046381,
"grad_norm": 0.18324690395770482,
"learning_rate": 9.48446541664846e-06,
"loss": 0.1147,
"step": 415
},
{
"epoch": 0.29234012649332397,
"grad_norm": 0.20981811331570427,
"learning_rate": 9.482021783155971e-06,
"loss": 0.1503,
"step": 416
},
{
"epoch": 0.29304286718200984,
"grad_norm": 0.1777689833257672,
"learning_rate": 9.479572688264902e-06,
"loss": 0.1079,
"step": 417
},
{
"epoch": 0.2937456078706957,
"grad_norm": 0.2052015209602498,
"learning_rate": 9.477118134959513e-06,
"loss": 0.149,
"step": 418
},
{
"epoch": 0.29444834855938157,
"grad_norm": 0.19659242134848204,
"learning_rate": 9.474658126230702e-06,
"loss": 0.1448,
"step": 419
},
{
"epoch": 0.2951510892480675,
"grad_norm": 0.20594770000175644,
"learning_rate": 9.472192665076023e-06,
"loss": 0.1474,
"step": 420
},
{
"epoch": 0.29585382993675335,
"grad_norm": 0.1956351036493982,
"learning_rate": 9.46972175449967e-06,
"loss": 0.1239,
"step": 421
},
{
"epoch": 0.2965565706254392,
"grad_norm": 0.20136294866476193,
"learning_rate": 9.467245397512475e-06,
"loss": 0.1377,
"step": 422
},
{
"epoch": 0.2972593113141251,
"grad_norm": 0.19322183221885803,
"learning_rate": 9.464763597131914e-06,
"loss": 0.1175,
"step": 423
},
{
"epoch": 0.29796205200281095,
"grad_norm": 0.1864937682993171,
"learning_rate": 9.46227635638209e-06,
"loss": 0.1184,
"step": 424
},
{
"epoch": 0.2986647926914968,
"grad_norm": 0.21261976233040658,
"learning_rate": 9.459783678293732e-06,
"loss": 0.1593,
"step": 425
},
{
"epoch": 0.29936753338018274,
"grad_norm": 0.1908867361917329,
"learning_rate": 9.457285565904204e-06,
"loss": 0.1165,
"step": 426
},
{
"epoch": 0.3000702740688686,
"grad_norm": 0.20336506481589128,
"learning_rate": 9.454782022257485e-06,
"loss": 0.1404,
"step": 427
},
{
"epoch": 0.30077301475755447,
"grad_norm": 0.1920615833098641,
"learning_rate": 9.452273050404173e-06,
"loss": 0.1284,
"step": 428
},
{
"epoch": 0.30147575544624033,
"grad_norm": 0.20034326667195387,
"learning_rate": 9.449758653401482e-06,
"loss": 0.1432,
"step": 429
},
{
"epoch": 0.3021784961349262,
"grad_norm": 0.1998823564030616,
"learning_rate": 9.447238834313235e-06,
"loss": 0.1367,
"step": 430
},
{
"epoch": 0.30288123682361207,
"grad_norm": 0.21365544138767167,
"learning_rate": 9.444713596209863e-06,
"loss": 0.1554,
"step": 431
},
{
"epoch": 0.303583977512298,
"grad_norm": 0.20676074610483916,
"learning_rate": 9.442182942168398e-06,
"loss": 0.145,
"step": 432
},
{
"epoch": 0.30428671820098385,
"grad_norm": 0.20991203968911312,
"learning_rate": 9.439646875272476e-06,
"loss": 0.1585,
"step": 433
},
{
"epoch": 0.3049894588896697,
"grad_norm": 0.20533775136490837,
"learning_rate": 9.437105398612323e-06,
"loss": 0.1493,
"step": 434
},
{
"epoch": 0.3056921995783556,
"grad_norm": 0.20784964970841757,
"learning_rate": 9.434558515284761e-06,
"loss": 0.1505,
"step": 435
},
{
"epoch": 0.30639494026704145,
"grad_norm": 0.20692485866989221,
"learning_rate": 9.432006228393198e-06,
"loss": 0.1419,
"step": 436
},
{
"epoch": 0.3070976809557273,
"grad_norm": 0.19814161765156935,
"learning_rate": 9.429448541047627e-06,
"loss": 0.1301,
"step": 437
},
{
"epoch": 0.30780042164441324,
"grad_norm": 0.20991230925602106,
"learning_rate": 9.426885456364622e-06,
"loss": 0.1545,
"step": 438
},
{
"epoch": 0.3085031623330991,
"grad_norm": 0.2042267276506767,
"learning_rate": 9.424316977467332e-06,
"loss": 0.1373,
"step": 439
},
{
"epoch": 0.30920590302178497,
"grad_norm": 0.20419526135331081,
"learning_rate": 9.42174310748548e-06,
"loss": 0.1476,
"step": 440
},
{
"epoch": 0.30990864371047083,
"grad_norm": 0.2045442597068381,
"learning_rate": 9.419163849555359e-06,
"loss": 0.1487,
"step": 441
},
{
"epoch": 0.3106113843991567,
"grad_norm": 0.21367085310772427,
"learning_rate": 9.416579206819828e-06,
"loss": 0.1672,
"step": 442
},
{
"epoch": 0.31131412508784256,
"grad_norm": 0.20563612051307759,
"learning_rate": 9.413989182428303e-06,
"loss": 0.1414,
"step": 443
},
{
"epoch": 0.3120168657765285,
"grad_norm": 0.1876240411105429,
"learning_rate": 9.411393779536761e-06,
"loss": 0.1125,
"step": 444
},
{
"epoch": 0.31271960646521435,
"grad_norm": 0.19906316360131868,
"learning_rate": 9.408793001307734e-06,
"loss": 0.1359,
"step": 445
},
{
"epoch": 0.3134223471539002,
"grad_norm": 0.2026763143506025,
"learning_rate": 9.406186850910301e-06,
"loss": 0.139,
"step": 446
},
{
"epoch": 0.3141250878425861,
"grad_norm": 0.19816780552362714,
"learning_rate": 9.403575331520089e-06,
"loss": 0.1358,
"step": 447
},
{
"epoch": 0.31482782853127195,
"grad_norm": 0.20344604707931468,
"learning_rate": 9.400958446319267e-06,
"loss": 0.1389,
"step": 448
},
{
"epoch": 0.3155305692199578,
"grad_norm": 0.1852942416226737,
"learning_rate": 9.398336198496538e-06,
"loss": 0.1228,
"step": 449
},
{
"epoch": 0.31623330990864373,
"grad_norm": 0.18586856929820195,
"learning_rate": 9.395708591247148e-06,
"loss": 0.1191,
"step": 450
},
{
"epoch": 0.3169360505973296,
"grad_norm": 0.18946171726014088,
"learning_rate": 9.393075627772865e-06,
"loss": 0.1221,
"step": 451
},
{
"epoch": 0.31763879128601546,
"grad_norm": 0.180318637958168,
"learning_rate": 9.39043731128199e-06,
"loss": 0.108,
"step": 452
},
{
"epoch": 0.31834153197470133,
"grad_norm": 0.20919707596587675,
"learning_rate": 9.387793644989342e-06,
"loss": 0.1581,
"step": 453
},
{
"epoch": 0.3190442726633872,
"grad_norm": 0.21459458481790794,
"learning_rate": 9.385144632116263e-06,
"loss": 0.1597,
"step": 454
},
{
"epoch": 0.31974701335207306,
"grad_norm": 0.20019930248144183,
"learning_rate": 9.382490275890606e-06,
"loss": 0.1457,
"step": 455
},
{
"epoch": 0.320449754040759,
"grad_norm": 0.19735534935621307,
"learning_rate": 9.379830579546736e-06,
"loss": 0.14,
"step": 456
},
{
"epoch": 0.32115249472944485,
"grad_norm": 0.20412673659236913,
"learning_rate": 9.377165546325529e-06,
"loss": 0.1402,
"step": 457
},
{
"epoch": 0.3218552354181307,
"grad_norm": 0.2061809403958115,
"learning_rate": 9.374495179474356e-06,
"loss": 0.1513,
"step": 458
},
{
"epoch": 0.3225579761068166,
"grad_norm": 0.1926515864047725,
"learning_rate": 9.371819482247095e-06,
"loss": 0.1298,
"step": 459
},
{
"epoch": 0.32326071679550245,
"grad_norm": 0.21054785751386276,
"learning_rate": 9.369138457904116e-06,
"loss": 0.149,
"step": 460
},
{
"epoch": 0.3239634574841883,
"grad_norm": 0.2008209513647636,
"learning_rate": 9.36645210971228e-06,
"loss": 0.1446,
"step": 461
},
{
"epoch": 0.32466619817287423,
"grad_norm": 0.20516832922996378,
"learning_rate": 9.363760440944933e-06,
"loss": 0.1391,
"step": 462
},
{
"epoch": 0.3253689388615601,
"grad_norm": 0.2094771286003063,
"learning_rate": 9.361063454881909e-06,
"loss": 0.1549,
"step": 463
},
{
"epoch": 0.32607167955024596,
"grad_norm": 0.21512379398306503,
"learning_rate": 9.358361154809517e-06,
"loss": 0.1555,
"step": 464
},
{
"epoch": 0.32677442023893183,
"grad_norm": 0.2058531575253098,
"learning_rate": 9.355653544020543e-06,
"loss": 0.1435,
"step": 465
},
{
"epoch": 0.3274771609276177,
"grad_norm": 0.1997500595644663,
"learning_rate": 9.352940625814244e-06,
"loss": 0.1404,
"step": 466
},
{
"epoch": 0.32817990161630356,
"grad_norm": 0.20395437560070354,
"learning_rate": 9.350222403496348e-06,
"loss": 0.1435,
"step": 467
},
{
"epoch": 0.3288826423049895,
"grad_norm": 0.21266565191590336,
"learning_rate": 9.347498880379036e-06,
"loss": 0.142,
"step": 468
},
{
"epoch": 0.32958538299367535,
"grad_norm": 0.18516457101612846,
"learning_rate": 9.344770059780957e-06,
"loss": 0.121,
"step": 469
},
{
"epoch": 0.3302881236823612,
"grad_norm": 0.20294763539863533,
"learning_rate": 9.342035945027213e-06,
"loss": 0.1376,
"step": 470
},
{
"epoch": 0.3309908643710471,
"grad_norm": 0.2084123720849409,
"learning_rate": 9.339296539449356e-06,
"loss": 0.1309,
"step": 471
},
{
"epoch": 0.33169360505973294,
"grad_norm": 0.20151303430112197,
"learning_rate": 9.336551846385386e-06,
"loss": 0.1407,
"step": 472
},
{
"epoch": 0.3323963457484188,
"grad_norm": 0.21624173216230064,
"learning_rate": 9.333801869179743e-06,
"loss": 0.1553,
"step": 473
},
{
"epoch": 0.33309908643710473,
"grad_norm": 0.21036586387431974,
"learning_rate": 9.331046611183311e-06,
"loss": 0.1505,
"step": 474
},
{
"epoch": 0.3338018271257906,
"grad_norm": 0.19690085907101784,
"learning_rate": 9.328286075753402e-06,
"loss": 0.1286,
"step": 475
},
{
"epoch": 0.33450456781447646,
"grad_norm": 0.20655347164506663,
"learning_rate": 9.325520266253769e-06,
"loss": 0.1566,
"step": 476
},
{
"epoch": 0.3352073085031623,
"grad_norm": 0.1905983642075779,
"learning_rate": 9.322749186054577e-06,
"loss": 0.1272,
"step": 477
},
{
"epoch": 0.3359100491918482,
"grad_norm": 0.21093585999742104,
"learning_rate": 9.319972838532425e-06,
"loss": 0.1554,
"step": 478
},
{
"epoch": 0.33661278988053406,
"grad_norm": 0.21674426466230956,
"learning_rate": 9.317191227070327e-06,
"loss": 0.1707,
"step": 479
},
{
"epoch": 0.33731553056922,
"grad_norm": 0.1990042513482135,
"learning_rate": 9.314404355057708e-06,
"loss": 0.1363,
"step": 480
},
{
"epoch": 0.33801827125790584,
"grad_norm": 0.19082987008814498,
"learning_rate": 9.311612225890411e-06,
"loss": 0.1299,
"step": 481
},
{
"epoch": 0.3387210119465917,
"grad_norm": 0.18628385255747704,
"learning_rate": 9.308814842970675e-06,
"loss": 0.1197,
"step": 482
},
{
"epoch": 0.3394237526352776,
"grad_norm": 0.20355903329220756,
"learning_rate": 9.306012209707145e-06,
"loss": 0.1533,
"step": 483
},
{
"epoch": 0.34012649332396344,
"grad_norm": 0.21916260885638264,
"learning_rate": 9.303204329514868e-06,
"loss": 0.1679,
"step": 484
},
{
"epoch": 0.3408292340126493,
"grad_norm": 0.1964498130954265,
"learning_rate": 9.300391205815276e-06,
"loss": 0.1431,
"step": 485
},
{
"epoch": 0.34153197470133523,
"grad_norm": 0.19175560916458806,
"learning_rate": 9.297572842036199e-06,
"loss": 0.1337,
"step": 486
},
{
"epoch": 0.3422347153900211,
"grad_norm": 0.20040598690953146,
"learning_rate": 9.294749241611845e-06,
"loss": 0.1413,
"step": 487
},
{
"epoch": 0.34293745607870696,
"grad_norm": 0.19533635244999506,
"learning_rate": 9.291920407982807e-06,
"loss": 0.1407,
"step": 488
},
{
"epoch": 0.3436401967673928,
"grad_norm": 0.19662099983066147,
"learning_rate": 9.289086344596055e-06,
"loss": 0.1306,
"step": 489
},
{
"epoch": 0.3443429374560787,
"grad_norm": 0.20647255027447067,
"learning_rate": 9.286247054904926e-06,
"loss": 0.1483,
"step": 490
},
{
"epoch": 0.34504567814476456,
"grad_norm": 0.20638130373716848,
"learning_rate": 9.283402542369132e-06,
"loss": 0.1222,
"step": 491
},
{
"epoch": 0.3457484188334505,
"grad_norm": 0.21474375759268757,
"learning_rate": 9.280552810454745e-06,
"loss": 0.1582,
"step": 492
},
{
"epoch": 0.34645115952213634,
"grad_norm": 0.19497590351989866,
"learning_rate": 9.277697862634203e-06,
"loss": 0.129,
"step": 493
},
{
"epoch": 0.3471539002108222,
"grad_norm": 0.21400004432153957,
"learning_rate": 9.274837702386287e-06,
"loss": 0.1627,
"step": 494
},
{
"epoch": 0.3478566408995081,
"grad_norm": 0.21295218718498718,
"learning_rate": 9.271972333196145e-06,
"loss": 0.1591,
"step": 495
},
{
"epoch": 0.34855938158819394,
"grad_norm": 0.2112379869786436,
"learning_rate": 9.26910175855526e-06,
"loss": 0.1482,
"step": 496
},
{
"epoch": 0.3492621222768798,
"grad_norm": 0.19257415363367295,
"learning_rate": 9.266225981961463e-06,
"loss": 0.131,
"step": 497
},
{
"epoch": 0.3499648629655657,
"grad_norm": 0.2065862395829462,
"learning_rate": 9.263345006918926e-06,
"loss": 0.1505,
"step": 498
},
{
"epoch": 0.3506676036542516,
"grad_norm": 0.19721867962392622,
"learning_rate": 9.260458836938148e-06,
"loss": 0.1354,
"step": 499
},
{
"epoch": 0.35137034434293746,
"grad_norm": 0.1989260627788844,
"learning_rate": 9.257567475535966e-06,
"loss": 0.1422,
"step": 500
},
{
"epoch": 0.35137034434293746,
"eval_loss": 0.14277411997318268,
"eval_runtime": 10.7135,
"eval_samples_per_second": 21.468,
"eval_steps_per_second": 5.414,
"step": 500
},
{
"epoch": 0.3520730850316233,
"grad_norm": 0.2118474689651639,
"learning_rate": 9.254670926235538e-06,
"loss": 0.1572,
"step": 501
},
{
"epoch": 0.3527758257203092,
"grad_norm": 0.2105829517431667,
"learning_rate": 9.251769192566346e-06,
"loss": 0.1478,
"step": 502
},
{
"epoch": 0.35347856640899505,
"grad_norm": 0.19765629771591783,
"learning_rate": 9.248862278064188e-06,
"loss": 0.1306,
"step": 503
},
{
"epoch": 0.354181307097681,
"grad_norm": 0.20128448022519832,
"learning_rate": 9.24595018627117e-06,
"loss": 0.1349,
"step": 504
},
{
"epoch": 0.35488404778636684,
"grad_norm": 0.22641234426897835,
"learning_rate": 9.243032920735719e-06,
"loss": 0.179,
"step": 505
},
{
"epoch": 0.3555867884750527,
"grad_norm": 0.20119839713726767,
"learning_rate": 9.240110485012557e-06,
"loss": 0.1319,
"step": 506
},
{
"epoch": 0.35628952916373857,
"grad_norm": 0.20260949394516659,
"learning_rate": 9.237182882662705e-06,
"loss": 0.1315,
"step": 507
},
{
"epoch": 0.35699226985242444,
"grad_norm": 0.20496902663529426,
"learning_rate": 9.234250117253482e-06,
"loss": 0.1443,
"step": 508
},
{
"epoch": 0.3576950105411103,
"grad_norm": 0.2106635761076251,
"learning_rate": 9.231312192358504e-06,
"loss": 0.154,
"step": 509
},
{
"epoch": 0.3583977512297962,
"grad_norm": 0.21934549353996802,
"learning_rate": 9.228369111557663e-06,
"loss": 0.1591,
"step": 510
},
{
"epoch": 0.3591004919184821,
"grad_norm": 0.20000991610072863,
"learning_rate": 9.22542087843714e-06,
"loss": 0.1429,
"step": 511
},
{
"epoch": 0.35980323260716796,
"grad_norm": 0.20353923401460933,
"learning_rate": 9.222467496589398e-06,
"loss": 0.1439,
"step": 512
},
{
"epoch": 0.3605059732958538,
"grad_norm": 0.19361899726656498,
"learning_rate": 9.219508969613164e-06,
"loss": 0.1287,
"step": 513
},
{
"epoch": 0.3612087139845397,
"grad_norm": 0.2013838922270704,
"learning_rate": 9.21654530111344e-06,
"loss": 0.1392,
"step": 514
},
{
"epoch": 0.36191145467322555,
"grad_norm": 0.1924073134338231,
"learning_rate": 9.213576494701496e-06,
"loss": 0.1202,
"step": 515
},
{
"epoch": 0.3626141953619115,
"grad_norm": 0.2092443780487793,
"learning_rate": 9.210602553994854e-06,
"loss": 0.1605,
"step": 516
},
{
"epoch": 0.36331693605059734,
"grad_norm": 0.20161724507142006,
"learning_rate": 9.2076234826173e-06,
"loss": 0.1408,
"step": 517
},
{
"epoch": 0.3640196767392832,
"grad_norm": 0.1982617253397439,
"learning_rate": 9.204639284198871e-06,
"loss": 0.1358,
"step": 518
},
{
"epoch": 0.36472241742796907,
"grad_norm": 0.19464670052598942,
"learning_rate": 9.201649962375845e-06,
"loss": 0.1298,
"step": 519
},
{
"epoch": 0.36542515811665494,
"grad_norm": 0.20169242749251767,
"learning_rate": 9.19865552079075e-06,
"loss": 0.1354,
"step": 520
},
{
"epoch": 0.36612789880534086,
"grad_norm": 0.2023037208460734,
"learning_rate": 9.195655963092349e-06,
"loss": 0.1424,
"step": 521
},
{
"epoch": 0.3668306394940267,
"grad_norm": 0.18363697128699702,
"learning_rate": 9.192651292935642e-06,
"loss": 0.1199,
"step": 522
},
{
"epoch": 0.3675333801827126,
"grad_norm": 0.22448874178583772,
"learning_rate": 9.189641513981854e-06,
"loss": 0.1677,
"step": 523
},
{
"epoch": 0.36823612087139845,
"grad_norm": 0.1939348308246589,
"learning_rate": 9.186626629898439e-06,
"loss": 0.1333,
"step": 524
},
{
"epoch": 0.3689388615600843,
"grad_norm": 0.20930637648033318,
"learning_rate": 9.183606644359069e-06,
"loss": 0.1509,
"step": 525
},
{
"epoch": 0.3696416022487702,
"grad_norm": 0.1955574239154574,
"learning_rate": 9.180581561043633e-06,
"loss": 0.1286,
"step": 526
},
{
"epoch": 0.3703443429374561,
"grad_norm": 0.19557935041940103,
"learning_rate": 9.177551383638235e-06,
"loss": 0.1325,
"step": 527
},
{
"epoch": 0.37104708362614197,
"grad_norm": 0.1889005795912407,
"learning_rate": 9.174516115835181e-06,
"loss": 0.1215,
"step": 528
},
{
"epoch": 0.37174982431482784,
"grad_norm": 0.20337074461205915,
"learning_rate": 9.171475761332985e-06,
"loss": 0.1426,
"step": 529
},
{
"epoch": 0.3724525650035137,
"grad_norm": 0.1939173426352154,
"learning_rate": 9.168430323836351e-06,
"loss": 0.1344,
"step": 530
},
{
"epoch": 0.37315530569219957,
"grad_norm": 0.19378031177344965,
"learning_rate": 9.165379807056187e-06,
"loss": 0.1224,
"step": 531
},
{
"epoch": 0.37385804638088543,
"grad_norm": 0.2075955290073315,
"learning_rate": 9.162324214709582e-06,
"loss": 0.1524,
"step": 532
},
{
"epoch": 0.37456078706957135,
"grad_norm": 0.20996080191252406,
"learning_rate": 9.159263550519814e-06,
"loss": 0.1424,
"step": 533
},
{
"epoch": 0.3752635277582572,
"grad_norm": 0.20347689805575989,
"learning_rate": 9.15619781821634e-06,
"loss": 0.1341,
"step": 534
},
{
"epoch": 0.3759662684469431,
"grad_norm": 0.2123445433041086,
"learning_rate": 9.153127021534792e-06,
"loss": 0.1591,
"step": 535
},
{
"epoch": 0.37666900913562895,
"grad_norm": 0.1963478913110363,
"learning_rate": 9.150051164216976e-06,
"loss": 0.133,
"step": 536
},
{
"epoch": 0.3773717498243148,
"grad_norm": 0.19645597253957972,
"learning_rate": 9.146970250010857e-06,
"loss": 0.1321,
"step": 537
},
{
"epoch": 0.3780744905130007,
"grad_norm": 0.2053577260913113,
"learning_rate": 9.143884282670572e-06,
"loss": 0.1462,
"step": 538
},
{
"epoch": 0.3787772312016866,
"grad_norm": 0.1897009582781436,
"learning_rate": 9.140793265956405e-06,
"loss": 0.1226,
"step": 539
},
{
"epoch": 0.37947997189037247,
"grad_norm": 0.20301612216082093,
"learning_rate": 9.1376972036348e-06,
"loss": 0.1441,
"step": 540
},
{
"epoch": 0.38018271257905834,
"grad_norm": 0.186935449902276,
"learning_rate": 9.13459609947835e-06,
"loss": 0.1249,
"step": 541
},
{
"epoch": 0.3808854532677442,
"grad_norm": 0.19232669581994852,
"learning_rate": 9.131489957265785e-06,
"loss": 0.1261,
"step": 542
},
{
"epoch": 0.38158819395643007,
"grad_norm": 0.20025545625043553,
"learning_rate": 9.12837878078198e-06,
"loss": 0.1503,
"step": 543
},
{
"epoch": 0.38229093464511593,
"grad_norm": 0.19253710478336086,
"learning_rate": 9.125262573817937e-06,
"loss": 0.1272,
"step": 544
},
{
"epoch": 0.38299367533380185,
"grad_norm": 0.18974432822720333,
"learning_rate": 9.122141340170797e-06,
"loss": 0.1362,
"step": 545
},
{
"epoch": 0.3836964160224877,
"grad_norm": 0.19720810242068318,
"learning_rate": 9.119015083643819e-06,
"loss": 0.1422,
"step": 546
},
{
"epoch": 0.3843991567111736,
"grad_norm": 0.1917658899863071,
"learning_rate": 9.115883808046388e-06,
"loss": 0.1277,
"step": 547
},
{
"epoch": 0.38510189739985945,
"grad_norm": 0.20613948564924658,
"learning_rate": 9.112747517193998e-06,
"loss": 0.1424,
"step": 548
},
{
"epoch": 0.3858046380885453,
"grad_norm": 0.20954994967125076,
"learning_rate": 9.10960621490826e-06,
"loss": 0.1619,
"step": 549
},
{
"epoch": 0.3865073787772312,
"grad_norm": 0.19444446693553472,
"learning_rate": 9.106459905016889e-06,
"loss": 0.1352,
"step": 550
},
{
"epoch": 0.3872101194659171,
"grad_norm": 0.19883516745445387,
"learning_rate": 9.103308591353704e-06,
"loss": 0.1446,
"step": 551
},
{
"epoch": 0.38791286015460297,
"grad_norm": 0.20083973596635346,
"learning_rate": 9.100152277758616e-06,
"loss": 0.1341,
"step": 552
},
{
"epoch": 0.38861560084328883,
"grad_norm": 0.20639227047398648,
"learning_rate": 9.096990968077632e-06,
"loss": 0.1438,
"step": 553
},
{
"epoch": 0.3893183415319747,
"grad_norm": 0.1940782279101402,
"learning_rate": 9.093824666162851e-06,
"loss": 0.1235,
"step": 554
},
{
"epoch": 0.39002108222066056,
"grad_norm": 0.19750586918756138,
"learning_rate": 9.090653375872446e-06,
"loss": 0.1244,
"step": 555
},
{
"epoch": 0.39072382290934643,
"grad_norm": 0.20694872653223884,
"learning_rate": 9.087477101070676e-06,
"loss": 0.1381,
"step": 556
},
{
"epoch": 0.39142656359803235,
"grad_norm": 0.2075088232514457,
"learning_rate": 9.08429584562787e-06,
"loss": 0.1453,
"step": 557
},
{
"epoch": 0.3921293042867182,
"grad_norm": 0.19580486711752298,
"learning_rate": 9.081109613420428e-06,
"loss": 0.1363,
"step": 558
},
{
"epoch": 0.3928320449754041,
"grad_norm": 0.19862849799754176,
"learning_rate": 9.07791840833081e-06,
"loss": 0.13,
"step": 559
},
{
"epoch": 0.39353478566408995,
"grad_norm": 0.19346239251535885,
"learning_rate": 9.07472223424754e-06,
"loss": 0.1279,
"step": 560
},
{
"epoch": 0.3942375263527758,
"grad_norm": 0.18860964319687856,
"learning_rate": 9.071521095065198e-06,
"loss": 0.1241,
"step": 561
},
{
"epoch": 0.3949402670414617,
"grad_norm": 0.21337624744236505,
"learning_rate": 9.068314994684408e-06,
"loss": 0.1533,
"step": 562
},
{
"epoch": 0.3956430077301476,
"grad_norm": 0.2057584111778037,
"learning_rate": 9.065103937011845e-06,
"loss": 0.1447,
"step": 563
},
{
"epoch": 0.39634574841883347,
"grad_norm": 0.19431447225090992,
"learning_rate": 9.061887925960219e-06,
"loss": 0.1226,
"step": 564
},
{
"epoch": 0.39704848910751933,
"grad_norm": 0.20691566679596524,
"learning_rate": 9.058666965448284e-06,
"loss": 0.1359,
"step": 565
},
{
"epoch": 0.3977512297962052,
"grad_norm": 0.20233541762951515,
"learning_rate": 9.055441059400817e-06,
"loss": 0.1429,
"step": 566
},
{
"epoch": 0.39845397048489106,
"grad_norm": 0.19260512069684524,
"learning_rate": 9.05221021174862e-06,
"loss": 0.1243,
"step": 567
},
{
"epoch": 0.39915671117357693,
"grad_norm": 0.19802423352226675,
"learning_rate": 9.048974426428527e-06,
"loss": 0.1287,
"step": 568
},
{
"epoch": 0.39985945186226285,
"grad_norm": 0.22844685947691024,
"learning_rate": 9.04573370738338e-06,
"loss": 0.1833,
"step": 569
},
{
"epoch": 0.4005621925509487,
"grad_norm": 0.1922067231551465,
"learning_rate": 9.042488058562036e-06,
"loss": 0.1276,
"step": 570
},
{
"epoch": 0.4012649332396346,
"grad_norm": 0.19566103135084412,
"learning_rate": 9.039237483919355e-06,
"loss": 0.1265,
"step": 571
},
{
"epoch": 0.40196767392832045,
"grad_norm": 0.18963384254539226,
"learning_rate": 9.035981987416204e-06,
"loss": 0.1304,
"step": 572
},
{
"epoch": 0.4026704146170063,
"grad_norm": 0.19121192161236383,
"learning_rate": 9.032721573019445e-06,
"loss": 0.1235,
"step": 573
},
{
"epoch": 0.4033731553056922,
"grad_norm": 0.19618856443422217,
"learning_rate": 9.029456244701933e-06,
"loss": 0.1405,
"step": 574
},
{
"epoch": 0.4040758959943781,
"grad_norm": 0.19831953795792392,
"learning_rate": 9.026186006442512e-06,
"loss": 0.125,
"step": 575
},
{
"epoch": 0.40477863668306396,
"grad_norm": 0.2040662891242496,
"learning_rate": 9.022910862226005e-06,
"loss": 0.1513,
"step": 576
},
{
"epoch": 0.40548137737174983,
"grad_norm": 0.20693067734120013,
"learning_rate": 9.019630816043218e-06,
"loss": 0.1407,
"step": 577
},
{
"epoch": 0.4061841180604357,
"grad_norm": 0.19615791853578923,
"learning_rate": 9.016345871890927e-06,
"loss": 0.1363,
"step": 578
},
{
"epoch": 0.40688685874912156,
"grad_norm": 0.19227722011540788,
"learning_rate": 9.013056033771874e-06,
"loss": 0.129,
"step": 579
},
{
"epoch": 0.4075895994378074,
"grad_norm": 0.18584536973001187,
"learning_rate": 9.009761305694771e-06,
"loss": 0.1175,
"step": 580
},
{
"epoch": 0.40829234012649335,
"grad_norm": 0.20447132977878532,
"learning_rate": 9.006461691674282e-06,
"loss": 0.1414,
"step": 581
},
{
"epoch": 0.4089950808151792,
"grad_norm": 0.21334456982632227,
"learning_rate": 9.003157195731028e-06,
"loss": 0.1525,
"step": 582
},
{
"epoch": 0.4096978215038651,
"grad_norm": 0.18554787064263908,
"learning_rate": 8.999847821891578e-06,
"loss": 0.1125,
"step": 583
},
{
"epoch": 0.41040056219255094,
"grad_norm": 0.19538799110767305,
"learning_rate": 8.996533574188446e-06,
"loss": 0.1167,
"step": 584
},
{
"epoch": 0.4111033028812368,
"grad_norm": 0.2077393490790881,
"learning_rate": 8.99321445666008e-06,
"loss": 0.1609,
"step": 585
},
{
"epoch": 0.4118060435699227,
"grad_norm": 0.1900836341067772,
"learning_rate": 8.989890473350869e-06,
"loss": 0.1257,
"step": 586
},
{
"epoch": 0.4125087842586086,
"grad_norm": 0.20273906816875512,
"learning_rate": 8.986561628311125e-06,
"loss": 0.1521,
"step": 587
},
{
"epoch": 0.41321152494729446,
"grad_norm": 0.1864285896568759,
"learning_rate": 8.983227925597089e-06,
"loss": 0.1129,
"step": 588
},
{
"epoch": 0.4139142656359803,
"grad_norm": 0.20113766834607882,
"learning_rate": 8.979889369270918e-06,
"loss": 0.1374,
"step": 589
},
{
"epoch": 0.4146170063246662,
"grad_norm": 0.1873133428738817,
"learning_rate": 8.97654596340068e-06,
"loss": 0.1239,
"step": 590
},
{
"epoch": 0.41531974701335206,
"grad_norm": 0.17656844272242236,
"learning_rate": 8.973197712060362e-06,
"loss": 0.1009,
"step": 591
},
{
"epoch": 0.4160224877020379,
"grad_norm": 0.20952832414670086,
"learning_rate": 8.969844619329846e-06,
"loss": 0.1514,
"step": 592
},
{
"epoch": 0.41672522839072385,
"grad_norm": 0.1939967848354695,
"learning_rate": 8.966486689294917e-06,
"loss": 0.1284,
"step": 593
},
{
"epoch": 0.4174279690794097,
"grad_norm": 0.1974715819764699,
"learning_rate": 8.963123926047256e-06,
"loss": 0.1328,
"step": 594
},
{
"epoch": 0.4181307097680956,
"grad_norm": 0.2010768627540249,
"learning_rate": 8.959756333684428e-06,
"loss": 0.1326,
"step": 595
},
{
"epoch": 0.41883345045678144,
"grad_norm": 0.20885751680899112,
"learning_rate": 8.956383916309888e-06,
"loss": 0.1484,
"step": 596
},
{
"epoch": 0.4195361911454673,
"grad_norm": 0.2000330311583217,
"learning_rate": 8.953006678032964e-06,
"loss": 0.1383,
"step": 597
},
{
"epoch": 0.4202389318341532,
"grad_norm": 0.2040013784093668,
"learning_rate": 8.94962462296887e-06,
"loss": 0.1483,
"step": 598
},
{
"epoch": 0.4209416725228391,
"grad_norm": 0.1995130844971459,
"learning_rate": 8.946237755238676e-06,
"loss": 0.136,
"step": 599
},
{
"epoch": 0.42164441321152496,
"grad_norm": 0.18990843566084017,
"learning_rate": 8.942846078969323e-06,
"loss": 0.1204,
"step": 600
},
{
"epoch": 0.4223471539002108,
"grad_norm": 0.20382086542375144,
"learning_rate": 8.93944959829361e-06,
"loss": 0.1451,
"step": 601
},
{
"epoch": 0.4230498945888967,
"grad_norm": 0.19613896751810994,
"learning_rate": 8.93604831735019e-06,
"loss": 0.1326,
"step": 602
},
{
"epoch": 0.42375263527758256,
"grad_norm": 0.187476431243045,
"learning_rate": 8.932642240283567e-06,
"loss": 0.119,
"step": 603
},
{
"epoch": 0.4244553759662684,
"grad_norm": 0.19758508216586151,
"learning_rate": 8.929231371244087e-06,
"loss": 0.1375,
"step": 604
},
{
"epoch": 0.42515811665495434,
"grad_norm": 0.20371563939680068,
"learning_rate": 8.925815714387936e-06,
"loss": 0.1467,
"step": 605
},
{
"epoch": 0.4258608573436402,
"grad_norm": 0.19259493892298862,
"learning_rate": 8.922395273877132e-06,
"loss": 0.1258,
"step": 606
},
{
"epoch": 0.4265635980323261,
"grad_norm": 0.19765297922229177,
"learning_rate": 8.918970053879527e-06,
"loss": 0.1325,
"step": 607
},
{
"epoch": 0.42726633872101194,
"grad_norm": 0.19604234245095667,
"learning_rate": 8.915540058568792e-06,
"loss": 0.1346,
"step": 608
},
{
"epoch": 0.4279690794096978,
"grad_norm": 0.2011733630435876,
"learning_rate": 8.912105292124417e-06,
"loss": 0.1468,
"step": 609
},
{
"epoch": 0.42867182009838367,
"grad_norm": 0.2075362038277648,
"learning_rate": 8.90866575873171e-06,
"loss": 0.1522,
"step": 610
},
{
"epoch": 0.4293745607870696,
"grad_norm": 0.2039914567988138,
"learning_rate": 8.905221462581784e-06,
"loss": 0.1466,
"step": 611
},
{
"epoch": 0.43007730147575546,
"grad_norm": 0.19524700241043055,
"learning_rate": 8.901772407871553e-06,
"loss": 0.1345,
"step": 612
},
{
"epoch": 0.4307800421644413,
"grad_norm": 0.20104823952302525,
"learning_rate": 8.898318598803737e-06,
"loss": 0.1439,
"step": 613
},
{
"epoch": 0.4314827828531272,
"grad_norm": 0.20156234306265697,
"learning_rate": 8.894860039586841e-06,
"loss": 0.1402,
"step": 614
},
{
"epoch": 0.43218552354181305,
"grad_norm": 0.19187475552309977,
"learning_rate": 8.891396734435164e-06,
"loss": 0.1205,
"step": 615
},
{
"epoch": 0.4328882642304989,
"grad_norm": 0.20325395119250034,
"learning_rate": 8.887928687568785e-06,
"loss": 0.139,
"step": 616
},
{
"epoch": 0.43359100491918484,
"grad_norm": 0.20477920835764318,
"learning_rate": 8.884455903213562e-06,
"loss": 0.1441,
"step": 617
},
{
"epoch": 0.4342937456078707,
"grad_norm": 0.1933945906132102,
"learning_rate": 8.880978385601127e-06,
"loss": 0.1264,
"step": 618
},
{
"epoch": 0.4349964862965566,
"grad_norm": 0.21327302703490195,
"learning_rate": 8.877496138968874e-06,
"loss": 0.1593,
"step": 619
},
{
"epoch": 0.43569922698524244,
"grad_norm": 0.20200413704638903,
"learning_rate": 8.874009167559968e-06,
"loss": 0.1332,
"step": 620
},
{
"epoch": 0.4364019676739283,
"grad_norm": 0.19599338494913485,
"learning_rate": 8.870517475623322e-06,
"loss": 0.1251,
"step": 621
},
{
"epoch": 0.43710470836261417,
"grad_norm": 0.20128840126952782,
"learning_rate": 8.867021067413608e-06,
"loss": 0.1372,
"step": 622
},
{
"epoch": 0.4378074490513001,
"grad_norm": 0.18732192188136573,
"learning_rate": 8.863519947191242e-06,
"loss": 0.1086,
"step": 623
},
{
"epoch": 0.43851018973998596,
"grad_norm": 0.20041210263120557,
"learning_rate": 8.86001411922238e-06,
"loss": 0.1412,
"step": 624
},
{
"epoch": 0.4392129304286718,
"grad_norm": 0.1989894955572241,
"learning_rate": 8.856503587778922e-06,
"loss": 0.1306,
"step": 625
},
{
"epoch": 0.4399156711173577,
"grad_norm": 0.21607795700059548,
"learning_rate": 8.852988357138488e-06,
"loss": 0.1675,
"step": 626
},
{
"epoch": 0.44061841180604355,
"grad_norm": 0.1948741529983923,
"learning_rate": 8.849468431584432e-06,
"loss": 0.1327,
"step": 627
},
{
"epoch": 0.4413211524947294,
"grad_norm": 0.20291552372019242,
"learning_rate": 8.845943815405827e-06,
"loss": 0.1327,
"step": 628
},
{
"epoch": 0.44202389318341534,
"grad_norm": 0.18731726448271593,
"learning_rate": 8.842414512897457e-06,
"loss": 0.1278,
"step": 629
},
{
"epoch": 0.4427266338721012,
"grad_norm": 0.20614710380640514,
"learning_rate": 8.838880528359826e-06,
"loss": 0.1359,
"step": 630
},
{
"epoch": 0.44342937456078707,
"grad_norm": 0.19599044690616851,
"learning_rate": 8.835341866099136e-06,
"loss": 0.1332,
"step": 631
},
{
"epoch": 0.44413211524947294,
"grad_norm": 0.19749828695406735,
"learning_rate": 8.831798530427289e-06,
"loss": 0.128,
"step": 632
},
{
"epoch": 0.4448348559381588,
"grad_norm": 0.20115655395875143,
"learning_rate": 8.828250525661884e-06,
"loss": 0.1409,
"step": 633
},
{
"epoch": 0.44553759662684467,
"grad_norm": 0.18644413141548716,
"learning_rate": 8.824697856126206e-06,
"loss": 0.1237,
"step": 634
},
{
"epoch": 0.4462403373155306,
"grad_norm": 0.21004400391958095,
"learning_rate": 8.82114052614923e-06,
"loss": 0.1508,
"step": 635
},
{
"epoch": 0.44694307800421645,
"grad_norm": 0.19669103120931383,
"learning_rate": 8.817578540065605e-06,
"loss": 0.1251,
"step": 636
},
{
"epoch": 0.4476458186929023,
"grad_norm": 0.20948911467663336,
"learning_rate": 8.814011902215654e-06,
"loss": 0.1545,
"step": 637
},
{
"epoch": 0.4483485593815882,
"grad_norm": 0.18609127058760086,
"learning_rate": 8.81044061694537e-06,
"loss": 0.1105,
"step": 638
},
{
"epoch": 0.44905130007027405,
"grad_norm": 0.20117870832410942,
"learning_rate": 8.806864688606409e-06,
"loss": 0.1446,
"step": 639
},
{
"epoch": 0.4497540407589599,
"grad_norm": 0.20469175124774625,
"learning_rate": 8.80328412155608e-06,
"loss": 0.1407,
"step": 640
},
{
"epoch": 0.45045678144764584,
"grad_norm": 0.18716031729379465,
"learning_rate": 8.799698920157348e-06,
"loss": 0.1069,
"step": 641
},
{
"epoch": 0.4511595221363317,
"grad_norm": 0.18867161647564998,
"learning_rate": 8.796109088778831e-06,
"loss": 0.1263,
"step": 642
},
{
"epoch": 0.45186226282501757,
"grad_norm": 0.1911915433239356,
"learning_rate": 8.792514631794778e-06,
"loss": 0.1226,
"step": 643
},
{
"epoch": 0.45256500351370343,
"grad_norm": 0.21555312915686822,
"learning_rate": 8.788915553585079e-06,
"loss": 0.1558,
"step": 644
},
{
"epoch": 0.4532677442023893,
"grad_norm": 0.19908930757388826,
"learning_rate": 8.785311858535254e-06,
"loss": 0.1335,
"step": 645
},
{
"epoch": 0.45397048489107517,
"grad_norm": 0.209007065507454,
"learning_rate": 8.781703551036451e-06,
"loss": 0.1489,
"step": 646
},
{
"epoch": 0.4546732255797611,
"grad_norm": 0.2026688778953258,
"learning_rate": 8.77809063548544e-06,
"loss": 0.1295,
"step": 647
},
{
"epoch": 0.45537596626844695,
"grad_norm": 0.2066626624042239,
"learning_rate": 8.774473116284598e-06,
"loss": 0.1406,
"step": 648
},
{
"epoch": 0.4560787069571328,
"grad_norm": 0.21198472388230957,
"learning_rate": 8.770850997841918e-06,
"loss": 0.1555,
"step": 649
},
{
"epoch": 0.4567814476458187,
"grad_norm": 0.21890096614699261,
"learning_rate": 8.767224284570999e-06,
"loss": 0.1724,
"step": 650
},
{
"epoch": 0.45748418833450455,
"grad_norm": 0.21644712897866436,
"learning_rate": 8.763592980891031e-06,
"loss": 0.1678,
"step": 651
},
{
"epoch": 0.45818692902319047,
"grad_norm": 0.18375968865386702,
"learning_rate": 8.759957091226805e-06,
"loss": 0.1138,
"step": 652
},
{
"epoch": 0.45888966971187634,
"grad_norm": 0.19207743835517968,
"learning_rate": 8.756316620008697e-06,
"loss": 0.1251,
"step": 653
},
{
"epoch": 0.4595924104005622,
"grad_norm": 0.20386276033519504,
"learning_rate": 8.752671571672664e-06,
"loss": 0.1405,
"step": 654
},
{
"epoch": 0.46029515108924807,
"grad_norm": 0.21006170138434346,
"learning_rate": 8.749021950660243e-06,
"loss": 0.1537,
"step": 655
},
{
"epoch": 0.46099789177793393,
"grad_norm": 0.2041431354339698,
"learning_rate": 8.745367761418546e-06,
"loss": 0.1468,
"step": 656
},
{
"epoch": 0.4617006324666198,
"grad_norm": 0.2109769292529394,
"learning_rate": 8.74170900840024e-06,
"loss": 0.1513,
"step": 657
},
{
"epoch": 0.4624033731553057,
"grad_norm": 0.21375500773771028,
"learning_rate": 8.738045696063566e-06,
"loss": 0.1484,
"step": 658
},
{
"epoch": 0.4631061138439916,
"grad_norm": 0.1961446382548,
"learning_rate": 8.734377828872315e-06,
"loss": 0.1327,
"step": 659
},
{
"epoch": 0.46380885453267745,
"grad_norm": 0.19844816785477604,
"learning_rate": 8.730705411295826e-06,
"loss": 0.1366,
"step": 660
},
{
"epoch": 0.4645115952213633,
"grad_norm": 0.18989404844325367,
"learning_rate": 8.727028447808983e-06,
"loss": 0.124,
"step": 661
},
{
"epoch": 0.4652143359100492,
"grad_norm": 0.18697207919429876,
"learning_rate": 8.723346942892217e-06,
"loss": 0.1247,
"step": 662
},
{
"epoch": 0.46591707659873505,
"grad_norm": 0.20353876658944284,
"learning_rate": 8.719660901031482e-06,
"loss": 0.1467,
"step": 663
},
{
"epoch": 0.46661981728742097,
"grad_norm": 0.1863520877601211,
"learning_rate": 8.715970326718269e-06,
"loss": 0.1224,
"step": 664
},
{
"epoch": 0.46732255797610683,
"grad_norm": 0.18953427463978345,
"learning_rate": 8.712275224449583e-06,
"loss": 0.1238,
"step": 665
},
{
"epoch": 0.4680252986647927,
"grad_norm": 0.19835609810272498,
"learning_rate": 8.708575598727958e-06,
"loss": 0.1391,
"step": 666
},
{
"epoch": 0.46872803935347856,
"grad_norm": 0.2044668930185476,
"learning_rate": 8.704871454061428e-06,
"loss": 0.1397,
"step": 667
},
{
"epoch": 0.46943078004216443,
"grad_norm": 0.18129020091307138,
"learning_rate": 8.70116279496354e-06,
"loss": 0.1143,
"step": 668
},
{
"epoch": 0.4701335207308503,
"grad_norm": 0.18014367879210402,
"learning_rate": 8.697449625953343e-06,
"loss": 0.1087,
"step": 669
},
{
"epoch": 0.4708362614195362,
"grad_norm": 0.21973732737204474,
"learning_rate": 8.693731951555376e-06,
"loss": 0.1719,
"step": 670
},
{
"epoch": 0.4715390021082221,
"grad_norm": 0.17593686877057024,
"learning_rate": 8.690009776299673e-06,
"loss": 0.101,
"step": 671
},
{
"epoch": 0.47224174279690795,
"grad_norm": 0.20033203846079256,
"learning_rate": 8.686283104721748e-06,
"loss": 0.1228,
"step": 672
},
{
"epoch": 0.4729444834855938,
"grad_norm": 0.20641608415327958,
"learning_rate": 8.6825519413626e-06,
"loss": 0.1562,
"step": 673
},
{
"epoch": 0.4736472241742797,
"grad_norm": 0.19994103862060295,
"learning_rate": 8.678816290768695e-06,
"loss": 0.1442,
"step": 674
},
{
"epoch": 0.47434996486296555,
"grad_norm": 0.1978437503991619,
"learning_rate": 8.675076157491969e-06,
"loss": 0.1371,
"step": 675
},
{
"epoch": 0.47505270555165147,
"grad_norm": 0.21173254147557383,
"learning_rate": 8.671331546089818e-06,
"loss": 0.1447,
"step": 676
},
{
"epoch": 0.47575544624033733,
"grad_norm": 0.20319734799388245,
"learning_rate": 8.667582461125101e-06,
"loss": 0.144,
"step": 677
},
{
"epoch": 0.4764581869290232,
"grad_norm": 0.18583374369708697,
"learning_rate": 8.663828907166123e-06,
"loss": 0.1217,
"step": 678
},
{
"epoch": 0.47716092761770906,
"grad_norm": 0.1932110676763994,
"learning_rate": 8.660070888786633e-06,
"loss": 0.1274,
"step": 679
},
{
"epoch": 0.47786366830639493,
"grad_norm": 0.21670382935295973,
"learning_rate": 8.656308410565828e-06,
"loss": 0.1583,
"step": 680
},
{
"epoch": 0.4785664089950808,
"grad_norm": 0.1977659540755836,
"learning_rate": 8.652541477088327e-06,
"loss": 0.1343,
"step": 681
},
{
"epoch": 0.4792691496837667,
"grad_norm": 0.2064249782033332,
"learning_rate": 8.64877009294419e-06,
"loss": 0.1487,
"step": 682
},
{
"epoch": 0.4799718903724526,
"grad_norm": 0.19216271116915065,
"learning_rate": 8.644994262728895e-06,
"loss": 0.1249,
"step": 683
},
{
"epoch": 0.48067463106113845,
"grad_norm": 0.21194745170810309,
"learning_rate": 8.64121399104333e-06,
"loss": 0.1537,
"step": 684
},
{
"epoch": 0.4813773717498243,
"grad_norm": 0.2106907070909612,
"learning_rate": 8.637429282493813e-06,
"loss": 0.1539,
"step": 685
},
{
"epoch": 0.4820801124385102,
"grad_norm": 0.21233010575553282,
"learning_rate": 8.633640141692052e-06,
"loss": 0.1534,
"step": 686
},
{
"epoch": 0.48278285312719604,
"grad_norm": 0.20063546027350135,
"learning_rate": 8.629846573255162e-06,
"loss": 0.1412,
"step": 687
},
{
"epoch": 0.48348559381588196,
"grad_norm": 0.2392795073262447,
"learning_rate": 8.626048581805652e-06,
"loss": 0.1521,
"step": 688
},
{
"epoch": 0.48418833450456783,
"grad_norm": 0.20449639474907916,
"learning_rate": 8.622246171971425e-06,
"loss": 0.1399,
"step": 689
},
{
"epoch": 0.4848910751932537,
"grad_norm": 0.21341590123288326,
"learning_rate": 8.61843934838576e-06,
"loss": 0.1693,
"step": 690
},
{
"epoch": 0.48559381588193956,
"grad_norm": 0.20322538987237246,
"learning_rate": 8.614628115687318e-06,
"loss": 0.1518,
"step": 691
},
{
"epoch": 0.4862965565706254,
"grad_norm": 0.19189095386508537,
"learning_rate": 8.610812478520137e-06,
"loss": 0.1306,
"step": 692
},
{
"epoch": 0.4869992972593113,
"grad_norm": 0.20317494635977681,
"learning_rate": 8.606992441533615e-06,
"loss": 0.1421,
"step": 693
},
{
"epoch": 0.4877020379479972,
"grad_norm": 0.22562257408056963,
"learning_rate": 8.603168009382513e-06,
"loss": 0.1668,
"step": 694
},
{
"epoch": 0.4884047786366831,
"grad_norm": 0.19145585498372222,
"learning_rate": 8.59933918672695e-06,
"loss": 0.1249,
"step": 695
},
{
"epoch": 0.48910751932536894,
"grad_norm": 0.19673391468859278,
"learning_rate": 8.595505978232394e-06,
"loss": 0.1334,
"step": 696
},
{
"epoch": 0.4898102600140548,
"grad_norm": 0.21515188563214083,
"learning_rate": 8.591668388569656e-06,
"loss": 0.1587,
"step": 697
},
{
"epoch": 0.4905130007027407,
"grad_norm": 0.20888599556103063,
"learning_rate": 8.587826422414886e-06,
"loss": 0.1478,
"step": 698
},
{
"epoch": 0.49121574139142654,
"grad_norm": 0.20402459535474182,
"learning_rate": 8.583980084449566e-06,
"loss": 0.1457,
"step": 699
},
{
"epoch": 0.49191848208011246,
"grad_norm": 0.1865164314015621,
"learning_rate": 8.580129379360508e-06,
"loss": 0.1227,
"step": 700
},
{
"epoch": 0.49262122276879833,
"grad_norm": 0.20013058698392772,
"learning_rate": 8.576274311839843e-06,
"loss": 0.1334,
"step": 701
},
{
"epoch": 0.4933239634574842,
"grad_norm": 0.2029773919879313,
"learning_rate": 8.572414886585015e-06,
"loss": 0.142,
"step": 702
},
{
"epoch": 0.49402670414617006,
"grad_norm": 0.20918925472708993,
"learning_rate": 8.568551108298785e-06,
"loss": 0.1381,
"step": 703
},
{
"epoch": 0.4947294448348559,
"grad_norm": 0.19420411353176248,
"learning_rate": 8.564682981689214e-06,
"loss": 0.1294,
"step": 704
},
{
"epoch": 0.4954321855235418,
"grad_norm": 0.1972358192247456,
"learning_rate": 8.56081051146966e-06,
"loss": 0.1377,
"step": 705
},
{
"epoch": 0.4961349262122277,
"grad_norm": 0.20599392828878002,
"learning_rate": 8.556933702358774e-06,
"loss": 0.1424,
"step": 706
},
{
"epoch": 0.4968376669009136,
"grad_norm": 0.19581253972452262,
"learning_rate": 8.553052559080498e-06,
"loss": 0.1394,
"step": 707
},
{
"epoch": 0.49754040758959944,
"grad_norm": 0.17971574241491614,
"learning_rate": 8.549167086364056e-06,
"loss": 0.1102,
"step": 708
},
{
"epoch": 0.4982431482782853,
"grad_norm": 0.2011056914255985,
"learning_rate": 8.545277288943938e-06,
"loss": 0.1439,
"step": 709
},
{
"epoch": 0.4989458889669712,
"grad_norm": 0.21368915334770014,
"learning_rate": 8.541383171559911e-06,
"loss": 0.1561,
"step": 710
},
{
"epoch": 0.49964862965565704,
"grad_norm": 0.1780150640316873,
"learning_rate": 8.537484738957009e-06,
"loss": 0.1143,
"step": 711
},
{
"epoch": 0.500351370344343,
"grad_norm": 0.1882535765833112,
"learning_rate": 8.533581995885515e-06,
"loss": 0.1314,
"step": 712
},
{
"epoch": 0.5010541110330288,
"grad_norm": 0.19621977979346347,
"learning_rate": 8.529674947100974e-06,
"loss": 0.1384,
"step": 713
},
{
"epoch": 0.5017568517217147,
"grad_norm": 0.19887840118948752,
"learning_rate": 8.525763597364171e-06,
"loss": 0.1477,
"step": 714
},
{
"epoch": 0.5024595924104006,
"grad_norm": 0.1974239970049024,
"learning_rate": 8.52184795144113e-06,
"loss": 0.1255,
"step": 715
},
{
"epoch": 0.5031623330990864,
"grad_norm": 0.20865086195202795,
"learning_rate": 8.51792801410312e-06,
"loss": 0.1621,
"step": 716
},
{
"epoch": 0.5038650737877723,
"grad_norm": 0.1905648176793741,
"learning_rate": 8.514003790126628e-06,
"loss": 0.1213,
"step": 717
},
{
"epoch": 0.5045678144764582,
"grad_norm": 0.20585822884686872,
"learning_rate": 8.510075284293371e-06,
"loss": 0.1442,
"step": 718
},
{
"epoch": 0.5052705551651441,
"grad_norm": 0.1957502831865175,
"learning_rate": 8.506142501390284e-06,
"loss": 0.1376,
"step": 719
},
{
"epoch": 0.5059732958538299,
"grad_norm": 0.20008208802335115,
"learning_rate": 8.502205446209506e-06,
"loss": 0.1361,
"step": 720
},
{
"epoch": 0.5066760365425158,
"grad_norm": 0.19909834167532797,
"learning_rate": 8.49826412354839e-06,
"loss": 0.1365,
"step": 721
},
{
"epoch": 0.5073787772312017,
"grad_norm": 0.1998683967821546,
"learning_rate": 8.494318538209485e-06,
"loss": 0.1495,
"step": 722
},
{
"epoch": 0.5080815179198875,
"grad_norm": 0.18785326645387374,
"learning_rate": 8.490368695000537e-06,
"loss": 0.1204,
"step": 723
},
{
"epoch": 0.5087842586085735,
"grad_norm": 0.22373280234966053,
"learning_rate": 8.486414598734479e-06,
"loss": 0.1719,
"step": 724
},
{
"epoch": 0.5094869992972593,
"grad_norm": 0.215357904627044,
"learning_rate": 8.482456254229421e-06,
"loss": 0.1503,
"step": 725
},
{
"epoch": 0.5101897399859452,
"grad_norm": 0.1982568472842296,
"learning_rate": 8.47849366630866e-06,
"loss": 0.1401,
"step": 726
},
{
"epoch": 0.5108924806746311,
"grad_norm": 0.2062352471745667,
"learning_rate": 8.474526839800654e-06,
"loss": 0.1495,
"step": 727
},
{
"epoch": 0.5115952213633169,
"grad_norm": 0.18922759103162115,
"learning_rate": 8.470555779539034e-06,
"loss": 0.1261,
"step": 728
},
{
"epoch": 0.5122979620520028,
"grad_norm": 0.1989047478793895,
"learning_rate": 8.46658049036258e-06,
"loss": 0.1322,
"step": 729
},
{
"epoch": 0.5130007027406887,
"grad_norm": 0.1989612754642442,
"learning_rate": 8.462600977115237e-06,
"loss": 0.1356,
"step": 730
},
{
"epoch": 0.5137034434293746,
"grad_norm": 0.19088421382306642,
"learning_rate": 8.458617244646085e-06,
"loss": 0.1155,
"step": 731
},
{
"epoch": 0.5144061841180604,
"grad_norm": 0.2048911582896868,
"learning_rate": 8.454629297809355e-06,
"loss": 0.1359,
"step": 732
},
{
"epoch": 0.5151089248067463,
"grad_norm": 0.19319550629214846,
"learning_rate": 8.450637141464407e-06,
"loss": 0.1261,
"step": 733
},
{
"epoch": 0.5158116654954322,
"grad_norm": 0.20066735020877896,
"learning_rate": 8.446640780475735e-06,
"loss": 0.1366,
"step": 734
},
{
"epoch": 0.516514406184118,
"grad_norm": 0.1935616771807601,
"learning_rate": 8.442640219712949e-06,
"loss": 0.1317,
"step": 735
},
{
"epoch": 0.517217146872804,
"grad_norm": 0.1988755829434389,
"learning_rate": 8.438635464050786e-06,
"loss": 0.1346,
"step": 736
},
{
"epoch": 0.5179198875614898,
"grad_norm": 0.20027669804168208,
"learning_rate": 8.43462651836909e-06,
"loss": 0.1394,
"step": 737
},
{
"epoch": 0.5186226282501757,
"grad_norm": 0.20872781923259143,
"learning_rate": 8.430613387552809e-06,
"loss": 0.153,
"step": 738
},
{
"epoch": 0.5193253689388616,
"grad_norm": 0.18848667968380878,
"learning_rate": 8.42659607649199e-06,
"loss": 0.1167,
"step": 739
},
{
"epoch": 0.5200281096275474,
"grad_norm": 0.20339077112739157,
"learning_rate": 8.42257459008178e-06,
"loss": 0.1409,
"step": 740
},
{
"epoch": 0.5207308503162333,
"grad_norm": 0.2076174170607271,
"learning_rate": 8.418548933222406e-06,
"loss": 0.1477,
"step": 741
},
{
"epoch": 0.5214335910049192,
"grad_norm": 0.20689504384268784,
"learning_rate": 8.414519110819183e-06,
"loss": 0.1404,
"step": 742
},
{
"epoch": 0.5221363316936051,
"grad_norm": 0.1965033677840974,
"learning_rate": 8.410485127782498e-06,
"loss": 0.1295,
"step": 743
},
{
"epoch": 0.5228390723822909,
"grad_norm": 0.200286188370874,
"learning_rate": 8.40644698902781e-06,
"loss": 0.1401,
"step": 744
},
{
"epoch": 0.5235418130709768,
"grad_norm": 0.21038374090446993,
"learning_rate": 8.402404699475637e-06,
"loss": 0.1634,
"step": 745
},
{
"epoch": 0.5242445537596627,
"grad_norm": 0.20640025139223628,
"learning_rate": 8.398358264051563e-06,
"loss": 0.1455,
"step": 746
},
{
"epoch": 0.5249472944483485,
"grad_norm": 0.19528399975958247,
"learning_rate": 8.394307687686219e-06,
"loss": 0.1295,
"step": 747
},
{
"epoch": 0.5256500351370345,
"grad_norm": 0.19770666378910484,
"learning_rate": 8.390252975315276e-06,
"loss": 0.1427,
"step": 748
},
{
"epoch": 0.5263527758257203,
"grad_norm": 0.19359435228762473,
"learning_rate": 8.386194131879458e-06,
"loss": 0.1279,
"step": 749
},
{
"epoch": 0.5270555165144062,
"grad_norm": 0.19153115328127254,
"learning_rate": 8.382131162324512e-06,
"loss": 0.1251,
"step": 750
},
{
"epoch": 0.5277582572030921,
"grad_norm": 0.20998848138812176,
"learning_rate": 8.378064071601218e-06,
"loss": 0.1514,
"step": 751
},
{
"epoch": 0.5284609978917779,
"grad_norm": 0.20154574395106165,
"learning_rate": 8.373992864665374e-06,
"loss": 0.1399,
"step": 752
},
{
"epoch": 0.5291637385804638,
"grad_norm": 0.20653791982779454,
"learning_rate": 8.369917546477794e-06,
"loss": 0.1481,
"step": 753
},
{
"epoch": 0.5298664792691496,
"grad_norm": 0.2124249670423763,
"learning_rate": 8.365838122004311e-06,
"loss": 0.1571,
"step": 754
},
{
"epoch": 0.5305692199578356,
"grad_norm": 0.19448089419509293,
"learning_rate": 8.361754596215745e-06,
"loss": 0.1343,
"step": 755
},
{
"epoch": 0.5312719606465214,
"grad_norm": 0.19565163442946312,
"learning_rate": 8.357666974087928e-06,
"loss": 0.1321,
"step": 756
},
{
"epoch": 0.5319747013352073,
"grad_norm": 0.21602078697917665,
"learning_rate": 8.353575260601674e-06,
"loss": 0.1521,
"step": 757
},
{
"epoch": 0.5326774420238932,
"grad_norm": 0.19749457612813498,
"learning_rate": 8.349479460742788e-06,
"loss": 0.1337,
"step": 758
},
{
"epoch": 0.533380182712579,
"grad_norm": 0.20697769387991802,
"learning_rate": 8.345379579502054e-06,
"loss": 0.1433,
"step": 759
},
{
"epoch": 0.534082923401265,
"grad_norm": 0.20545614463623743,
"learning_rate": 8.341275621875224e-06,
"loss": 0.1447,
"step": 760
},
{
"epoch": 0.5347856640899508,
"grad_norm": 0.20458265180224341,
"learning_rate": 8.337167592863026e-06,
"loss": 0.1409,
"step": 761
},
{
"epoch": 0.5354884047786367,
"grad_norm": 0.17354780877054077,
"learning_rate": 8.333055497471137e-06,
"loss": 0.0977,
"step": 762
},
{
"epoch": 0.5361911454673226,
"grad_norm": 0.21682389180098813,
"learning_rate": 8.3289393407102e-06,
"loss": 0.1563,
"step": 763
},
{
"epoch": 0.5368938861560084,
"grad_norm": 0.19730967924652767,
"learning_rate": 8.324819127595802e-06,
"loss": 0.1304,
"step": 764
},
{
"epoch": 0.5375966268446943,
"grad_norm": 0.20461220910854927,
"learning_rate": 8.320694863148473e-06,
"loss": 0.1443,
"step": 765
},
{
"epoch": 0.5382993675333801,
"grad_norm": 0.19218773615023874,
"learning_rate": 8.31656655239368e-06,
"loss": 0.111,
"step": 766
},
{
"epoch": 0.5390021082220661,
"grad_norm": 0.21414122115648038,
"learning_rate": 8.31243420036182e-06,
"loss": 0.1613,
"step": 767
},
{
"epoch": 0.5397048489107519,
"grad_norm": 0.1850047207981919,
"learning_rate": 8.308297812088215e-06,
"loss": 0.1294,
"step": 768
},
{
"epoch": 0.5404075895994378,
"grad_norm": 0.19265792286552358,
"learning_rate": 8.304157392613103e-06,
"loss": 0.1302,
"step": 769
},
{
"epoch": 0.5411103302881237,
"grad_norm": 0.18786019159574177,
"learning_rate": 8.30001294698164e-06,
"loss": 0.116,
"step": 770
},
{
"epoch": 0.5418130709768095,
"grad_norm": 0.20173013719256142,
"learning_rate": 8.295864480243882e-06,
"loss": 0.1409,
"step": 771
},
{
"epoch": 0.5425158116654955,
"grad_norm": 0.21148664516949145,
"learning_rate": 8.291711997454786e-06,
"loss": 0.1478,
"step": 772
},
{
"epoch": 0.5432185523541813,
"grad_norm": 0.20583240819450543,
"learning_rate": 8.287555503674204e-06,
"loss": 0.1434,
"step": 773
},
{
"epoch": 0.5439212930428672,
"grad_norm": 0.18562348644521987,
"learning_rate": 8.283395003966873e-06,
"loss": 0.1221,
"step": 774
},
{
"epoch": 0.5446240337315531,
"grad_norm": 0.200429171409846,
"learning_rate": 8.279230503402413e-06,
"loss": 0.1301,
"step": 775
},
{
"epoch": 0.5453267744202389,
"grad_norm": 0.19413469738192082,
"learning_rate": 8.275062007055323e-06,
"loss": 0.1305,
"step": 776
},
{
"epoch": 0.5460295151089248,
"grad_norm": 0.19132070436687887,
"learning_rate": 8.270889520004964e-06,
"loss": 0.118,
"step": 777
},
{
"epoch": 0.5467322557976106,
"grad_norm": 0.19870134171374626,
"learning_rate": 8.266713047335563e-06,
"loss": 0.1419,
"step": 778
},
{
"epoch": 0.5474349964862966,
"grad_norm": 0.1999399692215911,
"learning_rate": 8.262532594136202e-06,
"loss": 0.1328,
"step": 779
},
{
"epoch": 0.5481377371749825,
"grad_norm": 0.1995284803991577,
"learning_rate": 8.258348165500815e-06,
"loss": 0.1364,
"step": 780
},
{
"epoch": 0.5488404778636683,
"grad_norm": 0.19115269876331506,
"learning_rate": 8.254159766528184e-06,
"loss": 0.1213,
"step": 781
},
{
"epoch": 0.5495432185523542,
"grad_norm": 0.20466670424739164,
"learning_rate": 8.249967402321919e-06,
"loss": 0.1382,
"step": 782
},
{
"epoch": 0.55024595924104,
"grad_norm": 0.2038953426000264,
"learning_rate": 8.24577107799047e-06,
"loss": 0.143,
"step": 783
},
{
"epoch": 0.550948699929726,
"grad_norm": 0.22016622693658347,
"learning_rate": 8.241570798647107e-06,
"loss": 0.1675,
"step": 784
},
{
"epoch": 0.5516514406184118,
"grad_norm": 0.20146776196921104,
"learning_rate": 8.237366569409927e-06,
"loss": 0.1406,
"step": 785
},
{
"epoch": 0.5523541813070977,
"grad_norm": 0.19765485258736815,
"learning_rate": 8.23315839540183e-06,
"loss": 0.1423,
"step": 786
},
{
"epoch": 0.5530569219957836,
"grad_norm": 0.2004497036022817,
"learning_rate": 8.22894628175053e-06,
"loss": 0.1381,
"step": 787
},
{
"epoch": 0.5537596626844694,
"grad_norm": 0.20748058749076703,
"learning_rate": 8.224730233588539e-06,
"loss": 0.1474,
"step": 788
},
{
"epoch": 0.5544624033731553,
"grad_norm": 0.18924858531010796,
"learning_rate": 8.220510256053162e-06,
"loss": 0.1245,
"step": 789
},
{
"epoch": 0.5551651440618411,
"grad_norm": 0.21003292487729114,
"learning_rate": 8.216286354286499e-06,
"loss": 0.1483,
"step": 790
},
{
"epoch": 0.5558678847505271,
"grad_norm": 0.19095395546115623,
"learning_rate": 8.212058533435418e-06,
"loss": 0.1233,
"step": 791
},
{
"epoch": 0.556570625439213,
"grad_norm": 0.19468966327875434,
"learning_rate": 8.207826798651575e-06,
"loss": 0.1257,
"step": 792
},
{
"epoch": 0.5572733661278988,
"grad_norm": 0.20726635444815417,
"learning_rate": 8.20359115509139e-06,
"loss": 0.1498,
"step": 793
},
{
"epoch": 0.5579761068165847,
"grad_norm": 0.19209631191264587,
"learning_rate": 8.199351607916048e-06,
"loss": 0.1178,
"step": 794
},
{
"epoch": 0.5586788475052705,
"grad_norm": 0.19917044759449545,
"learning_rate": 8.19510816229149e-06,
"loss": 0.1266,
"step": 795
},
{
"epoch": 0.5593815881939564,
"grad_norm": 0.19113231436779143,
"learning_rate": 8.190860823388402e-06,
"loss": 0.1244,
"step": 796
},
{
"epoch": 0.5600843288826423,
"grad_norm": 0.2024870768541665,
"learning_rate": 8.186609596382222e-06,
"loss": 0.137,
"step": 797
},
{
"epoch": 0.5607870695713282,
"grad_norm": 0.19028589812410254,
"learning_rate": 8.182354486453123e-06,
"loss": 0.1213,
"step": 798
},
{
"epoch": 0.5614898102600141,
"grad_norm": 0.20191175162836947,
"learning_rate": 8.178095498786007e-06,
"loss": 0.1316,
"step": 799
},
{
"epoch": 0.5621925509486999,
"grad_norm": 0.19303844550897872,
"learning_rate": 8.173832638570503e-06,
"loss": 0.1228,
"step": 800
},
{
"epoch": 0.5628952916373858,
"grad_norm": 0.20937475887788654,
"learning_rate": 8.169565911000958e-06,
"loss": 0.1524,
"step": 801
},
{
"epoch": 0.5635980323260716,
"grad_norm": 0.19260830092636566,
"learning_rate": 8.165295321276433e-06,
"loss": 0.1243,
"step": 802
},
{
"epoch": 0.5643007730147576,
"grad_norm": 0.18405093767472058,
"learning_rate": 8.161020874600695e-06,
"loss": 0.1207,
"step": 803
},
{
"epoch": 0.5650035137034435,
"grad_norm": 0.2000395460354145,
"learning_rate": 8.156742576182208e-06,
"loss": 0.1351,
"step": 804
},
{
"epoch": 0.5657062543921293,
"grad_norm": 0.18855191041894573,
"learning_rate": 8.152460431234132e-06,
"loss": 0.1184,
"step": 805
},
{
"epoch": 0.5664089950808152,
"grad_norm": 0.19785790739364406,
"learning_rate": 8.148174444974313e-06,
"loss": 0.1316,
"step": 806
},
{
"epoch": 0.567111735769501,
"grad_norm": 0.1963528718640247,
"learning_rate": 8.143884622625276e-06,
"loss": 0.1229,
"step": 807
},
{
"epoch": 0.567814476458187,
"grad_norm": 0.1902100157316882,
"learning_rate": 8.139590969414224e-06,
"loss": 0.1187,
"step": 808
},
{
"epoch": 0.5685172171468728,
"grad_norm": 0.1899789476297236,
"learning_rate": 8.135293490573029e-06,
"loss": 0.1222,
"step": 809
},
{
"epoch": 0.5692199578355587,
"grad_norm": 0.2022099126459621,
"learning_rate": 8.130992191338216e-06,
"loss": 0.1453,
"step": 810
},
{
"epoch": 0.5699226985242446,
"grad_norm": 0.2162802557030896,
"learning_rate": 8.126687076950974e-06,
"loss": 0.1805,
"step": 811
},
{
"epoch": 0.5706254392129304,
"grad_norm": 0.1912769996108278,
"learning_rate": 8.12237815265714e-06,
"loss": 0.1208,
"step": 812
},
{
"epoch": 0.5713281799016163,
"grad_norm": 0.1957020071277363,
"learning_rate": 8.118065423707187e-06,
"loss": 0.134,
"step": 813
},
{
"epoch": 0.5720309205903021,
"grad_norm": 0.18921596914006125,
"learning_rate": 8.113748895356229e-06,
"loss": 0.1197,
"step": 814
},
{
"epoch": 0.5727336612789881,
"grad_norm": 0.20413131532941664,
"learning_rate": 8.10942857286401e-06,
"loss": 0.1508,
"step": 815
},
{
"epoch": 0.573436401967674,
"grad_norm": 0.19339666054245624,
"learning_rate": 8.105104461494896e-06,
"loss": 0.1257,
"step": 816
},
{
"epoch": 0.5741391426563598,
"grad_norm": 0.20698883135391766,
"learning_rate": 8.10077656651787e-06,
"loss": 0.1557,
"step": 817
},
{
"epoch": 0.5748418833450457,
"grad_norm": 0.2105556960850874,
"learning_rate": 8.096444893206524e-06,
"loss": 0.1544,
"step": 818
},
{
"epoch": 0.5755446240337315,
"grad_norm": 0.1969916567103091,
"learning_rate": 8.092109446839056e-06,
"loss": 0.1333,
"step": 819
},
{
"epoch": 0.5762473647224174,
"grad_norm": 0.22451107397856968,
"learning_rate": 8.08777023269826e-06,
"loss": 0.1701,
"step": 820
},
{
"epoch": 0.5769501054111033,
"grad_norm": 0.18803755073816117,
"learning_rate": 8.083427256071523e-06,
"loss": 0.1164,
"step": 821
},
{
"epoch": 0.5776528460997892,
"grad_norm": 0.1929495529530631,
"learning_rate": 8.079080522250812e-06,
"loss": 0.1229,
"step": 822
},
{
"epoch": 0.5783555867884751,
"grad_norm": 0.21704889643660652,
"learning_rate": 8.074730036532678e-06,
"loss": 0.1689,
"step": 823
},
{
"epoch": 0.5790583274771609,
"grad_norm": 0.2013455762515905,
"learning_rate": 8.070375804218244e-06,
"loss": 0.1352,
"step": 824
},
{
"epoch": 0.5797610681658468,
"grad_norm": 0.19520180975437693,
"learning_rate": 8.06601783061319e-06,
"loss": 0.1376,
"step": 825
},
{
"epoch": 0.5804638088545326,
"grad_norm": 0.19497866209118564,
"learning_rate": 8.061656121027766e-06,
"loss": 0.1212,
"step": 826
},
{
"epoch": 0.5811665495432186,
"grad_norm": 0.1996872578160545,
"learning_rate": 8.057290680776766e-06,
"loss": 0.1308,
"step": 827
},
{
"epoch": 0.5818692902319045,
"grad_norm": 0.18850765909086167,
"learning_rate": 8.052921515179528e-06,
"loss": 0.1227,
"step": 828
},
{
"epoch": 0.5825720309205903,
"grad_norm": 0.1950129677822127,
"learning_rate": 8.048548629559942e-06,
"loss": 0.1238,
"step": 829
},
{
"epoch": 0.5832747716092762,
"grad_norm": 0.20824092610502323,
"learning_rate": 8.044172029246418e-06,
"loss": 0.1455,
"step": 830
},
{
"epoch": 0.583977512297962,
"grad_norm": 0.19188242346203052,
"learning_rate": 8.0397917195719e-06,
"loss": 0.124,
"step": 831
},
{
"epoch": 0.5846802529866479,
"grad_norm": 0.1949732299955361,
"learning_rate": 8.035407705873843e-06,
"loss": 0.1362,
"step": 832
},
{
"epoch": 0.5853829936753338,
"grad_norm": 0.19295939769244194,
"learning_rate": 8.031019993494231e-06,
"loss": 0.1217,
"step": 833
},
{
"epoch": 0.5860857343640197,
"grad_norm": 0.2081421600854702,
"learning_rate": 8.026628587779537e-06,
"loss": 0.1432,
"step": 834
},
{
"epoch": 0.5867884750527056,
"grad_norm": 0.21093921430436494,
"learning_rate": 8.022233494080747e-06,
"loss": 0.1576,
"step": 835
},
{
"epoch": 0.5874912157413914,
"grad_norm": 0.1935856935164622,
"learning_rate": 8.017834717753337e-06,
"loss": 0.1196,
"step": 836
},
{
"epoch": 0.5881939564300773,
"grad_norm": 0.2162407954818382,
"learning_rate": 8.013432264157266e-06,
"loss": 0.145,
"step": 837
},
{
"epoch": 0.5888966971187631,
"grad_norm": 0.20571764597399853,
"learning_rate": 8.009026138656983e-06,
"loss": 0.1399,
"step": 838
},
{
"epoch": 0.5895994378074491,
"grad_norm": 0.19946209081389987,
"learning_rate": 8.004616346621401e-06,
"loss": 0.1306,
"step": 839
},
{
"epoch": 0.590302178496135,
"grad_norm": 0.20188947168693952,
"learning_rate": 8.00020289342391e-06,
"loss": 0.1468,
"step": 840
},
{
"epoch": 0.5910049191848208,
"grad_norm": 0.20306867909288748,
"learning_rate": 7.995785784442355e-06,
"loss": 0.1312,
"step": 841
},
{
"epoch": 0.5917076598735067,
"grad_norm": 0.2020548066741918,
"learning_rate": 7.99136502505904e-06,
"loss": 0.1382,
"step": 842
},
{
"epoch": 0.5924104005621925,
"grad_norm": 0.19790761158507408,
"learning_rate": 7.98694062066071e-06,
"loss": 0.132,
"step": 843
},
{
"epoch": 0.5931131412508784,
"grad_norm": 0.20424674242155053,
"learning_rate": 7.982512576638556e-06,
"loss": 0.1478,
"step": 844
},
{
"epoch": 0.5938158819395642,
"grad_norm": 0.20569764027715481,
"learning_rate": 7.97808089838821e-06,
"loss": 0.1411,
"step": 845
},
{
"epoch": 0.5945186226282502,
"grad_norm": 0.19307434051514433,
"learning_rate": 7.973645591309722e-06,
"loss": 0.1305,
"step": 846
},
{
"epoch": 0.5952213633169361,
"grad_norm": 0.21825580831967248,
"learning_rate": 7.969206660807566e-06,
"loss": 0.1576,
"step": 847
},
{
"epoch": 0.5959241040056219,
"grad_norm": 0.20611875858095494,
"learning_rate": 7.964764112290641e-06,
"loss": 0.1435,
"step": 848
},
{
"epoch": 0.5966268446943078,
"grad_norm": 0.20013442913784035,
"learning_rate": 7.96031795117224e-06,
"loss": 0.1403,
"step": 849
},
{
"epoch": 0.5973295853829936,
"grad_norm": 0.18356336811386903,
"learning_rate": 7.955868182870067e-06,
"loss": 0.1159,
"step": 850
},
{
"epoch": 0.5980323260716796,
"grad_norm": 0.1990515620915075,
"learning_rate": 7.95141481280622e-06,
"loss": 0.1365,
"step": 851
},
{
"epoch": 0.5987350667603655,
"grad_norm": 0.20327407256959015,
"learning_rate": 7.946957846407182e-06,
"loss": 0.1432,
"step": 852
},
{
"epoch": 0.5994378074490513,
"grad_norm": 0.20727276784687332,
"learning_rate": 7.942497289103825e-06,
"loss": 0.1452,
"step": 853
},
{
"epoch": 0.6001405481377372,
"grad_norm": 0.21698511865029504,
"learning_rate": 7.938033146331392e-06,
"loss": 0.1597,
"step": 854
},
{
"epoch": 0.600843288826423,
"grad_norm": 0.19484702601271084,
"learning_rate": 7.933565423529495e-06,
"loss": 0.1315,
"step": 855
},
{
"epoch": 0.6015460295151089,
"grad_norm": 0.19383860835640768,
"learning_rate": 7.92909412614211e-06,
"loss": 0.128,
"step": 856
},
{
"epoch": 0.6022487702037947,
"grad_norm": 0.2111761956336731,
"learning_rate": 7.924619259617567e-06,
"loss": 0.1606,
"step": 857
},
{
"epoch": 0.6029515108924807,
"grad_norm": 0.20743544298576905,
"learning_rate": 7.920140829408546e-06,
"loss": 0.1491,
"step": 858
},
{
"epoch": 0.6036542515811666,
"grad_norm": 0.19897942111686426,
"learning_rate": 7.915658840972069e-06,
"loss": 0.1327,
"step": 859
},
{
"epoch": 0.6043569922698524,
"grad_norm": 0.20209235805064069,
"learning_rate": 7.911173299769494e-06,
"loss": 0.1552,
"step": 860
},
{
"epoch": 0.6050597329585383,
"grad_norm": 0.17930130137084987,
"learning_rate": 7.906684211266508e-06,
"loss": 0.1119,
"step": 861
},
{
"epoch": 0.6057624736472241,
"grad_norm": 0.19720283363435182,
"learning_rate": 7.902191580933123e-06,
"loss": 0.1324,
"step": 862
},
{
"epoch": 0.60646521433591,
"grad_norm": 0.18835435368229872,
"learning_rate": 7.89769541424366e-06,
"loss": 0.1319,
"step": 863
},
{
"epoch": 0.607167955024596,
"grad_norm": 0.19445071494774374,
"learning_rate": 7.893195716676754e-06,
"loss": 0.1342,
"step": 864
},
{
"epoch": 0.6078706957132818,
"grad_norm": 0.19429837660218452,
"learning_rate": 7.888692493715345e-06,
"loss": 0.1276,
"step": 865
},
{
"epoch": 0.6085734364019677,
"grad_norm": 0.18840752627050047,
"learning_rate": 7.884185750846663e-06,
"loss": 0.1189,
"step": 866
},
{
"epoch": 0.6092761770906535,
"grad_norm": 0.187540864371947,
"learning_rate": 7.87967549356223e-06,
"loss": 0.1253,
"step": 867
},
{
"epoch": 0.6099789177793394,
"grad_norm": 0.20806142782922188,
"learning_rate": 7.875161727357848e-06,
"loss": 0.1534,
"step": 868
},
{
"epoch": 0.6106816584680252,
"grad_norm": 0.20240716145475693,
"learning_rate": 7.8706444577336e-06,
"loss": 0.144,
"step": 869
},
{
"epoch": 0.6113843991567112,
"grad_norm": 0.2106348808929043,
"learning_rate": 7.866123690193832e-06,
"loss": 0.1508,
"step": 870
},
{
"epoch": 0.6120871398453971,
"grad_norm": 0.19422234296553917,
"learning_rate": 7.861599430247157e-06,
"loss": 0.1253,
"step": 871
},
{
"epoch": 0.6127898805340829,
"grad_norm": 0.20567560135938553,
"learning_rate": 7.857071683406438e-06,
"loss": 0.1466,
"step": 872
},
{
"epoch": 0.6134926212227688,
"grad_norm": 0.19982122518239448,
"learning_rate": 7.852540455188793e-06,
"loss": 0.1375,
"step": 873
},
{
"epoch": 0.6141953619114546,
"grad_norm": 0.1992180985468984,
"learning_rate": 7.848005751115579e-06,
"loss": 0.1403,
"step": 874
},
{
"epoch": 0.6148981026001406,
"grad_norm": 0.21332571879650303,
"learning_rate": 7.843467576712387e-06,
"loss": 0.1528,
"step": 875
},
{
"epoch": 0.6156008432888265,
"grad_norm": 0.19986197545910162,
"learning_rate": 7.838925937509038e-06,
"loss": 0.1294,
"step": 876
},
{
"epoch": 0.6163035839775123,
"grad_norm": 0.19877017560712115,
"learning_rate": 7.83438083903958e-06,
"loss": 0.1301,
"step": 877
},
{
"epoch": 0.6170063246661982,
"grad_norm": 0.19466525574196658,
"learning_rate": 7.829832286842265e-06,
"loss": 0.1405,
"step": 878
},
{
"epoch": 0.617709065354884,
"grad_norm": 0.20849847417333844,
"learning_rate": 7.825280286459561e-06,
"loss": 0.1488,
"step": 879
},
{
"epoch": 0.6184118060435699,
"grad_norm": 0.21196989214387355,
"learning_rate": 7.82072484343814e-06,
"loss": 0.1519,
"step": 880
},
{
"epoch": 0.6191145467322557,
"grad_norm": 0.20058246306837874,
"learning_rate": 7.81616596332886e-06,
"loss": 0.1449,
"step": 881
},
{
"epoch": 0.6198172874209417,
"grad_norm": 0.20276957902039272,
"learning_rate": 7.811603651686777e-06,
"loss": 0.1291,
"step": 882
},
{
"epoch": 0.6205200281096276,
"grad_norm": 0.18372243287572027,
"learning_rate": 7.80703791407112e-06,
"loss": 0.1212,
"step": 883
},
{
"epoch": 0.6212227687983134,
"grad_norm": 0.1895984013985567,
"learning_rate": 7.802468756045301e-06,
"loss": 0.1269,
"step": 884
},
{
"epoch": 0.6219255094869993,
"grad_norm": 0.1977604022585664,
"learning_rate": 7.797896183176892e-06,
"loss": 0.1343,
"step": 885
},
{
"epoch": 0.6226282501756851,
"grad_norm": 0.2041656390463291,
"learning_rate": 7.793320201037629e-06,
"loss": 0.1415,
"step": 886
},
{
"epoch": 0.623330990864371,
"grad_norm": 0.21197944162464266,
"learning_rate": 7.788740815203404e-06,
"loss": 0.1494,
"step": 887
},
{
"epoch": 0.624033731553057,
"grad_norm": 0.20441126902865464,
"learning_rate": 7.784158031254251e-06,
"loss": 0.1436,
"step": 888
},
{
"epoch": 0.6247364722417428,
"grad_norm": 0.21295097045752787,
"learning_rate": 7.779571854774356e-06,
"loss": 0.1511,
"step": 889
},
{
"epoch": 0.6254392129304287,
"grad_norm": 0.20000290255989056,
"learning_rate": 7.774982291352022e-06,
"loss": 0.1323,
"step": 890
},
{
"epoch": 0.6261419536191145,
"grad_norm": 0.20127484063709794,
"learning_rate": 7.770389346579696e-06,
"loss": 0.1284,
"step": 891
},
{
"epoch": 0.6268446943078004,
"grad_norm": 0.17920162451363114,
"learning_rate": 7.765793026053934e-06,
"loss": 0.1078,
"step": 892
},
{
"epoch": 0.6275474349964862,
"grad_norm": 0.19228845504760642,
"learning_rate": 7.761193335375411e-06,
"loss": 0.1291,
"step": 893
},
{
"epoch": 0.6282501756851722,
"grad_norm": 0.20039439683872026,
"learning_rate": 7.756590280148904e-06,
"loss": 0.1505,
"step": 894
},
{
"epoch": 0.6289529163738581,
"grad_norm": 0.20224338178689225,
"learning_rate": 7.751983865983295e-06,
"loss": 0.1386,
"step": 895
},
{
"epoch": 0.6296556570625439,
"grad_norm": 0.20027287298423224,
"learning_rate": 7.747374098491553e-06,
"loss": 0.142,
"step": 896
},
{
"epoch": 0.6303583977512298,
"grad_norm": 0.19070716082623212,
"learning_rate": 7.742760983290738e-06,
"loss": 0.1245,
"step": 897
},
{
"epoch": 0.6310611384399156,
"grad_norm": 0.19913325641642718,
"learning_rate": 7.73814452600199e-06,
"loss": 0.1327,
"step": 898
},
{
"epoch": 0.6317638791286015,
"grad_norm": 0.19363689769402678,
"learning_rate": 7.733524732250515e-06,
"loss": 0.1274,
"step": 899
},
{
"epoch": 0.6324666198172875,
"grad_norm": 0.19346970321234555,
"learning_rate": 7.728901607665591e-06,
"loss": 0.1247,
"step": 900
},
{
"epoch": 0.6331693605059733,
"grad_norm": 0.20913991222733647,
"learning_rate": 7.724275157880551e-06,
"loss": 0.1586,
"step": 901
},
{
"epoch": 0.6338721011946592,
"grad_norm": 0.20211385799925669,
"learning_rate": 7.719645388532779e-06,
"loss": 0.1388,
"step": 902
},
{
"epoch": 0.634574841883345,
"grad_norm": 0.203771454205177,
"learning_rate": 7.71501230526371e-06,
"loss": 0.1486,
"step": 903
},
{
"epoch": 0.6352775825720309,
"grad_norm": 0.1972451785836636,
"learning_rate": 7.71037591371881e-06,
"loss": 0.1341,
"step": 904
},
{
"epoch": 0.6359803232607167,
"grad_norm": 0.19099218326906833,
"learning_rate": 7.705736219547579e-06,
"loss": 0.1203,
"step": 905
},
{
"epoch": 0.6366830639494027,
"grad_norm": 0.19940149545724112,
"learning_rate": 7.701093228403543e-06,
"loss": 0.1315,
"step": 906
},
{
"epoch": 0.6373858046380886,
"grad_norm": 0.20322721705388594,
"learning_rate": 7.696446945944241e-06,
"loss": 0.1456,
"step": 907
},
{
"epoch": 0.6380885453267744,
"grad_norm": 0.20569343610359078,
"learning_rate": 7.691797377831226e-06,
"loss": 0.1524,
"step": 908
},
{
"epoch": 0.6387912860154603,
"grad_norm": 0.2031727416376656,
"learning_rate": 7.687144529730058e-06,
"loss": 0.1445,
"step": 909
},
{
"epoch": 0.6394940267041461,
"grad_norm": 0.19354238892967268,
"learning_rate": 7.682488407310284e-06,
"loss": 0.125,
"step": 910
},
{
"epoch": 0.640196767392832,
"grad_norm": 0.19919779644064706,
"learning_rate": 7.67782901624545e-06,
"loss": 0.1393,
"step": 911
},
{
"epoch": 0.640899508081518,
"grad_norm": 0.18861023826996035,
"learning_rate": 7.673166362213077e-06,
"loss": 0.117,
"step": 912
},
{
"epoch": 0.6416022487702038,
"grad_norm": 0.1903978157024909,
"learning_rate": 7.668500450894674e-06,
"loss": 0.1253,
"step": 913
},
{
"epoch": 0.6423049894588897,
"grad_norm": 0.20021051595393566,
"learning_rate": 7.663831287975702e-06,
"loss": 0.1351,
"step": 914
},
{
"epoch": 0.6430077301475755,
"grad_norm": 0.20333243840655038,
"learning_rate": 7.659158879145599e-06,
"loss": 0.1406,
"step": 915
},
{
"epoch": 0.6437104708362614,
"grad_norm": 0.20831933906836067,
"learning_rate": 7.654483230097752e-06,
"loss": 0.1435,
"step": 916
},
{
"epoch": 0.6444132115249473,
"grad_norm": 0.20295632776385214,
"learning_rate": 7.649804346529493e-06,
"loss": 0.1292,
"step": 917
},
{
"epoch": 0.6451159522136332,
"grad_norm": 0.1849066997806639,
"learning_rate": 7.645122234142103e-06,
"loss": 0.1053,
"step": 918
},
{
"epoch": 0.6458186929023191,
"grad_norm": 0.2113526278526316,
"learning_rate": 7.640436898640795e-06,
"loss": 0.1556,
"step": 919
},
{
"epoch": 0.6465214335910049,
"grad_norm": 0.1838181260718162,
"learning_rate": 7.635748345734702e-06,
"loss": 0.1127,
"step": 920
},
{
"epoch": 0.6472241742796908,
"grad_norm": 0.18996488610999201,
"learning_rate": 7.63105658113689e-06,
"loss": 0.1294,
"step": 921
},
{
"epoch": 0.6479269149683766,
"grad_norm": 0.18965858895688584,
"learning_rate": 7.626361610564325e-06,
"loss": 0.1267,
"step": 922
},
{
"epoch": 0.6486296556570625,
"grad_norm": 0.1897804749185013,
"learning_rate": 7.6216634397378905e-06,
"loss": 0.1351,
"step": 923
},
{
"epoch": 0.6493323963457485,
"grad_norm": 0.18698725729698149,
"learning_rate": 7.616962074382364e-06,
"loss": 0.1254,
"step": 924
},
{
"epoch": 0.6500351370344343,
"grad_norm": 0.1927143691729744,
"learning_rate": 7.612257520226418e-06,
"loss": 0.1261,
"step": 925
},
{
"epoch": 0.6507378777231202,
"grad_norm": 0.19514489889613745,
"learning_rate": 7.607549783002608e-06,
"loss": 0.1289,
"step": 926
},
{
"epoch": 0.651440618411806,
"grad_norm": 0.18601890962139325,
"learning_rate": 7.602838868447373e-06,
"loss": 0.1179,
"step": 927
},
{
"epoch": 0.6521433591004919,
"grad_norm": 0.2048957860602049,
"learning_rate": 7.598124782301015e-06,
"loss": 0.1469,
"step": 928
},
{
"epoch": 0.6528460997891778,
"grad_norm": 0.20939808961111278,
"learning_rate": 7.593407530307709e-06,
"loss": 0.1414,
"step": 929
},
{
"epoch": 0.6535488404778637,
"grad_norm": 0.19759486744702545,
"learning_rate": 7.588687118215485e-06,
"loss": 0.1369,
"step": 930
},
{
"epoch": 0.6542515811665496,
"grad_norm": 0.19254710320485008,
"learning_rate": 7.583963551776221e-06,
"loss": 0.1236,
"step": 931
},
{
"epoch": 0.6549543218552354,
"grad_norm": 0.20856792064805346,
"learning_rate": 7.579236836745643e-06,
"loss": 0.1497,
"step": 932
},
{
"epoch": 0.6556570625439213,
"grad_norm": 0.20091987664359942,
"learning_rate": 7.5745069788833094e-06,
"loss": 0.1314,
"step": 933
},
{
"epoch": 0.6563598032326071,
"grad_norm": 0.20907161491466478,
"learning_rate": 7.569773983952611e-06,
"loss": 0.1383,
"step": 934
},
{
"epoch": 0.657062543921293,
"grad_norm": 0.21238971169944118,
"learning_rate": 7.56503785772076e-06,
"loss": 0.1483,
"step": 935
},
{
"epoch": 0.657765284609979,
"grad_norm": 0.18771077355975518,
"learning_rate": 7.560298605958782e-06,
"loss": 0.1233,
"step": 936
},
{
"epoch": 0.6584680252986648,
"grad_norm": 0.21746849440934551,
"learning_rate": 7.555556234441519e-06,
"loss": 0.1749,
"step": 937
},
{
"epoch": 0.6591707659873507,
"grad_norm": 0.20889152457872937,
"learning_rate": 7.550810748947605e-06,
"loss": 0.1465,
"step": 938
},
{
"epoch": 0.6598735066760365,
"grad_norm": 0.21694502077292718,
"learning_rate": 7.546062155259473e-06,
"loss": 0.1736,
"step": 939
},
{
"epoch": 0.6605762473647224,
"grad_norm": 0.2169447620295464,
"learning_rate": 7.541310459163343e-06,
"loss": 0.169,
"step": 940
},
{
"epoch": 0.6612789880534083,
"grad_norm": 0.20648061145073593,
"learning_rate": 7.536555666449214e-06,
"loss": 0.1318,
"step": 941
},
{
"epoch": 0.6619817287420942,
"grad_norm": 0.19834531810018538,
"learning_rate": 7.5317977829108605e-06,
"loss": 0.1372,
"step": 942
},
{
"epoch": 0.6626844694307801,
"grad_norm": 0.20949898219055765,
"learning_rate": 7.5270368143458216e-06,
"loss": 0.1468,
"step": 943
},
{
"epoch": 0.6633872101194659,
"grad_norm": 0.21623863570143026,
"learning_rate": 7.522272766555397e-06,
"loss": 0.1595,
"step": 944
},
{
"epoch": 0.6640899508081518,
"grad_norm": 0.2089012496501474,
"learning_rate": 7.517505645344636e-06,
"loss": 0.1443,
"step": 945
},
{
"epoch": 0.6647926914968376,
"grad_norm": 0.19732923883794568,
"learning_rate": 7.512735456522333e-06,
"loss": 0.1311,
"step": 946
},
{
"epoch": 0.6654954321855235,
"grad_norm": 0.19637608219551983,
"learning_rate": 7.507962205901026e-06,
"loss": 0.1369,
"step": 947
},
{
"epoch": 0.6661981728742095,
"grad_norm": 0.20125284542218375,
"learning_rate": 7.503185899296974e-06,
"loss": 0.1436,
"step": 948
},
{
"epoch": 0.6669009135628953,
"grad_norm": 0.19311772592429208,
"learning_rate": 7.498406542530173e-06,
"loss": 0.126,
"step": 949
},
{
"epoch": 0.6676036542515812,
"grad_norm": 0.20951119521709272,
"learning_rate": 7.4936241414243185e-06,
"loss": 0.1609,
"step": 950
},
{
"epoch": 0.668306394940267,
"grad_norm": 0.1923568665435091,
"learning_rate": 7.488838701806832e-06,
"loss": 0.1193,
"step": 951
},
{
"epoch": 0.6690091356289529,
"grad_norm": 0.2081180512573276,
"learning_rate": 7.484050229508826e-06,
"loss": 0.1575,
"step": 952
},
{
"epoch": 0.6697118763176388,
"grad_norm": 0.21531670290874474,
"learning_rate": 7.479258730365117e-06,
"loss": 0.1258,
"step": 953
},
{
"epoch": 0.6704146170063247,
"grad_norm": 0.1944237934499214,
"learning_rate": 7.474464210214202e-06,
"loss": 0.1289,
"step": 954
},
{
"epoch": 0.6711173576950106,
"grad_norm": 0.20978007297687942,
"learning_rate": 7.469666674898264e-06,
"loss": 0.1381,
"step": 955
},
{
"epoch": 0.6718200983836964,
"grad_norm": 0.1944868331437772,
"learning_rate": 7.464866130263159e-06,
"loss": 0.1259,
"step": 956
},
{
"epoch": 0.6725228390723823,
"grad_norm": 0.21071151689642312,
"learning_rate": 7.4600625821584095e-06,
"loss": 0.15,
"step": 957
},
{
"epoch": 0.6732255797610681,
"grad_norm": 0.1974521075909456,
"learning_rate": 7.4552560364371975e-06,
"loss": 0.1399,
"step": 958
},
{
"epoch": 0.673928320449754,
"grad_norm": 0.2019354743188158,
"learning_rate": 7.4504464989563575e-06,
"loss": 0.146,
"step": 959
},
{
"epoch": 0.67463106113844,
"grad_norm": 0.19796845619322417,
"learning_rate": 7.44563397557637e-06,
"loss": 0.1227,
"step": 960
},
{
"epoch": 0.6753338018271258,
"grad_norm": 0.1934449417166171,
"learning_rate": 7.4408184721613565e-06,
"loss": 0.1222,
"step": 961
},
{
"epoch": 0.6760365425158117,
"grad_norm": 0.20728272112169566,
"learning_rate": 7.435999994579062e-06,
"loss": 0.1551,
"step": 962
},
{
"epoch": 0.6767392832044975,
"grad_norm": 0.18877484711532813,
"learning_rate": 7.431178548700866e-06,
"loss": 0.1162,
"step": 963
},
{
"epoch": 0.6774420238931834,
"grad_norm": 0.20047813280498078,
"learning_rate": 7.426354140401756e-06,
"loss": 0.1415,
"step": 964
},
{
"epoch": 0.6781447645818693,
"grad_norm": 0.18771328571127022,
"learning_rate": 7.421526775560334e-06,
"loss": 0.1223,
"step": 965
},
{
"epoch": 0.6788475052705552,
"grad_norm": 0.1975478508769395,
"learning_rate": 7.4166964600588035e-06,
"loss": 0.1273,
"step": 966
},
{
"epoch": 0.6795502459592411,
"grad_norm": 0.19657042323236482,
"learning_rate": 7.411863199782962e-06,
"loss": 0.1327,
"step": 967
},
{
"epoch": 0.6802529866479269,
"grad_norm": 0.19202424748783872,
"learning_rate": 7.4070270006221975e-06,
"loss": 0.1264,
"step": 968
},
{
"epoch": 0.6809557273366128,
"grad_norm": 0.19789161516470455,
"learning_rate": 7.402187868469478e-06,
"loss": 0.1241,
"step": 969
},
{
"epoch": 0.6816584680252986,
"grad_norm": 0.17103055582139456,
"learning_rate": 7.397345809221346e-06,
"loss": 0.0895,
"step": 970
},
{
"epoch": 0.6823612087139845,
"grad_norm": 0.20286466633114844,
"learning_rate": 7.392500828777909e-06,
"loss": 0.1375,
"step": 971
},
{
"epoch": 0.6830639494026705,
"grad_norm": 0.1902691973946889,
"learning_rate": 7.387652933042835e-06,
"loss": 0.1215,
"step": 972
},
{
"epoch": 0.6837666900913563,
"grad_norm": 0.20498536533507586,
"learning_rate": 7.382802127923346e-06,
"loss": 0.1558,
"step": 973
},
{
"epoch": 0.6844694307800422,
"grad_norm": 0.18673309472632008,
"learning_rate": 7.377948419330206e-06,
"loss": 0.1143,
"step": 974
},
{
"epoch": 0.685172171468728,
"grad_norm": 0.20868373178456817,
"learning_rate": 7.3730918131777215e-06,
"loss": 0.1585,
"step": 975
},
{
"epoch": 0.6858749121574139,
"grad_norm": 0.1914614911453642,
"learning_rate": 7.368232315383721e-06,
"loss": 0.1286,
"step": 976
},
{
"epoch": 0.6865776528460998,
"grad_norm": 0.18493028852250087,
"learning_rate": 7.363369931869568e-06,
"loss": 0.1212,
"step": 977
},
{
"epoch": 0.6872803935347856,
"grad_norm": 0.19786594538544697,
"learning_rate": 7.358504668560134e-06,
"loss": 0.1325,
"step": 978
},
{
"epoch": 0.6879831342234716,
"grad_norm": 0.19985756772213595,
"learning_rate": 7.353636531383802e-06,
"loss": 0.1381,
"step": 979
},
{
"epoch": 0.6886858749121574,
"grad_norm": 0.19632630302951515,
"learning_rate": 7.348765526272457e-06,
"loss": 0.1286,
"step": 980
},
{
"epoch": 0.6893886156008433,
"grad_norm": 0.1942668067134201,
"learning_rate": 7.34389165916148e-06,
"loss": 0.1297,
"step": 981
},
{
"epoch": 0.6900913562895291,
"grad_norm": 0.20858507171446963,
"learning_rate": 7.339014935989734e-06,
"loss": 0.1433,
"step": 982
},
{
"epoch": 0.690794096978215,
"grad_norm": 0.2037759954049846,
"learning_rate": 7.334135362699571e-06,
"loss": 0.1425,
"step": 983
},
{
"epoch": 0.691496837666901,
"grad_norm": 0.20494105998118778,
"learning_rate": 7.329252945236808e-06,
"loss": 0.1412,
"step": 984
},
{
"epoch": 0.6921995783555868,
"grad_norm": 0.20544022394484354,
"learning_rate": 7.324367689550732e-06,
"loss": 0.1446,
"step": 985
},
{
"epoch": 0.6929023190442727,
"grad_norm": 0.20845678555694033,
"learning_rate": 7.319479601594085e-06,
"loss": 0.1593,
"step": 986
},
{
"epoch": 0.6936050597329585,
"grad_norm": 0.20441004916420916,
"learning_rate": 7.3145886873230655e-06,
"loss": 0.1465,
"step": 987
},
{
"epoch": 0.6943078004216444,
"grad_norm": 0.20646953801695345,
"learning_rate": 7.309694952697308e-06,
"loss": 0.1368,
"step": 988
},
{
"epoch": 0.6950105411103303,
"grad_norm": 0.18646840648285923,
"learning_rate": 7.304798403679893e-06,
"loss": 0.118,
"step": 989
},
{
"epoch": 0.6957132817990161,
"grad_norm": 0.19796242949698056,
"learning_rate": 7.299899046237323e-06,
"loss": 0.1356,
"step": 990
},
{
"epoch": 0.6964160224877021,
"grad_norm": 0.20786490871607696,
"learning_rate": 7.294996886339526e-06,
"loss": 0.1394,
"step": 991
},
{
"epoch": 0.6971187631763879,
"grad_norm": 0.20018256115369898,
"learning_rate": 7.290091929959843e-06,
"loss": 0.1374,
"step": 992
},
{
"epoch": 0.6978215038650738,
"grad_norm": 0.1976940982989298,
"learning_rate": 7.285184183075025e-06,
"loss": 0.1306,
"step": 993
},
{
"epoch": 0.6985242445537596,
"grad_norm": 0.22510021644672853,
"learning_rate": 7.2802736516652205e-06,
"loss": 0.1433,
"step": 994
},
{
"epoch": 0.6992269852424455,
"grad_norm": 0.20556948767676095,
"learning_rate": 7.275360341713973e-06,
"loss": 0.1526,
"step": 995
},
{
"epoch": 0.6999297259311315,
"grad_norm": 0.2075525696956084,
"learning_rate": 7.270444259208211e-06,
"loss": 0.1328,
"step": 996
},
{
"epoch": 0.7006324666198173,
"grad_norm": 0.18867261144261133,
"learning_rate": 7.265525410138242e-06,
"loss": 0.1237,
"step": 997
},
{
"epoch": 0.7013352073085032,
"grad_norm": 0.21344593499172673,
"learning_rate": 7.2606038004977435e-06,
"loss": 0.1666,
"step": 998
},
{
"epoch": 0.702037947997189,
"grad_norm": 0.19310547041508477,
"learning_rate": 7.255679436283757e-06,
"loss": 0.1188,
"step": 999
},
{
"epoch": 0.7027406886858749,
"grad_norm": 0.20047662337806704,
"learning_rate": 7.250752323496679e-06,
"loss": 0.1347,
"step": 1000
},
{
"epoch": 0.7027406886858749,
"eval_loss": 0.13746784627437592,
"eval_runtime": 10.6934,
"eval_samples_per_second": 21.509,
"eval_steps_per_second": 5.424,
"step": 1000
},
{
"epoch": 0.7034434293745608,
"grad_norm": 0.18412490054270844,
"learning_rate": 7.24582246814026e-06,
"loss": 0.1185,
"step": 1001
},
{
"epoch": 0.7041461700632466,
"grad_norm": 0.19631764808944197,
"learning_rate": 7.240889876221589e-06,
"loss": 0.1286,
"step": 1002
},
{
"epoch": 0.7048489107519326,
"grad_norm": 0.21005696201204196,
"learning_rate": 7.2359545537510875e-06,
"loss": 0.1519,
"step": 1003
},
{
"epoch": 0.7055516514406184,
"grad_norm": 0.22412888143203666,
"learning_rate": 7.23101650674251e-06,
"loss": 0.1772,
"step": 1004
},
{
"epoch": 0.7062543921293043,
"grad_norm": 0.21107456715892647,
"learning_rate": 7.226075741212923e-06,
"loss": 0.1484,
"step": 1005
},
{
"epoch": 0.7069571328179901,
"grad_norm": 0.19490075952260433,
"learning_rate": 7.221132263182713e-06,
"loss": 0.1307,
"step": 1006
},
{
"epoch": 0.707659873506676,
"grad_norm": 0.2102194710615554,
"learning_rate": 7.216186078675569e-06,
"loss": 0.1552,
"step": 1007
},
{
"epoch": 0.708362614195362,
"grad_norm": 0.20445073756401105,
"learning_rate": 7.211237193718476e-06,
"loss": 0.1458,
"step": 1008
},
{
"epoch": 0.7090653548840478,
"grad_norm": 0.19417339360524766,
"learning_rate": 7.206285614341711e-06,
"loss": 0.1338,
"step": 1009
},
{
"epoch": 0.7097680955727337,
"grad_norm": 0.1885716116611981,
"learning_rate": 7.201331346578836e-06,
"loss": 0.117,
"step": 1010
},
{
"epoch": 0.7104708362614195,
"grad_norm": 0.18827679023652263,
"learning_rate": 7.196374396466686e-06,
"loss": 0.1217,
"step": 1011
},
{
"epoch": 0.7111735769501054,
"grad_norm": 0.19735723969253513,
"learning_rate": 7.191414770045364e-06,
"loss": 0.1245,
"step": 1012
},
{
"epoch": 0.7118763176387913,
"grad_norm": 0.19692603134019873,
"learning_rate": 7.186452473358238e-06,
"loss": 0.1353,
"step": 1013
},
{
"epoch": 0.7125790583274771,
"grad_norm": 0.20434983882421323,
"learning_rate": 7.181487512451927e-06,
"loss": 0.1477,
"step": 1014
},
{
"epoch": 0.7132817990161631,
"grad_norm": 0.20821509338878808,
"learning_rate": 7.176519893376296e-06,
"loss": 0.1365,
"step": 1015
},
{
"epoch": 0.7139845397048489,
"grad_norm": 0.2105052454562383,
"learning_rate": 7.17154962218445e-06,
"loss": 0.1499,
"step": 1016
},
{
"epoch": 0.7146872803935348,
"grad_norm": 0.1884132589850243,
"learning_rate": 7.1665767049327284e-06,
"loss": 0.1139,
"step": 1017
},
{
"epoch": 0.7153900210822206,
"grad_norm": 0.18358125023015598,
"learning_rate": 7.161601147680688e-06,
"loss": 0.1076,
"step": 1018
},
{
"epoch": 0.7160927617709065,
"grad_norm": 0.1892492606460429,
"learning_rate": 7.156622956491107e-06,
"loss": 0.1159,
"step": 1019
},
{
"epoch": 0.7167955024595924,
"grad_norm": 0.20627056684031847,
"learning_rate": 7.1516421374299735e-06,
"loss": 0.1569,
"step": 1020
},
{
"epoch": 0.7174982431482783,
"grad_norm": 0.18971970093971738,
"learning_rate": 7.146658696566478e-06,
"loss": 0.1186,
"step": 1021
},
{
"epoch": 0.7182009838369642,
"grad_norm": 0.20002701981561502,
"learning_rate": 7.141672639973e-06,
"loss": 0.1327,
"step": 1022
},
{
"epoch": 0.71890372452565,
"grad_norm": 0.22504018457692682,
"learning_rate": 7.136683973725116e-06,
"loss": 0.1633,
"step": 1023
},
{
"epoch": 0.7196064652143359,
"grad_norm": 0.2093954842681073,
"learning_rate": 7.1316927039015736e-06,
"loss": 0.154,
"step": 1024
},
{
"epoch": 0.7203092059030218,
"grad_norm": 0.18937478851532624,
"learning_rate": 7.126698836584296e-06,
"loss": 0.1213,
"step": 1025
},
{
"epoch": 0.7210119465917076,
"grad_norm": 0.1986388824941596,
"learning_rate": 7.121702377858375e-06,
"loss": 0.1261,
"step": 1026
},
{
"epoch": 0.7217146872803936,
"grad_norm": 0.2013954733689864,
"learning_rate": 7.116703333812055e-06,
"loss": 0.1373,
"step": 1027
},
{
"epoch": 0.7224174279690794,
"grad_norm": 0.1871652603980141,
"learning_rate": 7.111701710536732e-06,
"loss": 0.1184,
"step": 1028
},
{
"epoch": 0.7231201686577653,
"grad_norm": 0.19992088355564525,
"learning_rate": 7.106697514126947e-06,
"loss": 0.1314,
"step": 1029
},
{
"epoch": 0.7238229093464511,
"grad_norm": 0.19569795781102292,
"learning_rate": 7.101690750680373e-06,
"loss": 0.1313,
"step": 1030
},
{
"epoch": 0.724525650035137,
"grad_norm": 0.20365517724879573,
"learning_rate": 7.096681426297814e-06,
"loss": 0.1403,
"step": 1031
},
{
"epoch": 0.725228390723823,
"grad_norm": 0.19458069296420866,
"learning_rate": 7.091669547083193e-06,
"loss": 0.1273,
"step": 1032
},
{
"epoch": 0.7259311314125088,
"grad_norm": 0.20172830752226292,
"learning_rate": 7.0866551191435464e-06,
"loss": 0.1367,
"step": 1033
},
{
"epoch": 0.7266338721011947,
"grad_norm": 0.19901171501067313,
"learning_rate": 7.081638148589015e-06,
"loss": 0.1343,
"step": 1034
},
{
"epoch": 0.7273366127898805,
"grad_norm": 0.20966141979168246,
"learning_rate": 7.07661864153284e-06,
"loss": 0.1412,
"step": 1035
},
{
"epoch": 0.7280393534785664,
"grad_norm": 0.20527275660007557,
"learning_rate": 7.071596604091353e-06,
"loss": 0.1376,
"step": 1036
},
{
"epoch": 0.7287420941672523,
"grad_norm": 0.19961070790748098,
"learning_rate": 7.066572042383967e-06,
"loss": 0.1384,
"step": 1037
},
{
"epoch": 0.7294448348559381,
"grad_norm": 0.19224645327914783,
"learning_rate": 7.061544962533174e-06,
"loss": 0.1227,
"step": 1038
},
{
"epoch": 0.7301475755446241,
"grad_norm": 0.19277645061571613,
"learning_rate": 7.056515370664529e-06,
"loss": 0.1164,
"step": 1039
},
{
"epoch": 0.7308503162333099,
"grad_norm": 0.19408963370208726,
"learning_rate": 7.051483272906656e-06,
"loss": 0.1126,
"step": 1040
},
{
"epoch": 0.7315530569219958,
"grad_norm": 0.20646336284515324,
"learning_rate": 7.0464486753912255e-06,
"loss": 0.1402,
"step": 1041
},
{
"epoch": 0.7322557976106817,
"grad_norm": 0.2014079791915788,
"learning_rate": 7.041411584252956e-06,
"loss": 0.1326,
"step": 1042
},
{
"epoch": 0.7329585382993675,
"grad_norm": 0.18623721082538525,
"learning_rate": 7.036372005629606e-06,
"loss": 0.1146,
"step": 1043
},
{
"epoch": 0.7336612789880534,
"grad_norm": 0.21216820804150982,
"learning_rate": 7.0313299456619635e-06,
"loss": 0.1568,
"step": 1044
},
{
"epoch": 0.7343640196767393,
"grad_norm": 0.2074913407353464,
"learning_rate": 7.026285410493839e-06,
"loss": 0.1332,
"step": 1045
},
{
"epoch": 0.7350667603654252,
"grad_norm": 0.19215000214372022,
"learning_rate": 7.021238406272064e-06,
"loss": 0.1241,
"step": 1046
},
{
"epoch": 0.735769501054111,
"grad_norm": 0.21421037310376015,
"learning_rate": 7.016188939146471e-06,
"loss": 0.1337,
"step": 1047
},
{
"epoch": 0.7364722417427969,
"grad_norm": 0.19939298899551505,
"learning_rate": 7.011137015269901e-06,
"loss": 0.1392,
"step": 1048
},
{
"epoch": 0.7371749824314828,
"grad_norm": 0.20939331583536933,
"learning_rate": 7.006082640798183e-06,
"loss": 0.1489,
"step": 1049
},
{
"epoch": 0.7378777231201686,
"grad_norm": 0.20123621380817572,
"learning_rate": 7.0010258218901375e-06,
"loss": 0.1442,
"step": 1050
},
{
"epoch": 0.7385804638088546,
"grad_norm": 0.20026937309127452,
"learning_rate": 6.995966564707556e-06,
"loss": 0.1325,
"step": 1051
},
{
"epoch": 0.7392832044975404,
"grad_norm": 0.19455200651133495,
"learning_rate": 6.99090487541521e-06,
"loss": 0.1227,
"step": 1052
},
{
"epoch": 0.7399859451862263,
"grad_norm": 0.1977022222015236,
"learning_rate": 6.985840760180824e-06,
"loss": 0.1361,
"step": 1053
},
{
"epoch": 0.7406886858749122,
"grad_norm": 0.1903579429922977,
"learning_rate": 6.980774225175092e-06,
"loss": 0.1224,
"step": 1054
},
{
"epoch": 0.741391426563598,
"grad_norm": 0.19315773937190478,
"learning_rate": 6.975705276571645e-06,
"loss": 0.1242,
"step": 1055
},
{
"epoch": 0.7420941672522839,
"grad_norm": 0.1904517651669447,
"learning_rate": 6.970633920547059e-06,
"loss": 0.124,
"step": 1056
},
{
"epoch": 0.7427969079409698,
"grad_norm": 0.20085279089064773,
"learning_rate": 6.965560163280844e-06,
"loss": 0.1425,
"step": 1057
},
{
"epoch": 0.7434996486296557,
"grad_norm": 0.20193708119207782,
"learning_rate": 6.960484010955436e-06,
"loss": 0.1377,
"step": 1058
},
{
"epoch": 0.7442023893183415,
"grad_norm": 0.18196643389825534,
"learning_rate": 6.955405469756189e-06,
"loss": 0.1078,
"step": 1059
},
{
"epoch": 0.7449051300070274,
"grad_norm": 0.21754598182092894,
"learning_rate": 6.950324545871367e-06,
"loss": 0.1729,
"step": 1060
},
{
"epoch": 0.7456078706957133,
"grad_norm": 0.20135696656114324,
"learning_rate": 6.945241245492139e-06,
"loss": 0.1289,
"step": 1061
},
{
"epoch": 0.7463106113843991,
"grad_norm": 0.1820735905769522,
"learning_rate": 6.940155574812571e-06,
"loss": 0.1177,
"step": 1062
},
{
"epoch": 0.7470133520730851,
"grad_norm": 0.20492440435151,
"learning_rate": 6.935067540029608e-06,
"loss": 0.1577,
"step": 1063
},
{
"epoch": 0.7477160927617709,
"grad_norm": 0.1994005178007996,
"learning_rate": 6.929977147343092e-06,
"loss": 0.1384,
"step": 1064
},
{
"epoch": 0.7484188334504568,
"grad_norm": 0.20563366342243797,
"learning_rate": 6.924884402955722e-06,
"loss": 0.1515,
"step": 1065
},
{
"epoch": 0.7491215741391427,
"grad_norm": 0.21005993509755702,
"learning_rate": 6.919789313073072e-06,
"loss": 0.1588,
"step": 1066
},
{
"epoch": 0.7498243148278285,
"grad_norm": 0.19763850426088478,
"learning_rate": 6.914691883903573e-06,
"loss": 0.1359,
"step": 1067
},
{
"epoch": 0.7505270555165144,
"grad_norm": 0.18316570131527934,
"learning_rate": 6.909592121658504e-06,
"loss": 0.1105,
"step": 1068
},
{
"epoch": 0.7512297962052003,
"grad_norm": 0.20562881234857472,
"learning_rate": 6.904490032551987e-06,
"loss": 0.136,
"step": 1069
},
{
"epoch": 0.7519325368938862,
"grad_norm": 0.20429917758802024,
"learning_rate": 6.899385622800981e-06,
"loss": 0.1413,
"step": 1070
},
{
"epoch": 0.752635277582572,
"grad_norm": 0.1880581557294585,
"learning_rate": 6.894278898625272e-06,
"loss": 0.1168,
"step": 1071
},
{
"epoch": 0.7533380182712579,
"grad_norm": 0.1875932953249795,
"learning_rate": 6.889169866247466e-06,
"loss": 0.1254,
"step": 1072
},
{
"epoch": 0.7540407589599438,
"grad_norm": 0.18011573108264922,
"learning_rate": 6.8840585318929806e-06,
"loss": 0.1076,
"step": 1073
},
{
"epoch": 0.7547434996486296,
"grad_norm": 0.19943876990511136,
"learning_rate": 6.8789449017900425e-06,
"loss": 0.1271,
"step": 1074
},
{
"epoch": 0.7554462403373156,
"grad_norm": 0.19911218562063843,
"learning_rate": 6.873828982169669e-06,
"loss": 0.1371,
"step": 1075
},
{
"epoch": 0.7561489810260014,
"grad_norm": 0.19344636008984378,
"learning_rate": 6.868710779265675e-06,
"loss": 0.1365,
"step": 1076
},
{
"epoch": 0.7568517217146873,
"grad_norm": 0.20526922442823353,
"learning_rate": 6.8635902993146485e-06,
"loss": 0.1456,
"step": 1077
},
{
"epoch": 0.7575544624033732,
"grad_norm": 0.20007778519647024,
"learning_rate": 6.858467548555963e-06,
"loss": 0.1241,
"step": 1078
},
{
"epoch": 0.758257203092059,
"grad_norm": 0.201475794536539,
"learning_rate": 6.853342533231748e-06,
"loss": 0.1335,
"step": 1079
},
{
"epoch": 0.7589599437807449,
"grad_norm": 0.22478947371040345,
"learning_rate": 6.848215259586901e-06,
"loss": 0.1766,
"step": 1080
},
{
"epoch": 0.7596626844694307,
"grad_norm": 0.18505626497543082,
"learning_rate": 6.8430857338690655e-06,
"loss": 0.1119,
"step": 1081
},
{
"epoch": 0.7603654251581167,
"grad_norm": 0.19828871045040683,
"learning_rate": 6.837953962328635e-06,
"loss": 0.1142,
"step": 1082
},
{
"epoch": 0.7610681658468025,
"grad_norm": 0.19937545394668713,
"learning_rate": 6.832819951218732e-06,
"loss": 0.1343,
"step": 1083
},
{
"epoch": 0.7617709065354884,
"grad_norm": 0.2140422464409519,
"learning_rate": 6.827683706795216e-06,
"loss": 0.1593,
"step": 1084
},
{
"epoch": 0.7624736472241743,
"grad_norm": 0.2143213686370296,
"learning_rate": 6.82254523531666e-06,
"loss": 0.1302,
"step": 1085
},
{
"epoch": 0.7631763879128601,
"grad_norm": 0.20115502504799535,
"learning_rate": 6.817404543044358e-06,
"loss": 0.1301,
"step": 1086
},
{
"epoch": 0.763879128601546,
"grad_norm": 0.1980726550332544,
"learning_rate": 6.812261636242303e-06,
"loss": 0.1343,
"step": 1087
},
{
"epoch": 0.7645818692902319,
"grad_norm": 0.19257786863918447,
"learning_rate": 6.807116521177195e-06,
"loss": 0.126,
"step": 1088
},
{
"epoch": 0.7652846099789178,
"grad_norm": 0.1983610855672105,
"learning_rate": 6.801969204118415e-06,
"loss": 0.1327,
"step": 1089
},
{
"epoch": 0.7659873506676037,
"grad_norm": 0.1736896471739148,
"learning_rate": 6.796819691338035e-06,
"loss": 0.1018,
"step": 1090
},
{
"epoch": 0.7666900913562895,
"grad_norm": 0.19882250970525375,
"learning_rate": 6.7916679891108e-06,
"loss": 0.1286,
"step": 1091
},
{
"epoch": 0.7673928320449754,
"grad_norm": 0.19565970612413142,
"learning_rate": 6.786514103714119e-06,
"loss": 0.1366,
"step": 1092
},
{
"epoch": 0.7680955727336612,
"grad_norm": 0.20099876915944423,
"learning_rate": 6.781358041428068e-06,
"loss": 0.1356,
"step": 1093
},
{
"epoch": 0.7687983134223472,
"grad_norm": 0.2006191550385529,
"learning_rate": 6.776199808535371e-06,
"loss": 0.1363,
"step": 1094
},
{
"epoch": 0.769501054111033,
"grad_norm": 0.19310601295027008,
"learning_rate": 6.771039411321397e-06,
"loss": 0.1176,
"step": 1095
},
{
"epoch": 0.7702037947997189,
"grad_norm": 0.19100052651097993,
"learning_rate": 6.765876856074156e-06,
"loss": 0.1058,
"step": 1096
},
{
"epoch": 0.7709065354884048,
"grad_norm": 0.20235030845431948,
"learning_rate": 6.760712149084282e-06,
"loss": 0.1403,
"step": 1097
},
{
"epoch": 0.7716092761770906,
"grad_norm": 0.21012632317456992,
"learning_rate": 6.755545296645037e-06,
"loss": 0.1578,
"step": 1098
},
{
"epoch": 0.7723120168657766,
"grad_norm": 0.1943506450417216,
"learning_rate": 6.7503763050522904e-06,
"loss": 0.1234,
"step": 1099
},
{
"epoch": 0.7730147575544624,
"grad_norm": 0.19169978822930997,
"learning_rate": 6.745205180604526e-06,
"loss": 0.1292,
"step": 1100
},
{
"epoch": 0.7737174982431483,
"grad_norm": 0.18817029120242748,
"learning_rate": 6.74003192960282e-06,
"loss": 0.1222,
"step": 1101
},
{
"epoch": 0.7744202389318342,
"grad_norm": 0.19225111519144214,
"learning_rate": 6.734856558350842e-06,
"loss": 0.1226,
"step": 1102
},
{
"epoch": 0.77512297962052,
"grad_norm": 0.19815705521566201,
"learning_rate": 6.729679073154845e-06,
"loss": 0.127,
"step": 1103
},
{
"epoch": 0.7758257203092059,
"grad_norm": 0.19894544060243646,
"learning_rate": 6.724499480323662e-06,
"loss": 0.132,
"step": 1104
},
{
"epoch": 0.7765284609978917,
"grad_norm": 0.19092262521441739,
"learning_rate": 6.719317786168687e-06,
"loss": 0.1233,
"step": 1105
},
{
"epoch": 0.7772312016865777,
"grad_norm": 0.2012094516725219,
"learning_rate": 6.714133997003878e-06,
"loss": 0.1405,
"step": 1106
},
{
"epoch": 0.7779339423752635,
"grad_norm": 0.19234810285554485,
"learning_rate": 6.708948119145746e-06,
"loss": 0.115,
"step": 1107
},
{
"epoch": 0.7786366830639494,
"grad_norm": 0.20586163300949348,
"learning_rate": 6.703760158913349e-06,
"loss": 0.14,
"step": 1108
},
{
"epoch": 0.7793394237526353,
"grad_norm": 0.19537415209493642,
"learning_rate": 6.698570122628276e-06,
"loss": 0.1259,
"step": 1109
},
{
"epoch": 0.7800421644413211,
"grad_norm": 0.19809442428040008,
"learning_rate": 6.693378016614657e-06,
"loss": 0.1349,
"step": 1110
},
{
"epoch": 0.780744905130007,
"grad_norm": 0.20310153031966582,
"learning_rate": 6.6881838471991274e-06,
"loss": 0.1522,
"step": 1111
},
{
"epoch": 0.7814476458186929,
"grad_norm": 0.18216420500268724,
"learning_rate": 6.682987620710856e-06,
"loss": 0.1187,
"step": 1112
},
{
"epoch": 0.7821503865073788,
"grad_norm": 0.18700509060373544,
"learning_rate": 6.677789343481501e-06,
"loss": 0.1171,
"step": 1113
},
{
"epoch": 0.7828531271960647,
"grad_norm": 0.18313810828298677,
"learning_rate": 6.6725890218452315e-06,
"loss": 0.1115,
"step": 1114
},
{
"epoch": 0.7835558678847505,
"grad_norm": 0.20155542892789138,
"learning_rate": 6.667386662138702e-06,
"loss": 0.1352,
"step": 1115
},
{
"epoch": 0.7842586085734364,
"grad_norm": 0.19776802266950735,
"learning_rate": 6.662182270701051e-06,
"loss": 0.1247,
"step": 1116
},
{
"epoch": 0.7849613492621222,
"grad_norm": 0.21383756380377217,
"learning_rate": 6.656975853873895e-06,
"loss": 0.1511,
"step": 1117
},
{
"epoch": 0.7856640899508082,
"grad_norm": 0.21659404942971586,
"learning_rate": 6.651767418001314e-06,
"loss": 0.1677,
"step": 1118
},
{
"epoch": 0.786366830639494,
"grad_norm": 0.1906439099679687,
"learning_rate": 6.646556969429854e-06,
"loss": 0.1221,
"step": 1119
},
{
"epoch": 0.7870695713281799,
"grad_norm": 0.20373304040385384,
"learning_rate": 6.64134451450851e-06,
"loss": 0.1332,
"step": 1120
},
{
"epoch": 0.7877723120168658,
"grad_norm": 0.18386267489160207,
"learning_rate": 6.636130059588719e-06,
"loss": 0.1158,
"step": 1121
},
{
"epoch": 0.7884750527055516,
"grad_norm": 0.20396148437880343,
"learning_rate": 6.630913611024365e-06,
"loss": 0.1341,
"step": 1122
},
{
"epoch": 0.7891777933942375,
"grad_norm": 0.1935816921249085,
"learning_rate": 6.625695175171747e-06,
"loss": 0.1287,
"step": 1123
},
{
"epoch": 0.7898805340829234,
"grad_norm": 0.19699320079872426,
"learning_rate": 6.6204747583896e-06,
"loss": 0.1388,
"step": 1124
},
{
"epoch": 0.7905832747716093,
"grad_norm": 0.19711137362915726,
"learning_rate": 6.61525236703906e-06,
"loss": 0.1288,
"step": 1125
},
{
"epoch": 0.7912860154602952,
"grad_norm": 0.21372822161790903,
"learning_rate": 6.610028007483679e-06,
"loss": 0.1556,
"step": 1126
},
{
"epoch": 0.791988756148981,
"grad_norm": 0.20085401457385302,
"learning_rate": 6.604801686089403e-06,
"loss": 0.1305,
"step": 1127
},
{
"epoch": 0.7926914968376669,
"grad_norm": 0.19658167432832763,
"learning_rate": 6.599573409224567e-06,
"loss": 0.1325,
"step": 1128
},
{
"epoch": 0.7933942375263527,
"grad_norm": 0.19682645794006615,
"learning_rate": 6.59434318325989e-06,
"loss": 0.1238,
"step": 1129
},
{
"epoch": 0.7940969782150387,
"grad_norm": 0.20233409188135923,
"learning_rate": 6.58911101456847e-06,
"loss": 0.1314,
"step": 1130
},
{
"epoch": 0.7947997189037245,
"grad_norm": 0.1950956553270949,
"learning_rate": 6.583876909525766e-06,
"loss": 0.1298,
"step": 1131
},
{
"epoch": 0.7955024595924104,
"grad_norm": 0.19923170409102747,
"learning_rate": 6.578640874509599e-06,
"loss": 0.1298,
"step": 1132
},
{
"epoch": 0.7962052002810963,
"grad_norm": 0.21488763566081692,
"learning_rate": 6.573402915900145e-06,
"loss": 0.166,
"step": 1133
},
{
"epoch": 0.7969079409697821,
"grad_norm": 0.1968225868136994,
"learning_rate": 6.568163040079918e-06,
"loss": 0.1298,
"step": 1134
},
{
"epoch": 0.797610681658468,
"grad_norm": 0.20189718016942376,
"learning_rate": 6.562921253433771e-06,
"loss": 0.148,
"step": 1135
},
{
"epoch": 0.7983134223471539,
"grad_norm": 0.1927750587068008,
"learning_rate": 6.557677562348887e-06,
"loss": 0.1218,
"step": 1136
},
{
"epoch": 0.7990161630358398,
"grad_norm": 0.1883656003284253,
"learning_rate": 6.552431973214767e-06,
"loss": 0.1236,
"step": 1137
},
{
"epoch": 0.7997189037245257,
"grad_norm": 0.1992710939348564,
"learning_rate": 6.547184492423227e-06,
"loss": 0.139,
"step": 1138
},
{
"epoch": 0.8004216444132115,
"grad_norm": 0.17941047581554348,
"learning_rate": 6.541935126368384e-06,
"loss": 0.1082,
"step": 1139
},
{
"epoch": 0.8011243851018974,
"grad_norm": 0.19722783061438928,
"learning_rate": 6.536683881446658e-06,
"loss": 0.131,
"step": 1140
},
{
"epoch": 0.8018271257905832,
"grad_norm": 0.19548740469261053,
"learning_rate": 6.531430764056755e-06,
"loss": 0.1372,
"step": 1141
},
{
"epoch": 0.8025298664792692,
"grad_norm": 0.17787525375063432,
"learning_rate": 6.5261757805996605e-06,
"loss": 0.1003,
"step": 1142
},
{
"epoch": 0.803232607167955,
"grad_norm": 0.1860844461028948,
"learning_rate": 6.520918937478639e-06,
"loss": 0.1187,
"step": 1143
},
{
"epoch": 0.8039353478566409,
"grad_norm": 0.2022429068055759,
"learning_rate": 6.515660241099217e-06,
"loss": 0.1381,
"step": 1144
},
{
"epoch": 0.8046380885453268,
"grad_norm": 0.20730779355603451,
"learning_rate": 6.51039969786918e-06,
"loss": 0.1319,
"step": 1145
},
{
"epoch": 0.8053408292340126,
"grad_norm": 0.21368826100830407,
"learning_rate": 6.5051373141985685e-06,
"loss": 0.1532,
"step": 1146
},
{
"epoch": 0.8060435699226985,
"grad_norm": 0.1948386590696453,
"learning_rate": 6.499873096499656e-06,
"loss": 0.1271,
"step": 1147
},
{
"epoch": 0.8067463106113844,
"grad_norm": 0.2278073168156262,
"learning_rate": 6.49460705118696e-06,
"loss": 0.1783,
"step": 1148
},
{
"epoch": 0.8074490513000703,
"grad_norm": 0.19891051733883963,
"learning_rate": 6.489339184677221e-06,
"loss": 0.1345,
"step": 1149
},
{
"epoch": 0.8081517919887562,
"grad_norm": 0.19579221578462905,
"learning_rate": 6.484069503389398e-06,
"loss": 0.1297,
"step": 1150
},
{
"epoch": 0.808854532677442,
"grad_norm": 0.1784613063933403,
"learning_rate": 6.478798013744662e-06,
"loss": 0.1098,
"step": 1151
},
{
"epoch": 0.8095572733661279,
"grad_norm": 0.21523840583925652,
"learning_rate": 6.473524722166391e-06,
"loss": 0.1458,
"step": 1152
},
{
"epoch": 0.8102600140548137,
"grad_norm": 0.20726234707735258,
"learning_rate": 6.468249635080153e-06,
"loss": 0.1453,
"step": 1153
},
{
"epoch": 0.8109627547434997,
"grad_norm": 0.2181175971779649,
"learning_rate": 6.462972758913705e-06,
"loss": 0.1507,
"step": 1154
},
{
"epoch": 0.8116654954321855,
"grad_norm": 0.20739204860771884,
"learning_rate": 6.457694100096988e-06,
"loss": 0.1322,
"step": 1155
},
{
"epoch": 0.8123682361208714,
"grad_norm": 0.19929711338929335,
"learning_rate": 6.452413665062111e-06,
"loss": 0.1463,
"step": 1156
},
{
"epoch": 0.8130709768095573,
"grad_norm": 0.21977064234372565,
"learning_rate": 6.44713146024335e-06,
"loss": 0.1465,
"step": 1157
},
{
"epoch": 0.8137737174982431,
"grad_norm": 0.19418555551881903,
"learning_rate": 6.4418474920771365e-06,
"loss": 0.1227,
"step": 1158
},
{
"epoch": 0.814476458186929,
"grad_norm": 0.19199900977647233,
"learning_rate": 6.436561767002048e-06,
"loss": 0.1292,
"step": 1159
},
{
"epoch": 0.8151791988756149,
"grad_norm": 0.19291664437317543,
"learning_rate": 6.431274291458811e-06,
"loss": 0.1209,
"step": 1160
},
{
"epoch": 0.8158819395643008,
"grad_norm": 0.1974812250410335,
"learning_rate": 6.425985071890273e-06,
"loss": 0.1384,
"step": 1161
},
{
"epoch": 0.8165846802529867,
"grad_norm": 0.20358984063464855,
"learning_rate": 6.420694114741417e-06,
"loss": 0.1517,
"step": 1162
},
{
"epoch": 0.8172874209416725,
"grad_norm": 0.19162944072155,
"learning_rate": 6.415401426459338e-06,
"loss": 0.1276,
"step": 1163
},
{
"epoch": 0.8179901616303584,
"grad_norm": 0.20380752875206928,
"learning_rate": 6.410107013493241e-06,
"loss": 0.1388,
"step": 1164
},
{
"epoch": 0.8186929023190442,
"grad_norm": 0.18138541702104197,
"learning_rate": 6.404810882294436e-06,
"loss": 0.1087,
"step": 1165
},
{
"epoch": 0.8193956430077302,
"grad_norm": 0.18240253853230093,
"learning_rate": 6.399513039316319e-06,
"loss": 0.1126,
"step": 1166
},
{
"epoch": 0.8200983836964161,
"grad_norm": 0.18809715741165062,
"learning_rate": 6.3942134910143805e-06,
"loss": 0.1169,
"step": 1167
},
{
"epoch": 0.8208011243851019,
"grad_norm": 0.21131477317913167,
"learning_rate": 6.388912243846186e-06,
"loss": 0.149,
"step": 1168
},
{
"epoch": 0.8215038650737878,
"grad_norm": 0.19820852890022736,
"learning_rate": 6.3836093042713665e-06,
"loss": 0.139,
"step": 1169
},
{
"epoch": 0.8222066057624736,
"grad_norm": 0.2050996757971281,
"learning_rate": 6.378304678751624e-06,
"loss": 0.139,
"step": 1170
},
{
"epoch": 0.8229093464511595,
"grad_norm": 0.19802039622768783,
"learning_rate": 6.372998373750703e-06,
"loss": 0.1311,
"step": 1171
},
{
"epoch": 0.8236120871398454,
"grad_norm": 0.2052580161883074,
"learning_rate": 6.367690395734407e-06,
"loss": 0.1425,
"step": 1172
},
{
"epoch": 0.8243148278285313,
"grad_norm": 0.19654703134128906,
"learning_rate": 6.362380751170569e-06,
"loss": 0.1274,
"step": 1173
},
{
"epoch": 0.8250175685172172,
"grad_norm": 0.19299368724656255,
"learning_rate": 6.35706944652906e-06,
"loss": 0.1249,
"step": 1174
},
{
"epoch": 0.825720309205903,
"grad_norm": 0.20543445267598598,
"learning_rate": 6.351756488281766e-06,
"loss": 0.1444,
"step": 1175
},
{
"epoch": 0.8264230498945889,
"grad_norm": 0.19861218482762813,
"learning_rate": 6.346441882902594e-06,
"loss": 0.124,
"step": 1176
},
{
"epoch": 0.8271257905832747,
"grad_norm": 0.20117202386150698,
"learning_rate": 6.341125636867455e-06,
"loss": 0.1382,
"step": 1177
},
{
"epoch": 0.8278285312719607,
"grad_norm": 0.2007954010187093,
"learning_rate": 6.335807756654262e-06,
"loss": 0.1289,
"step": 1178
},
{
"epoch": 0.8285312719606466,
"grad_norm": 0.1988589671357077,
"learning_rate": 6.330488248742914e-06,
"loss": 0.1308,
"step": 1179
},
{
"epoch": 0.8292340126493324,
"grad_norm": 0.2016058390740639,
"learning_rate": 6.325167119615299e-06,
"loss": 0.1349,
"step": 1180
},
{
"epoch": 0.8299367533380183,
"grad_norm": 0.18744598072396934,
"learning_rate": 6.319844375755275e-06,
"loss": 0.1182,
"step": 1181
},
{
"epoch": 0.8306394940267041,
"grad_norm": 0.20933130988240092,
"learning_rate": 6.314520023648678e-06,
"loss": 0.1396,
"step": 1182
},
{
"epoch": 0.83134223471539,
"grad_norm": 0.19944170046395707,
"learning_rate": 6.309194069783288e-06,
"loss": 0.1294,
"step": 1183
},
{
"epoch": 0.8320449754040758,
"grad_norm": 0.18652452961292854,
"learning_rate": 6.303866520648851e-06,
"loss": 0.1111,
"step": 1184
},
{
"epoch": 0.8327477160927618,
"grad_norm": 0.1907511339450951,
"learning_rate": 6.298537382737048e-06,
"loss": 0.1179,
"step": 1185
},
{
"epoch": 0.8334504567814477,
"grad_norm": 0.19810201708280198,
"learning_rate": 6.2932066625415e-06,
"loss": 0.1259,
"step": 1186
},
{
"epoch": 0.8341531974701335,
"grad_norm": 0.20210818584747303,
"learning_rate": 6.287874366557756e-06,
"loss": 0.1428,
"step": 1187
},
{
"epoch": 0.8348559381588194,
"grad_norm": 0.18737200997392453,
"learning_rate": 6.2825405012832815e-06,
"loss": 0.1211,
"step": 1188
},
{
"epoch": 0.8355586788475052,
"grad_norm": 0.19892771268639417,
"learning_rate": 6.2772050732174595e-06,
"loss": 0.1324,
"step": 1189
},
{
"epoch": 0.8362614195361912,
"grad_norm": 0.18370820530543852,
"learning_rate": 6.2718680888615734e-06,
"loss": 0.1165,
"step": 1190
},
{
"epoch": 0.8369641602248771,
"grad_norm": 0.18531477064495158,
"learning_rate": 6.266529554718804e-06,
"loss": 0.1168,
"step": 1191
},
{
"epoch": 0.8376669009135629,
"grad_norm": 0.19003410594772305,
"learning_rate": 6.261189477294221e-06,
"loss": 0.1246,
"step": 1192
},
{
"epoch": 0.8383696416022488,
"grad_norm": 0.20797243226557352,
"learning_rate": 6.255847863094775e-06,
"loss": 0.1513,
"step": 1193
},
{
"epoch": 0.8390723822909346,
"grad_norm": 0.20847488356357405,
"learning_rate": 6.250504718629288e-06,
"loss": 0.1427,
"step": 1194
},
{
"epoch": 0.8397751229796205,
"grad_norm": 0.1822480904595882,
"learning_rate": 6.245160050408446e-06,
"loss": 0.1158,
"step": 1195
},
{
"epoch": 0.8404778636683063,
"grad_norm": 0.18301388638123467,
"learning_rate": 6.2398138649447935e-06,
"loss": 0.1123,
"step": 1196
},
{
"epoch": 0.8411806043569923,
"grad_norm": 0.2038041845576162,
"learning_rate": 6.234466168752724e-06,
"loss": 0.1517,
"step": 1197
},
{
"epoch": 0.8418833450456782,
"grad_norm": 0.19558231288392228,
"learning_rate": 6.22911696834847e-06,
"loss": 0.1176,
"step": 1198
},
{
"epoch": 0.842586085734364,
"grad_norm": 0.2157081526945961,
"learning_rate": 6.223766270250099e-06,
"loss": 0.1658,
"step": 1199
},
{
"epoch": 0.8432888264230499,
"grad_norm": 0.20692891372992678,
"learning_rate": 6.218414080977502e-06,
"loss": 0.1491,
"step": 1200
},
{
"epoch": 0.8439915671117357,
"grad_norm": 0.19013615828959696,
"learning_rate": 6.2130604070523855e-06,
"loss": 0.1128,
"step": 1201
},
{
"epoch": 0.8446943078004217,
"grad_norm": 0.1930525766862121,
"learning_rate": 6.207705254998269e-06,
"loss": 0.1207,
"step": 1202
},
{
"epoch": 0.8453970484891076,
"grad_norm": 0.20251119171594884,
"learning_rate": 6.2023486313404715e-06,
"loss": 0.1405,
"step": 1203
},
{
"epoch": 0.8460997891777934,
"grad_norm": 0.19602286103258867,
"learning_rate": 6.196990542606102e-06,
"loss": 0.1267,
"step": 1204
},
{
"epoch": 0.8468025298664793,
"grad_norm": 0.20071115929601205,
"learning_rate": 6.19163099532406e-06,
"loss": 0.1398,
"step": 1205
},
{
"epoch": 0.8475052705551651,
"grad_norm": 0.2086898206188605,
"learning_rate": 6.186269996025018e-06,
"loss": 0.1563,
"step": 1206
},
{
"epoch": 0.848208011243851,
"grad_norm": 0.19603290697178688,
"learning_rate": 6.18090755124142e-06,
"loss": 0.1355,
"step": 1207
},
{
"epoch": 0.8489107519325368,
"grad_norm": 0.20396007238043157,
"learning_rate": 6.175543667507472e-06,
"loss": 0.135,
"step": 1208
},
{
"epoch": 0.8496134926212228,
"grad_norm": 0.22252976882208425,
"learning_rate": 6.17017835135913e-06,
"loss": 0.1793,
"step": 1209
},
{
"epoch": 0.8503162333099087,
"grad_norm": 0.1996061427633973,
"learning_rate": 6.1648116093340985e-06,
"loss": 0.1443,
"step": 1210
},
{
"epoch": 0.8510189739985945,
"grad_norm": 0.1909027773028849,
"learning_rate": 6.15944344797182e-06,
"loss": 0.1298,
"step": 1211
},
{
"epoch": 0.8517217146872804,
"grad_norm": 0.21718155430108493,
"learning_rate": 6.154073873813463e-06,
"loss": 0.1618,
"step": 1212
},
{
"epoch": 0.8524244553759662,
"grad_norm": 0.2061881787473553,
"learning_rate": 6.148702893401921e-06,
"loss": 0.1448,
"step": 1213
},
{
"epoch": 0.8531271960646521,
"grad_norm": 0.19829285729453713,
"learning_rate": 6.143330513281799e-06,
"loss": 0.1228,
"step": 1214
},
{
"epoch": 0.8538299367533381,
"grad_norm": 0.17905129782753776,
"learning_rate": 6.137956739999408e-06,
"loss": 0.1059,
"step": 1215
},
{
"epoch": 0.8545326774420239,
"grad_norm": 0.20342839560987236,
"learning_rate": 6.132581580102757e-06,
"loss": 0.1477,
"step": 1216
},
{
"epoch": 0.8552354181307098,
"grad_norm": 0.19340402891242583,
"learning_rate": 6.127205040141544e-06,
"loss": 0.1331,
"step": 1217
},
{
"epoch": 0.8559381588193956,
"grad_norm": 0.20103281884835802,
"learning_rate": 6.121827126667149e-06,
"loss": 0.1463,
"step": 1218
},
{
"epoch": 0.8566408995080815,
"grad_norm": 0.18798814880074372,
"learning_rate": 6.116447846232626e-06,
"loss": 0.1187,
"step": 1219
},
{
"epoch": 0.8573436401967673,
"grad_norm": 0.21302895523872914,
"learning_rate": 6.111067205392693e-06,
"loss": 0.157,
"step": 1220
},
{
"epoch": 0.8580463808854533,
"grad_norm": 0.18948890110620684,
"learning_rate": 6.105685210703728e-06,
"loss": 0.1233,
"step": 1221
},
{
"epoch": 0.8587491215741392,
"grad_norm": 0.19075563298043724,
"learning_rate": 6.100301868723758e-06,
"loss": 0.1248,
"step": 1222
},
{
"epoch": 0.859451862262825,
"grad_norm": 0.18662897163170272,
"learning_rate": 6.0949171860124516e-06,
"loss": 0.1132,
"step": 1223
},
{
"epoch": 0.8601546029515109,
"grad_norm": 0.19171255386601416,
"learning_rate": 6.089531169131109e-06,
"loss": 0.1161,
"step": 1224
},
{
"epoch": 0.8608573436401967,
"grad_norm": 0.20528814690127212,
"learning_rate": 6.08414382464266e-06,
"loss": 0.1467,
"step": 1225
},
{
"epoch": 0.8615600843288826,
"grad_norm": 0.196194052679027,
"learning_rate": 6.078755159111648e-06,
"loss": 0.1341,
"step": 1226
},
{
"epoch": 0.8622628250175686,
"grad_norm": 0.20709248977213376,
"learning_rate": 6.073365179104229e-06,
"loss": 0.1529,
"step": 1227
},
{
"epoch": 0.8629655657062544,
"grad_norm": 0.19820702802650494,
"learning_rate": 6.067973891188161e-06,
"loss": 0.1293,
"step": 1228
},
{
"epoch": 0.8636683063949403,
"grad_norm": 0.19496999217899602,
"learning_rate": 6.0625813019327925e-06,
"loss": 0.1319,
"step": 1229
},
{
"epoch": 0.8643710470836261,
"grad_norm": 0.20769083447477954,
"learning_rate": 6.057187417909061e-06,
"loss": 0.1395,
"step": 1230
},
{
"epoch": 0.865073787772312,
"grad_norm": 0.18620264873088294,
"learning_rate": 6.05179224568948e-06,
"loss": 0.1146,
"step": 1231
},
{
"epoch": 0.8657765284609978,
"grad_norm": 0.18731816886204025,
"learning_rate": 6.046395791848133e-06,
"loss": 0.1129,
"step": 1232
},
{
"epoch": 0.8664792691496838,
"grad_norm": 0.19767999141617804,
"learning_rate": 6.040998062960666e-06,
"loss": 0.1288,
"step": 1233
},
{
"epoch": 0.8671820098383697,
"grad_norm": 0.1975351425927544,
"learning_rate": 6.035599065604275e-06,
"loss": 0.1307,
"step": 1234
},
{
"epoch": 0.8678847505270555,
"grad_norm": 0.19079658145562522,
"learning_rate": 6.0301988063577075e-06,
"loss": 0.1182,
"step": 1235
},
{
"epoch": 0.8685874912157414,
"grad_norm": 0.20892833869288904,
"learning_rate": 6.024797291801247e-06,
"loss": 0.1478,
"step": 1236
},
{
"epoch": 0.8692902319044272,
"grad_norm": 0.2081802190937048,
"learning_rate": 6.019394528516702e-06,
"loss": 0.1381,
"step": 1237
},
{
"epoch": 0.8699929725931131,
"grad_norm": 0.18253976319557175,
"learning_rate": 6.013990523087409e-06,
"loss": 0.1165,
"step": 1238
},
{
"epoch": 0.8706957132817991,
"grad_norm": 0.19329762556102978,
"learning_rate": 6.008585282098212e-06,
"loss": 0.1245,
"step": 1239
},
{
"epoch": 0.8713984539704849,
"grad_norm": 0.19214764550156277,
"learning_rate": 6.003178812135464e-06,
"loss": 0.1353,
"step": 1240
},
{
"epoch": 0.8721011946591708,
"grad_norm": 0.21009610799450493,
"learning_rate": 5.997771119787017e-06,
"loss": 0.1337,
"step": 1241
},
{
"epoch": 0.8728039353478566,
"grad_norm": 0.2026429297482465,
"learning_rate": 5.99236221164221e-06,
"loss": 0.1409,
"step": 1242
},
{
"epoch": 0.8735066760365425,
"grad_norm": 0.20815332765574632,
"learning_rate": 5.986952094291861e-06,
"loss": 0.1505,
"step": 1243
},
{
"epoch": 0.8742094167252283,
"grad_norm": 0.20407908784351347,
"learning_rate": 5.9815407743282694e-06,
"loss": 0.1398,
"step": 1244
},
{
"epoch": 0.8749121574139143,
"grad_norm": 0.20017364028316625,
"learning_rate": 5.9761282583451906e-06,
"loss": 0.1304,
"step": 1245
},
{
"epoch": 0.8756148981026002,
"grad_norm": 0.1802214932315393,
"learning_rate": 5.970714552937843e-06,
"loss": 0.1044,
"step": 1246
},
{
"epoch": 0.876317638791286,
"grad_norm": 0.1975570342825927,
"learning_rate": 5.965299664702896e-06,
"loss": 0.1376,
"step": 1247
},
{
"epoch": 0.8770203794799719,
"grad_norm": 0.1937462090080273,
"learning_rate": 5.959883600238452e-06,
"loss": 0.1255,
"step": 1248
},
{
"epoch": 0.8777231201686577,
"grad_norm": 0.19891864457757816,
"learning_rate": 5.954466366144057e-06,
"loss": 0.1465,
"step": 1249
},
{
"epoch": 0.8784258608573436,
"grad_norm": 0.19706460820635913,
"learning_rate": 5.949047969020676e-06,
"loss": 0.1363,
"step": 1250
},
{
"epoch": 0.8791286015460296,
"grad_norm": 0.1945191674526151,
"learning_rate": 5.94362841547069e-06,
"loss": 0.134,
"step": 1251
},
{
"epoch": 0.8798313422347154,
"grad_norm": 0.2051470391654913,
"learning_rate": 5.938207712097895e-06,
"loss": 0.1454,
"step": 1252
},
{
"epoch": 0.8805340829234013,
"grad_norm": 0.2122588171490022,
"learning_rate": 5.932785865507482e-06,
"loss": 0.1437,
"step": 1253
},
{
"epoch": 0.8812368236120871,
"grad_norm": 0.2063353002065839,
"learning_rate": 5.927362882306039e-06,
"loss": 0.1499,
"step": 1254
},
{
"epoch": 0.881939564300773,
"grad_norm": 0.20121158340918333,
"learning_rate": 5.9219387691015376e-06,
"loss": 0.1385,
"step": 1255
},
{
"epoch": 0.8826423049894588,
"grad_norm": 0.19057307147298028,
"learning_rate": 5.916513532503325e-06,
"loss": 0.1199,
"step": 1256
},
{
"epoch": 0.8833450456781448,
"grad_norm": 0.20726784769737133,
"learning_rate": 5.911087179122121e-06,
"loss": 0.1499,
"step": 1257
},
{
"epoch": 0.8840477863668307,
"grad_norm": 0.18888520339090947,
"learning_rate": 5.90565971557e-06,
"loss": 0.1218,
"step": 1258
},
{
"epoch": 0.8847505270555165,
"grad_norm": 0.19938337102070366,
"learning_rate": 5.900231148460398e-06,
"loss": 0.131,
"step": 1259
},
{
"epoch": 0.8854532677442024,
"grad_norm": 0.19875983694746654,
"learning_rate": 5.894801484408086e-06,
"loss": 0.1336,
"step": 1260
},
{
"epoch": 0.8861560084328882,
"grad_norm": 0.19462965456032738,
"learning_rate": 5.8893707300291805e-06,
"loss": 0.1277,
"step": 1261
},
{
"epoch": 0.8868587491215741,
"grad_norm": 0.20200084114325262,
"learning_rate": 5.883938891941117e-06,
"loss": 0.1411,
"step": 1262
},
{
"epoch": 0.8875614898102601,
"grad_norm": 0.19507189793139265,
"learning_rate": 5.878505976762664e-06,
"loss": 0.1286,
"step": 1263
},
{
"epoch": 0.8882642304989459,
"grad_norm": 0.18983829564765659,
"learning_rate": 5.873071991113889e-06,
"loss": 0.1181,
"step": 1264
},
{
"epoch": 0.8889669711876318,
"grad_norm": 0.2164513940561111,
"learning_rate": 5.867636941616174e-06,
"loss": 0.1496,
"step": 1265
},
{
"epoch": 0.8896697118763176,
"grad_norm": 0.19495039715874374,
"learning_rate": 5.862200834892192e-06,
"loss": 0.133,
"step": 1266
},
{
"epoch": 0.8903724525650035,
"grad_norm": 0.1857016519813593,
"learning_rate": 5.856763677565905e-06,
"loss": 0.1228,
"step": 1267
},
{
"epoch": 0.8910751932536893,
"grad_norm": 0.2025611265977965,
"learning_rate": 5.851325476262558e-06,
"loss": 0.1378,
"step": 1268
},
{
"epoch": 0.8917779339423753,
"grad_norm": 0.20713736345189485,
"learning_rate": 5.845886237608665e-06,
"loss": 0.1415,
"step": 1269
},
{
"epoch": 0.8924806746310612,
"grad_norm": 0.19195033612887302,
"learning_rate": 5.840445968232005e-06,
"loss": 0.1182,
"step": 1270
},
{
"epoch": 0.893183415319747,
"grad_norm": 0.18175769043505943,
"learning_rate": 5.8350046747616154e-06,
"loss": 0.1184,
"step": 1271
},
{
"epoch": 0.8938861560084329,
"grad_norm": 0.19018085731091808,
"learning_rate": 5.829562363827773e-06,
"loss": 0.1221,
"step": 1272
},
{
"epoch": 0.8945888966971187,
"grad_norm": 0.20984687884657038,
"learning_rate": 5.824119042062007e-06,
"loss": 0.1395,
"step": 1273
},
{
"epoch": 0.8952916373858046,
"grad_norm": 0.20834807238284192,
"learning_rate": 5.818674716097068e-06,
"loss": 0.1457,
"step": 1274
},
{
"epoch": 0.8959943780744906,
"grad_norm": 0.20504695619337393,
"learning_rate": 5.813229392566937e-06,
"loss": 0.1273,
"step": 1275
},
{
"epoch": 0.8966971187631764,
"grad_norm": 0.20816204737148788,
"learning_rate": 5.8077830781068044e-06,
"loss": 0.1385,
"step": 1276
},
{
"epoch": 0.8973998594518623,
"grad_norm": 0.1972049491786556,
"learning_rate": 5.802335779353074e-06,
"loss": 0.1327,
"step": 1277
},
{
"epoch": 0.8981026001405481,
"grad_norm": 0.2038524196891147,
"learning_rate": 5.796887502943343e-06,
"loss": 0.1405,
"step": 1278
},
{
"epoch": 0.898805340829234,
"grad_norm": 0.2148750618067432,
"learning_rate": 5.791438255516407e-06,
"loss": 0.1693,
"step": 1279
},
{
"epoch": 0.8995080815179198,
"grad_norm": 0.22785838730752947,
"learning_rate": 5.785988043712239e-06,
"loss": 0.1656,
"step": 1280
},
{
"epoch": 0.9002108222066058,
"grad_norm": 0.19737684919076223,
"learning_rate": 5.780536874171987e-06,
"loss": 0.1353,
"step": 1281
},
{
"epoch": 0.9009135628952917,
"grad_norm": 0.1963132860003882,
"learning_rate": 5.775084753537969e-06,
"loss": 0.1321,
"step": 1282
},
{
"epoch": 0.9016163035839775,
"grad_norm": 0.1984512734055927,
"learning_rate": 5.769631688453666e-06,
"loss": 0.1372,
"step": 1283
},
{
"epoch": 0.9023190442726634,
"grad_norm": 0.20235182788032663,
"learning_rate": 5.764177685563698e-06,
"loss": 0.1375,
"step": 1284
},
{
"epoch": 0.9030217849613492,
"grad_norm": 0.20879125435203022,
"learning_rate": 5.758722751513838e-06,
"loss": 0.1523,
"step": 1285
},
{
"epoch": 0.9037245256500351,
"grad_norm": 0.1907111358163119,
"learning_rate": 5.753266892950989e-06,
"loss": 0.1268,
"step": 1286
},
{
"epoch": 0.9044272663387211,
"grad_norm": 0.1866505994845053,
"learning_rate": 5.74781011652318e-06,
"loss": 0.1159,
"step": 1287
},
{
"epoch": 0.9051300070274069,
"grad_norm": 0.19392256854032486,
"learning_rate": 5.742352428879565e-06,
"loss": 0.1181,
"step": 1288
},
{
"epoch": 0.9058327477160928,
"grad_norm": 0.19481198330651944,
"learning_rate": 5.736893836670399e-06,
"loss": 0.1326,
"step": 1289
},
{
"epoch": 0.9065354884047786,
"grad_norm": 0.20533703122224492,
"learning_rate": 5.731434346547045e-06,
"loss": 0.1378,
"step": 1290
},
{
"epoch": 0.9072382290934645,
"grad_norm": 0.19599390636057287,
"learning_rate": 5.72597396516196e-06,
"loss": 0.1285,
"step": 1291
},
{
"epoch": 0.9079409697821503,
"grad_norm": 0.20912104059255976,
"learning_rate": 5.7205126991686825e-06,
"loss": 0.1508,
"step": 1292
},
{
"epoch": 0.9086437104708363,
"grad_norm": 0.206888639346413,
"learning_rate": 5.7150505552218346e-06,
"loss": 0.1413,
"step": 1293
},
{
"epoch": 0.9093464511595222,
"grad_norm": 0.19812457934255984,
"learning_rate": 5.709587539977105e-06,
"loss": 0.1441,
"step": 1294
},
{
"epoch": 0.910049191848208,
"grad_norm": 0.19323908067810255,
"learning_rate": 5.7041236600912475e-06,
"loss": 0.1334,
"step": 1295
},
{
"epoch": 0.9107519325368939,
"grad_norm": 0.2034235888199498,
"learning_rate": 5.698658922222062e-06,
"loss": 0.1337,
"step": 1296
},
{
"epoch": 0.9114546732255797,
"grad_norm": 0.19462058497580845,
"learning_rate": 5.693193333028404e-06,
"loss": 0.1218,
"step": 1297
},
{
"epoch": 0.9121574139142656,
"grad_norm": 0.202514408308712,
"learning_rate": 5.687726899170155e-06,
"loss": 0.1326,
"step": 1298
},
{
"epoch": 0.9128601546029516,
"grad_norm": 0.1955236822400444,
"learning_rate": 5.682259627308238e-06,
"loss": 0.1231,
"step": 1299
},
{
"epoch": 0.9135628952916374,
"grad_norm": 0.1864082729572482,
"learning_rate": 5.6767915241045855e-06,
"loss": 0.1163,
"step": 1300
},
{
"epoch": 0.9142656359803233,
"grad_norm": 0.1956115775479403,
"learning_rate": 5.671322596222153e-06,
"loss": 0.1213,
"step": 1301
},
{
"epoch": 0.9149683766690091,
"grad_norm": 0.17763773286216072,
"learning_rate": 5.665852850324893e-06,
"loss": 0.0998,
"step": 1302
},
{
"epoch": 0.915671117357695,
"grad_norm": 0.20865374425973626,
"learning_rate": 5.660382293077759e-06,
"loss": 0.1532,
"step": 1303
},
{
"epoch": 0.9163738580463809,
"grad_norm": 0.19881395044414618,
"learning_rate": 5.654910931146692e-06,
"loss": 0.1328,
"step": 1304
},
{
"epoch": 0.9170765987350668,
"grad_norm": 0.20455767458356575,
"learning_rate": 5.649438771198616e-06,
"loss": 0.1367,
"step": 1305
},
{
"epoch": 0.9177793394237527,
"grad_norm": 0.20137760614586306,
"learning_rate": 5.64396581990142e-06,
"loss": 0.144,
"step": 1306
},
{
"epoch": 0.9184820801124385,
"grad_norm": 0.19836952897261645,
"learning_rate": 5.638492083923969e-06,
"loss": 0.1346,
"step": 1307
},
{
"epoch": 0.9191848208011244,
"grad_norm": 0.1970760500693914,
"learning_rate": 5.633017569936071e-06,
"loss": 0.1444,
"step": 1308
},
{
"epoch": 0.9198875614898102,
"grad_norm": 0.20040516005994624,
"learning_rate": 5.6275422846084945e-06,
"loss": 0.137,
"step": 1309
},
{
"epoch": 0.9205903021784961,
"grad_norm": 0.19449916465177472,
"learning_rate": 5.622066234612936e-06,
"loss": 0.1191,
"step": 1310
},
{
"epoch": 0.921293042867182,
"grad_norm": 0.1997571361750326,
"learning_rate": 5.616589426622033e-06,
"loss": 0.1276,
"step": 1311
},
{
"epoch": 0.9219957835558679,
"grad_norm": 0.1845130208799834,
"learning_rate": 5.611111867309344e-06,
"loss": 0.1135,
"step": 1312
},
{
"epoch": 0.9226985242445538,
"grad_norm": 0.19983579997322715,
"learning_rate": 5.605633563349341e-06,
"loss": 0.1376,
"step": 1313
},
{
"epoch": 0.9234012649332396,
"grad_norm": 0.2039363846646255,
"learning_rate": 5.600154521417405e-06,
"loss": 0.1403,
"step": 1314
},
{
"epoch": 0.9241040056219255,
"grad_norm": 0.19867191221270214,
"learning_rate": 5.5946747481898144e-06,
"loss": 0.1294,
"step": 1315
},
{
"epoch": 0.9248067463106114,
"grad_norm": 0.2091148124506521,
"learning_rate": 5.589194250343741e-06,
"loss": 0.1625,
"step": 1316
},
{
"epoch": 0.9255094869992972,
"grad_norm": 0.20432932557262368,
"learning_rate": 5.583713034557241e-06,
"loss": 0.1384,
"step": 1317
},
{
"epoch": 0.9262122276879832,
"grad_norm": 0.19233957552359676,
"learning_rate": 5.57823110750924e-06,
"loss": 0.1266,
"step": 1318
},
{
"epoch": 0.926914968376669,
"grad_norm": 0.20208516380112534,
"learning_rate": 5.572748475879536e-06,
"loss": 0.1333,
"step": 1319
},
{
"epoch": 0.9276177090653549,
"grad_norm": 0.1959648694828676,
"learning_rate": 5.567265146348779e-06,
"loss": 0.1275,
"step": 1320
},
{
"epoch": 0.9283204497540407,
"grad_norm": 0.19315473676484518,
"learning_rate": 5.561781125598479e-06,
"loss": 0.1335,
"step": 1321
},
{
"epoch": 0.9290231904427266,
"grad_norm": 0.18521614624152785,
"learning_rate": 5.556296420310977e-06,
"loss": 0.1183,
"step": 1322
},
{
"epoch": 0.9297259311314126,
"grad_norm": 0.18081820449560138,
"learning_rate": 5.550811037169457e-06,
"loss": 0.1122,
"step": 1323
},
{
"epoch": 0.9304286718200984,
"grad_norm": 0.20430313141876325,
"learning_rate": 5.545324982857926e-06,
"loss": 0.1441,
"step": 1324
},
{
"epoch": 0.9311314125087843,
"grad_norm": 0.1966653696375191,
"learning_rate": 5.539838264061207e-06,
"loss": 0.1363,
"step": 1325
},
{
"epoch": 0.9318341531974701,
"grad_norm": 0.1950261488129381,
"learning_rate": 5.534350887464934e-06,
"loss": 0.135,
"step": 1326
},
{
"epoch": 0.932536893886156,
"grad_norm": 0.192488373641706,
"learning_rate": 5.528862859755545e-06,
"loss": 0.1236,
"step": 1327
},
{
"epoch": 0.9332396345748419,
"grad_norm": 0.20065740545294353,
"learning_rate": 5.523374187620266e-06,
"loss": 0.1414,
"step": 1328
},
{
"epoch": 0.9339423752635277,
"grad_norm": 0.19949129292188908,
"learning_rate": 5.517884877747116e-06,
"loss": 0.1441,
"step": 1329
},
{
"epoch": 0.9346451159522137,
"grad_norm": 0.19480490934002895,
"learning_rate": 5.512394936824881e-06,
"loss": 0.1202,
"step": 1330
},
{
"epoch": 0.9353478566408995,
"grad_norm": 0.1918058108849614,
"learning_rate": 5.506904371543126e-06,
"loss": 0.1258,
"step": 1331
},
{
"epoch": 0.9360505973295854,
"grad_norm": 0.18764707361457877,
"learning_rate": 5.501413188592167e-06,
"loss": 0.1152,
"step": 1332
},
{
"epoch": 0.9367533380182712,
"grad_norm": 0.20672693406991102,
"learning_rate": 5.495921394663085e-06,
"loss": 0.146,
"step": 1333
},
{
"epoch": 0.9374560787069571,
"grad_norm": 0.20355109699872836,
"learning_rate": 5.4904289964476905e-06,
"loss": 0.1346,
"step": 1334
},
{
"epoch": 0.938158819395643,
"grad_norm": 0.20576182776854895,
"learning_rate": 5.484936000638546e-06,
"loss": 0.1357,
"step": 1335
},
{
"epoch": 0.9388615600843289,
"grad_norm": 0.2086415103622744,
"learning_rate": 5.479442413928927e-06,
"loss": 0.1477,
"step": 1336
},
{
"epoch": 0.9395643007730148,
"grad_norm": 0.20430942101414945,
"learning_rate": 5.473948243012842e-06,
"loss": 0.1423,
"step": 1337
},
{
"epoch": 0.9402670414617006,
"grad_norm": 0.2014929850207918,
"learning_rate": 5.468453494585002e-06,
"loss": 0.1454,
"step": 1338
},
{
"epoch": 0.9409697821503865,
"grad_norm": 0.18911605807627685,
"learning_rate": 5.462958175340828e-06,
"loss": 0.126,
"step": 1339
},
{
"epoch": 0.9416725228390724,
"grad_norm": 0.19429749333215868,
"learning_rate": 5.457462291976432e-06,
"loss": 0.1238,
"step": 1340
},
{
"epoch": 0.9423752635277582,
"grad_norm": 0.19520663438469207,
"learning_rate": 5.451965851188618e-06,
"loss": 0.1258,
"step": 1341
},
{
"epoch": 0.9430780042164442,
"grad_norm": 0.19954738388425083,
"learning_rate": 5.446468859674862e-06,
"loss": 0.1384,
"step": 1342
},
{
"epoch": 0.94378074490513,
"grad_norm": 0.19226510609068476,
"learning_rate": 5.440971324133322e-06,
"loss": 0.1184,
"step": 1343
},
{
"epoch": 0.9444834855938159,
"grad_norm": 0.19961354344369092,
"learning_rate": 5.435473251262805e-06,
"loss": 0.1424,
"step": 1344
},
{
"epoch": 0.9451862262825017,
"grad_norm": 0.1966261656302828,
"learning_rate": 5.429974647762788e-06,
"loss": 0.1355,
"step": 1345
},
{
"epoch": 0.9458889669711876,
"grad_norm": 0.20891467713825948,
"learning_rate": 5.424475520333381e-06,
"loss": 0.1492,
"step": 1346
},
{
"epoch": 0.9465917076598735,
"grad_norm": 0.18987518451898477,
"learning_rate": 5.418975875675341e-06,
"loss": 0.1097,
"step": 1347
},
{
"epoch": 0.9472944483485594,
"grad_norm": 0.1979288965758812,
"learning_rate": 5.4134757204900525e-06,
"loss": 0.129,
"step": 1348
},
{
"epoch": 0.9479971890372453,
"grad_norm": 0.19569650128771243,
"learning_rate": 5.407975061479521e-06,
"loss": 0.1238,
"step": 1349
},
{
"epoch": 0.9486999297259311,
"grad_norm": 0.20921545395825505,
"learning_rate": 5.402473905346368e-06,
"loss": 0.1384,
"step": 1350
},
{
"epoch": 0.949402670414617,
"grad_norm": 0.19148390544334118,
"learning_rate": 5.39697225879382e-06,
"loss": 0.1229,
"step": 1351
},
{
"epoch": 0.9501054111033029,
"grad_norm": 0.20007469379298098,
"learning_rate": 5.3914701285257e-06,
"loss": 0.1265,
"step": 1352
},
{
"epoch": 0.9508081517919887,
"grad_norm": 0.21742322187444954,
"learning_rate": 5.385967521246422e-06,
"loss": 0.1619,
"step": 1353
},
{
"epoch": 0.9515108924806747,
"grad_norm": 0.19656231433333834,
"learning_rate": 5.38046444366098e-06,
"loss": 0.1321,
"step": 1354
},
{
"epoch": 0.9522136331693605,
"grad_norm": 0.1898061874911607,
"learning_rate": 5.3749609024749424e-06,
"loss": 0.1248,
"step": 1355
},
{
"epoch": 0.9529163738580464,
"grad_norm": 0.1908369097500313,
"learning_rate": 5.36945690439444e-06,
"loss": 0.1196,
"step": 1356
},
{
"epoch": 0.9536191145467322,
"grad_norm": 0.2064164279970009,
"learning_rate": 5.363952456126165e-06,
"loss": 0.1487,
"step": 1357
},
{
"epoch": 0.9543218552354181,
"grad_norm": 0.18913110875072,
"learning_rate": 5.358447564377352e-06,
"loss": 0.1252,
"step": 1358
},
{
"epoch": 0.955024595924104,
"grad_norm": 0.18574202848544408,
"learning_rate": 5.35294223585578e-06,
"loss": 0.1103,
"step": 1359
},
{
"epoch": 0.9557273366127899,
"grad_norm": 0.19778016912060184,
"learning_rate": 5.34743647726976e-06,
"loss": 0.1281,
"step": 1360
},
{
"epoch": 0.9564300773014758,
"grad_norm": 0.2165279691126957,
"learning_rate": 5.341930295328129e-06,
"loss": 0.1586,
"step": 1361
},
{
"epoch": 0.9571328179901616,
"grad_norm": 0.21407084354366374,
"learning_rate": 5.336423696740233e-06,
"loss": 0.1675,
"step": 1362
},
{
"epoch": 0.9578355586788475,
"grad_norm": 0.18337549199463796,
"learning_rate": 5.330916688215931e-06,
"loss": 0.1044,
"step": 1363
},
{
"epoch": 0.9585382993675334,
"grad_norm": 0.18423105878296023,
"learning_rate": 5.325409276465581e-06,
"loss": 0.1119,
"step": 1364
},
{
"epoch": 0.9592410400562192,
"grad_norm": 0.19151373169623573,
"learning_rate": 5.319901468200034e-06,
"loss": 0.1228,
"step": 1365
},
{
"epoch": 0.9599437807449052,
"grad_norm": 0.19935683818222558,
"learning_rate": 5.314393270130617e-06,
"loss": 0.1308,
"step": 1366
},
{
"epoch": 0.960646521433591,
"grad_norm": 0.1932892974724098,
"learning_rate": 5.308884688969145e-06,
"loss": 0.1272,
"step": 1367
},
{
"epoch": 0.9613492621222769,
"grad_norm": 0.1901326144671129,
"learning_rate": 5.303375731427882e-06,
"loss": 0.1241,
"step": 1368
},
{
"epoch": 0.9620520028109627,
"grad_norm": 0.19854285521936257,
"learning_rate": 5.297866404219569e-06,
"loss": 0.1231,
"step": 1369
},
{
"epoch": 0.9627547434996486,
"grad_norm": 0.1997883915245294,
"learning_rate": 5.292356714057382e-06,
"loss": 0.1348,
"step": 1370
},
{
"epoch": 0.9634574841883345,
"grad_norm": 0.18290454543764084,
"learning_rate": 5.28684666765495e-06,
"loss": 0.1036,
"step": 1371
},
{
"epoch": 0.9641602248770204,
"grad_norm": 0.1908167714389136,
"learning_rate": 5.281336271726333e-06,
"loss": 0.1234,
"step": 1372
},
{
"epoch": 0.9648629655657063,
"grad_norm": 0.19924674051678415,
"learning_rate": 5.275825532986013e-06,
"loss": 0.122,
"step": 1373
},
{
"epoch": 0.9655657062543921,
"grad_norm": 0.18331624742905975,
"learning_rate": 5.270314458148896e-06,
"loss": 0.103,
"step": 1374
},
{
"epoch": 0.966268446943078,
"grad_norm": 0.2068178765166946,
"learning_rate": 5.2648030539302894e-06,
"loss": 0.148,
"step": 1375
},
{
"epoch": 0.9669711876317639,
"grad_norm": 0.21054530324787113,
"learning_rate": 5.259291327045912e-06,
"loss": 0.1542,
"step": 1376
},
{
"epoch": 0.9676739283204497,
"grad_norm": 0.19927041046322405,
"learning_rate": 5.2537792842118694e-06,
"loss": 0.1158,
"step": 1377
},
{
"epoch": 0.9683766690091357,
"grad_norm": 0.1817538284139379,
"learning_rate": 5.248266932144652e-06,
"loss": 0.1079,
"step": 1378
},
{
"epoch": 0.9690794096978215,
"grad_norm": 0.18435736985454354,
"learning_rate": 5.2427542775611314e-06,
"loss": 0.1139,
"step": 1379
},
{
"epoch": 0.9697821503865074,
"grad_norm": 0.1924658670274933,
"learning_rate": 5.23724132717854e-06,
"loss": 0.1303,
"step": 1380
},
{
"epoch": 0.9704848910751932,
"grad_norm": 0.1983712960465921,
"learning_rate": 5.231728087714482e-06,
"loss": 0.1361,
"step": 1381
},
{
"epoch": 0.9711876317638791,
"grad_norm": 0.2091403895243271,
"learning_rate": 5.2262145658869005e-06,
"loss": 0.1391,
"step": 1382
},
{
"epoch": 0.971890372452565,
"grad_norm": 0.21357411033234036,
"learning_rate": 5.220700768414094e-06,
"loss": 0.1474,
"step": 1383
},
{
"epoch": 0.9725931131412509,
"grad_norm": 0.2039118475473804,
"learning_rate": 5.215186702014692e-06,
"loss": 0.1445,
"step": 1384
},
{
"epoch": 0.9732958538299368,
"grad_norm": 0.19877337775522677,
"learning_rate": 5.209672373407651e-06,
"loss": 0.1312,
"step": 1385
},
{
"epoch": 0.9739985945186226,
"grad_norm": 0.19833968680109917,
"learning_rate": 5.204157789312248e-06,
"loss": 0.128,
"step": 1386
},
{
"epoch": 0.9747013352073085,
"grad_norm": 0.20137393257767838,
"learning_rate": 5.198642956448072e-06,
"loss": 0.1343,
"step": 1387
},
{
"epoch": 0.9754040758959944,
"grad_norm": 0.20551146092893077,
"learning_rate": 5.193127881535015e-06,
"loss": 0.1443,
"step": 1388
},
{
"epoch": 0.9761068165846802,
"grad_norm": 0.20432016645208295,
"learning_rate": 5.187612571293263e-06,
"loss": 0.1376,
"step": 1389
},
{
"epoch": 0.9768095572733662,
"grad_norm": 0.2033422621715428,
"learning_rate": 5.182097032443288e-06,
"loss": 0.1385,
"step": 1390
},
{
"epoch": 0.977512297962052,
"grad_norm": 0.17633686737690363,
"learning_rate": 5.176581271705845e-06,
"loss": 0.1066,
"step": 1391
},
{
"epoch": 0.9782150386507379,
"grad_norm": 0.1864279377135331,
"learning_rate": 5.1710652958019525e-06,
"loss": 0.1124,
"step": 1392
},
{
"epoch": 0.9789177793394237,
"grad_norm": 0.19613069538988684,
"learning_rate": 5.165549111452899e-06,
"loss": 0.1238,
"step": 1393
},
{
"epoch": 0.9796205200281096,
"grad_norm": 0.18936686483286688,
"learning_rate": 5.1600327253802184e-06,
"loss": 0.1185,
"step": 1394
},
{
"epoch": 0.9803232607167955,
"grad_norm": 0.1927386152450253,
"learning_rate": 5.154516144305698e-06,
"loss": 0.1202,
"step": 1395
},
{
"epoch": 0.9810260014054814,
"grad_norm": 0.1829279606991691,
"learning_rate": 5.1489993749513576e-06,
"loss": 0.1135,
"step": 1396
},
{
"epoch": 0.9817287420941673,
"grad_norm": 0.17758831078847528,
"learning_rate": 5.1434824240394494e-06,
"loss": 0.1044,
"step": 1397
},
{
"epoch": 0.9824314827828531,
"grad_norm": 0.17580142994925224,
"learning_rate": 5.1379652982924465e-06,
"loss": 0.1003,
"step": 1398
},
{
"epoch": 0.983134223471539,
"grad_norm": 0.19726702175694222,
"learning_rate": 5.132448004433034e-06,
"loss": 0.1364,
"step": 1399
},
{
"epoch": 0.9838369641602249,
"grad_norm": 0.1985294526595358,
"learning_rate": 5.1269305491841015e-06,
"loss": 0.1398,
"step": 1400
},
{
"epoch": 0.9845397048489107,
"grad_norm": 0.20315467894750108,
"learning_rate": 5.121412939268736e-06,
"loss": 0.1407,
"step": 1401
},
{
"epoch": 0.9852424455375967,
"grad_norm": 0.20446393313230393,
"learning_rate": 5.115895181410213e-06,
"loss": 0.1463,
"step": 1402
},
{
"epoch": 0.9859451862262825,
"grad_norm": 0.19621907040364944,
"learning_rate": 5.110377282331988e-06,
"loss": 0.1171,
"step": 1403
},
{
"epoch": 0.9866479269149684,
"grad_norm": 0.1967949773479424,
"learning_rate": 5.10485924875769e-06,
"loss": 0.119,
"step": 1404
},
{
"epoch": 0.9873506676036542,
"grad_norm": 0.1989646695718241,
"learning_rate": 5.09934108741111e-06,
"loss": 0.1415,
"step": 1405
},
{
"epoch": 0.9880534082923401,
"grad_norm": 0.1829336027220892,
"learning_rate": 5.093822805016194e-06,
"loss": 0.1087,
"step": 1406
},
{
"epoch": 0.988756148981026,
"grad_norm": 0.2013636016887553,
"learning_rate": 5.088304408297039e-06,
"loss": 0.13,
"step": 1407
},
{
"epoch": 0.9894588896697118,
"grad_norm": 0.19439213099717614,
"learning_rate": 5.0827859039778784e-06,
"loss": 0.1219,
"step": 1408
},
{
"epoch": 0.9901616303583978,
"grad_norm": 0.19107016737081817,
"learning_rate": 5.077267298783077e-06,
"loss": 0.1258,
"step": 1409
},
{
"epoch": 0.9908643710470836,
"grad_norm": 0.20210212229111227,
"learning_rate": 5.071748599437124e-06,
"loss": 0.141,
"step": 1410
},
{
"epoch": 0.9915671117357695,
"grad_norm": 0.19768204699215267,
"learning_rate": 5.066229812664621e-06,
"loss": 0.1259,
"step": 1411
},
{
"epoch": 0.9922698524244554,
"grad_norm": 0.20824918097695097,
"learning_rate": 5.060710945190278e-06,
"loss": 0.146,
"step": 1412
},
{
"epoch": 0.9929725931131412,
"grad_norm": 0.1924470486470161,
"learning_rate": 5.0551920037389035e-06,
"loss": 0.1135,
"step": 1413
},
{
"epoch": 0.9936753338018272,
"grad_norm": 0.19971221645697407,
"learning_rate": 5.049672995035394e-06,
"loss": 0.1364,
"step": 1414
},
{
"epoch": 0.994378074490513,
"grad_norm": 0.1961290229020502,
"learning_rate": 5.04415392580473e-06,
"loss": 0.1196,
"step": 1415
},
{
"epoch": 0.9950808151791989,
"grad_norm": 0.20928492057732942,
"learning_rate": 5.038634802771966e-06,
"loss": 0.154,
"step": 1416
},
{
"epoch": 0.9957835558678847,
"grad_norm": 0.2072797410333225,
"learning_rate": 5.03311563266222e-06,
"loss": 0.1436,
"step": 1417
},
{
"epoch": 0.9964862965565706,
"grad_norm": 0.20381967030909048,
"learning_rate": 5.027596422200668e-06,
"loss": 0.1414,
"step": 1418
},
{
"epoch": 0.9971890372452565,
"grad_norm": 0.20619629771742598,
"learning_rate": 5.022077178112537e-06,
"loss": 0.1408,
"step": 1419
},
{
"epoch": 0.9978917779339423,
"grad_norm": 0.19087361611936965,
"learning_rate": 5.016557907123095e-06,
"loss": 0.1157,
"step": 1420
},
{
"epoch": 0.9985945186226283,
"grad_norm": 0.19029763729738525,
"learning_rate": 5.011038615957639e-06,
"loss": 0.1232,
"step": 1421
},
{
"epoch": 0.9992972593113141,
"grad_norm": 0.19774263932983807,
"learning_rate": 5.005519311341495e-06,
"loss": 0.1373,
"step": 1422
},
{
"epoch": 1.0,
"grad_norm": 0.19143947555684038,
"learning_rate": 5e-06,
"loss": 0.121,
"step": 1423
},
{
"epoch": 1.000702740688686,
"grad_norm": 0.187176088651155,
"learning_rate": 4.994480688658508e-06,
"loss": 0.1198,
"step": 1424
},
{
"epoch": 1.0014054813773718,
"grad_norm": 0.18685670131542642,
"learning_rate": 4.9889613840423615e-06,
"loss": 0.1174,
"step": 1425
},
{
"epoch": 1.0021082220660575,
"grad_norm": 0.189988750879881,
"learning_rate": 4.983442092876906e-06,
"loss": 0.126,
"step": 1426
},
{
"epoch": 1.0028109627547435,
"grad_norm": 0.1949149062459651,
"learning_rate": 4.977922821887463e-06,
"loss": 0.1327,
"step": 1427
},
{
"epoch": 1.0035137034434294,
"grad_norm": 0.17737119216804442,
"learning_rate": 4.972403577799334e-06,
"loss": 0.1017,
"step": 1428
},
{
"epoch": 1.0042164441321153,
"grad_norm": 0.19059719773915879,
"learning_rate": 4.966884367337781e-06,
"loss": 0.1109,
"step": 1429
},
{
"epoch": 1.0049191848208012,
"grad_norm": 0.18053277607384044,
"learning_rate": 4.961365197228035e-06,
"loss": 0.1023,
"step": 1430
},
{
"epoch": 1.005621925509487,
"grad_norm": 0.19942996059434606,
"learning_rate": 4.9558460741952725e-06,
"loss": 0.1258,
"step": 1431
},
{
"epoch": 1.0063246661981728,
"grad_norm": 0.19401776831333392,
"learning_rate": 4.950327004964607e-06,
"loss": 0.1268,
"step": 1432
},
{
"epoch": 1.0070274068868588,
"grad_norm": 0.1996552970179131,
"learning_rate": 4.944807996261098e-06,
"loss": 0.1186,
"step": 1433
},
{
"epoch": 1.0077301475755447,
"grad_norm": 0.1836098813519472,
"learning_rate": 4.9392890548097235e-06,
"loss": 0.0979,
"step": 1434
},
{
"epoch": 1.0084328882642306,
"grad_norm": 0.20754445721686504,
"learning_rate": 4.93377018733538e-06,
"loss": 0.1366,
"step": 1435
},
{
"epoch": 1.0091356289529163,
"grad_norm": 0.19588592694045923,
"learning_rate": 4.928251400562878e-06,
"loss": 0.1161,
"step": 1436
},
{
"epoch": 1.0098383696416022,
"grad_norm": 0.20054591015028514,
"learning_rate": 4.922732701216924e-06,
"loss": 0.1137,
"step": 1437
},
{
"epoch": 1.0105411103302882,
"grad_norm": 0.198040716164282,
"learning_rate": 4.917214096022123e-06,
"loss": 0.1101,
"step": 1438
},
{
"epoch": 1.011243851018974,
"grad_norm": 0.20829477333406718,
"learning_rate": 4.911695591702962e-06,
"loss": 0.1264,
"step": 1439
},
{
"epoch": 1.0119465917076598,
"grad_norm": 0.1981796987055125,
"learning_rate": 4.906177194983807e-06,
"loss": 0.1039,
"step": 1440
},
{
"epoch": 1.0126493323963457,
"grad_norm": 0.1951618981370917,
"learning_rate": 4.9006589125888924e-06,
"loss": 0.097,
"step": 1441
},
{
"epoch": 1.0133520730850316,
"grad_norm": 0.2073693584960714,
"learning_rate": 4.8951407512423125e-06,
"loss": 0.1171,
"step": 1442
},
{
"epoch": 1.0140548137737175,
"grad_norm": 0.20642032279058445,
"learning_rate": 4.889622717668012e-06,
"loss": 0.119,
"step": 1443
},
{
"epoch": 1.0147575544624035,
"grad_norm": 0.20454418127738971,
"learning_rate": 4.884104818589788e-06,
"loss": 0.104,
"step": 1444
},
{
"epoch": 1.0154602951510892,
"grad_norm": 0.21019797725915265,
"learning_rate": 4.878587060731267e-06,
"loss": 0.1209,
"step": 1445
},
{
"epoch": 1.016163035839775,
"grad_norm": 0.21343372886861442,
"learning_rate": 4.8730694508159e-06,
"loss": 0.1101,
"step": 1446
},
{
"epoch": 1.016865776528461,
"grad_norm": 0.20969701615392722,
"learning_rate": 4.867551995566968e-06,
"loss": 0.1199,
"step": 1447
},
{
"epoch": 1.017568517217147,
"grad_norm": 0.19597182104328686,
"learning_rate": 4.862034701707554e-06,
"loss": 0.1015,
"step": 1448
},
{
"epoch": 1.0182712579058328,
"grad_norm": 0.2000639630813112,
"learning_rate": 4.8565175759605505e-06,
"loss": 0.1104,
"step": 1449
},
{
"epoch": 1.0189739985945185,
"grad_norm": 0.20856271362966375,
"learning_rate": 4.851000625048643e-06,
"loss": 0.1147,
"step": 1450
},
{
"epoch": 1.0196767392832045,
"grad_norm": 0.19241563154221814,
"learning_rate": 4.845483855694304e-06,
"loss": 0.0937,
"step": 1451
},
{
"epoch": 1.0203794799718904,
"grad_norm": 0.19816004987233626,
"learning_rate": 4.839967274619783e-06,
"loss": 0.1097,
"step": 1452
},
{
"epoch": 1.0210822206605763,
"grad_norm": 0.21292693936724938,
"learning_rate": 4.834450888547103e-06,
"loss": 0.1368,
"step": 1453
},
{
"epoch": 1.0217849613492622,
"grad_norm": 0.21240292887672785,
"learning_rate": 4.8289347041980475e-06,
"loss": 0.1257,
"step": 1454
},
{
"epoch": 1.022487702037948,
"grad_norm": 0.1931491293899139,
"learning_rate": 4.823418728294157e-06,
"loss": 0.0959,
"step": 1455
},
{
"epoch": 1.0231904427266338,
"grad_norm": 0.19952978590878218,
"learning_rate": 4.817902967556714e-06,
"loss": 0.1111,
"step": 1456
},
{
"epoch": 1.0238931834153198,
"grad_norm": 0.2018490905514719,
"learning_rate": 4.8123874287067385e-06,
"loss": 0.1151,
"step": 1457
},
{
"epoch": 1.0245959241040057,
"grad_norm": 0.2101173250281467,
"learning_rate": 4.806872118464987e-06,
"loss": 0.1308,
"step": 1458
},
{
"epoch": 1.0252986647926916,
"grad_norm": 0.2053777175875725,
"learning_rate": 4.801357043551928e-06,
"loss": 0.117,
"step": 1459
},
{
"epoch": 1.0260014054813773,
"grad_norm": 0.19641488987956607,
"learning_rate": 4.795842210687754e-06,
"loss": 0.1213,
"step": 1460
},
{
"epoch": 1.0267041461700632,
"grad_norm": 0.19662883300911635,
"learning_rate": 4.790327626592351e-06,
"loss": 0.1014,
"step": 1461
},
{
"epoch": 1.0274068868587491,
"grad_norm": 0.20054305473676068,
"learning_rate": 4.78481329798531e-06,
"loss": 0.1139,
"step": 1462
},
{
"epoch": 1.028109627547435,
"grad_norm": 0.20480383173455802,
"learning_rate": 4.779299231585907e-06,
"loss": 0.124,
"step": 1463
},
{
"epoch": 1.0288123682361208,
"grad_norm": 0.20598573974020698,
"learning_rate": 4.773785434113101e-06,
"loss": 0.1105,
"step": 1464
},
{
"epoch": 1.0295151089248067,
"grad_norm": 0.2097361583143868,
"learning_rate": 4.768271912285521e-06,
"loss": 0.1258,
"step": 1465
},
{
"epoch": 1.0302178496134926,
"grad_norm": 0.1901578717475438,
"learning_rate": 4.7627586728214606e-06,
"loss": 0.099,
"step": 1466
},
{
"epoch": 1.0309205903021785,
"grad_norm": 0.20861741323212532,
"learning_rate": 4.75724572243887e-06,
"loss": 0.1131,
"step": 1467
},
{
"epoch": 1.0316233309908645,
"grad_norm": 0.20174388714071156,
"learning_rate": 4.751733067855348e-06,
"loss": 0.1229,
"step": 1468
},
{
"epoch": 1.0323260716795502,
"grad_norm": 0.18549952631890682,
"learning_rate": 4.746220715788132e-06,
"loss": 0.0924,
"step": 1469
},
{
"epoch": 1.033028812368236,
"grad_norm": 0.2050524442419532,
"learning_rate": 4.74070867295409e-06,
"loss": 0.1131,
"step": 1470
},
{
"epoch": 1.033731553056922,
"grad_norm": 0.21016080893030858,
"learning_rate": 4.735196946069711e-06,
"loss": 0.1265,
"step": 1471
},
{
"epoch": 1.034434293745608,
"grad_norm": 0.19422761384612083,
"learning_rate": 4.729685541851107e-06,
"loss": 0.0917,
"step": 1472
},
{
"epoch": 1.0351370344342938,
"grad_norm": 0.21721806259747856,
"learning_rate": 4.724174467013987e-06,
"loss": 0.1308,
"step": 1473
},
{
"epoch": 1.0358397751229795,
"grad_norm": 0.20391351432174598,
"learning_rate": 4.718663728273669e-06,
"loss": 0.101,
"step": 1474
},
{
"epoch": 1.0365425158116655,
"grad_norm": 0.2127128214630209,
"learning_rate": 4.7131533323450505e-06,
"loss": 0.1299,
"step": 1475
},
{
"epoch": 1.0372452565003514,
"grad_norm": 0.20498203915735017,
"learning_rate": 4.707643285942619e-06,
"loss": 0.12,
"step": 1476
},
{
"epoch": 1.0379479971890373,
"grad_norm": 0.21418347159975537,
"learning_rate": 4.702133595780433e-06,
"loss": 0.1315,
"step": 1477
},
{
"epoch": 1.0386507378777232,
"grad_norm": 0.198710983339266,
"learning_rate": 4.696624268572118e-06,
"loss": 0.1129,
"step": 1478
},
{
"epoch": 1.039353478566409,
"grad_norm": 0.21200699843579981,
"learning_rate": 4.6911153110308574e-06,
"loss": 0.1263,
"step": 1479
},
{
"epoch": 1.0400562192550948,
"grad_norm": 0.19694618486372614,
"learning_rate": 4.6856067298693834e-06,
"loss": 0.113,
"step": 1480
},
{
"epoch": 1.0407589599437808,
"grad_norm": 0.19664632524178943,
"learning_rate": 4.680098531799967e-06,
"loss": 0.107,
"step": 1481
},
{
"epoch": 1.0414617006324667,
"grad_norm": 0.18554820543768438,
"learning_rate": 4.674590723534419e-06,
"loss": 0.0908,
"step": 1482
},
{
"epoch": 1.0421644413211526,
"grad_norm": 0.205735688615173,
"learning_rate": 4.669083311784069e-06,
"loss": 0.124,
"step": 1483
},
{
"epoch": 1.0428671820098383,
"grad_norm": 0.20693579979085613,
"learning_rate": 4.6635763032597704e-06,
"loss": 0.1171,
"step": 1484
},
{
"epoch": 1.0435699226985242,
"grad_norm": 0.21265071171444125,
"learning_rate": 4.658069704671873e-06,
"loss": 0.1319,
"step": 1485
},
{
"epoch": 1.0442726633872101,
"grad_norm": 0.20649683931937868,
"learning_rate": 4.65256352273024e-06,
"loss": 0.1095,
"step": 1486
},
{
"epoch": 1.044975404075896,
"grad_norm": 0.2014578161027546,
"learning_rate": 4.64705776414422e-06,
"loss": 0.1109,
"step": 1487
},
{
"epoch": 1.0456781447645818,
"grad_norm": 0.20604952945406715,
"learning_rate": 4.641552435622651e-06,
"loss": 0.1163,
"step": 1488
},
{
"epoch": 1.0463808854532677,
"grad_norm": 0.20211639752431132,
"learning_rate": 4.636047543873838e-06,
"loss": 0.1119,
"step": 1489
},
{
"epoch": 1.0470836261419536,
"grad_norm": 0.1912274101132898,
"learning_rate": 4.630543095605562e-06,
"loss": 0.0997,
"step": 1490
},
{
"epoch": 1.0477863668306395,
"grad_norm": 0.20668171797542279,
"learning_rate": 4.625039097525058e-06,
"loss": 0.1076,
"step": 1491
},
{
"epoch": 1.0484891075193254,
"grad_norm": 0.19870855498699982,
"learning_rate": 4.619535556339021e-06,
"loss": 0.1128,
"step": 1492
},
{
"epoch": 1.0491918482080111,
"grad_norm": 0.20803829377970773,
"learning_rate": 4.61403247875358e-06,
"loss": 0.1197,
"step": 1493
},
{
"epoch": 1.049894588896697,
"grad_norm": 0.19927569524820904,
"learning_rate": 4.6085298714743025e-06,
"loss": 0.1066,
"step": 1494
},
{
"epoch": 1.050597329585383,
"grad_norm": 0.20227417627883712,
"learning_rate": 4.603027741206181e-06,
"loss": 0.1199,
"step": 1495
},
{
"epoch": 1.051300070274069,
"grad_norm": 0.18009482280055714,
"learning_rate": 4.597526094653633e-06,
"loss": 0.0841,
"step": 1496
},
{
"epoch": 1.0520028109627548,
"grad_norm": 0.19760520495532036,
"learning_rate": 4.592024938520479e-06,
"loss": 0.1014,
"step": 1497
},
{
"epoch": 1.0527055516514405,
"grad_norm": 0.1895974145714912,
"learning_rate": 4.58652427950995e-06,
"loss": 0.0908,
"step": 1498
},
{
"epoch": 1.0534082923401265,
"grad_norm": 0.1918687585935464,
"learning_rate": 4.581024124324661e-06,
"loss": 0.1054,
"step": 1499
},
{
"epoch": 1.0541110330288124,
"grad_norm": 0.18793187153395455,
"learning_rate": 4.575524479666621e-06,
"loss": 0.1007,
"step": 1500
},
{
"epoch": 1.0541110330288124,
"eval_loss": 0.1364968866109848,
"eval_runtime": 10.716,
"eval_samples_per_second": 21.463,
"eval_steps_per_second": 5.412,
"step": 1500
},
{
"epoch": 1.0548137737174983,
"grad_norm": 0.20694473544859504,
"learning_rate": 4.570025352237213e-06,
"loss": 0.1225,
"step": 1501
},
{
"epoch": 1.0555165144061842,
"grad_norm": 0.2189785261324962,
"learning_rate": 4.564526748737195e-06,
"loss": 0.1378,
"step": 1502
},
{
"epoch": 1.05621925509487,
"grad_norm": 0.2076316325683222,
"learning_rate": 4.559028675866681e-06,
"loss": 0.1177,
"step": 1503
},
{
"epoch": 1.0569219957835558,
"grad_norm": 0.20033504941296018,
"learning_rate": 4.553531140325139e-06,
"loss": 0.1085,
"step": 1504
},
{
"epoch": 1.0576247364722418,
"grad_norm": 0.18776159413715549,
"learning_rate": 4.548034148811384e-06,
"loss": 0.0863,
"step": 1505
},
{
"epoch": 1.0583274771609277,
"grad_norm": 0.1990273084920482,
"learning_rate": 4.542537708023569e-06,
"loss": 0.1072,
"step": 1506
},
{
"epoch": 1.0590302178496136,
"grad_norm": 0.20872636298305347,
"learning_rate": 4.537041824659172e-06,
"loss": 0.1194,
"step": 1507
},
{
"epoch": 1.0597329585382993,
"grad_norm": 0.19629963927186558,
"learning_rate": 4.531546505415e-06,
"loss": 0.1032,
"step": 1508
},
{
"epoch": 1.0604356992269852,
"grad_norm": 0.2078942881069584,
"learning_rate": 4.52605175698716e-06,
"loss": 0.1233,
"step": 1509
},
{
"epoch": 1.0611384399156711,
"grad_norm": 0.19473329374966022,
"learning_rate": 4.520557586071074e-06,
"loss": 0.1048,
"step": 1510
},
{
"epoch": 1.061841180604357,
"grad_norm": 0.1996146661082397,
"learning_rate": 4.515063999361455e-06,
"loss": 0.0976,
"step": 1511
},
{
"epoch": 1.062543921293043,
"grad_norm": 0.20550998162631082,
"learning_rate": 4.509571003552311e-06,
"loss": 0.1072,
"step": 1512
},
{
"epoch": 1.0632466619817287,
"grad_norm": 0.19801510506179737,
"learning_rate": 4.5040786053369175e-06,
"loss": 0.0983,
"step": 1513
},
{
"epoch": 1.0639494026704146,
"grad_norm": 0.21214356100554063,
"learning_rate": 4.498586811407834e-06,
"loss": 0.124,
"step": 1514
},
{
"epoch": 1.0646521433591005,
"grad_norm": 0.20204313916068004,
"learning_rate": 4.493095628456876e-06,
"loss": 0.1131,
"step": 1515
},
{
"epoch": 1.0653548840477864,
"grad_norm": 0.21828692333953034,
"learning_rate": 4.487605063175119e-06,
"loss": 0.1357,
"step": 1516
},
{
"epoch": 1.0660576247364721,
"grad_norm": 0.21144583033790573,
"learning_rate": 4.482115122252887e-06,
"loss": 0.1225,
"step": 1517
},
{
"epoch": 1.066760365425158,
"grad_norm": 0.1861021213659463,
"learning_rate": 4.4766258123797355e-06,
"loss": 0.0958,
"step": 1518
},
{
"epoch": 1.067463106113844,
"grad_norm": 0.20245135295233802,
"learning_rate": 4.471137140244456e-06,
"loss": 0.1129,
"step": 1519
},
{
"epoch": 1.06816584680253,
"grad_norm": 0.19745721785963175,
"learning_rate": 4.465649112535067e-06,
"loss": 0.1087,
"step": 1520
},
{
"epoch": 1.0688685874912158,
"grad_norm": 0.20160238215677423,
"learning_rate": 4.460161735938794e-06,
"loss": 0.1108,
"step": 1521
},
{
"epoch": 1.0695713281799015,
"grad_norm": 0.19825037041663826,
"learning_rate": 4.4546750171420764e-06,
"loss": 0.106,
"step": 1522
},
{
"epoch": 1.0702740688685874,
"grad_norm": 0.19297871235254654,
"learning_rate": 4.449188962830544e-06,
"loss": 0.0966,
"step": 1523
},
{
"epoch": 1.0709768095572734,
"grad_norm": 0.2046795814180762,
"learning_rate": 4.443703579689025e-06,
"loss": 0.1145,
"step": 1524
},
{
"epoch": 1.0716795502459593,
"grad_norm": 0.20313049773683697,
"learning_rate": 4.438218874401522e-06,
"loss": 0.1171,
"step": 1525
},
{
"epoch": 1.0723822909346452,
"grad_norm": 0.20609356337547366,
"learning_rate": 4.432734853651222e-06,
"loss": 0.1156,
"step": 1526
},
{
"epoch": 1.073085031623331,
"grad_norm": 0.2127505686714447,
"learning_rate": 4.4272515241204674e-06,
"loss": 0.1256,
"step": 1527
},
{
"epoch": 1.0737877723120168,
"grad_norm": 0.2009238483526129,
"learning_rate": 4.421768892490762e-06,
"loss": 0.1057,
"step": 1528
},
{
"epoch": 1.0744905130007028,
"grad_norm": 0.21574510191302398,
"learning_rate": 4.416286965442761e-06,
"loss": 0.1229,
"step": 1529
},
{
"epoch": 1.0751932536893887,
"grad_norm": 0.2095063057083897,
"learning_rate": 4.41080574965626e-06,
"loss": 0.1217,
"step": 1530
},
{
"epoch": 1.0758959943780746,
"grad_norm": 0.2127454677446772,
"learning_rate": 4.4053252518101855e-06,
"loss": 0.118,
"step": 1531
},
{
"epoch": 1.0765987350667603,
"grad_norm": 0.20140159845916322,
"learning_rate": 4.399845478582598e-06,
"loss": 0.1137,
"step": 1532
},
{
"epoch": 1.0773014757554462,
"grad_norm": 0.19182879580235387,
"learning_rate": 4.394366436650661e-06,
"loss": 0.1006,
"step": 1533
},
{
"epoch": 1.0780042164441321,
"grad_norm": 0.20406632935152563,
"learning_rate": 4.388888132690657e-06,
"loss": 0.0908,
"step": 1534
},
{
"epoch": 1.078706957132818,
"grad_norm": 0.20209372125490144,
"learning_rate": 4.383410573377966e-06,
"loss": 0.1063,
"step": 1535
},
{
"epoch": 1.0794096978215038,
"grad_norm": 0.20993132422340827,
"learning_rate": 4.3779337653870666e-06,
"loss": 0.1177,
"step": 1536
},
{
"epoch": 1.0801124385101897,
"grad_norm": 0.2157446256584516,
"learning_rate": 4.372457715391508e-06,
"loss": 0.1252,
"step": 1537
},
{
"epoch": 1.0808151791988756,
"grad_norm": 0.21628237205208456,
"learning_rate": 4.3669824300639305e-06,
"loss": 0.1398,
"step": 1538
},
{
"epoch": 1.0815179198875615,
"grad_norm": 0.1986447009558914,
"learning_rate": 4.361507916076032e-06,
"loss": 0.1077,
"step": 1539
},
{
"epoch": 1.0822206605762474,
"grad_norm": 0.19573739105453836,
"learning_rate": 4.35603418009858e-06,
"loss": 0.0936,
"step": 1540
},
{
"epoch": 1.0829234012649331,
"grad_norm": 0.19572389292085354,
"learning_rate": 4.350561228801386e-06,
"loss": 0.1023,
"step": 1541
},
{
"epoch": 1.083626141953619,
"grad_norm": 0.19728622211333122,
"learning_rate": 4.345089068853309e-06,
"loss": 0.1031,
"step": 1542
},
{
"epoch": 1.084328882642305,
"grad_norm": 0.20630759482504232,
"learning_rate": 4.339617706922242e-06,
"loss": 0.1147,
"step": 1543
},
{
"epoch": 1.085031623330991,
"grad_norm": 0.21096633008461227,
"learning_rate": 4.3341471496751085e-06,
"loss": 0.1073,
"step": 1544
},
{
"epoch": 1.0857343640196768,
"grad_norm": 0.20218981444714496,
"learning_rate": 4.328677403777848e-06,
"loss": 0.1049,
"step": 1545
},
{
"epoch": 1.0864371047083625,
"grad_norm": 0.21763398652797736,
"learning_rate": 4.323208475895416e-06,
"loss": 0.1415,
"step": 1546
},
{
"epoch": 1.0871398453970484,
"grad_norm": 0.20397390976640065,
"learning_rate": 4.317740372691765e-06,
"loss": 0.1093,
"step": 1547
},
{
"epoch": 1.0878425860857344,
"grad_norm": 0.19943581163243598,
"learning_rate": 4.312273100829845e-06,
"loss": 0.0974,
"step": 1548
},
{
"epoch": 1.0885453267744203,
"grad_norm": 0.2150853827756242,
"learning_rate": 4.306806666971597e-06,
"loss": 0.1243,
"step": 1549
},
{
"epoch": 1.0892480674631062,
"grad_norm": 0.21562006161951364,
"learning_rate": 4.3013410777779375e-06,
"loss": 0.1218,
"step": 1550
},
{
"epoch": 1.089950808151792,
"grad_norm": 0.186713531358137,
"learning_rate": 4.295876339908755e-06,
"loss": 0.0851,
"step": 1551
},
{
"epoch": 1.0906535488404778,
"grad_norm": 0.20748980558823718,
"learning_rate": 4.290412460022896e-06,
"loss": 0.1034,
"step": 1552
},
{
"epoch": 1.0913562895291637,
"grad_norm": 0.2081548336859854,
"learning_rate": 4.284949444778166e-06,
"loss": 0.1164,
"step": 1553
},
{
"epoch": 1.0920590302178497,
"grad_norm": 0.2036173131211529,
"learning_rate": 4.279487300831318e-06,
"loss": 0.1022,
"step": 1554
},
{
"epoch": 1.0927617709065356,
"grad_norm": 0.1972168024335755,
"learning_rate": 4.274026034838043e-06,
"loss": 0.1023,
"step": 1555
},
{
"epoch": 1.0934645115952213,
"grad_norm": 0.21245394195362868,
"learning_rate": 4.2685656534529576e-06,
"loss": 0.1245,
"step": 1556
},
{
"epoch": 1.0941672522839072,
"grad_norm": 0.21557474825792342,
"learning_rate": 4.263106163329603e-06,
"loss": 0.1313,
"step": 1557
},
{
"epoch": 1.0948699929725931,
"grad_norm": 0.20216212526370522,
"learning_rate": 4.257647571120437e-06,
"loss": 0.0985,
"step": 1558
},
{
"epoch": 1.095572733661279,
"grad_norm": 0.2096173815940637,
"learning_rate": 4.25218988347682e-06,
"loss": 0.1155,
"step": 1559
},
{
"epoch": 1.096275474349965,
"grad_norm": 0.19965130284366517,
"learning_rate": 4.246733107049012e-06,
"loss": 0.103,
"step": 1560
},
{
"epoch": 1.0969782150386507,
"grad_norm": 0.20184927153140259,
"learning_rate": 4.241277248486164e-06,
"loss": 0.1017,
"step": 1561
},
{
"epoch": 1.0976809557273366,
"grad_norm": 0.19797317284043472,
"learning_rate": 4.2358223144363046e-06,
"loss": 0.1001,
"step": 1562
},
{
"epoch": 1.0983836964160225,
"grad_norm": 0.21141678503375533,
"learning_rate": 4.2303683115463355e-06,
"loss": 0.113,
"step": 1563
},
{
"epoch": 1.0990864371047084,
"grad_norm": 0.19990507420006287,
"learning_rate": 4.22491524646203e-06,
"loss": 0.0975,
"step": 1564
},
{
"epoch": 1.0997891777933941,
"grad_norm": 0.18269711313405204,
"learning_rate": 4.219463125828015e-06,
"loss": 0.0863,
"step": 1565
},
{
"epoch": 1.10049191848208,
"grad_norm": 0.20656507181926959,
"learning_rate": 4.214011956287765e-06,
"loss": 0.1117,
"step": 1566
},
{
"epoch": 1.101194659170766,
"grad_norm": 0.20089144359434452,
"learning_rate": 4.208561744483595e-06,
"loss": 0.1045,
"step": 1567
},
{
"epoch": 1.101897399859452,
"grad_norm": 0.20772284286581574,
"learning_rate": 4.2031124970566576e-06,
"loss": 0.1128,
"step": 1568
},
{
"epoch": 1.1026001405481378,
"grad_norm": 0.19554347329232408,
"learning_rate": 4.197664220646928e-06,
"loss": 0.0957,
"step": 1569
},
{
"epoch": 1.1033028812368235,
"grad_norm": 0.2019412844246058,
"learning_rate": 4.192216921893198e-06,
"loss": 0.1061,
"step": 1570
},
{
"epoch": 1.1040056219255094,
"grad_norm": 0.21963470622670456,
"learning_rate": 4.186770607433065e-06,
"loss": 0.1348,
"step": 1571
},
{
"epoch": 1.1047083626141954,
"grad_norm": 0.2177392556848463,
"learning_rate": 4.1813252839029325e-06,
"loss": 0.1255,
"step": 1572
},
{
"epoch": 1.1054111033028813,
"grad_norm": 0.20111923064407966,
"learning_rate": 4.175880957937994e-06,
"loss": 0.118,
"step": 1573
},
{
"epoch": 1.1061138439915672,
"grad_norm": 0.20937523321170448,
"learning_rate": 4.170437636172227e-06,
"loss": 0.1212,
"step": 1574
},
{
"epoch": 1.106816584680253,
"grad_norm": 0.21462536738065816,
"learning_rate": 4.164995325238388e-06,
"loss": 0.1264,
"step": 1575
},
{
"epoch": 1.1075193253689388,
"grad_norm": 0.2237302984887825,
"learning_rate": 4.159554031767996e-06,
"loss": 0.1405,
"step": 1576
},
{
"epoch": 1.1082220660576247,
"grad_norm": 0.20857226921110014,
"learning_rate": 4.1541137623913355e-06,
"loss": 0.1239,
"step": 1577
},
{
"epoch": 1.1089248067463107,
"grad_norm": 0.2005730694246431,
"learning_rate": 4.148674523737443e-06,
"loss": 0.105,
"step": 1578
},
{
"epoch": 1.1096275474349966,
"grad_norm": 0.20577912984245864,
"learning_rate": 4.143236322434096e-06,
"loss": 0.1108,
"step": 1579
},
{
"epoch": 1.1103302881236823,
"grad_norm": 0.20404024733162424,
"learning_rate": 4.137799165107811e-06,
"loss": 0.1141,
"step": 1580
},
{
"epoch": 1.1110330288123682,
"grad_norm": 0.1960576657876908,
"learning_rate": 4.132363058383828e-06,
"loss": 0.108,
"step": 1581
},
{
"epoch": 1.1117357695010541,
"grad_norm": 0.19644758705196563,
"learning_rate": 4.126928008886112e-06,
"loss": 0.1026,
"step": 1582
},
{
"epoch": 1.11243851018974,
"grad_norm": 0.2114393501487219,
"learning_rate": 4.121494023237338e-06,
"loss": 0.1237,
"step": 1583
},
{
"epoch": 1.1131412508784257,
"grad_norm": 0.1992390889642251,
"learning_rate": 4.116061108058882e-06,
"loss": 0.1015,
"step": 1584
},
{
"epoch": 1.1138439915671117,
"grad_norm": 0.20680703462395478,
"learning_rate": 4.110629269970822e-06,
"loss": 0.1149,
"step": 1585
},
{
"epoch": 1.1145467322557976,
"grad_norm": 0.20184983728296313,
"learning_rate": 4.105198515591915e-06,
"loss": 0.1091,
"step": 1586
},
{
"epoch": 1.1152494729444835,
"grad_norm": 0.2094396976212368,
"learning_rate": 4.099768851539603e-06,
"loss": 0.1154,
"step": 1587
},
{
"epoch": 1.1159522136331694,
"grad_norm": 0.19158460213507128,
"learning_rate": 4.0943402844300004e-06,
"loss": 0.0953,
"step": 1588
},
{
"epoch": 1.1166549543218554,
"grad_norm": 0.22038267427678518,
"learning_rate": 4.088912820877881e-06,
"loss": 0.1305,
"step": 1589
},
{
"epoch": 1.117357695010541,
"grad_norm": 0.19964132662386183,
"learning_rate": 4.0834864674966765e-06,
"loss": 0.109,
"step": 1590
},
{
"epoch": 1.118060435699227,
"grad_norm": 0.2043932255893905,
"learning_rate": 4.078061230898463e-06,
"loss": 0.1216,
"step": 1591
},
{
"epoch": 1.118763176387913,
"grad_norm": 0.20919509240179218,
"learning_rate": 4.072637117693962e-06,
"loss": 0.1199,
"step": 1592
},
{
"epoch": 1.1194659170765988,
"grad_norm": 0.20201797308616062,
"learning_rate": 4.067214134492519e-06,
"loss": 0.1107,
"step": 1593
},
{
"epoch": 1.1201686577652845,
"grad_norm": 0.2023530808114435,
"learning_rate": 4.061792287902107e-06,
"loss": 0.1144,
"step": 1594
},
{
"epoch": 1.1208713984539704,
"grad_norm": 0.2008018357720501,
"learning_rate": 4.056371584529311e-06,
"loss": 0.1014,
"step": 1595
},
{
"epoch": 1.1215741391426564,
"grad_norm": 0.20287658791997282,
"learning_rate": 4.050952030979326e-06,
"loss": 0.0999,
"step": 1596
},
{
"epoch": 1.1222768798313423,
"grad_norm": 0.2048488067103378,
"learning_rate": 4.0455336338559446e-06,
"loss": 0.1127,
"step": 1597
},
{
"epoch": 1.1229796205200282,
"grad_norm": 0.2042694111390459,
"learning_rate": 4.040116399761547e-06,
"loss": 0.1097,
"step": 1598
},
{
"epoch": 1.123682361208714,
"grad_norm": 0.22619352532669462,
"learning_rate": 4.034700335297107e-06,
"loss": 0.1317,
"step": 1599
},
{
"epoch": 1.1243851018973998,
"grad_norm": 0.18380516004709158,
"learning_rate": 4.029285447062159e-06,
"loss": 0.0884,
"step": 1600
},
{
"epoch": 1.1250878425860857,
"grad_norm": 0.20531044736018747,
"learning_rate": 4.02387174165481e-06,
"loss": 0.1101,
"step": 1601
},
{
"epoch": 1.1257905832747717,
"grad_norm": 0.20329220044676866,
"learning_rate": 4.018459225671732e-06,
"loss": 0.109,
"step": 1602
},
{
"epoch": 1.1264933239634574,
"grad_norm": 0.1935965892628706,
"learning_rate": 4.01304790570814e-06,
"loss": 0.0974,
"step": 1603
},
{
"epoch": 1.1271960646521433,
"grad_norm": 0.1918353193744751,
"learning_rate": 4.007637788357793e-06,
"loss": 0.0998,
"step": 1604
},
{
"epoch": 1.1278988053408292,
"grad_norm": 0.20505456043134263,
"learning_rate": 4.002228880212984e-06,
"loss": 0.1114,
"step": 1605
},
{
"epoch": 1.1286015460295151,
"grad_norm": 0.20710704204954097,
"learning_rate": 3.996821187864537e-06,
"loss": 0.1044,
"step": 1606
},
{
"epoch": 1.129304286718201,
"grad_norm": 0.19501453629578433,
"learning_rate": 3.99141471790179e-06,
"loss": 0.1026,
"step": 1607
},
{
"epoch": 1.130007027406887,
"grad_norm": 0.1892692003844952,
"learning_rate": 3.986009476912592e-06,
"loss": 0.092,
"step": 1608
},
{
"epoch": 1.1307097680955727,
"grad_norm": 0.1992192698467776,
"learning_rate": 3.980605471483299e-06,
"loss": 0.1116,
"step": 1609
},
{
"epoch": 1.1314125087842586,
"grad_norm": 0.21976147618369019,
"learning_rate": 3.975202708198754e-06,
"loss": 0.1345,
"step": 1610
},
{
"epoch": 1.1321152494729445,
"grad_norm": 0.19965848602530795,
"learning_rate": 3.969801193642293e-06,
"loss": 0.1072,
"step": 1611
},
{
"epoch": 1.1328179901616304,
"grad_norm": 0.21203522209088402,
"learning_rate": 3.964400934395726e-06,
"loss": 0.1209,
"step": 1612
},
{
"epoch": 1.1335207308503161,
"grad_norm": 0.2100287894439907,
"learning_rate": 3.959001937039337e-06,
"loss": 0.127,
"step": 1613
},
{
"epoch": 1.134223471539002,
"grad_norm": 0.20986722081403691,
"learning_rate": 3.95360420815187e-06,
"loss": 0.1213,
"step": 1614
},
{
"epoch": 1.134926212227688,
"grad_norm": 0.2041713346566192,
"learning_rate": 3.948207754310522e-06,
"loss": 0.1105,
"step": 1615
},
{
"epoch": 1.135628952916374,
"grad_norm": 0.20606768311237736,
"learning_rate": 3.94281258209094e-06,
"loss": 0.1126,
"step": 1616
},
{
"epoch": 1.1363316936050598,
"grad_norm": 0.20431152137437644,
"learning_rate": 3.937418698067209e-06,
"loss": 0.1139,
"step": 1617
},
{
"epoch": 1.1370344342937457,
"grad_norm": 0.20905192183608534,
"learning_rate": 3.932026108811841e-06,
"loss": 0.1222,
"step": 1618
},
{
"epoch": 1.1377371749824314,
"grad_norm": 0.21274181378486928,
"learning_rate": 3.9266348208957716e-06,
"loss": 0.1254,
"step": 1619
},
{
"epoch": 1.1384399156711174,
"grad_norm": 0.21292332332120562,
"learning_rate": 3.921244840888353e-06,
"loss": 0.1149,
"step": 1620
},
{
"epoch": 1.1391426563598033,
"grad_norm": 0.1962246417970121,
"learning_rate": 3.915856175357341e-06,
"loss": 0.0964,
"step": 1621
},
{
"epoch": 1.1398453970484892,
"grad_norm": 0.2092997007191885,
"learning_rate": 3.910468830868891e-06,
"loss": 0.1206,
"step": 1622
},
{
"epoch": 1.140548137737175,
"grad_norm": 0.19643397861487025,
"learning_rate": 3.90508281398755e-06,
"loss": 0.1086,
"step": 1623
},
{
"epoch": 1.1412508784258608,
"grad_norm": 0.2058321168128017,
"learning_rate": 3.899698131276243e-06,
"loss": 0.1129,
"step": 1624
},
{
"epoch": 1.1419536191145467,
"grad_norm": 0.20353741842752004,
"learning_rate": 3.894314789296274e-06,
"loss": 0.1022,
"step": 1625
},
{
"epoch": 1.1426563598032327,
"grad_norm": 0.20952700044920755,
"learning_rate": 3.888932794607308e-06,
"loss": 0.1212,
"step": 1626
},
{
"epoch": 1.1433591004919186,
"grad_norm": 0.2018168240532568,
"learning_rate": 3.883552153767376e-06,
"loss": 0.0931,
"step": 1627
},
{
"epoch": 1.1440618411806043,
"grad_norm": 0.2056989305829168,
"learning_rate": 3.878172873332854e-06,
"loss": 0.1098,
"step": 1628
},
{
"epoch": 1.1447645818692902,
"grad_norm": 0.21854591682796196,
"learning_rate": 3.872794959858457e-06,
"loss": 0.1384,
"step": 1629
},
{
"epoch": 1.1454673225579761,
"grad_norm": 0.19668404783765958,
"learning_rate": 3.867418419897245e-06,
"loss": 0.1006,
"step": 1630
},
{
"epoch": 1.146170063246662,
"grad_norm": 0.21405008450012455,
"learning_rate": 3.862043260000593e-06,
"loss": 0.1157,
"step": 1631
},
{
"epoch": 1.1468728039353477,
"grad_norm": 0.20212181366594892,
"learning_rate": 3.856669486718201e-06,
"loss": 0.1085,
"step": 1632
},
{
"epoch": 1.1475755446240337,
"grad_norm": 0.20533148603255513,
"learning_rate": 3.85129710659808e-06,
"loss": 0.1157,
"step": 1633
},
{
"epoch": 1.1482782853127196,
"grad_norm": 0.19851908552083888,
"learning_rate": 3.845926126186539e-06,
"loss": 0.0999,
"step": 1634
},
{
"epoch": 1.1489810260014055,
"grad_norm": 0.18961745031463031,
"learning_rate": 3.840556552028182e-06,
"loss": 0.0864,
"step": 1635
},
{
"epoch": 1.1496837666900914,
"grad_norm": 0.20457716809456541,
"learning_rate": 3.8351883906659015e-06,
"loss": 0.1037,
"step": 1636
},
{
"epoch": 1.1503865073787773,
"grad_norm": 0.19878315543011016,
"learning_rate": 3.829821648640873e-06,
"loss": 0.103,
"step": 1637
},
{
"epoch": 1.151089248067463,
"grad_norm": 0.18871436956594148,
"learning_rate": 3.824456332492531e-06,
"loss": 0.0936,
"step": 1638
},
{
"epoch": 1.151791988756149,
"grad_norm": 0.20304551095863083,
"learning_rate": 3.8190924487585825e-06,
"loss": 0.107,
"step": 1639
},
{
"epoch": 1.1524947294448349,
"grad_norm": 0.20891965183850822,
"learning_rate": 3.8137300039749837e-06,
"loss": 0.1129,
"step": 1640
},
{
"epoch": 1.1531974701335208,
"grad_norm": 0.20905900532248192,
"learning_rate": 3.808369004675942e-06,
"loss": 0.1275,
"step": 1641
},
{
"epoch": 1.1539002108222065,
"grad_norm": 0.2040192326939405,
"learning_rate": 3.803009457393901e-06,
"loss": 0.1035,
"step": 1642
},
{
"epoch": 1.1546029515108924,
"grad_norm": 0.20540455741207822,
"learning_rate": 3.7976513686595306e-06,
"loss": 0.1086,
"step": 1643
},
{
"epoch": 1.1553056921995783,
"grad_norm": 0.19903898709578496,
"learning_rate": 3.792294745001732e-06,
"loss": 0.104,
"step": 1644
},
{
"epoch": 1.1560084328882643,
"grad_norm": 0.2037957872634639,
"learning_rate": 3.786939592947616e-06,
"loss": 0.1057,
"step": 1645
},
{
"epoch": 1.1567111735769502,
"grad_norm": 0.20393092844886251,
"learning_rate": 3.781585919022499e-06,
"loss": 0.1185,
"step": 1646
},
{
"epoch": 1.157413914265636,
"grad_norm": 0.2107120906098051,
"learning_rate": 3.7762337297499026e-06,
"loss": 0.1135,
"step": 1647
},
{
"epoch": 1.1581166549543218,
"grad_norm": 0.21406211620911497,
"learning_rate": 3.770883031651531e-06,
"loss": 0.1287,
"step": 1648
},
{
"epoch": 1.1588193956430077,
"grad_norm": 0.2099171495131818,
"learning_rate": 3.765533831247278e-06,
"loss": 0.118,
"step": 1649
},
{
"epoch": 1.1595221363316937,
"grad_norm": 0.20884400966191696,
"learning_rate": 3.7601861350552073e-06,
"loss": 0.1163,
"step": 1650
},
{
"epoch": 1.1602248770203796,
"grad_norm": 0.2173515261261405,
"learning_rate": 3.7548399495915555e-06,
"loss": 0.1163,
"step": 1651
},
{
"epoch": 1.1609276177090653,
"grad_norm": 0.20669913726530742,
"learning_rate": 3.7494952813707154e-06,
"loss": 0.1106,
"step": 1652
},
{
"epoch": 1.1616303583977512,
"grad_norm": 0.196801491914636,
"learning_rate": 3.744152136905226e-06,
"loss": 0.094,
"step": 1653
},
{
"epoch": 1.1623330990864371,
"grad_norm": 0.20895514775017704,
"learning_rate": 3.7388105227057796e-06,
"loss": 0.1137,
"step": 1654
},
{
"epoch": 1.163035839775123,
"grad_norm": 0.19426368565932461,
"learning_rate": 3.733470445281197e-06,
"loss": 0.1009,
"step": 1655
},
{
"epoch": 1.163738580463809,
"grad_norm": 0.19058894418303207,
"learning_rate": 3.7281319111384274e-06,
"loss": 0.0961,
"step": 1656
},
{
"epoch": 1.1644413211524947,
"grad_norm": 0.195402846516857,
"learning_rate": 3.722794926782542e-06,
"loss": 0.1101,
"step": 1657
},
{
"epoch": 1.1651440618411806,
"grad_norm": 0.19114283425194353,
"learning_rate": 3.71745949871672e-06,
"loss": 0.0945,
"step": 1658
},
{
"epoch": 1.1658468025298665,
"grad_norm": 0.2045666086006227,
"learning_rate": 3.712125633442246e-06,
"loss": 0.1177,
"step": 1659
},
{
"epoch": 1.1665495432185524,
"grad_norm": 0.22344067552653685,
"learning_rate": 3.7067933374585003e-06,
"loss": 0.1373,
"step": 1660
},
{
"epoch": 1.1672522839072381,
"grad_norm": 0.20682816708157278,
"learning_rate": 3.7014626172629536e-06,
"loss": 0.1278,
"step": 1661
},
{
"epoch": 1.167955024595924,
"grad_norm": 0.1947601414484512,
"learning_rate": 3.696133479351151e-06,
"loss": 0.1062,
"step": 1662
},
{
"epoch": 1.16865776528461,
"grad_norm": 0.2002188300754005,
"learning_rate": 3.6908059302167134e-06,
"loss": 0.1032,
"step": 1663
},
{
"epoch": 1.1693605059732959,
"grad_norm": 0.21466012846275453,
"learning_rate": 3.6854799763513238e-06,
"loss": 0.1061,
"step": 1664
},
{
"epoch": 1.1700632466619818,
"grad_norm": 0.1923562283306017,
"learning_rate": 3.6801556242447247e-06,
"loss": 0.095,
"step": 1665
},
{
"epoch": 1.1707659873506677,
"grad_norm": 0.20303804962126984,
"learning_rate": 3.6748328803847044e-06,
"loss": 0.1146,
"step": 1666
},
{
"epoch": 1.1714687280393534,
"grad_norm": 0.2044320532324044,
"learning_rate": 3.6695117512570878e-06,
"loss": 0.1118,
"step": 1667
},
{
"epoch": 1.1721714687280393,
"grad_norm": 0.1831883147579388,
"learning_rate": 3.66419224334574e-06,
"loss": 0.0842,
"step": 1668
},
{
"epoch": 1.1728742094167253,
"grad_norm": 0.18210605114051479,
"learning_rate": 3.658874363132546e-06,
"loss": 0.0867,
"step": 1669
},
{
"epoch": 1.1735769501054112,
"grad_norm": 0.19387705042096753,
"learning_rate": 3.6535581170974055e-06,
"loss": 0.1003,
"step": 1670
},
{
"epoch": 1.1742796907940969,
"grad_norm": 0.2038323045786604,
"learning_rate": 3.648243511718235e-06,
"loss": 0.1046,
"step": 1671
},
{
"epoch": 1.1749824314827828,
"grad_norm": 0.20839110129313798,
"learning_rate": 3.6429305534709415e-06,
"loss": 0.1177,
"step": 1672
},
{
"epoch": 1.1756851721714687,
"grad_norm": 0.19207499396202854,
"learning_rate": 3.6376192488294317e-06,
"loss": 0.0971,
"step": 1673
},
{
"epoch": 1.1763879128601546,
"grad_norm": 0.1859772269097111,
"learning_rate": 3.6323096042655936e-06,
"loss": 0.0861,
"step": 1674
},
{
"epoch": 1.1770906535488406,
"grad_norm": 0.20736054367549323,
"learning_rate": 3.627001626249298e-06,
"loss": 0.1114,
"step": 1675
},
{
"epoch": 1.1777933942375263,
"grad_norm": 0.20429579706942522,
"learning_rate": 3.6216953212483796e-06,
"loss": 0.1108,
"step": 1676
},
{
"epoch": 1.1784961349262122,
"grad_norm": 0.19703078240700186,
"learning_rate": 3.6163906957286347e-06,
"loss": 0.1004,
"step": 1677
},
{
"epoch": 1.1791988756148981,
"grad_norm": 0.19096964405112193,
"learning_rate": 3.611087756153815e-06,
"loss": 0.0934,
"step": 1678
},
{
"epoch": 1.179901616303584,
"grad_norm": 0.19554474448433679,
"learning_rate": 3.605786508985619e-06,
"loss": 0.1083,
"step": 1679
},
{
"epoch": 1.1806043569922697,
"grad_norm": 0.20885429961923802,
"learning_rate": 3.6004869606836807e-06,
"loss": 0.1177,
"step": 1680
},
{
"epoch": 1.1813070976809557,
"grad_norm": 0.20865517253250215,
"learning_rate": 3.5951891177055663e-06,
"loss": 0.1151,
"step": 1681
},
{
"epoch": 1.1820098383696416,
"grad_norm": 0.19891445674084385,
"learning_rate": 3.58989298650676e-06,
"loss": 0.102,
"step": 1682
},
{
"epoch": 1.1827125790583275,
"grad_norm": 0.2029605931731061,
"learning_rate": 3.5845985735406634e-06,
"loss": 0.1164,
"step": 1683
},
{
"epoch": 1.1834153197470134,
"grad_norm": 0.21181088375449306,
"learning_rate": 3.5793058852585837e-06,
"loss": 0.121,
"step": 1684
},
{
"epoch": 1.1841180604356993,
"grad_norm": 0.20687426042689525,
"learning_rate": 3.5740149281097276e-06,
"loss": 0.1092,
"step": 1685
},
{
"epoch": 1.184820801124385,
"grad_norm": 0.20670072713698134,
"learning_rate": 3.5687257085411913e-06,
"loss": 0.1174,
"step": 1686
},
{
"epoch": 1.185523541813071,
"grad_norm": 0.2045280062381537,
"learning_rate": 3.563438232997952e-06,
"loss": 0.1156,
"step": 1687
},
{
"epoch": 1.1862262825017569,
"grad_norm": 0.20353439072227206,
"learning_rate": 3.5581525079228647e-06,
"loss": 0.1111,
"step": 1688
},
{
"epoch": 1.1869290231904428,
"grad_norm": 0.20766273649361583,
"learning_rate": 3.552868539756651e-06,
"loss": 0.1149,
"step": 1689
},
{
"epoch": 1.1876317638791285,
"grad_norm": 0.19650635308296607,
"learning_rate": 3.5475863349378907e-06,
"loss": 0.1012,
"step": 1690
},
{
"epoch": 1.1883345045678144,
"grad_norm": 0.21169904197911277,
"learning_rate": 3.5423058999030145e-06,
"loss": 0.1239,
"step": 1691
},
{
"epoch": 1.1890372452565003,
"grad_norm": 0.2054597881477478,
"learning_rate": 3.537027241086296e-06,
"loss": 0.1154,
"step": 1692
},
{
"epoch": 1.1897399859451863,
"grad_norm": 0.205083678622398,
"learning_rate": 3.531750364919849e-06,
"loss": 0.1157,
"step": 1693
},
{
"epoch": 1.1904427266338722,
"grad_norm": 0.21275294744490048,
"learning_rate": 3.526475277833609e-06,
"loss": 0.1168,
"step": 1694
},
{
"epoch": 1.1911454673225579,
"grad_norm": 0.21027295681097122,
"learning_rate": 3.521201986255338e-06,
"loss": 0.1136,
"step": 1695
},
{
"epoch": 1.1918482080112438,
"grad_norm": 0.2107695417660648,
"learning_rate": 3.5159304966106034e-06,
"loss": 0.1161,
"step": 1696
},
{
"epoch": 1.1925509486999297,
"grad_norm": 0.20407216223471178,
"learning_rate": 3.5106608153227805e-06,
"loss": 0.1152,
"step": 1697
},
{
"epoch": 1.1932536893886156,
"grad_norm": 0.2089280185263461,
"learning_rate": 3.50539294881304e-06,
"loss": 0.1144,
"step": 1698
},
{
"epoch": 1.1939564300773016,
"grad_norm": 0.21355980792663823,
"learning_rate": 3.500126903500345e-06,
"loss": 0.1228,
"step": 1699
},
{
"epoch": 1.1946591707659873,
"grad_norm": 0.20509268936469083,
"learning_rate": 3.4948626858014345e-06,
"loss": 0.0939,
"step": 1700
},
{
"epoch": 1.1953619114546732,
"grad_norm": 0.19448700854130416,
"learning_rate": 3.4896003021308213e-06,
"loss": 0.0946,
"step": 1701
},
{
"epoch": 1.196064652143359,
"grad_norm": 0.20532544239268455,
"learning_rate": 3.4843397589007842e-06,
"loss": 0.1142,
"step": 1702
},
{
"epoch": 1.196767392832045,
"grad_norm": 0.20684377085652614,
"learning_rate": 3.4790810625213627e-06,
"loss": 0.1154,
"step": 1703
},
{
"epoch": 1.197470133520731,
"grad_norm": 0.21033865172622362,
"learning_rate": 3.4738242194003403e-06,
"loss": 0.1143,
"step": 1704
},
{
"epoch": 1.1981728742094166,
"grad_norm": 0.2071766373847229,
"learning_rate": 3.4685692359432487e-06,
"loss": 0.1179,
"step": 1705
},
{
"epoch": 1.1988756148981026,
"grad_norm": 0.2167376804628501,
"learning_rate": 3.4633161185533435e-06,
"loss": 0.1257,
"step": 1706
},
{
"epoch": 1.1995783555867885,
"grad_norm": 0.20269640918875273,
"learning_rate": 3.4580648736316167e-06,
"loss": 0.1055,
"step": 1707
},
{
"epoch": 1.2002810962754744,
"grad_norm": 0.20724165542140882,
"learning_rate": 3.4528155075767746e-06,
"loss": 0.1164,
"step": 1708
},
{
"epoch": 1.2009838369641601,
"grad_norm": 0.20089015903041435,
"learning_rate": 3.447568026785233e-06,
"loss": 0.1079,
"step": 1709
},
{
"epoch": 1.201686577652846,
"grad_norm": 0.19872386057478314,
"learning_rate": 3.4423224376511143e-06,
"loss": 0.1071,
"step": 1710
},
{
"epoch": 1.202389318341532,
"grad_norm": 0.2019865857414783,
"learning_rate": 3.4370787465662304e-06,
"loss": 0.1017,
"step": 1711
},
{
"epoch": 1.2030920590302179,
"grad_norm": 0.1952217118308541,
"learning_rate": 3.431836959920083e-06,
"loss": 0.0982,
"step": 1712
},
{
"epoch": 1.2037947997189038,
"grad_norm": 0.2168579588018165,
"learning_rate": 3.4265970840998562e-06,
"loss": 0.1249,
"step": 1713
},
{
"epoch": 1.2044975404075897,
"grad_norm": 0.2024334396169925,
"learning_rate": 3.4213591254904023e-06,
"loss": 0.1141,
"step": 1714
},
{
"epoch": 1.2052002810962754,
"grad_norm": 0.19166621253337635,
"learning_rate": 3.416123090474236e-06,
"loss": 0.0998,
"step": 1715
},
{
"epoch": 1.2059030217849613,
"grad_norm": 0.19347933991783164,
"learning_rate": 3.4108889854315315e-06,
"loss": 0.0978,
"step": 1716
},
{
"epoch": 1.2066057624736473,
"grad_norm": 0.19838523403711514,
"learning_rate": 3.4056568167401106e-06,
"loss": 0.1013,
"step": 1717
},
{
"epoch": 1.2073085031623332,
"grad_norm": 0.22870939339340943,
"learning_rate": 3.4004265907754343e-06,
"loss": 0.1427,
"step": 1718
},
{
"epoch": 1.2080112438510189,
"grad_norm": 0.19834134620162563,
"learning_rate": 3.3951983139106005e-06,
"loss": 0.1056,
"step": 1719
},
{
"epoch": 1.2087139845397048,
"grad_norm": 0.21288635512079315,
"learning_rate": 3.3899719925163223e-06,
"loss": 0.1219,
"step": 1720
},
{
"epoch": 1.2094167252283907,
"grad_norm": 0.19314209177711314,
"learning_rate": 3.3847476329609415e-06,
"loss": 0.0989,
"step": 1721
},
{
"epoch": 1.2101194659170766,
"grad_norm": 0.20747242503212865,
"learning_rate": 3.379525241610402e-06,
"loss": 0.1098,
"step": 1722
},
{
"epoch": 1.2108222066057626,
"grad_norm": 0.20450630568530612,
"learning_rate": 3.3743048248282527e-06,
"loss": 0.1167,
"step": 1723
},
{
"epoch": 1.2115249472944483,
"grad_norm": 0.21134157998997932,
"learning_rate": 3.3690863889756374e-06,
"loss": 0.12,
"step": 1724
},
{
"epoch": 1.2122276879831342,
"grad_norm": 0.19092208478524983,
"learning_rate": 3.363869940411282e-06,
"loss": 0.097,
"step": 1725
},
{
"epoch": 1.21293042867182,
"grad_norm": 0.2009529321180883,
"learning_rate": 3.358655485491492e-06,
"loss": 0.1077,
"step": 1726
},
{
"epoch": 1.213633169360506,
"grad_norm": 0.20912484996842268,
"learning_rate": 3.353443030570147e-06,
"loss": 0.1216,
"step": 1727
},
{
"epoch": 1.2143359100491917,
"grad_norm": 0.20029488490283381,
"learning_rate": 3.348232581998686e-06,
"loss": 0.1056,
"step": 1728
},
{
"epoch": 1.2150386507378776,
"grad_norm": 0.2041981352853178,
"learning_rate": 3.343024146126108e-06,
"loss": 0.1151,
"step": 1729
},
{
"epoch": 1.2157413914265636,
"grad_norm": 0.2207835816035607,
"learning_rate": 3.33781772929895e-06,
"loss": 0.1286,
"step": 1730
},
{
"epoch": 1.2164441321152495,
"grad_norm": 0.22219393944091456,
"learning_rate": 3.3326133378612996e-06,
"loss": 0.1222,
"step": 1731
},
{
"epoch": 1.2171468728039354,
"grad_norm": 0.19079655025927333,
"learning_rate": 3.3274109781547685e-06,
"loss": 0.1045,
"step": 1732
},
{
"epoch": 1.2178496134926213,
"grad_norm": 0.20769085087537087,
"learning_rate": 3.322210656518499e-06,
"loss": 0.1151,
"step": 1733
},
{
"epoch": 1.218552354181307,
"grad_norm": 0.19288873562073267,
"learning_rate": 3.317012379289146e-06,
"loss": 0.092,
"step": 1734
},
{
"epoch": 1.219255094869993,
"grad_norm": 0.19355521401794687,
"learning_rate": 3.311816152800873e-06,
"loss": 0.1023,
"step": 1735
},
{
"epoch": 1.2199578355586789,
"grad_norm": 0.19556119602628833,
"learning_rate": 3.3066219833853454e-06,
"loss": 0.0996,
"step": 1736
},
{
"epoch": 1.2206605762473648,
"grad_norm": 0.20834965560293484,
"learning_rate": 3.3014298773717235e-06,
"loss": 0.1083,
"step": 1737
},
{
"epoch": 1.2213633169360505,
"grad_norm": 0.22327393832478082,
"learning_rate": 3.2962398410866535e-06,
"loss": 0.1243,
"step": 1738
},
{
"epoch": 1.2220660576247364,
"grad_norm": 0.20303283981680254,
"learning_rate": 3.2910518808542557e-06,
"loss": 0.1143,
"step": 1739
},
{
"epoch": 1.2227687983134223,
"grad_norm": 0.20240845211273598,
"learning_rate": 3.285866002996124e-06,
"loss": 0.1077,
"step": 1740
},
{
"epoch": 1.2234715390021083,
"grad_norm": 0.19670141122532492,
"learning_rate": 3.2806822138313154e-06,
"loss": 0.1089,
"step": 1741
},
{
"epoch": 1.2241742796907942,
"grad_norm": 0.2077352636226475,
"learning_rate": 3.275500519676339e-06,
"loss": 0.1151,
"step": 1742
},
{
"epoch": 1.2248770203794799,
"grad_norm": 0.2019527734351478,
"learning_rate": 3.2703209268451565e-06,
"loss": 0.1144,
"step": 1743
},
{
"epoch": 1.2255797610681658,
"grad_norm": 0.21134790894412536,
"learning_rate": 3.26514344164916e-06,
"loss": 0.1238,
"step": 1744
},
{
"epoch": 1.2262825017568517,
"grad_norm": 0.22236322541300063,
"learning_rate": 3.2599680703971824e-06,
"loss": 0.1347,
"step": 1745
},
{
"epoch": 1.2269852424455376,
"grad_norm": 0.20747770975376068,
"learning_rate": 3.2547948193954747e-06,
"loss": 0.1221,
"step": 1746
},
{
"epoch": 1.2276879831342236,
"grad_norm": 0.21970441569818594,
"learning_rate": 3.24962369494771e-06,
"loss": 0.1403,
"step": 1747
},
{
"epoch": 1.2283907238229093,
"grad_norm": 0.20254095424218116,
"learning_rate": 3.2444547033549654e-06,
"loss": 0.1014,
"step": 1748
},
{
"epoch": 1.2290934645115952,
"grad_norm": 0.20846019838019555,
"learning_rate": 3.23928785091572e-06,
"loss": 0.1159,
"step": 1749
},
{
"epoch": 1.229796205200281,
"grad_norm": 0.20624929509100082,
"learning_rate": 3.2341231439258454e-06,
"loss": 0.1167,
"step": 1750
},
{
"epoch": 1.230498945888967,
"grad_norm": 0.19372984994266518,
"learning_rate": 3.2289605886786035e-06,
"loss": 0.0889,
"step": 1751
},
{
"epoch": 1.231201686577653,
"grad_norm": 0.20933892659344214,
"learning_rate": 3.22380019146463e-06,
"loss": 0.1251,
"step": 1752
},
{
"epoch": 1.2319044272663386,
"grad_norm": 0.20688277475960937,
"learning_rate": 3.2186419585719344e-06,
"loss": 0.1208,
"step": 1753
},
{
"epoch": 1.2326071679550246,
"grad_norm": 0.1981701078030928,
"learning_rate": 3.2134858962858824e-06,
"loss": 0.1006,
"step": 1754
},
{
"epoch": 1.2333099086437105,
"grad_norm": 0.2137832665676374,
"learning_rate": 3.2083320108892026e-06,
"loss": 0.1201,
"step": 1755
},
{
"epoch": 1.2340126493323964,
"grad_norm": 0.20764984984804763,
"learning_rate": 3.203180308661965e-06,
"loss": 0.112,
"step": 1756
},
{
"epoch": 1.234715390021082,
"grad_norm": 0.19700480894712394,
"learning_rate": 3.1980307958815852e-06,
"loss": 0.0952,
"step": 1757
},
{
"epoch": 1.235418130709768,
"grad_norm": 0.2201070303017595,
"learning_rate": 3.192883478822807e-06,
"loss": 0.1263,
"step": 1758
},
{
"epoch": 1.236120871398454,
"grad_norm": 0.19966535733317384,
"learning_rate": 3.187738363757698e-06,
"loss": 0.1008,
"step": 1759
},
{
"epoch": 1.2368236120871399,
"grad_norm": 0.2040583070543025,
"learning_rate": 3.182595456955644e-06,
"loss": 0.1117,
"step": 1760
},
{
"epoch": 1.2375263527758258,
"grad_norm": 0.19546125708516837,
"learning_rate": 3.1774547646833407e-06,
"loss": 0.1001,
"step": 1761
},
{
"epoch": 1.2382290934645117,
"grad_norm": 0.19308210829973696,
"learning_rate": 3.172316293204787e-06,
"loss": 0.0943,
"step": 1762
},
{
"epoch": 1.2389318341531974,
"grad_norm": 0.21348670945345954,
"learning_rate": 3.1671800487812697e-06,
"loss": 0.1203,
"step": 1763
},
{
"epoch": 1.2396345748418833,
"grad_norm": 0.20656074911397304,
"learning_rate": 3.1620460376713668e-06,
"loss": 0.1204,
"step": 1764
},
{
"epoch": 1.2403373155305693,
"grad_norm": 0.20398521665008706,
"learning_rate": 3.156914266130935e-06,
"loss": 0.1142,
"step": 1765
},
{
"epoch": 1.2410400562192552,
"grad_norm": 0.20356507507363272,
"learning_rate": 3.1517847404131e-06,
"loss": 0.1058,
"step": 1766
},
{
"epoch": 1.2417427969079409,
"grad_norm": 0.20389638348379735,
"learning_rate": 3.1466574667682546e-06,
"loss": 0.1072,
"step": 1767
},
{
"epoch": 1.2424455375966268,
"grad_norm": 0.20526470705304986,
"learning_rate": 3.1415324514440392e-06,
"loss": 0.1103,
"step": 1768
},
{
"epoch": 1.2431482782853127,
"grad_norm": 0.19830701109267698,
"learning_rate": 3.1364097006853523e-06,
"loss": 0.1015,
"step": 1769
},
{
"epoch": 1.2438510189739986,
"grad_norm": 0.19432063349968579,
"learning_rate": 3.131289220734327e-06,
"loss": 0.0991,
"step": 1770
},
{
"epoch": 1.2445537596626846,
"grad_norm": 0.20350574461027898,
"learning_rate": 3.1261710178303316e-06,
"loss": 0.1185,
"step": 1771
},
{
"epoch": 1.2452565003513703,
"grad_norm": 0.19437680161691603,
"learning_rate": 3.1210550982099596e-06,
"loss": 0.0935,
"step": 1772
},
{
"epoch": 1.2459592410400562,
"grad_norm": 0.21452335422138652,
"learning_rate": 3.115941468107021e-06,
"loss": 0.1282,
"step": 1773
},
{
"epoch": 1.246661981728742,
"grad_norm": 0.2067435833066444,
"learning_rate": 3.110830133752536e-06,
"loss": 0.1105,
"step": 1774
},
{
"epoch": 1.247364722417428,
"grad_norm": 0.20926740641204392,
"learning_rate": 3.1057211013747295e-06,
"loss": 0.113,
"step": 1775
},
{
"epoch": 1.248067463106114,
"grad_norm": 0.20366970954446123,
"learning_rate": 3.1006143771990205e-06,
"loss": 0.1011,
"step": 1776
},
{
"epoch": 1.2487702037947996,
"grad_norm": 0.19798007201399767,
"learning_rate": 3.095509967448016e-06,
"loss": 0.0986,
"step": 1777
},
{
"epoch": 1.2494729444834856,
"grad_norm": 0.21335576094063644,
"learning_rate": 3.090407878341498e-06,
"loss": 0.123,
"step": 1778
},
{
"epoch": 1.2501756851721715,
"grad_norm": 0.20628253800179683,
"learning_rate": 3.085308116096428e-06,
"loss": 0.1025,
"step": 1779
},
{
"epoch": 1.2508784258608574,
"grad_norm": 0.21598662126133159,
"learning_rate": 3.080210686926928e-06,
"loss": 0.1186,
"step": 1780
},
{
"epoch": 1.2515811665495433,
"grad_norm": 0.20388807376466817,
"learning_rate": 3.0751155970442792e-06,
"loss": 0.1079,
"step": 1781
},
{
"epoch": 1.252283907238229,
"grad_norm": 0.20720027166201274,
"learning_rate": 3.070022852656911e-06,
"loss": 0.1087,
"step": 1782
},
{
"epoch": 1.252986647926915,
"grad_norm": 0.2029358795787075,
"learning_rate": 3.0649324599703933e-06,
"loss": 0.1027,
"step": 1783
},
{
"epoch": 1.2536893886156009,
"grad_norm": 0.2017032602390486,
"learning_rate": 3.0598444251874315e-06,
"loss": 0.1014,
"step": 1784
},
{
"epoch": 1.2543921293042868,
"grad_norm": 0.19606575107494112,
"learning_rate": 3.0547587545078615e-06,
"loss": 0.0995,
"step": 1785
},
{
"epoch": 1.2550948699929725,
"grad_norm": 0.20341518032647446,
"learning_rate": 3.0496754541286346e-06,
"loss": 0.1097,
"step": 1786
},
{
"epoch": 1.2557976106816584,
"grad_norm": 0.220031125756578,
"learning_rate": 3.044594530243813e-06,
"loss": 0.1219,
"step": 1787
},
{
"epoch": 1.2565003513703443,
"grad_norm": 0.20910531202777438,
"learning_rate": 3.0395159890445647e-06,
"loss": 0.116,
"step": 1788
},
{
"epoch": 1.2572030920590302,
"grad_norm": 0.20088859938914494,
"learning_rate": 3.0344398367191574e-06,
"loss": 0.104,
"step": 1789
},
{
"epoch": 1.2579058327477162,
"grad_norm": 0.18797074254872537,
"learning_rate": 3.029366079452943e-06,
"loss": 0.0906,
"step": 1790
},
{
"epoch": 1.258608573436402,
"grad_norm": 0.2217796124359789,
"learning_rate": 3.024294723428358e-06,
"loss": 0.133,
"step": 1791
},
{
"epoch": 1.2593113141250878,
"grad_norm": 0.20817586964189583,
"learning_rate": 3.0192257748249097e-06,
"loss": 0.104,
"step": 1792
},
{
"epoch": 1.2600140548137737,
"grad_norm": 0.20323888739451168,
"learning_rate": 3.0141592398191765e-06,
"loss": 0.112,
"step": 1793
},
{
"epoch": 1.2607167955024596,
"grad_norm": 0.2014830381369988,
"learning_rate": 3.009095124584792e-06,
"loss": 0.1042,
"step": 1794
},
{
"epoch": 1.2614195361911453,
"grad_norm": 0.23341886318977387,
"learning_rate": 3.004033435292445e-06,
"loss": 0.1432,
"step": 1795
},
{
"epoch": 1.2621222768798313,
"grad_norm": 0.19664086628435518,
"learning_rate": 2.9989741781098654e-06,
"loss": 0.101,
"step": 1796
},
{
"epoch": 1.2628250175685172,
"grad_norm": 0.21004580890602423,
"learning_rate": 2.9939173592018185e-06,
"loss": 0.1219,
"step": 1797
},
{
"epoch": 1.263527758257203,
"grad_norm": 0.19631794416850062,
"learning_rate": 2.9888629847301e-06,
"loss": 0.1021,
"step": 1798
},
{
"epoch": 1.264230498945889,
"grad_norm": 0.19528634272658907,
"learning_rate": 2.9838110608535297e-06,
"loss": 0.0971,
"step": 1799
},
{
"epoch": 1.264933239634575,
"grad_norm": 0.20182142003426387,
"learning_rate": 2.978761593727938e-06,
"loss": 0.1035,
"step": 1800
},
{
"epoch": 1.2656359803232606,
"grad_norm": 0.20229308398373397,
"learning_rate": 2.9737145895061626e-06,
"loss": 0.1067,
"step": 1801
},
{
"epoch": 1.2663387210119466,
"grad_norm": 0.1898310882591269,
"learning_rate": 2.9686700543380386e-06,
"loss": 0.0915,
"step": 1802
},
{
"epoch": 1.2670414617006325,
"grad_norm": 0.20568746801035867,
"learning_rate": 2.9636279943703956e-06,
"loss": 0.1114,
"step": 1803
},
{
"epoch": 1.2677442023893184,
"grad_norm": 0.20951658512058696,
"learning_rate": 2.9585884157470457e-06,
"loss": 0.1181,
"step": 1804
},
{
"epoch": 1.268446943078004,
"grad_norm": 0.1969209782521385,
"learning_rate": 2.953551324608775e-06,
"loss": 0.1095,
"step": 1805
},
{
"epoch": 1.26914968376669,
"grad_norm": 0.1983154238297072,
"learning_rate": 2.948516727093345e-06,
"loss": 0.1104,
"step": 1806
},
{
"epoch": 1.269852424455376,
"grad_norm": 0.2013394277132579,
"learning_rate": 2.943484629335471e-06,
"loss": 0.1055,
"step": 1807
},
{
"epoch": 1.2705551651440619,
"grad_norm": 0.21090747903480334,
"learning_rate": 2.9384550374668276e-06,
"loss": 0.1321,
"step": 1808
},
{
"epoch": 1.2712579058327478,
"grad_norm": 0.20183694702949714,
"learning_rate": 2.933427957616034e-06,
"loss": 0.1056,
"step": 1809
},
{
"epoch": 1.2719606465214337,
"grad_norm": 0.20276217414880113,
"learning_rate": 2.9284033959086494e-06,
"loss": 0.1086,
"step": 1810
},
{
"epoch": 1.2726633872101194,
"grad_norm": 0.20935565032827277,
"learning_rate": 2.923381358467162e-06,
"loss": 0.1195,
"step": 1811
},
{
"epoch": 1.2733661278988053,
"grad_norm": 0.20515912374236397,
"learning_rate": 2.918361851410987e-06,
"loss": 0.1068,
"step": 1812
},
{
"epoch": 1.2740688685874912,
"grad_norm": 0.19715122453763603,
"learning_rate": 2.9133448808564556e-06,
"loss": 0.0961,
"step": 1813
},
{
"epoch": 1.2747716092761772,
"grad_norm": 0.19901522332348257,
"learning_rate": 2.9083304529168087e-06,
"loss": 0.1095,
"step": 1814
},
{
"epoch": 1.2754743499648629,
"grad_norm": 0.22503016538006648,
"learning_rate": 2.9033185737021875e-06,
"loss": 0.1306,
"step": 1815
},
{
"epoch": 1.2761770906535488,
"grad_norm": 0.21388247138026822,
"learning_rate": 2.8983092493196286e-06,
"loss": 0.1286,
"step": 1816
},
{
"epoch": 1.2768798313422347,
"grad_norm": 0.18564813772528047,
"learning_rate": 2.8933024858730546e-06,
"loss": 0.0862,
"step": 1817
},
{
"epoch": 1.2775825720309206,
"grad_norm": 0.21727640527524716,
"learning_rate": 2.8882982894632694e-06,
"loss": 0.1245,
"step": 1818
},
{
"epoch": 1.2782853127196065,
"grad_norm": 0.20283709887777684,
"learning_rate": 2.883296666187947e-06,
"loss": 0.1075,
"step": 1819
},
{
"epoch": 1.2789880534082925,
"grad_norm": 0.2151726427220625,
"learning_rate": 2.8782976221416265e-06,
"loss": 0.1196,
"step": 1820
},
{
"epoch": 1.2796907940969782,
"grad_norm": 0.2113052794474568,
"learning_rate": 2.873301163415705e-06,
"loss": 0.1125,
"step": 1821
},
{
"epoch": 1.280393534785664,
"grad_norm": 0.2050604129929308,
"learning_rate": 2.8683072960984294e-06,
"loss": 0.1111,
"step": 1822
},
{
"epoch": 1.28109627547435,
"grad_norm": 0.1974898180416581,
"learning_rate": 2.8633160262748873e-06,
"loss": 0.102,
"step": 1823
},
{
"epoch": 1.2817990161630357,
"grad_norm": 0.2231666433186142,
"learning_rate": 2.858327360027e-06,
"loss": 0.1416,
"step": 1824
},
{
"epoch": 1.2825017568517216,
"grad_norm": 0.20270107668104503,
"learning_rate": 2.8533413034335257e-06,
"loss": 0.105,
"step": 1825
},
{
"epoch": 1.2832044975404076,
"grad_norm": 0.1939507659998499,
"learning_rate": 2.8483578625700286e-06,
"loss": 0.1027,
"step": 1826
},
{
"epoch": 1.2839072382290935,
"grad_norm": 0.19707780444478387,
"learning_rate": 2.8433770435088957e-06,
"loss": 0.1059,
"step": 1827
},
{
"epoch": 1.2846099789177794,
"grad_norm": 0.1936990951814314,
"learning_rate": 2.838398852319313e-06,
"loss": 0.1018,
"step": 1828
},
{
"epoch": 1.2853127196064653,
"grad_norm": 0.20318027269436129,
"learning_rate": 2.8334232950672724e-06,
"loss": 0.1113,
"step": 1829
},
{
"epoch": 1.286015460295151,
"grad_norm": 0.21847964804499967,
"learning_rate": 2.8284503778155513e-06,
"loss": 0.1344,
"step": 1830
},
{
"epoch": 1.286718200983837,
"grad_norm": 0.19063085310277356,
"learning_rate": 2.823480106623704e-06,
"loss": 0.0928,
"step": 1831
},
{
"epoch": 1.2874209416725229,
"grad_norm": 0.19382317068744026,
"learning_rate": 2.8185124875480742e-06,
"loss": 0.0995,
"step": 1832
},
{
"epoch": 1.2881236823612088,
"grad_norm": 0.18595679228806974,
"learning_rate": 2.8135475266417626e-06,
"loss": 0.0913,
"step": 1833
},
{
"epoch": 1.2888264230498945,
"grad_norm": 0.1996164209513941,
"learning_rate": 2.808585229954637e-06,
"loss": 0.1016,
"step": 1834
},
{
"epoch": 1.2895291637385804,
"grad_norm": 0.20011221166641444,
"learning_rate": 2.803625603533316e-06,
"loss": 0.1089,
"step": 1835
},
{
"epoch": 1.2902319044272663,
"grad_norm": 0.20501378204057225,
"learning_rate": 2.7986686534211656e-06,
"loss": 0.0925,
"step": 1836
},
{
"epoch": 1.2909346451159522,
"grad_norm": 0.20698702391047014,
"learning_rate": 2.79371438565829e-06,
"loss": 0.1093,
"step": 1837
},
{
"epoch": 1.2916373858046382,
"grad_norm": 0.20212629623394096,
"learning_rate": 2.7887628062815252e-06,
"loss": 0.1019,
"step": 1838
},
{
"epoch": 1.292340126493324,
"grad_norm": 0.20171954124776764,
"learning_rate": 2.7838139213244318e-06,
"loss": 0.1053,
"step": 1839
},
{
"epoch": 1.2930428671820098,
"grad_norm": 0.20809679576695989,
"learning_rate": 2.7788677368172877e-06,
"loss": 0.1247,
"step": 1840
},
{
"epoch": 1.2937456078706957,
"grad_norm": 0.21307456107463,
"learning_rate": 2.7739242587870786e-06,
"loss": 0.1279,
"step": 1841
},
{
"epoch": 1.2944483485593816,
"grad_norm": 0.20450262680355996,
"learning_rate": 2.7689834932574923e-06,
"loss": 0.1123,
"step": 1842
},
{
"epoch": 1.2951510892480675,
"grad_norm": 0.21079471844092945,
"learning_rate": 2.764045446248913e-06,
"loss": 0.1206,
"step": 1843
},
{
"epoch": 1.2958538299367532,
"grad_norm": 0.2222153952258277,
"learning_rate": 2.7591101237784122e-06,
"loss": 0.1326,
"step": 1844
},
{
"epoch": 1.2965565706254392,
"grad_norm": 0.19517804489366536,
"learning_rate": 2.7541775318597407e-06,
"loss": 0.0982,
"step": 1845
},
{
"epoch": 1.297259311314125,
"grad_norm": 0.20053795050901027,
"learning_rate": 2.7492476765033227e-06,
"loss": 0.1137,
"step": 1846
},
{
"epoch": 1.297962052002811,
"grad_norm": 0.21424851108854256,
"learning_rate": 2.7443205637162463e-06,
"loss": 0.1268,
"step": 1847
},
{
"epoch": 1.298664792691497,
"grad_norm": 0.20742266286451969,
"learning_rate": 2.7393961995022565e-06,
"loss": 0.1112,
"step": 1848
},
{
"epoch": 1.2993675333801828,
"grad_norm": 0.1921458488895861,
"learning_rate": 2.7344745898617598e-06,
"loss": 0.099,
"step": 1849
},
{
"epoch": 1.3000702740688685,
"grad_norm": 0.20991007618743646,
"learning_rate": 2.7295557407917904e-06,
"loss": 0.1184,
"step": 1850
},
{
"epoch": 1.3007730147575545,
"grad_norm": 0.19739785680293268,
"learning_rate": 2.7246396582860293e-06,
"loss": 0.1074,
"step": 1851
},
{
"epoch": 1.3014757554462404,
"grad_norm": 0.20061378143577605,
"learning_rate": 2.71972634833478e-06,
"loss": 0.1054,
"step": 1852
},
{
"epoch": 1.302178496134926,
"grad_norm": 0.19114088090909145,
"learning_rate": 2.7148158169249757e-06,
"loss": 0.1039,
"step": 1853
},
{
"epoch": 1.302881236823612,
"grad_norm": 0.2081777966566255,
"learning_rate": 2.709908070040159e-06,
"loss": 0.1124,
"step": 1854
},
{
"epoch": 1.303583977512298,
"grad_norm": 0.19990415282293647,
"learning_rate": 2.705003113660477e-06,
"loss": 0.1063,
"step": 1855
},
{
"epoch": 1.3042867182009839,
"grad_norm": 0.20468258589863594,
"learning_rate": 2.7001009537626775e-06,
"loss": 0.1122,
"step": 1856
},
{
"epoch": 1.3049894588896698,
"grad_norm": 0.22006178866159676,
"learning_rate": 2.695201596320107e-06,
"loss": 0.1311,
"step": 1857
},
{
"epoch": 1.3056921995783557,
"grad_norm": 0.2029269262274828,
"learning_rate": 2.690305047302692e-06,
"loss": 0.1174,
"step": 1858
},
{
"epoch": 1.3063949402670414,
"grad_norm": 0.2008496778820487,
"learning_rate": 2.685411312676936e-06,
"loss": 0.1079,
"step": 1859
},
{
"epoch": 1.3070976809557273,
"grad_norm": 0.2008762160212735,
"learning_rate": 2.6805203984059156e-06,
"loss": 0.1104,
"step": 1860
},
{
"epoch": 1.3078004216444132,
"grad_norm": 0.20910023830605717,
"learning_rate": 2.67563231044927e-06,
"loss": 0.1168,
"step": 1861
},
{
"epoch": 1.3085031623330992,
"grad_norm": 0.19807919944378127,
"learning_rate": 2.670747054763193e-06,
"loss": 0.1011,
"step": 1862
},
{
"epoch": 1.3092059030217849,
"grad_norm": 0.2148014202188045,
"learning_rate": 2.6658646373004304e-06,
"loss": 0.1225,
"step": 1863
},
{
"epoch": 1.3099086437104708,
"grad_norm": 0.20107482404962182,
"learning_rate": 2.6609850640102665e-06,
"loss": 0.0999,
"step": 1864
},
{
"epoch": 1.3106113843991567,
"grad_norm": 0.2222740225722789,
"learning_rate": 2.6561083408385224e-06,
"loss": 0.13,
"step": 1865
},
{
"epoch": 1.3113141250878426,
"grad_norm": 0.21040652131180118,
"learning_rate": 2.6512344737275443e-06,
"loss": 0.1105,
"step": 1866
},
{
"epoch": 1.3120168657765285,
"grad_norm": 0.21004588769914104,
"learning_rate": 2.6463634686161998e-06,
"loss": 0.1108,
"step": 1867
},
{
"epoch": 1.3127196064652145,
"grad_norm": 0.21815151919528478,
"learning_rate": 2.6414953314398673e-06,
"loss": 0.1272,
"step": 1868
},
{
"epoch": 1.3134223471539002,
"grad_norm": 0.21013234762732644,
"learning_rate": 2.6366300681304334e-06,
"loss": 0.1094,
"step": 1869
},
{
"epoch": 1.314125087842586,
"grad_norm": 0.21291880739779054,
"learning_rate": 2.63176768461628e-06,
"loss": 0.1318,
"step": 1870
},
{
"epoch": 1.314827828531272,
"grad_norm": 0.19924885610924828,
"learning_rate": 2.6269081868222814e-06,
"loss": 0.1013,
"step": 1871
},
{
"epoch": 1.3155305692199577,
"grad_norm": 0.1906840060147861,
"learning_rate": 2.6220515806697934e-06,
"loss": 0.0926,
"step": 1872
},
{
"epoch": 1.3162333099086436,
"grad_norm": 0.20164657868390015,
"learning_rate": 2.6171978720766557e-06,
"loss": 0.102,
"step": 1873
},
{
"epoch": 1.3169360505973295,
"grad_norm": 0.22596416763259972,
"learning_rate": 2.6123470669571665e-06,
"loss": 0.1364,
"step": 1874
},
{
"epoch": 1.3176387912860155,
"grad_norm": 0.21287580820476498,
"learning_rate": 2.607499171222093e-06,
"loss": 0.1164,
"step": 1875
},
{
"epoch": 1.3183415319747014,
"grad_norm": 0.2075397541951565,
"learning_rate": 2.602654190778654e-06,
"loss": 0.1154,
"step": 1876
},
{
"epoch": 1.3190442726633873,
"grad_norm": 0.19585759266950592,
"learning_rate": 2.5978121315305217e-06,
"loss": 0.0973,
"step": 1877
},
{
"epoch": 1.319747013352073,
"grad_norm": 0.20705440563880406,
"learning_rate": 2.5929729993778046e-06,
"loss": 0.1111,
"step": 1878
},
{
"epoch": 1.320449754040759,
"grad_norm": 0.20390632453383642,
"learning_rate": 2.5881368002170403e-06,
"loss": 0.1159,
"step": 1879
},
{
"epoch": 1.3211524947294448,
"grad_norm": 0.2062234048828981,
"learning_rate": 2.5833035399411977e-06,
"loss": 0.1134,
"step": 1880
},
{
"epoch": 1.3218552354181308,
"grad_norm": 0.21894675321858886,
"learning_rate": 2.5784732244396667e-06,
"loss": 0.1282,
"step": 1881
},
{
"epoch": 1.3225579761068165,
"grad_norm": 0.21635101767419024,
"learning_rate": 2.573645859598245e-06,
"loss": 0.1346,
"step": 1882
},
{
"epoch": 1.3232607167955024,
"grad_norm": 0.2116064229074609,
"learning_rate": 2.568821451299135e-06,
"loss": 0.1318,
"step": 1883
},
{
"epoch": 1.3239634574841883,
"grad_norm": 0.191925509411417,
"learning_rate": 2.564000005420938e-06,
"loss": 0.0944,
"step": 1884
},
{
"epoch": 1.3246661981728742,
"grad_norm": 0.2060492658065952,
"learning_rate": 2.5591815278386456e-06,
"loss": 0.1122,
"step": 1885
},
{
"epoch": 1.3253689388615602,
"grad_norm": 0.20004025676345918,
"learning_rate": 2.554366024423631e-06,
"loss": 0.1077,
"step": 1886
},
{
"epoch": 1.326071679550246,
"grad_norm": 0.2076668153505361,
"learning_rate": 2.5495535010436445e-06,
"loss": 0.1176,
"step": 1887
},
{
"epoch": 1.3267744202389318,
"grad_norm": 0.20257144102904875,
"learning_rate": 2.5447439635628046e-06,
"loss": 0.1032,
"step": 1888
},
{
"epoch": 1.3274771609276177,
"grad_norm": 0.1988139666093882,
"learning_rate": 2.5399374178415926e-06,
"loss": 0.1031,
"step": 1889
},
{
"epoch": 1.3281799016163036,
"grad_norm": 0.21394229892796204,
"learning_rate": 2.535133869736842e-06,
"loss": 0.1228,
"step": 1890
},
{
"epoch": 1.3288826423049895,
"grad_norm": 0.19349364831658605,
"learning_rate": 2.5303333251017378e-06,
"loss": 0.0984,
"step": 1891
},
{
"epoch": 1.3295853829936752,
"grad_norm": 0.21107969084770078,
"learning_rate": 2.5255357897857996e-06,
"loss": 0.119,
"step": 1892
},
{
"epoch": 1.3302881236823612,
"grad_norm": 0.22032518714575655,
"learning_rate": 2.5207412696348854e-06,
"loss": 0.128,
"step": 1893
},
{
"epoch": 1.330990864371047,
"grad_norm": 0.19205964333763206,
"learning_rate": 2.515949770491175e-06,
"loss": 0.1031,
"step": 1894
},
{
"epoch": 1.331693605059733,
"grad_norm": 0.20646564394355452,
"learning_rate": 2.51116129819317e-06,
"loss": 0.1182,
"step": 1895
},
{
"epoch": 1.332396345748419,
"grad_norm": 0.21982646639193332,
"learning_rate": 2.5063758585756814e-06,
"loss": 0.1288,
"step": 1896
},
{
"epoch": 1.3330990864371048,
"grad_norm": 0.21224918006784035,
"learning_rate": 2.5015934574698303e-06,
"loss": 0.1138,
"step": 1897
},
{
"epoch": 1.3338018271257905,
"grad_norm": 0.20030391941105088,
"learning_rate": 2.496814100703026e-06,
"loss": 0.1032,
"step": 1898
},
{
"epoch": 1.3345045678144765,
"grad_norm": 0.19744946793364554,
"learning_rate": 2.4920377940989763e-06,
"loss": 0.1006,
"step": 1899
},
{
"epoch": 1.3352073085031624,
"grad_norm": 0.20432085170734207,
"learning_rate": 2.4872645434776666e-06,
"loss": 0.1132,
"step": 1900
},
{
"epoch": 1.335910049191848,
"grad_norm": 0.21607254462059097,
"learning_rate": 2.4824943546553646e-06,
"loss": 0.1156,
"step": 1901
},
{
"epoch": 1.336612789880534,
"grad_norm": 0.2039200106752253,
"learning_rate": 2.4777272334446055e-06,
"loss": 0.1064,
"step": 1902
},
{
"epoch": 1.33731553056922,
"grad_norm": 0.20226173545261977,
"learning_rate": 2.472963185654181e-06,
"loss": 0.1128,
"step": 1903
},
{
"epoch": 1.3380182712579058,
"grad_norm": 0.21330987318185843,
"learning_rate": 2.4682022170891403e-06,
"loss": 0.1204,
"step": 1904
},
{
"epoch": 1.3387210119465918,
"grad_norm": 0.19676962320132224,
"learning_rate": 2.4634443335507868e-06,
"loss": 0.1093,
"step": 1905
},
{
"epoch": 1.3394237526352777,
"grad_norm": 0.20216984296388715,
"learning_rate": 2.4586895408366585e-06,
"loss": 0.11,
"step": 1906
},
{
"epoch": 1.3401264933239634,
"grad_norm": 0.2089307138353558,
"learning_rate": 2.45393784474053e-06,
"loss": 0.1138,
"step": 1907
},
{
"epoch": 1.3408292340126493,
"grad_norm": 0.2041792932362996,
"learning_rate": 2.449189251052396e-06,
"loss": 0.1014,
"step": 1908
},
{
"epoch": 1.3415319747013352,
"grad_norm": 0.19929864437821332,
"learning_rate": 2.444443765558482e-06,
"loss": 0.0966,
"step": 1909
},
{
"epoch": 1.3422347153900211,
"grad_norm": 0.1994941698544247,
"learning_rate": 2.4397013940412178e-06,
"loss": 0.1001,
"step": 1910
},
{
"epoch": 1.3429374560787068,
"grad_norm": 0.2028731873157231,
"learning_rate": 2.434962142279242e-06,
"loss": 0.1148,
"step": 1911
},
{
"epoch": 1.3436401967673928,
"grad_norm": 0.21501077566107213,
"learning_rate": 2.4302260160473906e-06,
"loss": 0.1175,
"step": 1912
},
{
"epoch": 1.3443429374560787,
"grad_norm": 0.1961470782067696,
"learning_rate": 2.4254930211166922e-06,
"loss": 0.1068,
"step": 1913
},
{
"epoch": 1.3450456781447646,
"grad_norm": 0.20422366061670116,
"learning_rate": 2.420763163254359e-06,
"loss": 0.1134,
"step": 1914
},
{
"epoch": 1.3457484188334505,
"grad_norm": 0.19712020651802423,
"learning_rate": 2.4160364482237797e-06,
"loss": 0.0965,
"step": 1915
},
{
"epoch": 1.3464511595221365,
"grad_norm": 0.19561609360272306,
"learning_rate": 2.4113128817845165e-06,
"loss": 0.0973,
"step": 1916
},
{
"epoch": 1.3471539002108222,
"grad_norm": 0.21321322813402874,
"learning_rate": 2.406592469692292e-06,
"loss": 0.1214,
"step": 1917
},
{
"epoch": 1.347856640899508,
"grad_norm": 0.2203467970794102,
"learning_rate": 2.4018752176989864e-06,
"loss": 0.1409,
"step": 1918
},
{
"epoch": 1.348559381588194,
"grad_norm": 0.22079685438635657,
"learning_rate": 2.3971611315526295e-06,
"loss": 0.1276,
"step": 1919
},
{
"epoch": 1.3492621222768797,
"grad_norm": 0.19234087846553494,
"learning_rate": 2.392450216997391e-06,
"loss": 0.0897,
"step": 1920
},
{
"epoch": 1.3499648629655656,
"grad_norm": 0.19535466052009293,
"learning_rate": 2.3877424797735834e-06,
"loss": 0.0925,
"step": 1921
},
{
"epoch": 1.3506676036542515,
"grad_norm": 0.21468048507970636,
"learning_rate": 2.383037925617637e-06,
"loss": 0.122,
"step": 1922
},
{
"epoch": 1.3513703443429375,
"grad_norm": 0.20719124344349177,
"learning_rate": 2.3783365602621116e-06,
"loss": 0.1163,
"step": 1923
},
{
"epoch": 1.3520730850316234,
"grad_norm": 0.19894778483539985,
"learning_rate": 2.373638389435676e-06,
"loss": 0.1074,
"step": 1924
},
{
"epoch": 1.3527758257203093,
"grad_norm": 0.20263753208350302,
"learning_rate": 2.368943418863112e-06,
"loss": 0.1024,
"step": 1925
},
{
"epoch": 1.353478566408995,
"grad_norm": 0.2067932100976602,
"learning_rate": 2.3642516542652993e-06,
"loss": 0.1161,
"step": 1926
},
{
"epoch": 1.354181307097681,
"grad_norm": 0.20895245271729412,
"learning_rate": 2.359563101359208e-06,
"loss": 0.1172,
"step": 1927
},
{
"epoch": 1.3548840477863668,
"grad_norm": 0.18984995407877459,
"learning_rate": 2.3548777658578964e-06,
"loss": 0.0937,
"step": 1928
},
{
"epoch": 1.3555867884750528,
"grad_norm": 0.21709101466769462,
"learning_rate": 2.350195653470507e-06,
"loss": 0.1174,
"step": 1929
},
{
"epoch": 1.3562895291637385,
"grad_norm": 0.1868293778161965,
"learning_rate": 2.3455167699022497e-06,
"loss": 0.0838,
"step": 1930
},
{
"epoch": 1.3569922698524244,
"grad_norm": 0.19194725844829635,
"learning_rate": 2.3408411208544036e-06,
"loss": 0.0962,
"step": 1931
},
{
"epoch": 1.3576950105411103,
"grad_norm": 0.19721257439065992,
"learning_rate": 2.3361687120242986e-06,
"loss": 0.0983,
"step": 1932
},
{
"epoch": 1.3583977512297962,
"grad_norm": 0.21358591610494476,
"learning_rate": 2.331499549105328e-06,
"loss": 0.1205,
"step": 1933
},
{
"epoch": 1.3591004919184821,
"grad_norm": 0.20986726846090942,
"learning_rate": 2.3268336377869222e-06,
"loss": 0.1055,
"step": 1934
},
{
"epoch": 1.359803232607168,
"grad_norm": 0.20166510133123713,
"learning_rate": 2.322170983754553e-06,
"loss": 0.1059,
"step": 1935
},
{
"epoch": 1.3605059732958538,
"grad_norm": 0.21732993882599927,
"learning_rate": 2.3175115926897164e-06,
"loss": 0.1216,
"step": 1936
},
{
"epoch": 1.3612087139845397,
"grad_norm": 0.20105129964887172,
"learning_rate": 2.312855470269943e-06,
"loss": 0.1038,
"step": 1937
},
{
"epoch": 1.3619114546732256,
"grad_norm": 0.22106765730718453,
"learning_rate": 2.3082026221687736e-06,
"loss": 0.1401,
"step": 1938
},
{
"epoch": 1.3626141953619115,
"grad_norm": 0.21118139626040042,
"learning_rate": 2.3035530540557606e-06,
"loss": 0.116,
"step": 1939
},
{
"epoch": 1.3633169360505972,
"grad_norm": 0.19771600093946853,
"learning_rate": 2.2989067715964592e-06,
"loss": 0.1015,
"step": 1940
},
{
"epoch": 1.3640196767392831,
"grad_norm": 0.20418319024556572,
"learning_rate": 2.2942637804524224e-06,
"loss": 0.1096,
"step": 1941
},
{
"epoch": 1.364722417427969,
"grad_norm": 0.2087250548407141,
"learning_rate": 2.289624086281192e-06,
"loss": 0.1148,
"step": 1942
},
{
"epoch": 1.365425158116655,
"grad_norm": 0.1989947499140422,
"learning_rate": 2.2849876947362916e-06,
"loss": 0.1004,
"step": 1943
},
{
"epoch": 1.366127898805341,
"grad_norm": 0.21366001011045058,
"learning_rate": 2.28035461146722e-06,
"loss": 0.1227,
"step": 1944
},
{
"epoch": 1.3668306394940268,
"grad_norm": 0.20411438571696935,
"learning_rate": 2.275724842119451e-06,
"loss": 0.1087,
"step": 1945
},
{
"epoch": 1.3675333801827125,
"grad_norm": 0.21574289574856162,
"learning_rate": 2.2710983923344106e-06,
"loss": 0.1271,
"step": 1946
},
{
"epoch": 1.3682361208713985,
"grad_norm": 0.1864284789215416,
"learning_rate": 2.266475267749486e-06,
"loss": 0.0863,
"step": 1947
},
{
"epoch": 1.3689388615600844,
"grad_norm": 0.1936346194700634,
"learning_rate": 2.26185547399801e-06,
"loss": 0.0894,
"step": 1948
},
{
"epoch": 1.36964160224877,
"grad_norm": 0.1985819977993313,
"learning_rate": 2.2572390167092607e-06,
"loss": 0.1088,
"step": 1949
},
{
"epoch": 1.370344342937456,
"grad_norm": 0.20832796151721625,
"learning_rate": 2.252625901508449e-06,
"loss": 0.117,
"step": 1950
},
{
"epoch": 1.371047083626142,
"grad_norm": 0.20398267773883874,
"learning_rate": 2.248016134016708e-06,
"loss": 0.1079,
"step": 1951
},
{
"epoch": 1.3717498243148278,
"grad_norm": 0.2097935467710298,
"learning_rate": 2.2434097198510964e-06,
"loss": 0.1176,
"step": 1952
},
{
"epoch": 1.3724525650035138,
"grad_norm": 0.21230663646716944,
"learning_rate": 2.2388066646245895e-06,
"loss": 0.1198,
"step": 1953
},
{
"epoch": 1.3731553056921997,
"grad_norm": 0.21511735963798928,
"learning_rate": 2.2342069739460654e-06,
"loss": 0.1087,
"step": 1954
},
{
"epoch": 1.3738580463808854,
"grad_norm": 0.19887602260978487,
"learning_rate": 2.229610653420306e-06,
"loss": 0.1029,
"step": 1955
},
{
"epoch": 1.3745607870695713,
"grad_norm": 0.20312353764937316,
"learning_rate": 2.2250177086479774e-06,
"loss": 0.1166,
"step": 1956
},
{
"epoch": 1.3752635277582572,
"grad_norm": 0.2036734011575826,
"learning_rate": 2.220428145225646e-06,
"loss": 0.1074,
"step": 1957
},
{
"epoch": 1.3759662684469431,
"grad_norm": 0.19117003408267308,
"learning_rate": 2.2158419687457484e-06,
"loss": 0.0952,
"step": 1958
},
{
"epoch": 1.3766690091356288,
"grad_norm": 0.2114107270053678,
"learning_rate": 2.2112591847965977e-06,
"loss": 0.1193,
"step": 1959
},
{
"epoch": 1.3773717498243148,
"grad_norm": 0.20738678520252515,
"learning_rate": 2.206679798962372e-06,
"loss": 0.1148,
"step": 1960
},
{
"epoch": 1.3780744905130007,
"grad_norm": 0.20963346017953932,
"learning_rate": 2.202103816823109e-06,
"loss": 0.1186,
"step": 1961
},
{
"epoch": 1.3787772312016866,
"grad_norm": 0.2036954826363129,
"learning_rate": 2.1975312439547e-06,
"loss": 0.1135,
"step": 1962
},
{
"epoch": 1.3794799718903725,
"grad_norm": 0.19726342666812274,
"learning_rate": 2.1929620859288796e-06,
"loss": 0.1043,
"step": 1963
},
{
"epoch": 1.3801827125790584,
"grad_norm": 0.21870069311530438,
"learning_rate": 2.1883963483132243e-06,
"loss": 0.1207,
"step": 1964
},
{
"epoch": 1.3808854532677441,
"grad_norm": 0.19122393765007917,
"learning_rate": 2.1838340366711406e-06,
"loss": 0.0923,
"step": 1965
},
{
"epoch": 1.38158819395643,
"grad_norm": 0.19628119558214588,
"learning_rate": 2.1792751565618625e-06,
"loss": 0.0985,
"step": 1966
},
{
"epoch": 1.382290934645116,
"grad_norm": 0.1889996131725444,
"learning_rate": 2.17471971354044e-06,
"loss": 0.0884,
"step": 1967
},
{
"epoch": 1.382993675333802,
"grad_norm": 0.19834704319562207,
"learning_rate": 2.170167713157736e-06,
"loss": 0.0978,
"step": 1968
},
{
"epoch": 1.3836964160224876,
"grad_norm": 0.21825760245866319,
"learning_rate": 2.165619160960423e-06,
"loss": 0.1183,
"step": 1969
},
{
"epoch": 1.3843991567111735,
"grad_norm": 0.20029547259241942,
"learning_rate": 2.161074062490962e-06,
"loss": 0.1015,
"step": 1970
},
{
"epoch": 1.3851018973998594,
"grad_norm": 0.22837726916295475,
"learning_rate": 2.1565324232876143e-06,
"loss": 0.1303,
"step": 1971
},
{
"epoch": 1.3858046380885454,
"grad_norm": 0.20028498138434037,
"learning_rate": 2.1519942488844208e-06,
"loss": 0.1039,
"step": 1972
},
{
"epoch": 1.3865073787772313,
"grad_norm": 0.20446712276521511,
"learning_rate": 2.1474595448112064e-06,
"loss": 0.109,
"step": 1973
},
{
"epoch": 1.3872101194659172,
"grad_norm": 0.2108577036700221,
"learning_rate": 2.142928316593563e-06,
"loss": 0.1183,
"step": 1974
},
{
"epoch": 1.387912860154603,
"grad_norm": 0.20507904072792962,
"learning_rate": 2.1384005697528454e-06,
"loss": 0.0944,
"step": 1975
},
{
"epoch": 1.3886156008432888,
"grad_norm": 0.19602615398966677,
"learning_rate": 2.133876309806168e-06,
"loss": 0.1022,
"step": 1976
},
{
"epoch": 1.3893183415319748,
"grad_norm": 0.21509372229065307,
"learning_rate": 2.1293555422664e-06,
"loss": 0.1295,
"step": 1977
},
{
"epoch": 1.3900210822206605,
"grad_norm": 0.20725222813695857,
"learning_rate": 2.1248382726421525e-06,
"loss": 0.1171,
"step": 1978
},
{
"epoch": 1.3907238229093464,
"grad_norm": 0.2098088961017894,
"learning_rate": 2.1203245064377737e-06,
"loss": 0.12,
"step": 1979
},
{
"epoch": 1.3914265635980323,
"grad_norm": 0.1985033207469377,
"learning_rate": 2.1158142491533384e-06,
"loss": 0.0972,
"step": 1980
},
{
"epoch": 1.3921293042867182,
"grad_norm": 0.20666051671474164,
"learning_rate": 2.111307506284656e-06,
"loss": 0.1136,
"step": 1981
},
{
"epoch": 1.3928320449754041,
"grad_norm": 0.2088697093945666,
"learning_rate": 2.106804283323246e-06,
"loss": 0.1143,
"step": 1982
},
{
"epoch": 1.39353478566409,
"grad_norm": 0.2139556126114619,
"learning_rate": 2.1023045857563417e-06,
"loss": 0.1158,
"step": 1983
},
{
"epoch": 1.3942375263527758,
"grad_norm": 0.1956627158088341,
"learning_rate": 2.0978084190668785e-06,
"loss": 0.0996,
"step": 1984
},
{
"epoch": 1.3949402670414617,
"grad_norm": 0.2000029162790928,
"learning_rate": 2.093315788733492e-06,
"loss": 0.1007,
"step": 1985
},
{
"epoch": 1.3956430077301476,
"grad_norm": 0.2076892456710741,
"learning_rate": 2.088826700230506e-06,
"loss": 0.1115,
"step": 1986
},
{
"epoch": 1.3963457484188335,
"grad_norm": 0.19560387382367483,
"learning_rate": 2.084341159027932e-06,
"loss": 0.1025,
"step": 1987
},
{
"epoch": 1.3970484891075192,
"grad_norm": 0.2116692580182671,
"learning_rate": 2.079859170591455e-06,
"loss": 0.1214,
"step": 1988
},
{
"epoch": 1.3977512297962051,
"grad_norm": 0.21037810808939886,
"learning_rate": 2.0753807403824346e-06,
"loss": 0.1135,
"step": 1989
},
{
"epoch": 1.398453970484891,
"grad_norm": 0.19802904107743322,
"learning_rate": 2.0709058738578915e-06,
"loss": 0.1061,
"step": 1990
},
{
"epoch": 1.399156711173577,
"grad_norm": 0.20095641742655926,
"learning_rate": 2.0664345764705064e-06,
"loss": 0.1052,
"step": 1991
},
{
"epoch": 1.399859451862263,
"grad_norm": 0.20603632348580908,
"learning_rate": 2.0619668536686095e-06,
"loss": 0.1134,
"step": 1992
},
{
"epoch": 1.4005621925509488,
"grad_norm": 0.21000377252926367,
"learning_rate": 2.0575027108961766e-06,
"loss": 0.1231,
"step": 1993
},
{
"epoch": 1.4012649332396345,
"grad_norm": 0.20534122070711414,
"learning_rate": 2.0530421535928197e-06,
"loss": 0.119,
"step": 1994
},
{
"epoch": 1.4019676739283204,
"grad_norm": 0.2040928139422081,
"learning_rate": 2.0485851871937833e-06,
"loss": 0.1152,
"step": 1995
},
{
"epoch": 1.4026704146170064,
"grad_norm": 0.20330678742663,
"learning_rate": 2.044131817129934e-06,
"loss": 0.1131,
"step": 1996
},
{
"epoch": 1.403373155305692,
"grad_norm": 0.2030121966301544,
"learning_rate": 2.0396820488277606e-06,
"loss": 0.105,
"step": 1997
},
{
"epoch": 1.404075895994378,
"grad_norm": 0.19912211766901255,
"learning_rate": 2.0352358877093616e-06,
"loss": 0.1035,
"step": 1998
},
{
"epoch": 1.404778636683064,
"grad_norm": 0.20149362214263367,
"learning_rate": 2.030793339192434e-06,
"loss": 0.1041,
"step": 1999
},
{
"epoch": 1.4054813773717498,
"grad_norm": 0.20042191772542825,
"learning_rate": 2.0263544086902785e-06,
"loss": 0.111,
"step": 2000
},
{
"epoch": 1.4054813773717498,
"eval_loss": 0.13530589640140533,
"eval_runtime": 10.7129,
"eval_samples_per_second": 21.47,
"eval_steps_per_second": 5.414,
"step": 2000
},
{
"epoch": 1.4061841180604358,
"grad_norm": 0.20826897619115586,
"learning_rate": 2.0219191016117905e-06,
"loss": 0.1165,
"step": 2001
},
{
"epoch": 1.4068868587491217,
"grad_norm": 0.19681721496916088,
"learning_rate": 2.0174874233614433e-06,
"loss": 0.1038,
"step": 2002
},
{
"epoch": 1.4075895994378074,
"grad_norm": 0.20753839353006018,
"learning_rate": 2.013059379339294e-06,
"loss": 0.1111,
"step": 2003
},
{
"epoch": 1.4082923401264933,
"grad_norm": 0.18947075559552176,
"learning_rate": 2.008634974940962e-06,
"loss": 0.0929,
"step": 2004
},
{
"epoch": 1.4089950808151792,
"grad_norm": 0.18233956351222705,
"learning_rate": 2.004214215557645e-06,
"loss": 0.0874,
"step": 2005
},
{
"epoch": 1.4096978215038651,
"grad_norm": 0.21912854616739671,
"learning_rate": 1.9997971065760897e-06,
"loss": 0.1292,
"step": 2006
},
{
"epoch": 1.4104005621925508,
"grad_norm": 0.20730436043623252,
"learning_rate": 1.9953836533785986e-06,
"loss": 0.1202,
"step": 2007
},
{
"epoch": 1.4111033028812368,
"grad_norm": 0.18994587192796794,
"learning_rate": 1.9909738613430187e-06,
"loss": 0.092,
"step": 2008
},
{
"epoch": 1.4118060435699227,
"grad_norm": 0.21912604890020082,
"learning_rate": 1.986567735842735e-06,
"loss": 0.1229,
"step": 2009
},
{
"epoch": 1.4125087842586086,
"grad_norm": 0.2207424752002224,
"learning_rate": 1.982165282246665e-06,
"loss": 0.1318,
"step": 2010
},
{
"epoch": 1.4132115249472945,
"grad_norm": 0.2101346836217857,
"learning_rate": 1.9777665059192542e-06,
"loss": 0.1163,
"step": 2011
},
{
"epoch": 1.4139142656359804,
"grad_norm": 0.2069204021171819,
"learning_rate": 1.9733714122204646e-06,
"loss": 0.1146,
"step": 2012
},
{
"epoch": 1.4146170063246661,
"grad_norm": 0.2025510331549367,
"learning_rate": 1.9689800065057716e-06,
"loss": 0.1039,
"step": 2013
},
{
"epoch": 1.415319747013352,
"grad_norm": 0.2042429241806955,
"learning_rate": 1.9645922941261575e-06,
"loss": 0.1145,
"step": 2014
},
{
"epoch": 1.416022487702038,
"grad_norm": 0.2031990974583769,
"learning_rate": 1.960208280428103e-06,
"loss": 0.1097,
"step": 2015
},
{
"epoch": 1.416725228390724,
"grad_norm": 0.20791180942703225,
"learning_rate": 1.955827970753583e-06,
"loss": 0.1144,
"step": 2016
},
{
"epoch": 1.4174279690794096,
"grad_norm": 0.2156630066960087,
"learning_rate": 1.9514513704400593e-06,
"loss": 0.1176,
"step": 2017
},
{
"epoch": 1.4181307097680955,
"grad_norm": 0.20292697813261448,
"learning_rate": 1.947078484820472e-06,
"loss": 0.1056,
"step": 2018
},
{
"epoch": 1.4188334504567814,
"grad_norm": 0.19408064716610754,
"learning_rate": 1.9427093192232373e-06,
"loss": 0.0999,
"step": 2019
},
{
"epoch": 1.4195361911454674,
"grad_norm": 0.20426796377491982,
"learning_rate": 1.9383438789722353e-06,
"loss": 0.1157,
"step": 2020
},
{
"epoch": 1.4202389318341533,
"grad_norm": 0.20698546798786516,
"learning_rate": 1.9339821693868082e-06,
"loss": 0.1191,
"step": 2021
},
{
"epoch": 1.4209416725228392,
"grad_norm": 0.20522140429033994,
"learning_rate": 1.9296241957817575e-06,
"loss": 0.1214,
"step": 2022
},
{
"epoch": 1.421644413211525,
"grad_norm": 0.21062980787555174,
"learning_rate": 1.925269963467322e-06,
"loss": 0.1204,
"step": 2023
},
{
"epoch": 1.4223471539002108,
"grad_norm": 0.20319096900932834,
"learning_rate": 1.9209194777491887e-06,
"loss": 0.1119,
"step": 2024
},
{
"epoch": 1.4230498945888967,
"grad_norm": 0.2225703676044375,
"learning_rate": 1.916572743928479e-06,
"loss": 0.1312,
"step": 2025
},
{
"epoch": 1.4237526352775824,
"grad_norm": 0.19325524012635267,
"learning_rate": 1.912229767301741e-06,
"loss": 0.101,
"step": 2026
},
{
"epoch": 1.4244553759662684,
"grad_norm": 0.20224914766637922,
"learning_rate": 1.907890553160947e-06,
"loss": 0.1116,
"step": 2027
},
{
"epoch": 1.4251581166549543,
"grad_norm": 0.21409766171669678,
"learning_rate": 1.903555106793477e-06,
"loss": 0.1323,
"step": 2028
},
{
"epoch": 1.4258608573436402,
"grad_norm": 0.20743407672141492,
"learning_rate": 1.8992234334821313e-06,
"loss": 0.1151,
"step": 2029
},
{
"epoch": 1.4265635980323261,
"grad_norm": 0.21077154623323105,
"learning_rate": 1.894895538505105e-06,
"loss": 0.1134,
"step": 2030
},
{
"epoch": 1.427266338721012,
"grad_norm": 0.20137404269979964,
"learning_rate": 1.8905714271359909e-06,
"loss": 0.1009,
"step": 2031
},
{
"epoch": 1.4279690794096978,
"grad_norm": 0.20219797015878077,
"learning_rate": 1.886251104643772e-06,
"loss": 0.106,
"step": 2032
},
{
"epoch": 1.4286718200983837,
"grad_norm": 0.21614170295671153,
"learning_rate": 1.8819345762928148e-06,
"loss": 0.121,
"step": 2033
},
{
"epoch": 1.4293745607870696,
"grad_norm": 0.20275572756141108,
"learning_rate": 1.877621847342862e-06,
"loss": 0.1094,
"step": 2034
},
{
"epoch": 1.4300773014757555,
"grad_norm": 0.2118462576683597,
"learning_rate": 1.873312923049026e-06,
"loss": 0.1252,
"step": 2035
},
{
"epoch": 1.4307800421644412,
"grad_norm": 0.19929905771471276,
"learning_rate": 1.8690078086617847e-06,
"loss": 0.0914,
"step": 2036
},
{
"epoch": 1.4314827828531271,
"grad_norm": 0.21051068801250855,
"learning_rate": 1.864706509426973e-06,
"loss": 0.1181,
"step": 2037
},
{
"epoch": 1.432185523541813,
"grad_norm": 0.20254777973163096,
"learning_rate": 1.8604090305857757e-06,
"loss": 0.1088,
"step": 2038
},
{
"epoch": 1.432888264230499,
"grad_norm": 0.19863347215964763,
"learning_rate": 1.8561153773747253e-06,
"loss": 0.1051,
"step": 2039
},
{
"epoch": 1.433591004919185,
"grad_norm": 0.19445035120738507,
"learning_rate": 1.851825555025689e-06,
"loss": 0.0961,
"step": 2040
},
{
"epoch": 1.4342937456078708,
"grad_norm": 0.19694816790924283,
"learning_rate": 1.8475395687658699e-06,
"loss": 0.0946,
"step": 2041
},
{
"epoch": 1.4349964862965565,
"grad_norm": 0.18324489933964094,
"learning_rate": 1.843257423817793e-06,
"loss": 0.0928,
"step": 2042
},
{
"epoch": 1.4356992269852424,
"grad_norm": 0.227483860533187,
"learning_rate": 1.838979125399306e-06,
"loss": 0.1366,
"step": 2043
},
{
"epoch": 1.4364019676739284,
"grad_norm": 0.1950323325903862,
"learning_rate": 1.8347046787235677e-06,
"loss": 0.1032,
"step": 2044
},
{
"epoch": 1.437104708362614,
"grad_norm": 0.189011579660336,
"learning_rate": 1.8304340889990418e-06,
"loss": 0.0955,
"step": 2045
},
{
"epoch": 1.4378074490513,
"grad_norm": 0.2024388034514232,
"learning_rate": 1.8261673614294996e-06,
"loss": 0.1028,
"step": 2046
},
{
"epoch": 1.438510189739986,
"grad_norm": 0.2027902407985909,
"learning_rate": 1.8219045012139957e-06,
"loss": 0.1113,
"step": 2047
},
{
"epoch": 1.4392129304286718,
"grad_norm": 0.1947236224475286,
"learning_rate": 1.8176455135468796e-06,
"loss": 0.0977,
"step": 2048
},
{
"epoch": 1.4399156711173577,
"grad_norm": 0.2049564138338783,
"learning_rate": 1.8133904036177785e-06,
"loss": 0.1103,
"step": 2049
},
{
"epoch": 1.4406184118060437,
"grad_norm": 0.22763851564463972,
"learning_rate": 1.809139176611599e-06,
"loss": 0.1363,
"step": 2050
},
{
"epoch": 1.4413211524947294,
"grad_norm": 0.21736474157917995,
"learning_rate": 1.804891837708514e-06,
"loss": 0.1319,
"step": 2051
},
{
"epoch": 1.4420238931834153,
"grad_norm": 0.19347850559325697,
"learning_rate": 1.8006483920839524e-06,
"loss": 0.0979,
"step": 2052
},
{
"epoch": 1.4427266338721012,
"grad_norm": 0.2229214069489444,
"learning_rate": 1.7964088449086103e-06,
"loss": 0.1436,
"step": 2053
},
{
"epoch": 1.4434293745607871,
"grad_norm": 0.20941677595322297,
"learning_rate": 1.792173201348426e-06,
"loss": 0.1214,
"step": 2054
},
{
"epoch": 1.4441321152494728,
"grad_norm": 0.2177060761303515,
"learning_rate": 1.7879414665645834e-06,
"loss": 0.1213,
"step": 2055
},
{
"epoch": 1.4448348559381587,
"grad_norm": 0.21419547899241684,
"learning_rate": 1.7837136457135035e-06,
"loss": 0.1162,
"step": 2056
},
{
"epoch": 1.4455375966268447,
"grad_norm": 0.20489036246836206,
"learning_rate": 1.7794897439468378e-06,
"loss": 0.1101,
"step": 2057
},
{
"epoch": 1.4462403373155306,
"grad_norm": 0.2031599476589993,
"learning_rate": 1.7752697664114621e-06,
"loss": 0.1055,
"step": 2058
},
{
"epoch": 1.4469430780042165,
"grad_norm": 0.1937756593935405,
"learning_rate": 1.7710537182494714e-06,
"loss": 0.0931,
"step": 2059
},
{
"epoch": 1.4476458186929024,
"grad_norm": 0.19358280512131437,
"learning_rate": 1.7668416045981712e-06,
"loss": 0.0945,
"step": 2060
},
{
"epoch": 1.4483485593815881,
"grad_norm": 0.20791504012724032,
"learning_rate": 1.762633430590075e-06,
"loss": 0.1202,
"step": 2061
},
{
"epoch": 1.449051300070274,
"grad_norm": 0.20671342697104078,
"learning_rate": 1.7584292013528935e-06,
"loss": 0.1089,
"step": 2062
},
{
"epoch": 1.44975404075896,
"grad_norm": 0.1969402506543575,
"learning_rate": 1.754228922009532e-06,
"loss": 0.1044,
"step": 2063
},
{
"epoch": 1.450456781447646,
"grad_norm": 0.19992333533047266,
"learning_rate": 1.7500325976780824e-06,
"loss": 0.1078,
"step": 2064
},
{
"epoch": 1.4511595221363316,
"grad_norm": 0.1878259664747497,
"learning_rate": 1.7458402334718177e-06,
"loss": 0.0832,
"step": 2065
},
{
"epoch": 1.4518622628250175,
"grad_norm": 0.18937987641327556,
"learning_rate": 1.741651834499185e-06,
"loss": 0.0889,
"step": 2066
},
{
"epoch": 1.4525650035137034,
"grad_norm": 0.19497025876706472,
"learning_rate": 1.7374674058637997e-06,
"loss": 0.0998,
"step": 2067
},
{
"epoch": 1.4532677442023894,
"grad_norm": 0.20101193827579156,
"learning_rate": 1.7332869526644396e-06,
"loss": 0.1023,
"step": 2068
},
{
"epoch": 1.4539704848910753,
"grad_norm": 0.2068235856613755,
"learning_rate": 1.7291104799950364e-06,
"loss": 0.1112,
"step": 2069
},
{
"epoch": 1.4546732255797612,
"grad_norm": 0.20380945228349628,
"learning_rate": 1.7249379929446786e-06,
"loss": 0.1034,
"step": 2070
},
{
"epoch": 1.455375966268447,
"grad_norm": 0.20494430176374126,
"learning_rate": 1.7207694965975879e-06,
"loss": 0.1116,
"step": 2071
},
{
"epoch": 1.4560787069571328,
"grad_norm": 0.2102388632901854,
"learning_rate": 1.71660499603313e-06,
"loss": 0.1089,
"step": 2072
},
{
"epoch": 1.4567814476458187,
"grad_norm": 0.21115748699950196,
"learning_rate": 1.7124444963257974e-06,
"loss": 0.1087,
"step": 2073
},
{
"epoch": 1.4574841883345044,
"grad_norm": 0.20114644358444939,
"learning_rate": 1.7082880025452147e-06,
"loss": 0.1092,
"step": 2074
},
{
"epoch": 1.4581869290231904,
"grad_norm": 0.19748770458671025,
"learning_rate": 1.70413551975612e-06,
"loss": 0.1059,
"step": 2075
},
{
"epoch": 1.4588896697118763,
"grad_norm": 0.21356241862665043,
"learning_rate": 1.6999870530183615e-06,
"loss": 0.1243,
"step": 2076
},
{
"epoch": 1.4595924104005622,
"grad_norm": 0.19646838015454351,
"learning_rate": 1.6958426073868967e-06,
"loss": 0.0996,
"step": 2077
},
{
"epoch": 1.4602951510892481,
"grad_norm": 0.2226606078449577,
"learning_rate": 1.6917021879117861e-06,
"loss": 0.1347,
"step": 2078
},
{
"epoch": 1.460997891777934,
"grad_norm": 0.20839056872656725,
"learning_rate": 1.6875657996381812e-06,
"loss": 0.1209,
"step": 2079
},
{
"epoch": 1.4617006324666197,
"grad_norm": 0.19890262161889527,
"learning_rate": 1.6834334476063214e-06,
"loss": 0.1052,
"step": 2080
},
{
"epoch": 1.4624033731553057,
"grad_norm": 0.19970816536646627,
"learning_rate": 1.6793051368515283e-06,
"loss": 0.102,
"step": 2081
},
{
"epoch": 1.4631061138439916,
"grad_norm": 0.19919494012550767,
"learning_rate": 1.6751808724041996e-06,
"loss": 0.1052,
"step": 2082
},
{
"epoch": 1.4638088545326775,
"grad_norm": 0.20234045088218264,
"learning_rate": 1.6710606592898016e-06,
"loss": 0.1046,
"step": 2083
},
{
"epoch": 1.4645115952213632,
"grad_norm": 0.20572553269965432,
"learning_rate": 1.6669445025288649e-06,
"loss": 0.1181,
"step": 2084
},
{
"epoch": 1.4652143359100491,
"grad_norm": 0.19547026400291875,
"learning_rate": 1.6628324071369768e-06,
"loss": 0.0988,
"step": 2085
},
{
"epoch": 1.465917076598735,
"grad_norm": 0.2106472595643068,
"learning_rate": 1.6587243781247764e-06,
"loss": 0.1203,
"step": 2086
},
{
"epoch": 1.466619817287421,
"grad_norm": 0.2148175133664876,
"learning_rate": 1.6546204204979478e-06,
"loss": 0.1222,
"step": 2087
},
{
"epoch": 1.467322557976107,
"grad_norm": 0.19082443936118623,
"learning_rate": 1.6505205392572128e-06,
"loss": 0.0946,
"step": 2088
},
{
"epoch": 1.4680252986647928,
"grad_norm": 0.18429777590678642,
"learning_rate": 1.6464247393983273e-06,
"loss": 0.0839,
"step": 2089
},
{
"epoch": 1.4687280393534785,
"grad_norm": 0.21414972494445392,
"learning_rate": 1.642333025912074e-06,
"loss": 0.1199,
"step": 2090
},
{
"epoch": 1.4694307800421644,
"grad_norm": 0.21691985325819063,
"learning_rate": 1.6382454037842565e-06,
"loss": 0.1401,
"step": 2091
},
{
"epoch": 1.4701335207308504,
"grad_norm": 0.20066276836526453,
"learning_rate": 1.6341618779956913e-06,
"loss": 0.1014,
"step": 2092
},
{
"epoch": 1.4708362614195363,
"grad_norm": 0.2076710567440635,
"learning_rate": 1.6300824535222043e-06,
"loss": 0.1198,
"step": 2093
},
{
"epoch": 1.471539002108222,
"grad_norm": 0.21512961700284627,
"learning_rate": 1.626007135334629e-06,
"loss": 0.1252,
"step": 2094
},
{
"epoch": 1.472241742796908,
"grad_norm": 0.18721080562415257,
"learning_rate": 1.6219359283987852e-06,
"loss": 0.09,
"step": 2095
},
{
"epoch": 1.4729444834855938,
"grad_norm": 0.20886293945741455,
"learning_rate": 1.6178688376754896e-06,
"loss": 0.1168,
"step": 2096
},
{
"epoch": 1.4736472241742797,
"grad_norm": 0.20205595053556027,
"learning_rate": 1.6138058681205425e-06,
"loss": 0.1035,
"step": 2097
},
{
"epoch": 1.4743499648629657,
"grad_norm": 0.2108679767781241,
"learning_rate": 1.6097470246847236e-06,
"loss": 0.1171,
"step": 2098
},
{
"epoch": 1.4750527055516516,
"grad_norm": 0.2053279889047571,
"learning_rate": 1.6056923123137846e-06,
"loss": 0.1047,
"step": 2099
},
{
"epoch": 1.4757554462403373,
"grad_norm": 0.20172621981725175,
"learning_rate": 1.6016417359484388e-06,
"loss": 0.1085,
"step": 2100
},
{
"epoch": 1.4764581869290232,
"grad_norm": 0.20321058147246363,
"learning_rate": 1.5975953005243628e-06,
"loss": 0.1083,
"step": 2101
},
{
"epoch": 1.4771609276177091,
"grad_norm": 0.22002296435014382,
"learning_rate": 1.5935530109721915e-06,
"loss": 0.1373,
"step": 2102
},
{
"epoch": 1.4778636683063948,
"grad_norm": 0.19976545526435993,
"learning_rate": 1.5895148722175025e-06,
"loss": 0.1044,
"step": 2103
},
{
"epoch": 1.4785664089950807,
"grad_norm": 0.19701563572167735,
"learning_rate": 1.5854808891808192e-06,
"loss": 0.0972,
"step": 2104
},
{
"epoch": 1.4792691496837667,
"grad_norm": 0.20208667991029897,
"learning_rate": 1.5814510667775944e-06,
"loss": 0.1063,
"step": 2105
},
{
"epoch": 1.4799718903724526,
"grad_norm": 0.20649632978909177,
"learning_rate": 1.5774254099182217e-06,
"loss": 0.1098,
"step": 2106
},
{
"epoch": 1.4806746310611385,
"grad_norm": 0.21492811407502477,
"learning_rate": 1.5734039235080112e-06,
"loss": 0.1198,
"step": 2107
},
{
"epoch": 1.4813773717498244,
"grad_norm": 0.18436958228796746,
"learning_rate": 1.5693866124471935e-06,
"loss": 0.0845,
"step": 2108
},
{
"epoch": 1.4820801124385101,
"grad_norm": 0.1911998628446746,
"learning_rate": 1.5653734816309113e-06,
"loss": 0.0978,
"step": 2109
},
{
"epoch": 1.482782853127196,
"grad_norm": 0.20337894191977432,
"learning_rate": 1.5613645359492141e-06,
"loss": 0.1106,
"step": 2110
},
{
"epoch": 1.483485593815882,
"grad_norm": 0.2057315959766292,
"learning_rate": 1.5573597802870515e-06,
"loss": 0.105,
"step": 2111
},
{
"epoch": 1.4841883345045679,
"grad_norm": 0.21056854041744383,
"learning_rate": 1.5533592195242674e-06,
"loss": 0.1183,
"step": 2112
},
{
"epoch": 1.4848910751932536,
"grad_norm": 0.21165789341728766,
"learning_rate": 1.549362858535594e-06,
"loss": 0.108,
"step": 2113
},
{
"epoch": 1.4855938158819395,
"grad_norm": 0.2095711894174198,
"learning_rate": 1.5453707021906467e-06,
"loss": 0.1186,
"step": 2114
},
{
"epoch": 1.4862965565706254,
"grad_norm": 0.2082970520677581,
"learning_rate": 1.5413827553539162e-06,
"loss": 0.1037,
"step": 2115
},
{
"epoch": 1.4869992972593113,
"grad_norm": 0.21404896956382127,
"learning_rate": 1.5373990228847657e-06,
"loss": 0.1196,
"step": 2116
},
{
"epoch": 1.4877020379479973,
"grad_norm": 0.19006792607815243,
"learning_rate": 1.5334195096374193e-06,
"loss": 0.0933,
"step": 2117
},
{
"epoch": 1.4884047786366832,
"grad_norm": 0.22491773089278205,
"learning_rate": 1.529444220460969e-06,
"loss": 0.1257,
"step": 2118
},
{
"epoch": 1.489107519325369,
"grad_norm": 0.2136597897461031,
"learning_rate": 1.5254731601993472e-06,
"loss": 0.1215,
"step": 2119
},
{
"epoch": 1.4898102600140548,
"grad_norm": 0.20554402273098446,
"learning_rate": 1.5215063336913421e-06,
"loss": 0.1116,
"step": 2120
},
{
"epoch": 1.4905130007027407,
"grad_norm": 0.21340344773652498,
"learning_rate": 1.5175437457705787e-06,
"loss": 0.1202,
"step": 2121
},
{
"epoch": 1.4912157413914264,
"grad_norm": 0.1912761683073787,
"learning_rate": 1.5135854012655227e-06,
"loss": 0.0969,
"step": 2122
},
{
"epoch": 1.4919184820801124,
"grad_norm": 0.22084856935462074,
"learning_rate": 1.509631304999465e-06,
"loss": 0.1215,
"step": 2123
},
{
"epoch": 1.4926212227687983,
"grad_norm": 0.19661809439616135,
"learning_rate": 1.5056814617905168e-06,
"loss": 0.0956,
"step": 2124
},
{
"epoch": 1.4933239634574842,
"grad_norm": 0.20350684578288455,
"learning_rate": 1.501735876451611e-06,
"loss": 0.1034,
"step": 2125
},
{
"epoch": 1.4940267041461701,
"grad_norm": 0.20577450775131825,
"learning_rate": 1.4977945537904953e-06,
"loss": 0.1145,
"step": 2126
},
{
"epoch": 1.494729444834856,
"grad_norm": 0.18283098027703504,
"learning_rate": 1.4938574986097176e-06,
"loss": 0.0838,
"step": 2127
},
{
"epoch": 1.4954321855235417,
"grad_norm": 0.2126580030852982,
"learning_rate": 1.4899247157066303e-06,
"loss": 0.1185,
"step": 2128
},
{
"epoch": 1.4961349262122277,
"grad_norm": 0.2268441420583724,
"learning_rate": 1.485996209873372e-06,
"loss": 0.1316,
"step": 2129
},
{
"epoch": 1.4968376669009136,
"grad_norm": 0.18783170574622482,
"learning_rate": 1.4820719858968807e-06,
"loss": 0.0869,
"step": 2130
},
{
"epoch": 1.4975404075895995,
"grad_norm": 0.19097818831806937,
"learning_rate": 1.4781520485588696e-06,
"loss": 0.0962,
"step": 2131
},
{
"epoch": 1.4982431482782852,
"grad_norm": 0.2091117755178235,
"learning_rate": 1.4742364026358307e-06,
"loss": 0.1146,
"step": 2132
},
{
"epoch": 1.4989458889669711,
"grad_norm": 0.21027268457384118,
"learning_rate": 1.4703250528990265e-06,
"loss": 0.1148,
"step": 2133
},
{
"epoch": 1.499648629655657,
"grad_norm": 0.21342650738228533,
"learning_rate": 1.4664180041144843e-06,
"loss": 0.1164,
"step": 2134
},
{
"epoch": 1.500351370344343,
"grad_norm": 0.21173750693471938,
"learning_rate": 1.4625152610429922e-06,
"loss": 0.1111,
"step": 2135
},
{
"epoch": 1.5010541110330289,
"grad_norm": 0.19662324572850004,
"learning_rate": 1.4586168284400893e-06,
"loss": 0.104,
"step": 2136
},
{
"epoch": 1.5017568517217148,
"grad_norm": 0.21595970348216834,
"learning_rate": 1.4547227110560642e-06,
"loss": 0.1245,
"step": 2137
},
{
"epoch": 1.5024595924104007,
"grad_norm": 0.21143124551761613,
"learning_rate": 1.4508329136359462e-06,
"loss": 0.1188,
"step": 2138
},
{
"epoch": 1.5031623330990864,
"grad_norm": 0.19834499116854085,
"learning_rate": 1.4469474409195017e-06,
"loss": 0.1052,
"step": 2139
},
{
"epoch": 1.5038650737877723,
"grad_norm": 0.209139698577494,
"learning_rate": 1.4430662976412268e-06,
"loss": 0.1241,
"step": 2140
},
{
"epoch": 1.504567814476458,
"grad_norm": 0.21525376069223828,
"learning_rate": 1.4391894885303414e-06,
"loss": 0.1179,
"step": 2141
},
{
"epoch": 1.505270555165144,
"grad_norm": 0.21077797750680113,
"learning_rate": 1.4353170183107884e-06,
"loss": 0.1227,
"step": 2142
},
{
"epoch": 1.5059732958538299,
"grad_norm": 0.19951331334907021,
"learning_rate": 1.4314488917012164e-06,
"loss": 0.1075,
"step": 2143
},
{
"epoch": 1.5066760365425158,
"grad_norm": 0.19898253039153893,
"learning_rate": 1.4275851134149864e-06,
"loss": 0.111,
"step": 2144
},
{
"epoch": 1.5073787772312017,
"grad_norm": 0.21653486884763662,
"learning_rate": 1.4237256881601585e-06,
"loss": 0.1207,
"step": 2145
},
{
"epoch": 1.5080815179198876,
"grad_norm": 0.2150949362068544,
"learning_rate": 1.4198706206394924e-06,
"loss": 0.138,
"step": 2146
},
{
"epoch": 1.5087842586085736,
"grad_norm": 0.21109961035170485,
"learning_rate": 1.4160199155504357e-06,
"loss": 0.1073,
"step": 2147
},
{
"epoch": 1.5094869992972593,
"grad_norm": 0.1841921542561286,
"learning_rate": 1.4121735775851164e-06,
"loss": 0.0828,
"step": 2148
},
{
"epoch": 1.5101897399859452,
"grad_norm": 0.2040515179301333,
"learning_rate": 1.4083316114303448e-06,
"loss": 0.1163,
"step": 2149
},
{
"epoch": 1.510892480674631,
"grad_norm": 0.20241690892388398,
"learning_rate": 1.4044940217676061e-06,
"loss": 0.1086,
"step": 2150
},
{
"epoch": 1.5115952213633168,
"grad_norm": 0.2064344939410934,
"learning_rate": 1.4006608132730504e-06,
"loss": 0.108,
"step": 2151
},
{
"epoch": 1.5122979620520027,
"grad_norm": 0.2120700588889789,
"learning_rate": 1.3968319906174893e-06,
"loss": 0.118,
"step": 2152
},
{
"epoch": 1.5130007027406887,
"grad_norm": 0.21422239852039077,
"learning_rate": 1.3930075584663867e-06,
"loss": 0.1124,
"step": 2153
},
{
"epoch": 1.5137034434293746,
"grad_norm": 0.21078755623866574,
"learning_rate": 1.3891875214798644e-06,
"loss": 0.1149,
"step": 2154
},
{
"epoch": 1.5144061841180605,
"grad_norm": 0.1895745540304648,
"learning_rate": 1.3853718843126824e-06,
"loss": 0.0885,
"step": 2155
},
{
"epoch": 1.5151089248067464,
"grad_norm": 0.20114109509443637,
"learning_rate": 1.3815606516142422e-06,
"loss": 0.1149,
"step": 2156
},
{
"epoch": 1.5158116654954323,
"grad_norm": 0.2036147414836946,
"learning_rate": 1.3777538280285767e-06,
"loss": 0.1061,
"step": 2157
},
{
"epoch": 1.516514406184118,
"grad_norm": 0.2147409559125996,
"learning_rate": 1.3739514181943486e-06,
"loss": 0.1308,
"step": 2158
},
{
"epoch": 1.517217146872804,
"grad_norm": 0.20667125214621468,
"learning_rate": 1.3701534267448395e-06,
"loss": 0.1102,
"step": 2159
},
{
"epoch": 1.5179198875614897,
"grad_norm": 0.2026313826859528,
"learning_rate": 1.366359858307949e-06,
"loss": 0.1085,
"step": 2160
},
{
"epoch": 1.5186226282501756,
"grad_norm": 0.1977641258226134,
"learning_rate": 1.3625707175061876e-06,
"loss": 0.0978,
"step": 2161
},
{
"epoch": 1.5193253689388615,
"grad_norm": 0.21169366806002546,
"learning_rate": 1.358786008956669e-06,
"loss": 0.1194,
"step": 2162
},
{
"epoch": 1.5200281096275474,
"grad_norm": 0.20091830816997738,
"learning_rate": 1.3550057372711078e-06,
"loss": 0.1023,
"step": 2163
},
{
"epoch": 1.5207308503162333,
"grad_norm": 0.19251199833891325,
"learning_rate": 1.3512299070558104e-06,
"loss": 0.0921,
"step": 2164
},
{
"epoch": 1.5214335910049193,
"grad_norm": 0.20802550155451285,
"learning_rate": 1.347458522911672e-06,
"loss": 0.1116,
"step": 2165
},
{
"epoch": 1.5221363316936052,
"grad_norm": 0.20781627282211243,
"learning_rate": 1.343691589434174e-06,
"loss": 0.1186,
"step": 2166
},
{
"epoch": 1.5228390723822909,
"grad_norm": 0.21809071230318944,
"learning_rate": 1.3399291112133673e-06,
"loss": 0.1348,
"step": 2167
},
{
"epoch": 1.5235418130709768,
"grad_norm": 0.20349145062575685,
"learning_rate": 1.336171092833879e-06,
"loss": 0.1125,
"step": 2168
},
{
"epoch": 1.5242445537596627,
"grad_norm": 0.21233320891953184,
"learning_rate": 1.3324175388748989e-06,
"loss": 0.1287,
"step": 2169
},
{
"epoch": 1.5249472944483484,
"grad_norm": 0.1970053151712138,
"learning_rate": 1.3286684539101823e-06,
"loss": 0.1043,
"step": 2170
},
{
"epoch": 1.5256500351370343,
"grad_norm": 0.2006486400079084,
"learning_rate": 1.3249238425080346e-06,
"loss": 0.0993,
"step": 2171
},
{
"epoch": 1.5263527758257203,
"grad_norm": 0.19609135016603213,
"learning_rate": 1.3211837092313074e-06,
"loss": 0.0972,
"step": 2172
},
{
"epoch": 1.5270555165144062,
"grad_norm": 0.21719257095361527,
"learning_rate": 1.3174480586374e-06,
"loss": 0.1317,
"step": 2173
},
{
"epoch": 1.527758257203092,
"grad_norm": 0.22153099757616065,
"learning_rate": 1.3137168952782514e-06,
"loss": 0.1274,
"step": 2174
},
{
"epoch": 1.528460997891778,
"grad_norm": 0.20478508809660045,
"learning_rate": 1.309990223700328e-06,
"loss": 0.1124,
"step": 2175
},
{
"epoch": 1.529163738580464,
"grad_norm": 0.21734116749378735,
"learning_rate": 1.3062680484446267e-06,
"loss": 0.1191,
"step": 2176
},
{
"epoch": 1.5298664792691496,
"grad_norm": 0.19086165820708542,
"learning_rate": 1.3025503740466588e-06,
"loss": 0.0996,
"step": 2177
},
{
"epoch": 1.5305692199578356,
"grad_norm": 0.22556463525749282,
"learning_rate": 1.298837205036461e-06,
"loss": 0.1295,
"step": 2178
},
{
"epoch": 1.5312719606465213,
"grad_norm": 0.20465767282965405,
"learning_rate": 1.2951285459385737e-06,
"loss": 0.0952,
"step": 2179
},
{
"epoch": 1.5319747013352072,
"grad_norm": 0.20782753239758756,
"learning_rate": 1.291424401272044e-06,
"loss": 0.116,
"step": 2180
},
{
"epoch": 1.532677442023893,
"grad_norm": 0.20561987301995432,
"learning_rate": 1.2877247755504174e-06,
"loss": 0.1041,
"step": 2181
},
{
"epoch": 1.533380182712579,
"grad_norm": 0.2214136390062332,
"learning_rate": 1.2840296732817332e-06,
"loss": 0.131,
"step": 2182
},
{
"epoch": 1.534082923401265,
"grad_norm": 0.19800868890009252,
"learning_rate": 1.2803390989685189e-06,
"loss": 0.0964,
"step": 2183
},
{
"epoch": 1.5347856640899509,
"grad_norm": 0.2113460787948863,
"learning_rate": 1.276653057107784e-06,
"loss": 0.1109,
"step": 2184
},
{
"epoch": 1.5354884047786368,
"grad_norm": 0.2052746645481145,
"learning_rate": 1.2729715521910168e-06,
"loss": 0.1038,
"step": 2185
},
{
"epoch": 1.5361911454673227,
"grad_norm": 0.20345966186550987,
"learning_rate": 1.2692945887041763e-06,
"loss": 0.1054,
"step": 2186
},
{
"epoch": 1.5368938861560084,
"grad_norm": 0.2076140998528222,
"learning_rate": 1.2656221711276867e-06,
"loss": 0.1105,
"step": 2187
},
{
"epoch": 1.5375966268446943,
"grad_norm": 0.22170059939566786,
"learning_rate": 1.261954303936434e-06,
"loss": 0.1376,
"step": 2188
},
{
"epoch": 1.53829936753338,
"grad_norm": 0.18480014710173598,
"learning_rate": 1.2582909915997604e-06,
"loss": 0.0906,
"step": 2189
},
{
"epoch": 1.539002108222066,
"grad_norm": 0.2007697005783376,
"learning_rate": 1.2546322385814564e-06,
"loss": 0.1038,
"step": 2190
},
{
"epoch": 1.5397048489107519,
"grad_norm": 0.20251036193726052,
"learning_rate": 1.2509780493397573e-06,
"loss": 0.0981,
"step": 2191
},
{
"epoch": 1.5404075895994378,
"grad_norm": 0.20330238945954568,
"learning_rate": 1.2473284283273373e-06,
"loss": 0.108,
"step": 2192
},
{
"epoch": 1.5411103302881237,
"grad_norm": 0.21669546473141843,
"learning_rate": 1.243683379991304e-06,
"loss": 0.1222,
"step": 2193
},
{
"epoch": 1.5418130709768096,
"grad_norm": 0.21441695594530708,
"learning_rate": 1.2400429087731952e-06,
"loss": 0.1183,
"step": 2194
},
{
"epoch": 1.5425158116654956,
"grad_norm": 0.20530833176631885,
"learning_rate": 1.236407019108971e-06,
"loss": 0.1062,
"step": 2195
},
{
"epoch": 1.5432185523541813,
"grad_norm": 0.19919774828351675,
"learning_rate": 1.2327757154290037e-06,
"loss": 0.1017,
"step": 2196
},
{
"epoch": 1.5439212930428672,
"grad_norm": 0.20531475973602628,
"learning_rate": 1.229149002158082e-06,
"loss": 0.1178,
"step": 2197
},
{
"epoch": 1.544624033731553,
"grad_norm": 0.20334677121124856,
"learning_rate": 1.2255268837154034e-06,
"loss": 0.1007,
"step": 2198
},
{
"epoch": 1.5453267744202388,
"grad_norm": 0.20099444240910416,
"learning_rate": 1.2219093645145613e-06,
"loss": 0.1039,
"step": 2199
},
{
"epoch": 1.5460295151089247,
"grad_norm": 0.2105425132927698,
"learning_rate": 1.2182964489635502e-06,
"loss": 0.1128,
"step": 2200
},
{
"epoch": 1.5467322557976106,
"grad_norm": 0.21491312442303748,
"learning_rate": 1.2146881414647471e-06,
"loss": 0.1179,
"step": 2201
},
{
"epoch": 1.5474349964862966,
"grad_norm": 0.20244269612166116,
"learning_rate": 1.211084446414923e-06,
"loss": 0.1139,
"step": 2202
},
{
"epoch": 1.5481377371749825,
"grad_norm": 0.21390331058970313,
"learning_rate": 1.2074853682052235e-06,
"loss": 0.1106,
"step": 2203
},
{
"epoch": 1.5488404778636684,
"grad_norm": 0.20952682866546957,
"learning_rate": 1.20389091122117e-06,
"loss": 0.1119,
"step": 2204
},
{
"epoch": 1.5495432185523543,
"grad_norm": 0.19416399178836952,
"learning_rate": 1.2003010798426512e-06,
"loss": 0.0876,
"step": 2205
},
{
"epoch": 1.55024595924104,
"grad_norm": 0.20885763300102236,
"learning_rate": 1.1967158784439214e-06,
"loss": 0.1208,
"step": 2206
},
{
"epoch": 1.550948699929726,
"grad_norm": 0.21016787884405874,
"learning_rate": 1.1931353113935935e-06,
"loss": 0.1175,
"step": 2207
},
{
"epoch": 1.5516514406184116,
"grad_norm": 0.196270716771967,
"learning_rate": 1.1895593830546308e-06,
"loss": 0.0984,
"step": 2208
},
{
"epoch": 1.5523541813070976,
"grad_norm": 0.1974914568857876,
"learning_rate": 1.1859880977843469e-06,
"loss": 0.0988,
"step": 2209
},
{
"epoch": 1.5530569219957835,
"grad_norm": 0.21167241148285781,
"learning_rate": 1.1824214599343958e-06,
"loss": 0.1213,
"step": 2210
},
{
"epoch": 1.5537596626844694,
"grad_norm": 0.20553500001565458,
"learning_rate": 1.1788594738507708e-06,
"loss": 0.1186,
"step": 2211
},
{
"epoch": 1.5544624033731553,
"grad_norm": 0.19847997013748428,
"learning_rate": 1.175302143873795e-06,
"loss": 0.1102,
"step": 2212
},
{
"epoch": 1.5551651440618413,
"grad_norm": 0.20715612397273658,
"learning_rate": 1.1717494743381187e-06,
"loss": 0.117,
"step": 2213
},
{
"epoch": 1.5558678847505272,
"grad_norm": 0.20937669398547118,
"learning_rate": 1.1682014695727129e-06,
"loss": 0.1151,
"step": 2214
},
{
"epoch": 1.556570625439213,
"grad_norm": 0.19405603573589256,
"learning_rate": 1.164658133900866e-06,
"loss": 0.1023,
"step": 2215
},
{
"epoch": 1.5572733661278988,
"grad_norm": 0.2282522606956639,
"learning_rate": 1.1611194716401752e-06,
"loss": 0.1404,
"step": 2216
},
{
"epoch": 1.5579761068165847,
"grad_norm": 0.19239061243766944,
"learning_rate": 1.1575854871025445e-06,
"loss": 0.0937,
"step": 2217
},
{
"epoch": 1.5586788475052704,
"grad_norm": 0.20207148636935549,
"learning_rate": 1.154056184594175e-06,
"loss": 0.1067,
"step": 2218
},
{
"epoch": 1.5593815881939563,
"grad_norm": 0.21200273458239072,
"learning_rate": 1.1505315684155704e-06,
"loss": 0.1113,
"step": 2219
},
{
"epoch": 1.5600843288826423,
"grad_norm": 0.21814455748076184,
"learning_rate": 1.1470116428615141e-06,
"loss": 0.1255,
"step": 2220
},
{
"epoch": 1.5607870695713282,
"grad_norm": 0.18998435105291442,
"learning_rate": 1.143496412221079e-06,
"loss": 0.0965,
"step": 2221
},
{
"epoch": 1.561489810260014,
"grad_norm": 0.20299620693620404,
"learning_rate": 1.1399858807776194e-06,
"loss": 0.111,
"step": 2222
},
{
"epoch": 1.5621925509487,
"grad_norm": 0.20606096110391503,
"learning_rate": 1.1364800528087594e-06,
"loss": 0.1114,
"step": 2223
},
{
"epoch": 1.562895291637386,
"grad_norm": 0.20769433344453436,
"learning_rate": 1.132978932586395e-06,
"loss": 0.1112,
"step": 2224
},
{
"epoch": 1.5635980323260716,
"grad_norm": 0.21303902525539456,
"learning_rate": 1.1294825243766794e-06,
"loss": 0.1194,
"step": 2225
},
{
"epoch": 1.5643007730147576,
"grad_norm": 0.2063220087352883,
"learning_rate": 1.1259908324400343e-06,
"loss": 0.1065,
"step": 2226
},
{
"epoch": 1.5650035137034435,
"grad_norm": 0.21210635808550846,
"learning_rate": 1.1225038610311267e-06,
"loss": 0.1249,
"step": 2227
},
{
"epoch": 1.5657062543921292,
"grad_norm": 0.1889437432037374,
"learning_rate": 1.1190216143988746e-06,
"loss": 0.0893,
"step": 2228
},
{
"epoch": 1.566408995080815,
"grad_norm": 0.2066927578746364,
"learning_rate": 1.115544096786439e-06,
"loss": 0.1097,
"step": 2229
},
{
"epoch": 1.567111735769501,
"grad_norm": 0.20994313755789867,
"learning_rate": 1.112071312431216e-06,
"loss": 0.117,
"step": 2230
},
{
"epoch": 1.567814476458187,
"grad_norm": 0.21204340034517896,
"learning_rate": 1.1086032655648377e-06,
"loss": 0.1173,
"step": 2231
},
{
"epoch": 1.5685172171468729,
"grad_norm": 0.2031289112855245,
"learning_rate": 1.1051399604131601e-06,
"loss": 0.0978,
"step": 2232
},
{
"epoch": 1.5692199578355588,
"grad_norm": 0.2090183655685043,
"learning_rate": 1.1016814011962651e-06,
"loss": 0.1063,
"step": 2233
},
{
"epoch": 1.5699226985242447,
"grad_norm": 0.2181629778571553,
"learning_rate": 1.098227592128448e-06,
"loss": 0.1281,
"step": 2234
},
{
"epoch": 1.5706254392129304,
"grad_norm": 0.21695816446915328,
"learning_rate": 1.094778537418218e-06,
"loss": 0.1185,
"step": 2235
},
{
"epoch": 1.5713281799016163,
"grad_norm": 0.19226955339369178,
"learning_rate": 1.091334241268291e-06,
"loss": 0.0862,
"step": 2236
},
{
"epoch": 1.572030920590302,
"grad_norm": 0.20170576366935927,
"learning_rate": 1.0878947078755836e-06,
"loss": 0.1115,
"step": 2237
},
{
"epoch": 1.572733661278988,
"grad_norm": 0.20776048462835786,
"learning_rate": 1.08445994143121e-06,
"loss": 0.1117,
"step": 2238
},
{
"epoch": 1.5734364019676739,
"grad_norm": 0.214063804691396,
"learning_rate": 1.0810299461204749e-06,
"loss": 0.1196,
"step": 2239
},
{
"epoch": 1.5741391426563598,
"grad_norm": 0.20456077112030305,
"learning_rate": 1.0776047261228694e-06,
"loss": 0.1169,
"step": 2240
},
{
"epoch": 1.5748418833450457,
"grad_norm": 0.19935563250274657,
"learning_rate": 1.0741842856120665e-06,
"loss": 0.1,
"step": 2241
},
{
"epoch": 1.5755446240337316,
"grad_norm": 0.2231312470280824,
"learning_rate": 1.070768628755914e-06,
"loss": 0.1238,
"step": 2242
},
{
"epoch": 1.5762473647224176,
"grad_norm": 0.19884272779801346,
"learning_rate": 1.0673577597164352e-06,
"loss": 0.1077,
"step": 2243
},
{
"epoch": 1.5769501054111033,
"grad_norm": 0.19548842979419342,
"learning_rate": 1.0639516826498125e-06,
"loss": 0.103,
"step": 2244
},
{
"epoch": 1.5776528460997892,
"grad_norm": 0.2066547148079408,
"learning_rate": 1.0605504017063927e-06,
"loss": 0.1049,
"step": 2245
},
{
"epoch": 1.578355586788475,
"grad_norm": 0.19438085779579703,
"learning_rate": 1.0571539210306785e-06,
"loss": 0.0979,
"step": 2246
},
{
"epoch": 1.5790583274771608,
"grad_norm": 0.2119119384477096,
"learning_rate": 1.0537622447613249e-06,
"loss": 0.1227,
"step": 2247
},
{
"epoch": 1.5797610681658467,
"grad_norm": 0.20320776414866357,
"learning_rate": 1.050375377031132e-06,
"loss": 0.1025,
"step": 2248
},
{
"epoch": 1.5804638088545326,
"grad_norm": 0.21183790516372125,
"learning_rate": 1.0469933219670354e-06,
"loss": 0.1322,
"step": 2249
},
{
"epoch": 1.5811665495432186,
"grad_norm": 0.20855608996431732,
"learning_rate": 1.0436160836901138e-06,
"loss": 0.1059,
"step": 2250
},
{
"epoch": 1.5818692902319045,
"grad_norm": 0.20424747828243672,
"learning_rate": 1.0402436663155736e-06,
"loss": 0.1084,
"step": 2251
},
{
"epoch": 1.5825720309205904,
"grad_norm": 0.20495264719391587,
"learning_rate": 1.0368760739527455e-06,
"loss": 0.1134,
"step": 2252
},
{
"epoch": 1.5832747716092763,
"grad_norm": 0.1937197819521696,
"learning_rate": 1.0335133107050833e-06,
"loss": 0.1006,
"step": 2253
},
{
"epoch": 1.583977512297962,
"grad_norm": 0.20286179369001442,
"learning_rate": 1.0301553806701547e-06,
"loss": 0.1066,
"step": 2254
},
{
"epoch": 1.584680252986648,
"grad_norm": 0.21076471908395034,
"learning_rate": 1.0268022879396388e-06,
"loss": 0.1156,
"step": 2255
},
{
"epoch": 1.5853829936753336,
"grad_norm": 0.2204998831314066,
"learning_rate": 1.02345403659932e-06,
"loss": 0.1309,
"step": 2256
},
{
"epoch": 1.5860857343640196,
"grad_norm": 0.19661575951624372,
"learning_rate": 1.0201106307290842e-06,
"loss": 0.0973,
"step": 2257
},
{
"epoch": 1.5867884750527055,
"grad_norm": 0.2095469486567903,
"learning_rate": 1.0167720744029118e-06,
"loss": 0.1167,
"step": 2258
},
{
"epoch": 1.5874912157413914,
"grad_norm": 0.20309899334086073,
"learning_rate": 1.0134383716888752e-06,
"loss": 0.1063,
"step": 2259
},
{
"epoch": 1.5881939564300773,
"grad_norm": 0.21227704300586195,
"learning_rate": 1.0101095266491323e-06,
"loss": 0.1251,
"step": 2260
},
{
"epoch": 1.5888966971187632,
"grad_norm": 0.1928417238347528,
"learning_rate": 1.006785543339921e-06,
"loss": 0.1028,
"step": 2261
},
{
"epoch": 1.5895994378074492,
"grad_norm": 0.2145732307166128,
"learning_rate": 1.0034664258115561e-06,
"loss": 0.1277,
"step": 2262
},
{
"epoch": 1.590302178496135,
"grad_norm": 0.19830989975829438,
"learning_rate": 1.0001521781084233e-06,
"loss": 0.0975,
"step": 2263
},
{
"epoch": 1.5910049191848208,
"grad_norm": 0.20962175294180938,
"learning_rate": 9.968428042689738e-07,
"loss": 0.1155,
"step": 2264
},
{
"epoch": 1.5917076598735067,
"grad_norm": 0.19993567188295397,
"learning_rate": 9.935383083257199e-07,
"loss": 0.1095,
"step": 2265
},
{
"epoch": 1.5924104005621924,
"grad_norm": 0.1936984601035664,
"learning_rate": 9.9023869430523e-07,
"loss": 0.0878,
"step": 2266
},
{
"epoch": 1.5931131412508783,
"grad_norm": 0.19878254555331681,
"learning_rate": 9.869439662281276e-07,
"loss": 0.0985,
"step": 2267
},
{
"epoch": 1.5938158819395642,
"grad_norm": 0.2086094551719869,
"learning_rate": 9.836541281090757e-07,
"loss": 0.1027,
"step": 2268
},
{
"epoch": 1.5945186226282502,
"grad_norm": 0.20978615918555354,
"learning_rate": 9.803691839567835e-07,
"loss": 0.119,
"step": 2269
},
{
"epoch": 1.595221363316936,
"grad_norm": 0.2006864891252534,
"learning_rate": 9.77089137773995e-07,
"loss": 0.1024,
"step": 2270
},
{
"epoch": 1.595924104005622,
"grad_norm": 0.20533161033739727,
"learning_rate": 9.738139935574893e-07,
"loss": 0.1135,
"step": 2271
},
{
"epoch": 1.596626844694308,
"grad_norm": 0.2017232326828494,
"learning_rate": 9.70543755298069e-07,
"loss": 0.1097,
"step": 2272
},
{
"epoch": 1.5973295853829936,
"grad_norm": 0.21227217306895307,
"learning_rate": 9.672784269805574e-07,
"loss": 0.109,
"step": 2273
},
{
"epoch": 1.5980323260716796,
"grad_norm": 0.21020243189416726,
"learning_rate": 9.640180125837972e-07,
"loss": 0.115,
"step": 2274
},
{
"epoch": 1.5987350667603655,
"grad_norm": 0.20569926954052126,
"learning_rate": 9.607625160806466e-07,
"loss": 0.1041,
"step": 2275
},
{
"epoch": 1.5994378074490512,
"grad_norm": 0.20996348296961215,
"learning_rate": 9.575119414379657e-07,
"loss": 0.1247,
"step": 2276
},
{
"epoch": 1.600140548137737,
"grad_norm": 0.29936346948286,
"learning_rate": 9.542662926166207e-07,
"loss": 0.1229,
"step": 2277
},
{
"epoch": 1.600843288826423,
"grad_norm": 0.1899504678291544,
"learning_rate": 9.510255735714735e-07,
"loss": 0.0945,
"step": 2278
},
{
"epoch": 1.601546029515109,
"grad_norm": 0.21652100191488713,
"learning_rate": 9.477897882513809e-07,
"loss": 0.1192,
"step": 2279
},
{
"epoch": 1.6022487702037949,
"grad_norm": 0.21154771041756734,
"learning_rate": 9.445589405991862e-07,
"loss": 0.1088,
"step": 2280
},
{
"epoch": 1.6029515108924808,
"grad_norm": 0.20449293470039537,
"learning_rate": 9.413330345517174e-07,
"loss": 0.1075,
"step": 2281
},
{
"epoch": 1.6036542515811667,
"grad_norm": 0.21973972655160778,
"learning_rate": 9.381120740397809e-07,
"loss": 0.1068,
"step": 2282
},
{
"epoch": 1.6043569922698524,
"grad_norm": 0.21558073011165066,
"learning_rate": 9.34896062988156e-07,
"loss": 0.1272,
"step": 2283
},
{
"epoch": 1.6050597329585383,
"grad_norm": 0.1970799372523342,
"learning_rate": 9.316850053155923e-07,
"loss": 0.1018,
"step": 2284
},
{
"epoch": 1.605762473647224,
"grad_norm": 0.21188415958604262,
"learning_rate": 9.284789049348025e-07,
"loss": 0.1173,
"step": 2285
},
{
"epoch": 1.60646521433591,
"grad_norm": 0.2079255138418115,
"learning_rate": 9.252777657524598e-07,
"loss": 0.104,
"step": 2286
},
{
"epoch": 1.6071679550245959,
"grad_norm": 0.20956624753013672,
"learning_rate": 9.220815916691911e-07,
"loss": 0.1193,
"step": 2287
},
{
"epoch": 1.6078706957132818,
"grad_norm": 0.20238484626691766,
"learning_rate": 9.18890386579574e-07,
"loss": 0.106,
"step": 2288
},
{
"epoch": 1.6085734364019677,
"grad_norm": 0.20524675514991203,
"learning_rate": 9.157041543721307e-07,
"loss": 0.113,
"step": 2289
},
{
"epoch": 1.6092761770906536,
"grad_norm": 0.20266302624986365,
"learning_rate": 9.125228989293234e-07,
"loss": 0.113,
"step": 2290
},
{
"epoch": 1.6099789177793395,
"grad_norm": 0.2188977561339997,
"learning_rate": 9.093466241275551e-07,
"loss": 0.1337,
"step": 2291
},
{
"epoch": 1.6106816584680252,
"grad_norm": 0.20520134985004138,
"learning_rate": 9.061753338371509e-07,
"loss": 0.107,
"step": 2292
},
{
"epoch": 1.6113843991567112,
"grad_norm": 0.2117343064305145,
"learning_rate": 9.030090319223689e-07,
"loss": 0.1122,
"step": 2293
},
{
"epoch": 1.612087139845397,
"grad_norm": 0.2109959771350101,
"learning_rate": 8.998477222413854e-07,
"loss": 0.1159,
"step": 2294
},
{
"epoch": 1.6127898805340828,
"grad_norm": 0.2109523781746478,
"learning_rate": 8.96691408646298e-07,
"loss": 0.1225,
"step": 2295
},
{
"epoch": 1.6134926212227687,
"grad_norm": 0.21389493218424108,
"learning_rate": 8.935400949831125e-07,
"loss": 0.1207,
"step": 2296
},
{
"epoch": 1.6141953619114546,
"grad_norm": 0.19536232337268647,
"learning_rate": 8.903937850917421e-07,
"loss": 0.1083,
"step": 2297
},
{
"epoch": 1.6148981026001406,
"grad_norm": 0.2129984924519198,
"learning_rate": 8.87252482806003e-07,
"loss": 0.1212,
"step": 2298
},
{
"epoch": 1.6156008432888265,
"grad_norm": 0.199374707967153,
"learning_rate": 8.841161919536134e-07,
"loss": 0.1071,
"step": 2299
},
{
"epoch": 1.6163035839775124,
"grad_norm": 0.1960528894973754,
"learning_rate": 8.809849163561812e-07,
"loss": 0.0988,
"step": 2300
},
{
"epoch": 1.6170063246661983,
"grad_norm": 0.1940294802029543,
"learning_rate": 8.778586598292055e-07,
"loss": 0.0969,
"step": 2301
},
{
"epoch": 1.617709065354884,
"grad_norm": 0.20093652327315584,
"learning_rate": 8.74737426182064e-07,
"loss": 0.1131,
"step": 2302
},
{
"epoch": 1.61841180604357,
"grad_norm": 0.20342025729404065,
"learning_rate": 8.716212192180223e-07,
"loss": 0.1127,
"step": 2303
},
{
"epoch": 1.6191145467322556,
"grad_norm": 0.20759922338042286,
"learning_rate": 8.685100427342153e-07,
"loss": 0.1085,
"step": 2304
},
{
"epoch": 1.6198172874209416,
"grad_norm": 0.2067987008636676,
"learning_rate": 8.654039005216503e-07,
"loss": 0.1083,
"step": 2305
},
{
"epoch": 1.6205200281096275,
"grad_norm": 0.2025131693655046,
"learning_rate": 8.623027963651998e-07,
"loss": 0.1067,
"step": 2306
},
{
"epoch": 1.6212227687983134,
"grad_norm": 0.2051881785340546,
"learning_rate": 8.592067340435961e-07,
"loss": 0.1087,
"step": 2307
},
{
"epoch": 1.6219255094869993,
"grad_norm": 0.20735507318168722,
"learning_rate": 8.561157173294305e-07,
"loss": 0.1096,
"step": 2308
},
{
"epoch": 1.6226282501756852,
"grad_norm": 0.19627178887412597,
"learning_rate": 8.530297499891444e-07,
"loss": 0.1054,
"step": 2309
},
{
"epoch": 1.6233309908643712,
"grad_norm": 0.20776067530765358,
"learning_rate": 8.499488357830266e-07,
"loss": 0.1096,
"step": 2310
},
{
"epoch": 1.624033731553057,
"grad_norm": 0.20888314507683292,
"learning_rate": 8.468729784652091e-07,
"loss": 0.1147,
"step": 2311
},
{
"epoch": 1.6247364722417428,
"grad_norm": 0.18591854358190849,
"learning_rate": 8.438021817836617e-07,
"loss": 0.0875,
"step": 2312
},
{
"epoch": 1.6254392129304287,
"grad_norm": 0.21728014356714054,
"learning_rate": 8.407364494801879e-07,
"loss": 0.137,
"step": 2313
},
{
"epoch": 1.6261419536191144,
"grad_norm": 0.21342600414485147,
"learning_rate": 8.376757852904194e-07,
"loss": 0.1194,
"step": 2314
},
{
"epoch": 1.6268446943078003,
"grad_norm": 0.21075553417510964,
"learning_rate": 8.346201929438158e-07,
"loss": 0.1162,
"step": 2315
},
{
"epoch": 1.6275474349964862,
"grad_norm": 0.2117534782345173,
"learning_rate": 8.31569676163651e-07,
"loss": 0.11,
"step": 2316
},
{
"epoch": 1.6282501756851722,
"grad_norm": 0.19498713431349643,
"learning_rate": 8.285242386670178e-07,
"loss": 0.0936,
"step": 2317
},
{
"epoch": 1.628952916373858,
"grad_norm": 0.1987361455963197,
"learning_rate": 8.254838841648188e-07,
"loss": 0.0989,
"step": 2318
},
{
"epoch": 1.629655657062544,
"grad_norm": 0.21029808342499587,
"learning_rate": 8.224486163617651e-07,
"loss": 0.1191,
"step": 2319
},
{
"epoch": 1.63035839775123,
"grad_norm": 0.21713520210795012,
"learning_rate": 8.194184389563681e-07,
"loss": 0.1172,
"step": 2320
},
{
"epoch": 1.6310611384399156,
"grad_norm": 0.21142042466924305,
"learning_rate": 8.163933556409332e-07,
"loss": 0.125,
"step": 2321
},
{
"epoch": 1.6317638791286015,
"grad_norm": 0.2038687429750128,
"learning_rate": 8.133733701015623e-07,
"loss": 0.1122,
"step": 2322
},
{
"epoch": 1.6324666198172875,
"grad_norm": 0.2123312230683746,
"learning_rate": 8.103584860181468e-07,
"loss": 0.11,
"step": 2323
},
{
"epoch": 1.6331693605059732,
"grad_norm": 0.20919504171439768,
"learning_rate": 8.073487070643588e-07,
"loss": 0.1107,
"step": 2324
},
{
"epoch": 1.633872101194659,
"grad_norm": 0.20552229401866648,
"learning_rate": 8.043440369076522e-07,
"loss": 0.1106,
"step": 2325
},
{
"epoch": 1.634574841883345,
"grad_norm": 0.2054860238102078,
"learning_rate": 8.013444792092506e-07,
"loss": 0.1127,
"step": 2326
},
{
"epoch": 1.635277582572031,
"grad_norm": 0.2112838524493516,
"learning_rate": 7.98350037624156e-07,
"loss": 0.1245,
"step": 2327
},
{
"epoch": 1.6359803232607169,
"grad_norm": 0.1934243264473309,
"learning_rate": 7.953607158011311e-07,
"loss": 0.1017,
"step": 2328
},
{
"epoch": 1.6366830639494028,
"grad_norm": 0.2104236850610135,
"learning_rate": 7.923765173827003e-07,
"loss": 0.113,
"step": 2329
},
{
"epoch": 1.6373858046380887,
"grad_norm": 0.2104428425152862,
"learning_rate": 7.893974460051474e-07,
"loss": 0.1263,
"step": 2330
},
{
"epoch": 1.6380885453267744,
"grad_norm": 0.1843120419063022,
"learning_rate": 7.864235052985059e-07,
"loss": 0.0824,
"step": 2331
},
{
"epoch": 1.6387912860154603,
"grad_norm": 0.19839278188073703,
"learning_rate": 7.834546988865605e-07,
"loss": 0.1052,
"step": 2332
},
{
"epoch": 1.639494026704146,
"grad_norm": 0.20558154826205674,
"learning_rate": 7.804910303868374e-07,
"loss": 0.1163,
"step": 2333
},
{
"epoch": 1.640196767392832,
"grad_norm": 0.20507260725846108,
"learning_rate": 7.775325034106024e-07,
"loss": 0.1117,
"step": 2334
},
{
"epoch": 1.6408995080815179,
"grad_norm": 0.20675748934173196,
"learning_rate": 7.745791215628596e-07,
"loss": 0.1161,
"step": 2335
},
{
"epoch": 1.6416022487702038,
"grad_norm": 0.20625716657025583,
"learning_rate": 7.716308884423385e-07,
"loss": 0.1202,
"step": 2336
},
{
"epoch": 1.6423049894588897,
"grad_norm": 0.19256161687893347,
"learning_rate": 7.686878076414984e-07,
"loss": 0.0977,
"step": 2337
},
{
"epoch": 1.6430077301475756,
"grad_norm": 0.2039458997297406,
"learning_rate": 7.657498827465176e-07,
"loss": 0.1085,
"step": 2338
},
{
"epoch": 1.6437104708362615,
"grad_norm": 0.2131910538373007,
"learning_rate": 7.628171173372973e-07,
"loss": 0.1234,
"step": 2339
},
{
"epoch": 1.6444132115249475,
"grad_norm": 0.22259614591996388,
"learning_rate": 7.598895149874453e-07,
"loss": 0.1179,
"step": 2340
},
{
"epoch": 1.6451159522136332,
"grad_norm": 0.18319058618805253,
"learning_rate": 7.569670792642819e-07,
"loss": 0.0865,
"step": 2341
},
{
"epoch": 1.645818692902319,
"grad_norm": 0.1904705952648299,
"learning_rate": 7.540498137288294e-07,
"loss": 0.0923,
"step": 2342
},
{
"epoch": 1.6465214335910048,
"grad_norm": 0.19915838124217006,
"learning_rate": 7.51137721935814e-07,
"loss": 0.1075,
"step": 2343
},
{
"epoch": 1.6472241742796907,
"grad_norm": 0.2107919575899344,
"learning_rate": 7.482308074336558e-07,
"loss": 0.1285,
"step": 2344
},
{
"epoch": 1.6479269149683766,
"grad_norm": 0.1950447029095639,
"learning_rate": 7.453290737644631e-07,
"loss": 0.1024,
"step": 2345
},
{
"epoch": 1.6486296556570625,
"grad_norm": 0.19969890489458458,
"learning_rate": 7.42432524464034e-07,
"loss": 0.1007,
"step": 2346
},
{
"epoch": 1.6493323963457485,
"grad_norm": 0.21139127646371883,
"learning_rate": 7.39541163061852e-07,
"loss": 0.1185,
"step": 2347
},
{
"epoch": 1.6500351370344344,
"grad_norm": 0.19206696956020633,
"learning_rate": 7.366549930810751e-07,
"loss": 0.0982,
"step": 2348
},
{
"epoch": 1.6507378777231203,
"grad_norm": 0.20555098921314446,
"learning_rate": 7.337740180385384e-07,
"loss": 0.1028,
"step": 2349
},
{
"epoch": 1.651440618411806,
"grad_norm": 0.20673824587926481,
"learning_rate": 7.308982414447407e-07,
"loss": 0.1039,
"step": 2350
},
{
"epoch": 1.652143359100492,
"grad_norm": 0.19052670867230564,
"learning_rate": 7.28027666803856e-07,
"loss": 0.0859,
"step": 2351
},
{
"epoch": 1.6528460997891778,
"grad_norm": 0.20339061268809702,
"learning_rate": 7.251622976137129e-07,
"loss": 0.1061,
"step": 2352
},
{
"epoch": 1.6535488404778635,
"grad_norm": 0.2044655215341024,
"learning_rate": 7.22302137365799e-07,
"loss": 0.1116,
"step": 2353
},
{
"epoch": 1.6542515811665495,
"grad_norm": 0.19836388923177664,
"learning_rate": 7.194471895452548e-07,
"loss": 0.1003,
"step": 2354
},
{
"epoch": 1.6549543218552354,
"grad_norm": 0.20324548229852177,
"learning_rate": 7.165974576308693e-07,
"loss": 0.1125,
"step": 2355
},
{
"epoch": 1.6556570625439213,
"grad_norm": 0.19650741017278026,
"learning_rate": 7.137529450950759e-07,
"loss": 0.0907,
"step": 2356
},
{
"epoch": 1.6563598032326072,
"grad_norm": 0.20638097409643058,
"learning_rate": 7.109136554039475e-07,
"loss": 0.1174,
"step": 2357
},
{
"epoch": 1.6570625439212932,
"grad_norm": 0.2151474329954063,
"learning_rate": 7.080795920171934e-07,
"loss": 0.1243,
"step": 2358
},
{
"epoch": 1.657765284609979,
"grad_norm": 0.2164541927454775,
"learning_rate": 7.052507583881557e-07,
"loss": 0.1255,
"step": 2359
},
{
"epoch": 1.6584680252986648,
"grad_norm": 0.20399016627099487,
"learning_rate": 7.02427157963802e-07,
"loss": 0.1102,
"step": 2360
},
{
"epoch": 1.6591707659873507,
"grad_norm": 0.20330433220416733,
"learning_rate": 6.996087941847246e-07,
"loss": 0.1102,
"step": 2361
},
{
"epoch": 1.6598735066760364,
"grad_norm": 0.2176242849655342,
"learning_rate": 6.96795670485133e-07,
"loss": 0.1239,
"step": 2362
},
{
"epoch": 1.6605762473647223,
"grad_norm": 0.1950613925771786,
"learning_rate": 6.93987790292856e-07,
"loss": 0.0968,
"step": 2363
},
{
"epoch": 1.6612789880534082,
"grad_norm": 0.21676307490562896,
"learning_rate": 6.911851570293271e-07,
"loss": 0.1214,
"step": 2364
},
{
"epoch": 1.6619817287420942,
"grad_norm": 0.2144802181749889,
"learning_rate": 6.883877741095907e-07,
"loss": 0.1168,
"step": 2365
},
{
"epoch": 1.66268446943078,
"grad_norm": 0.21854428985964822,
"learning_rate": 6.855956449422907e-07,
"loss": 0.1165,
"step": 2366
},
{
"epoch": 1.663387210119466,
"grad_norm": 0.20244280099699374,
"learning_rate": 6.828087729296734e-07,
"loss": 0.098,
"step": 2367
},
{
"epoch": 1.664089950808152,
"grad_norm": 0.2017189026291793,
"learning_rate": 6.800271614675763e-07,
"loss": 0.1008,
"step": 2368
},
{
"epoch": 1.6647926914968376,
"grad_norm": 0.1950510087935971,
"learning_rate": 6.772508139454248e-07,
"loss": 0.0984,
"step": 2369
},
{
"epoch": 1.6654954321855235,
"grad_norm": 0.20171385115585153,
"learning_rate": 6.744797337462322e-07,
"loss": 0.1038,
"step": 2370
},
{
"epoch": 1.6661981728742095,
"grad_norm": 0.20385952719359773,
"learning_rate": 6.717139242465965e-07,
"loss": 0.1132,
"step": 2371
},
{
"epoch": 1.6669009135628952,
"grad_norm": 0.1931014947845427,
"learning_rate": 6.689533888166893e-07,
"loss": 0.0937,
"step": 2372
},
{
"epoch": 1.667603654251581,
"grad_norm": 0.23326418214904476,
"learning_rate": 6.661981308202581e-07,
"loss": 0.1375,
"step": 2373
},
{
"epoch": 1.668306394940267,
"grad_norm": 0.19573186417833854,
"learning_rate": 6.634481536146153e-07,
"loss": 0.0952,
"step": 2374
},
{
"epoch": 1.669009135628953,
"grad_norm": 0.1971609483277154,
"learning_rate": 6.607034605506451e-07,
"loss": 0.0991,
"step": 2375
},
{
"epoch": 1.6697118763176388,
"grad_norm": 0.2094229939936354,
"learning_rate": 6.579640549727884e-07,
"loss": 0.1088,
"step": 2376
},
{
"epoch": 1.6704146170063248,
"grad_norm": 0.18982468326048188,
"learning_rate": 6.552299402190443e-07,
"loss": 0.0933,
"step": 2377
},
{
"epoch": 1.6711173576950107,
"grad_norm": 0.1956647340853187,
"learning_rate": 6.525011196209657e-07,
"loss": 0.0946,
"step": 2378
},
{
"epoch": 1.6718200983836964,
"grad_norm": 0.2031015150628662,
"learning_rate": 6.497775965036545e-07,
"loss": 0.1108,
"step": 2379
},
{
"epoch": 1.6725228390723823,
"grad_norm": 0.20978409329341435,
"learning_rate": 6.470593741857562e-07,
"loss": 0.1104,
"step": 2380
},
{
"epoch": 1.673225579761068,
"grad_norm": 0.21252284077595973,
"learning_rate": 6.443464559794583e-07,
"loss": 0.1254,
"step": 2381
},
{
"epoch": 1.673928320449754,
"grad_norm": 0.2137313168513677,
"learning_rate": 6.416388451904848e-07,
"loss": 0.123,
"step": 2382
},
{
"epoch": 1.6746310611384398,
"grad_norm": 0.2038367939473381,
"learning_rate": 6.389365451180928e-07,
"loss": 0.1124,
"step": 2383
},
{
"epoch": 1.6753338018271258,
"grad_norm": 0.21487758114572308,
"learning_rate": 6.362395590550685e-07,
"loss": 0.117,
"step": 2384
},
{
"epoch": 1.6760365425158117,
"grad_norm": 0.21671924596990794,
"learning_rate": 6.335478902877218e-07,
"loss": 0.1192,
"step": 2385
},
{
"epoch": 1.6767392832044976,
"grad_norm": 0.20107764082299406,
"learning_rate": 6.308615420958847e-07,
"loss": 0.0921,
"step": 2386
},
{
"epoch": 1.6774420238931835,
"grad_norm": 0.2060307722462165,
"learning_rate": 6.281805177529055e-07,
"loss": 0.107,
"step": 2387
},
{
"epoch": 1.6781447645818695,
"grad_norm": 0.21244603529324457,
"learning_rate": 6.255048205256447e-07,
"loss": 0.1182,
"step": 2388
},
{
"epoch": 1.6788475052705552,
"grad_norm": 0.20879938337688222,
"learning_rate": 6.228344536744735e-07,
"loss": 0.1098,
"step": 2389
},
{
"epoch": 1.679550245959241,
"grad_norm": 0.2000856421347874,
"learning_rate": 6.201694204532638e-07,
"loss": 0.1035,
"step": 2390
},
{
"epoch": 1.6802529866479268,
"grad_norm": 0.1963851764986925,
"learning_rate": 6.175097241093947e-07,
"loss": 0.0895,
"step": 2391
},
{
"epoch": 1.6809557273366127,
"grad_norm": 0.20537378056488192,
"learning_rate": 6.148553678837388e-07,
"loss": 0.1039,
"step": 2392
},
{
"epoch": 1.6816584680252986,
"grad_norm": 0.2171189379311851,
"learning_rate": 6.122063550106594e-07,
"loss": 0.1271,
"step": 2393
},
{
"epoch": 1.6823612087139845,
"grad_norm": 0.199874371795221,
"learning_rate": 6.095626887180106e-07,
"loss": 0.108,
"step": 2394
},
{
"epoch": 1.6830639494026705,
"grad_norm": 0.2087339850062711,
"learning_rate": 6.06924372227135e-07,
"loss": 0.1213,
"step": 2395
},
{
"epoch": 1.6837666900913564,
"grad_norm": 0.20350329037581566,
"learning_rate": 6.042914087528529e-07,
"loss": 0.1105,
"step": 2396
},
{
"epoch": 1.6844694307800423,
"grad_norm": 0.21848618504295214,
"learning_rate": 6.016638015034631e-07,
"loss": 0.1215,
"step": 2397
},
{
"epoch": 1.685172171468728,
"grad_norm": 0.20412324475053867,
"learning_rate": 5.990415536807348e-07,
"loss": 0.106,
"step": 2398
},
{
"epoch": 1.685874912157414,
"grad_norm": 0.21514555001192368,
"learning_rate": 5.964246684799113e-07,
"loss": 0.1116,
"step": 2399
},
{
"epoch": 1.6865776528460998,
"grad_norm": 0.19589348932834194,
"learning_rate": 5.938131490896992e-07,
"loss": 0.0968,
"step": 2400
},
{
"epoch": 1.6872803935347855,
"grad_norm": 0.1940135602420098,
"learning_rate": 5.912069986922664e-07,
"loss": 0.0968,
"step": 2401
},
{
"epoch": 1.6879831342234715,
"grad_norm": 0.2144559835783796,
"learning_rate": 5.886062204632392e-07,
"loss": 0.1292,
"step": 2402
},
{
"epoch": 1.6886858749121574,
"grad_norm": 0.19971328510193995,
"learning_rate": 5.860108175716983e-07,
"loss": 0.1013,
"step": 2403
},
{
"epoch": 1.6893886156008433,
"grad_norm": 0.2002261808345876,
"learning_rate": 5.834207931801733e-07,
"loss": 0.1026,
"step": 2404
},
{
"epoch": 1.6900913562895292,
"grad_norm": 0.2040939310803932,
"learning_rate": 5.808361504446413e-07,
"loss": 0.1065,
"step": 2405
},
{
"epoch": 1.6907940969782151,
"grad_norm": 0.1976323280791592,
"learning_rate": 5.78256892514521e-07,
"loss": 0.1033,
"step": 2406
},
{
"epoch": 1.691496837666901,
"grad_norm": 0.19887391062745582,
"learning_rate": 5.756830225326692e-07,
"loss": 0.0996,
"step": 2407
},
{
"epoch": 1.6921995783555868,
"grad_norm": 0.19980470348439858,
"learning_rate": 5.731145436353796e-07,
"loss": 0.1056,
"step": 2408
},
{
"epoch": 1.6929023190442727,
"grad_norm": 0.22028565595529456,
"learning_rate": 5.705514589523742e-07,
"loss": 0.125,
"step": 2409
},
{
"epoch": 1.6936050597329584,
"grad_norm": 0.19838208314175187,
"learning_rate": 5.679937716068029e-07,
"loss": 0.0963,
"step": 2410
},
{
"epoch": 1.6943078004216443,
"grad_norm": 0.19659333739886545,
"learning_rate": 5.654414847152401e-07,
"loss": 0.0951,
"step": 2411
},
{
"epoch": 1.6950105411103302,
"grad_norm": 0.19406581483320431,
"learning_rate": 5.628946013876779e-07,
"loss": 0.0941,
"step": 2412
},
{
"epoch": 1.6957132817990161,
"grad_norm": 0.19308740970959692,
"learning_rate": 5.603531247275251e-07,
"loss": 0.0987,
"step": 2413
},
{
"epoch": 1.696416022487702,
"grad_norm": 0.20503680077935446,
"learning_rate": 5.578170578316017e-07,
"loss": 0.1094,
"step": 2414
},
{
"epoch": 1.697118763176388,
"grad_norm": 0.21042547286765984,
"learning_rate": 5.552864037901379e-07,
"loss": 0.127,
"step": 2415
},
{
"epoch": 1.697821503865074,
"grad_norm": 0.2031406821798381,
"learning_rate": 5.527611656867666e-07,
"loss": 0.1091,
"step": 2416
},
{
"epoch": 1.6985242445537596,
"grad_norm": 0.22052150429699638,
"learning_rate": 5.502413465985196e-07,
"loss": 0.1179,
"step": 2417
},
{
"epoch": 1.6992269852424455,
"grad_norm": 0.1919670851352082,
"learning_rate": 5.477269495958276e-07,
"loss": 0.0906,
"step": 2418
},
{
"epoch": 1.6999297259311315,
"grad_norm": 0.20443224270839813,
"learning_rate": 5.452179777425159e-07,
"loss": 0.1085,
"step": 2419
},
{
"epoch": 1.7006324666198172,
"grad_norm": 0.20934745742663316,
"learning_rate": 5.427144340957968e-07,
"loss": 0.1118,
"step": 2420
},
{
"epoch": 1.701335207308503,
"grad_norm": 0.21647669642923967,
"learning_rate": 5.402163217062695e-07,
"loss": 0.1205,
"step": 2421
},
{
"epoch": 1.702037947997189,
"grad_norm": 0.21059883935926482,
"learning_rate": 5.377236436179123e-07,
"loss": 0.1193,
"step": 2422
},
{
"epoch": 1.702740688685875,
"grad_norm": 0.19582306453178425,
"learning_rate": 5.352364028680868e-07,
"loss": 0.0844,
"step": 2423
},
{
"epoch": 1.7034434293745608,
"grad_norm": 0.20104641336211135,
"learning_rate": 5.327546024875252e-07,
"loss": 0.0986,
"step": 2424
},
{
"epoch": 1.7041461700632468,
"grad_norm": 0.21548425149945807,
"learning_rate": 5.302782455003313e-07,
"loss": 0.1256,
"step": 2425
},
{
"epoch": 1.7048489107519327,
"grad_norm": 0.2064161355540851,
"learning_rate": 5.278073349239776e-07,
"loss": 0.1155,
"step": 2426
},
{
"epoch": 1.7055516514406184,
"grad_norm": 0.19865199477369222,
"learning_rate": 5.253418737692983e-07,
"loss": 0.0926,
"step": 2427
},
{
"epoch": 1.7062543921293043,
"grad_norm": 0.1914598159075777,
"learning_rate": 5.228818650404883e-07,
"loss": 0.0879,
"step": 2428
},
{
"epoch": 1.70695713281799,
"grad_norm": 0.21392604846120264,
"learning_rate": 5.204273117350983e-07,
"loss": 0.1248,
"step": 2429
},
{
"epoch": 1.707659873506676,
"grad_norm": 0.21023021049962565,
"learning_rate": 5.179782168440317e-07,
"loss": 0.1146,
"step": 2430
},
{
"epoch": 1.7083626141953618,
"grad_norm": 0.203241829706655,
"learning_rate": 5.155345833515408e-07,
"loss": 0.1119,
"step": 2431
},
{
"epoch": 1.7090653548840478,
"grad_norm": 0.2122051002820696,
"learning_rate": 5.130964142352223e-07,
"loss": 0.1123,
"step": 2432
},
{
"epoch": 1.7097680955727337,
"grad_norm": 0.19865625035721068,
"learning_rate": 5.106637124660164e-07,
"loss": 0.0999,
"step": 2433
},
{
"epoch": 1.7104708362614196,
"grad_norm": 0.2038356923749995,
"learning_rate": 5.082364810081991e-07,
"loss": 0.1122,
"step": 2434
},
{
"epoch": 1.7111735769501055,
"grad_norm": 0.20586569385298514,
"learning_rate": 5.058147228193828e-07,
"loss": 0.1087,
"step": 2435
},
{
"epoch": 1.7118763176387914,
"grad_norm": 0.20731920852794578,
"learning_rate": 5.033984408505083e-07,
"loss": 0.1184,
"step": 2436
},
{
"epoch": 1.7125790583274771,
"grad_norm": 0.21120591458055912,
"learning_rate": 5.00987638045845e-07,
"loss": 0.124,
"step": 2437
},
{
"epoch": 1.713281799016163,
"grad_norm": 0.20412449918608894,
"learning_rate": 4.985823173429871e-07,
"loss": 0.1032,
"step": 2438
},
{
"epoch": 1.7139845397048488,
"grad_norm": 0.21337807642396558,
"learning_rate": 4.96182481672845e-07,
"loss": 0.1284,
"step": 2439
},
{
"epoch": 1.7146872803935347,
"grad_norm": 0.1966467425578043,
"learning_rate": 4.937881339596518e-07,
"loss": 0.1046,
"step": 2440
},
{
"epoch": 1.7153900210822206,
"grad_norm": 0.20783552769770477,
"learning_rate": 4.913992771209458e-07,
"loss": 0.1191,
"step": 2441
},
{
"epoch": 1.7160927617709065,
"grad_norm": 0.18795531062476806,
"learning_rate": 4.890159140675787e-07,
"loss": 0.0887,
"step": 2442
},
{
"epoch": 1.7167955024595924,
"grad_norm": 0.2115839646540402,
"learning_rate": 4.866380477037097e-07,
"loss": 0.1335,
"step": 2443
},
{
"epoch": 1.7174982431482784,
"grad_norm": 0.2092911915844625,
"learning_rate": 4.842656809267976e-07,
"loss": 0.1139,
"step": 2444
},
{
"epoch": 1.7182009838369643,
"grad_norm": 0.19927154633064284,
"learning_rate": 4.818988166276006e-07,
"loss": 0.1116,
"step": 2445
},
{
"epoch": 1.71890372452565,
"grad_norm": 0.18170074177533957,
"learning_rate": 4.795374576901696e-07,
"loss": 0.0837,
"step": 2446
},
{
"epoch": 1.719606465214336,
"grad_norm": 0.2072318298563855,
"learning_rate": 4.771816069918522e-07,
"loss": 0.1113,
"step": 2447
},
{
"epoch": 1.7203092059030218,
"grad_norm": 0.18873568449440511,
"learning_rate": 4.7483126740328013e-07,
"loss": 0.0938,
"step": 2448
},
{
"epoch": 1.7210119465917075,
"grad_norm": 0.20194415396441612,
"learning_rate": 4.7248644178837176e-07,
"loss": 0.0999,
"step": 2449
},
{
"epoch": 1.7217146872803935,
"grad_norm": 0.21962957486014542,
"learning_rate": 4.7014713300432504e-07,
"loss": 0.1195,
"step": 2450
},
{
"epoch": 1.7224174279690794,
"grad_norm": 0.20173996054779358,
"learning_rate": 4.6781334390161745e-07,
"loss": 0.1095,
"step": 2451
},
{
"epoch": 1.7231201686577653,
"grad_norm": 0.20184508242786436,
"learning_rate": 4.6548507732399826e-07,
"loss": 0.0963,
"step": 2452
},
{
"epoch": 1.7238229093464512,
"grad_norm": 0.19337226905797475,
"learning_rate": 4.631623361084903e-07,
"loss": 0.0998,
"step": 2453
},
{
"epoch": 1.7245256500351371,
"grad_norm": 0.21815781186446964,
"learning_rate": 4.6084512308538165e-07,
"loss": 0.1272,
"step": 2454
},
{
"epoch": 1.725228390723823,
"grad_norm": 0.19759519592525188,
"learning_rate": 4.585334410782244e-07,
"loss": 0.0957,
"step": 2455
},
{
"epoch": 1.7259311314125088,
"grad_norm": 0.20497833848021832,
"learning_rate": 4.562272929038325e-07,
"loss": 0.1171,
"step": 2456
},
{
"epoch": 1.7266338721011947,
"grad_norm": 0.20630359100740514,
"learning_rate": 4.539266813722748e-07,
"loss": 0.1126,
"step": 2457
},
{
"epoch": 1.7273366127898804,
"grad_norm": 0.23613027768284595,
"learning_rate": 4.51631609286875e-07,
"loss": 0.1088,
"step": 2458
},
{
"epoch": 1.7280393534785663,
"grad_norm": 0.20796626393023246,
"learning_rate": 4.4934207944420604e-07,
"loss": 0.1111,
"step": 2459
},
{
"epoch": 1.7287420941672522,
"grad_norm": 0.22691687610121677,
"learning_rate": 4.4705809463409077e-07,
"loss": 0.1298,
"step": 2460
},
{
"epoch": 1.7294448348559381,
"grad_norm": 0.20155909655190277,
"learning_rate": 4.447796576395896e-07,
"loss": 0.106,
"step": 2461
},
{
"epoch": 1.730147575544624,
"grad_norm": 0.21837081417133225,
"learning_rate": 4.425067712370074e-07,
"loss": 0.1261,
"step": 2462
},
{
"epoch": 1.73085031623331,
"grad_norm": 0.20039024717855256,
"learning_rate": 4.40239438195883e-07,
"loss": 0.1028,
"step": 2463
},
{
"epoch": 1.731553056921996,
"grad_norm": 0.20972016191167342,
"learning_rate": 4.379776612789921e-07,
"loss": 0.1148,
"step": 2464
},
{
"epoch": 1.7322557976106818,
"grad_norm": 0.2087736527813742,
"learning_rate": 4.357214432423351e-07,
"loss": 0.1131,
"step": 2465
},
{
"epoch": 1.7329585382993675,
"grad_norm": 0.19589508639457565,
"learning_rate": 4.334707868351423e-07,
"loss": 0.1012,
"step": 2466
},
{
"epoch": 1.7336612789880534,
"grad_norm": 0.2261446614610728,
"learning_rate": 4.312256947998655e-07,
"loss": 0.1366,
"step": 2467
},
{
"epoch": 1.7343640196767391,
"grad_norm": 0.19479300882599224,
"learning_rate": 4.2898616987217866e-07,
"loss": 0.1061,
"step": 2468
},
{
"epoch": 1.735066760365425,
"grad_norm": 0.2019718363532922,
"learning_rate": 4.2675221478096995e-07,
"loss": 0.0961,
"step": 2469
},
{
"epoch": 1.735769501054111,
"grad_norm": 0.20975769287568874,
"learning_rate": 4.245238322483386e-07,
"loss": 0.1112,
"step": 2470
},
{
"epoch": 1.736472241742797,
"grad_norm": 0.21038938139404187,
"learning_rate": 4.223010249895987e-07,
"loss": 0.1167,
"step": 2471
},
{
"epoch": 1.7371749824314828,
"grad_norm": 0.21781287042454267,
"learning_rate": 4.2008379571326753e-07,
"loss": 0.1293,
"step": 2472
},
{
"epoch": 1.7378777231201687,
"grad_norm": 0.18765196769841816,
"learning_rate": 4.178721471210662e-07,
"loss": 0.0828,
"step": 2473
},
{
"epoch": 1.7385804638088547,
"grad_norm": 0.20031205040211092,
"learning_rate": 4.156660819079156e-07,
"loss": 0.1064,
"step": 2474
},
{
"epoch": 1.7392832044975404,
"grad_norm": 0.19754212921234351,
"learning_rate": 4.134656027619333e-07,
"loss": 0.098,
"step": 2475
},
{
"epoch": 1.7399859451862263,
"grad_norm": 0.19406302419378108,
"learning_rate": 4.1127071236442993e-07,
"loss": 0.1012,
"step": 2476
},
{
"epoch": 1.7406886858749122,
"grad_norm": 0.1963016200091139,
"learning_rate": 4.090814133899068e-07,
"loss": 0.1028,
"step": 2477
},
{
"epoch": 1.741391426563598,
"grad_norm": 0.21180204050924242,
"learning_rate": 4.06897708506051e-07,
"loss": 0.1107,
"step": 2478
},
{
"epoch": 1.7420941672522838,
"grad_norm": 0.20984656173631291,
"learning_rate": 4.047196003737347e-07,
"loss": 0.1115,
"step": 2479
},
{
"epoch": 1.7427969079409698,
"grad_norm": 0.20907355575878966,
"learning_rate": 4.025470916470081e-07,
"loss": 0.1212,
"step": 2480
},
{
"epoch": 1.7434996486296557,
"grad_norm": 0.19626098073013043,
"learning_rate": 4.0038018497310096e-07,
"loss": 0.0997,
"step": 2481
},
{
"epoch": 1.7442023893183416,
"grad_norm": 0.19947371918102133,
"learning_rate": 3.98218882992415e-07,
"loss": 0.109,
"step": 2482
},
{
"epoch": 1.7449051300070275,
"grad_norm": 0.19456028586961002,
"learning_rate": 3.960631883385224e-07,
"loss": 0.1037,
"step": 2483
},
{
"epoch": 1.7456078706957134,
"grad_norm": 0.20532565866863556,
"learning_rate": 3.939131036381666e-07,
"loss": 0.1112,
"step": 2484
},
{
"epoch": 1.7463106113843991,
"grad_norm": 0.1941390719859521,
"learning_rate": 3.91768631511249e-07,
"loss": 0.0986,
"step": 2485
},
{
"epoch": 1.747013352073085,
"grad_norm": 0.20433619436970774,
"learning_rate": 3.8962977457083663e-07,
"loss": 0.1105,
"step": 2486
},
{
"epoch": 1.7477160927617708,
"grad_norm": 0.2054912258025397,
"learning_rate": 3.874965354231514e-07,
"loss": 0.1085,
"step": 2487
},
{
"epoch": 1.7484188334504567,
"grad_norm": 0.20845390505218692,
"learning_rate": 3.8536891666757446e-07,
"loss": 0.1163,
"step": 2488
},
{
"epoch": 1.7491215741391426,
"grad_norm": 0.2068171185080583,
"learning_rate": 3.832469208966333e-07,
"loss": 0.1096,
"step": 2489
},
{
"epoch": 1.7498243148278285,
"grad_norm": 0.21527489864559002,
"learning_rate": 3.8113055069600555e-07,
"loss": 0.109,
"step": 2490
},
{
"epoch": 1.7505270555165144,
"grad_norm": 0.20312081490287673,
"learning_rate": 3.790198086445146e-07,
"loss": 0.1081,
"step": 2491
},
{
"epoch": 1.7512297962052004,
"grad_norm": 0.21165446769562754,
"learning_rate": 3.7691469731412635e-07,
"loss": 0.1063,
"step": 2492
},
{
"epoch": 1.7519325368938863,
"grad_norm": 0.20217463262958196,
"learning_rate": 3.7481521926994504e-07,
"loss": 0.0944,
"step": 2493
},
{
"epoch": 1.752635277582572,
"grad_norm": 0.19073246370898175,
"learning_rate": 3.7272137707020875e-07,
"loss": 0.0992,
"step": 2494
},
{
"epoch": 1.753338018271258,
"grad_norm": 0.20756731300634884,
"learning_rate": 3.7063317326629043e-07,
"loss": 0.119,
"step": 2495
},
{
"epoch": 1.7540407589599438,
"grad_norm": 0.21279987995637017,
"learning_rate": 3.685506104026931e-07,
"loss": 0.1263,
"step": 2496
},
{
"epoch": 1.7547434996486295,
"grad_norm": 0.1866026466182793,
"learning_rate": 3.6647369101704465e-07,
"loss": 0.0879,
"step": 2497
},
{
"epoch": 1.7554462403373154,
"grad_norm": 0.18716041595575972,
"learning_rate": 3.644024176400962e-07,
"loss": 0.0833,
"step": 2498
},
{
"epoch": 1.7561489810260014,
"grad_norm": 0.20184735682802424,
"learning_rate": 3.623367927957211e-07,
"loss": 0.113,
"step": 2499
},
{
"epoch": 1.7568517217146873,
"grad_norm": 0.1983958899866269,
"learning_rate": 3.602768190009076e-07,
"loss": 0.1007,
"step": 2500
},
{
"epoch": 1.7568517217146873,
"eval_loss": 0.1345888078212738,
"eval_runtime": 10.7574,
"eval_samples_per_second": 21.381,
"eval_steps_per_second": 5.392,
"step": 2500
},
{
"epoch": 1.7575544624033732,
"grad_norm": 0.19145262082076198,
"learning_rate": 3.5822249876575897e-07,
"loss": 0.0877,
"step": 2501
},
{
"epoch": 1.7582572030920591,
"grad_norm": 0.21147912700878405,
"learning_rate": 3.561738345934901e-07,
"loss": 0.1085,
"step": 2502
},
{
"epoch": 1.758959943780745,
"grad_norm": 0.20464847845834164,
"learning_rate": 3.541308289804235e-07,
"loss": 0.1095,
"step": 2503
},
{
"epoch": 1.7596626844694307,
"grad_norm": 0.2151868301622328,
"learning_rate": 3.5209348441598626e-07,
"loss": 0.1239,
"step": 2504
},
{
"epoch": 1.7603654251581167,
"grad_norm": 0.18977764500053737,
"learning_rate": 3.50061803382708e-07,
"loss": 0.0938,
"step": 2505
},
{
"epoch": 1.7610681658468024,
"grad_norm": 0.22694847853550001,
"learning_rate": 3.4803578835621685e-07,
"loss": 0.1357,
"step": 2506
},
{
"epoch": 1.7617709065354883,
"grad_norm": 0.2080888054220229,
"learning_rate": 3.460154418052364e-07,
"loss": 0.1203,
"step": 2507
},
{
"epoch": 1.7624736472241742,
"grad_norm": 0.19966595170143583,
"learning_rate": 3.440007661915856e-07,
"loss": 0.0989,
"step": 2508
},
{
"epoch": 1.7631763879128601,
"grad_norm": 0.19500067295203174,
"learning_rate": 3.419917639701698e-07,
"loss": 0.0983,
"step": 2509
},
{
"epoch": 1.763879128601546,
"grad_norm": 0.20330434434289213,
"learning_rate": 3.3998843758898336e-07,
"loss": 0.1048,
"step": 2510
},
{
"epoch": 1.764581869290232,
"grad_norm": 0.2151714566493691,
"learning_rate": 3.379907894891027e-07,
"loss": 0.1252,
"step": 2511
},
{
"epoch": 1.765284609978918,
"grad_norm": 0.2040561631501836,
"learning_rate": 3.3599882210468947e-07,
"loss": 0.1114,
"step": 2512
},
{
"epoch": 1.7659873506676038,
"grad_norm": 0.21026306739704717,
"learning_rate": 3.340125378629783e-07,
"loss": 0.1072,
"step": 2513
},
{
"epoch": 1.7666900913562895,
"grad_norm": 0.21619171691222386,
"learning_rate": 3.320319391842813e-07,
"loss": 0.1146,
"step": 2514
},
{
"epoch": 1.7673928320449754,
"grad_norm": 0.20364432294036325,
"learning_rate": 3.300570284819815e-07,
"loss": 0.1069,
"step": 2515
},
{
"epoch": 1.7680955727336611,
"grad_norm": 0.21311752343226606,
"learning_rate": 3.280878081625333e-07,
"loss": 0.1243,
"step": 2516
},
{
"epoch": 1.768798313422347,
"grad_norm": 0.20308079084527983,
"learning_rate": 3.261242806254561e-07,
"loss": 0.11,
"step": 2517
},
{
"epoch": 1.769501054111033,
"grad_norm": 0.1859391129942004,
"learning_rate": 3.241664482633311e-07,
"loss": 0.0945,
"step": 2518
},
{
"epoch": 1.770203794799719,
"grad_norm": 0.2092136979835357,
"learning_rate": 3.222143134618e-07,
"loss": 0.1077,
"step": 2519
},
{
"epoch": 1.7709065354884048,
"grad_norm": 0.20077950212845894,
"learning_rate": 3.202678785995655e-07,
"loss": 0.1051,
"step": 2520
},
{
"epoch": 1.7716092761770907,
"grad_norm": 0.20306119826625935,
"learning_rate": 3.1832714604838166e-07,
"loss": 0.1063,
"step": 2521
},
{
"epoch": 1.7723120168657767,
"grad_norm": 0.2165847633999375,
"learning_rate": 3.16392118173055e-07,
"loss": 0.1255,
"step": 2522
},
{
"epoch": 1.7730147575544624,
"grad_norm": 0.2079400272768175,
"learning_rate": 3.144627973314385e-07,
"loss": 0.1105,
"step": 2523
},
{
"epoch": 1.7737174982431483,
"grad_norm": 0.20235795502260362,
"learning_rate": 3.1253918587443645e-07,
"loss": 0.1097,
"step": 2524
},
{
"epoch": 1.7744202389318342,
"grad_norm": 0.21973875224478653,
"learning_rate": 3.1062128614599176e-07,
"loss": 0.1292,
"step": 2525
},
{
"epoch": 1.77512297962052,
"grad_norm": 0.20010718433635905,
"learning_rate": 3.0870910048308833e-07,
"loss": 0.1056,
"step": 2526
},
{
"epoch": 1.7758257203092058,
"grad_norm": 0.20499097507882152,
"learning_rate": 3.068026312157485e-07,
"loss": 0.1121,
"step": 2527
},
{
"epoch": 1.7765284609978917,
"grad_norm": 0.20877020029055102,
"learning_rate": 3.049018806670284e-07,
"loss": 0.1168,
"step": 2528
},
{
"epoch": 1.7772312016865777,
"grad_norm": 0.21802560868824236,
"learning_rate": 3.030068511530154e-07,
"loss": 0.1216,
"step": 2529
},
{
"epoch": 1.7779339423752636,
"grad_norm": 0.21124724672397877,
"learning_rate": 3.0111754498282686e-07,
"loss": 0.124,
"step": 2530
},
{
"epoch": 1.7786366830639495,
"grad_norm": 0.2130095037674706,
"learning_rate": 2.9923396445860454e-07,
"loss": 0.119,
"step": 2531
},
{
"epoch": 1.7793394237526354,
"grad_norm": 0.2011819645074646,
"learning_rate": 2.9735611187551696e-07,
"loss": 0.1133,
"step": 2532
},
{
"epoch": 1.7800421644413211,
"grad_norm": 0.2061042630228151,
"learning_rate": 2.9548398952174764e-07,
"loss": 0.1076,
"step": 2533
},
{
"epoch": 1.780744905130007,
"grad_norm": 0.20168335161242262,
"learning_rate": 2.936175996785018e-07,
"loss": 0.1061,
"step": 2534
},
{
"epoch": 1.7814476458186927,
"grad_norm": 0.21562972322148308,
"learning_rate": 2.917569446199975e-07,
"loss": 0.1241,
"step": 2535
},
{
"epoch": 1.7821503865073787,
"grad_norm": 0.19889915872640407,
"learning_rate": 2.8990202661346887e-07,
"loss": 0.1025,
"step": 2536
},
{
"epoch": 1.7828531271960646,
"grad_norm": 0.19921176435788115,
"learning_rate": 2.8805284791915245e-07,
"loss": 0.101,
"step": 2537
},
{
"epoch": 1.7835558678847505,
"grad_norm": 0.20107707644197673,
"learning_rate": 2.862094107902974e-07,
"loss": 0.1093,
"step": 2538
},
{
"epoch": 1.7842586085734364,
"grad_norm": 0.19139711339389817,
"learning_rate": 2.8437171747315306e-07,
"loss": 0.1003,
"step": 2539
},
{
"epoch": 1.7849613492621224,
"grad_norm": 0.20438133149226598,
"learning_rate": 2.8253977020697266e-07,
"loss": 0.1158,
"step": 2540
},
{
"epoch": 1.7856640899508083,
"grad_norm": 0.19829457023070307,
"learning_rate": 2.8071357122400666e-07,
"loss": 0.1027,
"step": 2541
},
{
"epoch": 1.786366830639494,
"grad_norm": 0.21457558354821363,
"learning_rate": 2.788931227494995e-07,
"loss": 0.12,
"step": 2542
},
{
"epoch": 1.78706957132818,
"grad_norm": 0.20571706633343914,
"learning_rate": 2.770784270016902e-07,
"loss": 0.1063,
"step": 2543
},
{
"epoch": 1.7877723120168658,
"grad_norm": 0.20702583531082766,
"learning_rate": 2.752694861918087e-07,
"loss": 0.1171,
"step": 2544
},
{
"epoch": 1.7884750527055515,
"grad_norm": 0.21251833368617065,
"learning_rate": 2.7346630252407136e-07,
"loss": 0.1156,
"step": 2545
},
{
"epoch": 1.7891777933942374,
"grad_norm": 0.20121969400803644,
"learning_rate": 2.7166887819568055e-07,
"loss": 0.1084,
"step": 2546
},
{
"epoch": 1.7898805340829234,
"grad_norm": 0.2029012524334056,
"learning_rate": 2.6987721539681655e-07,
"loss": 0.1113,
"step": 2547
},
{
"epoch": 1.7905832747716093,
"grad_norm": 0.2070863589121346,
"learning_rate": 2.6809131631064634e-07,
"loss": 0.1076,
"step": 2548
},
{
"epoch": 1.7912860154602952,
"grad_norm": 0.21959896186249803,
"learning_rate": 2.663111831133075e-07,
"loss": 0.1173,
"step": 2549
},
{
"epoch": 1.7919887561489811,
"grad_norm": 0.20783442211002837,
"learning_rate": 2.645368179739155e-07,
"loss": 0.105,
"step": 2550
},
{
"epoch": 1.792691496837667,
"grad_norm": 0.19844405906730314,
"learning_rate": 2.627682230545547e-07,
"loss": 0.0988,
"step": 2551
},
{
"epoch": 1.7933942375263527,
"grad_norm": 0.20518708440043534,
"learning_rate": 2.6100540051028136e-07,
"loss": 0.1105,
"step": 2552
},
{
"epoch": 1.7940969782150387,
"grad_norm": 0.19108685523698263,
"learning_rate": 2.592483524891154e-07,
"loss": 0.0951,
"step": 2553
},
{
"epoch": 1.7947997189037244,
"grad_norm": 0.20826463655497943,
"learning_rate": 2.5749708113204097e-07,
"loss": 0.1142,
"step": 2554
},
{
"epoch": 1.7955024595924103,
"grad_norm": 0.18205755350083277,
"learning_rate": 2.5575158857300444e-07,
"loss": 0.0808,
"step": 2555
},
{
"epoch": 1.7962052002810962,
"grad_norm": 0.2060935907644872,
"learning_rate": 2.540118769389105e-07,
"loss": 0.1067,
"step": 2556
},
{
"epoch": 1.7969079409697821,
"grad_norm": 0.20288736772732407,
"learning_rate": 2.522779483496185e-07,
"loss": 0.1153,
"step": 2557
},
{
"epoch": 1.797610681658468,
"grad_norm": 0.2073696861923814,
"learning_rate": 2.505498049179411e-07,
"loss": 0.1069,
"step": 2558
},
{
"epoch": 1.798313422347154,
"grad_norm": 0.19436091824349477,
"learning_rate": 2.4882744874964226e-07,
"loss": 0.1016,
"step": 2559
},
{
"epoch": 1.7990161630358399,
"grad_norm": 0.20128896714711036,
"learning_rate": 2.471108819434359e-07,
"loss": 0.1019,
"step": 2560
},
{
"epoch": 1.7997189037245258,
"grad_norm": 0.19298988947465953,
"learning_rate": 2.4540010659097836e-07,
"loss": 0.0988,
"step": 2561
},
{
"epoch": 1.8004216444132115,
"grad_norm": 0.20533617760627604,
"learning_rate": 2.436951247768704e-07,
"loss": 0.1138,
"step": 2562
},
{
"epoch": 1.8011243851018974,
"grad_norm": 0.20471345662293167,
"learning_rate": 2.4199593857865247e-07,
"loss": 0.1081,
"step": 2563
},
{
"epoch": 1.8018271257905831,
"grad_norm": 0.20213970227438857,
"learning_rate": 2.40302550066806e-07,
"loss": 0.1127,
"step": 2564
},
{
"epoch": 1.802529866479269,
"grad_norm": 0.20505753450013417,
"learning_rate": 2.38614961304745e-07,
"loss": 0.1136,
"step": 2565
},
{
"epoch": 1.803232607167955,
"grad_norm": 0.19198926080683518,
"learning_rate": 2.3693317434881623e-07,
"loss": 0.0944,
"step": 2566
},
{
"epoch": 1.803935347856641,
"grad_norm": 0.19360849352590348,
"learning_rate": 2.3525719124829705e-07,
"loss": 0.0957,
"step": 2567
},
{
"epoch": 1.8046380885453268,
"grad_norm": 0.20711777752876298,
"learning_rate": 2.3358701404539552e-07,
"loss": 0.1082,
"step": 2568
},
{
"epoch": 1.8053408292340127,
"grad_norm": 0.20845172035232562,
"learning_rate": 2.3192264477524207e-07,
"loss": 0.1097,
"step": 2569
},
{
"epoch": 1.8060435699226987,
"grad_norm": 0.22774573792488434,
"learning_rate": 2.3026408546589162e-07,
"loss": 0.1341,
"step": 2570
},
{
"epoch": 1.8067463106113844,
"grad_norm": 0.20901107696126575,
"learning_rate": 2.2861133813831703e-07,
"loss": 0.1204,
"step": 2571
},
{
"epoch": 1.8074490513000703,
"grad_norm": 0.2022071255867901,
"learning_rate": 2.2696440480641401e-07,
"loss": 0.1134,
"step": 2572
},
{
"epoch": 1.8081517919887562,
"grad_norm": 0.21048692101101332,
"learning_rate": 2.2532328747698894e-07,
"loss": 0.1097,
"step": 2573
},
{
"epoch": 1.808854532677442,
"grad_norm": 0.21414589322730102,
"learning_rate": 2.23687988149765e-07,
"loss": 0.1141,
"step": 2574
},
{
"epoch": 1.8095572733661278,
"grad_norm": 0.2203964930170127,
"learning_rate": 2.2205850881737378e-07,
"loss": 0.1318,
"step": 2575
},
{
"epoch": 1.8102600140548137,
"grad_norm": 0.1969839050115635,
"learning_rate": 2.2043485146535537e-07,
"loss": 0.1101,
"step": 2576
},
{
"epoch": 1.8109627547434997,
"grad_norm": 0.1938036954341366,
"learning_rate": 2.188170180721566e-07,
"loss": 0.0988,
"step": 2577
},
{
"epoch": 1.8116654954321856,
"grad_norm": 0.2006870596448109,
"learning_rate": 2.172050106091278e-07,
"loss": 0.1047,
"step": 2578
},
{
"epoch": 1.8123682361208715,
"grad_norm": 0.21085538930601352,
"learning_rate": 2.1559883104051938e-07,
"loss": 0.1191,
"step": 2579
},
{
"epoch": 1.8130709768095574,
"grad_norm": 0.20847035025593505,
"learning_rate": 2.1399848132348078e-07,
"loss": 0.1146,
"step": 2580
},
{
"epoch": 1.8137737174982431,
"grad_norm": 0.2018451357345144,
"learning_rate": 2.1240396340805825e-07,
"loss": 0.114,
"step": 2581
},
{
"epoch": 1.814476458186929,
"grad_norm": 0.20882769289255812,
"learning_rate": 2.1081527923719035e-07,
"loss": 0.1104,
"step": 2582
},
{
"epoch": 1.8151791988756147,
"grad_norm": 0.1870000800818775,
"learning_rate": 2.0923243074670918e-07,
"loss": 0.0891,
"step": 2583
},
{
"epoch": 1.8158819395643007,
"grad_norm": 0.2135640399982624,
"learning_rate": 2.0765541986533577e-07,
"loss": 0.1225,
"step": 2584
},
{
"epoch": 1.8165846802529866,
"grad_norm": 0.2185824301037667,
"learning_rate": 2.0608424851467578e-07,
"loss": 0.1219,
"step": 2585
},
{
"epoch": 1.8172874209416725,
"grad_norm": 0.20309397016003536,
"learning_rate": 2.0451891860922167e-07,
"loss": 0.109,
"step": 2586
},
{
"epoch": 1.8179901616303584,
"grad_norm": 0.21755516058636112,
"learning_rate": 2.0295943205634605e-07,
"loss": 0.1161,
"step": 2587
},
{
"epoch": 1.8186929023190443,
"grad_norm": 0.21059140615492117,
"learning_rate": 2.0140579075630384e-07,
"loss": 0.121,
"step": 2588
},
{
"epoch": 1.8193956430077303,
"grad_norm": 0.2108891133062913,
"learning_rate": 1.9985799660222626e-07,
"loss": 0.1119,
"step": 2589
},
{
"epoch": 1.8200983836964162,
"grad_norm": 0.2021032013455884,
"learning_rate": 1.9831605148011745e-07,
"loss": 0.1056,
"step": 2590
},
{
"epoch": 1.8208011243851019,
"grad_norm": 0.19213373737235764,
"learning_rate": 1.9677995726885778e-07,
"loss": 0.0951,
"step": 2591
},
{
"epoch": 1.8215038650737878,
"grad_norm": 0.1841642807178634,
"learning_rate": 1.9524971584019726e-07,
"loss": 0.084,
"step": 2592
},
{
"epoch": 1.8222066057624735,
"grad_norm": 0.20515771614183495,
"learning_rate": 1.937253290587532e-07,
"loss": 0.1074,
"step": 2593
},
{
"epoch": 1.8229093464511594,
"grad_norm": 0.21207090317744803,
"learning_rate": 1.9220679878201086e-07,
"loss": 0.12,
"step": 2594
},
{
"epoch": 1.8236120871398454,
"grad_norm": 0.20489792952705746,
"learning_rate": 1.9069412686031575e-07,
"loss": 0.1147,
"step": 2595
},
{
"epoch": 1.8243148278285313,
"grad_norm": 0.2209816257390566,
"learning_rate": 1.8918731513687893e-07,
"loss": 0.132,
"step": 2596
},
{
"epoch": 1.8250175685172172,
"grad_norm": 0.2026476322008862,
"learning_rate": 1.876863654477684e-07,
"loss": 0.0935,
"step": 2597
},
{
"epoch": 1.8257203092059031,
"grad_norm": 0.19559080054731068,
"learning_rate": 1.8619127962190952e-07,
"loss": 0.1036,
"step": 2598
},
{
"epoch": 1.826423049894589,
"grad_norm": 0.19625473917487424,
"learning_rate": 1.847020594810839e-07,
"loss": 0.0974,
"step": 2599
},
{
"epoch": 1.8271257905832747,
"grad_norm": 0.19464014692823636,
"learning_rate": 1.8321870683992326e-07,
"loss": 0.1016,
"step": 2600
},
{
"epoch": 1.8278285312719607,
"grad_norm": 0.19783816088290526,
"learning_rate": 1.817412235059113e-07,
"loss": 0.1044,
"step": 2601
},
{
"epoch": 1.8285312719606466,
"grad_norm": 0.20616130281335224,
"learning_rate": 1.8026961127938059e-07,
"loss": 0.1132,
"step": 2602
},
{
"epoch": 1.8292340126493323,
"grad_norm": 0.194421905698615,
"learning_rate": 1.7880387195350734e-07,
"loss": 0.0897,
"step": 2603
},
{
"epoch": 1.8299367533380182,
"grad_norm": 0.20707233922300075,
"learning_rate": 1.7734400731431344e-07,
"loss": 0.1074,
"step": 2604
},
{
"epoch": 1.8306394940267041,
"grad_norm": 0.20637409447907124,
"learning_rate": 1.7589001914066206e-07,
"loss": 0.1095,
"step": 2605
},
{
"epoch": 1.83134223471539,
"grad_norm": 0.20471464163832506,
"learning_rate": 1.744419092042554e-07,
"loss": 0.1031,
"step": 2606
},
{
"epoch": 1.832044975404076,
"grad_norm": 0.19833928380505625,
"learning_rate": 1.7299967926963367e-07,
"loss": 0.0974,
"step": 2607
},
{
"epoch": 1.8327477160927619,
"grad_norm": 0.21550383195052739,
"learning_rate": 1.7156333109417055e-07,
"loss": 0.118,
"step": 2608
},
{
"epoch": 1.8334504567814478,
"grad_norm": 0.20366018756859272,
"learning_rate": 1.7013286642807602e-07,
"loss": 0.1018,
"step": 2609
},
{
"epoch": 1.8341531974701335,
"grad_norm": 0.22649100233111708,
"learning_rate": 1.687082870143869e-07,
"loss": 0.135,
"step": 2610
},
{
"epoch": 1.8348559381588194,
"grad_norm": 0.2186952373550721,
"learning_rate": 1.672895945889713e-07,
"loss": 0.1231,
"step": 2611
},
{
"epoch": 1.8355586788475051,
"grad_norm": 0.19152954701290428,
"learning_rate": 1.6587679088052365e-07,
"loss": 0.0923,
"step": 2612
},
{
"epoch": 1.836261419536191,
"grad_norm": 0.22637518489180214,
"learning_rate": 1.6446987761056244e-07,
"loss": 0.1395,
"step": 2613
},
{
"epoch": 1.836964160224877,
"grad_norm": 0.20651651925563613,
"learning_rate": 1.6306885649342906e-07,
"loss": 0.1043,
"step": 2614
},
{
"epoch": 1.8376669009135629,
"grad_norm": 0.20610783906668487,
"learning_rate": 1.6167372923628354e-07,
"loss": 0.1092,
"step": 2615
},
{
"epoch": 1.8383696416022488,
"grad_norm": 0.19879539189978518,
"learning_rate": 1.6028449753910768e-07,
"loss": 0.0977,
"step": 2616
},
{
"epoch": 1.8390723822909347,
"grad_norm": 0.2168732312013328,
"learning_rate": 1.5890116309469573e-07,
"loss": 0.125,
"step": 2617
},
{
"epoch": 1.8397751229796206,
"grad_norm": 0.18884638634199546,
"learning_rate": 1.575237275886593e-07,
"loss": 0.0854,
"step": 2618
},
{
"epoch": 1.8404778636683063,
"grad_norm": 0.2164678089979939,
"learning_rate": 1.5615219269941807e-07,
"loss": 0.1225,
"step": 2619
},
{
"epoch": 1.8411806043569923,
"grad_norm": 0.21003318137423047,
"learning_rate": 1.5478656009820626e-07,
"loss": 0.1185,
"step": 2620
},
{
"epoch": 1.8418833450456782,
"grad_norm": 0.21047557844576498,
"learning_rate": 1.5342683144906334e-07,
"loss": 0.1118,
"step": 2621
},
{
"epoch": 1.8425860857343639,
"grad_norm": 0.22375251381020572,
"learning_rate": 1.520730084088351e-07,
"loss": 0.1216,
"step": 2622
},
{
"epoch": 1.8432888264230498,
"grad_norm": 0.21051775385702437,
"learning_rate": 1.5072509262717195e-07,
"loss": 0.1145,
"step": 2623
},
{
"epoch": 1.8439915671117357,
"grad_norm": 0.20183056219726414,
"learning_rate": 1.4938308574652505e-07,
"loss": 0.1028,
"step": 2624
},
{
"epoch": 1.8446943078004217,
"grad_norm": 0.20573009342900433,
"learning_rate": 1.4804698940214746e-07,
"loss": 0.1129,
"step": 2625
},
{
"epoch": 1.8453970484891076,
"grad_norm": 0.2034322713970437,
"learning_rate": 1.4671680522208797e-07,
"loss": 0.1105,
"step": 2626
},
{
"epoch": 1.8460997891777935,
"grad_norm": 0.19156390414948218,
"learning_rate": 1.4539253482719286e-07,
"loss": 0.0898,
"step": 2627
},
{
"epoch": 1.8468025298664794,
"grad_norm": 0.1927346892309437,
"learning_rate": 1.4407417983110127e-07,
"loss": 0.0907,
"step": 2628
},
{
"epoch": 1.8475052705551651,
"grad_norm": 0.20592982545855976,
"learning_rate": 1.427617418402455e-07,
"loss": 0.1108,
"step": 2629
},
{
"epoch": 1.848208011243851,
"grad_norm": 0.2135914812577168,
"learning_rate": 1.4145522245384735e-07,
"loss": 0.1251,
"step": 2630
},
{
"epoch": 1.8489107519325367,
"grad_norm": 0.2145947771591151,
"learning_rate": 1.401546232639167e-07,
"loss": 0.1142,
"step": 2631
},
{
"epoch": 1.8496134926212227,
"grad_norm": 0.20461737132685856,
"learning_rate": 1.388599458552492e-07,
"loss": 0.1139,
"step": 2632
},
{
"epoch": 1.8503162333099086,
"grad_norm": 0.22932939431622504,
"learning_rate": 1.3757119180542623e-07,
"loss": 0.1354,
"step": 2633
},
{
"epoch": 1.8510189739985945,
"grad_norm": 0.19749510207912097,
"learning_rate": 1.3628836268480883e-07,
"loss": 0.0989,
"step": 2634
},
{
"epoch": 1.8517217146872804,
"grad_norm": 0.2111462648920841,
"learning_rate": 1.3501146005654164e-07,
"loss": 0.1065,
"step": 2635
},
{
"epoch": 1.8524244553759663,
"grad_norm": 0.19434392528484917,
"learning_rate": 1.337404854765445e-07,
"loss": 0.0964,
"step": 2636
},
{
"epoch": 1.8531271960646523,
"grad_norm": 0.20560587409957587,
"learning_rate": 1.3247544049351745e-07,
"loss": 0.1119,
"step": 2637
},
{
"epoch": 1.8538299367533382,
"grad_norm": 0.20122701135955734,
"learning_rate": 1.3121632664893192e-07,
"loss": 0.1069,
"step": 2638
},
{
"epoch": 1.8545326774420239,
"grad_norm": 0.22068030727811397,
"learning_rate": 1.2996314547703393e-07,
"loss": 0.1347,
"step": 2639
},
{
"epoch": 1.8552354181307098,
"grad_norm": 0.1973325696365381,
"learning_rate": 1.2871589850484034e-07,
"loss": 0.106,
"step": 2640
},
{
"epoch": 1.8559381588193955,
"grad_norm": 0.2116526244061434,
"learning_rate": 1.2747458725213712e-07,
"loss": 0.113,
"step": 2641
},
{
"epoch": 1.8566408995080814,
"grad_norm": 0.20419175194400124,
"learning_rate": 1.2623921323147714e-07,
"loss": 0.1121,
"step": 2642
},
{
"epoch": 1.8573436401967673,
"grad_norm": 0.2076263686083871,
"learning_rate": 1.2500977794817794e-07,
"loss": 0.0992,
"step": 2643
},
{
"epoch": 1.8580463808854533,
"grad_norm": 0.21572674015428164,
"learning_rate": 1.237862829003228e-07,
"loss": 0.1222,
"step": 2644
},
{
"epoch": 1.8587491215741392,
"grad_norm": 0.2090928782987142,
"learning_rate": 1.225687295787542e-07,
"loss": 0.1103,
"step": 2645
},
{
"epoch": 1.859451862262825,
"grad_norm": 0.2219145255018689,
"learning_rate": 1.2135711946707708e-07,
"loss": 0.1355,
"step": 2646
},
{
"epoch": 1.860154602951511,
"grad_norm": 0.20169941205416073,
"learning_rate": 1.2015145404165261e-07,
"loss": 0.1037,
"step": 2647
},
{
"epoch": 1.8608573436401967,
"grad_norm": 0.19840568342708678,
"learning_rate": 1.1895173477159849e-07,
"loss": 0.106,
"step": 2648
},
{
"epoch": 1.8615600843288826,
"grad_norm": 0.20936029122147987,
"learning_rate": 1.1775796311878807e-07,
"loss": 0.1142,
"step": 2649
},
{
"epoch": 1.8622628250175686,
"grad_norm": 0.21002511391967393,
"learning_rate": 1.1657014053784666e-07,
"loss": 0.1146,
"step": 2650
},
{
"epoch": 1.8629655657062543,
"grad_norm": 0.19409027427876802,
"learning_rate": 1.1538826847615037e-07,
"loss": 0.1006,
"step": 2651
},
{
"epoch": 1.8636683063949402,
"grad_norm": 0.20399947800835463,
"learning_rate": 1.14212348373825e-07,
"loss": 0.1088,
"step": 2652
},
{
"epoch": 1.864371047083626,
"grad_norm": 0.20483157389656576,
"learning_rate": 1.1304238166374381e-07,
"loss": 0.1139,
"step": 2653
},
{
"epoch": 1.865073787772312,
"grad_norm": 0.19729745582200012,
"learning_rate": 1.1187836977152533e-07,
"loss": 0.0895,
"step": 2654
},
{
"epoch": 1.865776528460998,
"grad_norm": 0.19685641260773287,
"learning_rate": 1.1072031411553219e-07,
"loss": 0.0997,
"step": 2655
},
{
"epoch": 1.8664792691496839,
"grad_norm": 0.212553846688535,
"learning_rate": 1.0956821610686952e-07,
"loss": 0.118,
"step": 2656
},
{
"epoch": 1.8671820098383698,
"grad_norm": 0.18837800327559476,
"learning_rate": 1.084220771493838e-07,
"loss": 0.0888,
"step": 2657
},
{
"epoch": 1.8678847505270555,
"grad_norm": 0.19431355798625033,
"learning_rate": 1.0728189863965788e-07,
"loss": 0.0949,
"step": 2658
},
{
"epoch": 1.8685874912157414,
"grad_norm": 0.20677146474052638,
"learning_rate": 1.061476819670143e-07,
"loss": 0.1125,
"step": 2659
},
{
"epoch": 1.8692902319044271,
"grad_norm": 0.19043358486402379,
"learning_rate": 1.0501942851350921e-07,
"loss": 0.0926,
"step": 2660
},
{
"epoch": 1.869992972593113,
"grad_norm": 0.20989465956142217,
"learning_rate": 1.0389713965393455e-07,
"loss": 0.1083,
"step": 2661
},
{
"epoch": 1.870695713281799,
"grad_norm": 0.19277631665789985,
"learning_rate": 1.0278081675581253e-07,
"loss": 0.0973,
"step": 2662
},
{
"epoch": 1.8713984539704849,
"grad_norm": 0.19997444514254192,
"learning_rate": 1.0167046117939561e-07,
"loss": 0.1029,
"step": 2663
},
{
"epoch": 1.8721011946591708,
"grad_norm": 0.1984585978732971,
"learning_rate": 1.005660742776654e-07,
"loss": 0.108,
"step": 2664
},
{
"epoch": 1.8728039353478567,
"grad_norm": 0.2218092283325443,
"learning_rate": 9.946765739633269e-08,
"loss": 0.1376,
"step": 2665
},
{
"epoch": 1.8735066760365426,
"grad_norm": 0.2078311482517579,
"learning_rate": 9.837521187383126e-08,
"loss": 0.1092,
"step": 2666
},
{
"epoch": 1.8742094167252283,
"grad_norm": 0.21626439924261134,
"learning_rate": 9.728873904131853e-08,
"loss": 0.1369,
"step": 2667
},
{
"epoch": 1.8749121574139143,
"grad_norm": 0.20324047969944123,
"learning_rate": 9.620824022267549e-08,
"loss": 0.1059,
"step": 2668
},
{
"epoch": 1.8756148981026002,
"grad_norm": 0.1974971931646483,
"learning_rate": 9.513371673450344e-08,
"loss": 0.1044,
"step": 2669
},
{
"epoch": 1.8763176387912859,
"grad_norm": 0.20372684283948508,
"learning_rate": 9.40651698861228e-08,
"loss": 0.1025,
"step": 2670
},
{
"epoch": 1.8770203794799718,
"grad_norm": 0.20767018624709432,
"learning_rate": 9.300260097956981e-08,
"loss": 0.115,
"step": 2671
},
{
"epoch": 1.8777231201686577,
"grad_norm": 0.18842365566798558,
"learning_rate": 9.19460113095988e-08,
"loss": 0.0889,
"step": 2672
},
{
"epoch": 1.8784258608573436,
"grad_norm": 0.21022014715609388,
"learning_rate": 9.089540216367654e-08,
"loss": 0.1161,
"step": 2673
},
{
"epoch": 1.8791286015460296,
"grad_norm": 0.20567582997891232,
"learning_rate": 8.985077482198346e-08,
"loss": 0.105,
"step": 2674
},
{
"epoch": 1.8798313422347155,
"grad_norm": 0.18918321164081345,
"learning_rate": 8.881213055741134e-08,
"loss": 0.086,
"step": 2675
},
{
"epoch": 1.8805340829234014,
"grad_norm": 0.19880062856366956,
"learning_rate": 8.777947063556002e-08,
"loss": 0.1033,
"step": 2676
},
{
"epoch": 1.881236823612087,
"grad_norm": 0.20325470166271942,
"learning_rate": 8.67527963147391e-08,
"loss": 0.1105,
"step": 2677
},
{
"epoch": 1.881939564300773,
"grad_norm": 0.19517620650630263,
"learning_rate": 8.57321088459634e-08,
"loss": 0.1028,
"step": 2678
},
{
"epoch": 1.8826423049894587,
"grad_norm": 0.20452970614023697,
"learning_rate": 8.471740947295304e-08,
"loss": 0.1119,
"step": 2679
},
{
"epoch": 1.8833450456781446,
"grad_norm": 0.19340075330519227,
"learning_rate": 8.370869943213178e-08,
"loss": 0.0963,
"step": 2680
},
{
"epoch": 1.8840477863668306,
"grad_norm": 0.20002296966605265,
"learning_rate": 8.270597995262586e-08,
"loss": 0.1106,
"step": 2681
},
{
"epoch": 1.8847505270555165,
"grad_norm": 0.19833636953085362,
"learning_rate": 8.17092522562607e-08,
"loss": 0.1012,
"step": 2682
},
{
"epoch": 1.8854532677442024,
"grad_norm": 0.2083805019601911,
"learning_rate": 8.071851755756088e-08,
"loss": 0.109,
"step": 2683
},
{
"epoch": 1.8861560084328883,
"grad_norm": 0.2007767455810867,
"learning_rate": 7.973377706374852e-08,
"loss": 0.0997,
"step": 2684
},
{
"epoch": 1.8868587491215743,
"grad_norm": 0.19559471932165817,
"learning_rate": 7.875503197474377e-08,
"loss": 0.1061,
"step": 2685
},
{
"epoch": 1.8875614898102602,
"grad_norm": 0.2124493795321907,
"learning_rate": 7.778228348315763e-08,
"loss": 0.1206,
"step": 2686
},
{
"epoch": 1.8882642304989459,
"grad_norm": 0.20516327683248498,
"learning_rate": 7.681553277429698e-08,
"loss": 0.1156,
"step": 2687
},
{
"epoch": 1.8889669711876318,
"grad_norm": 0.20870512050957005,
"learning_rate": 7.585478102615951e-08,
"loss": 0.1011,
"step": 2688
},
{
"epoch": 1.8896697118763175,
"grad_norm": 0.1969715441178502,
"learning_rate": 7.490002940943263e-08,
"loss": 0.1017,
"step": 2689
},
{
"epoch": 1.8903724525650034,
"grad_norm": 0.20581729752634534,
"learning_rate": 7.395127908749356e-08,
"loss": 0.1188,
"step": 2690
},
{
"epoch": 1.8910751932536893,
"grad_norm": 0.20799500701317067,
"learning_rate": 7.300853121640528e-08,
"loss": 0.1073,
"step": 2691
},
{
"epoch": 1.8917779339423753,
"grad_norm": 0.209833277575468,
"learning_rate": 7.207178694491778e-08,
"loss": 0.1153,
"step": 2692
},
{
"epoch": 1.8924806746310612,
"grad_norm": 0.20387186739986818,
"learning_rate": 7.114104741446581e-08,
"loss": 0.1101,
"step": 2693
},
{
"epoch": 1.893183415319747,
"grad_norm": 0.2128519883912037,
"learning_rate": 7.021631375916716e-08,
"loss": 0.1262,
"step": 2694
},
{
"epoch": 1.893886156008433,
"grad_norm": 0.2027110373347771,
"learning_rate": 6.929758710582102e-08,
"loss": 0.1076,
"step": 2695
},
{
"epoch": 1.8945888966971187,
"grad_norm": 0.19099442002098957,
"learning_rate": 6.838486857390692e-08,
"loss": 0.0843,
"step": 2696
},
{
"epoch": 1.8952916373858046,
"grad_norm": 0.20247569292535222,
"learning_rate": 6.747815927558354e-08,
"loss": 0.1048,
"step": 2697
},
{
"epoch": 1.8959943780744906,
"grad_norm": 0.20682159362696018,
"learning_rate": 6.657746031568769e-08,
"loss": 0.1162,
"step": 2698
},
{
"epoch": 1.8966971187631763,
"grad_norm": 0.20837799491365674,
"learning_rate": 6.568277279173141e-08,
"loss": 0.1206,
"step": 2699
},
{
"epoch": 1.8973998594518622,
"grad_norm": 0.20931372034112256,
"learning_rate": 6.479409779390267e-08,
"loss": 0.1187,
"step": 2700
},
{
"epoch": 1.898102600140548,
"grad_norm": 0.21771985082060258,
"learning_rate": 6.391143640506359e-08,
"loss": 0.1291,
"step": 2701
},
{
"epoch": 1.898805340829234,
"grad_norm": 0.19682610401788064,
"learning_rate": 6.303478970074716e-08,
"loss": 0.0993,
"step": 2702
},
{
"epoch": 1.89950808151792,
"grad_norm": 0.21359915441787286,
"learning_rate": 6.216415874915837e-08,
"loss": 0.1108,
"step": 2703
},
{
"epoch": 1.9002108222066059,
"grad_norm": 0.21017919558994758,
"learning_rate": 6.129954461117083e-08,
"loss": 0.1208,
"step": 2704
},
{
"epoch": 1.9009135628952918,
"grad_norm": 0.21073240157888354,
"learning_rate": 6.044094834032954e-08,
"loss": 0.1163,
"step": 2705
},
{
"epoch": 1.9016163035839775,
"grad_norm": 0.1972512302396635,
"learning_rate": 5.95883709828432e-08,
"loss": 0.0977,
"step": 2706
},
{
"epoch": 1.9023190442726634,
"grad_norm": 0.20402879505861052,
"learning_rate": 5.874181357758746e-08,
"loss": 0.0985,
"step": 2707
},
{
"epoch": 1.903021784961349,
"grad_norm": 0.20453837489711418,
"learning_rate": 5.790127715610328e-08,
"loss": 0.1175,
"step": 2708
},
{
"epoch": 1.903724525650035,
"grad_norm": 0.19832400523663543,
"learning_rate": 5.706676274259582e-08,
"loss": 0.1039,
"step": 2709
},
{
"epoch": 1.904427266338721,
"grad_norm": 0.21756328916089554,
"learning_rate": 5.6238271353929455e-08,
"loss": 0.1228,
"step": 2710
},
{
"epoch": 1.9051300070274069,
"grad_norm": 0.19386876824852114,
"learning_rate": 5.541580399963165e-08,
"loss": 0.103,
"step": 2711
},
{
"epoch": 1.9058327477160928,
"grad_norm": 0.2081454946282205,
"learning_rate": 5.459936168188906e-08,
"loss": 0.1212,
"step": 2712
},
{
"epoch": 1.9065354884047787,
"grad_norm": 0.21018065211528353,
"learning_rate": 5.3788945395546465e-08,
"loss": 0.1278,
"step": 2713
},
{
"epoch": 1.9072382290934646,
"grad_norm": 0.203504870940182,
"learning_rate": 5.2984556128107266e-08,
"loss": 0.1121,
"step": 2714
},
{
"epoch": 1.9079409697821503,
"grad_norm": 0.20260220683551405,
"learning_rate": 5.2186194859727977e-08,
"loss": 0.1016,
"step": 2715
},
{
"epoch": 1.9086437104708363,
"grad_norm": 0.21780266077488314,
"learning_rate": 5.13938625632221e-08,
"loss": 0.1323,
"step": 2716
},
{
"epoch": 1.9093464511595222,
"grad_norm": 0.2196567627741213,
"learning_rate": 5.060756020405677e-08,
"loss": 0.1342,
"step": 2717
},
{
"epoch": 1.9100491918482079,
"grad_norm": 0.20646327812601895,
"learning_rate": 4.982728874035059e-08,
"loss": 0.1124,
"step": 2718
},
{
"epoch": 1.9107519325368938,
"grad_norm": 0.2121186838560243,
"learning_rate": 4.905304912287468e-08,
"loss": 0.1119,
"step": 2719
},
{
"epoch": 1.9114546732255797,
"grad_norm": 0.20819956040041104,
"learning_rate": 4.8284842295048265e-08,
"loss": 0.1066,
"step": 2720
},
{
"epoch": 1.9121574139142656,
"grad_norm": 0.20324819093516344,
"learning_rate": 4.7522669192942014e-08,
"loss": 0.1138,
"step": 2721
},
{
"epoch": 1.9128601546029516,
"grad_norm": 0.20666227320368274,
"learning_rate": 4.676653074527249e-08,
"loss": 0.112,
"step": 2722
},
{
"epoch": 1.9135628952916375,
"grad_norm": 0.21223963284578107,
"learning_rate": 4.601642787340377e-08,
"loss": 0.1096,
"step": 2723
},
{
"epoch": 1.9142656359803234,
"grad_norm": 0.20278499966338331,
"learning_rate": 4.5272361491345286e-08,
"loss": 0.1013,
"step": 2724
},
{
"epoch": 1.914968376669009,
"grad_norm": 0.21554894561968635,
"learning_rate": 4.4534332505751786e-08,
"loss": 0.1219,
"step": 2725
},
{
"epoch": 1.915671117357695,
"grad_norm": 0.20204864779481058,
"learning_rate": 4.380234181592002e-08,
"loss": 0.1099,
"step": 2726
},
{
"epoch": 1.916373858046381,
"grad_norm": 0.19901280835933124,
"learning_rate": 4.30763903137893e-08,
"loss": 0.1107,
"step": 2727
},
{
"epoch": 1.9170765987350666,
"grad_norm": 0.2005087983026402,
"learning_rate": 4.23564788839409e-08,
"loss": 0.1041,
"step": 2728
},
{
"epoch": 1.9177793394237526,
"grad_norm": 0.20933271529738173,
"learning_rate": 4.164260840359646e-08,
"loss": 0.1183,
"step": 2729
},
{
"epoch": 1.9184820801124385,
"grad_norm": 0.21264240495440878,
"learning_rate": 4.0934779742615174e-08,
"loss": 0.1094,
"step": 2730
},
{
"epoch": 1.9191848208011244,
"grad_norm": 0.21647750623178083,
"learning_rate": 4.0232993763494324e-08,
"loss": 0.128,
"step": 2731
},
{
"epoch": 1.9198875614898103,
"grad_norm": 0.21400869739661144,
"learning_rate": 3.953725132136932e-08,
"loss": 0.1168,
"step": 2732
},
{
"epoch": 1.9205903021784962,
"grad_norm": 0.19756966928984881,
"learning_rate": 3.884755326401146e-08,
"loss": 0.103,
"step": 2733
},
{
"epoch": 1.9212930428671822,
"grad_norm": 0.20635070425473276,
"learning_rate": 3.816390043182572e-08,
"loss": 0.103,
"step": 2734
},
{
"epoch": 1.9219957835558679,
"grad_norm": 0.20082821737443338,
"learning_rate": 3.748629365785184e-08,
"loss": 0.1061,
"step": 2735
},
{
"epoch": 1.9226985242445538,
"grad_norm": 0.1980845632305311,
"learning_rate": 3.681473376776101e-08,
"loss": 0.0954,
"step": 2736
},
{
"epoch": 1.9234012649332395,
"grad_norm": 0.19565376455924918,
"learning_rate": 3.614922157985812e-08,
"loss": 0.1034,
"step": 2737
},
{
"epoch": 1.9241040056219254,
"grad_norm": 0.2104584934124414,
"learning_rate": 3.548975790507836e-08,
"loss": 0.1203,
"step": 2738
},
{
"epoch": 1.9248067463106113,
"grad_norm": 0.1918388180639807,
"learning_rate": 3.483634354698506e-08,
"loss": 0.0939,
"step": 2739
},
{
"epoch": 1.9255094869992972,
"grad_norm": 0.22102045460526204,
"learning_rate": 3.41889793017719e-08,
"loss": 0.1136,
"step": 2740
},
{
"epoch": 1.9262122276879832,
"grad_norm": 0.2085378380509966,
"learning_rate": 3.354766595826064e-08,
"loss": 0.1194,
"step": 2741
},
{
"epoch": 1.926914968376669,
"grad_norm": 0.20367389726571328,
"learning_rate": 3.291240429789955e-08,
"loss": 0.1136,
"step": 2742
},
{
"epoch": 1.927617709065355,
"grad_norm": 0.2022397090374957,
"learning_rate": 3.22831950947633e-08,
"loss": 0.1105,
"step": 2743
},
{
"epoch": 1.9283204497540407,
"grad_norm": 0.19992843015309555,
"learning_rate": 3.166003911554916e-08,
"loss": 0.0966,
"step": 2744
},
{
"epoch": 1.9290231904427266,
"grad_norm": 0.19359138994663094,
"learning_rate": 3.104293711958195e-08,
"loss": 0.093,
"step": 2745
},
{
"epoch": 1.9297259311314126,
"grad_norm": 0.20331503245057836,
"learning_rate": 3.0431889858807405e-08,
"loss": 0.1104,
"step": 2746
},
{
"epoch": 1.9304286718200983,
"grad_norm": 0.2025077493326202,
"learning_rate": 2.982689807779382e-08,
"loss": 0.1118,
"step": 2747
},
{
"epoch": 1.9311314125087842,
"grad_norm": 0.2019355391237662,
"learning_rate": 2.9227962513732057e-08,
"loss": 0.1037,
"step": 2748
},
{
"epoch": 1.93183415319747,
"grad_norm": 0.21750159492952145,
"learning_rate": 2.863508389643166e-08,
"loss": 0.1263,
"step": 2749
},
{
"epoch": 1.932536893886156,
"grad_norm": 0.20544163637327512,
"learning_rate": 2.804826294832308e-08,
"loss": 0.1027,
"step": 2750
},
{
"epoch": 1.933239634574842,
"grad_norm": 0.19273786606668483,
"learning_rate": 2.7467500384454336e-08,
"loss": 0.0955,
"step": 2751
},
{
"epoch": 1.9339423752635279,
"grad_norm": 0.2048583758118885,
"learning_rate": 2.6892796912492136e-08,
"loss": 0.1013,
"step": 2752
},
{
"epoch": 1.9346451159522138,
"grad_norm": 0.21469673265844896,
"learning_rate": 2.632415323271964e-08,
"loss": 0.119,
"step": 2753
},
{
"epoch": 1.9353478566408995,
"grad_norm": 0.20640738082458038,
"learning_rate": 2.5761570038035367e-08,
"loss": 0.1106,
"step": 2754
},
{
"epoch": 1.9360505973295854,
"grad_norm": 0.19650433315970928,
"learning_rate": 2.5205048013955402e-08,
"loss": 0.0966,
"step": 2755
},
{
"epoch": 1.936753338018271,
"grad_norm": 0.20145717955115533,
"learning_rate": 2.4654587838606748e-08,
"loss": 0.1103,
"step": 2756
},
{
"epoch": 1.937456078706957,
"grad_norm": 0.1897577385846952,
"learning_rate": 2.411019018273342e-08,
"loss": 0.0924,
"step": 2757
},
{
"epoch": 1.938158819395643,
"grad_norm": 0.20004385863480625,
"learning_rate": 2.3571855709690894e-08,
"loss": 0.1081,
"step": 2758
},
{
"epoch": 1.9388615600843289,
"grad_norm": 0.19498095848830385,
"learning_rate": 2.303958507544446e-08,
"loss": 0.0953,
"step": 2759
},
{
"epoch": 1.9395643007730148,
"grad_norm": 0.20823317826832002,
"learning_rate": 2.251337892857419e-08,
"loss": 0.1129,
"step": 2760
},
{
"epoch": 1.9402670414617007,
"grad_norm": 0.21273598127449586,
"learning_rate": 2.1993237910267752e-08,
"loss": 0.1192,
"step": 2761
},
{
"epoch": 1.9409697821503866,
"grad_norm": 0.19987358053231358,
"learning_rate": 2.147916265432426e-08,
"loss": 0.1043,
"step": 2762
},
{
"epoch": 1.9416725228390725,
"grad_norm": 0.1965629120101942,
"learning_rate": 2.0971153787149867e-08,
"loss": 0.0996,
"step": 2763
},
{
"epoch": 1.9423752635277582,
"grad_norm": 0.21766925827196718,
"learning_rate": 2.0469211927759413e-08,
"loss": 0.1152,
"step": 2764
},
{
"epoch": 1.9430780042164442,
"grad_norm": 0.19666765102495434,
"learning_rate": 1.9973337687776428e-08,
"loss": 0.108,
"step": 2765
},
{
"epoch": 1.9437807449051299,
"grad_norm": 0.19345011024638742,
"learning_rate": 1.948353167142869e-08,
"loss": 0.0969,
"step": 2766
},
{
"epoch": 1.9444834855938158,
"grad_norm": 0.19438883024446976,
"learning_rate": 1.899979447555156e-08,
"loss": 0.1018,
"step": 2767
},
{
"epoch": 1.9451862262825017,
"grad_norm": 0.19899139687944353,
"learning_rate": 1.852212668958353e-08,
"loss": 0.1058,
"step": 2768
},
{
"epoch": 1.9458889669711876,
"grad_norm": 0.22857466561418002,
"learning_rate": 1.805052889557013e-08,
"loss": 0.1305,
"step": 2769
},
{
"epoch": 1.9465917076598735,
"grad_norm": 0.20910320283421194,
"learning_rate": 1.7585001668158907e-08,
"loss": 0.117,
"step": 2770
},
{
"epoch": 1.9472944483485595,
"grad_norm": 0.19822607951889626,
"learning_rate": 1.7125545574599445e-08,
"loss": 0.0956,
"step": 2771
},
{
"epoch": 1.9479971890372454,
"grad_norm": 0.20520904454557382,
"learning_rate": 1.667216117474557e-08,
"loss": 0.1007,
"step": 2772
},
{
"epoch": 1.948699929725931,
"grad_norm": 0.22395758045158284,
"learning_rate": 1.622484902105148e-08,
"loss": 0.1333,
"step": 2773
},
{
"epoch": 1.949402670414617,
"grad_norm": 0.19853469470530757,
"learning_rate": 1.5783609658572284e-08,
"loss": 0.1056,
"step": 2774
},
{
"epoch": 1.950105411103303,
"grad_norm": 0.2167810789656521,
"learning_rate": 1.534844362496346e-08,
"loss": 0.125,
"step": 2775
},
{
"epoch": 1.9508081517919886,
"grad_norm": 0.1953732351830069,
"learning_rate": 1.4919351450480847e-08,
"loss": 0.1,
"step": 2776
},
{
"epoch": 1.9515108924806746,
"grad_norm": 0.202513308673447,
"learning_rate": 1.4496333657978423e-08,
"loss": 0.1086,
"step": 2777
},
{
"epoch": 1.9522136331693605,
"grad_norm": 0.20321959448278623,
"learning_rate": 1.4079390762907763e-08,
"loss": 0.1007,
"step": 2778
},
{
"epoch": 1.9529163738580464,
"grad_norm": 0.20708862204418732,
"learning_rate": 1.366852327331969e-08,
"loss": 0.1003,
"step": 2779
},
{
"epoch": 1.9536191145467323,
"grad_norm": 0.20434360077387825,
"learning_rate": 1.3263731689860949e-08,
"loss": 0.1122,
"step": 2780
},
{
"epoch": 1.9543218552354182,
"grad_norm": 0.2064695547684738,
"learning_rate": 1.2865016505774763e-08,
"loss": 0.1084,
"step": 2781
},
{
"epoch": 1.9550245959241042,
"grad_norm": 0.19369303432487409,
"learning_rate": 1.2472378206901392e-08,
"loss": 0.0958,
"step": 2782
},
{
"epoch": 1.9557273366127899,
"grad_norm": 0.20914980102515215,
"learning_rate": 1.2085817271674794e-08,
"loss": 0.1221,
"step": 2783
},
{
"epoch": 1.9564300773014758,
"grad_norm": 0.18488897543216906,
"learning_rate": 1.1705334171123739e-08,
"loss": 0.0869,
"step": 2784
},
{
"epoch": 1.9571328179901615,
"grad_norm": 0.20030582861862548,
"learning_rate": 1.1330929368872368e-08,
"loss": 0.106,
"step": 2785
},
{
"epoch": 1.9578355586788474,
"grad_norm": 0.20037959644019826,
"learning_rate": 1.0962603321137965e-08,
"loss": 0.1005,
"step": 2786
},
{
"epoch": 1.9585382993675333,
"grad_norm": 0.213451164951274,
"learning_rate": 1.0600356476728746e-08,
"loss": 0.1308,
"step": 2787
},
{
"epoch": 1.9592410400562192,
"grad_norm": 0.19781179806138552,
"learning_rate": 1.0244189277048289e-08,
"loss": 0.1039,
"step": 2788
},
{
"epoch": 1.9599437807449052,
"grad_norm": 0.19483497148385293,
"learning_rate": 9.894102156089991e-09,
"loss": 0.1013,
"step": 2789
},
{
"epoch": 1.960646521433591,
"grad_norm": 0.21548250232722918,
"learning_rate": 9.550095540439841e-09,
"loss": 0.1215,
"step": 2790
},
{
"epoch": 1.961349262122277,
"grad_norm": 0.19972609560240734,
"learning_rate": 9.212169849273645e-09,
"loss": 0.0999,
"step": 2791
},
{
"epoch": 1.9620520028109627,
"grad_norm": 0.21834432401485884,
"learning_rate": 8.880325494358132e-09,
"loss": 0.1258,
"step": 2792
},
{
"epoch": 1.9627547434996486,
"grad_norm": 0.2020654895152793,
"learning_rate": 8.554562880049855e-09,
"loss": 0.105,
"step": 2793
},
{
"epoch": 1.9634574841883345,
"grad_norm": 0.20223921433752243,
"learning_rate": 8.23488240329462e-09,
"loss": 0.112,
"step": 2794
},
{
"epoch": 1.9641602248770202,
"grad_norm": 0.19775329520769883,
"learning_rate": 7.921284453626943e-09,
"loss": 0.1058,
"step": 2795
},
{
"epoch": 1.9648629655657062,
"grad_norm": 0.22057916447129347,
"learning_rate": 7.613769413169492e-09,
"loss": 0.1262,
"step": 2796
},
{
"epoch": 1.965565706254392,
"grad_norm": 0.19927034401237112,
"learning_rate": 7.312337656633639e-09,
"loss": 0.1058,
"step": 2797
},
{
"epoch": 1.966268446943078,
"grad_norm": 0.20329054558712462,
"learning_rate": 7.016989551317244e-09,
"loss": 0.1115,
"step": 2798
},
{
"epoch": 1.966971187631764,
"grad_norm": 0.2039552547479961,
"learning_rate": 6.72772545710576e-09,
"loss": 0.1136,
"step": 2799
},
{
"epoch": 1.9676739283204498,
"grad_norm": 0.2049612869401537,
"learning_rate": 6.4445457264711295e-09,
"loss": 0.1109,
"step": 2800
},
{
"epoch": 1.9683766690091358,
"grad_norm": 0.1977741641922388,
"learning_rate": 6.167450704471223e-09,
"loss": 0.1023,
"step": 2801
},
{
"epoch": 1.9690794096978215,
"grad_norm": 0.20469172234627211,
"learning_rate": 5.896440728749286e-09,
"loss": 0.1121,
"step": 2802
},
{
"epoch": 1.9697821503865074,
"grad_norm": 0.21913035289927627,
"learning_rate": 5.631516129535053e-09,
"loss": 0.1355,
"step": 2803
},
{
"epoch": 1.970484891075193,
"grad_norm": 0.2099153012350531,
"learning_rate": 5.37267722964252e-09,
"loss": 0.1083,
"step": 2804
},
{
"epoch": 1.971187631763879,
"grad_norm": 0.2090082289608079,
"learning_rate": 5.1199243444693955e-09,
"loss": 0.1122,
"step": 2805
},
{
"epoch": 1.971890372452565,
"grad_norm": 0.20474410660300696,
"learning_rate": 4.8732577819982084e-09,
"loss": 0.1163,
"step": 2806
},
{
"epoch": 1.9725931131412509,
"grad_norm": 0.21233903838386528,
"learning_rate": 4.632677842795752e-09,
"loss": 0.1162,
"step": 2807
},
{
"epoch": 1.9732958538299368,
"grad_norm": 0.22589502146414686,
"learning_rate": 4.398184820010865e-09,
"loss": 0.1325,
"step": 2808
},
{
"epoch": 1.9739985945186227,
"grad_norm": 0.20265944408292183,
"learning_rate": 4.16977899937665e-09,
"loss": 0.1032,
"step": 2809
},
{
"epoch": 1.9747013352073086,
"grad_norm": 0.21231696694288626,
"learning_rate": 3.9474606592088125e-09,
"loss": 0.1212,
"step": 2810
},
{
"epoch": 1.9754040758959945,
"grad_norm": 0.20680091970984674,
"learning_rate": 3.731230070403991e-09,
"loss": 0.1103,
"step": 2811
},
{
"epoch": 1.9761068165846802,
"grad_norm": 0.2026802343176272,
"learning_rate": 3.5210874964425323e-09,
"loss": 0.1126,
"step": 2812
},
{
"epoch": 1.9768095572733662,
"grad_norm": 0.22028127277568188,
"learning_rate": 3.3170331933857214e-09,
"loss": 0.1209,
"step": 2813
},
{
"epoch": 1.9775122979620519,
"grad_norm": 0.20445795086826396,
"learning_rate": 3.1190674098757756e-09,
"loss": 0.1148,
"step": 2814
},
{
"epoch": 1.9782150386507378,
"grad_norm": 0.2017174140180486,
"learning_rate": 2.927190387137513e-09,
"loss": 0.1055,
"step": 2815
},
{
"epoch": 1.9789177793394237,
"grad_norm": 0.2119579379695155,
"learning_rate": 2.7414023589739104e-09,
"loss": 0.118,
"step": 2816
},
{
"epoch": 1.9796205200281096,
"grad_norm": 0.20069522660395195,
"learning_rate": 2.5617035517705448e-09,
"loss": 0.1114,
"step": 2817
},
{
"epoch": 1.9803232607167955,
"grad_norm": 0.2187796518891936,
"learning_rate": 2.3880941844933727e-09,
"loss": 0.1311,
"step": 2818
},
{
"epoch": 1.9810260014054815,
"grad_norm": 0.2041932064582187,
"learning_rate": 2.2205744686865093e-09,
"loss": 0.1114,
"step": 2819
},
{
"epoch": 1.9817287420941674,
"grad_norm": 0.21768276509060047,
"learning_rate": 2.0591446084755608e-09,
"loss": 0.1243,
"step": 2820
},
{
"epoch": 1.982431482782853,
"grad_norm": 0.21195870193295815,
"learning_rate": 1.9038048005642905e-09,
"loss": 0.1258,
"step": 2821
},
{
"epoch": 1.983134223471539,
"grad_norm": 0.19446624603492016,
"learning_rate": 1.754555234236288e-09,
"loss": 0.0962,
"step": 2822
},
{
"epoch": 1.983836964160225,
"grad_norm": 0.21916439433126284,
"learning_rate": 1.6113960913538562e-09,
"loss": 0.1365,
"step": 2823
},
{
"epoch": 1.9845397048489106,
"grad_norm": 0.20891357022338253,
"learning_rate": 1.4743275463585672e-09,
"loss": 0.1085,
"step": 2824
},
{
"epoch": 1.9852424455375965,
"grad_norm": 0.1968823212865481,
"learning_rate": 1.3433497662701522e-09,
"loss": 0.1023,
"step": 2825
},
{
"epoch": 1.9859451862262825,
"grad_norm": 0.1894828907434347,
"learning_rate": 1.2184629106859468e-09,
"loss": 0.095,
"step": 2826
},
{
"epoch": 1.9866479269149684,
"grad_norm": 0.20055042929602243,
"learning_rate": 1.0996671317825558e-09,
"loss": 0.101,
"step": 2827
},
{
"epoch": 1.9873506676036543,
"grad_norm": 0.20223280577359617,
"learning_rate": 9.869625743147426e-10,
"loss": 0.1012,
"step": 2828
},
{
"epoch": 1.9880534082923402,
"grad_norm": 0.21322522823756476,
"learning_rate": 8.803493756132097e-10,
"loss": 0.1116,
"step": 2829
},
{
"epoch": 1.9887561489810262,
"grad_norm": 0.19983868557110027,
"learning_rate": 7.798276655879289e-10,
"loss": 0.1007,
"step": 2830
},
{
"epoch": 1.9894588896697118,
"grad_norm": 0.20464541250470328,
"learning_rate": 6.853975667259205e-10,
"loss": 0.1069,
"step": 2831
},
{
"epoch": 1.9901616303583978,
"grad_norm": 0.21509751637463675,
"learning_rate": 5.97059194091254e-10,
"loss": 0.1264,
"step": 2832
},
{
"epoch": 1.9908643710470835,
"grad_norm": 0.2060446625624957,
"learning_rate": 5.148126553256027e-10,
"loss": 0.1141,
"step": 2833
},
{
"epoch": 1.9915671117357694,
"grad_norm": 0.2196926601970814,
"learning_rate": 4.3865805064768895e-10,
"loss": 0.1262,
"step": 2834
},
{
"epoch": 1.9922698524244553,
"grad_norm": 0.21470169433273117,
"learning_rate": 3.6859547285217343e-10,
"loss": 0.1253,
"step": 2835
},
{
"epoch": 1.9929725931131412,
"grad_norm": 0.21846400543064942,
"learning_rate": 3.0462500731076595e-10,
"loss": 0.1247,
"step": 2836
},
{
"epoch": 1.9936753338018272,
"grad_norm": 0.2305746144520317,
"learning_rate": 2.467467319733352e-10,
"loss": 0.1401,
"step": 2837
},
{
"epoch": 1.994378074490513,
"grad_norm": 0.20665061964606274,
"learning_rate": 1.9496071736513356e-10,
"loss": 0.1163,
"step": 2838
},
{
"epoch": 1.995080815179199,
"grad_norm": 0.21531810882375454,
"learning_rate": 1.4926702658735192e-10,
"loss": 0.1257,
"step": 2839
},
{
"epoch": 1.9957835558678847,
"grad_norm": 0.2151014788292838,
"learning_rate": 1.0966571531878523e-10,
"loss": 0.1287,
"step": 2840
},
{
"epoch": 1.9964862965565706,
"grad_norm": 0.19633187533600843,
"learning_rate": 7.6156831814167e-11,
"loss": 0.0989,
"step": 2841
},
{
"epoch": 1.9971890372452565,
"grad_norm": 0.21063307866538145,
"learning_rate": 4.874041690416942e-11,
"loss": 0.1275,
"step": 2842
},
{
"epoch": 1.9978917779339422,
"grad_norm": 0.210076234182619,
"learning_rate": 2.741650399595841e-11,
"loss": 0.1205,
"step": 2843
},
{
"epoch": 1.9985945186226282,
"grad_norm": 0.20702026135550075,
"learning_rate": 1.2185119073748753e-11,
"loss": 0.1012,
"step": 2844
},
{
"epoch": 1.999297259311314,
"grad_norm": 0.20782984350297543,
"learning_rate": 3.046280696583637e-12,
"loss": 0.1155,
"step": 2845
},
{
"epoch": 2.0,
"grad_norm": 0.20110064012714549,
"learning_rate": 0.0,
"loss": 0.1056,
"step": 2846
},
{
"epoch": 2.0,
"step": 2846,
"total_flos": 162152061075456.0,
"train_loss": 0.12556490986408353,
"train_runtime": 5188.7607,
"train_samples_per_second": 8.774,
"train_steps_per_second": 0.548
}
],
"logging_steps": 1,
"max_steps": 2846,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 700,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 162152061075456.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}