{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 2846,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0007027406886858749,
"grad_norm": 1.960867455770027,
"learning_rate": 9.999996953719305e-06,
"loss": 0.3258,
"step": 1
},
{
"epoch": 0.0014054813773717498,
"grad_norm": 1.3068761667718831,
"learning_rate": 9.999987814880927e-06,
"loss": 0.2937,
"step": 2
},
{
"epoch": 0.0021082220660576245,
"grad_norm": 1.619988769107426,
"learning_rate": 9.999972583496006e-06,
"loss": 0.2712,
"step": 3
},
{
"epoch": 0.0028109627547434997,
"grad_norm": 0.9963583704539947,
"learning_rate": 9.999951259583096e-06,
"loss": 0.2802,
"step": 4
},
{
"epoch": 0.0035137034434293743,
"grad_norm": 0.830284309838698,
"learning_rate": 9.999923843168187e-06,
"loss": 0.2609,
"step": 5
},
{
"epoch": 0.004216444132115249,
"grad_norm": 0.7775733770418086,
"learning_rate": 9.999890334284681e-06,
"loss": 0.2105,
"step": 6
},
{
"epoch": 0.004919184820801124,
"grad_norm": 0.6886436559357607,
"learning_rate": 9.999850732973412e-06,
"loss": 0.248,
"step": 7
},
{
"epoch": 0.005621925509486999,
"grad_norm": 0.7515236777838128,
"learning_rate": 9.999805039282636e-06,
"loss": 0.2576,
"step": 8
},
{
"epoch": 0.006324666198172874,
"grad_norm": 0.8276988585807789,
"learning_rate": 9.999753253268027e-06,
"loss": 0.2559,
"step": 9
},
{
"epoch": 0.007027406886858749,
"grad_norm": 0.6880595073761062,
"learning_rate": 9.99969537499269e-06,
"loss": 0.2175,
"step": 10
},
{
"epoch": 0.007730147575544624,
"grad_norm": 0.6538562805014537,
"learning_rate": 9.999631404527148e-06,
"loss": 0.2449,
"step": 11
},
{
"epoch": 0.008432888264230498,
"grad_norm": 0.6210961893951643,
"learning_rate": 9.999561341949354e-06,
"loss": 0.2498,
"step": 12
},
{
"epoch": 0.009135628952916374,
"grad_norm": 0.6562390246212256,
"learning_rate": 9.999485187344676e-06,
"loss": 0.2438,
"step": 13
},
{
"epoch": 0.009838369641602248,
"grad_norm": 0.7195647150789044,
"learning_rate": 9.999402940805908e-06,
"loss": 0.2267,
"step": 14
},
{
"epoch": 0.010541110330288124,
"grad_norm": 0.5848583629858494,
"learning_rate": 9.999314602433274e-06,
"loss": 0.2199,
"step": 15
},
{
"epoch": 0.011243851018973999,
"grad_norm": 0.6057840591545668,
"learning_rate": 9.999220172334414e-06,
"loss": 0.2577,
"step": 16
},
{
"epoch": 0.011946591707659873,
"grad_norm": 0.6152015591414531,
"learning_rate": 9.999119650624387e-06,
"loss": 0.2342,
"step": 17
},
{
"epoch": 0.012649332396345749,
"grad_norm": 0.6421227980011669,
"learning_rate": 9.999013037425686e-06,
"loss": 0.2654,
"step": 18
},
{
"epoch": 0.013352073085031623,
"grad_norm": 0.533422255577839,
"learning_rate": 9.998900332868217e-06,
"loss": 0.2046,
"step": 19
},
{
"epoch": 0.014054813773717497,
"grad_norm": 0.5642607629975687,
"learning_rate": 9.998781537089316e-06,
"loss": 0.1884,
"step": 20
},
{
"epoch": 0.014757554462403373,
"grad_norm": 0.5445226739287741,
"learning_rate": 9.998656650233732e-06,
"loss": 0.2075,
"step": 21
},
{
"epoch": 0.015460295151089248,
"grad_norm": 0.5946646609141334,
"learning_rate": 9.998525672453642e-06,
"loss": 0.2335,
"step": 22
},
{
"epoch": 0.016163035839775124,
"grad_norm": 0.578240118854655,
"learning_rate": 9.998388603908646e-06,
"loss": 0.2136,
"step": 23
},
{
"epoch": 0.016865776528460996,
"grad_norm": 0.6335244002467505,
"learning_rate": 9.998245444765764e-06,
"loss": 0.2807,
"step": 24
},
{
"epoch": 0.017568517217146872,
"grad_norm": 0.5609924624891021,
"learning_rate": 9.998096195199436e-06,
"loss": 0.2287,
"step": 25
},
{
"epoch": 0.018271257905832748,
"grad_norm": 0.5502965794491784,
"learning_rate": 9.997940855391525e-06,
"loss": 0.2277,
"step": 26
},
{
"epoch": 0.018973998594518624,
"grad_norm": 0.5519014674709402,
"learning_rate": 9.997779425531315e-06,
"loss": 0.2106,
"step": 27
},
{
"epoch": 0.019676739283204497,
"grad_norm": 0.5536708670861661,
"learning_rate": 9.997611905815508e-06,
"loss": 0.2156,
"step": 28
},
{
"epoch": 0.020379479971890373,
"grad_norm": 0.5729118843091581,
"learning_rate": 9.99743829644823e-06,
"loss": 0.2436,
"step": 29
},
{
"epoch": 0.02108222066057625,
"grad_norm": 0.5498493930566353,
"learning_rate": 9.997258597641027e-06,
"loss": 0.2342,
"step": 30
},
{
"epoch": 0.02178496134926212,
"grad_norm": 0.5602470830706633,
"learning_rate": 9.997072809612864e-06,
"loss": 0.2155,
"step": 31
},
{
"epoch": 0.022487702037947997,
"grad_norm": 0.5838068038561821,
"learning_rate": 9.996880932590125e-06,
"loss": 0.2461,
"step": 32
},
{
"epoch": 0.023190442726633873,
"grad_norm": 0.5320447337269636,
"learning_rate": 9.996682966806614e-06,
"loss": 0.2126,
"step": 33
},
{
"epoch": 0.023893183415319746,
"grad_norm": 0.5715049548629288,
"learning_rate": 9.996478912503557e-06,
"loss": 0.229,
"step": 34
},
{
"epoch": 0.024595924104005622,
"grad_norm": 0.5639356143321008,
"learning_rate": 9.996268769929597e-06,
"loss": 0.2361,
"step": 35
},
{
"epoch": 0.025298664792691498,
"grad_norm": 0.5490799330847167,
"learning_rate": 9.996052539340793e-06,
"loss": 0.225,
"step": 36
},
{
"epoch": 0.02600140548137737,
"grad_norm": 0.5197636186398789,
"learning_rate": 9.995830221000624e-06,
"loss": 0.2105,
"step": 37
},
{
"epoch": 0.026704146170063246,
"grad_norm": 0.5343885544219437,
"learning_rate": 9.99560181517999e-06,
"loss": 0.209,
"step": 38
},
{
"epoch": 0.027406886858749122,
"grad_norm": 0.5434262864184476,
"learning_rate": 9.995367322157205e-06,
"loss": 0.2232,
"step": 39
},
{
"epoch": 0.028109627547434995,
"grad_norm": 0.5281205534492845,
"learning_rate": 9.995126742218002e-06,
"loss": 0.2101,
"step": 40
},
{
"epoch": 0.02881236823612087,
"grad_norm": 0.5414344318501926,
"learning_rate": 9.994880075655531e-06,
"loss": 0.1932,
"step": 41
},
{
"epoch": 0.029515108924806747,
"grad_norm": 0.5552903981632334,
"learning_rate": 9.994627322770358e-06,
"loss": 0.1977,
"step": 42
},
{
"epoch": 0.030217849613492623,
"grad_norm": 0.5475965068067495,
"learning_rate": 9.994368483870466e-06,
"loss": 0.2275,
"step": 43
},
{
"epoch": 0.030920590302178495,
"grad_norm": 0.5820073528574793,
"learning_rate": 9.994103559271252e-06,
"loss": 0.2298,
"step": 44
},
{
"epoch": 0.03162333099086437,
"grad_norm": 0.572740663614088,
"learning_rate": 9.99383254929553e-06,
"loss": 0.2308,
"step": 45
},
{
"epoch": 0.03232607167955025,
"grad_norm": 0.5376971201448391,
"learning_rate": 9.99355545427353e-06,
"loss": 0.2038,
"step": 46
},
{
"epoch": 0.03302881236823612,
"grad_norm": 0.5803946358100757,
"learning_rate": 9.993272274542895e-06,
"loss": 0.2226,
"step": 47
},
{
"epoch": 0.03373155305692199,
"grad_norm": 0.5995490513194511,
"learning_rate": 9.992983010448684e-06,
"loss": 0.2553,
"step": 48
},
{
"epoch": 0.03443429374560787,
"grad_norm": 0.5345926632678341,
"learning_rate": 9.992687662343367e-06,
"loss": 0.2064,
"step": 49
},
{
"epoch": 0.035137034434293744,
"grad_norm": 0.5904489663547782,
"learning_rate": 9.992386230586832e-06,
"loss": 0.1973,
"step": 50
},
{
"epoch": 0.035839775122979624,
"grad_norm": 0.5474568790983223,
"learning_rate": 9.992078715546373e-06,
"loss": 0.199,
"step": 51
},
{
"epoch": 0.036542515811665496,
"grad_norm": 0.5639916029733052,
"learning_rate": 9.991765117596705e-06,
"loss": 0.2195,
"step": 52
},
{
"epoch": 0.03724525650035137,
"grad_norm": 0.5720230854284688,
"learning_rate": 9.99144543711995e-06,
"loss": 0.2151,
"step": 53
},
{
"epoch": 0.03794799718903725,
"grad_norm": 0.5648940910761961,
"learning_rate": 9.991119674505643e-06,
"loss": 0.2115,
"step": 54
},
{
"epoch": 0.03865073787772312,
"grad_norm": 0.5452367285194587,
"learning_rate": 9.990787830150727e-06,
"loss": 0.2182,
"step": 55
},
{
"epoch": 0.03935347856640899,
"grad_norm": 0.5805438865311197,
"learning_rate": 9.990449904459561e-06,
"loss": 0.2111,
"step": 56
},
{
"epoch": 0.04005621925509487,
"grad_norm": 0.5553341050808482,
"learning_rate": 9.99010589784391e-06,
"loss": 0.2084,
"step": 57
},
{
"epoch": 0.040758959943780745,
"grad_norm": 0.5273226505412448,
"learning_rate": 9.989755810722951e-06,
"loss": 0.2053,
"step": 58
},
{
"epoch": 0.04146170063246662,
"grad_norm": 0.5497593519290821,
"learning_rate": 9.989399643523272e-06,
"loss": 0.2074,
"step": 59
},
{
"epoch": 0.0421644413211525,
"grad_norm": 0.5549546743333674,
"learning_rate": 9.989037396678863e-06,
"loss": 0.2204,
"step": 60
},
{
"epoch": 0.04286718200983837,
"grad_norm": 0.5549078894733431,
"learning_rate": 9.988669070631128e-06,
"loss": 0.1981,
"step": 61
},
{
"epoch": 0.04356992269852424,
"grad_norm": 0.5639221318363923,
"learning_rate": 9.988294665828877e-06,
"loss": 0.2261,
"step": 62
},
{
"epoch": 0.04427266338721012,
"grad_norm": 0.540986074696611,
"learning_rate": 9.987914182728327e-06,
"loss": 0.2145,
"step": 63
},
{
"epoch": 0.044975404075895994,
"grad_norm": 0.5535674392248264,
"learning_rate": 9.9875276217931e-06,
"loss": 0.1974,
"step": 64
},
{
"epoch": 0.04567814476458187,
"grad_norm": 0.5744845275186403,
"learning_rate": 9.987134983494227e-06,
"loss": 0.2395,
"step": 65
},
{
"epoch": 0.046380885453267746,
"grad_norm": 0.5841243634839673,
"learning_rate": 9.98673626831014e-06,
"loss": 0.2259,
"step": 66
},
{
"epoch": 0.04708362614195362,
"grad_norm": 0.5381761834722242,
"learning_rate": 9.986331476726681e-06,
"loss": 0.1967,
"step": 67
},
{
"epoch": 0.04778636683063949,
"grad_norm": 0.56850467548114,
"learning_rate": 9.985920609237092e-06,
"loss": 0.2081,
"step": 68
},
{
"epoch": 0.04848910751932537,
"grad_norm": 0.5325622293618841,
"learning_rate": 9.985503666342022e-06,
"loss": 0.1975,
"step": 69
},
{
"epoch": 0.049191848208011243,
"grad_norm": 0.5411807875900252,
"learning_rate": 9.98508064854952e-06,
"loss": 0.2092,
"step": 70
},
{
"epoch": 0.049894588896697116,
"grad_norm": 0.5409391121214355,
"learning_rate": 9.984651556375039e-06,
"loss": 0.191,
"step": 71
},
{
"epoch": 0.050597329585382995,
"grad_norm": 0.5611013234227988,
"learning_rate": 9.984216390341428e-06,
"loss": 0.2031,
"step": 72
},
{
"epoch": 0.05130007027406887,
"grad_norm": 0.6004633606197677,
"learning_rate": 9.98377515097895e-06,
"loss": 0.2345,
"step": 73
},
{
"epoch": 0.05200281096275474,
"grad_norm": 0.5576736229012247,
"learning_rate": 9.983327838825256e-06,
"loss": 0.2129,
"step": 74
},
{
"epoch": 0.05270555165144062,
"grad_norm": 0.5702531585788543,
"learning_rate": 9.982874454425402e-06,
"loss": 0.2401,
"step": 75
},
{
"epoch": 0.05340829234012649,
"grad_norm": 0.5388339761813628,
"learning_rate": 9.982414998331842e-06,
"loss": 0.2196,
"step": 76
},
{
"epoch": 0.054111033028812365,
"grad_norm": 0.5796848449772599,
"learning_rate": 9.98194947110443e-06,
"loss": 0.2062,
"step": 77
},
{
"epoch": 0.054813773717498245,
"grad_norm": 0.5743923596689938,
"learning_rate": 9.981477873310416e-06,
"loss": 0.1997,
"step": 78
},
{
"epoch": 0.05551651440618412,
"grad_norm": 0.5256817899147247,
"learning_rate": 9.981000205524449e-06,
"loss": 0.1912,
"step": 79
},
{
"epoch": 0.05621925509486999,
"grad_norm": 0.5787140729797523,
"learning_rate": 9.980516468328571e-06,
"loss": 0.1846,
"step": 80
},
{
"epoch": 0.05692199578355587,
"grad_norm": 0.5928293871778944,
"learning_rate": 9.980026662312224e-06,
"loss": 0.22,
"step": 81
},
{
"epoch": 0.05762473647224174,
"grad_norm": 0.5869970405152036,
"learning_rate": 9.979530788072241e-06,
"loss": 0.2262,
"step": 82
},
{
"epoch": 0.05832747716092762,
"grad_norm": 0.5601186971331559,
"learning_rate": 9.979028846212852e-06,
"loss": 0.1995,
"step": 83
},
{
"epoch": 0.059030217849613494,
"grad_norm": 0.5762686780457437,
"learning_rate": 9.978520837345678e-06,
"loss": 0.211,
"step": 84
},
{
"epoch": 0.059732958538299366,
"grad_norm": 0.5658006462984013,
"learning_rate": 9.978006762089734e-06,
"loss": 0.1998,
"step": 85
},
{
"epoch": 0.060435699226985246,
"grad_norm": 0.5448568216925331,
"learning_rate": 9.977486621071425e-06,
"loss": 0.1976,
"step": 86
},
{
"epoch": 0.06113843991567112,
"grad_norm": 0.5449325392449603,
"learning_rate": 9.976960414924558e-06,
"loss": 0.2069,
"step": 87
},
{
"epoch": 0.06184118060435699,
"grad_norm": 0.5676641969425263,
"learning_rate": 9.97642814429031e-06,
"loss": 0.2138,
"step": 88
},
{
"epoch": 0.06254392129304287,
"grad_norm": 0.5424526783085432,
"learning_rate": 9.975889809817268e-06,
"loss": 0.2065,
"step": 89
},
{
"epoch": 0.06324666198172874,
"grad_norm": 0.5789089542007724,
"learning_rate": 9.975345412161395e-06,
"loss": 0.2097,
"step": 90
},
{
"epoch": 0.06394940267041462,
"grad_norm": 0.5227720371474548,
"learning_rate": 9.974794951986047e-06,
"loss": 0.1745,
"step": 91
},
{
"epoch": 0.0646521433591005,
"grad_norm": 0.5664675290538582,
"learning_rate": 9.974238429961965e-06,
"loss": 0.2258,
"step": 92
},
{
"epoch": 0.06535488404778636,
"grad_norm": 0.5242625446880697,
"learning_rate": 9.973675846767281e-06,
"loss": 0.182,
"step": 93
},
{
"epoch": 0.06605762473647224,
"grad_norm": 0.5130151810198286,
"learning_rate": 9.973107203087508e-06,
"loss": 0.195,
"step": 94
},
{
"epoch": 0.06676036542515812,
"grad_norm": 0.5107003932791141,
"learning_rate": 9.972532499615546e-06,
"loss": 0.1784,
"step": 95
},
{
"epoch": 0.06746310611384398,
"grad_norm": 0.50176695520801,
"learning_rate": 9.971951737051677e-06,
"loss": 0.185,
"step": 96
},
{
"epoch": 0.06816584680252986,
"grad_norm": 0.5708031718262291,
"learning_rate": 9.97136491610357e-06,
"loss": 0.2012,
"step": 97
},
{
"epoch": 0.06886858749121574,
"grad_norm": 0.5454650812883806,
"learning_rate": 9.97077203748627e-06,
"loss": 0.1975,
"step": 98
},
{
"epoch": 0.06957132817990162,
"grad_norm": 0.5159484154382179,
"learning_rate": 9.970173101922207e-06,
"loss": 0.1871,
"step": 99
},
{
"epoch": 0.07027406886858749,
"grad_norm": 0.5531737229062328,
"learning_rate": 9.969568110141194e-06,
"loss": 0.2169,
"step": 100
},
{
"epoch": 0.07097680955727337,
"grad_norm": 0.5426370539414972,
"learning_rate": 9.968957062880419e-06,
"loss": 0.2028,
"step": 101
},
{
"epoch": 0.07167955024595925,
"grad_norm": 0.6218361656819086,
"learning_rate": 9.968339960884452e-06,
"loss": 0.2471,
"step": 102
},
{
"epoch": 0.07238229093464511,
"grad_norm": 0.5809630898624576,
"learning_rate": 9.967716804905238e-06,
"loss": 0.2224,
"step": 103
},
{
"epoch": 0.07308503162333099,
"grad_norm": 0.5311062278784064,
"learning_rate": 9.967087595702101e-06,
"loss": 0.189,
"step": 104
},
{
"epoch": 0.07378777231201687,
"grad_norm": 0.522063350301436,
"learning_rate": 9.966452334041741e-06,
"loss": 0.1921,
"step": 105
},
{
"epoch": 0.07449051300070274,
"grad_norm": 0.5671623833518951,
"learning_rate": 9.965811020698228e-06,
"loss": 0.2219,
"step": 106
},
{
"epoch": 0.07519325368938862,
"grad_norm": 0.5727822563672408,
"learning_rate": 9.965163656453017e-06,
"loss": 0.2402,
"step": 107
},
{
"epoch": 0.0758959943780745,
"grad_norm": 0.5256490527259093,
"learning_rate": 9.964510242094922e-06,
"loss": 0.2091,
"step": 108
},
{
"epoch": 0.07659873506676036,
"grad_norm": 0.5285646235064523,
"learning_rate": 9.963850778420144e-06,
"loss": 0.2092,
"step": 109
},
{
"epoch": 0.07730147575544624,
"grad_norm": 0.5420907018436252,
"learning_rate": 9.96318526623224e-06,
"loss": 0.2025,
"step": 110
},
{
"epoch": 0.07800421644413212,
"grad_norm": 0.5232772340159911,
"learning_rate": 9.962513706342149e-06,
"loss": 0.2005,
"step": 111
},
{
"epoch": 0.07870695713281799,
"grad_norm": 0.5543317820424953,
"learning_rate": 9.961836099568174e-06,
"loss": 0.2009,
"step": 112
},
{
"epoch": 0.07940969782150387,
"grad_norm": 0.5313190776729853,
"learning_rate": 9.961152446735989e-06,
"loss": 0.2082,
"step": 113
},
{
"epoch": 0.08011243851018975,
"grad_norm": 0.5301047159823322,
"learning_rate": 9.960462748678632e-06,
"loss": 0.1942,
"step": 114
},
{
"epoch": 0.08081517919887561,
"grad_norm": 0.5482604596341181,
"learning_rate": 9.959767006236508e-06,
"loss": 0.2141,
"step": 115
},
{
"epoch": 0.08151791988756149,
"grad_norm": 0.5241312613602601,
"learning_rate": 9.959065220257388e-06,
"loss": 0.1954,
"step": 116
},
{
"epoch": 0.08222066057624737,
"grad_norm": 0.5536434360318407,
"learning_rate": 9.958357391596405e-06,
"loss": 0.2191,
"step": 117
},
{
"epoch": 0.08292340126493324,
"grad_norm": 0.5565448027851918,
"learning_rate": 9.957643521116059e-06,
"loss": 0.1919,
"step": 118
},
{
"epoch": 0.08362614195361912,
"grad_norm": 0.5357229743914843,
"learning_rate": 9.956923609686212e-06,
"loss": 0.202,
"step": 119
},
{
"epoch": 0.084328882642305,
"grad_norm": 0.5058154999730936,
"learning_rate": 9.956197658184082e-06,
"loss": 0.1832,
"step": 120
},
{
"epoch": 0.08503162333099086,
"grad_norm": 0.5580603557762164,
"learning_rate": 9.955465667494249e-06,
"loss": 0.1998,
"step": 121
},
{
"epoch": 0.08573436401967674,
"grad_norm": 0.5441411673706941,
"learning_rate": 9.954727638508655e-06,
"loss": 0.1987,
"step": 122
},
{
"epoch": 0.08643710470836262,
"grad_norm": 0.5621956668698223,
"learning_rate": 9.953983572126598e-06,
"loss": 0.2329,
"step": 123
},
{
"epoch": 0.08713984539704848,
"grad_norm": 0.5555218799042205,
"learning_rate": 9.953233469254728e-06,
"loss": 0.2113,
"step": 124
},
{
"epoch": 0.08784258608573436,
"grad_norm": 0.5553758129637749,
"learning_rate": 9.95247733080706e-06,
"loss": 0.2215,
"step": 125
},
{
"epoch": 0.08854532677442024,
"grad_norm": 0.5355462057697746,
"learning_rate": 9.951715157704954e-06,
"loss": 0.202,
"step": 126
},
{
"epoch": 0.08924806746310611,
"grad_norm": 0.5050684078508851,
"learning_rate": 9.950946950877126e-06,
"loss": 0.1768,
"step": 127
},
{
"epoch": 0.08995080815179199,
"grad_norm": 0.5370575562075645,
"learning_rate": 9.950172711259651e-06,
"loss": 0.2088,
"step": 128
},
{
"epoch": 0.09065354884047787,
"grad_norm": 0.5424876550648152,
"learning_rate": 9.949392439795943e-06,
"loss": 0.2208,
"step": 129
},
{
"epoch": 0.09135628952916373,
"grad_norm": 0.5261671250993958,
"learning_rate": 9.948606137436779e-06,
"loss": 0.198,
"step": 130
},
{
"epoch": 0.09205903021784961,
"grad_norm": 0.5088818678092768,
"learning_rate": 9.947813805140274e-06,
"loss": 0.187,
"step": 131
},
{
"epoch": 0.09276177090653549,
"grad_norm": 0.5328110111985129,
"learning_rate": 9.947015443871894e-06,
"loss": 0.2154,
"step": 132
},
{
"epoch": 0.09346451159522136,
"grad_norm": 0.5324344180286568,
"learning_rate": 9.946211054604455e-06,
"loss": 0.2116,
"step": 133
},
{
"epoch": 0.09416725228390724,
"grad_norm": 0.5236749308752624,
"learning_rate": 9.945400638318113e-06,
"loss": 0.2015,
"step": 134
},
{
"epoch": 0.09486999297259312,
"grad_norm": 0.583442779591505,
"learning_rate": 9.94458419600037e-06,
"loss": 0.2133,
"step": 135
},
{
"epoch": 0.09557273366127898,
"grad_norm": 0.5453942041965456,
"learning_rate": 9.943761728646072e-06,
"loss": 0.2178,
"step": 136
},
{
"epoch": 0.09627547434996486,
"grad_norm": 0.5456774305577579,
"learning_rate": 9.942933237257406e-06,
"loss": 0.1992,
"step": 137
},
{
"epoch": 0.09697821503865074,
"grad_norm": 0.5383843010460779,
"learning_rate": 9.942098722843898e-06,
"loss": 0.2194,
"step": 138
},
{
"epoch": 0.09768095572733661,
"grad_norm": 0.5287988030526317,
"learning_rate": 9.941258186422413e-06,
"loss": 0.2124,
"step": 139
},
{
"epoch": 0.09838369641602249,
"grad_norm": 0.5430300098732184,
"learning_rate": 9.940411629017159e-06,
"loss": 0.2112,
"step": 140
},
{
"epoch": 0.09908643710470837,
"grad_norm": 0.520885326315766,
"learning_rate": 9.93955905165967e-06,
"loss": 0.2103,
"step": 141
},
{
"epoch": 0.09978917779339423,
"grad_norm": 0.5449094302378578,
"learning_rate": 9.93870045538883e-06,
"loss": 0.2327,
"step": 142
},
{
"epoch": 0.10049191848208011,
"grad_norm": 0.5687041901069785,
"learning_rate": 9.937835841250842e-06,
"loss": 0.2475,
"step": 143
},
{
"epoch": 0.10119465917076599,
"grad_norm": 0.5292925413224571,
"learning_rate": 9.936965210299254e-06,
"loss": 0.2058,
"step": 144
},
{
"epoch": 0.10189739985945186,
"grad_norm": 0.5450162719241888,
"learning_rate": 9.936088563594937e-06,
"loss": 0.2166,
"step": 145
},
{
"epoch": 0.10260014054813774,
"grad_norm": 0.5030226831647348,
"learning_rate": 9.935205902206098e-06,
"loss": 0.2,
"step": 146
},
{
"epoch": 0.10330288123682362,
"grad_norm": 0.5606886718196615,
"learning_rate": 9.934317227208269e-06,
"loss": 0.2395,
"step": 147
},
{
"epoch": 0.10400562192550948,
"grad_norm": 0.508456211029729,
"learning_rate": 9.933422539684314e-06,
"loss": 0.1826,
"step": 148
},
{
"epoch": 0.10470836261419536,
"grad_norm": 0.5276224067096561,
"learning_rate": 9.932521840724418e-06,
"loss": 0.1943,
"step": 149
},
{
"epoch": 0.10541110330288124,
"grad_norm": 0.5370428953048157,
"learning_rate": 9.931615131426094e-06,
"loss": 0.2142,
"step": 150
},
{
"epoch": 0.1061138439915671,
"grad_norm": 0.504130332599666,
"learning_rate": 9.930702412894179e-06,
"loss": 0.1934,
"step": 151
},
{
"epoch": 0.10681658468025299,
"grad_norm": 0.5290656249486817,
"learning_rate": 9.929783686240833e-06,
"loss": 0.2025,
"step": 152
},
{
"epoch": 0.10751932536893886,
"grad_norm": 0.5141584039459383,
"learning_rate": 9.928858952585535e-06,
"loss": 0.1877,
"step": 153
},
{
"epoch": 0.10822206605762473,
"grad_norm": 0.5562386101706629,
"learning_rate": 9.927928213055082e-06,
"loss": 0.1997,
"step": 154
},
{
"epoch": 0.10892480674631061,
"grad_norm": 0.49443664961132877,
"learning_rate": 9.926991468783595e-06,
"loss": 0.1678,
"step": 155
},
{
"epoch": 0.10962754743499649,
"grad_norm": 0.550549791203403,
"learning_rate": 9.926048720912509e-06,
"loss": 0.1883,
"step": 156
},
{
"epoch": 0.11033028812368235,
"grad_norm": 0.5429116475858584,
"learning_rate": 9.925099970590568e-06,
"loss": 0.2199,
"step": 157
},
{
"epoch": 0.11103302881236823,
"grad_norm": 0.5290919161535431,
"learning_rate": 9.924145218973841e-06,
"loss": 0.2153,
"step": 158
},
{
"epoch": 0.11173576950105411,
"grad_norm": 0.5213091776944005,
"learning_rate": 9.923184467225704e-06,
"loss": 0.2002,
"step": 159
},
{
"epoch": 0.11243851018973998,
"grad_norm": 0.5161618699416421,
"learning_rate": 9.922217716516843e-06,
"loss": 0.1913,
"step": 160
},
{
"epoch": 0.11314125087842586,
"grad_norm": 0.558417418707948,
"learning_rate": 9.921244968025257e-06,
"loss": 0.2169,
"step": 161
},
{
"epoch": 0.11384399156711174,
"grad_norm": 0.48467929480194855,
"learning_rate": 9.920266222936252e-06,
"loss": 0.1816,
"step": 162
},
{
"epoch": 0.11454673225579762,
"grad_norm": 0.507478691170454,
"learning_rate": 9.91928148244244e-06,
"loss": 0.1827,
"step": 163
},
{
"epoch": 0.11524947294448348,
"grad_norm": 0.5214917414048722,
"learning_rate": 9.91829074774374e-06,
"loss": 0.1936,
"step": 164
},
{
"epoch": 0.11595221363316936,
"grad_norm": 0.5280684849167947,
"learning_rate": 9.917294020047375e-06,
"loss": 0.1835,
"step": 165
},
{
"epoch": 0.11665495432185524,
"grad_norm": 0.5521589119277396,
"learning_rate": 9.916291300567868e-06,
"loss": 0.2151,
"step": 166
},
{
"epoch": 0.11735769501054111,
"grad_norm": 0.5858753766045725,
"learning_rate": 9.915282590527048e-06,
"loss": 0.2227,
"step": 167
},
{
"epoch": 0.11806043569922699,
"grad_norm": 0.553369867577223,
"learning_rate": 9.914267891154037e-06,
"loss": 0.2132,
"step": 168
},
{
"epoch": 0.11876317638791287,
"grad_norm": 0.5147106193312105,
"learning_rate": 9.913247203685261e-06,
"loss": 0.1743,
"step": 169
},
{
"epoch": 0.11946591707659873,
"grad_norm": 0.5393121589562151,
"learning_rate": 9.912220529364441e-06,
"loss": 0.1946,
"step": 170
},
{
"epoch": 0.12016865776528461,
"grad_norm": 0.5337320423505406,
"learning_rate": 9.911187869442588e-06,
"loss": 0.1994,
"step": 171
},
{
"epoch": 0.12087139845397049,
"grad_norm": 0.521658044662446,
"learning_rate": 9.910149225178018e-06,
"loss": 0.2042,
"step": 172
},
{
"epoch": 0.12157413914265636,
"grad_norm": 0.5683795188065389,
"learning_rate": 9.909104597836324e-06,
"loss": 0.2138,
"step": 173
},
{
"epoch": 0.12227687983134224,
"grad_norm": 0.5287528993195494,
"learning_rate": 9.908053988690403e-06,
"loss": 0.204,
"step": 174
},
{
"epoch": 0.12297962052002812,
"grad_norm": 0.5030979458003717,
"learning_rate": 9.90699739902043e-06,
"loss": 0.1995,
"step": 175
},
{
"epoch": 0.12368236120871398,
"grad_norm": 0.5333811207421939,
"learning_rate": 9.905934830113878e-06,
"loss": 0.2247,
"step": 176
},
{
"epoch": 0.12438510189739986,
"grad_norm": 0.5242706389608097,
"learning_rate": 9.904866283265498e-06,
"loss": 0.2001,
"step": 177
},
{
"epoch": 0.12508784258608574,
"grad_norm": 0.5553133727029044,
"learning_rate": 9.903791759777326e-06,
"loss": 0.2232,
"step": 178
},
{
"epoch": 0.1257905832747716,
"grad_norm": 0.5854998441746612,
"learning_rate": 9.902711260958682e-06,
"loss": 0.2326,
"step": 179
},
{
"epoch": 0.12649332396345747,
"grad_norm": 0.5465334372407958,
"learning_rate": 9.901624788126169e-06,
"loss": 0.2135,
"step": 180
},
{
"epoch": 0.12719606465214336,
"grad_norm": 0.5551496513314771,
"learning_rate": 9.900532342603669e-06,
"loss": 0.2061,
"step": 181
},
{
"epoch": 0.12789880534082923,
"grad_norm": 0.5225704469654726,
"learning_rate": 9.899433925722334e-06,
"loss": 0.1904,
"step": 182
},
{
"epoch": 0.1286015460295151,
"grad_norm": 0.5394995980067866,
"learning_rate": 9.898329538820606e-06,
"loss": 0.189,
"step": 183
},
{
"epoch": 0.129304286718201,
"grad_norm": 0.594160825487313,
"learning_rate": 9.897219183244188e-06,
"loss": 0.2193,
"step": 184
},
{
"epoch": 0.13000702740688685,
"grad_norm": 0.5645972118149901,
"learning_rate": 9.896102860346066e-06,
"loss": 0.2032,
"step": 185
},
{
"epoch": 0.13070976809557272,
"grad_norm": 0.5576545796400294,
"learning_rate": 9.894980571486492e-06,
"loss": 0.2071,
"step": 186
},
{
"epoch": 0.1314125087842586,
"grad_norm": 0.5085872856787232,
"learning_rate": 9.893852318032986e-06,
"loss": 0.1908,
"step": 187
},
{
"epoch": 0.13211524947294448,
"grad_norm": 0.5442927380248144,
"learning_rate": 9.892718101360344e-06,
"loss": 0.2061,
"step": 188
},
{
"epoch": 0.13281799016163034,
"grad_norm": 0.5475838060489673,
"learning_rate": 9.891577922850616e-06,
"loss": 0.1893,
"step": 189
},
{
"epoch": 0.13352073085031624,
"grad_norm": 0.540506279685077,
"learning_rate": 9.89043178389313e-06,
"loss": 0.1665,
"step": 190
},
{
"epoch": 0.1342234715390021,
"grad_norm": 0.509049673509137,
"learning_rate": 9.889279685884468e-06,
"loss": 0.1683,
"step": 191
},
{
"epoch": 0.13492621222768797,
"grad_norm": 0.5104559471477775,
"learning_rate": 9.888121630228476e-06,
"loss": 0.1809,
"step": 192
},
{
"epoch": 0.13562895291637386,
"grad_norm": 0.5357517170593388,
"learning_rate": 9.886957618336257e-06,
"loss": 0.1972,
"step": 193
},
{
"epoch": 0.13633169360505973,
"grad_norm": 0.5651199180828425,
"learning_rate": 9.885787651626176e-06,
"loss": 0.216,
"step": 194
},
{
"epoch": 0.13703443429374562,
"grad_norm": 0.5604061776938946,
"learning_rate": 9.88461173152385e-06,
"loss": 0.1988,
"step": 195
},
{
"epoch": 0.1377371749824315,
"grad_norm": 0.5260311965836726,
"learning_rate": 9.883429859462155e-06,
"loss": 0.1921,
"step": 196
},
{
"epoch": 0.13843991567111735,
"grad_norm": 0.5881491100254291,
"learning_rate": 9.882242036881214e-06,
"loss": 0.227,
"step": 197
},
{
"epoch": 0.13914265635980325,
"grad_norm": 0.5360647056862757,
"learning_rate": 9.881048265228402e-06,
"loss": 0.1927,
"step": 198
},
{
"epoch": 0.1398453970484891,
"grad_norm": 0.5700218496598528,
"learning_rate": 9.879848545958348e-06,
"loss": 0.2414,
"step": 199
},
{
"epoch": 0.14054813773717498,
"grad_norm": 0.49314994278279367,
"learning_rate": 9.878642880532923e-06,
"loss": 0.1615,
"step": 200
},
{
"epoch": 0.14125087842586087,
"grad_norm": 0.5587419946990672,
"learning_rate": 9.877431270421248e-06,
"loss": 0.2154,
"step": 201
},
{
"epoch": 0.14195361911454674,
"grad_norm": 0.5369484921567617,
"learning_rate": 9.876213717099678e-06,
"loss": 0.1901,
"step": 202
},
{
"epoch": 0.1426563598032326,
"grad_norm": 0.4973260815445628,
"learning_rate": 9.874990222051824e-06,
"loss": 0.1963,
"step": 203
},
{
"epoch": 0.1433591004919185,
"grad_norm": 0.5261029662772938,
"learning_rate": 9.873760786768524e-06,
"loss": 0.1963,
"step": 204
},
{
"epoch": 0.14406184118060436,
"grad_norm": 0.5329267697253043,
"learning_rate": 9.872525412747865e-06,
"loss": 0.1981,
"step": 205
},
{
"epoch": 0.14476458186929023,
"grad_norm": 0.5246233049215846,
"learning_rate": 9.87128410149516e-06,
"loss": 0.2045,
"step": 206
},
{
"epoch": 0.14546732255797612,
"grad_norm": 0.552740038183233,
"learning_rate": 9.870036854522967e-06,
"loss": 0.236,
"step": 207
},
{
"epoch": 0.14617006324666199,
"grad_norm": 0.5250297115428508,
"learning_rate": 9.868783673351069e-06,
"loss": 0.177,
"step": 208
},
{
"epoch": 0.14687280393534785,
"grad_norm": 0.5359417007107535,
"learning_rate": 9.867524559506484e-06,
"loss": 0.195,
"step": 209
},
{
"epoch": 0.14757554462403374,
"grad_norm": 0.5466980070361454,
"learning_rate": 9.866259514523456e-06,
"loss": 0.1914,
"step": 210
},
{
"epoch": 0.1482782853127196,
"grad_norm": 0.5250685884809476,
"learning_rate": 9.86498853994346e-06,
"loss": 0.1866,
"step": 211
},
{
"epoch": 0.14898102600140548,
"grad_norm": 0.5346721147028544,
"learning_rate": 9.863711637315193e-06,
"loss": 0.2116,
"step": 212
},
{
"epoch": 0.14968376669009137,
"grad_norm": 0.5120753920923115,
"learning_rate": 9.862428808194575e-06,
"loss": 0.1765,
"step": 213
},
{
"epoch": 0.15038650737877723,
"grad_norm": 0.5203505661834146,
"learning_rate": 9.86114005414475e-06,
"loss": 0.2017,
"step": 214
},
{
"epoch": 0.1510892480674631,
"grad_norm": 0.5135432969565993,
"learning_rate": 9.859845376736084e-06,
"loss": 0.1965,
"step": 215
},
{
"epoch": 0.151791988756149,
"grad_norm": 0.5242400487450163,
"learning_rate": 9.858544777546153e-06,
"loss": 0.1885,
"step": 216
},
{
"epoch": 0.15249472944483486,
"grad_norm": 0.5037441921440698,
"learning_rate": 9.857238258159755e-06,
"loss": 0.1835,
"step": 217
},
{
"epoch": 0.15319747013352072,
"grad_norm": 0.5250153350576638,
"learning_rate": 9.8559258201689e-06,
"loss": 0.1859,
"step": 218
},
{
"epoch": 0.15390021082220662,
"grad_norm": 0.5829455871447047,
"learning_rate": 9.854607465172808e-06,
"loss": 0.1907,
"step": 219
},
{
"epoch": 0.15460295151089248,
"grad_norm": 0.5498786288501906,
"learning_rate": 9.853283194777913e-06,
"loss": 0.2158,
"step": 220
},
{
"epoch": 0.15530569219957835,
"grad_norm": 0.5692564261688635,
"learning_rate": 9.851953010597854e-06,
"loss": 0.21,
"step": 221
},
{
"epoch": 0.15600843288826424,
"grad_norm": 0.5050049560317424,
"learning_rate": 9.850616914253476e-06,
"loss": 0.1915,
"step": 222
},
{
"epoch": 0.1567111735769501,
"grad_norm": 0.5481805472229878,
"learning_rate": 9.84927490737283e-06,
"loss": 0.2051,
"step": 223
},
{
"epoch": 0.15741391426563597,
"grad_norm": 0.4932418454519954,
"learning_rate": 9.847926991591165e-06,
"loss": 0.1876,
"step": 224
},
{
"epoch": 0.15811665495432187,
"grad_norm": 0.5096279574460413,
"learning_rate": 9.846573168550936e-06,
"loss": 0.2002,
"step": 225
},
{
"epoch": 0.15881939564300773,
"grad_norm": 0.5199894898915933,
"learning_rate": 9.845213439901795e-06,
"loss": 0.1981,
"step": 226
},
{
"epoch": 0.1595221363316936,
"grad_norm": 0.5155058717704711,
"learning_rate": 9.843847807300582e-06,
"loss": 0.1869,
"step": 227
},
{
"epoch": 0.1602248770203795,
"grad_norm": 0.5451860630048405,
"learning_rate": 9.842476272411343e-06,
"loss": 0.1785,
"step": 228
},
{
"epoch": 0.16092761770906536,
"grad_norm": 0.5572085114446077,
"learning_rate": 9.841098836905306e-06,
"loss": 0.2063,
"step": 229
},
{
"epoch": 0.16163035839775122,
"grad_norm": 0.5464205485143655,
"learning_rate": 9.839715502460894e-06,
"loss": 0.2066,
"step": 230
},
{
"epoch": 0.16233309908643712,
"grad_norm": 0.5496024905732382,
"learning_rate": 9.838326270763717e-06,
"loss": 0.2072,
"step": 231
},
{
"epoch": 0.16303583977512298,
"grad_norm": 0.5244444353434804,
"learning_rate": 9.836931143506572e-06,
"loss": 0.1952,
"step": 232
},
{
"epoch": 0.16373858046380885,
"grad_norm": 0.5462213747463449,
"learning_rate": 9.835530122389439e-06,
"loss": 0.2054,
"step": 233
},
{
"epoch": 0.16444132115249474,
"grad_norm": 0.5210865544015293,
"learning_rate": 9.834123209119478e-06,
"loss": 0.1965,
"step": 234
},
{
"epoch": 0.1651440618411806,
"grad_norm": 0.5414620686814897,
"learning_rate": 9.83271040541103e-06,
"loss": 0.2109,
"step": 235
},
{
"epoch": 0.16584680252986647,
"grad_norm": 0.5369198973702732,
"learning_rate": 9.831291712985613e-06,
"loss": 0.1934,
"step": 236
},
{
"epoch": 0.16654954321855236,
"grad_norm": 0.511788601277114,
"learning_rate": 9.829867133571924e-06,
"loss": 0.18,
"step": 237
},
{
"epoch": 0.16725228390723823,
"grad_norm": 0.5121987915269581,
"learning_rate": 9.828436668905829e-06,
"loss": 0.1676,
"step": 238
},
{
"epoch": 0.1679550245959241,
"grad_norm": 0.5188342768801897,
"learning_rate": 9.827000320730366e-06,
"loss": 0.191,
"step": 239
},
{
"epoch": 0.16865776528461,
"grad_norm": 0.5626887582456082,
"learning_rate": 9.825558090795747e-06,
"loss": 0.2233,
"step": 240
},
{
"epoch": 0.16936050597329586,
"grad_norm": 0.5343729872311432,
"learning_rate": 9.82410998085934e-06,
"loss": 0.2045,
"step": 241
},
{
"epoch": 0.17006324666198172,
"grad_norm": 0.5634331433740161,
"learning_rate": 9.822655992685687e-06,
"loss": 0.2094,
"step": 242
},
{
"epoch": 0.17076598735066761,
"grad_norm": 0.5269987943660347,
"learning_rate": 9.821196128046493e-06,
"loss": 0.2031,
"step": 243
},
{
"epoch": 0.17146872803935348,
"grad_norm": 0.5116594387967673,
"learning_rate": 9.819730388720622e-06,
"loss": 0.1867,
"step": 244
},
{
"epoch": 0.17217146872803935,
"grad_norm": 0.531716345555225,
"learning_rate": 9.818258776494089e-06,
"loss": 0.2053,
"step": 245
},
{
"epoch": 0.17287420941672524,
"grad_norm": 0.5300008699510442,
"learning_rate": 9.816781293160079e-06,
"loss": 0.2001,
"step": 246
},
{
"epoch": 0.1735769501054111,
"grad_norm": 0.5727507016739112,
"learning_rate": 9.815297940518917e-06,
"loss": 0.2032,
"step": 247
},
{
"epoch": 0.17427969079409697,
"grad_norm": 0.511482182034896,
"learning_rate": 9.81380872037809e-06,
"loss": 0.1776,
"step": 248
},
{
"epoch": 0.17498243148278286,
"grad_norm": 0.5223485030118387,
"learning_rate": 9.812313634552233e-06,
"loss": 0.2095,
"step": 249
},
{
"epoch": 0.17568517217146873,
"grad_norm": 0.5274459252576597,
"learning_rate": 9.810812684863123e-06,
"loss": 0.1865,
"step": 250
},
{
"epoch": 0.1763879128601546,
"grad_norm": 0.5387287518026215,
"learning_rate": 9.809305873139685e-06,
"loss": 0.1896,
"step": 251
},
{
"epoch": 0.1770906535488405,
"grad_norm": 0.5321583220265408,
"learning_rate": 9.80779320121799e-06,
"loss": 0.1803,
"step": 252
},
{
"epoch": 0.17779339423752635,
"grad_norm": 0.5311044503887951,
"learning_rate": 9.806274670941247e-06,
"loss": 0.21,
"step": 253
},
{
"epoch": 0.17849613492621222,
"grad_norm": 0.5238717900947666,
"learning_rate": 9.804750284159802e-06,
"loss": 0.1909,
"step": 254
},
{
"epoch": 0.1791988756148981,
"grad_norm": 0.5687996235127285,
"learning_rate": 9.803220042731143e-06,
"loss": 0.2234,
"step": 255
},
{
"epoch": 0.17990161630358398,
"grad_norm": 0.5574469376886612,
"learning_rate": 9.801683948519885e-06,
"loss": 0.2173,
"step": 256
},
{
"epoch": 0.18060435699226984,
"grad_norm": 0.5180438084466581,
"learning_rate": 9.800142003397774e-06,
"loss": 0.1732,
"step": 257
},
{
"epoch": 0.18130709768095574,
"grad_norm": 0.5310855661683267,
"learning_rate": 9.798594209243697e-06,
"loss": 0.202,
"step": 258
},
{
"epoch": 0.1820098383696416,
"grad_norm": 0.5386589341794008,
"learning_rate": 9.797040567943654e-06,
"loss": 0.2023,
"step": 259
},
{
"epoch": 0.18271257905832747,
"grad_norm": 0.5118955272519512,
"learning_rate": 9.79548108139078e-06,
"loss": 0.1876,
"step": 260
},
{
"epoch": 0.18341531974701336,
"grad_norm": 0.5472298366884414,
"learning_rate": 9.793915751485326e-06,
"loss": 0.2072,
"step": 261
},
{
"epoch": 0.18411806043569923,
"grad_norm": 0.5512321528105381,
"learning_rate": 9.792344580134664e-06,
"loss": 0.2103,
"step": 262
},
{
"epoch": 0.1848208011243851,
"grad_norm": 0.5329543717616166,
"learning_rate": 9.790767569253292e-06,
"loss": 0.1933,
"step": 263
},
{
"epoch": 0.18552354181307099,
"grad_norm": 0.5164467362087247,
"learning_rate": 9.78918472076281e-06,
"loss": 0.1925,
"step": 264
},
{
"epoch": 0.18622628250175685,
"grad_norm": 0.54505710084966,
"learning_rate": 9.787596036591944e-06,
"loss": 0.2334,
"step": 265
},
{
"epoch": 0.18692902319044272,
"grad_norm": 0.5615688025483314,
"learning_rate": 9.78600151867652e-06,
"loss": 0.2412,
"step": 266
},
{
"epoch": 0.1876317638791286,
"grad_norm": 0.5680664305453649,
"learning_rate": 9.784401168959482e-06,
"loss": 0.2042,
"step": 267
},
{
"epoch": 0.18833450456781448,
"grad_norm": 0.5156312242676864,
"learning_rate": 9.782794989390874e-06,
"loss": 0.1826,
"step": 268
},
{
"epoch": 0.18903724525650034,
"grad_norm": 0.5398485094700511,
"learning_rate": 9.781182981927843e-06,
"loss": 0.1988,
"step": 269
},
{
"epoch": 0.18973998594518623,
"grad_norm": 0.5622582389734884,
"learning_rate": 9.779565148534645e-06,
"loss": 0.22,
"step": 270
},
{
"epoch": 0.1904427266338721,
"grad_norm": 0.49531208082546646,
"learning_rate": 9.777941491182628e-06,
"loss": 0.1807,
"step": 271
},
{
"epoch": 0.19114546732255797,
"grad_norm": 0.5553653639761128,
"learning_rate": 9.776312011850236e-06,
"loss": 0.2233,
"step": 272
},
{
"epoch": 0.19184820801124386,
"grad_norm": 0.5905131477395442,
"learning_rate": 9.774676712523013e-06,
"loss": 0.2026,
"step": 273
},
{
"epoch": 0.19255094869992972,
"grad_norm": 0.5757065193118327,
"learning_rate": 9.773035595193588e-06,
"loss": 0.213,
"step": 274
},
{
"epoch": 0.1932536893886156,
"grad_norm": 0.5224920946455179,
"learning_rate": 9.771388661861684e-06,
"loss": 0.1968,
"step": 275
},
{
"epoch": 0.19395643007730148,
"grad_norm": 0.5623112222215833,
"learning_rate": 9.76973591453411e-06,
"loss": 0.229,
"step": 276
},
{
"epoch": 0.19465917076598735,
"grad_norm": 0.5629423566915981,
"learning_rate": 9.768077355224758e-06,
"loss": 0.2081,
"step": 277
},
{
"epoch": 0.19536191145467321,
"grad_norm": 0.5183782780342989,
"learning_rate": 9.766412985954605e-06,
"loss": 0.1769,
"step": 278
},
{
"epoch": 0.1960646521433591,
"grad_norm": 0.5212145167380839,
"learning_rate": 9.764742808751705e-06,
"loss": 0.1828,
"step": 279
},
{
"epoch": 0.19676739283204497,
"grad_norm": 0.5176379746169677,
"learning_rate": 9.763066825651186e-06,
"loss": 0.1819,
"step": 280
},
{
"epoch": 0.19747013352073084,
"grad_norm": 0.5219120130877213,
"learning_rate": 9.761385038695257e-06,
"loss": 0.181,
"step": 281
},
{
"epoch": 0.19817287420941673,
"grad_norm": 0.497045902364289,
"learning_rate": 9.759697449933194e-06,
"loss": 0.1734,
"step": 282
},
{
"epoch": 0.1988756148981026,
"grad_norm": 0.5190482550474241,
"learning_rate": 9.758004061421347e-06,
"loss": 0.1865,
"step": 283
},
{
"epoch": 0.19957835558678846,
"grad_norm": 0.5842551279450745,
"learning_rate": 9.75630487522313e-06,
"loss": 0.2203,
"step": 284
},
{
"epoch": 0.20028109627547436,
"grad_norm": 0.5551403137801947,
"learning_rate": 9.754599893409023e-06,
"loss": 0.2179,
"step": 285
},
{
"epoch": 0.20098383696416022,
"grad_norm": 0.524413493015236,
"learning_rate": 9.752889118056565e-06,
"loss": 0.1866,
"step": 286
},
{
"epoch": 0.2016865776528461,
"grad_norm": 0.5786394750468196,
"learning_rate": 9.75117255125036e-06,
"loss": 0.2337,
"step": 287
},
{
"epoch": 0.20238931834153198,
"grad_norm": 0.5485405276227957,
"learning_rate": 9.749450195082059e-06,
"loss": 0.2059,
"step": 288
},
{
"epoch": 0.20309205903021785,
"grad_norm": 0.5252768606055862,
"learning_rate": 9.747722051650384e-06,
"loss": 0.2058,
"step": 289
},
{
"epoch": 0.2037947997189037,
"grad_norm": 0.49209713692252655,
"learning_rate": 9.74598812306109e-06,
"loss": 0.1736,
"step": 290
},
{
"epoch": 0.2044975404075896,
"grad_norm": 0.5265702195074639,
"learning_rate": 9.744248411426995e-06,
"loss": 0.2066,
"step": 291
},
{
"epoch": 0.20520028109627547,
"grad_norm": 0.5083836068529662,
"learning_rate": 9.742502918867959e-06,
"loss": 0.1889,
"step": 292
},
{
"epoch": 0.20590302178496134,
"grad_norm": 0.5294998246539774,
"learning_rate": 9.740751647510887e-06,
"loss": 0.205,
"step": 293
},
{
"epoch": 0.20660576247364723,
"grad_norm": 0.5477460583347653,
"learning_rate": 9.73899459948972e-06,
"loss": 0.2022,
"step": 294
},
{
"epoch": 0.2073085031623331,
"grad_norm": 0.5237490738163196,
"learning_rate": 9.737231776945445e-06,
"loss": 0.2044,
"step": 295
},
{
"epoch": 0.20801124385101896,
"grad_norm": 0.5377161539099496,
"learning_rate": 9.735463182026085e-06,
"loss": 0.1951,
"step": 296
},
{
"epoch": 0.20871398453970486,
"grad_norm": 0.4953170972218495,
"learning_rate": 9.733688816886692e-06,
"loss": 0.1732,
"step": 297
},
{
"epoch": 0.20941672522839072,
"grad_norm": 0.5572093168778736,
"learning_rate": 9.731908683689355e-06,
"loss": 0.1903,
"step": 298
},
{
"epoch": 0.2101194659170766,
"grad_norm": 0.5481859373130906,
"learning_rate": 9.730122784603184e-06,
"loss": 0.1922,
"step": 299
},
{
"epoch": 0.21082220660576248,
"grad_norm": 0.5505813737799056,
"learning_rate": 9.728331121804322e-06,
"loss": 0.2135,
"step": 300
},
{
"epoch": 0.21152494729444835,
"grad_norm": 0.5183424768074485,
"learning_rate": 9.726533697475929e-06,
"loss": 0.1735,
"step": 301
},
{
"epoch": 0.2122276879831342,
"grad_norm": 0.5449744005434142,
"learning_rate": 9.724730513808191e-06,
"loss": 0.1989,
"step": 302
},
{
"epoch": 0.2129304286718201,
"grad_norm": 0.5263283262096657,
"learning_rate": 9.722921572998311e-06,
"loss": 0.1982,
"step": 303
},
{
"epoch": 0.21363316936050597,
"grad_norm": 0.555778893603175,
"learning_rate": 9.721106877250501e-06,
"loss": 0.2249,
"step": 304
},
{
"epoch": 0.21433591004919184,
"grad_norm": 0.5488655032726547,
"learning_rate": 9.719286428775995e-06,
"loss": 0.2025,
"step": 305
},
{
"epoch": 0.21503865073787773,
"grad_norm": 0.508188510072533,
"learning_rate": 9.717460229793027e-06,
"loss": 0.204,
"step": 306
},
{
"epoch": 0.2157413914265636,
"grad_norm": 0.5161364072590501,
"learning_rate": 9.715628282526847e-06,
"loss": 0.1879,
"step": 307
},
{
"epoch": 0.21644413211524946,
"grad_norm": 0.5238141029252569,
"learning_rate": 9.713790589209704e-06,
"loss": 0.1895,
"step": 308
},
{
"epoch": 0.21714687280393535,
"grad_norm": 0.5342330104422577,
"learning_rate": 9.71194715208085e-06,
"loss": 0.2221,
"step": 309
},
{
"epoch": 0.21784961349262122,
"grad_norm": 0.5259125390406799,
"learning_rate": 9.710097973386531e-06,
"loss": 0.1821,
"step": 310
},
{
"epoch": 0.21855235418130708,
"grad_norm": 0.5063736175509841,
"learning_rate": 9.708243055380002e-06,
"loss": 0.1857,
"step": 311
},
{
"epoch": 0.21925509486999298,
"grad_norm": 0.5192947497176749,
"learning_rate": 9.7063824003215e-06,
"loss": 0.189,
"step": 312
},
{
"epoch": 0.21995783555867884,
"grad_norm": 0.5284361182139489,
"learning_rate": 9.704516010478254e-06,
"loss": 0.1896,
"step": 313
},
{
"epoch": 0.2206605762473647,
"grad_norm": 0.5471475403815499,
"learning_rate": 9.702643888124484e-06,
"loss": 0.2096,
"step": 314
},
{
"epoch": 0.2213633169360506,
"grad_norm": 0.5105524910165029,
"learning_rate": 9.700766035541396e-06,
"loss": 0.1889,
"step": 315
},
{
"epoch": 0.22206605762473647,
"grad_norm": 0.5187072895311851,
"learning_rate": 9.698882455017175e-06,
"loss": 0.1996,
"step": 316
},
{
"epoch": 0.22276879831342233,
"grad_norm": 0.5485833610090907,
"learning_rate": 9.696993148846985e-06,
"loss": 0.1753,
"step": 317
},
{
"epoch": 0.22347153900210823,
"grad_norm": 0.5477845615559535,
"learning_rate": 9.695098119332972e-06,
"loss": 0.2167,
"step": 318
},
{
"epoch": 0.2241742796907941,
"grad_norm": 0.5308904052089458,
"learning_rate": 9.693197368784253e-06,
"loss": 0.1973,
"step": 319
},
{
"epoch": 0.22487702037947996,
"grad_norm": 0.5258594332013726,
"learning_rate": 9.691290899516912e-06,
"loss": 0.205,
"step": 320
},
{
"epoch": 0.22557976106816585,
"grad_norm": 0.5931484026375198,
"learning_rate": 9.68937871385401e-06,
"loss": 0.2379,
"step": 321
},
{
"epoch": 0.22628250175685172,
"grad_norm": 0.5421251288874882,
"learning_rate": 9.687460814125564e-06,
"loss": 0.2078,
"step": 322
},
{
"epoch": 0.22698524244553758,
"grad_norm": 0.5432947071522846,
"learning_rate": 9.685537202668562e-06,
"loss": 0.2188,
"step": 323
},
{
"epoch": 0.22768798313422348,
"grad_norm": 0.5428655853501718,
"learning_rate": 9.683607881826946e-06,
"loss": 0.2178,
"step": 324
},
{
"epoch": 0.22839072382290934,
"grad_norm": 0.599957858627133,
"learning_rate": 9.68167285395162e-06,
"loss": 0.1941,
"step": 325
},
{
"epoch": 0.22909346451159524,
"grad_norm": 0.5163974418996417,
"learning_rate": 9.679732121400435e-06,
"loss": 0.178,
"step": 326
},
{
"epoch": 0.2297962052002811,
"grad_norm": 0.547411245613051,
"learning_rate": 9.677785686538201e-06,
"loss": 0.1942,
"step": 327
},
{
"epoch": 0.23049894588896697,
"grad_norm": 0.539505049320006,
"learning_rate": 9.67583355173667e-06,
"loss": 0.1971,
"step": 328
},
{
"epoch": 0.23120168657765286,
"grad_norm": 0.4943662915871163,
"learning_rate": 9.673875719374546e-06,
"loss": 0.1813,
"step": 329
},
{
"epoch": 0.23190442726633873,
"grad_norm": 0.49792991102742756,
"learning_rate": 9.671912191837468e-06,
"loss": 0.1767,
"step": 330
},
{
"epoch": 0.2326071679550246,
"grad_norm": 0.5955250059704561,
"learning_rate": 9.669942971518019e-06,
"loss": 0.2072,
"step": 331
},
{
"epoch": 0.23330990864371048,
"grad_norm": 0.5356463210185225,
"learning_rate": 9.667968060815721e-06,
"loss": 0.1983,
"step": 332
},
{
"epoch": 0.23401264933239635,
"grad_norm": 0.5400030207757831,
"learning_rate": 9.665987462137024e-06,
"loss": 0.182,
"step": 333
},
{
"epoch": 0.23471539002108222,
"grad_norm": 0.5879893453341771,
"learning_rate": 9.664001177895312e-06,
"loss": 0.2258,
"step": 334
},
{
"epoch": 0.2354181307097681,
"grad_norm": 0.5410874019193243,
"learning_rate": 9.662009210510897e-06,
"loss": 0.2141,
"step": 335
},
{
"epoch": 0.23612087139845397,
"grad_norm": 0.5448240164751721,
"learning_rate": 9.660011562411018e-06,
"loss": 0.1632,
"step": 336
},
{
"epoch": 0.23682361208713984,
"grad_norm": 0.5532123186835779,
"learning_rate": 9.658008236029832e-06,
"loss": 0.2092,
"step": 337
},
{
"epoch": 0.23752635277582573,
"grad_norm": 0.500391469524619,
"learning_rate": 9.655999233808415e-06,
"loss": 0.1915,
"step": 338
},
{
"epoch": 0.2382290934645116,
"grad_norm": 0.546807248500238,
"learning_rate": 9.653984558194764e-06,
"loss": 0.2063,
"step": 339
},
{
"epoch": 0.23893183415319746,
"grad_norm": 0.5126701401194782,
"learning_rate": 9.651964211643784e-06,
"loss": 0.2002,
"step": 340
},
{
"epoch": 0.23963457484188336,
"grad_norm": 0.5275409703143071,
"learning_rate": 9.649938196617292e-06,
"loss": 0.1885,
"step": 341
},
{
"epoch": 0.24033731553056922,
"grad_norm": 0.56088723480931,
"learning_rate": 9.647906515584014e-06,
"loss": 0.2023,
"step": 342
},
{
"epoch": 0.2410400562192551,
"grad_norm": 0.5028945023569494,
"learning_rate": 9.645869171019578e-06,
"loss": 0.1917,
"step": 343
},
{
"epoch": 0.24174279690794098,
"grad_norm": 0.5481203732285579,
"learning_rate": 9.643826165406512e-06,
"loss": 0.2109,
"step": 344
},
{
"epoch": 0.24244553759662685,
"grad_norm": 0.5119601341263857,
"learning_rate": 9.641777501234242e-06,
"loss": 0.1767,
"step": 345
},
{
"epoch": 0.2431482782853127,
"grad_norm": 0.5455063911316448,
"learning_rate": 9.639723180999094e-06,
"loss": 0.1823,
"step": 346
},
{
"epoch": 0.2438510189739986,
"grad_norm": 0.5436455570742348,
"learning_rate": 9.637663207204279e-06,
"loss": 0.206,
"step": 347
},
{
"epoch": 0.24455375966268447,
"grad_norm": 0.5469291865780584,
"learning_rate": 9.635597582359905e-06,
"loss": 0.2156,
"step": 348
},
{
"epoch": 0.24525650035137034,
"grad_norm": 0.5349192232174842,
"learning_rate": 9.633526308982957e-06,
"loss": 0.1971,
"step": 349
},
{
"epoch": 0.24595924104005623,
"grad_norm": 0.5412118455088122,
"learning_rate": 9.631449389597307e-06,
"loss": 0.1988,
"step": 350
},
{
"epoch": 0.2466619817287421,
"grad_norm": 0.5076227227961279,
"learning_rate": 9.629366826733711e-06,
"loss": 0.1953,
"step": 351
},
{
"epoch": 0.24736472241742796,
"grad_norm": 0.581444320826964,
"learning_rate": 9.627278622929791e-06,
"loss": 0.2648,
"step": 352
},
{
"epoch": 0.24806746310611386,
"grad_norm": 0.5271444953779912,
"learning_rate": 9.625184780730058e-06,
"loss": 0.1892,
"step": 353
},
{
"epoch": 0.24877020379479972,
"grad_norm": 0.49390242477879215,
"learning_rate": 9.623085302685875e-06,
"loss": 0.1697,
"step": 354
},
{
"epoch": 0.2494729444834856,
"grad_norm": 0.5253207337911392,
"learning_rate": 9.620980191355487e-06,
"loss": 0.1848,
"step": 355
},
{
"epoch": 0.2501756851721715,
"grad_norm": 0.531312442220435,
"learning_rate": 9.618869449303996e-06,
"loss": 0.2034,
"step": 356
},
{
"epoch": 0.25087842586085735,
"grad_norm": 0.5224940697851705,
"learning_rate": 9.616753079103367e-06,
"loss": 0.1832,
"step": 357
},
{
"epoch": 0.2515811665495432,
"grad_norm": 0.5273873773882911,
"learning_rate": 9.614631083332427e-06,
"loss": 0.204,
"step": 358
},
{
"epoch": 0.2522839072382291,
"grad_norm": 0.4903812652752979,
"learning_rate": 9.61250346457685e-06,
"loss": 0.1667,
"step": 359
},
{
"epoch": 0.25298664792691494,
"grad_norm": 0.5207234544378162,
"learning_rate": 9.610370225429164e-06,
"loss": 0.1627,
"step": 360
},
{
"epoch": 0.25368938861560086,
"grad_norm": 0.5186116034731955,
"learning_rate": 9.608231368488752e-06,
"loss": 0.1853,
"step": 361
},
{
"epoch": 0.25439212930428673,
"grad_norm": 0.5170078110207293,
"learning_rate": 9.606086896361835e-06,
"loss": 0.183,
"step": 362
},
{
"epoch": 0.2550948699929726,
"grad_norm": 0.6083568217795984,
"learning_rate": 9.603936811661478e-06,
"loss": 0.1948,
"step": 363
},
{
"epoch": 0.25579761068165846,
"grad_norm": 0.5980210849151338,
"learning_rate": 9.601781117007586e-06,
"loss": 0.2054,
"step": 364
},
{
"epoch": 0.2565003513703443,
"grad_norm": 0.5522680255382288,
"learning_rate": 9.5996198150269e-06,
"loss": 0.2202,
"step": 365
},
{
"epoch": 0.2572030920590302,
"grad_norm": 0.48493716821204863,
"learning_rate": 9.597452908352994e-06,
"loss": 0.1685,
"step": 366
},
{
"epoch": 0.2579058327477161,
"grad_norm": 0.5339213828651493,
"learning_rate": 9.595280399626267e-06,
"loss": 0.1893,
"step": 367
},
{
"epoch": 0.258608573436402,
"grad_norm": 0.5292483062543865,
"learning_rate": 9.59310229149395e-06,
"loss": 0.206,
"step": 368
},
{
"epoch": 0.25931131412508784,
"grad_norm": 0.5548812323378177,
"learning_rate": 9.590918586610094e-06,
"loss": 0.1997,
"step": 369
},
{
"epoch": 0.2600140548137737,
"grad_norm": 0.5332802460465896,
"learning_rate": 9.588729287635571e-06,
"loss": 0.1792,
"step": 370
},
{
"epoch": 0.2607167955024596,
"grad_norm": 0.4956910808361609,
"learning_rate": 9.586534397238068e-06,
"loss": 0.1723,
"step": 371
},
{
"epoch": 0.26141953619114544,
"grad_norm": 0.5517713285660699,
"learning_rate": 9.584333918092085e-06,
"loss": 0.1939,
"step": 372
},
{
"epoch": 0.26212227687983136,
"grad_norm": 0.5833231321170365,
"learning_rate": 9.582127852878935e-06,
"loss": 0.1896,
"step": 373
},
{
"epoch": 0.2628250175685172,
"grad_norm": 0.5874594178160686,
"learning_rate": 9.579916204286734e-06,
"loss": 0.212,
"step": 374
},
{
"epoch": 0.2635277582572031,
"grad_norm": 0.5064635499958178,
"learning_rate": 9.577698975010402e-06,
"loss": 0.1573,
"step": 375
},
{
"epoch": 0.26423049894588896,
"grad_norm": 0.556733353232176,
"learning_rate": 9.575476167751663e-06,
"loss": 0.2104,
"step": 376
},
{
"epoch": 0.2649332396345748,
"grad_norm": 0.5068053458771671,
"learning_rate": 9.573247785219033e-06,
"loss": 0.1733,
"step": 377
},
{
"epoch": 0.2656359803232607,
"grad_norm": 0.5497150779026637,
"learning_rate": 9.571013830127822e-06,
"loss": 0.2038,
"step": 378
},
{
"epoch": 0.2663387210119466,
"grad_norm": 0.5722948262491019,
"learning_rate": 9.568774305200134e-06,
"loss": 0.2334,
"step": 379
},
{
"epoch": 0.2670414617006325,
"grad_norm": 0.5339942012379263,
"learning_rate": 9.566529213164859e-06,
"loss": 0.1951,
"step": 380
},
{
"epoch": 0.26774420238931834,
"grad_norm": 0.5460642679137601,
"learning_rate": 9.564278556757667e-06,
"loss": 0.2091,
"step": 381
},
{
"epoch": 0.2684469430780042,
"grad_norm": 0.534776590338903,
"learning_rate": 9.56202233872101e-06,
"loss": 0.2114,
"step": 382
},
{
"epoch": 0.2691496837666901,
"grad_norm": 0.5513749037432834,
"learning_rate": 9.559760561804118e-06,
"loss": 0.2183,
"step": 383
},
{
"epoch": 0.26985242445537594,
"grad_norm": 0.5639565013105937,
"learning_rate": 9.557493228762995e-06,
"loss": 0.2147,
"step": 384
},
{
"epoch": 0.27055516514406186,
"grad_norm": 0.5443366795904921,
"learning_rate": 9.555220342360412e-06,
"loss": 0.2114,
"step": 385
},
{
"epoch": 0.2712579058327477,
"grad_norm": 0.5298303349972439,
"learning_rate": 9.552941905365911e-06,
"loss": 0.1829,
"step": 386
},
{
"epoch": 0.2719606465214336,
"grad_norm": 0.5680275927151393,
"learning_rate": 9.550657920555794e-06,
"loss": 0.2319,
"step": 387
},
{
"epoch": 0.27266338721011946,
"grad_norm": 0.5336137799727587,
"learning_rate": 9.548368390713126e-06,
"loss": 0.214,
"step": 388
},
{
"epoch": 0.2733661278988053,
"grad_norm": 0.5074047669128428,
"learning_rate": 9.546073318627726e-06,
"loss": 0.172,
"step": 389
},
{
"epoch": 0.27406886858749124,
"grad_norm": 0.5347261724203398,
"learning_rate": 9.543772707096169e-06,
"loss": 0.2168,
"step": 390
},
{
"epoch": 0.2747716092761771,
"grad_norm": 0.5107301810364906,
"learning_rate": 9.541466558921777e-06,
"loss": 0.1819,
"step": 391
},
{
"epoch": 0.275474349964863,
"grad_norm": 0.5368258971699591,
"learning_rate": 9.53915487691462e-06,
"loss": 0.1791,
"step": 392
},
{
"epoch": 0.27617709065354884,
"grad_norm": 0.5194259665889182,
"learning_rate": 9.536837663891511e-06,
"loss": 0.1967,
"step": 393
},
{
"epoch": 0.2768798313422347,
"grad_norm": 0.5211561192261743,
"learning_rate": 9.534514922676003e-06,
"loss": 0.2022,
"step": 394
},
{
"epoch": 0.27758257203092057,
"grad_norm": 0.5248500327348706,
"learning_rate": 9.532186656098384e-06,
"loss": 0.1912,
"step": 395
},
{
"epoch": 0.2782853127196065,
"grad_norm": 0.5513383222078235,
"learning_rate": 9.529852866995676e-06,
"loss": 0.2188,
"step": 396
},
{
"epoch": 0.27898805340829236,
"grad_norm": 0.478487035975437,
"learning_rate": 9.52751355821163e-06,
"loss": 0.1652,
"step": 397
},
{
"epoch": 0.2796907940969782,
"grad_norm": 0.5525603762782381,
"learning_rate": 9.525168732596722e-06,
"loss": 0.2114,
"step": 398
},
{
"epoch": 0.2803935347856641,
"grad_norm": 0.5232596487449998,
"learning_rate": 9.522818393008148e-06,
"loss": 0.1987,
"step": 399
},
{
"epoch": 0.28109627547434995,
"grad_norm": 0.5145451382660522,
"learning_rate": 9.520462542309832e-06,
"loss": 0.2027,
"step": 400
},
{
"epoch": 0.2817990161630358,
"grad_norm": 0.5452391125340568,
"learning_rate": 9.518101183372402e-06,
"loss": 0.2094,
"step": 401
},
{
"epoch": 0.28250175685172174,
"grad_norm": 0.5320164325853812,
"learning_rate": 9.515734319073204e-06,
"loss": 0.1851,
"step": 402
},
{
"epoch": 0.2832044975404076,
"grad_norm": 0.524402173661058,
"learning_rate": 9.51336195229629e-06,
"loss": 0.2004,
"step": 403
},
{
"epoch": 0.2839072382290935,
"grad_norm": 0.5265133901839926,
"learning_rate": 9.510984085932421e-06,
"loss": 0.1955,
"step": 404
},
{
"epoch": 0.28460997891777934,
"grad_norm": 0.514065778497817,
"learning_rate": 9.508600722879055e-06,
"loss": 0.1871,
"step": 405
},
{
"epoch": 0.2853127196064652,
"grad_norm": 0.5575578059566517,
"learning_rate": 9.50621186604035e-06,
"loss": 0.2344,
"step": 406
},
{
"epoch": 0.28601546029515107,
"grad_norm": 0.5326375778389936,
"learning_rate": 9.503817518327157e-06,
"loss": 0.1864,
"step": 407
},
{
"epoch": 0.286718200983837,
"grad_norm": 0.5235959853643891,
"learning_rate": 9.501417682657015e-06,
"loss": 0.1807,
"step": 408
},
{
"epoch": 0.28742094167252286,
"grad_norm": 0.5801383878544786,
"learning_rate": 9.499012361954156e-06,
"loss": 0.2237,
"step": 409
},
{
"epoch": 0.2881236823612087,
"grad_norm": 0.5530514175655553,
"learning_rate": 9.496601559149494e-06,
"loss": 0.2279,
"step": 410
},
{
"epoch": 0.2888264230498946,
"grad_norm": 0.5512139697013972,
"learning_rate": 9.494185277180619e-06,
"loss": 0.2175,
"step": 411
},
{
"epoch": 0.28952916373858045,
"grad_norm": 0.5164819220405377,
"learning_rate": 9.491763518991803e-06,
"loss": 0.1875,
"step": 412
},
{
"epoch": 0.2902319044272663,
"grad_norm": 0.5386437078889298,
"learning_rate": 9.489336287533985e-06,
"loss": 0.1998,
"step": 413
},
{
"epoch": 0.29093464511595224,
"grad_norm": 0.5236451663214202,
"learning_rate": 9.486903585764778e-06,
"loss": 0.1985,
"step": 414
},
{
"epoch": 0.2916373858046381,
"grad_norm": 0.4867428696576212,
"learning_rate": 9.48446541664846e-06,
"loss": 0.165,
"step": 415
},
{
"epoch": 0.29234012649332397,
"grad_norm": 0.5232948042295151,
"learning_rate": 9.482021783155971e-06,
"loss": 0.2087,
"step": 416
},
{
"epoch": 0.29304286718200984,
"grad_norm": 0.46481489245879515,
"learning_rate": 9.479572688264902e-06,
"loss": 0.1562,
"step": 417
},
{
"epoch": 0.2937456078706957,
"grad_norm": 0.5361695405334741,
"learning_rate": 9.477118134959513e-06,
"loss": 0.2044,
"step": 418
},
{
"epoch": 0.29444834855938157,
"grad_norm": 0.5142493311144378,
"learning_rate": 9.474658126230702e-06,
"loss": 0.2029,
"step": 419
},
{
"epoch": 0.2951510892480675,
"grad_norm": 0.5377969872183673,
"learning_rate": 9.472192665076023e-06,
"loss": 0.2086,
"step": 420
},
{
"epoch": 0.29585382993675335,
"grad_norm": 0.5211562808056464,
"learning_rate": 9.46972175449967e-06,
"loss": 0.1747,
"step": 421
},
{
"epoch": 0.2965565706254392,
"grad_norm": 0.5063681882231872,
"learning_rate": 9.467245397512475e-06,
"loss": 0.1918,
"step": 422
},
{
"epoch": 0.2972593113141251,
"grad_norm": 0.5349189194767427,
"learning_rate": 9.464763597131914e-06,
"loss": 0.1693,
"step": 423
},
{
"epoch": 0.29796205200281095,
"grad_norm": 0.5060109119066764,
"learning_rate": 9.46227635638209e-06,
"loss": 0.1691,
"step": 424
},
{
"epoch": 0.2986647926914968,
"grad_norm": 0.5112413048034882,
"learning_rate": 9.459783678293732e-06,
"loss": 0.2144,
"step": 425
},
{
"epoch": 0.29936753338018274,
"grad_norm": 0.48287792076678004,
"learning_rate": 9.457285565904204e-06,
"loss": 0.1611,
"step": 426
},
{
"epoch": 0.3000702740688686,
"grad_norm": 0.54633800687917,
"learning_rate": 9.454782022257485e-06,
"loss": 0.1926,
"step": 427
},
{
"epoch": 0.30077301475755447,
"grad_norm": 0.4956322086389379,
"learning_rate": 9.452273050404173e-06,
"loss": 0.1812,
"step": 428
},
{
"epoch": 0.30147575544624033,
"grad_norm": 0.5396954853131288,
"learning_rate": 9.449758653401482e-06,
"loss": 0.2046,
"step": 429
},
{
"epoch": 0.3021784961349262,
"grad_norm": 0.5196303590332476,
"learning_rate": 9.447238834313235e-06,
"loss": 0.1871,
"step": 430
},
{
"epoch": 0.30288123682361207,
"grad_norm": 0.5428731896865115,
"learning_rate": 9.444713596209863e-06,
"loss": 0.2102,
"step": 431
},
{
"epoch": 0.303583977512298,
"grad_norm": 0.5419723366224838,
"learning_rate": 9.442182942168398e-06,
"loss": 0.2035,
"step": 432
},
{
"epoch": 0.30428671820098385,
"grad_norm": 0.5498847598478627,
"learning_rate": 9.439646875272476e-06,
"loss": 0.2174,
"step": 433
},
{
"epoch": 0.3049894588896697,
"grad_norm": 0.527950833410309,
"learning_rate": 9.437105398612323e-06,
"loss": 0.2059,
"step": 434
},
{
"epoch": 0.3056921995783556,
"grad_norm": 0.5305130279989605,
"learning_rate": 9.434558515284761e-06,
"loss": 0.2063,
"step": 435
},
{
"epoch": 0.30639494026704145,
"grad_norm": 0.5394780225533176,
"learning_rate": 9.432006228393198e-06,
"loss": 0.1941,
"step": 436
},
{
"epoch": 0.3070976809557273,
"grad_norm": 0.5467103824266542,
"learning_rate": 9.429448541047627e-06,
"loss": 0.1817,
"step": 437
},
{
"epoch": 0.30780042164441324,
"grad_norm": 0.5190838331472895,
"learning_rate": 9.426885456364622e-06,
"loss": 0.2078,
"step": 438
},
{
"epoch": 0.3085031623330991,
"grad_norm": 0.5179531242157348,
"learning_rate": 9.424316977467332e-06,
"loss": 0.1914,
"step": 439
},
{
"epoch": 0.30920590302178497,
"grad_norm": 0.5211061345526468,
"learning_rate": 9.42174310748548e-06,
"loss": 0.2044,
"step": 440
},
{
"epoch": 0.30990864371047083,
"grad_norm": 0.5001258576515429,
"learning_rate": 9.419163849555359e-06,
"loss": 0.2019,
"step": 441
},
{
"epoch": 0.3106113843991567,
"grad_norm": 0.558385311192709,
"learning_rate": 9.416579206819828e-06,
"loss": 0.2271,
"step": 442
},
{
"epoch": 0.31131412508784256,
"grad_norm": 0.5526340473234094,
"learning_rate": 9.413989182428303e-06,
"loss": 0.2016,
"step": 443
},
{
"epoch": 0.3120168657765285,
"grad_norm": 0.47924620283664776,
"learning_rate": 9.411393779536761e-06,
"loss": 0.1607,
"step": 444
},
{
"epoch": 0.31271960646521435,
"grad_norm": 0.49526228123251687,
"learning_rate": 9.408793001307734e-06,
"loss": 0.1871,
"step": 445
},
{
"epoch": 0.3134223471539002,
"grad_norm": 0.5341836166522139,
"learning_rate": 9.406186850910301e-06,
"loss": 0.1916,
"step": 446
},
{
"epoch": 0.3141250878425861,
"grad_norm": 0.51251185690614,
"learning_rate": 9.403575331520089e-06,
"loss": 0.1894,
"step": 447
},
{
"epoch": 0.31482782853127195,
"grad_norm": 0.5091841651296144,
"learning_rate": 9.400958446319267e-06,
"loss": 0.1896,
"step": 448
},
{
"epoch": 0.3155305692199578,
"grad_norm": 0.4946655079357005,
"learning_rate": 9.398336198496538e-06,
"loss": 0.1755,
"step": 449
},
{
"epoch": 0.31623330990864373,
"grad_norm": 0.5003863682492026,
"learning_rate": 9.395708591247148e-06,
"loss": 0.1659,
"step": 450
},
{
"epoch": 0.3169360505973296,
"grad_norm": 0.5046152034040112,
"learning_rate": 9.393075627772865e-06,
"loss": 0.1709,
"step": 451
},
{
"epoch": 0.31763879128601546,
"grad_norm": 0.4704925664122499,
"learning_rate": 9.39043731128199e-06,
"loss": 0.1523,
"step": 452
},
{
"epoch": 0.31834153197470133,
"grad_norm": 0.5369046558983234,
"learning_rate": 9.387793644989342e-06,
"loss": 0.217,
"step": 453
},
{
"epoch": 0.3190442726633872,
"grad_norm": 0.5340721759585088,
"learning_rate": 9.385144632116263e-06,
"loss": 0.2148,
"step": 454
},
{
"epoch": 0.31974701335207306,
"grad_norm": 0.543171250126434,
"learning_rate": 9.382490275890606e-06,
"loss": 0.2079,
"step": 455
},
{
"epoch": 0.320449754040759,
"grad_norm": 0.5217566731039174,
"learning_rate": 9.379830579546736e-06,
"loss": 0.1975,
"step": 456
},
{
"epoch": 0.32115249472944485,
"grad_norm": 0.526622633076201,
"learning_rate": 9.377165546325529e-06,
"loss": 0.1912,
"step": 457
},
{
"epoch": 0.3218552354181307,
"grad_norm": 0.5626287070018154,
"learning_rate": 9.374495179474356e-06,
"loss": 0.2117,
"step": 458
},
{
"epoch": 0.3225579761068166,
"grad_norm": 0.4978836844851344,
"learning_rate": 9.371819482247095e-06,
"loss": 0.1806,
"step": 459
},
{
"epoch": 0.32326071679550245,
"grad_norm": 0.5430850460420662,
"learning_rate": 9.369138457904116e-06,
"loss": 0.2099,
"step": 460
},
{
"epoch": 0.3239634574841883,
"grad_norm": 0.540097593922978,
"learning_rate": 9.36645210971228e-06,
"loss": 0.2025,
"step": 461
},
{
"epoch": 0.32466619817287423,
"grad_norm": 0.5604909326477547,
"learning_rate": 9.363760440944933e-06,
"loss": 0.191,
"step": 462
},
{
"epoch": 0.3253689388615601,
"grad_norm": 0.5613882972602502,
"learning_rate": 9.361063454881909e-06,
"loss": 0.2166,
"step": 463
},
{
"epoch": 0.32607167955024596,
"grad_norm": 0.5277972773706446,
"learning_rate": 9.358361154809517e-06,
"loss": 0.216,
"step": 464
},
{
"epoch": 0.32677442023893183,
"grad_norm": 0.5496623992313513,
"learning_rate": 9.355653544020543e-06,
"loss": 0.2022,
"step": 465
},
{
"epoch": 0.3274771609276177,
"grad_norm": 0.5349380111318438,
"learning_rate": 9.352940625814244e-06,
"loss": 0.1948,
"step": 466
},
{
"epoch": 0.32817990161630356,
"grad_norm": 0.5204199184660335,
"learning_rate": 9.350222403496348e-06,
"loss": 0.2023,
"step": 467
},
{
"epoch": 0.3288826423049895,
"grad_norm": 0.5299224780941074,
"learning_rate": 9.347498880379036e-06,
"loss": 0.198,
"step": 468
},
{
"epoch": 0.32958538299367535,
"grad_norm": 0.48184730540740855,
"learning_rate": 9.344770059780957e-06,
"loss": 0.1702,
"step": 469
},
{
"epoch": 0.3302881236823612,
"grad_norm": 0.5342698219695755,
"learning_rate": 9.342035945027213e-06,
"loss": 0.1939,
"step": 470
},
{
"epoch": 0.3309908643710471,
"grad_norm": 0.5534613984953773,
"learning_rate": 9.339296539449356e-06,
"loss": 0.1853,
"step": 471
},
{
"epoch": 0.33169360505973294,
"grad_norm": 0.5249296127883286,
"learning_rate": 9.336551846385386e-06,
"loss": 0.1938,
"step": 472
},
{
"epoch": 0.3323963457484188,
"grad_norm": 0.5483291331444264,
"learning_rate": 9.333801869179743e-06,
"loss": 0.2144,
"step": 473
},
{
"epoch": 0.33309908643710473,
"grad_norm": 0.5505045621076627,
"learning_rate": 9.331046611183311e-06,
"loss": 0.2044,
"step": 474
},
{
"epoch": 0.3338018271257906,
"grad_norm": 0.545549121039099,
"learning_rate": 9.328286075753402e-06,
"loss": 0.1791,
"step": 475
},
{
"epoch": 0.33450456781447646,
"grad_norm": 0.5594469699398668,
"learning_rate": 9.325520266253769e-06,
"loss": 0.2149,
"step": 476
},
{
"epoch": 0.3352073085031623,
"grad_norm": 0.5261973827645597,
"learning_rate": 9.322749186054577e-06,
"loss": 0.1813,
"step": 477
},
{
"epoch": 0.3359100491918482,
"grad_norm": 0.5510767918170081,
"learning_rate": 9.319972838532425e-06,
"loss": 0.2099,
"step": 478
},
{
"epoch": 0.33661278988053406,
"grad_norm": 0.5788350977612923,
"learning_rate": 9.317191227070327e-06,
"loss": 0.2346,
"step": 479
},
{
"epoch": 0.33731553056922,
"grad_norm": 0.5221987371523338,
"learning_rate": 9.314404355057708e-06,
"loss": 0.1936,
"step": 480
},
{
"epoch": 0.33801827125790584,
"grad_norm": 0.5155335450625089,
"learning_rate": 9.311612225890411e-06,
"loss": 0.1808,
"step": 481
},
{
"epoch": 0.3387210119465917,
"grad_norm": 0.5326628394952834,
"learning_rate": 9.308814842970675e-06,
"loss": 0.1693,
"step": 482
},
{
"epoch": 0.3394237526352776,
"grad_norm": 0.5181443109018903,
"learning_rate": 9.306012209707145e-06,
"loss": 0.2128,
"step": 483
},
{
"epoch": 0.34012649332396344,
"grad_norm": 0.5665392002357718,
"learning_rate": 9.303204329514868e-06,
"loss": 0.2306,
"step": 484
},
{
"epoch": 0.3408292340126493,
"grad_norm": 0.49013232207237717,
"learning_rate": 9.300391205815276e-06,
"loss": 0.1947,
"step": 485
},
{
"epoch": 0.34153197470133523,
"grad_norm": 0.5232919096297761,
"learning_rate": 9.297572842036199e-06,
"loss": 0.187,
"step": 486
},
{
"epoch": 0.3422347153900211,
"grad_norm": 0.5040579327331741,
"learning_rate": 9.294749241611845e-06,
"loss": 0.1939,
"step": 487
},
{
"epoch": 0.34293745607870696,
"grad_norm": 0.5094561317464454,
"learning_rate": 9.291920407982807e-06,
"loss": 0.1966,
"step": 488
},
{
"epoch": 0.3436401967673928,
"grad_norm": 0.5151638920794955,
"learning_rate": 9.289086344596055e-06,
"loss": 0.1829,
"step": 489
},
{
"epoch": 0.3443429374560787,
"grad_norm": 0.548953154810862,
"learning_rate": 9.286247054904926e-06,
"loss": 0.2057,
"step": 490
},
{
"epoch": 0.34504567814476456,
"grad_norm": 0.504122354329602,
"learning_rate": 9.283402542369132e-06,
"loss": 0.1703,
"step": 491
},
{
"epoch": 0.3457484188334505,
"grad_norm": 0.5408868358898966,
"learning_rate": 9.280552810454745e-06,
"loss": 0.2186,
"step": 492
},
{
"epoch": 0.34645115952213634,
"grad_norm": 0.48896282326675716,
"learning_rate": 9.277697862634203e-06,
"loss": 0.1735,
"step": 493
},
{
"epoch": 0.3471539002108222,
"grad_norm": 0.5282931074682675,
"learning_rate": 9.274837702386287e-06,
"loss": 0.2181,
"step": 494
},
{
"epoch": 0.3478566408995081,
"grad_norm": 0.5564622571714379,
"learning_rate": 9.271972333196145e-06,
"loss": 0.2196,
"step": 495
},
{
"epoch": 0.34855938158819394,
"grad_norm": 0.543549952419283,
"learning_rate": 9.26910175855526e-06,
"loss": 0.2073,
"step": 496
},
{
"epoch": 0.3492621222768798,
"grad_norm": 0.5104148895528936,
"learning_rate": 9.266225981961463e-06,
"loss": 0.1874,
"step": 497
},
{
"epoch": 0.3499648629655657,
"grad_norm": 0.5251709011006419,
"learning_rate": 9.263345006918926e-06,
"loss": 0.2071,
"step": 498
},
{
"epoch": 0.3506676036542516,
"grad_norm": 0.5021423425183384,
"learning_rate": 9.260458836938148e-06,
"loss": 0.1821,
"step": 499
},
{
"epoch": 0.35137034434293746,
"grad_norm": 0.5134047019086833,
"learning_rate": 9.257567475535966e-06,
"loss": 0.1959,
"step": 500
},
{
"epoch": 0.35137034434293746,
"eval_loss": 0.197735995054245,
"eval_runtime": 10.8924,
"eval_samples_per_second": 21.116,
"eval_steps_per_second": 5.325,
"step": 500
},
{
"epoch": 0.3520730850316233,
"grad_norm": 0.5483891410899071,
"learning_rate": 9.254670926235538e-06,
"loss": 0.2173,
"step": 501
},
{
"epoch": 0.3527758257203092,
"grad_norm": 0.5422389677547926,
"learning_rate": 9.251769192566346e-06,
"loss": 0.2066,
"step": 502
},
{
"epoch": 0.35347856640899505,
"grad_norm": 0.4955794411352627,
"learning_rate": 9.248862278064188e-06,
"loss": 0.1846,
"step": 503
},
{
"epoch": 0.354181307097681,
"grad_norm": 0.5297565807446326,
"learning_rate": 9.24595018627117e-06,
"loss": 0.1887,
"step": 504
},
{
"epoch": 0.35488404778636684,
"grad_norm": 0.5923727821439864,
"learning_rate": 9.243032920735719e-06,
"loss": 0.244,
"step": 505
},
{
"epoch": 0.3555867884750527,
"grad_norm": 0.5034976230839563,
"learning_rate": 9.240110485012557e-06,
"loss": 0.1845,
"step": 506
},
{
"epoch": 0.35628952916373857,
"grad_norm": 0.4945164136866163,
"learning_rate": 9.237182882662705e-06,
"loss": 0.1854,
"step": 507
},
{
"epoch": 0.35699226985242444,
"grad_norm": 0.5131450742064524,
"learning_rate": 9.234250117253482e-06,
"loss": 0.2018,
"step": 508
},
{
"epoch": 0.3576950105411103,
"grad_norm": 0.5735270387984924,
"learning_rate": 9.231312192358504e-06,
"loss": 0.2125,
"step": 509
},
{
"epoch": 0.3583977512297962,
"grad_norm": 0.5552995583028044,
"learning_rate": 9.228369111557663e-06,
"loss": 0.2142,
"step": 510
},
{
"epoch": 0.3591004919184821,
"grad_norm": 0.5271596438833579,
"learning_rate": 9.22542087843714e-06,
"loss": 0.2017,
"step": 511
},
{
"epoch": 0.35980323260716796,
"grad_norm": 0.5371328438167742,
"learning_rate": 9.222467496589398e-06,
"loss": 0.1976,
"step": 512
},
{
"epoch": 0.3605059732958538,
"grad_norm": 0.5006277197363624,
"learning_rate": 9.219508969613164e-06,
"loss": 0.1799,
"step": 513
},
{
"epoch": 0.3612087139845397,
"grad_norm": 0.5349029583291667,
"learning_rate": 9.21654530111344e-06,
"loss": 0.1962,
"step": 514
},
{
"epoch": 0.36191145467322555,
"grad_norm": 0.524522944941706,
"learning_rate": 9.213576494701496e-06,
"loss": 0.1676,
"step": 515
},
{
"epoch": 0.3626141953619115,
"grad_norm": 0.5329908252072586,
"learning_rate": 9.210602553994854e-06,
"loss": 0.2256,
"step": 516
},
{
"epoch": 0.36331693605059734,
"grad_norm": 0.5495720248309052,
"learning_rate": 9.2076234826173e-06,
"loss": 0.1975,
"step": 517
},
{
"epoch": 0.3640196767392832,
"grad_norm": 0.5726123218143253,
"learning_rate": 9.204639284198871e-06,
"loss": 0.1904,
"step": 518
},
{
"epoch": 0.36472241742796907,
"grad_norm": 0.5345104526063583,
"learning_rate": 9.201649962375845e-06,
"loss": 0.1804,
"step": 519
},
{
"epoch": 0.36542515811665494,
"grad_norm": 0.5494850097074085,
"learning_rate": 9.19865552079075e-06,
"loss": 0.1872,
"step": 520
},
{
"epoch": 0.36612789880534086,
"grad_norm": 0.5319764484978339,
"learning_rate": 9.195655963092349e-06,
"loss": 0.2019,
"step": 521
},
{
"epoch": 0.3668306394940267,
"grad_norm": 0.47753457015015016,
"learning_rate": 9.192651292935642e-06,
"loss": 0.1707,
"step": 522
},
{
"epoch": 0.3675333801827126,
"grad_norm": 0.5575533755113633,
"learning_rate": 9.189641513981854e-06,
"loss": 0.2247,
"step": 523
},
{
"epoch": 0.36823612087139845,
"grad_norm": 0.5268585908928828,
"learning_rate": 9.186626629898439e-06,
"loss": 0.1929,
"step": 524
},
{
"epoch": 0.3689388615600843,
"grad_norm": 0.5187645216202305,
"learning_rate": 9.183606644359069e-06,
"loss": 0.2046,
"step": 525
},
{
"epoch": 0.3696416022487702,
"grad_norm": 0.5180777894741176,
"learning_rate": 9.180581561043633e-06,
"loss": 0.1788,
"step": 526
},
{
"epoch": 0.3703443429374561,
"grad_norm": 0.5077472213445343,
"learning_rate": 9.177551383638235e-06,
"loss": 0.1884,
"step": 527
},
{
"epoch": 0.37104708362614197,
"grad_norm": 0.49435628317139846,
"learning_rate": 9.174516115835181e-06,
"loss": 0.1713,
"step": 528
},
{
"epoch": 0.37174982431482784,
"grad_norm": 0.5360306241142581,
"learning_rate": 9.171475761332985e-06,
"loss": 0.1977,
"step": 529
},
{
"epoch": 0.3724525650035137,
"grad_norm": 0.5161474322247328,
"learning_rate": 9.168430323836351e-06,
"loss": 0.1885,
"step": 530
},
{
"epoch": 0.37315530569219957,
"grad_norm": 0.5076498278434765,
"learning_rate": 9.165379807056187e-06,
"loss": 0.172,
"step": 531
},
{
"epoch": 0.37385804638088543,
"grad_norm": 0.5281747548967208,
"learning_rate": 9.162324214709582e-06,
"loss": 0.211,
"step": 532
},
{
"epoch": 0.37456078706957135,
"grad_norm": 0.543230684176949,
"learning_rate": 9.159263550519814e-06,
"loss": 0.1921,
"step": 533
},
{
"epoch": 0.3752635277582572,
"grad_norm": 0.5084812933944886,
"learning_rate": 9.15619781821634e-06,
"loss": 0.1841,
"step": 534
},
{
"epoch": 0.3759662684469431,
"grad_norm": 0.5291350492742669,
"learning_rate": 9.153127021534792e-06,
"loss": 0.2138,
"step": 535
},
{
"epoch": 0.37666900913562895,
"grad_norm": 0.4811297400412177,
"learning_rate": 9.150051164216976e-06,
"loss": 0.184,
"step": 536
},
{
"epoch": 0.3773717498243148,
"grad_norm": 0.5151432859399669,
"learning_rate": 9.146970250010857e-06,
"loss": 0.1844,
"step": 537
},
{
"epoch": 0.3780744905130007,
"grad_norm": 0.5399791772673718,
"learning_rate": 9.143884282670572e-06,
"loss": 0.2047,
"step": 538
},
{
"epoch": 0.3787772312016866,
"grad_norm": 0.5142260800657374,
"learning_rate": 9.140793265956405e-06,
"loss": 0.1737,
"step": 539
},
{
"epoch": 0.37947997189037247,
"grad_norm": 0.516094797857363,
"learning_rate": 9.1376972036348e-06,
"loss": 0.2008,
"step": 540
},
{
"epoch": 0.38018271257905834,
"grad_norm": 0.5039266568929068,
"learning_rate": 9.13459609947835e-06,
"loss": 0.1791,
"step": 541
},
{
"epoch": 0.3808854532677442,
"grad_norm": 0.5331270278668164,
"learning_rate": 9.131489957265785e-06,
"loss": 0.1796,
"step": 542
},
{
"epoch": 0.38158819395643007,
"grad_norm": 0.530022538198907,
"learning_rate": 9.12837878078198e-06,
"loss": 0.2087,
"step": 543
},
{
"epoch": 0.38229093464511593,
"grad_norm": 0.49190936568072163,
"learning_rate": 9.125262573817937e-06,
"loss": 0.1788,
"step": 544
},
{
"epoch": 0.38299367533380185,
"grad_norm": 0.49930721918609494,
"learning_rate": 9.122141340170797e-06,
"loss": 0.191,
"step": 545
},
{
"epoch": 0.3836964160224877,
"grad_norm": 0.5201647469830472,
"learning_rate": 9.119015083643819e-06,
"loss": 0.1956,
"step": 546
},
{
"epoch": 0.3843991567111736,
"grad_norm": 0.5114303438446094,
"learning_rate": 9.115883808046388e-06,
"loss": 0.1813,
"step": 547
},
{
"epoch": 0.38510189739985945,
"grad_norm": 0.5120282022833964,
"learning_rate": 9.112747517193998e-06,
"loss": 0.1968,
"step": 548
},
{
"epoch": 0.3858046380885453,
"grad_norm": 0.5230390160471704,
"learning_rate": 9.10960621490826e-06,
"loss": 0.2197,
"step": 549
},
{
"epoch": 0.3865073787772312,
"grad_norm": 0.5079726838327981,
"learning_rate": 9.106459905016889e-06,
"loss": 0.1902,
"step": 550
},
{
"epoch": 0.3872101194659171,
"grad_norm": 0.5035005825294026,
"learning_rate": 9.103308591353704e-06,
"loss": 0.1952,
"step": 551
},
{
"epoch": 0.38791286015460297,
"grad_norm": 0.516238460175823,
"learning_rate": 9.100152277758616e-06,
"loss": 0.1855,
"step": 552
},
{
"epoch": 0.38861560084328883,
"grad_norm": 0.5267325979480687,
"learning_rate": 9.096990968077632e-06,
"loss": 0.2013,
"step": 553
},
{
"epoch": 0.3893183415319747,
"grad_norm": 0.5087934813758571,
"learning_rate": 9.093824666162851e-06,
"loss": 0.1747,
"step": 554
},
{
"epoch": 0.39002108222066056,
"grad_norm": 0.5016214126441332,
"learning_rate": 9.090653375872446e-06,
"loss": 0.1749,
"step": 555
},
{
"epoch": 0.39072382290934643,
"grad_norm": 0.5568674230647427,
"learning_rate": 9.087477101070676e-06,
"loss": 0.1934,
"step": 556
},
{
"epoch": 0.39142656359803235,
"grad_norm": 0.5258018583878791,
"learning_rate": 9.08429584562787e-06,
"loss": 0.2019,
"step": 557
},
{
"epoch": 0.3921293042867182,
"grad_norm": 0.4839118458012187,
"learning_rate": 9.081109613420428e-06,
"loss": 0.1882,
"step": 558
},
{
"epoch": 0.3928320449754041,
"grad_norm": 0.537561444315623,
"learning_rate": 9.07791840833081e-06,
"loss": 0.182,
"step": 559
},
{
"epoch": 0.39353478566408995,
"grad_norm": 0.4880111814877124,
"learning_rate": 9.07472223424754e-06,
"loss": 0.1779,
"step": 560
},
{
"epoch": 0.3942375263527758,
"grad_norm": 0.47498275665443646,
"learning_rate": 9.071521095065198e-06,
"loss": 0.1738,
"step": 561
},
{
"epoch": 0.3949402670414617,
"grad_norm": 0.5312325520155257,
"learning_rate": 9.068314994684408e-06,
"loss": 0.2117,
"step": 562
},
{
"epoch": 0.3956430077301476,
"grad_norm": 0.5301298438603556,
"learning_rate": 9.065103937011845e-06,
"loss": 0.2019,
"step": 563
},
{
"epoch": 0.39634574841883347,
"grad_norm": 0.5128028520964962,
"learning_rate": 9.061887925960219e-06,
"loss": 0.1727,
"step": 564
},
{
"epoch": 0.39704848910751933,
"grad_norm": 0.5366622811720921,
"learning_rate": 9.058666965448284e-06,
"loss": 0.1896,
"step": 565
},
{
"epoch": 0.3977512297962052,
"grad_norm": 0.5117621492230902,
"learning_rate": 9.055441059400817e-06,
"loss": 0.1978,
"step": 566
},
{
"epoch": 0.39845397048489106,
"grad_norm": 0.4915840023844641,
"learning_rate": 9.05221021174862e-06,
"loss": 0.1712,
"step": 567
},
{
"epoch": 0.39915671117357693,
"grad_norm": 0.5108306348480334,
"learning_rate": 9.048974426428527e-06,
"loss": 0.1786,
"step": 568
},
{
"epoch": 0.39985945186226285,
"grad_norm": 0.6042044973210363,
"learning_rate": 9.04573370738338e-06,
"loss": 0.2518,
"step": 569
},
{
"epoch": 0.4005621925509487,
"grad_norm": 0.4896303170997346,
"learning_rate": 9.042488058562036e-06,
"loss": 0.1755,
"step": 570
},
{
"epoch": 0.4012649332396346,
"grad_norm": 0.5178900338599037,
"learning_rate": 9.039237483919355e-06,
"loss": 0.1779,
"step": 571
},
{
"epoch": 0.40196767392832045,
"grad_norm": 0.5035747551640741,
"learning_rate": 9.035981987416204e-06,
"loss": 0.1826,
"step": 572
},
{
"epoch": 0.4026704146170063,
"grad_norm": 0.4841001561572722,
"learning_rate": 9.032721573019445e-06,
"loss": 0.1761,
"step": 573
},
{
"epoch": 0.4033731553056922,
"grad_norm": 0.4894780615391314,
"learning_rate": 9.029456244701933e-06,
"loss": 0.1946,
"step": 574
},
{
"epoch": 0.4040758959943781,
"grad_norm": 0.4931922831677282,
"learning_rate": 9.026186006442512e-06,
"loss": 0.1709,
"step": 575
},
{
"epoch": 0.40477863668306396,
"grad_norm": 0.5294896422319311,
"learning_rate": 9.022910862226005e-06,
"loss": 0.2096,
"step": 576
},
{
"epoch": 0.40548137737174983,
"grad_norm": 0.52772562716086,
"learning_rate": 9.019630816043218e-06,
"loss": 0.1929,
"step": 577
},
{
"epoch": 0.4061841180604357,
"grad_norm": 0.5262259523881948,
"learning_rate": 9.016345871890927e-06,
"loss": 0.1909,
"step": 578
},
{
"epoch": 0.40688685874912156,
"grad_norm": 0.4958464311424527,
"learning_rate": 9.013056033771874e-06,
"loss": 0.1829,
"step": 579
},
{
"epoch": 0.4075895994378074,
"grad_norm": 0.49237353080900215,
"learning_rate": 9.009761305694771e-06,
"loss": 0.1674,
"step": 580
},
{
"epoch": 0.40829234012649335,
"grad_norm": 0.5543604620220995,
"learning_rate": 9.006461691674282e-06,
"loss": 0.2006,
"step": 581
},
{
"epoch": 0.4089950808151792,
"grad_norm": 0.5570444226404165,
"learning_rate": 9.003157195731028e-06,
"loss": 0.2088,
"step": 582
},
{
"epoch": 0.4096978215038651,
"grad_norm": 0.5289121008170838,
"learning_rate": 8.999847821891578e-06,
"loss": 0.1617,
"step": 583
},
{
"epoch": 0.41040056219255094,
"grad_norm": 0.5016680673979437,
"learning_rate": 8.996533574188446e-06,
"loss": 0.164,
"step": 584
},
{
"epoch": 0.4111033028812368,
"grad_norm": 0.5546393120307688,
"learning_rate": 8.99321445666008e-06,
"loss": 0.2161,
"step": 585
},
{
"epoch": 0.4118060435699227,
"grad_norm": 0.5212929936467661,
"learning_rate": 8.989890473350869e-06,
"loss": 0.1761,
"step": 586
},
{
"epoch": 0.4125087842586086,
"grad_norm": 0.547983163784588,
"learning_rate": 8.986561628311125e-06,
"loss": 0.2059,
"step": 587
},
{
"epoch": 0.41321152494729446,
"grad_norm": 0.5060325503395477,
"learning_rate": 8.983227925597089e-06,
"loss": 0.1593,
"step": 588
},
{
"epoch": 0.4139142656359803,
"grad_norm": 0.5136669444711566,
"learning_rate": 8.979889369270918e-06,
"loss": 0.1866,
"step": 589
},
{
"epoch": 0.4146170063246662,
"grad_norm": 0.4796282381622905,
"learning_rate": 8.97654596340068e-06,
"loss": 0.1658,
"step": 590
},
{
"epoch": 0.41531974701335206,
"grad_norm": 0.4949764849962763,
"learning_rate": 8.973197712060362e-06,
"loss": 0.1444,
"step": 591
},
{
"epoch": 0.4160224877020379,
"grad_norm": 0.5618864872781972,
"learning_rate": 8.969844619329846e-06,
"loss": 0.2107,
"step": 592
},
{
"epoch": 0.41672522839072385,
"grad_norm": 0.4998169797231573,
"learning_rate": 8.966486689294917e-06,
"loss": 0.1839,
"step": 593
},
{
"epoch": 0.4174279690794097,
"grad_norm": 0.5208193294012201,
"learning_rate": 8.963123926047256e-06,
"loss": 0.1898,
"step": 594
},
{
"epoch": 0.4181307097680956,
"grad_norm": 0.48987306167909656,
"learning_rate": 8.959756333684428e-06,
"loss": 0.1804,
"step": 595
},
{
"epoch": 0.41883345045678144,
"grad_norm": 0.5429426019102795,
"learning_rate": 8.956383916309888e-06,
"loss": 0.2057,
"step": 596
},
{
"epoch": 0.4195361911454673,
"grad_norm": 0.5093881979661048,
"learning_rate": 8.953006678032964e-06,
"loss": 0.1877,
"step": 597
},
{
"epoch": 0.4202389318341532,
"grad_norm": 0.5064829784458187,
"learning_rate": 8.94962462296887e-06,
"loss": 0.2043,
"step": 598
},
{
"epoch": 0.4209416725228391,
"grad_norm": 0.5358570127635844,
"learning_rate": 8.946237755238676e-06,
"loss": 0.1894,
"step": 599
},
{
"epoch": 0.42164441321152496,
"grad_norm": 0.49467508377867137,
"learning_rate": 8.942846078969323e-06,
"loss": 0.1701,
"step": 600
},
{
"epoch": 0.4223471539002108,
"grad_norm": 0.5277598519911398,
"learning_rate": 8.93944959829361e-06,
"loss": 0.2024,
"step": 601
},
{
"epoch": 0.4230498945888967,
"grad_norm": 0.5224316014682783,
"learning_rate": 8.93604831735019e-06,
"loss": 0.1857,
"step": 602
},
{
"epoch": 0.42375263527758256,
"grad_norm": 0.4899632798167767,
"learning_rate": 8.932642240283567e-06,
"loss": 0.1685,
"step": 603
},
{
"epoch": 0.4244553759662684,
"grad_norm": 0.5192534676901748,
"learning_rate": 8.929231371244087e-06,
"loss": 0.1911,
"step": 604
},
{
"epoch": 0.42515811665495434,
"grad_norm": 0.5469056276784454,
"learning_rate": 8.925815714387936e-06,
"loss": 0.2044,
"step": 605
},
{
"epoch": 0.4258608573436402,
"grad_norm": 0.512069981775501,
"learning_rate": 8.922395273877132e-06,
"loss": 0.1768,
"step": 606
},
{
"epoch": 0.4265635980323261,
"grad_norm": 0.49391731200550093,
"learning_rate": 8.918970053879527e-06,
"loss": 0.1828,
"step": 607
},
{
"epoch": 0.42726633872101194,
"grad_norm": 0.5050402167748838,
"learning_rate": 8.915540058568792e-06,
"loss": 0.1892,
"step": 608
},
{
"epoch": 0.4279690794096978,
"grad_norm": 0.5133347815366437,
"learning_rate": 8.912105292124417e-06,
"loss": 0.2006,
"step": 609
},
{
"epoch": 0.42867182009838367,
"grad_norm": 0.5376239794010119,
"learning_rate": 8.90866575873171e-06,
"loss": 0.2076,
"step": 610
},
{
"epoch": 0.4293745607870696,
"grad_norm": 0.5393211059228664,
"learning_rate": 8.905221462581784e-06,
"loss": 0.2013,
"step": 611
},
{
"epoch": 0.43007730147575546,
"grad_norm": 0.5090627949640147,
"learning_rate": 8.901772407871553e-06,
"loss": 0.192,
"step": 612
},
{
"epoch": 0.4307800421644413,
"grad_norm": 0.5333308319943344,
"learning_rate": 8.898318598803737e-06,
"loss": 0.2015,
"step": 613
},
{
"epoch": 0.4314827828531272,
"grad_norm": 0.5259173989837693,
"learning_rate": 8.894860039586841e-06,
"loss": 0.1924,
"step": 614
},
{
"epoch": 0.43218552354181305,
"grad_norm": 0.5068982404113911,
"learning_rate": 8.891396734435164e-06,
"loss": 0.1726,
"step": 615
},
{
"epoch": 0.4328882642304989,
"grad_norm": 0.5255620473631372,
"learning_rate": 8.887928687568785e-06,
"loss": 0.1939,
"step": 616
},
{
"epoch": 0.43359100491918484,
"grad_norm": 0.5158930229508965,
"learning_rate": 8.884455903213562e-06,
"loss": 0.2029,
"step": 617
},
{
"epoch": 0.4342937456078707,
"grad_norm": 0.5108134855434232,
"learning_rate": 8.880978385601127e-06,
"loss": 0.1753,
"step": 618
},
{
"epoch": 0.4349964862965566,
"grad_norm": 0.5630010933594187,
"learning_rate": 8.877496138968874e-06,
"loss": 0.213,
"step": 619
},
{
"epoch": 0.43569922698524244,
"grad_norm": 0.5243389693119963,
"learning_rate": 8.874009167559968e-06,
"loss": 0.1858,
"step": 620
},
{
"epoch": 0.4364019676739283,
"grad_norm": 0.5096070989662833,
"learning_rate": 8.870517475623322e-06,
"loss": 0.1759,
"step": 621
},
{
"epoch": 0.43710470836261417,
"grad_norm": 0.536046658809568,
"learning_rate": 8.867021067413608e-06,
"loss": 0.1882,
"step": 622
},
{
"epoch": 0.4378074490513001,
"grad_norm": 0.5206843932833073,
"learning_rate": 8.863519947191242e-06,
"loss": 0.1558,
"step": 623
},
{
"epoch": 0.43851018973998596,
"grad_norm": 0.5298564998445344,
"learning_rate": 8.86001411922238e-06,
"loss": 0.1971,
"step": 624
},
{
"epoch": 0.4392129304286718,
"grad_norm": 0.5389102910060309,
"learning_rate": 8.856503587778922e-06,
"loss": 0.1817,
"step": 625
},
{
"epoch": 0.4399156711173577,
"grad_norm": 0.5715625478369358,
"learning_rate": 8.852988357138488e-06,
"loss": 0.2288,
"step": 626
},
{
"epoch": 0.44061841180604355,
"grad_norm": 0.5153652268496476,
"learning_rate": 8.849468431584432e-06,
"loss": 0.1856,
"step": 627
},
{
"epoch": 0.4413211524947294,
"grad_norm": 0.5845746770538888,
"learning_rate": 8.845943815405827e-06,
"loss": 0.1841,
"step": 628
},
{
"epoch": 0.44202389318341534,
"grad_norm": 0.496201396481146,
"learning_rate": 8.842414512897457e-06,
"loss": 0.1778,
"step": 629
},
{
"epoch": 0.4427266338721012,
"grad_norm": 0.5104474152486059,
"learning_rate": 8.838880528359826e-06,
"loss": 0.1864,
"step": 630
},
{
"epoch": 0.44342937456078707,
"grad_norm": 0.5058086912236018,
"learning_rate": 8.835341866099136e-06,
"loss": 0.1884,
"step": 631
},
{
"epoch": 0.44413211524947294,
"grad_norm": 0.506448200952019,
"learning_rate": 8.831798530427289e-06,
"loss": 0.1828,
"step": 632
},
{
"epoch": 0.4448348559381588,
"grad_norm": 0.5245179863804086,
"learning_rate": 8.828250525661884e-06,
"loss": 0.191,
"step": 633
},
{
"epoch": 0.44553759662684467,
"grad_norm": 0.5005515175671515,
"learning_rate": 8.824697856126206e-06,
"loss": 0.1761,
"step": 634
},
{
"epoch": 0.4462403373155306,
"grad_norm": 0.521240908293417,
"learning_rate": 8.82114052614923e-06,
"loss": 0.2034,
"step": 635
},
{
"epoch": 0.44694307800421645,
"grad_norm": 0.538009865575798,
"learning_rate": 8.817578540065605e-06,
"loss": 0.1767,
"step": 636
},
{
"epoch": 0.4476458186929023,
"grad_norm": 0.5706487029627249,
"learning_rate": 8.814011902215654e-06,
"loss": 0.2107,
"step": 637
},
{
"epoch": 0.4483485593815882,
"grad_norm": 0.48577016697100317,
"learning_rate": 8.81044061694537e-06,
"loss": 0.1592,
"step": 638
},
{
"epoch": 0.44905130007027405,
"grad_norm": 0.5086273769884814,
"learning_rate": 8.806864688606409e-06,
"loss": 0.1993,
"step": 639
},
{
"epoch": 0.4497540407589599,
"grad_norm": 0.5213927236920854,
"learning_rate": 8.80328412155608e-06,
"loss": 0.1938,
"step": 640
},
{
"epoch": 0.45045678144764584,
"grad_norm": 0.4932799721037502,
"learning_rate": 8.799698920157348e-06,
"loss": 0.1541,
"step": 641
},
{
"epoch": 0.4511595221363317,
"grad_norm": 0.5212558600526757,
"learning_rate": 8.796109088778831e-06,
"loss": 0.1771,
"step": 642
},
{
"epoch": 0.45186226282501757,
"grad_norm": 0.5251417804862062,
"learning_rate": 8.792514631794778e-06,
"loss": 0.1728,
"step": 643
},
{
"epoch": 0.45256500351370343,
"grad_norm": 0.5585069520314704,
"learning_rate": 8.788915553585079e-06,
"loss": 0.2139,
"step": 644
},
{
"epoch": 0.4532677442023893,
"grad_norm": 0.5398054240125882,
"learning_rate": 8.785311858535254e-06,
"loss": 0.187,
"step": 645
},
{
"epoch": 0.45397048489107517,
"grad_norm": 0.5192000438530474,
"learning_rate": 8.781703551036451e-06,
"loss": 0.1988,
"step": 646
},
{
"epoch": 0.4546732255797611,
"grad_norm": 0.5546647321834434,
"learning_rate": 8.77809063548544e-06,
"loss": 0.1818,
"step": 647
},
{
"epoch": 0.45537596626844695,
"grad_norm": 0.5082569452775141,
"learning_rate": 8.774473116284598e-06,
"loss": 0.1952,
"step": 648
},
{
"epoch": 0.4560787069571328,
"grad_norm": 0.5240312521252924,
"learning_rate": 8.770850997841918e-06,
"loss": 0.2161,
"step": 649
},
{
"epoch": 0.4567814476458187,
"grad_norm": 0.550176453635836,
"learning_rate": 8.767224284570999e-06,
"loss": 0.2338,
"step": 650
},
{
"epoch": 0.45748418833450455,
"grad_norm": 0.5404600992817532,
"learning_rate": 8.763592980891031e-06,
"loss": 0.2245,
"step": 651
},
{
"epoch": 0.45818692902319047,
"grad_norm": 0.4826072134962918,
"learning_rate": 8.759957091226805e-06,
"loss": 0.1642,
"step": 652
},
{
"epoch": 0.45888966971187634,
"grad_norm": 0.49177110191527174,
"learning_rate": 8.756316620008697e-06,
"loss": 0.1742,
"step": 653
},
{
"epoch": 0.4595924104005622,
"grad_norm": 0.521888175445801,
"learning_rate": 8.752671571672664e-06,
"loss": 0.1955,
"step": 654
},
{
"epoch": 0.46029515108924807,
"grad_norm": 0.5316702516376998,
"learning_rate": 8.749021950660243e-06,
"loss": 0.211,
"step": 655
},
{
"epoch": 0.46099789177793393,
"grad_norm": 0.5374770965759721,
"learning_rate": 8.745367761418546e-06,
"loss": 0.2005,
"step": 656
},
{
"epoch": 0.4617006324666198,
"grad_norm": 0.5604330034770029,
"learning_rate": 8.74170900840024e-06,
"loss": 0.2033,
"step": 657
},
{
"epoch": 0.4624033731553057,
"grad_norm": 0.5418410632315168,
"learning_rate": 8.738045696063566e-06,
"loss": 0.2006,
"step": 658
},
{
"epoch": 0.4631061138439916,
"grad_norm": 0.5130755032814118,
"learning_rate": 8.734377828872315e-06,
"loss": 0.1835,
"step": 659
},
{
"epoch": 0.46380885453267745,
"grad_norm": 0.5184391654004975,
"learning_rate": 8.730705411295826e-06,
"loss": 0.1857,
"step": 660
},
{
"epoch": 0.4645115952213633,
"grad_norm": 0.5038674683244418,
"learning_rate": 8.727028447808983e-06,
"loss": 0.1703,
"step": 661
},
{
"epoch": 0.4652143359100492,
"grad_norm": 0.47564418179318896,
"learning_rate": 8.723346942892217e-06,
"loss": 0.1723,
"step": 662
},
{
"epoch": 0.46591707659873505,
"grad_norm": 0.5315309419432281,
"learning_rate": 8.719660901031482e-06,
"loss": 0.2046,
"step": 663
},
{
"epoch": 0.46661981728742097,
"grad_norm": 0.4811380443536227,
"learning_rate": 8.715970326718269e-06,
"loss": 0.1709,
"step": 664
},
{
"epoch": 0.46732255797610683,
"grad_norm": 0.49909000040476986,
"learning_rate": 8.712275224449583e-06,
"loss": 0.1749,
"step": 665
},
{
"epoch": 0.4680252986647927,
"grad_norm": 0.5132811766370297,
"learning_rate": 8.708575598727958e-06,
"loss": 0.195,
"step": 666
},
{
"epoch": 0.46872803935347856,
"grad_norm": 0.5302404539766977,
"learning_rate": 8.704871454061428e-06,
"loss": 0.187,
"step": 667
},
{
"epoch": 0.46943078004216443,
"grad_norm": 0.4928463401919786,
"learning_rate": 8.70116279496354e-06,
"loss": 0.1605,
"step": 668
},
{
"epoch": 0.4701335207308503,
"grad_norm": 0.4947320640053909,
"learning_rate": 8.697449625953343e-06,
"loss": 0.1587,
"step": 669
},
{
"epoch": 0.4708362614195362,
"grad_norm": 0.5819740823592446,
"learning_rate": 8.693731951555376e-06,
"loss": 0.2296,
"step": 670
},
{
"epoch": 0.4715390021082221,
"grad_norm": 0.4945779206239264,
"learning_rate": 8.690009776299673e-06,
"loss": 0.149,
"step": 671
},
{
"epoch": 0.47224174279690795,
"grad_norm": 0.5421868791658425,
"learning_rate": 8.686283104721748e-06,
"loss": 0.1717,
"step": 672
},
{
"epoch": 0.4729444834855938,
"grad_norm": 0.5700508341647283,
"learning_rate": 8.6825519413626e-06,
"loss": 0.2141,
"step": 673
},
{
"epoch": 0.4736472241742797,
"grad_norm": 0.5156963620890469,
"learning_rate": 8.678816290768695e-06,
"loss": 0.1989,
"step": 674
},
{
"epoch": 0.47434996486296555,
"grad_norm": 0.5122207370299912,
"learning_rate": 8.675076157491969e-06,
"loss": 0.1873,
"step": 675
},
{
"epoch": 0.47505270555165147,
"grad_norm": 0.5337943374422462,
"learning_rate": 8.671331546089818e-06,
"loss": 0.2019,
"step": 676
},
{
"epoch": 0.47575544624033733,
"grad_norm": 0.5251324360814932,
"learning_rate": 8.667582461125101e-06,
"loss": 0.2005,
"step": 677
},
{
"epoch": 0.4764581869290232,
"grad_norm": 0.5065576843514569,
"learning_rate": 8.663828907166123e-06,
"loss": 0.1747,
"step": 678
},
{
"epoch": 0.47716092761770906,
"grad_norm": 0.5253098567795085,
"learning_rate": 8.660070888786633e-06,
"loss": 0.1803,
"step": 679
},
{
"epoch": 0.47786366830639493,
"grad_norm": 0.5392312860851205,
"learning_rate": 8.656308410565828e-06,
"loss": 0.2155,
"step": 680
},
{
"epoch": 0.4785664089950808,
"grad_norm": 0.49303438802471045,
"learning_rate": 8.652541477088327e-06,
"loss": 0.1881,
"step": 681
},
{
"epoch": 0.4792691496837667,
"grad_norm": 0.5425096543739556,
"learning_rate": 8.64877009294419e-06,
"loss": 0.2057,
"step": 682
},
{
"epoch": 0.4799718903724526,
"grad_norm": 0.518810486720963,
"learning_rate": 8.644994262728895e-06,
"loss": 0.1725,
"step": 683
},
{
"epoch": 0.48067463106113845,
"grad_norm": 0.5336909571595239,
"learning_rate": 8.64121399104333e-06,
"loss": 0.2098,
"step": 684
},
{
"epoch": 0.4813773717498243,
"grad_norm": 0.5508261128077607,
"learning_rate": 8.637429282493813e-06,
"loss": 0.2113,
"step": 685
},
{
"epoch": 0.4820801124385102,
"grad_norm": 0.5313395208357798,
"learning_rate": 8.633640141692052e-06,
"loss": 0.2102,
"step": 686
},
{
"epoch": 0.48278285312719604,
"grad_norm": 0.5049708987416442,
"learning_rate": 8.629846573255162e-06,
"loss": 0.1892,
"step": 687
},
{
"epoch": 0.48348559381588196,
"grad_norm": 0.6129720209725857,
"learning_rate": 8.626048581805652e-06,
"loss": 0.2177,
"step": 688
},
{
"epoch": 0.48418833450456783,
"grad_norm": 0.5238844175884503,
"learning_rate": 8.622246171971425e-06,
"loss": 0.1909,
"step": 689
},
{
"epoch": 0.4848910751932537,
"grad_norm": 0.5424145483910142,
"learning_rate": 8.61843934838576e-06,
"loss": 0.2277,
"step": 690
},
{
"epoch": 0.48559381588193956,
"grad_norm": 0.5293822135419718,
"learning_rate": 8.614628115687318e-06,
"loss": 0.2099,
"step": 691
},
{
"epoch": 0.4862965565706254,
"grad_norm": 0.49282770222211403,
"learning_rate": 8.610812478520137e-06,
"loss": 0.1789,
"step": 692
},
{
"epoch": 0.4869992972593113,
"grad_norm": 0.5057276784193864,
"learning_rate": 8.606992441533615e-06,
"loss": 0.1973,
"step": 693
},
{
"epoch": 0.4877020379479972,
"grad_norm": 0.5595811057572404,
"learning_rate": 8.603168009382513e-06,
"loss": 0.2241,
"step": 694
},
{
"epoch": 0.4884047786366831,
"grad_norm": 0.5048847106159845,
"learning_rate": 8.59933918672695e-06,
"loss": 0.1768,
"step": 695
},
{
"epoch": 0.48910751932536894,
"grad_norm": 0.5000796298258966,
"learning_rate": 8.595505978232394e-06,
"loss": 0.1862,
"step": 696
},
{
"epoch": 0.4898102600140548,
"grad_norm": 0.5264485387449231,
"learning_rate": 8.591668388569656e-06,
"loss": 0.2124,
"step": 697
},
{
"epoch": 0.4905130007027407,
"grad_norm": 0.5411644040686916,
"learning_rate": 8.587826422414886e-06,
"loss": 0.2023,
"step": 698
},
{
"epoch": 0.49121574139142654,
"grad_norm": 0.5183969203449558,
"learning_rate": 8.583980084449566e-06,
"loss": 0.199,
"step": 699
},
{
"epoch": 0.49191848208011246,
"grad_norm": 0.46967921241106914,
"learning_rate": 8.580129379360508e-06,
"loss": 0.1661,
"step": 700
},
{
"epoch": 0.49262122276879833,
"grad_norm": 0.516712650806593,
"learning_rate": 8.576274311839843e-06,
"loss": 0.1889,
"step": 701
},
{
"epoch": 0.4933239634574842,
"grad_norm": 0.5133872542750583,
"learning_rate": 8.572414886585015e-06,
"loss": 0.1956,
"step": 702
},
{
"epoch": 0.49402670414617006,
"grad_norm": 0.5238151140101951,
"learning_rate": 8.568551108298785e-06,
"loss": 0.1885,
"step": 703
},
{
"epoch": 0.4947294448348559,
"grad_norm": 0.5059425082115941,
"learning_rate": 8.564682981689214e-06,
"loss": 0.1795,
"step": 704
},
{
"epoch": 0.4954321855235418,
"grad_norm": 0.5334482963601956,
"learning_rate": 8.56081051146966e-06,
"loss": 0.1932,
"step": 705
},
{
"epoch": 0.4961349262122277,
"grad_norm": 0.5315230969579532,
"learning_rate": 8.556933702358774e-06,
"loss": 0.1955,
"step": 706
},
{
"epoch": 0.4968376669009136,
"grad_norm": 0.5109056548326143,
"learning_rate": 8.553052559080498e-06,
"loss": 0.1929,
"step": 707
},
{
"epoch": 0.49754040758959944,
"grad_norm": 0.4748541849992265,
"learning_rate": 8.549167086364056e-06,
"loss": 0.1592,
"step": 708
},
{
"epoch": 0.4982431482782853,
"grad_norm": 0.5009205672759742,
"learning_rate": 8.545277288943938e-06,
"loss": 0.1932,
"step": 709
},
{
"epoch": 0.4989458889669712,
"grad_norm": 0.532777145103931,
"learning_rate": 8.541383171559911e-06,
"loss": 0.2104,
"step": 710
},
{
"epoch": 0.49964862965565704,
"grad_norm": 0.4735190849852184,
"learning_rate": 8.537484738957009e-06,
"loss": 0.164,
"step": 711
},
{
"epoch": 0.500351370344343,
"grad_norm": 0.476645516988027,
"learning_rate": 8.533581995885515e-06,
"loss": 0.1831,
"step": 712
},
{
"epoch": 0.5010541110330288,
"grad_norm": 0.49952632234721994,
"learning_rate": 8.529674947100974e-06,
"loss": 0.191,
"step": 713
},
{
"epoch": 0.5017568517217147,
"grad_norm": 0.5326196925860098,
"learning_rate": 8.525763597364171e-06,
"loss": 0.2019,
"step": 714
},
{
"epoch": 0.5024595924104006,
"grad_norm": 0.501923796884598,
"learning_rate": 8.52184795144113e-06,
"loss": 0.1771,
"step": 715
},
{
"epoch": 0.5031623330990864,
"grad_norm": 0.541991243282928,
"learning_rate": 8.51792801410312e-06,
"loss": 0.2227,
"step": 716
},
{
"epoch": 0.5038650737877723,
"grad_norm": 0.4836988086090048,
"learning_rate": 8.514003790126628e-06,
"loss": 0.1654,
"step": 717
},
{
"epoch": 0.5045678144764582,
"grad_norm": 0.5307517555915229,
"learning_rate": 8.510075284293371e-06,
"loss": 0.2015,
"step": 718
},
{
"epoch": 0.5052705551651441,
"grad_norm": 0.5050575772007962,
"learning_rate": 8.506142501390284e-06,
"loss": 0.1885,
"step": 719
},
{
"epoch": 0.5059732958538299,
"grad_norm": 0.5239923393931152,
"learning_rate": 8.502205446209506e-06,
"loss": 0.1888,
"step": 720
},
{
"epoch": 0.5066760365425158,
"grad_norm": 0.5351515639161576,
"learning_rate": 8.49826412354839e-06,
"loss": 0.1886,
"step": 721
},
{
"epoch": 0.5073787772312017,
"grad_norm": 0.5299030400055579,
"learning_rate": 8.494318538209485e-06,
"loss": 0.2103,
"step": 722
},
{
"epoch": 0.5080815179198875,
"grad_norm": 0.49715068879445723,
"learning_rate": 8.490368695000537e-06,
"loss": 0.1676,
"step": 723
},
{
"epoch": 0.5087842586085735,
"grad_norm": 0.5509751380513195,
"learning_rate": 8.486414598734479e-06,
"loss": 0.2308,
"step": 724
},
{
"epoch": 0.5094869992972593,
"grad_norm": 0.5227387590991529,
"learning_rate": 8.482456254229421e-06,
"loss": 0.2039,
"step": 725
},
{
"epoch": 0.5101897399859452,
"grad_norm": 0.5193247462295407,
"learning_rate": 8.47849366630866e-06,
"loss": 0.1968,
"step": 726
},
{
"epoch": 0.5108924806746311,
"grad_norm": 0.5351993261677674,
"learning_rate": 8.474526839800654e-06,
"loss": 0.2064,
"step": 727
},
{
"epoch": 0.5115952213633169,
"grad_norm": 0.5113165182003795,
"learning_rate": 8.470555779539034e-06,
"loss": 0.1812,
"step": 728
},
{
"epoch": 0.5122979620520028,
"grad_norm": 0.5183643401615331,
"learning_rate": 8.46658049036258e-06,
"loss": 0.1847,
"step": 729
},
{
"epoch": 0.5130007027406887,
"grad_norm": 0.49914598016030814,
"learning_rate": 8.462600977115237e-06,
"loss": 0.1842,
"step": 730
},
{
"epoch": 0.5137034434293746,
"grad_norm": 0.49713477165140296,
"learning_rate": 8.458617244646085e-06,
"loss": 0.1639,
"step": 731
},
{
"epoch": 0.5144061841180604,
"grad_norm": 0.5162993659845747,
"learning_rate": 8.454629297809355e-06,
"loss": 0.1924,
"step": 732
},
{
"epoch": 0.5151089248067463,
"grad_norm": 0.48434349466475823,
"learning_rate": 8.450637141464407e-06,
"loss": 0.1778,
"step": 733
},
{
"epoch": 0.5158116654954322,
"grad_norm": 0.49000304311803644,
"learning_rate": 8.446640780475735e-06,
"loss": 0.1874,
"step": 734
},
{
"epoch": 0.516514406184118,
"grad_norm": 0.5101984669847915,
"learning_rate": 8.442640219712949e-06,
"loss": 0.1838,
"step": 735
},
{
"epoch": 0.517217146872804,
"grad_norm": 0.5493059286665023,
"learning_rate": 8.438635464050786e-06,
"loss": 0.1883,
"step": 736
},
{
"epoch": 0.5179198875614898,
"grad_norm": 0.5358418918029862,
"learning_rate": 8.43462651836909e-06,
"loss": 0.1951,
"step": 737
},
{
"epoch": 0.5186226282501757,
"grad_norm": 0.5328193680745292,
"learning_rate": 8.430613387552809e-06,
"loss": 0.2101,
"step": 738
},
{
"epoch": 0.5193253689388616,
"grad_norm": 0.5101375728411082,
"learning_rate": 8.42659607649199e-06,
"loss": 0.1697,
"step": 739
},
{
"epoch": 0.5200281096275474,
"grad_norm": 0.5068924928786236,
"learning_rate": 8.42257459008178e-06,
"loss": 0.1912,
"step": 740
},
{
"epoch": 0.5207308503162333,
"grad_norm": 0.5395748819849159,
"learning_rate": 8.418548933222406e-06,
"loss": 0.2013,
"step": 741
},
{
"epoch": 0.5214335910049192,
"grad_norm": 0.5302590521377698,
"learning_rate": 8.414519110819183e-06,
"loss": 0.1974,
"step": 742
},
{
"epoch": 0.5221363316936051,
"grad_norm": 0.5177183035638452,
"learning_rate": 8.410485127782498e-06,
"loss": 0.1865,
"step": 743
},
{
"epoch": 0.5228390723822909,
"grad_norm": 0.5123014715477399,
"learning_rate": 8.40644698902781e-06,
"loss": 0.1929,
"step": 744
},
{
"epoch": 0.5235418130709768,
"grad_norm": 0.5518810669431153,
"learning_rate": 8.402404699475637e-06,
"loss": 0.2242,
"step": 745
},
{
"epoch": 0.5242445537596627,
"grad_norm": 0.5259717237421058,
"learning_rate": 8.398358264051563e-06,
"loss": 0.1978,
"step": 746
},
{
"epoch": 0.5249472944483485,
"grad_norm": 0.4986579106356241,
"learning_rate": 8.394307687686219e-06,
"loss": 0.1808,
"step": 747
},
{
"epoch": 0.5256500351370345,
"grad_norm": 0.4979969646630182,
"learning_rate": 8.390252975315276e-06,
"loss": 0.1979,
"step": 748
},
{
"epoch": 0.5263527758257203,
"grad_norm": 0.49601425793058973,
"learning_rate": 8.386194131879458e-06,
"loss": 0.1782,
"step": 749
},
{
"epoch": 0.5270555165144062,
"grad_norm": 0.5073407231128871,
"learning_rate": 8.382131162324512e-06,
"loss": 0.1775,
"step": 750
},
{
"epoch": 0.5277582572030921,
"grad_norm": 0.501600479759895,
"learning_rate": 8.378064071601218e-06,
"loss": 0.2013,
"step": 751
},
{
"epoch": 0.5284609978917779,
"grad_norm": 0.5294937893906491,
"learning_rate": 8.373992864665374e-06,
"loss": 0.1947,
"step": 752
},
{
"epoch": 0.5291637385804638,
"grad_norm": 0.538107208892889,
"learning_rate": 8.369917546477794e-06,
"loss": 0.2031,
"step": 753
},
{
"epoch": 0.5298664792691496,
"grad_norm": 0.5419919263290914,
"learning_rate": 8.365838122004311e-06,
"loss": 0.2157,
"step": 754
},
{
"epoch": 0.5305692199578356,
"grad_norm": 0.49421087524687923,
"learning_rate": 8.361754596215745e-06,
"loss": 0.1822,
"step": 755
},
{
"epoch": 0.5312719606465214,
"grad_norm": 0.5130499099727955,
"learning_rate": 8.357666974087928e-06,
"loss": 0.1858,
"step": 756
},
{
"epoch": 0.5319747013352073,
"grad_norm": 0.546549568304958,
"learning_rate": 8.353575260601674e-06,
"loss": 0.2066,
"step": 757
},
{
"epoch": 0.5326774420238932,
"grad_norm": 0.5474314359553429,
"learning_rate": 8.349479460742788e-06,
"loss": 0.1834,
"step": 758
},
{
"epoch": 0.533380182712579,
"grad_norm": 0.5432435200665976,
"learning_rate": 8.345379579502054e-06,
"loss": 0.1978,
"step": 759
},
{
"epoch": 0.534082923401265,
"grad_norm": 0.5272022508499994,
"learning_rate": 8.341275621875224e-06,
"loss": 0.1996,
"step": 760
},
{
"epoch": 0.5347856640899508,
"grad_norm": 0.5336040416372276,
"learning_rate": 8.337167592863026e-06,
"loss": 0.1906,
"step": 761
},
{
"epoch": 0.5354884047786367,
"grad_norm": 0.4651896643747724,
"learning_rate": 8.333055497471137e-06,
"loss": 0.1431,
"step": 762
},
{
"epoch": 0.5361911454673226,
"grad_norm": 0.5509722540845838,
"learning_rate": 8.3289393407102e-06,
"loss": 0.2132,
"step": 763
},
{
"epoch": 0.5368938861560084,
"grad_norm": 0.5233084034813252,
"learning_rate": 8.324819127595802e-06,
"loss": 0.1846,
"step": 764
},
{
"epoch": 0.5375966268446943,
"grad_norm": 0.5194855578839023,
"learning_rate": 8.320694863148473e-06,
"loss": 0.2019,
"step": 765
},
{
"epoch": 0.5382993675333801,
"grad_norm": 0.4748744123295701,
"learning_rate": 8.31656655239368e-06,
"loss": 0.1567,
"step": 766
},
{
"epoch": 0.5390021082220661,
"grad_norm": 0.5536236422394342,
"learning_rate": 8.31243420036182e-06,
"loss": 0.2169,
"step": 767
},
{
"epoch": 0.5397048489107519,
"grad_norm": 0.47265985270876465,
"learning_rate": 8.308297812088215e-06,
"loss": 0.1773,
"step": 768
},
{
"epoch": 0.5404075895994378,
"grad_norm": 0.5133572713659621,
"learning_rate": 8.304157392613103e-06,
"loss": 0.1793,
"step": 769
},
{
"epoch": 0.5411103302881237,
"grad_norm": 0.48828244081957234,
"learning_rate": 8.30001294698164e-06,
"loss": 0.164,
"step": 770
},
{
"epoch": 0.5418130709768095,
"grad_norm": 0.526163241791338,
"learning_rate": 8.295864480243882e-06,
"loss": 0.1958,
"step": 771
},
{
"epoch": 0.5425158116654955,
"grad_norm": 0.5397637143368118,
"learning_rate": 8.291711997454786e-06,
"loss": 0.202,
"step": 772
},
{
"epoch": 0.5432185523541813,
"grad_norm": 0.5256648878700448,
"learning_rate": 8.287555503674204e-06,
"loss": 0.1945,
"step": 773
},
{
"epoch": 0.5439212930428672,
"grad_norm": 0.48367565249712235,
"learning_rate": 8.283395003966873e-06,
"loss": 0.1703,
"step": 774
},
{
"epoch": 0.5446240337315531,
"grad_norm": 0.515794431545339,
"learning_rate": 8.279230503402413e-06,
"loss": 0.1841,
"step": 775
},
{
"epoch": 0.5453267744202389,
"grad_norm": 0.5103637275060798,
"learning_rate": 8.275062007055323e-06,
"loss": 0.1818,
"step": 776
},
{
"epoch": 0.5460295151089248,
"grad_norm": 0.5271934797145162,
"learning_rate": 8.270889520004964e-06,
"loss": 0.1691,
"step": 777
},
{
"epoch": 0.5467322557976106,
"grad_norm": 0.5107768942044677,
"learning_rate": 8.266713047335563e-06,
"loss": 0.1965,
"step": 778
},
{
"epoch": 0.5474349964862966,
"grad_norm": 0.5243060256391675,
"learning_rate": 8.262532594136202e-06,
"loss": 0.1828,
"step": 779
},
{
"epoch": 0.5481377371749825,
"grad_norm": 0.5073550978308514,
"learning_rate": 8.258348165500815e-06,
"loss": 0.1858,
"step": 780
},
{
"epoch": 0.5488404778636683,
"grad_norm": 0.5000707758019513,
"learning_rate": 8.254159766528184e-06,
"loss": 0.1686,
"step": 781
},
{
"epoch": 0.5495432185523542,
"grad_norm": 0.5065682350019561,
"learning_rate": 8.249967402321919e-06,
"loss": 0.1952,
"step": 782
},
{
"epoch": 0.55024595924104,
"grad_norm": 0.5456644015029175,
"learning_rate": 8.24577107799047e-06,
"loss": 0.1992,
"step": 783
},
{
"epoch": 0.550948699929726,
"grad_norm": 0.5605460822148821,
"learning_rate": 8.241570798647107e-06,
"loss": 0.2265,
"step": 784
},
{
"epoch": 0.5516514406184118,
"grad_norm": 0.5153471040976462,
"learning_rate": 8.237366569409927e-06,
"loss": 0.1914,
"step": 785
},
{
"epoch": 0.5523541813070977,
"grad_norm": 0.536498676224286,
"learning_rate": 8.23315839540183e-06,
"loss": 0.1965,
"step": 786
},
{
"epoch": 0.5530569219957836,
"grad_norm": 0.5398731216126065,
"learning_rate": 8.22894628175053e-06,
"loss": 0.1924,
"step": 787
},
{
"epoch": 0.5537596626844694,
"grad_norm": 0.5412329632942741,
"learning_rate": 8.224730233588539e-06,
"loss": 0.2031,
"step": 788
},
{
"epoch": 0.5544624033731553,
"grad_norm": 0.49166134955419794,
"learning_rate": 8.220510256053162e-06,
"loss": 0.1701,
"step": 789
},
{
"epoch": 0.5551651440618411,
"grad_norm": 0.5411757068250052,
"learning_rate": 8.216286354286499e-06,
"loss": 0.203,
"step": 790
},
{
"epoch": 0.5558678847505271,
"grad_norm": 0.5055791983257125,
"learning_rate": 8.212058533435418e-06,
"loss": 0.1707,
"step": 791
},
{
"epoch": 0.556570625439213,
"grad_norm": 0.5005713463900835,
"learning_rate": 8.207826798651575e-06,
"loss": 0.1724,
"step": 792
},
{
"epoch": 0.5572733661278988,
"grad_norm": 0.5388542302369276,
"learning_rate": 8.20359115509139e-06,
"loss": 0.2061,
"step": 793
},
{
"epoch": 0.5579761068165847,
"grad_norm": 0.5000375222239247,
"learning_rate": 8.199351607916048e-06,
"loss": 0.1661,
"step": 794
},
{
"epoch": 0.5586788475052705,
"grad_norm": 0.5054661491915279,
"learning_rate": 8.19510816229149e-06,
"loss": 0.1752,
"step": 795
},
{
"epoch": 0.5593815881939564,
"grad_norm": 0.5260455054665505,
"learning_rate": 8.190860823388402e-06,
"loss": 0.1769,
"step": 796
},
{
"epoch": 0.5600843288826423,
"grad_norm": 0.5313271592798854,
"learning_rate": 8.186609596382222e-06,
"loss": 0.1903,
"step": 797
},
{
"epoch": 0.5607870695713282,
"grad_norm": 0.5134128367032493,
"learning_rate": 8.182354486453123e-06,
"loss": 0.1699,
"step": 798
},
{
"epoch": 0.5614898102600141,
"grad_norm": 0.5235982420004922,
"learning_rate": 8.178095498786007e-06,
"loss": 0.1778,
"step": 799
},
{
"epoch": 0.5621925509486999,
"grad_norm": 0.49504886329354375,
"learning_rate": 8.173832638570503e-06,
"loss": 0.1714,
"step": 800
},
{
"epoch": 0.5628952916373858,
"grad_norm": 0.5229810088775857,
"learning_rate": 8.169565911000958e-06,
"loss": 0.2053,
"step": 801
},
{
"epoch": 0.5635980323260716,
"grad_norm": 0.48023017540213414,
"learning_rate": 8.165295321276433e-06,
"loss": 0.1695,
"step": 802
},
{
"epoch": 0.5643007730147576,
"grad_norm": 0.4943594101833042,
"learning_rate": 8.161020874600695e-06,
"loss": 0.1727,
"step": 803
},
{
"epoch": 0.5650035137034435,
"grad_norm": 0.5384022749347142,
"learning_rate": 8.156742576182208e-06,
"loss": 0.1883,
"step": 804
},
{
"epoch": 0.5657062543921293,
"grad_norm": 0.5181464629326008,
"learning_rate": 8.152460431234132e-06,
"loss": 0.1652,
"step": 805
},
{
"epoch": 0.5664089950808152,
"grad_norm": 0.5010507122028295,
"learning_rate": 8.148174444974313e-06,
"loss": 0.1786,
"step": 806
},
{
"epoch": 0.567111735769501,
"grad_norm": 0.5337851852934541,
"learning_rate": 8.143884622625276e-06,
"loss": 0.1753,
"step": 807
},
{
"epoch": 0.567814476458187,
"grad_norm": 0.49816613967519474,
"learning_rate": 8.139590969414224e-06,
"loss": 0.1682,
"step": 808
},
{
"epoch": 0.5685172171468728,
"grad_norm": 0.5090345971257679,
"learning_rate": 8.135293490573029e-06,
"loss": 0.1756,
"step": 809
},
{
"epoch": 0.5692199578355587,
"grad_norm": 0.5327856293158361,
"learning_rate": 8.130992191338216e-06,
"loss": 0.203,
"step": 810
},
{
"epoch": 0.5699226985242446,
"grad_norm": 0.5450247599218729,
"learning_rate": 8.126687076950974e-06,
"loss": 0.2399,
"step": 811
},
{
"epoch": 0.5706254392129304,
"grad_norm": 0.49736262284537636,
"learning_rate": 8.12237815265714e-06,
"loss": 0.1689,
"step": 812
},
{
"epoch": 0.5713281799016163,
"grad_norm": 0.5025148715885837,
"learning_rate": 8.118065423707187e-06,
"loss": 0.1834,
"step": 813
},
{
"epoch": 0.5720309205903021,
"grad_norm": 0.5225499168344663,
"learning_rate": 8.113748895356229e-06,
"loss": 0.1655,
"step": 814
},
{
"epoch": 0.5727336612789881,
"grad_norm": 0.5295754034074439,
"learning_rate": 8.10942857286401e-06,
"loss": 0.2025,
"step": 815
},
{
"epoch": 0.573436401967674,
"grad_norm": 0.5064954187259164,
"learning_rate": 8.105104461494896e-06,
"loss": 0.1758,
"step": 816
},
{
"epoch": 0.5741391426563598,
"grad_norm": 0.5154575824391756,
"learning_rate": 8.10077656651787e-06,
"loss": 0.2125,
"step": 817
},
{
"epoch": 0.5748418833450457,
"grad_norm": 0.5330202712762878,
"learning_rate": 8.096444893206524e-06,
"loss": 0.2047,
"step": 818
},
{
"epoch": 0.5755446240337315,
"grad_norm": 0.5370877039064466,
"learning_rate": 8.092109446839056e-06,
"loss": 0.189,
"step": 819
},
{
"epoch": 0.5762473647224174,
"grad_norm": 0.5975682017172536,
"learning_rate": 8.08777023269826e-06,
"loss": 0.2243,
"step": 820
},
{
"epoch": 0.5769501054111033,
"grad_norm": 0.47951433169488117,
"learning_rate": 8.083427256071523e-06,
"loss": 0.1633,
"step": 821
},
{
"epoch": 0.5776528460997892,
"grad_norm": 0.5016662797816358,
"learning_rate": 8.079080522250812e-06,
"loss": 0.175,
"step": 822
},
{
"epoch": 0.5783555867884751,
"grad_norm": 0.539983951759243,
"learning_rate": 8.074730036532678e-06,
"loss": 0.2253,
"step": 823
},
{
"epoch": 0.5790583274771609,
"grad_norm": 0.5107178641124133,
"learning_rate": 8.070375804218244e-06,
"loss": 0.1882,
"step": 824
},
{
"epoch": 0.5797610681658468,
"grad_norm": 0.49893529645121637,
"learning_rate": 8.06601783061319e-06,
"loss": 0.1877,
"step": 825
},
{
"epoch": 0.5804638088545326,
"grad_norm": 0.4980261424327893,
"learning_rate": 8.061656121027766e-06,
"loss": 0.1705,
"step": 826
},
{
"epoch": 0.5811665495432186,
"grad_norm": 0.5059694073902211,
"learning_rate": 8.057290680776766e-06,
"loss": 0.1822,
"step": 827
},
{
"epoch": 0.5818692902319045,
"grad_norm": 0.4774032938434036,
"learning_rate": 8.052921515179528e-06,
"loss": 0.1671,
"step": 828
},
{
"epoch": 0.5825720309205903,
"grad_norm": 0.49938521358793453,
"learning_rate": 8.048548629559942e-06,
"loss": 0.1713,
"step": 829
},
{
"epoch": 0.5832747716092762,
"grad_norm": 0.5471684119029511,
"learning_rate": 8.044172029246418e-06,
"loss": 0.2015,
"step": 830
},
{
"epoch": 0.583977512297962,
"grad_norm": 0.5060906162510813,
"learning_rate": 8.0397917195719e-06,
"loss": 0.1737,
"step": 831
},
{
"epoch": 0.5846802529866479,
"grad_norm": 0.511760606872915,
"learning_rate": 8.035407705873843e-06,
"loss": 0.1877,
"step": 832
},
{
"epoch": 0.5853829936753338,
"grad_norm": 0.5161775988752425,
"learning_rate": 8.031019993494231e-06,
"loss": 0.1731,
"step": 833
},
{
"epoch": 0.5860857343640197,
"grad_norm": 0.5043230288865019,
"learning_rate": 8.026628587779537e-06,
"loss": 0.2033,
"step": 834
},
{
"epoch": 0.5867884750527056,
"grad_norm": 0.5546809360655157,
"learning_rate": 8.022233494080747e-06,
"loss": 0.2095,
"step": 835
},
{
"epoch": 0.5874912157413914,
"grad_norm": 0.5140267887390528,
"learning_rate": 8.017834717753337e-06,
"loss": 0.1652,
"step": 836
},
{
"epoch": 0.5881939564300773,
"grad_norm": 0.5227425873771978,
"learning_rate": 8.013432264157266e-06,
"loss": 0.2006,
"step": 837
},
{
"epoch": 0.5888966971187631,
"grad_norm": 0.5475350248824223,
"learning_rate": 8.009026138656983e-06,
"loss": 0.1958,
"step": 838
},
{
"epoch": 0.5895994378074491,
"grad_norm": 0.5060761591366437,
"learning_rate": 8.004616346621401e-06,
"loss": 0.1843,
"step": 839
},
{
"epoch": 0.590302178496135,
"grad_norm": 0.5196943847533497,
"learning_rate": 8.00020289342391e-06,
"loss": 0.2025,
"step": 840
},
{
"epoch": 0.5910049191848208,
"grad_norm": 0.5301167099931641,
"learning_rate": 7.995785784442355e-06,
"loss": 0.1844,
"step": 841
},
{
"epoch": 0.5917076598735067,
"grad_norm": 0.5148146678785309,
"learning_rate": 7.99136502505904e-06,
"loss": 0.1932,
"step": 842
},
{
"epoch": 0.5924104005621925,
"grad_norm": 0.49623445019704854,
"learning_rate": 7.98694062066071e-06,
"loss": 0.1826,
"step": 843
},
{
"epoch": 0.5931131412508784,
"grad_norm": 0.5317236357310177,
"learning_rate": 7.982512576638556e-06,
"loss": 0.2059,
"step": 844
},
{
"epoch": 0.5938158819395642,
"grad_norm": 0.5361323808096335,
"learning_rate": 7.97808089838821e-06,
"loss": 0.1941,
"step": 845
},
{
"epoch": 0.5945186226282502,
"grad_norm": 0.503533977157267,
"learning_rate": 7.973645591309722e-06,
"loss": 0.1813,
"step": 846
},
{
"epoch": 0.5952213633169361,
"grad_norm": 0.5719921027289341,
"learning_rate": 7.969206660807566e-06,
"loss": 0.21,
"step": 847
},
{
"epoch": 0.5959241040056219,
"grad_norm": 0.5068126767235045,
"learning_rate": 7.964764112290641e-06,
"loss": 0.1973,
"step": 848
},
{
"epoch": 0.5966268446943078,
"grad_norm": 0.513537054452568,
"learning_rate": 7.96031795117224e-06,
"loss": 0.1937,
"step": 849
},
{
"epoch": 0.5973295853829936,
"grad_norm": 0.4734141319226508,
"learning_rate": 7.955868182870067e-06,
"loss": 0.1616,
"step": 850
},
{
"epoch": 0.5980323260716796,
"grad_norm": 0.5320131363474777,
"learning_rate": 7.95141481280622e-06,
"loss": 0.1898,
"step": 851
},
{
"epoch": 0.5987350667603655,
"grad_norm": 0.5268959314805932,
"learning_rate": 7.946957846407182e-06,
"loss": 0.2,
"step": 852
},
{
"epoch": 0.5994378074490513,
"grad_norm": 0.5260806167494971,
"learning_rate": 7.942497289103825e-06,
"loss": 0.1945,
"step": 853
},
{
"epoch": 0.6001405481377372,
"grad_norm": 0.554805066779978,
"learning_rate": 7.938033146331392e-06,
"loss": 0.2185,
"step": 854
},
{
"epoch": 0.600843288826423,
"grad_norm": 0.48503127848140637,
"learning_rate": 7.933565423529495e-06,
"loss": 0.1851,
"step": 855
},
{
"epoch": 0.6015460295151089,
"grad_norm": 0.48894225595082286,
"learning_rate": 7.92909412614211e-06,
"loss": 0.181,
"step": 856
},
{
"epoch": 0.6022487702037947,
"grad_norm": 0.5521975195910774,
"learning_rate": 7.924619259617567e-06,
"loss": 0.2175,
"step": 857
},
{
"epoch": 0.6029515108924807,
"grad_norm": 0.5192114495679688,
"learning_rate": 7.920140829408546e-06,
"loss": 0.2021,
"step": 858
},
{
"epoch": 0.6036542515811666,
"grad_norm": 0.5039292999332279,
"learning_rate": 7.915658840972069e-06,
"loss": 0.1814,
"step": 859
},
{
"epoch": 0.6043569922698524,
"grad_norm": 0.5077634967163096,
"learning_rate": 7.911173299769494e-06,
"loss": 0.2114,
"step": 860
},
{
"epoch": 0.6050597329585383,
"grad_norm": 0.48220662569267886,
"learning_rate": 7.906684211266508e-06,
"loss": 0.158,
"step": 861
},
{
"epoch": 0.6057624736472241,
"grad_norm": 0.502308562632445,
"learning_rate": 7.902191580933123e-06,
"loss": 0.1874,
"step": 862
},
{
"epoch": 0.60646521433591,
"grad_norm": 0.497708237740537,
"learning_rate": 7.89769541424366e-06,
"loss": 0.1829,
"step": 863
},
{
"epoch": 0.607167955024596,
"grad_norm": 0.5161766882263005,
"learning_rate": 7.893195716676754e-06,
"loss": 0.185,
"step": 864
},
{
"epoch": 0.6078706957132818,
"grad_norm": 0.5069263975379218,
"learning_rate": 7.888692493715345e-06,
"loss": 0.1798,
"step": 865
},
{
"epoch": 0.6085734364019677,
"grad_norm": 0.5033869041437443,
"learning_rate": 7.884185750846663e-06,
"loss": 0.1681,
"step": 866
},
{
"epoch": 0.6092761770906535,
"grad_norm": 0.4864070026149026,
"learning_rate": 7.87967549356223e-06,
"loss": 0.1731,
"step": 867
},
{
"epoch": 0.6099789177793394,
"grad_norm": 0.5542622782060213,
"learning_rate": 7.875161727357848e-06,
"loss": 0.2152,
"step": 868
},
{
"epoch": 0.6106816584680252,
"grad_norm": 0.5236156647293086,
"learning_rate": 7.8706444577336e-06,
"loss": 0.2008,
"step": 869
},
{
"epoch": 0.6113843991567112,
"grad_norm": 0.5258759841468933,
"learning_rate": 7.866123690193832e-06,
"loss": 0.2054,
"step": 870
},
{
"epoch": 0.6120871398453971,
"grad_norm": 0.5164374327517247,
"learning_rate": 7.861599430247157e-06,
"loss": 0.1799,
"step": 871
},
{
"epoch": 0.6127898805340829,
"grad_norm": 0.5105618406792289,
"learning_rate": 7.857071683406438e-06,
"loss": 0.1971,
"step": 872
},
{
"epoch": 0.6134926212227688,
"grad_norm": 0.5069610593839458,
"learning_rate": 7.852540455188793e-06,
"loss": 0.1925,
"step": 873
},
{
"epoch": 0.6141953619114546,
"grad_norm": 0.5050671644481959,
"learning_rate": 7.848005751115579e-06,
"loss": 0.19,
"step": 874
},
{
"epoch": 0.6148981026001406,
"grad_norm": 0.5363105611849845,
"learning_rate": 7.843467576712387e-06,
"loss": 0.2099,
"step": 875
},
{
"epoch": 0.6156008432888265,
"grad_norm": 0.5185051863527694,
"learning_rate": 7.838925937509038e-06,
"loss": 0.1797,
"step": 876
},
{
"epoch": 0.6163035839775123,
"grad_norm": 0.5249263436811443,
"learning_rate": 7.83438083903958e-06,
"loss": 0.1787,
"step": 877
},
{
"epoch": 0.6170063246661982,
"grad_norm": 0.4986807783693537,
"learning_rate": 7.829832286842265e-06,
"loss": 0.1955,
"step": 878
},
{
"epoch": 0.617709065354884,
"grad_norm": 0.5533489886091887,
"learning_rate": 7.825280286459561e-06,
"loss": 0.203,
"step": 879
},
{
"epoch": 0.6184118060435699,
"grad_norm": 0.5821797082052109,
"learning_rate": 7.82072484343814e-06,
"loss": 0.2058,
"step": 880
},
{
"epoch": 0.6191145467322557,
"grad_norm": 0.5204351294166948,
"learning_rate": 7.81616596332886e-06,
"loss": 0.1937,
"step": 881
},
{
"epoch": 0.6198172874209417,
"grad_norm": 0.5241918723451235,
"learning_rate": 7.811603651686777e-06,
"loss": 0.1824,
"step": 882
},
{
"epoch": 0.6205200281096276,
"grad_norm": 0.47726678490631336,
"learning_rate": 7.80703791407112e-06,
"loss": 0.1672,
"step": 883
},
{
"epoch": 0.6212227687983134,
"grad_norm": 0.4826936758469551,
"learning_rate": 7.802468756045301e-06,
"loss": 0.1748,
"step": 884
},
{
"epoch": 0.6219255094869993,
"grad_norm": 0.5290298423944293,
"learning_rate": 7.797896183176892e-06,
"loss": 0.1833,
"step": 885
},
{
"epoch": 0.6226282501756851,
"grad_norm": 0.5451208380190049,
"learning_rate": 7.793320201037629e-06,
"loss": 0.1945,
"step": 886
},
{
"epoch": 0.623330990864371,
"grad_norm": 0.5469122761116605,
"learning_rate": 7.788740815203404e-06,
"loss": 0.2048,
"step": 887
},
{
"epoch": 0.624033731553057,
"grad_norm": 0.520441291999963,
"learning_rate": 7.784158031254251e-06,
"loss": 0.1944,
"step": 888
},
{
"epoch": 0.6247364722417428,
"grad_norm": 0.526928075448614,
"learning_rate": 7.779571854774356e-06,
"loss": 0.203,
"step": 889
},
{
"epoch": 0.6254392129304287,
"grad_norm": 0.49886608378637465,
"learning_rate": 7.774982291352022e-06,
"loss": 0.1775,
"step": 890
},
{
"epoch": 0.6261419536191145,
"grad_norm": 0.5170210910797823,
"learning_rate": 7.770389346579696e-06,
"loss": 0.1764,
"step": 891
},
{
"epoch": 0.6268446943078004,
"grad_norm": 0.47869106466151334,
"learning_rate": 7.765793026053934e-06,
"loss": 0.1497,
"step": 892
},
{
"epoch": 0.6275474349964862,
"grad_norm": 0.5126979144891471,
"learning_rate": 7.761193335375411e-06,
"loss": 0.1767,
"step": 893
},
{
"epoch": 0.6282501756851722,
"grad_norm": 0.5193995670376416,
"learning_rate": 7.756590280148904e-06,
"loss": 0.205,
"step": 894
},
{
"epoch": 0.6289529163738581,
"grad_norm": 0.5346694403603001,
"learning_rate": 7.751983865983295e-06,
"loss": 0.1919,
"step": 895
},
{
"epoch": 0.6296556570625439,
"grad_norm": 0.5336291753755593,
"learning_rate": 7.747374098491553e-06,
"loss": 0.1983,
"step": 896
},
{
"epoch": 0.6303583977512298,
"grad_norm": 0.49993946624577135,
"learning_rate": 7.742760983290738e-06,
"loss": 0.1716,
"step": 897
},
{
"epoch": 0.6310611384399156,
"grad_norm": 0.5300665843429727,
"learning_rate": 7.73814452600199e-06,
"loss": 0.1784,
"step": 898
},
{
"epoch": 0.6317638791286015,
"grad_norm": 0.503470906387593,
"learning_rate": 7.733524732250515e-06,
"loss": 0.1781,
"step": 899
},
{
"epoch": 0.6324666198172875,
"grad_norm": 0.5096260624175444,
"learning_rate": 7.728901607665591e-06,
"loss": 0.1754,
"step": 900
},
{
"epoch": 0.6331693605059733,
"grad_norm": 0.5838253255797453,
"learning_rate": 7.724275157880551e-06,
"loss": 0.223,
"step": 901
},
{
"epoch": 0.6338721011946592,
"grad_norm": 0.5084981196070715,
"learning_rate": 7.719645388532779e-06,
"loss": 0.1897,
"step": 902
},
{
"epoch": 0.634574841883345,
"grad_norm": 0.528356961734376,
"learning_rate": 7.71501230526371e-06,
"loss": 0.2007,
"step": 903
},
{
"epoch": 0.6352775825720309,
"grad_norm": 0.5218892843740671,
"learning_rate": 7.71037591371881e-06,
"loss": 0.1865,
"step": 904
},
{
"epoch": 0.6359803232607167,
"grad_norm": 0.4932257464154493,
"learning_rate": 7.705736219547579e-06,
"loss": 0.1703,
"step": 905
},
{
"epoch": 0.6366830639494027,
"grad_norm": 0.4958132069555908,
"learning_rate": 7.701093228403543e-06,
"loss": 0.1813,
"step": 906
},
{
"epoch": 0.6373858046380886,
"grad_norm": 0.5242560868098635,
"learning_rate": 7.696446945944241e-06,
"loss": 0.1986,
"step": 907
},
{
"epoch": 0.6380885453267744,
"grad_norm": 0.5334976116325654,
"learning_rate": 7.691797377831226e-06,
"loss": 0.2066,
"step": 908
},
{
"epoch": 0.6387912860154603,
"grad_norm": 0.5262699778377731,
"learning_rate": 7.687144529730058e-06,
"loss": 0.1997,
"step": 909
},
{
"epoch": 0.6394940267041461,
"grad_norm": 0.49950063093797237,
"learning_rate": 7.682488407310284e-06,
"loss": 0.1774,
"step": 910
},
{
"epoch": 0.640196767392832,
"grad_norm": 0.5334542098027915,
"learning_rate": 7.67782901624545e-06,
"loss": 0.1871,
"step": 911
},
{
"epoch": 0.640899508081518,
"grad_norm": 0.5185057986662502,
"learning_rate": 7.673166362213077e-06,
"loss": 0.1698,
"step": 912
},
{
"epoch": 0.6416022487702038,
"grad_norm": 0.5027268618275128,
"learning_rate": 7.668500450894674e-06,
"loss": 0.1774,
"step": 913
},
{
"epoch": 0.6423049894588897,
"grad_norm": 0.5301993082173253,
"learning_rate": 7.663831287975702e-06,
"loss": 0.1883,
"step": 914
},
{
"epoch": 0.6430077301475755,
"grad_norm": 0.5217885639264032,
"learning_rate": 7.659158879145599e-06,
"loss": 0.1956,
"step": 915
},
{
"epoch": 0.6437104708362614,
"grad_norm": 0.5381047788093676,
"learning_rate": 7.654483230097752e-06,
"loss": 0.1992,
"step": 916
},
{
"epoch": 0.6444132115249473,
"grad_norm": 0.5392291589420021,
"learning_rate": 7.649804346529493e-06,
"loss": 0.1807,
"step": 917
},
{
"epoch": 0.6451159522136332,
"grad_norm": 0.47877224169561405,
"learning_rate": 7.645122234142103e-06,
"loss": 0.1527,
"step": 918
},
{
"epoch": 0.6458186929023191,
"grad_norm": 0.5295518466347142,
"learning_rate": 7.640436898640795e-06,
"loss": 0.2115,
"step": 919
},
{
"epoch": 0.6465214335910049,
"grad_norm": 0.4909132810206349,
"learning_rate": 7.635748345734702e-06,
"loss": 0.1599,
"step": 920
},
{
"epoch": 0.6472241742796908,
"grad_norm": 0.4899660738658723,
"learning_rate": 7.63105658113689e-06,
"loss": 0.1817,
"step": 921
},
{
"epoch": 0.6479269149683766,
"grad_norm": 0.48355409348428285,
"learning_rate": 7.626361610564325e-06,
"loss": 0.1744,
"step": 922
},
{
"epoch": 0.6486296556570625,
"grad_norm": 0.4924250519507862,
"learning_rate": 7.6216634397378905e-06,
"loss": 0.182,
"step": 923
},
{
"epoch": 0.6493323963457485,
"grad_norm": 0.4927644815358553,
"learning_rate": 7.616962074382364e-06,
"loss": 0.1723,
"step": 924
},
{
"epoch": 0.6500351370344343,
"grad_norm": 0.4984759900130036,
"learning_rate": 7.612257520226418e-06,
"loss": 0.1775,
"step": 925
},
{
"epoch": 0.6507378777231202,
"grad_norm": 0.5203766307202669,
"learning_rate": 7.607549783002608e-06,
"loss": 0.1796,
"step": 926
},
{
"epoch": 0.651440618411806,
"grad_norm": 0.5033951336673999,
"learning_rate": 7.602838868447373e-06,
"loss": 0.1663,
"step": 927
},
{
"epoch": 0.6521433591004919,
"grad_norm": 0.5424370742405987,
"learning_rate": 7.598124782301015e-06,
"loss": 0.2023,
"step": 928
},
{
"epoch": 0.6528460997891778,
"grad_norm": 0.5151861951466656,
"learning_rate": 7.593407530307709e-06,
"loss": 0.193,
"step": 929
},
{
"epoch": 0.6535488404778637,
"grad_norm": 0.49581799095048523,
"learning_rate": 7.588687118215485e-06,
"loss": 0.1928,
"step": 930
},
{
"epoch": 0.6542515811665496,
"grad_norm": 0.5036985985813803,
"learning_rate": 7.583963551776221e-06,
"loss": 0.1722,
"step": 931
},
{
"epoch": 0.6549543218552354,
"grad_norm": 0.5174965530179328,
"learning_rate": 7.579236836745643e-06,
"loss": 0.2037,
"step": 932
},
{
"epoch": 0.6556570625439213,
"grad_norm": 0.5098346365676857,
"learning_rate": 7.5745069788833094e-06,
"loss": 0.1822,
"step": 933
},
{
"epoch": 0.6563598032326071,
"grad_norm": 0.506190343396925,
"learning_rate": 7.569773983952611e-06,
"loss": 0.1906,
"step": 934
},
{
"epoch": 0.657062543921293,
"grad_norm": 0.5133813688467773,
"learning_rate": 7.56503785772076e-06,
"loss": 0.1992,
"step": 935
},
{
"epoch": 0.657765284609979,
"grad_norm": 0.47411929585172563,
"learning_rate": 7.560298605958782e-06,
"loss": 0.1746,
"step": 936
},
{
"epoch": 0.6584680252986648,
"grad_norm": 0.5542559408000669,
"learning_rate": 7.555556234441519e-06,
"loss": 0.2363,
"step": 937
},
{
"epoch": 0.6591707659873507,
"grad_norm": 0.5157859159826783,
"learning_rate": 7.550810748947605e-06,
"loss": 0.2001,
"step": 938
},
{
"epoch": 0.6598735066760365,
"grad_norm": 0.5377402338987854,
"learning_rate": 7.546062155259473e-06,
"loss": 0.2352,
"step": 939
},
{
"epoch": 0.6605762473647224,
"grad_norm": 0.5468943850332758,
"learning_rate": 7.541310459163343e-06,
"loss": 0.2265,
"step": 940
},
{
"epoch": 0.6612789880534083,
"grad_norm": 0.5222470713922112,
"learning_rate": 7.536555666449214e-06,
"loss": 0.1834,
"step": 941
},
{
"epoch": 0.6619817287420942,
"grad_norm": 0.5145793565709798,
"learning_rate": 7.5317977829108605e-06,
"loss": 0.1915,
"step": 942
},
{
"epoch": 0.6626844694307801,
"grad_norm": 0.5302885963092634,
"learning_rate": 7.5270368143458216e-06,
"loss": 0.2008,
"step": 943
},
{
"epoch": 0.6633872101194659,
"grad_norm": 0.5305335077958598,
"learning_rate": 7.522272766555397e-06,
"loss": 0.2205,
"step": 944
},
{
"epoch": 0.6640899508081518,
"grad_norm": 0.5180420979313441,
"learning_rate": 7.517505645344636e-06,
"loss": 0.1917,
"step": 945
},
{
"epoch": 0.6647926914968376,
"grad_norm": 0.5202273362852701,
"learning_rate": 7.512735456522333e-06,
"loss": 0.1797,
"step": 946
},
{
"epoch": 0.6654954321855235,
"grad_norm": 0.5230777310370995,
"learning_rate": 7.507962205901026e-06,
"loss": 0.1942,
"step": 947
},
{
"epoch": 0.6661981728742095,
"grad_norm": 0.5216832102567025,
"learning_rate": 7.503185899296974e-06,
"loss": 0.1949,
"step": 948
},
{
"epoch": 0.6669009135628953,
"grad_norm": 0.5056140211785736,
"learning_rate": 7.498406542530173e-06,
"loss": 0.1721,
"step": 949
},
{
"epoch": 0.6676036542515812,
"grad_norm": 0.527215934538876,
"learning_rate": 7.4936241414243185e-06,
"loss": 0.2235,
"step": 950
},
{
"epoch": 0.668306394940267,
"grad_norm": 0.5014203639608845,
"learning_rate": 7.488838701806832e-06,
"loss": 0.1665,
"step": 951
},
{
"epoch": 0.6690091356289529,
"grad_norm": 0.529468100126926,
"learning_rate": 7.484050229508826e-06,
"loss": 0.2088,
"step": 952
},
{
"epoch": 0.6697118763176388,
"grad_norm": 0.5140119884370032,
"learning_rate": 7.479258730365117e-06,
"loss": 0.1776,
"step": 953
},
{
"epoch": 0.6704146170063247,
"grad_norm": 0.5196864861391487,
"learning_rate": 7.474464210214202e-06,
"loss": 0.1813,
"step": 954
},
{
"epoch": 0.6711173576950106,
"grad_norm": 0.5156500667877755,
"learning_rate": 7.469666674898264e-06,
"loss": 0.187,
"step": 955
},
{
"epoch": 0.6718200983836964,
"grad_norm": 0.49244989255735266,
"learning_rate": 7.464866130263159e-06,
"loss": 0.1745,
"step": 956
},
{
"epoch": 0.6725228390723823,
"grad_norm": 0.5259031998968214,
"learning_rate": 7.4600625821584095e-06,
"loss": 0.203,
"step": 957
},
{
"epoch": 0.6732255797610681,
"grad_norm": 0.502301194672857,
"learning_rate": 7.4552560364371975e-06,
"loss": 0.1908,
"step": 958
},
{
"epoch": 0.673928320449754,
"grad_norm": 0.5107269319280141,
"learning_rate": 7.4504464989563575e-06,
"loss": 0.2006,
"step": 959
},
{
"epoch": 0.67463106113844,
"grad_norm": 0.5118724541676758,
"learning_rate": 7.44563397557637e-06,
"loss": 0.1717,
"step": 960
},
{
"epoch": 0.6753338018271258,
"grad_norm": 0.5143114162067215,
"learning_rate": 7.4408184721613565e-06,
"loss": 0.1754,
"step": 961
},
{
"epoch": 0.6760365425158117,
"grad_norm": 0.5083668965935106,
"learning_rate": 7.435999994579062e-06,
"loss": 0.2088,
"step": 962
},
{
"epoch": 0.6767392832044975,
"grad_norm": 0.4881974079162172,
"learning_rate": 7.431178548700866e-06,
"loss": 0.1615,
"step": 963
},
{
"epoch": 0.6774420238931834,
"grad_norm": 0.5113043533333093,
"learning_rate": 7.426354140401756e-06,
"loss": 0.1948,
"step": 964
},
{
"epoch": 0.6781447645818693,
"grad_norm": 0.48680437305335184,
"learning_rate": 7.421526775560334e-06,
"loss": 0.1735,
"step": 965
},
{
"epoch": 0.6788475052705552,
"grad_norm": 0.5405627885402856,
"learning_rate": 7.4166964600588035e-06,
"loss": 0.1786,
"step": 966
},
{
"epoch": 0.6795502459592411,
"grad_norm": 0.4905377754918616,
"learning_rate": 7.411863199782962e-06,
"loss": 0.1836,
"step": 967
},
{
"epoch": 0.6802529866479269,
"grad_norm": 0.5347687574929526,
"learning_rate": 7.4070270006221975e-06,
"loss": 0.1757,
"step": 968
},
{
"epoch": 0.6809557273366128,
"grad_norm": 0.5187266886970135,
"learning_rate": 7.402187868469478e-06,
"loss": 0.1709,
"step": 969
},
{
"epoch": 0.6816584680252986,
"grad_norm": 0.44063607377299313,
"learning_rate": 7.397345809221346e-06,
"loss": 0.13,
"step": 970
},
{
"epoch": 0.6823612087139845,
"grad_norm": 0.5055942136497711,
"learning_rate": 7.392500828777909e-06,
"loss": 0.1911,
"step": 971
},
{
"epoch": 0.6830639494026705,
"grad_norm": 0.49986827112331206,
"learning_rate": 7.387652933042835e-06,
"loss": 0.169,
"step": 972
},
{
"epoch": 0.6837666900913563,
"grad_norm": 0.5301476942772901,
"learning_rate": 7.382802127923346e-06,
"loss": 0.2122,
"step": 973
},
{
"epoch": 0.6844694307800422,
"grad_norm": 0.49785388875502984,
"learning_rate": 7.377948419330206e-06,
"loss": 0.1609,
"step": 974
},
{
"epoch": 0.685172171468728,
"grad_norm": 0.5250134490606625,
"learning_rate": 7.3730918131777215e-06,
"loss": 0.2135,
"step": 975
},
{
"epoch": 0.6858749121574139,
"grad_norm": 0.5034261014309968,
"learning_rate": 7.368232315383721e-06,
"loss": 0.1799,
"step": 976
},
{
"epoch": 0.6865776528460998,
"grad_norm": 0.45840308420918235,
"learning_rate": 7.363369931869568e-06,
"loss": 0.1697,
"step": 977
},
{
"epoch": 0.6872803935347856,
"grad_norm": 0.5209810656647423,
"learning_rate": 7.358504668560134e-06,
"loss": 0.1856,
"step": 978
},
{
"epoch": 0.6879831342234716,
"grad_norm": 0.511510447533771,
"learning_rate": 7.353636531383802e-06,
"loss": 0.1846,
"step": 979
},
{
"epoch": 0.6886858749121574,
"grad_norm": 0.49950476702778257,
"learning_rate": 7.348765526272457e-06,
"loss": 0.1783,
"step": 980
},
{
"epoch": 0.6893886156008433,
"grad_norm": 0.5188367362925587,
"learning_rate": 7.34389165916148e-06,
"loss": 0.1813,
"step": 981
},
{
"epoch": 0.6900913562895291,
"grad_norm": 0.5367080926443806,
"learning_rate": 7.339014935989734e-06,
"loss": 0.1966,
"step": 982
},
{
"epoch": 0.690794096978215,
"grad_norm": 0.5430848597714294,
"learning_rate": 7.334135362699571e-06,
"loss": 0.1984,
"step": 983
},
{
"epoch": 0.691496837666901,
"grad_norm": 0.5279381895210722,
"learning_rate": 7.329252945236808e-06,
"loss": 0.1971,
"step": 984
},
{
"epoch": 0.6921995783555868,
"grad_norm": 0.5465587333436775,
"learning_rate": 7.324367689550732e-06,
"loss": 0.1978,
"step": 985
},
{
"epoch": 0.6929023190442727,
"grad_norm": 0.5494700126422907,
"learning_rate": 7.319479601594085e-06,
"loss": 0.222,
"step": 986
},
{
"epoch": 0.6936050597329585,
"grad_norm": 0.5133356544069808,
"learning_rate": 7.3145886873230655e-06,
"loss": 0.198,
"step": 987
},
{
"epoch": 0.6943078004216444,
"grad_norm": 0.5284662389797274,
"learning_rate": 7.309694952697308e-06,
"loss": 0.1888,
"step": 988
},
{
"epoch": 0.6950105411103303,
"grad_norm": 0.49633826165910416,
"learning_rate": 7.304798403679893e-06,
"loss": 0.1655,
"step": 989
},
{
"epoch": 0.6957132817990161,
"grad_norm": 0.49626789953298106,
"learning_rate": 7.299899046237323e-06,
"loss": 0.1825,
"step": 990
},
{
"epoch": 0.6964160224877021,
"grad_norm": 0.5168604222500065,
"learning_rate": 7.294996886339526e-06,
"loss": 0.1905,
"step": 991
},
{
"epoch": 0.6971187631763879,
"grad_norm": 0.5148459672208613,
"learning_rate": 7.290091929959843e-06,
"loss": 0.1894,
"step": 992
},
{
"epoch": 0.6978215038650738,
"grad_norm": 0.5108835468867117,
"learning_rate": 7.285184183075025e-06,
"loss": 0.1824,
"step": 993
},
{
"epoch": 0.6985242445537596,
"grad_norm": 0.5227565151551233,
"learning_rate": 7.2802736516652205e-06,
"loss": 0.2019,
"step": 994
},
{
"epoch": 0.6992269852424455,
"grad_norm": 0.5326973237464283,
"learning_rate": 7.275360341713973e-06,
"loss": 0.2094,
"step": 995
},
{
"epoch": 0.6999297259311315,
"grad_norm": 0.5614700009477083,
"learning_rate": 7.270444259208211e-06,
"loss": 0.1801,
"step": 996
},
{
"epoch": 0.7006324666198173,
"grad_norm": 0.501410180912708,
"learning_rate": 7.265525410138242e-06,
"loss": 0.1675,
"step": 997
},
{
"epoch": 0.7013352073085032,
"grad_norm": 0.5599442703154489,
"learning_rate": 7.2606038004977435e-06,
"loss": 0.2242,
"step": 998
},
{
"epoch": 0.702037947997189,
"grad_norm": 0.5144840354436657,
"learning_rate": 7.255679436283757e-06,
"loss": 0.1639,
"step": 999
},
{
"epoch": 0.7027406886858749,
"grad_norm": 0.5309811915865177,
"learning_rate": 7.250752323496679e-06,
"loss": 0.1879,
"step": 1000
},
{
"epoch": 0.7027406886858749,
"eval_loss": 0.18907217681407928,
"eval_runtime": 10.8577,
"eval_samples_per_second": 21.183,
"eval_steps_per_second": 5.342,
"step": 1000
},
{
"epoch": 0.7034434293745608,
"grad_norm": 0.48234786437812116,
"learning_rate": 7.24582246814026e-06,
"loss": 0.1671,
"step": 1001
},
{
"epoch": 0.7041461700632466,
"grad_norm": 0.49996231689533716,
"learning_rate": 7.240889876221589e-06,
"loss": 0.1752,
"step": 1002
},
{
"epoch": 0.7048489107519326,
"grad_norm": 0.5525556422783636,
"learning_rate": 7.2359545537510875e-06,
"loss": 0.2075,
"step": 1003
},
{
"epoch": 0.7055516514406184,
"grad_norm": 0.5944880911698127,
"learning_rate": 7.23101650674251e-06,
"loss": 0.2378,
"step": 1004
},
{
"epoch": 0.7062543921293043,
"grad_norm": 0.5578191512662339,
"learning_rate": 7.226075741212923e-06,
"loss": 0.2067,
"step": 1005
},
{
"epoch": 0.7069571328179901,
"grad_norm": 0.5061065739706729,
"learning_rate": 7.221132263182713e-06,
"loss": 0.1811,
"step": 1006
},
{
"epoch": 0.707659873506676,
"grad_norm": 0.542622669313164,
"learning_rate": 7.216186078675569e-06,
"loss": 0.216,
"step": 1007
},
{
"epoch": 0.708362614195362,
"grad_norm": 0.5030138850034241,
"learning_rate": 7.211237193718476e-06,
"loss": 0.199,
"step": 1008
},
{
"epoch": 0.7090653548840478,
"grad_norm": 0.4916488160349469,
"learning_rate": 7.206285614341711e-06,
"loss": 0.1784,
"step": 1009
},
{
"epoch": 0.7097680955727337,
"grad_norm": 0.5035817623689635,
"learning_rate": 7.201331346578836e-06,
"loss": 0.1619,
"step": 1010
},
{
"epoch": 0.7104708362614195,
"grad_norm": 0.4952692089496329,
"learning_rate": 7.196374396466686e-06,
"loss": 0.1748,
"step": 1011
},
{
"epoch": 0.7111735769501054,
"grad_norm": 0.5311768647808931,
"learning_rate": 7.191414770045364e-06,
"loss": 0.1725,
"step": 1012
},
{
"epoch": 0.7118763176387913,
"grad_norm": 0.5397785974634004,
"learning_rate": 7.186452473358238e-06,
"loss": 0.1884,
"step": 1013
},
{
"epoch": 0.7125790583274771,
"grad_norm": 0.5418017219505858,
"learning_rate": 7.181487512451927e-06,
"loss": 0.2033,
"step": 1014
},
{
"epoch": 0.7132817990161631,
"grad_norm": 0.5337125176776485,
"learning_rate": 7.176519893376296e-06,
"loss": 0.1905,
"step": 1015
},
{
"epoch": 0.7139845397048489,
"grad_norm": 0.5233510689055844,
"learning_rate": 7.17154962218445e-06,
"loss": 0.2035,
"step": 1016
},
{
"epoch": 0.7146872803935348,
"grad_norm": 0.4926954442413844,
"learning_rate": 7.1665767049327284e-06,
"loss": 0.1592,
"step": 1017
},
{
"epoch": 0.7153900210822206,
"grad_norm": 0.48443312603006383,
"learning_rate": 7.161601147680688e-06,
"loss": 0.153,
"step": 1018
},
{
"epoch": 0.7160927617709065,
"grad_norm": 0.49655894092848546,
"learning_rate": 7.156622956491107e-06,
"loss": 0.1628,
"step": 1019
},
{
"epoch": 0.7167955024595924,
"grad_norm": 0.5256199048045755,
"learning_rate": 7.1516421374299735e-06,
"loss": 0.2129,
"step": 1020
},
{
"epoch": 0.7174982431482783,
"grad_norm": 0.5100591937912284,
"learning_rate": 7.146658696566478e-06,
"loss": 0.1711,
"step": 1021
},
{
"epoch": 0.7182009838369642,
"grad_norm": 0.49456212678014866,
"learning_rate": 7.141672639973e-06,
"loss": 0.1812,
"step": 1022
},
{
"epoch": 0.71890372452565,
"grad_norm": 0.5619739635445484,
"learning_rate": 7.136683973725116e-06,
"loss": 0.2216,
"step": 1023
},
{
"epoch": 0.7196064652143359,
"grad_norm": 0.5412580193904376,
"learning_rate": 7.1316927039015736e-06,
"loss": 0.2073,
"step": 1024
},
{
"epoch": 0.7203092059030218,
"grad_norm": 0.49843680543795715,
"learning_rate": 7.126698836584296e-06,
"loss": 0.1666,
"step": 1025
},
{
"epoch": 0.7210119465917076,
"grad_norm": 0.49802440745701665,
"learning_rate": 7.121702377858375e-06,
"loss": 0.1772,
"step": 1026
},
{
"epoch": 0.7217146872803936,
"grad_norm": 0.4984117313770376,
"learning_rate": 7.116703333812055e-06,
"loss": 0.1877,
"step": 1027
},
{
"epoch": 0.7224174279690794,
"grad_norm": 0.501178356120356,
"learning_rate": 7.111701710536732e-06,
"loss": 0.1696,
"step": 1028
},
{
"epoch": 0.7231201686577653,
"grad_norm": 0.5135210124439293,
"learning_rate": 7.106697514126947e-06,
"loss": 0.1806,
"step": 1029
},
{
"epoch": 0.7238229093464511,
"grad_norm": 0.5281606401354669,
"learning_rate": 7.101690750680373e-06,
"loss": 0.1833,
"step": 1030
},
{
"epoch": 0.724525650035137,
"grad_norm": 0.5126826359434293,
"learning_rate": 7.096681426297814e-06,
"loss": 0.1913,
"step": 1031
},
{
"epoch": 0.725228390723823,
"grad_norm": 0.48162715829056846,
"learning_rate": 7.091669547083193e-06,
"loss": 0.1763,
"step": 1032
},
{
"epoch": 0.7259311314125088,
"grad_norm": 0.5208495206878236,
"learning_rate": 7.0866551191435464e-06,
"loss": 0.1921,
"step": 1033
},
{
"epoch": 0.7266338721011947,
"grad_norm": 0.5419110976510364,
"learning_rate": 7.081638148589015e-06,
"loss": 0.1838,
"step": 1034
},
{
"epoch": 0.7273366127898805,
"grad_norm": 0.530400846773951,
"learning_rate": 7.07661864153284e-06,
"loss": 0.1921,
"step": 1035
},
{
"epoch": 0.7280393534785664,
"grad_norm": 0.5244327142014001,
"learning_rate": 7.071596604091353e-06,
"loss": 0.1915,
"step": 1036
},
{
"epoch": 0.7287420941672523,
"grad_norm": 0.5120455453570685,
"learning_rate": 7.066572042383967e-06,
"loss": 0.1948,
"step": 1037
},
{
"epoch": 0.7294448348559381,
"grad_norm": 0.49788820003147394,
"learning_rate": 7.061544962533174e-06,
"loss": 0.1699,
"step": 1038
},
{
"epoch": 0.7301475755446241,
"grad_norm": 0.5101121707543748,
"learning_rate": 7.056515370664529e-06,
"loss": 0.1649,
"step": 1039
},
{
"epoch": 0.7308503162333099,
"grad_norm": 0.5015108857525926,
"learning_rate": 7.051483272906656e-06,
"loss": 0.1601,
"step": 1040
},
{
"epoch": 0.7315530569219958,
"grad_norm": 0.5208544509411792,
"learning_rate": 7.0464486753912255e-06,
"loss": 0.1897,
"step": 1041
},
{
"epoch": 0.7322557976106817,
"grad_norm": 0.513293497339928,
"learning_rate": 7.041411584252956e-06,
"loss": 0.1799,
"step": 1042
},
{
"epoch": 0.7329585382993675,
"grad_norm": 0.4924607506595166,
"learning_rate": 7.036372005629606e-06,
"loss": 0.1629,
"step": 1043
},
{
"epoch": 0.7336612789880534,
"grad_norm": 0.5392223533203832,
"learning_rate": 7.0313299456619635e-06,
"loss": 0.2173,
"step": 1044
},
{
"epoch": 0.7343640196767393,
"grad_norm": 0.5506431334123577,
"learning_rate": 7.026285410493839e-06,
"loss": 0.1823,
"step": 1045
},
{
"epoch": 0.7350667603654252,
"grad_norm": 0.5461836214581263,
"learning_rate": 7.021238406272064e-06,
"loss": 0.1703,
"step": 1046
},
{
"epoch": 0.735769501054111,
"grad_norm": 0.523312140712101,
"learning_rate": 7.016188939146471e-06,
"loss": 0.1802,
"step": 1047
},
{
"epoch": 0.7364722417427969,
"grad_norm": 0.5059982446931061,
"learning_rate": 7.011137015269901e-06,
"loss": 0.1938,
"step": 1048
},
{
"epoch": 0.7371749824314828,
"grad_norm": 0.600343031316344,
"learning_rate": 7.006082640798183e-06,
"loss": 0.2044,
"step": 1049
},
{
"epoch": 0.7378777231201686,
"grad_norm": 0.5410248828748458,
"learning_rate": 7.0010258218901375e-06,
"loss": 0.1974,
"step": 1050
},
{
"epoch": 0.7385804638088546,
"grad_norm": 0.5211734683573049,
"learning_rate": 6.995966564707556e-06,
"loss": 0.1835,
"step": 1051
},
{
"epoch": 0.7392832044975404,
"grad_norm": 0.48255922615953734,
"learning_rate": 6.99090487541521e-06,
"loss": 0.1691,
"step": 1052
},
{
"epoch": 0.7399859451862263,
"grad_norm": 0.5417704830229917,
"learning_rate": 6.985840760180824e-06,
"loss": 0.1936,
"step": 1053
},
{
"epoch": 0.7406886858749122,
"grad_norm": 0.4962640922424155,
"learning_rate": 6.980774225175092e-06,
"loss": 0.1684,
"step": 1054
},
{
"epoch": 0.741391426563598,
"grad_norm": 0.5198225259061435,
"learning_rate": 6.975705276571645e-06,
"loss": 0.1757,
"step": 1055
},
{
"epoch": 0.7420941672522839,
"grad_norm": 0.46801334813073625,
"learning_rate": 6.970633920547059e-06,
"loss": 0.1738,
"step": 1056
},
{
"epoch": 0.7427969079409698,
"grad_norm": 0.5252907799329338,
"learning_rate": 6.965560163280844e-06,
"loss": 0.1995,
"step": 1057
},
{
"epoch": 0.7434996486296557,
"grad_norm": 0.5081424969197407,
"learning_rate": 6.960484010955436e-06,
"loss": 0.1875,
"step": 1058
},
{
"epoch": 0.7442023893183415,
"grad_norm": 0.4818396772545229,
"learning_rate": 6.955405469756189e-06,
"loss": 0.1535,
"step": 1059
},
{
"epoch": 0.7449051300070274,
"grad_norm": 0.534075500585145,
"learning_rate": 6.950324545871367e-06,
"loss": 0.2258,
"step": 1060
},
{
"epoch": 0.7456078706957133,
"grad_norm": 0.5153747390981624,
"learning_rate": 6.945241245492139e-06,
"loss": 0.1804,
"step": 1061
},
{
"epoch": 0.7463106113843991,
"grad_norm": 0.48028126074749794,
"learning_rate": 6.940155574812571e-06,
"loss": 0.1666,
"step": 1062
},
{
"epoch": 0.7470133520730851,
"grad_norm": 0.5221059933317176,
"learning_rate": 6.935067540029608e-06,
"loss": 0.2155,
"step": 1063
},
{
"epoch": 0.7477160927617709,
"grad_norm": 0.5128145113353367,
"learning_rate": 6.929977147343092e-06,
"loss": 0.1907,
"step": 1064
},
{
"epoch": 0.7484188334504568,
"grad_norm": 0.5179655111085886,
"learning_rate": 6.924884402955722e-06,
"loss": 0.2095,
"step": 1065
},
{
"epoch": 0.7491215741391427,
"grad_norm": 0.5374983900631398,
"learning_rate": 6.919789313073072e-06,
"loss": 0.2189,
"step": 1066
},
{
"epoch": 0.7498243148278285,
"grad_norm": 0.5066549150752421,
"learning_rate": 6.914691883903573e-06,
"loss": 0.1855,
"step": 1067
},
{
"epoch": 0.7505270555165144,
"grad_norm": 0.4962869754285091,
"learning_rate": 6.909592121658504e-06,
"loss": 0.1562,
"step": 1068
},
{
"epoch": 0.7512297962052003,
"grad_norm": 0.5337154685194031,
"learning_rate": 6.904490032551987e-06,
"loss": 0.1911,
"step": 1069
},
{
"epoch": 0.7519325368938862,
"grad_norm": 0.5169733664567905,
"learning_rate": 6.899385622800981e-06,
"loss": 0.1881,
"step": 1070
},
{
"epoch": 0.752635277582572,
"grad_norm": 0.4792931662791021,
"learning_rate": 6.894278898625272e-06,
"loss": 0.1688,
"step": 1071
},
{
"epoch": 0.7533380182712579,
"grad_norm": 0.48103340714870696,
"learning_rate": 6.889169866247466e-06,
"loss": 0.1734,
"step": 1072
},
{
"epoch": 0.7540407589599438,
"grad_norm": 0.4632594693657935,
"learning_rate": 6.8840585318929806e-06,
"loss": 0.1507,
"step": 1073
},
{
"epoch": 0.7547434996486296,
"grad_norm": 0.5078797250921595,
"learning_rate": 6.8789449017900425e-06,
"loss": 0.1751,
"step": 1074
},
{
"epoch": 0.7554462403373156,
"grad_norm": 0.5330746582913394,
"learning_rate": 6.873828982169669e-06,
"loss": 0.1866,
"step": 1075
},
{
"epoch": 0.7561489810260014,
"grad_norm": 0.5006495318019759,
"learning_rate": 6.868710779265675e-06,
"loss": 0.1876,
"step": 1076
},
{
"epoch": 0.7568517217146873,
"grad_norm": 0.539588386937122,
"learning_rate": 6.8635902993146485e-06,
"loss": 0.1996,
"step": 1077
},
{
"epoch": 0.7575544624033732,
"grad_norm": 0.5154656321020359,
"learning_rate": 6.858467548555963e-06,
"loss": 0.1724,
"step": 1078
},
{
"epoch": 0.758257203092059,
"grad_norm": 0.5531641648558123,
"learning_rate": 6.853342533231748e-06,
"loss": 0.1842,
"step": 1079
},
{
"epoch": 0.7589599437807449,
"grad_norm": 0.5651429168073647,
"learning_rate": 6.848215259586901e-06,
"loss": 0.2352,
"step": 1080
},
{
"epoch": 0.7596626844694307,
"grad_norm": 0.48594498448781775,
"learning_rate": 6.8430857338690655e-06,
"loss": 0.1544,
"step": 1081
},
{
"epoch": 0.7603654251581167,
"grad_norm": 0.504044589169003,
"learning_rate": 6.837953962328635e-06,
"loss": 0.158,
"step": 1082
},
{
"epoch": 0.7610681658468025,
"grad_norm": 0.525608593831342,
"learning_rate": 6.832819951218732e-06,
"loss": 0.1864,
"step": 1083
},
{
"epoch": 0.7617709065354884,
"grad_norm": 0.5467297549015869,
"learning_rate": 6.827683706795216e-06,
"loss": 0.2165,
"step": 1084
},
{
"epoch": 0.7624736472241743,
"grad_norm": 0.5335682743668645,
"learning_rate": 6.82254523531666e-06,
"loss": 0.1772,
"step": 1085
},
{
"epoch": 0.7631763879128601,
"grad_norm": 0.4964276321848615,
"learning_rate": 6.817404543044358e-06,
"loss": 0.1783,
"step": 1086
},
{
"epoch": 0.763879128601546,
"grad_norm": 0.5265906150660045,
"learning_rate": 6.812261636242303e-06,
"loss": 0.188,
"step": 1087
},
{
"epoch": 0.7645818692902319,
"grad_norm": 0.5139859252183324,
"learning_rate": 6.807116521177195e-06,
"loss": 0.1774,
"step": 1088
},
{
"epoch": 0.7652846099789178,
"grad_norm": 0.5284736356104716,
"learning_rate": 6.801969204118415e-06,
"loss": 0.1828,
"step": 1089
},
{
"epoch": 0.7659873506676037,
"grad_norm": 0.4576123884410576,
"learning_rate": 6.796819691338035e-06,
"loss": 0.1479,
"step": 1090
},
{
"epoch": 0.7666900913562895,
"grad_norm": 0.5073988935393058,
"learning_rate": 6.7916679891108e-06,
"loss": 0.1765,
"step": 1091
},
{
"epoch": 0.7673928320449754,
"grad_norm": 0.4842579017253722,
"learning_rate": 6.786514103714119e-06,
"loss": 0.186,
"step": 1092
},
{
"epoch": 0.7680955727336612,
"grad_norm": 0.49917544829379384,
"learning_rate": 6.781358041428068e-06,
"loss": 0.1827,
"step": 1093
},
{
"epoch": 0.7687983134223472,
"grad_norm": 0.5140787255287416,
"learning_rate": 6.776199808535371e-06,
"loss": 0.1855,
"step": 1094
},
{
"epoch": 0.769501054111033,
"grad_norm": 0.4919847504130854,
"learning_rate": 6.771039411321397e-06,
"loss": 0.169,
"step": 1095
},
{
"epoch": 0.7702037947997189,
"grad_norm": 0.47780874627390346,
"learning_rate": 6.765876856074156e-06,
"loss": 0.152,
"step": 1096
},
{
"epoch": 0.7709065354884048,
"grad_norm": 0.5202858563610114,
"learning_rate": 6.760712149084282e-06,
"loss": 0.1925,
"step": 1097
},
{
"epoch": 0.7716092761770906,
"grad_norm": 0.5295724117460601,
"learning_rate": 6.755545296645037e-06,
"loss": 0.2081,
"step": 1098
},
{
"epoch": 0.7723120168657766,
"grad_norm": 0.5039771408196154,
"learning_rate": 6.7503763050522904e-06,
"loss": 0.1691,
"step": 1099
},
{
"epoch": 0.7730147575544624,
"grad_norm": 0.49655979674457273,
"learning_rate": 6.745205180604526e-06,
"loss": 0.1794,
"step": 1100
},
{
"epoch": 0.7737174982431483,
"grad_norm": 0.5109049881981059,
"learning_rate": 6.74003192960282e-06,
"loss": 0.1712,
"step": 1101
},
{
"epoch": 0.7744202389318342,
"grad_norm": 0.5048290654661958,
"learning_rate": 6.734856558350842e-06,
"loss": 0.1715,
"step": 1102
},
{
"epoch": 0.77512297962052,
"grad_norm": 0.5387534022945553,
"learning_rate": 6.729679073154845e-06,
"loss": 0.1806,
"step": 1103
},
{
"epoch": 0.7758257203092059,
"grad_norm": 0.5153193952791956,
"learning_rate": 6.724499480323662e-06,
"loss": 0.1828,
"step": 1104
},
{
"epoch": 0.7765284609978917,
"grad_norm": 0.49014211897656185,
"learning_rate": 6.719317786168687e-06,
"loss": 0.1686,
"step": 1105
},
{
"epoch": 0.7772312016865777,
"grad_norm": 0.5228845198917895,
"learning_rate": 6.714133997003878e-06,
"loss": 0.1941,
"step": 1106
},
{
"epoch": 0.7779339423752635,
"grad_norm": 0.5136766056514168,
"learning_rate": 6.708948119145746e-06,
"loss": 0.1595,
"step": 1107
},
{
"epoch": 0.7786366830639494,
"grad_norm": 0.5306146622363762,
"learning_rate": 6.703760158913349e-06,
"loss": 0.1978,
"step": 1108
},
{
"epoch": 0.7793394237526353,
"grad_norm": 0.49993379289456086,
"learning_rate": 6.698570122628276e-06,
"loss": 0.173,
"step": 1109
},
{
"epoch": 0.7800421644413211,
"grad_norm": 0.49442203874511687,
"learning_rate": 6.693378016614657e-06,
"loss": 0.1858,
"step": 1110
},
{
"epoch": 0.780744905130007,
"grad_norm": 0.5137438587782226,
"learning_rate": 6.6881838471991274e-06,
"loss": 0.2078,
"step": 1111
},
{
"epoch": 0.7814476458186929,
"grad_norm": 0.47131004501273516,
"learning_rate": 6.682987620710856e-06,
"loss": 0.1629,
"step": 1112
},
{
"epoch": 0.7821503865073788,
"grad_norm": 0.46698415621575573,
"learning_rate": 6.677789343481501e-06,
"loss": 0.165,
"step": 1113
},
{
"epoch": 0.7828531271960647,
"grad_norm": 0.48925746166723594,
"learning_rate": 6.6725890218452315e-06,
"loss": 0.1571,
"step": 1114
},
{
"epoch": 0.7835558678847505,
"grad_norm": 0.5319566631486711,
"learning_rate": 6.667386662138702e-06,
"loss": 0.193,
"step": 1115
},
{
"epoch": 0.7842586085734364,
"grad_norm": 0.5027355928606957,
"learning_rate": 6.662182270701051e-06,
"loss": 0.1678,
"step": 1116
},
{
"epoch": 0.7849613492621222,
"grad_norm": 0.5446992180433403,
"learning_rate": 6.656975853873895e-06,
"loss": 0.206,
"step": 1117
},
{
"epoch": 0.7856640899508082,
"grad_norm": 0.5374277406573096,
"learning_rate": 6.651767418001314e-06,
"loss": 0.2242,
"step": 1118
},
{
"epoch": 0.786366830639494,
"grad_norm": 0.518162302839699,
"learning_rate": 6.646556969429854e-06,
"loss": 0.1744,
"step": 1119
},
{
"epoch": 0.7870695713281799,
"grad_norm": 0.5170789776181299,
"learning_rate": 6.64134451450851e-06,
"loss": 0.1838,
"step": 1120
},
{
"epoch": 0.7877723120168658,
"grad_norm": 0.4798167702159322,
"learning_rate": 6.636130059588719e-06,
"loss": 0.1585,
"step": 1121
},
{
"epoch": 0.7884750527055516,
"grad_norm": 0.5398354189241774,
"learning_rate": 6.630913611024365e-06,
"loss": 0.1843,
"step": 1122
},
{
"epoch": 0.7891777933942375,
"grad_norm": 0.5365706374888599,
"learning_rate": 6.625695175171747e-06,
"loss": 0.1806,
"step": 1123
},
{
"epoch": 0.7898805340829234,
"grad_norm": 0.5209103560498805,
"learning_rate": 6.6204747583896e-06,
"loss": 0.1985,
"step": 1124
},
{
"epoch": 0.7905832747716093,
"grad_norm": 0.5029116303455078,
"learning_rate": 6.61525236703906e-06,
"loss": 0.1759,
"step": 1125
},
{
"epoch": 0.7912860154602952,
"grad_norm": 0.541377972488622,
"learning_rate": 6.610028007483679e-06,
"loss": 0.2066,
"step": 1126
},
{
"epoch": 0.791988756148981,
"grad_norm": 0.5297123690875152,
"learning_rate": 6.604801686089403e-06,
"loss": 0.1832,
"step": 1127
},
{
"epoch": 0.7926914968376669,
"grad_norm": 0.5278360942405322,
"learning_rate": 6.599573409224567e-06,
"loss": 0.1825,
"step": 1128
},
{
"epoch": 0.7933942375263527,
"grad_norm": 0.5021558885331302,
"learning_rate": 6.59434318325989e-06,
"loss": 0.1702,
"step": 1129
},
{
"epoch": 0.7940969782150387,
"grad_norm": 0.5068400467726666,
"learning_rate": 6.58911101456847e-06,
"loss": 0.1809,
"step": 1130
},
{
"epoch": 0.7947997189037245,
"grad_norm": 0.49468722085985894,
"learning_rate": 6.583876909525766e-06,
"loss": 0.1794,
"step": 1131
},
{
"epoch": 0.7955024595924104,
"grad_norm": 0.5007669274861396,
"learning_rate": 6.578640874509599e-06,
"loss": 0.1791,
"step": 1132
},
{
"epoch": 0.7962052002810963,
"grad_norm": 0.5409971215006368,
"learning_rate": 6.573402915900145e-06,
"loss": 0.2237,
"step": 1133
},
{
"epoch": 0.7969079409697821,
"grad_norm": 0.5399705409644362,
"learning_rate": 6.568163040079918e-06,
"loss": 0.1798,
"step": 1134
},
{
"epoch": 0.797610681658468,
"grad_norm": 0.5320796227271901,
"learning_rate": 6.562921253433771e-06,
"loss": 0.1974,
"step": 1135
},
{
"epoch": 0.7983134223471539,
"grad_norm": 0.5048052724879288,
"learning_rate": 6.557677562348887e-06,
"loss": 0.1686,
"step": 1136
},
{
"epoch": 0.7990161630358398,
"grad_norm": 0.47729262726676874,
"learning_rate": 6.552431973214767e-06,
"loss": 0.1699,
"step": 1137
},
{
"epoch": 0.7997189037245257,
"grad_norm": 0.4985141252624496,
"learning_rate": 6.547184492423227e-06,
"loss": 0.1876,
"step": 1138
},
{
"epoch": 0.8004216444132115,
"grad_norm": 0.4782702198502859,
"learning_rate": 6.541935126368384e-06,
"loss": 0.1498,
"step": 1139
},
{
"epoch": 0.8011243851018974,
"grad_norm": 0.540657184084682,
"learning_rate": 6.536683881446658e-06,
"loss": 0.1824,
"step": 1140
},
{
"epoch": 0.8018271257905832,
"grad_norm": 0.5138414346028792,
"learning_rate": 6.531430764056755e-06,
"loss": 0.1891,
"step": 1141
},
{
"epoch": 0.8025298664792692,
"grad_norm": 0.488009202375122,
"learning_rate": 6.5261757805996605e-06,
"loss": 0.1405,
"step": 1142
},
{
"epoch": 0.803232607167955,
"grad_norm": 0.5012186195889693,
"learning_rate": 6.520918937478639e-06,
"loss": 0.165,
"step": 1143
},
{
"epoch": 0.8039353478566409,
"grad_norm": 0.5127172901148541,
"learning_rate": 6.515660241099217e-06,
"loss": 0.1895,
"step": 1144
},
{
"epoch": 0.8046380885453268,
"grad_norm": 0.591375168506247,
"learning_rate": 6.51039969786918e-06,
"loss": 0.1909,
"step": 1145
},
{
"epoch": 0.8053408292340126,
"grad_norm": 0.5402971286129592,
"learning_rate": 6.5051373141985685e-06,
"loss": 0.2039,
"step": 1146
},
{
"epoch": 0.8060435699226985,
"grad_norm": 0.4986929896241223,
"learning_rate": 6.499873096499656e-06,
"loss": 0.1765,
"step": 1147
},
{
"epoch": 0.8067463106113844,
"grad_norm": 0.5601401970811486,
"learning_rate": 6.49460705118696e-06,
"loss": 0.237,
"step": 1148
},
{
"epoch": 0.8074490513000703,
"grad_norm": 0.5234632851450135,
"learning_rate": 6.489339184677221e-06,
"loss": 0.1865,
"step": 1149
},
{
"epoch": 0.8081517919887562,
"grad_norm": 0.5095651432213575,
"learning_rate": 6.484069503389398e-06,
"loss": 0.1787,
"step": 1150
},
{
"epoch": 0.808854532677442,
"grad_norm": 0.47427988030503876,
"learning_rate": 6.478798013744662e-06,
"loss": 0.1558,
"step": 1151
},
{
"epoch": 0.8095572733661279,
"grad_norm": 0.5283869622250998,
"learning_rate": 6.473524722166391e-06,
"loss": 0.2002,
"step": 1152
},
{
"epoch": 0.8102600140548137,
"grad_norm": 0.49727119184651486,
"learning_rate": 6.468249635080153e-06,
"loss": 0.1984,
"step": 1153
},
{
"epoch": 0.8109627547434997,
"grad_norm": 0.5464292898742243,
"learning_rate": 6.462972758913705e-06,
"loss": 0.2046,
"step": 1154
},
{
"epoch": 0.8116654954321855,
"grad_norm": 0.5250022392156026,
"learning_rate": 6.457694100096988e-06,
"loss": 0.1839,
"step": 1155
},
{
"epoch": 0.8123682361208714,
"grad_norm": 0.5165840127290805,
"learning_rate": 6.452413665062111e-06,
"loss": 0.2008,
"step": 1156
},
{
"epoch": 0.8130709768095573,
"grad_norm": 0.5481547327891854,
"learning_rate": 6.44713146024335e-06,
"loss": 0.1954,
"step": 1157
},
{
"epoch": 0.8137737174982431,
"grad_norm": 0.4874887769913095,
"learning_rate": 6.4418474920771365e-06,
"loss": 0.1685,
"step": 1158
},
{
"epoch": 0.814476458186929,
"grad_norm": 0.5039495230620578,
"learning_rate": 6.436561767002048e-06,
"loss": 0.1783,
"step": 1159
},
{
"epoch": 0.8151791988756149,
"grad_norm": 0.5199968656487806,
"learning_rate": 6.431274291458811e-06,
"loss": 0.1661,
"step": 1160
},
{
"epoch": 0.8158819395643008,
"grad_norm": 0.5027199873157974,
"learning_rate": 6.425985071890273e-06,
"loss": 0.1876,
"step": 1161
},
{
"epoch": 0.8165846802529867,
"grad_norm": 0.5249959950530578,
"learning_rate": 6.420694114741417e-06,
"loss": 0.209,
"step": 1162
},
{
"epoch": 0.8172874209416725,
"grad_norm": 0.484576963911013,
"learning_rate": 6.415401426459338e-06,
"loss": 0.1769,
"step": 1163
},
{
"epoch": 0.8179901616303584,
"grad_norm": 0.5358651554370841,
"learning_rate": 6.410107013493241e-06,
"loss": 0.1907,
"step": 1164
},
{
"epoch": 0.8186929023190442,
"grad_norm": 0.48281374879586475,
"learning_rate": 6.404810882294436e-06,
"loss": 0.15,
"step": 1165
},
{
"epoch": 0.8193956430077302,
"grad_norm": 0.47211990315454627,
"learning_rate": 6.399513039316319e-06,
"loss": 0.156,
"step": 1166
},
{
"epoch": 0.8200983836964161,
"grad_norm": 0.46372230344661597,
"learning_rate": 6.3942134910143805e-06,
"loss": 0.1604,
"step": 1167
},
{
"epoch": 0.8208011243851019,
"grad_norm": 0.5409494486579302,
"learning_rate": 6.388912243846186e-06,
"loss": 0.2035,
"step": 1168
},
{
"epoch": 0.8215038650737878,
"grad_norm": 0.5065033400554584,
"learning_rate": 6.3836093042713665e-06,
"loss": 0.1895,
"step": 1169
},
{
"epoch": 0.8222066057624736,
"grad_norm": 0.5316271171416881,
"learning_rate": 6.378304678751624e-06,
"loss": 0.1932,
"step": 1170
},
{
"epoch": 0.8229093464511595,
"grad_norm": 0.4948881303499293,
"learning_rate": 6.372998373750703e-06,
"loss": 0.1803,
"step": 1171
},
{
"epoch": 0.8236120871398454,
"grad_norm": 0.5321214151082184,
"learning_rate": 6.367690395734407e-06,
"loss": 0.1972,
"step": 1172
},
{
"epoch": 0.8243148278285313,
"grad_norm": 0.5111253171146781,
"learning_rate": 6.362380751170569e-06,
"loss": 0.177,
"step": 1173
},
{
"epoch": 0.8250175685172172,
"grad_norm": 0.49781005241117016,
"learning_rate": 6.35706944652906e-06,
"loss": 0.1733,
"step": 1174
},
{
"epoch": 0.825720309205903,
"grad_norm": 0.539119114081232,
"learning_rate": 6.351756488281766e-06,
"loss": 0.2035,
"step": 1175
},
{
"epoch": 0.8264230498945889,
"grad_norm": 0.5064581464166177,
"learning_rate": 6.346441882902594e-06,
"loss": 0.1712,
"step": 1176
},
{
"epoch": 0.8271257905832747,
"grad_norm": 0.5210623273152416,
"learning_rate": 6.341125636867455e-06,
"loss": 0.194,
"step": 1177
},
{
"epoch": 0.8278285312719607,
"grad_norm": 0.5130472931428885,
"learning_rate": 6.335807756654262e-06,
"loss": 0.1798,
"step": 1178
},
{
"epoch": 0.8285312719606466,
"grad_norm": 0.4972135153196797,
"learning_rate": 6.330488248742914e-06,
"loss": 0.1773,
"step": 1179
},
{
"epoch": 0.8292340126493324,
"grad_norm": 0.5066236011744614,
"learning_rate": 6.325167119615299e-06,
"loss": 0.1831,
"step": 1180
},
{
"epoch": 0.8299367533380183,
"grad_norm": 0.47094120421480634,
"learning_rate": 6.319844375755275e-06,
"loss": 0.163,
"step": 1181
},
{
"epoch": 0.8306394940267041,
"grad_norm": 0.5147685974539552,
"learning_rate": 6.314520023648678e-06,
"loss": 0.1939,
"step": 1182
},
{
"epoch": 0.83134223471539,
"grad_norm": 0.5042441050844522,
"learning_rate": 6.309194069783288e-06,
"loss": 0.1778,
"step": 1183
},
{
"epoch": 0.8320449754040758,
"grad_norm": 0.4675752799091352,
"learning_rate": 6.303866520648851e-06,
"loss": 0.158,
"step": 1184
},
{
"epoch": 0.8327477160927618,
"grad_norm": 0.4998544292032765,
"learning_rate": 6.298537382737048e-06,
"loss": 0.1636,
"step": 1185
},
{
"epoch": 0.8334504567814477,
"grad_norm": 0.4932259110586246,
"learning_rate": 6.2932066625415e-06,
"loss": 0.1776,
"step": 1186
},
{
"epoch": 0.8341531974701335,
"grad_norm": 0.5356101565107714,
"learning_rate": 6.287874366557756e-06,
"loss": 0.1937,
"step": 1187
},
{
"epoch": 0.8348559381588194,
"grad_norm": 0.4798323523373808,
"learning_rate": 6.2825405012832815e-06,
"loss": 0.169,
"step": 1188
},
{
"epoch": 0.8355586788475052,
"grad_norm": 0.5140851871320405,
"learning_rate": 6.2772050732174595e-06,
"loss": 0.1828,
"step": 1189
},
{
"epoch": 0.8362614195361912,
"grad_norm": 0.47825587417926313,
"learning_rate": 6.2718680888615734e-06,
"loss": 0.1613,
"step": 1190
},
{
"epoch": 0.8369641602248771,
"grad_norm": 0.4677851677300216,
"learning_rate": 6.266529554718804e-06,
"loss": 0.1616,
"step": 1191
},
{
"epoch": 0.8376669009135629,
"grad_norm": 0.49528999706107246,
"learning_rate": 6.261189477294221e-06,
"loss": 0.1728,
"step": 1192
},
{
"epoch": 0.8383696416022488,
"grad_norm": 0.5309360153372439,
"learning_rate": 6.255847863094775e-06,
"loss": 0.2056,
"step": 1193
},
{
"epoch": 0.8390723822909346,
"grad_norm": 0.5481499219892606,
"learning_rate": 6.250504718629288e-06,
"loss": 0.1931,
"step": 1194
},
{
"epoch": 0.8397751229796205,
"grad_norm": 0.47889263637156904,
"learning_rate": 6.245160050408446e-06,
"loss": 0.1582,
"step": 1195
},
{
"epoch": 0.8404778636683063,
"grad_norm": 0.4750288921863997,
"learning_rate": 6.2398138649447935e-06,
"loss": 0.156,
"step": 1196
},
{
"epoch": 0.8411806043569923,
"grad_norm": 0.5280516814060533,
"learning_rate": 6.234466168752724e-06,
"loss": 0.2062,
"step": 1197
},
{
"epoch": 0.8418833450456782,
"grad_norm": 0.5281778278031862,
"learning_rate": 6.22911696834847e-06,
"loss": 0.162,
"step": 1198
},
{
"epoch": 0.842586085734364,
"grad_norm": 0.543676618680521,
"learning_rate": 6.223766270250099e-06,
"loss": 0.2266,
"step": 1199
},
{
"epoch": 0.8432888264230499,
"grad_norm": 0.5327014887264019,
"learning_rate": 6.218414080977502e-06,
"loss": 0.2045,
"step": 1200
},
{
"epoch": 0.8439915671117357,
"grad_norm": 0.4655347457613403,
"learning_rate": 6.2130604070523855e-06,
"loss": 0.1555,
"step": 1201
},
{
"epoch": 0.8446943078004217,
"grad_norm": 0.49191482162616434,
"learning_rate": 6.207705254998269e-06,
"loss": 0.1725,
"step": 1202
},
{
"epoch": 0.8453970484891076,
"grad_norm": 0.5130123761446342,
"learning_rate": 6.2023486313404715e-06,
"loss": 0.1863,
"step": 1203
},
{
"epoch": 0.8460997891777934,
"grad_norm": 0.4890407398904942,
"learning_rate": 6.196990542606102e-06,
"loss": 0.1732,
"step": 1204
},
{
"epoch": 0.8468025298664793,
"grad_norm": 0.5205797335179796,
"learning_rate": 6.19163099532406e-06,
"loss": 0.1907,
"step": 1205
},
{
"epoch": 0.8475052705551651,
"grad_norm": 0.5278606523762948,
"learning_rate": 6.186269996025018e-06,
"loss": 0.2092,
"step": 1206
},
{
"epoch": 0.848208011243851,
"grad_norm": 0.5359697801822066,
"learning_rate": 6.18090755124142e-06,
"loss": 0.1889,
"step": 1207
},
{
"epoch": 0.8489107519325368,
"grad_norm": 0.5115121044938924,
"learning_rate": 6.175543667507472e-06,
"loss": 0.1857,
"step": 1208
},
{
"epoch": 0.8496134926212228,
"grad_norm": 0.5600510635643645,
"learning_rate": 6.17017835135913e-06,
"loss": 0.2388,
"step": 1209
},
{
"epoch": 0.8503162333099087,
"grad_norm": 0.4999111546944338,
"learning_rate": 6.1648116093340985e-06,
"loss": 0.1863,
"step": 1210
},
{
"epoch": 0.8510189739985945,
"grad_norm": 0.48107596282402415,
"learning_rate": 6.15944344797182e-06,
"loss": 0.1781,
"step": 1211
},
{
"epoch": 0.8517217146872804,
"grad_norm": 0.5497594325694529,
"learning_rate": 6.154073873813463e-06,
"loss": 0.2167,
"step": 1212
},
{
"epoch": 0.8524244553759662,
"grad_norm": 0.5328284668818077,
"learning_rate": 6.148702893401921e-06,
"loss": 0.1937,
"step": 1213
},
{
"epoch": 0.8531271960646521,
"grad_norm": 0.5136318451918118,
"learning_rate": 6.143330513281799e-06,
"loss": 0.1721,
"step": 1214
},
{
"epoch": 0.8538299367533381,
"grad_norm": 0.4778719513564468,
"learning_rate": 6.137956739999408e-06,
"loss": 0.1486,
"step": 1215
},
{
"epoch": 0.8545326774420239,
"grad_norm": 0.5172461977678994,
"learning_rate": 6.132581580102757e-06,
"loss": 0.196,
"step": 1216
},
{
"epoch": 0.8552354181307098,
"grad_norm": 0.5052295881766661,
"learning_rate": 6.127205040141544e-06,
"loss": 0.1827,
"step": 1217
},
{
"epoch": 0.8559381588193956,
"grad_norm": 0.5585326496987986,
"learning_rate": 6.121827126667149e-06,
"loss": 0.2012,
"step": 1218
},
{
"epoch": 0.8566408995080815,
"grad_norm": 0.48772318069780995,
"learning_rate": 6.116447846232626e-06,
"loss": 0.1624,
"step": 1219
},
{
"epoch": 0.8573436401967673,
"grad_norm": 0.5620286701814415,
"learning_rate": 6.111067205392693e-06,
"loss": 0.2114,
"step": 1220
},
{
"epoch": 0.8580463808854533,
"grad_norm": 0.4768501973943461,
"learning_rate": 6.105685210703728e-06,
"loss": 0.169,
"step": 1221
},
{
"epoch": 0.8587491215741392,
"grad_norm": 0.4879964365664385,
"learning_rate": 6.100301868723758e-06,
"loss": 0.1721,
"step": 1222
},
{
"epoch": 0.859451862262825,
"grad_norm": 0.49211477947291504,
"learning_rate": 6.0949171860124516e-06,
"loss": 0.1589,
"step": 1223
},
{
"epoch": 0.8601546029515109,
"grad_norm": 0.48242821081279824,
"learning_rate": 6.089531169131109e-06,
"loss": 0.1589,
"step": 1224
},
{
"epoch": 0.8608573436401967,
"grad_norm": 0.5446078856755274,
"learning_rate": 6.08414382464266e-06,
"loss": 0.2006,
"step": 1225
},
{
"epoch": 0.8615600843288826,
"grad_norm": 0.5147593100041957,
"learning_rate": 6.078755159111648e-06,
"loss": 0.1864,
"step": 1226
},
{
"epoch": 0.8622628250175686,
"grad_norm": 0.5307757224318073,
"learning_rate": 6.073365179104229e-06,
"loss": 0.2122,
"step": 1227
},
{
"epoch": 0.8629655657062544,
"grad_norm": 0.5029892450213788,
"learning_rate": 6.067973891188161e-06,
"loss": 0.1809,
"step": 1228
},
{
"epoch": 0.8636683063949403,
"grad_norm": 0.5132171947385449,
"learning_rate": 6.0625813019327925e-06,
"loss": 0.1835,
"step": 1229
},
{
"epoch": 0.8643710470836261,
"grad_norm": 0.5101091466907139,
"learning_rate": 6.057187417909061e-06,
"loss": 0.188,
"step": 1230
},
{
"epoch": 0.865073787772312,
"grad_norm": 0.46211949086790133,
"learning_rate": 6.05179224568948e-06,
"loss": 0.1589,
"step": 1231
},
{
"epoch": 0.8657765284609978,
"grad_norm": 0.47046382666591835,
"learning_rate": 6.046395791848133e-06,
"loss": 0.1565,
"step": 1232
},
{
"epoch": 0.8664792691496838,
"grad_norm": 0.49826244659146274,
"learning_rate": 6.040998062960666e-06,
"loss": 0.1782,
"step": 1233
},
{
"epoch": 0.8671820098383697,
"grad_norm": 0.5193486118705439,
"learning_rate": 6.035599065604275e-06,
"loss": 0.1852,
"step": 1234
},
{
"epoch": 0.8678847505270555,
"grad_norm": 0.49205156065630956,
"learning_rate": 6.0301988063577075e-06,
"loss": 0.1619,
"step": 1235
},
{
"epoch": 0.8685874912157414,
"grad_norm": 0.5119883525242078,
"learning_rate": 6.024797291801247e-06,
"loss": 0.1968,
"step": 1236
},
{
"epoch": 0.8692902319044272,
"grad_norm": 0.5127786450316105,
"learning_rate": 6.019394528516702e-06,
"loss": 0.1879,
"step": 1237
},
{
"epoch": 0.8699929725931131,
"grad_norm": 0.4656441543907257,
"learning_rate": 6.013990523087409e-06,
"loss": 0.1613,
"step": 1238
},
{
"epoch": 0.8706957132817991,
"grad_norm": 0.5198896911626247,
"learning_rate": 6.008585282098212e-06,
"loss": 0.1766,
"step": 1239
},
{
"epoch": 0.8713984539704849,
"grad_norm": 0.4911965550275012,
"learning_rate": 6.003178812135464e-06,
"loss": 0.1843,
"step": 1240
},
{
"epoch": 0.8721011946591708,
"grad_norm": 0.5585501627083996,
"learning_rate": 5.997771119787017e-06,
"loss": 0.1811,
"step": 1241
},
{
"epoch": 0.8728039353478566,
"grad_norm": 0.5026455634926816,
"learning_rate": 5.99236221164221e-06,
"loss": 0.1928,
"step": 1242
},
{
"epoch": 0.8735066760365425,
"grad_norm": 0.5356055151967403,
"learning_rate": 5.986952094291861e-06,
"loss": 0.1984,
"step": 1243
},
{
"epoch": 0.8742094167252283,
"grad_norm": 0.5068822428289327,
"learning_rate": 5.9815407743282694e-06,
"loss": 0.1906,
"step": 1244
},
{
"epoch": 0.8749121574139143,
"grad_norm": 0.5115140958509385,
"learning_rate": 5.9761282583451906e-06,
"loss": 0.177,
"step": 1245
},
{
"epoch": 0.8756148981026002,
"grad_norm": 0.48290458027970495,
"learning_rate": 5.970714552937843e-06,
"loss": 0.1514,
"step": 1246
},
{
"epoch": 0.876317638791286,
"grad_norm": 0.49640574921131947,
"learning_rate": 5.965299664702896e-06,
"loss": 0.1855,
"step": 1247
},
{
"epoch": 0.8770203794799719,
"grad_norm": 0.5044371470192852,
"learning_rate": 5.959883600238452e-06,
"loss": 0.1736,
"step": 1248
},
{
"epoch": 0.8777231201686577,
"grad_norm": 0.505160522141664,
"learning_rate": 5.954466366144057e-06,
"loss": 0.1972,
"step": 1249
},
{
"epoch": 0.8784258608573436,
"grad_norm": 0.523945910921815,
"learning_rate": 5.949047969020676e-06,
"loss": 0.189,
"step": 1250
},
{
"epoch": 0.8791286015460296,
"grad_norm": 0.5256872085388279,
"learning_rate": 5.94362841547069e-06,
"loss": 0.1863,
"step": 1251
},
{
"epoch": 0.8798313422347154,
"grad_norm": 0.5506874839118318,
"learning_rate": 5.938207712097895e-06,
"loss": 0.1999,
"step": 1252
},
{
"epoch": 0.8805340829234013,
"grad_norm": 0.5282521300164007,
"learning_rate": 5.932785865507482e-06,
"loss": 0.1969,
"step": 1253
},
{
"epoch": 0.8812368236120871,
"grad_norm": 0.5153960622956695,
"learning_rate": 5.927362882306039e-06,
"loss": 0.2015,
"step": 1254
},
{
"epoch": 0.881939564300773,
"grad_norm": 0.531206524060467,
"learning_rate": 5.9219387691015376e-06,
"loss": 0.1882,
"step": 1255
},
{
"epoch": 0.8826423049894588,
"grad_norm": 0.4816995124378558,
"learning_rate": 5.916513532503325e-06,
"loss": 0.1634,
"step": 1256
},
{
"epoch": 0.8833450456781448,
"grad_norm": 0.5177569025093481,
"learning_rate": 5.911087179122121e-06,
"loss": 0.1966,
"step": 1257
},
{
"epoch": 0.8840477863668307,
"grad_norm": 0.48498359527901896,
"learning_rate": 5.90565971557e-06,
"loss": 0.1643,
"step": 1258
},
{
"epoch": 0.8847505270555165,
"grad_norm": 0.5115850555415963,
"learning_rate": 5.900231148460398e-06,
"loss": 0.1789,
"step": 1259
},
{
"epoch": 0.8854532677442024,
"grad_norm": 0.491914234210044,
"learning_rate": 5.894801484408086e-06,
"loss": 0.1815,
"step": 1260
},
{
"epoch": 0.8861560084328882,
"grad_norm": 0.5013558086686257,
"learning_rate": 5.8893707300291805e-06,
"loss": 0.1774,
"step": 1261
},
{
"epoch": 0.8868587491215741,
"grad_norm": 0.5071700374713674,
"learning_rate": 5.883938891941117e-06,
"loss": 0.1888,
"step": 1262
},
{
"epoch": 0.8875614898102601,
"grad_norm": 0.5049441962524359,
"learning_rate": 5.878505976762664e-06,
"loss": 0.1755,
"step": 1263
},
{
"epoch": 0.8882642304989459,
"grad_norm": 0.5065049785488899,
"learning_rate": 5.873071991113889e-06,
"loss": 0.1658,
"step": 1264
},
{
"epoch": 0.8889669711876318,
"grad_norm": 0.5434003669210763,
"learning_rate": 5.867636941616174e-06,
"loss": 0.2053,
"step": 1265
},
{
"epoch": 0.8896697118763176,
"grad_norm": 0.48316536570276314,
"learning_rate": 5.862200834892192e-06,
"loss": 0.1805,
"step": 1266
},
{
"epoch": 0.8903724525650035,
"grad_norm": 0.4805591607424035,
"learning_rate": 5.856763677565905e-06,
"loss": 0.1681,
"step": 1267
},
{
"epoch": 0.8910751932536893,
"grad_norm": 0.4987631420062705,
"learning_rate": 5.851325476262558e-06,
"loss": 0.1849,
"step": 1268
},
{
"epoch": 0.8917779339423753,
"grad_norm": 0.5229631899808861,
"learning_rate": 5.845886237608665e-06,
"loss": 0.1917,
"step": 1269
},
{
"epoch": 0.8924806746310612,
"grad_norm": 0.4789523347189992,
"learning_rate": 5.840445968232005e-06,
"loss": 0.1613,
"step": 1270
},
{
"epoch": 0.893183415319747,
"grad_norm": 0.47123822162637413,
"learning_rate": 5.8350046747616154e-06,
"loss": 0.1648,
"step": 1271
},
{
"epoch": 0.8938861560084329,
"grad_norm": 0.4945508158698064,
"learning_rate": 5.829562363827773e-06,
"loss": 0.1777,
"step": 1272
},
{
"epoch": 0.8945888966971187,
"grad_norm": 0.5229050810270796,
"learning_rate": 5.824119042062007e-06,
"loss": 0.19,
"step": 1273
},
{
"epoch": 0.8952916373858046,
"grad_norm": 0.5176570875009562,
"learning_rate": 5.818674716097068e-06,
"loss": 0.1994,
"step": 1274
},
{
"epoch": 0.8959943780744906,
"grad_norm": 0.5185724352450841,
"learning_rate": 5.813229392566937e-06,
"loss": 0.173,
"step": 1275
},
{
"epoch": 0.8966971187631764,
"grad_norm": 0.5188312046851375,
"learning_rate": 5.8077830781068044e-06,
"loss": 0.1827,
"step": 1276
},
{
"epoch": 0.8973998594518623,
"grad_norm": 0.4994378916272213,
"learning_rate": 5.802335779353074e-06,
"loss": 0.1834,
"step": 1277
},
{
"epoch": 0.8981026001405481,
"grad_norm": 0.5053930448321152,
"learning_rate": 5.796887502943343e-06,
"loss": 0.191,
"step": 1278
},
{
"epoch": 0.898805340829234,
"grad_norm": 0.5306229359834849,
"learning_rate": 5.791438255516407e-06,
"loss": 0.2269,
"step": 1279
},
{
"epoch": 0.8995080815179198,
"grad_norm": 0.5399478136370445,
"learning_rate": 5.785988043712239e-06,
"loss": 0.2189,
"step": 1280
},
{
"epoch": 0.9002108222066058,
"grad_norm": 0.5061614097634163,
"learning_rate": 5.780536874171987e-06,
"loss": 0.1837,
"step": 1281
},
{
"epoch": 0.9009135628952917,
"grad_norm": 0.5277378891669225,
"learning_rate": 5.775084753537969e-06,
"loss": 0.1813,
"step": 1282
},
{
"epoch": 0.9016163035839775,
"grad_norm": 0.5085817730767297,
"learning_rate": 5.769631688453666e-06,
"loss": 0.187,
"step": 1283
},
{
"epoch": 0.9023190442726634,
"grad_norm": 0.5068441155995959,
"learning_rate": 5.764177685563698e-06,
"loss": 0.1834,
"step": 1284
},
{
"epoch": 0.9030217849613492,
"grad_norm": 0.5205665157263202,
"learning_rate": 5.758722751513838e-06,
"loss": 0.2025,
"step": 1285
},
{
"epoch": 0.9037245256500351,
"grad_norm": 0.4964882488251083,
"learning_rate": 5.753266892950989e-06,
"loss": 0.1765,
"step": 1286
},
{
"epoch": 0.9044272663387211,
"grad_norm": 0.49957597445619084,
"learning_rate": 5.74781011652318e-06,
"loss": 0.1626,
"step": 1287
},
{
"epoch": 0.9051300070274069,
"grad_norm": 0.5454163588824682,
"learning_rate": 5.742352428879565e-06,
"loss": 0.1625,
"step": 1288
},
{
"epoch": 0.9058327477160928,
"grad_norm": 0.49019948841483496,
"learning_rate": 5.736893836670399e-06,
"loss": 0.1817,
"step": 1289
},
{
"epoch": 0.9065354884047786,
"grad_norm": 0.540938094603087,
"learning_rate": 5.731434346547045e-06,
"loss": 0.1903,
"step": 1290
},
{
"epoch": 0.9072382290934645,
"grad_norm": 0.5185744975996959,
"learning_rate": 5.72597396516196e-06,
"loss": 0.1759,
"step": 1291
},
{
"epoch": 0.9079409697821503,
"grad_norm": 0.5460557766702974,
"learning_rate": 5.7205126991686825e-06,
"loss": 0.2089,
"step": 1292
},
{
"epoch": 0.9086437104708363,
"grad_norm": 0.5261555481195017,
"learning_rate": 5.7150505552218346e-06,
"loss": 0.1937,
"step": 1293
},
{
"epoch": 0.9093464511595222,
"grad_norm": 0.4967183113214995,
"learning_rate": 5.709587539977105e-06,
"loss": 0.1907,
"step": 1294
},
{
"epoch": 0.910049191848208,
"grad_norm": 0.5016407484137889,
"learning_rate": 5.7041236600912475e-06,
"loss": 0.1844,
"step": 1295
},
{
"epoch": 0.9107519325368939,
"grad_norm": 0.5475144668884032,
"learning_rate": 5.698658922222062e-06,
"loss": 0.1893,
"step": 1296
},
{
"epoch": 0.9114546732255797,
"grad_norm": 0.4888372655496957,
"learning_rate": 5.693193333028404e-06,
"loss": 0.1668,
"step": 1297
},
{
"epoch": 0.9121574139142656,
"grad_norm": 0.5136792617691439,
"learning_rate": 5.687726899170155e-06,
"loss": 0.1795,
"step": 1298
},
{
"epoch": 0.9128601546029516,
"grad_norm": 0.49755317409906136,
"learning_rate": 5.682259627308238e-06,
"loss": 0.1698,
"step": 1299
},
{
"epoch": 0.9135628952916374,
"grad_norm": 0.48426769502045947,
"learning_rate": 5.6767915241045855e-06,
"loss": 0.1619,
"step": 1300
},
{
"epoch": 0.9142656359803233,
"grad_norm": 0.4925450873469613,
"learning_rate": 5.671322596222153e-06,
"loss": 0.168,
"step": 1301
},
{
"epoch": 0.9149683766690091,
"grad_norm": 0.45271196582377093,
"learning_rate": 5.665852850324893e-06,
"loss": 0.1427,
"step": 1302
},
{
"epoch": 0.915671117357695,
"grad_norm": 0.5331015370489443,
"learning_rate": 5.660382293077759e-06,
"loss": 0.2079,
"step": 1303
},
{
"epoch": 0.9163738580463809,
"grad_norm": 0.4987644474302223,
"learning_rate": 5.654910931146692e-06,
"loss": 0.1821,
"step": 1304
},
{
"epoch": 0.9170765987350668,
"grad_norm": 0.5042023902121021,
"learning_rate": 5.649438771198616e-06,
"loss": 0.1912,
"step": 1305
},
{
"epoch": 0.9177793394237527,
"grad_norm": 0.48973082504550525,
"learning_rate": 5.64396581990142e-06,
"loss": 0.1928,
"step": 1306
},
{
"epoch": 0.9184820801124385,
"grad_norm": 0.48831989879878884,
"learning_rate": 5.638492083923969e-06,
"loss": 0.1846,
"step": 1307
},
{
"epoch": 0.9191848208011244,
"grad_norm": 0.5016664170305484,
"learning_rate": 5.633017569936071e-06,
"loss": 0.1965,
"step": 1308
},
{
"epoch": 0.9198875614898102,
"grad_norm": 0.5166324839960242,
"learning_rate": 5.6275422846084945e-06,
"loss": 0.1909,
"step": 1309
},
{
"epoch": 0.9205903021784961,
"grad_norm": 0.4990652621475701,
"learning_rate": 5.622066234612936e-06,
"loss": 0.1647,
"step": 1310
},
{
"epoch": 0.921293042867182,
"grad_norm": 0.5279858641913935,
"learning_rate": 5.616589426622033e-06,
"loss": 0.1785,
"step": 1311
},
{
"epoch": 0.9219957835558679,
"grad_norm": 0.5028120525677001,
"learning_rate": 5.611111867309344e-06,
"loss": 0.1566,
"step": 1312
},
{
"epoch": 0.9226985242445538,
"grad_norm": 0.49496209816045444,
"learning_rate": 5.605633563349341e-06,
"loss": 0.1878,
"step": 1313
},
{
"epoch": 0.9234012649332396,
"grad_norm": 0.5103355627273767,
"learning_rate": 5.600154521417405e-06,
"loss": 0.1915,
"step": 1314
},
{
"epoch": 0.9241040056219255,
"grad_norm": 0.5083064986304011,
"learning_rate": 5.5946747481898144e-06,
"loss": 0.173,
"step": 1315
},
{
"epoch": 0.9248067463106114,
"grad_norm": 0.5443980894311817,
"learning_rate": 5.589194250343741e-06,
"loss": 0.217,
"step": 1316
},
{
"epoch": 0.9255094869992972,
"grad_norm": 0.5163177476037929,
"learning_rate": 5.583713034557241e-06,
"loss": 0.1911,
"step": 1317
},
{
"epoch": 0.9262122276879832,
"grad_norm": 0.4837536735411591,
"learning_rate": 5.57823110750924e-06,
"loss": 0.1741,
"step": 1318
},
{
"epoch": 0.926914968376669,
"grad_norm": 0.4874969026198589,
"learning_rate": 5.572748475879536e-06,
"loss": 0.1813,
"step": 1319
},
{
"epoch": 0.9276177090653549,
"grad_norm": 0.49763208093324596,
"learning_rate": 5.567265146348779e-06,
"loss": 0.1764,
"step": 1320
},
{
"epoch": 0.9283204497540407,
"grad_norm": 0.5078828461218955,
"learning_rate": 5.561781125598479e-06,
"loss": 0.1836,
"step": 1321
},
{
"epoch": 0.9290231904427266,
"grad_norm": 0.4715355993383816,
"learning_rate": 5.556296420310977e-06,
"loss": 0.1639,
"step": 1322
},
{
"epoch": 0.9297259311314126,
"grad_norm": 0.4697787642937931,
"learning_rate": 5.550811037169457e-06,
"loss": 0.1588,
"step": 1323
},
{
"epoch": 0.9304286718200984,
"grad_norm": 0.5053902581080298,
"learning_rate": 5.545324982857926e-06,
"loss": 0.1963,
"step": 1324
},
{
"epoch": 0.9311314125087843,
"grad_norm": 0.499350933270986,
"learning_rate": 5.539838264061207e-06,
"loss": 0.187,
"step": 1325
},
{
"epoch": 0.9318341531974701,
"grad_norm": 0.4855473339463826,
"learning_rate": 5.534350887464934e-06,
"loss": 0.1804,
"step": 1326
},
{
"epoch": 0.932536893886156,
"grad_norm": 0.4931168918656159,
"learning_rate": 5.528862859755545e-06,
"loss": 0.1738,
"step": 1327
},
{
"epoch": 0.9332396345748419,
"grad_norm": 0.516534148217859,
"learning_rate": 5.523374187620266e-06,
"loss": 0.1911,
"step": 1328
},
{
"epoch": 0.9339423752635277,
"grad_norm": 0.5010990203093634,
"learning_rate": 5.517884877747116e-06,
"loss": 0.1954,
"step": 1329
},
{
"epoch": 0.9346451159522137,
"grad_norm": 0.49888090289331416,
"learning_rate": 5.512394936824881e-06,
"loss": 0.1669,
"step": 1330
},
{
"epoch": 0.9353478566408995,
"grad_norm": 0.4942647057814772,
"learning_rate": 5.506904371543126e-06,
"loss": 0.1706,
"step": 1331
},
{
"epoch": 0.9360505973295854,
"grad_norm": 0.4930304560138631,
"learning_rate": 5.501413188592167e-06,
"loss": 0.16,
"step": 1332
},
{
"epoch": 0.9367533380182712,
"grad_norm": 0.5202336246837628,
"learning_rate": 5.495921394663085e-06,
"loss": 0.1956,
"step": 1333
},
{
"epoch": 0.9374560787069571,
"grad_norm": 0.5195462073922367,
"learning_rate": 5.4904289964476905e-06,
"loss": 0.1796,
"step": 1334
},
{
"epoch": 0.938158819395643,
"grad_norm": 0.5278613831428796,
"learning_rate": 5.484936000638546e-06,
"loss": 0.1848,
"step": 1335
},
{
"epoch": 0.9388615600843289,
"grad_norm": 0.5387221558941402,
"learning_rate": 5.479442413928927e-06,
"loss": 0.1948,
"step": 1336
},
{
"epoch": 0.9395643007730148,
"grad_norm": 0.5292981166694255,
"learning_rate": 5.473948243012842e-06,
"loss": 0.1976,
"step": 1337
},
{
"epoch": 0.9402670414617006,
"grad_norm": 0.49095341165304357,
"learning_rate": 5.468453494585002e-06,
"loss": 0.1936,
"step": 1338
},
{
"epoch": 0.9409697821503865,
"grad_norm": 0.476137761784659,
"learning_rate": 5.462958175340828e-06,
"loss": 0.1732,
"step": 1339
},
{
"epoch": 0.9416725228390724,
"grad_norm": 0.5035071373800364,
"learning_rate": 5.457462291976432e-06,
"loss": 0.1693,
"step": 1340
},
{
"epoch": 0.9423752635277582,
"grad_norm": 0.4927374677495356,
"learning_rate": 5.451965851188618e-06,
"loss": 0.173,
"step": 1341
},
{
"epoch": 0.9430780042164442,
"grad_norm": 0.521962512270835,
"learning_rate": 5.446468859674862e-06,
"loss": 0.1893,
"step": 1342
},
{
"epoch": 0.94378074490513,
"grad_norm": 0.488287317097337,
"learning_rate": 5.440971324133322e-06,
"loss": 0.1566,
"step": 1343
},
{
"epoch": 0.9444834855938159,
"grad_norm": 0.49933441776873916,
"learning_rate": 5.435473251262805e-06,
"loss": 0.1941,
"step": 1344
},
{
"epoch": 0.9451862262825017,
"grad_norm": 0.5150937905899252,
"learning_rate": 5.429974647762788e-06,
"loss": 0.1849,
"step": 1345
},
{
"epoch": 0.9458889669711876,
"grad_norm": 0.4937729920011368,
"learning_rate": 5.424475520333381e-06,
"loss": 0.1953,
"step": 1346
},
{
"epoch": 0.9465917076598735,
"grad_norm": 0.4979160581197338,
"learning_rate": 5.418975875675341e-06,
"loss": 0.15,
"step": 1347
},
{
"epoch": 0.9472944483485594,
"grad_norm": 0.49572714712562493,
"learning_rate": 5.4134757204900525e-06,
"loss": 0.171,
"step": 1348
},
{
"epoch": 0.9479971890372453,
"grad_norm": 0.5017033812754259,
"learning_rate": 5.407975061479521e-06,
"loss": 0.1685,
"step": 1349
},
{
"epoch": 0.9486999297259311,
"grad_norm": 0.5232483398771414,
"learning_rate": 5.402473905346368e-06,
"loss": 0.1924,
"step": 1350
},
{
"epoch": 0.949402670414617,
"grad_norm": 0.49099314208156364,
"learning_rate": 5.39697225879382e-06,
"loss": 0.167,
"step": 1351
},
{
"epoch": 0.9501054111033029,
"grad_norm": 0.5290685851917885,
"learning_rate": 5.3914701285257e-06,
"loss": 0.1756,
"step": 1352
},
{
"epoch": 0.9508081517919887,
"grad_norm": 0.5369121617826327,
"learning_rate": 5.385967521246422e-06,
"loss": 0.2139,
"step": 1353
},
{
"epoch": 0.9515108924806747,
"grad_norm": 0.5134430449452313,
"learning_rate": 5.38046444366098e-06,
"loss": 0.1793,
"step": 1354
},
{
"epoch": 0.9522136331693605,
"grad_norm": 0.4831689948808722,
"learning_rate": 5.3749609024749424e-06,
"loss": 0.1693,
"step": 1355
},
{
"epoch": 0.9529163738580464,
"grad_norm": 0.4887397506632592,
"learning_rate": 5.36945690439444e-06,
"loss": 0.1601,
"step": 1356
},
{
"epoch": 0.9536191145467322,
"grad_norm": 0.5372445202703667,
"learning_rate": 5.363952456126165e-06,
"loss": 0.2015,
"step": 1357
},
{
"epoch": 0.9543218552354181,
"grad_norm": 0.4850031506109228,
"learning_rate": 5.358447564377352e-06,
"loss": 0.1755,
"step": 1358
},
{
"epoch": 0.955024595924104,
"grad_norm": 0.5027528352188118,
"learning_rate": 5.35294223585578e-06,
"loss": 0.1558,
"step": 1359
},
{
"epoch": 0.9557273366127899,
"grad_norm": 0.4931882801165771,
"learning_rate": 5.34743647726976e-06,
"loss": 0.1761,
"step": 1360
},
{
"epoch": 0.9564300773014758,
"grad_norm": 0.5588951139536696,
"learning_rate": 5.341930295328129e-06,
"loss": 0.2131,
"step": 1361
},
{
"epoch": 0.9571328179901616,
"grad_norm": 0.5529601552521203,
"learning_rate": 5.336423696740233e-06,
"loss": 0.226,
"step": 1362
},
{
"epoch": 0.9578355586788475,
"grad_norm": 0.47333211007445464,
"learning_rate": 5.330916688215931e-06,
"loss": 0.1452,
"step": 1363
},
{
"epoch": 0.9585382993675334,
"grad_norm": 0.47566193526005807,
"learning_rate": 5.325409276465581e-06,
"loss": 0.1555,
"step": 1364
},
{
"epoch": 0.9592410400562192,
"grad_norm": 0.4914481726281524,
"learning_rate": 5.319901468200034e-06,
"loss": 0.1744,
"step": 1365
},
{
"epoch": 0.9599437807449052,
"grad_norm": 0.49224736330749624,
"learning_rate": 5.314393270130617e-06,
"loss": 0.181,
"step": 1366
},
{
"epoch": 0.960646521433591,
"grad_norm": 0.4858591625950129,
"learning_rate": 5.308884688969145e-06,
"loss": 0.1764,
"step": 1367
},
{
"epoch": 0.9613492621222769,
"grad_norm": 0.479577498442296,
"learning_rate": 5.303375731427882e-06,
"loss": 0.1695,
"step": 1368
},
{
"epoch": 0.9620520028109627,
"grad_norm": 0.4825228825413551,
"learning_rate": 5.297866404219569e-06,
"loss": 0.167,
"step": 1369
},
{
"epoch": 0.9627547434996486,
"grad_norm": 0.49773073193567335,
"learning_rate": 5.292356714057382e-06,
"loss": 0.1782,
"step": 1370
},
{
"epoch": 0.9634574841883345,
"grad_norm": 0.465706124969307,
"learning_rate": 5.28684666765495e-06,
"loss": 0.1429,
"step": 1371
},
{
"epoch": 0.9641602248770204,
"grad_norm": 0.48475587233272616,
"learning_rate": 5.281336271726333e-06,
"loss": 0.1693,
"step": 1372
},
{
"epoch": 0.9648629655657063,
"grad_norm": 0.5002876235055297,
"learning_rate": 5.275825532986013e-06,
"loss": 0.1698,
"step": 1373
},
{
"epoch": 0.9655657062543921,
"grad_norm": 0.4751205576638796,
"learning_rate": 5.270314458148896e-06,
"loss": 0.1453,
"step": 1374
},
{
"epoch": 0.966268446943078,
"grad_norm": 0.5111002061348704,
"learning_rate": 5.2648030539302894e-06,
"loss": 0.1956,
"step": 1375
},
{
"epoch": 0.9669711876317639,
"grad_norm": 0.5287868754504749,
"learning_rate": 5.259291327045912e-06,
"loss": 0.2098,
"step": 1376
},
{
"epoch": 0.9676739283204497,
"grad_norm": 0.5238439498113354,
"learning_rate": 5.2537792842118694e-06,
"loss": 0.165,
"step": 1377
},
{
"epoch": 0.9683766690091357,
"grad_norm": 0.4838233344953546,
"learning_rate": 5.248266932144652e-06,
"loss": 0.1552,
"step": 1378
},
{
"epoch": 0.9690794096978215,
"grad_norm": 0.49589259249472895,
"learning_rate": 5.2427542775611314e-06,
"loss": 0.1573,
"step": 1379
},
{
"epoch": 0.9697821503865074,
"grad_norm": 0.5121936042194623,
"learning_rate": 5.23724132717854e-06,
"loss": 0.18,
"step": 1380
},
{
"epoch": 0.9704848910751932,
"grad_norm": 0.5539715989420239,
"learning_rate": 5.231728087714482e-06,
"loss": 0.1855,
"step": 1381
},
{
"epoch": 0.9711876317638791,
"grad_norm": 0.5349531279287615,
"learning_rate": 5.2262145658869005e-06,
"loss": 0.1865,
"step": 1382
},
{
"epoch": 0.971890372452565,
"grad_norm": 0.510639043585128,
"learning_rate": 5.220700768414094e-06,
"loss": 0.1944,
"step": 1383
},
{
"epoch": 0.9725931131412509,
"grad_norm": 0.5088479448113863,
"learning_rate": 5.215186702014692e-06,
"loss": 0.1922,
"step": 1384
},
{
"epoch": 0.9732958538299368,
"grad_norm": 0.4796693735802863,
"learning_rate": 5.209672373407651e-06,
"loss": 0.1762,
"step": 1385
},
{
"epoch": 0.9739985945186226,
"grad_norm": 0.499545197442791,
"learning_rate": 5.204157789312248e-06,
"loss": 0.1768,
"step": 1386
},
{
"epoch": 0.9747013352073085,
"grad_norm": 0.5126699251503342,
"learning_rate": 5.198642956448072e-06,
"loss": 0.1783,
"step": 1387
},
{
"epoch": 0.9754040758959944,
"grad_norm": 0.5145385299519166,
"learning_rate": 5.193127881535015e-06,
"loss": 0.1928,
"step": 1388
},
{
"epoch": 0.9761068165846802,
"grad_norm": 0.528813426196794,
"learning_rate": 5.187612571293263e-06,
"loss": 0.1877,
"step": 1389
},
{
"epoch": 0.9768095572733662,
"grad_norm": 0.5099979736626467,
"learning_rate": 5.182097032443288e-06,
"loss": 0.1883,
"step": 1390
},
{
"epoch": 0.977512297962052,
"grad_norm": 0.4523284780455419,
"learning_rate": 5.176581271705845e-06,
"loss": 0.1478,
"step": 1391
},
{
"epoch": 0.9782150386507379,
"grad_norm": 0.4932276047724686,
"learning_rate": 5.1710652958019525e-06,
"loss": 0.1593,
"step": 1392
},
{
"epoch": 0.9789177793394237,
"grad_norm": 0.4796811572296884,
"learning_rate": 5.165549111452899e-06,
"loss": 0.1688,
"step": 1393
},
{
"epoch": 0.9796205200281096,
"grad_norm": 0.5034515653870583,
"learning_rate": 5.1600327253802184e-06,
"loss": 0.1689,
"step": 1394
},
{
"epoch": 0.9803232607167955,
"grad_norm": 0.502198923761688,
"learning_rate": 5.154516144305698e-06,
"loss": 0.1644,
"step": 1395
},
{
"epoch": 0.9810260014054814,
"grad_norm": 0.4602068868601718,
"learning_rate": 5.1489993749513576e-06,
"loss": 0.1544,
"step": 1396
},
{
"epoch": 0.9817287420941673,
"grad_norm": 0.45533835598534217,
"learning_rate": 5.1434824240394494e-06,
"loss": 0.1469,
"step": 1397
},
{
"epoch": 0.9824314827828531,
"grad_norm": 0.45955715449833245,
"learning_rate": 5.1379652982924465e-06,
"loss": 0.1431,
"step": 1398
},
{
"epoch": 0.983134223471539,
"grad_norm": 0.5246077121318076,
"learning_rate": 5.132448004433034e-06,
"loss": 0.1908,
"step": 1399
},
{
"epoch": 0.9838369641602249,
"grad_norm": 0.5029012585391444,
"learning_rate": 5.1269305491841015e-06,
"loss": 0.1898,
"step": 1400
},
{
"epoch": 0.9845397048489107,
"grad_norm": 0.5155559300537573,
"learning_rate": 5.121412939268736e-06,
"loss": 0.1903,
"step": 1401
},
{
"epoch": 0.9852424455375967,
"grad_norm": 0.5167390892548627,
"learning_rate": 5.115895181410213e-06,
"loss": 0.1957,
"step": 1402
},
{
"epoch": 0.9859451862262825,
"grad_norm": 0.503291943738301,
"learning_rate": 5.110377282331988e-06,
"loss": 0.1616,
"step": 1403
},
{
"epoch": 0.9866479269149684,
"grad_norm": 0.5172792729955873,
"learning_rate": 5.10485924875769e-06,
"loss": 0.1648,
"step": 1404
},
{
"epoch": 0.9873506676036542,
"grad_norm": 0.5288947212309869,
"learning_rate": 5.09934108741111e-06,
"loss": 0.1947,
"step": 1405
},
{
"epoch": 0.9880534082923401,
"grad_norm": 0.4536753280886982,
"learning_rate": 5.093822805016194e-06,
"loss": 0.1476,
"step": 1406
},
{
"epoch": 0.988756148981026,
"grad_norm": 0.5458840167868061,
"learning_rate": 5.088304408297039e-06,
"loss": 0.1804,
"step": 1407
},
{
"epoch": 0.9894588896697118,
"grad_norm": 0.4968761929170356,
"learning_rate": 5.0827859039778784e-06,
"loss": 0.1691,
"step": 1408
},
{
"epoch": 0.9901616303583978,
"grad_norm": 0.47922207982218623,
"learning_rate": 5.077267298783077e-06,
"loss": 0.1672,
"step": 1409
},
{
"epoch": 0.9908643710470836,
"grad_norm": 0.5081431828705689,
"learning_rate": 5.071748599437124e-06,
"loss": 0.1886,
"step": 1410
},
{
"epoch": 0.9915671117357695,
"grad_norm": 0.5131479650403806,
"learning_rate": 5.066229812664621e-06,
"loss": 0.1718,
"step": 1411
},
{
"epoch": 0.9922698524244554,
"grad_norm": 0.540035551569854,
"learning_rate": 5.060710945190278e-06,
"loss": 0.1971,
"step": 1412
},
{
"epoch": 0.9929725931131412,
"grad_norm": 0.4803244137760998,
"learning_rate": 5.0551920037389035e-06,
"loss": 0.1553,
"step": 1413
},
{
"epoch": 0.9936753338018272,
"grad_norm": 0.4899909375914942,
"learning_rate": 5.049672995035394e-06,
"loss": 0.1808,
"step": 1414
},
{
"epoch": 0.994378074490513,
"grad_norm": 0.49275152076452683,
"learning_rate": 5.04415392580473e-06,
"loss": 0.1679,
"step": 1415
},
{
"epoch": 0.9950808151791989,
"grad_norm": 0.5129171105956463,
"learning_rate": 5.038634802771966e-06,
"loss": 0.2091,
"step": 1416
},
{
"epoch": 0.9957835558678847,
"grad_norm": 0.5185735469093622,
"learning_rate": 5.03311563266222e-06,
"loss": 0.199,
"step": 1417
},
{
"epoch": 0.9964862965565706,
"grad_norm": 0.520279570499453,
"learning_rate": 5.027596422200668e-06,
"loss": 0.1969,
"step": 1418
},
{
"epoch": 0.9971890372452565,
"grad_norm": 0.5171439479650721,
"learning_rate": 5.022077178112537e-06,
"loss": 0.1888,
"step": 1419
},
{
"epoch": 0.9978917779339423,
"grad_norm": 0.4990256654919666,
"learning_rate": 5.016557907123095e-06,
"loss": 0.161,
"step": 1420
},
{
"epoch": 0.9985945186226283,
"grad_norm": 0.48386951524477756,
"learning_rate": 5.011038615957639e-06,
"loss": 0.1714,
"step": 1421
},
{
"epoch": 0.9992972593113141,
"grad_norm": 0.5210856503064014,
"learning_rate": 5.005519311341495e-06,
"loss": 0.1874,
"step": 1422
},
{
"epoch": 1.0,
"grad_norm": 0.48163350666059024,
"learning_rate": 5e-06,
"loss": 0.1658,
"step": 1423
},
{
"epoch": 1.000702740688686,
"grad_norm": 0.5224393577574205,
"learning_rate": 4.994480688658508e-06,
"loss": 0.1568,
"step": 1424
},
{
"epoch": 1.0014054813773718,
"grad_norm": 0.5167212944296797,
"learning_rate": 4.9889613840423615e-06,
"loss": 0.1565,
"step": 1425
},
{
"epoch": 1.0021082220660575,
"grad_norm": 0.5083112192863517,
"learning_rate": 4.983442092876906e-06,
"loss": 0.1627,
"step": 1426
},
{
"epoch": 1.0028109627547435,
"grad_norm": 0.5178461118513241,
"learning_rate": 4.977922821887463e-06,
"loss": 0.1774,
"step": 1427
},
{
"epoch": 1.0035137034434294,
"grad_norm": 0.466141334877386,
"learning_rate": 4.972403577799334e-06,
"loss": 0.136,
"step": 1428
},
{
"epoch": 1.0042164441321153,
"grad_norm": 0.5074856152966533,
"learning_rate": 4.966884367337781e-06,
"loss": 0.145,
"step": 1429
},
{
"epoch": 1.0049191848208012,
"grad_norm": 0.4766089411473581,
"learning_rate": 4.961365197228035e-06,
"loss": 0.1368,
"step": 1430
},
{
"epoch": 1.005621925509487,
"grad_norm": 0.541711677183431,
"learning_rate": 4.9558460741952725e-06,
"loss": 0.1654,
"step": 1431
},
{
"epoch": 1.0063246661981728,
"grad_norm": 0.5468030419613309,
"learning_rate": 4.950327004964607e-06,
"loss": 0.1661,
"step": 1432
},
{
"epoch": 1.0070274068868588,
"grad_norm": 0.604212676464957,
"learning_rate": 4.944807996261098e-06,
"loss": 0.1553,
"step": 1433
},
{
"epoch": 1.0077301475755447,
"grad_norm": 0.5559877236162696,
"learning_rate": 4.9392890548097235e-06,
"loss": 0.1261,
"step": 1434
},
{
"epoch": 1.0084328882642306,
"grad_norm": 0.5878546627004245,
"learning_rate": 4.93377018733538e-06,
"loss": 0.1814,
"step": 1435
},
{
"epoch": 1.0091356289529163,
"grad_norm": 0.5440844459813221,
"learning_rate": 4.928251400562878e-06,
"loss": 0.1532,
"step": 1436
},
{
"epoch": 1.0098383696416022,
"grad_norm": 0.5351616063352487,
"learning_rate": 4.922732701216924e-06,
"loss": 0.1479,
"step": 1437
},
{
"epoch": 1.0105411103302882,
"grad_norm": 0.5301467629550286,
"learning_rate": 4.917214096022123e-06,
"loss": 0.1466,
"step": 1438
},
{
"epoch": 1.011243851018974,
"grad_norm": 0.5678252044217474,
"learning_rate": 4.911695591702962e-06,
"loss": 0.168,
"step": 1439
},
{
"epoch": 1.0119465917076598,
"grad_norm": 0.5172947465808124,
"learning_rate": 4.906177194983807e-06,
"loss": 0.1393,
"step": 1440
},
{
"epoch": 1.0126493323963457,
"grad_norm": 0.47596293325834516,
"learning_rate": 4.9006589125888924e-06,
"loss": 0.1296,
"step": 1441
},
{
"epoch": 1.0133520730850316,
"grad_norm": 0.5197667557365847,
"learning_rate": 4.8951407512423125e-06,
"loss": 0.1613,
"step": 1442
},
{
"epoch": 1.0140548137737175,
"grad_norm": 0.5140905511446842,
"learning_rate": 4.889622717668012e-06,
"loss": 0.1596,
"step": 1443
},
{
"epoch": 1.0147575544624035,
"grad_norm": 0.491748434545197,
"learning_rate": 4.884104818589788e-06,
"loss": 0.1389,
"step": 1444
},
{
"epoch": 1.0154602951510892,
"grad_norm": 0.5352629126378262,
"learning_rate": 4.878587060731267e-06,
"loss": 0.1609,
"step": 1445
},
{
"epoch": 1.016163035839775,
"grad_norm": 0.5174976258751066,
"learning_rate": 4.8730694508159e-06,
"loss": 0.1468,
"step": 1446
},
{
"epoch": 1.016865776528461,
"grad_norm": 0.5101606514926836,
"learning_rate": 4.867551995566968e-06,
"loss": 0.1563,
"step": 1447
},
{
"epoch": 1.017568517217147,
"grad_norm": 0.49989777003227814,
"learning_rate": 4.862034701707554e-06,
"loss": 0.1393,
"step": 1448
},
{
"epoch": 1.0182712579058328,
"grad_norm": 0.5097054061097768,
"learning_rate": 4.8565175759605505e-06,
"loss": 0.1459,
"step": 1449
},
{
"epoch": 1.0189739985945185,
"grad_norm": 0.534062454314789,
"learning_rate": 4.851000625048643e-06,
"loss": 0.1525,
"step": 1450
},
{
"epoch": 1.0196767392832045,
"grad_norm": 0.49640571814198436,
"learning_rate": 4.845483855694304e-06,
"loss": 0.1246,
"step": 1451
},
{
"epoch": 1.0203794799718904,
"grad_norm": 0.49400528414597805,
"learning_rate": 4.839967274619783e-06,
"loss": 0.144,
"step": 1452
},
{
"epoch": 1.0210822206605763,
"grad_norm": 0.5750467346465727,
"learning_rate": 4.834450888547103e-06,
"loss": 0.1769,
"step": 1453
},
{
"epoch": 1.0217849613492622,
"grad_norm": 0.5415534666265035,
"learning_rate": 4.8289347041980475e-06,
"loss": 0.1671,
"step": 1454
},
{
"epoch": 1.022487702037948,
"grad_norm": 0.5283731167275352,
"learning_rate": 4.823418728294157e-06,
"loss": 0.1304,
"step": 1455
},
{
"epoch": 1.0231904427266338,
"grad_norm": 0.5163103493159343,
"learning_rate": 4.817902967556714e-06,
"loss": 0.1474,
"step": 1456
},
{
"epoch": 1.0238931834153198,
"grad_norm": 0.5181379014674464,
"learning_rate": 4.8123874287067385e-06,
"loss": 0.1528,
"step": 1457
},
{
"epoch": 1.0245959241040057,
"grad_norm": 0.546691491685927,
"learning_rate": 4.806872118464987e-06,
"loss": 0.1701,
"step": 1458
},
{
"epoch": 1.0252986647926916,
"grad_norm": 0.5368233255973394,
"learning_rate": 4.801357043551928e-06,
"loss": 0.157,
"step": 1459
},
{
"epoch": 1.0260014054813773,
"grad_norm": 0.49807381807404494,
"learning_rate": 4.795842210687754e-06,
"loss": 0.1606,
"step": 1460
},
{
"epoch": 1.0267041461700632,
"grad_norm": 0.49941618444368396,
"learning_rate": 4.790327626592351e-06,
"loss": 0.1346,
"step": 1461
},
{
"epoch": 1.0274068868587491,
"grad_norm": 0.5204036110725074,
"learning_rate": 4.78481329798531e-06,
"loss": 0.151,
"step": 1462
},
{
"epoch": 1.028109627547435,
"grad_norm": 0.5284160836774149,
"learning_rate": 4.779299231585907e-06,
"loss": 0.1659,
"step": 1463
},
{
"epoch": 1.0288123682361208,
"grad_norm": 0.5287172091425115,
"learning_rate": 4.773785434113101e-06,
"loss": 0.1466,
"step": 1464
},
{
"epoch": 1.0295151089248067,
"grad_norm": 0.5242209228306783,
"learning_rate": 4.768271912285521e-06,
"loss": 0.1646,
"step": 1465
},
{
"epoch": 1.0302178496134926,
"grad_norm": 0.48065702521415976,
"learning_rate": 4.7627586728214606e-06,
"loss": 0.1323,
"step": 1466
},
{
"epoch": 1.0309205903021785,
"grad_norm": 0.5271978633140205,
"learning_rate": 4.75724572243887e-06,
"loss": 0.1524,
"step": 1467
},
{
"epoch": 1.0316233309908645,
"grad_norm": 0.5278668269452242,
"learning_rate": 4.751733067855348e-06,
"loss": 0.1652,
"step": 1468
},
{
"epoch": 1.0323260716795502,
"grad_norm": 0.4714409663625622,
"learning_rate": 4.746220715788132e-06,
"loss": 0.1243,
"step": 1469
},
{
"epoch": 1.033028812368236,
"grad_norm": 0.5167269663375293,
"learning_rate": 4.74070867295409e-06,
"loss": 0.1539,
"step": 1470
},
{
"epoch": 1.033731553056922,
"grad_norm": 0.513279157041316,
"learning_rate": 4.735196946069711e-06,
"loss": 0.1631,
"step": 1471
},
{
"epoch": 1.034434293745608,
"grad_norm": 0.4998917276920187,
"learning_rate": 4.729685541851107e-06,
"loss": 0.1237,
"step": 1472
},
{
"epoch": 1.0351370344342938,
"grad_norm": 0.529541513903104,
"learning_rate": 4.724174467013987e-06,
"loss": 0.1738,
"step": 1473
},
{
"epoch": 1.0358397751229795,
"grad_norm": 0.5184555550536734,
"learning_rate": 4.718663728273669e-06,
"loss": 0.1323,
"step": 1474
},
{
"epoch": 1.0365425158116655,
"grad_norm": 0.530116425204895,
"learning_rate": 4.7131533323450505e-06,
"loss": 0.1692,
"step": 1475
},
{
"epoch": 1.0372452565003514,
"grad_norm": 0.5011637485620603,
"learning_rate": 4.707643285942619e-06,
"loss": 0.1577,
"step": 1476
},
{
"epoch": 1.0379479971890373,
"grad_norm": 0.52765636889385,
"learning_rate": 4.702133595780433e-06,
"loss": 0.172,
"step": 1477
},
{
"epoch": 1.0386507378777232,
"grad_norm": 0.49850621349027224,
"learning_rate": 4.696624268572118e-06,
"loss": 0.1508,
"step": 1478
},
{
"epoch": 1.039353478566409,
"grad_norm": 0.5416958214373463,
"learning_rate": 4.6911153110308574e-06,
"loss": 0.1699,
"step": 1479
},
{
"epoch": 1.0400562192550948,
"grad_norm": 0.5399259415678693,
"learning_rate": 4.6856067298693834e-06,
"loss": 0.1524,
"step": 1480
},
{
"epoch": 1.0407589599437808,
"grad_norm": 0.5032021860715928,
"learning_rate": 4.680098531799967e-06,
"loss": 0.1411,
"step": 1481
},
{
"epoch": 1.0414617006324667,
"grad_norm": 0.46742852855684963,
"learning_rate": 4.674590723534419e-06,
"loss": 0.1157,
"step": 1482
},
{
"epoch": 1.0421644413211526,
"grad_norm": 0.5180997769388833,
"learning_rate": 4.669083311784069e-06,
"loss": 0.1599,
"step": 1483
},
{
"epoch": 1.0428671820098383,
"grad_norm": 0.5309463070515528,
"learning_rate": 4.6635763032597704e-06,
"loss": 0.1511,
"step": 1484
},
{
"epoch": 1.0435699226985242,
"grad_norm": 0.5302382026589856,
"learning_rate": 4.658069704671873e-06,
"loss": 0.1724,
"step": 1485
},
{
"epoch": 1.0442726633872101,
"grad_norm": 0.5577755953646487,
"learning_rate": 4.65256352273024e-06,
"loss": 0.1433,
"step": 1486
},
{
"epoch": 1.044975404075896,
"grad_norm": 0.5137472463204175,
"learning_rate": 4.64705776414422e-06,
"loss": 0.1441,
"step": 1487
},
{
"epoch": 1.0456781447645818,
"grad_norm": 0.5046663966998653,
"learning_rate": 4.641552435622651e-06,
"loss": 0.1491,
"step": 1488
},
{
"epoch": 1.0463808854532677,
"grad_norm": 0.5153150802385964,
"learning_rate": 4.636047543873838e-06,
"loss": 0.148,
"step": 1489
},
{
"epoch": 1.0470836261419536,
"grad_norm": 0.5167782468770977,
"learning_rate": 4.630543095605562e-06,
"loss": 0.1326,
"step": 1490
},
{
"epoch": 1.0477863668306395,
"grad_norm": 0.5638675566292636,
"learning_rate": 4.625039097525058e-06,
"loss": 0.1424,
"step": 1491
},
{
"epoch": 1.0484891075193254,
"grad_norm": 0.527824249323301,
"learning_rate": 4.619535556339021e-06,
"loss": 0.1548,
"step": 1492
},
{
"epoch": 1.0491918482080111,
"grad_norm": 0.530738132593687,
"learning_rate": 4.61403247875358e-06,
"loss": 0.1548,
"step": 1493
},
{
"epoch": 1.049894588896697,
"grad_norm": 0.515381804578322,
"learning_rate": 4.6085298714743025e-06,
"loss": 0.1435,
"step": 1494
},
{
"epoch": 1.050597329585383,
"grad_norm": 0.5078634734847863,
"learning_rate": 4.603027741206181e-06,
"loss": 0.1599,
"step": 1495
},
{
"epoch": 1.051300070274069,
"grad_norm": 0.47626536929288615,
"learning_rate": 4.597526094653633e-06,
"loss": 0.1225,
"step": 1496
},
{
"epoch": 1.0520028109627548,
"grad_norm": 0.5101924345356312,
"learning_rate": 4.592024938520479e-06,
"loss": 0.1327,
"step": 1497
},
{
"epoch": 1.0527055516514405,
"grad_norm": 0.4893049540660336,
"learning_rate": 4.58652427950995e-06,
"loss": 0.1211,
"step": 1498
},
{
"epoch": 1.0534082923401265,
"grad_norm": 0.5213987638988877,
"learning_rate": 4.581024124324661e-06,
"loss": 0.1457,
"step": 1499
},
{
"epoch": 1.0541110330288124,
"grad_norm": 0.4655223777487336,
"learning_rate": 4.575524479666621e-06,
"loss": 0.1337,
"step": 1500
},
{
"epoch": 1.0541110330288124,
"eval_loss": 0.18591229617595673,
"eval_runtime": 10.8438,
"eval_samples_per_second": 21.21,
"eval_steps_per_second": 5.349,
"step": 1500
},
{
"epoch": 1.0548137737174983,
"grad_norm": 0.5473471671772354,
"learning_rate": 4.570025352237213e-06,
"loss": 0.1645,
"step": 1501
},
{
"epoch": 1.0555165144061842,
"grad_norm": 0.5543584858779272,
"learning_rate": 4.564526748737195e-06,
"loss": 0.185,
"step": 1502
},
{
"epoch": 1.05621925509487,
"grad_norm": 0.5249136679057413,
"learning_rate": 4.559028675866681e-06,
"loss": 0.1531,
"step": 1503
},
{
"epoch": 1.0569219957835558,
"grad_norm": 0.5077634369356774,
"learning_rate": 4.553531140325139e-06,
"loss": 0.1426,
"step": 1504
},
{
"epoch": 1.0576247364722418,
"grad_norm": 0.4901031269988132,
"learning_rate": 4.548034148811384e-06,
"loss": 0.1181,
"step": 1505
},
{
"epoch": 1.0583274771609277,
"grad_norm": 0.5168940632987495,
"learning_rate": 4.542537708023569e-06,
"loss": 0.1425,
"step": 1506
},
{
"epoch": 1.0590302178496136,
"grad_norm": 0.5158342744313869,
"learning_rate": 4.537041824659172e-06,
"loss": 0.157,
"step": 1507
},
{
"epoch": 1.0597329585382993,
"grad_norm": 0.5197652354490249,
"learning_rate": 4.531546505415e-06,
"loss": 0.14,
"step": 1508
},
{
"epoch": 1.0604356992269852,
"grad_norm": 0.5206997910822582,
"learning_rate": 4.52605175698716e-06,
"loss": 0.1585,
"step": 1509
},
{
"epoch": 1.0611384399156711,
"grad_norm": 0.5014343619476886,
"learning_rate": 4.520557586071074e-06,
"loss": 0.1375,
"step": 1510
},
{
"epoch": 1.061841180604357,
"grad_norm": 0.5177852995426864,
"learning_rate": 4.515063999361455e-06,
"loss": 0.1348,
"step": 1511
},
{
"epoch": 1.062543921293043,
"grad_norm": 0.5277049664691724,
"learning_rate": 4.509571003552311e-06,
"loss": 0.1461,
"step": 1512
},
{
"epoch": 1.0632466619817287,
"grad_norm": 0.5125849050122258,
"learning_rate": 4.5040786053369175e-06,
"loss": 0.1328,
"step": 1513
},
{
"epoch": 1.0639494026704146,
"grad_norm": 0.5258856277575326,
"learning_rate": 4.498586811407834e-06,
"loss": 0.1641,
"step": 1514
},
{
"epoch": 1.0646521433591005,
"grad_norm": 0.533358579978669,
"learning_rate": 4.493095628456876e-06,
"loss": 0.1492,
"step": 1515
},
{
"epoch": 1.0653548840477864,
"grad_norm": 0.5410665746866157,
"learning_rate": 4.487605063175119e-06,
"loss": 0.1754,
"step": 1516
},
{
"epoch": 1.0660576247364721,
"grad_norm": 0.5445504453331569,
"learning_rate": 4.482115122252887e-06,
"loss": 0.164,
"step": 1517
},
{
"epoch": 1.066760365425158,
"grad_norm": 0.48571042799729647,
"learning_rate": 4.4766258123797355e-06,
"loss": 0.1291,
"step": 1518
},
{
"epoch": 1.067463106113844,
"grad_norm": 0.5240647619658219,
"learning_rate": 4.471137140244456e-06,
"loss": 0.1502,
"step": 1519
},
{
"epoch": 1.06816584680253,
"grad_norm": 0.4938137279997872,
"learning_rate": 4.465649112535067e-06,
"loss": 0.1427,
"step": 1520
},
{
"epoch": 1.0688685874912158,
"grad_norm": 0.5007927131769512,
"learning_rate": 4.460161735938794e-06,
"loss": 0.1462,
"step": 1521
},
{
"epoch": 1.0695713281799015,
"grad_norm": 0.5170092663246689,
"learning_rate": 4.4546750171420764e-06,
"loss": 0.1381,
"step": 1522
},
{
"epoch": 1.0702740688685874,
"grad_norm": 0.49458124261997344,
"learning_rate": 4.449188962830544e-06,
"loss": 0.1302,
"step": 1523
},
{
"epoch": 1.0709768095572734,
"grad_norm": 0.5145222594315508,
"learning_rate": 4.443703579689025e-06,
"loss": 0.1469,
"step": 1524
},
{
"epoch": 1.0716795502459593,
"grad_norm": 0.5063131328210128,
"learning_rate": 4.438218874401522e-06,
"loss": 0.1536,
"step": 1525
},
{
"epoch": 1.0723822909346452,
"grad_norm": 0.5203157749421187,
"learning_rate": 4.432734853651222e-06,
"loss": 0.1507,
"step": 1526
},
{
"epoch": 1.073085031623331,
"grad_norm": 0.5397251232553768,
"learning_rate": 4.4272515241204674e-06,
"loss": 0.1644,
"step": 1527
},
{
"epoch": 1.0737877723120168,
"grad_norm": 0.5318045536009254,
"learning_rate": 4.421768892490762e-06,
"loss": 0.1419,
"step": 1528
},
{
"epoch": 1.0744905130007028,
"grad_norm": 0.5317716259625153,
"learning_rate": 4.416286965442761e-06,
"loss": 0.159,
"step": 1529
},
{
"epoch": 1.0751932536893887,
"grad_norm": 0.536014633971988,
"learning_rate": 4.41080574965626e-06,
"loss": 0.1612,
"step": 1530
},
{
"epoch": 1.0758959943780746,
"grad_norm": 0.5495640591297969,
"learning_rate": 4.4053252518101855e-06,
"loss": 0.1551,
"step": 1531
},
{
"epoch": 1.0765987350667603,
"grad_norm": 0.5078396198787789,
"learning_rate": 4.399845478582598e-06,
"loss": 0.1483,
"step": 1532
},
{
"epoch": 1.0773014757554462,
"grad_norm": 0.5024234016147644,
"learning_rate": 4.394366436650661e-06,
"loss": 0.1346,
"step": 1533
},
{
"epoch": 1.0780042164441321,
"grad_norm": 0.5173858114853311,
"learning_rate": 4.388888132690657e-06,
"loss": 0.1164,
"step": 1534
},
{
"epoch": 1.078706957132818,
"grad_norm": 0.5238027648125677,
"learning_rate": 4.383410573377966e-06,
"loss": 0.1432,
"step": 1535
},
{
"epoch": 1.0794096978215038,
"grad_norm": 0.5526852874415407,
"learning_rate": 4.3779337653870666e-06,
"loss": 0.1561,
"step": 1536
},
{
"epoch": 1.0801124385101897,
"grad_norm": 0.55461552732306,
"learning_rate": 4.372457715391508e-06,
"loss": 0.1636,
"step": 1537
},
{
"epoch": 1.0808151791988756,
"grad_norm": 0.5590817602249681,
"learning_rate": 4.3669824300639305e-06,
"loss": 0.1832,
"step": 1538
},
{
"epoch": 1.0815179198875615,
"grad_norm": 0.510901886107954,
"learning_rate": 4.361507916076032e-06,
"loss": 0.1417,
"step": 1539
},
{
"epoch": 1.0822206605762474,
"grad_norm": 0.5081306360693141,
"learning_rate": 4.35603418009858e-06,
"loss": 0.1241,
"step": 1540
},
{
"epoch": 1.0829234012649331,
"grad_norm": 0.4978947306976569,
"learning_rate": 4.350561228801386e-06,
"loss": 0.1415,
"step": 1541
},
{
"epoch": 1.083626141953619,
"grad_norm": 0.5074726593124405,
"learning_rate": 4.345089068853309e-06,
"loss": 0.1391,
"step": 1542
},
{
"epoch": 1.084328882642305,
"grad_norm": 0.5204197326136466,
"learning_rate": 4.339617706922242e-06,
"loss": 0.1516,
"step": 1543
},
{
"epoch": 1.085031623330991,
"grad_norm": 0.5168236552395792,
"learning_rate": 4.3341471496751085e-06,
"loss": 0.1432,
"step": 1544
},
{
"epoch": 1.0857343640196768,
"grad_norm": 0.5123174698235842,
"learning_rate": 4.328677403777848e-06,
"loss": 0.1401,
"step": 1545
},
{
"epoch": 1.0864371047083625,
"grad_norm": 0.5277797986160024,
"learning_rate": 4.323208475895416e-06,
"loss": 0.1848,
"step": 1546
},
{
"epoch": 1.0871398453970484,
"grad_norm": 0.5192275580663916,
"learning_rate": 4.317740372691765e-06,
"loss": 0.1523,
"step": 1547
},
{
"epoch": 1.0878425860857344,
"grad_norm": 0.5018066227654631,
"learning_rate": 4.312273100829845e-06,
"loss": 0.1299,
"step": 1548
},
{
"epoch": 1.0885453267744203,
"grad_norm": 0.5183620807357253,
"learning_rate": 4.306806666971597e-06,
"loss": 0.1609,
"step": 1549
},
{
"epoch": 1.0892480674631062,
"grad_norm": 0.5244515698415202,
"learning_rate": 4.3013410777779375e-06,
"loss": 0.1601,
"step": 1550
},
{
"epoch": 1.089950808151792,
"grad_norm": 0.47123123465237887,
"learning_rate": 4.295876339908755e-06,
"loss": 0.1164,
"step": 1551
},
{
"epoch": 1.0906535488404778,
"grad_norm": 0.5184538408690857,
"learning_rate": 4.290412460022896e-06,
"loss": 0.1352,
"step": 1552
},
{
"epoch": 1.0913562895291637,
"grad_norm": 0.520938972909478,
"learning_rate": 4.284949444778166e-06,
"loss": 0.1584,
"step": 1553
},
{
"epoch": 1.0920590302178497,
"grad_norm": 0.5456503103112614,
"learning_rate": 4.279487300831318e-06,
"loss": 0.135,
"step": 1554
},
{
"epoch": 1.0927617709065356,
"grad_norm": 0.5107554261266217,
"learning_rate": 4.274026034838043e-06,
"loss": 0.1368,
"step": 1555
},
{
"epoch": 1.0934645115952213,
"grad_norm": 0.5819292852086433,
"learning_rate": 4.2685656534529576e-06,
"loss": 0.1664,
"step": 1556
},
{
"epoch": 1.0941672522839072,
"grad_norm": 0.5449361933830971,
"learning_rate": 4.263106163329603e-06,
"loss": 0.1729,
"step": 1557
},
{
"epoch": 1.0948699929725931,
"grad_norm": 0.5275764569882169,
"learning_rate": 4.257647571120437e-06,
"loss": 0.1368,
"step": 1558
},
{
"epoch": 1.095572733661279,
"grad_norm": 0.5164645233179017,
"learning_rate": 4.25218988347682e-06,
"loss": 0.1525,
"step": 1559
},
{
"epoch": 1.096275474349965,
"grad_norm": 0.5060477908801764,
"learning_rate": 4.246733107049012e-06,
"loss": 0.1341,
"step": 1560
},
{
"epoch": 1.0969782150386507,
"grad_norm": 0.5299530311152008,
"learning_rate": 4.241277248486164e-06,
"loss": 0.1341,
"step": 1561
},
{
"epoch": 1.0976809557273366,
"grad_norm": 0.5171934228573,
"learning_rate": 4.2358223144363046e-06,
"loss": 0.1315,
"step": 1562
},
{
"epoch": 1.0983836964160225,
"grad_norm": 0.534962523606485,
"learning_rate": 4.2303683115463355e-06,
"loss": 0.1525,
"step": 1563
},
{
"epoch": 1.0990864371047084,
"grad_norm": 0.513404314461455,
"learning_rate": 4.22491524646203e-06,
"loss": 0.1334,
"step": 1564
},
{
"epoch": 1.0997891777933941,
"grad_norm": 0.4876830686188593,
"learning_rate": 4.219463125828015e-06,
"loss": 0.12,
"step": 1565
},
{
"epoch": 1.10049191848208,
"grad_norm": 0.518920756296569,
"learning_rate": 4.214011956287765e-06,
"loss": 0.1445,
"step": 1566
},
{
"epoch": 1.101194659170766,
"grad_norm": 0.5051886440212208,
"learning_rate": 4.208561744483595e-06,
"loss": 0.1391,
"step": 1567
},
{
"epoch": 1.101897399859452,
"grad_norm": 0.530898642935748,
"learning_rate": 4.2031124970566576e-06,
"loss": 0.1486,
"step": 1568
},
{
"epoch": 1.1026001405481378,
"grad_norm": 0.4944119236879768,
"learning_rate": 4.197664220646928e-06,
"loss": 0.1294,
"step": 1569
},
{
"epoch": 1.1033028812368235,
"grad_norm": 0.520973466007102,
"learning_rate": 4.192216921893198e-06,
"loss": 0.1431,
"step": 1570
},
{
"epoch": 1.1040056219255094,
"grad_norm": 0.5436921574604013,
"learning_rate": 4.186770607433065e-06,
"loss": 0.1703,
"step": 1571
},
{
"epoch": 1.1047083626141954,
"grad_norm": 0.544033910389497,
"learning_rate": 4.1813252839029325e-06,
"loss": 0.1653,
"step": 1572
},
{
"epoch": 1.1054111033028813,
"grad_norm": 0.5192877976808141,
"learning_rate": 4.175880957937994e-06,
"loss": 0.16,
"step": 1573
},
{
"epoch": 1.1061138439915672,
"grad_norm": 0.5126150735386685,
"learning_rate": 4.170437636172227e-06,
"loss": 0.161,
"step": 1574
},
{
"epoch": 1.106816584680253,
"grad_norm": 0.5279167572838358,
"learning_rate": 4.164995325238388e-06,
"loss": 0.164,
"step": 1575
},
{
"epoch": 1.1075193253689388,
"grad_norm": 0.5648028936730404,
"learning_rate": 4.159554031767996e-06,
"loss": 0.1877,
"step": 1576
},
{
"epoch": 1.1082220660576247,
"grad_norm": 0.5193777403326719,
"learning_rate": 4.1541137623913355e-06,
"loss": 0.1644,
"step": 1577
},
{
"epoch": 1.1089248067463107,
"grad_norm": 0.4954080216440343,
"learning_rate": 4.148674523737443e-06,
"loss": 0.1353,
"step": 1578
},
{
"epoch": 1.1096275474349966,
"grad_norm": 0.5221218148685247,
"learning_rate": 4.143236322434096e-06,
"loss": 0.1467,
"step": 1579
},
{
"epoch": 1.1103302881236823,
"grad_norm": 0.5148823212995353,
"learning_rate": 4.137799165107811e-06,
"loss": 0.1491,
"step": 1580
},
{
"epoch": 1.1110330288123682,
"grad_norm": 0.49902117768501153,
"learning_rate": 4.132363058383828e-06,
"loss": 0.1462,
"step": 1581
},
{
"epoch": 1.1117357695010541,
"grad_norm": 0.5050608112360148,
"learning_rate": 4.126928008886112e-06,
"loss": 0.1368,
"step": 1582
},
{
"epoch": 1.11243851018974,
"grad_norm": 0.5570867896413365,
"learning_rate": 4.121494023237338e-06,
"loss": 0.1677,
"step": 1583
},
{
"epoch": 1.1131412508784257,
"grad_norm": 0.5246201638744743,
"learning_rate": 4.116061108058882e-06,
"loss": 0.1382,
"step": 1584
},
{
"epoch": 1.1138439915671117,
"grad_norm": 0.5325551208566502,
"learning_rate": 4.110629269970822e-06,
"loss": 0.1525,
"step": 1585
},
{
"epoch": 1.1145467322557976,
"grad_norm": 0.5117833002933512,
"learning_rate": 4.105198515591915e-06,
"loss": 0.149,
"step": 1586
},
{
"epoch": 1.1152494729444835,
"grad_norm": 0.552294045065401,
"learning_rate": 4.099768851539603e-06,
"loss": 0.155,
"step": 1587
},
{
"epoch": 1.1159522136331694,
"grad_norm": 0.491251116627733,
"learning_rate": 4.0943402844300004e-06,
"loss": 0.1261,
"step": 1588
},
{
"epoch": 1.1166549543218554,
"grad_norm": 0.5537353355618069,
"learning_rate": 4.088912820877881e-06,
"loss": 0.1724,
"step": 1589
},
{
"epoch": 1.117357695010541,
"grad_norm": 0.4985476104458816,
"learning_rate": 4.0834864674966765e-06,
"loss": 0.1436,
"step": 1590
},
{
"epoch": 1.118060435699227,
"grad_norm": 0.5114879066856933,
"learning_rate": 4.078061230898463e-06,
"loss": 0.1644,
"step": 1591
},
{
"epoch": 1.118763176387913,
"grad_norm": 0.5302626334189574,
"learning_rate": 4.072637117693962e-06,
"loss": 0.16,
"step": 1592
},
{
"epoch": 1.1194659170765988,
"grad_norm": 0.5012286863562989,
"learning_rate": 4.067214134492519e-06,
"loss": 0.1481,
"step": 1593
},
{
"epoch": 1.1201686577652845,
"grad_norm": 0.5209237907289656,
"learning_rate": 4.061792287902107e-06,
"loss": 0.1551,
"step": 1594
},
{
"epoch": 1.1208713984539704,
"grad_norm": 0.5307699445933584,
"learning_rate": 4.056371584529311e-06,
"loss": 0.1387,
"step": 1595
},
{
"epoch": 1.1215741391426564,
"grad_norm": 0.5171963652616054,
"learning_rate": 4.050952030979326e-06,
"loss": 0.1333,
"step": 1596
},
{
"epoch": 1.1222768798313423,
"grad_norm": 0.5190474007139281,
"learning_rate": 4.0455336338559446e-06,
"loss": 0.1489,
"step": 1597
},
{
"epoch": 1.1229796205200282,
"grad_norm": 0.5083668842001544,
"learning_rate": 4.040116399761547e-06,
"loss": 0.1448,
"step": 1598
},
{
"epoch": 1.123682361208714,
"grad_norm": 0.5417158344934303,
"learning_rate": 4.034700335297107e-06,
"loss": 0.1687,
"step": 1599
},
{
"epoch": 1.1243851018973998,
"grad_norm": 0.4797849651915903,
"learning_rate": 4.029285447062159e-06,
"loss": 0.1199,
"step": 1600
},
{
"epoch": 1.1250878425860857,
"grad_norm": 0.5022976081283302,
"learning_rate": 4.02387174165481e-06,
"loss": 0.1438,
"step": 1601
},
{
"epoch": 1.1257905832747717,
"grad_norm": 0.5079689484794458,
"learning_rate": 4.018459225671732e-06,
"loss": 0.1462,
"step": 1602
},
{
"epoch": 1.1264933239634574,
"grad_norm": 0.4934039692280827,
"learning_rate": 4.01304790570814e-06,
"loss": 0.1281,
"step": 1603
},
{
"epoch": 1.1271960646521433,
"grad_norm": 0.5014371900597421,
"learning_rate": 4.007637788357793e-06,
"loss": 0.1396,
"step": 1604
},
{
"epoch": 1.1278988053408292,
"grad_norm": 0.5268724559733456,
"learning_rate": 4.002228880212984e-06,
"loss": 0.1474,
"step": 1605
},
{
"epoch": 1.1286015460295151,
"grad_norm": 0.5209824286459966,
"learning_rate": 3.996821187864537e-06,
"loss": 0.1414,
"step": 1606
},
{
"epoch": 1.129304286718201,
"grad_norm": 0.48759501821979767,
"learning_rate": 3.99141471790179e-06,
"loss": 0.1336,
"step": 1607
},
{
"epoch": 1.130007027406887,
"grad_norm": 0.4802373810096626,
"learning_rate": 3.986009476912592e-06,
"loss": 0.1258,
"step": 1608
},
{
"epoch": 1.1307097680955727,
"grad_norm": 0.5209903064603748,
"learning_rate": 3.980605471483299e-06,
"loss": 0.1517,
"step": 1609
},
{
"epoch": 1.1314125087842586,
"grad_norm": 0.5549038348276285,
"learning_rate": 3.975202708198754e-06,
"loss": 0.176,
"step": 1610
},
{
"epoch": 1.1321152494729445,
"grad_norm": 0.5084752069197226,
"learning_rate": 3.969801193642293e-06,
"loss": 0.1453,
"step": 1611
},
{
"epoch": 1.1328179901616304,
"grad_norm": 0.5271233290836675,
"learning_rate": 3.964400934395726e-06,
"loss": 0.1596,
"step": 1612
},
{
"epoch": 1.1335207308503161,
"grad_norm": 0.5320555907499869,
"learning_rate": 3.959001937039337e-06,
"loss": 0.166,
"step": 1613
},
{
"epoch": 1.134223471539002,
"grad_norm": 0.5232518466549592,
"learning_rate": 3.95360420815187e-06,
"loss": 0.1574,
"step": 1614
},
{
"epoch": 1.134926212227688,
"grad_norm": 0.51281620193853,
"learning_rate": 3.948207754310522e-06,
"loss": 0.1443,
"step": 1615
},
{
"epoch": 1.135628952916374,
"grad_norm": 0.5279682828795013,
"learning_rate": 3.94281258209094e-06,
"loss": 0.1539,
"step": 1616
},
{
"epoch": 1.1363316936050598,
"grad_norm": 0.5253882313772008,
"learning_rate": 3.937418698067209e-06,
"loss": 0.1522,
"step": 1617
},
{
"epoch": 1.1370344342937457,
"grad_norm": 0.5263221297165886,
"learning_rate": 3.932026108811841e-06,
"loss": 0.163,
"step": 1618
},
{
"epoch": 1.1377371749824314,
"grad_norm": 0.529816285315717,
"learning_rate": 3.9266348208957716e-06,
"loss": 0.1628,
"step": 1619
},
{
"epoch": 1.1384399156711174,
"grad_norm": 0.5589296845016164,
"learning_rate": 3.921244840888353e-06,
"loss": 0.1542,
"step": 1620
},
{
"epoch": 1.1391426563598033,
"grad_norm": 0.5095397475823957,
"learning_rate": 3.915856175357341e-06,
"loss": 0.1302,
"step": 1621
},
{
"epoch": 1.1398453970484892,
"grad_norm": 0.5152041254097331,
"learning_rate": 3.910468830868891e-06,
"loss": 0.1568,
"step": 1622
},
{
"epoch": 1.140548137737175,
"grad_norm": 0.498483758902883,
"learning_rate": 3.90508281398755e-06,
"loss": 0.1448,
"step": 1623
},
{
"epoch": 1.1412508784258608,
"grad_norm": 0.5424368341811135,
"learning_rate": 3.899698131276243e-06,
"loss": 0.1537,
"step": 1624
},
{
"epoch": 1.1419536191145467,
"grad_norm": 0.5044011713227908,
"learning_rate": 3.894314789296274e-06,
"loss": 0.1345,
"step": 1625
},
{
"epoch": 1.1426563598032327,
"grad_norm": 0.5163947411118621,
"learning_rate": 3.888932794607308e-06,
"loss": 0.1566,
"step": 1626
},
{
"epoch": 1.1433591004919186,
"grad_norm": 0.5214483574311828,
"learning_rate": 3.883552153767376e-06,
"loss": 0.1256,
"step": 1627
},
{
"epoch": 1.1440618411806043,
"grad_norm": 0.5234440658460365,
"learning_rate": 3.878172873332854e-06,
"loss": 0.148,
"step": 1628
},
{
"epoch": 1.1447645818692902,
"grad_norm": 0.5361870114650095,
"learning_rate": 3.872794959858457e-06,
"loss": 0.1793,
"step": 1629
},
{
"epoch": 1.1454673225579761,
"grad_norm": 0.5027193241001592,
"learning_rate": 3.867418419897245e-06,
"loss": 0.1354,
"step": 1630
},
{
"epoch": 1.146170063246662,
"grad_norm": 0.5201478011166566,
"learning_rate": 3.862043260000593e-06,
"loss": 0.1529,
"step": 1631
},
{
"epoch": 1.1468728039353477,
"grad_norm": 0.5005128128993738,
"learning_rate": 3.856669486718201e-06,
"loss": 0.1451,
"step": 1632
},
{
"epoch": 1.1475755446240337,
"grad_norm": 0.5187700145278601,
"learning_rate": 3.85129710659808e-06,
"loss": 0.1485,
"step": 1633
},
{
"epoch": 1.1482782853127196,
"grad_norm": 0.4976097692834372,
"learning_rate": 3.845926126186539e-06,
"loss": 0.1325,
"step": 1634
},
{
"epoch": 1.1489810260014055,
"grad_norm": 0.4955834841080017,
"learning_rate": 3.840556552028182e-06,
"loss": 0.1148,
"step": 1635
},
{
"epoch": 1.1496837666900914,
"grad_norm": 0.5454349128260972,
"learning_rate": 3.8351883906659015e-06,
"loss": 0.1381,
"step": 1636
},
{
"epoch": 1.1503865073787773,
"grad_norm": 0.5124326684693784,
"learning_rate": 3.829821648640873e-06,
"loss": 0.137,
"step": 1637
},
{
"epoch": 1.151089248067463,
"grad_norm": 0.4962124166833035,
"learning_rate": 3.824456332492531e-06,
"loss": 0.1226,
"step": 1638
},
{
"epoch": 1.151791988756149,
"grad_norm": 0.49974840132238146,
"learning_rate": 3.8190924487585825e-06,
"loss": 0.1336,
"step": 1639
},
{
"epoch": 1.1524947294448349,
"grad_norm": 0.531460623031864,
"learning_rate": 3.8137300039749837e-06,
"loss": 0.1514,
"step": 1640
},
{
"epoch": 1.1531974701335208,
"grad_norm": 0.5245882163563044,
"learning_rate": 3.808369004675942e-06,
"loss": 0.167,
"step": 1641
},
{
"epoch": 1.1539002108222065,
"grad_norm": 0.5174623994992702,
"learning_rate": 3.803009457393901e-06,
"loss": 0.1342,
"step": 1642
},
{
"epoch": 1.1546029515108924,
"grad_norm": 0.5328113159558158,
"learning_rate": 3.7976513686595306e-06,
"loss": 0.1455,
"step": 1643
},
{
"epoch": 1.1553056921995783,
"grad_norm": 0.5134419088998426,
"learning_rate": 3.792294745001732e-06,
"loss": 0.1405,
"step": 1644
},
{
"epoch": 1.1560084328882643,
"grad_norm": 0.513456988869456,
"learning_rate": 3.786939592947616e-06,
"loss": 0.1407,
"step": 1645
},
{
"epoch": 1.1567111735769502,
"grad_norm": 0.5231031001511144,
"learning_rate": 3.781585919022499e-06,
"loss": 0.1562,
"step": 1646
},
{
"epoch": 1.157413914265636,
"grad_norm": 0.5598929326454744,
"learning_rate": 3.7762337297499026e-06,
"loss": 0.1555,
"step": 1647
},
{
"epoch": 1.1581166549543218,
"grad_norm": 0.5196277810093672,
"learning_rate": 3.770883031651531e-06,
"loss": 0.1681,
"step": 1648
},
{
"epoch": 1.1588193956430077,
"grad_norm": 0.5248335205070664,
"learning_rate": 3.765533831247278e-06,
"loss": 0.1535,
"step": 1649
},
{
"epoch": 1.1595221363316937,
"grad_norm": 0.5179389153060617,
"learning_rate": 3.7601861350552073e-06,
"loss": 0.1502,
"step": 1650
},
{
"epoch": 1.1602248770203796,
"grad_norm": 0.5268002561119876,
"learning_rate": 3.7548399495915555e-06,
"loss": 0.1529,
"step": 1651
},
{
"epoch": 1.1609276177090653,
"grad_norm": 0.5210242798238149,
"learning_rate": 3.7494952813707154e-06,
"loss": 0.1484,
"step": 1652
},
{
"epoch": 1.1616303583977512,
"grad_norm": 0.48687971653015544,
"learning_rate": 3.744152136905226e-06,
"loss": 0.1237,
"step": 1653
},
{
"epoch": 1.1623330990864371,
"grad_norm": 0.5227291974655749,
"learning_rate": 3.7388105227057796e-06,
"loss": 0.1526,
"step": 1654
},
{
"epoch": 1.163035839775123,
"grad_norm": 0.49035055461560756,
"learning_rate": 3.733470445281197e-06,
"loss": 0.1389,
"step": 1655
},
{
"epoch": 1.163738580463809,
"grad_norm": 0.48742906301342115,
"learning_rate": 3.7281319111384274e-06,
"loss": 0.1272,
"step": 1656
},
{
"epoch": 1.1644413211524947,
"grad_norm": 0.4817423144150866,
"learning_rate": 3.722794926782542e-06,
"loss": 0.1444,
"step": 1657
},
{
"epoch": 1.1651440618411806,
"grad_norm": 0.4913925343480694,
"learning_rate": 3.71745949871672e-06,
"loss": 0.1271,
"step": 1658
},
{
"epoch": 1.1658468025298665,
"grad_norm": 0.5048171170148831,
"learning_rate": 3.712125633442246e-06,
"loss": 0.1552,
"step": 1659
},
{
"epoch": 1.1665495432185524,
"grad_norm": 0.5703783627697752,
"learning_rate": 3.7067933374585003e-06,
"loss": 0.1833,
"step": 1660
},
{
"epoch": 1.1672522839072381,
"grad_norm": 0.5225800614551854,
"learning_rate": 3.7014626172629536e-06,
"loss": 0.1691,
"step": 1661
},
{
"epoch": 1.167955024595924,
"grad_norm": 0.4995785135922539,
"learning_rate": 3.696133479351151e-06,
"loss": 0.1425,
"step": 1662
},
{
"epoch": 1.16865776528461,
"grad_norm": 0.48685260955142334,
"learning_rate": 3.6908059302167134e-06,
"loss": 0.1342,
"step": 1663
},
{
"epoch": 1.1693605059732959,
"grad_norm": 0.5318537127203009,
"learning_rate": 3.6854799763513238e-06,
"loss": 0.1383,
"step": 1664
},
{
"epoch": 1.1700632466619818,
"grad_norm": 0.4910402665339831,
"learning_rate": 3.6801556242447247e-06,
"loss": 0.1272,
"step": 1665
},
{
"epoch": 1.1707659873506677,
"grad_norm": 0.49911615962552697,
"learning_rate": 3.6748328803847044e-06,
"loss": 0.1522,
"step": 1666
},
{
"epoch": 1.1714687280393534,
"grad_norm": 0.5262194858026492,
"learning_rate": 3.6695117512570878e-06,
"loss": 0.1491,
"step": 1667
},
{
"epoch": 1.1721714687280393,
"grad_norm": 0.4684801808092165,
"learning_rate": 3.66419224334574e-06,
"loss": 0.1134,
"step": 1668
},
{
"epoch": 1.1728742094167253,
"grad_norm": 0.4636585324685207,
"learning_rate": 3.658874363132546e-06,
"loss": 0.1159,
"step": 1669
},
{
"epoch": 1.1735769501054112,
"grad_norm": 0.5024935722580612,
"learning_rate": 3.6535581170974055e-06,
"loss": 0.1389,
"step": 1670
},
{
"epoch": 1.1742796907940969,
"grad_norm": 0.5274319478864739,
"learning_rate": 3.648243511718235e-06,
"loss": 0.138,
"step": 1671
},
{
"epoch": 1.1749824314827828,
"grad_norm": 0.5261000202757115,
"learning_rate": 3.6429305534709415e-06,
"loss": 0.1524,
"step": 1672
},
{
"epoch": 1.1756851721714687,
"grad_norm": 0.505490322072027,
"learning_rate": 3.6376192488294317e-06,
"loss": 0.1284,
"step": 1673
},
{
"epoch": 1.1763879128601546,
"grad_norm": 0.4688629466709282,
"learning_rate": 3.6323096042655936e-06,
"loss": 0.1103,
"step": 1674
},
{
"epoch": 1.1770906535488406,
"grad_norm": 0.5399605141659941,
"learning_rate": 3.627001626249298e-06,
"loss": 0.1494,
"step": 1675
},
{
"epoch": 1.1777933942375263,
"grad_norm": 0.5187430831627153,
"learning_rate": 3.6216953212483796e-06,
"loss": 0.1475,
"step": 1676
},
{
"epoch": 1.1784961349262122,
"grad_norm": 0.5076664057019005,
"learning_rate": 3.6163906957286347e-06,
"loss": 0.1336,
"step": 1677
},
{
"epoch": 1.1791988756148981,
"grad_norm": 0.5196016098967992,
"learning_rate": 3.611087756153815e-06,
"loss": 0.1262,
"step": 1678
},
{
"epoch": 1.179901616303584,
"grad_norm": 0.49731461259146237,
"learning_rate": 3.605786508985619e-06,
"loss": 0.1458,
"step": 1679
},
{
"epoch": 1.1806043569922697,
"grad_norm": 0.5261002476169107,
"learning_rate": 3.6004869606836807e-06,
"loss": 0.1575,
"step": 1680
},
{
"epoch": 1.1813070976809557,
"grad_norm": 0.5195604327281339,
"learning_rate": 3.5951891177055663e-06,
"loss": 0.1514,
"step": 1681
},
{
"epoch": 1.1820098383696416,
"grad_norm": 0.5075140626950995,
"learning_rate": 3.58989298650676e-06,
"loss": 0.1357,
"step": 1682
},
{
"epoch": 1.1827125790583275,
"grad_norm": 0.5196638994182908,
"learning_rate": 3.5845985735406634e-06,
"loss": 0.1554,
"step": 1683
},
{
"epoch": 1.1834153197470134,
"grad_norm": 0.5253842081752831,
"learning_rate": 3.5793058852585837e-06,
"loss": 0.1599,
"step": 1684
},
{
"epoch": 1.1841180604356993,
"grad_norm": 0.5049337808941067,
"learning_rate": 3.5740149281097276e-06,
"loss": 0.1434,
"step": 1685
},
{
"epoch": 1.184820801124385,
"grad_norm": 0.519415710896182,
"learning_rate": 3.5687257085411913e-06,
"loss": 0.154,
"step": 1686
},
{
"epoch": 1.185523541813071,
"grad_norm": 0.5155923291806198,
"learning_rate": 3.563438232997952e-06,
"loss": 0.1545,
"step": 1687
},
{
"epoch": 1.1862262825017569,
"grad_norm": 0.5165664428751502,
"learning_rate": 3.5581525079228647e-06,
"loss": 0.1483,
"step": 1688
},
{
"epoch": 1.1869290231904428,
"grad_norm": 0.5015409975851226,
"learning_rate": 3.552868539756651e-06,
"loss": 0.1472,
"step": 1689
},
{
"epoch": 1.1876317638791285,
"grad_norm": 0.5051071368670725,
"learning_rate": 3.5475863349378907e-06,
"loss": 0.1359,
"step": 1690
},
{
"epoch": 1.1883345045678144,
"grad_norm": 0.5205146308658571,
"learning_rate": 3.5423058999030145e-06,
"loss": 0.161,
"step": 1691
},
{
"epoch": 1.1890372452565003,
"grad_norm": 0.5203042023193757,
"learning_rate": 3.537027241086296e-06,
"loss": 0.1505,
"step": 1692
},
{
"epoch": 1.1897399859451863,
"grad_norm": 0.49733017023610404,
"learning_rate": 3.531750364919849e-06,
"loss": 0.1508,
"step": 1693
},
{
"epoch": 1.1904427266338722,
"grad_norm": 0.5336269994581739,
"learning_rate": 3.526475277833609e-06,
"loss": 0.1527,
"step": 1694
},
{
"epoch": 1.1911454673225579,
"grad_norm": 0.5200437431542487,
"learning_rate": 3.521201986255338e-06,
"loss": 0.1481,
"step": 1695
},
{
"epoch": 1.1918482080112438,
"grad_norm": 0.5475045123595398,
"learning_rate": 3.5159304966106034e-06,
"loss": 0.1528,
"step": 1696
},
{
"epoch": 1.1925509486999297,
"grad_norm": 0.5366297526728846,
"learning_rate": 3.5106608153227805e-06,
"loss": 0.154,
"step": 1697
},
{
"epoch": 1.1932536893886156,
"grad_norm": 0.5299936676513248,
"learning_rate": 3.50539294881304e-06,
"loss": 0.1515,
"step": 1698
},
{
"epoch": 1.1939564300773016,
"grad_norm": 0.5247029140518117,
"learning_rate": 3.500126903500345e-06,
"loss": 0.1612,
"step": 1699
},
{
"epoch": 1.1946591707659873,
"grad_norm": 0.5191132709788684,
"learning_rate": 3.4948626858014345e-06,
"loss": 0.1245,
"step": 1700
},
{
"epoch": 1.1953619114546732,
"grad_norm": 0.48372763603370605,
"learning_rate": 3.4896003021308213e-06,
"loss": 0.1276,
"step": 1701
},
{
"epoch": 1.196064652143359,
"grad_norm": 0.5303852655503682,
"learning_rate": 3.4843397589007842e-06,
"loss": 0.153,
"step": 1702
},
{
"epoch": 1.196767392832045,
"grad_norm": 0.521746022896869,
"learning_rate": 3.4790810625213627e-06,
"loss": 0.1531,
"step": 1703
},
{
"epoch": 1.197470133520731,
"grad_norm": 0.5174479878366598,
"learning_rate": 3.4738242194003403e-06,
"loss": 0.1492,
"step": 1704
},
{
"epoch": 1.1981728742094166,
"grad_norm": 0.5190434505967585,
"learning_rate": 3.4685692359432487e-06,
"loss": 0.1551,
"step": 1705
},
{
"epoch": 1.1988756148981026,
"grad_norm": 0.5309086817418733,
"learning_rate": 3.4633161185533435e-06,
"loss": 0.1662,
"step": 1706
},
{
"epoch": 1.1995783555867885,
"grad_norm": 0.5109056506042583,
"learning_rate": 3.4580648736316167e-06,
"loss": 0.1427,
"step": 1707
},
{
"epoch": 1.2002810962754744,
"grad_norm": 0.5133755265180353,
"learning_rate": 3.4528155075767746e-06,
"loss": 0.1557,
"step": 1708
},
{
"epoch": 1.2009838369641601,
"grad_norm": 0.5009107458499668,
"learning_rate": 3.447568026785233e-06,
"loss": 0.1424,
"step": 1709
},
{
"epoch": 1.201686577652846,
"grad_norm": 0.4951519629017366,
"learning_rate": 3.4423224376511143e-06,
"loss": 0.1452,
"step": 1710
},
{
"epoch": 1.202389318341532,
"grad_norm": 0.515481038243041,
"learning_rate": 3.4370787465662304e-06,
"loss": 0.1377,
"step": 1711
},
{
"epoch": 1.2030920590302179,
"grad_norm": 0.49124281287351257,
"learning_rate": 3.431836959920083e-06,
"loss": 0.1284,
"step": 1712
},
{
"epoch": 1.2037947997189038,
"grad_norm": 0.5437800123662826,
"learning_rate": 3.4265970840998562e-06,
"loss": 0.1652,
"step": 1713
},
{
"epoch": 1.2044975404075897,
"grad_norm": 0.5071503507526097,
"learning_rate": 3.4213591254904023e-06,
"loss": 0.1488,
"step": 1714
},
{
"epoch": 1.2052002810962754,
"grad_norm": 0.5003831626896319,
"learning_rate": 3.416123090474236e-06,
"loss": 0.132,
"step": 1715
},
{
"epoch": 1.2059030217849613,
"grad_norm": 0.49683404967777817,
"learning_rate": 3.4108889854315315e-06,
"loss": 0.131,
"step": 1716
},
{
"epoch": 1.2066057624736473,
"grad_norm": 0.5390326412523678,
"learning_rate": 3.4056568167401106e-06,
"loss": 0.134,
"step": 1717
},
{
"epoch": 1.2073085031623332,
"grad_norm": 0.5776173390980446,
"learning_rate": 3.4004265907754343e-06,
"loss": 0.189,
"step": 1718
},
{
"epoch": 1.2080112438510189,
"grad_norm": 0.5137922725555822,
"learning_rate": 3.3951983139106005e-06,
"loss": 0.141,
"step": 1719
},
{
"epoch": 1.2087139845397048,
"grad_norm": 0.513839062180173,
"learning_rate": 3.3899719925163223e-06,
"loss": 0.1602,
"step": 1720
},
{
"epoch": 1.2094167252283907,
"grad_norm": 0.4843895500119056,
"learning_rate": 3.3847476329609415e-06,
"loss": 0.1298,
"step": 1721
},
{
"epoch": 1.2101194659170766,
"grad_norm": 0.5220447841417628,
"learning_rate": 3.379525241610402e-06,
"loss": 0.1457,
"step": 1722
},
{
"epoch": 1.2108222066057626,
"grad_norm": 0.524805904004868,
"learning_rate": 3.3743048248282527e-06,
"loss": 0.1562,
"step": 1723
},
{
"epoch": 1.2115249472944483,
"grad_norm": 0.5309924745094761,
"learning_rate": 3.3690863889756374e-06,
"loss": 0.1589,
"step": 1724
},
{
"epoch": 1.2122276879831342,
"grad_norm": 0.498855889039376,
"learning_rate": 3.363869940411282e-06,
"loss": 0.1309,
"step": 1725
},
{
"epoch": 1.21293042867182,
"grad_norm": 0.5230334811682804,
"learning_rate": 3.358655485491492e-06,
"loss": 0.1437,
"step": 1726
},
{
"epoch": 1.213633169360506,
"grad_norm": 0.5285661047217612,
"learning_rate": 3.353443030570147e-06,
"loss": 0.1599,
"step": 1727
},
{
"epoch": 1.2143359100491917,
"grad_norm": 0.49767850498696875,
"learning_rate": 3.348232581998686e-06,
"loss": 0.1366,
"step": 1728
},
{
"epoch": 1.2150386507378776,
"grad_norm": 0.5040842660094004,
"learning_rate": 3.343024146126108e-06,
"loss": 0.1507,
"step": 1729
},
{
"epoch": 1.2157413914265636,
"grad_norm": 0.5588734088360507,
"learning_rate": 3.33781772929895e-06,
"loss": 0.166,
"step": 1730
},
{
"epoch": 1.2164441321152495,
"grad_norm": 0.5470329607806411,
"learning_rate": 3.3326133378612996e-06,
"loss": 0.1641,
"step": 1731
},
{
"epoch": 1.2171468728039354,
"grad_norm": 0.5070426231374715,
"learning_rate": 3.3274109781547685e-06,
"loss": 0.139,
"step": 1732
},
{
"epoch": 1.2178496134926213,
"grad_norm": 0.5215478507795955,
"learning_rate": 3.322210656518499e-06,
"loss": 0.153,
"step": 1733
},
{
"epoch": 1.218552354181307,
"grad_norm": 0.4799184395974528,
"learning_rate": 3.317012379289146e-06,
"loss": 0.1232,
"step": 1734
},
{
"epoch": 1.219255094869993,
"grad_norm": 0.5054843399887229,
"learning_rate": 3.311816152800873e-06,
"loss": 0.1336,
"step": 1735
},
{
"epoch": 1.2199578355586789,
"grad_norm": 0.5015658349787894,
"learning_rate": 3.3066219833853454e-06,
"loss": 0.1329,
"step": 1736
},
{
"epoch": 1.2206605762473648,
"grad_norm": 0.5251819654602317,
"learning_rate": 3.3014298773717235e-06,
"loss": 0.1415,
"step": 1737
},
{
"epoch": 1.2213633169360505,
"grad_norm": 0.569004014239607,
"learning_rate": 3.2962398410866535e-06,
"loss": 0.1609,
"step": 1738
},
{
"epoch": 1.2220660576247364,
"grad_norm": 0.5238455440223404,
"learning_rate": 3.2910518808542557e-06,
"loss": 0.1532,
"step": 1739
},
{
"epoch": 1.2227687983134223,
"grad_norm": 0.5140066709020665,
"learning_rate": 3.285866002996124e-06,
"loss": 0.1384,
"step": 1740
},
{
"epoch": 1.2234715390021083,
"grad_norm": 0.5117475965906966,
"learning_rate": 3.2806822138313154e-06,
"loss": 0.1445,
"step": 1741
},
{
"epoch": 1.2241742796907942,
"grad_norm": 0.5179832136007794,
"learning_rate": 3.275500519676339e-06,
"loss": 0.1489,
"step": 1742
},
{
"epoch": 1.2248770203794799,
"grad_norm": 0.5044506657580975,
"learning_rate": 3.2703209268451565e-06,
"loss": 0.1469,
"step": 1743
},
{
"epoch": 1.2255797610681658,
"grad_norm": 0.5146259932596444,
"learning_rate": 3.26514344164916e-06,
"loss": 0.1596,
"step": 1744
},
{
"epoch": 1.2262825017568517,
"grad_norm": 0.5525856588040384,
"learning_rate": 3.2599680703971824e-06,
"loss": 0.1759,
"step": 1745
},
{
"epoch": 1.2269852424455376,
"grad_norm": 0.5109312883592165,
"learning_rate": 3.2547948193954747e-06,
"loss": 0.1579,
"step": 1746
},
{
"epoch": 1.2276879831342236,
"grad_norm": 0.5423796532607921,
"learning_rate": 3.24962369494771e-06,
"loss": 0.1787,
"step": 1747
},
{
"epoch": 1.2283907238229093,
"grad_norm": 0.5210898787241186,
"learning_rate": 3.2444547033549654e-06,
"loss": 0.1378,
"step": 1748
},
{
"epoch": 1.2290934645115952,
"grad_norm": 0.52972502210687,
"learning_rate": 3.23928785091572e-06,
"loss": 0.1545,
"step": 1749
},
{
"epoch": 1.229796205200281,
"grad_norm": 0.5393701634157857,
"learning_rate": 3.2341231439258454e-06,
"loss": 0.1502,
"step": 1750
},
{
"epoch": 1.230498945888967,
"grad_norm": 0.513428650112163,
"learning_rate": 3.2289605886786035e-06,
"loss": 0.1231,
"step": 1751
},
{
"epoch": 1.231201686577653,
"grad_norm": 0.5232149614791697,
"learning_rate": 3.22380019146463e-06,
"loss": 0.1646,
"step": 1752
},
{
"epoch": 1.2319044272663386,
"grad_norm": 0.5151905240888011,
"learning_rate": 3.2186419585719344e-06,
"loss": 0.1591,
"step": 1753
},
{
"epoch": 1.2326071679550246,
"grad_norm": 0.5008232200255709,
"learning_rate": 3.2134858962858824e-06,
"loss": 0.1346,
"step": 1754
},
{
"epoch": 1.2333099086437105,
"grad_norm": 0.5387557557049685,
"learning_rate": 3.2083320108892026e-06,
"loss": 0.1577,
"step": 1755
},
{
"epoch": 1.2340126493323964,
"grad_norm": 0.5225873098719417,
"learning_rate": 3.203180308661965e-06,
"loss": 0.1482,
"step": 1756
},
{
"epoch": 1.234715390021082,
"grad_norm": 0.5402414597227396,
"learning_rate": 3.1980307958815852e-06,
"loss": 0.1256,
"step": 1757
},
{
"epoch": 1.235418130709768,
"grad_norm": 0.5706661096829184,
"learning_rate": 3.192883478822807e-06,
"loss": 0.1642,
"step": 1758
},
{
"epoch": 1.236120871398454,
"grad_norm": 0.5069769720641505,
"learning_rate": 3.187738363757698e-06,
"loss": 0.1331,
"step": 1759
},
{
"epoch": 1.2368236120871399,
"grad_norm": 0.5188196857915583,
"learning_rate": 3.182595456955644e-06,
"loss": 0.1438,
"step": 1760
},
{
"epoch": 1.2375263527758258,
"grad_norm": 0.505547435629863,
"learning_rate": 3.1774547646833407e-06,
"loss": 0.1345,
"step": 1761
},
{
"epoch": 1.2382290934645117,
"grad_norm": 0.4967289635160625,
"learning_rate": 3.172316293204787e-06,
"loss": 0.1279,
"step": 1762
},
{
"epoch": 1.2389318341531974,
"grad_norm": 0.5324187041156441,
"learning_rate": 3.1671800487812697e-06,
"loss": 0.1567,
"step": 1763
},
{
"epoch": 1.2396345748418833,
"grad_norm": 0.5192324122593844,
"learning_rate": 3.1620460376713668e-06,
"loss": 0.1606,
"step": 1764
},
{
"epoch": 1.2403373155305693,
"grad_norm": 0.5202902428884335,
"learning_rate": 3.156914266130935e-06,
"loss": 0.1482,
"step": 1765
},
{
"epoch": 1.2410400562192552,
"grad_norm": 0.5128178814546309,
"learning_rate": 3.1517847404131e-06,
"loss": 0.1429,
"step": 1766
},
{
"epoch": 1.2417427969079409,
"grad_norm": 0.5374214791148246,
"learning_rate": 3.1466574667682546e-06,
"loss": 0.147,
"step": 1767
},
{
"epoch": 1.2424455375966268,
"grad_norm": 0.4946429960253402,
"learning_rate": 3.1415324514440392e-06,
"loss": 0.1469,
"step": 1768
},
{
"epoch": 1.2431482782853127,
"grad_norm": 0.49213088843995484,
"learning_rate": 3.1364097006853523e-06,
"loss": 0.1359,
"step": 1769
},
{
"epoch": 1.2438510189739986,
"grad_norm": 0.5011419601118812,
"learning_rate": 3.131289220734327e-06,
"loss": 0.1345,
"step": 1770
},
{
"epoch": 1.2445537596626846,
"grad_norm": 0.5200319915187279,
"learning_rate": 3.1261710178303316e-06,
"loss": 0.1602,
"step": 1771
},
{
"epoch": 1.2452565003513703,
"grad_norm": 0.49248263319506386,
"learning_rate": 3.1210550982099596e-06,
"loss": 0.1235,
"step": 1772
},
{
"epoch": 1.2459592410400562,
"grad_norm": 0.5349869231670884,
"learning_rate": 3.115941468107021e-06,
"loss": 0.1698,
"step": 1773
},
{
"epoch": 1.246661981728742,
"grad_norm": 0.5263951000032334,
"learning_rate": 3.110830133752536e-06,
"loss": 0.1463,
"step": 1774
},
{
"epoch": 1.247364722417428,
"grad_norm": 0.5167900381423574,
"learning_rate": 3.1057211013747295e-06,
"loss": 0.1447,
"step": 1775
},
{
"epoch": 1.248067463106114,
"grad_norm": 0.5309999924089229,
"learning_rate": 3.1006143771990205e-06,
"loss": 0.1343,
"step": 1776
},
{
"epoch": 1.2487702037947996,
"grad_norm": 0.5150448538036977,
"learning_rate": 3.095509967448016e-06,
"loss": 0.1352,
"step": 1777
},
{
"epoch": 1.2494729444834856,
"grad_norm": 0.5262709024276062,
"learning_rate": 3.090407878341498e-06,
"loss": 0.1592,
"step": 1778
},
{
"epoch": 1.2501756851721715,
"grad_norm": 0.5302743651945259,
"learning_rate": 3.085308116096428e-06,
"loss": 0.1397,
"step": 1779
},
{
"epoch": 1.2508784258608574,
"grad_norm": 0.5427861797478236,
"learning_rate": 3.080210686926928e-06,
"loss": 0.1528,
"step": 1780
},
{
"epoch": 1.2515811665495433,
"grad_norm": 0.5154882831721702,
"learning_rate": 3.0751155970442792e-06,
"loss": 0.1434,
"step": 1781
},
{
"epoch": 1.252283907238229,
"grad_norm": 0.5251446781371032,
"learning_rate": 3.070022852656911e-06,
"loss": 0.146,
"step": 1782
},
{
"epoch": 1.252986647926915,
"grad_norm": 0.5099873463651408,
"learning_rate": 3.0649324599703933e-06,
"loss": 0.1328,
"step": 1783
},
{
"epoch": 1.2536893886156009,
"grad_norm": 0.49880544349781974,
"learning_rate": 3.0598444251874315e-06,
"loss": 0.1326,
"step": 1784
},
{
"epoch": 1.2543921293042868,
"grad_norm": 0.498993602346327,
"learning_rate": 3.0547587545078615e-06,
"loss": 0.1301,
"step": 1785
},
{
"epoch": 1.2550948699929725,
"grad_norm": 0.5057816731207657,
"learning_rate": 3.0496754541286346e-06,
"loss": 0.1464,
"step": 1786
},
{
"epoch": 1.2557976106816584,
"grad_norm": 0.5371622700474783,
"learning_rate": 3.044594530243813e-06,
"loss": 0.1581,
"step": 1787
},
{
"epoch": 1.2565003513703443,
"grad_norm": 0.5341064067042999,
"learning_rate": 3.0395159890445647e-06,
"loss": 0.1506,
"step": 1788
},
{
"epoch": 1.2572030920590302,
"grad_norm": 0.5271349251255161,
"learning_rate": 3.0344398367191574e-06,
"loss": 0.138,
"step": 1789
},
{
"epoch": 1.2579058327477162,
"grad_norm": 0.4824505775115255,
"learning_rate": 3.029366079452943e-06,
"loss": 0.1211,
"step": 1790
},
{
"epoch": 1.258608573436402,
"grad_norm": 0.5545474070295585,
"learning_rate": 3.024294723428358e-06,
"loss": 0.1749,
"step": 1791
},
{
"epoch": 1.2593113141250878,
"grad_norm": 0.5067819128272134,
"learning_rate": 3.0192257748249097e-06,
"loss": 0.1347,
"step": 1792
},
{
"epoch": 1.2600140548137737,
"grad_norm": 0.5146999127963285,
"learning_rate": 3.0141592398191765e-06,
"loss": 0.1481,
"step": 1793
},
{
"epoch": 1.2607167955024596,
"grad_norm": 0.4993235127306429,
"learning_rate": 3.009095124584792e-06,
"loss": 0.139,
"step": 1794
},
{
"epoch": 1.2614195361911453,
"grad_norm": 0.5667651682034487,
"learning_rate": 3.004033435292445e-06,
"loss": 0.185,
"step": 1795
},
{
"epoch": 1.2621222768798313,
"grad_norm": 0.5462740967024108,
"learning_rate": 2.9989741781098654e-06,
"loss": 0.1348,
"step": 1796
},
{
"epoch": 1.2628250175685172,
"grad_norm": 0.5416746386665264,
"learning_rate": 2.9939173592018185e-06,
"loss": 0.1603,
"step": 1797
},
{
"epoch": 1.263527758257203,
"grad_norm": 0.4922679741577877,
"learning_rate": 2.9888629847301e-06,
"loss": 0.1345,
"step": 1798
},
{
"epoch": 1.264230498945889,
"grad_norm": 0.511441166644177,
"learning_rate": 2.9838110608535297e-06,
"loss": 0.1309,
"step": 1799
},
{
"epoch": 1.264933239634575,
"grad_norm": 0.5120913001962054,
"learning_rate": 2.978761593727938e-06,
"loss": 0.1367,
"step": 1800
},
{
"epoch": 1.2656359803232606,
"grad_norm": 0.5095120682563343,
"learning_rate": 2.9737145895061626e-06,
"loss": 0.14,
"step": 1801
},
{
"epoch": 1.2663387210119466,
"grad_norm": 0.47522805017685915,
"learning_rate": 2.9686700543380386e-06,
"loss": 0.1199,
"step": 1802
},
{
"epoch": 1.2670414617006325,
"grad_norm": 0.5224128281661045,
"learning_rate": 2.9636279943703956e-06,
"loss": 0.1478,
"step": 1803
},
{
"epoch": 1.2677442023893184,
"grad_norm": 0.5645184496216876,
"learning_rate": 2.9585884157470457e-06,
"loss": 0.1584,
"step": 1804
},
{
"epoch": 1.268446943078004,
"grad_norm": 0.5007962250263087,
"learning_rate": 2.953551324608775e-06,
"loss": 0.1453,
"step": 1805
},
{
"epoch": 1.26914968376669,
"grad_norm": 0.5030706513703921,
"learning_rate": 2.948516727093345e-06,
"loss": 0.1431,
"step": 1806
},
{
"epoch": 1.269852424455376,
"grad_norm": 0.5115034601876566,
"learning_rate": 2.943484629335471e-06,
"loss": 0.1399,
"step": 1807
},
{
"epoch": 1.2705551651440619,
"grad_norm": 0.5306475619859111,
"learning_rate": 2.9384550374668276e-06,
"loss": 0.1682,
"step": 1808
},
{
"epoch": 1.2712579058327478,
"grad_norm": 0.5163969175891816,
"learning_rate": 2.933427957616034e-06,
"loss": 0.1425,
"step": 1809
},
{
"epoch": 1.2719606465214337,
"grad_norm": 0.5094171802140397,
"learning_rate": 2.9284033959086494e-06,
"loss": 0.1446,
"step": 1810
},
{
"epoch": 1.2726633872101194,
"grad_norm": 0.5177997618489504,
"learning_rate": 2.923381358467162e-06,
"loss": 0.1553,
"step": 1811
},
{
"epoch": 1.2733661278988053,
"grad_norm": 0.5433548572173907,
"learning_rate": 2.918361851410987e-06,
"loss": 0.1416,
"step": 1812
},
{
"epoch": 1.2740688685874912,
"grad_norm": 0.5004983199100684,
"learning_rate": 2.9133448808564556e-06,
"loss": 0.128,
"step": 1813
},
{
"epoch": 1.2747716092761772,
"grad_norm": 0.503883415450621,
"learning_rate": 2.9083304529168087e-06,
"loss": 0.1483,
"step": 1814
},
{
"epoch": 1.2754743499648629,
"grad_norm": 0.5679390165546186,
"learning_rate": 2.9033185737021875e-06,
"loss": 0.17,
"step": 1815
},
{
"epoch": 1.2761770906535488,
"grad_norm": 0.5465055047033353,
"learning_rate": 2.8983092493196286e-06,
"loss": 0.1703,
"step": 1816
},
{
"epoch": 1.2768798313422347,
"grad_norm": 0.4787604232572746,
"learning_rate": 2.8933024858730546e-06,
"loss": 0.1151,
"step": 1817
},
{
"epoch": 1.2775825720309206,
"grad_norm": 0.5174270003066762,
"learning_rate": 2.8882982894632694e-06,
"loss": 0.1575,
"step": 1818
},
{
"epoch": 1.2782853127196065,
"grad_norm": 0.519352188838613,
"learning_rate": 2.883296666187947e-06,
"loss": 0.1408,
"step": 1819
},
{
"epoch": 1.2789880534082925,
"grad_norm": 0.5331544627649402,
"learning_rate": 2.8782976221416265e-06,
"loss": 0.1569,
"step": 1820
},
{
"epoch": 1.2796907940969782,
"grad_norm": 0.5190193086008424,
"learning_rate": 2.873301163415705e-06,
"loss": 0.1461,
"step": 1821
},
{
"epoch": 1.280393534785664,
"grad_norm": 0.531637624498204,
"learning_rate": 2.8683072960984294e-06,
"loss": 0.1499,
"step": 1822
},
{
"epoch": 1.28109627547435,
"grad_norm": 0.4985066831209329,
"learning_rate": 2.8633160262748873e-06,
"loss": 0.1377,
"step": 1823
},
{
"epoch": 1.2817990161630357,
"grad_norm": 0.5495919052999558,
"learning_rate": 2.858327360027e-06,
"loss": 0.176,
"step": 1824
},
{
"epoch": 1.2825017568517216,
"grad_norm": 0.503827728446379,
"learning_rate": 2.8533413034335257e-06,
"loss": 0.1358,
"step": 1825
},
{
"epoch": 1.2832044975404076,
"grad_norm": 0.49803838389907806,
"learning_rate": 2.8483578625700286e-06,
"loss": 0.1373,
"step": 1826
},
{
"epoch": 1.2839072382290935,
"grad_norm": 0.49609765850369625,
"learning_rate": 2.8433770435088957e-06,
"loss": 0.1435,
"step": 1827
},
{
"epoch": 1.2846099789177794,
"grad_norm": 0.48624878597669635,
"learning_rate": 2.838398852319313e-06,
"loss": 0.1316,
"step": 1828
},
{
"epoch": 1.2853127196064653,
"grad_norm": 0.5429732189301352,
"learning_rate": 2.8334232950672724e-06,
"loss": 0.1507,
"step": 1829
},
{
"epoch": 1.286015460295151,
"grad_norm": 0.5469891683215277,
"learning_rate": 2.8284503778155513e-06,
"loss": 0.1751,
"step": 1830
},
{
"epoch": 1.286718200983837,
"grad_norm": 0.4824278437036638,
"learning_rate": 2.823480106623704e-06,
"loss": 0.1217,
"step": 1831
},
{
"epoch": 1.2874209416725229,
"grad_norm": 0.5155410308993346,
"learning_rate": 2.8185124875480742e-06,
"loss": 0.1347,
"step": 1832
},
{
"epoch": 1.2881236823612088,
"grad_norm": 0.4668465729229129,
"learning_rate": 2.8135475266417626e-06,
"loss": 0.1202,
"step": 1833
},
{
"epoch": 1.2888264230498945,
"grad_norm": 0.49186559366844784,
"learning_rate": 2.808585229954637e-06,
"loss": 0.135,
"step": 1834
},
{
"epoch": 1.2895291637385804,
"grad_norm": 0.5058080898482784,
"learning_rate": 2.803625603533316e-06,
"loss": 0.1415,
"step": 1835
},
{
"epoch": 1.2902319044272663,
"grad_norm": 0.5125628233669447,
"learning_rate": 2.7986686534211656e-06,
"loss": 0.1228,
"step": 1836
},
{
"epoch": 1.2909346451159522,
"grad_norm": 0.522505002650681,
"learning_rate": 2.79371438565829e-06,
"loss": 0.1449,
"step": 1837
},
{
"epoch": 1.2916373858046382,
"grad_norm": 0.503912487637843,
"learning_rate": 2.7887628062815252e-06,
"loss": 0.1372,
"step": 1838
},
{
"epoch": 1.292340126493324,
"grad_norm": 0.5069732877673296,
"learning_rate": 2.7838139213244318e-06,
"loss": 0.1388,
"step": 1839
},
{
"epoch": 1.2930428671820098,
"grad_norm": 0.5250585361768124,
"learning_rate": 2.7788677368172877e-06,
"loss": 0.1605,
"step": 1840
},
{
"epoch": 1.2937456078706957,
"grad_norm": 0.5263704515292615,
"learning_rate": 2.7739242587870786e-06,
"loss": 0.1657,
"step": 1841
},
{
"epoch": 1.2944483485593816,
"grad_norm": 0.5165185021883313,
"learning_rate": 2.7689834932574923e-06,
"loss": 0.147,
"step": 1842
},
{
"epoch": 1.2951510892480675,
"grad_norm": 0.5270573489356727,
"learning_rate": 2.764045446248913e-06,
"loss": 0.16,
"step": 1843
},
{
"epoch": 1.2958538299367532,
"grad_norm": 0.5429711458873587,
"learning_rate": 2.7591101237784122e-06,
"loss": 0.1653,
"step": 1844
},
{
"epoch": 1.2965565706254392,
"grad_norm": 0.5095626117090168,
"learning_rate": 2.7541775318597407e-06,
"loss": 0.1328,
"step": 1845
},
{
"epoch": 1.297259311314125,
"grad_norm": 0.5076499802471852,
"learning_rate": 2.7492476765033227e-06,
"loss": 0.1485,
"step": 1846
},
{
"epoch": 1.297962052002811,
"grad_norm": 0.5427967731010709,
"learning_rate": 2.7443205637162463e-06,
"loss": 0.1679,
"step": 1847
},
{
"epoch": 1.298664792691497,
"grad_norm": 0.5308997772141762,
"learning_rate": 2.7393961995022565e-06,
"loss": 0.1466,
"step": 1848
},
{
"epoch": 1.2993675333801828,
"grad_norm": 0.49043902053039395,
"learning_rate": 2.7344745898617598e-06,
"loss": 0.1309,
"step": 1849
},
{
"epoch": 1.3000702740688685,
"grad_norm": 0.5195631188883199,
"learning_rate": 2.7295557407917904e-06,
"loss": 0.1525,
"step": 1850
},
{
"epoch": 1.3007730147575545,
"grad_norm": 0.5009791984310343,
"learning_rate": 2.7246396582860293e-06,
"loss": 0.1419,
"step": 1851
},
{
"epoch": 1.3014757554462404,
"grad_norm": 0.5067602717562613,
"learning_rate": 2.71972634833478e-06,
"loss": 0.1374,
"step": 1852
},
{
"epoch": 1.302178496134926,
"grad_norm": 0.5374645736356453,
"learning_rate": 2.7148158169249757e-06,
"loss": 0.1355,
"step": 1853
},
{
"epoch": 1.302881236823612,
"grad_norm": 0.5224103943507988,
"learning_rate": 2.709908070040159e-06,
"loss": 0.1454,
"step": 1854
},
{
"epoch": 1.303583977512298,
"grad_norm": 0.5032174206767985,
"learning_rate": 2.705003113660477e-06,
"loss": 0.1415,
"step": 1855
},
{
"epoch": 1.3042867182009839,
"grad_norm": 0.534616859772977,
"learning_rate": 2.7001009537626775e-06,
"loss": 0.1442,
"step": 1856
},
{
"epoch": 1.3049894588896698,
"grad_norm": 0.5627581840537774,
"learning_rate": 2.695201596320107e-06,
"loss": 0.1728,
"step": 1857
},
{
"epoch": 1.3056921995783557,
"grad_norm": 0.5080008680203384,
"learning_rate": 2.690305047302692e-06,
"loss": 0.1506,
"step": 1858
},
{
"epoch": 1.3063949402670414,
"grad_norm": 0.5187284564546811,
"learning_rate": 2.685411312676936e-06,
"loss": 0.1391,
"step": 1859
},
{
"epoch": 1.3070976809557273,
"grad_norm": 0.5112305622849427,
"learning_rate": 2.6805203984059156e-06,
"loss": 0.145,
"step": 1860
},
{
"epoch": 1.3078004216444132,
"grad_norm": 0.5429219613865652,
"learning_rate": 2.67563231044927e-06,
"loss": 0.154,
"step": 1861
},
{
"epoch": 1.3085031623330992,
"grad_norm": 0.4981918212569302,
"learning_rate": 2.670747054763193e-06,
"loss": 0.1325,
"step": 1862
},
{
"epoch": 1.3092059030217849,
"grad_norm": 0.5156778705541998,
"learning_rate": 2.6658646373004304e-06,
"loss": 0.1544,
"step": 1863
},
{
"epoch": 1.3099086437104708,
"grad_norm": 0.5002674697829435,
"learning_rate": 2.6609850640102665e-06,
"loss": 0.1314,
"step": 1864
},
{
"epoch": 1.3106113843991567,
"grad_norm": 0.5543015508160836,
"learning_rate": 2.6561083408385224e-06,
"loss": 0.1692,
"step": 1865
},
{
"epoch": 1.3113141250878426,
"grad_norm": 0.5619325590564459,
"learning_rate": 2.6512344737275443e-06,
"loss": 0.1464,
"step": 1866
},
{
"epoch": 1.3120168657765285,
"grad_norm": 0.5237448611265626,
"learning_rate": 2.6463634686161998e-06,
"loss": 0.1462,
"step": 1867
},
{
"epoch": 1.3127196064652145,
"grad_norm": 0.5403930457832352,
"learning_rate": 2.6414953314398673e-06,
"loss": 0.1669,
"step": 1868
},
{
"epoch": 1.3134223471539002,
"grad_norm": 0.5220596141331452,
"learning_rate": 2.6366300681304334e-06,
"loss": 0.1437,
"step": 1869
},
{
"epoch": 1.314125087842586,
"grad_norm": 0.5361348116943498,
"learning_rate": 2.63176768461628e-06,
"loss": 0.1694,
"step": 1870
},
{
"epoch": 1.314827828531272,
"grad_norm": 0.5110847626454531,
"learning_rate": 2.6269081868222814e-06,
"loss": 0.1377,
"step": 1871
},
{
"epoch": 1.3155305692199577,
"grad_norm": 0.4842268666664499,
"learning_rate": 2.6220515806697934e-06,
"loss": 0.1245,
"step": 1872
},
{
"epoch": 1.3162333099086436,
"grad_norm": 0.5160972703016548,
"learning_rate": 2.6171978720766557e-06,
"loss": 0.1328,
"step": 1873
},
{
"epoch": 1.3169360505973295,
"grad_norm": 0.548131436737344,
"learning_rate": 2.6123470669571665e-06,
"loss": 0.1779,
"step": 1874
},
{
"epoch": 1.3176387912860155,
"grad_norm": 0.5573921571705782,
"learning_rate": 2.607499171222093e-06,
"loss": 0.1561,
"step": 1875
},
{
"epoch": 1.3183415319747014,
"grad_norm": 0.5272568688464775,
"learning_rate": 2.602654190778654e-06,
"loss": 0.1517,
"step": 1876
},
{
"epoch": 1.3190442726633873,
"grad_norm": 0.4901855937914321,
"learning_rate": 2.5978121315305217e-06,
"loss": 0.129,
"step": 1877
},
{
"epoch": 1.319747013352073,
"grad_norm": 0.5216856895424252,
"learning_rate": 2.5929729993778046e-06,
"loss": 0.1467,
"step": 1878
},
{
"epoch": 1.320449754040759,
"grad_norm": 0.5060265023823881,
"learning_rate": 2.5881368002170403e-06,
"loss": 0.1538,
"step": 1879
},
{
"epoch": 1.3211524947294448,
"grad_norm": 0.5207186271810702,
"learning_rate": 2.5833035399411977e-06,
"loss": 0.1483,
"step": 1880
},
{
"epoch": 1.3218552354181308,
"grad_norm": 0.5369159344507257,
"learning_rate": 2.5784732244396667e-06,
"loss": 0.1662,
"step": 1881
},
{
"epoch": 1.3225579761068165,
"grad_norm": 0.5485861290717046,
"learning_rate": 2.573645859598245e-06,
"loss": 0.1784,
"step": 1882
},
{
"epoch": 1.3232607167955024,
"grad_norm": 0.5169582374334584,
"learning_rate": 2.568821451299135e-06,
"loss": 0.1691,
"step": 1883
},
{
"epoch": 1.3239634574841883,
"grad_norm": 0.4888073677651971,
"learning_rate": 2.564000005420938e-06,
"loss": 0.1256,
"step": 1884
},
{
"epoch": 1.3246661981728742,
"grad_norm": 0.5325076620163575,
"learning_rate": 2.5591815278386456e-06,
"loss": 0.1506,
"step": 1885
},
{
"epoch": 1.3253689388615602,
"grad_norm": 0.49725808885086276,
"learning_rate": 2.554366024423631e-06,
"loss": 0.1425,
"step": 1886
},
{
"epoch": 1.326071679550246,
"grad_norm": 0.5168771320310778,
"learning_rate": 2.5495535010436445e-06,
"loss": 0.1499,
"step": 1887
},
{
"epoch": 1.3267744202389318,
"grad_norm": 0.5065760057974372,
"learning_rate": 2.5447439635628046e-06,
"loss": 0.1416,
"step": 1888
},
{
"epoch": 1.3274771609276177,
"grad_norm": 0.5036549891249,
"learning_rate": 2.5399374178415926e-06,
"loss": 0.1416,
"step": 1889
},
{
"epoch": 1.3281799016163036,
"grad_norm": 0.5242118469333106,
"learning_rate": 2.535133869736842e-06,
"loss": 0.1608,
"step": 1890
},
{
"epoch": 1.3288826423049895,
"grad_norm": 0.5123139889811016,
"learning_rate": 2.5303333251017378e-06,
"loss": 0.1302,
"step": 1891
},
{
"epoch": 1.3295853829936752,
"grad_norm": 0.527324699306745,
"learning_rate": 2.5255357897857996e-06,
"loss": 0.1553,
"step": 1892
},
{
"epoch": 1.3302881236823612,
"grad_norm": 0.5460335838857496,
"learning_rate": 2.5207412696348854e-06,
"loss": 0.1679,
"step": 1893
},
{
"epoch": 1.330990864371047,
"grad_norm": 0.4880081828121528,
"learning_rate": 2.515949770491175e-06,
"loss": 0.133,
"step": 1894
},
{
"epoch": 1.331693605059733,
"grad_norm": 0.511063147112203,
"learning_rate": 2.51116129819317e-06,
"loss": 0.1552,
"step": 1895
},
{
"epoch": 1.332396345748419,
"grad_norm": 0.5570249105151223,
"learning_rate": 2.5063758585756814e-06,
"loss": 0.1706,
"step": 1896
},
{
"epoch": 1.3330990864371048,
"grad_norm": 0.5281127442022048,
"learning_rate": 2.5015934574698303e-06,
"loss": 0.1461,
"step": 1897
},
{
"epoch": 1.3338018271257905,
"grad_norm": 0.5323432905157081,
"learning_rate": 2.496814100703026e-06,
"loss": 0.1379,
"step": 1898
},
{
"epoch": 1.3345045678144765,
"grad_norm": 0.5044474529457625,
"learning_rate": 2.4920377940989763e-06,
"loss": 0.1298,
"step": 1899
},
{
"epoch": 1.3352073085031624,
"grad_norm": 0.5020068606866582,
"learning_rate": 2.4872645434776666e-06,
"loss": 0.1463,
"step": 1900
},
{
"epoch": 1.335910049191848,
"grad_norm": 0.5295400057005242,
"learning_rate": 2.4824943546553646e-06,
"loss": 0.1493,
"step": 1901
},
{
"epoch": 1.336612789880534,
"grad_norm": 0.5162367748874149,
"learning_rate": 2.4777272334446055e-06,
"loss": 0.141,
"step": 1902
},
{
"epoch": 1.33731553056922,
"grad_norm": 0.5146721336573555,
"learning_rate": 2.472963185654181e-06,
"loss": 0.1491,
"step": 1903
},
{
"epoch": 1.3380182712579058,
"grad_norm": 0.5344070480631355,
"learning_rate": 2.4682022170891403e-06,
"loss": 0.1569,
"step": 1904
},
{
"epoch": 1.3387210119465918,
"grad_norm": 0.4998274905470932,
"learning_rate": 2.4634443335507868e-06,
"loss": 0.1442,
"step": 1905
},
{
"epoch": 1.3394237526352777,
"grad_norm": 0.5097557411992474,
"learning_rate": 2.4586895408366585e-06,
"loss": 0.1418,
"step": 1906
},
{
"epoch": 1.3401264933239634,
"grad_norm": 0.5170662896606593,
"learning_rate": 2.45393784474053e-06,
"loss": 0.1516,
"step": 1907
},
{
"epoch": 1.3408292340126493,
"grad_norm": 0.5242448072331167,
"learning_rate": 2.449189251052396e-06,
"loss": 0.1368,
"step": 1908
},
{
"epoch": 1.3415319747013352,
"grad_norm": 0.5333176246201976,
"learning_rate": 2.444443765558482e-06,
"loss": 0.1274,
"step": 1909
},
{
"epoch": 1.3422347153900211,
"grad_norm": 0.49863542852206516,
"learning_rate": 2.4397013940412178e-06,
"loss": 0.1309,
"step": 1910
},
{
"epoch": 1.3429374560787068,
"grad_norm": 0.5200347853432555,
"learning_rate": 2.434962142279242e-06,
"loss": 0.1535,
"step": 1911
},
{
"epoch": 1.3436401967673928,
"grad_norm": 0.5197849832074735,
"learning_rate": 2.4302260160473906e-06,
"loss": 0.1558,
"step": 1912
},
{
"epoch": 1.3443429374560787,
"grad_norm": 0.5042674561742618,
"learning_rate": 2.4254930211166922e-06,
"loss": 0.143,
"step": 1913
},
{
"epoch": 1.3450456781447646,
"grad_norm": 0.4941678205162263,
"learning_rate": 2.420763163254359e-06,
"loss": 0.1504,
"step": 1914
},
{
"epoch": 1.3457484188334505,
"grad_norm": 0.4900796733037519,
"learning_rate": 2.4160364482237797e-06,
"loss": 0.126,
"step": 1915
},
{
"epoch": 1.3464511595221365,
"grad_norm": 0.4963392642038735,
"learning_rate": 2.4113128817845165e-06,
"loss": 0.1303,
"step": 1916
},
{
"epoch": 1.3471539002108222,
"grad_norm": 0.5327815129273105,
"learning_rate": 2.406592469692292e-06,
"loss": 0.1566,
"step": 1917
},
{
"epoch": 1.347856640899508,
"grad_norm": 0.5657132844042618,
"learning_rate": 2.4018752176989864e-06,
"loss": 0.1779,
"step": 1918
},
{
"epoch": 1.348559381588194,
"grad_norm": 0.5462692424625433,
"learning_rate": 2.3971611315526295e-06,
"loss": 0.1661,
"step": 1919
},
{
"epoch": 1.3492621222768797,
"grad_norm": 0.4845535800498534,
"learning_rate": 2.392450216997391e-06,
"loss": 0.12,
"step": 1920
},
{
"epoch": 1.3499648629655656,
"grad_norm": 0.5064621748609605,
"learning_rate": 2.3877424797735834e-06,
"loss": 0.1217,
"step": 1921
},
{
"epoch": 1.3506676036542515,
"grad_norm": 0.5419331329012272,
"learning_rate": 2.383037925617637e-06,
"loss": 0.1563,
"step": 1922
},
{
"epoch": 1.3513703443429375,
"grad_norm": 0.525351328602467,
"learning_rate": 2.3783365602621116e-06,
"loss": 0.1524,
"step": 1923
},
{
"epoch": 1.3520730850316234,
"grad_norm": 0.49639153719673085,
"learning_rate": 2.373638389435676e-06,
"loss": 0.1411,
"step": 1924
},
{
"epoch": 1.3527758257203093,
"grad_norm": 0.5222993820698251,
"learning_rate": 2.368943418863112e-06,
"loss": 0.1352,
"step": 1925
},
{
"epoch": 1.353478566408995,
"grad_norm": 0.5061532595194184,
"learning_rate": 2.3642516542652993e-06,
"loss": 0.1494,
"step": 1926
},
{
"epoch": 1.354181307097681,
"grad_norm": 0.5248954075948614,
"learning_rate": 2.359563101359208e-06,
"loss": 0.1577,
"step": 1927
},
{
"epoch": 1.3548840477863668,
"grad_norm": 0.49574925810066905,
"learning_rate": 2.3548777658578964e-06,
"loss": 0.1218,
"step": 1928
},
{
"epoch": 1.3555867884750528,
"grad_norm": 0.5576434451325147,
"learning_rate": 2.350195653470507e-06,
"loss": 0.1479,
"step": 1929
},
{
"epoch": 1.3562895291637385,
"grad_norm": 0.4715912767260302,
"learning_rate": 2.3455167699022497e-06,
"loss": 0.1114,
"step": 1930
},
{
"epoch": 1.3569922698524244,
"grad_norm": 0.4724305197988123,
"learning_rate": 2.3408411208544036e-06,
"loss": 0.1255,
"step": 1931
},
{
"epoch": 1.3576950105411103,
"grad_norm": 0.5188745796075134,
"learning_rate": 2.3361687120242986e-06,
"loss": 0.1288,
"step": 1932
},
{
"epoch": 1.3583977512297962,
"grad_norm": 0.5248266634453557,
"learning_rate": 2.331499549105328e-06,
"loss": 0.1532,
"step": 1933
},
{
"epoch": 1.3591004919184821,
"grad_norm": 0.5304302761744716,
"learning_rate": 2.3268336377869222e-06,
"loss": 0.1419,
"step": 1934
},
{
"epoch": 1.359803232607168,
"grad_norm": 0.5324957027484029,
"learning_rate": 2.322170983754553e-06,
"loss": 0.1384,
"step": 1935
},
{
"epoch": 1.3605059732958538,
"grad_norm": 0.5532695221140957,
"learning_rate": 2.3175115926897164e-06,
"loss": 0.156,
"step": 1936
},
{
"epoch": 1.3612087139845397,
"grad_norm": 0.5068068080227023,
"learning_rate": 2.312855470269943e-06,
"loss": 0.1377,
"step": 1937
},
{
"epoch": 1.3619114546732256,
"grad_norm": 0.5453469673278214,
"learning_rate": 2.3082026221687736e-06,
"loss": 0.1801,
"step": 1938
},
{
"epoch": 1.3626141953619115,
"grad_norm": 0.5265671485284429,
"learning_rate": 2.3035530540557606e-06,
"loss": 0.1492,
"step": 1939
},
{
"epoch": 1.3633169360505972,
"grad_norm": 0.49139032579352454,
"learning_rate": 2.2989067715964592e-06,
"loss": 0.1321,
"step": 1940
},
{
"epoch": 1.3640196767392831,
"grad_norm": 0.529716442358963,
"learning_rate": 2.2942637804524224e-06,
"loss": 0.1463,
"step": 1941
},
{
"epoch": 1.364722417427969,
"grad_norm": 0.5209589657201931,
"learning_rate": 2.289624086281192e-06,
"loss": 0.1479,
"step": 1942
},
{
"epoch": 1.365425158116655,
"grad_norm": 0.5016577040642886,
"learning_rate": 2.2849876947362916e-06,
"loss": 0.1331,
"step": 1943
},
{
"epoch": 1.366127898805341,
"grad_norm": 0.539055167208763,
"learning_rate": 2.28035461146722e-06,
"loss": 0.1647,
"step": 1944
},
{
"epoch": 1.3668306394940268,
"grad_norm": 0.5261244711392749,
"learning_rate": 2.275724842119451e-06,
"loss": 0.1432,
"step": 1945
},
{
"epoch": 1.3675333801827125,
"grad_norm": 0.5568764543776502,
"learning_rate": 2.2710983923344106e-06,
"loss": 0.1685,
"step": 1946
},
{
"epoch": 1.3682361208713985,
"grad_norm": 0.49062271894554804,
"learning_rate": 2.266475267749486e-06,
"loss": 0.1177,
"step": 1947
},
{
"epoch": 1.3689388615600844,
"grad_norm": 0.48696733638864986,
"learning_rate": 2.26185547399801e-06,
"loss": 0.1153,
"step": 1948
},
{
"epoch": 1.36964160224877,
"grad_norm": 0.49560405544084934,
"learning_rate": 2.2572390167092607e-06,
"loss": 0.1426,
"step": 1949
},
{
"epoch": 1.370344342937456,
"grad_norm": 0.5301140856189038,
"learning_rate": 2.252625901508449e-06,
"loss": 0.1578,
"step": 1950
},
{
"epoch": 1.371047083626142,
"grad_norm": 0.5093655282853022,
"learning_rate": 2.248016134016708e-06,
"loss": 0.1453,
"step": 1951
},
{
"epoch": 1.3717498243148278,
"grad_norm": 0.543262050045053,
"learning_rate": 2.2434097198510964e-06,
"loss": 0.1525,
"step": 1952
},
{
"epoch": 1.3724525650035138,
"grad_norm": 0.5265051707386406,
"learning_rate": 2.2388066646245895e-06,
"loss": 0.1522,
"step": 1953
},
{
"epoch": 1.3731553056921997,
"grad_norm": 0.5335276742676673,
"learning_rate": 2.2342069739460654e-06,
"loss": 0.1393,
"step": 1954
},
{
"epoch": 1.3738580463808854,
"grad_norm": 0.494281111058332,
"learning_rate": 2.229610653420306e-06,
"loss": 0.1372,
"step": 1955
},
{
"epoch": 1.3745607870695713,
"grad_norm": 0.4913495541219822,
"learning_rate": 2.2250177086479774e-06,
"loss": 0.1525,
"step": 1956
},
{
"epoch": 1.3752635277582572,
"grad_norm": 0.5007106634090195,
"learning_rate": 2.220428145225646e-06,
"loss": 0.1428,
"step": 1957
},
{
"epoch": 1.3759662684469431,
"grad_norm": 0.4844942783124336,
"learning_rate": 2.2158419687457484e-06,
"loss": 0.1253,
"step": 1958
},
{
"epoch": 1.3766690091356288,
"grad_norm": 0.5224064614703137,
"learning_rate": 2.2112591847965977e-06,
"loss": 0.16,
"step": 1959
},
{
"epoch": 1.3773717498243148,
"grad_norm": 0.5112556692103628,
"learning_rate": 2.206679798962372e-06,
"loss": 0.1502,
"step": 1960
},
{
"epoch": 1.3780744905130007,
"grad_norm": 0.5221819158333172,
"learning_rate": 2.202103816823109e-06,
"loss": 0.1553,
"step": 1961
},
{
"epoch": 1.3787772312016866,
"grad_norm": 0.5243230446835379,
"learning_rate": 2.1975312439547e-06,
"loss": 0.1471,
"step": 1962
},
{
"epoch": 1.3794799718903725,
"grad_norm": 0.49899460160338066,
"learning_rate": 2.1929620859288796e-06,
"loss": 0.1397,
"step": 1963
},
{
"epoch": 1.3801827125790584,
"grad_norm": 0.5222487798899712,
"learning_rate": 2.1883963483132243e-06,
"loss": 0.1579,
"step": 1964
},
{
"epoch": 1.3808854532677441,
"grad_norm": 0.49363235261228205,
"learning_rate": 2.1838340366711406e-06,
"loss": 0.1259,
"step": 1965
},
{
"epoch": 1.38158819395643,
"grad_norm": 0.498718548065352,
"learning_rate": 2.1792751565618625e-06,
"loss": 0.1313,
"step": 1966
},
{
"epoch": 1.382290934645116,
"grad_norm": 0.47289640528416244,
"learning_rate": 2.17471971354044e-06,
"loss": 0.1144,
"step": 1967
},
{
"epoch": 1.382993675333802,
"grad_norm": 0.49732662681393414,
"learning_rate": 2.170167713157736e-06,
"loss": 0.1286,
"step": 1968
},
{
"epoch": 1.3836964160224876,
"grad_norm": 0.5281736532502597,
"learning_rate": 2.165619160960423e-06,
"loss": 0.1468,
"step": 1969
},
{
"epoch": 1.3843991567111735,
"grad_norm": 0.5017770871745806,
"learning_rate": 2.161074062490962e-06,
"loss": 0.1301,
"step": 1970
},
{
"epoch": 1.3851018973998594,
"grad_norm": 0.5516180162182499,
"learning_rate": 2.1565324232876143e-06,
"loss": 0.1703,
"step": 1971
},
{
"epoch": 1.3858046380885454,
"grad_norm": 0.5393133994562473,
"learning_rate": 2.1519942488844208e-06,
"loss": 0.1418,
"step": 1972
},
{
"epoch": 1.3865073787772313,
"grad_norm": 0.509469521241699,
"learning_rate": 2.1474595448112064e-06,
"loss": 0.1433,
"step": 1973
},
{
"epoch": 1.3872101194659172,
"grad_norm": 0.5458557996064164,
"learning_rate": 2.142928316593563e-06,
"loss": 0.1574,
"step": 1974
},
{
"epoch": 1.387912860154603,
"grad_norm": 0.50012128313779,
"learning_rate": 2.1384005697528454e-06,
"loss": 0.1289,
"step": 1975
},
{
"epoch": 1.3886156008432888,
"grad_norm": 0.4933344845667629,
"learning_rate": 2.133876309806168e-06,
"loss": 0.1334,
"step": 1976
},
{
"epoch": 1.3893183415319748,
"grad_norm": 0.5363708061550977,
"learning_rate": 2.1293555422664e-06,
"loss": 0.1659,
"step": 1977
},
{
"epoch": 1.3900210822206605,
"grad_norm": 0.5214051908858237,
"learning_rate": 2.1248382726421525e-06,
"loss": 0.1561,
"step": 1978
},
{
"epoch": 1.3907238229093464,
"grad_norm": 0.5180654399804175,
"learning_rate": 2.1203245064377737e-06,
"loss": 0.1566,
"step": 1979
},
{
"epoch": 1.3914265635980323,
"grad_norm": 0.5032749283223799,
"learning_rate": 2.1158142491533384e-06,
"loss": 0.1274,
"step": 1980
},
{
"epoch": 1.3921293042867182,
"grad_norm": 0.4962899530235887,
"learning_rate": 2.111307506284656e-06,
"loss": 0.1479,
"step": 1981
},
{
"epoch": 1.3928320449754041,
"grad_norm": 0.5086023007941962,
"learning_rate": 2.106804283323246e-06,
"loss": 0.1457,
"step": 1982
},
{
"epoch": 1.39353478566409,
"grad_norm": 0.5193372699637497,
"learning_rate": 2.1023045857563417e-06,
"loss": 0.1527,
"step": 1983
},
{
"epoch": 1.3942375263527758,
"grad_norm": 0.49881275519383994,
"learning_rate": 2.0978084190668785e-06,
"loss": 0.1328,
"step": 1984
},
{
"epoch": 1.3949402670414617,
"grad_norm": 0.5273887840960143,
"learning_rate": 2.093315788733492e-06,
"loss": 0.1333,
"step": 1985
},
{
"epoch": 1.3956430077301476,
"grad_norm": 0.5287999702366413,
"learning_rate": 2.088826700230506e-06,
"loss": 0.1455,
"step": 1986
},
{
"epoch": 1.3963457484188335,
"grad_norm": 0.4821679279733663,
"learning_rate": 2.084341159027932e-06,
"loss": 0.1321,
"step": 1987
},
{
"epoch": 1.3970484891075192,
"grad_norm": 0.5296817721467848,
"learning_rate": 2.079859170591455e-06,
"loss": 0.1606,
"step": 1988
},
{
"epoch": 1.3977512297962051,
"grad_norm": 0.5269614340509261,
"learning_rate": 2.0753807403824346e-06,
"loss": 0.1481,
"step": 1989
},
{
"epoch": 1.398453970484891,
"grad_norm": 0.5028102448988832,
"learning_rate": 2.0709058738578915e-06,
"loss": 0.1417,
"step": 1990
},
{
"epoch": 1.399156711173577,
"grad_norm": 0.5090353625060701,
"learning_rate": 2.0664345764705064e-06,
"loss": 0.1382,
"step": 1991
},
{
"epoch": 1.399859451862263,
"grad_norm": 0.5011415001593718,
"learning_rate": 2.0619668536686095e-06,
"loss": 0.1465,
"step": 1992
},
{
"epoch": 1.4005621925509488,
"grad_norm": 0.534292604798123,
"learning_rate": 2.0575027108961766e-06,
"loss": 0.1631,
"step": 1993
},
{
"epoch": 1.4012649332396345,
"grad_norm": 0.5202199997234441,
"learning_rate": 2.0530421535928197e-06,
"loss": 0.1574,
"step": 1994
},
{
"epoch": 1.4019676739283204,
"grad_norm": 0.5072372439854256,
"learning_rate": 2.0485851871937833e-06,
"loss": 0.1487,
"step": 1995
},
{
"epoch": 1.4026704146170064,
"grad_norm": 0.5035054049412758,
"learning_rate": 2.044131817129934e-06,
"loss": 0.1499,
"step": 1996
},
{
"epoch": 1.403373155305692,
"grad_norm": 0.5150043404618471,
"learning_rate": 2.0396820488277606e-06,
"loss": 0.1422,
"step": 1997
},
{
"epoch": 1.404075895994378,
"grad_norm": 0.4890257808747772,
"learning_rate": 2.0352358877093616e-06,
"loss": 0.1332,
"step": 1998
},
{
"epoch": 1.404778636683064,
"grad_norm": 0.5057461383103304,
"learning_rate": 2.030793339192434e-06,
"loss": 0.1339,
"step": 1999
},
{
"epoch": 1.4054813773717498,
"grad_norm": 0.5076823837359717,
"learning_rate": 2.0263544086902785e-06,
"loss": 0.1444,
"step": 2000
},
{
"epoch": 1.4054813773717498,
"eval_loss": 0.1826380044221878,
"eval_runtime": 10.8554,
"eval_samples_per_second": 21.188,
"eval_steps_per_second": 5.343,
"step": 2000
},
{
"epoch": 1.4061841180604358,
"grad_norm": 0.5131854187997845,
"learning_rate": 2.0219191016117905e-06,
"loss": 0.1529,
"step": 2001
},
{
"epoch": 1.4068868587491217,
"grad_norm": 0.5104573945165697,
"learning_rate": 2.0174874233614433e-06,
"loss": 0.1367,
"step": 2002
},
{
"epoch": 1.4075895994378074,
"grad_norm": 0.5207156973057229,
"learning_rate": 2.013059379339294e-06,
"loss": 0.146,
"step": 2003
},
{
"epoch": 1.4082923401264933,
"grad_norm": 0.479655684284797,
"learning_rate": 2.008634974940962e-06,
"loss": 0.1204,
"step": 2004
},
{
"epoch": 1.4089950808151792,
"grad_norm": 0.4802072779399691,
"learning_rate": 2.004214215557645e-06,
"loss": 0.1187,
"step": 2005
},
{
"epoch": 1.4096978215038651,
"grad_norm": 0.5253795088467027,
"learning_rate": 1.9997971065760897e-06,
"loss": 0.1656,
"step": 2006
},
{
"epoch": 1.4104005621925508,
"grad_norm": 0.5171460592475359,
"learning_rate": 1.9953836533785986e-06,
"loss": 0.1574,
"step": 2007
},
{
"epoch": 1.4111033028812368,
"grad_norm": 0.48510682723998483,
"learning_rate": 1.9909738613430187e-06,
"loss": 0.1254,
"step": 2008
},
{
"epoch": 1.4118060435699227,
"grad_norm": 0.5356051465446946,
"learning_rate": 1.986567735842735e-06,
"loss": 0.1605,
"step": 2009
},
{
"epoch": 1.4125087842586086,
"grad_norm": 0.5540257584486038,
"learning_rate": 1.982165282246665e-06,
"loss": 0.1726,
"step": 2010
},
{
"epoch": 1.4132115249472945,
"grad_norm": 0.5039359359613459,
"learning_rate": 1.9777665059192542e-06,
"loss": 0.1497,
"step": 2011
},
{
"epoch": 1.4139142656359804,
"grad_norm": 0.5219384004348654,
"learning_rate": 1.9733714122204646e-06,
"loss": 0.1492,
"step": 2012
},
{
"epoch": 1.4146170063246661,
"grad_norm": 0.5152329715483507,
"learning_rate": 1.9689800065057716e-06,
"loss": 0.1366,
"step": 2013
},
{
"epoch": 1.415319747013352,
"grad_norm": 0.5219215989924776,
"learning_rate": 1.9645922941261575e-06,
"loss": 0.1534,
"step": 2014
},
{
"epoch": 1.416022487702038,
"grad_norm": 0.5281526630040745,
"learning_rate": 1.960208280428103e-06,
"loss": 0.1448,
"step": 2015
},
{
"epoch": 1.416725228390724,
"grad_norm": 0.5259203714755477,
"learning_rate": 1.955827970753583e-06,
"loss": 0.1492,
"step": 2016
},
{
"epoch": 1.4174279690794096,
"grad_norm": 0.53826986829664,
"learning_rate": 1.9514513704400593e-06,
"loss": 0.1554,
"step": 2017
},
{
"epoch": 1.4181307097680955,
"grad_norm": 0.504885814082157,
"learning_rate": 1.947078484820472e-06,
"loss": 0.141,
"step": 2018
},
{
"epoch": 1.4188334504567814,
"grad_norm": 0.48868717567609277,
"learning_rate": 1.9427093192232373e-06,
"loss": 0.1294,
"step": 2019
},
{
"epoch": 1.4195361911454674,
"grad_norm": 0.5189210369986282,
"learning_rate": 1.9383438789722353e-06,
"loss": 0.1571,
"step": 2020
},
{
"epoch": 1.4202389318341533,
"grad_norm": 0.5009087905881117,
"learning_rate": 1.9339821693868082e-06,
"loss": 0.1446,
"step": 2021
},
{
"epoch": 1.4209416725228392,
"grad_norm": 0.5043136984347781,
"learning_rate": 1.9296241957817575e-06,
"loss": 0.1613,
"step": 2022
},
{
"epoch": 1.421644413211525,
"grad_norm": 0.5253592191479244,
"learning_rate": 1.925269963467322e-06,
"loss": 0.1613,
"step": 2023
},
{
"epoch": 1.4223471539002108,
"grad_norm": 0.514189513890053,
"learning_rate": 1.9209194777491887e-06,
"loss": 0.1528,
"step": 2024
},
{
"epoch": 1.4230498945888967,
"grad_norm": 0.5482483688452934,
"learning_rate": 1.916572743928479e-06,
"loss": 0.17,
"step": 2025
},
{
"epoch": 1.4237526352775824,
"grad_norm": 0.4818544171729801,
"learning_rate": 1.912229767301741e-06,
"loss": 0.1328,
"step": 2026
},
{
"epoch": 1.4244553759662684,
"grad_norm": 0.5002816069324538,
"learning_rate": 1.907890553160947e-06,
"loss": 0.1444,
"step": 2027
},
{
"epoch": 1.4251581166549543,
"grad_norm": 0.5106788901812277,
"learning_rate": 1.903555106793477e-06,
"loss": 0.167,
"step": 2028
},
{
"epoch": 1.4258608573436402,
"grad_norm": 0.5194882916077539,
"learning_rate": 1.8992234334821313e-06,
"loss": 0.1525,
"step": 2029
},
{
"epoch": 1.4265635980323261,
"grad_norm": 0.5279227097337229,
"learning_rate": 1.894895538505105e-06,
"loss": 0.1516,
"step": 2030
},
{
"epoch": 1.427266338721012,
"grad_norm": 0.5125155464974567,
"learning_rate": 1.8905714271359909e-06,
"loss": 0.1388,
"step": 2031
},
{
"epoch": 1.4279690794096978,
"grad_norm": 0.4986869168396727,
"learning_rate": 1.886251104643772e-06,
"loss": 0.1371,
"step": 2032
},
{
"epoch": 1.4286718200983837,
"grad_norm": 0.5386131850939692,
"learning_rate": 1.8819345762928148e-06,
"loss": 0.1572,
"step": 2033
},
{
"epoch": 1.4293745607870696,
"grad_norm": 0.5147570364863752,
"learning_rate": 1.877621847342862e-06,
"loss": 0.1466,
"step": 2034
},
{
"epoch": 1.4300773014757555,
"grad_norm": 0.5170906038507872,
"learning_rate": 1.873312923049026e-06,
"loss": 0.1623,
"step": 2035
},
{
"epoch": 1.4307800421644412,
"grad_norm": 0.48784987395992657,
"learning_rate": 1.8690078086617847e-06,
"loss": 0.1203,
"step": 2036
},
{
"epoch": 1.4314827828531271,
"grad_norm": 0.5101351674685919,
"learning_rate": 1.864706509426973e-06,
"loss": 0.1487,
"step": 2037
},
{
"epoch": 1.432185523541813,
"grad_norm": 0.49552255830825465,
"learning_rate": 1.8604090305857757e-06,
"loss": 0.1413,
"step": 2038
},
{
"epoch": 1.432888264230499,
"grad_norm": 0.4905989635114193,
"learning_rate": 1.8561153773747253e-06,
"loss": 0.1338,
"step": 2039
},
{
"epoch": 1.433591004919185,
"grad_norm": 0.493134783533051,
"learning_rate": 1.851825555025689e-06,
"loss": 0.1293,
"step": 2040
},
{
"epoch": 1.4342937456078708,
"grad_norm": 0.48077940942391195,
"learning_rate": 1.8475395687658699e-06,
"loss": 0.1279,
"step": 2041
},
{
"epoch": 1.4349964862965565,
"grad_norm": 0.4715910653602217,
"learning_rate": 1.843257423817793e-06,
"loss": 0.1222,
"step": 2042
},
{
"epoch": 1.4356992269852424,
"grad_norm": 0.5551181849334715,
"learning_rate": 1.838979125399306e-06,
"loss": 0.1782,
"step": 2043
},
{
"epoch": 1.4364019676739284,
"grad_norm": 0.5029944238272629,
"learning_rate": 1.8347046787235677e-06,
"loss": 0.138,
"step": 2044
},
{
"epoch": 1.437104708362614,
"grad_norm": 0.5025199667016227,
"learning_rate": 1.8304340889990418e-06,
"loss": 0.1284,
"step": 2045
},
{
"epoch": 1.4378074490513,
"grad_norm": 0.5166701187982591,
"learning_rate": 1.8261673614294996e-06,
"loss": 0.1362,
"step": 2046
},
{
"epoch": 1.438510189739986,
"grad_norm": 0.518865687852262,
"learning_rate": 1.8219045012139957e-06,
"loss": 0.1478,
"step": 2047
},
{
"epoch": 1.4392129304286718,
"grad_norm": 0.48605772421114374,
"learning_rate": 1.8176455135468796e-06,
"loss": 0.1273,
"step": 2048
},
{
"epoch": 1.4399156711173577,
"grad_norm": 0.5264019128235427,
"learning_rate": 1.8133904036177785e-06,
"loss": 0.1437,
"step": 2049
},
{
"epoch": 1.4406184118060437,
"grad_norm": 0.5640079905965288,
"learning_rate": 1.809139176611599e-06,
"loss": 0.177,
"step": 2050
},
{
"epoch": 1.4413211524947294,
"grad_norm": 0.537064034931882,
"learning_rate": 1.804891837708514e-06,
"loss": 0.1685,
"step": 2051
},
{
"epoch": 1.4420238931834153,
"grad_norm": 0.5079898047846592,
"learning_rate": 1.8006483920839524e-06,
"loss": 0.1325,
"step": 2052
},
{
"epoch": 1.4427266338721012,
"grad_norm": 0.5695707721683408,
"learning_rate": 1.7964088449086103e-06,
"loss": 0.1852,
"step": 2053
},
{
"epoch": 1.4434293745607871,
"grad_norm": 0.5363631384695723,
"learning_rate": 1.792173201348426e-06,
"loss": 0.1618,
"step": 2054
},
{
"epoch": 1.4441321152494728,
"grad_norm": 0.5632395868037631,
"learning_rate": 1.7879414665645834e-06,
"loss": 0.1637,
"step": 2055
},
{
"epoch": 1.4448348559381587,
"grad_norm": 0.5360209729861478,
"learning_rate": 1.7837136457135035e-06,
"loss": 0.1529,
"step": 2056
},
{
"epoch": 1.4455375966268447,
"grad_norm": 0.5245271097047481,
"learning_rate": 1.7794897439468378e-06,
"loss": 0.147,
"step": 2057
},
{
"epoch": 1.4462403373155306,
"grad_norm": 0.5042534725139199,
"learning_rate": 1.7752697664114621e-06,
"loss": 0.1379,
"step": 2058
},
{
"epoch": 1.4469430780042165,
"grad_norm": 0.4838221136458502,
"learning_rate": 1.7710537182494714e-06,
"loss": 0.1214,
"step": 2059
},
{
"epoch": 1.4476458186929024,
"grad_norm": 0.4870099806636077,
"learning_rate": 1.7668416045981712e-06,
"loss": 0.1272,
"step": 2060
},
{
"epoch": 1.4483485593815881,
"grad_norm": 0.5151525413649795,
"learning_rate": 1.762633430590075e-06,
"loss": 0.1566,
"step": 2061
},
{
"epoch": 1.449051300070274,
"grad_norm": 0.4960453083623157,
"learning_rate": 1.7584292013528935e-06,
"loss": 0.138,
"step": 2062
},
{
"epoch": 1.44975404075896,
"grad_norm": 0.491359999587759,
"learning_rate": 1.754228922009532e-06,
"loss": 0.1406,
"step": 2063
},
{
"epoch": 1.450456781447646,
"grad_norm": 0.5141990562668809,
"learning_rate": 1.7500325976780824e-06,
"loss": 0.1401,
"step": 2064
},
{
"epoch": 1.4511595221363316,
"grad_norm": 0.480593857312402,
"learning_rate": 1.7458402334718177e-06,
"loss": 0.1134,
"step": 2065
},
{
"epoch": 1.4518622628250175,
"grad_norm": 0.4823544002980055,
"learning_rate": 1.741651834499185e-06,
"loss": 0.1186,
"step": 2066
},
{
"epoch": 1.4525650035137034,
"grad_norm": 0.4883901662939787,
"learning_rate": 1.7374674058637997e-06,
"loss": 0.131,
"step": 2067
},
{
"epoch": 1.4532677442023894,
"grad_norm": 0.49488089290245274,
"learning_rate": 1.7332869526644396e-06,
"loss": 0.1382,
"step": 2068
},
{
"epoch": 1.4539704848910753,
"grad_norm": 0.5025783147637897,
"learning_rate": 1.7291104799950364e-06,
"loss": 0.1421,
"step": 2069
},
{
"epoch": 1.4546732255797612,
"grad_norm": 0.5135337473852053,
"learning_rate": 1.7249379929446786e-06,
"loss": 0.1357,
"step": 2070
},
{
"epoch": 1.455375966268447,
"grad_norm": 0.5079848243622339,
"learning_rate": 1.7207694965975879e-06,
"loss": 0.1454,
"step": 2071
},
{
"epoch": 1.4560787069571328,
"grad_norm": 0.525085885735298,
"learning_rate": 1.71660499603313e-06,
"loss": 0.1428,
"step": 2072
},
{
"epoch": 1.4567814476458187,
"grad_norm": 0.5182851381761374,
"learning_rate": 1.7124444963257974e-06,
"loss": 0.1426,
"step": 2073
},
{
"epoch": 1.4574841883345044,
"grad_norm": 0.5164467264921618,
"learning_rate": 1.7082880025452147e-06,
"loss": 0.1446,
"step": 2074
},
{
"epoch": 1.4581869290231904,
"grad_norm": 0.48425978894094407,
"learning_rate": 1.70413551975612e-06,
"loss": 0.136,
"step": 2075
},
{
"epoch": 1.4588896697118763,
"grad_norm": 0.5349857216691936,
"learning_rate": 1.6999870530183615e-06,
"loss": 0.1627,
"step": 2076
},
{
"epoch": 1.4595924104005622,
"grad_norm": 0.4945920386944028,
"learning_rate": 1.6958426073868967e-06,
"loss": 0.1294,
"step": 2077
},
{
"epoch": 1.4602951510892481,
"grad_norm": 0.5492545918703949,
"learning_rate": 1.6917021879117861e-06,
"loss": 0.1786,
"step": 2078
},
{
"epoch": 1.460997891777934,
"grad_norm": 0.5099087505970852,
"learning_rate": 1.6875657996381812e-06,
"loss": 0.1582,
"step": 2079
},
{
"epoch": 1.4617006324666197,
"grad_norm": 0.49742097581952377,
"learning_rate": 1.6834334476063214e-06,
"loss": 0.1389,
"step": 2080
},
{
"epoch": 1.4624033731553057,
"grad_norm": 0.4826117925750648,
"learning_rate": 1.6793051368515283e-06,
"loss": 0.1309,
"step": 2081
},
{
"epoch": 1.4631061138439916,
"grad_norm": 0.513712362930316,
"learning_rate": 1.6751808724041996e-06,
"loss": 0.1377,
"step": 2082
},
{
"epoch": 1.4638088545326775,
"grad_norm": 0.5284740998171508,
"learning_rate": 1.6710606592898016e-06,
"loss": 0.1407,
"step": 2083
},
{
"epoch": 1.4645115952213632,
"grad_norm": 0.5266927611311717,
"learning_rate": 1.6669445025288649e-06,
"loss": 0.1542,
"step": 2084
},
{
"epoch": 1.4652143359100491,
"grad_norm": 0.5004330148088008,
"learning_rate": 1.6628324071369768e-06,
"loss": 0.1347,
"step": 2085
},
{
"epoch": 1.465917076598735,
"grad_norm": 0.5133116593057202,
"learning_rate": 1.6587243781247764e-06,
"loss": 0.1547,
"step": 2086
},
{
"epoch": 1.466619817287421,
"grad_norm": 0.5313696987465686,
"learning_rate": 1.6546204204979478e-06,
"loss": 0.1567,
"step": 2087
},
{
"epoch": 1.467322557976107,
"grad_norm": 0.4869528563529301,
"learning_rate": 1.6505205392572128e-06,
"loss": 0.126,
"step": 2088
},
{
"epoch": 1.4680252986647928,
"grad_norm": 0.489245879712265,
"learning_rate": 1.6464247393983273e-06,
"loss": 0.1108,
"step": 2089
},
{
"epoch": 1.4687280393534785,
"grad_norm": 0.515009361771683,
"learning_rate": 1.642333025912074e-06,
"loss": 0.1534,
"step": 2090
},
{
"epoch": 1.4694307800421644,
"grad_norm": 0.5215520866230325,
"learning_rate": 1.6382454037842565e-06,
"loss": 0.1797,
"step": 2091
},
{
"epoch": 1.4701335207308504,
"grad_norm": 0.5034561708219197,
"learning_rate": 1.6341618779956913e-06,
"loss": 0.1336,
"step": 2092
},
{
"epoch": 1.4708362614195363,
"grad_norm": 0.5133677130668264,
"learning_rate": 1.6300824535222043e-06,
"loss": 0.153,
"step": 2093
},
{
"epoch": 1.471539002108222,
"grad_norm": 0.5246991854782783,
"learning_rate": 1.626007135334629e-06,
"loss": 0.1632,
"step": 2094
},
{
"epoch": 1.472241742796908,
"grad_norm": 0.4804227544378076,
"learning_rate": 1.6219359283987852e-06,
"loss": 0.1213,
"step": 2095
},
{
"epoch": 1.4729444834855938,
"grad_norm": 0.5031868063988818,
"learning_rate": 1.6178688376754896e-06,
"loss": 0.1475,
"step": 2096
},
{
"epoch": 1.4736472241742797,
"grad_norm": 0.4971654477550433,
"learning_rate": 1.6138058681205425e-06,
"loss": 0.1343,
"step": 2097
},
{
"epoch": 1.4743499648629657,
"grad_norm": 0.5366178895790169,
"learning_rate": 1.6097470246847236e-06,
"loss": 0.157,
"step": 2098
},
{
"epoch": 1.4750527055516516,
"grad_norm": 0.5110633032912951,
"learning_rate": 1.6056923123137846e-06,
"loss": 0.1399,
"step": 2099
},
{
"epoch": 1.4757554462403373,
"grad_norm": 0.49700957520710715,
"learning_rate": 1.6016417359484388e-06,
"loss": 0.1393,
"step": 2100
},
{
"epoch": 1.4764581869290232,
"grad_norm": 0.5033159024264766,
"learning_rate": 1.5975953005243628e-06,
"loss": 0.1426,
"step": 2101
},
{
"epoch": 1.4771609276177091,
"grad_norm": 0.5479522458545618,
"learning_rate": 1.5935530109721915e-06,
"loss": 0.1795,
"step": 2102
},
{
"epoch": 1.4778636683063948,
"grad_norm": 0.5030601127371045,
"learning_rate": 1.5895148722175025e-06,
"loss": 0.139,
"step": 2103
},
{
"epoch": 1.4785664089950807,
"grad_norm": 0.501233824654983,
"learning_rate": 1.5854808891808192e-06,
"loss": 0.1292,
"step": 2104
},
{
"epoch": 1.4792691496837667,
"grad_norm": 0.507832389453275,
"learning_rate": 1.5814510667775944e-06,
"loss": 0.1409,
"step": 2105
},
{
"epoch": 1.4799718903724526,
"grad_norm": 0.5124476953162119,
"learning_rate": 1.5774254099182217e-06,
"loss": 0.1443,
"step": 2106
},
{
"epoch": 1.4806746310611385,
"grad_norm": 0.5322204944380016,
"learning_rate": 1.5734039235080112e-06,
"loss": 0.1584,
"step": 2107
},
{
"epoch": 1.4813773717498244,
"grad_norm": 0.47146448385700807,
"learning_rate": 1.5693866124471935e-06,
"loss": 0.1109,
"step": 2108
},
{
"epoch": 1.4820801124385101,
"grad_norm": 0.5016029280258626,
"learning_rate": 1.5653734816309113e-06,
"loss": 0.1323,
"step": 2109
},
{
"epoch": 1.482782853127196,
"grad_norm": 0.5101882079807832,
"learning_rate": 1.5613645359492141e-06,
"loss": 0.1435,
"step": 2110
},
{
"epoch": 1.483485593815882,
"grad_norm": 0.5091675982820156,
"learning_rate": 1.5573597802870515e-06,
"loss": 0.1393,
"step": 2111
},
{
"epoch": 1.4841883345045679,
"grad_norm": 0.5217815960604922,
"learning_rate": 1.5533592195242674e-06,
"loss": 0.1536,
"step": 2112
},
{
"epoch": 1.4848910751932536,
"grad_norm": 0.5183465180846747,
"learning_rate": 1.549362858535594e-06,
"loss": 0.1389,
"step": 2113
},
{
"epoch": 1.4855938158819395,
"grad_norm": 0.5071245694618659,
"learning_rate": 1.5453707021906467e-06,
"loss": 0.1558,
"step": 2114
},
{
"epoch": 1.4862965565706254,
"grad_norm": 0.5214478193648973,
"learning_rate": 1.5413827553539162e-06,
"loss": 0.1363,
"step": 2115
},
{
"epoch": 1.4869992972593113,
"grad_norm": 0.529511429283781,
"learning_rate": 1.5373990228847657e-06,
"loss": 0.1564,
"step": 2116
},
{
"epoch": 1.4877020379479973,
"grad_norm": 0.4878969089896803,
"learning_rate": 1.5334195096374193e-06,
"loss": 0.1238,
"step": 2117
},
{
"epoch": 1.4884047786366832,
"grad_norm": 0.5665371619087282,
"learning_rate": 1.529444220460969e-06,
"loss": 0.1621,
"step": 2118
},
{
"epoch": 1.489107519325369,
"grad_norm": 0.5201262867703623,
"learning_rate": 1.5254731601993472e-06,
"loss": 0.159,
"step": 2119
},
{
"epoch": 1.4898102600140548,
"grad_norm": 0.5097670660342238,
"learning_rate": 1.5215063336913421e-06,
"loss": 0.1479,
"step": 2120
},
{
"epoch": 1.4905130007027407,
"grad_norm": 0.5131401687757647,
"learning_rate": 1.5175437457705787e-06,
"loss": 0.1574,
"step": 2121
},
{
"epoch": 1.4912157413914264,
"grad_norm": 0.46788685857399476,
"learning_rate": 1.5135854012655227e-06,
"loss": 0.1282,
"step": 2122
},
{
"epoch": 1.4919184820801124,
"grad_norm": 0.5355929641138256,
"learning_rate": 1.509631304999465e-06,
"loss": 0.1606,
"step": 2123
},
{
"epoch": 1.4926212227687983,
"grad_norm": 0.5019990018308255,
"learning_rate": 1.5056814617905168e-06,
"loss": 0.126,
"step": 2124
},
{
"epoch": 1.4933239634574842,
"grad_norm": 0.5100567049359251,
"learning_rate": 1.501735876451611e-06,
"loss": 0.1353,
"step": 2125
},
{
"epoch": 1.4940267041461701,
"grad_norm": 0.5276711403853661,
"learning_rate": 1.4977945537904953e-06,
"loss": 0.1519,
"step": 2126
},
{
"epoch": 1.494729444834856,
"grad_norm": 0.4561955475270246,
"learning_rate": 1.4938574986097176e-06,
"loss": 0.1109,
"step": 2127
},
{
"epoch": 1.4954321855235417,
"grad_norm": 0.5258796232827486,
"learning_rate": 1.4899247157066303e-06,
"loss": 0.1531,
"step": 2128
},
{
"epoch": 1.4961349262122277,
"grad_norm": 0.5434050717223791,
"learning_rate": 1.485996209873372e-06,
"loss": 0.1667,
"step": 2129
},
{
"epoch": 1.4968376669009136,
"grad_norm": 0.47184637783971156,
"learning_rate": 1.4820719858968807e-06,
"loss": 0.1158,
"step": 2130
},
{
"epoch": 1.4975404075895995,
"grad_norm": 0.47350047699188863,
"learning_rate": 1.4781520485588696e-06,
"loss": 0.1266,
"step": 2131
},
{
"epoch": 1.4982431482782852,
"grad_norm": 0.508209436132349,
"learning_rate": 1.4742364026358307e-06,
"loss": 0.1458,
"step": 2132
},
{
"epoch": 1.4989458889669711,
"grad_norm": 0.5244850933042244,
"learning_rate": 1.4703250528990265e-06,
"loss": 0.1459,
"step": 2133
},
{
"epoch": 1.499648629655657,
"grad_norm": 0.5197565951343396,
"learning_rate": 1.4664180041144843e-06,
"loss": 0.1532,
"step": 2134
},
{
"epoch": 1.500351370344343,
"grad_norm": 0.5253142565422563,
"learning_rate": 1.4625152610429922e-06,
"loss": 0.1465,
"step": 2135
},
{
"epoch": 1.5010541110330289,
"grad_norm": 0.49960269677237373,
"learning_rate": 1.4586168284400893e-06,
"loss": 0.1366,
"step": 2136
},
{
"epoch": 1.5017568517217148,
"grad_norm": 0.529644657730334,
"learning_rate": 1.4547227110560642e-06,
"loss": 0.1599,
"step": 2137
},
{
"epoch": 1.5024595924104007,
"grad_norm": 0.5159155953257838,
"learning_rate": 1.4508329136359462e-06,
"loss": 0.1526,
"step": 2138
},
{
"epoch": 1.5031623330990864,
"grad_norm": 0.49239463898400426,
"learning_rate": 1.4469474409195017e-06,
"loss": 0.1369,
"step": 2139
},
{
"epoch": 1.5038650737877723,
"grad_norm": 0.5224057723398261,
"learning_rate": 1.4430662976412268e-06,
"loss": 0.1608,
"step": 2140
},
{
"epoch": 1.504567814476458,
"grad_norm": 0.5244999650971088,
"learning_rate": 1.4391894885303414e-06,
"loss": 0.1543,
"step": 2141
},
{
"epoch": 1.505270555165144,
"grad_norm": 0.5110844959732017,
"learning_rate": 1.4353170183107884e-06,
"loss": 0.1596,
"step": 2142
},
{
"epoch": 1.5059732958538299,
"grad_norm": 0.5093261756275554,
"learning_rate": 1.4314488917012164e-06,
"loss": 0.1465,
"step": 2143
},
{
"epoch": 1.5066760365425158,
"grad_norm": 0.4873406151854595,
"learning_rate": 1.4275851134149864e-06,
"loss": 0.1437,
"step": 2144
},
{
"epoch": 1.5073787772312017,
"grad_norm": 0.5329279562540382,
"learning_rate": 1.4237256881601585e-06,
"loss": 0.1546,
"step": 2145
},
{
"epoch": 1.5080815179198876,
"grad_norm": 0.5327753630385557,
"learning_rate": 1.4198706206394924e-06,
"loss": 0.177,
"step": 2146
},
{
"epoch": 1.5087842586085736,
"grad_norm": 0.5181599657597732,
"learning_rate": 1.4160199155504357e-06,
"loss": 0.1373,
"step": 2147
},
{
"epoch": 1.5094869992972593,
"grad_norm": 0.4692289997035836,
"learning_rate": 1.4121735775851164e-06,
"loss": 0.107,
"step": 2148
},
{
"epoch": 1.5101897399859452,
"grad_norm": 0.5088768403918668,
"learning_rate": 1.4083316114303448e-06,
"loss": 0.1523,
"step": 2149
},
{
"epoch": 1.510892480674631,
"grad_norm": 0.5094143771026569,
"learning_rate": 1.4044940217676061e-06,
"loss": 0.1411,
"step": 2150
},
{
"epoch": 1.5115952213633168,
"grad_norm": 0.511386387103471,
"learning_rate": 1.4006608132730504e-06,
"loss": 0.1407,
"step": 2151
},
{
"epoch": 1.5122979620520027,
"grad_norm": 0.520482910488152,
"learning_rate": 1.3968319906174893e-06,
"loss": 0.1506,
"step": 2152
},
{
"epoch": 1.5130007027406887,
"grad_norm": 0.5433542109670519,
"learning_rate": 1.3930075584663867e-06,
"loss": 0.1469,
"step": 2153
},
{
"epoch": 1.5137034434293746,
"grad_norm": 0.5272449396496899,
"learning_rate": 1.3891875214798644e-06,
"loss": 0.1509,
"step": 2154
},
{
"epoch": 1.5144061841180605,
"grad_norm": 0.4903186696381816,
"learning_rate": 1.3853718843126824e-06,
"loss": 0.1176,
"step": 2155
},
{
"epoch": 1.5151089248067464,
"grad_norm": 0.5067264890176539,
"learning_rate": 1.3815606516142422e-06,
"loss": 0.1479,
"step": 2156
},
{
"epoch": 1.5158116654954323,
"grad_norm": 0.5177281848447265,
"learning_rate": 1.3777538280285767e-06,
"loss": 0.1397,
"step": 2157
},
{
"epoch": 1.516514406184118,
"grad_norm": 0.5140016496572131,
"learning_rate": 1.3739514181943486e-06,
"loss": 0.1623,
"step": 2158
},
{
"epoch": 1.517217146872804,
"grad_norm": 0.5220023492099699,
"learning_rate": 1.3701534267448395e-06,
"loss": 0.1436,
"step": 2159
},
{
"epoch": 1.5179198875614897,
"grad_norm": 0.5161035372001767,
"learning_rate": 1.366359858307949e-06,
"loss": 0.1428,
"step": 2160
},
{
"epoch": 1.5186226282501756,
"grad_norm": 0.5086023982291189,
"learning_rate": 1.3625707175061876e-06,
"loss": 0.1297,
"step": 2161
},
{
"epoch": 1.5193253689388615,
"grad_norm": 0.5398633665566651,
"learning_rate": 1.358786008956669e-06,
"loss": 0.1571,
"step": 2162
},
{
"epoch": 1.5200281096275474,
"grad_norm": 0.5117688888959533,
"learning_rate": 1.3550057372711078e-06,
"loss": 0.136,
"step": 2163
},
{
"epoch": 1.5207308503162333,
"grad_norm": 0.49190830346284004,
"learning_rate": 1.3512299070558104e-06,
"loss": 0.1177,
"step": 2164
},
{
"epoch": 1.5214335910049193,
"grad_norm": 0.5213200786252551,
"learning_rate": 1.347458522911672e-06,
"loss": 0.1453,
"step": 2165
},
{
"epoch": 1.5221363316936052,
"grad_norm": 0.5226027442062637,
"learning_rate": 1.343691589434174e-06,
"loss": 0.1531,
"step": 2166
},
{
"epoch": 1.5228390723822909,
"grad_norm": 0.5503507406617123,
"learning_rate": 1.3399291112133673e-06,
"loss": 0.1734,
"step": 2167
},
{
"epoch": 1.5235418130709768,
"grad_norm": 0.5102965948709162,
"learning_rate": 1.336171092833879e-06,
"loss": 0.1455,
"step": 2168
},
{
"epoch": 1.5242445537596627,
"grad_norm": 0.5107129259550931,
"learning_rate": 1.3324175388748989e-06,
"loss": 0.1628,
"step": 2169
},
{
"epoch": 1.5249472944483484,
"grad_norm": 0.5070346465598122,
"learning_rate": 1.3286684539101823e-06,
"loss": 0.1332,
"step": 2170
},
{
"epoch": 1.5256500351370343,
"grad_norm": 0.51598371155757,
"learning_rate": 1.3249238425080346e-06,
"loss": 0.131,
"step": 2171
},
{
"epoch": 1.5263527758257203,
"grad_norm": 0.4882384084303687,
"learning_rate": 1.3211837092313074e-06,
"loss": 0.1308,
"step": 2172
},
{
"epoch": 1.5270555165144062,
"grad_norm": 0.5321774835869371,
"learning_rate": 1.3174480586374e-06,
"loss": 0.1708,
"step": 2173
},
{
"epoch": 1.527758257203092,
"grad_norm": 0.55387738733551,
"learning_rate": 1.3137168952782514e-06,
"loss": 0.1652,
"step": 2174
},
{
"epoch": 1.528460997891778,
"grad_norm": 0.5099557259534563,
"learning_rate": 1.309990223700328e-06,
"loss": 0.1451,
"step": 2175
},
{
"epoch": 1.529163738580464,
"grad_norm": 0.5410731968618563,
"learning_rate": 1.3062680484446267e-06,
"loss": 0.1587,
"step": 2176
},
{
"epoch": 1.5298664792691496,
"grad_norm": 0.4853197081691339,
"learning_rate": 1.3025503740466588e-06,
"loss": 0.1297,
"step": 2177
},
{
"epoch": 1.5305692199578356,
"grad_norm": 0.541524059003662,
"learning_rate": 1.298837205036461e-06,
"loss": 0.1693,
"step": 2178
},
{
"epoch": 1.5312719606465213,
"grad_norm": 0.4997483368619478,
"learning_rate": 1.2951285459385737e-06,
"loss": 0.124,
"step": 2179
},
{
"epoch": 1.5319747013352072,
"grad_norm": 0.5195231035312816,
"learning_rate": 1.291424401272044e-06,
"loss": 0.1487,
"step": 2180
},
{
"epoch": 1.532677442023893,
"grad_norm": 0.5063111561918022,
"learning_rate": 1.2877247755504174e-06,
"loss": 0.1347,
"step": 2181
},
{
"epoch": 1.533380182712579,
"grad_norm": 0.5491929250983179,
"learning_rate": 1.2840296732817332e-06,
"loss": 0.1649,
"step": 2182
},
{
"epoch": 1.534082923401265,
"grad_norm": 0.49328743508314504,
"learning_rate": 1.2803390989685189e-06,
"loss": 0.1233,
"step": 2183
},
{
"epoch": 1.5347856640899509,
"grad_norm": 0.5233912984109269,
"learning_rate": 1.276653057107784e-06,
"loss": 0.1462,
"step": 2184
},
{
"epoch": 1.5354884047786368,
"grad_norm": 0.49839242079793133,
"learning_rate": 1.2729715521910168e-06,
"loss": 0.1331,
"step": 2185
},
{
"epoch": 1.5361911454673227,
"grad_norm": 0.5043852726925345,
"learning_rate": 1.2692945887041763e-06,
"loss": 0.1387,
"step": 2186
},
{
"epoch": 1.5368938861560084,
"grad_norm": 0.5384067661653723,
"learning_rate": 1.2656221711276867e-06,
"loss": 0.1481,
"step": 2187
},
{
"epoch": 1.5375966268446943,
"grad_norm": 0.5479793052050106,
"learning_rate": 1.261954303936434e-06,
"loss": 0.1775,
"step": 2188
},
{
"epoch": 1.53829936753338,
"grad_norm": 0.45764911011948567,
"learning_rate": 1.2582909915997604e-06,
"loss": 0.117,
"step": 2189
},
{
"epoch": 1.539002108222066,
"grad_norm": 0.499823540139615,
"learning_rate": 1.2546322385814564e-06,
"loss": 0.1397,
"step": 2190
},
{
"epoch": 1.5397048489107519,
"grad_norm": 0.5095299030936337,
"learning_rate": 1.2509780493397573e-06,
"loss": 0.1319,
"step": 2191
},
{
"epoch": 1.5404075895994378,
"grad_norm": 0.5071698659448782,
"learning_rate": 1.2473284283273373e-06,
"loss": 0.1399,
"step": 2192
},
{
"epoch": 1.5411103302881237,
"grad_norm": 0.5234428919899052,
"learning_rate": 1.243683379991304e-06,
"loss": 0.1573,
"step": 2193
},
{
"epoch": 1.5418130709768096,
"grad_norm": 0.5350763730505442,
"learning_rate": 1.2400429087731952e-06,
"loss": 0.155,
"step": 2194
},
{
"epoch": 1.5425158116654956,
"grad_norm": 0.5091434987159394,
"learning_rate": 1.236407019108971e-06,
"loss": 0.1401,
"step": 2195
},
{
"epoch": 1.5432185523541813,
"grad_norm": 0.5023105860405399,
"learning_rate": 1.2327757154290037e-06,
"loss": 0.1321,
"step": 2196
},
{
"epoch": 1.5439212930428672,
"grad_norm": 0.5077759531075567,
"learning_rate": 1.229149002158082e-06,
"loss": 0.1531,
"step": 2197
},
{
"epoch": 1.544624033731553,
"grad_norm": 0.5259031385429787,
"learning_rate": 1.2255268837154034e-06,
"loss": 0.1346,
"step": 2198
},
{
"epoch": 1.5453267744202388,
"grad_norm": 0.499418004700236,
"learning_rate": 1.2219093645145613e-06,
"loss": 0.1363,
"step": 2199
},
{
"epoch": 1.5460295151089247,
"grad_norm": 0.5280318657161265,
"learning_rate": 1.2182964489635502e-06,
"loss": 0.1506,
"step": 2200
},
{
"epoch": 1.5467322557976106,
"grad_norm": 0.5338981790372893,
"learning_rate": 1.2146881414647471e-06,
"loss": 0.1574,
"step": 2201
},
{
"epoch": 1.5474349964862966,
"grad_norm": 0.513601118377399,
"learning_rate": 1.211084446414923e-06,
"loss": 0.1508,
"step": 2202
},
{
"epoch": 1.5481377371749825,
"grad_norm": 0.5302281271672624,
"learning_rate": 1.2074853682052235e-06,
"loss": 0.1437,
"step": 2203
},
{
"epoch": 1.5488404778636684,
"grad_norm": 0.5195178100677815,
"learning_rate": 1.20389091122117e-06,
"loss": 0.1477,
"step": 2204
},
{
"epoch": 1.5495432185523543,
"grad_norm": 0.5019957340440979,
"learning_rate": 1.2003010798426512e-06,
"loss": 0.1131,
"step": 2205
},
{
"epoch": 1.55024595924104,
"grad_norm": 0.510219552236996,
"learning_rate": 1.1967158784439214e-06,
"loss": 0.1545,
"step": 2206
},
{
"epoch": 1.550948699929726,
"grad_norm": 0.5172634742559926,
"learning_rate": 1.1931353113935935e-06,
"loss": 0.1553,
"step": 2207
},
{
"epoch": 1.5516514406184116,
"grad_norm": 0.5068735572234946,
"learning_rate": 1.1895593830546308e-06,
"loss": 0.1314,
"step": 2208
},
{
"epoch": 1.5523541813070976,
"grad_norm": 0.5019792388664622,
"learning_rate": 1.1859880977843469e-06,
"loss": 0.1289,
"step": 2209
},
{
"epoch": 1.5530569219957835,
"grad_norm": 0.5343633445717745,
"learning_rate": 1.1824214599343958e-06,
"loss": 0.1585,
"step": 2210
},
{
"epoch": 1.5537596626844694,
"grad_norm": 0.4983419591247481,
"learning_rate": 1.1788594738507708e-06,
"loss": 0.1542,
"step": 2211
},
{
"epoch": 1.5544624033731553,
"grad_norm": 0.5072492403320521,
"learning_rate": 1.175302143873795e-06,
"loss": 0.148,
"step": 2212
},
{
"epoch": 1.5551651440618413,
"grad_norm": 0.5115835547402442,
"learning_rate": 1.1717494743381187e-06,
"loss": 0.1482,
"step": 2213
},
{
"epoch": 1.5558678847505272,
"grad_norm": 0.5072302811409777,
"learning_rate": 1.1682014695727129e-06,
"loss": 0.1492,
"step": 2214
},
{
"epoch": 1.556570625439213,
"grad_norm": 0.48941247607237437,
"learning_rate": 1.164658133900866e-06,
"loss": 0.1369,
"step": 2215
},
{
"epoch": 1.5572733661278988,
"grad_norm": 0.5629621734782317,
"learning_rate": 1.1611194716401752e-06,
"loss": 0.1839,
"step": 2216
},
{
"epoch": 1.5579761068165847,
"grad_norm": 0.49186056840638026,
"learning_rate": 1.1575854871025445e-06,
"loss": 0.1244,
"step": 2217
},
{
"epoch": 1.5586788475052704,
"grad_norm": 0.48784211634601093,
"learning_rate": 1.154056184594175e-06,
"loss": 0.1357,
"step": 2218
},
{
"epoch": 1.5593815881939563,
"grad_norm": 0.51634235643248,
"learning_rate": 1.1505315684155704e-06,
"loss": 0.1446,
"step": 2219
},
{
"epoch": 1.5600843288826423,
"grad_norm": 0.5311972591720889,
"learning_rate": 1.1470116428615141e-06,
"loss": 0.1603,
"step": 2220
},
{
"epoch": 1.5607870695713282,
"grad_norm": 0.48075220298354765,
"learning_rate": 1.143496412221079e-06,
"loss": 0.1284,
"step": 2221
},
{
"epoch": 1.561489810260014,
"grad_norm": 0.5101512440680539,
"learning_rate": 1.1399858807776194e-06,
"loss": 0.1485,
"step": 2222
},
{
"epoch": 1.5621925509487,
"grad_norm": 0.5107976534764769,
"learning_rate": 1.1364800528087594e-06,
"loss": 0.1485,
"step": 2223
},
{
"epoch": 1.562895291637386,
"grad_norm": 0.5110004452171197,
"learning_rate": 1.132978932586395e-06,
"loss": 0.1487,
"step": 2224
},
{
"epoch": 1.5635980323260716,
"grad_norm": 0.5322552538407819,
"learning_rate": 1.1294825243766794e-06,
"loss": 0.1572,
"step": 2225
},
{
"epoch": 1.5643007730147576,
"grad_norm": 0.5157808574584885,
"learning_rate": 1.1259908324400343e-06,
"loss": 0.138,
"step": 2226
},
{
"epoch": 1.5650035137034435,
"grad_norm": 0.5216191938955658,
"learning_rate": 1.1225038610311267e-06,
"loss": 0.1611,
"step": 2227
},
{
"epoch": 1.5657062543921292,
"grad_norm": 0.4811572260100985,
"learning_rate": 1.1190216143988746e-06,
"loss": 0.1187,
"step": 2228
},
{
"epoch": 1.566408995080815,
"grad_norm": 0.5162514568978829,
"learning_rate": 1.115544096786439e-06,
"loss": 0.1433,
"step": 2229
},
{
"epoch": 1.567111735769501,
"grad_norm": 0.5273932111449955,
"learning_rate": 1.112071312431216e-06,
"loss": 0.1548,
"step": 2230
},
{
"epoch": 1.567814476458187,
"grad_norm": 0.5118261654104636,
"learning_rate": 1.1086032655648377e-06,
"loss": 0.1519,
"step": 2231
},
{
"epoch": 1.5685172171468729,
"grad_norm": 0.5017814253766492,
"learning_rate": 1.1051399604131601e-06,
"loss": 0.1271,
"step": 2232
},
{
"epoch": 1.5692199578355588,
"grad_norm": 0.5235570064798826,
"learning_rate": 1.1016814011962651e-06,
"loss": 0.1398,
"step": 2233
},
{
"epoch": 1.5699226985242447,
"grad_norm": 0.5574642218396508,
"learning_rate": 1.098227592128448e-06,
"loss": 0.1667,
"step": 2234
},
{
"epoch": 1.5706254392129304,
"grad_norm": 0.5406231676482339,
"learning_rate": 1.094778537418218e-06,
"loss": 0.1569,
"step": 2235
},
{
"epoch": 1.5713281799016163,
"grad_norm": 0.4789049280498609,
"learning_rate": 1.091334241268291e-06,
"loss": 0.1134,
"step": 2236
},
{
"epoch": 1.572030920590302,
"grad_norm": 0.5073056731351359,
"learning_rate": 1.0878947078755836e-06,
"loss": 0.1453,
"step": 2237
},
{
"epoch": 1.572733661278988,
"grad_norm": 0.524020076875916,
"learning_rate": 1.08445994143121e-06,
"loss": 0.1408,
"step": 2238
},
{
"epoch": 1.5734364019676739,
"grad_norm": 0.5254104049874896,
"learning_rate": 1.0810299461204749e-06,
"loss": 0.1513,
"step": 2239
},
{
"epoch": 1.5741391426563598,
"grad_norm": 0.5116747289633726,
"learning_rate": 1.0776047261228694e-06,
"loss": 0.1509,
"step": 2240
},
{
"epoch": 1.5748418833450457,
"grad_norm": 0.4981477241898844,
"learning_rate": 1.0741842856120665e-06,
"loss": 0.137,
"step": 2241
},
{
"epoch": 1.5755446240337316,
"grad_norm": 0.5442648850947261,
"learning_rate": 1.070768628755914e-06,
"loss": 0.1605,
"step": 2242
},
{
"epoch": 1.5762473647224176,
"grad_norm": 0.49407957712201855,
"learning_rate": 1.0673577597164352e-06,
"loss": 0.1437,
"step": 2243
},
{
"epoch": 1.5769501054111033,
"grad_norm": 0.4948569136977495,
"learning_rate": 1.0639516826498125e-06,
"loss": 0.1373,
"step": 2244
},
{
"epoch": 1.5776528460997892,
"grad_norm": 0.5129901488204297,
"learning_rate": 1.0605504017063927e-06,
"loss": 0.1377,
"step": 2245
},
{
"epoch": 1.578355586788475,
"grad_norm": 0.4884813921465927,
"learning_rate": 1.0571539210306785e-06,
"loss": 0.1306,
"step": 2246
},
{
"epoch": 1.5790583274771608,
"grad_norm": 0.533055317053586,
"learning_rate": 1.0537622447613249e-06,
"loss": 0.1593,
"step": 2247
},
{
"epoch": 1.5797610681658467,
"grad_norm": 0.5169736612637754,
"learning_rate": 1.050375377031132e-06,
"loss": 0.1343,
"step": 2248
},
{
"epoch": 1.5804638088545326,
"grad_norm": 0.5092996391909007,
"learning_rate": 1.0469933219670354e-06,
"loss": 0.1731,
"step": 2249
},
{
"epoch": 1.5811665495432186,
"grad_norm": 0.49312300954673655,
"learning_rate": 1.0436160836901138e-06,
"loss": 0.1369,
"step": 2250
},
{
"epoch": 1.5818692902319045,
"grad_norm": 0.5143062027674279,
"learning_rate": 1.0402436663155736e-06,
"loss": 0.1464,
"step": 2251
},
{
"epoch": 1.5825720309205904,
"grad_norm": 0.4961263841708869,
"learning_rate": 1.0368760739527455e-06,
"loss": 0.1462,
"step": 2252
},
{
"epoch": 1.5832747716092763,
"grad_norm": 0.48827827334800944,
"learning_rate": 1.0335133107050833e-06,
"loss": 0.1313,
"step": 2253
},
{
"epoch": 1.583977512297962,
"grad_norm": 0.5003738034616632,
"learning_rate": 1.0301553806701547e-06,
"loss": 0.1406,
"step": 2254
},
{
"epoch": 1.584680252986648,
"grad_norm": 0.5204509488609387,
"learning_rate": 1.0268022879396388e-06,
"loss": 0.1515,
"step": 2255
},
{
"epoch": 1.5853829936753336,
"grad_norm": 0.5342613945231212,
"learning_rate": 1.02345403659932e-06,
"loss": 0.1704,
"step": 2256
},
{
"epoch": 1.5860857343640196,
"grad_norm": 0.48880885015575143,
"learning_rate": 1.0201106307290842e-06,
"loss": 0.128,
"step": 2257
},
{
"epoch": 1.5867884750527055,
"grad_norm": 0.5122958838247167,
"learning_rate": 1.0167720744029118e-06,
"loss": 0.1508,
"step": 2258
},
{
"epoch": 1.5874912157413914,
"grad_norm": 0.5071165627608565,
"learning_rate": 1.0134383716888752e-06,
"loss": 0.1417,
"step": 2259
},
{
"epoch": 1.5881939564300773,
"grad_norm": 0.5290408040911058,
"learning_rate": 1.0101095266491323e-06,
"loss": 0.1611,
"step": 2260
},
{
"epoch": 1.5888966971187632,
"grad_norm": 0.4858876066191951,
"learning_rate": 1.006785543339921e-06,
"loss": 0.132,
"step": 2261
},
{
"epoch": 1.5895994378074492,
"grad_norm": 0.5256636395563555,
"learning_rate": 1.0034664258115561e-06,
"loss": 0.1634,
"step": 2262
},
{
"epoch": 1.590302178496135,
"grad_norm": 0.4956676211873417,
"learning_rate": 1.0001521781084233e-06,
"loss": 0.1297,
"step": 2263
},
{
"epoch": 1.5910049191848208,
"grad_norm": 0.5210150179348411,
"learning_rate": 9.968428042689738e-07,
"loss": 0.1547,
"step": 2264
},
{
"epoch": 1.5917076598735067,
"grad_norm": 0.4984418786059971,
"learning_rate": 9.935383083257199e-07,
"loss": 0.1435,
"step": 2265
},
{
"epoch": 1.5924104005621924,
"grad_norm": 0.4846669041708789,
"learning_rate": 9.9023869430523e-07,
"loss": 0.1151,
"step": 2266
},
{
"epoch": 1.5931131412508783,
"grad_norm": 0.49142196234118307,
"learning_rate": 9.869439662281276e-07,
"loss": 0.1247,
"step": 2267
},
{
"epoch": 1.5938158819395642,
"grad_norm": 0.5154558430938554,
"learning_rate": 9.836541281090757e-07,
"loss": 0.1307,
"step": 2268
},
{
"epoch": 1.5945186226282502,
"grad_norm": 0.5205658154982574,
"learning_rate": 9.803691839567835e-07,
"loss": 0.1515,
"step": 2269
},
{
"epoch": 1.595221363316936,
"grad_norm": 0.5115161976082128,
"learning_rate": 9.77089137773995e-07,
"loss": 0.1324,
"step": 2270
},
{
"epoch": 1.595924104005622,
"grad_norm": 0.5078002237780612,
"learning_rate": 9.738139935574893e-07,
"loss": 0.1468,
"step": 2271
},
{
"epoch": 1.596626844694308,
"grad_norm": 0.5206735967605656,
"learning_rate": 9.70543755298069e-07,
"loss": 0.1438,
"step": 2272
},
{
"epoch": 1.5973295853829936,
"grad_norm": 0.5432801333216214,
"learning_rate": 9.672784269805574e-07,
"loss": 0.1452,
"step": 2273
},
{
"epoch": 1.5980323260716796,
"grad_norm": 0.5450686717747824,
"learning_rate": 9.640180125837972e-07,
"loss": 0.1516,
"step": 2274
},
{
"epoch": 1.5987350667603655,
"grad_norm": 0.5039764969620342,
"learning_rate": 9.607625160806466e-07,
"loss": 0.1347,
"step": 2275
},
{
"epoch": 1.5994378074490512,
"grad_norm": 0.529784097768845,
"learning_rate": 9.575119414379657e-07,
"loss": 0.1622,
"step": 2276
},
{
"epoch": 1.600140548137737,
"grad_norm": 0.5345454544692056,
"learning_rate": 9.542662926166207e-07,
"loss": 0.1663,
"step": 2277
},
{
"epoch": 1.600843288826423,
"grad_norm": 0.4825191338212087,
"learning_rate": 9.510255735714735e-07,
"loss": 0.1285,
"step": 2278
},
{
"epoch": 1.601546029515109,
"grad_norm": 0.5374064787133747,
"learning_rate": 9.477897882513809e-07,
"loss": 0.156,
"step": 2279
},
{
"epoch": 1.6022487702037949,
"grad_norm": 0.5394896703211097,
"learning_rate": 9.445589405991862e-07,
"loss": 0.141,
"step": 2280
},
{
"epoch": 1.6029515108924808,
"grad_norm": 0.5074659761765667,
"learning_rate": 9.413330345517174e-07,
"loss": 0.1397,
"step": 2281
},
{
"epoch": 1.6036542515811667,
"grad_norm": 0.5542221038432015,
"learning_rate": 9.381120740397809e-07,
"loss": 0.1411,
"step": 2282
},
{
"epoch": 1.6043569922698524,
"grad_norm": 0.5317086337250758,
"learning_rate": 9.34896062988156e-07,
"loss": 0.17,
"step": 2283
},
{
"epoch": 1.6050597329585383,
"grad_norm": 0.5079594090701999,
"learning_rate": 9.316850053155923e-07,
"loss": 0.1335,
"step": 2284
},
{
"epoch": 1.605762473647224,
"grad_norm": 0.5197004214997037,
"learning_rate": 9.284789049348025e-07,
"loss": 0.1536,
"step": 2285
},
{
"epoch": 1.60646521433591,
"grad_norm": 0.5069837633405543,
"learning_rate": 9.252777657524598e-07,
"loss": 0.1351,
"step": 2286
},
{
"epoch": 1.6071679550245959,
"grad_norm": 0.5128782973377883,
"learning_rate": 9.220815916691911e-07,
"loss": 0.1524,
"step": 2287
},
{
"epoch": 1.6078706957132818,
"grad_norm": 0.5072606579831977,
"learning_rate": 9.18890386579574e-07,
"loss": 0.1364,
"step": 2288
},
{
"epoch": 1.6085734364019677,
"grad_norm": 0.5138978253319006,
"learning_rate": 9.157041543721307e-07,
"loss": 0.1506,
"step": 2289
},
{
"epoch": 1.6092761770906536,
"grad_norm": 0.5092956850533811,
"learning_rate": 9.125228989293234e-07,
"loss": 0.1452,
"step": 2290
},
{
"epoch": 1.6099789177793395,
"grad_norm": 0.5244667600954808,
"learning_rate": 9.093466241275551e-07,
"loss": 0.1718,
"step": 2291
},
{
"epoch": 1.6106816584680252,
"grad_norm": 0.5002388023948965,
"learning_rate": 9.061753338371509e-07,
"loss": 0.1375,
"step": 2292
},
{
"epoch": 1.6113843991567112,
"grad_norm": 0.5213143982929758,
"learning_rate": 9.030090319223689e-07,
"loss": 0.1458,
"step": 2293
},
{
"epoch": 1.612087139845397,
"grad_norm": 0.5217943373407234,
"learning_rate": 8.998477222413854e-07,
"loss": 0.1505,
"step": 2294
},
{
"epoch": 1.6127898805340828,
"grad_norm": 0.5210015888671756,
"learning_rate": 8.96691408646298e-07,
"loss": 0.1596,
"step": 2295
},
{
"epoch": 1.6134926212227687,
"grad_norm": 0.5113567748842857,
"learning_rate": 8.935400949831125e-07,
"loss": 0.1494,
"step": 2296
},
{
"epoch": 1.6141953619114546,
"grad_norm": 0.49175212721895384,
"learning_rate": 8.903937850917421e-07,
"loss": 0.1416,
"step": 2297
},
{
"epoch": 1.6148981026001406,
"grad_norm": 0.527821795899354,
"learning_rate": 8.87252482806003e-07,
"loss": 0.1558,
"step": 2298
},
{
"epoch": 1.6156008432888265,
"grad_norm": 0.5029335877582335,
"learning_rate": 8.841161919536134e-07,
"loss": 0.1403,
"step": 2299
},
{
"epoch": 1.6163035839775124,
"grad_norm": 0.48684100110647555,
"learning_rate": 8.809849163561812e-07,
"loss": 0.1321,
"step": 2300
},
{
"epoch": 1.6170063246661983,
"grad_norm": 0.49550472782171096,
"learning_rate": 8.778586598292055e-07,
"loss": 0.1307,
"step": 2301
},
{
"epoch": 1.617709065354884,
"grad_norm": 0.5096028642567304,
"learning_rate": 8.74737426182064e-07,
"loss": 0.1487,
"step": 2302
},
{
"epoch": 1.61841180604357,
"grad_norm": 0.5234130152806513,
"learning_rate": 8.716212192180223e-07,
"loss": 0.146,
"step": 2303
},
{
"epoch": 1.6191145467322556,
"grad_norm": 0.5320717266437079,
"learning_rate": 8.685100427342153e-07,
"loss": 0.1454,
"step": 2304
},
{
"epoch": 1.6198172874209416,
"grad_norm": 0.5079954187403964,
"learning_rate": 8.654039005216503e-07,
"loss": 0.14,
"step": 2305
},
{
"epoch": 1.6205200281096275,
"grad_norm": 0.5057831303192136,
"learning_rate": 8.623027963651998e-07,
"loss": 0.1419,
"step": 2306
},
{
"epoch": 1.6212227687983134,
"grad_norm": 0.5204770292694081,
"learning_rate": 8.592067340435961e-07,
"loss": 0.1428,
"step": 2307
},
{
"epoch": 1.6219255094869993,
"grad_norm": 0.5239518237454117,
"learning_rate": 8.561157173294305e-07,
"loss": 0.1466,
"step": 2308
},
{
"epoch": 1.6226282501756852,
"grad_norm": 0.4840991520517137,
"learning_rate": 8.530297499891444e-07,
"loss": 0.133,
"step": 2309
},
{
"epoch": 1.6233309908643712,
"grad_norm": 0.5273419965943936,
"learning_rate": 8.499488357830266e-07,
"loss": 0.1432,
"step": 2310
},
{
"epoch": 1.624033731553057,
"grad_norm": 0.5175758840691818,
"learning_rate": 8.468729784652091e-07,
"loss": 0.1491,
"step": 2311
},
{
"epoch": 1.6247364722417428,
"grad_norm": 0.47457090160700066,
"learning_rate": 8.438021817836617e-07,
"loss": 0.115,
"step": 2312
},
{
"epoch": 1.6254392129304287,
"grad_norm": 0.5277798554783621,
"learning_rate": 8.407364494801879e-07,
"loss": 0.1762,
"step": 2313
},
{
"epoch": 1.6261419536191144,
"grad_norm": 0.5300295294715935,
"learning_rate": 8.376757852904194e-07,
"loss": 0.1566,
"step": 2314
},
{
"epoch": 1.6268446943078003,
"grad_norm": 0.527024097049886,
"learning_rate": 8.346201929438158e-07,
"loss": 0.1508,
"step": 2315
},
{
"epoch": 1.6275474349964862,
"grad_norm": 0.5173928269861602,
"learning_rate": 8.31569676163651e-07,
"loss": 0.143,
"step": 2316
},
{
"epoch": 1.6282501756851722,
"grad_norm": 0.4880270080993743,
"learning_rate": 8.285242386670178e-07,
"loss": 0.1266,
"step": 2317
},
{
"epoch": 1.628952916373858,
"grad_norm": 0.4905342764166846,
"learning_rate": 8.254838841648188e-07,
"loss": 0.1298,
"step": 2318
},
{
"epoch": 1.629655657062544,
"grad_norm": 0.5292812716220816,
"learning_rate": 8.224486163617651e-07,
"loss": 0.1553,
"step": 2319
},
{
"epoch": 1.63035839775123,
"grad_norm": 0.5375650954209948,
"learning_rate": 8.194184389563681e-07,
"loss": 0.152,
"step": 2320
},
{
"epoch": 1.6310611384399156,
"grad_norm": 0.5249899005937064,
"learning_rate": 8.163933556409332e-07,
"loss": 0.1655,
"step": 2321
},
{
"epoch": 1.6317638791286015,
"grad_norm": 0.5008579174864624,
"learning_rate": 8.133733701015623e-07,
"loss": 0.1448,
"step": 2322
},
{
"epoch": 1.6324666198172875,
"grad_norm": 0.5275812524416689,
"learning_rate": 8.103584860181468e-07,
"loss": 0.1427,
"step": 2323
},
{
"epoch": 1.6331693605059732,
"grad_norm": 0.5208239793531394,
"learning_rate": 8.073487070643588e-07,
"loss": 0.1431,
"step": 2324
},
{
"epoch": 1.633872101194659,
"grad_norm": 0.5097900629432118,
"learning_rate": 8.043440369076522e-07,
"loss": 0.1458,
"step": 2325
},
{
"epoch": 1.634574841883345,
"grad_norm": 0.5095005341957477,
"learning_rate": 8.013444792092506e-07,
"loss": 0.1449,
"step": 2326
},
{
"epoch": 1.635277582572031,
"grad_norm": 0.5345229753716869,
"learning_rate": 7.98350037624156e-07,
"loss": 0.1649,
"step": 2327
},
{
"epoch": 1.6359803232607169,
"grad_norm": 0.4803018361460086,
"learning_rate": 7.953607158011311e-07,
"loss": 0.1346,
"step": 2328
},
{
"epoch": 1.6366830639494028,
"grad_norm": 0.5256630721228922,
"learning_rate": 7.923765173827003e-07,
"loss": 0.1471,
"step": 2329
},
{
"epoch": 1.6373858046380887,
"grad_norm": 0.5239766646899795,
"learning_rate": 7.893974460051474e-07,
"loss": 0.1599,
"step": 2330
},
{
"epoch": 1.6380885453267744,
"grad_norm": 0.4619956572855626,
"learning_rate": 7.864235052985059e-07,
"loss": 0.1119,
"step": 2331
},
{
"epoch": 1.6387912860154603,
"grad_norm": 0.48974247545807786,
"learning_rate": 7.834546988865605e-07,
"loss": 0.1343,
"step": 2332
},
{
"epoch": 1.639494026704146,
"grad_norm": 0.5059755421676134,
"learning_rate": 7.804910303868374e-07,
"loss": 0.1504,
"step": 2333
},
{
"epoch": 1.640196767392832,
"grad_norm": 0.506983971344678,
"learning_rate": 7.775325034106024e-07,
"loss": 0.1425,
"step": 2334
},
{
"epoch": 1.6408995080815179,
"grad_norm": 0.5094355686094365,
"learning_rate": 7.745791215628596e-07,
"loss": 0.1527,
"step": 2335
},
{
"epoch": 1.6416022487702038,
"grad_norm": 0.505821708619955,
"learning_rate": 7.716308884423385e-07,
"loss": 0.1572,
"step": 2336
},
{
"epoch": 1.6423049894588897,
"grad_norm": 0.4875178197686803,
"learning_rate": 7.686878076414984e-07,
"loss": 0.1275,
"step": 2337
},
{
"epoch": 1.6430077301475756,
"grad_norm": 0.5082564428895446,
"learning_rate": 7.657498827465176e-07,
"loss": 0.1449,
"step": 2338
},
{
"epoch": 1.6437104708362615,
"grad_norm": 0.5153668179679097,
"learning_rate": 7.628171173372973e-07,
"loss": 0.1621,
"step": 2339
},
{
"epoch": 1.6444132115249475,
"grad_norm": 0.555828918714009,
"learning_rate": 7.598895149874453e-07,
"loss": 0.1525,
"step": 2340
},
{
"epoch": 1.6451159522136332,
"grad_norm": 0.47107537304470376,
"learning_rate": 7.569670792642819e-07,
"loss": 0.1132,
"step": 2341
},
{
"epoch": 1.645818692902319,
"grad_norm": 0.4916614358769194,
"learning_rate": 7.540498137288294e-07,
"loss": 0.1228,
"step": 2342
},
{
"epoch": 1.6465214335910048,
"grad_norm": 0.5035749634030029,
"learning_rate": 7.51137721935814e-07,
"loss": 0.1423,
"step": 2343
},
{
"epoch": 1.6472241742796907,
"grad_norm": 0.5296296563991698,
"learning_rate": 7.482308074336558e-07,
"loss": 0.1652,
"step": 2344
},
{
"epoch": 1.6479269149683766,
"grad_norm": 0.6096326429096276,
"learning_rate": 7.453290737644631e-07,
"loss": 0.1345,
"step": 2345
},
{
"epoch": 1.6486296556570625,
"grad_norm": 0.5071869408599778,
"learning_rate": 7.42432524464034e-07,
"loss": 0.1324,
"step": 2346
},
{
"epoch": 1.6493323963457485,
"grad_norm": 0.5387024296544436,
"learning_rate": 7.39541163061852e-07,
"loss": 0.1526,
"step": 2347
},
{
"epoch": 1.6500351370344344,
"grad_norm": 0.480203002294713,
"learning_rate": 7.366549930810751e-07,
"loss": 0.1286,
"step": 2348
},
{
"epoch": 1.6507378777231203,
"grad_norm": 0.49344298898379824,
"learning_rate": 7.337740180385384e-07,
"loss": 0.1308,
"step": 2349
},
{
"epoch": 1.651440618411806,
"grad_norm": 0.49175543942265804,
"learning_rate": 7.308982414447407e-07,
"loss": 0.1361,
"step": 2350
},
{
"epoch": 1.652143359100492,
"grad_norm": 0.4909661959594217,
"learning_rate": 7.28027666803856e-07,
"loss": 0.1137,
"step": 2351
},
{
"epoch": 1.6528460997891778,
"grad_norm": 0.5162636490307603,
"learning_rate": 7.251622976137129e-07,
"loss": 0.1422,
"step": 2352
},
{
"epoch": 1.6535488404778635,
"grad_norm": 0.5090125943506136,
"learning_rate": 7.22302137365799e-07,
"loss": 0.1496,
"step": 2353
},
{
"epoch": 1.6542515811665495,
"grad_norm": 0.5116383303271588,
"learning_rate": 7.194471895452548e-07,
"loss": 0.133,
"step": 2354
},
{
"epoch": 1.6549543218552354,
"grad_norm": 0.493822635650948,
"learning_rate": 7.165974576308693e-07,
"loss": 0.1405,
"step": 2355
},
{
"epoch": 1.6556570625439213,
"grad_norm": 0.4905807719886315,
"learning_rate": 7.137529450950759e-07,
"loss": 0.122,
"step": 2356
},
{
"epoch": 1.6563598032326072,
"grad_norm": 0.5004371986255278,
"learning_rate": 7.109136554039475e-07,
"loss": 0.1529,
"step": 2357
},
{
"epoch": 1.6570625439212932,
"grad_norm": 0.5468228810965717,
"learning_rate": 7.080795920171934e-07,
"loss": 0.1618,
"step": 2358
},
{
"epoch": 1.657765284609979,
"grad_norm": 0.5425018208720173,
"learning_rate": 7.052507583881557e-07,
"loss": 0.1596,
"step": 2359
},
{
"epoch": 1.6584680252986648,
"grad_norm": 0.5031612125939998,
"learning_rate": 7.02427157963802e-07,
"loss": 0.1458,
"step": 2360
},
{
"epoch": 1.6591707659873507,
"grad_norm": 0.512872070538685,
"learning_rate": 6.996087941847246e-07,
"loss": 0.1461,
"step": 2361
},
{
"epoch": 1.6598735066760364,
"grad_norm": 0.5174019394695357,
"learning_rate": 6.96795670485133e-07,
"loss": 0.1611,
"step": 2362
},
{
"epoch": 1.6605762473647223,
"grad_norm": 0.5004973374301548,
"learning_rate": 6.93987790292856e-07,
"loss": 0.1278,
"step": 2363
},
{
"epoch": 1.6612789880534082,
"grad_norm": 0.5377978395806923,
"learning_rate": 6.911851570293271e-07,
"loss": 0.1579,
"step": 2364
},
{
"epoch": 1.6619817287420942,
"grad_norm": 0.5412554742787239,
"learning_rate": 6.883877741095907e-07,
"loss": 0.153,
"step": 2365
},
{
"epoch": 1.66268446943078,
"grad_norm": 0.5332035715220973,
"learning_rate": 6.855956449422907e-07,
"loss": 0.1519,
"step": 2366
},
{
"epoch": 1.663387210119466,
"grad_norm": 0.5070254553090329,
"learning_rate": 6.828087729296734e-07,
"loss": 0.1268,
"step": 2367
},
{
"epoch": 1.664089950808152,
"grad_norm": 0.5147766086337985,
"learning_rate": 6.800271614675763e-07,
"loss": 0.1327,
"step": 2368
},
{
"epoch": 1.6647926914968376,
"grad_norm": 0.4794259826824323,
"learning_rate": 6.772508139454248e-07,
"loss": 0.128,
"step": 2369
},
{
"epoch": 1.6654954321855235,
"grad_norm": 0.5002922450925902,
"learning_rate": 6.744797337462322e-07,
"loss": 0.1335,
"step": 2370
},
{
"epoch": 1.6661981728742095,
"grad_norm": 0.5269102344516572,
"learning_rate": 6.717139242465965e-07,
"loss": 0.15,
"step": 2371
},
{
"epoch": 1.6669009135628952,
"grad_norm": 0.46645795900459636,
"learning_rate": 6.689533888166893e-07,
"loss": 0.1229,
"step": 2372
},
{
"epoch": 1.667603654251581,
"grad_norm": 0.5598120255378365,
"learning_rate": 6.661981308202581e-07,
"loss": 0.1814,
"step": 2373
},
{
"epoch": 1.668306394940267,
"grad_norm": 0.49918107426335867,
"learning_rate": 6.634481536146153e-07,
"loss": 0.1269,
"step": 2374
},
{
"epoch": 1.669009135628953,
"grad_norm": 0.4895121066286522,
"learning_rate": 6.607034605506451e-07,
"loss": 0.1322,
"step": 2375
},
{
"epoch": 1.6697118763176388,
"grad_norm": 0.520883377380827,
"learning_rate": 6.579640549727884e-07,
"loss": 0.1423,
"step": 2376
},
{
"epoch": 1.6704146170063248,
"grad_norm": 0.4975352282908073,
"learning_rate": 6.552299402190443e-07,
"loss": 0.1264,
"step": 2377
},
{
"epoch": 1.6711173576950107,
"grad_norm": 0.48098684498429956,
"learning_rate": 6.525011196209657e-07,
"loss": 0.1231,
"step": 2378
},
{
"epoch": 1.6718200983836964,
"grad_norm": 0.5165363758513422,
"learning_rate": 6.497775965036545e-07,
"loss": 0.1482,
"step": 2379
},
{
"epoch": 1.6725228390723823,
"grad_norm": 0.5114743479853605,
"learning_rate": 6.470593741857562e-07,
"loss": 0.1432,
"step": 2380
},
{
"epoch": 1.673225579761068,
"grad_norm": 0.5273654787510778,
"learning_rate": 6.443464559794583e-07,
"loss": 0.1649,
"step": 2381
},
{
"epoch": 1.673928320449754,
"grad_norm": 0.5159148551260601,
"learning_rate": 6.416388451904848e-07,
"loss": 0.1557,
"step": 2382
},
{
"epoch": 1.6746310611384398,
"grad_norm": 0.5191454473362067,
"learning_rate": 6.389365451180928e-07,
"loss": 0.1477,
"step": 2383
},
{
"epoch": 1.6753338018271258,
"grad_norm": 0.5687120624192377,
"learning_rate": 6.362395590550685e-07,
"loss": 0.1482,
"step": 2384
},
{
"epoch": 1.6760365425158117,
"grad_norm": 0.526467280906606,
"learning_rate": 6.335478902877218e-07,
"loss": 0.1507,
"step": 2385
},
{
"epoch": 1.6767392832044976,
"grad_norm": 0.49784562198830185,
"learning_rate": 6.308615420958847e-07,
"loss": 0.1236,
"step": 2386
},
{
"epoch": 1.6774420238931835,
"grad_norm": 0.5021605236664165,
"learning_rate": 6.281805177529055e-07,
"loss": 0.1364,
"step": 2387
},
{
"epoch": 1.6781447645818695,
"grad_norm": 0.5433621471341872,
"learning_rate": 6.255048205256447e-07,
"loss": 0.1545,
"step": 2388
},
{
"epoch": 1.6788475052705552,
"grad_norm": 0.5086447206750199,
"learning_rate": 6.228344536744735e-07,
"loss": 0.1427,
"step": 2389
},
{
"epoch": 1.679550245959241,
"grad_norm": 0.488672149468298,
"learning_rate": 6.201694204532638e-07,
"loss": 0.1362,
"step": 2390
},
{
"epoch": 1.6802529866479268,
"grad_norm": 0.5015968183374993,
"learning_rate": 6.175097241093947e-07,
"loss": 0.1191,
"step": 2391
},
{
"epoch": 1.6809557273366127,
"grad_norm": 0.507982140467693,
"learning_rate": 6.148553678837388e-07,
"loss": 0.1361,
"step": 2392
},
{
"epoch": 1.6816584680252986,
"grad_norm": 0.543812531672844,
"learning_rate": 6.122063550106594e-07,
"loss": 0.1635,
"step": 2393
},
{
"epoch": 1.6823612087139845,
"grad_norm": 0.5051667241659186,
"learning_rate": 6.095626887180106e-07,
"loss": 0.1436,
"step": 2394
},
{
"epoch": 1.6830639494026705,
"grad_norm": 0.5190112740587888,
"learning_rate": 6.06924372227135e-07,
"loss": 0.1609,
"step": 2395
},
{
"epoch": 1.6837666900913564,
"grad_norm": 0.5012651989538832,
"learning_rate": 6.042914087528529e-07,
"loss": 0.1466,
"step": 2396
},
{
"epoch": 1.6844694307800423,
"grad_norm": 0.5342634253220491,
"learning_rate": 6.016638015034631e-07,
"loss": 0.1531,
"step": 2397
},
{
"epoch": 1.685172171468728,
"grad_norm": 0.5104916701635575,
"learning_rate": 5.990415536807348e-07,
"loss": 0.1368,
"step": 2398
},
{
"epoch": 1.685874912157414,
"grad_norm": 0.5402576298863184,
"learning_rate": 5.964246684799113e-07,
"loss": 0.1463,
"step": 2399
},
{
"epoch": 1.6865776528460998,
"grad_norm": 0.49478922563864103,
"learning_rate": 5.938131490896992e-07,
"loss": 0.132,
"step": 2400
},
{
"epoch": 1.6872803935347855,
"grad_norm": 0.4848426418999095,
"learning_rate": 5.912069986922664e-07,
"loss": 0.1284,
"step": 2401
},
{
"epoch": 1.6879831342234715,
"grad_norm": 0.5201410319194085,
"learning_rate": 5.886062204632392e-07,
"loss": 0.1666,
"step": 2402
},
{
"epoch": 1.6886858749121574,
"grad_norm": 0.48047980705813403,
"learning_rate": 5.860108175716983e-07,
"loss": 0.1295,
"step": 2403
},
{
"epoch": 1.6893886156008433,
"grad_norm": 0.4922821220869354,
"learning_rate": 5.834207931801733e-07,
"loss": 0.1335,
"step": 2404
},
{
"epoch": 1.6900913562895292,
"grad_norm": 0.4867400451453754,
"learning_rate": 5.808361504446413e-07,
"loss": 0.1362,
"step": 2405
},
{
"epoch": 1.6907940969782151,
"grad_norm": 0.49058499289490437,
"learning_rate": 5.78256892514521e-07,
"loss": 0.1328,
"step": 2406
},
{
"epoch": 1.691496837666901,
"grad_norm": 0.5098413704188566,
"learning_rate": 5.756830225326692e-07,
"loss": 0.1331,
"step": 2407
},
{
"epoch": 1.6921995783555868,
"grad_norm": 0.4927926778545796,
"learning_rate": 5.731145436353796e-07,
"loss": 0.1393,
"step": 2408
},
{
"epoch": 1.6929023190442727,
"grad_norm": 0.5310607286670721,
"learning_rate": 5.705514589523742e-07,
"loss": 0.1601,
"step": 2409
},
{
"epoch": 1.6936050597329584,
"grad_norm": 0.5102362802375777,
"learning_rate": 5.679937716068029e-07,
"loss": 0.1269,
"step": 2410
},
{
"epoch": 1.6943078004216443,
"grad_norm": 0.4963369484791542,
"learning_rate": 5.654414847152401e-07,
"loss": 0.1266,
"step": 2411
},
{
"epoch": 1.6950105411103302,
"grad_norm": 0.4979253801389448,
"learning_rate": 5.628946013876779e-07,
"loss": 0.1245,
"step": 2412
},
{
"epoch": 1.6957132817990161,
"grad_norm": 0.4706047744648595,
"learning_rate": 5.603531247275251e-07,
"loss": 0.1295,
"step": 2413
},
{
"epoch": 1.696416022487702,
"grad_norm": 0.5215378117236438,
"learning_rate": 5.578170578316017e-07,
"loss": 0.1449,
"step": 2414
},
{
"epoch": 1.697118763176388,
"grad_norm": 0.5105605056053008,
"learning_rate": 5.552864037901379e-07,
"loss": 0.1613,
"step": 2415
},
{
"epoch": 1.697821503865074,
"grad_norm": 0.5064441659619855,
"learning_rate": 5.527611656867666e-07,
"loss": 0.1441,
"step": 2416
},
{
"epoch": 1.6985242445537596,
"grad_norm": 0.5075552772873742,
"learning_rate": 5.502413465985196e-07,
"loss": 0.1528,
"step": 2417
},
{
"epoch": 1.6992269852424455,
"grad_norm": 0.4746001706879934,
"learning_rate": 5.477269495958276e-07,
"loss": 0.1174,
"step": 2418
},
{
"epoch": 1.6999297259311315,
"grad_norm": 0.5052458891568644,
"learning_rate": 5.452179777425159e-07,
"loss": 0.1379,
"step": 2419
},
{
"epoch": 1.7006324666198172,
"grad_norm": 0.5213509493117172,
"learning_rate": 5.427144340957968e-07,
"loss": 0.146,
"step": 2420
},
{
"epoch": 1.701335207308503,
"grad_norm": 0.5330086349381643,
"learning_rate": 5.402163217062695e-07,
"loss": 0.158,
"step": 2421
},
{
"epoch": 1.702037947997189,
"grad_norm": 0.5214808636163243,
"learning_rate": 5.377236436179123e-07,
"loss": 0.1591,
"step": 2422
},
{
"epoch": 1.702740688685875,
"grad_norm": 0.5093912142497137,
"learning_rate": 5.352364028680868e-07,
"loss": 0.1124,
"step": 2423
},
{
"epoch": 1.7034434293745608,
"grad_norm": 0.5016904016774145,
"learning_rate": 5.327546024875252e-07,
"loss": 0.1292,
"step": 2424
},
{
"epoch": 1.7041461700632468,
"grad_norm": 0.5246425921614887,
"learning_rate": 5.302782455003313e-07,
"loss": 0.1581,
"step": 2425
},
{
"epoch": 1.7048489107519327,
"grad_norm": 0.508729816721276,
"learning_rate": 5.278073349239776e-07,
"loss": 0.148,
"step": 2426
},
{
"epoch": 1.7055516514406184,
"grad_norm": 0.4953460594490315,
"learning_rate": 5.253418737692983e-07,
"loss": 0.1222,
"step": 2427
},
{
"epoch": 1.7062543921293043,
"grad_norm": 0.46518839809506507,
"learning_rate": 5.228818650404883e-07,
"loss": 0.1167,
"step": 2428
},
{
"epoch": 1.70695713281799,
"grad_norm": 0.5300380812375458,
"learning_rate": 5.204273117350983e-07,
"loss": 0.1638,
"step": 2429
},
{
"epoch": 1.707659873506676,
"grad_norm": 0.5160670771529812,
"learning_rate": 5.179782168440317e-07,
"loss": 0.1467,
"step": 2430
},
{
"epoch": 1.7083626141953618,
"grad_norm": 0.5078286556518824,
"learning_rate": 5.155345833515408e-07,
"loss": 0.1471,
"step": 2431
},
{
"epoch": 1.7090653548840478,
"grad_norm": 0.5213953654428531,
"learning_rate": 5.130964142352223e-07,
"loss": 0.1425,
"step": 2432
},
{
"epoch": 1.7097680955727337,
"grad_norm": 0.4912589892481834,
"learning_rate": 5.106637124660164e-07,
"loss": 0.1303,
"step": 2433
},
{
"epoch": 1.7104708362614196,
"grad_norm": 0.5098794137430794,
"learning_rate": 5.082364810081991e-07,
"loss": 0.1472,
"step": 2434
},
{
"epoch": 1.7111735769501055,
"grad_norm": 0.5097065734170783,
"learning_rate": 5.058147228193828e-07,
"loss": 0.1432,
"step": 2435
},
{
"epoch": 1.7118763176387914,
"grad_norm": 0.5197966266223981,
"learning_rate": 5.033984408505083e-07,
"loss": 0.1565,
"step": 2436
},
{
"epoch": 1.7125790583274771,
"grad_norm": 0.5395624881326725,
"learning_rate": 5.00987638045845e-07,
"loss": 0.1627,
"step": 2437
},
{
"epoch": 1.713281799016163,
"grad_norm": 0.5050550106778823,
"learning_rate": 4.985823173429871e-07,
"loss": 0.1364,
"step": 2438
},
{
"epoch": 1.7139845397048488,
"grad_norm": 0.5133659277558639,
"learning_rate": 4.96182481672845e-07,
"loss": 0.168,
"step": 2439
},
{
"epoch": 1.7146872803935347,
"grad_norm": 0.4957226425404724,
"learning_rate": 4.937881339596518e-07,
"loss": 0.1354,
"step": 2440
},
{
"epoch": 1.7153900210822206,
"grad_norm": 0.5038820359283196,
"learning_rate": 4.913992771209458e-07,
"loss": 0.1569,
"step": 2441
},
{
"epoch": 1.7160927617709065,
"grad_norm": 0.4646110309365998,
"learning_rate": 4.890159140675787e-07,
"loss": 0.1187,
"step": 2442
},
{
"epoch": 1.7167955024595924,
"grad_norm": 0.5124760939995081,
"learning_rate": 4.866380477037097e-07,
"loss": 0.1672,
"step": 2443
},
{
"epoch": 1.7174982431482784,
"grad_norm": 0.5222516788464301,
"learning_rate": 4.842656809267976e-07,
"loss": 0.1507,
"step": 2444
},
{
"epoch": 1.7182009838369643,
"grad_norm": 0.4985368526066697,
"learning_rate": 4.818988166276006e-07,
"loss": 0.1506,
"step": 2445
},
{
"epoch": 1.71890372452565,
"grad_norm": 0.449723651901996,
"learning_rate": 4.795374576901696e-07,
"loss": 0.1125,
"step": 2446
},
{
"epoch": 1.719606465214336,
"grad_norm": 0.5178151029260145,
"learning_rate": 4.771816069918522e-07,
"loss": 0.1497,
"step": 2447
},
{
"epoch": 1.7203092059030218,
"grad_norm": 0.45657534578624065,
"learning_rate": 4.7483126740328013e-07,
"loss": 0.1233,
"step": 2448
},
{
"epoch": 1.7210119465917075,
"grad_norm": 0.5025267713102743,
"learning_rate": 4.7248644178837176e-07,
"loss": 0.1345,
"step": 2449
},
{
"epoch": 1.7217146872803935,
"grad_norm": 0.5151146709386307,
"learning_rate": 4.7014713300432504e-07,
"loss": 0.1535,
"step": 2450
},
{
"epoch": 1.7224174279690794,
"grad_norm": 0.5209287261898038,
"learning_rate": 4.6781334390161745e-07,
"loss": 0.1431,
"step": 2451
},
{
"epoch": 1.7231201686577653,
"grad_norm": 0.5120122620271434,
"learning_rate": 4.6548507732399826e-07,
"loss": 0.1287,
"step": 2452
},
{
"epoch": 1.7238229093464512,
"grad_norm": 0.46997281974094524,
"learning_rate": 4.631623361084903e-07,
"loss": 0.1254,
"step": 2453
},
{
"epoch": 1.7245256500351371,
"grad_norm": 0.5192006381261042,
"learning_rate": 4.6084512308538165e-07,
"loss": 0.1562,
"step": 2454
},
{
"epoch": 1.725228390723823,
"grad_norm": 0.5008623260500528,
"learning_rate": 4.585334410782244e-07,
"loss": 0.1263,
"step": 2455
},
{
"epoch": 1.7259311314125088,
"grad_norm": 0.5143719878493999,
"learning_rate": 4.562272929038325e-07,
"loss": 0.1552,
"step": 2456
},
{
"epoch": 1.7266338721011947,
"grad_norm": 0.548662826223495,
"learning_rate": 4.539266813722748e-07,
"loss": 0.1502,
"step": 2457
},
{
"epoch": 1.7273366127898804,
"grad_norm": 0.5018819839038278,
"learning_rate": 4.51631609286875e-07,
"loss": 0.1388,
"step": 2458
},
{
"epoch": 1.7280393534785663,
"grad_norm": 0.5207434511563519,
"learning_rate": 4.4934207944420604e-07,
"loss": 0.1399,
"step": 2459
},
{
"epoch": 1.7287420941672522,
"grad_norm": 0.5425488205953698,
"learning_rate": 4.4705809463409077e-07,
"loss": 0.165,
"step": 2460
},
{
"epoch": 1.7294448348559381,
"grad_norm": 0.5037567942770113,
"learning_rate": 4.447796576395896e-07,
"loss": 0.1388,
"step": 2461
},
{
"epoch": 1.730147575544624,
"grad_norm": 0.5358452059638205,
"learning_rate": 4.425067712370074e-07,
"loss": 0.1641,
"step": 2462
},
{
"epoch": 1.73085031623331,
"grad_norm": 0.5014671352903216,
"learning_rate": 4.40239438195883e-07,
"loss": 0.133,
"step": 2463
},
{
"epoch": 1.731553056921996,
"grad_norm": 0.5056100677702643,
"learning_rate": 4.379776612789921e-07,
"loss": 0.1471,
"step": 2464
},
{
"epoch": 1.7322557976106818,
"grad_norm": 0.500848117316493,
"learning_rate": 4.357214432423351e-07,
"loss": 0.1442,
"step": 2465
},
{
"epoch": 1.7329585382993675,
"grad_norm": 0.49673774404191945,
"learning_rate": 4.334707868351423e-07,
"loss": 0.1315,
"step": 2466
},
{
"epoch": 1.7336612789880534,
"grad_norm": 0.5726501902979345,
"learning_rate": 4.312256947998655e-07,
"loss": 0.1805,
"step": 2467
},
{
"epoch": 1.7343640196767391,
"grad_norm": 0.48459402143841357,
"learning_rate": 4.2898616987217866e-07,
"loss": 0.1383,
"step": 2468
},
{
"epoch": 1.735066760365425,
"grad_norm": 0.49921590652395076,
"learning_rate": 4.2675221478096995e-07,
"loss": 0.1285,
"step": 2469
},
{
"epoch": 1.735769501054111,
"grad_norm": 0.5216661199055698,
"learning_rate": 4.245238322483386e-07,
"loss": 0.1456,
"step": 2470
},
{
"epoch": 1.736472241742797,
"grad_norm": 0.4987564923420657,
"learning_rate": 4.223010249895987e-07,
"loss": 0.1469,
"step": 2471
},
{
"epoch": 1.7371749824314828,
"grad_norm": 0.5366348222163734,
"learning_rate": 4.2008379571326753e-07,
"loss": 0.1646,
"step": 2472
},
{
"epoch": 1.7378777231201687,
"grad_norm": 0.4779108172235227,
"learning_rate": 4.178721471210662e-07,
"loss": 0.1113,
"step": 2473
},
{
"epoch": 1.7385804638088547,
"grad_norm": 0.48603005090548207,
"learning_rate": 4.156660819079156e-07,
"loss": 0.1378,
"step": 2474
},
{
"epoch": 1.7392832044975404,
"grad_norm": 0.4981575526182675,
"learning_rate": 4.134656027619333e-07,
"loss": 0.1309,
"step": 2475
},
{
"epoch": 1.7399859451862263,
"grad_norm": 0.4967251090524896,
"learning_rate": 4.1127071236442993e-07,
"loss": 0.133,
"step": 2476
},
{
"epoch": 1.7406886858749122,
"grad_norm": 0.482832954527618,
"learning_rate": 4.090814133899068e-07,
"loss": 0.1326,
"step": 2477
},
{
"epoch": 1.741391426563598,
"grad_norm": 0.5066810187945623,
"learning_rate": 4.06897708506051e-07,
"loss": 0.1419,
"step": 2478
},
{
"epoch": 1.7420941672522838,
"grad_norm": 0.5068986215582441,
"learning_rate": 4.047196003737347e-07,
"loss": 0.1455,
"step": 2479
},
{
"epoch": 1.7427969079409698,
"grad_norm": 0.5032666697164222,
"learning_rate": 4.025470916470081e-07,
"loss": 0.1572,
"step": 2480
},
{
"epoch": 1.7434996486296557,
"grad_norm": 0.4946885703818128,
"learning_rate": 4.0038018497310096e-07,
"loss": 0.1288,
"step": 2481
},
{
"epoch": 1.7442023893183416,
"grad_norm": 0.4890875664149045,
"learning_rate": 3.98218882992415e-07,
"loss": 0.1403,
"step": 2482
},
{
"epoch": 1.7449051300070275,
"grad_norm": 0.4983759132183108,
"learning_rate": 3.960631883385224e-07,
"loss": 0.136,
"step": 2483
},
{
"epoch": 1.7456078706957134,
"grad_norm": 0.508545575257902,
"learning_rate": 3.939131036381666e-07,
"loss": 0.1461,
"step": 2484
},
{
"epoch": 1.7463106113843991,
"grad_norm": 0.4825280158847343,
"learning_rate": 3.91768631511249e-07,
"loss": 0.1305,
"step": 2485
},
{
"epoch": 1.747013352073085,
"grad_norm": 0.5051276013993659,
"learning_rate": 3.8962977457083663e-07,
"loss": 0.1473,
"step": 2486
},
{
"epoch": 1.7477160927617708,
"grad_norm": 0.5111228805661661,
"learning_rate": 3.874965354231514e-07,
"loss": 0.1424,
"step": 2487
},
{
"epoch": 1.7484188334504567,
"grad_norm": 0.5019801408898831,
"learning_rate": 3.8536891666757446e-07,
"loss": 0.1518,
"step": 2488
},
{
"epoch": 1.7491215741391426,
"grad_norm": 0.5173740334228242,
"learning_rate": 3.832469208966333e-07,
"loss": 0.1438,
"step": 2489
},
{
"epoch": 1.7498243148278285,
"grad_norm": 0.5400760960183123,
"learning_rate": 3.8113055069600555e-07,
"loss": 0.1435,
"step": 2490
},
{
"epoch": 1.7505270555165144,
"grad_norm": 0.5121676217669354,
"learning_rate": 3.790198086445146e-07,
"loss": 0.1426,
"step": 2491
},
{
"epoch": 1.7512297962052004,
"grad_norm": 0.5257987208072362,
"learning_rate": 3.7691469731412635e-07,
"loss": 0.1425,
"step": 2492
},
{
"epoch": 1.7519325368938863,
"grad_norm": 0.5039463710418196,
"learning_rate": 3.7481521926994504e-07,
"loss": 0.1251,
"step": 2493
},
{
"epoch": 1.752635277582572,
"grad_norm": 0.4819301001498982,
"learning_rate": 3.7272137707020875e-07,
"loss": 0.1331,
"step": 2494
},
{
"epoch": 1.753338018271258,
"grad_norm": 0.5166677476186342,
"learning_rate": 3.7063317326629043e-07,
"loss": 0.1596,
"step": 2495
},
{
"epoch": 1.7540407589599438,
"grad_norm": 0.5234064687149996,
"learning_rate": 3.685506104026931e-07,
"loss": 0.1629,
"step": 2496
},
{
"epoch": 1.7547434996486295,
"grad_norm": 0.48338946523379994,
"learning_rate": 3.6647369101704465e-07,
"loss": 0.1165,
"step": 2497
},
{
"epoch": 1.7554462403373154,
"grad_norm": 0.49272297433420176,
"learning_rate": 3.644024176400962e-07,
"loss": 0.1143,
"step": 2498
},
{
"epoch": 1.7561489810260014,
"grad_norm": 0.5037259028058367,
"learning_rate": 3.623367927957211e-07,
"loss": 0.1479,
"step": 2499
},
{
"epoch": 1.7568517217146873,
"grad_norm": 0.4816989043292015,
"learning_rate": 3.602768190009076e-07,
"loss": 0.1367,
"step": 2500
},
{
"epoch": 1.7568517217146873,
"eval_loss": 0.1807616651058197,
"eval_runtime": 10.8673,
"eval_samples_per_second": 21.164,
"eval_steps_per_second": 5.337,
"step": 2500
},
{
"epoch": 1.7575544624033732,
"grad_norm": 0.47584583406463116,
"learning_rate": 3.5822249876575897e-07,
"loss": 0.114,
"step": 2501
},
{
"epoch": 1.7582572030920591,
"grad_norm": 0.512319533865695,
"learning_rate": 3.561738345934901e-07,
"loss": 0.1422,
"step": 2502
},
{
"epoch": 1.758959943780745,
"grad_norm": 0.502050085228889,
"learning_rate": 3.541308289804235e-07,
"loss": 0.1432,
"step": 2503
},
{
"epoch": 1.7596626844694307,
"grad_norm": 0.523940078738328,
"learning_rate": 3.5209348441598626e-07,
"loss": 0.1598,
"step": 2504
},
{
"epoch": 1.7603654251581167,
"grad_norm": 0.4810307504199012,
"learning_rate": 3.50061803382708e-07,
"loss": 0.1269,
"step": 2505
},
{
"epoch": 1.7610681658468024,
"grad_norm": 0.5523631787953741,
"learning_rate": 3.4803578835621685e-07,
"loss": 0.1814,
"step": 2506
},
{
"epoch": 1.7617709065354883,
"grad_norm": 0.5118306383379198,
"learning_rate": 3.460154418052364e-07,
"loss": 0.1555,
"step": 2507
},
{
"epoch": 1.7624736472241742,
"grad_norm": 0.4960231120219577,
"learning_rate": 3.440007661915856e-07,
"loss": 0.1296,
"step": 2508
},
{
"epoch": 1.7631763879128601,
"grad_norm": 0.49992329060380697,
"learning_rate": 3.419917639701698e-07,
"loss": 0.129,
"step": 2509
},
{
"epoch": 1.763879128601546,
"grad_norm": 0.5118691406174483,
"learning_rate": 3.3998843758898336e-07,
"loss": 0.1389,
"step": 2510
},
{
"epoch": 1.764581869290232,
"grad_norm": 0.5315435497899398,
"learning_rate": 3.379907894891027e-07,
"loss": 0.1638,
"step": 2511
},
{
"epoch": 1.765284609978918,
"grad_norm": 0.5114367673359693,
"learning_rate": 3.3599882210468947e-07,
"loss": 0.1469,
"step": 2512
},
{
"epoch": 1.7659873506676038,
"grad_norm": 0.526630904956023,
"learning_rate": 3.340125378629783e-07,
"loss": 0.1404,
"step": 2513
},
{
"epoch": 1.7666900913562895,
"grad_norm": 0.5299821315417063,
"learning_rate": 3.320319391842813e-07,
"loss": 0.1481,
"step": 2514
},
{
"epoch": 1.7673928320449754,
"grad_norm": 0.5115665096272445,
"learning_rate": 3.300570284819815e-07,
"loss": 0.1406,
"step": 2515
},
{
"epoch": 1.7680955727336611,
"grad_norm": 0.5254724265146067,
"learning_rate": 3.280878081625333e-07,
"loss": 0.1617,
"step": 2516
},
{
"epoch": 1.768798313422347,
"grad_norm": 0.5083467646494765,
"learning_rate": 3.261242806254561e-07,
"loss": 0.1457,
"step": 2517
},
{
"epoch": 1.769501054111033,
"grad_norm": 0.4598533715565604,
"learning_rate": 3.241664482633311e-07,
"loss": 0.1261,
"step": 2518
},
{
"epoch": 1.770203794799719,
"grad_norm": 0.5060996400415871,
"learning_rate": 3.222143134618e-07,
"loss": 0.1407,
"step": 2519
},
{
"epoch": 1.7709065354884048,
"grad_norm": 0.5020144901552022,
"learning_rate": 3.202678785995655e-07,
"loss": 0.1328,
"step": 2520
},
{
"epoch": 1.7716092761770907,
"grad_norm": 0.502990891060725,
"learning_rate": 3.1832714604838166e-07,
"loss": 0.1371,
"step": 2521
},
{
"epoch": 1.7723120168657767,
"grad_norm": 0.536747819689489,
"learning_rate": 3.16392118173055e-07,
"loss": 0.169,
"step": 2522
},
{
"epoch": 1.7730147575544624,
"grad_norm": 0.5008783249213785,
"learning_rate": 3.144627973314385e-07,
"loss": 0.1403,
"step": 2523
},
{
"epoch": 1.7737174982431483,
"grad_norm": 0.4922819846556829,
"learning_rate": 3.1253918587443645e-07,
"loss": 0.1408,
"step": 2524
},
{
"epoch": 1.7744202389318342,
"grad_norm": 0.5243120358752659,
"learning_rate": 3.1062128614599176e-07,
"loss": 0.1678,
"step": 2525
},
{
"epoch": 1.77512297962052,
"grad_norm": 0.5056274835855529,
"learning_rate": 3.0870910048308833e-07,
"loss": 0.1379,
"step": 2526
},
{
"epoch": 1.7758257203092058,
"grad_norm": 0.5039032672623872,
"learning_rate": 3.068026312157485e-07,
"loss": 0.1471,
"step": 2527
},
{
"epoch": 1.7765284609978917,
"grad_norm": 0.4981154294195558,
"learning_rate": 3.049018806670284e-07,
"loss": 0.1496,
"step": 2528
},
{
"epoch": 1.7772312016865777,
"grad_norm": 0.5365626189197689,
"learning_rate": 3.030068511530154e-07,
"loss": 0.1575,
"step": 2529
},
{
"epoch": 1.7779339423752636,
"grad_norm": 0.5268016582903655,
"learning_rate": 3.0111754498282686e-07,
"loss": 0.1648,
"step": 2530
},
{
"epoch": 1.7786366830639495,
"grad_norm": 0.5318004110306369,
"learning_rate": 2.9923396445860454e-07,
"loss": 0.154,
"step": 2531
},
{
"epoch": 1.7793394237526354,
"grad_norm": 0.5000644029802647,
"learning_rate": 2.9735611187551696e-07,
"loss": 0.1483,
"step": 2532
},
{
"epoch": 1.7800421644413211,
"grad_norm": 0.517187407258313,
"learning_rate": 2.9548398952174764e-07,
"loss": 0.1414,
"step": 2533
},
{
"epoch": 1.780744905130007,
"grad_norm": 0.49888098628686334,
"learning_rate": 2.936175996785018e-07,
"loss": 0.1344,
"step": 2534
},
{
"epoch": 1.7814476458186927,
"grad_norm": 0.5382013575120231,
"learning_rate": 2.917569446199975e-07,
"loss": 0.1637,
"step": 2535
},
{
"epoch": 1.7821503865073787,
"grad_norm": 0.48847817394584436,
"learning_rate": 2.8990202661346887e-07,
"loss": 0.1335,
"step": 2536
},
{
"epoch": 1.7828531271960646,
"grad_norm": 0.5004412682779585,
"learning_rate": 2.8805284791915245e-07,
"loss": 0.1297,
"step": 2537
},
{
"epoch": 1.7835558678847505,
"grad_norm": 0.4834107517340589,
"learning_rate": 2.862094107902974e-07,
"loss": 0.141,
"step": 2538
},
{
"epoch": 1.7842586085734364,
"grad_norm": 0.4768745420958175,
"learning_rate": 2.8437171747315306e-07,
"loss": 0.1303,
"step": 2539
},
{
"epoch": 1.7849613492621224,
"grad_norm": 0.49617621055480216,
"learning_rate": 2.8253977020697266e-07,
"loss": 0.1457,
"step": 2540
},
{
"epoch": 1.7856640899508083,
"grad_norm": 0.48857079830190625,
"learning_rate": 2.8071357122400666e-07,
"loss": 0.1348,
"step": 2541
},
{
"epoch": 1.786366830639494,
"grad_norm": 0.5292025988518864,
"learning_rate": 2.788931227494995e-07,
"loss": 0.152,
"step": 2542
},
{
"epoch": 1.78706957132818,
"grad_norm": 0.5020878542990461,
"learning_rate": 2.770784270016902e-07,
"loss": 0.1395,
"step": 2543
},
{
"epoch": 1.7877723120168658,
"grad_norm": 0.5160056724991398,
"learning_rate": 2.752694861918087e-07,
"loss": 0.1514,
"step": 2544
},
{
"epoch": 1.7884750527055515,
"grad_norm": 0.5161625361813755,
"learning_rate": 2.7346630252407136e-07,
"loss": 0.1488,
"step": 2545
},
{
"epoch": 1.7891777933942374,
"grad_norm": 0.49588597123940026,
"learning_rate": 2.7166887819568055e-07,
"loss": 0.1436,
"step": 2546
},
{
"epoch": 1.7898805340829234,
"grad_norm": 0.5001326686522389,
"learning_rate": 2.6987721539681655e-07,
"loss": 0.1442,
"step": 2547
},
{
"epoch": 1.7905832747716093,
"grad_norm": 0.5145397053415843,
"learning_rate": 2.6809131631064634e-07,
"loss": 0.1404,
"step": 2548
},
{
"epoch": 1.7912860154602952,
"grad_norm": 0.537795771162458,
"learning_rate": 2.663111831133075e-07,
"loss": 0.1524,
"step": 2549
},
{
"epoch": 1.7919887561489811,
"grad_norm": 0.5069325960299716,
"learning_rate": 2.645368179739155e-07,
"loss": 0.1361,
"step": 2550
},
{
"epoch": 1.792691496837667,
"grad_norm": 0.5012563168513016,
"learning_rate": 2.627682230545547e-07,
"loss": 0.1291,
"step": 2551
},
{
"epoch": 1.7933942375263527,
"grad_norm": 0.5241127028951241,
"learning_rate": 2.6100540051028136e-07,
"loss": 0.1456,
"step": 2552
},
{
"epoch": 1.7940969782150387,
"grad_norm": 0.4902698362142184,
"learning_rate": 2.592483524891154e-07,
"loss": 0.1294,
"step": 2553
},
{
"epoch": 1.7947997189037244,
"grad_norm": 0.5148708371992679,
"learning_rate": 2.5749708113204097e-07,
"loss": 0.1485,
"step": 2554
},
{
"epoch": 1.7955024595924103,
"grad_norm": 0.4645746079975259,
"learning_rate": 2.5575158857300444e-07,
"loss": 0.106,
"step": 2555
},
{
"epoch": 1.7962052002810962,
"grad_norm": 0.5336196943338167,
"learning_rate": 2.540118769389105e-07,
"loss": 0.1438,
"step": 2556
},
{
"epoch": 1.7969079409697821,
"grad_norm": 0.516699415209297,
"learning_rate": 2.522779483496185e-07,
"loss": 0.1513,
"step": 2557
},
{
"epoch": 1.797610681658468,
"grad_norm": 0.526151733582901,
"learning_rate": 2.505498049179411e-07,
"loss": 0.1421,
"step": 2558
},
{
"epoch": 1.798313422347154,
"grad_norm": 0.47850031858462644,
"learning_rate": 2.4882744874964226e-07,
"loss": 0.1294,
"step": 2559
},
{
"epoch": 1.7990161630358399,
"grad_norm": 0.5025156438770971,
"learning_rate": 2.471108819434359e-07,
"loss": 0.1316,
"step": 2560
},
{
"epoch": 1.7997189037245258,
"grad_norm": 0.49547096832318893,
"learning_rate": 2.4540010659097836e-07,
"loss": 0.1346,
"step": 2561
},
{
"epoch": 1.8004216444132115,
"grad_norm": 0.5061555212364162,
"learning_rate": 2.436951247768704e-07,
"loss": 0.1481,
"step": 2562
},
{
"epoch": 1.8011243851018974,
"grad_norm": 0.5142042671043724,
"learning_rate": 2.4199593857865247e-07,
"loss": 0.1409,
"step": 2563
},
{
"epoch": 1.8018271257905831,
"grad_norm": 0.5250821495396907,
"learning_rate": 2.40302550066806e-07,
"loss": 0.1441,
"step": 2564
},
{
"epoch": 1.802529866479269,
"grad_norm": 0.5024031575209251,
"learning_rate": 2.38614961304745e-07,
"loss": 0.1496,
"step": 2565
},
{
"epoch": 1.803232607167955,
"grad_norm": 0.47002878222205746,
"learning_rate": 2.3693317434881623e-07,
"loss": 0.1226,
"step": 2566
},
{
"epoch": 1.803935347856641,
"grad_norm": 0.4860903886167231,
"learning_rate": 2.3525719124829705e-07,
"loss": 0.1265,
"step": 2567
},
{
"epoch": 1.8046380885453268,
"grad_norm": 0.5232051344089709,
"learning_rate": 2.3358701404539552e-07,
"loss": 0.1435,
"step": 2568
},
{
"epoch": 1.8053408292340127,
"grad_norm": 0.5183042693006089,
"learning_rate": 2.3192264477524207e-07,
"loss": 0.1434,
"step": 2569
},
{
"epoch": 1.8060435699226987,
"grad_norm": 0.560087591646533,
"learning_rate": 2.3026408546589162e-07,
"loss": 0.1725,
"step": 2570
},
{
"epoch": 1.8067463106113844,
"grad_norm": 0.5229255819455728,
"learning_rate": 2.2861133813831703e-07,
"loss": 0.1591,
"step": 2571
},
{
"epoch": 1.8074490513000703,
"grad_norm": 0.4987718709096516,
"learning_rate": 2.2696440480641401e-07,
"loss": 0.143,
"step": 2572
},
{
"epoch": 1.8081517919887562,
"grad_norm": 0.5260906121103223,
"learning_rate": 2.2532328747698894e-07,
"loss": 0.1429,
"step": 2573
},
{
"epoch": 1.808854532677442,
"grad_norm": 0.5391767173504673,
"learning_rate": 2.23687988149765e-07,
"loss": 0.1519,
"step": 2574
},
{
"epoch": 1.8095572733661278,
"grad_norm": 0.5285385670188469,
"learning_rate": 2.2205850881737378e-07,
"loss": 0.168,
"step": 2575
},
{
"epoch": 1.8102600140548137,
"grad_norm": 0.49240077630564033,
"learning_rate": 2.2043485146535537e-07,
"loss": 0.1458,
"step": 2576
},
{
"epoch": 1.8109627547434997,
"grad_norm": 0.49215531125082934,
"learning_rate": 2.188170180721566e-07,
"loss": 0.1281,
"step": 2577
},
{
"epoch": 1.8116654954321856,
"grad_norm": 0.4963972594713652,
"learning_rate": 2.172050106091278e-07,
"loss": 0.1375,
"step": 2578
},
{
"epoch": 1.8123682361208715,
"grad_norm": 0.5045098450000102,
"learning_rate": 2.1559883104051938e-07,
"loss": 0.152,
"step": 2579
},
{
"epoch": 1.8130709768095574,
"grad_norm": 0.5077926482353168,
"learning_rate": 2.1399848132348078e-07,
"loss": 0.1502,
"step": 2580
},
{
"epoch": 1.8137737174982431,
"grad_norm": 0.4901609403829654,
"learning_rate": 2.1240396340805825e-07,
"loss": 0.1479,
"step": 2581
},
{
"epoch": 1.814476458186929,
"grad_norm": 0.5040668902616426,
"learning_rate": 2.1081527923719035e-07,
"loss": 0.1433,
"step": 2582
},
{
"epoch": 1.8151791988756147,
"grad_norm": 0.47605679931138023,
"learning_rate": 2.0923243074670918e-07,
"loss": 0.1172,
"step": 2583
},
{
"epoch": 1.8158819395643007,
"grad_norm": 0.5451089294072474,
"learning_rate": 2.0765541986533577e-07,
"loss": 0.1586,
"step": 2584
},
{
"epoch": 1.8165846802529866,
"grad_norm": 0.544266604892749,
"learning_rate": 2.0608424851467578e-07,
"loss": 0.1613,
"step": 2585
},
{
"epoch": 1.8172874209416725,
"grad_norm": 0.5140738338986157,
"learning_rate": 2.0451891860922167e-07,
"loss": 0.1436,
"step": 2586
},
{
"epoch": 1.8179901616303584,
"grad_norm": 0.5439700880344692,
"learning_rate": 2.0295943205634605e-07,
"loss": 0.154,
"step": 2587
},
{
"epoch": 1.8186929023190443,
"grad_norm": 0.5173480144185391,
"learning_rate": 2.0140579075630384e-07,
"loss": 0.1605,
"step": 2588
},
{
"epoch": 1.8193956430077303,
"grad_norm": 0.5208480717274079,
"learning_rate": 1.9985799660222626e-07,
"loss": 0.1451,
"step": 2589
},
{
"epoch": 1.8200983836964162,
"grad_norm": 0.49845186932887076,
"learning_rate": 1.9831605148011745e-07,
"loss": 0.1429,
"step": 2590
},
{
"epoch": 1.8208011243851019,
"grad_norm": 0.48163278303529733,
"learning_rate": 1.9677995726885778e-07,
"loss": 0.1255,
"step": 2591
},
{
"epoch": 1.8215038650737878,
"grad_norm": 0.47652368952818397,
"learning_rate": 1.9524971584019726e-07,
"loss": 0.1153,
"step": 2592
},
{
"epoch": 1.8222066057624735,
"grad_norm": 0.5081801481197005,
"learning_rate": 1.937253290587532e-07,
"loss": 0.1377,
"step": 2593
},
{
"epoch": 1.8229093464511594,
"grad_norm": 0.5129758655170876,
"learning_rate": 1.9220679878201086e-07,
"loss": 0.1552,
"step": 2594
},
{
"epoch": 1.8236120871398454,
"grad_norm": 0.5048691989074335,
"learning_rate": 1.9069412686031575e-07,
"loss": 0.1485,
"step": 2595
},
{
"epoch": 1.8243148278285313,
"grad_norm": 0.5404970301699522,
"learning_rate": 1.8918731513687893e-07,
"loss": 0.1684,
"step": 2596
},
{
"epoch": 1.8250175685172172,
"grad_norm": 0.4967219145554596,
"learning_rate": 1.876863654477684e-07,
"loss": 0.1247,
"step": 2597
},
{
"epoch": 1.8257203092059031,
"grad_norm": 0.5077317448642489,
"learning_rate": 1.8619127962190952e-07,
"loss": 0.1355,
"step": 2598
},
{
"epoch": 1.826423049894589,
"grad_norm": 0.4971023482577422,
"learning_rate": 1.847020594810839e-07,
"loss": 0.129,
"step": 2599
},
{
"epoch": 1.8271257905832747,
"grad_norm": 0.48601797488301623,
"learning_rate": 1.8321870683992326e-07,
"loss": 0.1322,
"step": 2600
},
{
"epoch": 1.8278285312719607,
"grad_norm": 0.48392971082553665,
"learning_rate": 1.817412235059113e-07,
"loss": 0.1301,
"step": 2601
},
{
"epoch": 1.8285312719606466,
"grad_norm": 0.5102334446297592,
"learning_rate": 1.8026961127938059e-07,
"loss": 0.1506,
"step": 2602
},
{
"epoch": 1.8292340126493323,
"grad_norm": 0.4914040388893914,
"learning_rate": 1.7880387195350734e-07,
"loss": 0.1175,
"step": 2603
},
{
"epoch": 1.8299367533380182,
"grad_norm": 0.5121422819226531,
"learning_rate": 1.7734400731431344e-07,
"loss": 0.1421,
"step": 2604
},
{
"epoch": 1.8306394940267041,
"grad_norm": 0.5037999031914915,
"learning_rate": 1.7589001914066206e-07,
"loss": 0.1408,
"step": 2605
},
{
"epoch": 1.83134223471539,
"grad_norm": 0.5067033731990441,
"learning_rate": 1.744419092042554e-07,
"loss": 0.1335,
"step": 2606
},
{
"epoch": 1.832044975404076,
"grad_norm": 0.49393593459338514,
"learning_rate": 1.7299967926963367e-07,
"loss": 0.1247,
"step": 2607
},
{
"epoch": 1.8327477160927619,
"grad_norm": 0.5242193894382914,
"learning_rate": 1.7156333109417055e-07,
"loss": 0.1569,
"step": 2608
},
{
"epoch": 1.8334504567814478,
"grad_norm": 0.5202636164472492,
"learning_rate": 1.7013286642807602e-07,
"loss": 0.1342,
"step": 2609
},
{
"epoch": 1.8341531974701335,
"grad_norm": 0.5614919056652893,
"learning_rate": 1.687082870143869e-07,
"loss": 0.1757,
"step": 2610
},
{
"epoch": 1.8348559381588194,
"grad_norm": 0.5281146658221509,
"learning_rate": 1.672895945889713e-07,
"loss": 0.1565,
"step": 2611
},
{
"epoch": 1.8355586788475051,
"grad_norm": 0.48187838560746477,
"learning_rate": 1.6587679088052365e-07,
"loss": 0.1218,
"step": 2612
},
{
"epoch": 1.836261419536191,
"grad_norm": 0.5657258268237095,
"learning_rate": 1.6446987761056244e-07,
"loss": 0.1796,
"step": 2613
},
{
"epoch": 1.836964160224877,
"grad_norm": 0.5329160569784503,
"learning_rate": 1.6306885649342906e-07,
"loss": 0.1382,
"step": 2614
},
{
"epoch": 1.8376669009135629,
"grad_norm": 0.5041024772538047,
"learning_rate": 1.6167372923628354e-07,
"loss": 0.1406,
"step": 2615
},
{
"epoch": 1.8383696416022488,
"grad_norm": 0.507700276509694,
"learning_rate": 1.6028449753910768e-07,
"loss": 0.1297,
"step": 2616
},
{
"epoch": 1.8390723822909347,
"grad_norm": 0.5265584444159414,
"learning_rate": 1.5890116309469573e-07,
"loss": 0.1612,
"step": 2617
},
{
"epoch": 1.8397751229796206,
"grad_norm": 0.5209812882103455,
"learning_rate": 1.575237275886593e-07,
"loss": 0.1117,
"step": 2618
},
{
"epoch": 1.8404778636683063,
"grad_norm": 0.542521675976148,
"learning_rate": 1.5615219269941807e-07,
"loss": 0.1592,
"step": 2619
},
{
"epoch": 1.8411806043569923,
"grad_norm": 0.529711101080399,
"learning_rate": 1.5478656009820626e-07,
"loss": 0.1525,
"step": 2620
},
{
"epoch": 1.8418833450456782,
"grad_norm": 0.534179504469705,
"learning_rate": 1.5342683144906334e-07,
"loss": 0.15,
"step": 2621
},
{
"epoch": 1.8425860857343639,
"grad_norm": 0.5400629684296321,
"learning_rate": 1.520730084088351e-07,
"loss": 0.1565,
"step": 2622
},
{
"epoch": 1.8432888264230498,
"grad_norm": 0.5170789645604357,
"learning_rate": 1.5072509262717195e-07,
"loss": 0.1475,
"step": 2623
},
{
"epoch": 1.8439915671117357,
"grad_norm": 0.5063403071076594,
"learning_rate": 1.4938308574652505e-07,
"loss": 0.138,
"step": 2624
},
{
"epoch": 1.8446943078004217,
"grad_norm": 0.5152059684103599,
"learning_rate": 1.4804698940214746e-07,
"loss": 0.1464,
"step": 2625
},
{
"epoch": 1.8453970484891076,
"grad_norm": 0.504957720786916,
"learning_rate": 1.4671680522208797e-07,
"loss": 0.1463,
"step": 2626
},
{
"epoch": 1.8460997891777935,
"grad_norm": 0.4699643971631126,
"learning_rate": 1.4539253482719286e-07,
"loss": 0.1177,
"step": 2627
},
{
"epoch": 1.8468025298664794,
"grad_norm": 0.4777017263154567,
"learning_rate": 1.4407417983110127e-07,
"loss": 0.1172,
"step": 2628
},
{
"epoch": 1.8475052705551651,
"grad_norm": 0.5087995897430411,
"learning_rate": 1.427617418402455e-07,
"loss": 0.1403,
"step": 2629
},
{
"epoch": 1.848208011243851,
"grad_norm": 0.5201978878578722,
"learning_rate": 1.4145522245384735e-07,
"loss": 0.1587,
"step": 2630
},
{
"epoch": 1.8489107519325367,
"grad_norm": 0.5382052069103397,
"learning_rate": 1.401546232639167e-07,
"loss": 0.1472,
"step": 2631
},
{
"epoch": 1.8496134926212227,
"grad_norm": 0.5168716574125763,
"learning_rate": 1.388599458552492e-07,
"loss": 0.1516,
"step": 2632
},
{
"epoch": 1.8503162333099086,
"grad_norm": 0.554664247706861,
"learning_rate": 1.3757119180542623e-07,
"loss": 0.1717,
"step": 2633
},
{
"epoch": 1.8510189739985945,
"grad_norm": 0.4939411994253164,
"learning_rate": 1.3628836268480883e-07,
"loss": 0.1281,
"step": 2634
},
{
"epoch": 1.8517217146872804,
"grad_norm": 0.5335256253163561,
"learning_rate": 1.3501146005654164e-07,
"loss": 0.1389,
"step": 2635
},
{
"epoch": 1.8524244553759663,
"grad_norm": 0.4981498675638026,
"learning_rate": 1.337404854765445e-07,
"loss": 0.1267,
"step": 2636
},
{
"epoch": 1.8531271960646523,
"grad_norm": 0.5221789691308846,
"learning_rate": 1.3247544049351745e-07,
"loss": 0.1451,
"step": 2637
},
{
"epoch": 1.8538299367533382,
"grad_norm": 0.4876082081623457,
"learning_rate": 1.3121632664893192e-07,
"loss": 0.1357,
"step": 2638
},
{
"epoch": 1.8545326774420239,
"grad_norm": 0.5284294376664426,
"learning_rate": 1.2996314547703393e-07,
"loss": 0.1694,
"step": 2639
},
{
"epoch": 1.8552354181307098,
"grad_norm": 0.49857435677523226,
"learning_rate": 1.2871589850484034e-07,
"loss": 0.1431,
"step": 2640
},
{
"epoch": 1.8559381588193955,
"grad_norm": 0.5183214493498189,
"learning_rate": 1.2747458725213712e-07,
"loss": 0.147,
"step": 2641
},
{
"epoch": 1.8566408995080814,
"grad_norm": 0.5052453730973389,
"learning_rate": 1.2623921323147714e-07,
"loss": 0.1448,
"step": 2642
},
{
"epoch": 1.8573436401967673,
"grad_norm": 0.5079225417127723,
"learning_rate": 1.2500977794817794e-07,
"loss": 0.1307,
"step": 2643
},
{
"epoch": 1.8580463808854533,
"grad_norm": 0.546064986386369,
"learning_rate": 1.237862829003228e-07,
"loss": 0.1571,
"step": 2644
},
{
"epoch": 1.8587491215741392,
"grad_norm": 0.5143898150930651,
"learning_rate": 1.225687295787542e-07,
"loss": 0.1456,
"step": 2645
},
{
"epoch": 1.859451862262825,
"grad_norm": 0.5265743190917148,
"learning_rate": 1.2135711946707708e-07,
"loss": 0.1753,
"step": 2646
},
{
"epoch": 1.860154602951511,
"grad_norm": 0.5024792783520283,
"learning_rate": 1.2015145404165261e-07,
"loss": 0.1334,
"step": 2647
},
{
"epoch": 1.8608573436401967,
"grad_norm": 0.5197838757541134,
"learning_rate": 1.1895173477159849e-07,
"loss": 0.1401,
"step": 2648
},
{
"epoch": 1.8615600843288826,
"grad_norm": 0.4941943554491134,
"learning_rate": 1.1775796311878807e-07,
"loss": 0.1446,
"step": 2649
},
{
"epoch": 1.8622628250175686,
"grad_norm": 0.510797781459705,
"learning_rate": 1.1657014053784666e-07,
"loss": 0.1504,
"step": 2650
},
{
"epoch": 1.8629655657062543,
"grad_norm": 0.4864424119263491,
"learning_rate": 1.1538826847615037e-07,
"loss": 0.1341,
"step": 2651
},
{
"epoch": 1.8636683063949402,
"grad_norm": 0.5087411860311789,
"learning_rate": 1.14212348373825e-07,
"loss": 0.1463,
"step": 2652
},
{
"epoch": 1.864371047083626,
"grad_norm": 0.49707309590982207,
"learning_rate": 1.1304238166374381e-07,
"loss": 0.1489,
"step": 2653
},
{
"epoch": 1.865073787772312,
"grad_norm": 0.4931342074513031,
"learning_rate": 1.1187836977152533e-07,
"loss": 0.1173,
"step": 2654
},
{
"epoch": 1.865776528460998,
"grad_norm": 0.5137748127105113,
"learning_rate": 1.1072031411553219e-07,
"loss": 0.1327,
"step": 2655
},
{
"epoch": 1.8664792691496839,
"grad_norm": 0.5207656669400329,
"learning_rate": 1.0956821610686952e-07,
"loss": 0.1534,
"step": 2656
},
{
"epoch": 1.8671820098383698,
"grad_norm": 0.47726471803816006,
"learning_rate": 1.084220771493838e-07,
"loss": 0.1172,
"step": 2657
},
{
"epoch": 1.8678847505270555,
"grad_norm": 0.5049845652854204,
"learning_rate": 1.0728189863965788e-07,
"loss": 0.1241,
"step": 2658
},
{
"epoch": 1.8685874912157414,
"grad_norm": 0.5015664314853837,
"learning_rate": 1.061476819670143e-07,
"loss": 0.1437,
"step": 2659
},
{
"epoch": 1.8692902319044271,
"grad_norm": 0.4898745907546558,
"learning_rate": 1.0501942851350921e-07,
"loss": 0.1245,
"step": 2660
},
{
"epoch": 1.869992972593113,
"grad_norm": 0.5284895632749437,
"learning_rate": 1.0389713965393455e-07,
"loss": 0.1393,
"step": 2661
},
{
"epoch": 1.870695713281799,
"grad_norm": 0.47810971939622376,
"learning_rate": 1.0278081675581253e-07,
"loss": 0.131,
"step": 2662
},
{
"epoch": 1.8713984539704849,
"grad_norm": 0.5070597308989767,
"learning_rate": 1.0167046117939561e-07,
"loss": 0.1377,
"step": 2663
},
{
"epoch": 1.8721011946591708,
"grad_norm": 0.49224780941636986,
"learning_rate": 1.005660742776654e-07,
"loss": 0.139,
"step": 2664
},
{
"epoch": 1.8728039353478567,
"grad_norm": 0.5440013848755799,
"learning_rate": 9.946765739633269e-08,
"loss": 0.1734,
"step": 2665
},
{
"epoch": 1.8735066760365426,
"grad_norm": 0.5112080258416924,
"learning_rate": 9.837521187383126e-08,
"loss": 0.1463,
"step": 2666
},
{
"epoch": 1.8742094167252283,
"grad_norm": 0.5218668643540916,
"learning_rate": 9.728873904131853e-08,
"loss": 0.1769,
"step": 2667
},
{
"epoch": 1.8749121574139143,
"grad_norm": 0.51026817542633,
"learning_rate": 9.620824022267549e-08,
"loss": 0.1404,
"step": 2668
},
{
"epoch": 1.8756148981026002,
"grad_norm": 0.482823703843089,
"learning_rate": 9.513371673450344e-08,
"loss": 0.1373,
"step": 2669
},
{
"epoch": 1.8763176387912859,
"grad_norm": 0.5010018048694237,
"learning_rate": 9.40651698861228e-08,
"loss": 0.1329,
"step": 2670
},
{
"epoch": 1.8770203794799718,
"grad_norm": 0.5247195128185501,
"learning_rate": 9.300260097956981e-08,
"loss": 0.1542,
"step": 2671
},
{
"epoch": 1.8777231201686577,
"grad_norm": 0.48310092650513153,
"learning_rate": 9.19460113095988e-08,
"loss": 0.1173,
"step": 2672
},
{
"epoch": 1.8784258608573436,
"grad_norm": 0.5262005900960619,
"learning_rate": 9.089540216367654e-08,
"loss": 0.1531,
"step": 2673
},
{
"epoch": 1.8791286015460296,
"grad_norm": 0.5087073281936366,
"learning_rate": 8.985077482198346e-08,
"loss": 0.1386,
"step": 2674
},
{
"epoch": 1.8798313422347155,
"grad_norm": 0.46451880297201525,
"learning_rate": 8.881213055741134e-08,
"loss": 0.11,
"step": 2675
},
{
"epoch": 1.8805340829234014,
"grad_norm": 0.4901323654825113,
"learning_rate": 8.777947063556002e-08,
"loss": 0.1344,
"step": 2676
},
{
"epoch": 1.881236823612087,
"grad_norm": 0.5048362106132257,
"learning_rate": 8.67527963147391e-08,
"loss": 0.1451,
"step": 2677
},
{
"epoch": 1.881939564300773,
"grad_norm": 0.4788357243727687,
"learning_rate": 8.57321088459634e-08,
"loss": 0.1334,
"step": 2678
},
{
"epoch": 1.8826423049894587,
"grad_norm": 0.5065406066592795,
"learning_rate": 8.471740947295304e-08,
"loss": 0.1459,
"step": 2679
},
{
"epoch": 1.8833450456781446,
"grad_norm": 0.47638466417584086,
"learning_rate": 8.370869943213178e-08,
"loss": 0.1276,
"step": 2680
},
{
"epoch": 1.8840477863668306,
"grad_norm": 0.513019187609436,
"learning_rate": 8.270597995262586e-08,
"loss": 0.1439,
"step": 2681
},
{
"epoch": 1.8847505270555165,
"grad_norm": 0.4952226280223815,
"learning_rate": 8.17092522562607e-08,
"loss": 0.1307,
"step": 2682
},
{
"epoch": 1.8854532677442024,
"grad_norm": 0.5075275049563399,
"learning_rate": 8.071851755756088e-08,
"loss": 0.1424,
"step": 2683
},
{
"epoch": 1.8861560084328883,
"grad_norm": 0.4985028012920937,
"learning_rate": 7.973377706374852e-08,
"loss": 0.131,
"step": 2684
},
{
"epoch": 1.8868587491215743,
"grad_norm": 0.4929395898649719,
"learning_rate": 7.875503197474377e-08,
"loss": 0.1395,
"step": 2685
},
{
"epoch": 1.8875614898102602,
"grad_norm": 0.5162454789554229,
"learning_rate": 7.778228348315763e-08,
"loss": 0.158,
"step": 2686
},
{
"epoch": 1.8882642304989459,
"grad_norm": 0.5192574950152272,
"learning_rate": 7.681553277429698e-08,
"loss": 0.1525,
"step": 2687
},
{
"epoch": 1.8889669711876318,
"grad_norm": 0.521601549534333,
"learning_rate": 7.585478102615951e-08,
"loss": 0.132,
"step": 2688
},
{
"epoch": 1.8896697118763175,
"grad_norm": 0.4843516768403466,
"learning_rate": 7.490002940943263e-08,
"loss": 0.1334,
"step": 2689
},
{
"epoch": 1.8903724525650034,
"grad_norm": 0.5043173752107828,
"learning_rate": 7.395127908749356e-08,
"loss": 0.1541,
"step": 2690
},
{
"epoch": 1.8910751932536893,
"grad_norm": 0.5144753873581086,
"learning_rate": 7.300853121640528e-08,
"loss": 0.1418,
"step": 2691
},
{
"epoch": 1.8917779339423753,
"grad_norm": 0.5127526031674718,
"learning_rate": 7.207178694491778e-08,
"loss": 0.1501,
"step": 2692
},
{
"epoch": 1.8924806746310612,
"grad_norm": 0.5007235999573614,
"learning_rate": 7.114104741446581e-08,
"loss": 0.1449,
"step": 2693
},
{
"epoch": 1.893183415319747,
"grad_norm": 0.5252681135070831,
"learning_rate": 7.021631375916716e-08,
"loss": 0.1591,
"step": 2694
},
{
"epoch": 1.893886156008433,
"grad_norm": 0.5063517537683107,
"learning_rate": 6.929758710582102e-08,
"loss": 0.1424,
"step": 2695
},
{
"epoch": 1.8945888966971187,
"grad_norm": 0.47396271781618415,
"learning_rate": 6.838486857390692e-08,
"loss": 0.1126,
"step": 2696
},
{
"epoch": 1.8952916373858046,
"grad_norm": 0.5032356998875548,
"learning_rate": 6.747815927558354e-08,
"loss": 0.139,
"step": 2697
},
{
"epoch": 1.8959943780744906,
"grad_norm": 0.5125103242000826,
"learning_rate": 6.657746031568769e-08,
"loss": 0.1509,
"step": 2698
},
{
"epoch": 1.8966971187631763,
"grad_norm": 0.5201568570124898,
"learning_rate": 6.568277279173141e-08,
"loss": 0.1561,
"step": 2699
},
{
"epoch": 1.8973998594518622,
"grad_norm": 0.5186568338619438,
"learning_rate": 6.479409779390267e-08,
"loss": 0.1526,
"step": 2700
},
{
"epoch": 1.898102600140548,
"grad_norm": 0.5283258080713146,
"learning_rate": 6.391143640506359e-08,
"loss": 0.1653,
"step": 2701
},
{
"epoch": 1.898805340829234,
"grad_norm": 0.49052332848233793,
"learning_rate": 6.303478970074716e-08,
"loss": 0.132,
"step": 2702
},
{
"epoch": 1.89950808151792,
"grad_norm": 0.5239835152186789,
"learning_rate": 6.216415874915837e-08,
"loss": 0.1446,
"step": 2703
},
{
"epoch": 1.9002108222066059,
"grad_norm": 0.5149848382309479,
"learning_rate": 6.129954461117083e-08,
"loss": 0.156,
"step": 2704
},
{
"epoch": 1.9009135628952918,
"grad_norm": 0.5318498025754016,
"learning_rate": 6.044094834032954e-08,
"loss": 0.1495,
"step": 2705
},
{
"epoch": 1.9016163035839775,
"grad_norm": 0.4883391326061208,
"learning_rate": 5.95883709828432e-08,
"loss": 0.1245,
"step": 2706
},
{
"epoch": 1.9023190442726634,
"grad_norm": 0.5076806126800628,
"learning_rate": 5.874181357758746e-08,
"loss": 0.1306,
"step": 2707
},
{
"epoch": 1.903021784961349,
"grad_norm": 0.5259427028692687,
"learning_rate": 5.790127715610328e-08,
"loss": 0.1535,
"step": 2708
},
{
"epoch": 1.903724525650035,
"grad_norm": 0.49017069354307125,
"learning_rate": 5.706676274259582e-08,
"loss": 0.1354,
"step": 2709
},
{
"epoch": 1.904427266338721,
"grad_norm": 0.5392957251670593,
"learning_rate": 5.6238271353929455e-08,
"loss": 0.1612,
"step": 2710
},
{
"epoch": 1.9051300070274069,
"grad_norm": 0.47921362798313144,
"learning_rate": 5.541580399963165e-08,
"loss": 0.1253,
"step": 2711
},
{
"epoch": 1.9058327477160928,
"grad_norm": 0.5000567809080751,
"learning_rate": 5.459936168188906e-08,
"loss": 0.1543,
"step": 2712
},
{
"epoch": 1.9065354884047787,
"grad_norm": 0.532479705121779,
"learning_rate": 5.3788945395546465e-08,
"loss": 0.1667,
"step": 2713
},
{
"epoch": 1.9072382290934646,
"grad_norm": 0.5231968810105011,
"learning_rate": 5.2984556128107266e-08,
"loss": 0.1476,
"step": 2714
},
{
"epoch": 1.9079409697821503,
"grad_norm": 0.5106285992840719,
"learning_rate": 5.2186194859727977e-08,
"loss": 0.1321,
"step": 2715
},
{
"epoch": 1.9086437104708363,
"grad_norm": 0.7664757738965309,
"learning_rate": 5.13938625632221e-08,
"loss": 0.1743,
"step": 2716
},
{
"epoch": 1.9093464511595222,
"grad_norm": 0.5381852691192065,
"learning_rate": 5.060756020405677e-08,
"loss": 0.1731,
"step": 2717
},
{
"epoch": 1.9100491918482079,
"grad_norm": 0.5148348910887726,
"learning_rate": 4.982728874035059e-08,
"loss": 0.1466,
"step": 2718
},
{
"epoch": 1.9107519325368938,
"grad_norm": 0.5401335083232811,
"learning_rate": 4.905304912287468e-08,
"loss": 0.1454,
"step": 2719
},
{
"epoch": 1.9114546732255797,
"grad_norm": 0.49382596968863424,
"learning_rate": 4.8284842295048265e-08,
"loss": 0.1404,
"step": 2720
},
{
"epoch": 1.9121574139142656,
"grad_norm": 0.5015414659105242,
"learning_rate": 4.7522669192942014e-08,
"loss": 0.1506,
"step": 2721
},
{
"epoch": 1.9128601546029516,
"grad_norm": 0.5233071572038667,
"learning_rate": 4.676653074527249e-08,
"loss": 0.1458,
"step": 2722
},
{
"epoch": 1.9135628952916375,
"grad_norm": 0.5270575024731265,
"learning_rate": 4.601642787340377e-08,
"loss": 0.1457,
"step": 2723
},
{
"epoch": 1.9142656359803234,
"grad_norm": 0.5243966284195736,
"learning_rate": 4.5272361491345286e-08,
"loss": 0.1356,
"step": 2724
},
{
"epoch": 1.914968376669009,
"grad_norm": 0.5239495021750533,
"learning_rate": 4.4534332505751786e-08,
"loss": 0.158,
"step": 2725
},
{
"epoch": 1.915671117357695,
"grad_norm": 0.5001475973542321,
"learning_rate": 4.380234181592002e-08,
"loss": 0.1428,
"step": 2726
},
{
"epoch": 1.916373858046381,
"grad_norm": 0.4866615950225424,
"learning_rate": 4.30763903137893e-08,
"loss": 0.1424,
"step": 2727
},
{
"epoch": 1.9170765987350666,
"grad_norm": 0.5021516588646925,
"learning_rate": 4.23564788839409e-08,
"loss": 0.1349,
"step": 2728
},
{
"epoch": 1.9177793394237526,
"grad_norm": 0.5107961331826483,
"learning_rate": 4.164260840359646e-08,
"loss": 0.1537,
"step": 2729
},
{
"epoch": 1.9184820801124385,
"grad_norm": 0.5307070440853738,
"learning_rate": 4.0934779742615174e-08,
"loss": 0.1436,
"step": 2730
},
{
"epoch": 1.9191848208011244,
"grad_norm": 0.5330489784728988,
"learning_rate": 4.0232993763494324e-08,
"loss": 0.1616,
"step": 2731
},
{
"epoch": 1.9198875614898103,
"grad_norm": 0.5234903895461293,
"learning_rate": 3.953725132136932e-08,
"loss": 0.1517,
"step": 2732
},
{
"epoch": 1.9205903021784962,
"grad_norm": 0.4752098567300896,
"learning_rate": 3.884755326401146e-08,
"loss": 0.1338,
"step": 2733
},
{
"epoch": 1.9212930428671822,
"grad_norm": 0.5131113828284778,
"learning_rate": 3.816390043182572e-08,
"loss": 0.1336,
"step": 2734
},
{
"epoch": 1.9219957835558679,
"grad_norm": 0.5123087049753486,
"learning_rate": 3.748629365785184e-08,
"loss": 0.1392,
"step": 2735
},
{
"epoch": 1.9226985242445538,
"grad_norm": 0.502380548388396,
"learning_rate": 3.681473376776101e-08,
"loss": 0.1278,
"step": 2736
},
{
"epoch": 1.9234012649332395,
"grad_norm": 0.48935748420238434,
"learning_rate": 3.614922157985812e-08,
"loss": 0.1331,
"step": 2737
},
{
"epoch": 1.9241040056219254,
"grad_norm": 0.5259100691200583,
"learning_rate": 3.548975790507836e-08,
"loss": 0.16,
"step": 2738
},
{
"epoch": 1.9248067463106113,
"grad_norm": 0.4780326595314659,
"learning_rate": 3.483634354698506e-08,
"loss": 0.124,
"step": 2739
},
{
"epoch": 1.9255094869992972,
"grad_norm": 0.5471598983467048,
"learning_rate": 3.41889793017719e-08,
"loss": 0.1511,
"step": 2740
},
{
"epoch": 1.9262122276879832,
"grad_norm": 0.5365268330722736,
"learning_rate": 3.354766595826064e-08,
"loss": 0.1566,
"step": 2741
},
{
"epoch": 1.926914968376669,
"grad_norm": 0.5079159571319655,
"learning_rate": 3.291240429789955e-08,
"loss": 0.1472,
"step": 2742
},
{
"epoch": 1.927617709065355,
"grad_norm": 0.5102390213530034,
"learning_rate": 3.22831950947633e-08,
"loss": 0.1473,
"step": 2743
},
{
"epoch": 1.9283204497540407,
"grad_norm": 0.5073968459457318,
"learning_rate": 3.166003911554916e-08,
"loss": 0.128,
"step": 2744
},
{
"epoch": 1.9290231904427266,
"grad_norm": 0.4913075941214151,
"learning_rate": 3.104293711958195e-08,
"loss": 0.1205,
"step": 2745
},
{
"epoch": 1.9297259311314126,
"grad_norm": 0.5047863034079181,
"learning_rate": 3.0431889858807405e-08,
"loss": 0.1414,
"step": 2746
},
{
"epoch": 1.9304286718200983,
"grad_norm": 0.5130306164211104,
"learning_rate": 2.982689807779382e-08,
"loss": 0.1452,
"step": 2747
},
{
"epoch": 1.9311314125087842,
"grad_norm": 0.5124308413800075,
"learning_rate": 2.9227962513732057e-08,
"loss": 0.1388,
"step": 2748
},
{
"epoch": 1.93183415319747,
"grad_norm": 0.5269103376555939,
"learning_rate": 2.863508389643166e-08,
"loss": 0.1647,
"step": 2749
},
{
"epoch": 1.932536893886156,
"grad_norm": 0.5064525296116995,
"learning_rate": 2.804826294832308e-08,
"loss": 0.135,
"step": 2750
},
{
"epoch": 1.933239634574842,
"grad_norm": 0.46886335922702166,
"learning_rate": 2.7467500384454336e-08,
"loss": 0.1224,
"step": 2751
},
{
"epoch": 1.9339423752635279,
"grad_norm": 0.5083094318225186,
"learning_rate": 2.6892796912492136e-08,
"loss": 0.1328,
"step": 2752
},
{
"epoch": 1.9346451159522138,
"grad_norm": 0.5386106239516805,
"learning_rate": 2.632415323271964e-08,
"loss": 0.1554,
"step": 2753
},
{
"epoch": 1.9353478566408995,
"grad_norm": 0.5061277285818073,
"learning_rate": 2.5761570038035367e-08,
"loss": 0.1428,
"step": 2754
},
{
"epoch": 1.9360505973295854,
"grad_norm": 0.4983884500730727,
"learning_rate": 2.5205048013955402e-08,
"loss": 0.1259,
"step": 2755
},
{
"epoch": 1.936753338018271,
"grad_norm": 0.5020273022078597,
"learning_rate": 2.4654587838606748e-08,
"loss": 0.1459,
"step": 2756
},
{
"epoch": 1.937456078706957,
"grad_norm": 0.47886167897388193,
"learning_rate": 2.411019018273342e-08,
"loss": 0.1225,
"step": 2757
},
{
"epoch": 1.938158819395643,
"grad_norm": 0.4988142905864442,
"learning_rate": 2.3571855709690894e-08,
"loss": 0.1445,
"step": 2758
},
{
"epoch": 1.9388615600843289,
"grad_norm": 0.49716727924247517,
"learning_rate": 2.303958507544446e-08,
"loss": 0.1256,
"step": 2759
},
{
"epoch": 1.9395643007730148,
"grad_norm": 0.5304810136406167,
"learning_rate": 2.251337892857419e-08,
"loss": 0.1514,
"step": 2760
},
{
"epoch": 1.9402670414617007,
"grad_norm": 0.5290525965050465,
"learning_rate": 2.1993237910267752e-08,
"loss": 0.1544,
"step": 2761
},
{
"epoch": 1.9409697821503866,
"grad_norm": 0.4885011615603234,
"learning_rate": 2.147916265432426e-08,
"loss": 0.1378,
"step": 2762
},
{
"epoch": 1.9416725228390725,
"grad_norm": 0.4915011344083778,
"learning_rate": 2.0971153787149867e-08,
"loss": 0.1302,
"step": 2763
},
{
"epoch": 1.9423752635277582,
"grad_norm": 0.543096423378022,
"learning_rate": 2.0469211927759413e-08,
"loss": 0.1549,
"step": 2764
},
{
"epoch": 1.9430780042164442,
"grad_norm": 0.49262107002073285,
"learning_rate": 1.9973337687776428e-08,
"loss": 0.1425,
"step": 2765
},
{
"epoch": 1.9437807449051299,
"grad_norm": 0.485666471340219,
"learning_rate": 1.948353167142869e-08,
"loss": 0.1266,
"step": 2766
},
{
"epoch": 1.9444834855938158,
"grad_norm": 0.49561951662222453,
"learning_rate": 1.899979447555156e-08,
"loss": 0.1304,
"step": 2767
},
{
"epoch": 1.9451862262825017,
"grad_norm": 0.49184544313668943,
"learning_rate": 1.852212668958353e-08,
"loss": 0.1362,
"step": 2768
},
{
"epoch": 1.9458889669711876,
"grad_norm": 0.5484980579758657,
"learning_rate": 1.805052889557013e-08,
"loss": 0.1696,
"step": 2769
},
{
"epoch": 1.9465917076598735,
"grad_norm": 0.5128507362320103,
"learning_rate": 1.7585001668158907e-08,
"loss": 0.1501,
"step": 2770
},
{
"epoch": 1.9472944483485595,
"grad_norm": 0.4833317483057414,
"learning_rate": 1.7125545574599445e-08,
"loss": 0.1228,
"step": 2771
},
{
"epoch": 1.9479971890372454,
"grad_norm": 0.5356504888410408,
"learning_rate": 1.667216117474557e-08,
"loss": 0.1339,
"step": 2772
},
{
"epoch": 1.948699929725931,
"grad_norm": 0.5418834936652501,
"learning_rate": 1.622484902105148e-08,
"loss": 0.1713,
"step": 2773
},
{
"epoch": 1.949402670414617,
"grad_norm": 0.49467990996465944,
"learning_rate": 1.5783609658572284e-08,
"loss": 0.1382,
"step": 2774
},
{
"epoch": 1.950105411103303,
"grad_norm": 0.5228184568169573,
"learning_rate": 1.534844362496346e-08,
"loss": 0.1591,
"step": 2775
},
{
"epoch": 1.9508081517919886,
"grad_norm": 0.48791356942766656,
"learning_rate": 1.4919351450480847e-08,
"loss": 0.1307,
"step": 2776
},
{
"epoch": 1.9515108924806746,
"grad_norm": 0.5069352430808004,
"learning_rate": 1.4496333657978423e-08,
"loss": 0.1427,
"step": 2777
},
{
"epoch": 1.9522136331693605,
"grad_norm": 0.4877245461993811,
"learning_rate": 1.4079390762907763e-08,
"loss": 0.1285,
"step": 2778
},
{
"epoch": 1.9529163738580464,
"grad_norm": 0.5205213256224333,
"learning_rate": 1.366852327331969e-08,
"loss": 0.1351,
"step": 2779
},
{
"epoch": 1.9536191145467323,
"grad_norm": 0.5167715049465063,
"learning_rate": 1.3263731689860949e-08,
"loss": 0.1473,
"step": 2780
},
{
"epoch": 1.9543218552354182,
"grad_norm": 0.5089333361983708,
"learning_rate": 1.2865016505774763e-08,
"loss": 0.1405,
"step": 2781
},
{
"epoch": 1.9550245959241042,
"grad_norm": 0.47431207475045234,
"learning_rate": 1.2472378206901392e-08,
"loss": 0.1249,
"step": 2782
},
{
"epoch": 1.9557273366127899,
"grad_norm": 0.5305144875113679,
"learning_rate": 1.2085817271674794e-08,
"loss": 0.1646,
"step": 2783
},
{
"epoch": 1.9564300773014758,
"grad_norm": 0.4808002717712444,
"learning_rate": 1.1705334171123739e-08,
"loss": 0.1195,
"step": 2784
},
{
"epoch": 1.9571328179901615,
"grad_norm": 0.5008382500155728,
"learning_rate": 1.1330929368872368e-08,
"loss": 0.1387,
"step": 2785
},
{
"epoch": 1.9578355586788474,
"grad_norm": 0.5085425504372016,
"learning_rate": 1.0962603321137965e-08,
"loss": 0.1363,
"step": 2786
},
{
"epoch": 1.9585382993675333,
"grad_norm": 0.509817666180665,
"learning_rate": 1.0600356476728746e-08,
"loss": 0.1648,
"step": 2787
},
{
"epoch": 1.9592410400562192,
"grad_norm": 0.47543924070843757,
"learning_rate": 1.0244189277048289e-08,
"loss": 0.1304,
"step": 2788
},
{
"epoch": 1.9599437807449052,
"grad_norm": 0.48357072384876065,
"learning_rate": 9.894102156089991e-09,
"loss": 0.1334,
"step": 2789
},
{
"epoch": 1.960646521433591,
"grad_norm": 0.5344793420221453,
"learning_rate": 9.550095540439841e-09,
"loss": 0.1609,
"step": 2790
},
{
"epoch": 1.961349262122277,
"grad_norm": 0.498941387604127,
"learning_rate": 9.212169849273645e-09,
"loss": 0.1318,
"step": 2791
},
{
"epoch": 1.9620520028109627,
"grad_norm": 0.5234164816723301,
"learning_rate": 8.880325494358132e-09,
"loss": 0.1609,
"step": 2792
},
{
"epoch": 1.9627547434996486,
"grad_norm": 0.48420091499064977,
"learning_rate": 8.554562880049855e-09,
"loss": 0.1348,
"step": 2793
},
{
"epoch": 1.9634574841883345,
"grad_norm": 0.494649993734143,
"learning_rate": 8.23488240329462e-09,
"loss": 0.1428,
"step": 2794
},
{
"epoch": 1.9641602248770202,
"grad_norm": 0.48798840617888206,
"learning_rate": 7.921284453626943e-09,
"loss": 0.1368,
"step": 2795
},
{
"epoch": 1.9648629655657062,
"grad_norm": 0.5358397286286372,
"learning_rate": 7.613769413169492e-09,
"loss": 0.1619,
"step": 2796
},
{
"epoch": 1.965565706254392,
"grad_norm": 0.4942463703204265,
"learning_rate": 7.312337656633639e-09,
"loss": 0.1417,
"step": 2797
},
{
"epoch": 1.966268446943078,
"grad_norm": 0.5168794803827059,
"learning_rate": 7.016989551317244e-09,
"loss": 0.1482,
"step": 2798
},
{
"epoch": 1.966971187631764,
"grad_norm": 0.5172478466477005,
"learning_rate": 6.72772545710576e-09,
"loss": 0.1504,
"step": 2799
},
{
"epoch": 1.9676739283204498,
"grad_norm": 0.5067616207084628,
"learning_rate": 6.4445457264711295e-09,
"loss": 0.1476,
"step": 2800
},
{
"epoch": 1.9683766690091358,
"grad_norm": 0.4925368391455481,
"learning_rate": 6.167450704471223e-09,
"loss": 0.1332,
"step": 2801
},
{
"epoch": 1.9690794096978215,
"grad_norm": 0.5035916486478855,
"learning_rate": 5.896440728749286e-09,
"loss": 0.1432,
"step": 2802
},
{
"epoch": 1.9697821503865074,
"grad_norm": 0.5338161968329757,
"learning_rate": 5.631516129535053e-09,
"loss": 0.1777,
"step": 2803
},
{
"epoch": 1.970484891075193,
"grad_norm": 0.5224862430478892,
"learning_rate": 5.37267722964252e-09,
"loss": 0.1455,
"step": 2804
},
{
"epoch": 1.971187631763879,
"grad_norm": 0.5141185484314288,
"learning_rate": 5.1199243444693955e-09,
"loss": 0.1483,
"step": 2805
},
{
"epoch": 1.971890372452565,
"grad_norm": 0.5077083364089896,
"learning_rate": 4.8732577819982084e-09,
"loss": 0.1514,
"step": 2806
},
{
"epoch": 1.9725931131412509,
"grad_norm": 0.5319882872378586,
"learning_rate": 4.632677842795752e-09,
"loss": 0.1544,
"step": 2807
},
{
"epoch": 1.9732958538299368,
"grad_norm": 0.5586920586860366,
"learning_rate": 4.398184820010865e-09,
"loss": 0.1743,
"step": 2808
},
{
"epoch": 1.9739985945186227,
"grad_norm": 0.5134131618759824,
"learning_rate": 4.16977899937665e-09,
"loss": 0.1359,
"step": 2809
},
{
"epoch": 1.9747013352073086,
"grad_norm": 0.5315119457479176,
"learning_rate": 3.9474606592088125e-09,
"loss": 0.1565,
"step": 2810
},
{
"epoch": 1.9754040758959945,
"grad_norm": 0.5207916928215086,
"learning_rate": 3.731230070403991e-09,
"loss": 0.1455,
"step": 2811
},
{
"epoch": 1.9761068165846802,
"grad_norm": 0.5057179303956064,
"learning_rate": 3.5210874964425323e-09,
"loss": 0.1474,
"step": 2812
},
{
"epoch": 1.9768095572733662,
"grad_norm": 0.53228415448207,
"learning_rate": 3.3170331933857214e-09,
"loss": 0.1546,
"step": 2813
},
{
"epoch": 1.9775122979620519,
"grad_norm": 0.5235103877189453,
"learning_rate": 3.1190674098757756e-09,
"loss": 0.1465,
"step": 2814
},
{
"epoch": 1.9782150386507378,
"grad_norm": 0.5139023335124028,
"learning_rate": 2.927190387137513e-09,
"loss": 0.1388,
"step": 2815
},
{
"epoch": 1.9789177793394237,
"grad_norm": 0.5083201912942791,
"learning_rate": 2.7414023589739104e-09,
"loss": 0.15,
"step": 2816
},
{
"epoch": 1.9796205200281096,
"grad_norm": 0.48924078867088844,
"learning_rate": 2.5617035517705448e-09,
"loss": 0.1414,
"step": 2817
},
{
"epoch": 1.9803232607167955,
"grad_norm": 0.5402871088107077,
"learning_rate": 2.3880941844933727e-09,
"loss": 0.1685,
"step": 2818
},
{
"epoch": 1.9810260014054815,
"grad_norm": 0.4986525119684943,
"learning_rate": 2.2205744686865093e-09,
"loss": 0.1453,
"step": 2819
},
{
"epoch": 1.9817287420941674,
"grad_norm": 0.5331741287449765,
"learning_rate": 2.0591446084755608e-09,
"loss": 0.1599,
"step": 2820
},
{
"epoch": 1.982431482782853,
"grad_norm": 0.5212883380775323,
"learning_rate": 1.9038048005642905e-09,
"loss": 0.1594,
"step": 2821
},
{
"epoch": 1.983134223471539,
"grad_norm": 0.4909700064168984,
"learning_rate": 1.754555234236288e-09,
"loss": 0.1277,
"step": 2822
},
{
"epoch": 1.983836964160225,
"grad_norm": 0.517172188613898,
"learning_rate": 1.6113960913538562e-09,
"loss": 0.1709,
"step": 2823
},
{
"epoch": 1.9845397048489106,
"grad_norm": 0.5264165237576832,
"learning_rate": 1.4743275463585672e-09,
"loss": 0.1449,
"step": 2824
},
{
"epoch": 1.9852424455375965,
"grad_norm": 0.4818380551145174,
"learning_rate": 1.3433497662701522e-09,
"loss": 0.1302,
"step": 2825
},
{
"epoch": 1.9859451862262825,
"grad_norm": 0.4773897093551395,
"learning_rate": 1.2184629106859468e-09,
"loss": 0.1283,
"step": 2826
},
{
"epoch": 1.9866479269149684,
"grad_norm": 0.5102079428826756,
"learning_rate": 1.0996671317825558e-09,
"loss": 0.1333,
"step": 2827
},
{
"epoch": 1.9873506676036543,
"grad_norm": 0.5066329999242195,
"learning_rate": 9.869625743147426e-10,
"loss": 0.1339,
"step": 2828
},
{
"epoch": 1.9880534082923402,
"grad_norm": 0.5322476428530849,
"learning_rate": 8.803493756132097e-10,
"loss": 0.1462,
"step": 2829
},
{
"epoch": 1.9887561489810262,
"grad_norm": 0.4984279936375653,
"learning_rate": 7.798276655879289e-10,
"loss": 0.1312,
"step": 2830
},
{
"epoch": 1.9894588896697118,
"grad_norm": 0.49844369269627375,
"learning_rate": 6.853975667259205e-10,
"loss": 0.1382,
"step": 2831
},
{
"epoch": 1.9901616303583978,
"grad_norm": 0.5203835407085381,
"learning_rate": 5.97059194091254e-10,
"loss": 0.1638,
"step": 2832
},
{
"epoch": 1.9908643710470835,
"grad_norm": 0.49832405876466607,
"learning_rate": 5.148126553256027e-10,
"loss": 0.146,
"step": 2833
},
{
"epoch": 1.9915671117357694,
"grad_norm": 0.5329271945627777,
"learning_rate": 4.3865805064768895e-10,
"loss": 0.1618,
"step": 2834
},
{
"epoch": 1.9922698524244553,
"grad_norm": 0.5191116734566509,
"learning_rate": 3.6859547285217343e-10,
"loss": 0.1627,
"step": 2835
},
{
"epoch": 1.9929725931131412,
"grad_norm": 0.5259367915958767,
"learning_rate": 3.0462500731076595e-10,
"loss": 0.1586,
"step": 2836
},
{
"epoch": 1.9936753338018272,
"grad_norm": 0.5488347954719632,
"learning_rate": 2.467467319733352e-10,
"loss": 0.1778,
"step": 2837
},
{
"epoch": 1.994378074490513,
"grad_norm": 0.5207730511495138,
"learning_rate": 1.9496071736513356e-10,
"loss": 0.1496,
"step": 2838
},
{
"epoch": 1.995080815179199,
"grad_norm": 0.5259137512529136,
"learning_rate": 1.4926702658735192e-10,
"loss": 0.1617,
"step": 2839
},
{
"epoch": 1.9957835558678847,
"grad_norm": 0.5261507425271373,
"learning_rate": 1.0966571531878523e-10,
"loss": 0.1636,
"step": 2840
},
{
"epoch": 1.9964862965565706,
"grad_norm": 0.4906760460963486,
"learning_rate": 7.6156831814167e-11,
"loss": 0.1311,
"step": 2841
},
{
"epoch": 1.9971890372452565,
"grad_norm": 0.5197366755912002,
"learning_rate": 4.874041690416942e-11,
"loss": 0.1627,
"step": 2842
},
{
"epoch": 1.9978917779339422,
"grad_norm": 0.6270833027729107,
"learning_rate": 2.741650399595841e-11,
"loss": 0.1591,
"step": 2843
},
{
"epoch": 1.9985945186226282,
"grad_norm": 0.5051745663409105,
"learning_rate": 1.2185119073748753e-11,
"loss": 0.1298,
"step": 2844
},
{
"epoch": 1.999297259311314,
"grad_norm": 0.5062670497815366,
"learning_rate": 3.046280696583637e-12,
"loss": 0.1486,
"step": 2845
},
{
"epoch": 2.0,
"grad_norm": 0.4928858413588873,
"learning_rate": 0.0,
"loss": 0.1344,
"step": 2846
},
{
"epoch": 2.0,
"step": 2846,
"total_flos": 162152061075456.0,
"train_loss": 0.16860538316093443,
"train_runtime": 5138.7122,
"train_samples_per_second": 8.859,
"train_steps_per_second": 0.554
}
],
"logging_steps": 1,
"max_steps": 2846,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 70000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 162152061075456.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}