| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 2846, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0007027406886858749, |
| "grad_norm": 1.960867455770027, |
| "learning_rate": 9.999996953719305e-06, |
| "loss": 0.3258, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0014054813773717498, |
| "grad_norm": 1.3068761667718831, |
| "learning_rate": 9.999987814880927e-06, |
| "loss": 0.2937, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0021082220660576245, |
| "grad_norm": 1.619988769107426, |
| "learning_rate": 9.999972583496006e-06, |
| "loss": 0.2712, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0028109627547434997, |
| "grad_norm": 0.9963583704539947, |
| "learning_rate": 9.999951259583096e-06, |
| "loss": 0.2802, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0035137034434293743, |
| "grad_norm": 0.830284309838698, |
| "learning_rate": 9.999923843168187e-06, |
| "loss": 0.2609, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.004216444132115249, |
| "grad_norm": 0.7775733770418086, |
| "learning_rate": 9.999890334284681e-06, |
| "loss": 0.2105, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.004919184820801124, |
| "grad_norm": 0.6886436559357607, |
| "learning_rate": 9.999850732973412e-06, |
| "loss": 0.248, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.005621925509486999, |
| "grad_norm": 0.7515236777838128, |
| "learning_rate": 9.999805039282636e-06, |
| "loss": 0.2576, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.006324666198172874, |
| "grad_norm": 0.8276988585807789, |
| "learning_rate": 9.999753253268027e-06, |
| "loss": 0.2559, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.007027406886858749, |
| "grad_norm": 0.6880595073761062, |
| "learning_rate": 9.99969537499269e-06, |
| "loss": 0.2175, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.007730147575544624, |
| "grad_norm": 0.6538562805014537, |
| "learning_rate": 9.999631404527148e-06, |
| "loss": 0.2449, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.008432888264230498, |
| "grad_norm": 0.6210961893951643, |
| "learning_rate": 9.999561341949354e-06, |
| "loss": 0.2498, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.009135628952916374, |
| "grad_norm": 0.6562390246212256, |
| "learning_rate": 9.999485187344676e-06, |
| "loss": 0.2438, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.009838369641602248, |
| "grad_norm": 0.7195647150789044, |
| "learning_rate": 9.999402940805908e-06, |
| "loss": 0.2267, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.010541110330288124, |
| "grad_norm": 0.5848583629858494, |
| "learning_rate": 9.999314602433274e-06, |
| "loss": 0.2199, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.011243851018973999, |
| "grad_norm": 0.6057840591545668, |
| "learning_rate": 9.999220172334414e-06, |
| "loss": 0.2577, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.011946591707659873, |
| "grad_norm": 0.6152015591414531, |
| "learning_rate": 9.999119650624387e-06, |
| "loss": 0.2342, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.012649332396345749, |
| "grad_norm": 0.6421227980011669, |
| "learning_rate": 9.999013037425686e-06, |
| "loss": 0.2654, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.013352073085031623, |
| "grad_norm": 0.533422255577839, |
| "learning_rate": 9.998900332868217e-06, |
| "loss": 0.2046, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.014054813773717497, |
| "grad_norm": 0.5642607629975687, |
| "learning_rate": 9.998781537089316e-06, |
| "loss": 0.1884, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.014757554462403373, |
| "grad_norm": 0.5445226739287741, |
| "learning_rate": 9.998656650233732e-06, |
| "loss": 0.2075, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.015460295151089248, |
| "grad_norm": 0.5946646609141334, |
| "learning_rate": 9.998525672453642e-06, |
| "loss": 0.2335, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.016163035839775124, |
| "grad_norm": 0.578240118854655, |
| "learning_rate": 9.998388603908646e-06, |
| "loss": 0.2136, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.016865776528460996, |
| "grad_norm": 0.6335244002467505, |
| "learning_rate": 9.998245444765764e-06, |
| "loss": 0.2807, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.017568517217146872, |
| "grad_norm": 0.5609924624891021, |
| "learning_rate": 9.998096195199436e-06, |
| "loss": 0.2287, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.018271257905832748, |
| "grad_norm": 0.5502965794491784, |
| "learning_rate": 9.997940855391525e-06, |
| "loss": 0.2277, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.018973998594518624, |
| "grad_norm": 0.5519014674709402, |
| "learning_rate": 9.997779425531315e-06, |
| "loss": 0.2106, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.019676739283204497, |
| "grad_norm": 0.5536708670861661, |
| "learning_rate": 9.997611905815508e-06, |
| "loss": 0.2156, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.020379479971890373, |
| "grad_norm": 0.5729118843091581, |
| "learning_rate": 9.99743829644823e-06, |
| "loss": 0.2436, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.02108222066057625, |
| "grad_norm": 0.5498493930566353, |
| "learning_rate": 9.997258597641027e-06, |
| "loss": 0.2342, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.02178496134926212, |
| "grad_norm": 0.5602470830706633, |
| "learning_rate": 9.997072809612864e-06, |
| "loss": 0.2155, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.022487702037947997, |
| "grad_norm": 0.5838068038561821, |
| "learning_rate": 9.996880932590125e-06, |
| "loss": 0.2461, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.023190442726633873, |
| "grad_norm": 0.5320447337269636, |
| "learning_rate": 9.996682966806614e-06, |
| "loss": 0.2126, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.023893183415319746, |
| "grad_norm": 0.5715049548629288, |
| "learning_rate": 9.996478912503557e-06, |
| "loss": 0.229, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.024595924104005622, |
| "grad_norm": 0.5639356143321008, |
| "learning_rate": 9.996268769929597e-06, |
| "loss": 0.2361, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.025298664792691498, |
| "grad_norm": 0.5490799330847167, |
| "learning_rate": 9.996052539340793e-06, |
| "loss": 0.225, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.02600140548137737, |
| "grad_norm": 0.5197636186398789, |
| "learning_rate": 9.995830221000624e-06, |
| "loss": 0.2105, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.026704146170063246, |
| "grad_norm": 0.5343885544219437, |
| "learning_rate": 9.99560181517999e-06, |
| "loss": 0.209, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.027406886858749122, |
| "grad_norm": 0.5434262864184476, |
| "learning_rate": 9.995367322157205e-06, |
| "loss": 0.2232, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.028109627547434995, |
| "grad_norm": 0.5281205534492845, |
| "learning_rate": 9.995126742218002e-06, |
| "loss": 0.2101, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.02881236823612087, |
| "grad_norm": 0.5414344318501926, |
| "learning_rate": 9.994880075655531e-06, |
| "loss": 0.1932, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.029515108924806747, |
| "grad_norm": 0.5552903981632334, |
| "learning_rate": 9.994627322770358e-06, |
| "loss": 0.1977, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.030217849613492623, |
| "grad_norm": 0.5475965068067495, |
| "learning_rate": 9.994368483870466e-06, |
| "loss": 0.2275, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.030920590302178495, |
| "grad_norm": 0.5820073528574793, |
| "learning_rate": 9.994103559271252e-06, |
| "loss": 0.2298, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.03162333099086437, |
| "grad_norm": 0.572740663614088, |
| "learning_rate": 9.99383254929553e-06, |
| "loss": 0.2308, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.03232607167955025, |
| "grad_norm": 0.5376971201448391, |
| "learning_rate": 9.99355545427353e-06, |
| "loss": 0.2038, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.03302881236823612, |
| "grad_norm": 0.5803946358100757, |
| "learning_rate": 9.993272274542895e-06, |
| "loss": 0.2226, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.03373155305692199, |
| "grad_norm": 0.5995490513194511, |
| "learning_rate": 9.992983010448684e-06, |
| "loss": 0.2553, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.03443429374560787, |
| "grad_norm": 0.5345926632678341, |
| "learning_rate": 9.992687662343367e-06, |
| "loss": 0.2064, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.035137034434293744, |
| "grad_norm": 0.5904489663547782, |
| "learning_rate": 9.992386230586832e-06, |
| "loss": 0.1973, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.035839775122979624, |
| "grad_norm": 0.5474568790983223, |
| "learning_rate": 9.992078715546373e-06, |
| "loss": 0.199, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.036542515811665496, |
| "grad_norm": 0.5639916029733052, |
| "learning_rate": 9.991765117596705e-06, |
| "loss": 0.2195, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.03724525650035137, |
| "grad_norm": 0.5720230854284688, |
| "learning_rate": 9.99144543711995e-06, |
| "loss": 0.2151, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.03794799718903725, |
| "grad_norm": 0.5648940910761961, |
| "learning_rate": 9.991119674505643e-06, |
| "loss": 0.2115, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.03865073787772312, |
| "grad_norm": 0.5452367285194587, |
| "learning_rate": 9.990787830150727e-06, |
| "loss": 0.2182, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.03935347856640899, |
| "grad_norm": 0.5805438865311197, |
| "learning_rate": 9.990449904459561e-06, |
| "loss": 0.2111, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.04005621925509487, |
| "grad_norm": 0.5553341050808482, |
| "learning_rate": 9.99010589784391e-06, |
| "loss": 0.2084, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.040758959943780745, |
| "grad_norm": 0.5273226505412448, |
| "learning_rate": 9.989755810722951e-06, |
| "loss": 0.2053, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.04146170063246662, |
| "grad_norm": 0.5497593519290821, |
| "learning_rate": 9.989399643523272e-06, |
| "loss": 0.2074, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.0421644413211525, |
| "grad_norm": 0.5549546743333674, |
| "learning_rate": 9.989037396678863e-06, |
| "loss": 0.2204, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.04286718200983837, |
| "grad_norm": 0.5549078894733431, |
| "learning_rate": 9.988669070631128e-06, |
| "loss": 0.1981, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.04356992269852424, |
| "grad_norm": 0.5639221318363923, |
| "learning_rate": 9.988294665828877e-06, |
| "loss": 0.2261, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.04427266338721012, |
| "grad_norm": 0.540986074696611, |
| "learning_rate": 9.987914182728327e-06, |
| "loss": 0.2145, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.044975404075895994, |
| "grad_norm": 0.5535674392248264, |
| "learning_rate": 9.9875276217931e-06, |
| "loss": 0.1974, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.04567814476458187, |
| "grad_norm": 0.5744845275186403, |
| "learning_rate": 9.987134983494227e-06, |
| "loss": 0.2395, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.046380885453267746, |
| "grad_norm": 0.5841243634839673, |
| "learning_rate": 9.98673626831014e-06, |
| "loss": 0.2259, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.04708362614195362, |
| "grad_norm": 0.5381761834722242, |
| "learning_rate": 9.986331476726681e-06, |
| "loss": 0.1967, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.04778636683063949, |
| "grad_norm": 0.56850467548114, |
| "learning_rate": 9.985920609237092e-06, |
| "loss": 0.2081, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.04848910751932537, |
| "grad_norm": 0.5325622293618841, |
| "learning_rate": 9.985503666342022e-06, |
| "loss": 0.1975, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.049191848208011243, |
| "grad_norm": 0.5411807875900252, |
| "learning_rate": 9.98508064854952e-06, |
| "loss": 0.2092, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.049894588896697116, |
| "grad_norm": 0.5409391121214355, |
| "learning_rate": 9.984651556375039e-06, |
| "loss": 0.191, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.050597329585382995, |
| "grad_norm": 0.5611013234227988, |
| "learning_rate": 9.984216390341428e-06, |
| "loss": 0.2031, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.05130007027406887, |
| "grad_norm": 0.6004633606197677, |
| "learning_rate": 9.98377515097895e-06, |
| "loss": 0.2345, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.05200281096275474, |
| "grad_norm": 0.5576736229012247, |
| "learning_rate": 9.983327838825256e-06, |
| "loss": 0.2129, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.05270555165144062, |
| "grad_norm": 0.5702531585788543, |
| "learning_rate": 9.982874454425402e-06, |
| "loss": 0.2401, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.05340829234012649, |
| "grad_norm": 0.5388339761813628, |
| "learning_rate": 9.982414998331842e-06, |
| "loss": 0.2196, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.054111033028812365, |
| "grad_norm": 0.5796848449772599, |
| "learning_rate": 9.98194947110443e-06, |
| "loss": 0.2062, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.054813773717498245, |
| "grad_norm": 0.5743923596689938, |
| "learning_rate": 9.981477873310416e-06, |
| "loss": 0.1997, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.05551651440618412, |
| "grad_norm": 0.5256817899147247, |
| "learning_rate": 9.981000205524449e-06, |
| "loss": 0.1912, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.05621925509486999, |
| "grad_norm": 0.5787140729797523, |
| "learning_rate": 9.980516468328571e-06, |
| "loss": 0.1846, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.05692199578355587, |
| "grad_norm": 0.5928293871778944, |
| "learning_rate": 9.980026662312224e-06, |
| "loss": 0.22, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.05762473647224174, |
| "grad_norm": 0.5869970405152036, |
| "learning_rate": 9.979530788072241e-06, |
| "loss": 0.2262, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.05832747716092762, |
| "grad_norm": 0.5601186971331559, |
| "learning_rate": 9.979028846212852e-06, |
| "loss": 0.1995, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.059030217849613494, |
| "grad_norm": 0.5762686780457437, |
| "learning_rate": 9.978520837345678e-06, |
| "loss": 0.211, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.059732958538299366, |
| "grad_norm": 0.5658006462984013, |
| "learning_rate": 9.978006762089734e-06, |
| "loss": 0.1998, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.060435699226985246, |
| "grad_norm": 0.5448568216925331, |
| "learning_rate": 9.977486621071425e-06, |
| "loss": 0.1976, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.06113843991567112, |
| "grad_norm": 0.5449325392449603, |
| "learning_rate": 9.976960414924558e-06, |
| "loss": 0.2069, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.06184118060435699, |
| "grad_norm": 0.5676641969425263, |
| "learning_rate": 9.97642814429031e-06, |
| "loss": 0.2138, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.06254392129304287, |
| "grad_norm": 0.5424526783085432, |
| "learning_rate": 9.975889809817268e-06, |
| "loss": 0.2065, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.06324666198172874, |
| "grad_norm": 0.5789089542007724, |
| "learning_rate": 9.975345412161395e-06, |
| "loss": 0.2097, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.06394940267041462, |
| "grad_norm": 0.5227720371474548, |
| "learning_rate": 9.974794951986047e-06, |
| "loss": 0.1745, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.0646521433591005, |
| "grad_norm": 0.5664675290538582, |
| "learning_rate": 9.974238429961965e-06, |
| "loss": 0.2258, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.06535488404778636, |
| "grad_norm": 0.5242625446880697, |
| "learning_rate": 9.973675846767281e-06, |
| "loss": 0.182, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.06605762473647224, |
| "grad_norm": 0.5130151810198286, |
| "learning_rate": 9.973107203087508e-06, |
| "loss": 0.195, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.06676036542515812, |
| "grad_norm": 0.5107003932791141, |
| "learning_rate": 9.972532499615546e-06, |
| "loss": 0.1784, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.06746310611384398, |
| "grad_norm": 0.50176695520801, |
| "learning_rate": 9.971951737051677e-06, |
| "loss": 0.185, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.06816584680252986, |
| "grad_norm": 0.5708031718262291, |
| "learning_rate": 9.97136491610357e-06, |
| "loss": 0.2012, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.06886858749121574, |
| "grad_norm": 0.5454650812883806, |
| "learning_rate": 9.97077203748627e-06, |
| "loss": 0.1975, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.06957132817990162, |
| "grad_norm": 0.5159484154382179, |
| "learning_rate": 9.970173101922207e-06, |
| "loss": 0.1871, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.07027406886858749, |
| "grad_norm": 0.5531737229062328, |
| "learning_rate": 9.969568110141194e-06, |
| "loss": 0.2169, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.07097680955727337, |
| "grad_norm": 0.5426370539414972, |
| "learning_rate": 9.968957062880419e-06, |
| "loss": 0.2028, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.07167955024595925, |
| "grad_norm": 0.6218361656819086, |
| "learning_rate": 9.968339960884452e-06, |
| "loss": 0.2471, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.07238229093464511, |
| "grad_norm": 0.5809630898624576, |
| "learning_rate": 9.967716804905238e-06, |
| "loss": 0.2224, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.07308503162333099, |
| "grad_norm": 0.5311062278784064, |
| "learning_rate": 9.967087595702101e-06, |
| "loss": 0.189, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.07378777231201687, |
| "grad_norm": 0.522063350301436, |
| "learning_rate": 9.966452334041741e-06, |
| "loss": 0.1921, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.07449051300070274, |
| "grad_norm": 0.5671623833518951, |
| "learning_rate": 9.965811020698228e-06, |
| "loss": 0.2219, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.07519325368938862, |
| "grad_norm": 0.5727822563672408, |
| "learning_rate": 9.965163656453017e-06, |
| "loss": 0.2402, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.0758959943780745, |
| "grad_norm": 0.5256490527259093, |
| "learning_rate": 9.964510242094922e-06, |
| "loss": 0.2091, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.07659873506676036, |
| "grad_norm": 0.5285646235064523, |
| "learning_rate": 9.963850778420144e-06, |
| "loss": 0.2092, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.07730147575544624, |
| "grad_norm": 0.5420907018436252, |
| "learning_rate": 9.96318526623224e-06, |
| "loss": 0.2025, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.07800421644413212, |
| "grad_norm": 0.5232772340159911, |
| "learning_rate": 9.962513706342149e-06, |
| "loss": 0.2005, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.07870695713281799, |
| "grad_norm": 0.5543317820424953, |
| "learning_rate": 9.961836099568174e-06, |
| "loss": 0.2009, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.07940969782150387, |
| "grad_norm": 0.5313190776729853, |
| "learning_rate": 9.961152446735989e-06, |
| "loss": 0.2082, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.08011243851018975, |
| "grad_norm": 0.5301047159823322, |
| "learning_rate": 9.960462748678632e-06, |
| "loss": 0.1942, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.08081517919887561, |
| "grad_norm": 0.5482604596341181, |
| "learning_rate": 9.959767006236508e-06, |
| "loss": 0.2141, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.08151791988756149, |
| "grad_norm": 0.5241312613602601, |
| "learning_rate": 9.959065220257388e-06, |
| "loss": 0.1954, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.08222066057624737, |
| "grad_norm": 0.5536434360318407, |
| "learning_rate": 9.958357391596405e-06, |
| "loss": 0.2191, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.08292340126493324, |
| "grad_norm": 0.5565448027851918, |
| "learning_rate": 9.957643521116059e-06, |
| "loss": 0.1919, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.08362614195361912, |
| "grad_norm": 0.5357229743914843, |
| "learning_rate": 9.956923609686212e-06, |
| "loss": 0.202, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.084328882642305, |
| "grad_norm": 0.5058154999730936, |
| "learning_rate": 9.956197658184082e-06, |
| "loss": 0.1832, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.08503162333099086, |
| "grad_norm": 0.5580603557762164, |
| "learning_rate": 9.955465667494249e-06, |
| "loss": 0.1998, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.08573436401967674, |
| "grad_norm": 0.5441411673706941, |
| "learning_rate": 9.954727638508655e-06, |
| "loss": 0.1987, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.08643710470836262, |
| "grad_norm": 0.5621956668698223, |
| "learning_rate": 9.953983572126598e-06, |
| "loss": 0.2329, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.08713984539704848, |
| "grad_norm": 0.5555218799042205, |
| "learning_rate": 9.953233469254728e-06, |
| "loss": 0.2113, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.08784258608573436, |
| "grad_norm": 0.5553758129637749, |
| "learning_rate": 9.95247733080706e-06, |
| "loss": 0.2215, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.08854532677442024, |
| "grad_norm": 0.5355462057697746, |
| "learning_rate": 9.951715157704954e-06, |
| "loss": 0.202, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.08924806746310611, |
| "grad_norm": 0.5050684078508851, |
| "learning_rate": 9.950946950877126e-06, |
| "loss": 0.1768, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.08995080815179199, |
| "grad_norm": 0.5370575562075645, |
| "learning_rate": 9.950172711259651e-06, |
| "loss": 0.2088, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.09065354884047787, |
| "grad_norm": 0.5424876550648152, |
| "learning_rate": 9.949392439795943e-06, |
| "loss": 0.2208, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.09135628952916373, |
| "grad_norm": 0.5261671250993958, |
| "learning_rate": 9.948606137436779e-06, |
| "loss": 0.198, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.09205903021784961, |
| "grad_norm": 0.5088818678092768, |
| "learning_rate": 9.947813805140274e-06, |
| "loss": 0.187, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.09276177090653549, |
| "grad_norm": 0.5328110111985129, |
| "learning_rate": 9.947015443871894e-06, |
| "loss": 0.2154, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.09346451159522136, |
| "grad_norm": 0.5324344180286568, |
| "learning_rate": 9.946211054604455e-06, |
| "loss": 0.2116, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.09416725228390724, |
| "grad_norm": 0.5236749308752624, |
| "learning_rate": 9.945400638318113e-06, |
| "loss": 0.2015, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.09486999297259312, |
| "grad_norm": 0.583442779591505, |
| "learning_rate": 9.94458419600037e-06, |
| "loss": 0.2133, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.09557273366127898, |
| "grad_norm": 0.5453942041965456, |
| "learning_rate": 9.943761728646072e-06, |
| "loss": 0.2178, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.09627547434996486, |
| "grad_norm": 0.5456774305577579, |
| "learning_rate": 9.942933237257406e-06, |
| "loss": 0.1992, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.09697821503865074, |
| "grad_norm": 0.5383843010460779, |
| "learning_rate": 9.942098722843898e-06, |
| "loss": 0.2194, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.09768095572733661, |
| "grad_norm": 0.5287988030526317, |
| "learning_rate": 9.941258186422413e-06, |
| "loss": 0.2124, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.09838369641602249, |
| "grad_norm": 0.5430300098732184, |
| "learning_rate": 9.940411629017159e-06, |
| "loss": 0.2112, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.09908643710470837, |
| "grad_norm": 0.520885326315766, |
| "learning_rate": 9.93955905165967e-06, |
| "loss": 0.2103, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.09978917779339423, |
| "grad_norm": 0.5449094302378578, |
| "learning_rate": 9.93870045538883e-06, |
| "loss": 0.2327, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.10049191848208011, |
| "grad_norm": 0.5687041901069785, |
| "learning_rate": 9.937835841250842e-06, |
| "loss": 0.2475, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.10119465917076599, |
| "grad_norm": 0.5292925413224571, |
| "learning_rate": 9.936965210299254e-06, |
| "loss": 0.2058, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.10189739985945186, |
| "grad_norm": 0.5450162719241888, |
| "learning_rate": 9.936088563594937e-06, |
| "loss": 0.2166, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.10260014054813774, |
| "grad_norm": 0.5030226831647348, |
| "learning_rate": 9.935205902206098e-06, |
| "loss": 0.2, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.10330288123682362, |
| "grad_norm": 0.5606886718196615, |
| "learning_rate": 9.934317227208269e-06, |
| "loss": 0.2395, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.10400562192550948, |
| "grad_norm": 0.508456211029729, |
| "learning_rate": 9.933422539684314e-06, |
| "loss": 0.1826, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.10470836261419536, |
| "grad_norm": 0.5276224067096561, |
| "learning_rate": 9.932521840724418e-06, |
| "loss": 0.1943, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.10541110330288124, |
| "grad_norm": 0.5370428953048157, |
| "learning_rate": 9.931615131426094e-06, |
| "loss": 0.2142, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.1061138439915671, |
| "grad_norm": 0.504130332599666, |
| "learning_rate": 9.930702412894179e-06, |
| "loss": 0.1934, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.10681658468025299, |
| "grad_norm": 0.5290656249486817, |
| "learning_rate": 9.929783686240833e-06, |
| "loss": 0.2025, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.10751932536893886, |
| "grad_norm": 0.5141584039459383, |
| "learning_rate": 9.928858952585535e-06, |
| "loss": 0.1877, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.10822206605762473, |
| "grad_norm": 0.5562386101706629, |
| "learning_rate": 9.927928213055082e-06, |
| "loss": 0.1997, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.10892480674631061, |
| "grad_norm": 0.49443664961132877, |
| "learning_rate": 9.926991468783595e-06, |
| "loss": 0.1678, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.10962754743499649, |
| "grad_norm": 0.550549791203403, |
| "learning_rate": 9.926048720912509e-06, |
| "loss": 0.1883, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.11033028812368235, |
| "grad_norm": 0.5429116475858584, |
| "learning_rate": 9.925099970590568e-06, |
| "loss": 0.2199, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.11103302881236823, |
| "grad_norm": 0.5290919161535431, |
| "learning_rate": 9.924145218973841e-06, |
| "loss": 0.2153, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.11173576950105411, |
| "grad_norm": 0.5213091776944005, |
| "learning_rate": 9.923184467225704e-06, |
| "loss": 0.2002, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.11243851018973998, |
| "grad_norm": 0.5161618699416421, |
| "learning_rate": 9.922217716516843e-06, |
| "loss": 0.1913, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.11314125087842586, |
| "grad_norm": 0.558417418707948, |
| "learning_rate": 9.921244968025257e-06, |
| "loss": 0.2169, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.11384399156711174, |
| "grad_norm": 0.48467929480194855, |
| "learning_rate": 9.920266222936252e-06, |
| "loss": 0.1816, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.11454673225579762, |
| "grad_norm": 0.507478691170454, |
| "learning_rate": 9.91928148244244e-06, |
| "loss": 0.1827, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.11524947294448348, |
| "grad_norm": 0.5214917414048722, |
| "learning_rate": 9.91829074774374e-06, |
| "loss": 0.1936, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.11595221363316936, |
| "grad_norm": 0.5280684849167947, |
| "learning_rate": 9.917294020047375e-06, |
| "loss": 0.1835, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.11665495432185524, |
| "grad_norm": 0.5521589119277396, |
| "learning_rate": 9.916291300567868e-06, |
| "loss": 0.2151, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.11735769501054111, |
| "grad_norm": 0.5858753766045725, |
| "learning_rate": 9.915282590527048e-06, |
| "loss": 0.2227, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.11806043569922699, |
| "grad_norm": 0.553369867577223, |
| "learning_rate": 9.914267891154037e-06, |
| "loss": 0.2132, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.11876317638791287, |
| "grad_norm": 0.5147106193312105, |
| "learning_rate": 9.913247203685261e-06, |
| "loss": 0.1743, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.11946591707659873, |
| "grad_norm": 0.5393121589562151, |
| "learning_rate": 9.912220529364441e-06, |
| "loss": 0.1946, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.12016865776528461, |
| "grad_norm": 0.5337320423505406, |
| "learning_rate": 9.911187869442588e-06, |
| "loss": 0.1994, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.12087139845397049, |
| "grad_norm": 0.521658044662446, |
| "learning_rate": 9.910149225178018e-06, |
| "loss": 0.2042, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.12157413914265636, |
| "grad_norm": 0.5683795188065389, |
| "learning_rate": 9.909104597836324e-06, |
| "loss": 0.2138, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.12227687983134224, |
| "grad_norm": 0.5287528993195494, |
| "learning_rate": 9.908053988690403e-06, |
| "loss": 0.204, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.12297962052002812, |
| "grad_norm": 0.5030979458003717, |
| "learning_rate": 9.90699739902043e-06, |
| "loss": 0.1995, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.12368236120871398, |
| "grad_norm": 0.5333811207421939, |
| "learning_rate": 9.905934830113878e-06, |
| "loss": 0.2247, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.12438510189739986, |
| "grad_norm": 0.5242706389608097, |
| "learning_rate": 9.904866283265498e-06, |
| "loss": 0.2001, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.12508784258608574, |
| "grad_norm": 0.5553133727029044, |
| "learning_rate": 9.903791759777326e-06, |
| "loss": 0.2232, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.1257905832747716, |
| "grad_norm": 0.5854998441746612, |
| "learning_rate": 9.902711260958682e-06, |
| "loss": 0.2326, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.12649332396345747, |
| "grad_norm": 0.5465334372407958, |
| "learning_rate": 9.901624788126169e-06, |
| "loss": 0.2135, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.12719606465214336, |
| "grad_norm": 0.5551496513314771, |
| "learning_rate": 9.900532342603669e-06, |
| "loss": 0.2061, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.12789880534082923, |
| "grad_norm": 0.5225704469654726, |
| "learning_rate": 9.899433925722334e-06, |
| "loss": 0.1904, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.1286015460295151, |
| "grad_norm": 0.5394995980067866, |
| "learning_rate": 9.898329538820606e-06, |
| "loss": 0.189, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.129304286718201, |
| "grad_norm": 0.594160825487313, |
| "learning_rate": 9.897219183244188e-06, |
| "loss": 0.2193, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.13000702740688685, |
| "grad_norm": 0.5645972118149901, |
| "learning_rate": 9.896102860346066e-06, |
| "loss": 0.2032, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.13070976809557272, |
| "grad_norm": 0.5576545796400294, |
| "learning_rate": 9.894980571486492e-06, |
| "loss": 0.2071, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.1314125087842586, |
| "grad_norm": 0.5085872856787232, |
| "learning_rate": 9.893852318032986e-06, |
| "loss": 0.1908, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.13211524947294448, |
| "grad_norm": 0.5442927380248144, |
| "learning_rate": 9.892718101360344e-06, |
| "loss": 0.2061, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.13281799016163034, |
| "grad_norm": 0.5475838060489673, |
| "learning_rate": 9.891577922850616e-06, |
| "loss": 0.1893, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.13352073085031624, |
| "grad_norm": 0.540506279685077, |
| "learning_rate": 9.89043178389313e-06, |
| "loss": 0.1665, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.1342234715390021, |
| "grad_norm": 0.509049673509137, |
| "learning_rate": 9.889279685884468e-06, |
| "loss": 0.1683, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.13492621222768797, |
| "grad_norm": 0.5104559471477775, |
| "learning_rate": 9.888121630228476e-06, |
| "loss": 0.1809, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.13562895291637386, |
| "grad_norm": 0.5357517170593388, |
| "learning_rate": 9.886957618336257e-06, |
| "loss": 0.1972, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.13633169360505973, |
| "grad_norm": 0.5651199180828425, |
| "learning_rate": 9.885787651626176e-06, |
| "loss": 0.216, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.13703443429374562, |
| "grad_norm": 0.5604061776938946, |
| "learning_rate": 9.88461173152385e-06, |
| "loss": 0.1988, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.1377371749824315, |
| "grad_norm": 0.5260311965836726, |
| "learning_rate": 9.883429859462155e-06, |
| "loss": 0.1921, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.13843991567111735, |
| "grad_norm": 0.5881491100254291, |
| "learning_rate": 9.882242036881214e-06, |
| "loss": 0.227, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.13914265635980325, |
| "grad_norm": 0.5360647056862757, |
| "learning_rate": 9.881048265228402e-06, |
| "loss": 0.1927, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.1398453970484891, |
| "grad_norm": 0.5700218496598528, |
| "learning_rate": 9.879848545958348e-06, |
| "loss": 0.2414, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.14054813773717498, |
| "grad_norm": 0.49314994278279367, |
| "learning_rate": 9.878642880532923e-06, |
| "loss": 0.1615, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.14125087842586087, |
| "grad_norm": 0.5587419946990672, |
| "learning_rate": 9.877431270421248e-06, |
| "loss": 0.2154, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.14195361911454674, |
| "grad_norm": 0.5369484921567617, |
| "learning_rate": 9.876213717099678e-06, |
| "loss": 0.1901, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.1426563598032326, |
| "grad_norm": 0.4973260815445628, |
| "learning_rate": 9.874990222051824e-06, |
| "loss": 0.1963, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.1433591004919185, |
| "grad_norm": 0.5261029662772938, |
| "learning_rate": 9.873760786768524e-06, |
| "loss": 0.1963, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.14406184118060436, |
| "grad_norm": 0.5329267697253043, |
| "learning_rate": 9.872525412747865e-06, |
| "loss": 0.1981, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.14476458186929023, |
| "grad_norm": 0.5246233049215846, |
| "learning_rate": 9.87128410149516e-06, |
| "loss": 0.2045, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.14546732255797612, |
| "grad_norm": 0.552740038183233, |
| "learning_rate": 9.870036854522967e-06, |
| "loss": 0.236, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.14617006324666199, |
| "grad_norm": 0.5250297115428508, |
| "learning_rate": 9.868783673351069e-06, |
| "loss": 0.177, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.14687280393534785, |
| "grad_norm": 0.5359417007107535, |
| "learning_rate": 9.867524559506484e-06, |
| "loss": 0.195, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.14757554462403374, |
| "grad_norm": 0.5466980070361454, |
| "learning_rate": 9.866259514523456e-06, |
| "loss": 0.1914, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.1482782853127196, |
| "grad_norm": 0.5250685884809476, |
| "learning_rate": 9.86498853994346e-06, |
| "loss": 0.1866, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.14898102600140548, |
| "grad_norm": 0.5346721147028544, |
| "learning_rate": 9.863711637315193e-06, |
| "loss": 0.2116, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.14968376669009137, |
| "grad_norm": 0.5120753920923115, |
| "learning_rate": 9.862428808194575e-06, |
| "loss": 0.1765, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.15038650737877723, |
| "grad_norm": 0.5203505661834146, |
| "learning_rate": 9.86114005414475e-06, |
| "loss": 0.2017, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.1510892480674631, |
| "grad_norm": 0.5135432969565993, |
| "learning_rate": 9.859845376736084e-06, |
| "loss": 0.1965, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.151791988756149, |
| "grad_norm": 0.5242400487450163, |
| "learning_rate": 9.858544777546153e-06, |
| "loss": 0.1885, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.15249472944483486, |
| "grad_norm": 0.5037441921440698, |
| "learning_rate": 9.857238258159755e-06, |
| "loss": 0.1835, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.15319747013352072, |
| "grad_norm": 0.5250153350576638, |
| "learning_rate": 9.8559258201689e-06, |
| "loss": 0.1859, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.15390021082220662, |
| "grad_norm": 0.5829455871447047, |
| "learning_rate": 9.854607465172808e-06, |
| "loss": 0.1907, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.15460295151089248, |
| "grad_norm": 0.5498786288501906, |
| "learning_rate": 9.853283194777913e-06, |
| "loss": 0.2158, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.15530569219957835, |
| "grad_norm": 0.5692564261688635, |
| "learning_rate": 9.851953010597854e-06, |
| "loss": 0.21, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.15600843288826424, |
| "grad_norm": 0.5050049560317424, |
| "learning_rate": 9.850616914253476e-06, |
| "loss": 0.1915, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.1567111735769501, |
| "grad_norm": 0.5481805472229878, |
| "learning_rate": 9.84927490737283e-06, |
| "loss": 0.2051, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.15741391426563597, |
| "grad_norm": 0.4932418454519954, |
| "learning_rate": 9.847926991591165e-06, |
| "loss": 0.1876, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.15811665495432187, |
| "grad_norm": 0.5096279574460413, |
| "learning_rate": 9.846573168550936e-06, |
| "loss": 0.2002, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.15881939564300773, |
| "grad_norm": 0.5199894898915933, |
| "learning_rate": 9.845213439901795e-06, |
| "loss": 0.1981, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.1595221363316936, |
| "grad_norm": 0.5155058717704711, |
| "learning_rate": 9.843847807300582e-06, |
| "loss": 0.1869, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.1602248770203795, |
| "grad_norm": 0.5451860630048405, |
| "learning_rate": 9.842476272411343e-06, |
| "loss": 0.1785, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.16092761770906536, |
| "grad_norm": 0.5572085114446077, |
| "learning_rate": 9.841098836905306e-06, |
| "loss": 0.2063, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.16163035839775122, |
| "grad_norm": 0.5464205485143655, |
| "learning_rate": 9.839715502460894e-06, |
| "loss": 0.2066, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.16233309908643712, |
| "grad_norm": 0.5496024905732382, |
| "learning_rate": 9.838326270763717e-06, |
| "loss": 0.2072, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.16303583977512298, |
| "grad_norm": 0.5244444353434804, |
| "learning_rate": 9.836931143506572e-06, |
| "loss": 0.1952, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.16373858046380885, |
| "grad_norm": 0.5462213747463449, |
| "learning_rate": 9.835530122389439e-06, |
| "loss": 0.2054, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.16444132115249474, |
| "grad_norm": 0.5210865544015293, |
| "learning_rate": 9.834123209119478e-06, |
| "loss": 0.1965, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.1651440618411806, |
| "grad_norm": 0.5414620686814897, |
| "learning_rate": 9.83271040541103e-06, |
| "loss": 0.2109, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.16584680252986647, |
| "grad_norm": 0.5369198973702732, |
| "learning_rate": 9.831291712985613e-06, |
| "loss": 0.1934, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.16654954321855236, |
| "grad_norm": 0.511788601277114, |
| "learning_rate": 9.829867133571924e-06, |
| "loss": 0.18, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.16725228390723823, |
| "grad_norm": 0.5121987915269581, |
| "learning_rate": 9.828436668905829e-06, |
| "loss": 0.1676, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.1679550245959241, |
| "grad_norm": 0.5188342768801897, |
| "learning_rate": 9.827000320730366e-06, |
| "loss": 0.191, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.16865776528461, |
| "grad_norm": 0.5626887582456082, |
| "learning_rate": 9.825558090795747e-06, |
| "loss": 0.2233, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.16936050597329586, |
| "grad_norm": 0.5343729872311432, |
| "learning_rate": 9.82410998085934e-06, |
| "loss": 0.2045, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.17006324666198172, |
| "grad_norm": 0.5634331433740161, |
| "learning_rate": 9.822655992685687e-06, |
| "loss": 0.2094, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.17076598735066761, |
| "grad_norm": 0.5269987943660347, |
| "learning_rate": 9.821196128046493e-06, |
| "loss": 0.2031, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.17146872803935348, |
| "grad_norm": 0.5116594387967673, |
| "learning_rate": 9.819730388720622e-06, |
| "loss": 0.1867, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.17217146872803935, |
| "grad_norm": 0.531716345555225, |
| "learning_rate": 9.818258776494089e-06, |
| "loss": 0.2053, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.17287420941672524, |
| "grad_norm": 0.5300008699510442, |
| "learning_rate": 9.816781293160079e-06, |
| "loss": 0.2001, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.1735769501054111, |
| "grad_norm": 0.5727507016739112, |
| "learning_rate": 9.815297940518917e-06, |
| "loss": 0.2032, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.17427969079409697, |
| "grad_norm": 0.511482182034896, |
| "learning_rate": 9.81380872037809e-06, |
| "loss": 0.1776, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.17498243148278286, |
| "grad_norm": 0.5223485030118387, |
| "learning_rate": 9.812313634552233e-06, |
| "loss": 0.2095, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.17568517217146873, |
| "grad_norm": 0.5274459252576597, |
| "learning_rate": 9.810812684863123e-06, |
| "loss": 0.1865, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.1763879128601546, |
| "grad_norm": 0.5387287518026215, |
| "learning_rate": 9.809305873139685e-06, |
| "loss": 0.1896, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.1770906535488405, |
| "grad_norm": 0.5321583220265408, |
| "learning_rate": 9.80779320121799e-06, |
| "loss": 0.1803, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.17779339423752635, |
| "grad_norm": 0.5311044503887951, |
| "learning_rate": 9.806274670941247e-06, |
| "loss": 0.21, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.17849613492621222, |
| "grad_norm": 0.5238717900947666, |
| "learning_rate": 9.804750284159802e-06, |
| "loss": 0.1909, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.1791988756148981, |
| "grad_norm": 0.5687996235127285, |
| "learning_rate": 9.803220042731143e-06, |
| "loss": 0.2234, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.17990161630358398, |
| "grad_norm": 0.5574469376886612, |
| "learning_rate": 9.801683948519885e-06, |
| "loss": 0.2173, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.18060435699226984, |
| "grad_norm": 0.5180438084466581, |
| "learning_rate": 9.800142003397774e-06, |
| "loss": 0.1732, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.18130709768095574, |
| "grad_norm": 0.5310855661683267, |
| "learning_rate": 9.798594209243697e-06, |
| "loss": 0.202, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.1820098383696416, |
| "grad_norm": 0.5386589341794008, |
| "learning_rate": 9.797040567943654e-06, |
| "loss": 0.2023, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.18271257905832747, |
| "grad_norm": 0.5118955272519512, |
| "learning_rate": 9.79548108139078e-06, |
| "loss": 0.1876, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.18341531974701336, |
| "grad_norm": 0.5472298366884414, |
| "learning_rate": 9.793915751485326e-06, |
| "loss": 0.2072, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.18411806043569923, |
| "grad_norm": 0.5512321528105381, |
| "learning_rate": 9.792344580134664e-06, |
| "loss": 0.2103, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.1848208011243851, |
| "grad_norm": 0.5329543717616166, |
| "learning_rate": 9.790767569253292e-06, |
| "loss": 0.1933, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.18552354181307099, |
| "grad_norm": 0.5164467362087247, |
| "learning_rate": 9.78918472076281e-06, |
| "loss": 0.1925, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.18622628250175685, |
| "grad_norm": 0.54505710084966, |
| "learning_rate": 9.787596036591944e-06, |
| "loss": 0.2334, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.18692902319044272, |
| "grad_norm": 0.5615688025483314, |
| "learning_rate": 9.78600151867652e-06, |
| "loss": 0.2412, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.1876317638791286, |
| "grad_norm": 0.5680664305453649, |
| "learning_rate": 9.784401168959482e-06, |
| "loss": 0.2042, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.18833450456781448, |
| "grad_norm": 0.5156312242676864, |
| "learning_rate": 9.782794989390874e-06, |
| "loss": 0.1826, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.18903724525650034, |
| "grad_norm": 0.5398485094700511, |
| "learning_rate": 9.781182981927843e-06, |
| "loss": 0.1988, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.18973998594518623, |
| "grad_norm": 0.5622582389734884, |
| "learning_rate": 9.779565148534645e-06, |
| "loss": 0.22, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.1904427266338721, |
| "grad_norm": 0.49531208082546646, |
| "learning_rate": 9.777941491182628e-06, |
| "loss": 0.1807, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.19114546732255797, |
| "grad_norm": 0.5553653639761128, |
| "learning_rate": 9.776312011850236e-06, |
| "loss": 0.2233, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.19184820801124386, |
| "grad_norm": 0.5905131477395442, |
| "learning_rate": 9.774676712523013e-06, |
| "loss": 0.2026, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.19255094869992972, |
| "grad_norm": 0.5757065193118327, |
| "learning_rate": 9.773035595193588e-06, |
| "loss": 0.213, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.1932536893886156, |
| "grad_norm": 0.5224920946455179, |
| "learning_rate": 9.771388661861684e-06, |
| "loss": 0.1968, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.19395643007730148, |
| "grad_norm": 0.5623112222215833, |
| "learning_rate": 9.76973591453411e-06, |
| "loss": 0.229, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.19465917076598735, |
| "grad_norm": 0.5629423566915981, |
| "learning_rate": 9.768077355224758e-06, |
| "loss": 0.2081, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.19536191145467321, |
| "grad_norm": 0.5183782780342989, |
| "learning_rate": 9.766412985954605e-06, |
| "loss": 0.1769, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.1960646521433591, |
| "grad_norm": 0.5212145167380839, |
| "learning_rate": 9.764742808751705e-06, |
| "loss": 0.1828, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.19676739283204497, |
| "grad_norm": 0.5176379746169677, |
| "learning_rate": 9.763066825651186e-06, |
| "loss": 0.1819, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.19747013352073084, |
| "grad_norm": 0.5219120130877213, |
| "learning_rate": 9.761385038695257e-06, |
| "loss": 0.181, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.19817287420941673, |
| "grad_norm": 0.497045902364289, |
| "learning_rate": 9.759697449933194e-06, |
| "loss": 0.1734, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.1988756148981026, |
| "grad_norm": 0.5190482550474241, |
| "learning_rate": 9.758004061421347e-06, |
| "loss": 0.1865, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.19957835558678846, |
| "grad_norm": 0.5842551279450745, |
| "learning_rate": 9.75630487522313e-06, |
| "loss": 0.2203, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.20028109627547436, |
| "grad_norm": 0.5551403137801947, |
| "learning_rate": 9.754599893409023e-06, |
| "loss": 0.2179, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.20098383696416022, |
| "grad_norm": 0.524413493015236, |
| "learning_rate": 9.752889118056565e-06, |
| "loss": 0.1866, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.2016865776528461, |
| "grad_norm": 0.5786394750468196, |
| "learning_rate": 9.75117255125036e-06, |
| "loss": 0.2337, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.20238931834153198, |
| "grad_norm": 0.5485405276227957, |
| "learning_rate": 9.749450195082059e-06, |
| "loss": 0.2059, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.20309205903021785, |
| "grad_norm": 0.5252768606055862, |
| "learning_rate": 9.747722051650384e-06, |
| "loss": 0.2058, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.2037947997189037, |
| "grad_norm": 0.49209713692252655, |
| "learning_rate": 9.74598812306109e-06, |
| "loss": 0.1736, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.2044975404075896, |
| "grad_norm": 0.5265702195074639, |
| "learning_rate": 9.744248411426995e-06, |
| "loss": 0.2066, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.20520028109627547, |
| "grad_norm": 0.5083836068529662, |
| "learning_rate": 9.742502918867959e-06, |
| "loss": 0.1889, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.20590302178496134, |
| "grad_norm": 0.5294998246539774, |
| "learning_rate": 9.740751647510887e-06, |
| "loss": 0.205, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.20660576247364723, |
| "grad_norm": 0.5477460583347653, |
| "learning_rate": 9.73899459948972e-06, |
| "loss": 0.2022, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.2073085031623331, |
| "grad_norm": 0.5237490738163196, |
| "learning_rate": 9.737231776945445e-06, |
| "loss": 0.2044, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.20801124385101896, |
| "grad_norm": 0.5377161539099496, |
| "learning_rate": 9.735463182026085e-06, |
| "loss": 0.1951, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.20871398453970486, |
| "grad_norm": 0.4953170972218495, |
| "learning_rate": 9.733688816886692e-06, |
| "loss": 0.1732, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.20941672522839072, |
| "grad_norm": 0.5572093168778736, |
| "learning_rate": 9.731908683689355e-06, |
| "loss": 0.1903, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.2101194659170766, |
| "grad_norm": 0.5481859373130906, |
| "learning_rate": 9.730122784603184e-06, |
| "loss": 0.1922, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.21082220660576248, |
| "grad_norm": 0.5505813737799056, |
| "learning_rate": 9.728331121804322e-06, |
| "loss": 0.2135, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.21152494729444835, |
| "grad_norm": 0.5183424768074485, |
| "learning_rate": 9.726533697475929e-06, |
| "loss": 0.1735, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.2122276879831342, |
| "grad_norm": 0.5449744005434142, |
| "learning_rate": 9.724730513808191e-06, |
| "loss": 0.1989, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.2129304286718201, |
| "grad_norm": 0.5263283262096657, |
| "learning_rate": 9.722921572998311e-06, |
| "loss": 0.1982, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.21363316936050597, |
| "grad_norm": 0.555778893603175, |
| "learning_rate": 9.721106877250501e-06, |
| "loss": 0.2249, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.21433591004919184, |
| "grad_norm": 0.5488655032726547, |
| "learning_rate": 9.719286428775995e-06, |
| "loss": 0.2025, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.21503865073787773, |
| "grad_norm": 0.508188510072533, |
| "learning_rate": 9.717460229793027e-06, |
| "loss": 0.204, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.2157413914265636, |
| "grad_norm": 0.5161364072590501, |
| "learning_rate": 9.715628282526847e-06, |
| "loss": 0.1879, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.21644413211524946, |
| "grad_norm": 0.5238141029252569, |
| "learning_rate": 9.713790589209704e-06, |
| "loss": 0.1895, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.21714687280393535, |
| "grad_norm": 0.5342330104422577, |
| "learning_rate": 9.71194715208085e-06, |
| "loss": 0.2221, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.21784961349262122, |
| "grad_norm": 0.5259125390406799, |
| "learning_rate": 9.710097973386531e-06, |
| "loss": 0.1821, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.21855235418130708, |
| "grad_norm": 0.5063736175509841, |
| "learning_rate": 9.708243055380002e-06, |
| "loss": 0.1857, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.21925509486999298, |
| "grad_norm": 0.5192947497176749, |
| "learning_rate": 9.7063824003215e-06, |
| "loss": 0.189, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.21995783555867884, |
| "grad_norm": 0.5284361182139489, |
| "learning_rate": 9.704516010478254e-06, |
| "loss": 0.1896, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.2206605762473647, |
| "grad_norm": 0.5471475403815499, |
| "learning_rate": 9.702643888124484e-06, |
| "loss": 0.2096, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.2213633169360506, |
| "grad_norm": 0.5105524910165029, |
| "learning_rate": 9.700766035541396e-06, |
| "loss": 0.1889, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.22206605762473647, |
| "grad_norm": 0.5187072895311851, |
| "learning_rate": 9.698882455017175e-06, |
| "loss": 0.1996, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.22276879831342233, |
| "grad_norm": 0.5485833610090907, |
| "learning_rate": 9.696993148846985e-06, |
| "loss": 0.1753, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.22347153900210823, |
| "grad_norm": 0.5477845615559535, |
| "learning_rate": 9.695098119332972e-06, |
| "loss": 0.2167, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.2241742796907941, |
| "grad_norm": 0.5308904052089458, |
| "learning_rate": 9.693197368784253e-06, |
| "loss": 0.1973, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.22487702037947996, |
| "grad_norm": 0.5258594332013726, |
| "learning_rate": 9.691290899516912e-06, |
| "loss": 0.205, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.22557976106816585, |
| "grad_norm": 0.5931484026375198, |
| "learning_rate": 9.68937871385401e-06, |
| "loss": 0.2379, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.22628250175685172, |
| "grad_norm": 0.5421251288874882, |
| "learning_rate": 9.687460814125564e-06, |
| "loss": 0.2078, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.22698524244553758, |
| "grad_norm": 0.5432947071522846, |
| "learning_rate": 9.685537202668562e-06, |
| "loss": 0.2188, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.22768798313422348, |
| "grad_norm": 0.5428655853501718, |
| "learning_rate": 9.683607881826946e-06, |
| "loss": 0.2178, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.22839072382290934, |
| "grad_norm": 0.599957858627133, |
| "learning_rate": 9.68167285395162e-06, |
| "loss": 0.1941, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.22909346451159524, |
| "grad_norm": 0.5163974418996417, |
| "learning_rate": 9.679732121400435e-06, |
| "loss": 0.178, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.2297962052002811, |
| "grad_norm": 0.547411245613051, |
| "learning_rate": 9.677785686538201e-06, |
| "loss": 0.1942, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.23049894588896697, |
| "grad_norm": 0.539505049320006, |
| "learning_rate": 9.67583355173667e-06, |
| "loss": 0.1971, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.23120168657765286, |
| "grad_norm": 0.4943662915871163, |
| "learning_rate": 9.673875719374546e-06, |
| "loss": 0.1813, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.23190442726633873, |
| "grad_norm": 0.49792991102742756, |
| "learning_rate": 9.671912191837468e-06, |
| "loss": 0.1767, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.2326071679550246, |
| "grad_norm": 0.5955250059704561, |
| "learning_rate": 9.669942971518019e-06, |
| "loss": 0.2072, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.23330990864371048, |
| "grad_norm": 0.5356463210185225, |
| "learning_rate": 9.667968060815721e-06, |
| "loss": 0.1983, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.23401264933239635, |
| "grad_norm": 0.5400030207757831, |
| "learning_rate": 9.665987462137024e-06, |
| "loss": 0.182, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.23471539002108222, |
| "grad_norm": 0.5879893453341771, |
| "learning_rate": 9.664001177895312e-06, |
| "loss": 0.2258, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.2354181307097681, |
| "grad_norm": 0.5410874019193243, |
| "learning_rate": 9.662009210510897e-06, |
| "loss": 0.2141, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.23612087139845397, |
| "grad_norm": 0.5448240164751721, |
| "learning_rate": 9.660011562411018e-06, |
| "loss": 0.1632, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.23682361208713984, |
| "grad_norm": 0.5532123186835779, |
| "learning_rate": 9.658008236029832e-06, |
| "loss": 0.2092, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.23752635277582573, |
| "grad_norm": 0.500391469524619, |
| "learning_rate": 9.655999233808415e-06, |
| "loss": 0.1915, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.2382290934645116, |
| "grad_norm": 0.546807248500238, |
| "learning_rate": 9.653984558194764e-06, |
| "loss": 0.2063, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.23893183415319746, |
| "grad_norm": 0.5126701401194782, |
| "learning_rate": 9.651964211643784e-06, |
| "loss": 0.2002, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.23963457484188336, |
| "grad_norm": 0.5275409703143071, |
| "learning_rate": 9.649938196617292e-06, |
| "loss": 0.1885, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.24033731553056922, |
| "grad_norm": 0.56088723480931, |
| "learning_rate": 9.647906515584014e-06, |
| "loss": 0.2023, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.2410400562192551, |
| "grad_norm": 0.5028945023569494, |
| "learning_rate": 9.645869171019578e-06, |
| "loss": 0.1917, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.24174279690794098, |
| "grad_norm": 0.5481203732285579, |
| "learning_rate": 9.643826165406512e-06, |
| "loss": 0.2109, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.24244553759662685, |
| "grad_norm": 0.5119601341263857, |
| "learning_rate": 9.641777501234242e-06, |
| "loss": 0.1767, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.2431482782853127, |
| "grad_norm": 0.5455063911316448, |
| "learning_rate": 9.639723180999094e-06, |
| "loss": 0.1823, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.2438510189739986, |
| "grad_norm": 0.5436455570742348, |
| "learning_rate": 9.637663207204279e-06, |
| "loss": 0.206, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.24455375966268447, |
| "grad_norm": 0.5469291865780584, |
| "learning_rate": 9.635597582359905e-06, |
| "loss": 0.2156, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.24525650035137034, |
| "grad_norm": 0.5349192232174842, |
| "learning_rate": 9.633526308982957e-06, |
| "loss": 0.1971, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.24595924104005623, |
| "grad_norm": 0.5412118455088122, |
| "learning_rate": 9.631449389597307e-06, |
| "loss": 0.1988, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.2466619817287421, |
| "grad_norm": 0.5076227227961279, |
| "learning_rate": 9.629366826733711e-06, |
| "loss": 0.1953, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.24736472241742796, |
| "grad_norm": 0.581444320826964, |
| "learning_rate": 9.627278622929791e-06, |
| "loss": 0.2648, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.24806746310611386, |
| "grad_norm": 0.5271444953779912, |
| "learning_rate": 9.625184780730058e-06, |
| "loss": 0.1892, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.24877020379479972, |
| "grad_norm": 0.49390242477879215, |
| "learning_rate": 9.623085302685875e-06, |
| "loss": 0.1697, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.2494729444834856, |
| "grad_norm": 0.5253207337911392, |
| "learning_rate": 9.620980191355487e-06, |
| "loss": 0.1848, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.2501756851721715, |
| "grad_norm": 0.531312442220435, |
| "learning_rate": 9.618869449303996e-06, |
| "loss": 0.2034, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.25087842586085735, |
| "grad_norm": 0.5224940697851705, |
| "learning_rate": 9.616753079103367e-06, |
| "loss": 0.1832, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.2515811665495432, |
| "grad_norm": 0.5273873773882911, |
| "learning_rate": 9.614631083332427e-06, |
| "loss": 0.204, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.2522839072382291, |
| "grad_norm": 0.4903812652752979, |
| "learning_rate": 9.61250346457685e-06, |
| "loss": 0.1667, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.25298664792691494, |
| "grad_norm": 0.5207234544378162, |
| "learning_rate": 9.610370225429164e-06, |
| "loss": 0.1627, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.25368938861560086, |
| "grad_norm": 0.5186116034731955, |
| "learning_rate": 9.608231368488752e-06, |
| "loss": 0.1853, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.25439212930428673, |
| "grad_norm": 0.5170078110207293, |
| "learning_rate": 9.606086896361835e-06, |
| "loss": 0.183, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.2550948699929726, |
| "grad_norm": 0.6083568217795984, |
| "learning_rate": 9.603936811661478e-06, |
| "loss": 0.1948, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.25579761068165846, |
| "grad_norm": 0.5980210849151338, |
| "learning_rate": 9.601781117007586e-06, |
| "loss": 0.2054, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.2565003513703443, |
| "grad_norm": 0.5522680255382288, |
| "learning_rate": 9.5996198150269e-06, |
| "loss": 0.2202, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.2572030920590302, |
| "grad_norm": 0.48493716821204863, |
| "learning_rate": 9.597452908352994e-06, |
| "loss": 0.1685, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.2579058327477161, |
| "grad_norm": 0.5339213828651493, |
| "learning_rate": 9.595280399626267e-06, |
| "loss": 0.1893, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.258608573436402, |
| "grad_norm": 0.5292483062543865, |
| "learning_rate": 9.59310229149395e-06, |
| "loss": 0.206, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.25931131412508784, |
| "grad_norm": 0.5548812323378177, |
| "learning_rate": 9.590918586610094e-06, |
| "loss": 0.1997, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.2600140548137737, |
| "grad_norm": 0.5332802460465896, |
| "learning_rate": 9.588729287635571e-06, |
| "loss": 0.1792, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.2607167955024596, |
| "grad_norm": 0.4956910808361609, |
| "learning_rate": 9.586534397238068e-06, |
| "loss": 0.1723, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.26141953619114544, |
| "grad_norm": 0.5517713285660699, |
| "learning_rate": 9.584333918092085e-06, |
| "loss": 0.1939, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.26212227687983136, |
| "grad_norm": 0.5833231321170365, |
| "learning_rate": 9.582127852878935e-06, |
| "loss": 0.1896, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.2628250175685172, |
| "grad_norm": 0.5874594178160686, |
| "learning_rate": 9.579916204286734e-06, |
| "loss": 0.212, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.2635277582572031, |
| "grad_norm": 0.5064635499958178, |
| "learning_rate": 9.577698975010402e-06, |
| "loss": 0.1573, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.26423049894588896, |
| "grad_norm": 0.556733353232176, |
| "learning_rate": 9.575476167751663e-06, |
| "loss": 0.2104, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.2649332396345748, |
| "grad_norm": 0.5068053458771671, |
| "learning_rate": 9.573247785219033e-06, |
| "loss": 0.1733, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.2656359803232607, |
| "grad_norm": 0.5497150779026637, |
| "learning_rate": 9.571013830127822e-06, |
| "loss": 0.2038, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.2663387210119466, |
| "grad_norm": 0.5722948262491019, |
| "learning_rate": 9.568774305200134e-06, |
| "loss": 0.2334, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.2670414617006325, |
| "grad_norm": 0.5339942012379263, |
| "learning_rate": 9.566529213164859e-06, |
| "loss": 0.1951, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.26774420238931834, |
| "grad_norm": 0.5460642679137601, |
| "learning_rate": 9.564278556757667e-06, |
| "loss": 0.2091, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.2684469430780042, |
| "grad_norm": 0.534776590338903, |
| "learning_rate": 9.56202233872101e-06, |
| "loss": 0.2114, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.2691496837666901, |
| "grad_norm": 0.5513749037432834, |
| "learning_rate": 9.559760561804118e-06, |
| "loss": 0.2183, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.26985242445537594, |
| "grad_norm": 0.5639565013105937, |
| "learning_rate": 9.557493228762995e-06, |
| "loss": 0.2147, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.27055516514406186, |
| "grad_norm": 0.5443366795904921, |
| "learning_rate": 9.555220342360412e-06, |
| "loss": 0.2114, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.2712579058327477, |
| "grad_norm": 0.5298303349972439, |
| "learning_rate": 9.552941905365911e-06, |
| "loss": 0.1829, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.2719606465214336, |
| "grad_norm": 0.5680275927151393, |
| "learning_rate": 9.550657920555794e-06, |
| "loss": 0.2319, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.27266338721011946, |
| "grad_norm": 0.5336137799727587, |
| "learning_rate": 9.548368390713126e-06, |
| "loss": 0.214, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.2733661278988053, |
| "grad_norm": 0.5074047669128428, |
| "learning_rate": 9.546073318627726e-06, |
| "loss": 0.172, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.27406886858749124, |
| "grad_norm": 0.5347261724203398, |
| "learning_rate": 9.543772707096169e-06, |
| "loss": 0.2168, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.2747716092761771, |
| "grad_norm": 0.5107301810364906, |
| "learning_rate": 9.541466558921777e-06, |
| "loss": 0.1819, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.275474349964863, |
| "grad_norm": 0.5368258971699591, |
| "learning_rate": 9.53915487691462e-06, |
| "loss": 0.1791, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.27617709065354884, |
| "grad_norm": 0.5194259665889182, |
| "learning_rate": 9.536837663891511e-06, |
| "loss": 0.1967, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.2768798313422347, |
| "grad_norm": 0.5211561192261743, |
| "learning_rate": 9.534514922676003e-06, |
| "loss": 0.2022, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.27758257203092057, |
| "grad_norm": 0.5248500327348706, |
| "learning_rate": 9.532186656098384e-06, |
| "loss": 0.1912, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.2782853127196065, |
| "grad_norm": 0.5513383222078235, |
| "learning_rate": 9.529852866995676e-06, |
| "loss": 0.2188, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.27898805340829236, |
| "grad_norm": 0.478487035975437, |
| "learning_rate": 9.52751355821163e-06, |
| "loss": 0.1652, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.2796907940969782, |
| "grad_norm": 0.5525603762782381, |
| "learning_rate": 9.525168732596722e-06, |
| "loss": 0.2114, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.2803935347856641, |
| "grad_norm": 0.5232596487449998, |
| "learning_rate": 9.522818393008148e-06, |
| "loss": 0.1987, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.28109627547434995, |
| "grad_norm": 0.5145451382660522, |
| "learning_rate": 9.520462542309832e-06, |
| "loss": 0.2027, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.2817990161630358, |
| "grad_norm": 0.5452391125340568, |
| "learning_rate": 9.518101183372402e-06, |
| "loss": 0.2094, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.28250175685172174, |
| "grad_norm": 0.5320164325853812, |
| "learning_rate": 9.515734319073204e-06, |
| "loss": 0.1851, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.2832044975404076, |
| "grad_norm": 0.524402173661058, |
| "learning_rate": 9.51336195229629e-06, |
| "loss": 0.2004, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.2839072382290935, |
| "grad_norm": 0.5265133901839926, |
| "learning_rate": 9.510984085932421e-06, |
| "loss": 0.1955, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.28460997891777934, |
| "grad_norm": 0.514065778497817, |
| "learning_rate": 9.508600722879055e-06, |
| "loss": 0.1871, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.2853127196064652, |
| "grad_norm": 0.5575578059566517, |
| "learning_rate": 9.50621186604035e-06, |
| "loss": 0.2344, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.28601546029515107, |
| "grad_norm": 0.5326375778389936, |
| "learning_rate": 9.503817518327157e-06, |
| "loss": 0.1864, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.286718200983837, |
| "grad_norm": 0.5235959853643891, |
| "learning_rate": 9.501417682657015e-06, |
| "loss": 0.1807, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.28742094167252286, |
| "grad_norm": 0.5801383878544786, |
| "learning_rate": 9.499012361954156e-06, |
| "loss": 0.2237, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.2881236823612087, |
| "grad_norm": 0.5530514175655553, |
| "learning_rate": 9.496601559149494e-06, |
| "loss": 0.2279, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.2888264230498946, |
| "grad_norm": 0.5512139697013972, |
| "learning_rate": 9.494185277180619e-06, |
| "loss": 0.2175, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.28952916373858045, |
| "grad_norm": 0.5164819220405377, |
| "learning_rate": 9.491763518991803e-06, |
| "loss": 0.1875, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.2902319044272663, |
| "grad_norm": 0.5386437078889298, |
| "learning_rate": 9.489336287533985e-06, |
| "loss": 0.1998, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.29093464511595224, |
| "grad_norm": 0.5236451663214202, |
| "learning_rate": 9.486903585764778e-06, |
| "loss": 0.1985, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.2916373858046381, |
| "grad_norm": 0.4867428696576212, |
| "learning_rate": 9.48446541664846e-06, |
| "loss": 0.165, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.29234012649332397, |
| "grad_norm": 0.5232948042295151, |
| "learning_rate": 9.482021783155971e-06, |
| "loss": 0.2087, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.29304286718200984, |
| "grad_norm": 0.46481489245879515, |
| "learning_rate": 9.479572688264902e-06, |
| "loss": 0.1562, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.2937456078706957, |
| "grad_norm": 0.5361695405334741, |
| "learning_rate": 9.477118134959513e-06, |
| "loss": 0.2044, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.29444834855938157, |
| "grad_norm": 0.5142493311144378, |
| "learning_rate": 9.474658126230702e-06, |
| "loss": 0.2029, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.2951510892480675, |
| "grad_norm": 0.5377969872183673, |
| "learning_rate": 9.472192665076023e-06, |
| "loss": 0.2086, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.29585382993675335, |
| "grad_norm": 0.5211562808056464, |
| "learning_rate": 9.46972175449967e-06, |
| "loss": 0.1747, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.2965565706254392, |
| "grad_norm": 0.5063681882231872, |
| "learning_rate": 9.467245397512475e-06, |
| "loss": 0.1918, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.2972593113141251, |
| "grad_norm": 0.5349189194767427, |
| "learning_rate": 9.464763597131914e-06, |
| "loss": 0.1693, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.29796205200281095, |
| "grad_norm": 0.5060109119066764, |
| "learning_rate": 9.46227635638209e-06, |
| "loss": 0.1691, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.2986647926914968, |
| "grad_norm": 0.5112413048034882, |
| "learning_rate": 9.459783678293732e-06, |
| "loss": 0.2144, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.29936753338018274, |
| "grad_norm": 0.48287792076678004, |
| "learning_rate": 9.457285565904204e-06, |
| "loss": 0.1611, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.3000702740688686, |
| "grad_norm": 0.54633800687917, |
| "learning_rate": 9.454782022257485e-06, |
| "loss": 0.1926, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.30077301475755447, |
| "grad_norm": 0.4956322086389379, |
| "learning_rate": 9.452273050404173e-06, |
| "loss": 0.1812, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.30147575544624033, |
| "grad_norm": 0.5396954853131288, |
| "learning_rate": 9.449758653401482e-06, |
| "loss": 0.2046, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.3021784961349262, |
| "grad_norm": 0.5196303590332476, |
| "learning_rate": 9.447238834313235e-06, |
| "loss": 0.1871, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.30288123682361207, |
| "grad_norm": 0.5428731896865115, |
| "learning_rate": 9.444713596209863e-06, |
| "loss": 0.2102, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.303583977512298, |
| "grad_norm": 0.5419723366224838, |
| "learning_rate": 9.442182942168398e-06, |
| "loss": 0.2035, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.30428671820098385, |
| "grad_norm": 0.5498847598478627, |
| "learning_rate": 9.439646875272476e-06, |
| "loss": 0.2174, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.3049894588896697, |
| "grad_norm": 0.527950833410309, |
| "learning_rate": 9.437105398612323e-06, |
| "loss": 0.2059, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.3056921995783556, |
| "grad_norm": 0.5305130279989605, |
| "learning_rate": 9.434558515284761e-06, |
| "loss": 0.2063, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.30639494026704145, |
| "grad_norm": 0.5394780225533176, |
| "learning_rate": 9.432006228393198e-06, |
| "loss": 0.1941, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.3070976809557273, |
| "grad_norm": 0.5467103824266542, |
| "learning_rate": 9.429448541047627e-06, |
| "loss": 0.1817, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.30780042164441324, |
| "grad_norm": 0.5190838331472895, |
| "learning_rate": 9.426885456364622e-06, |
| "loss": 0.2078, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.3085031623330991, |
| "grad_norm": 0.5179531242157348, |
| "learning_rate": 9.424316977467332e-06, |
| "loss": 0.1914, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.30920590302178497, |
| "grad_norm": 0.5211061345526468, |
| "learning_rate": 9.42174310748548e-06, |
| "loss": 0.2044, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.30990864371047083, |
| "grad_norm": 0.5001258576515429, |
| "learning_rate": 9.419163849555359e-06, |
| "loss": 0.2019, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.3106113843991567, |
| "grad_norm": 0.558385311192709, |
| "learning_rate": 9.416579206819828e-06, |
| "loss": 0.2271, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.31131412508784256, |
| "grad_norm": 0.5526340473234094, |
| "learning_rate": 9.413989182428303e-06, |
| "loss": 0.2016, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.3120168657765285, |
| "grad_norm": 0.47924620283664776, |
| "learning_rate": 9.411393779536761e-06, |
| "loss": 0.1607, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.31271960646521435, |
| "grad_norm": 0.49526228123251687, |
| "learning_rate": 9.408793001307734e-06, |
| "loss": 0.1871, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.3134223471539002, |
| "grad_norm": 0.5341836166522139, |
| "learning_rate": 9.406186850910301e-06, |
| "loss": 0.1916, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.3141250878425861, |
| "grad_norm": 0.51251185690614, |
| "learning_rate": 9.403575331520089e-06, |
| "loss": 0.1894, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.31482782853127195, |
| "grad_norm": 0.5091841651296144, |
| "learning_rate": 9.400958446319267e-06, |
| "loss": 0.1896, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.3155305692199578, |
| "grad_norm": 0.4946655079357005, |
| "learning_rate": 9.398336198496538e-06, |
| "loss": 0.1755, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.31623330990864373, |
| "grad_norm": 0.5003863682492026, |
| "learning_rate": 9.395708591247148e-06, |
| "loss": 0.1659, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.3169360505973296, |
| "grad_norm": 0.5046152034040112, |
| "learning_rate": 9.393075627772865e-06, |
| "loss": 0.1709, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.31763879128601546, |
| "grad_norm": 0.4704925664122499, |
| "learning_rate": 9.39043731128199e-06, |
| "loss": 0.1523, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.31834153197470133, |
| "grad_norm": 0.5369046558983234, |
| "learning_rate": 9.387793644989342e-06, |
| "loss": 0.217, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.3190442726633872, |
| "grad_norm": 0.5340721759585088, |
| "learning_rate": 9.385144632116263e-06, |
| "loss": 0.2148, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.31974701335207306, |
| "grad_norm": 0.543171250126434, |
| "learning_rate": 9.382490275890606e-06, |
| "loss": 0.2079, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.320449754040759, |
| "grad_norm": 0.5217566731039174, |
| "learning_rate": 9.379830579546736e-06, |
| "loss": 0.1975, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.32115249472944485, |
| "grad_norm": 0.526622633076201, |
| "learning_rate": 9.377165546325529e-06, |
| "loss": 0.1912, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.3218552354181307, |
| "grad_norm": 0.5626287070018154, |
| "learning_rate": 9.374495179474356e-06, |
| "loss": 0.2117, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.3225579761068166, |
| "grad_norm": 0.4978836844851344, |
| "learning_rate": 9.371819482247095e-06, |
| "loss": 0.1806, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.32326071679550245, |
| "grad_norm": 0.5430850460420662, |
| "learning_rate": 9.369138457904116e-06, |
| "loss": 0.2099, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.3239634574841883, |
| "grad_norm": 0.540097593922978, |
| "learning_rate": 9.36645210971228e-06, |
| "loss": 0.2025, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.32466619817287423, |
| "grad_norm": 0.5604909326477547, |
| "learning_rate": 9.363760440944933e-06, |
| "loss": 0.191, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.3253689388615601, |
| "grad_norm": 0.5613882972602502, |
| "learning_rate": 9.361063454881909e-06, |
| "loss": 0.2166, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.32607167955024596, |
| "grad_norm": 0.5277972773706446, |
| "learning_rate": 9.358361154809517e-06, |
| "loss": 0.216, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.32677442023893183, |
| "grad_norm": 0.5496623992313513, |
| "learning_rate": 9.355653544020543e-06, |
| "loss": 0.2022, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.3274771609276177, |
| "grad_norm": 0.5349380111318438, |
| "learning_rate": 9.352940625814244e-06, |
| "loss": 0.1948, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.32817990161630356, |
| "grad_norm": 0.5204199184660335, |
| "learning_rate": 9.350222403496348e-06, |
| "loss": 0.2023, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.3288826423049895, |
| "grad_norm": 0.5299224780941074, |
| "learning_rate": 9.347498880379036e-06, |
| "loss": 0.198, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.32958538299367535, |
| "grad_norm": 0.48184730540740855, |
| "learning_rate": 9.344770059780957e-06, |
| "loss": 0.1702, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.3302881236823612, |
| "grad_norm": 0.5342698219695755, |
| "learning_rate": 9.342035945027213e-06, |
| "loss": 0.1939, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.3309908643710471, |
| "grad_norm": 0.5534613984953773, |
| "learning_rate": 9.339296539449356e-06, |
| "loss": 0.1853, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.33169360505973294, |
| "grad_norm": 0.5249296127883286, |
| "learning_rate": 9.336551846385386e-06, |
| "loss": 0.1938, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.3323963457484188, |
| "grad_norm": 0.5483291331444264, |
| "learning_rate": 9.333801869179743e-06, |
| "loss": 0.2144, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.33309908643710473, |
| "grad_norm": 0.5505045621076627, |
| "learning_rate": 9.331046611183311e-06, |
| "loss": 0.2044, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.3338018271257906, |
| "grad_norm": 0.545549121039099, |
| "learning_rate": 9.328286075753402e-06, |
| "loss": 0.1791, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.33450456781447646, |
| "grad_norm": 0.5594469699398668, |
| "learning_rate": 9.325520266253769e-06, |
| "loss": 0.2149, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.3352073085031623, |
| "grad_norm": 0.5261973827645597, |
| "learning_rate": 9.322749186054577e-06, |
| "loss": 0.1813, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.3359100491918482, |
| "grad_norm": 0.5510767918170081, |
| "learning_rate": 9.319972838532425e-06, |
| "loss": 0.2099, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.33661278988053406, |
| "grad_norm": 0.5788350977612923, |
| "learning_rate": 9.317191227070327e-06, |
| "loss": 0.2346, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.33731553056922, |
| "grad_norm": 0.5221987371523338, |
| "learning_rate": 9.314404355057708e-06, |
| "loss": 0.1936, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.33801827125790584, |
| "grad_norm": 0.5155335450625089, |
| "learning_rate": 9.311612225890411e-06, |
| "loss": 0.1808, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.3387210119465917, |
| "grad_norm": 0.5326628394952834, |
| "learning_rate": 9.308814842970675e-06, |
| "loss": 0.1693, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.3394237526352776, |
| "grad_norm": 0.5181443109018903, |
| "learning_rate": 9.306012209707145e-06, |
| "loss": 0.2128, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.34012649332396344, |
| "grad_norm": 0.5665392002357718, |
| "learning_rate": 9.303204329514868e-06, |
| "loss": 0.2306, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.3408292340126493, |
| "grad_norm": 0.49013232207237717, |
| "learning_rate": 9.300391205815276e-06, |
| "loss": 0.1947, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.34153197470133523, |
| "grad_norm": 0.5232919096297761, |
| "learning_rate": 9.297572842036199e-06, |
| "loss": 0.187, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.3422347153900211, |
| "grad_norm": 0.5040579327331741, |
| "learning_rate": 9.294749241611845e-06, |
| "loss": 0.1939, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.34293745607870696, |
| "grad_norm": 0.5094561317464454, |
| "learning_rate": 9.291920407982807e-06, |
| "loss": 0.1966, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.3436401967673928, |
| "grad_norm": 0.5151638920794955, |
| "learning_rate": 9.289086344596055e-06, |
| "loss": 0.1829, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.3443429374560787, |
| "grad_norm": 0.548953154810862, |
| "learning_rate": 9.286247054904926e-06, |
| "loss": 0.2057, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.34504567814476456, |
| "grad_norm": 0.504122354329602, |
| "learning_rate": 9.283402542369132e-06, |
| "loss": 0.1703, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.3457484188334505, |
| "grad_norm": 0.5408868358898966, |
| "learning_rate": 9.280552810454745e-06, |
| "loss": 0.2186, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.34645115952213634, |
| "grad_norm": 0.48896282326675716, |
| "learning_rate": 9.277697862634203e-06, |
| "loss": 0.1735, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.3471539002108222, |
| "grad_norm": 0.5282931074682675, |
| "learning_rate": 9.274837702386287e-06, |
| "loss": 0.2181, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.3478566408995081, |
| "grad_norm": 0.5564622571714379, |
| "learning_rate": 9.271972333196145e-06, |
| "loss": 0.2196, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.34855938158819394, |
| "grad_norm": 0.543549952419283, |
| "learning_rate": 9.26910175855526e-06, |
| "loss": 0.2073, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.3492621222768798, |
| "grad_norm": 0.5104148895528936, |
| "learning_rate": 9.266225981961463e-06, |
| "loss": 0.1874, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.3499648629655657, |
| "grad_norm": 0.5251709011006419, |
| "learning_rate": 9.263345006918926e-06, |
| "loss": 0.2071, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.3506676036542516, |
| "grad_norm": 0.5021423425183384, |
| "learning_rate": 9.260458836938148e-06, |
| "loss": 0.1821, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.35137034434293746, |
| "grad_norm": 0.5134047019086833, |
| "learning_rate": 9.257567475535966e-06, |
| "loss": 0.1959, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.35137034434293746, |
| "eval_loss": 0.197735995054245, |
| "eval_runtime": 10.8924, |
| "eval_samples_per_second": 21.116, |
| "eval_steps_per_second": 5.325, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.3520730850316233, |
| "grad_norm": 0.5483891410899071, |
| "learning_rate": 9.254670926235538e-06, |
| "loss": 0.2173, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.3527758257203092, |
| "grad_norm": 0.5422389677547926, |
| "learning_rate": 9.251769192566346e-06, |
| "loss": 0.2066, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.35347856640899505, |
| "grad_norm": 0.4955794411352627, |
| "learning_rate": 9.248862278064188e-06, |
| "loss": 0.1846, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.354181307097681, |
| "grad_norm": 0.5297565807446326, |
| "learning_rate": 9.24595018627117e-06, |
| "loss": 0.1887, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.35488404778636684, |
| "grad_norm": 0.5923727821439864, |
| "learning_rate": 9.243032920735719e-06, |
| "loss": 0.244, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.3555867884750527, |
| "grad_norm": 0.5034976230839563, |
| "learning_rate": 9.240110485012557e-06, |
| "loss": 0.1845, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.35628952916373857, |
| "grad_norm": 0.4945164136866163, |
| "learning_rate": 9.237182882662705e-06, |
| "loss": 0.1854, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.35699226985242444, |
| "grad_norm": 0.5131450742064524, |
| "learning_rate": 9.234250117253482e-06, |
| "loss": 0.2018, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.3576950105411103, |
| "grad_norm": 0.5735270387984924, |
| "learning_rate": 9.231312192358504e-06, |
| "loss": 0.2125, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.3583977512297962, |
| "grad_norm": 0.5552995583028044, |
| "learning_rate": 9.228369111557663e-06, |
| "loss": 0.2142, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.3591004919184821, |
| "grad_norm": 0.5271596438833579, |
| "learning_rate": 9.22542087843714e-06, |
| "loss": 0.2017, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.35980323260716796, |
| "grad_norm": 0.5371328438167742, |
| "learning_rate": 9.222467496589398e-06, |
| "loss": 0.1976, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.3605059732958538, |
| "grad_norm": 0.5006277197363624, |
| "learning_rate": 9.219508969613164e-06, |
| "loss": 0.1799, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.3612087139845397, |
| "grad_norm": 0.5349029583291667, |
| "learning_rate": 9.21654530111344e-06, |
| "loss": 0.1962, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.36191145467322555, |
| "grad_norm": 0.524522944941706, |
| "learning_rate": 9.213576494701496e-06, |
| "loss": 0.1676, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.3626141953619115, |
| "grad_norm": 0.5329908252072586, |
| "learning_rate": 9.210602553994854e-06, |
| "loss": 0.2256, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.36331693605059734, |
| "grad_norm": 0.5495720248309052, |
| "learning_rate": 9.2076234826173e-06, |
| "loss": 0.1975, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.3640196767392832, |
| "grad_norm": 0.5726123218143253, |
| "learning_rate": 9.204639284198871e-06, |
| "loss": 0.1904, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.36472241742796907, |
| "grad_norm": 0.5345104526063583, |
| "learning_rate": 9.201649962375845e-06, |
| "loss": 0.1804, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.36542515811665494, |
| "grad_norm": 0.5494850097074085, |
| "learning_rate": 9.19865552079075e-06, |
| "loss": 0.1872, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.36612789880534086, |
| "grad_norm": 0.5319764484978339, |
| "learning_rate": 9.195655963092349e-06, |
| "loss": 0.2019, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.3668306394940267, |
| "grad_norm": 0.47753457015015016, |
| "learning_rate": 9.192651292935642e-06, |
| "loss": 0.1707, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.3675333801827126, |
| "grad_norm": 0.5575533755113633, |
| "learning_rate": 9.189641513981854e-06, |
| "loss": 0.2247, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.36823612087139845, |
| "grad_norm": 0.5268585908928828, |
| "learning_rate": 9.186626629898439e-06, |
| "loss": 0.1929, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.3689388615600843, |
| "grad_norm": 0.5187645216202305, |
| "learning_rate": 9.183606644359069e-06, |
| "loss": 0.2046, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.3696416022487702, |
| "grad_norm": 0.5180777894741176, |
| "learning_rate": 9.180581561043633e-06, |
| "loss": 0.1788, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.3703443429374561, |
| "grad_norm": 0.5077472213445343, |
| "learning_rate": 9.177551383638235e-06, |
| "loss": 0.1884, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.37104708362614197, |
| "grad_norm": 0.49435628317139846, |
| "learning_rate": 9.174516115835181e-06, |
| "loss": 0.1713, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.37174982431482784, |
| "grad_norm": 0.5360306241142581, |
| "learning_rate": 9.171475761332985e-06, |
| "loss": 0.1977, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.3724525650035137, |
| "grad_norm": 0.5161474322247328, |
| "learning_rate": 9.168430323836351e-06, |
| "loss": 0.1885, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.37315530569219957, |
| "grad_norm": 0.5076498278434765, |
| "learning_rate": 9.165379807056187e-06, |
| "loss": 0.172, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.37385804638088543, |
| "grad_norm": 0.5281747548967208, |
| "learning_rate": 9.162324214709582e-06, |
| "loss": 0.211, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.37456078706957135, |
| "grad_norm": 0.543230684176949, |
| "learning_rate": 9.159263550519814e-06, |
| "loss": 0.1921, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.3752635277582572, |
| "grad_norm": 0.5084812933944886, |
| "learning_rate": 9.15619781821634e-06, |
| "loss": 0.1841, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.3759662684469431, |
| "grad_norm": 0.5291350492742669, |
| "learning_rate": 9.153127021534792e-06, |
| "loss": 0.2138, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.37666900913562895, |
| "grad_norm": 0.4811297400412177, |
| "learning_rate": 9.150051164216976e-06, |
| "loss": 0.184, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.3773717498243148, |
| "grad_norm": 0.5151432859399669, |
| "learning_rate": 9.146970250010857e-06, |
| "loss": 0.1844, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.3780744905130007, |
| "grad_norm": 0.5399791772673718, |
| "learning_rate": 9.143884282670572e-06, |
| "loss": 0.2047, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.3787772312016866, |
| "grad_norm": 0.5142260800657374, |
| "learning_rate": 9.140793265956405e-06, |
| "loss": 0.1737, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.37947997189037247, |
| "grad_norm": 0.516094797857363, |
| "learning_rate": 9.1376972036348e-06, |
| "loss": 0.2008, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.38018271257905834, |
| "grad_norm": 0.5039266568929068, |
| "learning_rate": 9.13459609947835e-06, |
| "loss": 0.1791, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.3808854532677442, |
| "grad_norm": 0.5331270278668164, |
| "learning_rate": 9.131489957265785e-06, |
| "loss": 0.1796, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.38158819395643007, |
| "grad_norm": 0.530022538198907, |
| "learning_rate": 9.12837878078198e-06, |
| "loss": 0.2087, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.38229093464511593, |
| "grad_norm": 0.49190936568072163, |
| "learning_rate": 9.125262573817937e-06, |
| "loss": 0.1788, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.38299367533380185, |
| "grad_norm": 0.49930721918609494, |
| "learning_rate": 9.122141340170797e-06, |
| "loss": 0.191, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.3836964160224877, |
| "grad_norm": 0.5201647469830472, |
| "learning_rate": 9.119015083643819e-06, |
| "loss": 0.1956, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.3843991567111736, |
| "grad_norm": 0.5114303438446094, |
| "learning_rate": 9.115883808046388e-06, |
| "loss": 0.1813, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.38510189739985945, |
| "grad_norm": 0.5120282022833964, |
| "learning_rate": 9.112747517193998e-06, |
| "loss": 0.1968, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.3858046380885453, |
| "grad_norm": 0.5230390160471704, |
| "learning_rate": 9.10960621490826e-06, |
| "loss": 0.2197, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.3865073787772312, |
| "grad_norm": 0.5079726838327981, |
| "learning_rate": 9.106459905016889e-06, |
| "loss": 0.1902, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.3872101194659171, |
| "grad_norm": 0.5035005825294026, |
| "learning_rate": 9.103308591353704e-06, |
| "loss": 0.1952, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.38791286015460297, |
| "grad_norm": 0.516238460175823, |
| "learning_rate": 9.100152277758616e-06, |
| "loss": 0.1855, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.38861560084328883, |
| "grad_norm": 0.5267325979480687, |
| "learning_rate": 9.096990968077632e-06, |
| "loss": 0.2013, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.3893183415319747, |
| "grad_norm": 0.5087934813758571, |
| "learning_rate": 9.093824666162851e-06, |
| "loss": 0.1747, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.39002108222066056, |
| "grad_norm": 0.5016214126441332, |
| "learning_rate": 9.090653375872446e-06, |
| "loss": 0.1749, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.39072382290934643, |
| "grad_norm": 0.5568674230647427, |
| "learning_rate": 9.087477101070676e-06, |
| "loss": 0.1934, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.39142656359803235, |
| "grad_norm": 0.5258018583878791, |
| "learning_rate": 9.08429584562787e-06, |
| "loss": 0.2019, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.3921293042867182, |
| "grad_norm": 0.4839118458012187, |
| "learning_rate": 9.081109613420428e-06, |
| "loss": 0.1882, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.3928320449754041, |
| "grad_norm": 0.537561444315623, |
| "learning_rate": 9.07791840833081e-06, |
| "loss": 0.182, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.39353478566408995, |
| "grad_norm": 0.4880111814877124, |
| "learning_rate": 9.07472223424754e-06, |
| "loss": 0.1779, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.3942375263527758, |
| "grad_norm": 0.47498275665443646, |
| "learning_rate": 9.071521095065198e-06, |
| "loss": 0.1738, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.3949402670414617, |
| "grad_norm": 0.5312325520155257, |
| "learning_rate": 9.068314994684408e-06, |
| "loss": 0.2117, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.3956430077301476, |
| "grad_norm": 0.5301298438603556, |
| "learning_rate": 9.065103937011845e-06, |
| "loss": 0.2019, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.39634574841883347, |
| "grad_norm": 0.5128028520964962, |
| "learning_rate": 9.061887925960219e-06, |
| "loss": 0.1727, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.39704848910751933, |
| "grad_norm": 0.5366622811720921, |
| "learning_rate": 9.058666965448284e-06, |
| "loss": 0.1896, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.3977512297962052, |
| "grad_norm": 0.5117621492230902, |
| "learning_rate": 9.055441059400817e-06, |
| "loss": 0.1978, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.39845397048489106, |
| "grad_norm": 0.4915840023844641, |
| "learning_rate": 9.05221021174862e-06, |
| "loss": 0.1712, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.39915671117357693, |
| "grad_norm": 0.5108306348480334, |
| "learning_rate": 9.048974426428527e-06, |
| "loss": 0.1786, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.39985945186226285, |
| "grad_norm": 0.6042044973210363, |
| "learning_rate": 9.04573370738338e-06, |
| "loss": 0.2518, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.4005621925509487, |
| "grad_norm": 0.4896303170997346, |
| "learning_rate": 9.042488058562036e-06, |
| "loss": 0.1755, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.4012649332396346, |
| "grad_norm": 0.5178900338599037, |
| "learning_rate": 9.039237483919355e-06, |
| "loss": 0.1779, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.40196767392832045, |
| "grad_norm": 0.5035747551640741, |
| "learning_rate": 9.035981987416204e-06, |
| "loss": 0.1826, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.4026704146170063, |
| "grad_norm": 0.4841001561572722, |
| "learning_rate": 9.032721573019445e-06, |
| "loss": 0.1761, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.4033731553056922, |
| "grad_norm": 0.4894780615391314, |
| "learning_rate": 9.029456244701933e-06, |
| "loss": 0.1946, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.4040758959943781, |
| "grad_norm": 0.4931922831677282, |
| "learning_rate": 9.026186006442512e-06, |
| "loss": 0.1709, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.40477863668306396, |
| "grad_norm": 0.5294896422319311, |
| "learning_rate": 9.022910862226005e-06, |
| "loss": 0.2096, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.40548137737174983, |
| "grad_norm": 0.52772562716086, |
| "learning_rate": 9.019630816043218e-06, |
| "loss": 0.1929, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.4061841180604357, |
| "grad_norm": 0.5262259523881948, |
| "learning_rate": 9.016345871890927e-06, |
| "loss": 0.1909, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.40688685874912156, |
| "grad_norm": 0.4958464311424527, |
| "learning_rate": 9.013056033771874e-06, |
| "loss": 0.1829, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.4075895994378074, |
| "grad_norm": 0.49237353080900215, |
| "learning_rate": 9.009761305694771e-06, |
| "loss": 0.1674, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.40829234012649335, |
| "grad_norm": 0.5543604620220995, |
| "learning_rate": 9.006461691674282e-06, |
| "loss": 0.2006, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.4089950808151792, |
| "grad_norm": 0.5570444226404165, |
| "learning_rate": 9.003157195731028e-06, |
| "loss": 0.2088, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.4096978215038651, |
| "grad_norm": 0.5289121008170838, |
| "learning_rate": 8.999847821891578e-06, |
| "loss": 0.1617, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.41040056219255094, |
| "grad_norm": 0.5016680673979437, |
| "learning_rate": 8.996533574188446e-06, |
| "loss": 0.164, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.4111033028812368, |
| "grad_norm": 0.5546393120307688, |
| "learning_rate": 8.99321445666008e-06, |
| "loss": 0.2161, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.4118060435699227, |
| "grad_norm": 0.5212929936467661, |
| "learning_rate": 8.989890473350869e-06, |
| "loss": 0.1761, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.4125087842586086, |
| "grad_norm": 0.547983163784588, |
| "learning_rate": 8.986561628311125e-06, |
| "loss": 0.2059, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.41321152494729446, |
| "grad_norm": 0.5060325503395477, |
| "learning_rate": 8.983227925597089e-06, |
| "loss": 0.1593, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.4139142656359803, |
| "grad_norm": 0.5136669444711566, |
| "learning_rate": 8.979889369270918e-06, |
| "loss": 0.1866, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.4146170063246662, |
| "grad_norm": 0.4796282381622905, |
| "learning_rate": 8.97654596340068e-06, |
| "loss": 0.1658, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.41531974701335206, |
| "grad_norm": 0.4949764849962763, |
| "learning_rate": 8.973197712060362e-06, |
| "loss": 0.1444, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.4160224877020379, |
| "grad_norm": 0.5618864872781972, |
| "learning_rate": 8.969844619329846e-06, |
| "loss": 0.2107, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.41672522839072385, |
| "grad_norm": 0.4998169797231573, |
| "learning_rate": 8.966486689294917e-06, |
| "loss": 0.1839, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.4174279690794097, |
| "grad_norm": 0.5208193294012201, |
| "learning_rate": 8.963123926047256e-06, |
| "loss": 0.1898, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.4181307097680956, |
| "grad_norm": 0.48987306167909656, |
| "learning_rate": 8.959756333684428e-06, |
| "loss": 0.1804, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.41883345045678144, |
| "grad_norm": 0.5429426019102795, |
| "learning_rate": 8.956383916309888e-06, |
| "loss": 0.2057, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.4195361911454673, |
| "grad_norm": 0.5093881979661048, |
| "learning_rate": 8.953006678032964e-06, |
| "loss": 0.1877, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.4202389318341532, |
| "grad_norm": 0.5064829784458187, |
| "learning_rate": 8.94962462296887e-06, |
| "loss": 0.2043, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.4209416725228391, |
| "grad_norm": 0.5358570127635844, |
| "learning_rate": 8.946237755238676e-06, |
| "loss": 0.1894, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.42164441321152496, |
| "grad_norm": 0.49467508377867137, |
| "learning_rate": 8.942846078969323e-06, |
| "loss": 0.1701, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.4223471539002108, |
| "grad_norm": 0.5277598519911398, |
| "learning_rate": 8.93944959829361e-06, |
| "loss": 0.2024, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.4230498945888967, |
| "grad_norm": 0.5224316014682783, |
| "learning_rate": 8.93604831735019e-06, |
| "loss": 0.1857, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.42375263527758256, |
| "grad_norm": 0.4899632798167767, |
| "learning_rate": 8.932642240283567e-06, |
| "loss": 0.1685, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.4244553759662684, |
| "grad_norm": 0.5192534676901748, |
| "learning_rate": 8.929231371244087e-06, |
| "loss": 0.1911, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.42515811665495434, |
| "grad_norm": 0.5469056276784454, |
| "learning_rate": 8.925815714387936e-06, |
| "loss": 0.2044, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.4258608573436402, |
| "grad_norm": 0.512069981775501, |
| "learning_rate": 8.922395273877132e-06, |
| "loss": 0.1768, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.4265635980323261, |
| "grad_norm": 0.49391731200550093, |
| "learning_rate": 8.918970053879527e-06, |
| "loss": 0.1828, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.42726633872101194, |
| "grad_norm": 0.5050402167748838, |
| "learning_rate": 8.915540058568792e-06, |
| "loss": 0.1892, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.4279690794096978, |
| "grad_norm": 0.5133347815366437, |
| "learning_rate": 8.912105292124417e-06, |
| "loss": 0.2006, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.42867182009838367, |
| "grad_norm": 0.5376239794010119, |
| "learning_rate": 8.90866575873171e-06, |
| "loss": 0.2076, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.4293745607870696, |
| "grad_norm": 0.5393211059228664, |
| "learning_rate": 8.905221462581784e-06, |
| "loss": 0.2013, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.43007730147575546, |
| "grad_norm": 0.5090627949640147, |
| "learning_rate": 8.901772407871553e-06, |
| "loss": 0.192, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.4307800421644413, |
| "grad_norm": 0.5333308319943344, |
| "learning_rate": 8.898318598803737e-06, |
| "loss": 0.2015, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.4314827828531272, |
| "grad_norm": 0.5259173989837693, |
| "learning_rate": 8.894860039586841e-06, |
| "loss": 0.1924, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.43218552354181305, |
| "grad_norm": 0.5068982404113911, |
| "learning_rate": 8.891396734435164e-06, |
| "loss": 0.1726, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.4328882642304989, |
| "grad_norm": 0.5255620473631372, |
| "learning_rate": 8.887928687568785e-06, |
| "loss": 0.1939, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.43359100491918484, |
| "grad_norm": 0.5158930229508965, |
| "learning_rate": 8.884455903213562e-06, |
| "loss": 0.2029, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.4342937456078707, |
| "grad_norm": 0.5108134855434232, |
| "learning_rate": 8.880978385601127e-06, |
| "loss": 0.1753, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.4349964862965566, |
| "grad_norm": 0.5630010933594187, |
| "learning_rate": 8.877496138968874e-06, |
| "loss": 0.213, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.43569922698524244, |
| "grad_norm": 0.5243389693119963, |
| "learning_rate": 8.874009167559968e-06, |
| "loss": 0.1858, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.4364019676739283, |
| "grad_norm": 0.5096070989662833, |
| "learning_rate": 8.870517475623322e-06, |
| "loss": 0.1759, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.43710470836261417, |
| "grad_norm": 0.536046658809568, |
| "learning_rate": 8.867021067413608e-06, |
| "loss": 0.1882, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.4378074490513001, |
| "grad_norm": 0.5206843932833073, |
| "learning_rate": 8.863519947191242e-06, |
| "loss": 0.1558, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.43851018973998596, |
| "grad_norm": 0.5298564998445344, |
| "learning_rate": 8.86001411922238e-06, |
| "loss": 0.1971, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.4392129304286718, |
| "grad_norm": 0.5389102910060309, |
| "learning_rate": 8.856503587778922e-06, |
| "loss": 0.1817, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.4399156711173577, |
| "grad_norm": 0.5715625478369358, |
| "learning_rate": 8.852988357138488e-06, |
| "loss": 0.2288, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.44061841180604355, |
| "grad_norm": 0.5153652268496476, |
| "learning_rate": 8.849468431584432e-06, |
| "loss": 0.1856, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.4413211524947294, |
| "grad_norm": 0.5845746770538888, |
| "learning_rate": 8.845943815405827e-06, |
| "loss": 0.1841, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.44202389318341534, |
| "grad_norm": 0.496201396481146, |
| "learning_rate": 8.842414512897457e-06, |
| "loss": 0.1778, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.4427266338721012, |
| "grad_norm": 0.5104474152486059, |
| "learning_rate": 8.838880528359826e-06, |
| "loss": 0.1864, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.44342937456078707, |
| "grad_norm": 0.5058086912236018, |
| "learning_rate": 8.835341866099136e-06, |
| "loss": 0.1884, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.44413211524947294, |
| "grad_norm": 0.506448200952019, |
| "learning_rate": 8.831798530427289e-06, |
| "loss": 0.1828, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.4448348559381588, |
| "grad_norm": 0.5245179863804086, |
| "learning_rate": 8.828250525661884e-06, |
| "loss": 0.191, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.44553759662684467, |
| "grad_norm": 0.5005515175671515, |
| "learning_rate": 8.824697856126206e-06, |
| "loss": 0.1761, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.4462403373155306, |
| "grad_norm": 0.521240908293417, |
| "learning_rate": 8.82114052614923e-06, |
| "loss": 0.2034, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.44694307800421645, |
| "grad_norm": 0.538009865575798, |
| "learning_rate": 8.817578540065605e-06, |
| "loss": 0.1767, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.4476458186929023, |
| "grad_norm": 0.5706487029627249, |
| "learning_rate": 8.814011902215654e-06, |
| "loss": 0.2107, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.4483485593815882, |
| "grad_norm": 0.48577016697100317, |
| "learning_rate": 8.81044061694537e-06, |
| "loss": 0.1592, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.44905130007027405, |
| "grad_norm": 0.5086273769884814, |
| "learning_rate": 8.806864688606409e-06, |
| "loss": 0.1993, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.4497540407589599, |
| "grad_norm": 0.5213927236920854, |
| "learning_rate": 8.80328412155608e-06, |
| "loss": 0.1938, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.45045678144764584, |
| "grad_norm": 0.4932799721037502, |
| "learning_rate": 8.799698920157348e-06, |
| "loss": 0.1541, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.4511595221363317, |
| "grad_norm": 0.5212558600526757, |
| "learning_rate": 8.796109088778831e-06, |
| "loss": 0.1771, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.45186226282501757, |
| "grad_norm": 0.5251417804862062, |
| "learning_rate": 8.792514631794778e-06, |
| "loss": 0.1728, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.45256500351370343, |
| "grad_norm": 0.5585069520314704, |
| "learning_rate": 8.788915553585079e-06, |
| "loss": 0.2139, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.4532677442023893, |
| "grad_norm": 0.5398054240125882, |
| "learning_rate": 8.785311858535254e-06, |
| "loss": 0.187, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.45397048489107517, |
| "grad_norm": 0.5192000438530474, |
| "learning_rate": 8.781703551036451e-06, |
| "loss": 0.1988, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.4546732255797611, |
| "grad_norm": 0.5546647321834434, |
| "learning_rate": 8.77809063548544e-06, |
| "loss": 0.1818, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.45537596626844695, |
| "grad_norm": 0.5082569452775141, |
| "learning_rate": 8.774473116284598e-06, |
| "loss": 0.1952, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.4560787069571328, |
| "grad_norm": 0.5240312521252924, |
| "learning_rate": 8.770850997841918e-06, |
| "loss": 0.2161, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.4567814476458187, |
| "grad_norm": 0.550176453635836, |
| "learning_rate": 8.767224284570999e-06, |
| "loss": 0.2338, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.45748418833450455, |
| "grad_norm": 0.5404600992817532, |
| "learning_rate": 8.763592980891031e-06, |
| "loss": 0.2245, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.45818692902319047, |
| "grad_norm": 0.4826072134962918, |
| "learning_rate": 8.759957091226805e-06, |
| "loss": 0.1642, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.45888966971187634, |
| "grad_norm": 0.49177110191527174, |
| "learning_rate": 8.756316620008697e-06, |
| "loss": 0.1742, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.4595924104005622, |
| "grad_norm": 0.521888175445801, |
| "learning_rate": 8.752671571672664e-06, |
| "loss": 0.1955, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.46029515108924807, |
| "grad_norm": 0.5316702516376998, |
| "learning_rate": 8.749021950660243e-06, |
| "loss": 0.211, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.46099789177793393, |
| "grad_norm": 0.5374770965759721, |
| "learning_rate": 8.745367761418546e-06, |
| "loss": 0.2005, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.4617006324666198, |
| "grad_norm": 0.5604330034770029, |
| "learning_rate": 8.74170900840024e-06, |
| "loss": 0.2033, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.4624033731553057, |
| "grad_norm": 0.5418410632315168, |
| "learning_rate": 8.738045696063566e-06, |
| "loss": 0.2006, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.4631061138439916, |
| "grad_norm": 0.5130755032814118, |
| "learning_rate": 8.734377828872315e-06, |
| "loss": 0.1835, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.46380885453267745, |
| "grad_norm": 0.5184391654004975, |
| "learning_rate": 8.730705411295826e-06, |
| "loss": 0.1857, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.4645115952213633, |
| "grad_norm": 0.5038674683244418, |
| "learning_rate": 8.727028447808983e-06, |
| "loss": 0.1703, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.4652143359100492, |
| "grad_norm": 0.47564418179318896, |
| "learning_rate": 8.723346942892217e-06, |
| "loss": 0.1723, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.46591707659873505, |
| "grad_norm": 0.5315309419432281, |
| "learning_rate": 8.719660901031482e-06, |
| "loss": 0.2046, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.46661981728742097, |
| "grad_norm": 0.4811380443536227, |
| "learning_rate": 8.715970326718269e-06, |
| "loss": 0.1709, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.46732255797610683, |
| "grad_norm": 0.49909000040476986, |
| "learning_rate": 8.712275224449583e-06, |
| "loss": 0.1749, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.4680252986647927, |
| "grad_norm": 0.5132811766370297, |
| "learning_rate": 8.708575598727958e-06, |
| "loss": 0.195, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.46872803935347856, |
| "grad_norm": 0.5302404539766977, |
| "learning_rate": 8.704871454061428e-06, |
| "loss": 0.187, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.46943078004216443, |
| "grad_norm": 0.4928463401919786, |
| "learning_rate": 8.70116279496354e-06, |
| "loss": 0.1605, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.4701335207308503, |
| "grad_norm": 0.4947320640053909, |
| "learning_rate": 8.697449625953343e-06, |
| "loss": 0.1587, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.4708362614195362, |
| "grad_norm": 0.5819740823592446, |
| "learning_rate": 8.693731951555376e-06, |
| "loss": 0.2296, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.4715390021082221, |
| "grad_norm": 0.4945779206239264, |
| "learning_rate": 8.690009776299673e-06, |
| "loss": 0.149, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.47224174279690795, |
| "grad_norm": 0.5421868791658425, |
| "learning_rate": 8.686283104721748e-06, |
| "loss": 0.1717, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.4729444834855938, |
| "grad_norm": 0.5700508341647283, |
| "learning_rate": 8.6825519413626e-06, |
| "loss": 0.2141, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.4736472241742797, |
| "grad_norm": 0.5156963620890469, |
| "learning_rate": 8.678816290768695e-06, |
| "loss": 0.1989, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.47434996486296555, |
| "grad_norm": 0.5122207370299912, |
| "learning_rate": 8.675076157491969e-06, |
| "loss": 0.1873, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.47505270555165147, |
| "grad_norm": 0.5337943374422462, |
| "learning_rate": 8.671331546089818e-06, |
| "loss": 0.2019, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.47575544624033733, |
| "grad_norm": 0.5251324360814932, |
| "learning_rate": 8.667582461125101e-06, |
| "loss": 0.2005, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.4764581869290232, |
| "grad_norm": 0.5065576843514569, |
| "learning_rate": 8.663828907166123e-06, |
| "loss": 0.1747, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.47716092761770906, |
| "grad_norm": 0.5253098567795085, |
| "learning_rate": 8.660070888786633e-06, |
| "loss": 0.1803, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.47786366830639493, |
| "grad_norm": 0.5392312860851205, |
| "learning_rate": 8.656308410565828e-06, |
| "loss": 0.2155, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.4785664089950808, |
| "grad_norm": 0.49303438802471045, |
| "learning_rate": 8.652541477088327e-06, |
| "loss": 0.1881, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.4792691496837667, |
| "grad_norm": 0.5425096543739556, |
| "learning_rate": 8.64877009294419e-06, |
| "loss": 0.2057, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.4799718903724526, |
| "grad_norm": 0.518810486720963, |
| "learning_rate": 8.644994262728895e-06, |
| "loss": 0.1725, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.48067463106113845, |
| "grad_norm": 0.5336909571595239, |
| "learning_rate": 8.64121399104333e-06, |
| "loss": 0.2098, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.4813773717498243, |
| "grad_norm": 0.5508261128077607, |
| "learning_rate": 8.637429282493813e-06, |
| "loss": 0.2113, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.4820801124385102, |
| "grad_norm": 0.5313395208357798, |
| "learning_rate": 8.633640141692052e-06, |
| "loss": 0.2102, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.48278285312719604, |
| "grad_norm": 0.5049708987416442, |
| "learning_rate": 8.629846573255162e-06, |
| "loss": 0.1892, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.48348559381588196, |
| "grad_norm": 0.6129720209725857, |
| "learning_rate": 8.626048581805652e-06, |
| "loss": 0.2177, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.48418833450456783, |
| "grad_norm": 0.5238844175884503, |
| "learning_rate": 8.622246171971425e-06, |
| "loss": 0.1909, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.4848910751932537, |
| "grad_norm": 0.5424145483910142, |
| "learning_rate": 8.61843934838576e-06, |
| "loss": 0.2277, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.48559381588193956, |
| "grad_norm": 0.5293822135419718, |
| "learning_rate": 8.614628115687318e-06, |
| "loss": 0.2099, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.4862965565706254, |
| "grad_norm": 0.49282770222211403, |
| "learning_rate": 8.610812478520137e-06, |
| "loss": 0.1789, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.4869992972593113, |
| "grad_norm": 0.5057276784193864, |
| "learning_rate": 8.606992441533615e-06, |
| "loss": 0.1973, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.4877020379479972, |
| "grad_norm": 0.5595811057572404, |
| "learning_rate": 8.603168009382513e-06, |
| "loss": 0.2241, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.4884047786366831, |
| "grad_norm": 0.5048847106159845, |
| "learning_rate": 8.59933918672695e-06, |
| "loss": 0.1768, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.48910751932536894, |
| "grad_norm": 0.5000796298258966, |
| "learning_rate": 8.595505978232394e-06, |
| "loss": 0.1862, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.4898102600140548, |
| "grad_norm": 0.5264485387449231, |
| "learning_rate": 8.591668388569656e-06, |
| "loss": 0.2124, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.4905130007027407, |
| "grad_norm": 0.5411644040686916, |
| "learning_rate": 8.587826422414886e-06, |
| "loss": 0.2023, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.49121574139142654, |
| "grad_norm": 0.5183969203449558, |
| "learning_rate": 8.583980084449566e-06, |
| "loss": 0.199, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.49191848208011246, |
| "grad_norm": 0.46967921241106914, |
| "learning_rate": 8.580129379360508e-06, |
| "loss": 0.1661, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.49262122276879833, |
| "grad_norm": 0.516712650806593, |
| "learning_rate": 8.576274311839843e-06, |
| "loss": 0.1889, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.4933239634574842, |
| "grad_norm": 0.5133872542750583, |
| "learning_rate": 8.572414886585015e-06, |
| "loss": 0.1956, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.49402670414617006, |
| "grad_norm": 0.5238151140101951, |
| "learning_rate": 8.568551108298785e-06, |
| "loss": 0.1885, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.4947294448348559, |
| "grad_norm": 0.5059425082115941, |
| "learning_rate": 8.564682981689214e-06, |
| "loss": 0.1795, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.4954321855235418, |
| "grad_norm": 0.5334482963601956, |
| "learning_rate": 8.56081051146966e-06, |
| "loss": 0.1932, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.4961349262122277, |
| "grad_norm": 0.5315230969579532, |
| "learning_rate": 8.556933702358774e-06, |
| "loss": 0.1955, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.4968376669009136, |
| "grad_norm": 0.5109056548326143, |
| "learning_rate": 8.553052559080498e-06, |
| "loss": 0.1929, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.49754040758959944, |
| "grad_norm": 0.4748541849992265, |
| "learning_rate": 8.549167086364056e-06, |
| "loss": 0.1592, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.4982431482782853, |
| "grad_norm": 0.5009205672759742, |
| "learning_rate": 8.545277288943938e-06, |
| "loss": 0.1932, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.4989458889669712, |
| "grad_norm": 0.532777145103931, |
| "learning_rate": 8.541383171559911e-06, |
| "loss": 0.2104, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.49964862965565704, |
| "grad_norm": 0.4735190849852184, |
| "learning_rate": 8.537484738957009e-06, |
| "loss": 0.164, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.500351370344343, |
| "grad_norm": 0.476645516988027, |
| "learning_rate": 8.533581995885515e-06, |
| "loss": 0.1831, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.5010541110330288, |
| "grad_norm": 0.49952632234721994, |
| "learning_rate": 8.529674947100974e-06, |
| "loss": 0.191, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.5017568517217147, |
| "grad_norm": 0.5326196925860098, |
| "learning_rate": 8.525763597364171e-06, |
| "loss": 0.2019, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.5024595924104006, |
| "grad_norm": 0.501923796884598, |
| "learning_rate": 8.52184795144113e-06, |
| "loss": 0.1771, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.5031623330990864, |
| "grad_norm": 0.541991243282928, |
| "learning_rate": 8.51792801410312e-06, |
| "loss": 0.2227, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.5038650737877723, |
| "grad_norm": 0.4836988086090048, |
| "learning_rate": 8.514003790126628e-06, |
| "loss": 0.1654, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.5045678144764582, |
| "grad_norm": 0.5307517555915229, |
| "learning_rate": 8.510075284293371e-06, |
| "loss": 0.2015, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.5052705551651441, |
| "grad_norm": 0.5050575772007962, |
| "learning_rate": 8.506142501390284e-06, |
| "loss": 0.1885, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.5059732958538299, |
| "grad_norm": 0.5239923393931152, |
| "learning_rate": 8.502205446209506e-06, |
| "loss": 0.1888, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.5066760365425158, |
| "grad_norm": 0.5351515639161576, |
| "learning_rate": 8.49826412354839e-06, |
| "loss": 0.1886, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.5073787772312017, |
| "grad_norm": 0.5299030400055579, |
| "learning_rate": 8.494318538209485e-06, |
| "loss": 0.2103, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.5080815179198875, |
| "grad_norm": 0.49715068879445723, |
| "learning_rate": 8.490368695000537e-06, |
| "loss": 0.1676, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.5087842586085735, |
| "grad_norm": 0.5509751380513195, |
| "learning_rate": 8.486414598734479e-06, |
| "loss": 0.2308, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.5094869992972593, |
| "grad_norm": 0.5227387590991529, |
| "learning_rate": 8.482456254229421e-06, |
| "loss": 0.2039, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.5101897399859452, |
| "grad_norm": 0.5193247462295407, |
| "learning_rate": 8.47849366630866e-06, |
| "loss": 0.1968, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.5108924806746311, |
| "grad_norm": 0.5351993261677674, |
| "learning_rate": 8.474526839800654e-06, |
| "loss": 0.2064, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.5115952213633169, |
| "grad_norm": 0.5113165182003795, |
| "learning_rate": 8.470555779539034e-06, |
| "loss": 0.1812, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.5122979620520028, |
| "grad_norm": 0.5183643401615331, |
| "learning_rate": 8.46658049036258e-06, |
| "loss": 0.1847, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.5130007027406887, |
| "grad_norm": 0.49914598016030814, |
| "learning_rate": 8.462600977115237e-06, |
| "loss": 0.1842, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.5137034434293746, |
| "grad_norm": 0.49713477165140296, |
| "learning_rate": 8.458617244646085e-06, |
| "loss": 0.1639, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.5144061841180604, |
| "grad_norm": 0.5162993659845747, |
| "learning_rate": 8.454629297809355e-06, |
| "loss": 0.1924, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.5151089248067463, |
| "grad_norm": 0.48434349466475823, |
| "learning_rate": 8.450637141464407e-06, |
| "loss": 0.1778, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.5158116654954322, |
| "grad_norm": 0.49000304311803644, |
| "learning_rate": 8.446640780475735e-06, |
| "loss": 0.1874, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.516514406184118, |
| "grad_norm": 0.5101984669847915, |
| "learning_rate": 8.442640219712949e-06, |
| "loss": 0.1838, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.517217146872804, |
| "grad_norm": 0.5493059286665023, |
| "learning_rate": 8.438635464050786e-06, |
| "loss": 0.1883, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.5179198875614898, |
| "grad_norm": 0.5358418918029862, |
| "learning_rate": 8.43462651836909e-06, |
| "loss": 0.1951, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.5186226282501757, |
| "grad_norm": 0.5328193680745292, |
| "learning_rate": 8.430613387552809e-06, |
| "loss": 0.2101, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.5193253689388616, |
| "grad_norm": 0.5101375728411082, |
| "learning_rate": 8.42659607649199e-06, |
| "loss": 0.1697, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.5200281096275474, |
| "grad_norm": 0.5068924928786236, |
| "learning_rate": 8.42257459008178e-06, |
| "loss": 0.1912, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.5207308503162333, |
| "grad_norm": 0.5395748819849159, |
| "learning_rate": 8.418548933222406e-06, |
| "loss": 0.2013, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.5214335910049192, |
| "grad_norm": 0.5302590521377698, |
| "learning_rate": 8.414519110819183e-06, |
| "loss": 0.1974, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.5221363316936051, |
| "grad_norm": 0.5177183035638452, |
| "learning_rate": 8.410485127782498e-06, |
| "loss": 0.1865, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.5228390723822909, |
| "grad_norm": 0.5123014715477399, |
| "learning_rate": 8.40644698902781e-06, |
| "loss": 0.1929, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.5235418130709768, |
| "grad_norm": 0.5518810669431153, |
| "learning_rate": 8.402404699475637e-06, |
| "loss": 0.2242, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.5242445537596627, |
| "grad_norm": 0.5259717237421058, |
| "learning_rate": 8.398358264051563e-06, |
| "loss": 0.1978, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.5249472944483485, |
| "grad_norm": 0.4986579106356241, |
| "learning_rate": 8.394307687686219e-06, |
| "loss": 0.1808, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.5256500351370345, |
| "grad_norm": 0.4979969646630182, |
| "learning_rate": 8.390252975315276e-06, |
| "loss": 0.1979, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.5263527758257203, |
| "grad_norm": 0.49601425793058973, |
| "learning_rate": 8.386194131879458e-06, |
| "loss": 0.1782, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.5270555165144062, |
| "grad_norm": 0.5073407231128871, |
| "learning_rate": 8.382131162324512e-06, |
| "loss": 0.1775, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.5277582572030921, |
| "grad_norm": 0.501600479759895, |
| "learning_rate": 8.378064071601218e-06, |
| "loss": 0.2013, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.5284609978917779, |
| "grad_norm": 0.5294937893906491, |
| "learning_rate": 8.373992864665374e-06, |
| "loss": 0.1947, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.5291637385804638, |
| "grad_norm": 0.538107208892889, |
| "learning_rate": 8.369917546477794e-06, |
| "loss": 0.2031, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.5298664792691496, |
| "grad_norm": 0.5419919263290914, |
| "learning_rate": 8.365838122004311e-06, |
| "loss": 0.2157, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.5305692199578356, |
| "grad_norm": 0.49421087524687923, |
| "learning_rate": 8.361754596215745e-06, |
| "loss": 0.1822, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.5312719606465214, |
| "grad_norm": 0.5130499099727955, |
| "learning_rate": 8.357666974087928e-06, |
| "loss": 0.1858, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.5319747013352073, |
| "grad_norm": 0.546549568304958, |
| "learning_rate": 8.353575260601674e-06, |
| "loss": 0.2066, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.5326774420238932, |
| "grad_norm": 0.5474314359553429, |
| "learning_rate": 8.349479460742788e-06, |
| "loss": 0.1834, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.533380182712579, |
| "grad_norm": 0.5432435200665976, |
| "learning_rate": 8.345379579502054e-06, |
| "loss": 0.1978, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.534082923401265, |
| "grad_norm": 0.5272022508499994, |
| "learning_rate": 8.341275621875224e-06, |
| "loss": 0.1996, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.5347856640899508, |
| "grad_norm": 0.5336040416372276, |
| "learning_rate": 8.337167592863026e-06, |
| "loss": 0.1906, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.5354884047786367, |
| "grad_norm": 0.4651896643747724, |
| "learning_rate": 8.333055497471137e-06, |
| "loss": 0.1431, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.5361911454673226, |
| "grad_norm": 0.5509722540845838, |
| "learning_rate": 8.3289393407102e-06, |
| "loss": 0.2132, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.5368938861560084, |
| "grad_norm": 0.5233084034813252, |
| "learning_rate": 8.324819127595802e-06, |
| "loss": 0.1846, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.5375966268446943, |
| "grad_norm": 0.5194855578839023, |
| "learning_rate": 8.320694863148473e-06, |
| "loss": 0.2019, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.5382993675333801, |
| "grad_norm": 0.4748744123295701, |
| "learning_rate": 8.31656655239368e-06, |
| "loss": 0.1567, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.5390021082220661, |
| "grad_norm": 0.5536236422394342, |
| "learning_rate": 8.31243420036182e-06, |
| "loss": 0.2169, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.5397048489107519, |
| "grad_norm": 0.47265985270876465, |
| "learning_rate": 8.308297812088215e-06, |
| "loss": 0.1773, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.5404075895994378, |
| "grad_norm": 0.5133572713659621, |
| "learning_rate": 8.304157392613103e-06, |
| "loss": 0.1793, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.5411103302881237, |
| "grad_norm": 0.48828244081957234, |
| "learning_rate": 8.30001294698164e-06, |
| "loss": 0.164, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.5418130709768095, |
| "grad_norm": 0.526163241791338, |
| "learning_rate": 8.295864480243882e-06, |
| "loss": 0.1958, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.5425158116654955, |
| "grad_norm": 0.5397637143368118, |
| "learning_rate": 8.291711997454786e-06, |
| "loss": 0.202, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.5432185523541813, |
| "grad_norm": 0.5256648878700448, |
| "learning_rate": 8.287555503674204e-06, |
| "loss": 0.1945, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.5439212930428672, |
| "grad_norm": 0.48367565249712235, |
| "learning_rate": 8.283395003966873e-06, |
| "loss": 0.1703, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.5446240337315531, |
| "grad_norm": 0.515794431545339, |
| "learning_rate": 8.279230503402413e-06, |
| "loss": 0.1841, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.5453267744202389, |
| "grad_norm": 0.5103637275060798, |
| "learning_rate": 8.275062007055323e-06, |
| "loss": 0.1818, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.5460295151089248, |
| "grad_norm": 0.5271934797145162, |
| "learning_rate": 8.270889520004964e-06, |
| "loss": 0.1691, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.5467322557976106, |
| "grad_norm": 0.5107768942044677, |
| "learning_rate": 8.266713047335563e-06, |
| "loss": 0.1965, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.5474349964862966, |
| "grad_norm": 0.5243060256391675, |
| "learning_rate": 8.262532594136202e-06, |
| "loss": 0.1828, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.5481377371749825, |
| "grad_norm": 0.5073550978308514, |
| "learning_rate": 8.258348165500815e-06, |
| "loss": 0.1858, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.5488404778636683, |
| "grad_norm": 0.5000707758019513, |
| "learning_rate": 8.254159766528184e-06, |
| "loss": 0.1686, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.5495432185523542, |
| "grad_norm": 0.5065682350019561, |
| "learning_rate": 8.249967402321919e-06, |
| "loss": 0.1952, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.55024595924104, |
| "grad_norm": 0.5456644015029175, |
| "learning_rate": 8.24577107799047e-06, |
| "loss": 0.1992, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.550948699929726, |
| "grad_norm": 0.5605460822148821, |
| "learning_rate": 8.241570798647107e-06, |
| "loss": 0.2265, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.5516514406184118, |
| "grad_norm": 0.5153471040976462, |
| "learning_rate": 8.237366569409927e-06, |
| "loss": 0.1914, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.5523541813070977, |
| "grad_norm": 0.536498676224286, |
| "learning_rate": 8.23315839540183e-06, |
| "loss": 0.1965, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.5530569219957836, |
| "grad_norm": 0.5398731216126065, |
| "learning_rate": 8.22894628175053e-06, |
| "loss": 0.1924, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.5537596626844694, |
| "grad_norm": 0.5412329632942741, |
| "learning_rate": 8.224730233588539e-06, |
| "loss": 0.2031, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.5544624033731553, |
| "grad_norm": 0.49166134955419794, |
| "learning_rate": 8.220510256053162e-06, |
| "loss": 0.1701, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.5551651440618411, |
| "grad_norm": 0.5411757068250052, |
| "learning_rate": 8.216286354286499e-06, |
| "loss": 0.203, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.5558678847505271, |
| "grad_norm": 0.5055791983257125, |
| "learning_rate": 8.212058533435418e-06, |
| "loss": 0.1707, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.556570625439213, |
| "grad_norm": 0.5005713463900835, |
| "learning_rate": 8.207826798651575e-06, |
| "loss": 0.1724, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.5572733661278988, |
| "grad_norm": 0.5388542302369276, |
| "learning_rate": 8.20359115509139e-06, |
| "loss": 0.2061, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.5579761068165847, |
| "grad_norm": 0.5000375222239247, |
| "learning_rate": 8.199351607916048e-06, |
| "loss": 0.1661, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.5586788475052705, |
| "grad_norm": 0.5054661491915279, |
| "learning_rate": 8.19510816229149e-06, |
| "loss": 0.1752, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.5593815881939564, |
| "grad_norm": 0.5260455054665505, |
| "learning_rate": 8.190860823388402e-06, |
| "loss": 0.1769, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.5600843288826423, |
| "grad_norm": 0.5313271592798854, |
| "learning_rate": 8.186609596382222e-06, |
| "loss": 0.1903, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.5607870695713282, |
| "grad_norm": 0.5134128367032493, |
| "learning_rate": 8.182354486453123e-06, |
| "loss": 0.1699, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.5614898102600141, |
| "grad_norm": 0.5235982420004922, |
| "learning_rate": 8.178095498786007e-06, |
| "loss": 0.1778, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.5621925509486999, |
| "grad_norm": 0.49504886329354375, |
| "learning_rate": 8.173832638570503e-06, |
| "loss": 0.1714, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.5628952916373858, |
| "grad_norm": 0.5229810088775857, |
| "learning_rate": 8.169565911000958e-06, |
| "loss": 0.2053, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.5635980323260716, |
| "grad_norm": 0.48023017540213414, |
| "learning_rate": 8.165295321276433e-06, |
| "loss": 0.1695, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.5643007730147576, |
| "grad_norm": 0.4943594101833042, |
| "learning_rate": 8.161020874600695e-06, |
| "loss": 0.1727, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.5650035137034435, |
| "grad_norm": 0.5384022749347142, |
| "learning_rate": 8.156742576182208e-06, |
| "loss": 0.1883, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.5657062543921293, |
| "grad_norm": 0.5181464629326008, |
| "learning_rate": 8.152460431234132e-06, |
| "loss": 0.1652, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.5664089950808152, |
| "grad_norm": 0.5010507122028295, |
| "learning_rate": 8.148174444974313e-06, |
| "loss": 0.1786, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.567111735769501, |
| "grad_norm": 0.5337851852934541, |
| "learning_rate": 8.143884622625276e-06, |
| "loss": 0.1753, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.567814476458187, |
| "grad_norm": 0.49816613967519474, |
| "learning_rate": 8.139590969414224e-06, |
| "loss": 0.1682, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.5685172171468728, |
| "grad_norm": 0.5090345971257679, |
| "learning_rate": 8.135293490573029e-06, |
| "loss": 0.1756, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.5692199578355587, |
| "grad_norm": 0.5327856293158361, |
| "learning_rate": 8.130992191338216e-06, |
| "loss": 0.203, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.5699226985242446, |
| "grad_norm": 0.5450247599218729, |
| "learning_rate": 8.126687076950974e-06, |
| "loss": 0.2399, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.5706254392129304, |
| "grad_norm": 0.49736262284537636, |
| "learning_rate": 8.12237815265714e-06, |
| "loss": 0.1689, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.5713281799016163, |
| "grad_norm": 0.5025148715885837, |
| "learning_rate": 8.118065423707187e-06, |
| "loss": 0.1834, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.5720309205903021, |
| "grad_norm": 0.5225499168344663, |
| "learning_rate": 8.113748895356229e-06, |
| "loss": 0.1655, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.5727336612789881, |
| "grad_norm": 0.5295754034074439, |
| "learning_rate": 8.10942857286401e-06, |
| "loss": 0.2025, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.573436401967674, |
| "grad_norm": 0.5064954187259164, |
| "learning_rate": 8.105104461494896e-06, |
| "loss": 0.1758, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.5741391426563598, |
| "grad_norm": 0.5154575824391756, |
| "learning_rate": 8.10077656651787e-06, |
| "loss": 0.2125, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.5748418833450457, |
| "grad_norm": 0.5330202712762878, |
| "learning_rate": 8.096444893206524e-06, |
| "loss": 0.2047, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.5755446240337315, |
| "grad_norm": 0.5370877039064466, |
| "learning_rate": 8.092109446839056e-06, |
| "loss": 0.189, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.5762473647224174, |
| "grad_norm": 0.5975682017172536, |
| "learning_rate": 8.08777023269826e-06, |
| "loss": 0.2243, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.5769501054111033, |
| "grad_norm": 0.47951433169488117, |
| "learning_rate": 8.083427256071523e-06, |
| "loss": 0.1633, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.5776528460997892, |
| "grad_norm": 0.5016662797816358, |
| "learning_rate": 8.079080522250812e-06, |
| "loss": 0.175, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.5783555867884751, |
| "grad_norm": 0.539983951759243, |
| "learning_rate": 8.074730036532678e-06, |
| "loss": 0.2253, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.5790583274771609, |
| "grad_norm": 0.5107178641124133, |
| "learning_rate": 8.070375804218244e-06, |
| "loss": 0.1882, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.5797610681658468, |
| "grad_norm": 0.49893529645121637, |
| "learning_rate": 8.06601783061319e-06, |
| "loss": 0.1877, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.5804638088545326, |
| "grad_norm": 0.4980261424327893, |
| "learning_rate": 8.061656121027766e-06, |
| "loss": 0.1705, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.5811665495432186, |
| "grad_norm": 0.5059694073902211, |
| "learning_rate": 8.057290680776766e-06, |
| "loss": 0.1822, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.5818692902319045, |
| "grad_norm": 0.4774032938434036, |
| "learning_rate": 8.052921515179528e-06, |
| "loss": 0.1671, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.5825720309205903, |
| "grad_norm": 0.49938521358793453, |
| "learning_rate": 8.048548629559942e-06, |
| "loss": 0.1713, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.5832747716092762, |
| "grad_norm": 0.5471684119029511, |
| "learning_rate": 8.044172029246418e-06, |
| "loss": 0.2015, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.583977512297962, |
| "grad_norm": 0.5060906162510813, |
| "learning_rate": 8.0397917195719e-06, |
| "loss": 0.1737, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.5846802529866479, |
| "grad_norm": 0.511760606872915, |
| "learning_rate": 8.035407705873843e-06, |
| "loss": 0.1877, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.5853829936753338, |
| "grad_norm": 0.5161775988752425, |
| "learning_rate": 8.031019993494231e-06, |
| "loss": 0.1731, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.5860857343640197, |
| "grad_norm": 0.5043230288865019, |
| "learning_rate": 8.026628587779537e-06, |
| "loss": 0.2033, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.5867884750527056, |
| "grad_norm": 0.5546809360655157, |
| "learning_rate": 8.022233494080747e-06, |
| "loss": 0.2095, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.5874912157413914, |
| "grad_norm": 0.5140267887390528, |
| "learning_rate": 8.017834717753337e-06, |
| "loss": 0.1652, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.5881939564300773, |
| "grad_norm": 0.5227425873771978, |
| "learning_rate": 8.013432264157266e-06, |
| "loss": 0.2006, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.5888966971187631, |
| "grad_norm": 0.5475350248824223, |
| "learning_rate": 8.009026138656983e-06, |
| "loss": 0.1958, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.5895994378074491, |
| "grad_norm": 0.5060761591366437, |
| "learning_rate": 8.004616346621401e-06, |
| "loss": 0.1843, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.590302178496135, |
| "grad_norm": 0.5196943847533497, |
| "learning_rate": 8.00020289342391e-06, |
| "loss": 0.2025, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.5910049191848208, |
| "grad_norm": 0.5301167099931641, |
| "learning_rate": 7.995785784442355e-06, |
| "loss": 0.1844, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.5917076598735067, |
| "grad_norm": 0.5148146678785309, |
| "learning_rate": 7.99136502505904e-06, |
| "loss": 0.1932, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.5924104005621925, |
| "grad_norm": 0.49623445019704854, |
| "learning_rate": 7.98694062066071e-06, |
| "loss": 0.1826, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.5931131412508784, |
| "grad_norm": 0.5317236357310177, |
| "learning_rate": 7.982512576638556e-06, |
| "loss": 0.2059, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.5938158819395642, |
| "grad_norm": 0.5361323808096335, |
| "learning_rate": 7.97808089838821e-06, |
| "loss": 0.1941, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.5945186226282502, |
| "grad_norm": 0.503533977157267, |
| "learning_rate": 7.973645591309722e-06, |
| "loss": 0.1813, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.5952213633169361, |
| "grad_norm": 0.5719921027289341, |
| "learning_rate": 7.969206660807566e-06, |
| "loss": 0.21, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.5959241040056219, |
| "grad_norm": 0.5068126767235045, |
| "learning_rate": 7.964764112290641e-06, |
| "loss": 0.1973, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.5966268446943078, |
| "grad_norm": 0.513537054452568, |
| "learning_rate": 7.96031795117224e-06, |
| "loss": 0.1937, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.5973295853829936, |
| "grad_norm": 0.4734141319226508, |
| "learning_rate": 7.955868182870067e-06, |
| "loss": 0.1616, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.5980323260716796, |
| "grad_norm": 0.5320131363474777, |
| "learning_rate": 7.95141481280622e-06, |
| "loss": 0.1898, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.5987350667603655, |
| "grad_norm": 0.5268959314805932, |
| "learning_rate": 7.946957846407182e-06, |
| "loss": 0.2, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.5994378074490513, |
| "grad_norm": 0.5260806167494971, |
| "learning_rate": 7.942497289103825e-06, |
| "loss": 0.1945, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.6001405481377372, |
| "grad_norm": 0.554805066779978, |
| "learning_rate": 7.938033146331392e-06, |
| "loss": 0.2185, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.600843288826423, |
| "grad_norm": 0.48503127848140637, |
| "learning_rate": 7.933565423529495e-06, |
| "loss": 0.1851, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.6015460295151089, |
| "grad_norm": 0.48894225595082286, |
| "learning_rate": 7.92909412614211e-06, |
| "loss": 0.181, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.6022487702037947, |
| "grad_norm": 0.5521975195910774, |
| "learning_rate": 7.924619259617567e-06, |
| "loss": 0.2175, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.6029515108924807, |
| "grad_norm": 0.5192114495679688, |
| "learning_rate": 7.920140829408546e-06, |
| "loss": 0.2021, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.6036542515811666, |
| "grad_norm": 0.5039292999332279, |
| "learning_rate": 7.915658840972069e-06, |
| "loss": 0.1814, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.6043569922698524, |
| "grad_norm": 0.5077634967163096, |
| "learning_rate": 7.911173299769494e-06, |
| "loss": 0.2114, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.6050597329585383, |
| "grad_norm": 0.48220662569267886, |
| "learning_rate": 7.906684211266508e-06, |
| "loss": 0.158, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.6057624736472241, |
| "grad_norm": 0.502308562632445, |
| "learning_rate": 7.902191580933123e-06, |
| "loss": 0.1874, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.60646521433591, |
| "grad_norm": 0.497708237740537, |
| "learning_rate": 7.89769541424366e-06, |
| "loss": 0.1829, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.607167955024596, |
| "grad_norm": 0.5161766882263005, |
| "learning_rate": 7.893195716676754e-06, |
| "loss": 0.185, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.6078706957132818, |
| "grad_norm": 0.5069263975379218, |
| "learning_rate": 7.888692493715345e-06, |
| "loss": 0.1798, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.6085734364019677, |
| "grad_norm": 0.5033869041437443, |
| "learning_rate": 7.884185750846663e-06, |
| "loss": 0.1681, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.6092761770906535, |
| "grad_norm": 0.4864070026149026, |
| "learning_rate": 7.87967549356223e-06, |
| "loss": 0.1731, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.6099789177793394, |
| "grad_norm": 0.5542622782060213, |
| "learning_rate": 7.875161727357848e-06, |
| "loss": 0.2152, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.6106816584680252, |
| "grad_norm": 0.5236156647293086, |
| "learning_rate": 7.8706444577336e-06, |
| "loss": 0.2008, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.6113843991567112, |
| "grad_norm": 0.5258759841468933, |
| "learning_rate": 7.866123690193832e-06, |
| "loss": 0.2054, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.6120871398453971, |
| "grad_norm": 0.5164374327517247, |
| "learning_rate": 7.861599430247157e-06, |
| "loss": 0.1799, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.6127898805340829, |
| "grad_norm": 0.5105618406792289, |
| "learning_rate": 7.857071683406438e-06, |
| "loss": 0.1971, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.6134926212227688, |
| "grad_norm": 0.5069610593839458, |
| "learning_rate": 7.852540455188793e-06, |
| "loss": 0.1925, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.6141953619114546, |
| "grad_norm": 0.5050671644481959, |
| "learning_rate": 7.848005751115579e-06, |
| "loss": 0.19, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.6148981026001406, |
| "grad_norm": 0.5363105611849845, |
| "learning_rate": 7.843467576712387e-06, |
| "loss": 0.2099, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.6156008432888265, |
| "grad_norm": 0.5185051863527694, |
| "learning_rate": 7.838925937509038e-06, |
| "loss": 0.1797, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.6163035839775123, |
| "grad_norm": 0.5249263436811443, |
| "learning_rate": 7.83438083903958e-06, |
| "loss": 0.1787, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.6170063246661982, |
| "grad_norm": 0.4986807783693537, |
| "learning_rate": 7.829832286842265e-06, |
| "loss": 0.1955, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.617709065354884, |
| "grad_norm": 0.5533489886091887, |
| "learning_rate": 7.825280286459561e-06, |
| "loss": 0.203, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.6184118060435699, |
| "grad_norm": 0.5821797082052109, |
| "learning_rate": 7.82072484343814e-06, |
| "loss": 0.2058, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.6191145467322557, |
| "grad_norm": 0.5204351294166948, |
| "learning_rate": 7.81616596332886e-06, |
| "loss": 0.1937, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.6198172874209417, |
| "grad_norm": 0.5241918723451235, |
| "learning_rate": 7.811603651686777e-06, |
| "loss": 0.1824, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.6205200281096276, |
| "grad_norm": 0.47726678490631336, |
| "learning_rate": 7.80703791407112e-06, |
| "loss": 0.1672, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.6212227687983134, |
| "grad_norm": 0.4826936758469551, |
| "learning_rate": 7.802468756045301e-06, |
| "loss": 0.1748, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.6219255094869993, |
| "grad_norm": 0.5290298423944293, |
| "learning_rate": 7.797896183176892e-06, |
| "loss": 0.1833, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.6226282501756851, |
| "grad_norm": 0.5451208380190049, |
| "learning_rate": 7.793320201037629e-06, |
| "loss": 0.1945, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.623330990864371, |
| "grad_norm": 0.5469122761116605, |
| "learning_rate": 7.788740815203404e-06, |
| "loss": 0.2048, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.624033731553057, |
| "grad_norm": 0.520441291999963, |
| "learning_rate": 7.784158031254251e-06, |
| "loss": 0.1944, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.6247364722417428, |
| "grad_norm": 0.526928075448614, |
| "learning_rate": 7.779571854774356e-06, |
| "loss": 0.203, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.6254392129304287, |
| "grad_norm": 0.49886608378637465, |
| "learning_rate": 7.774982291352022e-06, |
| "loss": 0.1775, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.6261419536191145, |
| "grad_norm": 0.5170210910797823, |
| "learning_rate": 7.770389346579696e-06, |
| "loss": 0.1764, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.6268446943078004, |
| "grad_norm": 0.47869106466151334, |
| "learning_rate": 7.765793026053934e-06, |
| "loss": 0.1497, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.6275474349964862, |
| "grad_norm": 0.5126979144891471, |
| "learning_rate": 7.761193335375411e-06, |
| "loss": 0.1767, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.6282501756851722, |
| "grad_norm": 0.5193995670376416, |
| "learning_rate": 7.756590280148904e-06, |
| "loss": 0.205, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.6289529163738581, |
| "grad_norm": 0.5346694403603001, |
| "learning_rate": 7.751983865983295e-06, |
| "loss": 0.1919, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.6296556570625439, |
| "grad_norm": 0.5336291753755593, |
| "learning_rate": 7.747374098491553e-06, |
| "loss": 0.1983, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.6303583977512298, |
| "grad_norm": 0.49993946624577135, |
| "learning_rate": 7.742760983290738e-06, |
| "loss": 0.1716, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.6310611384399156, |
| "grad_norm": 0.5300665843429727, |
| "learning_rate": 7.73814452600199e-06, |
| "loss": 0.1784, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.6317638791286015, |
| "grad_norm": 0.503470906387593, |
| "learning_rate": 7.733524732250515e-06, |
| "loss": 0.1781, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.6324666198172875, |
| "grad_norm": 0.5096260624175444, |
| "learning_rate": 7.728901607665591e-06, |
| "loss": 0.1754, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.6331693605059733, |
| "grad_norm": 0.5838253255797453, |
| "learning_rate": 7.724275157880551e-06, |
| "loss": 0.223, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.6338721011946592, |
| "grad_norm": 0.5084981196070715, |
| "learning_rate": 7.719645388532779e-06, |
| "loss": 0.1897, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.634574841883345, |
| "grad_norm": 0.528356961734376, |
| "learning_rate": 7.71501230526371e-06, |
| "loss": 0.2007, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.6352775825720309, |
| "grad_norm": 0.5218892843740671, |
| "learning_rate": 7.71037591371881e-06, |
| "loss": 0.1865, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.6359803232607167, |
| "grad_norm": 0.4932257464154493, |
| "learning_rate": 7.705736219547579e-06, |
| "loss": 0.1703, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.6366830639494027, |
| "grad_norm": 0.4958132069555908, |
| "learning_rate": 7.701093228403543e-06, |
| "loss": 0.1813, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.6373858046380886, |
| "grad_norm": 0.5242560868098635, |
| "learning_rate": 7.696446945944241e-06, |
| "loss": 0.1986, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.6380885453267744, |
| "grad_norm": 0.5334976116325654, |
| "learning_rate": 7.691797377831226e-06, |
| "loss": 0.2066, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.6387912860154603, |
| "grad_norm": 0.5262699778377731, |
| "learning_rate": 7.687144529730058e-06, |
| "loss": 0.1997, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.6394940267041461, |
| "grad_norm": 0.49950063093797237, |
| "learning_rate": 7.682488407310284e-06, |
| "loss": 0.1774, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.640196767392832, |
| "grad_norm": 0.5334542098027915, |
| "learning_rate": 7.67782901624545e-06, |
| "loss": 0.1871, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.640899508081518, |
| "grad_norm": 0.5185057986662502, |
| "learning_rate": 7.673166362213077e-06, |
| "loss": 0.1698, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.6416022487702038, |
| "grad_norm": 0.5027268618275128, |
| "learning_rate": 7.668500450894674e-06, |
| "loss": 0.1774, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.6423049894588897, |
| "grad_norm": 0.5301993082173253, |
| "learning_rate": 7.663831287975702e-06, |
| "loss": 0.1883, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.6430077301475755, |
| "grad_norm": 0.5217885639264032, |
| "learning_rate": 7.659158879145599e-06, |
| "loss": 0.1956, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.6437104708362614, |
| "grad_norm": 0.5381047788093676, |
| "learning_rate": 7.654483230097752e-06, |
| "loss": 0.1992, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.6444132115249473, |
| "grad_norm": 0.5392291589420021, |
| "learning_rate": 7.649804346529493e-06, |
| "loss": 0.1807, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.6451159522136332, |
| "grad_norm": 0.47877224169561405, |
| "learning_rate": 7.645122234142103e-06, |
| "loss": 0.1527, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.6458186929023191, |
| "grad_norm": 0.5295518466347142, |
| "learning_rate": 7.640436898640795e-06, |
| "loss": 0.2115, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.6465214335910049, |
| "grad_norm": 0.4909132810206349, |
| "learning_rate": 7.635748345734702e-06, |
| "loss": 0.1599, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.6472241742796908, |
| "grad_norm": 0.4899660738658723, |
| "learning_rate": 7.63105658113689e-06, |
| "loss": 0.1817, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.6479269149683766, |
| "grad_norm": 0.48355409348428285, |
| "learning_rate": 7.626361610564325e-06, |
| "loss": 0.1744, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.6486296556570625, |
| "grad_norm": 0.4924250519507862, |
| "learning_rate": 7.6216634397378905e-06, |
| "loss": 0.182, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.6493323963457485, |
| "grad_norm": 0.4927644815358553, |
| "learning_rate": 7.616962074382364e-06, |
| "loss": 0.1723, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.6500351370344343, |
| "grad_norm": 0.4984759900130036, |
| "learning_rate": 7.612257520226418e-06, |
| "loss": 0.1775, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.6507378777231202, |
| "grad_norm": 0.5203766307202669, |
| "learning_rate": 7.607549783002608e-06, |
| "loss": 0.1796, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.651440618411806, |
| "grad_norm": 0.5033951336673999, |
| "learning_rate": 7.602838868447373e-06, |
| "loss": 0.1663, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.6521433591004919, |
| "grad_norm": 0.5424370742405987, |
| "learning_rate": 7.598124782301015e-06, |
| "loss": 0.2023, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.6528460997891778, |
| "grad_norm": 0.5151861951466656, |
| "learning_rate": 7.593407530307709e-06, |
| "loss": 0.193, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.6535488404778637, |
| "grad_norm": 0.49581799095048523, |
| "learning_rate": 7.588687118215485e-06, |
| "loss": 0.1928, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.6542515811665496, |
| "grad_norm": 0.5036985985813803, |
| "learning_rate": 7.583963551776221e-06, |
| "loss": 0.1722, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.6549543218552354, |
| "grad_norm": 0.5174965530179328, |
| "learning_rate": 7.579236836745643e-06, |
| "loss": 0.2037, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.6556570625439213, |
| "grad_norm": 0.5098346365676857, |
| "learning_rate": 7.5745069788833094e-06, |
| "loss": 0.1822, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.6563598032326071, |
| "grad_norm": 0.506190343396925, |
| "learning_rate": 7.569773983952611e-06, |
| "loss": 0.1906, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.657062543921293, |
| "grad_norm": 0.5133813688467773, |
| "learning_rate": 7.56503785772076e-06, |
| "loss": 0.1992, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.657765284609979, |
| "grad_norm": 0.47411929585172563, |
| "learning_rate": 7.560298605958782e-06, |
| "loss": 0.1746, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.6584680252986648, |
| "grad_norm": 0.5542559408000669, |
| "learning_rate": 7.555556234441519e-06, |
| "loss": 0.2363, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.6591707659873507, |
| "grad_norm": 0.5157859159826783, |
| "learning_rate": 7.550810748947605e-06, |
| "loss": 0.2001, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.6598735066760365, |
| "grad_norm": 0.5377402338987854, |
| "learning_rate": 7.546062155259473e-06, |
| "loss": 0.2352, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.6605762473647224, |
| "grad_norm": 0.5468943850332758, |
| "learning_rate": 7.541310459163343e-06, |
| "loss": 0.2265, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.6612789880534083, |
| "grad_norm": 0.5222470713922112, |
| "learning_rate": 7.536555666449214e-06, |
| "loss": 0.1834, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.6619817287420942, |
| "grad_norm": 0.5145793565709798, |
| "learning_rate": 7.5317977829108605e-06, |
| "loss": 0.1915, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.6626844694307801, |
| "grad_norm": 0.5302885963092634, |
| "learning_rate": 7.5270368143458216e-06, |
| "loss": 0.2008, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.6633872101194659, |
| "grad_norm": 0.5305335077958598, |
| "learning_rate": 7.522272766555397e-06, |
| "loss": 0.2205, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.6640899508081518, |
| "grad_norm": 0.5180420979313441, |
| "learning_rate": 7.517505645344636e-06, |
| "loss": 0.1917, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.6647926914968376, |
| "grad_norm": 0.5202273362852701, |
| "learning_rate": 7.512735456522333e-06, |
| "loss": 0.1797, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.6654954321855235, |
| "grad_norm": 0.5230777310370995, |
| "learning_rate": 7.507962205901026e-06, |
| "loss": 0.1942, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.6661981728742095, |
| "grad_norm": 0.5216832102567025, |
| "learning_rate": 7.503185899296974e-06, |
| "loss": 0.1949, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.6669009135628953, |
| "grad_norm": 0.5056140211785736, |
| "learning_rate": 7.498406542530173e-06, |
| "loss": 0.1721, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.6676036542515812, |
| "grad_norm": 0.527215934538876, |
| "learning_rate": 7.4936241414243185e-06, |
| "loss": 0.2235, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.668306394940267, |
| "grad_norm": 0.5014203639608845, |
| "learning_rate": 7.488838701806832e-06, |
| "loss": 0.1665, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.6690091356289529, |
| "grad_norm": 0.529468100126926, |
| "learning_rate": 7.484050229508826e-06, |
| "loss": 0.2088, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.6697118763176388, |
| "grad_norm": 0.5140119884370032, |
| "learning_rate": 7.479258730365117e-06, |
| "loss": 0.1776, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.6704146170063247, |
| "grad_norm": 0.5196864861391487, |
| "learning_rate": 7.474464210214202e-06, |
| "loss": 0.1813, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.6711173576950106, |
| "grad_norm": 0.5156500667877755, |
| "learning_rate": 7.469666674898264e-06, |
| "loss": 0.187, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.6718200983836964, |
| "grad_norm": 0.49244989255735266, |
| "learning_rate": 7.464866130263159e-06, |
| "loss": 0.1745, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.6725228390723823, |
| "grad_norm": 0.5259031998968214, |
| "learning_rate": 7.4600625821584095e-06, |
| "loss": 0.203, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.6732255797610681, |
| "grad_norm": 0.502301194672857, |
| "learning_rate": 7.4552560364371975e-06, |
| "loss": 0.1908, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.673928320449754, |
| "grad_norm": 0.5107269319280141, |
| "learning_rate": 7.4504464989563575e-06, |
| "loss": 0.2006, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.67463106113844, |
| "grad_norm": 0.5118724541676758, |
| "learning_rate": 7.44563397557637e-06, |
| "loss": 0.1717, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.6753338018271258, |
| "grad_norm": 0.5143114162067215, |
| "learning_rate": 7.4408184721613565e-06, |
| "loss": 0.1754, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.6760365425158117, |
| "grad_norm": 0.5083668965935106, |
| "learning_rate": 7.435999994579062e-06, |
| "loss": 0.2088, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.6767392832044975, |
| "grad_norm": 0.4881974079162172, |
| "learning_rate": 7.431178548700866e-06, |
| "loss": 0.1615, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.6774420238931834, |
| "grad_norm": 0.5113043533333093, |
| "learning_rate": 7.426354140401756e-06, |
| "loss": 0.1948, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.6781447645818693, |
| "grad_norm": 0.48680437305335184, |
| "learning_rate": 7.421526775560334e-06, |
| "loss": 0.1735, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.6788475052705552, |
| "grad_norm": 0.5405627885402856, |
| "learning_rate": 7.4166964600588035e-06, |
| "loss": 0.1786, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.6795502459592411, |
| "grad_norm": 0.4905377754918616, |
| "learning_rate": 7.411863199782962e-06, |
| "loss": 0.1836, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.6802529866479269, |
| "grad_norm": 0.5347687574929526, |
| "learning_rate": 7.4070270006221975e-06, |
| "loss": 0.1757, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.6809557273366128, |
| "grad_norm": 0.5187266886970135, |
| "learning_rate": 7.402187868469478e-06, |
| "loss": 0.1709, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.6816584680252986, |
| "grad_norm": 0.44063607377299313, |
| "learning_rate": 7.397345809221346e-06, |
| "loss": 0.13, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.6823612087139845, |
| "grad_norm": 0.5055942136497711, |
| "learning_rate": 7.392500828777909e-06, |
| "loss": 0.1911, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.6830639494026705, |
| "grad_norm": 0.49986827112331206, |
| "learning_rate": 7.387652933042835e-06, |
| "loss": 0.169, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.6837666900913563, |
| "grad_norm": 0.5301476942772901, |
| "learning_rate": 7.382802127923346e-06, |
| "loss": 0.2122, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.6844694307800422, |
| "grad_norm": 0.49785388875502984, |
| "learning_rate": 7.377948419330206e-06, |
| "loss": 0.1609, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.685172171468728, |
| "grad_norm": 0.5250134490606625, |
| "learning_rate": 7.3730918131777215e-06, |
| "loss": 0.2135, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.6858749121574139, |
| "grad_norm": 0.5034261014309968, |
| "learning_rate": 7.368232315383721e-06, |
| "loss": 0.1799, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.6865776528460998, |
| "grad_norm": 0.45840308420918235, |
| "learning_rate": 7.363369931869568e-06, |
| "loss": 0.1697, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.6872803935347856, |
| "grad_norm": 0.5209810656647423, |
| "learning_rate": 7.358504668560134e-06, |
| "loss": 0.1856, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.6879831342234716, |
| "grad_norm": 0.511510447533771, |
| "learning_rate": 7.353636531383802e-06, |
| "loss": 0.1846, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.6886858749121574, |
| "grad_norm": 0.49950476702778257, |
| "learning_rate": 7.348765526272457e-06, |
| "loss": 0.1783, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.6893886156008433, |
| "grad_norm": 0.5188367362925587, |
| "learning_rate": 7.34389165916148e-06, |
| "loss": 0.1813, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.6900913562895291, |
| "grad_norm": 0.5367080926443806, |
| "learning_rate": 7.339014935989734e-06, |
| "loss": 0.1966, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.690794096978215, |
| "grad_norm": 0.5430848597714294, |
| "learning_rate": 7.334135362699571e-06, |
| "loss": 0.1984, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.691496837666901, |
| "grad_norm": 0.5279381895210722, |
| "learning_rate": 7.329252945236808e-06, |
| "loss": 0.1971, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.6921995783555868, |
| "grad_norm": 0.5465587333436775, |
| "learning_rate": 7.324367689550732e-06, |
| "loss": 0.1978, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.6929023190442727, |
| "grad_norm": 0.5494700126422907, |
| "learning_rate": 7.319479601594085e-06, |
| "loss": 0.222, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.6936050597329585, |
| "grad_norm": 0.5133356544069808, |
| "learning_rate": 7.3145886873230655e-06, |
| "loss": 0.198, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.6943078004216444, |
| "grad_norm": 0.5284662389797274, |
| "learning_rate": 7.309694952697308e-06, |
| "loss": 0.1888, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.6950105411103303, |
| "grad_norm": 0.49633826165910416, |
| "learning_rate": 7.304798403679893e-06, |
| "loss": 0.1655, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.6957132817990161, |
| "grad_norm": 0.49626789953298106, |
| "learning_rate": 7.299899046237323e-06, |
| "loss": 0.1825, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.6964160224877021, |
| "grad_norm": 0.5168604222500065, |
| "learning_rate": 7.294996886339526e-06, |
| "loss": 0.1905, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.6971187631763879, |
| "grad_norm": 0.5148459672208613, |
| "learning_rate": 7.290091929959843e-06, |
| "loss": 0.1894, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.6978215038650738, |
| "grad_norm": 0.5108835468867117, |
| "learning_rate": 7.285184183075025e-06, |
| "loss": 0.1824, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.6985242445537596, |
| "grad_norm": 0.5227565151551233, |
| "learning_rate": 7.2802736516652205e-06, |
| "loss": 0.2019, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.6992269852424455, |
| "grad_norm": 0.5326973237464283, |
| "learning_rate": 7.275360341713973e-06, |
| "loss": 0.2094, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.6999297259311315, |
| "grad_norm": 0.5614700009477083, |
| "learning_rate": 7.270444259208211e-06, |
| "loss": 0.1801, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.7006324666198173, |
| "grad_norm": 0.501410180912708, |
| "learning_rate": 7.265525410138242e-06, |
| "loss": 0.1675, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.7013352073085032, |
| "grad_norm": 0.5599442703154489, |
| "learning_rate": 7.2606038004977435e-06, |
| "loss": 0.2242, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.702037947997189, |
| "grad_norm": 0.5144840354436657, |
| "learning_rate": 7.255679436283757e-06, |
| "loss": 0.1639, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.7027406886858749, |
| "grad_norm": 0.5309811915865177, |
| "learning_rate": 7.250752323496679e-06, |
| "loss": 0.1879, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.7027406886858749, |
| "eval_loss": 0.18907217681407928, |
| "eval_runtime": 10.8577, |
| "eval_samples_per_second": 21.183, |
| "eval_steps_per_second": 5.342, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.7034434293745608, |
| "grad_norm": 0.48234786437812116, |
| "learning_rate": 7.24582246814026e-06, |
| "loss": 0.1671, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.7041461700632466, |
| "grad_norm": 0.49996231689533716, |
| "learning_rate": 7.240889876221589e-06, |
| "loss": 0.1752, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.7048489107519326, |
| "grad_norm": 0.5525556422783636, |
| "learning_rate": 7.2359545537510875e-06, |
| "loss": 0.2075, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.7055516514406184, |
| "grad_norm": 0.5944880911698127, |
| "learning_rate": 7.23101650674251e-06, |
| "loss": 0.2378, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.7062543921293043, |
| "grad_norm": 0.5578191512662339, |
| "learning_rate": 7.226075741212923e-06, |
| "loss": 0.2067, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.7069571328179901, |
| "grad_norm": 0.5061065739706729, |
| "learning_rate": 7.221132263182713e-06, |
| "loss": 0.1811, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.707659873506676, |
| "grad_norm": 0.542622669313164, |
| "learning_rate": 7.216186078675569e-06, |
| "loss": 0.216, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.708362614195362, |
| "grad_norm": 0.5030138850034241, |
| "learning_rate": 7.211237193718476e-06, |
| "loss": 0.199, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.7090653548840478, |
| "grad_norm": 0.4916488160349469, |
| "learning_rate": 7.206285614341711e-06, |
| "loss": 0.1784, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.7097680955727337, |
| "grad_norm": 0.5035817623689635, |
| "learning_rate": 7.201331346578836e-06, |
| "loss": 0.1619, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.7104708362614195, |
| "grad_norm": 0.4952692089496329, |
| "learning_rate": 7.196374396466686e-06, |
| "loss": 0.1748, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.7111735769501054, |
| "grad_norm": 0.5311768647808931, |
| "learning_rate": 7.191414770045364e-06, |
| "loss": 0.1725, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.7118763176387913, |
| "grad_norm": 0.5397785974634004, |
| "learning_rate": 7.186452473358238e-06, |
| "loss": 0.1884, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.7125790583274771, |
| "grad_norm": 0.5418017219505858, |
| "learning_rate": 7.181487512451927e-06, |
| "loss": 0.2033, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.7132817990161631, |
| "grad_norm": 0.5337125176776485, |
| "learning_rate": 7.176519893376296e-06, |
| "loss": 0.1905, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.7139845397048489, |
| "grad_norm": 0.5233510689055844, |
| "learning_rate": 7.17154962218445e-06, |
| "loss": 0.2035, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.7146872803935348, |
| "grad_norm": 0.4926954442413844, |
| "learning_rate": 7.1665767049327284e-06, |
| "loss": 0.1592, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.7153900210822206, |
| "grad_norm": 0.48443312603006383, |
| "learning_rate": 7.161601147680688e-06, |
| "loss": 0.153, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.7160927617709065, |
| "grad_norm": 0.49655894092848546, |
| "learning_rate": 7.156622956491107e-06, |
| "loss": 0.1628, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.7167955024595924, |
| "grad_norm": 0.5256199048045755, |
| "learning_rate": 7.1516421374299735e-06, |
| "loss": 0.2129, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.7174982431482783, |
| "grad_norm": 0.5100591937912284, |
| "learning_rate": 7.146658696566478e-06, |
| "loss": 0.1711, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.7182009838369642, |
| "grad_norm": 0.49456212678014866, |
| "learning_rate": 7.141672639973e-06, |
| "loss": 0.1812, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.71890372452565, |
| "grad_norm": 0.5619739635445484, |
| "learning_rate": 7.136683973725116e-06, |
| "loss": 0.2216, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.7196064652143359, |
| "grad_norm": 0.5412580193904376, |
| "learning_rate": 7.1316927039015736e-06, |
| "loss": 0.2073, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.7203092059030218, |
| "grad_norm": 0.49843680543795715, |
| "learning_rate": 7.126698836584296e-06, |
| "loss": 0.1666, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.7210119465917076, |
| "grad_norm": 0.49802440745701665, |
| "learning_rate": 7.121702377858375e-06, |
| "loss": 0.1772, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.7217146872803936, |
| "grad_norm": 0.4984117313770376, |
| "learning_rate": 7.116703333812055e-06, |
| "loss": 0.1877, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.7224174279690794, |
| "grad_norm": 0.501178356120356, |
| "learning_rate": 7.111701710536732e-06, |
| "loss": 0.1696, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.7231201686577653, |
| "grad_norm": 0.5135210124439293, |
| "learning_rate": 7.106697514126947e-06, |
| "loss": 0.1806, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.7238229093464511, |
| "grad_norm": 0.5281606401354669, |
| "learning_rate": 7.101690750680373e-06, |
| "loss": 0.1833, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.724525650035137, |
| "grad_norm": 0.5126826359434293, |
| "learning_rate": 7.096681426297814e-06, |
| "loss": 0.1913, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.725228390723823, |
| "grad_norm": 0.48162715829056846, |
| "learning_rate": 7.091669547083193e-06, |
| "loss": 0.1763, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.7259311314125088, |
| "grad_norm": 0.5208495206878236, |
| "learning_rate": 7.0866551191435464e-06, |
| "loss": 0.1921, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.7266338721011947, |
| "grad_norm": 0.5419110976510364, |
| "learning_rate": 7.081638148589015e-06, |
| "loss": 0.1838, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.7273366127898805, |
| "grad_norm": 0.530400846773951, |
| "learning_rate": 7.07661864153284e-06, |
| "loss": 0.1921, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.7280393534785664, |
| "grad_norm": 0.5244327142014001, |
| "learning_rate": 7.071596604091353e-06, |
| "loss": 0.1915, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.7287420941672523, |
| "grad_norm": 0.5120455453570685, |
| "learning_rate": 7.066572042383967e-06, |
| "loss": 0.1948, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.7294448348559381, |
| "grad_norm": 0.49788820003147394, |
| "learning_rate": 7.061544962533174e-06, |
| "loss": 0.1699, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.7301475755446241, |
| "grad_norm": 0.5101121707543748, |
| "learning_rate": 7.056515370664529e-06, |
| "loss": 0.1649, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.7308503162333099, |
| "grad_norm": 0.5015108857525926, |
| "learning_rate": 7.051483272906656e-06, |
| "loss": 0.1601, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.7315530569219958, |
| "grad_norm": 0.5208544509411792, |
| "learning_rate": 7.0464486753912255e-06, |
| "loss": 0.1897, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.7322557976106817, |
| "grad_norm": 0.513293497339928, |
| "learning_rate": 7.041411584252956e-06, |
| "loss": 0.1799, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.7329585382993675, |
| "grad_norm": 0.4924607506595166, |
| "learning_rate": 7.036372005629606e-06, |
| "loss": 0.1629, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.7336612789880534, |
| "grad_norm": 0.5392223533203832, |
| "learning_rate": 7.0313299456619635e-06, |
| "loss": 0.2173, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.7343640196767393, |
| "grad_norm": 0.5506431334123577, |
| "learning_rate": 7.026285410493839e-06, |
| "loss": 0.1823, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.7350667603654252, |
| "grad_norm": 0.5461836214581263, |
| "learning_rate": 7.021238406272064e-06, |
| "loss": 0.1703, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.735769501054111, |
| "grad_norm": 0.523312140712101, |
| "learning_rate": 7.016188939146471e-06, |
| "loss": 0.1802, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.7364722417427969, |
| "grad_norm": 0.5059982446931061, |
| "learning_rate": 7.011137015269901e-06, |
| "loss": 0.1938, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.7371749824314828, |
| "grad_norm": 0.600343031316344, |
| "learning_rate": 7.006082640798183e-06, |
| "loss": 0.2044, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.7378777231201686, |
| "grad_norm": 0.5410248828748458, |
| "learning_rate": 7.0010258218901375e-06, |
| "loss": 0.1974, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.7385804638088546, |
| "grad_norm": 0.5211734683573049, |
| "learning_rate": 6.995966564707556e-06, |
| "loss": 0.1835, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.7392832044975404, |
| "grad_norm": 0.48255922615953734, |
| "learning_rate": 6.99090487541521e-06, |
| "loss": 0.1691, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.7399859451862263, |
| "grad_norm": 0.5417704830229917, |
| "learning_rate": 6.985840760180824e-06, |
| "loss": 0.1936, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.7406886858749122, |
| "grad_norm": 0.4962640922424155, |
| "learning_rate": 6.980774225175092e-06, |
| "loss": 0.1684, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.741391426563598, |
| "grad_norm": 0.5198225259061435, |
| "learning_rate": 6.975705276571645e-06, |
| "loss": 0.1757, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.7420941672522839, |
| "grad_norm": 0.46801334813073625, |
| "learning_rate": 6.970633920547059e-06, |
| "loss": 0.1738, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.7427969079409698, |
| "grad_norm": 0.5252907799329338, |
| "learning_rate": 6.965560163280844e-06, |
| "loss": 0.1995, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.7434996486296557, |
| "grad_norm": 0.5081424969197407, |
| "learning_rate": 6.960484010955436e-06, |
| "loss": 0.1875, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.7442023893183415, |
| "grad_norm": 0.4818396772545229, |
| "learning_rate": 6.955405469756189e-06, |
| "loss": 0.1535, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.7449051300070274, |
| "grad_norm": 0.534075500585145, |
| "learning_rate": 6.950324545871367e-06, |
| "loss": 0.2258, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.7456078706957133, |
| "grad_norm": 0.5153747390981624, |
| "learning_rate": 6.945241245492139e-06, |
| "loss": 0.1804, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.7463106113843991, |
| "grad_norm": 0.48028126074749794, |
| "learning_rate": 6.940155574812571e-06, |
| "loss": 0.1666, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.7470133520730851, |
| "grad_norm": 0.5221059933317176, |
| "learning_rate": 6.935067540029608e-06, |
| "loss": 0.2155, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.7477160927617709, |
| "grad_norm": 0.5128145113353367, |
| "learning_rate": 6.929977147343092e-06, |
| "loss": 0.1907, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.7484188334504568, |
| "grad_norm": 0.5179655111085886, |
| "learning_rate": 6.924884402955722e-06, |
| "loss": 0.2095, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.7491215741391427, |
| "grad_norm": 0.5374983900631398, |
| "learning_rate": 6.919789313073072e-06, |
| "loss": 0.2189, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.7498243148278285, |
| "grad_norm": 0.5066549150752421, |
| "learning_rate": 6.914691883903573e-06, |
| "loss": 0.1855, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.7505270555165144, |
| "grad_norm": 0.4962869754285091, |
| "learning_rate": 6.909592121658504e-06, |
| "loss": 0.1562, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.7512297962052003, |
| "grad_norm": 0.5337154685194031, |
| "learning_rate": 6.904490032551987e-06, |
| "loss": 0.1911, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.7519325368938862, |
| "grad_norm": 0.5169733664567905, |
| "learning_rate": 6.899385622800981e-06, |
| "loss": 0.1881, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.752635277582572, |
| "grad_norm": 0.4792931662791021, |
| "learning_rate": 6.894278898625272e-06, |
| "loss": 0.1688, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.7533380182712579, |
| "grad_norm": 0.48103340714870696, |
| "learning_rate": 6.889169866247466e-06, |
| "loss": 0.1734, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.7540407589599438, |
| "grad_norm": 0.4632594693657935, |
| "learning_rate": 6.8840585318929806e-06, |
| "loss": 0.1507, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.7547434996486296, |
| "grad_norm": 0.5078797250921595, |
| "learning_rate": 6.8789449017900425e-06, |
| "loss": 0.1751, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.7554462403373156, |
| "grad_norm": 0.5330746582913394, |
| "learning_rate": 6.873828982169669e-06, |
| "loss": 0.1866, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.7561489810260014, |
| "grad_norm": 0.5006495318019759, |
| "learning_rate": 6.868710779265675e-06, |
| "loss": 0.1876, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.7568517217146873, |
| "grad_norm": 0.539588386937122, |
| "learning_rate": 6.8635902993146485e-06, |
| "loss": 0.1996, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.7575544624033732, |
| "grad_norm": 0.5154656321020359, |
| "learning_rate": 6.858467548555963e-06, |
| "loss": 0.1724, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.758257203092059, |
| "grad_norm": 0.5531641648558123, |
| "learning_rate": 6.853342533231748e-06, |
| "loss": 0.1842, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.7589599437807449, |
| "grad_norm": 0.5651429168073647, |
| "learning_rate": 6.848215259586901e-06, |
| "loss": 0.2352, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.7596626844694307, |
| "grad_norm": 0.48594498448781775, |
| "learning_rate": 6.8430857338690655e-06, |
| "loss": 0.1544, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.7603654251581167, |
| "grad_norm": 0.504044589169003, |
| "learning_rate": 6.837953962328635e-06, |
| "loss": 0.158, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.7610681658468025, |
| "grad_norm": 0.525608593831342, |
| "learning_rate": 6.832819951218732e-06, |
| "loss": 0.1864, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.7617709065354884, |
| "grad_norm": 0.5467297549015869, |
| "learning_rate": 6.827683706795216e-06, |
| "loss": 0.2165, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.7624736472241743, |
| "grad_norm": 0.5335682743668645, |
| "learning_rate": 6.82254523531666e-06, |
| "loss": 0.1772, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.7631763879128601, |
| "grad_norm": 0.4964276321848615, |
| "learning_rate": 6.817404543044358e-06, |
| "loss": 0.1783, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.763879128601546, |
| "grad_norm": 0.5265906150660045, |
| "learning_rate": 6.812261636242303e-06, |
| "loss": 0.188, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.7645818692902319, |
| "grad_norm": 0.5139859252183324, |
| "learning_rate": 6.807116521177195e-06, |
| "loss": 0.1774, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.7652846099789178, |
| "grad_norm": 0.5284736356104716, |
| "learning_rate": 6.801969204118415e-06, |
| "loss": 0.1828, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.7659873506676037, |
| "grad_norm": 0.4576123884410576, |
| "learning_rate": 6.796819691338035e-06, |
| "loss": 0.1479, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.7666900913562895, |
| "grad_norm": 0.5073988935393058, |
| "learning_rate": 6.7916679891108e-06, |
| "loss": 0.1765, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.7673928320449754, |
| "grad_norm": 0.4842579017253722, |
| "learning_rate": 6.786514103714119e-06, |
| "loss": 0.186, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.7680955727336612, |
| "grad_norm": 0.49917544829379384, |
| "learning_rate": 6.781358041428068e-06, |
| "loss": 0.1827, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.7687983134223472, |
| "grad_norm": 0.5140787255287416, |
| "learning_rate": 6.776199808535371e-06, |
| "loss": 0.1855, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.769501054111033, |
| "grad_norm": 0.4919847504130854, |
| "learning_rate": 6.771039411321397e-06, |
| "loss": 0.169, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.7702037947997189, |
| "grad_norm": 0.47780874627390346, |
| "learning_rate": 6.765876856074156e-06, |
| "loss": 0.152, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.7709065354884048, |
| "grad_norm": 0.5202858563610114, |
| "learning_rate": 6.760712149084282e-06, |
| "loss": 0.1925, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.7716092761770906, |
| "grad_norm": 0.5295724117460601, |
| "learning_rate": 6.755545296645037e-06, |
| "loss": 0.2081, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.7723120168657766, |
| "grad_norm": 0.5039771408196154, |
| "learning_rate": 6.7503763050522904e-06, |
| "loss": 0.1691, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.7730147575544624, |
| "grad_norm": 0.49655979674457273, |
| "learning_rate": 6.745205180604526e-06, |
| "loss": 0.1794, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.7737174982431483, |
| "grad_norm": 0.5109049881981059, |
| "learning_rate": 6.74003192960282e-06, |
| "loss": 0.1712, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.7744202389318342, |
| "grad_norm": 0.5048290654661958, |
| "learning_rate": 6.734856558350842e-06, |
| "loss": 0.1715, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.77512297962052, |
| "grad_norm": 0.5387534022945553, |
| "learning_rate": 6.729679073154845e-06, |
| "loss": 0.1806, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.7758257203092059, |
| "grad_norm": 0.5153193952791956, |
| "learning_rate": 6.724499480323662e-06, |
| "loss": 0.1828, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.7765284609978917, |
| "grad_norm": 0.49014211897656185, |
| "learning_rate": 6.719317786168687e-06, |
| "loss": 0.1686, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.7772312016865777, |
| "grad_norm": 0.5228845198917895, |
| "learning_rate": 6.714133997003878e-06, |
| "loss": 0.1941, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.7779339423752635, |
| "grad_norm": 0.5136766056514168, |
| "learning_rate": 6.708948119145746e-06, |
| "loss": 0.1595, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.7786366830639494, |
| "grad_norm": 0.5306146622363762, |
| "learning_rate": 6.703760158913349e-06, |
| "loss": 0.1978, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.7793394237526353, |
| "grad_norm": 0.49993379289456086, |
| "learning_rate": 6.698570122628276e-06, |
| "loss": 0.173, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.7800421644413211, |
| "grad_norm": 0.49442203874511687, |
| "learning_rate": 6.693378016614657e-06, |
| "loss": 0.1858, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.780744905130007, |
| "grad_norm": 0.5137438587782226, |
| "learning_rate": 6.6881838471991274e-06, |
| "loss": 0.2078, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.7814476458186929, |
| "grad_norm": 0.47131004501273516, |
| "learning_rate": 6.682987620710856e-06, |
| "loss": 0.1629, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.7821503865073788, |
| "grad_norm": 0.46698415621575573, |
| "learning_rate": 6.677789343481501e-06, |
| "loss": 0.165, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.7828531271960647, |
| "grad_norm": 0.48925746166723594, |
| "learning_rate": 6.6725890218452315e-06, |
| "loss": 0.1571, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.7835558678847505, |
| "grad_norm": 0.5319566631486711, |
| "learning_rate": 6.667386662138702e-06, |
| "loss": 0.193, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.7842586085734364, |
| "grad_norm": 0.5027355928606957, |
| "learning_rate": 6.662182270701051e-06, |
| "loss": 0.1678, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.7849613492621222, |
| "grad_norm": 0.5446992180433403, |
| "learning_rate": 6.656975853873895e-06, |
| "loss": 0.206, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.7856640899508082, |
| "grad_norm": 0.5374277406573096, |
| "learning_rate": 6.651767418001314e-06, |
| "loss": 0.2242, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.786366830639494, |
| "grad_norm": 0.518162302839699, |
| "learning_rate": 6.646556969429854e-06, |
| "loss": 0.1744, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.7870695713281799, |
| "grad_norm": 0.5170789776181299, |
| "learning_rate": 6.64134451450851e-06, |
| "loss": 0.1838, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.7877723120168658, |
| "grad_norm": 0.4798167702159322, |
| "learning_rate": 6.636130059588719e-06, |
| "loss": 0.1585, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.7884750527055516, |
| "grad_norm": 0.5398354189241774, |
| "learning_rate": 6.630913611024365e-06, |
| "loss": 0.1843, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.7891777933942375, |
| "grad_norm": 0.5365706374888599, |
| "learning_rate": 6.625695175171747e-06, |
| "loss": 0.1806, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.7898805340829234, |
| "grad_norm": 0.5209103560498805, |
| "learning_rate": 6.6204747583896e-06, |
| "loss": 0.1985, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.7905832747716093, |
| "grad_norm": 0.5029116303455078, |
| "learning_rate": 6.61525236703906e-06, |
| "loss": 0.1759, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.7912860154602952, |
| "grad_norm": 0.541377972488622, |
| "learning_rate": 6.610028007483679e-06, |
| "loss": 0.2066, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.791988756148981, |
| "grad_norm": 0.5297123690875152, |
| "learning_rate": 6.604801686089403e-06, |
| "loss": 0.1832, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.7926914968376669, |
| "grad_norm": 0.5278360942405322, |
| "learning_rate": 6.599573409224567e-06, |
| "loss": 0.1825, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.7933942375263527, |
| "grad_norm": 0.5021558885331302, |
| "learning_rate": 6.59434318325989e-06, |
| "loss": 0.1702, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.7940969782150387, |
| "grad_norm": 0.5068400467726666, |
| "learning_rate": 6.58911101456847e-06, |
| "loss": 0.1809, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.7947997189037245, |
| "grad_norm": 0.49468722085985894, |
| "learning_rate": 6.583876909525766e-06, |
| "loss": 0.1794, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.7955024595924104, |
| "grad_norm": 0.5007669274861396, |
| "learning_rate": 6.578640874509599e-06, |
| "loss": 0.1791, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.7962052002810963, |
| "grad_norm": 0.5409971215006368, |
| "learning_rate": 6.573402915900145e-06, |
| "loss": 0.2237, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.7969079409697821, |
| "grad_norm": 0.5399705409644362, |
| "learning_rate": 6.568163040079918e-06, |
| "loss": 0.1798, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.797610681658468, |
| "grad_norm": 0.5320796227271901, |
| "learning_rate": 6.562921253433771e-06, |
| "loss": 0.1974, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.7983134223471539, |
| "grad_norm": 0.5048052724879288, |
| "learning_rate": 6.557677562348887e-06, |
| "loss": 0.1686, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.7990161630358398, |
| "grad_norm": 0.47729262726676874, |
| "learning_rate": 6.552431973214767e-06, |
| "loss": 0.1699, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.7997189037245257, |
| "grad_norm": 0.4985141252624496, |
| "learning_rate": 6.547184492423227e-06, |
| "loss": 0.1876, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.8004216444132115, |
| "grad_norm": 0.4782702198502859, |
| "learning_rate": 6.541935126368384e-06, |
| "loss": 0.1498, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.8011243851018974, |
| "grad_norm": 0.540657184084682, |
| "learning_rate": 6.536683881446658e-06, |
| "loss": 0.1824, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.8018271257905832, |
| "grad_norm": 0.5138414346028792, |
| "learning_rate": 6.531430764056755e-06, |
| "loss": 0.1891, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.8025298664792692, |
| "grad_norm": 0.488009202375122, |
| "learning_rate": 6.5261757805996605e-06, |
| "loss": 0.1405, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.803232607167955, |
| "grad_norm": 0.5012186195889693, |
| "learning_rate": 6.520918937478639e-06, |
| "loss": 0.165, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.8039353478566409, |
| "grad_norm": 0.5127172901148541, |
| "learning_rate": 6.515660241099217e-06, |
| "loss": 0.1895, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.8046380885453268, |
| "grad_norm": 0.591375168506247, |
| "learning_rate": 6.51039969786918e-06, |
| "loss": 0.1909, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.8053408292340126, |
| "grad_norm": 0.5402971286129592, |
| "learning_rate": 6.5051373141985685e-06, |
| "loss": 0.2039, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.8060435699226985, |
| "grad_norm": 0.4986929896241223, |
| "learning_rate": 6.499873096499656e-06, |
| "loss": 0.1765, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.8067463106113844, |
| "grad_norm": 0.5601401970811486, |
| "learning_rate": 6.49460705118696e-06, |
| "loss": 0.237, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.8074490513000703, |
| "grad_norm": 0.5234632851450135, |
| "learning_rate": 6.489339184677221e-06, |
| "loss": 0.1865, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.8081517919887562, |
| "grad_norm": 0.5095651432213575, |
| "learning_rate": 6.484069503389398e-06, |
| "loss": 0.1787, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.808854532677442, |
| "grad_norm": 0.47427988030503876, |
| "learning_rate": 6.478798013744662e-06, |
| "loss": 0.1558, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.8095572733661279, |
| "grad_norm": 0.5283869622250998, |
| "learning_rate": 6.473524722166391e-06, |
| "loss": 0.2002, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.8102600140548137, |
| "grad_norm": 0.49727119184651486, |
| "learning_rate": 6.468249635080153e-06, |
| "loss": 0.1984, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.8109627547434997, |
| "grad_norm": 0.5464292898742243, |
| "learning_rate": 6.462972758913705e-06, |
| "loss": 0.2046, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.8116654954321855, |
| "grad_norm": 0.5250022392156026, |
| "learning_rate": 6.457694100096988e-06, |
| "loss": 0.1839, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.8123682361208714, |
| "grad_norm": 0.5165840127290805, |
| "learning_rate": 6.452413665062111e-06, |
| "loss": 0.2008, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.8130709768095573, |
| "grad_norm": 0.5481547327891854, |
| "learning_rate": 6.44713146024335e-06, |
| "loss": 0.1954, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.8137737174982431, |
| "grad_norm": 0.4874887769913095, |
| "learning_rate": 6.4418474920771365e-06, |
| "loss": 0.1685, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.814476458186929, |
| "grad_norm": 0.5039495230620578, |
| "learning_rate": 6.436561767002048e-06, |
| "loss": 0.1783, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.8151791988756149, |
| "grad_norm": 0.5199968656487806, |
| "learning_rate": 6.431274291458811e-06, |
| "loss": 0.1661, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.8158819395643008, |
| "grad_norm": 0.5027199873157974, |
| "learning_rate": 6.425985071890273e-06, |
| "loss": 0.1876, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.8165846802529867, |
| "grad_norm": 0.5249959950530578, |
| "learning_rate": 6.420694114741417e-06, |
| "loss": 0.209, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.8172874209416725, |
| "grad_norm": 0.484576963911013, |
| "learning_rate": 6.415401426459338e-06, |
| "loss": 0.1769, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.8179901616303584, |
| "grad_norm": 0.5358651554370841, |
| "learning_rate": 6.410107013493241e-06, |
| "loss": 0.1907, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.8186929023190442, |
| "grad_norm": 0.48281374879586475, |
| "learning_rate": 6.404810882294436e-06, |
| "loss": 0.15, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.8193956430077302, |
| "grad_norm": 0.47211990315454627, |
| "learning_rate": 6.399513039316319e-06, |
| "loss": 0.156, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.8200983836964161, |
| "grad_norm": 0.46372230344661597, |
| "learning_rate": 6.3942134910143805e-06, |
| "loss": 0.1604, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.8208011243851019, |
| "grad_norm": 0.5409494486579302, |
| "learning_rate": 6.388912243846186e-06, |
| "loss": 0.2035, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.8215038650737878, |
| "grad_norm": 0.5065033400554584, |
| "learning_rate": 6.3836093042713665e-06, |
| "loss": 0.1895, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.8222066057624736, |
| "grad_norm": 0.5316271171416881, |
| "learning_rate": 6.378304678751624e-06, |
| "loss": 0.1932, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.8229093464511595, |
| "grad_norm": 0.4948881303499293, |
| "learning_rate": 6.372998373750703e-06, |
| "loss": 0.1803, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.8236120871398454, |
| "grad_norm": 0.5321214151082184, |
| "learning_rate": 6.367690395734407e-06, |
| "loss": 0.1972, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.8243148278285313, |
| "grad_norm": 0.5111253171146781, |
| "learning_rate": 6.362380751170569e-06, |
| "loss": 0.177, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.8250175685172172, |
| "grad_norm": 0.49781005241117016, |
| "learning_rate": 6.35706944652906e-06, |
| "loss": 0.1733, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.825720309205903, |
| "grad_norm": 0.539119114081232, |
| "learning_rate": 6.351756488281766e-06, |
| "loss": 0.2035, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.8264230498945889, |
| "grad_norm": 0.5064581464166177, |
| "learning_rate": 6.346441882902594e-06, |
| "loss": 0.1712, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.8271257905832747, |
| "grad_norm": 0.5210623273152416, |
| "learning_rate": 6.341125636867455e-06, |
| "loss": 0.194, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.8278285312719607, |
| "grad_norm": 0.5130472931428885, |
| "learning_rate": 6.335807756654262e-06, |
| "loss": 0.1798, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.8285312719606466, |
| "grad_norm": 0.4972135153196797, |
| "learning_rate": 6.330488248742914e-06, |
| "loss": 0.1773, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.8292340126493324, |
| "grad_norm": 0.5066236011744614, |
| "learning_rate": 6.325167119615299e-06, |
| "loss": 0.1831, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.8299367533380183, |
| "grad_norm": 0.47094120421480634, |
| "learning_rate": 6.319844375755275e-06, |
| "loss": 0.163, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.8306394940267041, |
| "grad_norm": 0.5147685974539552, |
| "learning_rate": 6.314520023648678e-06, |
| "loss": 0.1939, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.83134223471539, |
| "grad_norm": 0.5042441050844522, |
| "learning_rate": 6.309194069783288e-06, |
| "loss": 0.1778, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.8320449754040758, |
| "grad_norm": 0.4675752799091352, |
| "learning_rate": 6.303866520648851e-06, |
| "loss": 0.158, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.8327477160927618, |
| "grad_norm": 0.4998544292032765, |
| "learning_rate": 6.298537382737048e-06, |
| "loss": 0.1636, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.8334504567814477, |
| "grad_norm": 0.4932259110586246, |
| "learning_rate": 6.2932066625415e-06, |
| "loss": 0.1776, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.8341531974701335, |
| "grad_norm": 0.5356101565107714, |
| "learning_rate": 6.287874366557756e-06, |
| "loss": 0.1937, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.8348559381588194, |
| "grad_norm": 0.4798323523373808, |
| "learning_rate": 6.2825405012832815e-06, |
| "loss": 0.169, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.8355586788475052, |
| "grad_norm": 0.5140851871320405, |
| "learning_rate": 6.2772050732174595e-06, |
| "loss": 0.1828, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.8362614195361912, |
| "grad_norm": 0.47825587417926313, |
| "learning_rate": 6.2718680888615734e-06, |
| "loss": 0.1613, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.8369641602248771, |
| "grad_norm": 0.4677851677300216, |
| "learning_rate": 6.266529554718804e-06, |
| "loss": 0.1616, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.8376669009135629, |
| "grad_norm": 0.49528999706107246, |
| "learning_rate": 6.261189477294221e-06, |
| "loss": 0.1728, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.8383696416022488, |
| "grad_norm": 0.5309360153372439, |
| "learning_rate": 6.255847863094775e-06, |
| "loss": 0.2056, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.8390723822909346, |
| "grad_norm": 0.5481499219892606, |
| "learning_rate": 6.250504718629288e-06, |
| "loss": 0.1931, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.8397751229796205, |
| "grad_norm": 0.47889263637156904, |
| "learning_rate": 6.245160050408446e-06, |
| "loss": 0.1582, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.8404778636683063, |
| "grad_norm": 0.4750288921863997, |
| "learning_rate": 6.2398138649447935e-06, |
| "loss": 0.156, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.8411806043569923, |
| "grad_norm": 0.5280516814060533, |
| "learning_rate": 6.234466168752724e-06, |
| "loss": 0.2062, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.8418833450456782, |
| "grad_norm": 0.5281778278031862, |
| "learning_rate": 6.22911696834847e-06, |
| "loss": 0.162, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.842586085734364, |
| "grad_norm": 0.543676618680521, |
| "learning_rate": 6.223766270250099e-06, |
| "loss": 0.2266, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.8432888264230499, |
| "grad_norm": 0.5327014887264019, |
| "learning_rate": 6.218414080977502e-06, |
| "loss": 0.2045, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.8439915671117357, |
| "grad_norm": 0.4655347457613403, |
| "learning_rate": 6.2130604070523855e-06, |
| "loss": 0.1555, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.8446943078004217, |
| "grad_norm": 0.49191482162616434, |
| "learning_rate": 6.207705254998269e-06, |
| "loss": 0.1725, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.8453970484891076, |
| "grad_norm": 0.5130123761446342, |
| "learning_rate": 6.2023486313404715e-06, |
| "loss": 0.1863, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.8460997891777934, |
| "grad_norm": 0.4890407398904942, |
| "learning_rate": 6.196990542606102e-06, |
| "loss": 0.1732, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.8468025298664793, |
| "grad_norm": 0.5205797335179796, |
| "learning_rate": 6.19163099532406e-06, |
| "loss": 0.1907, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.8475052705551651, |
| "grad_norm": 0.5278606523762948, |
| "learning_rate": 6.186269996025018e-06, |
| "loss": 0.2092, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.848208011243851, |
| "grad_norm": 0.5359697801822066, |
| "learning_rate": 6.18090755124142e-06, |
| "loss": 0.1889, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.8489107519325368, |
| "grad_norm": 0.5115121044938924, |
| "learning_rate": 6.175543667507472e-06, |
| "loss": 0.1857, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.8496134926212228, |
| "grad_norm": 0.5600510635643645, |
| "learning_rate": 6.17017835135913e-06, |
| "loss": 0.2388, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.8503162333099087, |
| "grad_norm": 0.4999111546944338, |
| "learning_rate": 6.1648116093340985e-06, |
| "loss": 0.1863, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.8510189739985945, |
| "grad_norm": 0.48107596282402415, |
| "learning_rate": 6.15944344797182e-06, |
| "loss": 0.1781, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.8517217146872804, |
| "grad_norm": 0.5497594325694529, |
| "learning_rate": 6.154073873813463e-06, |
| "loss": 0.2167, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.8524244553759662, |
| "grad_norm": 0.5328284668818077, |
| "learning_rate": 6.148702893401921e-06, |
| "loss": 0.1937, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.8531271960646521, |
| "grad_norm": 0.5136318451918118, |
| "learning_rate": 6.143330513281799e-06, |
| "loss": 0.1721, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.8538299367533381, |
| "grad_norm": 0.4778719513564468, |
| "learning_rate": 6.137956739999408e-06, |
| "loss": 0.1486, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.8545326774420239, |
| "grad_norm": 0.5172461977678994, |
| "learning_rate": 6.132581580102757e-06, |
| "loss": 0.196, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.8552354181307098, |
| "grad_norm": 0.5052295881766661, |
| "learning_rate": 6.127205040141544e-06, |
| "loss": 0.1827, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.8559381588193956, |
| "grad_norm": 0.5585326496987986, |
| "learning_rate": 6.121827126667149e-06, |
| "loss": 0.2012, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.8566408995080815, |
| "grad_norm": 0.48772318069780995, |
| "learning_rate": 6.116447846232626e-06, |
| "loss": 0.1624, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.8573436401967673, |
| "grad_norm": 0.5620286701814415, |
| "learning_rate": 6.111067205392693e-06, |
| "loss": 0.2114, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.8580463808854533, |
| "grad_norm": 0.4768501973943461, |
| "learning_rate": 6.105685210703728e-06, |
| "loss": 0.169, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.8587491215741392, |
| "grad_norm": 0.4879964365664385, |
| "learning_rate": 6.100301868723758e-06, |
| "loss": 0.1721, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.859451862262825, |
| "grad_norm": 0.49211477947291504, |
| "learning_rate": 6.0949171860124516e-06, |
| "loss": 0.1589, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.8601546029515109, |
| "grad_norm": 0.48242821081279824, |
| "learning_rate": 6.089531169131109e-06, |
| "loss": 0.1589, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.8608573436401967, |
| "grad_norm": 0.5446078856755274, |
| "learning_rate": 6.08414382464266e-06, |
| "loss": 0.2006, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.8615600843288826, |
| "grad_norm": 0.5147593100041957, |
| "learning_rate": 6.078755159111648e-06, |
| "loss": 0.1864, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.8622628250175686, |
| "grad_norm": 0.5307757224318073, |
| "learning_rate": 6.073365179104229e-06, |
| "loss": 0.2122, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.8629655657062544, |
| "grad_norm": 0.5029892450213788, |
| "learning_rate": 6.067973891188161e-06, |
| "loss": 0.1809, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.8636683063949403, |
| "grad_norm": 0.5132171947385449, |
| "learning_rate": 6.0625813019327925e-06, |
| "loss": 0.1835, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.8643710470836261, |
| "grad_norm": 0.5101091466907139, |
| "learning_rate": 6.057187417909061e-06, |
| "loss": 0.188, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.865073787772312, |
| "grad_norm": 0.46211949086790133, |
| "learning_rate": 6.05179224568948e-06, |
| "loss": 0.1589, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.8657765284609978, |
| "grad_norm": 0.47046382666591835, |
| "learning_rate": 6.046395791848133e-06, |
| "loss": 0.1565, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.8664792691496838, |
| "grad_norm": 0.49826244659146274, |
| "learning_rate": 6.040998062960666e-06, |
| "loss": 0.1782, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.8671820098383697, |
| "grad_norm": 0.5193486118705439, |
| "learning_rate": 6.035599065604275e-06, |
| "loss": 0.1852, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.8678847505270555, |
| "grad_norm": 0.49205156065630956, |
| "learning_rate": 6.0301988063577075e-06, |
| "loss": 0.1619, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.8685874912157414, |
| "grad_norm": 0.5119883525242078, |
| "learning_rate": 6.024797291801247e-06, |
| "loss": 0.1968, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.8692902319044272, |
| "grad_norm": 0.5127786450316105, |
| "learning_rate": 6.019394528516702e-06, |
| "loss": 0.1879, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.8699929725931131, |
| "grad_norm": 0.4656441543907257, |
| "learning_rate": 6.013990523087409e-06, |
| "loss": 0.1613, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.8706957132817991, |
| "grad_norm": 0.5198896911626247, |
| "learning_rate": 6.008585282098212e-06, |
| "loss": 0.1766, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.8713984539704849, |
| "grad_norm": 0.4911965550275012, |
| "learning_rate": 6.003178812135464e-06, |
| "loss": 0.1843, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.8721011946591708, |
| "grad_norm": 0.5585501627083996, |
| "learning_rate": 5.997771119787017e-06, |
| "loss": 0.1811, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.8728039353478566, |
| "grad_norm": 0.5026455634926816, |
| "learning_rate": 5.99236221164221e-06, |
| "loss": 0.1928, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.8735066760365425, |
| "grad_norm": 0.5356055151967403, |
| "learning_rate": 5.986952094291861e-06, |
| "loss": 0.1984, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.8742094167252283, |
| "grad_norm": 0.5068822428289327, |
| "learning_rate": 5.9815407743282694e-06, |
| "loss": 0.1906, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.8749121574139143, |
| "grad_norm": 0.5115140958509385, |
| "learning_rate": 5.9761282583451906e-06, |
| "loss": 0.177, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.8756148981026002, |
| "grad_norm": 0.48290458027970495, |
| "learning_rate": 5.970714552937843e-06, |
| "loss": 0.1514, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.876317638791286, |
| "grad_norm": 0.49640574921131947, |
| "learning_rate": 5.965299664702896e-06, |
| "loss": 0.1855, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.8770203794799719, |
| "grad_norm": 0.5044371470192852, |
| "learning_rate": 5.959883600238452e-06, |
| "loss": 0.1736, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.8777231201686577, |
| "grad_norm": 0.505160522141664, |
| "learning_rate": 5.954466366144057e-06, |
| "loss": 0.1972, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.8784258608573436, |
| "grad_norm": 0.523945910921815, |
| "learning_rate": 5.949047969020676e-06, |
| "loss": 0.189, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.8791286015460296, |
| "grad_norm": 0.5256872085388279, |
| "learning_rate": 5.94362841547069e-06, |
| "loss": 0.1863, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.8798313422347154, |
| "grad_norm": 0.5506874839118318, |
| "learning_rate": 5.938207712097895e-06, |
| "loss": 0.1999, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.8805340829234013, |
| "grad_norm": 0.5282521300164007, |
| "learning_rate": 5.932785865507482e-06, |
| "loss": 0.1969, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.8812368236120871, |
| "grad_norm": 0.5153960622956695, |
| "learning_rate": 5.927362882306039e-06, |
| "loss": 0.2015, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.881939564300773, |
| "grad_norm": 0.531206524060467, |
| "learning_rate": 5.9219387691015376e-06, |
| "loss": 0.1882, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.8826423049894588, |
| "grad_norm": 0.4816995124378558, |
| "learning_rate": 5.916513532503325e-06, |
| "loss": 0.1634, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.8833450456781448, |
| "grad_norm": 0.5177569025093481, |
| "learning_rate": 5.911087179122121e-06, |
| "loss": 0.1966, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.8840477863668307, |
| "grad_norm": 0.48498359527901896, |
| "learning_rate": 5.90565971557e-06, |
| "loss": 0.1643, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.8847505270555165, |
| "grad_norm": 0.5115850555415963, |
| "learning_rate": 5.900231148460398e-06, |
| "loss": 0.1789, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.8854532677442024, |
| "grad_norm": 0.491914234210044, |
| "learning_rate": 5.894801484408086e-06, |
| "loss": 0.1815, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.8861560084328882, |
| "grad_norm": 0.5013558086686257, |
| "learning_rate": 5.8893707300291805e-06, |
| "loss": 0.1774, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.8868587491215741, |
| "grad_norm": 0.5071700374713674, |
| "learning_rate": 5.883938891941117e-06, |
| "loss": 0.1888, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.8875614898102601, |
| "grad_norm": 0.5049441962524359, |
| "learning_rate": 5.878505976762664e-06, |
| "loss": 0.1755, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.8882642304989459, |
| "grad_norm": 0.5065049785488899, |
| "learning_rate": 5.873071991113889e-06, |
| "loss": 0.1658, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.8889669711876318, |
| "grad_norm": 0.5434003669210763, |
| "learning_rate": 5.867636941616174e-06, |
| "loss": 0.2053, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.8896697118763176, |
| "grad_norm": 0.48316536570276314, |
| "learning_rate": 5.862200834892192e-06, |
| "loss": 0.1805, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.8903724525650035, |
| "grad_norm": 0.4805591607424035, |
| "learning_rate": 5.856763677565905e-06, |
| "loss": 0.1681, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.8910751932536893, |
| "grad_norm": 0.4987631420062705, |
| "learning_rate": 5.851325476262558e-06, |
| "loss": 0.1849, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.8917779339423753, |
| "grad_norm": 0.5229631899808861, |
| "learning_rate": 5.845886237608665e-06, |
| "loss": 0.1917, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.8924806746310612, |
| "grad_norm": 0.4789523347189992, |
| "learning_rate": 5.840445968232005e-06, |
| "loss": 0.1613, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.893183415319747, |
| "grad_norm": 0.47123822162637413, |
| "learning_rate": 5.8350046747616154e-06, |
| "loss": 0.1648, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.8938861560084329, |
| "grad_norm": 0.4945508158698064, |
| "learning_rate": 5.829562363827773e-06, |
| "loss": 0.1777, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.8945888966971187, |
| "grad_norm": 0.5229050810270796, |
| "learning_rate": 5.824119042062007e-06, |
| "loss": 0.19, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.8952916373858046, |
| "grad_norm": 0.5176570875009562, |
| "learning_rate": 5.818674716097068e-06, |
| "loss": 0.1994, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.8959943780744906, |
| "grad_norm": 0.5185724352450841, |
| "learning_rate": 5.813229392566937e-06, |
| "loss": 0.173, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.8966971187631764, |
| "grad_norm": 0.5188312046851375, |
| "learning_rate": 5.8077830781068044e-06, |
| "loss": 0.1827, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.8973998594518623, |
| "grad_norm": 0.4994378916272213, |
| "learning_rate": 5.802335779353074e-06, |
| "loss": 0.1834, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.8981026001405481, |
| "grad_norm": 0.5053930448321152, |
| "learning_rate": 5.796887502943343e-06, |
| "loss": 0.191, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.898805340829234, |
| "grad_norm": 0.5306229359834849, |
| "learning_rate": 5.791438255516407e-06, |
| "loss": 0.2269, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.8995080815179198, |
| "grad_norm": 0.5399478136370445, |
| "learning_rate": 5.785988043712239e-06, |
| "loss": 0.2189, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.9002108222066058, |
| "grad_norm": 0.5061614097634163, |
| "learning_rate": 5.780536874171987e-06, |
| "loss": 0.1837, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.9009135628952917, |
| "grad_norm": 0.5277378891669225, |
| "learning_rate": 5.775084753537969e-06, |
| "loss": 0.1813, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.9016163035839775, |
| "grad_norm": 0.5085817730767297, |
| "learning_rate": 5.769631688453666e-06, |
| "loss": 0.187, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.9023190442726634, |
| "grad_norm": 0.5068441155995959, |
| "learning_rate": 5.764177685563698e-06, |
| "loss": 0.1834, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.9030217849613492, |
| "grad_norm": 0.5205665157263202, |
| "learning_rate": 5.758722751513838e-06, |
| "loss": 0.2025, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.9037245256500351, |
| "grad_norm": 0.4964882488251083, |
| "learning_rate": 5.753266892950989e-06, |
| "loss": 0.1765, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.9044272663387211, |
| "grad_norm": 0.49957597445619084, |
| "learning_rate": 5.74781011652318e-06, |
| "loss": 0.1626, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.9051300070274069, |
| "grad_norm": 0.5454163588824682, |
| "learning_rate": 5.742352428879565e-06, |
| "loss": 0.1625, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.9058327477160928, |
| "grad_norm": 0.49019948841483496, |
| "learning_rate": 5.736893836670399e-06, |
| "loss": 0.1817, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.9065354884047786, |
| "grad_norm": 0.540938094603087, |
| "learning_rate": 5.731434346547045e-06, |
| "loss": 0.1903, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.9072382290934645, |
| "grad_norm": 0.5185744975996959, |
| "learning_rate": 5.72597396516196e-06, |
| "loss": 0.1759, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.9079409697821503, |
| "grad_norm": 0.5460557766702974, |
| "learning_rate": 5.7205126991686825e-06, |
| "loss": 0.2089, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.9086437104708363, |
| "grad_norm": 0.5261555481195017, |
| "learning_rate": 5.7150505552218346e-06, |
| "loss": 0.1937, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.9093464511595222, |
| "grad_norm": 0.4967183113214995, |
| "learning_rate": 5.709587539977105e-06, |
| "loss": 0.1907, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.910049191848208, |
| "grad_norm": 0.5016407484137889, |
| "learning_rate": 5.7041236600912475e-06, |
| "loss": 0.1844, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.9107519325368939, |
| "grad_norm": 0.5475144668884032, |
| "learning_rate": 5.698658922222062e-06, |
| "loss": 0.1893, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.9114546732255797, |
| "grad_norm": 0.4888372655496957, |
| "learning_rate": 5.693193333028404e-06, |
| "loss": 0.1668, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.9121574139142656, |
| "grad_norm": 0.5136792617691439, |
| "learning_rate": 5.687726899170155e-06, |
| "loss": 0.1795, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.9128601546029516, |
| "grad_norm": 0.49755317409906136, |
| "learning_rate": 5.682259627308238e-06, |
| "loss": 0.1698, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.9135628952916374, |
| "grad_norm": 0.48426769502045947, |
| "learning_rate": 5.6767915241045855e-06, |
| "loss": 0.1619, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.9142656359803233, |
| "grad_norm": 0.4925450873469613, |
| "learning_rate": 5.671322596222153e-06, |
| "loss": 0.168, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.9149683766690091, |
| "grad_norm": 0.45271196582377093, |
| "learning_rate": 5.665852850324893e-06, |
| "loss": 0.1427, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.915671117357695, |
| "grad_norm": 0.5331015370489443, |
| "learning_rate": 5.660382293077759e-06, |
| "loss": 0.2079, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.9163738580463809, |
| "grad_norm": 0.4987644474302223, |
| "learning_rate": 5.654910931146692e-06, |
| "loss": 0.1821, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.9170765987350668, |
| "grad_norm": 0.5042023902121021, |
| "learning_rate": 5.649438771198616e-06, |
| "loss": 0.1912, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.9177793394237527, |
| "grad_norm": 0.48973082504550525, |
| "learning_rate": 5.64396581990142e-06, |
| "loss": 0.1928, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.9184820801124385, |
| "grad_norm": 0.48831989879878884, |
| "learning_rate": 5.638492083923969e-06, |
| "loss": 0.1846, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.9191848208011244, |
| "grad_norm": 0.5016664170305484, |
| "learning_rate": 5.633017569936071e-06, |
| "loss": 0.1965, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.9198875614898102, |
| "grad_norm": 0.5166324839960242, |
| "learning_rate": 5.6275422846084945e-06, |
| "loss": 0.1909, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.9205903021784961, |
| "grad_norm": 0.4990652621475701, |
| "learning_rate": 5.622066234612936e-06, |
| "loss": 0.1647, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.921293042867182, |
| "grad_norm": 0.5279858641913935, |
| "learning_rate": 5.616589426622033e-06, |
| "loss": 0.1785, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.9219957835558679, |
| "grad_norm": 0.5028120525677001, |
| "learning_rate": 5.611111867309344e-06, |
| "loss": 0.1566, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.9226985242445538, |
| "grad_norm": 0.49496209816045444, |
| "learning_rate": 5.605633563349341e-06, |
| "loss": 0.1878, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.9234012649332396, |
| "grad_norm": 0.5103355627273767, |
| "learning_rate": 5.600154521417405e-06, |
| "loss": 0.1915, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.9241040056219255, |
| "grad_norm": 0.5083064986304011, |
| "learning_rate": 5.5946747481898144e-06, |
| "loss": 0.173, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.9248067463106114, |
| "grad_norm": 0.5443980894311817, |
| "learning_rate": 5.589194250343741e-06, |
| "loss": 0.217, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.9255094869992972, |
| "grad_norm": 0.5163177476037929, |
| "learning_rate": 5.583713034557241e-06, |
| "loss": 0.1911, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.9262122276879832, |
| "grad_norm": 0.4837536735411591, |
| "learning_rate": 5.57823110750924e-06, |
| "loss": 0.1741, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.926914968376669, |
| "grad_norm": 0.4874969026198589, |
| "learning_rate": 5.572748475879536e-06, |
| "loss": 0.1813, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.9276177090653549, |
| "grad_norm": 0.49763208093324596, |
| "learning_rate": 5.567265146348779e-06, |
| "loss": 0.1764, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.9283204497540407, |
| "grad_norm": 0.5078828461218955, |
| "learning_rate": 5.561781125598479e-06, |
| "loss": 0.1836, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.9290231904427266, |
| "grad_norm": 0.4715355993383816, |
| "learning_rate": 5.556296420310977e-06, |
| "loss": 0.1639, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.9297259311314126, |
| "grad_norm": 0.4697787642937931, |
| "learning_rate": 5.550811037169457e-06, |
| "loss": 0.1588, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.9304286718200984, |
| "grad_norm": 0.5053902581080298, |
| "learning_rate": 5.545324982857926e-06, |
| "loss": 0.1963, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.9311314125087843, |
| "grad_norm": 0.499350933270986, |
| "learning_rate": 5.539838264061207e-06, |
| "loss": 0.187, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.9318341531974701, |
| "grad_norm": 0.4855473339463826, |
| "learning_rate": 5.534350887464934e-06, |
| "loss": 0.1804, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.932536893886156, |
| "grad_norm": 0.4931168918656159, |
| "learning_rate": 5.528862859755545e-06, |
| "loss": 0.1738, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.9332396345748419, |
| "grad_norm": 0.516534148217859, |
| "learning_rate": 5.523374187620266e-06, |
| "loss": 0.1911, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.9339423752635277, |
| "grad_norm": 0.5010990203093634, |
| "learning_rate": 5.517884877747116e-06, |
| "loss": 0.1954, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.9346451159522137, |
| "grad_norm": 0.49888090289331416, |
| "learning_rate": 5.512394936824881e-06, |
| "loss": 0.1669, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.9353478566408995, |
| "grad_norm": 0.4942647057814772, |
| "learning_rate": 5.506904371543126e-06, |
| "loss": 0.1706, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.9360505973295854, |
| "grad_norm": 0.4930304560138631, |
| "learning_rate": 5.501413188592167e-06, |
| "loss": 0.16, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.9367533380182712, |
| "grad_norm": 0.5202336246837628, |
| "learning_rate": 5.495921394663085e-06, |
| "loss": 0.1956, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.9374560787069571, |
| "grad_norm": 0.5195462073922367, |
| "learning_rate": 5.4904289964476905e-06, |
| "loss": 0.1796, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.938158819395643, |
| "grad_norm": 0.5278613831428796, |
| "learning_rate": 5.484936000638546e-06, |
| "loss": 0.1848, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.9388615600843289, |
| "grad_norm": 0.5387221558941402, |
| "learning_rate": 5.479442413928927e-06, |
| "loss": 0.1948, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.9395643007730148, |
| "grad_norm": 0.5292981166694255, |
| "learning_rate": 5.473948243012842e-06, |
| "loss": 0.1976, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.9402670414617006, |
| "grad_norm": 0.49095341165304357, |
| "learning_rate": 5.468453494585002e-06, |
| "loss": 0.1936, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.9409697821503865, |
| "grad_norm": 0.476137761784659, |
| "learning_rate": 5.462958175340828e-06, |
| "loss": 0.1732, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.9416725228390724, |
| "grad_norm": 0.5035071373800364, |
| "learning_rate": 5.457462291976432e-06, |
| "loss": 0.1693, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.9423752635277582, |
| "grad_norm": 0.4927374677495356, |
| "learning_rate": 5.451965851188618e-06, |
| "loss": 0.173, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.9430780042164442, |
| "grad_norm": 0.521962512270835, |
| "learning_rate": 5.446468859674862e-06, |
| "loss": 0.1893, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.94378074490513, |
| "grad_norm": 0.488287317097337, |
| "learning_rate": 5.440971324133322e-06, |
| "loss": 0.1566, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.9444834855938159, |
| "grad_norm": 0.49933441776873916, |
| "learning_rate": 5.435473251262805e-06, |
| "loss": 0.1941, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.9451862262825017, |
| "grad_norm": 0.5150937905899252, |
| "learning_rate": 5.429974647762788e-06, |
| "loss": 0.1849, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.9458889669711876, |
| "grad_norm": 0.4937729920011368, |
| "learning_rate": 5.424475520333381e-06, |
| "loss": 0.1953, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.9465917076598735, |
| "grad_norm": 0.4979160581197338, |
| "learning_rate": 5.418975875675341e-06, |
| "loss": 0.15, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.9472944483485594, |
| "grad_norm": 0.49572714712562493, |
| "learning_rate": 5.4134757204900525e-06, |
| "loss": 0.171, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.9479971890372453, |
| "grad_norm": 0.5017033812754259, |
| "learning_rate": 5.407975061479521e-06, |
| "loss": 0.1685, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.9486999297259311, |
| "grad_norm": 0.5232483398771414, |
| "learning_rate": 5.402473905346368e-06, |
| "loss": 0.1924, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.949402670414617, |
| "grad_norm": 0.49099314208156364, |
| "learning_rate": 5.39697225879382e-06, |
| "loss": 0.167, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.9501054111033029, |
| "grad_norm": 0.5290685851917885, |
| "learning_rate": 5.3914701285257e-06, |
| "loss": 0.1756, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.9508081517919887, |
| "grad_norm": 0.5369121617826327, |
| "learning_rate": 5.385967521246422e-06, |
| "loss": 0.2139, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.9515108924806747, |
| "grad_norm": 0.5134430449452313, |
| "learning_rate": 5.38046444366098e-06, |
| "loss": 0.1793, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.9522136331693605, |
| "grad_norm": 0.4831689948808722, |
| "learning_rate": 5.3749609024749424e-06, |
| "loss": 0.1693, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.9529163738580464, |
| "grad_norm": 0.4887397506632592, |
| "learning_rate": 5.36945690439444e-06, |
| "loss": 0.1601, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.9536191145467322, |
| "grad_norm": 0.5372445202703667, |
| "learning_rate": 5.363952456126165e-06, |
| "loss": 0.2015, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.9543218552354181, |
| "grad_norm": 0.4850031506109228, |
| "learning_rate": 5.358447564377352e-06, |
| "loss": 0.1755, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.955024595924104, |
| "grad_norm": 0.5027528352188118, |
| "learning_rate": 5.35294223585578e-06, |
| "loss": 0.1558, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.9557273366127899, |
| "grad_norm": 0.4931882801165771, |
| "learning_rate": 5.34743647726976e-06, |
| "loss": 0.1761, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.9564300773014758, |
| "grad_norm": 0.5588951139536696, |
| "learning_rate": 5.341930295328129e-06, |
| "loss": 0.2131, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.9571328179901616, |
| "grad_norm": 0.5529601552521203, |
| "learning_rate": 5.336423696740233e-06, |
| "loss": 0.226, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.9578355586788475, |
| "grad_norm": 0.47333211007445464, |
| "learning_rate": 5.330916688215931e-06, |
| "loss": 0.1452, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.9585382993675334, |
| "grad_norm": 0.47566193526005807, |
| "learning_rate": 5.325409276465581e-06, |
| "loss": 0.1555, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.9592410400562192, |
| "grad_norm": 0.4914481726281524, |
| "learning_rate": 5.319901468200034e-06, |
| "loss": 0.1744, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.9599437807449052, |
| "grad_norm": 0.49224736330749624, |
| "learning_rate": 5.314393270130617e-06, |
| "loss": 0.181, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.960646521433591, |
| "grad_norm": 0.4858591625950129, |
| "learning_rate": 5.308884688969145e-06, |
| "loss": 0.1764, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.9613492621222769, |
| "grad_norm": 0.479577498442296, |
| "learning_rate": 5.303375731427882e-06, |
| "loss": 0.1695, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.9620520028109627, |
| "grad_norm": 0.4825228825413551, |
| "learning_rate": 5.297866404219569e-06, |
| "loss": 0.167, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.9627547434996486, |
| "grad_norm": 0.49773073193567335, |
| "learning_rate": 5.292356714057382e-06, |
| "loss": 0.1782, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.9634574841883345, |
| "grad_norm": 0.465706124969307, |
| "learning_rate": 5.28684666765495e-06, |
| "loss": 0.1429, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.9641602248770204, |
| "grad_norm": 0.48475587233272616, |
| "learning_rate": 5.281336271726333e-06, |
| "loss": 0.1693, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.9648629655657063, |
| "grad_norm": 0.5002876235055297, |
| "learning_rate": 5.275825532986013e-06, |
| "loss": 0.1698, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.9655657062543921, |
| "grad_norm": 0.4751205576638796, |
| "learning_rate": 5.270314458148896e-06, |
| "loss": 0.1453, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.966268446943078, |
| "grad_norm": 0.5111002061348704, |
| "learning_rate": 5.2648030539302894e-06, |
| "loss": 0.1956, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.9669711876317639, |
| "grad_norm": 0.5287868754504749, |
| "learning_rate": 5.259291327045912e-06, |
| "loss": 0.2098, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.9676739283204497, |
| "grad_norm": 0.5238439498113354, |
| "learning_rate": 5.2537792842118694e-06, |
| "loss": 0.165, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.9683766690091357, |
| "grad_norm": 0.4838233344953546, |
| "learning_rate": 5.248266932144652e-06, |
| "loss": 0.1552, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.9690794096978215, |
| "grad_norm": 0.49589259249472895, |
| "learning_rate": 5.2427542775611314e-06, |
| "loss": 0.1573, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.9697821503865074, |
| "grad_norm": 0.5121936042194623, |
| "learning_rate": 5.23724132717854e-06, |
| "loss": 0.18, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.9704848910751932, |
| "grad_norm": 0.5539715989420239, |
| "learning_rate": 5.231728087714482e-06, |
| "loss": 0.1855, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.9711876317638791, |
| "grad_norm": 0.5349531279287615, |
| "learning_rate": 5.2262145658869005e-06, |
| "loss": 0.1865, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.971890372452565, |
| "grad_norm": 0.510639043585128, |
| "learning_rate": 5.220700768414094e-06, |
| "loss": 0.1944, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.9725931131412509, |
| "grad_norm": 0.5088479448113863, |
| "learning_rate": 5.215186702014692e-06, |
| "loss": 0.1922, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.9732958538299368, |
| "grad_norm": 0.4796693735802863, |
| "learning_rate": 5.209672373407651e-06, |
| "loss": 0.1762, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.9739985945186226, |
| "grad_norm": 0.499545197442791, |
| "learning_rate": 5.204157789312248e-06, |
| "loss": 0.1768, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.9747013352073085, |
| "grad_norm": 0.5126699251503342, |
| "learning_rate": 5.198642956448072e-06, |
| "loss": 0.1783, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.9754040758959944, |
| "grad_norm": 0.5145385299519166, |
| "learning_rate": 5.193127881535015e-06, |
| "loss": 0.1928, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.9761068165846802, |
| "grad_norm": 0.528813426196794, |
| "learning_rate": 5.187612571293263e-06, |
| "loss": 0.1877, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.9768095572733662, |
| "grad_norm": 0.5099979736626467, |
| "learning_rate": 5.182097032443288e-06, |
| "loss": 0.1883, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.977512297962052, |
| "grad_norm": 0.4523284780455419, |
| "learning_rate": 5.176581271705845e-06, |
| "loss": 0.1478, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.9782150386507379, |
| "grad_norm": 0.4932276047724686, |
| "learning_rate": 5.1710652958019525e-06, |
| "loss": 0.1593, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.9789177793394237, |
| "grad_norm": 0.4796811572296884, |
| "learning_rate": 5.165549111452899e-06, |
| "loss": 0.1688, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.9796205200281096, |
| "grad_norm": 0.5034515653870583, |
| "learning_rate": 5.1600327253802184e-06, |
| "loss": 0.1689, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.9803232607167955, |
| "grad_norm": 0.502198923761688, |
| "learning_rate": 5.154516144305698e-06, |
| "loss": 0.1644, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.9810260014054814, |
| "grad_norm": 0.4602068868601718, |
| "learning_rate": 5.1489993749513576e-06, |
| "loss": 0.1544, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.9817287420941673, |
| "grad_norm": 0.45533835598534217, |
| "learning_rate": 5.1434824240394494e-06, |
| "loss": 0.1469, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.9824314827828531, |
| "grad_norm": 0.45955715449833245, |
| "learning_rate": 5.1379652982924465e-06, |
| "loss": 0.1431, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.983134223471539, |
| "grad_norm": 0.5246077121318076, |
| "learning_rate": 5.132448004433034e-06, |
| "loss": 0.1908, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.9838369641602249, |
| "grad_norm": 0.5029012585391444, |
| "learning_rate": 5.1269305491841015e-06, |
| "loss": 0.1898, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.9845397048489107, |
| "grad_norm": 0.5155559300537573, |
| "learning_rate": 5.121412939268736e-06, |
| "loss": 0.1903, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.9852424455375967, |
| "grad_norm": 0.5167390892548627, |
| "learning_rate": 5.115895181410213e-06, |
| "loss": 0.1957, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.9859451862262825, |
| "grad_norm": 0.503291943738301, |
| "learning_rate": 5.110377282331988e-06, |
| "loss": 0.1616, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.9866479269149684, |
| "grad_norm": 0.5172792729955873, |
| "learning_rate": 5.10485924875769e-06, |
| "loss": 0.1648, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.9873506676036542, |
| "grad_norm": 0.5288947212309869, |
| "learning_rate": 5.09934108741111e-06, |
| "loss": 0.1947, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.9880534082923401, |
| "grad_norm": 0.4536753280886982, |
| "learning_rate": 5.093822805016194e-06, |
| "loss": 0.1476, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.988756148981026, |
| "grad_norm": 0.5458840167868061, |
| "learning_rate": 5.088304408297039e-06, |
| "loss": 0.1804, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.9894588896697118, |
| "grad_norm": 0.4968761929170356, |
| "learning_rate": 5.0827859039778784e-06, |
| "loss": 0.1691, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.9901616303583978, |
| "grad_norm": 0.47922207982218623, |
| "learning_rate": 5.077267298783077e-06, |
| "loss": 0.1672, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.9908643710470836, |
| "grad_norm": 0.5081431828705689, |
| "learning_rate": 5.071748599437124e-06, |
| "loss": 0.1886, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.9915671117357695, |
| "grad_norm": 0.5131479650403806, |
| "learning_rate": 5.066229812664621e-06, |
| "loss": 0.1718, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.9922698524244554, |
| "grad_norm": 0.540035551569854, |
| "learning_rate": 5.060710945190278e-06, |
| "loss": 0.1971, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.9929725931131412, |
| "grad_norm": 0.4803244137760998, |
| "learning_rate": 5.0551920037389035e-06, |
| "loss": 0.1553, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.9936753338018272, |
| "grad_norm": 0.4899909375914942, |
| "learning_rate": 5.049672995035394e-06, |
| "loss": 0.1808, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.994378074490513, |
| "grad_norm": 0.49275152076452683, |
| "learning_rate": 5.04415392580473e-06, |
| "loss": 0.1679, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.9950808151791989, |
| "grad_norm": 0.5129171105956463, |
| "learning_rate": 5.038634802771966e-06, |
| "loss": 0.2091, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.9957835558678847, |
| "grad_norm": 0.5185735469093622, |
| "learning_rate": 5.03311563266222e-06, |
| "loss": 0.199, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.9964862965565706, |
| "grad_norm": 0.520279570499453, |
| "learning_rate": 5.027596422200668e-06, |
| "loss": 0.1969, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.9971890372452565, |
| "grad_norm": 0.5171439479650721, |
| "learning_rate": 5.022077178112537e-06, |
| "loss": 0.1888, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.9978917779339423, |
| "grad_norm": 0.4990256654919666, |
| "learning_rate": 5.016557907123095e-06, |
| "loss": 0.161, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.9985945186226283, |
| "grad_norm": 0.48386951524477756, |
| "learning_rate": 5.011038615957639e-06, |
| "loss": 0.1714, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.9992972593113141, |
| "grad_norm": 0.5210856503064014, |
| "learning_rate": 5.005519311341495e-06, |
| "loss": 0.1874, |
| "step": 1422 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.48163350666059024, |
| "learning_rate": 5e-06, |
| "loss": 0.1658, |
| "step": 1423 |
| }, |
| { |
| "epoch": 1.000702740688686, |
| "grad_norm": 0.5224393577574205, |
| "learning_rate": 4.994480688658508e-06, |
| "loss": 0.1568, |
| "step": 1424 |
| }, |
| { |
| "epoch": 1.0014054813773718, |
| "grad_norm": 0.5167212944296797, |
| "learning_rate": 4.9889613840423615e-06, |
| "loss": 0.1565, |
| "step": 1425 |
| }, |
| { |
| "epoch": 1.0021082220660575, |
| "grad_norm": 0.5083112192863517, |
| "learning_rate": 4.983442092876906e-06, |
| "loss": 0.1627, |
| "step": 1426 |
| }, |
| { |
| "epoch": 1.0028109627547435, |
| "grad_norm": 0.5178461118513241, |
| "learning_rate": 4.977922821887463e-06, |
| "loss": 0.1774, |
| "step": 1427 |
| }, |
| { |
| "epoch": 1.0035137034434294, |
| "grad_norm": 0.466141334877386, |
| "learning_rate": 4.972403577799334e-06, |
| "loss": 0.136, |
| "step": 1428 |
| }, |
| { |
| "epoch": 1.0042164441321153, |
| "grad_norm": 0.5074856152966533, |
| "learning_rate": 4.966884367337781e-06, |
| "loss": 0.145, |
| "step": 1429 |
| }, |
| { |
| "epoch": 1.0049191848208012, |
| "grad_norm": 0.4766089411473581, |
| "learning_rate": 4.961365197228035e-06, |
| "loss": 0.1368, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.005621925509487, |
| "grad_norm": 0.541711677183431, |
| "learning_rate": 4.9558460741952725e-06, |
| "loss": 0.1654, |
| "step": 1431 |
| }, |
| { |
| "epoch": 1.0063246661981728, |
| "grad_norm": 0.5468030419613309, |
| "learning_rate": 4.950327004964607e-06, |
| "loss": 0.1661, |
| "step": 1432 |
| }, |
| { |
| "epoch": 1.0070274068868588, |
| "grad_norm": 0.604212676464957, |
| "learning_rate": 4.944807996261098e-06, |
| "loss": 0.1553, |
| "step": 1433 |
| }, |
| { |
| "epoch": 1.0077301475755447, |
| "grad_norm": 0.5559877236162696, |
| "learning_rate": 4.9392890548097235e-06, |
| "loss": 0.1261, |
| "step": 1434 |
| }, |
| { |
| "epoch": 1.0084328882642306, |
| "grad_norm": 0.5878546627004245, |
| "learning_rate": 4.93377018733538e-06, |
| "loss": 0.1814, |
| "step": 1435 |
| }, |
| { |
| "epoch": 1.0091356289529163, |
| "grad_norm": 0.5440844459813221, |
| "learning_rate": 4.928251400562878e-06, |
| "loss": 0.1532, |
| "step": 1436 |
| }, |
| { |
| "epoch": 1.0098383696416022, |
| "grad_norm": 0.5351616063352487, |
| "learning_rate": 4.922732701216924e-06, |
| "loss": 0.1479, |
| "step": 1437 |
| }, |
| { |
| "epoch": 1.0105411103302882, |
| "grad_norm": 0.5301467629550286, |
| "learning_rate": 4.917214096022123e-06, |
| "loss": 0.1466, |
| "step": 1438 |
| }, |
| { |
| "epoch": 1.011243851018974, |
| "grad_norm": 0.5678252044217474, |
| "learning_rate": 4.911695591702962e-06, |
| "loss": 0.168, |
| "step": 1439 |
| }, |
| { |
| "epoch": 1.0119465917076598, |
| "grad_norm": 0.5172947465808124, |
| "learning_rate": 4.906177194983807e-06, |
| "loss": 0.1393, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.0126493323963457, |
| "grad_norm": 0.47596293325834516, |
| "learning_rate": 4.9006589125888924e-06, |
| "loss": 0.1296, |
| "step": 1441 |
| }, |
| { |
| "epoch": 1.0133520730850316, |
| "grad_norm": 0.5197667557365847, |
| "learning_rate": 4.8951407512423125e-06, |
| "loss": 0.1613, |
| "step": 1442 |
| }, |
| { |
| "epoch": 1.0140548137737175, |
| "grad_norm": 0.5140905511446842, |
| "learning_rate": 4.889622717668012e-06, |
| "loss": 0.1596, |
| "step": 1443 |
| }, |
| { |
| "epoch": 1.0147575544624035, |
| "grad_norm": 0.491748434545197, |
| "learning_rate": 4.884104818589788e-06, |
| "loss": 0.1389, |
| "step": 1444 |
| }, |
| { |
| "epoch": 1.0154602951510892, |
| "grad_norm": 0.5352629126378262, |
| "learning_rate": 4.878587060731267e-06, |
| "loss": 0.1609, |
| "step": 1445 |
| }, |
| { |
| "epoch": 1.016163035839775, |
| "grad_norm": 0.5174976258751066, |
| "learning_rate": 4.8730694508159e-06, |
| "loss": 0.1468, |
| "step": 1446 |
| }, |
| { |
| "epoch": 1.016865776528461, |
| "grad_norm": 0.5101606514926836, |
| "learning_rate": 4.867551995566968e-06, |
| "loss": 0.1563, |
| "step": 1447 |
| }, |
| { |
| "epoch": 1.017568517217147, |
| "grad_norm": 0.49989777003227814, |
| "learning_rate": 4.862034701707554e-06, |
| "loss": 0.1393, |
| "step": 1448 |
| }, |
| { |
| "epoch": 1.0182712579058328, |
| "grad_norm": 0.5097054061097768, |
| "learning_rate": 4.8565175759605505e-06, |
| "loss": 0.1459, |
| "step": 1449 |
| }, |
| { |
| "epoch": 1.0189739985945185, |
| "grad_norm": 0.534062454314789, |
| "learning_rate": 4.851000625048643e-06, |
| "loss": 0.1525, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.0196767392832045, |
| "grad_norm": 0.49640571814198436, |
| "learning_rate": 4.845483855694304e-06, |
| "loss": 0.1246, |
| "step": 1451 |
| }, |
| { |
| "epoch": 1.0203794799718904, |
| "grad_norm": 0.49400528414597805, |
| "learning_rate": 4.839967274619783e-06, |
| "loss": 0.144, |
| "step": 1452 |
| }, |
| { |
| "epoch": 1.0210822206605763, |
| "grad_norm": 0.5750467346465727, |
| "learning_rate": 4.834450888547103e-06, |
| "loss": 0.1769, |
| "step": 1453 |
| }, |
| { |
| "epoch": 1.0217849613492622, |
| "grad_norm": 0.5415534666265035, |
| "learning_rate": 4.8289347041980475e-06, |
| "loss": 0.1671, |
| "step": 1454 |
| }, |
| { |
| "epoch": 1.022487702037948, |
| "grad_norm": 0.5283731167275352, |
| "learning_rate": 4.823418728294157e-06, |
| "loss": 0.1304, |
| "step": 1455 |
| }, |
| { |
| "epoch": 1.0231904427266338, |
| "grad_norm": 0.5163103493159343, |
| "learning_rate": 4.817902967556714e-06, |
| "loss": 0.1474, |
| "step": 1456 |
| }, |
| { |
| "epoch": 1.0238931834153198, |
| "grad_norm": 0.5181379014674464, |
| "learning_rate": 4.8123874287067385e-06, |
| "loss": 0.1528, |
| "step": 1457 |
| }, |
| { |
| "epoch": 1.0245959241040057, |
| "grad_norm": 0.546691491685927, |
| "learning_rate": 4.806872118464987e-06, |
| "loss": 0.1701, |
| "step": 1458 |
| }, |
| { |
| "epoch": 1.0252986647926916, |
| "grad_norm": 0.5368233255973394, |
| "learning_rate": 4.801357043551928e-06, |
| "loss": 0.157, |
| "step": 1459 |
| }, |
| { |
| "epoch": 1.0260014054813773, |
| "grad_norm": 0.49807381807404494, |
| "learning_rate": 4.795842210687754e-06, |
| "loss": 0.1606, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.0267041461700632, |
| "grad_norm": 0.49941618444368396, |
| "learning_rate": 4.790327626592351e-06, |
| "loss": 0.1346, |
| "step": 1461 |
| }, |
| { |
| "epoch": 1.0274068868587491, |
| "grad_norm": 0.5204036110725074, |
| "learning_rate": 4.78481329798531e-06, |
| "loss": 0.151, |
| "step": 1462 |
| }, |
| { |
| "epoch": 1.028109627547435, |
| "grad_norm": 0.5284160836774149, |
| "learning_rate": 4.779299231585907e-06, |
| "loss": 0.1659, |
| "step": 1463 |
| }, |
| { |
| "epoch": 1.0288123682361208, |
| "grad_norm": 0.5287172091425115, |
| "learning_rate": 4.773785434113101e-06, |
| "loss": 0.1466, |
| "step": 1464 |
| }, |
| { |
| "epoch": 1.0295151089248067, |
| "grad_norm": 0.5242209228306783, |
| "learning_rate": 4.768271912285521e-06, |
| "loss": 0.1646, |
| "step": 1465 |
| }, |
| { |
| "epoch": 1.0302178496134926, |
| "grad_norm": 0.48065702521415976, |
| "learning_rate": 4.7627586728214606e-06, |
| "loss": 0.1323, |
| "step": 1466 |
| }, |
| { |
| "epoch": 1.0309205903021785, |
| "grad_norm": 0.5271978633140205, |
| "learning_rate": 4.75724572243887e-06, |
| "loss": 0.1524, |
| "step": 1467 |
| }, |
| { |
| "epoch": 1.0316233309908645, |
| "grad_norm": 0.5278668269452242, |
| "learning_rate": 4.751733067855348e-06, |
| "loss": 0.1652, |
| "step": 1468 |
| }, |
| { |
| "epoch": 1.0323260716795502, |
| "grad_norm": 0.4714409663625622, |
| "learning_rate": 4.746220715788132e-06, |
| "loss": 0.1243, |
| "step": 1469 |
| }, |
| { |
| "epoch": 1.033028812368236, |
| "grad_norm": 0.5167269663375293, |
| "learning_rate": 4.74070867295409e-06, |
| "loss": 0.1539, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.033731553056922, |
| "grad_norm": 0.513279157041316, |
| "learning_rate": 4.735196946069711e-06, |
| "loss": 0.1631, |
| "step": 1471 |
| }, |
| { |
| "epoch": 1.034434293745608, |
| "grad_norm": 0.4998917276920187, |
| "learning_rate": 4.729685541851107e-06, |
| "loss": 0.1237, |
| "step": 1472 |
| }, |
| { |
| "epoch": 1.0351370344342938, |
| "grad_norm": 0.529541513903104, |
| "learning_rate": 4.724174467013987e-06, |
| "loss": 0.1738, |
| "step": 1473 |
| }, |
| { |
| "epoch": 1.0358397751229795, |
| "grad_norm": 0.5184555550536734, |
| "learning_rate": 4.718663728273669e-06, |
| "loss": 0.1323, |
| "step": 1474 |
| }, |
| { |
| "epoch": 1.0365425158116655, |
| "grad_norm": 0.530116425204895, |
| "learning_rate": 4.7131533323450505e-06, |
| "loss": 0.1692, |
| "step": 1475 |
| }, |
| { |
| "epoch": 1.0372452565003514, |
| "grad_norm": 0.5011637485620603, |
| "learning_rate": 4.707643285942619e-06, |
| "loss": 0.1577, |
| "step": 1476 |
| }, |
| { |
| "epoch": 1.0379479971890373, |
| "grad_norm": 0.52765636889385, |
| "learning_rate": 4.702133595780433e-06, |
| "loss": 0.172, |
| "step": 1477 |
| }, |
| { |
| "epoch": 1.0386507378777232, |
| "grad_norm": 0.49850621349027224, |
| "learning_rate": 4.696624268572118e-06, |
| "loss": 0.1508, |
| "step": 1478 |
| }, |
| { |
| "epoch": 1.039353478566409, |
| "grad_norm": 0.5416958214373463, |
| "learning_rate": 4.6911153110308574e-06, |
| "loss": 0.1699, |
| "step": 1479 |
| }, |
| { |
| "epoch": 1.0400562192550948, |
| "grad_norm": 0.5399259415678693, |
| "learning_rate": 4.6856067298693834e-06, |
| "loss": 0.1524, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.0407589599437808, |
| "grad_norm": 0.5032021860715928, |
| "learning_rate": 4.680098531799967e-06, |
| "loss": 0.1411, |
| "step": 1481 |
| }, |
| { |
| "epoch": 1.0414617006324667, |
| "grad_norm": 0.46742852855684963, |
| "learning_rate": 4.674590723534419e-06, |
| "loss": 0.1157, |
| "step": 1482 |
| }, |
| { |
| "epoch": 1.0421644413211526, |
| "grad_norm": 0.5180997769388833, |
| "learning_rate": 4.669083311784069e-06, |
| "loss": 0.1599, |
| "step": 1483 |
| }, |
| { |
| "epoch": 1.0428671820098383, |
| "grad_norm": 0.5309463070515528, |
| "learning_rate": 4.6635763032597704e-06, |
| "loss": 0.1511, |
| "step": 1484 |
| }, |
| { |
| "epoch": 1.0435699226985242, |
| "grad_norm": 0.5302382026589856, |
| "learning_rate": 4.658069704671873e-06, |
| "loss": 0.1724, |
| "step": 1485 |
| }, |
| { |
| "epoch": 1.0442726633872101, |
| "grad_norm": 0.5577755953646487, |
| "learning_rate": 4.65256352273024e-06, |
| "loss": 0.1433, |
| "step": 1486 |
| }, |
| { |
| "epoch": 1.044975404075896, |
| "grad_norm": 0.5137472463204175, |
| "learning_rate": 4.64705776414422e-06, |
| "loss": 0.1441, |
| "step": 1487 |
| }, |
| { |
| "epoch": 1.0456781447645818, |
| "grad_norm": 0.5046663966998653, |
| "learning_rate": 4.641552435622651e-06, |
| "loss": 0.1491, |
| "step": 1488 |
| }, |
| { |
| "epoch": 1.0463808854532677, |
| "grad_norm": 0.5153150802385964, |
| "learning_rate": 4.636047543873838e-06, |
| "loss": 0.148, |
| "step": 1489 |
| }, |
| { |
| "epoch": 1.0470836261419536, |
| "grad_norm": 0.5167782468770977, |
| "learning_rate": 4.630543095605562e-06, |
| "loss": 0.1326, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.0477863668306395, |
| "grad_norm": 0.5638675566292636, |
| "learning_rate": 4.625039097525058e-06, |
| "loss": 0.1424, |
| "step": 1491 |
| }, |
| { |
| "epoch": 1.0484891075193254, |
| "grad_norm": 0.527824249323301, |
| "learning_rate": 4.619535556339021e-06, |
| "loss": 0.1548, |
| "step": 1492 |
| }, |
| { |
| "epoch": 1.0491918482080111, |
| "grad_norm": 0.530738132593687, |
| "learning_rate": 4.61403247875358e-06, |
| "loss": 0.1548, |
| "step": 1493 |
| }, |
| { |
| "epoch": 1.049894588896697, |
| "grad_norm": 0.515381804578322, |
| "learning_rate": 4.6085298714743025e-06, |
| "loss": 0.1435, |
| "step": 1494 |
| }, |
| { |
| "epoch": 1.050597329585383, |
| "grad_norm": 0.5078634734847863, |
| "learning_rate": 4.603027741206181e-06, |
| "loss": 0.1599, |
| "step": 1495 |
| }, |
| { |
| "epoch": 1.051300070274069, |
| "grad_norm": 0.47626536929288615, |
| "learning_rate": 4.597526094653633e-06, |
| "loss": 0.1225, |
| "step": 1496 |
| }, |
| { |
| "epoch": 1.0520028109627548, |
| "grad_norm": 0.5101924345356312, |
| "learning_rate": 4.592024938520479e-06, |
| "loss": 0.1327, |
| "step": 1497 |
| }, |
| { |
| "epoch": 1.0527055516514405, |
| "grad_norm": 0.4893049540660336, |
| "learning_rate": 4.58652427950995e-06, |
| "loss": 0.1211, |
| "step": 1498 |
| }, |
| { |
| "epoch": 1.0534082923401265, |
| "grad_norm": 0.5213987638988877, |
| "learning_rate": 4.581024124324661e-06, |
| "loss": 0.1457, |
| "step": 1499 |
| }, |
| { |
| "epoch": 1.0541110330288124, |
| "grad_norm": 0.4655223777487336, |
| "learning_rate": 4.575524479666621e-06, |
| "loss": 0.1337, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.0541110330288124, |
| "eval_loss": 0.18591229617595673, |
| "eval_runtime": 10.8438, |
| "eval_samples_per_second": 21.21, |
| "eval_steps_per_second": 5.349, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.0548137737174983, |
| "grad_norm": 0.5473471671772354, |
| "learning_rate": 4.570025352237213e-06, |
| "loss": 0.1645, |
| "step": 1501 |
| }, |
| { |
| "epoch": 1.0555165144061842, |
| "grad_norm": 0.5543584858779272, |
| "learning_rate": 4.564526748737195e-06, |
| "loss": 0.185, |
| "step": 1502 |
| }, |
| { |
| "epoch": 1.05621925509487, |
| "grad_norm": 0.5249136679057413, |
| "learning_rate": 4.559028675866681e-06, |
| "loss": 0.1531, |
| "step": 1503 |
| }, |
| { |
| "epoch": 1.0569219957835558, |
| "grad_norm": 0.5077634369356774, |
| "learning_rate": 4.553531140325139e-06, |
| "loss": 0.1426, |
| "step": 1504 |
| }, |
| { |
| "epoch": 1.0576247364722418, |
| "grad_norm": 0.4901031269988132, |
| "learning_rate": 4.548034148811384e-06, |
| "loss": 0.1181, |
| "step": 1505 |
| }, |
| { |
| "epoch": 1.0583274771609277, |
| "grad_norm": 0.5168940632987495, |
| "learning_rate": 4.542537708023569e-06, |
| "loss": 0.1425, |
| "step": 1506 |
| }, |
| { |
| "epoch": 1.0590302178496136, |
| "grad_norm": 0.5158342744313869, |
| "learning_rate": 4.537041824659172e-06, |
| "loss": 0.157, |
| "step": 1507 |
| }, |
| { |
| "epoch": 1.0597329585382993, |
| "grad_norm": 0.5197652354490249, |
| "learning_rate": 4.531546505415e-06, |
| "loss": 0.14, |
| "step": 1508 |
| }, |
| { |
| "epoch": 1.0604356992269852, |
| "grad_norm": 0.5206997910822582, |
| "learning_rate": 4.52605175698716e-06, |
| "loss": 0.1585, |
| "step": 1509 |
| }, |
| { |
| "epoch": 1.0611384399156711, |
| "grad_norm": 0.5014343619476886, |
| "learning_rate": 4.520557586071074e-06, |
| "loss": 0.1375, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.061841180604357, |
| "grad_norm": 0.5177852995426864, |
| "learning_rate": 4.515063999361455e-06, |
| "loss": 0.1348, |
| "step": 1511 |
| }, |
| { |
| "epoch": 1.062543921293043, |
| "grad_norm": 0.5277049664691724, |
| "learning_rate": 4.509571003552311e-06, |
| "loss": 0.1461, |
| "step": 1512 |
| }, |
| { |
| "epoch": 1.0632466619817287, |
| "grad_norm": 0.5125849050122258, |
| "learning_rate": 4.5040786053369175e-06, |
| "loss": 0.1328, |
| "step": 1513 |
| }, |
| { |
| "epoch": 1.0639494026704146, |
| "grad_norm": 0.5258856277575326, |
| "learning_rate": 4.498586811407834e-06, |
| "loss": 0.1641, |
| "step": 1514 |
| }, |
| { |
| "epoch": 1.0646521433591005, |
| "grad_norm": 0.533358579978669, |
| "learning_rate": 4.493095628456876e-06, |
| "loss": 0.1492, |
| "step": 1515 |
| }, |
| { |
| "epoch": 1.0653548840477864, |
| "grad_norm": 0.5410665746866157, |
| "learning_rate": 4.487605063175119e-06, |
| "loss": 0.1754, |
| "step": 1516 |
| }, |
| { |
| "epoch": 1.0660576247364721, |
| "grad_norm": 0.5445504453331569, |
| "learning_rate": 4.482115122252887e-06, |
| "loss": 0.164, |
| "step": 1517 |
| }, |
| { |
| "epoch": 1.066760365425158, |
| "grad_norm": 0.48571042799729647, |
| "learning_rate": 4.4766258123797355e-06, |
| "loss": 0.1291, |
| "step": 1518 |
| }, |
| { |
| "epoch": 1.067463106113844, |
| "grad_norm": 0.5240647619658219, |
| "learning_rate": 4.471137140244456e-06, |
| "loss": 0.1502, |
| "step": 1519 |
| }, |
| { |
| "epoch": 1.06816584680253, |
| "grad_norm": 0.4938137279997872, |
| "learning_rate": 4.465649112535067e-06, |
| "loss": 0.1427, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.0688685874912158, |
| "grad_norm": 0.5007927131769512, |
| "learning_rate": 4.460161735938794e-06, |
| "loss": 0.1462, |
| "step": 1521 |
| }, |
| { |
| "epoch": 1.0695713281799015, |
| "grad_norm": 0.5170092663246689, |
| "learning_rate": 4.4546750171420764e-06, |
| "loss": 0.1381, |
| "step": 1522 |
| }, |
| { |
| "epoch": 1.0702740688685874, |
| "grad_norm": 0.49458124261997344, |
| "learning_rate": 4.449188962830544e-06, |
| "loss": 0.1302, |
| "step": 1523 |
| }, |
| { |
| "epoch": 1.0709768095572734, |
| "grad_norm": 0.5145222594315508, |
| "learning_rate": 4.443703579689025e-06, |
| "loss": 0.1469, |
| "step": 1524 |
| }, |
| { |
| "epoch": 1.0716795502459593, |
| "grad_norm": 0.5063131328210128, |
| "learning_rate": 4.438218874401522e-06, |
| "loss": 0.1536, |
| "step": 1525 |
| }, |
| { |
| "epoch": 1.0723822909346452, |
| "grad_norm": 0.5203157749421187, |
| "learning_rate": 4.432734853651222e-06, |
| "loss": 0.1507, |
| "step": 1526 |
| }, |
| { |
| "epoch": 1.073085031623331, |
| "grad_norm": 0.5397251232553768, |
| "learning_rate": 4.4272515241204674e-06, |
| "loss": 0.1644, |
| "step": 1527 |
| }, |
| { |
| "epoch": 1.0737877723120168, |
| "grad_norm": 0.5318045536009254, |
| "learning_rate": 4.421768892490762e-06, |
| "loss": 0.1419, |
| "step": 1528 |
| }, |
| { |
| "epoch": 1.0744905130007028, |
| "grad_norm": 0.5317716259625153, |
| "learning_rate": 4.416286965442761e-06, |
| "loss": 0.159, |
| "step": 1529 |
| }, |
| { |
| "epoch": 1.0751932536893887, |
| "grad_norm": 0.536014633971988, |
| "learning_rate": 4.41080574965626e-06, |
| "loss": 0.1612, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.0758959943780746, |
| "grad_norm": 0.5495640591297969, |
| "learning_rate": 4.4053252518101855e-06, |
| "loss": 0.1551, |
| "step": 1531 |
| }, |
| { |
| "epoch": 1.0765987350667603, |
| "grad_norm": 0.5078396198787789, |
| "learning_rate": 4.399845478582598e-06, |
| "loss": 0.1483, |
| "step": 1532 |
| }, |
| { |
| "epoch": 1.0773014757554462, |
| "grad_norm": 0.5024234016147644, |
| "learning_rate": 4.394366436650661e-06, |
| "loss": 0.1346, |
| "step": 1533 |
| }, |
| { |
| "epoch": 1.0780042164441321, |
| "grad_norm": 0.5173858114853311, |
| "learning_rate": 4.388888132690657e-06, |
| "loss": 0.1164, |
| "step": 1534 |
| }, |
| { |
| "epoch": 1.078706957132818, |
| "grad_norm": 0.5238027648125677, |
| "learning_rate": 4.383410573377966e-06, |
| "loss": 0.1432, |
| "step": 1535 |
| }, |
| { |
| "epoch": 1.0794096978215038, |
| "grad_norm": 0.5526852874415407, |
| "learning_rate": 4.3779337653870666e-06, |
| "loss": 0.1561, |
| "step": 1536 |
| }, |
| { |
| "epoch": 1.0801124385101897, |
| "grad_norm": 0.55461552732306, |
| "learning_rate": 4.372457715391508e-06, |
| "loss": 0.1636, |
| "step": 1537 |
| }, |
| { |
| "epoch": 1.0808151791988756, |
| "grad_norm": 0.5590817602249681, |
| "learning_rate": 4.3669824300639305e-06, |
| "loss": 0.1832, |
| "step": 1538 |
| }, |
| { |
| "epoch": 1.0815179198875615, |
| "grad_norm": 0.510901886107954, |
| "learning_rate": 4.361507916076032e-06, |
| "loss": 0.1417, |
| "step": 1539 |
| }, |
| { |
| "epoch": 1.0822206605762474, |
| "grad_norm": 0.5081306360693141, |
| "learning_rate": 4.35603418009858e-06, |
| "loss": 0.1241, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.0829234012649331, |
| "grad_norm": 0.4978947306976569, |
| "learning_rate": 4.350561228801386e-06, |
| "loss": 0.1415, |
| "step": 1541 |
| }, |
| { |
| "epoch": 1.083626141953619, |
| "grad_norm": 0.5074726593124405, |
| "learning_rate": 4.345089068853309e-06, |
| "loss": 0.1391, |
| "step": 1542 |
| }, |
| { |
| "epoch": 1.084328882642305, |
| "grad_norm": 0.5204197326136466, |
| "learning_rate": 4.339617706922242e-06, |
| "loss": 0.1516, |
| "step": 1543 |
| }, |
| { |
| "epoch": 1.085031623330991, |
| "grad_norm": 0.5168236552395792, |
| "learning_rate": 4.3341471496751085e-06, |
| "loss": 0.1432, |
| "step": 1544 |
| }, |
| { |
| "epoch": 1.0857343640196768, |
| "grad_norm": 0.5123174698235842, |
| "learning_rate": 4.328677403777848e-06, |
| "loss": 0.1401, |
| "step": 1545 |
| }, |
| { |
| "epoch": 1.0864371047083625, |
| "grad_norm": 0.5277797986160024, |
| "learning_rate": 4.323208475895416e-06, |
| "loss": 0.1848, |
| "step": 1546 |
| }, |
| { |
| "epoch": 1.0871398453970484, |
| "grad_norm": 0.5192275580663916, |
| "learning_rate": 4.317740372691765e-06, |
| "loss": 0.1523, |
| "step": 1547 |
| }, |
| { |
| "epoch": 1.0878425860857344, |
| "grad_norm": 0.5018066227654631, |
| "learning_rate": 4.312273100829845e-06, |
| "loss": 0.1299, |
| "step": 1548 |
| }, |
| { |
| "epoch": 1.0885453267744203, |
| "grad_norm": 0.5183620807357253, |
| "learning_rate": 4.306806666971597e-06, |
| "loss": 0.1609, |
| "step": 1549 |
| }, |
| { |
| "epoch": 1.0892480674631062, |
| "grad_norm": 0.5244515698415202, |
| "learning_rate": 4.3013410777779375e-06, |
| "loss": 0.1601, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.089950808151792, |
| "grad_norm": 0.47123123465237887, |
| "learning_rate": 4.295876339908755e-06, |
| "loss": 0.1164, |
| "step": 1551 |
| }, |
| { |
| "epoch": 1.0906535488404778, |
| "grad_norm": 0.5184538408690857, |
| "learning_rate": 4.290412460022896e-06, |
| "loss": 0.1352, |
| "step": 1552 |
| }, |
| { |
| "epoch": 1.0913562895291637, |
| "grad_norm": 0.520938972909478, |
| "learning_rate": 4.284949444778166e-06, |
| "loss": 0.1584, |
| "step": 1553 |
| }, |
| { |
| "epoch": 1.0920590302178497, |
| "grad_norm": 0.5456503103112614, |
| "learning_rate": 4.279487300831318e-06, |
| "loss": 0.135, |
| "step": 1554 |
| }, |
| { |
| "epoch": 1.0927617709065356, |
| "grad_norm": 0.5107554261266217, |
| "learning_rate": 4.274026034838043e-06, |
| "loss": 0.1368, |
| "step": 1555 |
| }, |
| { |
| "epoch": 1.0934645115952213, |
| "grad_norm": 0.5819292852086433, |
| "learning_rate": 4.2685656534529576e-06, |
| "loss": 0.1664, |
| "step": 1556 |
| }, |
| { |
| "epoch": 1.0941672522839072, |
| "grad_norm": 0.5449361933830971, |
| "learning_rate": 4.263106163329603e-06, |
| "loss": 0.1729, |
| "step": 1557 |
| }, |
| { |
| "epoch": 1.0948699929725931, |
| "grad_norm": 0.5275764569882169, |
| "learning_rate": 4.257647571120437e-06, |
| "loss": 0.1368, |
| "step": 1558 |
| }, |
| { |
| "epoch": 1.095572733661279, |
| "grad_norm": 0.5164645233179017, |
| "learning_rate": 4.25218988347682e-06, |
| "loss": 0.1525, |
| "step": 1559 |
| }, |
| { |
| "epoch": 1.096275474349965, |
| "grad_norm": 0.5060477908801764, |
| "learning_rate": 4.246733107049012e-06, |
| "loss": 0.1341, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.0969782150386507, |
| "grad_norm": 0.5299530311152008, |
| "learning_rate": 4.241277248486164e-06, |
| "loss": 0.1341, |
| "step": 1561 |
| }, |
| { |
| "epoch": 1.0976809557273366, |
| "grad_norm": 0.5171934228573, |
| "learning_rate": 4.2358223144363046e-06, |
| "loss": 0.1315, |
| "step": 1562 |
| }, |
| { |
| "epoch": 1.0983836964160225, |
| "grad_norm": 0.534962523606485, |
| "learning_rate": 4.2303683115463355e-06, |
| "loss": 0.1525, |
| "step": 1563 |
| }, |
| { |
| "epoch": 1.0990864371047084, |
| "grad_norm": 0.513404314461455, |
| "learning_rate": 4.22491524646203e-06, |
| "loss": 0.1334, |
| "step": 1564 |
| }, |
| { |
| "epoch": 1.0997891777933941, |
| "grad_norm": 0.4876830686188593, |
| "learning_rate": 4.219463125828015e-06, |
| "loss": 0.12, |
| "step": 1565 |
| }, |
| { |
| "epoch": 1.10049191848208, |
| "grad_norm": 0.518920756296569, |
| "learning_rate": 4.214011956287765e-06, |
| "loss": 0.1445, |
| "step": 1566 |
| }, |
| { |
| "epoch": 1.101194659170766, |
| "grad_norm": 0.5051886440212208, |
| "learning_rate": 4.208561744483595e-06, |
| "loss": 0.1391, |
| "step": 1567 |
| }, |
| { |
| "epoch": 1.101897399859452, |
| "grad_norm": 0.530898642935748, |
| "learning_rate": 4.2031124970566576e-06, |
| "loss": 0.1486, |
| "step": 1568 |
| }, |
| { |
| "epoch": 1.1026001405481378, |
| "grad_norm": 0.4944119236879768, |
| "learning_rate": 4.197664220646928e-06, |
| "loss": 0.1294, |
| "step": 1569 |
| }, |
| { |
| "epoch": 1.1033028812368235, |
| "grad_norm": 0.520973466007102, |
| "learning_rate": 4.192216921893198e-06, |
| "loss": 0.1431, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.1040056219255094, |
| "grad_norm": 0.5436921574604013, |
| "learning_rate": 4.186770607433065e-06, |
| "loss": 0.1703, |
| "step": 1571 |
| }, |
| { |
| "epoch": 1.1047083626141954, |
| "grad_norm": 0.544033910389497, |
| "learning_rate": 4.1813252839029325e-06, |
| "loss": 0.1653, |
| "step": 1572 |
| }, |
| { |
| "epoch": 1.1054111033028813, |
| "grad_norm": 0.5192877976808141, |
| "learning_rate": 4.175880957937994e-06, |
| "loss": 0.16, |
| "step": 1573 |
| }, |
| { |
| "epoch": 1.1061138439915672, |
| "grad_norm": 0.5126150735386685, |
| "learning_rate": 4.170437636172227e-06, |
| "loss": 0.161, |
| "step": 1574 |
| }, |
| { |
| "epoch": 1.106816584680253, |
| "grad_norm": 0.5279167572838358, |
| "learning_rate": 4.164995325238388e-06, |
| "loss": 0.164, |
| "step": 1575 |
| }, |
| { |
| "epoch": 1.1075193253689388, |
| "grad_norm": 0.5648028936730404, |
| "learning_rate": 4.159554031767996e-06, |
| "loss": 0.1877, |
| "step": 1576 |
| }, |
| { |
| "epoch": 1.1082220660576247, |
| "grad_norm": 0.5193777403326719, |
| "learning_rate": 4.1541137623913355e-06, |
| "loss": 0.1644, |
| "step": 1577 |
| }, |
| { |
| "epoch": 1.1089248067463107, |
| "grad_norm": 0.4954080216440343, |
| "learning_rate": 4.148674523737443e-06, |
| "loss": 0.1353, |
| "step": 1578 |
| }, |
| { |
| "epoch": 1.1096275474349966, |
| "grad_norm": 0.5221218148685247, |
| "learning_rate": 4.143236322434096e-06, |
| "loss": 0.1467, |
| "step": 1579 |
| }, |
| { |
| "epoch": 1.1103302881236823, |
| "grad_norm": 0.5148823212995353, |
| "learning_rate": 4.137799165107811e-06, |
| "loss": 0.1491, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.1110330288123682, |
| "grad_norm": 0.49902117768501153, |
| "learning_rate": 4.132363058383828e-06, |
| "loss": 0.1462, |
| "step": 1581 |
| }, |
| { |
| "epoch": 1.1117357695010541, |
| "grad_norm": 0.5050608112360148, |
| "learning_rate": 4.126928008886112e-06, |
| "loss": 0.1368, |
| "step": 1582 |
| }, |
| { |
| "epoch": 1.11243851018974, |
| "grad_norm": 0.5570867896413365, |
| "learning_rate": 4.121494023237338e-06, |
| "loss": 0.1677, |
| "step": 1583 |
| }, |
| { |
| "epoch": 1.1131412508784257, |
| "grad_norm": 0.5246201638744743, |
| "learning_rate": 4.116061108058882e-06, |
| "loss": 0.1382, |
| "step": 1584 |
| }, |
| { |
| "epoch": 1.1138439915671117, |
| "grad_norm": 0.5325551208566502, |
| "learning_rate": 4.110629269970822e-06, |
| "loss": 0.1525, |
| "step": 1585 |
| }, |
| { |
| "epoch": 1.1145467322557976, |
| "grad_norm": 0.5117833002933512, |
| "learning_rate": 4.105198515591915e-06, |
| "loss": 0.149, |
| "step": 1586 |
| }, |
| { |
| "epoch": 1.1152494729444835, |
| "grad_norm": 0.552294045065401, |
| "learning_rate": 4.099768851539603e-06, |
| "loss": 0.155, |
| "step": 1587 |
| }, |
| { |
| "epoch": 1.1159522136331694, |
| "grad_norm": 0.491251116627733, |
| "learning_rate": 4.0943402844300004e-06, |
| "loss": 0.1261, |
| "step": 1588 |
| }, |
| { |
| "epoch": 1.1166549543218554, |
| "grad_norm": 0.5537353355618069, |
| "learning_rate": 4.088912820877881e-06, |
| "loss": 0.1724, |
| "step": 1589 |
| }, |
| { |
| "epoch": 1.117357695010541, |
| "grad_norm": 0.4985476104458816, |
| "learning_rate": 4.0834864674966765e-06, |
| "loss": 0.1436, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.118060435699227, |
| "grad_norm": 0.5114879066856933, |
| "learning_rate": 4.078061230898463e-06, |
| "loss": 0.1644, |
| "step": 1591 |
| }, |
| { |
| "epoch": 1.118763176387913, |
| "grad_norm": 0.5302626334189574, |
| "learning_rate": 4.072637117693962e-06, |
| "loss": 0.16, |
| "step": 1592 |
| }, |
| { |
| "epoch": 1.1194659170765988, |
| "grad_norm": 0.5012286863562989, |
| "learning_rate": 4.067214134492519e-06, |
| "loss": 0.1481, |
| "step": 1593 |
| }, |
| { |
| "epoch": 1.1201686577652845, |
| "grad_norm": 0.5209237907289656, |
| "learning_rate": 4.061792287902107e-06, |
| "loss": 0.1551, |
| "step": 1594 |
| }, |
| { |
| "epoch": 1.1208713984539704, |
| "grad_norm": 0.5307699445933584, |
| "learning_rate": 4.056371584529311e-06, |
| "loss": 0.1387, |
| "step": 1595 |
| }, |
| { |
| "epoch": 1.1215741391426564, |
| "grad_norm": 0.5171963652616054, |
| "learning_rate": 4.050952030979326e-06, |
| "loss": 0.1333, |
| "step": 1596 |
| }, |
| { |
| "epoch": 1.1222768798313423, |
| "grad_norm": 0.5190474007139281, |
| "learning_rate": 4.0455336338559446e-06, |
| "loss": 0.1489, |
| "step": 1597 |
| }, |
| { |
| "epoch": 1.1229796205200282, |
| "grad_norm": 0.5083668842001544, |
| "learning_rate": 4.040116399761547e-06, |
| "loss": 0.1448, |
| "step": 1598 |
| }, |
| { |
| "epoch": 1.123682361208714, |
| "grad_norm": 0.5417158344934303, |
| "learning_rate": 4.034700335297107e-06, |
| "loss": 0.1687, |
| "step": 1599 |
| }, |
| { |
| "epoch": 1.1243851018973998, |
| "grad_norm": 0.4797849651915903, |
| "learning_rate": 4.029285447062159e-06, |
| "loss": 0.1199, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.1250878425860857, |
| "grad_norm": 0.5022976081283302, |
| "learning_rate": 4.02387174165481e-06, |
| "loss": 0.1438, |
| "step": 1601 |
| }, |
| { |
| "epoch": 1.1257905832747717, |
| "grad_norm": 0.5079689484794458, |
| "learning_rate": 4.018459225671732e-06, |
| "loss": 0.1462, |
| "step": 1602 |
| }, |
| { |
| "epoch": 1.1264933239634574, |
| "grad_norm": 0.4934039692280827, |
| "learning_rate": 4.01304790570814e-06, |
| "loss": 0.1281, |
| "step": 1603 |
| }, |
| { |
| "epoch": 1.1271960646521433, |
| "grad_norm": 0.5014371900597421, |
| "learning_rate": 4.007637788357793e-06, |
| "loss": 0.1396, |
| "step": 1604 |
| }, |
| { |
| "epoch": 1.1278988053408292, |
| "grad_norm": 0.5268724559733456, |
| "learning_rate": 4.002228880212984e-06, |
| "loss": 0.1474, |
| "step": 1605 |
| }, |
| { |
| "epoch": 1.1286015460295151, |
| "grad_norm": 0.5209824286459966, |
| "learning_rate": 3.996821187864537e-06, |
| "loss": 0.1414, |
| "step": 1606 |
| }, |
| { |
| "epoch": 1.129304286718201, |
| "grad_norm": 0.48759501821979767, |
| "learning_rate": 3.99141471790179e-06, |
| "loss": 0.1336, |
| "step": 1607 |
| }, |
| { |
| "epoch": 1.130007027406887, |
| "grad_norm": 0.4802373810096626, |
| "learning_rate": 3.986009476912592e-06, |
| "loss": 0.1258, |
| "step": 1608 |
| }, |
| { |
| "epoch": 1.1307097680955727, |
| "grad_norm": 0.5209903064603748, |
| "learning_rate": 3.980605471483299e-06, |
| "loss": 0.1517, |
| "step": 1609 |
| }, |
| { |
| "epoch": 1.1314125087842586, |
| "grad_norm": 0.5549038348276285, |
| "learning_rate": 3.975202708198754e-06, |
| "loss": 0.176, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.1321152494729445, |
| "grad_norm": 0.5084752069197226, |
| "learning_rate": 3.969801193642293e-06, |
| "loss": 0.1453, |
| "step": 1611 |
| }, |
| { |
| "epoch": 1.1328179901616304, |
| "grad_norm": 0.5271233290836675, |
| "learning_rate": 3.964400934395726e-06, |
| "loss": 0.1596, |
| "step": 1612 |
| }, |
| { |
| "epoch": 1.1335207308503161, |
| "grad_norm": 0.5320555907499869, |
| "learning_rate": 3.959001937039337e-06, |
| "loss": 0.166, |
| "step": 1613 |
| }, |
| { |
| "epoch": 1.134223471539002, |
| "grad_norm": 0.5232518466549592, |
| "learning_rate": 3.95360420815187e-06, |
| "loss": 0.1574, |
| "step": 1614 |
| }, |
| { |
| "epoch": 1.134926212227688, |
| "grad_norm": 0.51281620193853, |
| "learning_rate": 3.948207754310522e-06, |
| "loss": 0.1443, |
| "step": 1615 |
| }, |
| { |
| "epoch": 1.135628952916374, |
| "grad_norm": 0.5279682828795013, |
| "learning_rate": 3.94281258209094e-06, |
| "loss": 0.1539, |
| "step": 1616 |
| }, |
| { |
| "epoch": 1.1363316936050598, |
| "grad_norm": 0.5253882313772008, |
| "learning_rate": 3.937418698067209e-06, |
| "loss": 0.1522, |
| "step": 1617 |
| }, |
| { |
| "epoch": 1.1370344342937457, |
| "grad_norm": 0.5263221297165886, |
| "learning_rate": 3.932026108811841e-06, |
| "loss": 0.163, |
| "step": 1618 |
| }, |
| { |
| "epoch": 1.1377371749824314, |
| "grad_norm": 0.529816285315717, |
| "learning_rate": 3.9266348208957716e-06, |
| "loss": 0.1628, |
| "step": 1619 |
| }, |
| { |
| "epoch": 1.1384399156711174, |
| "grad_norm": 0.5589296845016164, |
| "learning_rate": 3.921244840888353e-06, |
| "loss": 0.1542, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.1391426563598033, |
| "grad_norm": 0.5095397475823957, |
| "learning_rate": 3.915856175357341e-06, |
| "loss": 0.1302, |
| "step": 1621 |
| }, |
| { |
| "epoch": 1.1398453970484892, |
| "grad_norm": 0.5152041254097331, |
| "learning_rate": 3.910468830868891e-06, |
| "loss": 0.1568, |
| "step": 1622 |
| }, |
| { |
| "epoch": 1.140548137737175, |
| "grad_norm": 0.498483758902883, |
| "learning_rate": 3.90508281398755e-06, |
| "loss": 0.1448, |
| "step": 1623 |
| }, |
| { |
| "epoch": 1.1412508784258608, |
| "grad_norm": 0.5424368341811135, |
| "learning_rate": 3.899698131276243e-06, |
| "loss": 0.1537, |
| "step": 1624 |
| }, |
| { |
| "epoch": 1.1419536191145467, |
| "grad_norm": 0.5044011713227908, |
| "learning_rate": 3.894314789296274e-06, |
| "loss": 0.1345, |
| "step": 1625 |
| }, |
| { |
| "epoch": 1.1426563598032327, |
| "grad_norm": 0.5163947411118621, |
| "learning_rate": 3.888932794607308e-06, |
| "loss": 0.1566, |
| "step": 1626 |
| }, |
| { |
| "epoch": 1.1433591004919186, |
| "grad_norm": 0.5214483574311828, |
| "learning_rate": 3.883552153767376e-06, |
| "loss": 0.1256, |
| "step": 1627 |
| }, |
| { |
| "epoch": 1.1440618411806043, |
| "grad_norm": 0.5234440658460365, |
| "learning_rate": 3.878172873332854e-06, |
| "loss": 0.148, |
| "step": 1628 |
| }, |
| { |
| "epoch": 1.1447645818692902, |
| "grad_norm": 0.5361870114650095, |
| "learning_rate": 3.872794959858457e-06, |
| "loss": 0.1793, |
| "step": 1629 |
| }, |
| { |
| "epoch": 1.1454673225579761, |
| "grad_norm": 0.5027193241001592, |
| "learning_rate": 3.867418419897245e-06, |
| "loss": 0.1354, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.146170063246662, |
| "grad_norm": 0.5201478011166566, |
| "learning_rate": 3.862043260000593e-06, |
| "loss": 0.1529, |
| "step": 1631 |
| }, |
| { |
| "epoch": 1.1468728039353477, |
| "grad_norm": 0.5005128128993738, |
| "learning_rate": 3.856669486718201e-06, |
| "loss": 0.1451, |
| "step": 1632 |
| }, |
| { |
| "epoch": 1.1475755446240337, |
| "grad_norm": 0.5187700145278601, |
| "learning_rate": 3.85129710659808e-06, |
| "loss": 0.1485, |
| "step": 1633 |
| }, |
| { |
| "epoch": 1.1482782853127196, |
| "grad_norm": 0.4976097692834372, |
| "learning_rate": 3.845926126186539e-06, |
| "loss": 0.1325, |
| "step": 1634 |
| }, |
| { |
| "epoch": 1.1489810260014055, |
| "grad_norm": 0.4955834841080017, |
| "learning_rate": 3.840556552028182e-06, |
| "loss": 0.1148, |
| "step": 1635 |
| }, |
| { |
| "epoch": 1.1496837666900914, |
| "grad_norm": 0.5454349128260972, |
| "learning_rate": 3.8351883906659015e-06, |
| "loss": 0.1381, |
| "step": 1636 |
| }, |
| { |
| "epoch": 1.1503865073787773, |
| "grad_norm": 0.5124326684693784, |
| "learning_rate": 3.829821648640873e-06, |
| "loss": 0.137, |
| "step": 1637 |
| }, |
| { |
| "epoch": 1.151089248067463, |
| "grad_norm": 0.4962124166833035, |
| "learning_rate": 3.824456332492531e-06, |
| "loss": 0.1226, |
| "step": 1638 |
| }, |
| { |
| "epoch": 1.151791988756149, |
| "grad_norm": 0.49974840132238146, |
| "learning_rate": 3.8190924487585825e-06, |
| "loss": 0.1336, |
| "step": 1639 |
| }, |
| { |
| "epoch": 1.1524947294448349, |
| "grad_norm": 0.531460623031864, |
| "learning_rate": 3.8137300039749837e-06, |
| "loss": 0.1514, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.1531974701335208, |
| "grad_norm": 0.5245882163563044, |
| "learning_rate": 3.808369004675942e-06, |
| "loss": 0.167, |
| "step": 1641 |
| }, |
| { |
| "epoch": 1.1539002108222065, |
| "grad_norm": 0.5174623994992702, |
| "learning_rate": 3.803009457393901e-06, |
| "loss": 0.1342, |
| "step": 1642 |
| }, |
| { |
| "epoch": 1.1546029515108924, |
| "grad_norm": 0.5328113159558158, |
| "learning_rate": 3.7976513686595306e-06, |
| "loss": 0.1455, |
| "step": 1643 |
| }, |
| { |
| "epoch": 1.1553056921995783, |
| "grad_norm": 0.5134419088998426, |
| "learning_rate": 3.792294745001732e-06, |
| "loss": 0.1405, |
| "step": 1644 |
| }, |
| { |
| "epoch": 1.1560084328882643, |
| "grad_norm": 0.513456988869456, |
| "learning_rate": 3.786939592947616e-06, |
| "loss": 0.1407, |
| "step": 1645 |
| }, |
| { |
| "epoch": 1.1567111735769502, |
| "grad_norm": 0.5231031001511144, |
| "learning_rate": 3.781585919022499e-06, |
| "loss": 0.1562, |
| "step": 1646 |
| }, |
| { |
| "epoch": 1.157413914265636, |
| "grad_norm": 0.5598929326454744, |
| "learning_rate": 3.7762337297499026e-06, |
| "loss": 0.1555, |
| "step": 1647 |
| }, |
| { |
| "epoch": 1.1581166549543218, |
| "grad_norm": 0.5196277810093672, |
| "learning_rate": 3.770883031651531e-06, |
| "loss": 0.1681, |
| "step": 1648 |
| }, |
| { |
| "epoch": 1.1588193956430077, |
| "grad_norm": 0.5248335205070664, |
| "learning_rate": 3.765533831247278e-06, |
| "loss": 0.1535, |
| "step": 1649 |
| }, |
| { |
| "epoch": 1.1595221363316937, |
| "grad_norm": 0.5179389153060617, |
| "learning_rate": 3.7601861350552073e-06, |
| "loss": 0.1502, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.1602248770203796, |
| "grad_norm": 0.5268002561119876, |
| "learning_rate": 3.7548399495915555e-06, |
| "loss": 0.1529, |
| "step": 1651 |
| }, |
| { |
| "epoch": 1.1609276177090653, |
| "grad_norm": 0.5210242798238149, |
| "learning_rate": 3.7494952813707154e-06, |
| "loss": 0.1484, |
| "step": 1652 |
| }, |
| { |
| "epoch": 1.1616303583977512, |
| "grad_norm": 0.48687971653015544, |
| "learning_rate": 3.744152136905226e-06, |
| "loss": 0.1237, |
| "step": 1653 |
| }, |
| { |
| "epoch": 1.1623330990864371, |
| "grad_norm": 0.5227291974655749, |
| "learning_rate": 3.7388105227057796e-06, |
| "loss": 0.1526, |
| "step": 1654 |
| }, |
| { |
| "epoch": 1.163035839775123, |
| "grad_norm": 0.49035055461560756, |
| "learning_rate": 3.733470445281197e-06, |
| "loss": 0.1389, |
| "step": 1655 |
| }, |
| { |
| "epoch": 1.163738580463809, |
| "grad_norm": 0.48742906301342115, |
| "learning_rate": 3.7281319111384274e-06, |
| "loss": 0.1272, |
| "step": 1656 |
| }, |
| { |
| "epoch": 1.1644413211524947, |
| "grad_norm": 0.4817423144150866, |
| "learning_rate": 3.722794926782542e-06, |
| "loss": 0.1444, |
| "step": 1657 |
| }, |
| { |
| "epoch": 1.1651440618411806, |
| "grad_norm": 0.4913925343480694, |
| "learning_rate": 3.71745949871672e-06, |
| "loss": 0.1271, |
| "step": 1658 |
| }, |
| { |
| "epoch": 1.1658468025298665, |
| "grad_norm": 0.5048171170148831, |
| "learning_rate": 3.712125633442246e-06, |
| "loss": 0.1552, |
| "step": 1659 |
| }, |
| { |
| "epoch": 1.1665495432185524, |
| "grad_norm": 0.5703783627697752, |
| "learning_rate": 3.7067933374585003e-06, |
| "loss": 0.1833, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.1672522839072381, |
| "grad_norm": 0.5225800614551854, |
| "learning_rate": 3.7014626172629536e-06, |
| "loss": 0.1691, |
| "step": 1661 |
| }, |
| { |
| "epoch": 1.167955024595924, |
| "grad_norm": 0.4995785135922539, |
| "learning_rate": 3.696133479351151e-06, |
| "loss": 0.1425, |
| "step": 1662 |
| }, |
| { |
| "epoch": 1.16865776528461, |
| "grad_norm": 0.48685260955142334, |
| "learning_rate": 3.6908059302167134e-06, |
| "loss": 0.1342, |
| "step": 1663 |
| }, |
| { |
| "epoch": 1.1693605059732959, |
| "grad_norm": 0.5318537127203009, |
| "learning_rate": 3.6854799763513238e-06, |
| "loss": 0.1383, |
| "step": 1664 |
| }, |
| { |
| "epoch": 1.1700632466619818, |
| "grad_norm": 0.4910402665339831, |
| "learning_rate": 3.6801556242447247e-06, |
| "loss": 0.1272, |
| "step": 1665 |
| }, |
| { |
| "epoch": 1.1707659873506677, |
| "grad_norm": 0.49911615962552697, |
| "learning_rate": 3.6748328803847044e-06, |
| "loss": 0.1522, |
| "step": 1666 |
| }, |
| { |
| "epoch": 1.1714687280393534, |
| "grad_norm": 0.5262194858026492, |
| "learning_rate": 3.6695117512570878e-06, |
| "loss": 0.1491, |
| "step": 1667 |
| }, |
| { |
| "epoch": 1.1721714687280393, |
| "grad_norm": 0.4684801808092165, |
| "learning_rate": 3.66419224334574e-06, |
| "loss": 0.1134, |
| "step": 1668 |
| }, |
| { |
| "epoch": 1.1728742094167253, |
| "grad_norm": 0.4636585324685207, |
| "learning_rate": 3.658874363132546e-06, |
| "loss": 0.1159, |
| "step": 1669 |
| }, |
| { |
| "epoch": 1.1735769501054112, |
| "grad_norm": 0.5024935722580612, |
| "learning_rate": 3.6535581170974055e-06, |
| "loss": 0.1389, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.1742796907940969, |
| "grad_norm": 0.5274319478864739, |
| "learning_rate": 3.648243511718235e-06, |
| "loss": 0.138, |
| "step": 1671 |
| }, |
| { |
| "epoch": 1.1749824314827828, |
| "grad_norm": 0.5261000202757115, |
| "learning_rate": 3.6429305534709415e-06, |
| "loss": 0.1524, |
| "step": 1672 |
| }, |
| { |
| "epoch": 1.1756851721714687, |
| "grad_norm": 0.505490322072027, |
| "learning_rate": 3.6376192488294317e-06, |
| "loss": 0.1284, |
| "step": 1673 |
| }, |
| { |
| "epoch": 1.1763879128601546, |
| "grad_norm": 0.4688629466709282, |
| "learning_rate": 3.6323096042655936e-06, |
| "loss": 0.1103, |
| "step": 1674 |
| }, |
| { |
| "epoch": 1.1770906535488406, |
| "grad_norm": 0.5399605141659941, |
| "learning_rate": 3.627001626249298e-06, |
| "loss": 0.1494, |
| "step": 1675 |
| }, |
| { |
| "epoch": 1.1777933942375263, |
| "grad_norm": 0.5187430831627153, |
| "learning_rate": 3.6216953212483796e-06, |
| "loss": 0.1475, |
| "step": 1676 |
| }, |
| { |
| "epoch": 1.1784961349262122, |
| "grad_norm": 0.5076664057019005, |
| "learning_rate": 3.6163906957286347e-06, |
| "loss": 0.1336, |
| "step": 1677 |
| }, |
| { |
| "epoch": 1.1791988756148981, |
| "grad_norm": 0.5196016098967992, |
| "learning_rate": 3.611087756153815e-06, |
| "loss": 0.1262, |
| "step": 1678 |
| }, |
| { |
| "epoch": 1.179901616303584, |
| "grad_norm": 0.49731461259146237, |
| "learning_rate": 3.605786508985619e-06, |
| "loss": 0.1458, |
| "step": 1679 |
| }, |
| { |
| "epoch": 1.1806043569922697, |
| "grad_norm": 0.5261002476169107, |
| "learning_rate": 3.6004869606836807e-06, |
| "loss": 0.1575, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.1813070976809557, |
| "grad_norm": 0.5195604327281339, |
| "learning_rate": 3.5951891177055663e-06, |
| "loss": 0.1514, |
| "step": 1681 |
| }, |
| { |
| "epoch": 1.1820098383696416, |
| "grad_norm": 0.5075140626950995, |
| "learning_rate": 3.58989298650676e-06, |
| "loss": 0.1357, |
| "step": 1682 |
| }, |
| { |
| "epoch": 1.1827125790583275, |
| "grad_norm": 0.5196638994182908, |
| "learning_rate": 3.5845985735406634e-06, |
| "loss": 0.1554, |
| "step": 1683 |
| }, |
| { |
| "epoch": 1.1834153197470134, |
| "grad_norm": 0.5253842081752831, |
| "learning_rate": 3.5793058852585837e-06, |
| "loss": 0.1599, |
| "step": 1684 |
| }, |
| { |
| "epoch": 1.1841180604356993, |
| "grad_norm": 0.5049337808941067, |
| "learning_rate": 3.5740149281097276e-06, |
| "loss": 0.1434, |
| "step": 1685 |
| }, |
| { |
| "epoch": 1.184820801124385, |
| "grad_norm": 0.519415710896182, |
| "learning_rate": 3.5687257085411913e-06, |
| "loss": 0.154, |
| "step": 1686 |
| }, |
| { |
| "epoch": 1.185523541813071, |
| "grad_norm": 0.5155923291806198, |
| "learning_rate": 3.563438232997952e-06, |
| "loss": 0.1545, |
| "step": 1687 |
| }, |
| { |
| "epoch": 1.1862262825017569, |
| "grad_norm": 0.5165664428751502, |
| "learning_rate": 3.5581525079228647e-06, |
| "loss": 0.1483, |
| "step": 1688 |
| }, |
| { |
| "epoch": 1.1869290231904428, |
| "grad_norm": 0.5015409975851226, |
| "learning_rate": 3.552868539756651e-06, |
| "loss": 0.1472, |
| "step": 1689 |
| }, |
| { |
| "epoch": 1.1876317638791285, |
| "grad_norm": 0.5051071368670725, |
| "learning_rate": 3.5475863349378907e-06, |
| "loss": 0.1359, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.1883345045678144, |
| "grad_norm": 0.5205146308658571, |
| "learning_rate": 3.5423058999030145e-06, |
| "loss": 0.161, |
| "step": 1691 |
| }, |
| { |
| "epoch": 1.1890372452565003, |
| "grad_norm": 0.5203042023193757, |
| "learning_rate": 3.537027241086296e-06, |
| "loss": 0.1505, |
| "step": 1692 |
| }, |
| { |
| "epoch": 1.1897399859451863, |
| "grad_norm": 0.49733017023610404, |
| "learning_rate": 3.531750364919849e-06, |
| "loss": 0.1508, |
| "step": 1693 |
| }, |
| { |
| "epoch": 1.1904427266338722, |
| "grad_norm": 0.5336269994581739, |
| "learning_rate": 3.526475277833609e-06, |
| "loss": 0.1527, |
| "step": 1694 |
| }, |
| { |
| "epoch": 1.1911454673225579, |
| "grad_norm": 0.5200437431542487, |
| "learning_rate": 3.521201986255338e-06, |
| "loss": 0.1481, |
| "step": 1695 |
| }, |
| { |
| "epoch": 1.1918482080112438, |
| "grad_norm": 0.5475045123595398, |
| "learning_rate": 3.5159304966106034e-06, |
| "loss": 0.1528, |
| "step": 1696 |
| }, |
| { |
| "epoch": 1.1925509486999297, |
| "grad_norm": 0.5366297526728846, |
| "learning_rate": 3.5106608153227805e-06, |
| "loss": 0.154, |
| "step": 1697 |
| }, |
| { |
| "epoch": 1.1932536893886156, |
| "grad_norm": 0.5299936676513248, |
| "learning_rate": 3.50539294881304e-06, |
| "loss": 0.1515, |
| "step": 1698 |
| }, |
| { |
| "epoch": 1.1939564300773016, |
| "grad_norm": 0.5247029140518117, |
| "learning_rate": 3.500126903500345e-06, |
| "loss": 0.1612, |
| "step": 1699 |
| }, |
| { |
| "epoch": 1.1946591707659873, |
| "grad_norm": 0.5191132709788684, |
| "learning_rate": 3.4948626858014345e-06, |
| "loss": 0.1245, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.1953619114546732, |
| "grad_norm": 0.48372763603370605, |
| "learning_rate": 3.4896003021308213e-06, |
| "loss": 0.1276, |
| "step": 1701 |
| }, |
| { |
| "epoch": 1.196064652143359, |
| "grad_norm": 0.5303852655503682, |
| "learning_rate": 3.4843397589007842e-06, |
| "loss": 0.153, |
| "step": 1702 |
| }, |
| { |
| "epoch": 1.196767392832045, |
| "grad_norm": 0.521746022896869, |
| "learning_rate": 3.4790810625213627e-06, |
| "loss": 0.1531, |
| "step": 1703 |
| }, |
| { |
| "epoch": 1.197470133520731, |
| "grad_norm": 0.5174479878366598, |
| "learning_rate": 3.4738242194003403e-06, |
| "loss": 0.1492, |
| "step": 1704 |
| }, |
| { |
| "epoch": 1.1981728742094166, |
| "grad_norm": 0.5190434505967585, |
| "learning_rate": 3.4685692359432487e-06, |
| "loss": 0.1551, |
| "step": 1705 |
| }, |
| { |
| "epoch": 1.1988756148981026, |
| "grad_norm": 0.5309086817418733, |
| "learning_rate": 3.4633161185533435e-06, |
| "loss": 0.1662, |
| "step": 1706 |
| }, |
| { |
| "epoch": 1.1995783555867885, |
| "grad_norm": 0.5109056506042583, |
| "learning_rate": 3.4580648736316167e-06, |
| "loss": 0.1427, |
| "step": 1707 |
| }, |
| { |
| "epoch": 1.2002810962754744, |
| "grad_norm": 0.5133755265180353, |
| "learning_rate": 3.4528155075767746e-06, |
| "loss": 0.1557, |
| "step": 1708 |
| }, |
| { |
| "epoch": 1.2009838369641601, |
| "grad_norm": 0.5009107458499668, |
| "learning_rate": 3.447568026785233e-06, |
| "loss": 0.1424, |
| "step": 1709 |
| }, |
| { |
| "epoch": 1.201686577652846, |
| "grad_norm": 0.4951519629017366, |
| "learning_rate": 3.4423224376511143e-06, |
| "loss": 0.1452, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.202389318341532, |
| "grad_norm": 0.515481038243041, |
| "learning_rate": 3.4370787465662304e-06, |
| "loss": 0.1377, |
| "step": 1711 |
| }, |
| { |
| "epoch": 1.2030920590302179, |
| "grad_norm": 0.49124281287351257, |
| "learning_rate": 3.431836959920083e-06, |
| "loss": 0.1284, |
| "step": 1712 |
| }, |
| { |
| "epoch": 1.2037947997189038, |
| "grad_norm": 0.5437800123662826, |
| "learning_rate": 3.4265970840998562e-06, |
| "loss": 0.1652, |
| "step": 1713 |
| }, |
| { |
| "epoch": 1.2044975404075897, |
| "grad_norm": 0.5071503507526097, |
| "learning_rate": 3.4213591254904023e-06, |
| "loss": 0.1488, |
| "step": 1714 |
| }, |
| { |
| "epoch": 1.2052002810962754, |
| "grad_norm": 0.5003831626896319, |
| "learning_rate": 3.416123090474236e-06, |
| "loss": 0.132, |
| "step": 1715 |
| }, |
| { |
| "epoch": 1.2059030217849613, |
| "grad_norm": 0.49683404967777817, |
| "learning_rate": 3.4108889854315315e-06, |
| "loss": 0.131, |
| "step": 1716 |
| }, |
| { |
| "epoch": 1.2066057624736473, |
| "grad_norm": 0.5390326412523678, |
| "learning_rate": 3.4056568167401106e-06, |
| "loss": 0.134, |
| "step": 1717 |
| }, |
| { |
| "epoch": 1.2073085031623332, |
| "grad_norm": 0.5776173390980446, |
| "learning_rate": 3.4004265907754343e-06, |
| "loss": 0.189, |
| "step": 1718 |
| }, |
| { |
| "epoch": 1.2080112438510189, |
| "grad_norm": 0.5137922725555822, |
| "learning_rate": 3.3951983139106005e-06, |
| "loss": 0.141, |
| "step": 1719 |
| }, |
| { |
| "epoch": 1.2087139845397048, |
| "grad_norm": 0.513839062180173, |
| "learning_rate": 3.3899719925163223e-06, |
| "loss": 0.1602, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.2094167252283907, |
| "grad_norm": 0.4843895500119056, |
| "learning_rate": 3.3847476329609415e-06, |
| "loss": 0.1298, |
| "step": 1721 |
| }, |
| { |
| "epoch": 1.2101194659170766, |
| "grad_norm": 0.5220447841417628, |
| "learning_rate": 3.379525241610402e-06, |
| "loss": 0.1457, |
| "step": 1722 |
| }, |
| { |
| "epoch": 1.2108222066057626, |
| "grad_norm": 0.524805904004868, |
| "learning_rate": 3.3743048248282527e-06, |
| "loss": 0.1562, |
| "step": 1723 |
| }, |
| { |
| "epoch": 1.2115249472944483, |
| "grad_norm": 0.5309924745094761, |
| "learning_rate": 3.3690863889756374e-06, |
| "loss": 0.1589, |
| "step": 1724 |
| }, |
| { |
| "epoch": 1.2122276879831342, |
| "grad_norm": 0.498855889039376, |
| "learning_rate": 3.363869940411282e-06, |
| "loss": 0.1309, |
| "step": 1725 |
| }, |
| { |
| "epoch": 1.21293042867182, |
| "grad_norm": 0.5230334811682804, |
| "learning_rate": 3.358655485491492e-06, |
| "loss": 0.1437, |
| "step": 1726 |
| }, |
| { |
| "epoch": 1.213633169360506, |
| "grad_norm": 0.5285661047217612, |
| "learning_rate": 3.353443030570147e-06, |
| "loss": 0.1599, |
| "step": 1727 |
| }, |
| { |
| "epoch": 1.2143359100491917, |
| "grad_norm": 0.49767850498696875, |
| "learning_rate": 3.348232581998686e-06, |
| "loss": 0.1366, |
| "step": 1728 |
| }, |
| { |
| "epoch": 1.2150386507378776, |
| "grad_norm": 0.5040842660094004, |
| "learning_rate": 3.343024146126108e-06, |
| "loss": 0.1507, |
| "step": 1729 |
| }, |
| { |
| "epoch": 1.2157413914265636, |
| "grad_norm": 0.5588734088360507, |
| "learning_rate": 3.33781772929895e-06, |
| "loss": 0.166, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.2164441321152495, |
| "grad_norm": 0.5470329607806411, |
| "learning_rate": 3.3326133378612996e-06, |
| "loss": 0.1641, |
| "step": 1731 |
| }, |
| { |
| "epoch": 1.2171468728039354, |
| "grad_norm": 0.5070426231374715, |
| "learning_rate": 3.3274109781547685e-06, |
| "loss": 0.139, |
| "step": 1732 |
| }, |
| { |
| "epoch": 1.2178496134926213, |
| "grad_norm": 0.5215478507795955, |
| "learning_rate": 3.322210656518499e-06, |
| "loss": 0.153, |
| "step": 1733 |
| }, |
| { |
| "epoch": 1.218552354181307, |
| "grad_norm": 0.4799184395974528, |
| "learning_rate": 3.317012379289146e-06, |
| "loss": 0.1232, |
| "step": 1734 |
| }, |
| { |
| "epoch": 1.219255094869993, |
| "grad_norm": 0.5054843399887229, |
| "learning_rate": 3.311816152800873e-06, |
| "loss": 0.1336, |
| "step": 1735 |
| }, |
| { |
| "epoch": 1.2199578355586789, |
| "grad_norm": 0.5015658349787894, |
| "learning_rate": 3.3066219833853454e-06, |
| "loss": 0.1329, |
| "step": 1736 |
| }, |
| { |
| "epoch": 1.2206605762473648, |
| "grad_norm": 0.5251819654602317, |
| "learning_rate": 3.3014298773717235e-06, |
| "loss": 0.1415, |
| "step": 1737 |
| }, |
| { |
| "epoch": 1.2213633169360505, |
| "grad_norm": 0.569004014239607, |
| "learning_rate": 3.2962398410866535e-06, |
| "loss": 0.1609, |
| "step": 1738 |
| }, |
| { |
| "epoch": 1.2220660576247364, |
| "grad_norm": 0.5238455440223404, |
| "learning_rate": 3.2910518808542557e-06, |
| "loss": 0.1532, |
| "step": 1739 |
| }, |
| { |
| "epoch": 1.2227687983134223, |
| "grad_norm": 0.5140066709020665, |
| "learning_rate": 3.285866002996124e-06, |
| "loss": 0.1384, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.2234715390021083, |
| "grad_norm": 0.5117475965906966, |
| "learning_rate": 3.2806822138313154e-06, |
| "loss": 0.1445, |
| "step": 1741 |
| }, |
| { |
| "epoch": 1.2241742796907942, |
| "grad_norm": 0.5179832136007794, |
| "learning_rate": 3.275500519676339e-06, |
| "loss": 0.1489, |
| "step": 1742 |
| }, |
| { |
| "epoch": 1.2248770203794799, |
| "grad_norm": 0.5044506657580975, |
| "learning_rate": 3.2703209268451565e-06, |
| "loss": 0.1469, |
| "step": 1743 |
| }, |
| { |
| "epoch": 1.2255797610681658, |
| "grad_norm": 0.5146259932596444, |
| "learning_rate": 3.26514344164916e-06, |
| "loss": 0.1596, |
| "step": 1744 |
| }, |
| { |
| "epoch": 1.2262825017568517, |
| "grad_norm": 0.5525856588040384, |
| "learning_rate": 3.2599680703971824e-06, |
| "loss": 0.1759, |
| "step": 1745 |
| }, |
| { |
| "epoch": 1.2269852424455376, |
| "grad_norm": 0.5109312883592165, |
| "learning_rate": 3.2547948193954747e-06, |
| "loss": 0.1579, |
| "step": 1746 |
| }, |
| { |
| "epoch": 1.2276879831342236, |
| "grad_norm": 0.5423796532607921, |
| "learning_rate": 3.24962369494771e-06, |
| "loss": 0.1787, |
| "step": 1747 |
| }, |
| { |
| "epoch": 1.2283907238229093, |
| "grad_norm": 0.5210898787241186, |
| "learning_rate": 3.2444547033549654e-06, |
| "loss": 0.1378, |
| "step": 1748 |
| }, |
| { |
| "epoch": 1.2290934645115952, |
| "grad_norm": 0.52972502210687, |
| "learning_rate": 3.23928785091572e-06, |
| "loss": 0.1545, |
| "step": 1749 |
| }, |
| { |
| "epoch": 1.229796205200281, |
| "grad_norm": 0.5393701634157857, |
| "learning_rate": 3.2341231439258454e-06, |
| "loss": 0.1502, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.230498945888967, |
| "grad_norm": 0.513428650112163, |
| "learning_rate": 3.2289605886786035e-06, |
| "loss": 0.1231, |
| "step": 1751 |
| }, |
| { |
| "epoch": 1.231201686577653, |
| "grad_norm": 0.5232149614791697, |
| "learning_rate": 3.22380019146463e-06, |
| "loss": 0.1646, |
| "step": 1752 |
| }, |
| { |
| "epoch": 1.2319044272663386, |
| "grad_norm": 0.5151905240888011, |
| "learning_rate": 3.2186419585719344e-06, |
| "loss": 0.1591, |
| "step": 1753 |
| }, |
| { |
| "epoch": 1.2326071679550246, |
| "grad_norm": 0.5008232200255709, |
| "learning_rate": 3.2134858962858824e-06, |
| "loss": 0.1346, |
| "step": 1754 |
| }, |
| { |
| "epoch": 1.2333099086437105, |
| "grad_norm": 0.5387557557049685, |
| "learning_rate": 3.2083320108892026e-06, |
| "loss": 0.1577, |
| "step": 1755 |
| }, |
| { |
| "epoch": 1.2340126493323964, |
| "grad_norm": 0.5225873098719417, |
| "learning_rate": 3.203180308661965e-06, |
| "loss": 0.1482, |
| "step": 1756 |
| }, |
| { |
| "epoch": 1.234715390021082, |
| "grad_norm": 0.5402414597227396, |
| "learning_rate": 3.1980307958815852e-06, |
| "loss": 0.1256, |
| "step": 1757 |
| }, |
| { |
| "epoch": 1.235418130709768, |
| "grad_norm": 0.5706661096829184, |
| "learning_rate": 3.192883478822807e-06, |
| "loss": 0.1642, |
| "step": 1758 |
| }, |
| { |
| "epoch": 1.236120871398454, |
| "grad_norm": 0.5069769720641505, |
| "learning_rate": 3.187738363757698e-06, |
| "loss": 0.1331, |
| "step": 1759 |
| }, |
| { |
| "epoch": 1.2368236120871399, |
| "grad_norm": 0.5188196857915583, |
| "learning_rate": 3.182595456955644e-06, |
| "loss": 0.1438, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.2375263527758258, |
| "grad_norm": 0.505547435629863, |
| "learning_rate": 3.1774547646833407e-06, |
| "loss": 0.1345, |
| "step": 1761 |
| }, |
| { |
| "epoch": 1.2382290934645117, |
| "grad_norm": 0.4967289635160625, |
| "learning_rate": 3.172316293204787e-06, |
| "loss": 0.1279, |
| "step": 1762 |
| }, |
| { |
| "epoch": 1.2389318341531974, |
| "grad_norm": 0.5324187041156441, |
| "learning_rate": 3.1671800487812697e-06, |
| "loss": 0.1567, |
| "step": 1763 |
| }, |
| { |
| "epoch": 1.2396345748418833, |
| "grad_norm": 0.5192324122593844, |
| "learning_rate": 3.1620460376713668e-06, |
| "loss": 0.1606, |
| "step": 1764 |
| }, |
| { |
| "epoch": 1.2403373155305693, |
| "grad_norm": 0.5202902428884335, |
| "learning_rate": 3.156914266130935e-06, |
| "loss": 0.1482, |
| "step": 1765 |
| }, |
| { |
| "epoch": 1.2410400562192552, |
| "grad_norm": 0.5128178814546309, |
| "learning_rate": 3.1517847404131e-06, |
| "loss": 0.1429, |
| "step": 1766 |
| }, |
| { |
| "epoch": 1.2417427969079409, |
| "grad_norm": 0.5374214791148246, |
| "learning_rate": 3.1466574667682546e-06, |
| "loss": 0.147, |
| "step": 1767 |
| }, |
| { |
| "epoch": 1.2424455375966268, |
| "grad_norm": 0.4946429960253402, |
| "learning_rate": 3.1415324514440392e-06, |
| "loss": 0.1469, |
| "step": 1768 |
| }, |
| { |
| "epoch": 1.2431482782853127, |
| "grad_norm": 0.49213088843995484, |
| "learning_rate": 3.1364097006853523e-06, |
| "loss": 0.1359, |
| "step": 1769 |
| }, |
| { |
| "epoch": 1.2438510189739986, |
| "grad_norm": 0.5011419601118812, |
| "learning_rate": 3.131289220734327e-06, |
| "loss": 0.1345, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.2445537596626846, |
| "grad_norm": 0.5200319915187279, |
| "learning_rate": 3.1261710178303316e-06, |
| "loss": 0.1602, |
| "step": 1771 |
| }, |
| { |
| "epoch": 1.2452565003513703, |
| "grad_norm": 0.49248263319506386, |
| "learning_rate": 3.1210550982099596e-06, |
| "loss": 0.1235, |
| "step": 1772 |
| }, |
| { |
| "epoch": 1.2459592410400562, |
| "grad_norm": 0.5349869231670884, |
| "learning_rate": 3.115941468107021e-06, |
| "loss": 0.1698, |
| "step": 1773 |
| }, |
| { |
| "epoch": 1.246661981728742, |
| "grad_norm": 0.5263951000032334, |
| "learning_rate": 3.110830133752536e-06, |
| "loss": 0.1463, |
| "step": 1774 |
| }, |
| { |
| "epoch": 1.247364722417428, |
| "grad_norm": 0.5167900381423574, |
| "learning_rate": 3.1057211013747295e-06, |
| "loss": 0.1447, |
| "step": 1775 |
| }, |
| { |
| "epoch": 1.248067463106114, |
| "grad_norm": 0.5309999924089229, |
| "learning_rate": 3.1006143771990205e-06, |
| "loss": 0.1343, |
| "step": 1776 |
| }, |
| { |
| "epoch": 1.2487702037947996, |
| "grad_norm": 0.5150448538036977, |
| "learning_rate": 3.095509967448016e-06, |
| "loss": 0.1352, |
| "step": 1777 |
| }, |
| { |
| "epoch": 1.2494729444834856, |
| "grad_norm": 0.5262709024276062, |
| "learning_rate": 3.090407878341498e-06, |
| "loss": 0.1592, |
| "step": 1778 |
| }, |
| { |
| "epoch": 1.2501756851721715, |
| "grad_norm": 0.5302743651945259, |
| "learning_rate": 3.085308116096428e-06, |
| "loss": 0.1397, |
| "step": 1779 |
| }, |
| { |
| "epoch": 1.2508784258608574, |
| "grad_norm": 0.5427861797478236, |
| "learning_rate": 3.080210686926928e-06, |
| "loss": 0.1528, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.2515811665495433, |
| "grad_norm": 0.5154882831721702, |
| "learning_rate": 3.0751155970442792e-06, |
| "loss": 0.1434, |
| "step": 1781 |
| }, |
| { |
| "epoch": 1.252283907238229, |
| "grad_norm": 0.5251446781371032, |
| "learning_rate": 3.070022852656911e-06, |
| "loss": 0.146, |
| "step": 1782 |
| }, |
| { |
| "epoch": 1.252986647926915, |
| "grad_norm": 0.5099873463651408, |
| "learning_rate": 3.0649324599703933e-06, |
| "loss": 0.1328, |
| "step": 1783 |
| }, |
| { |
| "epoch": 1.2536893886156009, |
| "grad_norm": 0.49880544349781974, |
| "learning_rate": 3.0598444251874315e-06, |
| "loss": 0.1326, |
| "step": 1784 |
| }, |
| { |
| "epoch": 1.2543921293042868, |
| "grad_norm": 0.498993602346327, |
| "learning_rate": 3.0547587545078615e-06, |
| "loss": 0.1301, |
| "step": 1785 |
| }, |
| { |
| "epoch": 1.2550948699929725, |
| "grad_norm": 0.5057816731207657, |
| "learning_rate": 3.0496754541286346e-06, |
| "loss": 0.1464, |
| "step": 1786 |
| }, |
| { |
| "epoch": 1.2557976106816584, |
| "grad_norm": 0.5371622700474783, |
| "learning_rate": 3.044594530243813e-06, |
| "loss": 0.1581, |
| "step": 1787 |
| }, |
| { |
| "epoch": 1.2565003513703443, |
| "grad_norm": 0.5341064067042999, |
| "learning_rate": 3.0395159890445647e-06, |
| "loss": 0.1506, |
| "step": 1788 |
| }, |
| { |
| "epoch": 1.2572030920590302, |
| "grad_norm": 0.5271349251255161, |
| "learning_rate": 3.0344398367191574e-06, |
| "loss": 0.138, |
| "step": 1789 |
| }, |
| { |
| "epoch": 1.2579058327477162, |
| "grad_norm": 0.4824505775115255, |
| "learning_rate": 3.029366079452943e-06, |
| "loss": 0.1211, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.258608573436402, |
| "grad_norm": 0.5545474070295585, |
| "learning_rate": 3.024294723428358e-06, |
| "loss": 0.1749, |
| "step": 1791 |
| }, |
| { |
| "epoch": 1.2593113141250878, |
| "grad_norm": 0.5067819128272134, |
| "learning_rate": 3.0192257748249097e-06, |
| "loss": 0.1347, |
| "step": 1792 |
| }, |
| { |
| "epoch": 1.2600140548137737, |
| "grad_norm": 0.5146999127963285, |
| "learning_rate": 3.0141592398191765e-06, |
| "loss": 0.1481, |
| "step": 1793 |
| }, |
| { |
| "epoch": 1.2607167955024596, |
| "grad_norm": 0.4993235127306429, |
| "learning_rate": 3.009095124584792e-06, |
| "loss": 0.139, |
| "step": 1794 |
| }, |
| { |
| "epoch": 1.2614195361911453, |
| "grad_norm": 0.5667651682034487, |
| "learning_rate": 3.004033435292445e-06, |
| "loss": 0.185, |
| "step": 1795 |
| }, |
| { |
| "epoch": 1.2621222768798313, |
| "grad_norm": 0.5462740967024108, |
| "learning_rate": 2.9989741781098654e-06, |
| "loss": 0.1348, |
| "step": 1796 |
| }, |
| { |
| "epoch": 1.2628250175685172, |
| "grad_norm": 0.5416746386665264, |
| "learning_rate": 2.9939173592018185e-06, |
| "loss": 0.1603, |
| "step": 1797 |
| }, |
| { |
| "epoch": 1.263527758257203, |
| "grad_norm": 0.4922679741577877, |
| "learning_rate": 2.9888629847301e-06, |
| "loss": 0.1345, |
| "step": 1798 |
| }, |
| { |
| "epoch": 1.264230498945889, |
| "grad_norm": 0.511441166644177, |
| "learning_rate": 2.9838110608535297e-06, |
| "loss": 0.1309, |
| "step": 1799 |
| }, |
| { |
| "epoch": 1.264933239634575, |
| "grad_norm": 0.5120913001962054, |
| "learning_rate": 2.978761593727938e-06, |
| "loss": 0.1367, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.2656359803232606, |
| "grad_norm": 0.5095120682563343, |
| "learning_rate": 2.9737145895061626e-06, |
| "loss": 0.14, |
| "step": 1801 |
| }, |
| { |
| "epoch": 1.2663387210119466, |
| "grad_norm": 0.47522805017685915, |
| "learning_rate": 2.9686700543380386e-06, |
| "loss": 0.1199, |
| "step": 1802 |
| }, |
| { |
| "epoch": 1.2670414617006325, |
| "grad_norm": 0.5224128281661045, |
| "learning_rate": 2.9636279943703956e-06, |
| "loss": 0.1478, |
| "step": 1803 |
| }, |
| { |
| "epoch": 1.2677442023893184, |
| "grad_norm": 0.5645184496216876, |
| "learning_rate": 2.9585884157470457e-06, |
| "loss": 0.1584, |
| "step": 1804 |
| }, |
| { |
| "epoch": 1.268446943078004, |
| "grad_norm": 0.5007962250263087, |
| "learning_rate": 2.953551324608775e-06, |
| "loss": 0.1453, |
| "step": 1805 |
| }, |
| { |
| "epoch": 1.26914968376669, |
| "grad_norm": 0.5030706513703921, |
| "learning_rate": 2.948516727093345e-06, |
| "loss": 0.1431, |
| "step": 1806 |
| }, |
| { |
| "epoch": 1.269852424455376, |
| "grad_norm": 0.5115034601876566, |
| "learning_rate": 2.943484629335471e-06, |
| "loss": 0.1399, |
| "step": 1807 |
| }, |
| { |
| "epoch": 1.2705551651440619, |
| "grad_norm": 0.5306475619859111, |
| "learning_rate": 2.9384550374668276e-06, |
| "loss": 0.1682, |
| "step": 1808 |
| }, |
| { |
| "epoch": 1.2712579058327478, |
| "grad_norm": 0.5163969175891816, |
| "learning_rate": 2.933427957616034e-06, |
| "loss": 0.1425, |
| "step": 1809 |
| }, |
| { |
| "epoch": 1.2719606465214337, |
| "grad_norm": 0.5094171802140397, |
| "learning_rate": 2.9284033959086494e-06, |
| "loss": 0.1446, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.2726633872101194, |
| "grad_norm": 0.5177997618489504, |
| "learning_rate": 2.923381358467162e-06, |
| "loss": 0.1553, |
| "step": 1811 |
| }, |
| { |
| "epoch": 1.2733661278988053, |
| "grad_norm": 0.5433548572173907, |
| "learning_rate": 2.918361851410987e-06, |
| "loss": 0.1416, |
| "step": 1812 |
| }, |
| { |
| "epoch": 1.2740688685874912, |
| "grad_norm": 0.5004983199100684, |
| "learning_rate": 2.9133448808564556e-06, |
| "loss": 0.128, |
| "step": 1813 |
| }, |
| { |
| "epoch": 1.2747716092761772, |
| "grad_norm": 0.503883415450621, |
| "learning_rate": 2.9083304529168087e-06, |
| "loss": 0.1483, |
| "step": 1814 |
| }, |
| { |
| "epoch": 1.2754743499648629, |
| "grad_norm": 0.5679390165546186, |
| "learning_rate": 2.9033185737021875e-06, |
| "loss": 0.17, |
| "step": 1815 |
| }, |
| { |
| "epoch": 1.2761770906535488, |
| "grad_norm": 0.5465055047033353, |
| "learning_rate": 2.8983092493196286e-06, |
| "loss": 0.1703, |
| "step": 1816 |
| }, |
| { |
| "epoch": 1.2768798313422347, |
| "grad_norm": 0.4787604232572746, |
| "learning_rate": 2.8933024858730546e-06, |
| "loss": 0.1151, |
| "step": 1817 |
| }, |
| { |
| "epoch": 1.2775825720309206, |
| "grad_norm": 0.5174270003066762, |
| "learning_rate": 2.8882982894632694e-06, |
| "loss": 0.1575, |
| "step": 1818 |
| }, |
| { |
| "epoch": 1.2782853127196065, |
| "grad_norm": 0.519352188838613, |
| "learning_rate": 2.883296666187947e-06, |
| "loss": 0.1408, |
| "step": 1819 |
| }, |
| { |
| "epoch": 1.2789880534082925, |
| "grad_norm": 0.5331544627649402, |
| "learning_rate": 2.8782976221416265e-06, |
| "loss": 0.1569, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.2796907940969782, |
| "grad_norm": 0.5190193086008424, |
| "learning_rate": 2.873301163415705e-06, |
| "loss": 0.1461, |
| "step": 1821 |
| }, |
| { |
| "epoch": 1.280393534785664, |
| "grad_norm": 0.531637624498204, |
| "learning_rate": 2.8683072960984294e-06, |
| "loss": 0.1499, |
| "step": 1822 |
| }, |
| { |
| "epoch": 1.28109627547435, |
| "grad_norm": 0.4985066831209329, |
| "learning_rate": 2.8633160262748873e-06, |
| "loss": 0.1377, |
| "step": 1823 |
| }, |
| { |
| "epoch": 1.2817990161630357, |
| "grad_norm": 0.5495919052999558, |
| "learning_rate": 2.858327360027e-06, |
| "loss": 0.176, |
| "step": 1824 |
| }, |
| { |
| "epoch": 1.2825017568517216, |
| "grad_norm": 0.503827728446379, |
| "learning_rate": 2.8533413034335257e-06, |
| "loss": 0.1358, |
| "step": 1825 |
| }, |
| { |
| "epoch": 1.2832044975404076, |
| "grad_norm": 0.49803838389907806, |
| "learning_rate": 2.8483578625700286e-06, |
| "loss": 0.1373, |
| "step": 1826 |
| }, |
| { |
| "epoch": 1.2839072382290935, |
| "grad_norm": 0.49609765850369625, |
| "learning_rate": 2.8433770435088957e-06, |
| "loss": 0.1435, |
| "step": 1827 |
| }, |
| { |
| "epoch": 1.2846099789177794, |
| "grad_norm": 0.48624878597669635, |
| "learning_rate": 2.838398852319313e-06, |
| "loss": 0.1316, |
| "step": 1828 |
| }, |
| { |
| "epoch": 1.2853127196064653, |
| "grad_norm": 0.5429732189301352, |
| "learning_rate": 2.8334232950672724e-06, |
| "loss": 0.1507, |
| "step": 1829 |
| }, |
| { |
| "epoch": 1.286015460295151, |
| "grad_norm": 0.5469891683215277, |
| "learning_rate": 2.8284503778155513e-06, |
| "loss": 0.1751, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.286718200983837, |
| "grad_norm": 0.4824278437036638, |
| "learning_rate": 2.823480106623704e-06, |
| "loss": 0.1217, |
| "step": 1831 |
| }, |
| { |
| "epoch": 1.2874209416725229, |
| "grad_norm": 0.5155410308993346, |
| "learning_rate": 2.8185124875480742e-06, |
| "loss": 0.1347, |
| "step": 1832 |
| }, |
| { |
| "epoch": 1.2881236823612088, |
| "grad_norm": 0.4668465729229129, |
| "learning_rate": 2.8135475266417626e-06, |
| "loss": 0.1202, |
| "step": 1833 |
| }, |
| { |
| "epoch": 1.2888264230498945, |
| "grad_norm": 0.49186559366844784, |
| "learning_rate": 2.808585229954637e-06, |
| "loss": 0.135, |
| "step": 1834 |
| }, |
| { |
| "epoch": 1.2895291637385804, |
| "grad_norm": 0.5058080898482784, |
| "learning_rate": 2.803625603533316e-06, |
| "loss": 0.1415, |
| "step": 1835 |
| }, |
| { |
| "epoch": 1.2902319044272663, |
| "grad_norm": 0.5125628233669447, |
| "learning_rate": 2.7986686534211656e-06, |
| "loss": 0.1228, |
| "step": 1836 |
| }, |
| { |
| "epoch": 1.2909346451159522, |
| "grad_norm": 0.522505002650681, |
| "learning_rate": 2.79371438565829e-06, |
| "loss": 0.1449, |
| "step": 1837 |
| }, |
| { |
| "epoch": 1.2916373858046382, |
| "grad_norm": 0.503912487637843, |
| "learning_rate": 2.7887628062815252e-06, |
| "loss": 0.1372, |
| "step": 1838 |
| }, |
| { |
| "epoch": 1.292340126493324, |
| "grad_norm": 0.5069732877673296, |
| "learning_rate": 2.7838139213244318e-06, |
| "loss": 0.1388, |
| "step": 1839 |
| }, |
| { |
| "epoch": 1.2930428671820098, |
| "grad_norm": 0.5250585361768124, |
| "learning_rate": 2.7788677368172877e-06, |
| "loss": 0.1605, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.2937456078706957, |
| "grad_norm": 0.5263704515292615, |
| "learning_rate": 2.7739242587870786e-06, |
| "loss": 0.1657, |
| "step": 1841 |
| }, |
| { |
| "epoch": 1.2944483485593816, |
| "grad_norm": 0.5165185021883313, |
| "learning_rate": 2.7689834932574923e-06, |
| "loss": 0.147, |
| "step": 1842 |
| }, |
| { |
| "epoch": 1.2951510892480675, |
| "grad_norm": 0.5270573489356727, |
| "learning_rate": 2.764045446248913e-06, |
| "loss": 0.16, |
| "step": 1843 |
| }, |
| { |
| "epoch": 1.2958538299367532, |
| "grad_norm": 0.5429711458873587, |
| "learning_rate": 2.7591101237784122e-06, |
| "loss": 0.1653, |
| "step": 1844 |
| }, |
| { |
| "epoch": 1.2965565706254392, |
| "grad_norm": 0.5095626117090168, |
| "learning_rate": 2.7541775318597407e-06, |
| "loss": 0.1328, |
| "step": 1845 |
| }, |
| { |
| "epoch": 1.297259311314125, |
| "grad_norm": 0.5076499802471852, |
| "learning_rate": 2.7492476765033227e-06, |
| "loss": 0.1485, |
| "step": 1846 |
| }, |
| { |
| "epoch": 1.297962052002811, |
| "grad_norm": 0.5427967731010709, |
| "learning_rate": 2.7443205637162463e-06, |
| "loss": 0.1679, |
| "step": 1847 |
| }, |
| { |
| "epoch": 1.298664792691497, |
| "grad_norm": 0.5308997772141762, |
| "learning_rate": 2.7393961995022565e-06, |
| "loss": 0.1466, |
| "step": 1848 |
| }, |
| { |
| "epoch": 1.2993675333801828, |
| "grad_norm": 0.49043902053039395, |
| "learning_rate": 2.7344745898617598e-06, |
| "loss": 0.1309, |
| "step": 1849 |
| }, |
| { |
| "epoch": 1.3000702740688685, |
| "grad_norm": 0.5195631188883199, |
| "learning_rate": 2.7295557407917904e-06, |
| "loss": 0.1525, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.3007730147575545, |
| "grad_norm": 0.5009791984310343, |
| "learning_rate": 2.7246396582860293e-06, |
| "loss": 0.1419, |
| "step": 1851 |
| }, |
| { |
| "epoch": 1.3014757554462404, |
| "grad_norm": 0.5067602717562613, |
| "learning_rate": 2.71972634833478e-06, |
| "loss": 0.1374, |
| "step": 1852 |
| }, |
| { |
| "epoch": 1.302178496134926, |
| "grad_norm": 0.5374645736356453, |
| "learning_rate": 2.7148158169249757e-06, |
| "loss": 0.1355, |
| "step": 1853 |
| }, |
| { |
| "epoch": 1.302881236823612, |
| "grad_norm": 0.5224103943507988, |
| "learning_rate": 2.709908070040159e-06, |
| "loss": 0.1454, |
| "step": 1854 |
| }, |
| { |
| "epoch": 1.303583977512298, |
| "grad_norm": 0.5032174206767985, |
| "learning_rate": 2.705003113660477e-06, |
| "loss": 0.1415, |
| "step": 1855 |
| }, |
| { |
| "epoch": 1.3042867182009839, |
| "grad_norm": 0.534616859772977, |
| "learning_rate": 2.7001009537626775e-06, |
| "loss": 0.1442, |
| "step": 1856 |
| }, |
| { |
| "epoch": 1.3049894588896698, |
| "grad_norm": 0.5627581840537774, |
| "learning_rate": 2.695201596320107e-06, |
| "loss": 0.1728, |
| "step": 1857 |
| }, |
| { |
| "epoch": 1.3056921995783557, |
| "grad_norm": 0.5080008680203384, |
| "learning_rate": 2.690305047302692e-06, |
| "loss": 0.1506, |
| "step": 1858 |
| }, |
| { |
| "epoch": 1.3063949402670414, |
| "grad_norm": 0.5187284564546811, |
| "learning_rate": 2.685411312676936e-06, |
| "loss": 0.1391, |
| "step": 1859 |
| }, |
| { |
| "epoch": 1.3070976809557273, |
| "grad_norm": 0.5112305622849427, |
| "learning_rate": 2.6805203984059156e-06, |
| "loss": 0.145, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.3078004216444132, |
| "grad_norm": 0.5429219613865652, |
| "learning_rate": 2.67563231044927e-06, |
| "loss": 0.154, |
| "step": 1861 |
| }, |
| { |
| "epoch": 1.3085031623330992, |
| "grad_norm": 0.4981918212569302, |
| "learning_rate": 2.670747054763193e-06, |
| "loss": 0.1325, |
| "step": 1862 |
| }, |
| { |
| "epoch": 1.3092059030217849, |
| "grad_norm": 0.5156778705541998, |
| "learning_rate": 2.6658646373004304e-06, |
| "loss": 0.1544, |
| "step": 1863 |
| }, |
| { |
| "epoch": 1.3099086437104708, |
| "grad_norm": 0.5002674697829435, |
| "learning_rate": 2.6609850640102665e-06, |
| "loss": 0.1314, |
| "step": 1864 |
| }, |
| { |
| "epoch": 1.3106113843991567, |
| "grad_norm": 0.5543015508160836, |
| "learning_rate": 2.6561083408385224e-06, |
| "loss": 0.1692, |
| "step": 1865 |
| }, |
| { |
| "epoch": 1.3113141250878426, |
| "grad_norm": 0.5619325590564459, |
| "learning_rate": 2.6512344737275443e-06, |
| "loss": 0.1464, |
| "step": 1866 |
| }, |
| { |
| "epoch": 1.3120168657765285, |
| "grad_norm": 0.5237448611265626, |
| "learning_rate": 2.6463634686161998e-06, |
| "loss": 0.1462, |
| "step": 1867 |
| }, |
| { |
| "epoch": 1.3127196064652145, |
| "grad_norm": 0.5403930457832352, |
| "learning_rate": 2.6414953314398673e-06, |
| "loss": 0.1669, |
| "step": 1868 |
| }, |
| { |
| "epoch": 1.3134223471539002, |
| "grad_norm": 0.5220596141331452, |
| "learning_rate": 2.6366300681304334e-06, |
| "loss": 0.1437, |
| "step": 1869 |
| }, |
| { |
| "epoch": 1.314125087842586, |
| "grad_norm": 0.5361348116943498, |
| "learning_rate": 2.63176768461628e-06, |
| "loss": 0.1694, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.314827828531272, |
| "grad_norm": 0.5110847626454531, |
| "learning_rate": 2.6269081868222814e-06, |
| "loss": 0.1377, |
| "step": 1871 |
| }, |
| { |
| "epoch": 1.3155305692199577, |
| "grad_norm": 0.4842268666664499, |
| "learning_rate": 2.6220515806697934e-06, |
| "loss": 0.1245, |
| "step": 1872 |
| }, |
| { |
| "epoch": 1.3162333099086436, |
| "grad_norm": 0.5160972703016548, |
| "learning_rate": 2.6171978720766557e-06, |
| "loss": 0.1328, |
| "step": 1873 |
| }, |
| { |
| "epoch": 1.3169360505973295, |
| "grad_norm": 0.548131436737344, |
| "learning_rate": 2.6123470669571665e-06, |
| "loss": 0.1779, |
| "step": 1874 |
| }, |
| { |
| "epoch": 1.3176387912860155, |
| "grad_norm": 0.5573921571705782, |
| "learning_rate": 2.607499171222093e-06, |
| "loss": 0.1561, |
| "step": 1875 |
| }, |
| { |
| "epoch": 1.3183415319747014, |
| "grad_norm": 0.5272568688464775, |
| "learning_rate": 2.602654190778654e-06, |
| "loss": 0.1517, |
| "step": 1876 |
| }, |
| { |
| "epoch": 1.3190442726633873, |
| "grad_norm": 0.4901855937914321, |
| "learning_rate": 2.5978121315305217e-06, |
| "loss": 0.129, |
| "step": 1877 |
| }, |
| { |
| "epoch": 1.319747013352073, |
| "grad_norm": 0.5216856895424252, |
| "learning_rate": 2.5929729993778046e-06, |
| "loss": 0.1467, |
| "step": 1878 |
| }, |
| { |
| "epoch": 1.320449754040759, |
| "grad_norm": 0.5060265023823881, |
| "learning_rate": 2.5881368002170403e-06, |
| "loss": 0.1538, |
| "step": 1879 |
| }, |
| { |
| "epoch": 1.3211524947294448, |
| "grad_norm": 0.5207186271810702, |
| "learning_rate": 2.5833035399411977e-06, |
| "loss": 0.1483, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.3218552354181308, |
| "grad_norm": 0.5369159344507257, |
| "learning_rate": 2.5784732244396667e-06, |
| "loss": 0.1662, |
| "step": 1881 |
| }, |
| { |
| "epoch": 1.3225579761068165, |
| "grad_norm": 0.5485861290717046, |
| "learning_rate": 2.573645859598245e-06, |
| "loss": 0.1784, |
| "step": 1882 |
| }, |
| { |
| "epoch": 1.3232607167955024, |
| "grad_norm": 0.5169582374334584, |
| "learning_rate": 2.568821451299135e-06, |
| "loss": 0.1691, |
| "step": 1883 |
| }, |
| { |
| "epoch": 1.3239634574841883, |
| "grad_norm": 0.4888073677651971, |
| "learning_rate": 2.564000005420938e-06, |
| "loss": 0.1256, |
| "step": 1884 |
| }, |
| { |
| "epoch": 1.3246661981728742, |
| "grad_norm": 0.5325076620163575, |
| "learning_rate": 2.5591815278386456e-06, |
| "loss": 0.1506, |
| "step": 1885 |
| }, |
| { |
| "epoch": 1.3253689388615602, |
| "grad_norm": 0.49725808885086276, |
| "learning_rate": 2.554366024423631e-06, |
| "loss": 0.1425, |
| "step": 1886 |
| }, |
| { |
| "epoch": 1.326071679550246, |
| "grad_norm": 0.5168771320310778, |
| "learning_rate": 2.5495535010436445e-06, |
| "loss": 0.1499, |
| "step": 1887 |
| }, |
| { |
| "epoch": 1.3267744202389318, |
| "grad_norm": 0.5065760057974372, |
| "learning_rate": 2.5447439635628046e-06, |
| "loss": 0.1416, |
| "step": 1888 |
| }, |
| { |
| "epoch": 1.3274771609276177, |
| "grad_norm": 0.5036549891249, |
| "learning_rate": 2.5399374178415926e-06, |
| "loss": 0.1416, |
| "step": 1889 |
| }, |
| { |
| "epoch": 1.3281799016163036, |
| "grad_norm": 0.5242118469333106, |
| "learning_rate": 2.535133869736842e-06, |
| "loss": 0.1608, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.3288826423049895, |
| "grad_norm": 0.5123139889811016, |
| "learning_rate": 2.5303333251017378e-06, |
| "loss": 0.1302, |
| "step": 1891 |
| }, |
| { |
| "epoch": 1.3295853829936752, |
| "grad_norm": 0.527324699306745, |
| "learning_rate": 2.5255357897857996e-06, |
| "loss": 0.1553, |
| "step": 1892 |
| }, |
| { |
| "epoch": 1.3302881236823612, |
| "grad_norm": 0.5460335838857496, |
| "learning_rate": 2.5207412696348854e-06, |
| "loss": 0.1679, |
| "step": 1893 |
| }, |
| { |
| "epoch": 1.330990864371047, |
| "grad_norm": 0.4880081828121528, |
| "learning_rate": 2.515949770491175e-06, |
| "loss": 0.133, |
| "step": 1894 |
| }, |
| { |
| "epoch": 1.331693605059733, |
| "grad_norm": 0.511063147112203, |
| "learning_rate": 2.51116129819317e-06, |
| "loss": 0.1552, |
| "step": 1895 |
| }, |
| { |
| "epoch": 1.332396345748419, |
| "grad_norm": 0.5570249105151223, |
| "learning_rate": 2.5063758585756814e-06, |
| "loss": 0.1706, |
| "step": 1896 |
| }, |
| { |
| "epoch": 1.3330990864371048, |
| "grad_norm": 0.5281127442022048, |
| "learning_rate": 2.5015934574698303e-06, |
| "loss": 0.1461, |
| "step": 1897 |
| }, |
| { |
| "epoch": 1.3338018271257905, |
| "grad_norm": 0.5323432905157081, |
| "learning_rate": 2.496814100703026e-06, |
| "loss": 0.1379, |
| "step": 1898 |
| }, |
| { |
| "epoch": 1.3345045678144765, |
| "grad_norm": 0.5044474529457625, |
| "learning_rate": 2.4920377940989763e-06, |
| "loss": 0.1298, |
| "step": 1899 |
| }, |
| { |
| "epoch": 1.3352073085031624, |
| "grad_norm": 0.5020068606866582, |
| "learning_rate": 2.4872645434776666e-06, |
| "loss": 0.1463, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.335910049191848, |
| "grad_norm": 0.5295400057005242, |
| "learning_rate": 2.4824943546553646e-06, |
| "loss": 0.1493, |
| "step": 1901 |
| }, |
| { |
| "epoch": 1.336612789880534, |
| "grad_norm": 0.5162367748874149, |
| "learning_rate": 2.4777272334446055e-06, |
| "loss": 0.141, |
| "step": 1902 |
| }, |
| { |
| "epoch": 1.33731553056922, |
| "grad_norm": 0.5146721336573555, |
| "learning_rate": 2.472963185654181e-06, |
| "loss": 0.1491, |
| "step": 1903 |
| }, |
| { |
| "epoch": 1.3380182712579058, |
| "grad_norm": 0.5344070480631355, |
| "learning_rate": 2.4682022170891403e-06, |
| "loss": 0.1569, |
| "step": 1904 |
| }, |
| { |
| "epoch": 1.3387210119465918, |
| "grad_norm": 0.4998274905470932, |
| "learning_rate": 2.4634443335507868e-06, |
| "loss": 0.1442, |
| "step": 1905 |
| }, |
| { |
| "epoch": 1.3394237526352777, |
| "grad_norm": 0.5097557411992474, |
| "learning_rate": 2.4586895408366585e-06, |
| "loss": 0.1418, |
| "step": 1906 |
| }, |
| { |
| "epoch": 1.3401264933239634, |
| "grad_norm": 0.5170662896606593, |
| "learning_rate": 2.45393784474053e-06, |
| "loss": 0.1516, |
| "step": 1907 |
| }, |
| { |
| "epoch": 1.3408292340126493, |
| "grad_norm": 0.5242448072331167, |
| "learning_rate": 2.449189251052396e-06, |
| "loss": 0.1368, |
| "step": 1908 |
| }, |
| { |
| "epoch": 1.3415319747013352, |
| "grad_norm": 0.5333176246201976, |
| "learning_rate": 2.444443765558482e-06, |
| "loss": 0.1274, |
| "step": 1909 |
| }, |
| { |
| "epoch": 1.3422347153900211, |
| "grad_norm": 0.49863542852206516, |
| "learning_rate": 2.4397013940412178e-06, |
| "loss": 0.1309, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.3429374560787068, |
| "grad_norm": 0.5200347853432555, |
| "learning_rate": 2.434962142279242e-06, |
| "loss": 0.1535, |
| "step": 1911 |
| }, |
| { |
| "epoch": 1.3436401967673928, |
| "grad_norm": 0.5197849832074735, |
| "learning_rate": 2.4302260160473906e-06, |
| "loss": 0.1558, |
| "step": 1912 |
| }, |
| { |
| "epoch": 1.3443429374560787, |
| "grad_norm": 0.5042674561742618, |
| "learning_rate": 2.4254930211166922e-06, |
| "loss": 0.143, |
| "step": 1913 |
| }, |
| { |
| "epoch": 1.3450456781447646, |
| "grad_norm": 0.4941678205162263, |
| "learning_rate": 2.420763163254359e-06, |
| "loss": 0.1504, |
| "step": 1914 |
| }, |
| { |
| "epoch": 1.3457484188334505, |
| "grad_norm": 0.4900796733037519, |
| "learning_rate": 2.4160364482237797e-06, |
| "loss": 0.126, |
| "step": 1915 |
| }, |
| { |
| "epoch": 1.3464511595221365, |
| "grad_norm": 0.4963392642038735, |
| "learning_rate": 2.4113128817845165e-06, |
| "loss": 0.1303, |
| "step": 1916 |
| }, |
| { |
| "epoch": 1.3471539002108222, |
| "grad_norm": 0.5327815129273105, |
| "learning_rate": 2.406592469692292e-06, |
| "loss": 0.1566, |
| "step": 1917 |
| }, |
| { |
| "epoch": 1.347856640899508, |
| "grad_norm": 0.5657132844042618, |
| "learning_rate": 2.4018752176989864e-06, |
| "loss": 0.1779, |
| "step": 1918 |
| }, |
| { |
| "epoch": 1.348559381588194, |
| "grad_norm": 0.5462692424625433, |
| "learning_rate": 2.3971611315526295e-06, |
| "loss": 0.1661, |
| "step": 1919 |
| }, |
| { |
| "epoch": 1.3492621222768797, |
| "grad_norm": 0.4845535800498534, |
| "learning_rate": 2.392450216997391e-06, |
| "loss": 0.12, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.3499648629655656, |
| "grad_norm": 0.5064621748609605, |
| "learning_rate": 2.3877424797735834e-06, |
| "loss": 0.1217, |
| "step": 1921 |
| }, |
| { |
| "epoch": 1.3506676036542515, |
| "grad_norm": 0.5419331329012272, |
| "learning_rate": 2.383037925617637e-06, |
| "loss": 0.1563, |
| "step": 1922 |
| }, |
| { |
| "epoch": 1.3513703443429375, |
| "grad_norm": 0.525351328602467, |
| "learning_rate": 2.3783365602621116e-06, |
| "loss": 0.1524, |
| "step": 1923 |
| }, |
| { |
| "epoch": 1.3520730850316234, |
| "grad_norm": 0.49639153719673085, |
| "learning_rate": 2.373638389435676e-06, |
| "loss": 0.1411, |
| "step": 1924 |
| }, |
| { |
| "epoch": 1.3527758257203093, |
| "grad_norm": 0.5222993820698251, |
| "learning_rate": 2.368943418863112e-06, |
| "loss": 0.1352, |
| "step": 1925 |
| }, |
| { |
| "epoch": 1.353478566408995, |
| "grad_norm": 0.5061532595194184, |
| "learning_rate": 2.3642516542652993e-06, |
| "loss": 0.1494, |
| "step": 1926 |
| }, |
| { |
| "epoch": 1.354181307097681, |
| "grad_norm": 0.5248954075948614, |
| "learning_rate": 2.359563101359208e-06, |
| "loss": 0.1577, |
| "step": 1927 |
| }, |
| { |
| "epoch": 1.3548840477863668, |
| "grad_norm": 0.49574925810066905, |
| "learning_rate": 2.3548777658578964e-06, |
| "loss": 0.1218, |
| "step": 1928 |
| }, |
| { |
| "epoch": 1.3555867884750528, |
| "grad_norm": 0.5576434451325147, |
| "learning_rate": 2.350195653470507e-06, |
| "loss": 0.1479, |
| "step": 1929 |
| }, |
| { |
| "epoch": 1.3562895291637385, |
| "grad_norm": 0.4715912767260302, |
| "learning_rate": 2.3455167699022497e-06, |
| "loss": 0.1114, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.3569922698524244, |
| "grad_norm": 0.4724305197988123, |
| "learning_rate": 2.3408411208544036e-06, |
| "loss": 0.1255, |
| "step": 1931 |
| }, |
| { |
| "epoch": 1.3576950105411103, |
| "grad_norm": 0.5188745796075134, |
| "learning_rate": 2.3361687120242986e-06, |
| "loss": 0.1288, |
| "step": 1932 |
| }, |
| { |
| "epoch": 1.3583977512297962, |
| "grad_norm": 0.5248266634453557, |
| "learning_rate": 2.331499549105328e-06, |
| "loss": 0.1532, |
| "step": 1933 |
| }, |
| { |
| "epoch": 1.3591004919184821, |
| "grad_norm": 0.5304302761744716, |
| "learning_rate": 2.3268336377869222e-06, |
| "loss": 0.1419, |
| "step": 1934 |
| }, |
| { |
| "epoch": 1.359803232607168, |
| "grad_norm": 0.5324957027484029, |
| "learning_rate": 2.322170983754553e-06, |
| "loss": 0.1384, |
| "step": 1935 |
| }, |
| { |
| "epoch": 1.3605059732958538, |
| "grad_norm": 0.5532695221140957, |
| "learning_rate": 2.3175115926897164e-06, |
| "loss": 0.156, |
| "step": 1936 |
| }, |
| { |
| "epoch": 1.3612087139845397, |
| "grad_norm": 0.5068068080227023, |
| "learning_rate": 2.312855470269943e-06, |
| "loss": 0.1377, |
| "step": 1937 |
| }, |
| { |
| "epoch": 1.3619114546732256, |
| "grad_norm": 0.5453469673278214, |
| "learning_rate": 2.3082026221687736e-06, |
| "loss": 0.1801, |
| "step": 1938 |
| }, |
| { |
| "epoch": 1.3626141953619115, |
| "grad_norm": 0.5265671485284429, |
| "learning_rate": 2.3035530540557606e-06, |
| "loss": 0.1492, |
| "step": 1939 |
| }, |
| { |
| "epoch": 1.3633169360505972, |
| "grad_norm": 0.49139032579352454, |
| "learning_rate": 2.2989067715964592e-06, |
| "loss": 0.1321, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.3640196767392831, |
| "grad_norm": 0.529716442358963, |
| "learning_rate": 2.2942637804524224e-06, |
| "loss": 0.1463, |
| "step": 1941 |
| }, |
| { |
| "epoch": 1.364722417427969, |
| "grad_norm": 0.5209589657201931, |
| "learning_rate": 2.289624086281192e-06, |
| "loss": 0.1479, |
| "step": 1942 |
| }, |
| { |
| "epoch": 1.365425158116655, |
| "grad_norm": 0.5016577040642886, |
| "learning_rate": 2.2849876947362916e-06, |
| "loss": 0.1331, |
| "step": 1943 |
| }, |
| { |
| "epoch": 1.366127898805341, |
| "grad_norm": 0.539055167208763, |
| "learning_rate": 2.28035461146722e-06, |
| "loss": 0.1647, |
| "step": 1944 |
| }, |
| { |
| "epoch": 1.3668306394940268, |
| "grad_norm": 0.5261244711392749, |
| "learning_rate": 2.275724842119451e-06, |
| "loss": 0.1432, |
| "step": 1945 |
| }, |
| { |
| "epoch": 1.3675333801827125, |
| "grad_norm": 0.5568764543776502, |
| "learning_rate": 2.2710983923344106e-06, |
| "loss": 0.1685, |
| "step": 1946 |
| }, |
| { |
| "epoch": 1.3682361208713985, |
| "grad_norm": 0.49062271894554804, |
| "learning_rate": 2.266475267749486e-06, |
| "loss": 0.1177, |
| "step": 1947 |
| }, |
| { |
| "epoch": 1.3689388615600844, |
| "grad_norm": 0.48696733638864986, |
| "learning_rate": 2.26185547399801e-06, |
| "loss": 0.1153, |
| "step": 1948 |
| }, |
| { |
| "epoch": 1.36964160224877, |
| "grad_norm": 0.49560405544084934, |
| "learning_rate": 2.2572390167092607e-06, |
| "loss": 0.1426, |
| "step": 1949 |
| }, |
| { |
| "epoch": 1.370344342937456, |
| "grad_norm": 0.5301140856189038, |
| "learning_rate": 2.252625901508449e-06, |
| "loss": 0.1578, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.371047083626142, |
| "grad_norm": 0.5093655282853022, |
| "learning_rate": 2.248016134016708e-06, |
| "loss": 0.1453, |
| "step": 1951 |
| }, |
| { |
| "epoch": 1.3717498243148278, |
| "grad_norm": 0.543262050045053, |
| "learning_rate": 2.2434097198510964e-06, |
| "loss": 0.1525, |
| "step": 1952 |
| }, |
| { |
| "epoch": 1.3724525650035138, |
| "grad_norm": 0.5265051707386406, |
| "learning_rate": 2.2388066646245895e-06, |
| "loss": 0.1522, |
| "step": 1953 |
| }, |
| { |
| "epoch": 1.3731553056921997, |
| "grad_norm": 0.5335276742676673, |
| "learning_rate": 2.2342069739460654e-06, |
| "loss": 0.1393, |
| "step": 1954 |
| }, |
| { |
| "epoch": 1.3738580463808854, |
| "grad_norm": 0.494281111058332, |
| "learning_rate": 2.229610653420306e-06, |
| "loss": 0.1372, |
| "step": 1955 |
| }, |
| { |
| "epoch": 1.3745607870695713, |
| "grad_norm": 0.4913495541219822, |
| "learning_rate": 2.2250177086479774e-06, |
| "loss": 0.1525, |
| "step": 1956 |
| }, |
| { |
| "epoch": 1.3752635277582572, |
| "grad_norm": 0.5007106634090195, |
| "learning_rate": 2.220428145225646e-06, |
| "loss": 0.1428, |
| "step": 1957 |
| }, |
| { |
| "epoch": 1.3759662684469431, |
| "grad_norm": 0.4844942783124336, |
| "learning_rate": 2.2158419687457484e-06, |
| "loss": 0.1253, |
| "step": 1958 |
| }, |
| { |
| "epoch": 1.3766690091356288, |
| "grad_norm": 0.5224064614703137, |
| "learning_rate": 2.2112591847965977e-06, |
| "loss": 0.16, |
| "step": 1959 |
| }, |
| { |
| "epoch": 1.3773717498243148, |
| "grad_norm": 0.5112556692103628, |
| "learning_rate": 2.206679798962372e-06, |
| "loss": 0.1502, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.3780744905130007, |
| "grad_norm": 0.5221819158333172, |
| "learning_rate": 2.202103816823109e-06, |
| "loss": 0.1553, |
| "step": 1961 |
| }, |
| { |
| "epoch": 1.3787772312016866, |
| "grad_norm": 0.5243230446835379, |
| "learning_rate": 2.1975312439547e-06, |
| "loss": 0.1471, |
| "step": 1962 |
| }, |
| { |
| "epoch": 1.3794799718903725, |
| "grad_norm": 0.49899460160338066, |
| "learning_rate": 2.1929620859288796e-06, |
| "loss": 0.1397, |
| "step": 1963 |
| }, |
| { |
| "epoch": 1.3801827125790584, |
| "grad_norm": 0.5222487798899712, |
| "learning_rate": 2.1883963483132243e-06, |
| "loss": 0.1579, |
| "step": 1964 |
| }, |
| { |
| "epoch": 1.3808854532677441, |
| "grad_norm": 0.49363235261228205, |
| "learning_rate": 2.1838340366711406e-06, |
| "loss": 0.1259, |
| "step": 1965 |
| }, |
| { |
| "epoch": 1.38158819395643, |
| "grad_norm": 0.498718548065352, |
| "learning_rate": 2.1792751565618625e-06, |
| "loss": 0.1313, |
| "step": 1966 |
| }, |
| { |
| "epoch": 1.382290934645116, |
| "grad_norm": 0.47289640528416244, |
| "learning_rate": 2.17471971354044e-06, |
| "loss": 0.1144, |
| "step": 1967 |
| }, |
| { |
| "epoch": 1.382993675333802, |
| "grad_norm": 0.49732662681393414, |
| "learning_rate": 2.170167713157736e-06, |
| "loss": 0.1286, |
| "step": 1968 |
| }, |
| { |
| "epoch": 1.3836964160224876, |
| "grad_norm": 0.5281736532502597, |
| "learning_rate": 2.165619160960423e-06, |
| "loss": 0.1468, |
| "step": 1969 |
| }, |
| { |
| "epoch": 1.3843991567111735, |
| "grad_norm": 0.5017770871745806, |
| "learning_rate": 2.161074062490962e-06, |
| "loss": 0.1301, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.3851018973998594, |
| "grad_norm": 0.5516180162182499, |
| "learning_rate": 2.1565324232876143e-06, |
| "loss": 0.1703, |
| "step": 1971 |
| }, |
| { |
| "epoch": 1.3858046380885454, |
| "grad_norm": 0.5393133994562473, |
| "learning_rate": 2.1519942488844208e-06, |
| "loss": 0.1418, |
| "step": 1972 |
| }, |
| { |
| "epoch": 1.3865073787772313, |
| "grad_norm": 0.509469521241699, |
| "learning_rate": 2.1474595448112064e-06, |
| "loss": 0.1433, |
| "step": 1973 |
| }, |
| { |
| "epoch": 1.3872101194659172, |
| "grad_norm": 0.5458557996064164, |
| "learning_rate": 2.142928316593563e-06, |
| "loss": 0.1574, |
| "step": 1974 |
| }, |
| { |
| "epoch": 1.387912860154603, |
| "grad_norm": 0.50012128313779, |
| "learning_rate": 2.1384005697528454e-06, |
| "loss": 0.1289, |
| "step": 1975 |
| }, |
| { |
| "epoch": 1.3886156008432888, |
| "grad_norm": 0.4933344845667629, |
| "learning_rate": 2.133876309806168e-06, |
| "loss": 0.1334, |
| "step": 1976 |
| }, |
| { |
| "epoch": 1.3893183415319748, |
| "grad_norm": 0.5363708061550977, |
| "learning_rate": 2.1293555422664e-06, |
| "loss": 0.1659, |
| "step": 1977 |
| }, |
| { |
| "epoch": 1.3900210822206605, |
| "grad_norm": 0.5214051908858237, |
| "learning_rate": 2.1248382726421525e-06, |
| "loss": 0.1561, |
| "step": 1978 |
| }, |
| { |
| "epoch": 1.3907238229093464, |
| "grad_norm": 0.5180654399804175, |
| "learning_rate": 2.1203245064377737e-06, |
| "loss": 0.1566, |
| "step": 1979 |
| }, |
| { |
| "epoch": 1.3914265635980323, |
| "grad_norm": 0.5032749283223799, |
| "learning_rate": 2.1158142491533384e-06, |
| "loss": 0.1274, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.3921293042867182, |
| "grad_norm": 0.4962899530235887, |
| "learning_rate": 2.111307506284656e-06, |
| "loss": 0.1479, |
| "step": 1981 |
| }, |
| { |
| "epoch": 1.3928320449754041, |
| "grad_norm": 0.5086023007941962, |
| "learning_rate": 2.106804283323246e-06, |
| "loss": 0.1457, |
| "step": 1982 |
| }, |
| { |
| "epoch": 1.39353478566409, |
| "grad_norm": 0.5193372699637497, |
| "learning_rate": 2.1023045857563417e-06, |
| "loss": 0.1527, |
| "step": 1983 |
| }, |
| { |
| "epoch": 1.3942375263527758, |
| "grad_norm": 0.49881275519383994, |
| "learning_rate": 2.0978084190668785e-06, |
| "loss": 0.1328, |
| "step": 1984 |
| }, |
| { |
| "epoch": 1.3949402670414617, |
| "grad_norm": 0.5273887840960143, |
| "learning_rate": 2.093315788733492e-06, |
| "loss": 0.1333, |
| "step": 1985 |
| }, |
| { |
| "epoch": 1.3956430077301476, |
| "grad_norm": 0.5287999702366413, |
| "learning_rate": 2.088826700230506e-06, |
| "loss": 0.1455, |
| "step": 1986 |
| }, |
| { |
| "epoch": 1.3963457484188335, |
| "grad_norm": 0.4821679279733663, |
| "learning_rate": 2.084341159027932e-06, |
| "loss": 0.1321, |
| "step": 1987 |
| }, |
| { |
| "epoch": 1.3970484891075192, |
| "grad_norm": 0.5296817721467848, |
| "learning_rate": 2.079859170591455e-06, |
| "loss": 0.1606, |
| "step": 1988 |
| }, |
| { |
| "epoch": 1.3977512297962051, |
| "grad_norm": 0.5269614340509261, |
| "learning_rate": 2.0753807403824346e-06, |
| "loss": 0.1481, |
| "step": 1989 |
| }, |
| { |
| "epoch": 1.398453970484891, |
| "grad_norm": 0.5028102448988832, |
| "learning_rate": 2.0709058738578915e-06, |
| "loss": 0.1417, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.399156711173577, |
| "grad_norm": 0.5090353625060701, |
| "learning_rate": 2.0664345764705064e-06, |
| "loss": 0.1382, |
| "step": 1991 |
| }, |
| { |
| "epoch": 1.399859451862263, |
| "grad_norm": 0.5011415001593718, |
| "learning_rate": 2.0619668536686095e-06, |
| "loss": 0.1465, |
| "step": 1992 |
| }, |
| { |
| "epoch": 1.4005621925509488, |
| "grad_norm": 0.534292604798123, |
| "learning_rate": 2.0575027108961766e-06, |
| "loss": 0.1631, |
| "step": 1993 |
| }, |
| { |
| "epoch": 1.4012649332396345, |
| "grad_norm": 0.5202199997234441, |
| "learning_rate": 2.0530421535928197e-06, |
| "loss": 0.1574, |
| "step": 1994 |
| }, |
| { |
| "epoch": 1.4019676739283204, |
| "grad_norm": 0.5072372439854256, |
| "learning_rate": 2.0485851871937833e-06, |
| "loss": 0.1487, |
| "step": 1995 |
| }, |
| { |
| "epoch": 1.4026704146170064, |
| "grad_norm": 0.5035054049412758, |
| "learning_rate": 2.044131817129934e-06, |
| "loss": 0.1499, |
| "step": 1996 |
| }, |
| { |
| "epoch": 1.403373155305692, |
| "grad_norm": 0.5150043404618471, |
| "learning_rate": 2.0396820488277606e-06, |
| "loss": 0.1422, |
| "step": 1997 |
| }, |
| { |
| "epoch": 1.404075895994378, |
| "grad_norm": 0.4890257808747772, |
| "learning_rate": 2.0352358877093616e-06, |
| "loss": 0.1332, |
| "step": 1998 |
| }, |
| { |
| "epoch": 1.404778636683064, |
| "grad_norm": 0.5057461383103304, |
| "learning_rate": 2.030793339192434e-06, |
| "loss": 0.1339, |
| "step": 1999 |
| }, |
| { |
| "epoch": 1.4054813773717498, |
| "grad_norm": 0.5076823837359717, |
| "learning_rate": 2.0263544086902785e-06, |
| "loss": 0.1444, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.4054813773717498, |
| "eval_loss": 0.1826380044221878, |
| "eval_runtime": 10.8554, |
| "eval_samples_per_second": 21.188, |
| "eval_steps_per_second": 5.343, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.4061841180604358, |
| "grad_norm": 0.5131854187997845, |
| "learning_rate": 2.0219191016117905e-06, |
| "loss": 0.1529, |
| "step": 2001 |
| }, |
| { |
| "epoch": 1.4068868587491217, |
| "grad_norm": 0.5104573945165697, |
| "learning_rate": 2.0174874233614433e-06, |
| "loss": 0.1367, |
| "step": 2002 |
| }, |
| { |
| "epoch": 1.4075895994378074, |
| "grad_norm": 0.5207156973057229, |
| "learning_rate": 2.013059379339294e-06, |
| "loss": 0.146, |
| "step": 2003 |
| }, |
| { |
| "epoch": 1.4082923401264933, |
| "grad_norm": 0.479655684284797, |
| "learning_rate": 2.008634974940962e-06, |
| "loss": 0.1204, |
| "step": 2004 |
| }, |
| { |
| "epoch": 1.4089950808151792, |
| "grad_norm": 0.4802072779399691, |
| "learning_rate": 2.004214215557645e-06, |
| "loss": 0.1187, |
| "step": 2005 |
| }, |
| { |
| "epoch": 1.4096978215038651, |
| "grad_norm": 0.5253795088467027, |
| "learning_rate": 1.9997971065760897e-06, |
| "loss": 0.1656, |
| "step": 2006 |
| }, |
| { |
| "epoch": 1.4104005621925508, |
| "grad_norm": 0.5171460592475359, |
| "learning_rate": 1.9953836533785986e-06, |
| "loss": 0.1574, |
| "step": 2007 |
| }, |
| { |
| "epoch": 1.4111033028812368, |
| "grad_norm": 0.48510682723998483, |
| "learning_rate": 1.9909738613430187e-06, |
| "loss": 0.1254, |
| "step": 2008 |
| }, |
| { |
| "epoch": 1.4118060435699227, |
| "grad_norm": 0.5356051465446946, |
| "learning_rate": 1.986567735842735e-06, |
| "loss": 0.1605, |
| "step": 2009 |
| }, |
| { |
| "epoch": 1.4125087842586086, |
| "grad_norm": 0.5540257584486038, |
| "learning_rate": 1.982165282246665e-06, |
| "loss": 0.1726, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.4132115249472945, |
| "grad_norm": 0.5039359359613459, |
| "learning_rate": 1.9777665059192542e-06, |
| "loss": 0.1497, |
| "step": 2011 |
| }, |
| { |
| "epoch": 1.4139142656359804, |
| "grad_norm": 0.5219384004348654, |
| "learning_rate": 1.9733714122204646e-06, |
| "loss": 0.1492, |
| "step": 2012 |
| }, |
| { |
| "epoch": 1.4146170063246661, |
| "grad_norm": 0.5152329715483507, |
| "learning_rate": 1.9689800065057716e-06, |
| "loss": 0.1366, |
| "step": 2013 |
| }, |
| { |
| "epoch": 1.415319747013352, |
| "grad_norm": 0.5219215989924776, |
| "learning_rate": 1.9645922941261575e-06, |
| "loss": 0.1534, |
| "step": 2014 |
| }, |
| { |
| "epoch": 1.416022487702038, |
| "grad_norm": 0.5281526630040745, |
| "learning_rate": 1.960208280428103e-06, |
| "loss": 0.1448, |
| "step": 2015 |
| }, |
| { |
| "epoch": 1.416725228390724, |
| "grad_norm": 0.5259203714755477, |
| "learning_rate": 1.955827970753583e-06, |
| "loss": 0.1492, |
| "step": 2016 |
| }, |
| { |
| "epoch": 1.4174279690794096, |
| "grad_norm": 0.53826986829664, |
| "learning_rate": 1.9514513704400593e-06, |
| "loss": 0.1554, |
| "step": 2017 |
| }, |
| { |
| "epoch": 1.4181307097680955, |
| "grad_norm": 0.504885814082157, |
| "learning_rate": 1.947078484820472e-06, |
| "loss": 0.141, |
| "step": 2018 |
| }, |
| { |
| "epoch": 1.4188334504567814, |
| "grad_norm": 0.48868717567609277, |
| "learning_rate": 1.9427093192232373e-06, |
| "loss": 0.1294, |
| "step": 2019 |
| }, |
| { |
| "epoch": 1.4195361911454674, |
| "grad_norm": 0.5189210369986282, |
| "learning_rate": 1.9383438789722353e-06, |
| "loss": 0.1571, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.4202389318341533, |
| "grad_norm": 0.5009087905881117, |
| "learning_rate": 1.9339821693868082e-06, |
| "loss": 0.1446, |
| "step": 2021 |
| }, |
| { |
| "epoch": 1.4209416725228392, |
| "grad_norm": 0.5043136984347781, |
| "learning_rate": 1.9296241957817575e-06, |
| "loss": 0.1613, |
| "step": 2022 |
| }, |
| { |
| "epoch": 1.421644413211525, |
| "grad_norm": 0.5253592191479244, |
| "learning_rate": 1.925269963467322e-06, |
| "loss": 0.1613, |
| "step": 2023 |
| }, |
| { |
| "epoch": 1.4223471539002108, |
| "grad_norm": 0.514189513890053, |
| "learning_rate": 1.9209194777491887e-06, |
| "loss": 0.1528, |
| "step": 2024 |
| }, |
| { |
| "epoch": 1.4230498945888967, |
| "grad_norm": 0.5482483688452934, |
| "learning_rate": 1.916572743928479e-06, |
| "loss": 0.17, |
| "step": 2025 |
| }, |
| { |
| "epoch": 1.4237526352775824, |
| "grad_norm": 0.4818544171729801, |
| "learning_rate": 1.912229767301741e-06, |
| "loss": 0.1328, |
| "step": 2026 |
| }, |
| { |
| "epoch": 1.4244553759662684, |
| "grad_norm": 0.5002816069324538, |
| "learning_rate": 1.907890553160947e-06, |
| "loss": 0.1444, |
| "step": 2027 |
| }, |
| { |
| "epoch": 1.4251581166549543, |
| "grad_norm": 0.5106788901812277, |
| "learning_rate": 1.903555106793477e-06, |
| "loss": 0.167, |
| "step": 2028 |
| }, |
| { |
| "epoch": 1.4258608573436402, |
| "grad_norm": 0.5194882916077539, |
| "learning_rate": 1.8992234334821313e-06, |
| "loss": 0.1525, |
| "step": 2029 |
| }, |
| { |
| "epoch": 1.4265635980323261, |
| "grad_norm": 0.5279227097337229, |
| "learning_rate": 1.894895538505105e-06, |
| "loss": 0.1516, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.427266338721012, |
| "grad_norm": 0.5125155464974567, |
| "learning_rate": 1.8905714271359909e-06, |
| "loss": 0.1388, |
| "step": 2031 |
| }, |
| { |
| "epoch": 1.4279690794096978, |
| "grad_norm": 0.4986869168396727, |
| "learning_rate": 1.886251104643772e-06, |
| "loss": 0.1371, |
| "step": 2032 |
| }, |
| { |
| "epoch": 1.4286718200983837, |
| "grad_norm": 0.5386131850939692, |
| "learning_rate": 1.8819345762928148e-06, |
| "loss": 0.1572, |
| "step": 2033 |
| }, |
| { |
| "epoch": 1.4293745607870696, |
| "grad_norm": 0.5147570364863752, |
| "learning_rate": 1.877621847342862e-06, |
| "loss": 0.1466, |
| "step": 2034 |
| }, |
| { |
| "epoch": 1.4300773014757555, |
| "grad_norm": 0.5170906038507872, |
| "learning_rate": 1.873312923049026e-06, |
| "loss": 0.1623, |
| "step": 2035 |
| }, |
| { |
| "epoch": 1.4307800421644412, |
| "grad_norm": 0.48784987395992657, |
| "learning_rate": 1.8690078086617847e-06, |
| "loss": 0.1203, |
| "step": 2036 |
| }, |
| { |
| "epoch": 1.4314827828531271, |
| "grad_norm": 0.5101351674685919, |
| "learning_rate": 1.864706509426973e-06, |
| "loss": 0.1487, |
| "step": 2037 |
| }, |
| { |
| "epoch": 1.432185523541813, |
| "grad_norm": 0.49552255830825465, |
| "learning_rate": 1.8604090305857757e-06, |
| "loss": 0.1413, |
| "step": 2038 |
| }, |
| { |
| "epoch": 1.432888264230499, |
| "grad_norm": 0.4905989635114193, |
| "learning_rate": 1.8561153773747253e-06, |
| "loss": 0.1338, |
| "step": 2039 |
| }, |
| { |
| "epoch": 1.433591004919185, |
| "grad_norm": 0.493134783533051, |
| "learning_rate": 1.851825555025689e-06, |
| "loss": 0.1293, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.4342937456078708, |
| "grad_norm": 0.48077940942391195, |
| "learning_rate": 1.8475395687658699e-06, |
| "loss": 0.1279, |
| "step": 2041 |
| }, |
| { |
| "epoch": 1.4349964862965565, |
| "grad_norm": 0.4715910653602217, |
| "learning_rate": 1.843257423817793e-06, |
| "loss": 0.1222, |
| "step": 2042 |
| }, |
| { |
| "epoch": 1.4356992269852424, |
| "grad_norm": 0.5551181849334715, |
| "learning_rate": 1.838979125399306e-06, |
| "loss": 0.1782, |
| "step": 2043 |
| }, |
| { |
| "epoch": 1.4364019676739284, |
| "grad_norm": 0.5029944238272629, |
| "learning_rate": 1.8347046787235677e-06, |
| "loss": 0.138, |
| "step": 2044 |
| }, |
| { |
| "epoch": 1.437104708362614, |
| "grad_norm": 0.5025199667016227, |
| "learning_rate": 1.8304340889990418e-06, |
| "loss": 0.1284, |
| "step": 2045 |
| }, |
| { |
| "epoch": 1.4378074490513, |
| "grad_norm": 0.5166701187982591, |
| "learning_rate": 1.8261673614294996e-06, |
| "loss": 0.1362, |
| "step": 2046 |
| }, |
| { |
| "epoch": 1.438510189739986, |
| "grad_norm": 0.518865687852262, |
| "learning_rate": 1.8219045012139957e-06, |
| "loss": 0.1478, |
| "step": 2047 |
| }, |
| { |
| "epoch": 1.4392129304286718, |
| "grad_norm": 0.48605772421114374, |
| "learning_rate": 1.8176455135468796e-06, |
| "loss": 0.1273, |
| "step": 2048 |
| }, |
| { |
| "epoch": 1.4399156711173577, |
| "grad_norm": 0.5264019128235427, |
| "learning_rate": 1.8133904036177785e-06, |
| "loss": 0.1437, |
| "step": 2049 |
| }, |
| { |
| "epoch": 1.4406184118060437, |
| "grad_norm": 0.5640079905965288, |
| "learning_rate": 1.809139176611599e-06, |
| "loss": 0.177, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.4413211524947294, |
| "grad_norm": 0.537064034931882, |
| "learning_rate": 1.804891837708514e-06, |
| "loss": 0.1685, |
| "step": 2051 |
| }, |
| { |
| "epoch": 1.4420238931834153, |
| "grad_norm": 0.5079898047846592, |
| "learning_rate": 1.8006483920839524e-06, |
| "loss": 0.1325, |
| "step": 2052 |
| }, |
| { |
| "epoch": 1.4427266338721012, |
| "grad_norm": 0.5695707721683408, |
| "learning_rate": 1.7964088449086103e-06, |
| "loss": 0.1852, |
| "step": 2053 |
| }, |
| { |
| "epoch": 1.4434293745607871, |
| "grad_norm": 0.5363631384695723, |
| "learning_rate": 1.792173201348426e-06, |
| "loss": 0.1618, |
| "step": 2054 |
| }, |
| { |
| "epoch": 1.4441321152494728, |
| "grad_norm": 0.5632395868037631, |
| "learning_rate": 1.7879414665645834e-06, |
| "loss": 0.1637, |
| "step": 2055 |
| }, |
| { |
| "epoch": 1.4448348559381587, |
| "grad_norm": 0.5360209729861478, |
| "learning_rate": 1.7837136457135035e-06, |
| "loss": 0.1529, |
| "step": 2056 |
| }, |
| { |
| "epoch": 1.4455375966268447, |
| "grad_norm": 0.5245271097047481, |
| "learning_rate": 1.7794897439468378e-06, |
| "loss": 0.147, |
| "step": 2057 |
| }, |
| { |
| "epoch": 1.4462403373155306, |
| "grad_norm": 0.5042534725139199, |
| "learning_rate": 1.7752697664114621e-06, |
| "loss": 0.1379, |
| "step": 2058 |
| }, |
| { |
| "epoch": 1.4469430780042165, |
| "grad_norm": 0.4838221136458502, |
| "learning_rate": 1.7710537182494714e-06, |
| "loss": 0.1214, |
| "step": 2059 |
| }, |
| { |
| "epoch": 1.4476458186929024, |
| "grad_norm": 0.4870099806636077, |
| "learning_rate": 1.7668416045981712e-06, |
| "loss": 0.1272, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.4483485593815881, |
| "grad_norm": 0.5151525413649795, |
| "learning_rate": 1.762633430590075e-06, |
| "loss": 0.1566, |
| "step": 2061 |
| }, |
| { |
| "epoch": 1.449051300070274, |
| "grad_norm": 0.4960453083623157, |
| "learning_rate": 1.7584292013528935e-06, |
| "loss": 0.138, |
| "step": 2062 |
| }, |
| { |
| "epoch": 1.44975404075896, |
| "grad_norm": 0.491359999587759, |
| "learning_rate": 1.754228922009532e-06, |
| "loss": 0.1406, |
| "step": 2063 |
| }, |
| { |
| "epoch": 1.450456781447646, |
| "grad_norm": 0.5141990562668809, |
| "learning_rate": 1.7500325976780824e-06, |
| "loss": 0.1401, |
| "step": 2064 |
| }, |
| { |
| "epoch": 1.4511595221363316, |
| "grad_norm": 0.480593857312402, |
| "learning_rate": 1.7458402334718177e-06, |
| "loss": 0.1134, |
| "step": 2065 |
| }, |
| { |
| "epoch": 1.4518622628250175, |
| "grad_norm": 0.4823544002980055, |
| "learning_rate": 1.741651834499185e-06, |
| "loss": 0.1186, |
| "step": 2066 |
| }, |
| { |
| "epoch": 1.4525650035137034, |
| "grad_norm": 0.4883901662939787, |
| "learning_rate": 1.7374674058637997e-06, |
| "loss": 0.131, |
| "step": 2067 |
| }, |
| { |
| "epoch": 1.4532677442023894, |
| "grad_norm": 0.49488089290245274, |
| "learning_rate": 1.7332869526644396e-06, |
| "loss": 0.1382, |
| "step": 2068 |
| }, |
| { |
| "epoch": 1.4539704848910753, |
| "grad_norm": 0.5025783147637897, |
| "learning_rate": 1.7291104799950364e-06, |
| "loss": 0.1421, |
| "step": 2069 |
| }, |
| { |
| "epoch": 1.4546732255797612, |
| "grad_norm": 0.5135337473852053, |
| "learning_rate": 1.7249379929446786e-06, |
| "loss": 0.1357, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.455375966268447, |
| "grad_norm": 0.5079848243622339, |
| "learning_rate": 1.7207694965975879e-06, |
| "loss": 0.1454, |
| "step": 2071 |
| }, |
| { |
| "epoch": 1.4560787069571328, |
| "grad_norm": 0.525085885735298, |
| "learning_rate": 1.71660499603313e-06, |
| "loss": 0.1428, |
| "step": 2072 |
| }, |
| { |
| "epoch": 1.4567814476458187, |
| "grad_norm": 0.5182851381761374, |
| "learning_rate": 1.7124444963257974e-06, |
| "loss": 0.1426, |
| "step": 2073 |
| }, |
| { |
| "epoch": 1.4574841883345044, |
| "grad_norm": 0.5164467264921618, |
| "learning_rate": 1.7082880025452147e-06, |
| "loss": 0.1446, |
| "step": 2074 |
| }, |
| { |
| "epoch": 1.4581869290231904, |
| "grad_norm": 0.48425978894094407, |
| "learning_rate": 1.70413551975612e-06, |
| "loss": 0.136, |
| "step": 2075 |
| }, |
| { |
| "epoch": 1.4588896697118763, |
| "grad_norm": 0.5349857216691936, |
| "learning_rate": 1.6999870530183615e-06, |
| "loss": 0.1627, |
| "step": 2076 |
| }, |
| { |
| "epoch": 1.4595924104005622, |
| "grad_norm": 0.4945920386944028, |
| "learning_rate": 1.6958426073868967e-06, |
| "loss": 0.1294, |
| "step": 2077 |
| }, |
| { |
| "epoch": 1.4602951510892481, |
| "grad_norm": 0.5492545918703949, |
| "learning_rate": 1.6917021879117861e-06, |
| "loss": 0.1786, |
| "step": 2078 |
| }, |
| { |
| "epoch": 1.460997891777934, |
| "grad_norm": 0.5099087505970852, |
| "learning_rate": 1.6875657996381812e-06, |
| "loss": 0.1582, |
| "step": 2079 |
| }, |
| { |
| "epoch": 1.4617006324666197, |
| "grad_norm": 0.49742097581952377, |
| "learning_rate": 1.6834334476063214e-06, |
| "loss": 0.1389, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.4624033731553057, |
| "grad_norm": 0.4826117925750648, |
| "learning_rate": 1.6793051368515283e-06, |
| "loss": 0.1309, |
| "step": 2081 |
| }, |
| { |
| "epoch": 1.4631061138439916, |
| "grad_norm": 0.513712362930316, |
| "learning_rate": 1.6751808724041996e-06, |
| "loss": 0.1377, |
| "step": 2082 |
| }, |
| { |
| "epoch": 1.4638088545326775, |
| "grad_norm": 0.5284740998171508, |
| "learning_rate": 1.6710606592898016e-06, |
| "loss": 0.1407, |
| "step": 2083 |
| }, |
| { |
| "epoch": 1.4645115952213632, |
| "grad_norm": 0.5266927611311717, |
| "learning_rate": 1.6669445025288649e-06, |
| "loss": 0.1542, |
| "step": 2084 |
| }, |
| { |
| "epoch": 1.4652143359100491, |
| "grad_norm": 0.5004330148088008, |
| "learning_rate": 1.6628324071369768e-06, |
| "loss": 0.1347, |
| "step": 2085 |
| }, |
| { |
| "epoch": 1.465917076598735, |
| "grad_norm": 0.5133116593057202, |
| "learning_rate": 1.6587243781247764e-06, |
| "loss": 0.1547, |
| "step": 2086 |
| }, |
| { |
| "epoch": 1.466619817287421, |
| "grad_norm": 0.5313696987465686, |
| "learning_rate": 1.6546204204979478e-06, |
| "loss": 0.1567, |
| "step": 2087 |
| }, |
| { |
| "epoch": 1.467322557976107, |
| "grad_norm": 0.4869528563529301, |
| "learning_rate": 1.6505205392572128e-06, |
| "loss": 0.126, |
| "step": 2088 |
| }, |
| { |
| "epoch": 1.4680252986647928, |
| "grad_norm": 0.489245879712265, |
| "learning_rate": 1.6464247393983273e-06, |
| "loss": 0.1108, |
| "step": 2089 |
| }, |
| { |
| "epoch": 1.4687280393534785, |
| "grad_norm": 0.515009361771683, |
| "learning_rate": 1.642333025912074e-06, |
| "loss": 0.1534, |
| "step": 2090 |
| }, |
| { |
| "epoch": 1.4694307800421644, |
| "grad_norm": 0.5215520866230325, |
| "learning_rate": 1.6382454037842565e-06, |
| "loss": 0.1797, |
| "step": 2091 |
| }, |
| { |
| "epoch": 1.4701335207308504, |
| "grad_norm": 0.5034561708219197, |
| "learning_rate": 1.6341618779956913e-06, |
| "loss": 0.1336, |
| "step": 2092 |
| }, |
| { |
| "epoch": 1.4708362614195363, |
| "grad_norm": 0.5133677130668264, |
| "learning_rate": 1.6300824535222043e-06, |
| "loss": 0.153, |
| "step": 2093 |
| }, |
| { |
| "epoch": 1.471539002108222, |
| "grad_norm": 0.5246991854782783, |
| "learning_rate": 1.626007135334629e-06, |
| "loss": 0.1632, |
| "step": 2094 |
| }, |
| { |
| "epoch": 1.472241742796908, |
| "grad_norm": 0.4804227544378076, |
| "learning_rate": 1.6219359283987852e-06, |
| "loss": 0.1213, |
| "step": 2095 |
| }, |
| { |
| "epoch": 1.4729444834855938, |
| "grad_norm": 0.5031868063988818, |
| "learning_rate": 1.6178688376754896e-06, |
| "loss": 0.1475, |
| "step": 2096 |
| }, |
| { |
| "epoch": 1.4736472241742797, |
| "grad_norm": 0.4971654477550433, |
| "learning_rate": 1.6138058681205425e-06, |
| "loss": 0.1343, |
| "step": 2097 |
| }, |
| { |
| "epoch": 1.4743499648629657, |
| "grad_norm": 0.5366178895790169, |
| "learning_rate": 1.6097470246847236e-06, |
| "loss": 0.157, |
| "step": 2098 |
| }, |
| { |
| "epoch": 1.4750527055516516, |
| "grad_norm": 0.5110633032912951, |
| "learning_rate": 1.6056923123137846e-06, |
| "loss": 0.1399, |
| "step": 2099 |
| }, |
| { |
| "epoch": 1.4757554462403373, |
| "grad_norm": 0.49700957520710715, |
| "learning_rate": 1.6016417359484388e-06, |
| "loss": 0.1393, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.4764581869290232, |
| "grad_norm": 0.5033159024264766, |
| "learning_rate": 1.5975953005243628e-06, |
| "loss": 0.1426, |
| "step": 2101 |
| }, |
| { |
| "epoch": 1.4771609276177091, |
| "grad_norm": 0.5479522458545618, |
| "learning_rate": 1.5935530109721915e-06, |
| "loss": 0.1795, |
| "step": 2102 |
| }, |
| { |
| "epoch": 1.4778636683063948, |
| "grad_norm": 0.5030601127371045, |
| "learning_rate": 1.5895148722175025e-06, |
| "loss": 0.139, |
| "step": 2103 |
| }, |
| { |
| "epoch": 1.4785664089950807, |
| "grad_norm": 0.501233824654983, |
| "learning_rate": 1.5854808891808192e-06, |
| "loss": 0.1292, |
| "step": 2104 |
| }, |
| { |
| "epoch": 1.4792691496837667, |
| "grad_norm": 0.507832389453275, |
| "learning_rate": 1.5814510667775944e-06, |
| "loss": 0.1409, |
| "step": 2105 |
| }, |
| { |
| "epoch": 1.4799718903724526, |
| "grad_norm": 0.5124476953162119, |
| "learning_rate": 1.5774254099182217e-06, |
| "loss": 0.1443, |
| "step": 2106 |
| }, |
| { |
| "epoch": 1.4806746310611385, |
| "grad_norm": 0.5322204944380016, |
| "learning_rate": 1.5734039235080112e-06, |
| "loss": 0.1584, |
| "step": 2107 |
| }, |
| { |
| "epoch": 1.4813773717498244, |
| "grad_norm": 0.47146448385700807, |
| "learning_rate": 1.5693866124471935e-06, |
| "loss": 0.1109, |
| "step": 2108 |
| }, |
| { |
| "epoch": 1.4820801124385101, |
| "grad_norm": 0.5016029280258626, |
| "learning_rate": 1.5653734816309113e-06, |
| "loss": 0.1323, |
| "step": 2109 |
| }, |
| { |
| "epoch": 1.482782853127196, |
| "grad_norm": 0.5101882079807832, |
| "learning_rate": 1.5613645359492141e-06, |
| "loss": 0.1435, |
| "step": 2110 |
| }, |
| { |
| "epoch": 1.483485593815882, |
| "grad_norm": 0.5091675982820156, |
| "learning_rate": 1.5573597802870515e-06, |
| "loss": 0.1393, |
| "step": 2111 |
| }, |
| { |
| "epoch": 1.4841883345045679, |
| "grad_norm": 0.5217815960604922, |
| "learning_rate": 1.5533592195242674e-06, |
| "loss": 0.1536, |
| "step": 2112 |
| }, |
| { |
| "epoch": 1.4848910751932536, |
| "grad_norm": 0.5183465180846747, |
| "learning_rate": 1.549362858535594e-06, |
| "loss": 0.1389, |
| "step": 2113 |
| }, |
| { |
| "epoch": 1.4855938158819395, |
| "grad_norm": 0.5071245694618659, |
| "learning_rate": 1.5453707021906467e-06, |
| "loss": 0.1558, |
| "step": 2114 |
| }, |
| { |
| "epoch": 1.4862965565706254, |
| "grad_norm": 0.5214478193648973, |
| "learning_rate": 1.5413827553539162e-06, |
| "loss": 0.1363, |
| "step": 2115 |
| }, |
| { |
| "epoch": 1.4869992972593113, |
| "grad_norm": 0.529511429283781, |
| "learning_rate": 1.5373990228847657e-06, |
| "loss": 0.1564, |
| "step": 2116 |
| }, |
| { |
| "epoch": 1.4877020379479973, |
| "grad_norm": 0.4878969089896803, |
| "learning_rate": 1.5334195096374193e-06, |
| "loss": 0.1238, |
| "step": 2117 |
| }, |
| { |
| "epoch": 1.4884047786366832, |
| "grad_norm": 0.5665371619087282, |
| "learning_rate": 1.529444220460969e-06, |
| "loss": 0.1621, |
| "step": 2118 |
| }, |
| { |
| "epoch": 1.489107519325369, |
| "grad_norm": 0.5201262867703623, |
| "learning_rate": 1.5254731601993472e-06, |
| "loss": 0.159, |
| "step": 2119 |
| }, |
| { |
| "epoch": 1.4898102600140548, |
| "grad_norm": 0.5097670660342238, |
| "learning_rate": 1.5215063336913421e-06, |
| "loss": 0.1479, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.4905130007027407, |
| "grad_norm": 0.5131401687757647, |
| "learning_rate": 1.5175437457705787e-06, |
| "loss": 0.1574, |
| "step": 2121 |
| }, |
| { |
| "epoch": 1.4912157413914264, |
| "grad_norm": 0.46788685857399476, |
| "learning_rate": 1.5135854012655227e-06, |
| "loss": 0.1282, |
| "step": 2122 |
| }, |
| { |
| "epoch": 1.4919184820801124, |
| "grad_norm": 0.5355929641138256, |
| "learning_rate": 1.509631304999465e-06, |
| "loss": 0.1606, |
| "step": 2123 |
| }, |
| { |
| "epoch": 1.4926212227687983, |
| "grad_norm": 0.5019990018308255, |
| "learning_rate": 1.5056814617905168e-06, |
| "loss": 0.126, |
| "step": 2124 |
| }, |
| { |
| "epoch": 1.4933239634574842, |
| "grad_norm": 0.5100567049359251, |
| "learning_rate": 1.501735876451611e-06, |
| "loss": 0.1353, |
| "step": 2125 |
| }, |
| { |
| "epoch": 1.4940267041461701, |
| "grad_norm": 0.5276711403853661, |
| "learning_rate": 1.4977945537904953e-06, |
| "loss": 0.1519, |
| "step": 2126 |
| }, |
| { |
| "epoch": 1.494729444834856, |
| "grad_norm": 0.4561955475270246, |
| "learning_rate": 1.4938574986097176e-06, |
| "loss": 0.1109, |
| "step": 2127 |
| }, |
| { |
| "epoch": 1.4954321855235417, |
| "grad_norm": 0.5258796232827486, |
| "learning_rate": 1.4899247157066303e-06, |
| "loss": 0.1531, |
| "step": 2128 |
| }, |
| { |
| "epoch": 1.4961349262122277, |
| "grad_norm": 0.5434050717223791, |
| "learning_rate": 1.485996209873372e-06, |
| "loss": 0.1667, |
| "step": 2129 |
| }, |
| { |
| "epoch": 1.4968376669009136, |
| "grad_norm": 0.47184637783971156, |
| "learning_rate": 1.4820719858968807e-06, |
| "loss": 0.1158, |
| "step": 2130 |
| }, |
| { |
| "epoch": 1.4975404075895995, |
| "grad_norm": 0.47350047699188863, |
| "learning_rate": 1.4781520485588696e-06, |
| "loss": 0.1266, |
| "step": 2131 |
| }, |
| { |
| "epoch": 1.4982431482782852, |
| "grad_norm": 0.508209436132349, |
| "learning_rate": 1.4742364026358307e-06, |
| "loss": 0.1458, |
| "step": 2132 |
| }, |
| { |
| "epoch": 1.4989458889669711, |
| "grad_norm": 0.5244850933042244, |
| "learning_rate": 1.4703250528990265e-06, |
| "loss": 0.1459, |
| "step": 2133 |
| }, |
| { |
| "epoch": 1.499648629655657, |
| "grad_norm": 0.5197565951343396, |
| "learning_rate": 1.4664180041144843e-06, |
| "loss": 0.1532, |
| "step": 2134 |
| }, |
| { |
| "epoch": 1.500351370344343, |
| "grad_norm": 0.5253142565422563, |
| "learning_rate": 1.4625152610429922e-06, |
| "loss": 0.1465, |
| "step": 2135 |
| }, |
| { |
| "epoch": 1.5010541110330289, |
| "grad_norm": 0.49960269677237373, |
| "learning_rate": 1.4586168284400893e-06, |
| "loss": 0.1366, |
| "step": 2136 |
| }, |
| { |
| "epoch": 1.5017568517217148, |
| "grad_norm": 0.529644657730334, |
| "learning_rate": 1.4547227110560642e-06, |
| "loss": 0.1599, |
| "step": 2137 |
| }, |
| { |
| "epoch": 1.5024595924104007, |
| "grad_norm": 0.5159155953257838, |
| "learning_rate": 1.4508329136359462e-06, |
| "loss": 0.1526, |
| "step": 2138 |
| }, |
| { |
| "epoch": 1.5031623330990864, |
| "grad_norm": 0.49239463898400426, |
| "learning_rate": 1.4469474409195017e-06, |
| "loss": 0.1369, |
| "step": 2139 |
| }, |
| { |
| "epoch": 1.5038650737877723, |
| "grad_norm": 0.5224057723398261, |
| "learning_rate": 1.4430662976412268e-06, |
| "loss": 0.1608, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.504567814476458, |
| "grad_norm": 0.5244999650971088, |
| "learning_rate": 1.4391894885303414e-06, |
| "loss": 0.1543, |
| "step": 2141 |
| }, |
| { |
| "epoch": 1.505270555165144, |
| "grad_norm": 0.5110844959732017, |
| "learning_rate": 1.4353170183107884e-06, |
| "loss": 0.1596, |
| "step": 2142 |
| }, |
| { |
| "epoch": 1.5059732958538299, |
| "grad_norm": 0.5093261756275554, |
| "learning_rate": 1.4314488917012164e-06, |
| "loss": 0.1465, |
| "step": 2143 |
| }, |
| { |
| "epoch": 1.5066760365425158, |
| "grad_norm": 0.4873406151854595, |
| "learning_rate": 1.4275851134149864e-06, |
| "loss": 0.1437, |
| "step": 2144 |
| }, |
| { |
| "epoch": 1.5073787772312017, |
| "grad_norm": 0.5329279562540382, |
| "learning_rate": 1.4237256881601585e-06, |
| "loss": 0.1546, |
| "step": 2145 |
| }, |
| { |
| "epoch": 1.5080815179198876, |
| "grad_norm": 0.5327753630385557, |
| "learning_rate": 1.4198706206394924e-06, |
| "loss": 0.177, |
| "step": 2146 |
| }, |
| { |
| "epoch": 1.5087842586085736, |
| "grad_norm": 0.5181599657597732, |
| "learning_rate": 1.4160199155504357e-06, |
| "loss": 0.1373, |
| "step": 2147 |
| }, |
| { |
| "epoch": 1.5094869992972593, |
| "grad_norm": 0.4692289997035836, |
| "learning_rate": 1.4121735775851164e-06, |
| "loss": 0.107, |
| "step": 2148 |
| }, |
| { |
| "epoch": 1.5101897399859452, |
| "grad_norm": 0.5088768403918668, |
| "learning_rate": 1.4083316114303448e-06, |
| "loss": 0.1523, |
| "step": 2149 |
| }, |
| { |
| "epoch": 1.510892480674631, |
| "grad_norm": 0.5094143771026569, |
| "learning_rate": 1.4044940217676061e-06, |
| "loss": 0.1411, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.5115952213633168, |
| "grad_norm": 0.511386387103471, |
| "learning_rate": 1.4006608132730504e-06, |
| "loss": 0.1407, |
| "step": 2151 |
| }, |
| { |
| "epoch": 1.5122979620520027, |
| "grad_norm": 0.520482910488152, |
| "learning_rate": 1.3968319906174893e-06, |
| "loss": 0.1506, |
| "step": 2152 |
| }, |
| { |
| "epoch": 1.5130007027406887, |
| "grad_norm": 0.5433542109670519, |
| "learning_rate": 1.3930075584663867e-06, |
| "loss": 0.1469, |
| "step": 2153 |
| }, |
| { |
| "epoch": 1.5137034434293746, |
| "grad_norm": 0.5272449396496899, |
| "learning_rate": 1.3891875214798644e-06, |
| "loss": 0.1509, |
| "step": 2154 |
| }, |
| { |
| "epoch": 1.5144061841180605, |
| "grad_norm": 0.4903186696381816, |
| "learning_rate": 1.3853718843126824e-06, |
| "loss": 0.1176, |
| "step": 2155 |
| }, |
| { |
| "epoch": 1.5151089248067464, |
| "grad_norm": 0.5067264890176539, |
| "learning_rate": 1.3815606516142422e-06, |
| "loss": 0.1479, |
| "step": 2156 |
| }, |
| { |
| "epoch": 1.5158116654954323, |
| "grad_norm": 0.5177281848447265, |
| "learning_rate": 1.3777538280285767e-06, |
| "loss": 0.1397, |
| "step": 2157 |
| }, |
| { |
| "epoch": 1.516514406184118, |
| "grad_norm": 0.5140016496572131, |
| "learning_rate": 1.3739514181943486e-06, |
| "loss": 0.1623, |
| "step": 2158 |
| }, |
| { |
| "epoch": 1.517217146872804, |
| "grad_norm": 0.5220023492099699, |
| "learning_rate": 1.3701534267448395e-06, |
| "loss": 0.1436, |
| "step": 2159 |
| }, |
| { |
| "epoch": 1.5179198875614897, |
| "grad_norm": 0.5161035372001767, |
| "learning_rate": 1.366359858307949e-06, |
| "loss": 0.1428, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.5186226282501756, |
| "grad_norm": 0.5086023982291189, |
| "learning_rate": 1.3625707175061876e-06, |
| "loss": 0.1297, |
| "step": 2161 |
| }, |
| { |
| "epoch": 1.5193253689388615, |
| "grad_norm": 0.5398633665566651, |
| "learning_rate": 1.358786008956669e-06, |
| "loss": 0.1571, |
| "step": 2162 |
| }, |
| { |
| "epoch": 1.5200281096275474, |
| "grad_norm": 0.5117688888959533, |
| "learning_rate": 1.3550057372711078e-06, |
| "loss": 0.136, |
| "step": 2163 |
| }, |
| { |
| "epoch": 1.5207308503162333, |
| "grad_norm": 0.49190830346284004, |
| "learning_rate": 1.3512299070558104e-06, |
| "loss": 0.1177, |
| "step": 2164 |
| }, |
| { |
| "epoch": 1.5214335910049193, |
| "grad_norm": 0.5213200786252551, |
| "learning_rate": 1.347458522911672e-06, |
| "loss": 0.1453, |
| "step": 2165 |
| }, |
| { |
| "epoch": 1.5221363316936052, |
| "grad_norm": 0.5226027442062637, |
| "learning_rate": 1.343691589434174e-06, |
| "loss": 0.1531, |
| "step": 2166 |
| }, |
| { |
| "epoch": 1.5228390723822909, |
| "grad_norm": 0.5503507406617123, |
| "learning_rate": 1.3399291112133673e-06, |
| "loss": 0.1734, |
| "step": 2167 |
| }, |
| { |
| "epoch": 1.5235418130709768, |
| "grad_norm": 0.5102965948709162, |
| "learning_rate": 1.336171092833879e-06, |
| "loss": 0.1455, |
| "step": 2168 |
| }, |
| { |
| "epoch": 1.5242445537596627, |
| "grad_norm": 0.5107129259550931, |
| "learning_rate": 1.3324175388748989e-06, |
| "loss": 0.1628, |
| "step": 2169 |
| }, |
| { |
| "epoch": 1.5249472944483484, |
| "grad_norm": 0.5070346465598122, |
| "learning_rate": 1.3286684539101823e-06, |
| "loss": 0.1332, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.5256500351370343, |
| "grad_norm": 0.51598371155757, |
| "learning_rate": 1.3249238425080346e-06, |
| "loss": 0.131, |
| "step": 2171 |
| }, |
| { |
| "epoch": 1.5263527758257203, |
| "grad_norm": 0.4882384084303687, |
| "learning_rate": 1.3211837092313074e-06, |
| "loss": 0.1308, |
| "step": 2172 |
| }, |
| { |
| "epoch": 1.5270555165144062, |
| "grad_norm": 0.5321774835869371, |
| "learning_rate": 1.3174480586374e-06, |
| "loss": 0.1708, |
| "step": 2173 |
| }, |
| { |
| "epoch": 1.527758257203092, |
| "grad_norm": 0.55387738733551, |
| "learning_rate": 1.3137168952782514e-06, |
| "loss": 0.1652, |
| "step": 2174 |
| }, |
| { |
| "epoch": 1.528460997891778, |
| "grad_norm": 0.5099557259534563, |
| "learning_rate": 1.309990223700328e-06, |
| "loss": 0.1451, |
| "step": 2175 |
| }, |
| { |
| "epoch": 1.529163738580464, |
| "grad_norm": 0.5410731968618563, |
| "learning_rate": 1.3062680484446267e-06, |
| "loss": 0.1587, |
| "step": 2176 |
| }, |
| { |
| "epoch": 1.5298664792691496, |
| "grad_norm": 0.4853197081691339, |
| "learning_rate": 1.3025503740466588e-06, |
| "loss": 0.1297, |
| "step": 2177 |
| }, |
| { |
| "epoch": 1.5305692199578356, |
| "grad_norm": 0.541524059003662, |
| "learning_rate": 1.298837205036461e-06, |
| "loss": 0.1693, |
| "step": 2178 |
| }, |
| { |
| "epoch": 1.5312719606465213, |
| "grad_norm": 0.4997483368619478, |
| "learning_rate": 1.2951285459385737e-06, |
| "loss": 0.124, |
| "step": 2179 |
| }, |
| { |
| "epoch": 1.5319747013352072, |
| "grad_norm": 0.5195231035312816, |
| "learning_rate": 1.291424401272044e-06, |
| "loss": 0.1487, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.532677442023893, |
| "grad_norm": 0.5063111561918022, |
| "learning_rate": 1.2877247755504174e-06, |
| "loss": 0.1347, |
| "step": 2181 |
| }, |
| { |
| "epoch": 1.533380182712579, |
| "grad_norm": 0.5491929250983179, |
| "learning_rate": 1.2840296732817332e-06, |
| "loss": 0.1649, |
| "step": 2182 |
| }, |
| { |
| "epoch": 1.534082923401265, |
| "grad_norm": 0.49328743508314504, |
| "learning_rate": 1.2803390989685189e-06, |
| "loss": 0.1233, |
| "step": 2183 |
| }, |
| { |
| "epoch": 1.5347856640899509, |
| "grad_norm": 0.5233912984109269, |
| "learning_rate": 1.276653057107784e-06, |
| "loss": 0.1462, |
| "step": 2184 |
| }, |
| { |
| "epoch": 1.5354884047786368, |
| "grad_norm": 0.49839242079793133, |
| "learning_rate": 1.2729715521910168e-06, |
| "loss": 0.1331, |
| "step": 2185 |
| }, |
| { |
| "epoch": 1.5361911454673227, |
| "grad_norm": 0.5043852726925345, |
| "learning_rate": 1.2692945887041763e-06, |
| "loss": 0.1387, |
| "step": 2186 |
| }, |
| { |
| "epoch": 1.5368938861560084, |
| "grad_norm": 0.5384067661653723, |
| "learning_rate": 1.2656221711276867e-06, |
| "loss": 0.1481, |
| "step": 2187 |
| }, |
| { |
| "epoch": 1.5375966268446943, |
| "grad_norm": 0.5479793052050106, |
| "learning_rate": 1.261954303936434e-06, |
| "loss": 0.1775, |
| "step": 2188 |
| }, |
| { |
| "epoch": 1.53829936753338, |
| "grad_norm": 0.45764911011948567, |
| "learning_rate": 1.2582909915997604e-06, |
| "loss": 0.117, |
| "step": 2189 |
| }, |
| { |
| "epoch": 1.539002108222066, |
| "grad_norm": 0.499823540139615, |
| "learning_rate": 1.2546322385814564e-06, |
| "loss": 0.1397, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.5397048489107519, |
| "grad_norm": 0.5095299030936337, |
| "learning_rate": 1.2509780493397573e-06, |
| "loss": 0.1319, |
| "step": 2191 |
| }, |
| { |
| "epoch": 1.5404075895994378, |
| "grad_norm": 0.5071698659448782, |
| "learning_rate": 1.2473284283273373e-06, |
| "loss": 0.1399, |
| "step": 2192 |
| }, |
| { |
| "epoch": 1.5411103302881237, |
| "grad_norm": 0.5234428919899052, |
| "learning_rate": 1.243683379991304e-06, |
| "loss": 0.1573, |
| "step": 2193 |
| }, |
| { |
| "epoch": 1.5418130709768096, |
| "grad_norm": 0.5350763730505442, |
| "learning_rate": 1.2400429087731952e-06, |
| "loss": 0.155, |
| "step": 2194 |
| }, |
| { |
| "epoch": 1.5425158116654956, |
| "grad_norm": 0.5091434987159394, |
| "learning_rate": 1.236407019108971e-06, |
| "loss": 0.1401, |
| "step": 2195 |
| }, |
| { |
| "epoch": 1.5432185523541813, |
| "grad_norm": 0.5023105860405399, |
| "learning_rate": 1.2327757154290037e-06, |
| "loss": 0.1321, |
| "step": 2196 |
| }, |
| { |
| "epoch": 1.5439212930428672, |
| "grad_norm": 0.5077759531075567, |
| "learning_rate": 1.229149002158082e-06, |
| "loss": 0.1531, |
| "step": 2197 |
| }, |
| { |
| "epoch": 1.544624033731553, |
| "grad_norm": 0.5259031385429787, |
| "learning_rate": 1.2255268837154034e-06, |
| "loss": 0.1346, |
| "step": 2198 |
| }, |
| { |
| "epoch": 1.5453267744202388, |
| "grad_norm": 0.499418004700236, |
| "learning_rate": 1.2219093645145613e-06, |
| "loss": 0.1363, |
| "step": 2199 |
| }, |
| { |
| "epoch": 1.5460295151089247, |
| "grad_norm": 0.5280318657161265, |
| "learning_rate": 1.2182964489635502e-06, |
| "loss": 0.1506, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.5467322557976106, |
| "grad_norm": 0.5338981790372893, |
| "learning_rate": 1.2146881414647471e-06, |
| "loss": 0.1574, |
| "step": 2201 |
| }, |
| { |
| "epoch": 1.5474349964862966, |
| "grad_norm": 0.513601118377399, |
| "learning_rate": 1.211084446414923e-06, |
| "loss": 0.1508, |
| "step": 2202 |
| }, |
| { |
| "epoch": 1.5481377371749825, |
| "grad_norm": 0.5302281271672624, |
| "learning_rate": 1.2074853682052235e-06, |
| "loss": 0.1437, |
| "step": 2203 |
| }, |
| { |
| "epoch": 1.5488404778636684, |
| "grad_norm": 0.5195178100677815, |
| "learning_rate": 1.20389091122117e-06, |
| "loss": 0.1477, |
| "step": 2204 |
| }, |
| { |
| "epoch": 1.5495432185523543, |
| "grad_norm": 0.5019957340440979, |
| "learning_rate": 1.2003010798426512e-06, |
| "loss": 0.1131, |
| "step": 2205 |
| }, |
| { |
| "epoch": 1.55024595924104, |
| "grad_norm": 0.510219552236996, |
| "learning_rate": 1.1967158784439214e-06, |
| "loss": 0.1545, |
| "step": 2206 |
| }, |
| { |
| "epoch": 1.550948699929726, |
| "grad_norm": 0.5172634742559926, |
| "learning_rate": 1.1931353113935935e-06, |
| "loss": 0.1553, |
| "step": 2207 |
| }, |
| { |
| "epoch": 1.5516514406184116, |
| "grad_norm": 0.5068735572234946, |
| "learning_rate": 1.1895593830546308e-06, |
| "loss": 0.1314, |
| "step": 2208 |
| }, |
| { |
| "epoch": 1.5523541813070976, |
| "grad_norm": 0.5019792388664622, |
| "learning_rate": 1.1859880977843469e-06, |
| "loss": 0.1289, |
| "step": 2209 |
| }, |
| { |
| "epoch": 1.5530569219957835, |
| "grad_norm": 0.5343633445717745, |
| "learning_rate": 1.1824214599343958e-06, |
| "loss": 0.1585, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.5537596626844694, |
| "grad_norm": 0.4983419591247481, |
| "learning_rate": 1.1788594738507708e-06, |
| "loss": 0.1542, |
| "step": 2211 |
| }, |
| { |
| "epoch": 1.5544624033731553, |
| "grad_norm": 0.5072492403320521, |
| "learning_rate": 1.175302143873795e-06, |
| "loss": 0.148, |
| "step": 2212 |
| }, |
| { |
| "epoch": 1.5551651440618413, |
| "grad_norm": 0.5115835547402442, |
| "learning_rate": 1.1717494743381187e-06, |
| "loss": 0.1482, |
| "step": 2213 |
| }, |
| { |
| "epoch": 1.5558678847505272, |
| "grad_norm": 0.5072302811409777, |
| "learning_rate": 1.1682014695727129e-06, |
| "loss": 0.1492, |
| "step": 2214 |
| }, |
| { |
| "epoch": 1.556570625439213, |
| "grad_norm": 0.48941247607237437, |
| "learning_rate": 1.164658133900866e-06, |
| "loss": 0.1369, |
| "step": 2215 |
| }, |
| { |
| "epoch": 1.5572733661278988, |
| "grad_norm": 0.5629621734782317, |
| "learning_rate": 1.1611194716401752e-06, |
| "loss": 0.1839, |
| "step": 2216 |
| }, |
| { |
| "epoch": 1.5579761068165847, |
| "grad_norm": 0.49186056840638026, |
| "learning_rate": 1.1575854871025445e-06, |
| "loss": 0.1244, |
| "step": 2217 |
| }, |
| { |
| "epoch": 1.5586788475052704, |
| "grad_norm": 0.48784211634601093, |
| "learning_rate": 1.154056184594175e-06, |
| "loss": 0.1357, |
| "step": 2218 |
| }, |
| { |
| "epoch": 1.5593815881939563, |
| "grad_norm": 0.51634235643248, |
| "learning_rate": 1.1505315684155704e-06, |
| "loss": 0.1446, |
| "step": 2219 |
| }, |
| { |
| "epoch": 1.5600843288826423, |
| "grad_norm": 0.5311972591720889, |
| "learning_rate": 1.1470116428615141e-06, |
| "loss": 0.1603, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.5607870695713282, |
| "grad_norm": 0.48075220298354765, |
| "learning_rate": 1.143496412221079e-06, |
| "loss": 0.1284, |
| "step": 2221 |
| }, |
| { |
| "epoch": 1.561489810260014, |
| "grad_norm": 0.5101512440680539, |
| "learning_rate": 1.1399858807776194e-06, |
| "loss": 0.1485, |
| "step": 2222 |
| }, |
| { |
| "epoch": 1.5621925509487, |
| "grad_norm": 0.5107976534764769, |
| "learning_rate": 1.1364800528087594e-06, |
| "loss": 0.1485, |
| "step": 2223 |
| }, |
| { |
| "epoch": 1.562895291637386, |
| "grad_norm": 0.5110004452171197, |
| "learning_rate": 1.132978932586395e-06, |
| "loss": 0.1487, |
| "step": 2224 |
| }, |
| { |
| "epoch": 1.5635980323260716, |
| "grad_norm": 0.5322552538407819, |
| "learning_rate": 1.1294825243766794e-06, |
| "loss": 0.1572, |
| "step": 2225 |
| }, |
| { |
| "epoch": 1.5643007730147576, |
| "grad_norm": 0.5157808574584885, |
| "learning_rate": 1.1259908324400343e-06, |
| "loss": 0.138, |
| "step": 2226 |
| }, |
| { |
| "epoch": 1.5650035137034435, |
| "grad_norm": 0.5216191938955658, |
| "learning_rate": 1.1225038610311267e-06, |
| "loss": 0.1611, |
| "step": 2227 |
| }, |
| { |
| "epoch": 1.5657062543921292, |
| "grad_norm": 0.4811572260100985, |
| "learning_rate": 1.1190216143988746e-06, |
| "loss": 0.1187, |
| "step": 2228 |
| }, |
| { |
| "epoch": 1.566408995080815, |
| "grad_norm": 0.5162514568978829, |
| "learning_rate": 1.115544096786439e-06, |
| "loss": 0.1433, |
| "step": 2229 |
| }, |
| { |
| "epoch": 1.567111735769501, |
| "grad_norm": 0.5273932111449955, |
| "learning_rate": 1.112071312431216e-06, |
| "loss": 0.1548, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.567814476458187, |
| "grad_norm": 0.5118261654104636, |
| "learning_rate": 1.1086032655648377e-06, |
| "loss": 0.1519, |
| "step": 2231 |
| }, |
| { |
| "epoch": 1.5685172171468729, |
| "grad_norm": 0.5017814253766492, |
| "learning_rate": 1.1051399604131601e-06, |
| "loss": 0.1271, |
| "step": 2232 |
| }, |
| { |
| "epoch": 1.5692199578355588, |
| "grad_norm": 0.5235570064798826, |
| "learning_rate": 1.1016814011962651e-06, |
| "loss": 0.1398, |
| "step": 2233 |
| }, |
| { |
| "epoch": 1.5699226985242447, |
| "grad_norm": 0.5574642218396508, |
| "learning_rate": 1.098227592128448e-06, |
| "loss": 0.1667, |
| "step": 2234 |
| }, |
| { |
| "epoch": 1.5706254392129304, |
| "grad_norm": 0.5406231676482339, |
| "learning_rate": 1.094778537418218e-06, |
| "loss": 0.1569, |
| "step": 2235 |
| }, |
| { |
| "epoch": 1.5713281799016163, |
| "grad_norm": 0.4789049280498609, |
| "learning_rate": 1.091334241268291e-06, |
| "loss": 0.1134, |
| "step": 2236 |
| }, |
| { |
| "epoch": 1.572030920590302, |
| "grad_norm": 0.5073056731351359, |
| "learning_rate": 1.0878947078755836e-06, |
| "loss": 0.1453, |
| "step": 2237 |
| }, |
| { |
| "epoch": 1.572733661278988, |
| "grad_norm": 0.524020076875916, |
| "learning_rate": 1.08445994143121e-06, |
| "loss": 0.1408, |
| "step": 2238 |
| }, |
| { |
| "epoch": 1.5734364019676739, |
| "grad_norm": 0.5254104049874896, |
| "learning_rate": 1.0810299461204749e-06, |
| "loss": 0.1513, |
| "step": 2239 |
| }, |
| { |
| "epoch": 1.5741391426563598, |
| "grad_norm": 0.5116747289633726, |
| "learning_rate": 1.0776047261228694e-06, |
| "loss": 0.1509, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.5748418833450457, |
| "grad_norm": 0.4981477241898844, |
| "learning_rate": 1.0741842856120665e-06, |
| "loss": 0.137, |
| "step": 2241 |
| }, |
| { |
| "epoch": 1.5755446240337316, |
| "grad_norm": 0.5442648850947261, |
| "learning_rate": 1.070768628755914e-06, |
| "loss": 0.1605, |
| "step": 2242 |
| }, |
| { |
| "epoch": 1.5762473647224176, |
| "grad_norm": 0.49407957712201855, |
| "learning_rate": 1.0673577597164352e-06, |
| "loss": 0.1437, |
| "step": 2243 |
| }, |
| { |
| "epoch": 1.5769501054111033, |
| "grad_norm": 0.4948569136977495, |
| "learning_rate": 1.0639516826498125e-06, |
| "loss": 0.1373, |
| "step": 2244 |
| }, |
| { |
| "epoch": 1.5776528460997892, |
| "grad_norm": 0.5129901488204297, |
| "learning_rate": 1.0605504017063927e-06, |
| "loss": 0.1377, |
| "step": 2245 |
| }, |
| { |
| "epoch": 1.578355586788475, |
| "grad_norm": 0.4884813921465927, |
| "learning_rate": 1.0571539210306785e-06, |
| "loss": 0.1306, |
| "step": 2246 |
| }, |
| { |
| "epoch": 1.5790583274771608, |
| "grad_norm": 0.533055317053586, |
| "learning_rate": 1.0537622447613249e-06, |
| "loss": 0.1593, |
| "step": 2247 |
| }, |
| { |
| "epoch": 1.5797610681658467, |
| "grad_norm": 0.5169736612637754, |
| "learning_rate": 1.050375377031132e-06, |
| "loss": 0.1343, |
| "step": 2248 |
| }, |
| { |
| "epoch": 1.5804638088545326, |
| "grad_norm": 0.5092996391909007, |
| "learning_rate": 1.0469933219670354e-06, |
| "loss": 0.1731, |
| "step": 2249 |
| }, |
| { |
| "epoch": 1.5811665495432186, |
| "grad_norm": 0.49312300954673655, |
| "learning_rate": 1.0436160836901138e-06, |
| "loss": 0.1369, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.5818692902319045, |
| "grad_norm": 0.5143062027674279, |
| "learning_rate": 1.0402436663155736e-06, |
| "loss": 0.1464, |
| "step": 2251 |
| }, |
| { |
| "epoch": 1.5825720309205904, |
| "grad_norm": 0.4961263841708869, |
| "learning_rate": 1.0368760739527455e-06, |
| "loss": 0.1462, |
| "step": 2252 |
| }, |
| { |
| "epoch": 1.5832747716092763, |
| "grad_norm": 0.48827827334800944, |
| "learning_rate": 1.0335133107050833e-06, |
| "loss": 0.1313, |
| "step": 2253 |
| }, |
| { |
| "epoch": 1.583977512297962, |
| "grad_norm": 0.5003738034616632, |
| "learning_rate": 1.0301553806701547e-06, |
| "loss": 0.1406, |
| "step": 2254 |
| }, |
| { |
| "epoch": 1.584680252986648, |
| "grad_norm": 0.5204509488609387, |
| "learning_rate": 1.0268022879396388e-06, |
| "loss": 0.1515, |
| "step": 2255 |
| }, |
| { |
| "epoch": 1.5853829936753336, |
| "grad_norm": 0.5342613945231212, |
| "learning_rate": 1.02345403659932e-06, |
| "loss": 0.1704, |
| "step": 2256 |
| }, |
| { |
| "epoch": 1.5860857343640196, |
| "grad_norm": 0.48880885015575143, |
| "learning_rate": 1.0201106307290842e-06, |
| "loss": 0.128, |
| "step": 2257 |
| }, |
| { |
| "epoch": 1.5867884750527055, |
| "grad_norm": 0.5122958838247167, |
| "learning_rate": 1.0167720744029118e-06, |
| "loss": 0.1508, |
| "step": 2258 |
| }, |
| { |
| "epoch": 1.5874912157413914, |
| "grad_norm": 0.5071165627608565, |
| "learning_rate": 1.0134383716888752e-06, |
| "loss": 0.1417, |
| "step": 2259 |
| }, |
| { |
| "epoch": 1.5881939564300773, |
| "grad_norm": 0.5290408040911058, |
| "learning_rate": 1.0101095266491323e-06, |
| "loss": 0.1611, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.5888966971187632, |
| "grad_norm": 0.4858876066191951, |
| "learning_rate": 1.006785543339921e-06, |
| "loss": 0.132, |
| "step": 2261 |
| }, |
| { |
| "epoch": 1.5895994378074492, |
| "grad_norm": 0.5256636395563555, |
| "learning_rate": 1.0034664258115561e-06, |
| "loss": 0.1634, |
| "step": 2262 |
| }, |
| { |
| "epoch": 1.590302178496135, |
| "grad_norm": 0.4956676211873417, |
| "learning_rate": 1.0001521781084233e-06, |
| "loss": 0.1297, |
| "step": 2263 |
| }, |
| { |
| "epoch": 1.5910049191848208, |
| "grad_norm": 0.5210150179348411, |
| "learning_rate": 9.968428042689738e-07, |
| "loss": 0.1547, |
| "step": 2264 |
| }, |
| { |
| "epoch": 1.5917076598735067, |
| "grad_norm": 0.4984418786059971, |
| "learning_rate": 9.935383083257199e-07, |
| "loss": 0.1435, |
| "step": 2265 |
| }, |
| { |
| "epoch": 1.5924104005621924, |
| "grad_norm": 0.4846669041708789, |
| "learning_rate": 9.9023869430523e-07, |
| "loss": 0.1151, |
| "step": 2266 |
| }, |
| { |
| "epoch": 1.5931131412508783, |
| "grad_norm": 0.49142196234118307, |
| "learning_rate": 9.869439662281276e-07, |
| "loss": 0.1247, |
| "step": 2267 |
| }, |
| { |
| "epoch": 1.5938158819395642, |
| "grad_norm": 0.5154558430938554, |
| "learning_rate": 9.836541281090757e-07, |
| "loss": 0.1307, |
| "step": 2268 |
| }, |
| { |
| "epoch": 1.5945186226282502, |
| "grad_norm": 0.5205658154982574, |
| "learning_rate": 9.803691839567835e-07, |
| "loss": 0.1515, |
| "step": 2269 |
| }, |
| { |
| "epoch": 1.595221363316936, |
| "grad_norm": 0.5115161976082128, |
| "learning_rate": 9.77089137773995e-07, |
| "loss": 0.1324, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.595924104005622, |
| "grad_norm": 0.5078002237780612, |
| "learning_rate": 9.738139935574893e-07, |
| "loss": 0.1468, |
| "step": 2271 |
| }, |
| { |
| "epoch": 1.596626844694308, |
| "grad_norm": 0.5206735967605656, |
| "learning_rate": 9.70543755298069e-07, |
| "loss": 0.1438, |
| "step": 2272 |
| }, |
| { |
| "epoch": 1.5973295853829936, |
| "grad_norm": 0.5432801333216214, |
| "learning_rate": 9.672784269805574e-07, |
| "loss": 0.1452, |
| "step": 2273 |
| }, |
| { |
| "epoch": 1.5980323260716796, |
| "grad_norm": 0.5450686717747824, |
| "learning_rate": 9.640180125837972e-07, |
| "loss": 0.1516, |
| "step": 2274 |
| }, |
| { |
| "epoch": 1.5987350667603655, |
| "grad_norm": 0.5039764969620342, |
| "learning_rate": 9.607625160806466e-07, |
| "loss": 0.1347, |
| "step": 2275 |
| }, |
| { |
| "epoch": 1.5994378074490512, |
| "grad_norm": 0.529784097768845, |
| "learning_rate": 9.575119414379657e-07, |
| "loss": 0.1622, |
| "step": 2276 |
| }, |
| { |
| "epoch": 1.600140548137737, |
| "grad_norm": 0.5345454544692056, |
| "learning_rate": 9.542662926166207e-07, |
| "loss": 0.1663, |
| "step": 2277 |
| }, |
| { |
| "epoch": 1.600843288826423, |
| "grad_norm": 0.4825191338212087, |
| "learning_rate": 9.510255735714735e-07, |
| "loss": 0.1285, |
| "step": 2278 |
| }, |
| { |
| "epoch": 1.601546029515109, |
| "grad_norm": 0.5374064787133747, |
| "learning_rate": 9.477897882513809e-07, |
| "loss": 0.156, |
| "step": 2279 |
| }, |
| { |
| "epoch": 1.6022487702037949, |
| "grad_norm": 0.5394896703211097, |
| "learning_rate": 9.445589405991862e-07, |
| "loss": 0.141, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.6029515108924808, |
| "grad_norm": 0.5074659761765667, |
| "learning_rate": 9.413330345517174e-07, |
| "loss": 0.1397, |
| "step": 2281 |
| }, |
| { |
| "epoch": 1.6036542515811667, |
| "grad_norm": 0.5542221038432015, |
| "learning_rate": 9.381120740397809e-07, |
| "loss": 0.1411, |
| "step": 2282 |
| }, |
| { |
| "epoch": 1.6043569922698524, |
| "grad_norm": 0.5317086337250758, |
| "learning_rate": 9.34896062988156e-07, |
| "loss": 0.17, |
| "step": 2283 |
| }, |
| { |
| "epoch": 1.6050597329585383, |
| "grad_norm": 0.5079594090701999, |
| "learning_rate": 9.316850053155923e-07, |
| "loss": 0.1335, |
| "step": 2284 |
| }, |
| { |
| "epoch": 1.605762473647224, |
| "grad_norm": 0.5197004214997037, |
| "learning_rate": 9.284789049348025e-07, |
| "loss": 0.1536, |
| "step": 2285 |
| }, |
| { |
| "epoch": 1.60646521433591, |
| "grad_norm": 0.5069837633405543, |
| "learning_rate": 9.252777657524598e-07, |
| "loss": 0.1351, |
| "step": 2286 |
| }, |
| { |
| "epoch": 1.6071679550245959, |
| "grad_norm": 0.5128782973377883, |
| "learning_rate": 9.220815916691911e-07, |
| "loss": 0.1524, |
| "step": 2287 |
| }, |
| { |
| "epoch": 1.6078706957132818, |
| "grad_norm": 0.5072606579831977, |
| "learning_rate": 9.18890386579574e-07, |
| "loss": 0.1364, |
| "step": 2288 |
| }, |
| { |
| "epoch": 1.6085734364019677, |
| "grad_norm": 0.5138978253319006, |
| "learning_rate": 9.157041543721307e-07, |
| "loss": 0.1506, |
| "step": 2289 |
| }, |
| { |
| "epoch": 1.6092761770906536, |
| "grad_norm": 0.5092956850533811, |
| "learning_rate": 9.125228989293234e-07, |
| "loss": 0.1452, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.6099789177793395, |
| "grad_norm": 0.5244667600954808, |
| "learning_rate": 9.093466241275551e-07, |
| "loss": 0.1718, |
| "step": 2291 |
| }, |
| { |
| "epoch": 1.6106816584680252, |
| "grad_norm": 0.5002388023948965, |
| "learning_rate": 9.061753338371509e-07, |
| "loss": 0.1375, |
| "step": 2292 |
| }, |
| { |
| "epoch": 1.6113843991567112, |
| "grad_norm": 0.5213143982929758, |
| "learning_rate": 9.030090319223689e-07, |
| "loss": 0.1458, |
| "step": 2293 |
| }, |
| { |
| "epoch": 1.612087139845397, |
| "grad_norm": 0.5217943373407234, |
| "learning_rate": 8.998477222413854e-07, |
| "loss": 0.1505, |
| "step": 2294 |
| }, |
| { |
| "epoch": 1.6127898805340828, |
| "grad_norm": 0.5210015888671756, |
| "learning_rate": 8.96691408646298e-07, |
| "loss": 0.1596, |
| "step": 2295 |
| }, |
| { |
| "epoch": 1.6134926212227687, |
| "grad_norm": 0.5113567748842857, |
| "learning_rate": 8.935400949831125e-07, |
| "loss": 0.1494, |
| "step": 2296 |
| }, |
| { |
| "epoch": 1.6141953619114546, |
| "grad_norm": 0.49175212721895384, |
| "learning_rate": 8.903937850917421e-07, |
| "loss": 0.1416, |
| "step": 2297 |
| }, |
| { |
| "epoch": 1.6148981026001406, |
| "grad_norm": 0.527821795899354, |
| "learning_rate": 8.87252482806003e-07, |
| "loss": 0.1558, |
| "step": 2298 |
| }, |
| { |
| "epoch": 1.6156008432888265, |
| "grad_norm": 0.5029335877582335, |
| "learning_rate": 8.841161919536134e-07, |
| "loss": 0.1403, |
| "step": 2299 |
| }, |
| { |
| "epoch": 1.6163035839775124, |
| "grad_norm": 0.48684100110647555, |
| "learning_rate": 8.809849163561812e-07, |
| "loss": 0.1321, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.6170063246661983, |
| "grad_norm": 0.49550472782171096, |
| "learning_rate": 8.778586598292055e-07, |
| "loss": 0.1307, |
| "step": 2301 |
| }, |
| { |
| "epoch": 1.617709065354884, |
| "grad_norm": 0.5096028642567304, |
| "learning_rate": 8.74737426182064e-07, |
| "loss": 0.1487, |
| "step": 2302 |
| }, |
| { |
| "epoch": 1.61841180604357, |
| "grad_norm": 0.5234130152806513, |
| "learning_rate": 8.716212192180223e-07, |
| "loss": 0.146, |
| "step": 2303 |
| }, |
| { |
| "epoch": 1.6191145467322556, |
| "grad_norm": 0.5320717266437079, |
| "learning_rate": 8.685100427342153e-07, |
| "loss": 0.1454, |
| "step": 2304 |
| }, |
| { |
| "epoch": 1.6198172874209416, |
| "grad_norm": 0.5079954187403964, |
| "learning_rate": 8.654039005216503e-07, |
| "loss": 0.14, |
| "step": 2305 |
| }, |
| { |
| "epoch": 1.6205200281096275, |
| "grad_norm": 0.5057831303192136, |
| "learning_rate": 8.623027963651998e-07, |
| "loss": 0.1419, |
| "step": 2306 |
| }, |
| { |
| "epoch": 1.6212227687983134, |
| "grad_norm": 0.5204770292694081, |
| "learning_rate": 8.592067340435961e-07, |
| "loss": 0.1428, |
| "step": 2307 |
| }, |
| { |
| "epoch": 1.6219255094869993, |
| "grad_norm": 0.5239518237454117, |
| "learning_rate": 8.561157173294305e-07, |
| "loss": 0.1466, |
| "step": 2308 |
| }, |
| { |
| "epoch": 1.6226282501756852, |
| "grad_norm": 0.4840991520517137, |
| "learning_rate": 8.530297499891444e-07, |
| "loss": 0.133, |
| "step": 2309 |
| }, |
| { |
| "epoch": 1.6233309908643712, |
| "grad_norm": 0.5273419965943936, |
| "learning_rate": 8.499488357830266e-07, |
| "loss": 0.1432, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.624033731553057, |
| "grad_norm": 0.5175758840691818, |
| "learning_rate": 8.468729784652091e-07, |
| "loss": 0.1491, |
| "step": 2311 |
| }, |
| { |
| "epoch": 1.6247364722417428, |
| "grad_norm": 0.47457090160700066, |
| "learning_rate": 8.438021817836617e-07, |
| "loss": 0.115, |
| "step": 2312 |
| }, |
| { |
| "epoch": 1.6254392129304287, |
| "grad_norm": 0.5277798554783621, |
| "learning_rate": 8.407364494801879e-07, |
| "loss": 0.1762, |
| "step": 2313 |
| }, |
| { |
| "epoch": 1.6261419536191144, |
| "grad_norm": 0.5300295294715935, |
| "learning_rate": 8.376757852904194e-07, |
| "loss": 0.1566, |
| "step": 2314 |
| }, |
| { |
| "epoch": 1.6268446943078003, |
| "grad_norm": 0.527024097049886, |
| "learning_rate": 8.346201929438158e-07, |
| "loss": 0.1508, |
| "step": 2315 |
| }, |
| { |
| "epoch": 1.6275474349964862, |
| "grad_norm": 0.5173928269861602, |
| "learning_rate": 8.31569676163651e-07, |
| "loss": 0.143, |
| "step": 2316 |
| }, |
| { |
| "epoch": 1.6282501756851722, |
| "grad_norm": 0.4880270080993743, |
| "learning_rate": 8.285242386670178e-07, |
| "loss": 0.1266, |
| "step": 2317 |
| }, |
| { |
| "epoch": 1.628952916373858, |
| "grad_norm": 0.4905342764166846, |
| "learning_rate": 8.254838841648188e-07, |
| "loss": 0.1298, |
| "step": 2318 |
| }, |
| { |
| "epoch": 1.629655657062544, |
| "grad_norm": 0.5292812716220816, |
| "learning_rate": 8.224486163617651e-07, |
| "loss": 0.1553, |
| "step": 2319 |
| }, |
| { |
| "epoch": 1.63035839775123, |
| "grad_norm": 0.5375650954209948, |
| "learning_rate": 8.194184389563681e-07, |
| "loss": 0.152, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.6310611384399156, |
| "grad_norm": 0.5249899005937064, |
| "learning_rate": 8.163933556409332e-07, |
| "loss": 0.1655, |
| "step": 2321 |
| }, |
| { |
| "epoch": 1.6317638791286015, |
| "grad_norm": 0.5008579174864624, |
| "learning_rate": 8.133733701015623e-07, |
| "loss": 0.1448, |
| "step": 2322 |
| }, |
| { |
| "epoch": 1.6324666198172875, |
| "grad_norm": 0.5275812524416689, |
| "learning_rate": 8.103584860181468e-07, |
| "loss": 0.1427, |
| "step": 2323 |
| }, |
| { |
| "epoch": 1.6331693605059732, |
| "grad_norm": 0.5208239793531394, |
| "learning_rate": 8.073487070643588e-07, |
| "loss": 0.1431, |
| "step": 2324 |
| }, |
| { |
| "epoch": 1.633872101194659, |
| "grad_norm": 0.5097900629432118, |
| "learning_rate": 8.043440369076522e-07, |
| "loss": 0.1458, |
| "step": 2325 |
| }, |
| { |
| "epoch": 1.634574841883345, |
| "grad_norm": 0.5095005341957477, |
| "learning_rate": 8.013444792092506e-07, |
| "loss": 0.1449, |
| "step": 2326 |
| }, |
| { |
| "epoch": 1.635277582572031, |
| "grad_norm": 0.5345229753716869, |
| "learning_rate": 7.98350037624156e-07, |
| "loss": 0.1649, |
| "step": 2327 |
| }, |
| { |
| "epoch": 1.6359803232607169, |
| "grad_norm": 0.4803018361460086, |
| "learning_rate": 7.953607158011311e-07, |
| "loss": 0.1346, |
| "step": 2328 |
| }, |
| { |
| "epoch": 1.6366830639494028, |
| "grad_norm": 0.5256630721228922, |
| "learning_rate": 7.923765173827003e-07, |
| "loss": 0.1471, |
| "step": 2329 |
| }, |
| { |
| "epoch": 1.6373858046380887, |
| "grad_norm": 0.5239766646899795, |
| "learning_rate": 7.893974460051474e-07, |
| "loss": 0.1599, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.6380885453267744, |
| "grad_norm": 0.4619956572855626, |
| "learning_rate": 7.864235052985059e-07, |
| "loss": 0.1119, |
| "step": 2331 |
| }, |
| { |
| "epoch": 1.6387912860154603, |
| "grad_norm": 0.48974247545807786, |
| "learning_rate": 7.834546988865605e-07, |
| "loss": 0.1343, |
| "step": 2332 |
| }, |
| { |
| "epoch": 1.639494026704146, |
| "grad_norm": 0.5059755421676134, |
| "learning_rate": 7.804910303868374e-07, |
| "loss": 0.1504, |
| "step": 2333 |
| }, |
| { |
| "epoch": 1.640196767392832, |
| "grad_norm": 0.506983971344678, |
| "learning_rate": 7.775325034106024e-07, |
| "loss": 0.1425, |
| "step": 2334 |
| }, |
| { |
| "epoch": 1.6408995080815179, |
| "grad_norm": 0.5094355686094365, |
| "learning_rate": 7.745791215628596e-07, |
| "loss": 0.1527, |
| "step": 2335 |
| }, |
| { |
| "epoch": 1.6416022487702038, |
| "grad_norm": 0.505821708619955, |
| "learning_rate": 7.716308884423385e-07, |
| "loss": 0.1572, |
| "step": 2336 |
| }, |
| { |
| "epoch": 1.6423049894588897, |
| "grad_norm": 0.4875178197686803, |
| "learning_rate": 7.686878076414984e-07, |
| "loss": 0.1275, |
| "step": 2337 |
| }, |
| { |
| "epoch": 1.6430077301475756, |
| "grad_norm": 0.5082564428895446, |
| "learning_rate": 7.657498827465176e-07, |
| "loss": 0.1449, |
| "step": 2338 |
| }, |
| { |
| "epoch": 1.6437104708362615, |
| "grad_norm": 0.5153668179679097, |
| "learning_rate": 7.628171173372973e-07, |
| "loss": 0.1621, |
| "step": 2339 |
| }, |
| { |
| "epoch": 1.6444132115249475, |
| "grad_norm": 0.555828918714009, |
| "learning_rate": 7.598895149874453e-07, |
| "loss": 0.1525, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.6451159522136332, |
| "grad_norm": 0.47107537304470376, |
| "learning_rate": 7.569670792642819e-07, |
| "loss": 0.1132, |
| "step": 2341 |
| }, |
| { |
| "epoch": 1.645818692902319, |
| "grad_norm": 0.4916614358769194, |
| "learning_rate": 7.540498137288294e-07, |
| "loss": 0.1228, |
| "step": 2342 |
| }, |
| { |
| "epoch": 1.6465214335910048, |
| "grad_norm": 0.5035749634030029, |
| "learning_rate": 7.51137721935814e-07, |
| "loss": 0.1423, |
| "step": 2343 |
| }, |
| { |
| "epoch": 1.6472241742796907, |
| "grad_norm": 0.5296296563991698, |
| "learning_rate": 7.482308074336558e-07, |
| "loss": 0.1652, |
| "step": 2344 |
| }, |
| { |
| "epoch": 1.6479269149683766, |
| "grad_norm": 0.6096326429096276, |
| "learning_rate": 7.453290737644631e-07, |
| "loss": 0.1345, |
| "step": 2345 |
| }, |
| { |
| "epoch": 1.6486296556570625, |
| "grad_norm": 0.5071869408599778, |
| "learning_rate": 7.42432524464034e-07, |
| "loss": 0.1324, |
| "step": 2346 |
| }, |
| { |
| "epoch": 1.6493323963457485, |
| "grad_norm": 0.5387024296544436, |
| "learning_rate": 7.39541163061852e-07, |
| "loss": 0.1526, |
| "step": 2347 |
| }, |
| { |
| "epoch": 1.6500351370344344, |
| "grad_norm": 0.480203002294713, |
| "learning_rate": 7.366549930810751e-07, |
| "loss": 0.1286, |
| "step": 2348 |
| }, |
| { |
| "epoch": 1.6507378777231203, |
| "grad_norm": 0.49344298898379824, |
| "learning_rate": 7.337740180385384e-07, |
| "loss": 0.1308, |
| "step": 2349 |
| }, |
| { |
| "epoch": 1.651440618411806, |
| "grad_norm": 0.49175543942265804, |
| "learning_rate": 7.308982414447407e-07, |
| "loss": 0.1361, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.652143359100492, |
| "grad_norm": 0.4909661959594217, |
| "learning_rate": 7.28027666803856e-07, |
| "loss": 0.1137, |
| "step": 2351 |
| }, |
| { |
| "epoch": 1.6528460997891778, |
| "grad_norm": 0.5162636490307603, |
| "learning_rate": 7.251622976137129e-07, |
| "loss": 0.1422, |
| "step": 2352 |
| }, |
| { |
| "epoch": 1.6535488404778635, |
| "grad_norm": 0.5090125943506136, |
| "learning_rate": 7.22302137365799e-07, |
| "loss": 0.1496, |
| "step": 2353 |
| }, |
| { |
| "epoch": 1.6542515811665495, |
| "grad_norm": 0.5116383303271588, |
| "learning_rate": 7.194471895452548e-07, |
| "loss": 0.133, |
| "step": 2354 |
| }, |
| { |
| "epoch": 1.6549543218552354, |
| "grad_norm": 0.493822635650948, |
| "learning_rate": 7.165974576308693e-07, |
| "loss": 0.1405, |
| "step": 2355 |
| }, |
| { |
| "epoch": 1.6556570625439213, |
| "grad_norm": 0.4905807719886315, |
| "learning_rate": 7.137529450950759e-07, |
| "loss": 0.122, |
| "step": 2356 |
| }, |
| { |
| "epoch": 1.6563598032326072, |
| "grad_norm": 0.5004371986255278, |
| "learning_rate": 7.109136554039475e-07, |
| "loss": 0.1529, |
| "step": 2357 |
| }, |
| { |
| "epoch": 1.6570625439212932, |
| "grad_norm": 0.5468228810965717, |
| "learning_rate": 7.080795920171934e-07, |
| "loss": 0.1618, |
| "step": 2358 |
| }, |
| { |
| "epoch": 1.657765284609979, |
| "grad_norm": 0.5425018208720173, |
| "learning_rate": 7.052507583881557e-07, |
| "loss": 0.1596, |
| "step": 2359 |
| }, |
| { |
| "epoch": 1.6584680252986648, |
| "grad_norm": 0.5031612125939998, |
| "learning_rate": 7.02427157963802e-07, |
| "loss": 0.1458, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.6591707659873507, |
| "grad_norm": 0.512872070538685, |
| "learning_rate": 6.996087941847246e-07, |
| "loss": 0.1461, |
| "step": 2361 |
| }, |
| { |
| "epoch": 1.6598735066760364, |
| "grad_norm": 0.5174019394695357, |
| "learning_rate": 6.96795670485133e-07, |
| "loss": 0.1611, |
| "step": 2362 |
| }, |
| { |
| "epoch": 1.6605762473647223, |
| "grad_norm": 0.5004973374301548, |
| "learning_rate": 6.93987790292856e-07, |
| "loss": 0.1278, |
| "step": 2363 |
| }, |
| { |
| "epoch": 1.6612789880534082, |
| "grad_norm": 0.5377978395806923, |
| "learning_rate": 6.911851570293271e-07, |
| "loss": 0.1579, |
| "step": 2364 |
| }, |
| { |
| "epoch": 1.6619817287420942, |
| "grad_norm": 0.5412554742787239, |
| "learning_rate": 6.883877741095907e-07, |
| "loss": 0.153, |
| "step": 2365 |
| }, |
| { |
| "epoch": 1.66268446943078, |
| "grad_norm": 0.5332035715220973, |
| "learning_rate": 6.855956449422907e-07, |
| "loss": 0.1519, |
| "step": 2366 |
| }, |
| { |
| "epoch": 1.663387210119466, |
| "grad_norm": 0.5070254553090329, |
| "learning_rate": 6.828087729296734e-07, |
| "loss": 0.1268, |
| "step": 2367 |
| }, |
| { |
| "epoch": 1.664089950808152, |
| "grad_norm": 0.5147766086337985, |
| "learning_rate": 6.800271614675763e-07, |
| "loss": 0.1327, |
| "step": 2368 |
| }, |
| { |
| "epoch": 1.6647926914968376, |
| "grad_norm": 0.4794259826824323, |
| "learning_rate": 6.772508139454248e-07, |
| "loss": 0.128, |
| "step": 2369 |
| }, |
| { |
| "epoch": 1.6654954321855235, |
| "grad_norm": 0.5002922450925902, |
| "learning_rate": 6.744797337462322e-07, |
| "loss": 0.1335, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.6661981728742095, |
| "grad_norm": 0.5269102344516572, |
| "learning_rate": 6.717139242465965e-07, |
| "loss": 0.15, |
| "step": 2371 |
| }, |
| { |
| "epoch": 1.6669009135628952, |
| "grad_norm": 0.46645795900459636, |
| "learning_rate": 6.689533888166893e-07, |
| "loss": 0.1229, |
| "step": 2372 |
| }, |
| { |
| "epoch": 1.667603654251581, |
| "grad_norm": 0.5598120255378365, |
| "learning_rate": 6.661981308202581e-07, |
| "loss": 0.1814, |
| "step": 2373 |
| }, |
| { |
| "epoch": 1.668306394940267, |
| "grad_norm": 0.49918107426335867, |
| "learning_rate": 6.634481536146153e-07, |
| "loss": 0.1269, |
| "step": 2374 |
| }, |
| { |
| "epoch": 1.669009135628953, |
| "grad_norm": 0.4895121066286522, |
| "learning_rate": 6.607034605506451e-07, |
| "loss": 0.1322, |
| "step": 2375 |
| }, |
| { |
| "epoch": 1.6697118763176388, |
| "grad_norm": 0.520883377380827, |
| "learning_rate": 6.579640549727884e-07, |
| "loss": 0.1423, |
| "step": 2376 |
| }, |
| { |
| "epoch": 1.6704146170063248, |
| "grad_norm": 0.4975352282908073, |
| "learning_rate": 6.552299402190443e-07, |
| "loss": 0.1264, |
| "step": 2377 |
| }, |
| { |
| "epoch": 1.6711173576950107, |
| "grad_norm": 0.48098684498429956, |
| "learning_rate": 6.525011196209657e-07, |
| "loss": 0.1231, |
| "step": 2378 |
| }, |
| { |
| "epoch": 1.6718200983836964, |
| "grad_norm": 0.5165363758513422, |
| "learning_rate": 6.497775965036545e-07, |
| "loss": 0.1482, |
| "step": 2379 |
| }, |
| { |
| "epoch": 1.6725228390723823, |
| "grad_norm": 0.5114743479853605, |
| "learning_rate": 6.470593741857562e-07, |
| "loss": 0.1432, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.673225579761068, |
| "grad_norm": 0.5273654787510778, |
| "learning_rate": 6.443464559794583e-07, |
| "loss": 0.1649, |
| "step": 2381 |
| }, |
| { |
| "epoch": 1.673928320449754, |
| "grad_norm": 0.5159148551260601, |
| "learning_rate": 6.416388451904848e-07, |
| "loss": 0.1557, |
| "step": 2382 |
| }, |
| { |
| "epoch": 1.6746310611384398, |
| "grad_norm": 0.5191454473362067, |
| "learning_rate": 6.389365451180928e-07, |
| "loss": 0.1477, |
| "step": 2383 |
| }, |
| { |
| "epoch": 1.6753338018271258, |
| "grad_norm": 0.5687120624192377, |
| "learning_rate": 6.362395590550685e-07, |
| "loss": 0.1482, |
| "step": 2384 |
| }, |
| { |
| "epoch": 1.6760365425158117, |
| "grad_norm": 0.526467280906606, |
| "learning_rate": 6.335478902877218e-07, |
| "loss": 0.1507, |
| "step": 2385 |
| }, |
| { |
| "epoch": 1.6767392832044976, |
| "grad_norm": 0.49784562198830185, |
| "learning_rate": 6.308615420958847e-07, |
| "loss": 0.1236, |
| "step": 2386 |
| }, |
| { |
| "epoch": 1.6774420238931835, |
| "grad_norm": 0.5021605236664165, |
| "learning_rate": 6.281805177529055e-07, |
| "loss": 0.1364, |
| "step": 2387 |
| }, |
| { |
| "epoch": 1.6781447645818695, |
| "grad_norm": 0.5433621471341872, |
| "learning_rate": 6.255048205256447e-07, |
| "loss": 0.1545, |
| "step": 2388 |
| }, |
| { |
| "epoch": 1.6788475052705552, |
| "grad_norm": 0.5086447206750199, |
| "learning_rate": 6.228344536744735e-07, |
| "loss": 0.1427, |
| "step": 2389 |
| }, |
| { |
| "epoch": 1.679550245959241, |
| "grad_norm": 0.488672149468298, |
| "learning_rate": 6.201694204532638e-07, |
| "loss": 0.1362, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.6802529866479268, |
| "grad_norm": 0.5015968183374993, |
| "learning_rate": 6.175097241093947e-07, |
| "loss": 0.1191, |
| "step": 2391 |
| }, |
| { |
| "epoch": 1.6809557273366127, |
| "grad_norm": 0.507982140467693, |
| "learning_rate": 6.148553678837388e-07, |
| "loss": 0.1361, |
| "step": 2392 |
| }, |
| { |
| "epoch": 1.6816584680252986, |
| "grad_norm": 0.543812531672844, |
| "learning_rate": 6.122063550106594e-07, |
| "loss": 0.1635, |
| "step": 2393 |
| }, |
| { |
| "epoch": 1.6823612087139845, |
| "grad_norm": 0.5051667241659186, |
| "learning_rate": 6.095626887180106e-07, |
| "loss": 0.1436, |
| "step": 2394 |
| }, |
| { |
| "epoch": 1.6830639494026705, |
| "grad_norm": 0.5190112740587888, |
| "learning_rate": 6.06924372227135e-07, |
| "loss": 0.1609, |
| "step": 2395 |
| }, |
| { |
| "epoch": 1.6837666900913564, |
| "grad_norm": 0.5012651989538832, |
| "learning_rate": 6.042914087528529e-07, |
| "loss": 0.1466, |
| "step": 2396 |
| }, |
| { |
| "epoch": 1.6844694307800423, |
| "grad_norm": 0.5342634253220491, |
| "learning_rate": 6.016638015034631e-07, |
| "loss": 0.1531, |
| "step": 2397 |
| }, |
| { |
| "epoch": 1.685172171468728, |
| "grad_norm": 0.5104916701635575, |
| "learning_rate": 5.990415536807348e-07, |
| "loss": 0.1368, |
| "step": 2398 |
| }, |
| { |
| "epoch": 1.685874912157414, |
| "grad_norm": 0.5402576298863184, |
| "learning_rate": 5.964246684799113e-07, |
| "loss": 0.1463, |
| "step": 2399 |
| }, |
| { |
| "epoch": 1.6865776528460998, |
| "grad_norm": 0.49478922563864103, |
| "learning_rate": 5.938131490896992e-07, |
| "loss": 0.132, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.6872803935347855, |
| "grad_norm": 0.4848426418999095, |
| "learning_rate": 5.912069986922664e-07, |
| "loss": 0.1284, |
| "step": 2401 |
| }, |
| { |
| "epoch": 1.6879831342234715, |
| "grad_norm": 0.5201410319194085, |
| "learning_rate": 5.886062204632392e-07, |
| "loss": 0.1666, |
| "step": 2402 |
| }, |
| { |
| "epoch": 1.6886858749121574, |
| "grad_norm": 0.48047980705813403, |
| "learning_rate": 5.860108175716983e-07, |
| "loss": 0.1295, |
| "step": 2403 |
| }, |
| { |
| "epoch": 1.6893886156008433, |
| "grad_norm": 0.4922821220869354, |
| "learning_rate": 5.834207931801733e-07, |
| "loss": 0.1335, |
| "step": 2404 |
| }, |
| { |
| "epoch": 1.6900913562895292, |
| "grad_norm": 0.4867400451453754, |
| "learning_rate": 5.808361504446413e-07, |
| "loss": 0.1362, |
| "step": 2405 |
| }, |
| { |
| "epoch": 1.6907940969782151, |
| "grad_norm": 0.49058499289490437, |
| "learning_rate": 5.78256892514521e-07, |
| "loss": 0.1328, |
| "step": 2406 |
| }, |
| { |
| "epoch": 1.691496837666901, |
| "grad_norm": 0.5098413704188566, |
| "learning_rate": 5.756830225326692e-07, |
| "loss": 0.1331, |
| "step": 2407 |
| }, |
| { |
| "epoch": 1.6921995783555868, |
| "grad_norm": 0.4927926778545796, |
| "learning_rate": 5.731145436353796e-07, |
| "loss": 0.1393, |
| "step": 2408 |
| }, |
| { |
| "epoch": 1.6929023190442727, |
| "grad_norm": 0.5310607286670721, |
| "learning_rate": 5.705514589523742e-07, |
| "loss": 0.1601, |
| "step": 2409 |
| }, |
| { |
| "epoch": 1.6936050597329584, |
| "grad_norm": 0.5102362802375777, |
| "learning_rate": 5.679937716068029e-07, |
| "loss": 0.1269, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.6943078004216443, |
| "grad_norm": 0.4963369484791542, |
| "learning_rate": 5.654414847152401e-07, |
| "loss": 0.1266, |
| "step": 2411 |
| }, |
| { |
| "epoch": 1.6950105411103302, |
| "grad_norm": 0.4979253801389448, |
| "learning_rate": 5.628946013876779e-07, |
| "loss": 0.1245, |
| "step": 2412 |
| }, |
| { |
| "epoch": 1.6957132817990161, |
| "grad_norm": 0.4706047744648595, |
| "learning_rate": 5.603531247275251e-07, |
| "loss": 0.1295, |
| "step": 2413 |
| }, |
| { |
| "epoch": 1.696416022487702, |
| "grad_norm": 0.5215378117236438, |
| "learning_rate": 5.578170578316017e-07, |
| "loss": 0.1449, |
| "step": 2414 |
| }, |
| { |
| "epoch": 1.697118763176388, |
| "grad_norm": 0.5105605056053008, |
| "learning_rate": 5.552864037901379e-07, |
| "loss": 0.1613, |
| "step": 2415 |
| }, |
| { |
| "epoch": 1.697821503865074, |
| "grad_norm": 0.5064441659619855, |
| "learning_rate": 5.527611656867666e-07, |
| "loss": 0.1441, |
| "step": 2416 |
| }, |
| { |
| "epoch": 1.6985242445537596, |
| "grad_norm": 0.5075552772873742, |
| "learning_rate": 5.502413465985196e-07, |
| "loss": 0.1528, |
| "step": 2417 |
| }, |
| { |
| "epoch": 1.6992269852424455, |
| "grad_norm": 0.4746001706879934, |
| "learning_rate": 5.477269495958276e-07, |
| "loss": 0.1174, |
| "step": 2418 |
| }, |
| { |
| "epoch": 1.6999297259311315, |
| "grad_norm": 0.5052458891568644, |
| "learning_rate": 5.452179777425159e-07, |
| "loss": 0.1379, |
| "step": 2419 |
| }, |
| { |
| "epoch": 1.7006324666198172, |
| "grad_norm": 0.5213509493117172, |
| "learning_rate": 5.427144340957968e-07, |
| "loss": 0.146, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.701335207308503, |
| "grad_norm": 0.5330086349381643, |
| "learning_rate": 5.402163217062695e-07, |
| "loss": 0.158, |
| "step": 2421 |
| }, |
| { |
| "epoch": 1.702037947997189, |
| "grad_norm": 0.5214808636163243, |
| "learning_rate": 5.377236436179123e-07, |
| "loss": 0.1591, |
| "step": 2422 |
| }, |
| { |
| "epoch": 1.702740688685875, |
| "grad_norm": 0.5093912142497137, |
| "learning_rate": 5.352364028680868e-07, |
| "loss": 0.1124, |
| "step": 2423 |
| }, |
| { |
| "epoch": 1.7034434293745608, |
| "grad_norm": 0.5016904016774145, |
| "learning_rate": 5.327546024875252e-07, |
| "loss": 0.1292, |
| "step": 2424 |
| }, |
| { |
| "epoch": 1.7041461700632468, |
| "grad_norm": 0.5246425921614887, |
| "learning_rate": 5.302782455003313e-07, |
| "loss": 0.1581, |
| "step": 2425 |
| }, |
| { |
| "epoch": 1.7048489107519327, |
| "grad_norm": 0.508729816721276, |
| "learning_rate": 5.278073349239776e-07, |
| "loss": 0.148, |
| "step": 2426 |
| }, |
| { |
| "epoch": 1.7055516514406184, |
| "grad_norm": 0.4953460594490315, |
| "learning_rate": 5.253418737692983e-07, |
| "loss": 0.1222, |
| "step": 2427 |
| }, |
| { |
| "epoch": 1.7062543921293043, |
| "grad_norm": 0.46518839809506507, |
| "learning_rate": 5.228818650404883e-07, |
| "loss": 0.1167, |
| "step": 2428 |
| }, |
| { |
| "epoch": 1.70695713281799, |
| "grad_norm": 0.5300380812375458, |
| "learning_rate": 5.204273117350983e-07, |
| "loss": 0.1638, |
| "step": 2429 |
| }, |
| { |
| "epoch": 1.707659873506676, |
| "grad_norm": 0.5160670771529812, |
| "learning_rate": 5.179782168440317e-07, |
| "loss": 0.1467, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.7083626141953618, |
| "grad_norm": 0.5078286556518824, |
| "learning_rate": 5.155345833515408e-07, |
| "loss": 0.1471, |
| "step": 2431 |
| }, |
| { |
| "epoch": 1.7090653548840478, |
| "grad_norm": 0.5213953654428531, |
| "learning_rate": 5.130964142352223e-07, |
| "loss": 0.1425, |
| "step": 2432 |
| }, |
| { |
| "epoch": 1.7097680955727337, |
| "grad_norm": 0.4912589892481834, |
| "learning_rate": 5.106637124660164e-07, |
| "loss": 0.1303, |
| "step": 2433 |
| }, |
| { |
| "epoch": 1.7104708362614196, |
| "grad_norm": 0.5098794137430794, |
| "learning_rate": 5.082364810081991e-07, |
| "loss": 0.1472, |
| "step": 2434 |
| }, |
| { |
| "epoch": 1.7111735769501055, |
| "grad_norm": 0.5097065734170783, |
| "learning_rate": 5.058147228193828e-07, |
| "loss": 0.1432, |
| "step": 2435 |
| }, |
| { |
| "epoch": 1.7118763176387914, |
| "grad_norm": 0.5197966266223981, |
| "learning_rate": 5.033984408505083e-07, |
| "loss": 0.1565, |
| "step": 2436 |
| }, |
| { |
| "epoch": 1.7125790583274771, |
| "grad_norm": 0.5395624881326725, |
| "learning_rate": 5.00987638045845e-07, |
| "loss": 0.1627, |
| "step": 2437 |
| }, |
| { |
| "epoch": 1.713281799016163, |
| "grad_norm": 0.5050550106778823, |
| "learning_rate": 4.985823173429871e-07, |
| "loss": 0.1364, |
| "step": 2438 |
| }, |
| { |
| "epoch": 1.7139845397048488, |
| "grad_norm": 0.5133659277558639, |
| "learning_rate": 4.96182481672845e-07, |
| "loss": 0.168, |
| "step": 2439 |
| }, |
| { |
| "epoch": 1.7146872803935347, |
| "grad_norm": 0.4957226425404724, |
| "learning_rate": 4.937881339596518e-07, |
| "loss": 0.1354, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.7153900210822206, |
| "grad_norm": 0.5038820359283196, |
| "learning_rate": 4.913992771209458e-07, |
| "loss": 0.1569, |
| "step": 2441 |
| }, |
| { |
| "epoch": 1.7160927617709065, |
| "grad_norm": 0.4646110309365998, |
| "learning_rate": 4.890159140675787e-07, |
| "loss": 0.1187, |
| "step": 2442 |
| }, |
| { |
| "epoch": 1.7167955024595924, |
| "grad_norm": 0.5124760939995081, |
| "learning_rate": 4.866380477037097e-07, |
| "loss": 0.1672, |
| "step": 2443 |
| }, |
| { |
| "epoch": 1.7174982431482784, |
| "grad_norm": 0.5222516788464301, |
| "learning_rate": 4.842656809267976e-07, |
| "loss": 0.1507, |
| "step": 2444 |
| }, |
| { |
| "epoch": 1.7182009838369643, |
| "grad_norm": 0.4985368526066697, |
| "learning_rate": 4.818988166276006e-07, |
| "loss": 0.1506, |
| "step": 2445 |
| }, |
| { |
| "epoch": 1.71890372452565, |
| "grad_norm": 0.449723651901996, |
| "learning_rate": 4.795374576901696e-07, |
| "loss": 0.1125, |
| "step": 2446 |
| }, |
| { |
| "epoch": 1.719606465214336, |
| "grad_norm": 0.5178151029260145, |
| "learning_rate": 4.771816069918522e-07, |
| "loss": 0.1497, |
| "step": 2447 |
| }, |
| { |
| "epoch": 1.7203092059030218, |
| "grad_norm": 0.45657534578624065, |
| "learning_rate": 4.7483126740328013e-07, |
| "loss": 0.1233, |
| "step": 2448 |
| }, |
| { |
| "epoch": 1.7210119465917075, |
| "grad_norm": 0.5025267713102743, |
| "learning_rate": 4.7248644178837176e-07, |
| "loss": 0.1345, |
| "step": 2449 |
| }, |
| { |
| "epoch": 1.7217146872803935, |
| "grad_norm": 0.5151146709386307, |
| "learning_rate": 4.7014713300432504e-07, |
| "loss": 0.1535, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.7224174279690794, |
| "grad_norm": 0.5209287261898038, |
| "learning_rate": 4.6781334390161745e-07, |
| "loss": 0.1431, |
| "step": 2451 |
| }, |
| { |
| "epoch": 1.7231201686577653, |
| "grad_norm": 0.5120122620271434, |
| "learning_rate": 4.6548507732399826e-07, |
| "loss": 0.1287, |
| "step": 2452 |
| }, |
| { |
| "epoch": 1.7238229093464512, |
| "grad_norm": 0.46997281974094524, |
| "learning_rate": 4.631623361084903e-07, |
| "loss": 0.1254, |
| "step": 2453 |
| }, |
| { |
| "epoch": 1.7245256500351371, |
| "grad_norm": 0.5192006381261042, |
| "learning_rate": 4.6084512308538165e-07, |
| "loss": 0.1562, |
| "step": 2454 |
| }, |
| { |
| "epoch": 1.725228390723823, |
| "grad_norm": 0.5008623260500528, |
| "learning_rate": 4.585334410782244e-07, |
| "loss": 0.1263, |
| "step": 2455 |
| }, |
| { |
| "epoch": 1.7259311314125088, |
| "grad_norm": 0.5143719878493999, |
| "learning_rate": 4.562272929038325e-07, |
| "loss": 0.1552, |
| "step": 2456 |
| }, |
| { |
| "epoch": 1.7266338721011947, |
| "grad_norm": 0.548662826223495, |
| "learning_rate": 4.539266813722748e-07, |
| "loss": 0.1502, |
| "step": 2457 |
| }, |
| { |
| "epoch": 1.7273366127898804, |
| "grad_norm": 0.5018819839038278, |
| "learning_rate": 4.51631609286875e-07, |
| "loss": 0.1388, |
| "step": 2458 |
| }, |
| { |
| "epoch": 1.7280393534785663, |
| "grad_norm": 0.5207434511563519, |
| "learning_rate": 4.4934207944420604e-07, |
| "loss": 0.1399, |
| "step": 2459 |
| }, |
| { |
| "epoch": 1.7287420941672522, |
| "grad_norm": 0.5425488205953698, |
| "learning_rate": 4.4705809463409077e-07, |
| "loss": 0.165, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.7294448348559381, |
| "grad_norm": 0.5037567942770113, |
| "learning_rate": 4.447796576395896e-07, |
| "loss": 0.1388, |
| "step": 2461 |
| }, |
| { |
| "epoch": 1.730147575544624, |
| "grad_norm": 0.5358452059638205, |
| "learning_rate": 4.425067712370074e-07, |
| "loss": 0.1641, |
| "step": 2462 |
| }, |
| { |
| "epoch": 1.73085031623331, |
| "grad_norm": 0.5014671352903216, |
| "learning_rate": 4.40239438195883e-07, |
| "loss": 0.133, |
| "step": 2463 |
| }, |
| { |
| "epoch": 1.731553056921996, |
| "grad_norm": 0.5056100677702643, |
| "learning_rate": 4.379776612789921e-07, |
| "loss": 0.1471, |
| "step": 2464 |
| }, |
| { |
| "epoch": 1.7322557976106818, |
| "grad_norm": 0.500848117316493, |
| "learning_rate": 4.357214432423351e-07, |
| "loss": 0.1442, |
| "step": 2465 |
| }, |
| { |
| "epoch": 1.7329585382993675, |
| "grad_norm": 0.49673774404191945, |
| "learning_rate": 4.334707868351423e-07, |
| "loss": 0.1315, |
| "step": 2466 |
| }, |
| { |
| "epoch": 1.7336612789880534, |
| "grad_norm": 0.5726501902979345, |
| "learning_rate": 4.312256947998655e-07, |
| "loss": 0.1805, |
| "step": 2467 |
| }, |
| { |
| "epoch": 1.7343640196767391, |
| "grad_norm": 0.48459402143841357, |
| "learning_rate": 4.2898616987217866e-07, |
| "loss": 0.1383, |
| "step": 2468 |
| }, |
| { |
| "epoch": 1.735066760365425, |
| "grad_norm": 0.49921590652395076, |
| "learning_rate": 4.2675221478096995e-07, |
| "loss": 0.1285, |
| "step": 2469 |
| }, |
| { |
| "epoch": 1.735769501054111, |
| "grad_norm": 0.5216661199055698, |
| "learning_rate": 4.245238322483386e-07, |
| "loss": 0.1456, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.736472241742797, |
| "grad_norm": 0.4987564923420657, |
| "learning_rate": 4.223010249895987e-07, |
| "loss": 0.1469, |
| "step": 2471 |
| }, |
| { |
| "epoch": 1.7371749824314828, |
| "grad_norm": 0.5366348222163734, |
| "learning_rate": 4.2008379571326753e-07, |
| "loss": 0.1646, |
| "step": 2472 |
| }, |
| { |
| "epoch": 1.7378777231201687, |
| "grad_norm": 0.4779108172235227, |
| "learning_rate": 4.178721471210662e-07, |
| "loss": 0.1113, |
| "step": 2473 |
| }, |
| { |
| "epoch": 1.7385804638088547, |
| "grad_norm": 0.48603005090548207, |
| "learning_rate": 4.156660819079156e-07, |
| "loss": 0.1378, |
| "step": 2474 |
| }, |
| { |
| "epoch": 1.7392832044975404, |
| "grad_norm": 0.4981575526182675, |
| "learning_rate": 4.134656027619333e-07, |
| "loss": 0.1309, |
| "step": 2475 |
| }, |
| { |
| "epoch": 1.7399859451862263, |
| "grad_norm": 0.4967251090524896, |
| "learning_rate": 4.1127071236442993e-07, |
| "loss": 0.133, |
| "step": 2476 |
| }, |
| { |
| "epoch": 1.7406886858749122, |
| "grad_norm": 0.482832954527618, |
| "learning_rate": 4.090814133899068e-07, |
| "loss": 0.1326, |
| "step": 2477 |
| }, |
| { |
| "epoch": 1.741391426563598, |
| "grad_norm": 0.5066810187945623, |
| "learning_rate": 4.06897708506051e-07, |
| "loss": 0.1419, |
| "step": 2478 |
| }, |
| { |
| "epoch": 1.7420941672522838, |
| "grad_norm": 0.5068986215582441, |
| "learning_rate": 4.047196003737347e-07, |
| "loss": 0.1455, |
| "step": 2479 |
| }, |
| { |
| "epoch": 1.7427969079409698, |
| "grad_norm": 0.5032666697164222, |
| "learning_rate": 4.025470916470081e-07, |
| "loss": 0.1572, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.7434996486296557, |
| "grad_norm": 0.4946885703818128, |
| "learning_rate": 4.0038018497310096e-07, |
| "loss": 0.1288, |
| "step": 2481 |
| }, |
| { |
| "epoch": 1.7442023893183416, |
| "grad_norm": 0.4890875664149045, |
| "learning_rate": 3.98218882992415e-07, |
| "loss": 0.1403, |
| "step": 2482 |
| }, |
| { |
| "epoch": 1.7449051300070275, |
| "grad_norm": 0.4983759132183108, |
| "learning_rate": 3.960631883385224e-07, |
| "loss": 0.136, |
| "step": 2483 |
| }, |
| { |
| "epoch": 1.7456078706957134, |
| "grad_norm": 0.508545575257902, |
| "learning_rate": 3.939131036381666e-07, |
| "loss": 0.1461, |
| "step": 2484 |
| }, |
| { |
| "epoch": 1.7463106113843991, |
| "grad_norm": 0.4825280158847343, |
| "learning_rate": 3.91768631511249e-07, |
| "loss": 0.1305, |
| "step": 2485 |
| }, |
| { |
| "epoch": 1.747013352073085, |
| "grad_norm": 0.5051276013993659, |
| "learning_rate": 3.8962977457083663e-07, |
| "loss": 0.1473, |
| "step": 2486 |
| }, |
| { |
| "epoch": 1.7477160927617708, |
| "grad_norm": 0.5111228805661661, |
| "learning_rate": 3.874965354231514e-07, |
| "loss": 0.1424, |
| "step": 2487 |
| }, |
| { |
| "epoch": 1.7484188334504567, |
| "grad_norm": 0.5019801408898831, |
| "learning_rate": 3.8536891666757446e-07, |
| "loss": 0.1518, |
| "step": 2488 |
| }, |
| { |
| "epoch": 1.7491215741391426, |
| "grad_norm": 0.5173740334228242, |
| "learning_rate": 3.832469208966333e-07, |
| "loss": 0.1438, |
| "step": 2489 |
| }, |
| { |
| "epoch": 1.7498243148278285, |
| "grad_norm": 0.5400760960183123, |
| "learning_rate": 3.8113055069600555e-07, |
| "loss": 0.1435, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.7505270555165144, |
| "grad_norm": 0.5121676217669354, |
| "learning_rate": 3.790198086445146e-07, |
| "loss": 0.1426, |
| "step": 2491 |
| }, |
| { |
| "epoch": 1.7512297962052004, |
| "grad_norm": 0.5257987208072362, |
| "learning_rate": 3.7691469731412635e-07, |
| "loss": 0.1425, |
| "step": 2492 |
| }, |
| { |
| "epoch": 1.7519325368938863, |
| "grad_norm": 0.5039463710418196, |
| "learning_rate": 3.7481521926994504e-07, |
| "loss": 0.1251, |
| "step": 2493 |
| }, |
| { |
| "epoch": 1.752635277582572, |
| "grad_norm": 0.4819301001498982, |
| "learning_rate": 3.7272137707020875e-07, |
| "loss": 0.1331, |
| "step": 2494 |
| }, |
| { |
| "epoch": 1.753338018271258, |
| "grad_norm": 0.5166677476186342, |
| "learning_rate": 3.7063317326629043e-07, |
| "loss": 0.1596, |
| "step": 2495 |
| }, |
| { |
| "epoch": 1.7540407589599438, |
| "grad_norm": 0.5234064687149996, |
| "learning_rate": 3.685506104026931e-07, |
| "loss": 0.1629, |
| "step": 2496 |
| }, |
| { |
| "epoch": 1.7547434996486295, |
| "grad_norm": 0.48338946523379994, |
| "learning_rate": 3.6647369101704465e-07, |
| "loss": 0.1165, |
| "step": 2497 |
| }, |
| { |
| "epoch": 1.7554462403373154, |
| "grad_norm": 0.49272297433420176, |
| "learning_rate": 3.644024176400962e-07, |
| "loss": 0.1143, |
| "step": 2498 |
| }, |
| { |
| "epoch": 1.7561489810260014, |
| "grad_norm": 0.5037259028058367, |
| "learning_rate": 3.623367927957211e-07, |
| "loss": 0.1479, |
| "step": 2499 |
| }, |
| { |
| "epoch": 1.7568517217146873, |
| "grad_norm": 0.4816989043292015, |
| "learning_rate": 3.602768190009076e-07, |
| "loss": 0.1367, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.7568517217146873, |
| "eval_loss": 0.1807616651058197, |
| "eval_runtime": 10.8673, |
| "eval_samples_per_second": 21.164, |
| "eval_steps_per_second": 5.337, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.7575544624033732, |
| "grad_norm": 0.47584583406463116, |
| "learning_rate": 3.5822249876575897e-07, |
| "loss": 0.114, |
| "step": 2501 |
| }, |
| { |
| "epoch": 1.7582572030920591, |
| "grad_norm": 0.512319533865695, |
| "learning_rate": 3.561738345934901e-07, |
| "loss": 0.1422, |
| "step": 2502 |
| }, |
| { |
| "epoch": 1.758959943780745, |
| "grad_norm": 0.502050085228889, |
| "learning_rate": 3.541308289804235e-07, |
| "loss": 0.1432, |
| "step": 2503 |
| }, |
| { |
| "epoch": 1.7596626844694307, |
| "grad_norm": 0.523940078738328, |
| "learning_rate": 3.5209348441598626e-07, |
| "loss": 0.1598, |
| "step": 2504 |
| }, |
| { |
| "epoch": 1.7603654251581167, |
| "grad_norm": 0.4810307504199012, |
| "learning_rate": 3.50061803382708e-07, |
| "loss": 0.1269, |
| "step": 2505 |
| }, |
| { |
| "epoch": 1.7610681658468024, |
| "grad_norm": 0.5523631787953741, |
| "learning_rate": 3.4803578835621685e-07, |
| "loss": 0.1814, |
| "step": 2506 |
| }, |
| { |
| "epoch": 1.7617709065354883, |
| "grad_norm": 0.5118306383379198, |
| "learning_rate": 3.460154418052364e-07, |
| "loss": 0.1555, |
| "step": 2507 |
| }, |
| { |
| "epoch": 1.7624736472241742, |
| "grad_norm": 0.4960231120219577, |
| "learning_rate": 3.440007661915856e-07, |
| "loss": 0.1296, |
| "step": 2508 |
| }, |
| { |
| "epoch": 1.7631763879128601, |
| "grad_norm": 0.49992329060380697, |
| "learning_rate": 3.419917639701698e-07, |
| "loss": 0.129, |
| "step": 2509 |
| }, |
| { |
| "epoch": 1.763879128601546, |
| "grad_norm": 0.5118691406174483, |
| "learning_rate": 3.3998843758898336e-07, |
| "loss": 0.1389, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.764581869290232, |
| "grad_norm": 0.5315435497899398, |
| "learning_rate": 3.379907894891027e-07, |
| "loss": 0.1638, |
| "step": 2511 |
| }, |
| { |
| "epoch": 1.765284609978918, |
| "grad_norm": 0.5114367673359693, |
| "learning_rate": 3.3599882210468947e-07, |
| "loss": 0.1469, |
| "step": 2512 |
| }, |
| { |
| "epoch": 1.7659873506676038, |
| "grad_norm": 0.526630904956023, |
| "learning_rate": 3.340125378629783e-07, |
| "loss": 0.1404, |
| "step": 2513 |
| }, |
| { |
| "epoch": 1.7666900913562895, |
| "grad_norm": 0.5299821315417063, |
| "learning_rate": 3.320319391842813e-07, |
| "loss": 0.1481, |
| "step": 2514 |
| }, |
| { |
| "epoch": 1.7673928320449754, |
| "grad_norm": 0.5115665096272445, |
| "learning_rate": 3.300570284819815e-07, |
| "loss": 0.1406, |
| "step": 2515 |
| }, |
| { |
| "epoch": 1.7680955727336611, |
| "grad_norm": 0.5254724265146067, |
| "learning_rate": 3.280878081625333e-07, |
| "loss": 0.1617, |
| "step": 2516 |
| }, |
| { |
| "epoch": 1.768798313422347, |
| "grad_norm": 0.5083467646494765, |
| "learning_rate": 3.261242806254561e-07, |
| "loss": 0.1457, |
| "step": 2517 |
| }, |
| { |
| "epoch": 1.769501054111033, |
| "grad_norm": 0.4598533715565604, |
| "learning_rate": 3.241664482633311e-07, |
| "loss": 0.1261, |
| "step": 2518 |
| }, |
| { |
| "epoch": 1.770203794799719, |
| "grad_norm": 0.5060996400415871, |
| "learning_rate": 3.222143134618e-07, |
| "loss": 0.1407, |
| "step": 2519 |
| }, |
| { |
| "epoch": 1.7709065354884048, |
| "grad_norm": 0.5020144901552022, |
| "learning_rate": 3.202678785995655e-07, |
| "loss": 0.1328, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.7716092761770907, |
| "grad_norm": 0.502990891060725, |
| "learning_rate": 3.1832714604838166e-07, |
| "loss": 0.1371, |
| "step": 2521 |
| }, |
| { |
| "epoch": 1.7723120168657767, |
| "grad_norm": 0.536747819689489, |
| "learning_rate": 3.16392118173055e-07, |
| "loss": 0.169, |
| "step": 2522 |
| }, |
| { |
| "epoch": 1.7730147575544624, |
| "grad_norm": 0.5008783249213785, |
| "learning_rate": 3.144627973314385e-07, |
| "loss": 0.1403, |
| "step": 2523 |
| }, |
| { |
| "epoch": 1.7737174982431483, |
| "grad_norm": 0.4922819846556829, |
| "learning_rate": 3.1253918587443645e-07, |
| "loss": 0.1408, |
| "step": 2524 |
| }, |
| { |
| "epoch": 1.7744202389318342, |
| "grad_norm": 0.5243120358752659, |
| "learning_rate": 3.1062128614599176e-07, |
| "loss": 0.1678, |
| "step": 2525 |
| }, |
| { |
| "epoch": 1.77512297962052, |
| "grad_norm": 0.5056274835855529, |
| "learning_rate": 3.0870910048308833e-07, |
| "loss": 0.1379, |
| "step": 2526 |
| }, |
| { |
| "epoch": 1.7758257203092058, |
| "grad_norm": 0.5039032672623872, |
| "learning_rate": 3.068026312157485e-07, |
| "loss": 0.1471, |
| "step": 2527 |
| }, |
| { |
| "epoch": 1.7765284609978917, |
| "grad_norm": 0.4981154294195558, |
| "learning_rate": 3.049018806670284e-07, |
| "loss": 0.1496, |
| "step": 2528 |
| }, |
| { |
| "epoch": 1.7772312016865777, |
| "grad_norm": 0.5365626189197689, |
| "learning_rate": 3.030068511530154e-07, |
| "loss": 0.1575, |
| "step": 2529 |
| }, |
| { |
| "epoch": 1.7779339423752636, |
| "grad_norm": 0.5268016582903655, |
| "learning_rate": 3.0111754498282686e-07, |
| "loss": 0.1648, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.7786366830639495, |
| "grad_norm": 0.5318004110306369, |
| "learning_rate": 2.9923396445860454e-07, |
| "loss": 0.154, |
| "step": 2531 |
| }, |
| { |
| "epoch": 1.7793394237526354, |
| "grad_norm": 0.5000644029802647, |
| "learning_rate": 2.9735611187551696e-07, |
| "loss": 0.1483, |
| "step": 2532 |
| }, |
| { |
| "epoch": 1.7800421644413211, |
| "grad_norm": 0.517187407258313, |
| "learning_rate": 2.9548398952174764e-07, |
| "loss": 0.1414, |
| "step": 2533 |
| }, |
| { |
| "epoch": 1.780744905130007, |
| "grad_norm": 0.49888098628686334, |
| "learning_rate": 2.936175996785018e-07, |
| "loss": 0.1344, |
| "step": 2534 |
| }, |
| { |
| "epoch": 1.7814476458186927, |
| "grad_norm": 0.5382013575120231, |
| "learning_rate": 2.917569446199975e-07, |
| "loss": 0.1637, |
| "step": 2535 |
| }, |
| { |
| "epoch": 1.7821503865073787, |
| "grad_norm": 0.48847817394584436, |
| "learning_rate": 2.8990202661346887e-07, |
| "loss": 0.1335, |
| "step": 2536 |
| }, |
| { |
| "epoch": 1.7828531271960646, |
| "grad_norm": 0.5004412682779585, |
| "learning_rate": 2.8805284791915245e-07, |
| "loss": 0.1297, |
| "step": 2537 |
| }, |
| { |
| "epoch": 1.7835558678847505, |
| "grad_norm": 0.4834107517340589, |
| "learning_rate": 2.862094107902974e-07, |
| "loss": 0.141, |
| "step": 2538 |
| }, |
| { |
| "epoch": 1.7842586085734364, |
| "grad_norm": 0.4768745420958175, |
| "learning_rate": 2.8437171747315306e-07, |
| "loss": 0.1303, |
| "step": 2539 |
| }, |
| { |
| "epoch": 1.7849613492621224, |
| "grad_norm": 0.49617621055480216, |
| "learning_rate": 2.8253977020697266e-07, |
| "loss": 0.1457, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.7856640899508083, |
| "grad_norm": 0.48857079830190625, |
| "learning_rate": 2.8071357122400666e-07, |
| "loss": 0.1348, |
| "step": 2541 |
| }, |
| { |
| "epoch": 1.786366830639494, |
| "grad_norm": 0.5292025988518864, |
| "learning_rate": 2.788931227494995e-07, |
| "loss": 0.152, |
| "step": 2542 |
| }, |
| { |
| "epoch": 1.78706957132818, |
| "grad_norm": 0.5020878542990461, |
| "learning_rate": 2.770784270016902e-07, |
| "loss": 0.1395, |
| "step": 2543 |
| }, |
| { |
| "epoch": 1.7877723120168658, |
| "grad_norm": 0.5160056724991398, |
| "learning_rate": 2.752694861918087e-07, |
| "loss": 0.1514, |
| "step": 2544 |
| }, |
| { |
| "epoch": 1.7884750527055515, |
| "grad_norm": 0.5161625361813755, |
| "learning_rate": 2.7346630252407136e-07, |
| "loss": 0.1488, |
| "step": 2545 |
| }, |
| { |
| "epoch": 1.7891777933942374, |
| "grad_norm": 0.49588597123940026, |
| "learning_rate": 2.7166887819568055e-07, |
| "loss": 0.1436, |
| "step": 2546 |
| }, |
| { |
| "epoch": 1.7898805340829234, |
| "grad_norm": 0.5001326686522389, |
| "learning_rate": 2.6987721539681655e-07, |
| "loss": 0.1442, |
| "step": 2547 |
| }, |
| { |
| "epoch": 1.7905832747716093, |
| "grad_norm": 0.5145397053415843, |
| "learning_rate": 2.6809131631064634e-07, |
| "loss": 0.1404, |
| "step": 2548 |
| }, |
| { |
| "epoch": 1.7912860154602952, |
| "grad_norm": 0.537795771162458, |
| "learning_rate": 2.663111831133075e-07, |
| "loss": 0.1524, |
| "step": 2549 |
| }, |
| { |
| "epoch": 1.7919887561489811, |
| "grad_norm": 0.5069325960299716, |
| "learning_rate": 2.645368179739155e-07, |
| "loss": 0.1361, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.792691496837667, |
| "grad_norm": 0.5012563168513016, |
| "learning_rate": 2.627682230545547e-07, |
| "loss": 0.1291, |
| "step": 2551 |
| }, |
| { |
| "epoch": 1.7933942375263527, |
| "grad_norm": 0.5241127028951241, |
| "learning_rate": 2.6100540051028136e-07, |
| "loss": 0.1456, |
| "step": 2552 |
| }, |
| { |
| "epoch": 1.7940969782150387, |
| "grad_norm": 0.4902698362142184, |
| "learning_rate": 2.592483524891154e-07, |
| "loss": 0.1294, |
| "step": 2553 |
| }, |
| { |
| "epoch": 1.7947997189037244, |
| "grad_norm": 0.5148708371992679, |
| "learning_rate": 2.5749708113204097e-07, |
| "loss": 0.1485, |
| "step": 2554 |
| }, |
| { |
| "epoch": 1.7955024595924103, |
| "grad_norm": 0.4645746079975259, |
| "learning_rate": 2.5575158857300444e-07, |
| "loss": 0.106, |
| "step": 2555 |
| }, |
| { |
| "epoch": 1.7962052002810962, |
| "grad_norm": 0.5336196943338167, |
| "learning_rate": 2.540118769389105e-07, |
| "loss": 0.1438, |
| "step": 2556 |
| }, |
| { |
| "epoch": 1.7969079409697821, |
| "grad_norm": 0.516699415209297, |
| "learning_rate": 2.522779483496185e-07, |
| "loss": 0.1513, |
| "step": 2557 |
| }, |
| { |
| "epoch": 1.797610681658468, |
| "grad_norm": 0.526151733582901, |
| "learning_rate": 2.505498049179411e-07, |
| "loss": 0.1421, |
| "step": 2558 |
| }, |
| { |
| "epoch": 1.798313422347154, |
| "grad_norm": 0.47850031858462644, |
| "learning_rate": 2.4882744874964226e-07, |
| "loss": 0.1294, |
| "step": 2559 |
| }, |
| { |
| "epoch": 1.7990161630358399, |
| "grad_norm": 0.5025156438770971, |
| "learning_rate": 2.471108819434359e-07, |
| "loss": 0.1316, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.7997189037245258, |
| "grad_norm": 0.49547096832318893, |
| "learning_rate": 2.4540010659097836e-07, |
| "loss": 0.1346, |
| "step": 2561 |
| }, |
| { |
| "epoch": 1.8004216444132115, |
| "grad_norm": 0.5061555212364162, |
| "learning_rate": 2.436951247768704e-07, |
| "loss": 0.1481, |
| "step": 2562 |
| }, |
| { |
| "epoch": 1.8011243851018974, |
| "grad_norm": 0.5142042671043724, |
| "learning_rate": 2.4199593857865247e-07, |
| "loss": 0.1409, |
| "step": 2563 |
| }, |
| { |
| "epoch": 1.8018271257905831, |
| "grad_norm": 0.5250821495396907, |
| "learning_rate": 2.40302550066806e-07, |
| "loss": 0.1441, |
| "step": 2564 |
| }, |
| { |
| "epoch": 1.802529866479269, |
| "grad_norm": 0.5024031575209251, |
| "learning_rate": 2.38614961304745e-07, |
| "loss": 0.1496, |
| "step": 2565 |
| }, |
| { |
| "epoch": 1.803232607167955, |
| "grad_norm": 0.47002878222205746, |
| "learning_rate": 2.3693317434881623e-07, |
| "loss": 0.1226, |
| "step": 2566 |
| }, |
| { |
| "epoch": 1.803935347856641, |
| "grad_norm": 0.4860903886167231, |
| "learning_rate": 2.3525719124829705e-07, |
| "loss": 0.1265, |
| "step": 2567 |
| }, |
| { |
| "epoch": 1.8046380885453268, |
| "grad_norm": 0.5232051344089709, |
| "learning_rate": 2.3358701404539552e-07, |
| "loss": 0.1435, |
| "step": 2568 |
| }, |
| { |
| "epoch": 1.8053408292340127, |
| "grad_norm": 0.5183042693006089, |
| "learning_rate": 2.3192264477524207e-07, |
| "loss": 0.1434, |
| "step": 2569 |
| }, |
| { |
| "epoch": 1.8060435699226987, |
| "grad_norm": 0.560087591646533, |
| "learning_rate": 2.3026408546589162e-07, |
| "loss": 0.1725, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.8067463106113844, |
| "grad_norm": 0.5229255819455728, |
| "learning_rate": 2.2861133813831703e-07, |
| "loss": 0.1591, |
| "step": 2571 |
| }, |
| { |
| "epoch": 1.8074490513000703, |
| "grad_norm": 0.4987718709096516, |
| "learning_rate": 2.2696440480641401e-07, |
| "loss": 0.143, |
| "step": 2572 |
| }, |
| { |
| "epoch": 1.8081517919887562, |
| "grad_norm": 0.5260906121103223, |
| "learning_rate": 2.2532328747698894e-07, |
| "loss": 0.1429, |
| "step": 2573 |
| }, |
| { |
| "epoch": 1.808854532677442, |
| "grad_norm": 0.5391767173504673, |
| "learning_rate": 2.23687988149765e-07, |
| "loss": 0.1519, |
| "step": 2574 |
| }, |
| { |
| "epoch": 1.8095572733661278, |
| "grad_norm": 0.5285385670188469, |
| "learning_rate": 2.2205850881737378e-07, |
| "loss": 0.168, |
| "step": 2575 |
| }, |
| { |
| "epoch": 1.8102600140548137, |
| "grad_norm": 0.49240077630564033, |
| "learning_rate": 2.2043485146535537e-07, |
| "loss": 0.1458, |
| "step": 2576 |
| }, |
| { |
| "epoch": 1.8109627547434997, |
| "grad_norm": 0.49215531125082934, |
| "learning_rate": 2.188170180721566e-07, |
| "loss": 0.1281, |
| "step": 2577 |
| }, |
| { |
| "epoch": 1.8116654954321856, |
| "grad_norm": 0.4963972594713652, |
| "learning_rate": 2.172050106091278e-07, |
| "loss": 0.1375, |
| "step": 2578 |
| }, |
| { |
| "epoch": 1.8123682361208715, |
| "grad_norm": 0.5045098450000102, |
| "learning_rate": 2.1559883104051938e-07, |
| "loss": 0.152, |
| "step": 2579 |
| }, |
| { |
| "epoch": 1.8130709768095574, |
| "grad_norm": 0.5077926482353168, |
| "learning_rate": 2.1399848132348078e-07, |
| "loss": 0.1502, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.8137737174982431, |
| "grad_norm": 0.4901609403829654, |
| "learning_rate": 2.1240396340805825e-07, |
| "loss": 0.1479, |
| "step": 2581 |
| }, |
| { |
| "epoch": 1.814476458186929, |
| "grad_norm": 0.5040668902616426, |
| "learning_rate": 2.1081527923719035e-07, |
| "loss": 0.1433, |
| "step": 2582 |
| }, |
| { |
| "epoch": 1.8151791988756147, |
| "grad_norm": 0.47605679931138023, |
| "learning_rate": 2.0923243074670918e-07, |
| "loss": 0.1172, |
| "step": 2583 |
| }, |
| { |
| "epoch": 1.8158819395643007, |
| "grad_norm": 0.5451089294072474, |
| "learning_rate": 2.0765541986533577e-07, |
| "loss": 0.1586, |
| "step": 2584 |
| }, |
| { |
| "epoch": 1.8165846802529866, |
| "grad_norm": 0.544266604892749, |
| "learning_rate": 2.0608424851467578e-07, |
| "loss": 0.1613, |
| "step": 2585 |
| }, |
| { |
| "epoch": 1.8172874209416725, |
| "grad_norm": 0.5140738338986157, |
| "learning_rate": 2.0451891860922167e-07, |
| "loss": 0.1436, |
| "step": 2586 |
| }, |
| { |
| "epoch": 1.8179901616303584, |
| "grad_norm": 0.5439700880344692, |
| "learning_rate": 2.0295943205634605e-07, |
| "loss": 0.154, |
| "step": 2587 |
| }, |
| { |
| "epoch": 1.8186929023190443, |
| "grad_norm": 0.5173480144185391, |
| "learning_rate": 2.0140579075630384e-07, |
| "loss": 0.1605, |
| "step": 2588 |
| }, |
| { |
| "epoch": 1.8193956430077303, |
| "grad_norm": 0.5208480717274079, |
| "learning_rate": 1.9985799660222626e-07, |
| "loss": 0.1451, |
| "step": 2589 |
| }, |
| { |
| "epoch": 1.8200983836964162, |
| "grad_norm": 0.49845186932887076, |
| "learning_rate": 1.9831605148011745e-07, |
| "loss": 0.1429, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.8208011243851019, |
| "grad_norm": 0.48163278303529733, |
| "learning_rate": 1.9677995726885778e-07, |
| "loss": 0.1255, |
| "step": 2591 |
| }, |
| { |
| "epoch": 1.8215038650737878, |
| "grad_norm": 0.47652368952818397, |
| "learning_rate": 1.9524971584019726e-07, |
| "loss": 0.1153, |
| "step": 2592 |
| }, |
| { |
| "epoch": 1.8222066057624735, |
| "grad_norm": 0.5081801481197005, |
| "learning_rate": 1.937253290587532e-07, |
| "loss": 0.1377, |
| "step": 2593 |
| }, |
| { |
| "epoch": 1.8229093464511594, |
| "grad_norm": 0.5129758655170876, |
| "learning_rate": 1.9220679878201086e-07, |
| "loss": 0.1552, |
| "step": 2594 |
| }, |
| { |
| "epoch": 1.8236120871398454, |
| "grad_norm": 0.5048691989074335, |
| "learning_rate": 1.9069412686031575e-07, |
| "loss": 0.1485, |
| "step": 2595 |
| }, |
| { |
| "epoch": 1.8243148278285313, |
| "grad_norm": 0.5404970301699522, |
| "learning_rate": 1.8918731513687893e-07, |
| "loss": 0.1684, |
| "step": 2596 |
| }, |
| { |
| "epoch": 1.8250175685172172, |
| "grad_norm": 0.4967219145554596, |
| "learning_rate": 1.876863654477684e-07, |
| "loss": 0.1247, |
| "step": 2597 |
| }, |
| { |
| "epoch": 1.8257203092059031, |
| "grad_norm": 0.5077317448642489, |
| "learning_rate": 1.8619127962190952e-07, |
| "loss": 0.1355, |
| "step": 2598 |
| }, |
| { |
| "epoch": 1.826423049894589, |
| "grad_norm": 0.4971023482577422, |
| "learning_rate": 1.847020594810839e-07, |
| "loss": 0.129, |
| "step": 2599 |
| }, |
| { |
| "epoch": 1.8271257905832747, |
| "grad_norm": 0.48601797488301623, |
| "learning_rate": 1.8321870683992326e-07, |
| "loss": 0.1322, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.8278285312719607, |
| "grad_norm": 0.48392971082553665, |
| "learning_rate": 1.817412235059113e-07, |
| "loss": 0.1301, |
| "step": 2601 |
| }, |
| { |
| "epoch": 1.8285312719606466, |
| "grad_norm": 0.5102334446297592, |
| "learning_rate": 1.8026961127938059e-07, |
| "loss": 0.1506, |
| "step": 2602 |
| }, |
| { |
| "epoch": 1.8292340126493323, |
| "grad_norm": 0.4914040388893914, |
| "learning_rate": 1.7880387195350734e-07, |
| "loss": 0.1175, |
| "step": 2603 |
| }, |
| { |
| "epoch": 1.8299367533380182, |
| "grad_norm": 0.5121422819226531, |
| "learning_rate": 1.7734400731431344e-07, |
| "loss": 0.1421, |
| "step": 2604 |
| }, |
| { |
| "epoch": 1.8306394940267041, |
| "grad_norm": 0.5037999031914915, |
| "learning_rate": 1.7589001914066206e-07, |
| "loss": 0.1408, |
| "step": 2605 |
| }, |
| { |
| "epoch": 1.83134223471539, |
| "grad_norm": 0.5067033731990441, |
| "learning_rate": 1.744419092042554e-07, |
| "loss": 0.1335, |
| "step": 2606 |
| }, |
| { |
| "epoch": 1.832044975404076, |
| "grad_norm": 0.49393593459338514, |
| "learning_rate": 1.7299967926963367e-07, |
| "loss": 0.1247, |
| "step": 2607 |
| }, |
| { |
| "epoch": 1.8327477160927619, |
| "grad_norm": 0.5242193894382914, |
| "learning_rate": 1.7156333109417055e-07, |
| "loss": 0.1569, |
| "step": 2608 |
| }, |
| { |
| "epoch": 1.8334504567814478, |
| "grad_norm": 0.5202636164472492, |
| "learning_rate": 1.7013286642807602e-07, |
| "loss": 0.1342, |
| "step": 2609 |
| }, |
| { |
| "epoch": 1.8341531974701335, |
| "grad_norm": 0.5614919056652893, |
| "learning_rate": 1.687082870143869e-07, |
| "loss": 0.1757, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.8348559381588194, |
| "grad_norm": 0.5281146658221509, |
| "learning_rate": 1.672895945889713e-07, |
| "loss": 0.1565, |
| "step": 2611 |
| }, |
| { |
| "epoch": 1.8355586788475051, |
| "grad_norm": 0.48187838560746477, |
| "learning_rate": 1.6587679088052365e-07, |
| "loss": 0.1218, |
| "step": 2612 |
| }, |
| { |
| "epoch": 1.836261419536191, |
| "grad_norm": 0.5657258268237095, |
| "learning_rate": 1.6446987761056244e-07, |
| "loss": 0.1796, |
| "step": 2613 |
| }, |
| { |
| "epoch": 1.836964160224877, |
| "grad_norm": 0.5329160569784503, |
| "learning_rate": 1.6306885649342906e-07, |
| "loss": 0.1382, |
| "step": 2614 |
| }, |
| { |
| "epoch": 1.8376669009135629, |
| "grad_norm": 0.5041024772538047, |
| "learning_rate": 1.6167372923628354e-07, |
| "loss": 0.1406, |
| "step": 2615 |
| }, |
| { |
| "epoch": 1.8383696416022488, |
| "grad_norm": 0.507700276509694, |
| "learning_rate": 1.6028449753910768e-07, |
| "loss": 0.1297, |
| "step": 2616 |
| }, |
| { |
| "epoch": 1.8390723822909347, |
| "grad_norm": 0.5265584444159414, |
| "learning_rate": 1.5890116309469573e-07, |
| "loss": 0.1612, |
| "step": 2617 |
| }, |
| { |
| "epoch": 1.8397751229796206, |
| "grad_norm": 0.5209812882103455, |
| "learning_rate": 1.575237275886593e-07, |
| "loss": 0.1117, |
| "step": 2618 |
| }, |
| { |
| "epoch": 1.8404778636683063, |
| "grad_norm": 0.542521675976148, |
| "learning_rate": 1.5615219269941807e-07, |
| "loss": 0.1592, |
| "step": 2619 |
| }, |
| { |
| "epoch": 1.8411806043569923, |
| "grad_norm": 0.529711101080399, |
| "learning_rate": 1.5478656009820626e-07, |
| "loss": 0.1525, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.8418833450456782, |
| "grad_norm": 0.534179504469705, |
| "learning_rate": 1.5342683144906334e-07, |
| "loss": 0.15, |
| "step": 2621 |
| }, |
| { |
| "epoch": 1.8425860857343639, |
| "grad_norm": 0.5400629684296321, |
| "learning_rate": 1.520730084088351e-07, |
| "loss": 0.1565, |
| "step": 2622 |
| }, |
| { |
| "epoch": 1.8432888264230498, |
| "grad_norm": 0.5170789645604357, |
| "learning_rate": 1.5072509262717195e-07, |
| "loss": 0.1475, |
| "step": 2623 |
| }, |
| { |
| "epoch": 1.8439915671117357, |
| "grad_norm": 0.5063403071076594, |
| "learning_rate": 1.4938308574652505e-07, |
| "loss": 0.138, |
| "step": 2624 |
| }, |
| { |
| "epoch": 1.8446943078004217, |
| "grad_norm": 0.5152059684103599, |
| "learning_rate": 1.4804698940214746e-07, |
| "loss": 0.1464, |
| "step": 2625 |
| }, |
| { |
| "epoch": 1.8453970484891076, |
| "grad_norm": 0.504957720786916, |
| "learning_rate": 1.4671680522208797e-07, |
| "loss": 0.1463, |
| "step": 2626 |
| }, |
| { |
| "epoch": 1.8460997891777935, |
| "grad_norm": 0.4699643971631126, |
| "learning_rate": 1.4539253482719286e-07, |
| "loss": 0.1177, |
| "step": 2627 |
| }, |
| { |
| "epoch": 1.8468025298664794, |
| "grad_norm": 0.4777017263154567, |
| "learning_rate": 1.4407417983110127e-07, |
| "loss": 0.1172, |
| "step": 2628 |
| }, |
| { |
| "epoch": 1.8475052705551651, |
| "grad_norm": 0.5087995897430411, |
| "learning_rate": 1.427617418402455e-07, |
| "loss": 0.1403, |
| "step": 2629 |
| }, |
| { |
| "epoch": 1.848208011243851, |
| "grad_norm": 0.5201978878578722, |
| "learning_rate": 1.4145522245384735e-07, |
| "loss": 0.1587, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.8489107519325367, |
| "grad_norm": 0.5382052069103397, |
| "learning_rate": 1.401546232639167e-07, |
| "loss": 0.1472, |
| "step": 2631 |
| }, |
| { |
| "epoch": 1.8496134926212227, |
| "grad_norm": 0.5168716574125763, |
| "learning_rate": 1.388599458552492e-07, |
| "loss": 0.1516, |
| "step": 2632 |
| }, |
| { |
| "epoch": 1.8503162333099086, |
| "grad_norm": 0.554664247706861, |
| "learning_rate": 1.3757119180542623e-07, |
| "loss": 0.1717, |
| "step": 2633 |
| }, |
| { |
| "epoch": 1.8510189739985945, |
| "grad_norm": 0.4939411994253164, |
| "learning_rate": 1.3628836268480883e-07, |
| "loss": 0.1281, |
| "step": 2634 |
| }, |
| { |
| "epoch": 1.8517217146872804, |
| "grad_norm": 0.5335256253163561, |
| "learning_rate": 1.3501146005654164e-07, |
| "loss": 0.1389, |
| "step": 2635 |
| }, |
| { |
| "epoch": 1.8524244553759663, |
| "grad_norm": 0.4981498675638026, |
| "learning_rate": 1.337404854765445e-07, |
| "loss": 0.1267, |
| "step": 2636 |
| }, |
| { |
| "epoch": 1.8531271960646523, |
| "grad_norm": 0.5221789691308846, |
| "learning_rate": 1.3247544049351745e-07, |
| "loss": 0.1451, |
| "step": 2637 |
| }, |
| { |
| "epoch": 1.8538299367533382, |
| "grad_norm": 0.4876082081623457, |
| "learning_rate": 1.3121632664893192e-07, |
| "loss": 0.1357, |
| "step": 2638 |
| }, |
| { |
| "epoch": 1.8545326774420239, |
| "grad_norm": 0.5284294376664426, |
| "learning_rate": 1.2996314547703393e-07, |
| "loss": 0.1694, |
| "step": 2639 |
| }, |
| { |
| "epoch": 1.8552354181307098, |
| "grad_norm": 0.49857435677523226, |
| "learning_rate": 1.2871589850484034e-07, |
| "loss": 0.1431, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.8559381588193955, |
| "grad_norm": 0.5183214493498189, |
| "learning_rate": 1.2747458725213712e-07, |
| "loss": 0.147, |
| "step": 2641 |
| }, |
| { |
| "epoch": 1.8566408995080814, |
| "grad_norm": 0.5052453730973389, |
| "learning_rate": 1.2623921323147714e-07, |
| "loss": 0.1448, |
| "step": 2642 |
| }, |
| { |
| "epoch": 1.8573436401967673, |
| "grad_norm": 0.5079225417127723, |
| "learning_rate": 1.2500977794817794e-07, |
| "loss": 0.1307, |
| "step": 2643 |
| }, |
| { |
| "epoch": 1.8580463808854533, |
| "grad_norm": 0.546064986386369, |
| "learning_rate": 1.237862829003228e-07, |
| "loss": 0.1571, |
| "step": 2644 |
| }, |
| { |
| "epoch": 1.8587491215741392, |
| "grad_norm": 0.5143898150930651, |
| "learning_rate": 1.225687295787542e-07, |
| "loss": 0.1456, |
| "step": 2645 |
| }, |
| { |
| "epoch": 1.859451862262825, |
| "grad_norm": 0.5265743190917148, |
| "learning_rate": 1.2135711946707708e-07, |
| "loss": 0.1753, |
| "step": 2646 |
| }, |
| { |
| "epoch": 1.860154602951511, |
| "grad_norm": 0.5024792783520283, |
| "learning_rate": 1.2015145404165261e-07, |
| "loss": 0.1334, |
| "step": 2647 |
| }, |
| { |
| "epoch": 1.8608573436401967, |
| "grad_norm": 0.5197838757541134, |
| "learning_rate": 1.1895173477159849e-07, |
| "loss": 0.1401, |
| "step": 2648 |
| }, |
| { |
| "epoch": 1.8615600843288826, |
| "grad_norm": 0.4941943554491134, |
| "learning_rate": 1.1775796311878807e-07, |
| "loss": 0.1446, |
| "step": 2649 |
| }, |
| { |
| "epoch": 1.8622628250175686, |
| "grad_norm": 0.510797781459705, |
| "learning_rate": 1.1657014053784666e-07, |
| "loss": 0.1504, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.8629655657062543, |
| "grad_norm": 0.4864424119263491, |
| "learning_rate": 1.1538826847615037e-07, |
| "loss": 0.1341, |
| "step": 2651 |
| }, |
| { |
| "epoch": 1.8636683063949402, |
| "grad_norm": 0.5087411860311789, |
| "learning_rate": 1.14212348373825e-07, |
| "loss": 0.1463, |
| "step": 2652 |
| }, |
| { |
| "epoch": 1.864371047083626, |
| "grad_norm": 0.49707309590982207, |
| "learning_rate": 1.1304238166374381e-07, |
| "loss": 0.1489, |
| "step": 2653 |
| }, |
| { |
| "epoch": 1.865073787772312, |
| "grad_norm": 0.4931342074513031, |
| "learning_rate": 1.1187836977152533e-07, |
| "loss": 0.1173, |
| "step": 2654 |
| }, |
| { |
| "epoch": 1.865776528460998, |
| "grad_norm": 0.5137748127105113, |
| "learning_rate": 1.1072031411553219e-07, |
| "loss": 0.1327, |
| "step": 2655 |
| }, |
| { |
| "epoch": 1.8664792691496839, |
| "grad_norm": 0.5207656669400329, |
| "learning_rate": 1.0956821610686952e-07, |
| "loss": 0.1534, |
| "step": 2656 |
| }, |
| { |
| "epoch": 1.8671820098383698, |
| "grad_norm": 0.47726471803816006, |
| "learning_rate": 1.084220771493838e-07, |
| "loss": 0.1172, |
| "step": 2657 |
| }, |
| { |
| "epoch": 1.8678847505270555, |
| "grad_norm": 0.5049845652854204, |
| "learning_rate": 1.0728189863965788e-07, |
| "loss": 0.1241, |
| "step": 2658 |
| }, |
| { |
| "epoch": 1.8685874912157414, |
| "grad_norm": 0.5015664314853837, |
| "learning_rate": 1.061476819670143e-07, |
| "loss": 0.1437, |
| "step": 2659 |
| }, |
| { |
| "epoch": 1.8692902319044271, |
| "grad_norm": 0.4898745907546558, |
| "learning_rate": 1.0501942851350921e-07, |
| "loss": 0.1245, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.869992972593113, |
| "grad_norm": 0.5284895632749437, |
| "learning_rate": 1.0389713965393455e-07, |
| "loss": 0.1393, |
| "step": 2661 |
| }, |
| { |
| "epoch": 1.870695713281799, |
| "grad_norm": 0.47810971939622376, |
| "learning_rate": 1.0278081675581253e-07, |
| "loss": 0.131, |
| "step": 2662 |
| }, |
| { |
| "epoch": 1.8713984539704849, |
| "grad_norm": 0.5070597308989767, |
| "learning_rate": 1.0167046117939561e-07, |
| "loss": 0.1377, |
| "step": 2663 |
| }, |
| { |
| "epoch": 1.8721011946591708, |
| "grad_norm": 0.49224780941636986, |
| "learning_rate": 1.005660742776654e-07, |
| "loss": 0.139, |
| "step": 2664 |
| }, |
| { |
| "epoch": 1.8728039353478567, |
| "grad_norm": 0.5440013848755799, |
| "learning_rate": 9.946765739633269e-08, |
| "loss": 0.1734, |
| "step": 2665 |
| }, |
| { |
| "epoch": 1.8735066760365426, |
| "grad_norm": 0.5112080258416924, |
| "learning_rate": 9.837521187383126e-08, |
| "loss": 0.1463, |
| "step": 2666 |
| }, |
| { |
| "epoch": 1.8742094167252283, |
| "grad_norm": 0.5218668643540916, |
| "learning_rate": 9.728873904131853e-08, |
| "loss": 0.1769, |
| "step": 2667 |
| }, |
| { |
| "epoch": 1.8749121574139143, |
| "grad_norm": 0.51026817542633, |
| "learning_rate": 9.620824022267549e-08, |
| "loss": 0.1404, |
| "step": 2668 |
| }, |
| { |
| "epoch": 1.8756148981026002, |
| "grad_norm": 0.482823703843089, |
| "learning_rate": 9.513371673450344e-08, |
| "loss": 0.1373, |
| "step": 2669 |
| }, |
| { |
| "epoch": 1.8763176387912859, |
| "grad_norm": 0.5010018048694237, |
| "learning_rate": 9.40651698861228e-08, |
| "loss": 0.1329, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.8770203794799718, |
| "grad_norm": 0.5247195128185501, |
| "learning_rate": 9.300260097956981e-08, |
| "loss": 0.1542, |
| "step": 2671 |
| }, |
| { |
| "epoch": 1.8777231201686577, |
| "grad_norm": 0.48310092650513153, |
| "learning_rate": 9.19460113095988e-08, |
| "loss": 0.1173, |
| "step": 2672 |
| }, |
| { |
| "epoch": 1.8784258608573436, |
| "grad_norm": 0.5262005900960619, |
| "learning_rate": 9.089540216367654e-08, |
| "loss": 0.1531, |
| "step": 2673 |
| }, |
| { |
| "epoch": 1.8791286015460296, |
| "grad_norm": 0.5087073281936366, |
| "learning_rate": 8.985077482198346e-08, |
| "loss": 0.1386, |
| "step": 2674 |
| }, |
| { |
| "epoch": 1.8798313422347155, |
| "grad_norm": 0.46451880297201525, |
| "learning_rate": 8.881213055741134e-08, |
| "loss": 0.11, |
| "step": 2675 |
| }, |
| { |
| "epoch": 1.8805340829234014, |
| "grad_norm": 0.4901323654825113, |
| "learning_rate": 8.777947063556002e-08, |
| "loss": 0.1344, |
| "step": 2676 |
| }, |
| { |
| "epoch": 1.881236823612087, |
| "grad_norm": 0.5048362106132257, |
| "learning_rate": 8.67527963147391e-08, |
| "loss": 0.1451, |
| "step": 2677 |
| }, |
| { |
| "epoch": 1.881939564300773, |
| "grad_norm": 0.4788357243727687, |
| "learning_rate": 8.57321088459634e-08, |
| "loss": 0.1334, |
| "step": 2678 |
| }, |
| { |
| "epoch": 1.8826423049894587, |
| "grad_norm": 0.5065406066592795, |
| "learning_rate": 8.471740947295304e-08, |
| "loss": 0.1459, |
| "step": 2679 |
| }, |
| { |
| "epoch": 1.8833450456781446, |
| "grad_norm": 0.47638466417584086, |
| "learning_rate": 8.370869943213178e-08, |
| "loss": 0.1276, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.8840477863668306, |
| "grad_norm": 0.513019187609436, |
| "learning_rate": 8.270597995262586e-08, |
| "loss": 0.1439, |
| "step": 2681 |
| }, |
| { |
| "epoch": 1.8847505270555165, |
| "grad_norm": 0.4952226280223815, |
| "learning_rate": 8.17092522562607e-08, |
| "loss": 0.1307, |
| "step": 2682 |
| }, |
| { |
| "epoch": 1.8854532677442024, |
| "grad_norm": 0.5075275049563399, |
| "learning_rate": 8.071851755756088e-08, |
| "loss": 0.1424, |
| "step": 2683 |
| }, |
| { |
| "epoch": 1.8861560084328883, |
| "grad_norm": 0.4985028012920937, |
| "learning_rate": 7.973377706374852e-08, |
| "loss": 0.131, |
| "step": 2684 |
| }, |
| { |
| "epoch": 1.8868587491215743, |
| "grad_norm": 0.4929395898649719, |
| "learning_rate": 7.875503197474377e-08, |
| "loss": 0.1395, |
| "step": 2685 |
| }, |
| { |
| "epoch": 1.8875614898102602, |
| "grad_norm": 0.5162454789554229, |
| "learning_rate": 7.778228348315763e-08, |
| "loss": 0.158, |
| "step": 2686 |
| }, |
| { |
| "epoch": 1.8882642304989459, |
| "grad_norm": 0.5192574950152272, |
| "learning_rate": 7.681553277429698e-08, |
| "loss": 0.1525, |
| "step": 2687 |
| }, |
| { |
| "epoch": 1.8889669711876318, |
| "grad_norm": 0.521601549534333, |
| "learning_rate": 7.585478102615951e-08, |
| "loss": 0.132, |
| "step": 2688 |
| }, |
| { |
| "epoch": 1.8896697118763175, |
| "grad_norm": 0.4843516768403466, |
| "learning_rate": 7.490002940943263e-08, |
| "loss": 0.1334, |
| "step": 2689 |
| }, |
| { |
| "epoch": 1.8903724525650034, |
| "grad_norm": 0.5043173752107828, |
| "learning_rate": 7.395127908749356e-08, |
| "loss": 0.1541, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.8910751932536893, |
| "grad_norm": 0.5144753873581086, |
| "learning_rate": 7.300853121640528e-08, |
| "loss": 0.1418, |
| "step": 2691 |
| }, |
| { |
| "epoch": 1.8917779339423753, |
| "grad_norm": 0.5127526031674718, |
| "learning_rate": 7.207178694491778e-08, |
| "loss": 0.1501, |
| "step": 2692 |
| }, |
| { |
| "epoch": 1.8924806746310612, |
| "grad_norm": 0.5007235999573614, |
| "learning_rate": 7.114104741446581e-08, |
| "loss": 0.1449, |
| "step": 2693 |
| }, |
| { |
| "epoch": 1.893183415319747, |
| "grad_norm": 0.5252681135070831, |
| "learning_rate": 7.021631375916716e-08, |
| "loss": 0.1591, |
| "step": 2694 |
| }, |
| { |
| "epoch": 1.893886156008433, |
| "grad_norm": 0.5063517537683107, |
| "learning_rate": 6.929758710582102e-08, |
| "loss": 0.1424, |
| "step": 2695 |
| }, |
| { |
| "epoch": 1.8945888966971187, |
| "grad_norm": 0.47396271781618415, |
| "learning_rate": 6.838486857390692e-08, |
| "loss": 0.1126, |
| "step": 2696 |
| }, |
| { |
| "epoch": 1.8952916373858046, |
| "grad_norm": 0.5032356998875548, |
| "learning_rate": 6.747815927558354e-08, |
| "loss": 0.139, |
| "step": 2697 |
| }, |
| { |
| "epoch": 1.8959943780744906, |
| "grad_norm": 0.5125103242000826, |
| "learning_rate": 6.657746031568769e-08, |
| "loss": 0.1509, |
| "step": 2698 |
| }, |
| { |
| "epoch": 1.8966971187631763, |
| "grad_norm": 0.5201568570124898, |
| "learning_rate": 6.568277279173141e-08, |
| "loss": 0.1561, |
| "step": 2699 |
| }, |
| { |
| "epoch": 1.8973998594518622, |
| "grad_norm": 0.5186568338619438, |
| "learning_rate": 6.479409779390267e-08, |
| "loss": 0.1526, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.898102600140548, |
| "grad_norm": 0.5283258080713146, |
| "learning_rate": 6.391143640506359e-08, |
| "loss": 0.1653, |
| "step": 2701 |
| }, |
| { |
| "epoch": 1.898805340829234, |
| "grad_norm": 0.49052332848233793, |
| "learning_rate": 6.303478970074716e-08, |
| "loss": 0.132, |
| "step": 2702 |
| }, |
| { |
| "epoch": 1.89950808151792, |
| "grad_norm": 0.5239835152186789, |
| "learning_rate": 6.216415874915837e-08, |
| "loss": 0.1446, |
| "step": 2703 |
| }, |
| { |
| "epoch": 1.9002108222066059, |
| "grad_norm": 0.5149848382309479, |
| "learning_rate": 6.129954461117083e-08, |
| "loss": 0.156, |
| "step": 2704 |
| }, |
| { |
| "epoch": 1.9009135628952918, |
| "grad_norm": 0.5318498025754016, |
| "learning_rate": 6.044094834032954e-08, |
| "loss": 0.1495, |
| "step": 2705 |
| }, |
| { |
| "epoch": 1.9016163035839775, |
| "grad_norm": 0.4883391326061208, |
| "learning_rate": 5.95883709828432e-08, |
| "loss": 0.1245, |
| "step": 2706 |
| }, |
| { |
| "epoch": 1.9023190442726634, |
| "grad_norm": 0.5076806126800628, |
| "learning_rate": 5.874181357758746e-08, |
| "loss": 0.1306, |
| "step": 2707 |
| }, |
| { |
| "epoch": 1.903021784961349, |
| "grad_norm": 0.5259427028692687, |
| "learning_rate": 5.790127715610328e-08, |
| "loss": 0.1535, |
| "step": 2708 |
| }, |
| { |
| "epoch": 1.903724525650035, |
| "grad_norm": 0.49017069354307125, |
| "learning_rate": 5.706676274259582e-08, |
| "loss": 0.1354, |
| "step": 2709 |
| }, |
| { |
| "epoch": 1.904427266338721, |
| "grad_norm": 0.5392957251670593, |
| "learning_rate": 5.6238271353929455e-08, |
| "loss": 0.1612, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.9051300070274069, |
| "grad_norm": 0.47921362798313144, |
| "learning_rate": 5.541580399963165e-08, |
| "loss": 0.1253, |
| "step": 2711 |
| }, |
| { |
| "epoch": 1.9058327477160928, |
| "grad_norm": 0.5000567809080751, |
| "learning_rate": 5.459936168188906e-08, |
| "loss": 0.1543, |
| "step": 2712 |
| }, |
| { |
| "epoch": 1.9065354884047787, |
| "grad_norm": 0.532479705121779, |
| "learning_rate": 5.3788945395546465e-08, |
| "loss": 0.1667, |
| "step": 2713 |
| }, |
| { |
| "epoch": 1.9072382290934646, |
| "grad_norm": 0.5231968810105011, |
| "learning_rate": 5.2984556128107266e-08, |
| "loss": 0.1476, |
| "step": 2714 |
| }, |
| { |
| "epoch": 1.9079409697821503, |
| "grad_norm": 0.5106285992840719, |
| "learning_rate": 5.2186194859727977e-08, |
| "loss": 0.1321, |
| "step": 2715 |
| }, |
| { |
| "epoch": 1.9086437104708363, |
| "grad_norm": 0.7664757738965309, |
| "learning_rate": 5.13938625632221e-08, |
| "loss": 0.1743, |
| "step": 2716 |
| }, |
| { |
| "epoch": 1.9093464511595222, |
| "grad_norm": 0.5381852691192065, |
| "learning_rate": 5.060756020405677e-08, |
| "loss": 0.1731, |
| "step": 2717 |
| }, |
| { |
| "epoch": 1.9100491918482079, |
| "grad_norm": 0.5148348910887726, |
| "learning_rate": 4.982728874035059e-08, |
| "loss": 0.1466, |
| "step": 2718 |
| }, |
| { |
| "epoch": 1.9107519325368938, |
| "grad_norm": 0.5401335083232811, |
| "learning_rate": 4.905304912287468e-08, |
| "loss": 0.1454, |
| "step": 2719 |
| }, |
| { |
| "epoch": 1.9114546732255797, |
| "grad_norm": 0.49382596968863424, |
| "learning_rate": 4.8284842295048265e-08, |
| "loss": 0.1404, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.9121574139142656, |
| "grad_norm": 0.5015414659105242, |
| "learning_rate": 4.7522669192942014e-08, |
| "loss": 0.1506, |
| "step": 2721 |
| }, |
| { |
| "epoch": 1.9128601546029516, |
| "grad_norm": 0.5233071572038667, |
| "learning_rate": 4.676653074527249e-08, |
| "loss": 0.1458, |
| "step": 2722 |
| }, |
| { |
| "epoch": 1.9135628952916375, |
| "grad_norm": 0.5270575024731265, |
| "learning_rate": 4.601642787340377e-08, |
| "loss": 0.1457, |
| "step": 2723 |
| }, |
| { |
| "epoch": 1.9142656359803234, |
| "grad_norm": 0.5243966284195736, |
| "learning_rate": 4.5272361491345286e-08, |
| "loss": 0.1356, |
| "step": 2724 |
| }, |
| { |
| "epoch": 1.914968376669009, |
| "grad_norm": 0.5239495021750533, |
| "learning_rate": 4.4534332505751786e-08, |
| "loss": 0.158, |
| "step": 2725 |
| }, |
| { |
| "epoch": 1.915671117357695, |
| "grad_norm": 0.5001475973542321, |
| "learning_rate": 4.380234181592002e-08, |
| "loss": 0.1428, |
| "step": 2726 |
| }, |
| { |
| "epoch": 1.916373858046381, |
| "grad_norm": 0.4866615950225424, |
| "learning_rate": 4.30763903137893e-08, |
| "loss": 0.1424, |
| "step": 2727 |
| }, |
| { |
| "epoch": 1.9170765987350666, |
| "grad_norm": 0.5021516588646925, |
| "learning_rate": 4.23564788839409e-08, |
| "loss": 0.1349, |
| "step": 2728 |
| }, |
| { |
| "epoch": 1.9177793394237526, |
| "grad_norm": 0.5107961331826483, |
| "learning_rate": 4.164260840359646e-08, |
| "loss": 0.1537, |
| "step": 2729 |
| }, |
| { |
| "epoch": 1.9184820801124385, |
| "grad_norm": 0.5307070440853738, |
| "learning_rate": 4.0934779742615174e-08, |
| "loss": 0.1436, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.9191848208011244, |
| "grad_norm": 0.5330489784728988, |
| "learning_rate": 4.0232993763494324e-08, |
| "loss": 0.1616, |
| "step": 2731 |
| }, |
| { |
| "epoch": 1.9198875614898103, |
| "grad_norm": 0.5234903895461293, |
| "learning_rate": 3.953725132136932e-08, |
| "loss": 0.1517, |
| "step": 2732 |
| }, |
| { |
| "epoch": 1.9205903021784962, |
| "grad_norm": 0.4752098567300896, |
| "learning_rate": 3.884755326401146e-08, |
| "loss": 0.1338, |
| "step": 2733 |
| }, |
| { |
| "epoch": 1.9212930428671822, |
| "grad_norm": 0.5131113828284778, |
| "learning_rate": 3.816390043182572e-08, |
| "loss": 0.1336, |
| "step": 2734 |
| }, |
| { |
| "epoch": 1.9219957835558679, |
| "grad_norm": 0.5123087049753486, |
| "learning_rate": 3.748629365785184e-08, |
| "loss": 0.1392, |
| "step": 2735 |
| }, |
| { |
| "epoch": 1.9226985242445538, |
| "grad_norm": 0.502380548388396, |
| "learning_rate": 3.681473376776101e-08, |
| "loss": 0.1278, |
| "step": 2736 |
| }, |
| { |
| "epoch": 1.9234012649332395, |
| "grad_norm": 0.48935748420238434, |
| "learning_rate": 3.614922157985812e-08, |
| "loss": 0.1331, |
| "step": 2737 |
| }, |
| { |
| "epoch": 1.9241040056219254, |
| "grad_norm": 0.5259100691200583, |
| "learning_rate": 3.548975790507836e-08, |
| "loss": 0.16, |
| "step": 2738 |
| }, |
| { |
| "epoch": 1.9248067463106113, |
| "grad_norm": 0.4780326595314659, |
| "learning_rate": 3.483634354698506e-08, |
| "loss": 0.124, |
| "step": 2739 |
| }, |
| { |
| "epoch": 1.9255094869992972, |
| "grad_norm": 0.5471598983467048, |
| "learning_rate": 3.41889793017719e-08, |
| "loss": 0.1511, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.9262122276879832, |
| "grad_norm": 0.5365268330722736, |
| "learning_rate": 3.354766595826064e-08, |
| "loss": 0.1566, |
| "step": 2741 |
| }, |
| { |
| "epoch": 1.926914968376669, |
| "grad_norm": 0.5079159571319655, |
| "learning_rate": 3.291240429789955e-08, |
| "loss": 0.1472, |
| "step": 2742 |
| }, |
| { |
| "epoch": 1.927617709065355, |
| "grad_norm": 0.5102390213530034, |
| "learning_rate": 3.22831950947633e-08, |
| "loss": 0.1473, |
| "step": 2743 |
| }, |
| { |
| "epoch": 1.9283204497540407, |
| "grad_norm": 0.5073968459457318, |
| "learning_rate": 3.166003911554916e-08, |
| "loss": 0.128, |
| "step": 2744 |
| }, |
| { |
| "epoch": 1.9290231904427266, |
| "grad_norm": 0.4913075941214151, |
| "learning_rate": 3.104293711958195e-08, |
| "loss": 0.1205, |
| "step": 2745 |
| }, |
| { |
| "epoch": 1.9297259311314126, |
| "grad_norm": 0.5047863034079181, |
| "learning_rate": 3.0431889858807405e-08, |
| "loss": 0.1414, |
| "step": 2746 |
| }, |
| { |
| "epoch": 1.9304286718200983, |
| "grad_norm": 0.5130306164211104, |
| "learning_rate": 2.982689807779382e-08, |
| "loss": 0.1452, |
| "step": 2747 |
| }, |
| { |
| "epoch": 1.9311314125087842, |
| "grad_norm": 0.5124308413800075, |
| "learning_rate": 2.9227962513732057e-08, |
| "loss": 0.1388, |
| "step": 2748 |
| }, |
| { |
| "epoch": 1.93183415319747, |
| "grad_norm": 0.5269103376555939, |
| "learning_rate": 2.863508389643166e-08, |
| "loss": 0.1647, |
| "step": 2749 |
| }, |
| { |
| "epoch": 1.932536893886156, |
| "grad_norm": 0.5064525296116995, |
| "learning_rate": 2.804826294832308e-08, |
| "loss": 0.135, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.933239634574842, |
| "grad_norm": 0.46886335922702166, |
| "learning_rate": 2.7467500384454336e-08, |
| "loss": 0.1224, |
| "step": 2751 |
| }, |
| { |
| "epoch": 1.9339423752635279, |
| "grad_norm": 0.5083094318225186, |
| "learning_rate": 2.6892796912492136e-08, |
| "loss": 0.1328, |
| "step": 2752 |
| }, |
| { |
| "epoch": 1.9346451159522138, |
| "grad_norm": 0.5386106239516805, |
| "learning_rate": 2.632415323271964e-08, |
| "loss": 0.1554, |
| "step": 2753 |
| }, |
| { |
| "epoch": 1.9353478566408995, |
| "grad_norm": 0.5061277285818073, |
| "learning_rate": 2.5761570038035367e-08, |
| "loss": 0.1428, |
| "step": 2754 |
| }, |
| { |
| "epoch": 1.9360505973295854, |
| "grad_norm": 0.4983884500730727, |
| "learning_rate": 2.5205048013955402e-08, |
| "loss": 0.1259, |
| "step": 2755 |
| }, |
| { |
| "epoch": 1.936753338018271, |
| "grad_norm": 0.5020273022078597, |
| "learning_rate": 2.4654587838606748e-08, |
| "loss": 0.1459, |
| "step": 2756 |
| }, |
| { |
| "epoch": 1.937456078706957, |
| "grad_norm": 0.47886167897388193, |
| "learning_rate": 2.411019018273342e-08, |
| "loss": 0.1225, |
| "step": 2757 |
| }, |
| { |
| "epoch": 1.938158819395643, |
| "grad_norm": 0.4988142905864442, |
| "learning_rate": 2.3571855709690894e-08, |
| "loss": 0.1445, |
| "step": 2758 |
| }, |
| { |
| "epoch": 1.9388615600843289, |
| "grad_norm": 0.49716727924247517, |
| "learning_rate": 2.303958507544446e-08, |
| "loss": 0.1256, |
| "step": 2759 |
| }, |
| { |
| "epoch": 1.9395643007730148, |
| "grad_norm": 0.5304810136406167, |
| "learning_rate": 2.251337892857419e-08, |
| "loss": 0.1514, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.9402670414617007, |
| "grad_norm": 0.5290525965050465, |
| "learning_rate": 2.1993237910267752e-08, |
| "loss": 0.1544, |
| "step": 2761 |
| }, |
| { |
| "epoch": 1.9409697821503866, |
| "grad_norm": 0.4885011615603234, |
| "learning_rate": 2.147916265432426e-08, |
| "loss": 0.1378, |
| "step": 2762 |
| }, |
| { |
| "epoch": 1.9416725228390725, |
| "grad_norm": 0.4915011344083778, |
| "learning_rate": 2.0971153787149867e-08, |
| "loss": 0.1302, |
| "step": 2763 |
| }, |
| { |
| "epoch": 1.9423752635277582, |
| "grad_norm": 0.543096423378022, |
| "learning_rate": 2.0469211927759413e-08, |
| "loss": 0.1549, |
| "step": 2764 |
| }, |
| { |
| "epoch": 1.9430780042164442, |
| "grad_norm": 0.49262107002073285, |
| "learning_rate": 1.9973337687776428e-08, |
| "loss": 0.1425, |
| "step": 2765 |
| }, |
| { |
| "epoch": 1.9437807449051299, |
| "grad_norm": 0.485666471340219, |
| "learning_rate": 1.948353167142869e-08, |
| "loss": 0.1266, |
| "step": 2766 |
| }, |
| { |
| "epoch": 1.9444834855938158, |
| "grad_norm": 0.49561951662222453, |
| "learning_rate": 1.899979447555156e-08, |
| "loss": 0.1304, |
| "step": 2767 |
| }, |
| { |
| "epoch": 1.9451862262825017, |
| "grad_norm": 0.49184544313668943, |
| "learning_rate": 1.852212668958353e-08, |
| "loss": 0.1362, |
| "step": 2768 |
| }, |
| { |
| "epoch": 1.9458889669711876, |
| "grad_norm": 0.5484980579758657, |
| "learning_rate": 1.805052889557013e-08, |
| "loss": 0.1696, |
| "step": 2769 |
| }, |
| { |
| "epoch": 1.9465917076598735, |
| "grad_norm": 0.5128507362320103, |
| "learning_rate": 1.7585001668158907e-08, |
| "loss": 0.1501, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.9472944483485595, |
| "grad_norm": 0.4833317483057414, |
| "learning_rate": 1.7125545574599445e-08, |
| "loss": 0.1228, |
| "step": 2771 |
| }, |
| { |
| "epoch": 1.9479971890372454, |
| "grad_norm": 0.5356504888410408, |
| "learning_rate": 1.667216117474557e-08, |
| "loss": 0.1339, |
| "step": 2772 |
| }, |
| { |
| "epoch": 1.948699929725931, |
| "grad_norm": 0.5418834936652501, |
| "learning_rate": 1.622484902105148e-08, |
| "loss": 0.1713, |
| "step": 2773 |
| }, |
| { |
| "epoch": 1.949402670414617, |
| "grad_norm": 0.49467990996465944, |
| "learning_rate": 1.5783609658572284e-08, |
| "loss": 0.1382, |
| "step": 2774 |
| }, |
| { |
| "epoch": 1.950105411103303, |
| "grad_norm": 0.5228184568169573, |
| "learning_rate": 1.534844362496346e-08, |
| "loss": 0.1591, |
| "step": 2775 |
| }, |
| { |
| "epoch": 1.9508081517919886, |
| "grad_norm": 0.48791356942766656, |
| "learning_rate": 1.4919351450480847e-08, |
| "loss": 0.1307, |
| "step": 2776 |
| }, |
| { |
| "epoch": 1.9515108924806746, |
| "grad_norm": 0.5069352430808004, |
| "learning_rate": 1.4496333657978423e-08, |
| "loss": 0.1427, |
| "step": 2777 |
| }, |
| { |
| "epoch": 1.9522136331693605, |
| "grad_norm": 0.4877245461993811, |
| "learning_rate": 1.4079390762907763e-08, |
| "loss": 0.1285, |
| "step": 2778 |
| }, |
| { |
| "epoch": 1.9529163738580464, |
| "grad_norm": 0.5205213256224333, |
| "learning_rate": 1.366852327331969e-08, |
| "loss": 0.1351, |
| "step": 2779 |
| }, |
| { |
| "epoch": 1.9536191145467323, |
| "grad_norm": 0.5167715049465063, |
| "learning_rate": 1.3263731689860949e-08, |
| "loss": 0.1473, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.9543218552354182, |
| "grad_norm": 0.5089333361983708, |
| "learning_rate": 1.2865016505774763e-08, |
| "loss": 0.1405, |
| "step": 2781 |
| }, |
| { |
| "epoch": 1.9550245959241042, |
| "grad_norm": 0.47431207475045234, |
| "learning_rate": 1.2472378206901392e-08, |
| "loss": 0.1249, |
| "step": 2782 |
| }, |
| { |
| "epoch": 1.9557273366127899, |
| "grad_norm": 0.5305144875113679, |
| "learning_rate": 1.2085817271674794e-08, |
| "loss": 0.1646, |
| "step": 2783 |
| }, |
| { |
| "epoch": 1.9564300773014758, |
| "grad_norm": 0.4808002717712444, |
| "learning_rate": 1.1705334171123739e-08, |
| "loss": 0.1195, |
| "step": 2784 |
| }, |
| { |
| "epoch": 1.9571328179901615, |
| "grad_norm": 0.5008382500155728, |
| "learning_rate": 1.1330929368872368e-08, |
| "loss": 0.1387, |
| "step": 2785 |
| }, |
| { |
| "epoch": 1.9578355586788474, |
| "grad_norm": 0.5085425504372016, |
| "learning_rate": 1.0962603321137965e-08, |
| "loss": 0.1363, |
| "step": 2786 |
| }, |
| { |
| "epoch": 1.9585382993675333, |
| "grad_norm": 0.509817666180665, |
| "learning_rate": 1.0600356476728746e-08, |
| "loss": 0.1648, |
| "step": 2787 |
| }, |
| { |
| "epoch": 1.9592410400562192, |
| "grad_norm": 0.47543924070843757, |
| "learning_rate": 1.0244189277048289e-08, |
| "loss": 0.1304, |
| "step": 2788 |
| }, |
| { |
| "epoch": 1.9599437807449052, |
| "grad_norm": 0.48357072384876065, |
| "learning_rate": 9.894102156089991e-09, |
| "loss": 0.1334, |
| "step": 2789 |
| }, |
| { |
| "epoch": 1.960646521433591, |
| "grad_norm": 0.5344793420221453, |
| "learning_rate": 9.550095540439841e-09, |
| "loss": 0.1609, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.961349262122277, |
| "grad_norm": 0.498941387604127, |
| "learning_rate": 9.212169849273645e-09, |
| "loss": 0.1318, |
| "step": 2791 |
| }, |
| { |
| "epoch": 1.9620520028109627, |
| "grad_norm": 0.5234164816723301, |
| "learning_rate": 8.880325494358132e-09, |
| "loss": 0.1609, |
| "step": 2792 |
| }, |
| { |
| "epoch": 1.9627547434996486, |
| "grad_norm": 0.48420091499064977, |
| "learning_rate": 8.554562880049855e-09, |
| "loss": 0.1348, |
| "step": 2793 |
| }, |
| { |
| "epoch": 1.9634574841883345, |
| "grad_norm": 0.494649993734143, |
| "learning_rate": 8.23488240329462e-09, |
| "loss": 0.1428, |
| "step": 2794 |
| }, |
| { |
| "epoch": 1.9641602248770202, |
| "grad_norm": 0.48798840617888206, |
| "learning_rate": 7.921284453626943e-09, |
| "loss": 0.1368, |
| "step": 2795 |
| }, |
| { |
| "epoch": 1.9648629655657062, |
| "grad_norm": 0.5358397286286372, |
| "learning_rate": 7.613769413169492e-09, |
| "loss": 0.1619, |
| "step": 2796 |
| }, |
| { |
| "epoch": 1.965565706254392, |
| "grad_norm": 0.4942463703204265, |
| "learning_rate": 7.312337656633639e-09, |
| "loss": 0.1417, |
| "step": 2797 |
| }, |
| { |
| "epoch": 1.966268446943078, |
| "grad_norm": 0.5168794803827059, |
| "learning_rate": 7.016989551317244e-09, |
| "loss": 0.1482, |
| "step": 2798 |
| }, |
| { |
| "epoch": 1.966971187631764, |
| "grad_norm": 0.5172478466477005, |
| "learning_rate": 6.72772545710576e-09, |
| "loss": 0.1504, |
| "step": 2799 |
| }, |
| { |
| "epoch": 1.9676739283204498, |
| "grad_norm": 0.5067616207084628, |
| "learning_rate": 6.4445457264711295e-09, |
| "loss": 0.1476, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.9683766690091358, |
| "grad_norm": 0.4925368391455481, |
| "learning_rate": 6.167450704471223e-09, |
| "loss": 0.1332, |
| "step": 2801 |
| }, |
| { |
| "epoch": 1.9690794096978215, |
| "grad_norm": 0.5035916486478855, |
| "learning_rate": 5.896440728749286e-09, |
| "loss": 0.1432, |
| "step": 2802 |
| }, |
| { |
| "epoch": 1.9697821503865074, |
| "grad_norm": 0.5338161968329757, |
| "learning_rate": 5.631516129535053e-09, |
| "loss": 0.1777, |
| "step": 2803 |
| }, |
| { |
| "epoch": 1.970484891075193, |
| "grad_norm": 0.5224862430478892, |
| "learning_rate": 5.37267722964252e-09, |
| "loss": 0.1455, |
| "step": 2804 |
| }, |
| { |
| "epoch": 1.971187631763879, |
| "grad_norm": 0.5141185484314288, |
| "learning_rate": 5.1199243444693955e-09, |
| "loss": 0.1483, |
| "step": 2805 |
| }, |
| { |
| "epoch": 1.971890372452565, |
| "grad_norm": 0.5077083364089896, |
| "learning_rate": 4.8732577819982084e-09, |
| "loss": 0.1514, |
| "step": 2806 |
| }, |
| { |
| "epoch": 1.9725931131412509, |
| "grad_norm": 0.5319882872378586, |
| "learning_rate": 4.632677842795752e-09, |
| "loss": 0.1544, |
| "step": 2807 |
| }, |
| { |
| "epoch": 1.9732958538299368, |
| "grad_norm": 0.5586920586860366, |
| "learning_rate": 4.398184820010865e-09, |
| "loss": 0.1743, |
| "step": 2808 |
| }, |
| { |
| "epoch": 1.9739985945186227, |
| "grad_norm": 0.5134131618759824, |
| "learning_rate": 4.16977899937665e-09, |
| "loss": 0.1359, |
| "step": 2809 |
| }, |
| { |
| "epoch": 1.9747013352073086, |
| "grad_norm": 0.5315119457479176, |
| "learning_rate": 3.9474606592088125e-09, |
| "loss": 0.1565, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.9754040758959945, |
| "grad_norm": 0.5207916928215086, |
| "learning_rate": 3.731230070403991e-09, |
| "loss": 0.1455, |
| "step": 2811 |
| }, |
| { |
| "epoch": 1.9761068165846802, |
| "grad_norm": 0.5057179303956064, |
| "learning_rate": 3.5210874964425323e-09, |
| "loss": 0.1474, |
| "step": 2812 |
| }, |
| { |
| "epoch": 1.9768095572733662, |
| "grad_norm": 0.53228415448207, |
| "learning_rate": 3.3170331933857214e-09, |
| "loss": 0.1546, |
| "step": 2813 |
| }, |
| { |
| "epoch": 1.9775122979620519, |
| "grad_norm": 0.5235103877189453, |
| "learning_rate": 3.1190674098757756e-09, |
| "loss": 0.1465, |
| "step": 2814 |
| }, |
| { |
| "epoch": 1.9782150386507378, |
| "grad_norm": 0.5139023335124028, |
| "learning_rate": 2.927190387137513e-09, |
| "loss": 0.1388, |
| "step": 2815 |
| }, |
| { |
| "epoch": 1.9789177793394237, |
| "grad_norm": 0.5083201912942791, |
| "learning_rate": 2.7414023589739104e-09, |
| "loss": 0.15, |
| "step": 2816 |
| }, |
| { |
| "epoch": 1.9796205200281096, |
| "grad_norm": 0.48924078867088844, |
| "learning_rate": 2.5617035517705448e-09, |
| "loss": 0.1414, |
| "step": 2817 |
| }, |
| { |
| "epoch": 1.9803232607167955, |
| "grad_norm": 0.5402871088107077, |
| "learning_rate": 2.3880941844933727e-09, |
| "loss": 0.1685, |
| "step": 2818 |
| }, |
| { |
| "epoch": 1.9810260014054815, |
| "grad_norm": 0.4986525119684943, |
| "learning_rate": 2.2205744686865093e-09, |
| "loss": 0.1453, |
| "step": 2819 |
| }, |
| { |
| "epoch": 1.9817287420941674, |
| "grad_norm": 0.5331741287449765, |
| "learning_rate": 2.0591446084755608e-09, |
| "loss": 0.1599, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.982431482782853, |
| "grad_norm": 0.5212883380775323, |
| "learning_rate": 1.9038048005642905e-09, |
| "loss": 0.1594, |
| "step": 2821 |
| }, |
| { |
| "epoch": 1.983134223471539, |
| "grad_norm": 0.4909700064168984, |
| "learning_rate": 1.754555234236288e-09, |
| "loss": 0.1277, |
| "step": 2822 |
| }, |
| { |
| "epoch": 1.983836964160225, |
| "grad_norm": 0.517172188613898, |
| "learning_rate": 1.6113960913538562e-09, |
| "loss": 0.1709, |
| "step": 2823 |
| }, |
| { |
| "epoch": 1.9845397048489106, |
| "grad_norm": 0.5264165237576832, |
| "learning_rate": 1.4743275463585672e-09, |
| "loss": 0.1449, |
| "step": 2824 |
| }, |
| { |
| "epoch": 1.9852424455375965, |
| "grad_norm": 0.4818380551145174, |
| "learning_rate": 1.3433497662701522e-09, |
| "loss": 0.1302, |
| "step": 2825 |
| }, |
| { |
| "epoch": 1.9859451862262825, |
| "grad_norm": 0.4773897093551395, |
| "learning_rate": 1.2184629106859468e-09, |
| "loss": 0.1283, |
| "step": 2826 |
| }, |
| { |
| "epoch": 1.9866479269149684, |
| "grad_norm": 0.5102079428826756, |
| "learning_rate": 1.0996671317825558e-09, |
| "loss": 0.1333, |
| "step": 2827 |
| }, |
| { |
| "epoch": 1.9873506676036543, |
| "grad_norm": 0.5066329999242195, |
| "learning_rate": 9.869625743147426e-10, |
| "loss": 0.1339, |
| "step": 2828 |
| }, |
| { |
| "epoch": 1.9880534082923402, |
| "grad_norm": 0.5322476428530849, |
| "learning_rate": 8.803493756132097e-10, |
| "loss": 0.1462, |
| "step": 2829 |
| }, |
| { |
| "epoch": 1.9887561489810262, |
| "grad_norm": 0.4984279936375653, |
| "learning_rate": 7.798276655879289e-10, |
| "loss": 0.1312, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.9894588896697118, |
| "grad_norm": 0.49844369269627375, |
| "learning_rate": 6.853975667259205e-10, |
| "loss": 0.1382, |
| "step": 2831 |
| }, |
| { |
| "epoch": 1.9901616303583978, |
| "grad_norm": 0.5203835407085381, |
| "learning_rate": 5.97059194091254e-10, |
| "loss": 0.1638, |
| "step": 2832 |
| }, |
| { |
| "epoch": 1.9908643710470835, |
| "grad_norm": 0.49832405876466607, |
| "learning_rate": 5.148126553256027e-10, |
| "loss": 0.146, |
| "step": 2833 |
| }, |
| { |
| "epoch": 1.9915671117357694, |
| "grad_norm": 0.5329271945627777, |
| "learning_rate": 4.3865805064768895e-10, |
| "loss": 0.1618, |
| "step": 2834 |
| }, |
| { |
| "epoch": 1.9922698524244553, |
| "grad_norm": 0.5191116734566509, |
| "learning_rate": 3.6859547285217343e-10, |
| "loss": 0.1627, |
| "step": 2835 |
| }, |
| { |
| "epoch": 1.9929725931131412, |
| "grad_norm": 0.5259367915958767, |
| "learning_rate": 3.0462500731076595e-10, |
| "loss": 0.1586, |
| "step": 2836 |
| }, |
| { |
| "epoch": 1.9936753338018272, |
| "grad_norm": 0.5488347954719632, |
| "learning_rate": 2.467467319733352e-10, |
| "loss": 0.1778, |
| "step": 2837 |
| }, |
| { |
| "epoch": 1.994378074490513, |
| "grad_norm": 0.5207730511495138, |
| "learning_rate": 1.9496071736513356e-10, |
| "loss": 0.1496, |
| "step": 2838 |
| }, |
| { |
| "epoch": 1.995080815179199, |
| "grad_norm": 0.5259137512529136, |
| "learning_rate": 1.4926702658735192e-10, |
| "loss": 0.1617, |
| "step": 2839 |
| }, |
| { |
| "epoch": 1.9957835558678847, |
| "grad_norm": 0.5261507425271373, |
| "learning_rate": 1.0966571531878523e-10, |
| "loss": 0.1636, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.9964862965565706, |
| "grad_norm": 0.4906760460963486, |
| "learning_rate": 7.6156831814167e-11, |
| "loss": 0.1311, |
| "step": 2841 |
| }, |
| { |
| "epoch": 1.9971890372452565, |
| "grad_norm": 0.5197366755912002, |
| "learning_rate": 4.874041690416942e-11, |
| "loss": 0.1627, |
| "step": 2842 |
| }, |
| { |
| "epoch": 1.9978917779339422, |
| "grad_norm": 0.6270833027729107, |
| "learning_rate": 2.741650399595841e-11, |
| "loss": 0.1591, |
| "step": 2843 |
| }, |
| { |
| "epoch": 1.9985945186226282, |
| "grad_norm": 0.5051745663409105, |
| "learning_rate": 1.2185119073748753e-11, |
| "loss": 0.1298, |
| "step": 2844 |
| }, |
| { |
| "epoch": 1.999297259311314, |
| "grad_norm": 0.5062670497815366, |
| "learning_rate": 3.046280696583637e-12, |
| "loss": 0.1486, |
| "step": 2845 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.4928858413588873, |
| "learning_rate": 0.0, |
| "loss": 0.1344, |
| "step": 2846 |
| }, |
| { |
| "epoch": 2.0, |
| "step": 2846, |
| "total_flos": 162152061075456.0, |
| "train_loss": 0.16860538316093443, |
| "train_runtime": 5138.7122, |
| "train_samples_per_second": 8.859, |
| "train_steps_per_second": 0.554 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 2846, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 70000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 162152061075456.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |